Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says: ==================== pull-request: bpf-next 2024-05-13 We've added 119 non-merge commits during the last 14 day(s) which contain a total of 134 files changed, 9462 insertions(+), 4742 deletions(-). The main changes are: 1) Add BPF JIT support for 32-bit ARCv2 processors, from Shahab Vahedi. 2) Add BPF range computation improvements to the verifier in particular around XOR and OR operators, refactoring of checks for range computation and relaxing MUL range computation so that src_reg can also be an unknown scalar, from Cupertino Miranda. 3) Add support to attach kprobe BPF programs through kprobe_multi link in a session mode, meaning, a BPF program is attached to both function entry and return, the entry program can decide if the return program gets executed and the entry program can share u64 cookie value with return program. Session mode is a common use-case for tetragon and bpftrace, from Jiri Olsa. 4) Fix a potential overflow in libbpf's ring__consume_n() and improve libbpf as well as BPF selftest's struct_ops handling, from Andrii Nakryiko. 5) Improvements to BPF selftests in context of BPF gcc backend, from Jose E. Marchesi & David Faust. 6) Migrate remaining BPF selftest tests from test_sock_addr.c to prog_test-style in order to retire the old test, run it in BPF CI and additionally expand test coverage, from Jordan Rife. 7) Big batch for BPF selftest refactoring in order to remove duplicate code around common network helpers, from Geliang Tang. 8) Another batch of improvements to BPF selftests to retire obsolete bpf_tcp_helpers.h as everything is available in vmlinux.h, from Martin KaFai Lau. 9) Fix BPF map tear-down to not walk the map twice on free when both timer and wq are used, from Benjamin Tissoires. 10) Fix BPF verifier assumptions about socket->sk that it can be non-NULL, from Alexei Starovoitov. 11) Change BTF build scripts to use --btf_features for pahole v1.26+, from Alan Maguire.
12) Small improvements to BPF reusing struct_size() and krealloc_array(), from Andy Shevchenko. 13) Fix s390 JIT to emit a barrier for BPF_FETCH instructions, from Ilya Leoshkevich. 14) Extend TCP ->cong_control() callback in order to feed in ack and flag parameters and allow write-access to tp->snd_cwnd_stamp from BPF program, from Miao Xu. 15) Add support for internal-only per-CPU instructions to inline bpf_get_smp_processor_id() helper call for arm64 and riscv64 BPF JITs, from Puranjay Mohan. 16) Follow-up to remove the redundant ethtool.h from tooling infrastructure, from Tushar Vyavahare. 17) Extend libbpf to support "module:<function>" syntax for tracing programs, from Viktor Malik. * tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (119 commits) bpf: make list_for_each_entry portable bpf: ignore expected GCC warning in test_global_func10.c bpf: disable strict aliasing in test_global_func9.c selftests/bpf: Free strdup memory in xdp_hw_metadata selftests/bpf: Fix a few tests for GCC related warnings. bpf: avoid gcc overflow warning in test_xdp_vlan.c tools: remove redundant ethtool.h from tooling infra selftests/bpf: Expand ATTACH_REJECT tests selftests/bpf: Expand getsockname and getpeername tests sefltests/bpf: Expand sockaddr hook deny tests selftests/bpf: Expand sockaddr program return value tests selftests/bpf: Retire test_sock_addr.(c|sh) selftests/bpf: Remove redundant sendmsg test cases selftests/bpf: Migrate ATTACH_REJECT test cases selftests/bpf: Migrate expected_attach_type tests selftests/bpf: Migrate wildcard destination rewrite test selftests/bpf: Migrate sendmsg6 v4 mapped address tests selftests/bpf: Migrate sendmsg deny test cases selftests/bpf: Migrate WILDCARD_IP test selftests/bpf: Handle SYSCALL_EPERM and SYSCALL_ENOTSUPP test cases ... ==================== Link: https://lore.kernel.org/r/20240513134114.17575-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
author: Jakub Kicinski <kuba@kernel.org> 2024-05-13 16:40:22 -0700
committer: Jakub Kicinski <kuba@kernel.org> 2024-05-13 16:41:10 -0700
commit: 6e62702feb6d474e969b52f0379de93e9729e457 (patch)
tree: aa109c16cb31139adffc01d01e40e28275ce7507 /arch/arc
parent: afd29f36aaf733985df1ba162424581b8b8853b4 (diff)
parent: ba39486d2c43ba7c103c438540aa56c8bde3b6c7 (diff)
download: linux-6e62702feb6d474e969b52f0379de93e9729e457.tar.gz
linux-6e62702feb6d474e969b52f0379de93e9729e457.tar.bz2
linux-6e62702feb6d474e969b52f0379de93e9729e457.zip
6 files changed, 4602 insertions, 0 deletions
diff --git a/arch/arc/Kbuild b/arch/arc/Kbuild
index b94102fff68b..20ea7dd482d4 100644
--- a/arch/arc/Kbuild
+++ b/arch/arc/Kbuild
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-y += kernel/
 obj-y += mm/
+obj-y += net/
 
 # for cleaning
 subdir- += boot
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 4092bec198be..fd0b0a0d4686 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -51,6 +51,7 @@ config ARC
 	select PCI_SYSCALL if PCI
 	select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
 	select TRACE_IRQFLAGS_SUPPORT
+	select HAVE_EBPF_JIT if ISA_ARCV2
 
 config LOCKDEP_SUPPORT
 	def_bool y
diff --git a/arch/arc/net/Makefile b/arch/arc/net/Makefile
new file mode 100644
index 000000000000..ea5790952e9a
--- /dev/null
+++ b/arch/arc/net/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+ifeq ($(CONFIG_ISA_ARCV2),y)
+	obj-$(CONFIG_BPF_JIT) += bpf_jit_core.o
+	obj-$(CONFIG_BPF_JIT) += bpf_jit_arcv2.o
+endif
diff --git a/arch/arc/net/bpf_jit.h b/arch/arc/net/bpf_jit.h
new file mode 100644
index 000000000000..ec44873c42d1
--- /dev/null
+++ b/arch/arc/net/bpf_jit.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * The interface that a back-end should provide to bpf_jit_core.c.
+ *
+ * Copyright (c) 2024 Synopsys Inc.
+ * Author: Shahab Vahedi <shahab@synopsys.com>
+ */
+
+#ifndef _ARC_BPF_JIT_H
+#define _ARC_BPF_JIT_H
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+
+/* Print debug info and assert. */
+//#define ARC_BPF_JIT_DEBUG
+
+/* Determine the address type of the target. */
+#ifdef CONFIG_ISA_ARCV2
+#define ARC_ADDR u32
+#endif
+
+/*
+ * For the translation of some BPF instructions, a temporary register
+ * might be needed for some interim data.
+ */
+#define JIT_REG_TMP MAX_BPF_JIT_REG
+
+/*
+ * Buffer access: If buffer "b" is not NULL, advance by "n" bytes.
+ *
+ * This macro must be used in any place that potentially requires a
+ * "buf + len". This way, we make sure that the "buf" argument for
+ * the underlying "arc_*(buf, ...)" ends up as NULL instead of something
+ * like "0+4" or "0+8", etc. Those "arc_*()" functions check their "buf"
+ * value to decide if instructions should be emitted or not.
+ */
+#define BUF(b, n) (((b) != NULL) ? ((b) + (n)) : (b))
+
+/************** Functions that the back-end must provide **************/
+/* Extension for 32-bit operations; defined out of line by the back-end. */
+u8 zext(u8 *buf, u8 rd);
+/***** Moves *****/
+u8 mov_r32(u8 *buf, u8 rd, u8 rs, u8 sign_ext);
+u8 mov_r32_i32(u8 *buf, u8 reg, s32 imm);
+u8 mov_r64(u8 *buf, u8 rd, u8 rs, u8 sign_ext);
+u8 mov_r64_i32(u8 *buf, u8 reg, s32 imm);
+u8 mov_r64_i64(u8 *buf, u8 reg, u32 lo, u32 hi);
+/***** Loads and stores *****/
+u8 load_r(u8 *buf, u8 rd, u8 rs, s16 off, u8 size, bool sign_ext);
+u8 store_r(u8 *buf, u8 rd, u8 rs, s16 off, u8 size);
+u8 store_i(u8 *buf, s32 imm, u8 rd, s16 off, u8 size);
+/***** Addition *****/
+u8 add_r32(u8 *buf, u8 rd, u8 rs);
+u8 add_r32_i32(u8 *buf, u8 rd, s32 imm);
+u8 add_r64(u8 *buf, u8 rd, u8 rs);
+u8 add_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Subtraction *****/
+u8 sub_r32(u8 *buf, u8 rd, u8 rs);
+u8 sub_r32_i32(u8 *buf, u8 rd, s32 imm);
+u8 sub_r64(u8 *buf, u8 rd, u8 rs);
+u8 sub_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Multiplication *****/
+u8 mul_r32(u8 *buf, u8 rd, u8 rs);
+u8 mul_r32_i32(u8 *buf, u8 rd, s32 imm);
+u8 mul_r64(u8 *buf, u8 rd, u8 rs);
+u8 mul_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Division *****/
+u8 div_r32(u8 *buf, u8 rd, u8 rs, bool sign_ext);
+u8 div_r32_i32(u8 *buf, u8 rd, s32 imm, bool sign_ext);
+/***** Remainder *****/
+u8 mod_r32(u8 *buf, u8 rd, u8 rs, bool sign_ext);
+u8 mod_r32_i32(u8 *buf, u8 rd, s32 imm, bool sign_ext);
+/***** Bitwise AND *****/
+u8 and_r32(u8 *buf, u8 rd, u8 rs);
+u8 and_r32_i32(u8 *buf, u8 rd, s32 imm);
+u8 and_r64(u8 *buf, u8 rd, u8 rs);
+u8 and_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Bitwise OR *****/
+u8 or_r32(u8 *buf, u8 rd, u8 rs);
+u8 or_r32_i32(u8 *buf, u8 rd, s32 imm);
+u8 or_r64(u8 *buf, u8 rd, u8 rs);
+u8 or_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Bitwise XOR *****/
+u8 xor_r32(u8 *buf, u8 rd, u8 rs);
+u8 xor_r32_i32(u8 *buf, u8 rd, s32 imm);
+u8 xor_r64(u8 *buf, u8 rd, u8 rs);
+u8 xor_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Bitwise Negate *****/
+u8 neg_r32(u8 *buf, u8 r);
+u8 neg_r64(u8 *buf, u8 r);
+/***** Bitwise left shift *****/
+u8 lsh_r32(u8 *buf, u8 rd, u8 rs);
+u8 lsh_r32_i32(u8 *buf, u8 rd, u8 imm);
+u8 lsh_r64(u8 *buf, u8 rd, u8 rs);
+u8 lsh_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Bitwise right shift (logical) *****/
+u8 rsh_r32(u8 *buf, u8 rd, u8 rs);
+u8 rsh_r32_i32(u8 *buf, u8 rd, u8 imm);
+u8 rsh_r64(u8 *buf, u8 rd, u8 rs);
+u8 rsh_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Bitwise right shift (arithmetic) *****/
+u8 arsh_r32(u8 *buf, u8 rd, u8 rs);
+u8 arsh_r32_i32(u8 *buf, u8 rd, u8 imm);
+u8 arsh_r64(u8 *buf, u8 rd, u8 rs);
+u8 arsh_r64_i32(u8 *buf, u8 rd, s32 imm);
+/***** Frame related *****/
+u32 mask_for_used_regs(u8 bpf_reg, bool is_call);
+u8 arc_prologue(u8 *buf, u32 usage, u16 frame_size);
+u8 arc_epilogue(u8 *buf, u32 usage, u16 frame_size);
+/***** Jumps *****/
+/*
+ * Different sorts of conditions (ARC enum as opposed to BPF_*).
+ *
+ * Do not change the order of enums here. ARC_CC_SLE+1 is used
+ * to determine the number of JCCs.
+ */
+enum ARC_CC {
+	ARC_CC_UGT = 0,		/* unsigned >  */
+	ARC_CC_UGE,		/* unsigned >= */
+	ARC_CC_ULT,		/* unsigned <  */
+	ARC_CC_ULE,		/* unsigned <= */
+	ARC_CC_SGT,		/*   signed >  */
+	ARC_CC_SGE,		/*   signed >= */
+	ARC_CC_SLT,		/*   signed <  */
+	ARC_CC_SLE,		/*   signed <= */
+	ARC_CC_AL,		/* always      */
+	ARC_CC_EQ,		/*          == */
+	ARC_CC_NE,		/*          != */
+	ARC_CC_SET,		/* test        */
+	ARC_CC_LAST
+};
+
+/*
+ * A few notes:
+ *
+ * - check_jmp_*() are prerequisites before calling the gen_jmp_*().
+ *   They return "true" if the jump is possible and "false" otherwise.
+ *
+ * - The notion of "*_off" is to emphasize that these parameters are
+ *   merely offsets in the JIT stream and not absolute addresses. One
+ *   can look at them as addresses if the JIT code would start from
+ *   address 0x0000_0000. Nonetheless, since the buffer address for the
+ *   JIT is on a word-aligned address, this works and actually makes
+ *   things simpler (offsets are in the range of u32 which is more than
+ *   enough).
+ */
+bool check_jmp_32(u32 curr_off, u32 targ_off, u8 cond);
+bool check_jmp_64(u32 curr_off, u32 targ_off, u8 cond);
+u8 gen_jmp_32(u8 *buf, u8 rd, u8 rs, u8 cond, u32 c_off, u32 t_off);
+u8 gen_jmp_64(u8 *buf, u8 rd, u8 rs, u8 cond, u32 c_off, u32 t_off);
+/***** Miscellaneous *****/
+u8 gen_func_call(u8 *buf, ARC_ADDR func_addr, bool external_func);
+u8 arc_to_bpf_return(u8 *buf);
+/*
+ * - Perform byte swaps on "rd" based on the "size".
+ * - If "force" is set, do it unconditionally. Otherwise, consider the
+ *   desired "endian"ness and the host endianness.
+ * - For data "size"s up to 32 bits, perform a zero-extension if asked
+ *   by the "do_zext" boolean.
+ */
+u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext);
+
+#endif /* _ARC_BPF_JIT_H */
diff --git a/arch/arc/net/bpf_jit_arcv2.c b/arch/arc/net/bpf_jit_arcv2.c
new file mode 100644
index 000000000000..31bfb6e9ce00
--- /dev/null
+++ b/arch/arc/net/bpf_jit_arcv2.c
@@ -0,0 +1,3005 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The ARCv2 backend of Just-In-Time compiler for eBPF bytecode.
+ *
+ * Copyright (c) 2024 Synopsys Inc.
+ * Author: Shahab Vahedi <shahab@synopsys.com>
+ */
+#include <linux/bug.h>
+#include "bpf_jit.h"
+
+/* ARC core registers. */
+enum {
+	ARC_R_0,  ARC_R_1,  ARC_R_2,  ARC_R_3,  ARC_R_4,  ARC_R_5,
+	ARC_R_6,  ARC_R_7,  ARC_R_8,  ARC_R_9,  ARC_R_10, ARC_R_11,
+	ARC_R_12, ARC_R_13, ARC_R_14, ARC_R_15, ARC_R_16, ARC_R_17,
+	ARC_R_18, ARC_R_19, ARC_R_20, ARC_R_21, ARC_R_22, ARC_R_23,
+	ARC_R_24, ARC_R_25, ARC_R_26, ARC_R_FP, ARC_R_SP, ARC_R_ILINK,
+	ARC_R_30, ARC_R_BLINK,
+	/*
+	 * Having ARC_R_IMM encoded as source register means there is an
+	 * immediate that must be interpreted from the next 4 bytes. If
+	 * encoded as the destination register though, it implies that the
+	 * output of the operation is not assigned to any register. The
+	 * latter is helpful if we only care about updating the CPU status
+	 * flags.
+	 */
+	ARC_R_IMM = 62
+};
+
+/*
+ * Remarks about the rationale behind the chosen mapping:
+ *
+ * - BPF_REG_{1,2,3,4} are the argument registers and must be mapped to
+ *   argument registers in ARCv2 ABI: r0-r7. The r7 register is the last
+ *   argument register in the ABI. Therefore BPF_REG_5, as the fifth
+ *   argument, must be pushed onto the stack. This is a must for calling
+ *   in-kernel functions.
+ *
+ * - In ARCv2 ABI, the return value is in r0 for 32-bit results and (r1,r0)
+ *   for 64-bit results. However, because they're already used for BPF_REG_1,
+ *   the next available scratch registers, r8 and r9, are the best candidates
+ *   for BPF_REG_0. After a "call" to a(n) (in-kernel) function, the result
+ *   is "mov"ed to these registers. At a BPF_EXIT, their value is "mov"ed to
+ *   (r1,r0).
+ *   It is worth mentioning that scratch registers are the best choice for
+ *   BPF_REG_0, because it is very popular in BPF instruction encoding.
+ *
+ * - JIT_REG_TMP is an artifact needed to translate some BPF instructions.
+ *   Its life span is one single BPF instruction. Since during the
+ *   analyze_reg_usage(), it is not known if temporary registers are used,
+ *   it is mapped to ARC's scratch registers: r10 and r11. Therefore, they
+ *   don't matter in analysing phase and don't need saving. This temporary
+ *   register is added as yet another index in the bpf2arc array, so it will
+ *   unfold like the rest of registers during the code generation process.
+ *
+ * - Mapping of callee-saved BPF registers, BPF_REG_{6,7,8,9}, starts from
+ *   (r15,r14) register pair. The (r13,r12) is not a good choice, because
+ *   in ARCv2 ABI, r12 is not a callee-saved register and this can cause
+ *   problem when calling an in-kernel function. Theoretically, the mapping
+ *   could start from (r14,r13), but it is not a conventional ARCv2 register
+ *   pair. To have a future proof design, I opted for this arrangement.
+ *   If/when we decide to add ARCv2 instructions that do use register pairs,
+ *   the mapping, hopefully, doesn't need to be revisited.
+ */
+/*
+ * BPF register -> ARC register pair, indexed by BPF register number.
+ * Element [0] is read through REG_LO() and element [1] through REG_HI().
+ * The table is private to this back-end, hence "static".
+ */
+static const u8 bpf2arc[][2] = {
+	/* Return value from in-kernel function, and exit value from eBPF */
+	[BPF_REG_0] = {ARC_R_8, ARC_R_9},
+	/* Arguments from eBPF program to in-kernel function */
+	[BPF_REG_1] = {ARC_R_0, ARC_R_1},
+	[BPF_REG_2] = {ARC_R_2, ARC_R_3},
+	[BPF_REG_3] = {ARC_R_4, ARC_R_5},
+	[BPF_REG_4] = {ARC_R_6, ARC_R_7},
+	/* Remaining arguments, to be passed on the stack per 32-bit ABI */
+	[BPF_REG_5] = {ARC_R_22, ARC_R_23},
+	/* Callee-saved registers that in-kernel function will preserve */
+	[BPF_REG_6] = {ARC_R_14, ARC_R_15},
+	[BPF_REG_7] = {ARC_R_16, ARC_R_17},
+	[BPF_REG_8] = {ARC_R_18, ARC_R_19},
+	[BPF_REG_9] = {ARC_R_20, ARC_R_21},
+	/* Read-only frame pointer to access the eBPF stack. 32-bit only. */
+	[BPF_REG_FP] = {ARC_R_FP, },
+	/* Register for blinding constants */
+	[BPF_REG_AX] = {ARC_R_24, ARC_R_25},
+	/* Temporary registers for internal use */
+	[JIT_REG_TMP] = {ARC_R_10, ARC_R_11}
+};
+
+#define ARC_CALLEE_SAVED_REG_FIRST ARC_R_13
+#define ARC_CALLEE_SAVED_REG_LAST  ARC_R_25
+
+#define REG_LO(r) (bpf2arc[(r)][0])
+#define REG_HI(r) (bpf2arc[(r)][1])
+
+/*
+ * To comply with ARCv2 ABI, BPF's arg5 must be put on stack. After which,
+ * the stack needs to be restored by ARG5_SIZE.
+ */
+#define ARG5_SIZE 8
+
+/* Instruction lengths in bytes. */
+enum {
+	INSN_len_normal = 4,	/* Normal instructions length. */
+	INSN_len_imm = 4	/* Length of an extra 32-bit immediate. */
+};
+
+/* ZZ defines the size of the operation in the encodings that use it. */
+enum {
+	ZZ_1_byte = 1,
+	ZZ_2_byte = 2,
+	ZZ_4_byte = 0,
+	ZZ_8_byte = 3
+};
+
+/*
+ * AA is mostly about address write back mode. It determines if the
+ * address in question should be updated before usage or after:
+ *   addr += offset; data = *addr;
+ *   data = *addr; addr += offset;
+ *
+ * In "scaling" mode, the effective address will become the sum
+ * of "address" + "index"*"size". The "size" is specified by the
+ * "ZZ" field. There is no write back when AA is set for scaling:
+ *   data = *(addr + offset<<zz)
+ */
+enum {
+	AA_none  = 0,
+	AA_pre   = 1,	/* in assembly known as "a/aw". */
+	AA_post  = 2,	/* in assembly known as "ab". */
+	AA_scale = 3	/* in assembly known as "as". */
+};
+
+/* X flag determines the mode of extension. */
+enum {
+	X_zero = 0,
+	X_sign = 1
+};
+
+/* Condition codes. */
+enum {
+	CC_always     = 0,	/* condition is true all the time */
+	CC_equal      = 1,	/* if status32.z flag is set */
+	CC_unequal    = 2,	/* if status32.z flag is clear */
+	CC_positive   = 3,	/* if status32.n flag is clear */
+	CC_negative   = 4,	/* if status32.n flag is set */
+	CC_less_u     = 5,	/* less than (unsigned) */
+	CC_less_eq_u  = 14,	/* less than or equal (unsigned) */
+	CC_great_eq_u = 6,	/* greater than or equal (unsigned) */
+	CC_great_u    = 13,	/* greater than (unsigned) */
+	CC_less_s     = 11,	/* less than (signed) */
+	CC_less_eq_s  = 12,	/* less than or equal (signed) */
+	CC_great_eq_s = 10,	/* greater than or equal (signed) */
+	CC_great_s    = 9	/* greater than (signed) */
+};
+
+#define IN_U6_RANGE(x)	((x) <= (0x40      - 1) && (x) >= 0)
+#define IN_S9_RANGE(x)	((x) <= (0x100     - 1) && (x) >= -0x100)
+#define IN_S12_RANGE(x)	((x) <= (0x800     - 1) && (x) >= -0x800)
+#define IN_S21_RANGE(x)	((x) <= (0x100000  - 1) && (x) >= -0x100000)
+#define IN_S25_RANGE(x)	((x) <= (0x1000000 - 1) && (x) >= -0x1000000)
+
+/* Operands in most of the encodings. */
+#define OP_A(x)	((x) & 0x03f)
+#define OP_B(x)	((((x) & 0x07) << 24) | (((x) & 0x38) <<  9))
+#define OP_C(x)	(((x) & 0x03f) << 6)
+#define OP_IMM	(OP_C(ARC_R_IMM))
+#define COND(x)	(OP_A((x) & 31))
+#define FLAG(x)	(((x) & 1) << 15)
+
+/*
+ * The 4-byte encoding of "mov b,c":
+ *
+ * 0010_0bbb 0000_1010 0BBB_cccc cc00_0000
+ *
+ * b:  BBBbbb		destination register
+ * c:  cccccc		source register
+ */
+#define OPC_MOV		0x200a0000
+
+/*
+ * The 4-byte encoding of "mov b,s12" (used for moving small immediates):
+ *
+ * 0010_0bbb 1000_1010 0BBB_ssss ssSS_SSSS
+ *
+ * b:  BBBbbb		destination register
+ * s:  SSSSSSssssss	source immediate (signed)
+ */
+#define OPC_MOVI	0x208a0000
+#define MOVI_S12(x)	((((x) & 0xfc0) >> 6) | (((x) & 0x3f) << 6))
+
+/*
+ * The 4-byte encoding of "mov[.qq] b,u6", used for conditional
+ * moving of even smaller immediates:
+ *
+ * 0010_0bbb 1100_1010 0BBB_cccc cciq_qqqq
+ *
+ * qq: qqqqq		condition code
+ * i:			If set, c is considered a 6-bit immediate, else a reg.
+ *
+ * b:  BBBbbb		destination register
+ * c:  cccccc		source
+ */
+#define OPC_MOV_CC	0x20ca0000
+#define MOV_CC_I	BIT(5)
+#define OPC_MOVU_CC	(OPC_MOV_CC | MOV_CC_I)
+
+/*
+ * The 4-byte encoding of "sexb b,c" (8-bit sign extension):
+ *
+ * 0010_0bbb 0010_1111 0BBB_cccc cc00_0101
+ *
+ * b:  BBBbbb		destination register
+ * c:  cccccc		source register
+ */
+#define OPC_SEXB	0x202f0005
+
+/*
+ * The 4-byte encoding of "sexh b,c" (16-bit sign extension):
+ *
+ * 0010_0bbb 0010_1111 0BBB_cccc cc00_0110
+ *
+ * b:  BBBbbb		destination register
+ * c:  cccccc		source register
+ */
+#define OPC_SEXH	0x202f0006
+
+/*
+ * The 4-byte encoding of "ld[zz][.x][.aa] c,[b,s9]":
+ *
+ * 0001_0bbb ssss_ssss SBBB_0aaz zxcc_cccc
+ *
+ * zz:			size mode
+ * aa:			address write back mode
+ * x:			extension mode
+ *
+ * s9: S_ssss_ssss	9-bit signed number
+ * b:  BBBbbb		source reg for address
+ * c:  cccccc		destination register
+ */
+#define OPC_LOAD	0x10000000
+#define LOAD_X(x)	((x) << 6)
+#define LOAD_ZZ(x)	((x) << 7)
+#define LOAD_AA(x)	((x) << 9)
+#define LOAD_S9(x)	((((x) & 0x0ff) << 16) | (((x) & 0x100) <<  7))
+#define LOAD_C(x)	((x) & 0x03f)
+/* Unsigned and signed loads. */
+#define OPC_LDU		(OPC_LOAD | LOAD_X(X_zero))
+#define OPC_LDS		(OPC_LOAD | LOAD_X(X_sign))
+/* 32-bit load. */
+#define OPC_LD32	(OPC_LDU | LOAD_ZZ(ZZ_4_byte))
+/* "pop reg" is merely a "ld.ab reg,[sp,4]". */
+#define OPC_POP		\
+	(OPC_LD32 | LOAD_AA(AA_post) | LOAD_S9(4) | OP_B(ARC_R_SP))
+
+/*
+ * The 4-byte encoding of "st[zz][.aa] c,[b,s9]":
+ *
+ * 0001_1bbb ssss_ssss SBBB_cccc cc0a_azz0
+ *
+ * zz: zz		size mode
+ * aa: aa		address write back mode
+ *
+ * s9: S_ssss_ssss	9-bit signed number
+ * b:  BBBbbb		source reg for address
+ * c:  cccccc		source reg to be stored
+ */
+#define OPC_STORE	0x18000000
+#define STORE_ZZ(x)	((x) << 1)
+#define STORE_AA(x)	((x) << 3)
+#define STORE_S9(x)	((((x) & 0x0ff) << 16) | (((x) & 0x100) <<  7))
+/* 32-bit store. */
+#define OPC_ST32	(OPC_STORE | STORE_ZZ(ZZ_4_byte))
+/* "push reg" is merely a "st.aw reg,[sp,-4]". */
+#define OPC_PUSH	\
+	(OPC_ST32 | STORE_AA(AA_pre) | STORE_S9(-4) | OP_B(ARC_R_SP))
+
+/*
+ * The 4-byte encoding of "add a,b,c":
+ *
+ * 0010_0bbb 0i00_0000 fBBB_cccc ccaa_aaaa
+ *
+ * f:                   indicates if flags (carry, etc.) should be updated
+ * i:			If set, c is considered a 6-bit immediate, else a reg.
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_ADD		0x20000000
+/* Addition with updating the pertinent flags in "status32" register. */
+#define OPC_ADDF	(OPC_ADD | FLAG(1))
+#define ADDI		BIT(22)
+#define ADDI_U6(x)	OP_C(x)
+#define OPC_ADDI	(OPC_ADD | ADDI)
+#define OPC_ADDIF	(OPC_ADDI | FLAG(1))
+#define OPC_ADD_I	(OPC_ADD | OP_IMM)
+
+/*
+ * The 4-byte encoding of "adc a,b,c" (addition with carry):
+ *
+ * 0010_0bbb 0i00_0001 0BBB_cccc ccaa_aaaa
+ *
+ * i:			if set, c is considered a 6-bit immediate, else a reg.
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_ADC		0x20010000
+#define ADCI		BIT(22)
+#define ADCI_U6(x)	OP_C(x)
+#define OPC_ADCI	(OPC_ADC | ADCI)
+
+/*
+ * The 4-byte encoding of "sub a,b,c":
+ *
+ * 0010_0bbb 0i00_0010 fBBB_cccc ccaa_aaaa
+ *
+ * f:                   indicates if flags (carry, etc.) should be updated
+ * i:			if set, c is considered a 6-bit immediate, else a reg.
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_SUB		0x20020000
+/* Subtraction with updating the pertinent flags in "status32" register. */
+#define OPC_SUBF	(OPC_SUB | FLAG(1))
+#define SUBI		BIT(22)
+#define SUBI_U6(x)	OP_C(x)
+#define OPC_SUBI	(OPC_SUB | SUBI)
+#define OPC_SUB_I	(OPC_SUB | OP_IMM)
+
+/*
+ * The 4-byte encoding of "sbc a,b,c" (subtraction with carry):
+ *
+ * 0010_0bbb 0000_0011 fBBB_cccc ccaa_aaaa
+ *
+ * f:                   indicates if flags (carry, etc.) should be updated
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_SBC		0x20030000
+
+/*
+ * The 4-byte encoding of "cmp[.qq] b,c":
+ *
+ * 0010_0bbb 1100_1100 1BBB_cccc cc0q_qqqq
+ *
+ * qq:	qqqqq		condition code
+ *
+ * b:  BBBbbb		the 1st operand
+ * c:  cccccc		the 2nd operand
+ */
+#define OPC_CMP		0x20cc8000
+
+/*
+ * The 4-byte encoding of "neg a,b":
+ *
+ * 0010_0bbb 0100_1110 0BBB_0000 00aa_aaaa
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		input
+ */
+#define OPC_NEG		0x204e0000
+
+/*
+ * The 4-byte encoding of "mpy a,b,c".
+ * mpy is the signed 32-bit multiplication with the lower 32-bit
+ * of the product as the result.
+ *
+ * 0010_0bbb 0001_1010 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_MPY		0x201a0000
+#define OPC_MPYI	(OPC_MPY | OP_IMM)
+
+/*
+ * The 4-byte encoding of "mpydu a,b,c".
+ * mpydu is the unsigned 32-bit multiplication with the lower 32-bit of
+ * the product in register "a" and the higher 32-bit in register "a+1".
+ *
+ * 0010_1bbb 0001_1001 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		64-bit result in registers (R_a+1,R_a)
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_MPYDU	0x28190000
+#define OPC_MPYDUI	(OPC_MPYDU | OP_IMM)
+
+/*
+ * The 4-byte encoding of "divu a,b,c" (unsigned division):
+ *
+ * 0010_1bbb 0000_0101 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		result (quotient)
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand (divisor)
+ */
+#define OPC_DIVU	0x28050000
+#define OPC_DIVUI	(OPC_DIVU | OP_IMM)
+
+/*
+ * The 4-byte encoding of "div a,b,c" (signed division):
+ *
+ * 0010_1bbb 0000_0100 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		result (quotient)
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand (divisor)
+ */
+#define OPC_DIVS	0x28040000
+#define OPC_DIVSI	(OPC_DIVS | OP_IMM)
+
+/*
+ * The 4-byte encoding of "remu a,b,c" (unsigned remainder):
+ *
+ * 0010_1bbb 0000_1001 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		result (remainder)
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand (divisor)
+ */
+#define OPC_REMU	0x28090000
+#define OPC_REMUI	(OPC_REMU | OP_IMM)
+
+/*
+ * The 4-byte encoding of "rem a,b,c" (signed remainder):
+ *
+ * 0010_1bbb 0000_1000 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		result (remainder)
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand (divisor)
+ */
+#define OPC_REMS	0x28080000
+#define OPC_REMSI	(OPC_REMS | OP_IMM)
+
+/*
+ * The 4-byte encoding of "and a,b,c":
+ *
+ * 0010_0bbb 0000_0100 fBBB_cccc ccaa_aaaa
+ *
+ * f:                   indicates if zero and negative flags should be updated
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_AND		0x20040000
+#define OPC_ANDI	(OPC_AND | OP_IMM)
+
+/*
+ * The 4-byte encoding of "tst[.qq] b,c".
+ * Checks if the two input operands have any bit set at the same
+ * position.
+ *
+ * 0010_0bbb 1100_1011 1BBB_cccc cc0q_qqqq
+ *
+ * qq:	qqqqq		condition code
+ *
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_TST		0x20cb8000
+
+/*
+ * The 4-byte encoding of "or a,b,c":
+ *
+ * 0010_0bbb 0000_0101 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_OR		0x20050000
+#define OPC_ORI		(OPC_OR | OP_IMM)
+
+/*
+ * The 4-byte encoding of "xor a,b,c":
+ *
+ * 0010_0bbb 0000_0111 0BBB_cccc ccaa_aaaa
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		the 1st input operand
+ * c:  cccccc		the 2nd input operand
+ */
+#define OPC_XOR		0x20070000
+#define OPC_XORI	(OPC_XOR | OP_IMM)
+
+/*
+ * The 4-byte encoding of "not b,c":
+ *
+ * 0010_0bbb 0010_1111 0BBB_cccc cc00_1010
+ *
+ * b:  BBBbbb		result
+ * c:  cccccc		input
+ */
+#define OPC_NOT		0x202f000a
+
+/*
+ * The 4-byte encoding of "btst b,u6":
+ *
+ * 0010_0bbb 0101_0001 1BBB_uuuu uu00_0000
+ *
+ * b:  BBBbbb		input number to check
+ * u6: uuuuuu		6-bit unsigned number specifying bit position to check
+ */
+#define OPC_BTSTU6	0x20518000
+#define BTST_U6(x)	(OP_C((x) & 63))
+
+/*
+ * The 4-byte encoding of "asl[.qq] b,b,c" (arithmetic shift left):
+ *
+ * 0010_1bbb 0i00_0000 0BBB_cccc ccaa_aaaa
+ *
+ * i:			if set, c is considered a 5-bit immediate, else a reg.
+ *
+ * b:  BBBbbb		result and the first operand (number to be shifted)
+ * c:  cccccc		amount to be shifted
+ */
+#define OPC_ASL		0x28000000
+#define ASL_I		BIT(22)
+#define ASLI_U6(x)	OP_C((x) & 31)
+#define OPC_ASLI	(OPC_ASL | ASL_I)
+
+/*
+ * The 4-byte encoding of "asr a,b,c" (arithmetic shift right):
+ *
+ * 0010_1bbb 0i00_0010 0BBB_cccc ccaa_aaaa
+ *
+ * i:			if set, c is considered a 6-bit immediate, else a reg.
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		first input:  number to be shifted
+ * c:  cccccc		second input: amount to be shifted
+ */
+#define OPC_ASR		0x28020000
+#define ASR_I		ASL_I
+#define ASRI_U6(x)	ASLI_U6(x)
+#define OPC_ASRI	(OPC_ASR | ASR_I)
+
+/*
+ * The 4-byte encoding of "lsr a,b,c" (logical shift right):
+ *
+ * 0010_1bbb 0i00_0001 0BBB_cccc ccaa_aaaa
+ *
+ * i:			if set, c is considered a 6-bit immediate, else a reg.
+ *
+ * a:  aaaaaa		result
+ * b:  BBBbbb		first input:  number to be shifted
+ * c:  cccccc		second input: amount to be shifted
+ */
+#define OPC_LSR		0x28010000
+#define LSR_I		ASL_I
+#define LSRI_U6(x)	ASLI_U6(x)
+#define OPC_LSRI	(OPC_LSR | LSR_I)
+
+/*
+ * The 4-byte encoding of "swape b,c":
+ *
+ * 0010_1bbb 0010_1111 0BBB_cccc cc00_1001
+ *
+ * b:  BBBbbb		destination register
+ * c:  cccccc		source register
+ */
+#define OPC_SWAPE	0x282f0009
+
+/*
+ * Encoding for jump to an address in register:
+ * j reg_c
+ *
+ * 0010_0000 1110_0000 0000_cccc cc00_0000
+ *
+ * c:  cccccc		register holding the destination address
+ */
+#define OPC_JMP		0x20e00000
+/* Jump to "branch-and-link" register, which effectively is a "return". */
+#define OPC_J_BLINK	(OPC_JMP | OP_C(ARC_R_BLINK))
+
+/*
+ * Encoding for jump-and-link to an address in register:
+ * jl reg_c
+ *
+ * 0010_0000 0010_0010 0000_cccc cc00_0000
+ *
+ * c:  cccccc		register holding the destination address
+ */
+#define OPC_JL		0x20220000
+
+/*
+ * Encoding for (conditional) branch to an offset from the current location
+ * that is word aligned: (PC & 0xffff_fffc) + s21
+ * B[qq] s21
+ *
+ * 0000_0sss ssss_sss0 SSSS_SSSS SS0q_qqqq
+ *
+ * qq:	qqqqq				condition code
+ * s21:	SSSS SSSS_SSss ssss_ssss	The displacement (21-bit signed)
+ *
+ * The displacement is supposed to be 16-bit (2-byte) aligned. Therefore,
+ * it should be a multiple of 2. Hence, there is an implied '0' bit at its
+ * LSB: S_SSSS SSSS_Ssss ssss_sss0
+ */
+#define OPC_BCC		0x00000000
+#define BCC_S21(d)	((((d) & 0x7fe) << 16) | (((d) & 0x1ff800) >> 5))
+
+/*
+ * Encoding for unconditional branch to an offset from the current location
+ * that is word aligned: (PC & 0xffff_fffc) + s25
+ * B s25
+ *
+ * 0000_0sss ssss_sss1 SSSS_SSSS SS00_TTTT
+ *
+ * s25:	TTTT SSSS SSSS_SSss ssss_ssss	The displacement (25-bit signed)
+ *
+ * The displacement is supposed to be 16-bit (2-byte) aligned. Therefore,
+ * it should be a multiple of 2. Hence, there is an implied '0' bit at its
+ * LSB: T TTTS_SSSS SSSS_Ssss ssss_sss0
+ */
+#define OPC_B		0x00010000
+#define B_S25(d)	((((d) & 0x1e00000) >> 21) | BCC_S21(d))
+
+/* Store one 16-bit unit at "buf" with a plain u16 write. */
+static inline void emit_2_bytes(u8 *buf, u16 bytes)
+{
+	u16 *dst = (u16 *)buf;
+
+	*dst = bytes;
+}
+
+/*
+ * Store a 32-bit word as two 16-bit units: the upper half goes to
+ * "buf" and the lower half to "buf + 2".
+ */
+static inline void emit_4_bytes(u8 *buf, u32 bytes)
+{
+	const u16 upper = bytes >> 16;
+	const u16 lower = bytes & 0xffff;
+
+	emit_2_bytes(buf, upper);
+	emit_2_bytes(buf + 2, lower);
+}
+
+/* Translate a BPF size code (BPF_B/H/W/DW) into the ARC "ZZ" value. */
+static inline u8 bpf_to_arc_size(u8 size)
+{
+	if (size == BPF_B)
+		return ZZ_1_byte;
+	if (size == BPF_H)
+		return ZZ_2_byte;
+	if (size == BPF_DW)
+		return ZZ_8_byte;
+
+	/* BPF_W and every unrecognised size map to the 4-byte mode. */
+	return ZZ_4_byte;
+}
+
+/************** Encoders (Deal with ARC regs) ************/
+
+/*
+ * Encode "mov reg,s12": a small immediate moved with one 4-byte
+ * instruction. Nothing is written when "buf" is NULL; the length is
+ * returned either way.
+ */
+static u8 arc_movi_r(u8 *buf, u8 reg, s16 imm)
+{
+	if (!buf)
+		return INSN_len_normal;
+
+	emit_4_bytes(buf, OPC_MOVI | OP_B(reg) | MOVI_S12(imm));
+	return INSN_len_normal;
+}
+
+/* Register-to-register move: rd <- rs. Returns the instruction length. */
+static u8 arc_mov_r(u8 *buf, u8 rd, u8 rs)
+{
+	u32 word = OPC_MOV;
+
+	word |= OP_B(rd);
+	word |= OP_C(rs);
+
+	if (buf)
+		emit_4_bytes(buf, word);
+
+	return INSN_len_normal;
+}
+
+/*
+ * rd <- imm. An immediate within the signed 12-bit range is delegated
+ * to arc_movi_r(); any other value is emitted as a "mov" followed by
+ * the 32-bit immediate, so the returned length depends on "imm".
+ */
+static u8 arc_mov_i(u8 *buf, u8 rd, s32 imm)
+{
+	const u8 long_len = INSN_len_normal + INSN_len_imm;
+
+	if (IN_S12_RANGE(imm))
+		return arc_movi_r(buf, rd, imm);
+	if (!buf)
+		return long_len;
+
+	emit_4_bytes(buf, OPC_MOV | OP_B(rd) | OP_IMM);
+	emit_4_bytes(buf + INSN_len_normal, imm);
+	return long_len;
+}
+
+/*
+ * rd <- imm, always as a "mov" plus a trailing 32-bit immediate, so
+ * the returned length is the same for every "imm".
+ */
+static u8 arc_mov_i_fixed(u8 *buf, u8 rd, s32 imm)
+{
+	const u8 len = INSN_len_normal + INSN_len_imm;
+
+	if (!buf)
+		return len;
+
+	emit_4_bytes(buf, OPC_MOV | OP_B(rd) | OP_IMM);
+	emit_4_bytes(buf + INSN_len_normal, imm);
+	return len;
+}
+
+/* Conditional register move: encodes "mov.cc rd,rs". */
+static u8 arc_mov_cc_r(u8 *buf, u8 cc, u8 rd, u8 rs)
+{
+	u32 word = OPC_MOV_CC;
+
+	word |= OP_B(rd);
+	word |= OP_C(rs);
+	word |= COND(cc);
+
+	if (buf)
+		emit_4_bytes(buf, word);
+
+	return INSN_len_normal;
+}
+
+/*
+ * Conditional move of a small immediate: encodes "mov.cc rd,u6".
+ * OP_C() keeps only the low 6 bits of "imm".
+ */
+static u8 arc_movu_cc_r(u8 *buf, u8 cc, u8 rd, u8 imm)
+{
+	if (!buf)
+		return INSN_len_normal;
+
+	emit_4_bytes(buf, OPC_MOVU_CC | OP_B(rd) | OP_C(imm) | COND(cc));
+	return INSN_len_normal;
+}
+
+/* Encode "sexb rd,rs": 8-bit sign extension of rs into rd. */
+static u8 arc_sexb_r(u8 *buf, u8 rd, u8 rs)
+{
+	u32 word = OPC_SEXB;
+
+	word |= OP_B(rd);
+	word |= OP_C(rs);
+
+	if (buf)
+		emit_4_bytes(buf, word);
+
+	return INSN_len_normal;
+}
+
+/* Sign extension from two bytes. */
+static u8 arc_sexh_r(u8 *buf, u8 rd, u8 rs)
+{
+	const u32 insn = OPC_SEXH | OP_B(rd) | OP_C(rs);
+
+	if (buf)
+		emit_4_bytes(buf, insn);</
author	Jakub Kicinski <kuba@kernel.org>	2024-05-13 16:40:22 -0700
committer	Jakub Kicinski <kuba@kernel.org>	2024-05-13 16:41:10 -0700
commit	6e62702feb6d474e969b52f0379de93e9729e457 (patch)
tree	aa109c16cb31139adffc01d01e40e28275ce7507 /arch/arc
parent	afd29f36aaf733985df1ba162424581b8b8853b4 (diff)
parent	ba39486d2c43ba7c103c438540aa56c8bde3b6c7 (diff)
download	linux-6e62702feb6d474e969b52f0379de93e9729e457.tar.gz linux-6e62702feb6d474e969b52f0379de93e9729e457.tar.bz2 linux-6e62702feb6d474e969b52f0379de93e9729e457.zip