diff options
author | Jakub Kicinski <kuba@kernel.org> | 2023-08-03 15:34:36 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2023-08-03 15:34:36 -0700 |
commit | d07b7b32da6f678d42d96a8b9824cf0a181ce140 (patch) | |
tree | 606829d4b33a57dbe0f0e825ca8505e0b5fcb759 | |
parent | 35b1b1fd96388d5e3cf179bf36bd8a4153baf4a3 (diff) | |
parent | 648880e9331c68b2008430fd90f3648d1795399d (diff) | |
download | linux-d07b7b32da6f678d42d96a8b9824cf0a181ce140.tar.gz linux-d07b7b32da6f678d42d96a8b9824cf0a181ce140.tar.bz2 linux-d07b7b32da6f678d42d96a8b9824cf0a181ce140.zip |
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Martin KaFai Lau says:
====================
pull-request: bpf-next 2023-08-03
We've added 54 non-merge commits during the last 10 day(s) which contain
a total of 84 files changed, 4026 insertions(+), 562 deletions(-).
The main changes are:
1) Add SO_REUSEPORT support for TC bpf_sk_assign from Lorenz Bauer,
Daniel Borkmann
2) Support new insns from cpu v4 from Yonghong Song
3) Non-atomically allocate freelist during prefill from YiFei Zhu
4) Support defragmenting IPv(4|6) packets in BPF from Daniel Xu
5) Add tracepoint to xdp attaching failure from Leon Hwang
6) struct netdev_rx_queue and xdp.h reshuffling to reduce
rebuild time from Jakub Kicinski
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (54 commits)
net: invert the netdevice.h vs xdp.h dependency
net: move struct netdev_rx_queue out of netdevice.h
eth: add missing xdp.h includes in drivers
selftests/bpf: Add testcase for xdp attaching failure tracepoint
bpf, xdp: Add tracepoint to xdp attaching failure
selftests/bpf: fix static assert compilation issue for test_cls_*.c
bpf: fix bpf_probe_read_kernel prototype mismatch
riscv, bpf: Adapt bpf trampoline to optimized riscv ftrace framework
libbpf: fix typos in Makefile
tracing: bpf: use struct trace_entry in struct syscall_tp_t
bpf, devmap: Remove unused dtab field from bpf_dtab_netdev
bpf, cpumap: Remove unused cmap field from bpf_cpu_map_entry
netfilter: bpf: Only define get_proto_defrag_hook() if necessary
bpf: Fix an array-index-out-of-bounds issue in disasm.c
net: remove duplicate INDIRECT_CALLABLE_DECLARE of udp[6]_ehashfn
docs/bpf: Fix malformed documentation
bpf: selftests: Add defrag selftests
bpf: selftests: Support custom type and proto for client sockets
bpf: selftests: Support not connecting client socket
netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link
...
====================
Link: https://lore.kernel.org/r/20230803174845.825419-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
84 files changed, 4026 insertions, 562 deletions
diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst index 38372a956d65..eb19c945f4d5 100644 --- a/Documentation/bpf/bpf_design_QA.rst +++ b/Documentation/bpf/bpf_design_QA.rst @@ -140,11 +140,6 @@ A: Because if we picked one-to-one relationship to x64 it would have made it more complicated to support on arm64 and other archs. Also it needs div-by-zero runtime check. -Q: Why there is no BPF_SDIV for signed divide operation? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -A: Because it would be rarely used. llvm errors in such case and -prints a suggestion to use unsigned divide instead. - Q: Why BPF has implicit prologue and epilogue? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A: Because architectures like sparc have register windows and in general diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst index 751e657973f0..655494ac7af6 100644 --- a/Documentation/bpf/standardization/instruction-set.rst +++ b/Documentation/bpf/standardization/instruction-set.rst @@ -154,24 +154,27 @@ otherwise identical operations. The 'code' field encodes the operation as below, where 'src' and 'dst' refer to the values of the source and destination registers, respectively. -======== ===== ========================================================== -code value description -======== ===== ========================================================== -BPF_ADD 0x00 dst += src -BPF_SUB 0x10 dst -= src -BPF_MUL 0x20 dst \*= src -BPF_DIV 0x30 dst = (src != 0) ? (dst / src) : 0 -BPF_OR 0x40 dst \|= src -BPF_AND 0x50 dst &= src -BPF_LSH 0x60 dst <<= (src & mask) -BPF_RSH 0x70 dst >>= (src & mask) -BPF_NEG 0x80 dst = -src -BPF_MOD 0x90 dst = (src != 0) ? (dst % src) : dst -BPF_XOR 0xa0 dst ^= src -BPF_MOV 0xb0 dst = src -BPF_ARSH 0xc0 sign extending dst >>= (src & mask) -BPF_END 0xd0 byte swap operations (see `Byte swap instructions`_ below) -======== ===== ========================================================== +========= ===== ======= ========================================================== +code value offset description +========= ===== ======= ========================================================== +BPF_ADD 0x00 0 dst += src +BPF_SUB 0x10 0 dst -= src +BPF_MUL 0x20 0 dst \*= src +BPF_DIV 0x30 0 dst = (src != 0) ? (dst / src) : 0 +BPF_SDIV 0x30 1 dst = (src != 0) ? (dst s/ src) : 0 +BPF_OR 0x40 0 dst \|= src +BPF_AND 0x50 0 dst &= src +BPF_LSH 0x60 0 dst <<= (src & mask) +BPF_RSH 0x70 0 dst >>= (src & mask) +BPF_NEG 0x80 0 dst = -dst +BPF_MOD 0x90 0 dst = (src != 0) ? (dst % src) : dst +BPF_SMOD 0x90 1 dst = (src != 0) ? (dst s% src) : dst +BPF_XOR 0xa0 0 dst ^= src +BPF_MOV 0xb0 0 dst = src +BPF_MOVSX 0xb0 8/16/32 dst = (s8,s16,s32)src +BPF_ARSH 0xc0 0 sign extending dst >>= (src & mask) +BPF_END 0xd0 0 byte swap operations (see `Byte swap instructions`_ below) +========= ===== ======= ========================================================== Underflow and overflow are allowed during arithmetic operations, meaning the 64-bit or 32-bit value will wrap. If eBPF program execution would @@ -198,33 +201,51 @@ where '(u32)' indicates that the upper 32 bits are zeroed. dst = dst ^ imm32 -Also note that the division and modulo operations are unsigned. Thus, for -``BPF_ALU``, 'imm' is first interpreted as an unsigned 32-bit value, whereas -for ``BPF_ALU64``, 'imm' is first sign extended to 64 bits and the result -interpreted as an unsigned 64-bit value. There are no instructions for -signed division or modulo. +Note that most instructions have instruction offset of 0. Only three instructions +(``BPF_SDIV``, ``BPF_SMOD``, ``BPF_MOVSX``) have a non-zero offset. + +The devision and modulo operations support both unsigned and signed flavors. + +For unsigned operations (``BPF_DIV`` and ``BPF_MOD``), for ``BPF_ALU``, +'imm' is interpreted as a 32-bit unsigned value. For ``BPF_ALU64``, +'imm' is first sign extended from 32 to 64 bits, and then interpreted as +a 64-bit unsigned value. + +For signed operations (``BPF_SDIV`` and ``BPF_SMOD``), for ``BPF_ALU``, +'imm' is interpreted as a 32-bit signed value. For ``BPF_ALU64``, 'imm' +is first sign extended from 32 to 64 bits, and then interpreted as a +64-bit signed value. + +The ``BPF_MOVSX`` instruction does a move operation with sign extension. +``BPF_ALU | BPF_MOVSX`` sign extends 8-bit and 16-bit operands into 32 +bit operands, and zeroes the remaining upper 32 bits. +``BPF_ALU64 | BPF_MOVSX`` sign extends 8-bit, 16-bit, and 32-bit +operands into 64 bit operands. Shift operations use a mask of 0x3F (63) for 64-bit operations and 0x1F (31) for 32-bit operations. Byte swap instructions -~~~~~~~~~~~~~~~~~~~~~~ +---------------------- -The byte swap instructions use an instruction class of ``BPF_ALU`` and a 4-bit -'code' field of ``BPF_END``. +The byte swap instructions use instruction classes of ``BPF_ALU`` and ``BPF_ALU64`` +and a 4-bit 'code' field of ``BPF_END``. The byte swap instructions operate on the destination register only and do not use a separate source register or immediate value. -The 1-bit source operand field in the opcode is used to select what byte -order the operation convert from or to: +For ``BPF_ALU``, the 1-bit source operand field in the opcode is used to +select what byte order the operation converts from or to. For +``BPF_ALU64``, the 1-bit source operand field in the opcode is reserved +and must be set to 0. -========= ===== ================================================= -source value description -========= ===== ================================================= -BPF_TO_LE 0x00 convert between host byte order and little endian -BPF_TO_BE 0x08 convert between host byte order and big endian -========= ===== ================================================= +========= ========= ===== ================================================= +class source value description +========= ========= ===== ================================================= +BPF_ALU BPF_TO_LE 0x00 convert between host byte order and little endian +BPF_ALU BPF_TO_BE 0x08 convert between host byte order and big endian +BPF_ALU64 Reserved 0x00 do byte swap unconditionally +========= ========= ===== ================================================= The 'imm' field encodes the width of the swap operations. The following widths are supported: 16, 32 and 64. @@ -239,6 +260,12 @@ Examples: dst = htobe64(dst) +``BPF_ALU64 | BPF_TO_LE | BPF_END`` with imm = 16/32/64 means:: + + dst = bswap16 dst + dst = bswap32 dst + dst = bswap64 dst + Jump instructions ----------------- @@ -249,7 +276,8 @@ The 'code' field encodes the operation as below: ======== ===== === =========================================== ========================================= code value src description notes ======== ===== === =========================================== ========================================= -BPF_JA 0x0 0x0 PC += offset BPF_JMP only +BPF_JA 0x0 0x0 PC += offset BPF_JMP class +BPF_JA 0x0 0x0 PC += imm BPF_JMP32 class BPF_JEQ 0x1 any PC += offset if dst == src BPF_JGT 0x2 any PC += offset if dst > src unsigned BPF_JGE 0x3 any PC += offset if dst >= src unsigned @@ -278,6 +306,19 @@ Example: where 's>=' indicates a signed '>=' comparison. +``BPF_JA | BPF_K | BPF_JMP32`` (0x06) means:: + + gotol +imm + +where 'imm' means the branch offset comes from insn 'imm' field. + +Note that there are two flavors of ``BPF_JA`` instructions. The +``BPF_JMP`` class permits a 16-bit jump offset specified by the 'offset' +field, whereas the ``BPF_JMP32`` class permits a 32-bit jump offset +specified by the 'imm' field. A > 16-bit conditional jump may be +converted to a < 16-bit conditional jump plus a 32-bit unconditional +jump. + Helper functions ~~~~~~~~~~~~~~~~ @@ -320,6 +361,7 @@ The mode modifier is one of: BPF_ABS 0x20 legacy BPF packet access (absolute) `Legacy BPF Packet access instructions`_ BPF_IND 0x40 legacy BPF packet access (indirect) `Legacy BPF Packet access instructions`_ BPF_MEM 0x60 regular load and store operations `Regular load and store operations`_ + BPF_MEMSX 0x80 sign-extension load operations `Sign-extension load operations`_ BPF_ATOMIC 0xc0 atomic operations `Atomic operations`_ ============= ===== ==================================== ============= @@ -350,9 +392,23 @@ instructions that transfer data between a register and memory. ``BPF_MEM | <size> | BPF_LDX`` means:: - dst = *(size *) (src + offset) + dst = *(unsigned size *) (src + offset) + +Where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW`` and +'unsigned size' is one of u8, u16, u32 or u64. + +Sign-extension load operations +------------------------------ + +The ``BPF_MEMSX`` mode modifier is used to encode sign-extension load +instructions that transfer data between a register and memory. + +``BPF_MEMSX | <size> | BPF_LDX`` means:: + + dst = *(signed size *) (src + offset) -Where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW``. +Where size is one of: ``BPF_B``, ``BPF_H`` or ``BPF_W``, and +'signed size' is one of s8, s16 or s32. Atomic operations ----------------- diff --git a/MAINTAINERS b/MAINTAINERS index 3f32da783b31..5e2bb1059ab6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3704,7 +3704,7 @@ M: Daniel Borkmann <daniel@iogearbox.net> M: Andrii Nakryiko <andrii@kernel.org> R: Martin KaFai Lau <martin.lau@linux.dev> R: Song Liu <song@kernel.org> -R: Yonghong Song <yhs@fb.com> +R: Yonghong Song <yonghong.song@linux.dev> R: John Fastabend <john.fastabend@gmail.com> R: KP Singh <kpsingh@kernel.org> R: Stanislav Fomichev <sdf@google.com> @@ -3743,7 +3743,7 @@ F: tools/lib/bpf/ F: tools/testing/selftests/bpf/ BPF [ITERATOR] -M: Yonghong Song <yhs@fb.com> +M: Yonghong Song <yonghong.song@linux.dev> L: bpf@vger.kernel.org S: Maintained F: kernel/bpf/*iter.c diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index c648864c8cd1..0ca4f5c0097c 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -13,6 +13,8 @@ #include <asm/patch.h> #include "bpf_jit.h" +#define RV_FENTRY_NINSNS 2 + #define RV_REG_TCC RV_REG_A6 #define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */ @@ -241,7 +243,7 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) if (!is_tail_call) emit_mv(RV_REG_A0, RV_REG_A5, ctx); emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, - is_tail_call ? 20 : 0, /* skip reserved nops and TCC init */ + is_tail_call ? (RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */ ctx); } @@ -618,32 +620,7 @@ static int add_exception_handler(const struct bpf_insn *insn, return 0; } -static int gen_call_or_nops(void *target, void *ip, u32 *insns) -{ - s64 rvoff; - int i, ret; - struct rv_jit_context ctx; - - ctx.ninsns = 0; - ctx.insns = (u16 *)insns; - - if (!target) { - for (i = 0; i < 4; i++) - emit(rv_nop(), &ctx); - return 0; - } - |