summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2023-08-03 15:34:36 -0700
committerJakub Kicinski <kuba@kernel.org>2023-08-03 15:34:36 -0700
commitd07b7b32da6f678d42d96a8b9824cf0a181ce140 (patch)
tree606829d4b33a57dbe0f0e825ca8505e0b5fcb759
parent35b1b1fd96388d5e3cf179bf36bd8a4153baf4a3 (diff)
parent648880e9331c68b2008430fd90f3648d1795399d (diff)
downloadlinux-d07b7b32da6f678d42d96a8b9824cf0a181ce140.tar.gz
linux-d07b7b32da6f678d42d96a8b9824cf0a181ce140.tar.bz2
linux-d07b7b32da6f678d42d96a8b9824cf0a181ce140.zip
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Martin KaFai Lau says: ==================== pull-request: bpf-next 2023-08-03 We've added 54 non-merge commits during the last 10 day(s) which contain a total of 84 files changed, 4026 insertions(+), 562 deletions(-). The main changes are: 1) Add SO_REUSEPORT support for TC bpf_sk_assign from Lorenz Bauer, Daniel Borkmann 2) Support new insns from cpu v4 from Yonghong Song 3) Non-atomically allocate freelist during prefill from YiFei Zhu 4) Support defragmenting IPv(4|6) packets in BPF from Daniel Xu 5) Add tracepoint to xdp attaching failure from Leon Hwang 6) struct netdev_rx_queue and xdp.h reshuffling to reduce rebuild time from Jakub Kicinski * tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (54 commits) net: invert the netdevice.h vs xdp.h dependency net: move struct netdev_rx_queue out of netdevice.h eth: add missing xdp.h includes in drivers selftests/bpf: Add testcase for xdp attaching failure tracepoint bpf, xdp: Add tracepoint to xdp attaching failure selftests/bpf: fix static assert compilation issue for test_cls_*.c bpf: fix bpf_probe_read_kernel prototype mismatch riscv, bpf: Adapt bpf trampoline to optimized riscv ftrace framework libbpf: fix typos in Makefile tracing: bpf: use struct trace_entry in struct syscall_tp_t bpf, devmap: Remove unused dtab field from bpf_dtab_netdev bpf, cpumap: Remove unused cmap field from bpf_cpu_map_entry netfilter: bpf: Only define get_proto_defrag_hook() if necessary bpf: Fix an array-index-out-of-bounds issue in disasm.c net: remove duplicate INDIRECT_CALLABLE_DECLARE of udp[6]_ehashfn docs/bpf: Fix malformed documentation bpf: selftests: Add defrag selftests bpf: selftests: Support custom type and proto for client sockets bpf: selftests: Support not connecting client socket netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link ... ==================== Link: https://lore.kernel.org/r/20230803174845.825419-1-martin.lau@linux.dev Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--Documentation/bpf/bpf_design_QA.rst5
-rw-r--r--Documentation/bpf/standardization/instruction-set.rst130
-rw-r--r--MAINTAINERS4
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c153
-rw-r--r--arch/x86/net/bpf_jit_comp.c141
-rw-r--r--drivers/net/bonding/bond_main.c1
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.h1
-rw-r--r--drivers/net/ethernet/engleder/tsnep.h1
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h1
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.h1
-rw-r--r--drivers/net/ethernet/freescale/fec.h1
-rw-r--r--drivers/net/ethernet/fungible/funeth/funeth_txrx.h1
-rw-r--r--drivers/net/ethernet/google/gve/gve.h1
-rw-r--r--drivers/net/ethernet/intel/igc/igc.h1
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.h1
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h1
-rw-r--r--drivers/net/ethernet/ti/cpsw_priv.h1
-rw-r--r--drivers/net/hyperv/hyperv_net.h1
-rw-r--r--drivers/net/tap.c1
-rw-r--r--drivers/net/virtio_net.c1
-rw-r--r--include/linux/bpf.h12
-rw-r--r--include/linux/filter.h34
-rw-r--r--include/linux/netdevice.h55
-rw-r--r--include/linux/netfilter.h10
-rw-r--r--include/net/busy_poll.h1
-rw-r--r--include/net/inet6_hashtables.h81
-rw-r--r--include/net/inet_hashtables.h74
-rw-r--r--include/net/mana/mana.h2
-rw-r--r--include/net/netdev_rx_queue.h53
-rw-r--r--include/net/sock.h7
-rw-r--r--include/net/xdp.h29
-rw-r--r--include/trace/events/xdp.h18
-rw-r--r--include/uapi/linux/bpf.h9
-rw-r--r--kernel/bpf/btf.c1
-rw-r--r--kernel/bpf/core.c206
-rw-r--r--kernel/bpf/cpumap.c3
-rw-r--r--kernel/bpf/devmap.c2
-rw-r--r--kernel/bpf/disasm.c58
-rw-r--r--kernel/bpf/memalloc.c24
-rw-r--r--kernel/bpf/offload.c1
-rw-r--r--kernel/bpf/verifier.c349
-rw-r--r--kernel/trace/bpf_trace.c11
-rw-r--r--kernel/trace/trace_syscalls.c12
-rw-r--r--net/bpf/test_run.c1
-rw-r--r--net/core/dev.c6
-rw-r--r--net/core/filter.c4
-rw-r--r--net/core/net-sysfs.c1
-rw-r--r--net/ipv4/inet_hashtables.c66
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c17
-rw-r--r--net/ipv4/udp.c88
-rw-r--r--net/ipv6/inet6_hashtables.c69
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c11
-rw-r--r--net/ipv6/udp.c96
-rw-r--r--net/netfilter/core.c6
-rw-r--r--net/netfilter/nf_bpf_link.c125
-rw-r--r--net/netfilter/nf_conntrack_bpf.c1
-rw-r--r--net/xdp/xsk.c1
-rw-r--r--tools/include/uapi/linux/bpf.h9
-rw-r--r--tools/lib/bpf/Makefile4
-rw-r--r--tools/testing/selftests/bpf/.gitignore2
-rw-r--r--tools/testing/selftests/bpf/Makefile33
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c9
-rwxr-xr-xtools/testing/selftests/bpf/generate_udp_fragments.py90
-rw-r--r--tools/testing/selftests/bpf/ip_check_defrag_frags.h57
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c29
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/assign_reuse.c199
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c283
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_ldsx_insn.c139
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_attach.c65
-rw-r--r--tools/testing/selftests/bpf/progs/ip_check_defrag.c104
-rw-r--r--tools/testing/selftests/bpf/progs/test_assign_reuse.c142
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.h9
-rw-r--r--tools/testing/selftests/bpf/progs/test_ldsx_insn.c118
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_attach_fail.c54
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bswap.c59
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotol.c44
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ldsx.c131
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_movsx.c213
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sdiv.c781
-rw-r--r--tools/testing/selftests/bpf/verifier/basic_instr.c6
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c2
84 files changed, 4026 insertions, 562 deletions
diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst
index 38372a956d65..eb19c945f4d5 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -140,11 +140,6 @@ A: Because if we picked one-to-one relationship to x64 it would have made
it more complicated to support on arm64 and other archs. Also it
needs div-by-zero runtime check.
-Q: Why there is no BPF_SDIV for signed divide operation?
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-A: Because it would be rarely used. llvm errors in such case and
-prints a suggestion to use unsigned divide instead.
-
Q: Why BPF has implicit prologue and epilogue?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A: Because architectures like sparc have register windows and in general
diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst
index 751e657973f0..655494ac7af6 100644
--- a/Documentation/bpf/standardization/instruction-set.rst
+++ b/Documentation/bpf/standardization/instruction-set.rst
@@ -154,24 +154,27 @@ otherwise identical operations.
The 'code' field encodes the operation as below, where 'src' and 'dst' refer
to the values of the source and destination registers, respectively.
-======== ===== ==========================================================
-code value description
-======== ===== ==========================================================
-BPF_ADD 0x00 dst += src
-BPF_SUB 0x10 dst -= src
-BPF_MUL 0x20 dst \*= src
-BPF_DIV 0x30 dst = (src != 0) ? (dst / src) : 0
-BPF_OR 0x40 dst \|= src
-BPF_AND 0x50 dst &= src
-BPF_LSH 0x60 dst <<= (src & mask)
-BPF_RSH 0x70 dst >>= (src & mask)
-BPF_NEG 0x80 dst = -src
-BPF_MOD 0x90 dst = (src != 0) ? (dst % src) : dst
-BPF_XOR 0xa0 dst ^= src
-BPF_MOV 0xb0 dst = src
-BPF_ARSH 0xc0 sign extending dst >>= (src & mask)
-BPF_END 0xd0 byte swap operations (see `Byte swap instructions`_ below)
-======== ===== ==========================================================
+========= ===== ======= ==========================================================
+code value offset description
+========= ===== ======= ==========================================================
+BPF_ADD 0x00 0 dst += src
+BPF_SUB 0x10 0 dst -= src
+BPF_MUL 0x20 0 dst \*= src
+BPF_DIV 0x30 0 dst = (src != 0) ? (dst / src) : 0
+BPF_SDIV 0x30 1 dst = (src != 0) ? (dst s/ src) : 0
+BPF_OR 0x40 0 dst \|= src
+BPF_AND 0x50 0 dst &= src
+BPF_LSH 0x60 0 dst <<= (src & mask)
+BPF_RSH 0x70 0 dst >>= (src & mask)
+BPF_NEG 0x80 0 dst = -dst
+BPF_MOD 0x90 0 dst = (src != 0) ? (dst % src) : dst
+BPF_SMOD 0x90 1 dst = (src != 0) ? (dst s% src) : dst
+BPF_XOR 0xa0 0 dst ^= src
+BPF_MOV 0xb0 0 dst = src
+BPF_MOVSX 0xb0 8/16/32 dst = (s8,s16,s32)src
+BPF_ARSH 0xc0 0 sign extending dst >>= (src & mask)
+BPF_END 0xd0 0 byte swap operations (see `Byte swap instructions`_ below)
+========= ===== ======= ==========================================================
Underflow and overflow are allowed during arithmetic operations, meaning
the 64-bit or 32-bit value will wrap. If eBPF program execution would
@@ -198,33 +201,51 @@ where '(u32)' indicates that the upper 32 bits are zeroed.
dst = dst ^ imm32
-Also note that the division and modulo operations are unsigned. Thus, for
-``BPF_ALU``, 'imm' is first interpreted as an unsigned 32-bit value, whereas
-for ``BPF_ALU64``, 'imm' is first sign extended to 64 bits and the result
-interpreted as an unsigned 64-bit value. There are no instructions for
-signed division or modulo.
+Note that most instructions have instruction offset of 0. Only three instructions
+(``BPF_SDIV``, ``BPF_SMOD``, ``BPF_MOVSX``) have a non-zero offset.
+
+The devision and modulo operations support both unsigned and signed flavors.
+
+For unsigned operations (``BPF_DIV`` and ``BPF_MOD``), for ``BPF_ALU``,
+'imm' is interpreted as a 32-bit unsigned value. For ``BPF_ALU64``,
+'imm' is first sign extended from 32 to 64 bits, and then interpreted as
+a 64-bit unsigned value.
+
+For signed operations (``BPF_SDIV`` and ``BPF_SMOD``), for ``BPF_ALU``,
+'imm' is interpreted as a 32-bit signed value. For ``BPF_ALU64``, 'imm'
+is first sign extended from 32 to 64 bits, and then interpreted as a
+64-bit signed value.
+
+The ``BPF_MOVSX`` instruction does a move operation with sign extension.
+``BPF_ALU | BPF_MOVSX`` sign extends 8-bit and 16-bit operands into 32
+bit operands, and zeroes the remaining upper 32 bits.
+``BPF_ALU64 | BPF_MOVSX`` sign extends 8-bit, 16-bit, and 32-bit
+operands into 64 bit operands.
Shift operations use a mask of 0x3F (63) for 64-bit operations and 0x1F (31)
for 32-bit operations.
Byte swap instructions
-~~~~~~~~~~~~~~~~~~~~~~
+----------------------
-The byte swap instructions use an instruction class of ``BPF_ALU`` and a 4-bit
-'code' field of ``BPF_END``.
+The byte swap instructions use instruction classes of ``BPF_ALU`` and ``BPF_ALU64``
+and a 4-bit 'code' field of ``BPF_END``.
The byte swap instructions operate on the destination register
only and do not use a separate source register or immediate value.
-The 1-bit source operand field in the opcode is used to select what byte
-order the operation convert from or to:
+For ``BPF_ALU``, the 1-bit source operand field in the opcode is used to
+select what byte order the operation converts from or to. For
+``BPF_ALU64``, the 1-bit source operand field in the opcode is reserved
+and must be set to 0.
-========= ===== =================================================
-source value description
-========= ===== =================================================
-BPF_TO_LE 0x00 convert between host byte order and little endian
-BPF_TO_BE 0x08 convert between host byte order and big endian
-========= ===== =================================================
+========= ========= ===== =================================================
+class source value description
+========= ========= ===== =================================================
+BPF_ALU BPF_TO_LE 0x00 convert between host byte order and little endian
+BPF_ALU BPF_TO_BE 0x08 convert between host byte order and big endian
+BPF_ALU64 Reserved 0x00 do byte swap unconditionally
+========= ========= ===== =================================================
The 'imm' field encodes the width of the swap operations. The following widths
are supported: 16, 32 and 64.
@@ -239,6 +260,12 @@ Examples:
dst = htobe64(dst)
+``BPF_ALU64 | BPF_TO_LE | BPF_END`` with imm = 16/32/64 means::
+
+ dst = bswap16 dst
+ dst = bswap32 dst
+ dst = bswap64 dst
+
Jump instructions
-----------------
@@ -249,7 +276,8 @@ The 'code' field encodes the operation as below:
======== ===== === =========================================== =========================================
code value src description notes
======== ===== === =========================================== =========================================
-BPF_JA 0x0 0x0 PC += offset BPF_JMP only
+BPF_JA 0x0 0x0 PC += offset BPF_JMP class
+BPF_JA 0x0 0x0 PC += imm BPF_JMP32 class
BPF_JEQ 0x1 any PC += offset if dst == src
BPF_JGT 0x2 any PC += offset if dst > src unsigned
BPF_JGE 0x3 any PC += offset if dst >= src unsigned
@@ -278,6 +306,19 @@ Example:
where 's>=' indicates a signed '>=' comparison.
+``BPF_JA | BPF_K | BPF_JMP32`` (0x06) means::
+
+ gotol +imm
+
+where 'imm' means the branch offset comes from insn 'imm' field.
+
+Note that there are two flavors of ``BPF_JA`` instructions. The
+``BPF_JMP`` class permits a 16-bit jump offset specified by the 'offset'
+field, whereas the ``BPF_JMP32`` class permits a 32-bit jump offset
+specified by the 'imm' field. A > 16-bit conditional jump may be
+converted to a < 16-bit conditional jump plus a 32-bit unconditional
+jump.
+
Helper functions
~~~~~~~~~~~~~~~~
@@ -320,6 +361,7 @@ The mode modifier is one of:
BPF_ABS 0x20 legacy BPF packet access (absolute) `Legacy BPF Packet access instructions`_
BPF_IND 0x40 legacy BPF packet access (indirect) `Legacy BPF Packet access instructions`_
BPF_MEM 0x60 regular load and store operations `Regular load and store operations`_
+ BPF_MEMSX 0x80 sign-extension load operations `Sign-extension load operations`_
BPF_ATOMIC 0xc0 atomic operations `Atomic operations`_
============= ===== ==================================== =============
@@ -350,9 +392,23 @@ instructions that transfer data between a register and memory.
``BPF_MEM | <size> | BPF_LDX`` means::
- dst = *(size *) (src + offset)
+ dst = *(unsigned size *) (src + offset)
+
+Where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW`` and
+'unsigned size' is one of u8, u16, u32 or u64.
+
+Sign-extension load operations
+------------------------------
+
+The ``BPF_MEMSX`` mode modifier is used to encode sign-extension load
+instructions that transfer data between a register and memory.
+
+``BPF_MEMSX | <size> | BPF_LDX`` means::
+
+ dst = *(signed size *) (src + offset)
-Where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW``.
+Where size is one of: ``BPF_B``, ``BPF_H`` or ``BPF_W``, and
+'signed size' is one of s8, s16 or s32.
Atomic operations
-----------------
diff --git a/MAINTAINERS b/MAINTAINERS
index 3f32da783b31..5e2bb1059ab6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3704,7 +3704,7 @@ M: Daniel Borkmann <daniel@iogearbox.net>
M: Andrii Nakryiko <andrii@kernel.org>
R: Martin KaFai Lau <martin.lau@linux.dev>
R: Song Liu <song@kernel.org>
-R: Yonghong Song <yhs@fb.com>
+R: Yonghong Song <yonghong.song@linux.dev>
R: John Fastabend <john.fastabend@gmail.com>
R: KP Singh <kpsingh@kernel.org>
R: Stanislav Fomichev <sdf@google.com>
@@ -3743,7 +3743,7 @@ F: tools/lib/bpf/
F: tools/testing/selftests/bpf/
BPF [ITERATOR]
-M: Yonghong Song <yhs@fb.com>
+M: Yonghong Song <yonghong.song@linux.dev>
L: bpf@vger.kernel.org
S: Maintained
F: kernel/bpf/*iter.c
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index c648864c8cd1..0ca4f5c0097c 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -13,6 +13,8 @@
#include <asm/patch.h>
#include "bpf_jit.h"
+#define RV_FENTRY_NINSNS 2
+
#define RV_REG_TCC RV_REG_A6
#define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */
@@ -241,7 +243,7 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
if (!is_tail_call)
emit_mv(RV_REG_A0, RV_REG_A5, ctx);
emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
- is_tail_call ? 20 : 0, /* skip reserved nops and TCC init */
+ is_tail_call ? (RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */
ctx);
}
@@ -618,32 +620,7 @@ static int add_exception_handler(const struct bpf_insn *insn,
return 0;
}
-static int gen_call_or_nops(void *target, void *ip, u32 *insns)
-{
- s64 rvoff;
- int i, ret;
- struct rv_jit_context ctx;
-
- ctx.ninsns = 0;
- ctx.insns = (u16 *)insns;
-
- if (!target) {
- for (i = 0; i < 4; i++)
- emit(rv_nop(), &ctx);
- return 0;
- }
-