summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-03-26 21:44:13 -0700
committerDavid S. Miller <davem@davemloft.net>2019-03-26 21:44:13 -0700
commit5133a4a800fd3a5d1ab7c016196fd416af3a3f1c (patch)
treeece3cfb648899b67a0e6fd145361744f1c3f6f90
parentfa7e428c6b7ed3281610511a2b2ec716d9894be8 (diff)
parentb4b6aa83433ea4675d4ba1be56774623db81f14f (diff)
downloadlinux-5133a4a800fd3a5d1ab7c016196fd416af3a3f1c.tar.gz
linux-5133a4a800fd3a5d1ab7c016196fd416af3a3f1c.tar.bz2
linux-5133a4a800fd3a5d1ab7c016196fd416af3a3f1c.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2019-03-26 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) introduce bpf_tcp_check_syncookie() helper for XDP and tc, from Lorenz. 2) allow bpf_skb_ecn_set_ce() in tc, from Peter. 3) numerous bpf tc tunneling improvements, from Willem. 4) and other miscellaneous improvements from Adrian, Alan, Daniel, Ivan, Stanislav. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/bpf.h1
-rw-r--r--include/uapi/linux/bpf.h65
-rw-r--r--kernel/bpf/verifier.c33
-rw-r--r--net/core/filter.c350
-rw-r--r--samples/bpf/.gitignore1
-rw-r--r--tools/include/uapi/linux/bpf.h65
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile9
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h26
-rw-r--r--tools/testing/selftests/bpf/config2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_edt.c109
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c261
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c129
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_edt.sh99
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh186
-rwxr-xr-xtools/testing/selftests/bpf/test_tcp_check_syncookie.sh81
-rw-r--r--tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c212
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c6
-rw-r--r--tools/testing/selftests/bpf/urandom_read.c15
-rw-r--r--tools/testing/selftests/bpf/verifier/ref_tracking.c126
-rw-r--r--tools/testing/selftests/bpf/verifier/unpriv.c8
22 files changed, 1673 insertions, 128 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f02367faa58d..f62897198844 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -205,6 +205,7 @@ enum bpf_return_type {
RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */
RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */
+ RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
};
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 929c8e537a14..837024512baf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1478,13 +1478,27 @@ union bpf_attr {
* Grow or shrink the room for data in the packet associated to
* *skb* by *len_diff*, and according to the selected *mode*.
*
- * There is a single supported mode at this time:
+ * There are two supported modes at this time:
+ *
+ * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
+ * (room space is added or removed below the layer 2 header).
*
* * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
* (room space is added or removed below the layer 3 header).
*
- * All values for *flags* are reserved for future usage, and must
- * be left at zero.
+ * The following flags are supported at this time:
+ *
+ * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
+ * Adjusting mss in this way is not allowed for datagrams.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 **:
+ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 **:
+ * Any new space is reserved to hold a tunnel header.
+ * Configure skb offsets and other fields accordingly.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE **:
+ * * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **:
+ * Use with ENCAP_L3 flags to further specify the tunnel type.
*
* A call to this helper is susceptible to change the underlaying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -2431,6 +2445,38 @@ union bpf_attr {
* Return
* A **struct bpf_sock** pointer on success, or **NULL** in
* case of failure.
+ *
+ * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * This function is identical to bpf_sk_lookup_tcp, except that it
+ * also returns timewait or request sockets. Use bpf_sk_fullsock
+ * or bpf_tcp_socket to access the full structure.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from **reuse->socks**\ [] using the hash of the tuple.
+ *
+ * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * Description
+ * Check whether iph and th contain a valid SYN cookie ACK for
+ * the listening socket in sk.
+ *
+ * iph points to the start of the IPv4 or IPv6 header, while
+ * iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr).
+ *
+ * th points to the start of the TCP header, while th_len contains
+ * sizeof(struct tcphdr).
+ *
+ * Return
+ * 0 if iph and th are a valid SYN cookie ACK, or a negative error
+ * otherwise.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2531,7 +2577,9 @@ union bpf_attr {
FN(sk_fullsock), \
FN(tcp_sock), \
FN(skb_ecn_set_ce), \
- FN(get_listener_sock),
+ FN(get_listener_sock), \
+ FN(skc_lookup_tcp), \
+ FN(tcp_check_syncookie),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2590,9 +2638,18 @@ enum bpf_func_id {
/* Current network namespace */
#define BPF_F_CURRENT_NETNS (-1L)
+/* BPF_FUNC_skb_adjust_room flags. */
+#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
+
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
+
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
+ BPF_ADJ_ROOM_MAC,
};
/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 86f9cd5d1c4e..dffeec3706ce 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -369,7 +369,8 @@ static bool is_release_function(enum bpf_func_id func_id)
static bool is_acquire_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_sk_lookup_tcp ||
- func_id == BPF_FUNC_sk_lookup_udp;
+ func_id == BPF_FUNC_sk_lookup_udp ||
+ func_id == BPF_FUNC_skc_lookup_tcp;
}
static bool is_ptr_cast_function(enum bpf_func_id func_id)
@@ -3147,19 +3148,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
- if (is_acquire_function(func_id)) {
- int id = acquire_reference_state(env, insn_idx);
-
- if (id < 0)
- return id;
- /* For mark_ptr_or_null_reg() */
- regs[BPF_REG_0].id = id;
- /* For release_reference() */
- regs[BPF_REG_0].ref_obj_id = id;
- } else {
- /* For mark_ptr_or_null_reg() */
- regs[BPF_REG_0].id = ++env->id_gen;
- }
+ regs[BPF_REG_0].id = ++env->id_gen;
+ } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
+ regs[BPF_REG_0].id = ++env->id_gen;
} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
@@ -3170,9 +3163,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL;
}
- if (is_ptr_cast_function(func_id))
+ if (is_ptr_cast_function(func_id)) {
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
+ } else if (is_acquire_function(func_id)) {
+ int id = acquire_reference_state(env, insn_idx);
+
+ if (id < 0)
+ return id;
+ /* For mark_ptr_or_null_reg() */
+ regs[BPF_REG_0].id = id;
+ /* For release_reference() */
+ regs[BPF_REG_0].ref_obj_id = id;
+ }
do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
diff --git a/net/core/filter.c b/net/core/filter.c
index 647c63a7b25b..22eb2edf5573 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2963,42 +2963,113 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
}
}
-static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
+ BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+
+#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
+ BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
+ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
+ BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+
+static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
+ u64 flags)
{
- u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
+ bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
+ u16 mac_len = 0, inner_net = 0, inner_trans = 0;
+ unsigned int gso_type = SKB_GSO_DODGY;
int ret;
- if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
- return -ENOTSUPP;
+ if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
+ /* udp gso_size delineates datagrams, only allow if fixed */
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
+ !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+ return -ENOTSUPP;
+ }
- ret = skb_cow(skb, len_diff);
+ ret = skb_cow_head(skb, len_diff);
if (unlikely(ret < 0))
return ret;
+ if (encap) {
+ if (skb->protocol != htons(ETH_P_IP) &&
+ skb->protocol != htons(ETH_P_IPV6))
+ return -ENOTSUPP;
+
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
+ flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+ return -EINVAL;
+
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
+ flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+ return -EINVAL;
+
+ if (skb->encapsulation)
+ return -EALREADY;
+
+ mac_len = skb->network_header - skb->mac_header;
+ inner_net = skb->network_header;
+ inner_trans = skb->transport_header;
+ }
+
ret = bpf_skb_net_hdr_push(skb, off, len_diff);
if (unlikely(ret < 0))
return ret;
+ if (encap) {
+ /* inner mac == inner_net on l3 encap */
+ skb->inner_mac_header = inner_net;
+ skb->inner_network_header = inner_net;
+ skb->inner_transport_header = inner_trans;
+ skb_set_inner_protocol(skb, skb->protocol);
+
+ skb->encapsulation = 1;
+ skb_set_network_header(skb, mac_len);
+
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+ gso_type |= SKB_GSO_UDP_TUNNEL;
+ else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
+ gso_type |= SKB_GSO_GRE;
+ else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+ gso_type |= SKB_GSO_IPXIP6;
+ else
+ gso_type |= SKB_GSO_IPXIP4;
+
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
+ flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
+ int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
+ sizeof(struct ipv6hdr) :
+ sizeof(struct iphdr);
+
+ skb_set_transport_header(skb, mac_len + nh_len);
+ }
+ }
+
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
/* Due to header grow, MSS needs to be downgraded. */
- skb_decrease_gso_size(shinfo, len_diff);
+ if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+ skb_decrease_gso_size(shinfo, len_diff);
+
/* Header must be checked, and gso_segs recomputed. */
- shinfo->gso_type |= SKB_GSO_DODGY;
+ shinfo->gso_type |= gso_type;
shinfo->gso_segs = 0;
}
return 0;
}
-static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
+static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
+ u64 flags)
{
- u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
int ret;
- if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
- return -ENOTSUPP;
+ if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
+ /* udp gso_size delineates datagrams, only allow if fixed */
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
+ !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+ return -ENOTSUPP;
+ }
ret = skb_unclone(skb, GFP_ATOMIC);
if (unlikely(ret < 0))
@@ -3012,7 +3083,9 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
struct skb_shared_info *shinfo = skb_shinfo(skb);
/* Due to header shrink, MSS can be upgraded. */
- skb_increase_gso_size(shinfo, len_diff);
+ if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
+ skb_increase_gso_size(shinfo, len_diff);
+
/* Header must be checked, and gso_segs recomputed. */
shinfo->gso_type |= SKB_GSO_DODGY;
shinfo->gso_segs = 0;
@@ -3027,49 +3100,50 @@ static u32 __bpf_skb_max_len(const struct sk_buff *skb)
SKB_MAX_ALLOC;
}
-static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
+BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
+ u32, mode, u64, flags)
{
- bool trans_same = skb->transport_header == skb->network_header;
u32 len_cur, len_diff_abs = abs(len_diff);
u32 len_min = bpf_skb_net_base_len(skb);
u32 len_max = __bpf_skb_max_len(skb);
__be16 proto = skb->protocol;
bool shrink = len_diff < 0;
+ u32 off;
int ret;
+ if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK))
+ return -EINVAL;
if (unlikely(len_diff_abs > 0xfffU))
return -EFAULT;
if (unlikely(proto != htons(ETH_P_IP) &&
proto != htons(ETH_P_IPV6)))
return -ENOTSUPP;
+ off = skb_mac_header_len(skb);
+ switch (mode) {
+ case BPF_ADJ_ROOM_NET:
+ off += bpf_skb_net_base_len(skb);
+ break;
+ case BPF_ADJ_ROOM_MAC:
+ break;
+ default:
+ return -ENOTSUPP;
+ }
+
len_cur = skb->len - skb_network_offset(skb);
- if (skb_transport_header_was_set(skb) && !trans_same)
- len_cur = skb_network_header_len(skb);
if ((shrink && (len_diff_abs >= len_cur ||
len_cur - len_diff_abs < len_min)) ||
(!shrink && (skb->len + len_diff_abs > len_max &&
!skb_is_gso(skb))))
return -ENOTSUPP;
- ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
- bpf_skb_net_grow(skb, len_diff_abs);
+ ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
+ bpf_skb_net_grow(skb, off, len_diff_abs, flags);
bpf_compute_data_pointers(skb);
return ret;
}
-BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
- u32, mode, u64, flags)
-{
- if (unlikely(flags))
- return -EINVAL;
- if (likely(mode == BPF_ADJ_ROOM_NET))
- return bpf_skb_adjust_net(skb, len_diff);
-
- return -ENOTSUPP;
-}
-
static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
.func = bpf_skb_adjust_room,
.gpl_only = false,
@@ -5156,15 +5230,15 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
return sk;
}
-/* bpf_sk_lookup performs the core lookup for different types of sockets,
+/* bpf_skc_lookup performs the core lookup for different types of sockets,
* taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
* Returns the socket as an 'unsigned long' to simplify the casting in the
* callers to satisfy BPF_CALL declarations.
*/
-static unsigned long
-__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
- struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
- u64 flags)
+static struct sock *
+__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+ u64 flags)
{
struct sock *sk = NULL;
u8 family = AF_UNSPEC;
@@ -5192,15 +5266,27 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
put_net(net);
}
+out:
+ return sk;
+}
+
+static struct sock *
+__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+ u64 flags)
+{
+ struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
+ ifindex, proto, netns_id, flags);
+
if (sk)
sk = sk_to_full_sk(sk);
-out:
- return (unsigned long) sk;
+
+ return sk;
}
-static unsigned long
-bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
- u8 proto, u64 netns_id, u64 flags)
+static struct sock *
+bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ u8 proto, u64 netns_id, u64 flags)
{
struct net *caller_net;
int ifindex;
@@ -5213,14 +5299,47 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
ifindex = 0;
}
- return __bpf_sk_lookup(skb, tuple, len, caller_net, ifindex,
- proto, netns_id, flags);
+ return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
+ netns_id, flags);
+}
+
+static struct sock *
+bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ u8 proto, u64 netns_id, u64 flags)
+{
+ struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
+ flags);
+
+ if (sk)
+ sk = sk_to_full_sk(sk);
+
+ return sk;
+}
+
+BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
+ netns_id, flags);
}
+static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
+ .func = bpf_skc_lookup_tcp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
- return bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags);
+ return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
+ netns_id, flags);
}
static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
@@ -5238,7 +5357,8 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
- return bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags);
+ return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
+ netns_id, flags);
}
static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
@@ -5273,8 +5393,9 @@ BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
struct net *caller_net = dev_net(ctx->rxq->dev);
int ifindex = ctx->rxq->dev->ifindex;
- return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
- IPPROTO_UDP, netns_id, flags);
+ return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
+ ifindex, IPPROTO_UDP, netns_id,
+ flags);
}
static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
@@ -5289,14 +5410,38 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
.arg5_type = ARG_ANYTHING,
};
+BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+ struct net *caller_net = dev_net(ctx->rxq->dev);
+ int ifindex = ctx->rxq->dev->ifindex;
+
+ return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
+ ifindex, IPPROTO_TCP, netns_id,
+ flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
+ .func = bpf_xdp_skc_lookup_tcp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
{
struct net *caller_net = dev_net(ctx->rxq->dev);
int ifindex = ctx->rxq->dev->ifindex;
- return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
- IPPROTO_TCP, netns_id, flags);
+ return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
+ ifindex, IPPROTO_TCP, netns_id,
+ flags);
}
static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
@@ -5311,11 +5456,31 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
.arg5_type = ARG_ANYTHING,
};
+BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
+ sock_net(ctx->sk), 0,
+ IPPROTO_TCP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
+ .func = bpf_sock_addr_skc_lookup_tcp,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
- return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
- IPPROTO_TCP, netns_id, flags);
+ return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
+ sock_net(ctx->sk), 0, IPPROTO_TCP,
+ netns_id, flags);
}
static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
@@ -5332,8 +5497,9 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
- return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
- IPPROTO_UDP, netns_id, flags);
+ return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
+ sock_net(ctx->sk), 0, IPPROTO_UDP,
+ netns_id, flags);
}
static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
@@ -5461,6 +5627,74 @@ static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
};
+
+BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
+ struct tcphdr *, th, u32, th_len)
+{
+#ifdef CONFIG_SYN_COOKIES
+ u32 cookie;
+ int ret;
+
+ if (unlikely(th_len < sizeof(*th)))
+ return -EINVAL;
+
+ /* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
+ if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
+ return -EINVAL;
+
+ if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ return -EINVAL;
+
+ if (!th->ack || th->rst || th->syn)
+ return -ENOENT;
+
+ if (tcp_synq_no_recent_overflow(sk))
+ return -ENOENT;
+
+ cookie = ntohl(th->ack_seq) - 1;
+
+ switch (sk->sk_family) {
+ case AF_INET:
+ if (unlikely(iph_len < sizeof(struct iphdr)))
+ return -EINVAL;
+
+ ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
+ break;
+
+#if IS_BUILTIN(CONFIG_IPV6)
+ case AF_INET6:
+ if (unlikely(iph_len < sizeof(struct ipv6hdr)))
+ return -EINVAL;
+
+ ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
+ break;
+#endif /* CONFIG_IPV6 */
+
+ default:
+ return -EPROTONOSUPPORT;
+ }
+
+ if (ret > 0)
+ return 0;
+
+ return -ENOENT;
+#else
+ return -ENOTSUPP;
+#endif
+}
+
+static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
+ .func = bpf_tcp_check_syncookie,
+ .gpl_only = true,
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
@@ -5586,6 +5820,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sock_addr_sk_lookup_udp_proto;
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
+ case BPF_FUNC_skc_lookup_tcp:
+ return &bpf_sock_addr_skc_lookup_tcp_proto;
#endif /* CONFIG_INET */
default:
return bpf_base_func_proto(func_id);
@@ -5719,6 +5955,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_sock_proto;
case BPF_FUNC_get_listener_sock:
return &bpf_get_listener_sock_proto;
+ case BPF_FUNC_skc_lookup_tcp:
+ return &bpf_skc_lookup_tcp_proto;
+ case BPF_FUNC_tcp_check_syncookie:
+ return &bpf_tcp_check_syncookie_proto;
+ case BPF_FUNC_skb_ecn_set_ce:
+ return &bpf_skb_ecn_set_ce_proto;
#endif
default:
return bpf_base_func_proto(func_id);
@@ -5754,6 +5996,10 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_sk_lookup_tcp_proto;
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
+ case BPF_FUNC_skc_lookup_tcp:
+ return &bpf_xdp_skc_lookup_tcp_proto;
+ case BPF_FUNC_tcp_check_syncookie:
+ return &bpf_tcp_check_syncookie_proto;
#endif
default:
return bpf_base_func_proto(func_id);
@@ -5846,6 +6092,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_lookup_udp_proto;
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
+ case BPF_FUNC_skc_lookup_tcp:
+ return &bpf_skc_lookup_tcp_proto;
#endif
default:
return bpf_base_func_proto(func_id);
diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore
index dbb817dbacfc..59e40998e249 100644
--- a/samples/bpf/.gitignore
+++ b/samples/bpf/.gitignore
@@ -44,5 +44,6 @@ xdp_redirect_cpu
xdp_redirect_map
xdp_router_ipv4
xdp_rxq_info
+xdp_sample_pkts
xdp_tx_iptunnel
xdpsock
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 929c8e537a14..837024512baf 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1478,13 +1478,27 @@ union bpf_attr {
* Grow or shrink the room for data in the packet associated to
* *skb* by *len_diff*, and according to the selected *mode*.
*
- * There is a single supported mode at this time:
+ * There are two supported modes at this time:
+ *
+ * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
+ * (room space is added or removed below the layer 2 header).
*
* * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
* (room space is added or removed below the layer 3 header).
*
- * All values for *flags* are reserved for future usage, and must
- * be left at zero.
+ * The following flags are supported at this time:
+ *
+ * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
+ * Adjusting mss in this way is not allowed for datagrams.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 **:
+ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 **:
+ * Any new space is reserved to hold a tunnel header.
+ * Configure skb offsets and other fields accordingly.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE **:
+ * * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **:
+ * Use with ENCAP_L3 flags to further specify the tunnel type.
*
* A call to this helper is susceptible to change the underlaying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -2431,6 +2445,38 @@ union bpf_attr {
* Return
* A **struct bpf_sock** pointer on success, or **NULL** in
* case of failure.
+ *
+ * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * This function is identical to bpf_sk_lookup_tcp, except that it
+ * also returns timewait or request sockets. Use bpf_sk_fullsock
+ * or bpf_tcp_socket to access the full structure.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from **reuse->socks**\ [] using the hash of the tuple.
+ *
+ * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * Description
+ * Check whether iph and th contain a valid SYN cookie ACK for
+ * the listening socket in sk.
+ *
+ * iph points to the start of the IPv4 or IPv6 header, while
+ * iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr).
+ *
+ * th points to the start of the TCP header, while th_len contains
+ * sizeof(struct tcphdr).
+ *
+ * Return
+ * 0 if iph and th are a valid SYN cookie ACK, or a negative error
+ * otherwise.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2531,7 +2577,9 @@ union bpf_attr {
FN(sk_fullsock), \
FN(tcp_sock), \
FN(skb_ecn_set_ce), \
- FN(get_listener_sock),
+ FN(get_listener_sock), \
+ FN(skc_lookup_tcp), \
+ FN(tcp_check_syncookie),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2590,9 +2638,18 @@ enum bpf_func_id {
/* Current network namespace */
#define BPF_F_CURRENT_NETNS (-1L)
+/* BPF_FUNC_skb_adjust_room flags. */
+#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
+
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL <