summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c6
-rw-r--r--kernel/bpf/cgroup.c39
-rw-r--r--kernel/bpf/disasm.c52
-rw-r--r--kernel/bpf/disasm.h5
-rw-r--r--kernel/bpf/inode.c3
-rw-r--r--kernel/bpf/sockmap.c1020
-rw-r--r--kernel/bpf/stackmap.c257
-rw-r--r--kernel/bpf/syscall.c198
-rw-r--r--kernel/bpf/verifier.c73
-rw-r--r--kernel/cgroup/cgroup.c21
-rw-r--r--kernel/irq/Kconfig6
-rw-r--r--kernel/irq/autoprobe.c2
-rw-r--r--kernel/irq/chip.c10
-rw-r--r--kernel/irq/cpuhotplug.c1
-rw-r--r--kernel/irq/debugfs.c8
-rw-r--r--kernel/irq/devres.c1
-rw-r--r--kernel/irq/dummychip.c1
-rw-r--r--kernel/irq/generic-chip.c1
-rw-r--r--kernel/irq/handle.c23
-rw-r--r--kernel/irq/ipi.c3
-rw-r--r--kernel/irq/irq_sim.c14
-rw-r--r--kernel/irq/irqdesc.c23
-rw-r--r--kernel/irq/irqdomain.c2
-rw-r--r--kernel/irq/manage.c21
-rw-r--r--kernel/irq/matrix.c8
-rw-r--r--kernel/irq/msi.c3
-rw-r--r--kernel/irq/pm.c3
-rw-r--r--kernel/irq/proc.c2
-rw-r--r--kernel/irq/resend.c2
-rw-r--r--kernel/irq/spurious.c2
-rw-r--r--kernel/irq/timings.c13
-rw-r--r--kernel/pid_namespace.c67
-rw-r--r--kernel/power/qos.c2
-rw-r--r--kernel/printk/printk.c59
-rw-r--r--kernel/signal.c4
-rw-r--r--kernel/softirq.c82
-rw-r--r--kernel/sysctl.c11
-rw-r--r--kernel/time/alarmtimer.c34
-rw-r--r--kernel/time/clocksource.c66
-rw-r--r--kernel/time/hrtimer.c16
-rw-r--r--kernel/time/posix-stubs.c2
-rw-r--r--kernel/time/posix-timers.c26
-rw-r--r--kernel/time/tick-common.c15
-rw-r--r--kernel/time/tick-internal.h6
-rw-r--r--kernel/time/tick-sched.c9
-rw-r--r--kernel/time/time.c12
-rw-r--r--kernel/time/timekeeping.c219
-rw-r--r--kernel/time/timekeeping.h1
-rw-r--r--kernel/trace/bpf_trace.c226
-rw-r--r--kernel/trace/trace.c4
-rw-r--r--kernel/trace/trace_printk.c4
-rw-r--r--kernel/workqueue.c60
52 files changed, 2155 insertions, 593 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 227db99b0f19..d97e8f0f73ca 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -443,15 +443,15 @@ static int audit_set_failure(u32 state)
* Drop any references inside the auditd connection tracking struct and free
* the memory.
*/
- static void auditd_conn_free(struct rcu_head *rcu)
- {
+static void auditd_conn_free(struct rcu_head *rcu)
+{
struct auditd_connection *ac;
ac = container_of(rcu, struct auditd_connection, rcu);
put_pid(ac->pid);
put_net(ac->net);
kfree(ac);
- }
+}
/**
* auditd_set - Set/Reset the auditd connection state
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index c1c0b60d3f2f..43171a0bb02b 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -495,6 +495,42 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
/**
+ * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
+ * provided by user sockaddr
+ * @sk: sock struct that will use sockaddr
+ * @uaddr: sockaddr struct provided by user
+ * @type: The type of program to be exectuted
+ *
+ * socket is expected to be of type INET or INET6.
+ *
+ * This function will return %-EPERM if an attached program is found and
+ * returned value != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
+ struct sockaddr *uaddr,
+ enum bpf_attach_type type)
+{
+ struct bpf_sock_addr_kern ctx = {
+ .sk = sk,
+ .uaddr = uaddr,
+ };
+ struct cgroup *cgrp;
+ int ret;
+
+ /* Check socket family since not all sockets represent network
+ * endpoint (e.g. AF_UNIX).
+ */
+ if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
+ return 0;
+
+ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
+
+ return ret == 1 ? 0 : -EPERM;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
+
+/**
* __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
* @sk: socket to get cgroup from
* @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
@@ -545,7 +581,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
static const struct bpf_func_proto *
-cgroup_dev_func_proto(enum bpf_func_id func_id)
+cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
@@ -566,6 +602,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id)
static bool cgroup_dev_is_valid_access(int off, int size,
enum bpf_access_type type,
+ const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 8740406df2cd..d6b76377cb6e 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -113,16 +113,16 @@ static const char *const bpf_jmp_string[16] = {
};
static void print_bpf_end_insn(bpf_insn_print_t verbose,
- struct bpf_verifier_env *env,
+ void *private_data,
const struct bpf_insn *insn)
{
- verbose(env, "(%02x) r%d = %s%d r%d\n", insn->code, insn->dst_reg,
+ verbose(private_data, "(%02x) r%d = %s%d r%d\n",
+ insn->code, insn->dst_reg,
BPF_SRC(insn->code) == BPF_TO_BE ? "be" : "le",
insn->imm, insn->dst_reg);
}
void print_bpf_insn(const struct bpf_insn_cbs *cbs,
- struct bpf_verifier_env *env,
const struct bpf_insn *insn,
bool allow_ptr_leaks)
{
@@ -132,23 +132,23 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
if (class == BPF_ALU || class == BPF_ALU64) {
if (BPF_OP(insn->code) == BPF_END) {
if (class == BPF_ALU64)
- verbose(env, "BUG_alu64_%02x\n", insn->code);
+ verbose(cbs->private_data, "BUG_alu64_%02x\n", insn->code);
else
- print_bpf_end_insn(verbose, env, insn);
+ print_bpf_end_insn(verbose, cbs->private_data, insn);
} else if (BPF_OP(insn->code) == BPF_NEG) {
- verbose(env, "(%02x) r%d = %s-r%d\n",
+ verbose(cbs->private_data, "(%02x) r%d = %s-r%d\n",
insn->code, insn->dst_reg,
class == BPF_ALU ? "(u32) " : "",
insn->dst_reg);
} else if (BPF_SRC(insn->code) == BPF_X) {
- verbose(env, "(%02x) %sr%d %s %sr%d\n",
+ verbose(cbs->private_data, "(%02x) %sr%d %s %sr%d\n",
insn->code, class == BPF_ALU ? "(u32) " : "",
insn->dst_reg,
bpf_alu_string[BPF_OP(insn->code) >> 4],
class == BPF_ALU ? "(u32) " : "",
insn->src_reg);
} else {
- verbose(env, "(%02x) %sr%d %s %s%d\n",
+ verbose(cbs->private_data, "(%02x) %sr%d %s %s%d\n",
insn->code, class == BPF_ALU ? "(u32) " : "",
insn->dst_reg,
bpf_alu_string[BPF_OP(insn->code) >> 4],
@@ -157,46 +157,46 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
}
} else if (class == BPF_STX) {
if (BPF_MODE(insn->code) == BPF_MEM)
- verbose(env, "(%02x) *(%s *)(r%d %+d) = r%d\n",
+ verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = r%d\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg,
insn->off, insn->src_reg);
else if (BPF_MODE(insn->code) == BPF_XADD)
- verbose(env, "(%02x) lock *(%s *)(r%d %+d) += r%d\n",
+ verbose(cbs->private_data, "(%02x) lock *(%s *)(r%d %+d) += r%d\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg, insn->off,
insn->src_reg);
else
- verbose(env, "BUG_%02x\n", insn->code);
+ verbose(cbs->private_data, "BUG_%02x\n", insn->code);
} else if (class == BPF_ST) {
if (BPF_MODE(insn->code) != BPF_MEM) {
- verbose(env, "BUG_st_%02x\n", insn->code);
+ verbose(cbs->private_data, "BUG_st_%02x\n", insn->code);
return;
}
- verbose(env, "(%02x) *(%s *)(r%d %+d) = %d\n",
+ verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg,
insn->off, insn->imm);
} else if (class == BPF_LDX) {
if (BPF_MODE(insn->code) != BPF_MEM) {
- verbose(env, "BUG_ldx_%02x\n", insn->code);
+ verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code);
return;
}
- verbose(env, "(%02x) r%d = *(%s *)(r%d %+d)\n",
+ verbose(cbs->private_data, "(%02x) r%d = *(%s *)(r%d %+d)\n",
insn->code, insn->dst_reg,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->src_reg, insn->off);
} else if (class == BPF_LD) {
if (BPF_MODE(insn->code) == BPF_ABS) {
- verbose(env, "(%02x) r0 = *(%s *)skb[%d]\n",
+ verbose(cbs->private_data, "(%02x) r0 = *(%s *)skb[%d]\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->imm);
} else if (BPF_MODE(insn->code) == BPF_IND) {
- verbose(env, "(%02x) r0 = *(%s *)skb[r%d + %d]\n",
+ verbose(cbs->private_data, "(%02x) r0 = *(%s *)skb[r%d + %d]\n",
insn->code,
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->src_reg, insn->imm);
@@ -212,12 +212,12 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
if (map_ptr && !allow_ptr_leaks)
imm = 0;
- verbose(env, "(%02x) r%d = %s\n",
+ verbose(cbs->private_data, "(%02x) r%d = %s\n",
insn->code, insn->dst_reg,
__func_imm_name(cbs, insn, imm,
tmp, sizeof(tmp)));
} else {
- verbose(env, "BUG_ld_%02x\n", insn->code);
+ verbose(cbs->private_data, "BUG_ld_%02x\n", insn->code);
return;
}
} else if (class == BPF_JMP) {
@@ -227,35 +227,35 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
char tmp[64];
if (insn->src_reg == BPF_PSEUDO_CALL) {
- verbose(env, "(%02x) call pc%s\n",
+ verbose(cbs->private_data, "(%02x) call pc%s\n",
insn->code,
__func_get_name(cbs, insn,
tmp, sizeof(tmp)));
} else {
strcpy(tmp, "unknown");
- verbose(env, "(%02x) call %s#%d\n", insn->code,
+ verbose(cbs->private_data, "(%02x) call %s#%d\n", insn->code,
__func_get_name(cbs, insn,
tmp, sizeof(tmp)),
insn->imm);
}
} else if (insn->code == (BPF_JMP | BPF_JA)) {
- verbose(env, "(%02x) goto pc%+d\n",
+ verbose(cbs->private_data, "(%02x) goto pc%+d\n",
insn->code, insn->off);
} else if (insn->code == (BPF_JMP | BPF_EXIT)) {
- verbose(env, "(%02x) exit\n", insn->code);
+ verbose(cbs->private_data, "(%02x) exit\n", insn->code);
} else if (BPF_SRC(insn->code) == BPF_X) {
- verbose(env, "(%02x) if r%d %s r%d goto pc%+d\n",
+ verbose(cbs->private_data, "(%02x) if r%d %s r%d goto pc%+d\n",
insn->code, insn->dst_reg,
bpf_jmp_string[BPF_OP(insn->code) >> 4],
insn->src_reg, insn->off);
} else {
- verbose(env, "(%02x) if r%d %s 0x%x goto pc%+d\n",
+ verbose(cbs->private_data, "(%02x) if r%d %s 0x%x goto pc%+d\n",
insn->code, insn->dst_reg,
bpf_jmp_string[BPF_OP(insn->code) >> 4],
insn->imm, insn->off);
}
} else {
- verbose(env, "(%02x) %s\n",
+ verbose(cbs->private_data, "(%02x) %s\n",
insn->code, bpf_class_string[class]);
}
}
diff --git a/kernel/bpf/disasm.h b/kernel/bpf/disasm.h
index 266fe8ee542b..e1324a834a24 100644
--- a/kernel/bpf/disasm.h
+++ b/kernel/bpf/disasm.h
@@ -22,14 +22,12 @@
#include <string.h>
#endif
-struct bpf_verifier_env;
-
extern const char *const bpf_alu_string[16];
extern const char *const bpf_class_string[8];
const char *func_id_name(int id);
-typedef __printf(2, 3) void (*bpf_insn_print_t)(struct bpf_verifier_env *env,
+typedef __printf(2, 3) void (*bpf_insn_print_t)(void *private_data,
const char *, ...);
typedef const char *(*bpf_insn_revmap_call_t)(void *private_data,
const struct bpf_insn *insn);
@@ -45,7 +43,6 @@ struct bpf_insn_cbs {
};
void print_bpf_insn(const struct bpf_insn_cbs *cbs,
- struct bpf_verifier_env *env,
const struct bpf_insn *insn,
bool allow_ptr_leaks);
#endif
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 81e2f6995adb..bf6da59ae0d0 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -178,6 +178,9 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
static struct dentry *
bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
{
+ /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future
+ * extensions.
+ */
if (strchr(dentry->d_name.name, '.'))
return ERR_PTR(-EPERM);
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index a927e89dad6e..d2bda5aa25d7 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -38,8 +38,11 @@
#include <linux/skbuff.h>
#include <linux/workqueue.h>
#include <linux/list.h>
+#include <linux/mm.h>
#include <net/strparser.h>
#include <net/tcp.h>
+#include <linux/ptr_ring.h>
+#include <net/inet_common.h>
#define SOCK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
@@ -47,6 +50,7 @@
struct bpf_stab {
struct bpf_map map;
struct sock **sock_map;
+ struct bpf_prog *bpf_tx_msg;
struct bpf_prog *bpf_parse;
struct bpf_prog *bpf_verdict;
};
@@ -62,8 +66,7 @@ struct smap_psock_map_entry {
struct smap_psock {
struct rcu_head rcu;
- /* refcnt is used inside sk_callback_lock */
- u32 refcnt;
+ refcount_t refcnt;
/* datapath variables */
struct sk_buff_head rxqueue;
@@ -74,7 +77,17 @@ struct smap_psock {
int save_off;
struct sk_buff *save_skb;
+ /* datapath variables for tx_msg ULP */
+ struct sock *sk_redir;
+ int apply_bytes;
+ int cork_bytes;
+ int sg_size;
+ int eval;
+ struct sk_msg_buff *cork;
+ struct list_head ingress;
+
struct strparser strp;
+ struct bpf_prog *bpf_tx_msg;
struct bpf_prog *bpf_parse;
struct bpf_prog *bpf_verdict;
struct list_head maps;
@@ -92,11 +105,33 @@ struct smap_psock {
void (*save_write_space)(struct sock *sk);
};
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int nonblock, int flags, int *addr_len);
+static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags);
+
static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
{
return rcu_dereference_sk_user_data(sk);
}
+static bool bpf_tcp_stream_read(const struct sock *sk)
+{
+ struct smap_psock *psock;
+ bool empty = true;
+
+ rcu_read_lock();
+ psock = smap_psock_sk(sk);
+ if (unlikely(!psock))
+ goto out;
+ empty = list_empty(&psock->ingress);
+out:
+ rcu_read_unlock();
+ return !empty;
+}
+
static struct proto tcp_bpf_proto;
static int bpf_tcp_init(struct sock *sk)
{
@@ -116,31 +151,48 @@ static int bpf_tcp_init(struct sock *sk)
psock->save_close = sk->sk_prot->close;
psock->sk_proto = sk->sk_prot;
+
+ if (psock->bpf_tx_msg) {
+ tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
+ tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
+ tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
+ tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
+ }
+
sk->sk_prot = &tcp_bpf_proto;
rcu_read_unlock();
return 0;
}
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md);
+
static void bpf_tcp_release(struct sock *sk)
{
struct smap_psock *psock;
rcu_read_lock();
psock = smap_psock_sk(sk);
+ if (unlikely(!psock))
+ goto out;
- if (likely(psock)) {
- sk->sk_prot = psock->sk_proto;
- psock->sk_proto = NULL;
+ if (psock->cork) {
+ free_start_sg(psock->sock, psock->cork);
+ kfree(psock->cork);
+ psock->cork = NULL;
}
+
+ sk->sk_prot = psock->sk_proto;
+ psock->sk_proto = NULL;
+out:
rcu_read_unlock();
}
-static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-
static void bpf_tcp_close(struct sock *sk, long timeout)
{
void (*close_fun)(struct sock *sk, long timeout);
struct smap_psock_map_entry *e, *tmp;
+ struct sk_msg_buff *md, *mtmp;
struct smap_psock *psock;
struct sock *osk;
@@ -159,6 +211,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
close_fun = psock->save_close;
write_lock_bh(&sk->sk_callback_lock);
+ list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
+ list_del(&md->list);
+ free_start_sg(psock->sock, md);
+ kfree(md);
+ }
+
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
osk = cmpxchg(e->entry, sk, NULL);
if (osk == sk) {
@@ -175,6 +233,7 @@ enum __sk_action {
__SK_DROP = 0,
__SK_PASS,
__SK_REDIRECT,
+ __SK_NONE,
};
static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
@@ -186,10 +245,782 @@ static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
.release = bpf_tcp_release,
};
+static int memcopy_from_iter(struct sock *sk,
+ struct sk_msg_buff *md,
+ struct iov_iter *from, int bytes)
+{
+ struct scatterlist *sg = md->sg_data;
+ int i = md->sg_curr, rc = -ENOSPC;
+
+ do {
+ int copy;
+ char *to;
+
+ if (md->sg_copybreak >= sg[i].length) {
+ md->sg_copybreak = 0;
+
+ if (++i == MAX_SKB_FRAGS)
+ i = 0;
+
+ if (i == md->sg_end)
+ break;
+ }
+
+ copy = sg[i].length - md->sg_copybreak;
+ to = sg_virt(&sg[i]) + md->sg_copybreak;
+ md->sg_copybreak += copy;
+
+ if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY)
+ rc = copy_from_iter_nocache(to, copy, from);
+ else
+ rc = copy_from_iter(to, copy, from);
+
+ if (rc != copy) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ bytes -= copy;
+ if (!bytes)
+ break;
+
+ md->sg_copybreak = 0;
+ if (++i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != md->sg_end);
+out:
+ md->sg_curr = i;
+ return rc;
+}
+
+static int bpf_tcp_push(struct sock *sk, int apply_bytes,
+ struct sk_msg_buff *md,
+ int flags, bool uncharge)
+{
+ bool apply = apply_bytes;
+ struct scatterlist *sg;
+ int offset, ret = 0;
+ struct page *p;
+ size_t size;
+
+ while (1) {
+ sg = md->sg_data + md->sg_start;
+ size = (apply && apply_bytes < sg->length) ?
+ apply_bytes : sg->length;
+ offset = sg->offset;
+
+ tcp_rate_check_app_limited(sk);
+ p = sg_page(sg);
+retry:
+ ret = do_tcp_sendpages(sk, p, offset, size, flags);
+ if (ret != size) {
+ if (ret > 0) {
+ if (apply)
+ apply_bytes -= ret;
+ size -= ret;
+ offset += ret;
+ if (uncharge)
+ sk_mem_uncharge(sk, ret);
+ goto retry;
+ }
+
+ sg->length = size;
+ sg->offset = offset;
+ return ret;
+ }
+
+ if (apply)
+ apply_bytes -= ret;
+ sg->offset += ret;
+ sg->length -= ret;
+ if (uncharge)
+ sk_mem_uncharge(sk, ret);
+
+ if (!sg->length) {
+ put_page(p);
+ md->sg_start++;
+ if (md->sg_start == MAX_SKB_FRAGS)
+ md->sg_start = 0;
+ sg_init_table(sg, 1);
+
+ if (md->sg_start == md->sg_end)
+ break;
+ }
+
+ if (apply && !apply_bytes)
+ break;
+ }
+ return 0;
+}
+
+static inline void bpf_compute_data_pointers_sg(struct sk_msg_buff *md)
+{
+ struct scatterlist *sg = md->sg_data + md->sg_start;
+
+ if (md->sg_copy[md->sg_start]) {
+ md->data = md->data_end = 0;
+ } else {
+ md->data = sg_virt(sg);
+ md->data_end = md->data + sg->length;
+ }
+}
+
+static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
+{
+ struct scatterlist *sg = md->sg_data;
+ int i = md->sg_start;
+
+ do {
+ int uncharge = (bytes < sg[i].length) ? bytes : sg[i].length;
+
+ sk_mem_uncharge(sk, uncharge);
+ bytes -= uncharge;
+ if (!bytes)
+ break;
+ i++;
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ } while (i != md->sg_end);
+}
+
+static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
+{
+ struct scatterlist *sg = md->sg_data;
+ int i = md->sg_start, free;
+
+ while (bytes && sg[i].length) {
+ free = sg[i].length;
+ if (bytes < free) {
+ sg[i].length -= bytes;
+ sg[i].offset += bytes;
+ sk_mem_uncharge(sk, bytes);
+ break;
+ }
+
+ sk_mem_uncharge(sk, sg[i].length);
+ put_page(sg_page(&sg[i]));
+ bytes -= sg[i].length;
+ sg[i].length = 0;
+ sg[i].page_link = 0;
+ sg[i].offset = 0;
+ i++;
+
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ }
+}
+
+static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
+{
+ struct scatterlist *sg = md->sg_data;
+ int i = start, free = 0;
+
+ while (sg[i].length) {
+ free += sg[i].length;
+ sk_mem_uncharge(sk, sg[i].length);
+ put_page(sg_page(&sg[i]));
+ sg[i].length = 0;
+ sg[i].page_link = 0;
+ sg[i].offset = 0;
+ i++;
+
+ if (i == MAX_SKB_FRAGS)
+ i = 0;
+ }
+
+ return free;
+}
+
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md)
+{
+ int free = free_sg(sk, md->sg_start, md);
+
+ md->sg_start = md->sg_end;
+ return free;
+}
+
+static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
+{
+ return free_sg(sk, md->sg_curr, md);
+}
+
+static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
+{
+ return ((_rc == SK_PASS) ?
+ (md->map ? __SK_REDIRECT : __SK_PASS) :
+ __SK_DROP);
+}
+
+static unsigned int smap_do_tx_msg(struct sock *sk,
+ struct smap_psock *psock,
+ struct sk_msg_buff *md)
+{
+ struct bpf_prog *prog;
+ unsigned int rc, _rc;
+
+ preempt_disable();
+ rcu_read_lock();
+
+ /* If the policy was removed mid-send then default to 'accept' */
+ prog = READ_ONCE(psock->bpf_tx_msg);
+ if (unlikely(!prog)) {
+ _rc = SK_PASS;
+ goto verdict;
+ }
+
+ bpf_compute_data_pointers_sg(md);
+ rc = (*prog->bpf_func)