Merge branches 'topic/twl4030' and 'topic/twl6040' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into asoc-twl-breakage

author: Mark Brown <broonie@kernel.org> 2018-01-25 18:16:26 +0000
committer: Mark Brown <broonie@kernel.org> 2018-01-25 18:16:26 +0000
commit: 0b5eca67bd2d0e6f6d0ccdc316aced0cc4bf2e9f (patch)
tree: b6597ffe774a67ab5b8eebd57c9a723732d3a39c /kernel/bpf
parent: 3bb0f7c31b1aedd0f85c675297031281799145d7 (diff)
parent: 93a00c467fe998bf5716cbc9cabc127046054782 (diff)
download: linux-0b5eca67bd2d0e6f6d0ccdc316aced0cc4bf2e9f.tar.gz
linux-0b5eca67bd2d0e6f6d0ccdc316aced0cc4bf2e9f.tar.bz2
linux-0b5eca67bd2d0e6f6d0ccdc316aced0cc4bf2e9f.zip
17 files changed, 3003 insertions, 878 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index af3ab6164ff5..e691da0b3bab 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -3,8 +3,11 @@ obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
+obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
+obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
+obj-$(CONFIG_BPF_SYSCALL) += offload.o
 ifeq ($(CONFIG_STREAM_PARSER),y)
 obj-$(CONFIG_BPF_SYSCALL) += sockmap.o
 endif
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index e2636737b69b..7c25426d3cf5 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -19,6 +19,9 @@
 
 #include "map_in_map.h"
 
+#define ARRAY_CREATE_FLAG_MASK \
+	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
 static void bpf_array_free_percpu(struct bpf_array *array)
 {
 	int i;
@@ -56,7 +59,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size == 0 || attr->map_flags & ~BPF_F_NUMA_NODE ||
+	    attr->value_size == 0 ||
+	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
 	    (percpu && numa_node != NUMA_NO_NODE))
 		return ERR_PTR(-EINVAL);
 
@@ -492,7 +496,7 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
 
 	ee = ERR_PTR(-EOPNOTSUPP);
 	event = perf_file->private_data;
-	if (perf_event_read_local(event, &value) == -EOPNOTSUPP)
+	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
 		goto err_out;
 
 	ee = bpf_event_entry_gen(perf_file, map_file);
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 546113430049..b789ab78d28f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -27,129 +27,405 @@ void cgroup_bpf_put(struct cgroup *cgrp)
 {
 	unsigned int type;
 
-	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
-		struct bpf_prog *prog = cgrp->bpf.prog[type];
-
-		if (prog) {
-			bpf_prog_put(prog);
+	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
+		struct list_head *progs = &cgrp->bpf.progs[type];
+		struct bpf_prog_list *pl, *tmp;
+
+		list_for_each_entry_safe(pl, tmp, progs, node) {
+			list_del(&pl->node);
+			bpf_prog_put(pl->prog);
+			kfree(pl);
 			static_branch_dec(&cgroup_bpf_enabled_key);
 		}
+		bpf_prog_array_free(cgrp->bpf.effective[type]);
+	}
+}
+
+/* count number of elements in the list.
+ * it's slow but the list cannot be long
+ */
+static u32 prog_list_length(struct list_head *head)
+{
+	struct bpf_prog_list *pl;
+	u32 cnt = 0;
+
+	list_for_each_entry(pl, head, node) {
+		if (!pl->prog)
+			continue;
+		cnt++;
 	}
+	return cnt;
+}
+
+/* if parent has non-overridable prog attached,
+ * disallow attaching new programs to the descendent cgroup.
+ * if parent has overridable or multi-prog, allow attaching
+ */
+static bool hierarchy_allows_attach(struct cgroup *cgrp,
+				    enum bpf_attach_type type,
+				    u32 new_flags)
+{
+	struct cgroup *p;
+
+	p = cgroup_parent(cgrp);
+	if (!p)
+		return true;
+	do {
+		u32 flags = p->bpf.flags[type];
+		u32 cnt;
+
+		if (flags & BPF_F_ALLOW_MULTI)
+			return true;
+		cnt = prog_list_length(&p->bpf.progs[type]);
+		WARN_ON_ONCE(cnt > 1);
+		if (cnt == 1)
+			return !!(flags & BPF_F_ALLOW_OVERRIDE);
+		p = cgroup_parent(p);
+	} while (p);
+	return true;
+}
+
+/* compute a chain of effective programs for a given cgroup:
+ * start from the list of programs in this cgroup and add
+ * all parent programs.
+ * Note that parent's F_ALLOW_OVERRIDE-type program is yielding
+ * to programs in this cgroup
+ */
+static int compute_effective_progs(struct cgroup *cgrp,
+				   enum bpf_attach_type type,
+				   struct bpf_prog_array __rcu **array)
+{
+	struct bpf_prog_array __rcu *progs;
+	struct bpf_prog_list *pl;
+	struct cgroup *p = cgrp;
+	int cnt = 0;
+
+	/* count number of effective programs by walking parents */
+	do {
+		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+			cnt += prog_list_length(&p->bpf.progs[type]);
+		p = cgroup_parent(p);
+	} while (p);
+
+	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
+	if (!progs)
+		return -ENOMEM;
+
+	/* populate the array with effective progs */
+	cnt = 0;
+	p = cgrp;
+	do {
+		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+			list_for_each_entry(pl,
+					    &p->bpf.progs[type], node) {
+				if (!pl->prog)
+					continue;
+				rcu_dereference_protected(progs, 1)->
+					progs[cnt++] = pl->prog;
+			}
+		p = cgroup_parent(p);
+	} while (p);
+
+	*array = progs;
+	return 0;
+}
+
+static void activate_effective_progs(struct cgroup *cgrp,
+				     enum bpf_attach_type type,
+				     struct bpf_prog_array __rcu *array)
+{
+	struct bpf_prog_array __rcu *old_array;
+
+	old_array = xchg(&cgrp->bpf.effective[type], array);
+	/* free prog array after grace period, since __cgroup_bpf_run_*()
+	 * might be still walking the array
+	 */
+	bpf_prog_array_free(old_array);
 }
 
 /**
  * cgroup_bpf_inherit() - inherit effective programs from parent
  * @cgrp: the cgroup to modify
- * @parent: the parent to inherit from
  */
-void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
+int cgroup_bpf_inherit(struct cgroup *cgrp)
 {
-	unsigned int type;
+/* has to use marco instead of const int, since compiler thinks
+ * that array below is variable length
+ */
+#define	NR ARRAY_SIZE(cgrp->bpf.effective)
+	struct bpf_prog_array __rcu *arrays[NR] = {};
+	int i;
 
-	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
-		struct bpf_prog *e;
+	for (i = 0; i < NR; i++)
+		INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
 
-		e = rcu_dereference_protected(parent->bpf.effective[type],
-					      lockdep_is_held(&cgroup_mutex));
-		rcu_assign_pointer(cgrp->bpf.effective[type], e);
-		cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
-	}
+	for (i = 0; i < NR; i++)
+		if (compute_effective_progs(cgrp, i, &arrays[i]))
+			goto cleanup;
+
+	for (i = 0; i < NR; i++)
+		activate_effective_progs(cgrp, i, arrays[i]);
+
+	return 0;
+cleanup:
+	for (i = 0; i < NR; i++)
+		bpf_prog_array_free(arrays[i]);
+	return -ENOMEM;
 }
 
+#define BPF_CGROUP_MAX_PROGS 64
+
 /**
- * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
+ * __cgroup_bpf_attach() - Attach the program to a cgroup, and
  *                         propagate the change to descendants
  * @cgrp: The cgroup which descendants to traverse
- * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
- * @prog: A new program to pin
- * @type: Type of pinning operation (ingress/egress)
- *
- * Each cgroup has a set of two pointers for bpf programs; one for eBPF
- * programs it owns, and which is effective for execution.
- *
- * If @prog is not %NULL, this function attaches a new program to the cgroup
- * and releases the one that is currently attached, if any. @prog is then made
- * the effective program of type @type in that cgroup.
- *
- * If @prog is %NULL, the currently attached program of type @type is released,
- * and the effective program of the parent cgroup (if any) is inherited to
- * @cgrp.
- *
- * Then, the descendants of @cgrp are walked and the effective program for
- * each of them is set to the effective program of @cgrp unless the
- * descendant has its own program attached, in which case the subbranch is
- * skipped. This ensures that delegated subcgroups with own programs are left
- * untouched.
+ * @prog: A program to attach
+ * @type: Type of attach operation
  *
  * Must be called with cgroup_mutex held.
  */
-int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
-			struct bpf_prog *prog, enum bpf_attach_type type,
-			bool new_overridable)
+int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
+			enum bpf_attach_type type, u32 flags)
 {
-	struct bpf_prog *old_prog, *effective = NULL;
-	struct cgroup_subsys_state *pos;
-	bool overridable = true;
-
-	if (parent) {
-		overridable = !parent->bpf.disallow_override[type];
-		effective = rcu_dereference_protected(parent->bpf.effective[type],
-						      lockdep_is_held(&cgroup_mutex));
-	}
-
-	if (prog && effective && !overridable)
-		/* if parent has non-overridable prog attached, disallow
-		 * attaching new programs to descendent cgroup
-		 */
+	struct list_head *progs = &cgrp->bpf.progs[type];
+	struct bpf_prog *old_prog = NULL;
+	struct cgroup_subsys_state *css;
+	struct bpf_prog_list *pl;
+	bool pl_was_allocated;
+	int err;
+
+	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
+		/* invalid combination */
+		return -EINVAL;
+
+	if (!hierarchy_allows_attach(cgrp, type, flags))
 		return -EPERM;
 
-	if (prog && effective && overridable != new_overridable)
-		/* if parent has overridable prog attached, only
-		 * allow overridable programs in descendent cgroup
+	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
+		/* Disallow attaching non-overridable on top
+		 * of existing overridable in this cgroup.
+		 * Disallow attaching multi-prog if overridable or none
 		 */
 		return -EPERM;
 
-	old_prog = cgrp->bpf.prog[type];
-
-	if (prog) {
-		overridable = new_overridable;
-		effective = prog;
-		if (old_prog &&
-		    cgrp->bpf.disallow_override[type] == new_overridable)
-			/* disallow attaching non-overridable on top
-			 * of existing overridable in this cgroup
-			 * and vice versa
-			 */
-			return -EPERM;
+	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
+		return -E2BIG;
+
+	if (flags & BPF_F_ALLOW_MULTI) {
+		list_for_each_entry(pl, progs, node)
+			if (pl->prog == prog)
+				/* disallow attaching the same prog twice */
+				return -EINVAL;
+
+		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
+		if (!pl)
+			return -ENOMEM;
+		pl_was_allocated = true;
+		pl->prog = prog;
+		list_add_tail(&pl->node, progs);
+	} else {
+		if (list_empty(progs)) {
+			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
+			if (!pl)
+				return -ENOMEM;
+			pl_was_allocated = true;
+			list_add_tail(&pl->node, progs);
+		} else {
+			pl = list_first_entry(progs, typeof(*pl), node);
+			old_prog = pl->prog;
+			pl_was_allocated = false;
+		}
+		pl->prog = prog;
 	}
 
-	if (!prog && !old_prog)
-		/* report error when trying to detach and nothing is attached */
-		return -ENOENT;
+	cgrp->bpf.flags[type] = flags;
 
-	cgrp->bpf.prog[type] = prog;
+	/* allocate and recompute effective prog arrays */
+	css_for_each_descendant_pre(css, &cgrp->self) {
+		struct cgroup *desc = container_of(css, struct cgroup, self);
 
-	css_for_each_descendant_pre(pos, &cgrp->self) {
-		struct cgroup *desc = container_of(pos, struct cgroup, self);
-
-		/* skip the subtree if the descendant has its own program */
-		if (desc->bpf.prog[type] && desc != cgrp) {
-			pos = css_rightmost_descendant(pos);
-		} else {
-			rcu_assign_pointer(desc->bpf.effective[type],
-					   effective);
-			desc->bpf.disallow_override[type] = !overridable;
-		}
+		err = compute_effective_progs(desc, type, &desc->bpf.inactive);
+		if (err)
+			goto cleanup;
 	}
 
-	if (prog)
-		static_branch_inc(&cgroup_bpf_enabled_key);
+	/* all allocations were successful. Activate all prog arrays */
+	css_for_each_descendant_pre(css, &cgrp->self) {
+		struct cgroup *desc = container_of(css, struct cgroup, self);
 
+		activate_effective_progs(desc, type, desc->bpf.inactive);
+		desc->bpf.inactive = NULL;
+	}
+
+	static_branch_inc(&cgroup_bpf_enabled_key);
 	if (old_prog) {
 		bpf_prog_put(old_prog);
 		static_branch_dec(&cgroup_bpf_enabled_key);
 	}
 	return 0;
+
+cleanup:
+	/* oom while computing effective. Free all computed effective arrays
+	 * since they were not activated
+	 */
+	css_for_each_descendant_pre(css, &cgrp->self) {
+		struct cgroup *desc = container_of(css, struct cgroup, self);
+
+		bpf_prog_array_free(desc->bpf.inactive);
+		desc->bpf.inactive = NULL;
+	}
+
+	/* and cleanup the prog list */
+	pl->prog = old_prog;
+	if (pl_was_allocated) {
+		list_del(&pl->node);
+		kfree(pl);
+	}
+	return err;
+}
+
+/**
+ * __cgroup_bpf_detach() - Detach the program from a cgroup, and
+ *                         propagate the change to descendants
+ * @cgrp: The cgroup which descendants to traverse
+ * @prog: A program to detach or NULL
+ * @type: Type of detach operation
+ *
+ * Must be called with cgroup_mutex held.
+ */
+int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+			enum bpf_attach_type type, u32 unused_flags)
+{
+	struct list_head *progs = &cgrp->bpf.progs[type];
+	u32 flags = cgrp->bpf.flags[type];
+	struct bpf_prog *old_prog = NULL;
+	struct cgroup_subsys_state *css;
+	struct bpf_prog_list *pl;
+	int err;
+
+	if (flags & BPF_F_ALLOW_MULTI) {
+		if (!prog)
+			/* to detach MULTI prog the user has to specify valid FD
+			 * of the program to be detached
+			 */
+			return -EINVAL;
+	} else {
+		if (list_empty(progs))
+			/* report error when trying to detach and nothing is attached */
+			return -ENOENT;
+	}
+
+	if (flags & BPF_F_ALLOW_MULTI) {
+		/* find the prog and detach it */
+		list_for_each_entry(pl, progs, node) {
+			if (pl->prog != prog)
+				continue;
+			old_prog = prog;
+			/* mark it deleted, so it's ignored while
+			 * recomputing effective
+			 */
+			pl->prog = NULL;
+			break;
+		}
+		if (!old_prog)
+			return -ENOENT;
+	} else {
+		/* to maintain backward compatibility NONE and OVERRIDE cgroups
+		 * allow detaching with invalid FD (prog==NULL)
+		 */
+		pl = list_first_entry(progs, typeof(*pl), node);
+		old_prog = pl->prog;
+		pl->prog = NULL;
+	}
+
+	/* allocate and recompute effective prog arrays */
+	css_for_each_descendant_pre(css, &cgrp->self) {
+		struct cgroup *desc = container_of(css, struct cgroup, self);
+
+		err = compute_effective_progs(desc, type, &desc->bpf.inactive);
+		if (err)
+			goto cleanup;
+	}
+
+	/* all allocations were successful. Activate all prog arrays */
+	css_for_each_descendant_pre(css, &cgrp->self) {
+		struct cgroup *desc = container_of(css, struct cgroup, self);
+
+		activate_effective_progs(desc, type, desc->bpf.inactive);
+		desc->bpf.inactive = NULL;
+	}
+
+	/* now can actually delete it from this cgroup list */
+	list_del(&pl->node);
+	kfree(pl);
+	if (list_empty(progs))
+		/* last program was detached, reset flags to zero */
+		cgrp->bpf.flags[type] = 0;
+
+	bpf_prog_put(old_prog);
+	static_branch_dec(&cgroup_bpf_enabled_key);
+	return 0;
+
+cleanup:
+	/* oom while computing effective. Free all computed effective arrays
+	 * since they were not activated
+	 */
+	css_for_each_descendant_pre(css, &cgrp->self) {
+		struct cgroup *desc = container_of(css, struct cgroup, self);
+
+		bpf_prog_array_free(desc->bpf.inactive);
+		desc->bpf.inactive = NULL;
+	}
+
+	/* and restore back old_prog */
+	pl->prog = old_prog;
+	return err;
+}
+
+/* Must be called with cgroup_mutex held to avoid races. */
+int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
+		       union bpf_attr __user *uattr)
+{
+	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+	enum bpf_attach_type type = attr->query.attach_type;
+	struct list_head *progs = &cgrp->bpf.progs[type];
+	u32 flags = cgrp->bpf.flags[type];
+	int cnt, ret = 0, i;
+
+	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
+		cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
+	else
+		cnt = prog_list_length(progs);
+
+	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
+		return -EFAULT;
+	if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
+		return -EFAULT;
+	if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
+		/* return early if user requested only program count + flags */
+		return 0;
+	if (attr->query.prog_cnt < cnt) {
+		cnt = attr->query.prog_cnt;
+		ret = -ENOSPC;
+	}
+
+	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
+		return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
+						   prog_ids, cnt);
+	} else {
+		struct bpf_prog_list *pl;
+		u32 id;
+
+		i = 0;
+		list_for_each_entry(pl, progs, node) {
+			id = pl->prog->aux->id;
+			if (copy_to_user(prog_ids + i, &id, sizeof(id)))
+				return -EFAULT;
+			if (++i == cnt)
+				break;
+		}
+	}
+	return ret;
 }
 
 /**
@@ -171,36 +447,26 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 				struct sk_buff *skb,
 				enum bpf_attach_type type)
 {
-	struct bpf_prog *prog;
+	unsigned int offset = skb->data - skb_network_header(skb);
+	struct sock *save_sk;
 	struct cgroup *cgrp;
-	int ret = 0;
+	int ret;
 
 	if (!sk || !sk_fullsock(sk))
 		return 0;
 
-	if (sk->sk_family != AF_INET &&
-	    sk->sk_family != AF_INET6)
+	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
 		return 0;
 
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-
-	rcu_read_lock();
-
-	prog = rcu_dereference(cgrp->bpf.effective[type]);
-	if (prog) {
-		unsigned int offset = skb->data - skb_network_header(skb);
-		struct sock *save_sk = skb->sk;
-
-		skb->sk = sk;
-		__skb_push(skb, offset);
-		ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
-		__skb_pull(skb, offset);
-		skb->sk = save_sk;
-	}
-
-	rcu_read_unlock();
-
-	return ret;
+	save_sk = skb->sk;
+	skb->sk = sk;
+	__skb_push(skb, offset);
+	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
+				 bpf_prog_run_save_cb);
+	__skb_pull(skb, offset);
+	skb->sk = save_sk;
+	return ret == 1 ? 0 : -EPERM;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
 
@@ -221,19 +487,10 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 			       enum bpf_attach_type type)
 {
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	struct bpf_prog *prog;
-	int ret = 0;
+	int ret;
 
-
-	rcu_read_lock();
-
-	prog = rcu_dereference(cgrp->bpf.effective[type]);
-	if (prog)
-		ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
-
-	rcu_read_unlock();
-
-	return ret;
+	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN);
+	return ret == 1 ? 0 : -EPERM;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 
@@ -258,18 +515,77 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 				     enum bpf_attach_type type)
 {
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	struct bpf_prog *prog;
-	int ret = 0;
+	int ret;
 
+	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
+				 BPF_PROG_RUN);
+	return ret == 1 ? 0 : -EPERM;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
+
+int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
+				      short access, enum bpf_attach_type type)
+{
+	struct cgroup *cgrp;
+	struct bpf_cgroup_dev_ctx ctx = {
+		.access_type = (access << 16) | dev_type,
+		.major = major,
+		.minor = minor,
+	};
+	int allow = 1;
 
 	rcu_read_lock();
+	cgrp = task_dfl_cgroup(current);
+	allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
+				   BPF_PROG_RUN);
+	rcu_read_unlock();
 
-	prog = rcu_dereference(cgrp->bpf.effective[type]);
-	if (prog)
-		ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM;
+	return !allow;
+}
+EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
 
-	rcu_read_unlock();
+static const struct bpf_func_proto *
+cgroup_dev_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_map_lookup_elem:
+		return &bpf_map_lookup_elem_proto;
+	case BPF_FUNC_map_update_elem:
+		return &bpf_map_update_elem_proto;
+	case BPF_FUNC_map_delete_elem:
+		return &bpf_map_delete_elem_proto;
+	case BPF_FUNC_get_current_uid_gid:
+		return &bpf_get_current_uid_gid_proto;
+	case BPF_FUNC_trace_printk:
+		if (capable(CAP_SYS_ADMIN))
+			return bpf_get_trace_printk_proto();
+	default:
+		return NULL;
+	}
+}
 
-	return ret;
+static bool cgroup_dev_is_valid_access(int off, int size,
+				       enum bpf_access_type type,
+				       struct bpf_insn_access_aux *info)
+{
+	if (type == BPF_WRITE)
+		return false;
+
+	if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
+		return false;
+	/* The verifier guarantees that size > 0. */
+	if (off % size != 0)
+		return false;
+	if (size != sizeof(__u32))
+		return false;
+
+	return true;
 }
-EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
+
+const struct bpf_prog_ops cg_dev_prog_ops = {
+};
+
+const struct bpf_verifier_ops cg_dev_verifier_ops = {
+	.get_func_proto		= cgroup_dev_func_proto,
+	.is_valid_access	= cgroup_dev_is_valid_access,
+};
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7b62df86be1d..b9f8686a84cf 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -85,8 +85,6 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 	if (fp == NULL)
 		return NULL;
 
-	kmemcheck_annotate_bitfield(fp, meta);
-
 	aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
 	if (aux == NULL) {
 		vfree(fp);
@@ -127,8 +125,6 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 	if (fp == NULL) {
 		__bpf_prog_uncharge(fp_old->aux->user, delta);
 	} else {
-		kmemcheck_annotate_bitfield(fp, meta);
-
 		memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
 		fp->pages = pages;
 		fp->aux->prog = fp;
@@ -309,12 +305,25 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog,
 
 static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
 {
+	const char *end = sym + KSYM_NAME_LEN;
+
 	BUILD_BUG_ON(sizeof("bpf_prog_") +
-		     sizeof(prog->tag) * 2 + 1 > KSYM_NAME_LEN);
+		     sizeof(prog->tag) * 2 +
+		     /* name has been null terminated.
+		      * We should need +1 for the '_' preceding
+		      * the name.  However, the null character
+		      * is double counted between the name and the
+		      * sizeof("bpf_prog_") above, so we omit
+		      * the +1 here.
+		      */
+		     sizeof(prog->aux->name) > KSYM_NAME_LEN);
 
 	sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
 	sym  = bin2hex(sym, prog->tag, sizeof(prog->tag));
-	*sym = 0;
+	if (prog->aux->name[0])
+		snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
+	else
+		*sym = 0;
 }
 
 static __always_inline unsigned long
@@ -662,8 +671,6 @@ static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
 
 	fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL);
 	if (fp != NULL) {
-		kmemcheck_annotate_bitfield(fp, meta);
-
 		/* aux->prog still points to the fp_other one, so
 		 * when promoting the clone to the real program,
 		 * this still needs to be adapted.
@@ -1367,7 +1374,13 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 	 * valid program, which in this case would simply not
 	 * be JITed, but falls back to the interpreter.
 	 */
-	fp = bpf_int_jit_compile(fp);
+	if (!bpf_prog_is_dev_bound(fp->aux)) {
+		fp = bpf_int_jit_compile(fp);
+	} else {
+		*err = bpf_prog_offload_compile(fp);
+		if (*err)
+			return fp;
+	}
 	bpf_prog_lock_ro(fp);
 
 	/* The tail call compatibility check can only be done at
@@ -1381,11 +1394,163 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
+static unsigned int __bpf_prog_ret1(const void *ctx,
+				    const struct bpf_insn *insn)
+{
+	return 1;
+}
+
+static struct bpf_prog_dummy {
+	struct bpf_prog prog;
+} dummy_bpf_prog = {
+	.prog = {
+		.bpf_func = __bpf_prog_ret1,
+	},
+};
+
+/* to avoid allocating empty bpf_prog_array for cgroups that
+ * don't have bpf program attached use one global 'empty_prog_array'
+ * It will not be modified the caller of bpf_prog_array_alloc()
+ * (since caller requested prog_cnt == 0)
+ * that pointer should be 'freed' by bpf_prog_array_free()
+ */
+static struct {
+	struct bpf_prog_array hdr;
+	struct bpf_prog *null_prog;
+} empty_prog_array = {
+	.null_prog = NULL,
+};
+
+struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
+{
+	if (prog_cnt)
+		return kzalloc(sizeof(struct bpf_prog_array) +
+			       sizeof(struct bpf_prog *) * (prog_cnt + 1),
+			       flags);
+
+	return &empty_prog_array.hdr;
+}
+
+void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
+{
+	if (!progs ||
+	    progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr)
+		return;
+	kfree_rcu(progs, rcu);
+}
+
+int bpf_prog_array_length(struct bpf_prog_array __rcu *progs)
+{
+	struct bpf_prog **prog;
+	u32 cnt = 0;
+
+	rcu_read_lock();
+	prog = rcu_dereference(progs)->progs;
+	for (; *prog; prog++)
+		cnt++;
+	rcu_read_unlock();
+	return cnt;
+}
+
+int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
+				__u32 __user *prog_ids, u32 cnt)
+{
+	struct bpf_prog **prog;
+	u32 i = 0, id;
+
+	rcu_read_lock();
+	prog = rcu_dereference(progs)->progs;
+	for (; *prog; prog++) {
+		id = (*prog)->aux->id;
+		if (copy_to_user(prog_ids + i, &id, sizeof(id))) {
+			rcu_read_unlock();
+			return -EFAULT;
+		}
+		if (++i == cnt) {
+			prog++;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (*prog)
+		return -ENOSPC;
+	return 0;
+}
+
+void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+				struct bpf_prog *old_prog)
+{
+	struct bpf_prog **prog = progs->progs;
+
+	for (; *prog; prog++)
+		if (*prog == old_prog) {
+			WRITE_ONCE(*prog, &dummy_bpf_prog.prog);
+			break;
+		}
+}
+
+int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
+			struct bpf_prog *exclude_prog,
+			struct bpf_prog *include_prog,
+			struct bpf_prog_array **new_array)
+{
+	int new_prog_cnt, carry_prog_cnt = 0;
+	struct bpf_prog **existing_prog;
+	struct bpf_prog_array *array;
+	int new_prog_idx = 0;
+
+	/* Figure out how many existing progs we need to carry over to
+	 * the new array.
+	 */
+	if (old_array) {
+		existing_prog = old_array->progs;
+		for (; *existing_prog; existing_prog++) {
+			if (*existing_prog != exclude_prog &&
+			    *existing_prog != &dummy_bpf_prog.prog)
+				carry_prog_cnt++;
+			if (*existing_prog == include_prog)
+				return -EEXIST;
+		}
+	}
+
+	/* How many progs (not NULL) will be in the new array? */
+	new_prog_cnt = carry_prog_cnt;
+	if (include_prog)
+		new_prog_cnt += 1;
+
+	/* Do we have any prog (not NULL) in the new array? */
+	if (!new_prog_cnt) {
+		*new_array = NULL;
+		return 0;
+	}
+
+	/* +1 as the end of prog_array is marked with NULL */
+	array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
+	if (!array)
+		return -ENOMEM;
+
+	/* Fill in the new prog array */
+	if (carry_prog_cnt) {
+		existing_prog = old_array->progs;
+		for (; *existing_prog; existing_prog++)
+			if (*existing_prog != exclude_prog &&
+			    *existing_prog != &dummy_bpf_prog.prog)
+				array->progs[new_prog_idx++] = *existing_prog;
+	}
+	if (include_prog)
+		array->progs[new_prog_idx++] = include_prog;
+	array->progs[new_prog_idx] = NULL;
+	*new_array = array;
+	return 0;
+}
+
 static void bpf_prog_free_deferred(struct work_struct *work)
 {
 	struct bpf_prog_aux *aux;
 
 	aux = container_of(work, struct bpf_prog_aux, work);
+	if (bpf_prog_is_dev_bound(aux))
+		bpf_prog_offload_destroy(aux->prog);
 	bpf_jit_free(aux->prog);
 }
 
@@ -1498,5 +1663,8 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
 
+/* These are only used within the BPF_SYSCALL code */
+#ifdef CONFIG_BPF_SYSCALL
 EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu);
+#endif
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
new file mode 100644
index 000000000000..ce5b669003b2
--- /dev/null
+++ b/kernel/bpf/cpumap.c
@@ -0,0 +1,706 @@
+/* bpf/cpumap.c
+ *
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ * Released under terms in GPL version 2.  See COPYING.
+ */
+
+/* The 'cpumap' is primarily used as a backend map for XDP BPF helper
+ * call bpf_redirect_map() and XDP_REDIRECT action, like 'devmap'.
+ *
+ * Unlike devmap which redirects XDP frames out another NIC device,
+ * this map type redirects raw XDP frames to another CPU.  The remote
+ * CPU will do SKB-allocation
author	Mark Brown <broonie@kernel.org>	2018-01-25 18:16:26 +0000
committer	Mark Brown <broonie@kernel.org>	2018-01-25 18:16:26 +0000
commit	0b5eca67bd2d0e6f6d0ccdc316aced0cc4bf2e9f (patch)
tree	b6597ffe774a67ab5b8eebd57c9a723732d3a39c /kernel/bpf
parent	3bb0f7c31b1aedd0f85c675297031281799145d7 (diff)
parent	93a00c467fe998bf5716cbc9cabc127046054782 (diff)
download	linux-0b5eca67bd2d0e6f6d0ccdc316aced0cc4bf2e9f.tar.gz linux-0b5eca67bd2d0e6f6d0ccdc316aced0cc4bf2e9f.tar.bz2 linux-0b5eca67bd2d0e6f6d0ccdc316aced0cc4bf2e9f.zip