summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
author Jakub Kicinski <kuba@kernel.org> 2020-12-04 07:48:11 -0800
committer Jakub Kicinski <kuba@kernel.org> 2020-12-04 07:48:12 -0800
commit a1dd1d86973182458da7798a95f26cfcbea599b4 (patch)
tree 1adda22ea30ccfac7651a7eed7b7c90356f8243a /kernel
parent 55fd59b003f6e8fd88cf16590e79823d7ccf3026 (diff)
parent eceae70bdeaeb6b8ceb662983cf663ff352fbc96 (diff)
download linux-a1dd1d86973182458da7798a95f26cfcbea599b4.tar.gz
linux-a1dd1d86973182458da7798a95f26cfcbea599b4.tar.bz2
linux-a1dd1d86973182458da7798a95f26cfcbea599b4.zip
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-12-03

The main changes are:

1) Support BTF in kernel modules, from Andrii.

2) Introduce preferred busy-polling, from Björn.

3) bpf_ima_inode_hash() and bpf_bprm_opts_set() helpers, from KP Singh.

4) Memcg-based memory accounting for bpf objects, from Roman.

5) Allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks, from Stanislav.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (118 commits)
  selftests/bpf: Fix invalid use of strncat in test_sockmap
  libbpf: Use memcpy instead of strncpy to please GCC
  selftests/bpf: Add fentry/fexit/fmod_ret selftest for kernel module
  selftests/bpf: Add tp_btf CO-RE reloc test for modules
  libbpf: Support attachment of BPF tracing programs to kernel modules
  libbpf: Factor out low-level BPF program loading helper
  bpf: Allow to specify kernel module BTFs when attaching BPF programs
  bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
  selftests/bpf: Add CO-RE relocs selftest relying on kernel module BTF
  selftests/bpf: Add support for marking sub-tests as skipped
  selftests/bpf: Add bpf_testmod kernel module for testing
  libbpf: Add kernel module BTF support for CO-RE relocations
  libbpf: Refactor CO-RE relocs to not assume a single BTF object
  libbpf: Add internal helper to load BTF data by FD
  bpf: Keep module's btf_data_size intact after load
  bpf: Fix bpf_put_raw_tracepoint()'s use of __module_address()
  selftests/bpf: Add Userspace tests for TCP_WINDOW_CLAMP
  bpf: Adds support for setting window clamp
  samples/bpf: Fix spelling mistake "recieving" -> "receiving"
  bpf: Fix cold build of test_progs-no_alu32
  ...
====================

Link: https://lore.kernel.org/r/20201204021936.85653-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/bpf/arraymap.c 30
-rw-r--r-- kernel/bpf/bpf_local_storage.c 20
-rw-r--r-- kernel/bpf/bpf_lsm.c 52
-rw-r--r-- kernel/bpf/bpf_struct_ops.c 19
-rw-r--r-- kernel/bpf/btf.c 70
-rw-r--r-- kernel/bpf/core.c 23
-rw-r--r-- kernel/bpf/cpumap.c 37
-rw-r--r-- kernel/bpf/devmap.c 25
-rw-r--r-- kernel/bpf/hashtab.c 43
-rw-r--r-- kernel/bpf/helpers.c 13
-rw-r--r-- kernel/bpf/local_storage.c 44
-rw-r--r-- kernel/bpf/lpm_trie.c 19
-rw-r--r-- kernel/bpf/queue_stack_maps.c 16
-rw-r--r-- kernel/bpf/reuseport_array.c 12
-rw-r--r-- kernel/bpf/ringbuf.c 35
-rw-r--r-- kernel/bpf/stackmap.c 16
-rw-r--r-- kernel/bpf/syscall.c 310
-rw-r--r-- kernel/bpf/task_iter.c 54
-rw-r--r-- kernel/bpf/verifier.c 256
-rw-r--r-- kernel/fork.c 7
-rw-r--r-- kernel/module.c 4
-rw-r--r-- kernel/trace/bpf_trace.c 10
22 files changed, 529 insertions, 586 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index c6c81eceb68f..1f8453343bf2 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -34,8 +34,8 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
int i;
for (i = 0; i < array->map.max_entries; i++) {
- ptr = __alloc_percpu_gfp(array->elem_size, 8,
- GFP_USER | __GFP_NOWARN);
+ ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8,
+ GFP_USER | __GFP_NOWARN);
if (!ptr) {
bpf_array_free_percpu(array);
return -ENOMEM;
@@ -81,11 +81,10 @@ int array_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
- int ret, numa_node = bpf_map_attr_numa_node(attr);
+ int numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool bypass_spec_v1 = bpf_bypass_spec_v1();
- u64 cost, array_size, mask64;
- struct bpf_map_memory mem;
+ u64 array_size, mask64;
struct bpf_array *array;
elem_size = round_up(attr->value_size, 8);
@@ -126,44 +125,29 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
}
}
- /* make sure there is no u32 overflow later in round_up() */
- cost = array_size;
- if (percpu)
- cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
-
- ret = bpf_map_charge_init(&mem, cost);
- if (ret < 0)
- return ERR_PTR(ret);
-
/* allocate all map elements and zero-initialize them */
if (attr->map_flags & BPF_F_MMAPABLE) {
void *data;
/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
data = bpf_map_area_mmapable_alloc(array_size, numa_node);
- if (!data) {
- bpf_map_charge_finish(&mem);
+ if (!data)
return ERR_PTR(-ENOMEM);
- }
array = data + PAGE_ALIGN(sizeof(struct bpf_array))
- offsetof(struct bpf_array, value);
} else {
array = bpf_map_area_alloc(array_size, numa_node);
}
- if (!array) {
- bpf_map_charge_finish(&mem);
+ if (!array)
return ERR_PTR(-ENOMEM);
- }
array->index_mask = index_mask;
array->map.bypass_spec_v1 = bypass_spec_v1;
/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
- bpf_map_charge_move(&array->map.memory, &mem);
array->elem_size = elem_size;
if (percpu && bpf_array_alloc_percpu(array)) {
- bpf_map_charge_finish(&array->map.memory);
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}
@@ -1018,7 +1002,7 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
struct bpf_array_aux *aux;
struct bpf_map *map;
- aux = kzalloc(sizeof(*aux), GFP_KERNEL);
+ aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
if (!aux)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 5d3a7af9ba9b..dd5aedee99e7 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -67,7 +67,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
if (charge_mem && mem_charge(smap, owner, smap->elem_size))
return NULL;
- selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+ selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
+ GFP_ATOMIC | __GFP_NOWARN);
if (selem) {
if (value)
memcpy(SDATA(selem)->data, value, smap->map.value_size);
@@ -264,7 +265,8 @@ int bpf_local_storage_alloc(void *owner,
if (err)
return err;
- storage = kzalloc(sizeof(*storage), GFP_ATOMIC | __GFP_NOWARN);
+ storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
+ GFP_ATOMIC | __GFP_NOWARN);
if (!storage) {
err = -ENOMEM;
goto uncharge;
@@ -543,10 +545,8 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
struct bpf_local_storage_map *smap;
unsigned int i;
u32 nbuckets;
- u64 cost;
- int ret;
- smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN);
+ smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
if (!smap)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&smap->map, attr);
@@ -555,18 +555,10 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
nbuckets = max_t(u32, 2, nbuckets);
smap->bucket_log = ilog2(nbuckets);
- cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
-
- ret = bpf_map_charge_init(&smap->map.memory, cost);
- if (ret < 0) {
- kfree(smap);
- return ERR_PTR(ret);
- }
smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
- GFP_USER | __GFP_NOWARN);
+ GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
if (!smap->buckets) {
- bpf_map_charge_finish(&smap->map.memory);
kfree(smap);
return ERR_PTR(-ENOMEM);
}
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 553107f4706a..70e5e0b6d69d 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -7,6 +7,7 @@
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/btf.h>
+#include <linux/binfmts.h>
#include <linux/lsm_hooks.h>
#include <linux/bpf_lsm.h>
#include <linux/kallsyms.h>
@@ -14,6 +15,7 @@
#include <net/bpf_sk_storage.h>
#include <linux/bpf_local_storage.h>
#include <linux/btf_ids.h>
+#include <linux/ima.h>
/* For every LSM hook that allows attachment of BPF programs, declare a nop
* function where a BPF program can be attached.
@@ -51,6 +53,52 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
return 0;
}
+/* Mask for all the currently supported BPRM option flags */
+#define BPF_F_BRPM_OPTS_MASK BPF_F_BPRM_SECUREEXEC
+
+BPF_CALL_2(bpf_bprm_opts_set, struct linux_binprm *, bprm, u64, flags)
+{
+ if (flags & ~BPF_F_BRPM_OPTS_MASK)
+ return -EINVAL;
+
+ bprm->secureexec = (flags & BPF_F_BPRM_SECUREEXEC);
+ return 0;
+}
+
+BTF_ID_LIST_SINGLE(bpf_bprm_opts_set_btf_ids, struct, linux_binprm)
+
+const static struct bpf_func_proto bpf_bprm_opts_set_proto = {
+ .func = bpf_bprm_opts_set,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_bprm_opts_set_btf_ids[0],
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_ima_inode_hash, struct inode *, inode, void *, dst, u32, size)
+{
+ return ima_inode_hash(inode, dst, size);
+}
+
+static bool bpf_ima_inode_hash_allowed(const struct bpf_prog *prog)
+{
+ return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);
+}
+
+BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode)
+
+const static struct bpf_func_proto bpf_ima_inode_hash_proto = {
+ .func = bpf_ima_inode_hash,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_ima_inode_hash_btf_ids[0],
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .allowed = bpf_ima_inode_hash_allowed,
+};
+
static const struct bpf_func_proto *
bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -71,6 +119,10 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
return &bpf_task_storage_delete_proto;
+ case BPF_FUNC_bprm_opts_set:
+ return &bpf_bprm_opts_set_proto;
+ case BPF_FUNC_ima_inode_hash:
+ return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL;
default:
return tracing_prog_func_proto(func_id, prog);
}
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 4c3b543bb33b..1a666a975416 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -548,12 +548,10 @@ static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
{
const struct bpf_struct_ops *st_ops;
- size_t map_total_size, st_map_size;
+ size_t st_map_size;
struct bpf_struct_ops_map *st_map;
const struct btf_type *t, *vt;
- struct bpf_map_memory mem;
struct bpf_map *map;
- int err;
if (!bpf_capable())
return ERR_PTR(-EPERM);
@@ -573,20 +571,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
* struct bpf_struct_ops_tcp_congestions_ops
*/
(vt->size - sizeof(struct bpf_struct_ops_value));
- map_total_size = st_map_size +
- /* uvalue */
- sizeof(vt->size) +
- /* struct bpf_progs **progs */
- btf_type_vlen(t) * sizeof(struct bpf_prog *);
- err = bpf_map_charge_init(&mem, map_total_size);
- if (err < 0)
- return ERR_PTR(err);
st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
- if (!st_map) {
- bpf_map_charge_finish(&mem);
+ if (!st_map)
return ERR_PTR(-ENOMEM);
- }
+
st_map->st_ops = st_ops;
map = &st_map->map;
@@ -597,14 +586,12 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
if (!st_map->uvalue || !st_map->progs || !st_map->image) {
bpf_struct_ops_map_free(map);
- bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
}
mutex_init(&st_map->lock);
set_vm_flush_reset_perms(st_map->image);
bpf_map_init_from_attr(map, attr);
- bpf_map_charge_move(&map->memory, &mem);
return map;
}
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 6b2d508b33d4..8d6bdb4f4d61 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1524,6 +1524,11 @@ static void btf_free_rcu(struct rcu_head *rcu)
btf_free(btf);
}
+void btf_get(struct btf *btf)
+{
+ refcount_inc(&btf->refcnt);
+}
+
void btf_put(struct btf *btf)
{
if (btf && refcount_dec_and_test(&btf->refcnt)) {
@@ -4555,11 +4560,10 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
{
struct bpf_prog *tgt_prog = prog->aux->dst_prog;
- if (tgt_prog) {
+ if (tgt_prog)
return tgt_prog->aux->btf;
- } else {
- return btf_vmlinux;
- }
+ else
+ return prog->aux->attach_btf;
}
static bool is_string_ptr(struct btf *btf, const struct btf_type *t)
@@ -4700,6 +4704,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (ctx_arg_info->offset == off) {
info->reg_type = ctx_arg_info->reg_type;
+ info->btf = btf_vmlinux;
info->btf_id = ctx_arg_info->btf_id;
return true;
}
@@ -4716,6 +4721,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
ret = btf_translate_to_vmlinux(log, btf, t, tgt_type, arg);
if (ret > 0) {
+ info->btf = btf_vmlinux;
info->btf_id = ret;
return true;
} else {
@@ -4723,6 +4729,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
}
+ info->btf = btf;
info->btf_id = t->type;
t = btf_type_by_id(btf, t->type);
/* skip modifiers */
@@ -4749,7 +4756,7 @@ enum bpf_struct_walk_result {
WALK_STRUCT,
};
-static int btf_struct_walk(struct bpf_verifier_log *log,
+static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, int off, int size,
u32 *next_btf_id)
{
@@ -4760,7 +4767,7 @@ static int btf_struct_walk(struct bpf_verifier_log *log,
u32 vlen, elem_id, mid;
again:
- tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
+ tname = __btf_name_by_offset(btf, t->name_off);
if (!btf_type_is_struct(t)) {
bpf_log(log, "Type '%s' is not a struct\n", tname);
return -EINVAL;
@@ -4777,7 +4784,7 @@ again:
goto error;
member = btf_type_member(t) + vlen - 1;
- mtype = btf_type_skip_modifiers(btf_vmlinux, member->type,
+ mtype = btf_type_skip_modifiers(btf, member->type,
NULL);
if (!btf_type_is_array(mtype))
goto error;
@@ -4793,7 +4800,7 @@ again:
/* Only allow structure for now, can be relaxed for
* other types later.
*/
- t = btf_type_skip_modifiers(btf_vmlinux, array_elem->type,
+ t = btf_type_skip_modifiers(btf, array_elem->type,
NULL);
if (!btf_type_is_struct(t))
goto error;
@@ -4851,10 +4858,10 @@ error:
/* type of the field */
mid = member->type;
- mtype = btf_type_by_id(btf_vmlinux, member->type);
- mname = __btf_name_by_offset(btf_vmlinux, member->name_off);
+ mtype = btf_type_by_id(btf, member->type);
+ mname = __btf_name_by_offset(btf, member->name_off);
- mtype = __btf_resolve_size(btf_vmlinux, mtype, &msize,
+ mtype = __btf_resolve_size(btf, mtype, &msize,
&elem_type, &elem_id, &total_nelems,
&mid);
if (IS_ERR(mtype)) {
@@ -4949,7 +4956,7 @@ error:
mname, moff, tname, off, size);
return -EACCES;
}
- stype = btf_type_skip_modifiers(btf_vmlinux, mtype->type, &id);
+ stype = btf_type_skip_modifiers(btf, mtype->type, &id);
if (btf_type_is_struct(stype)) {
*next_btf_id = id;
return WALK_PTR;
@@ -4975,7 +4982,7 @@ error:
return -EINVAL;
}
-int btf_struct_access(struct bpf_verifier_log *log,
+int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
const struct btf_type *t, int off, int size,
enum bpf_access_type atype __maybe_unused,
u32 *next_btf_id)
@@ -4984,7 +4991,7 @@ int btf_struct_access(struct bpf_verifier_log *log,
u32 id;
do {
- err = btf_struct_walk(log, t, off, size, &id);
+ err = btf_struct_walk(log, btf, t, off, size, &id);
switch (err) {
case WALK_PTR:
@@ -5000,7 +5007,7 @@ int btf_struct_access(struct bpf_verifier_log *log,
* by diving in it. At this point the offset is
* aligned with the new type, so set it to 0.
*/
- t = btf_type_by_id(btf_vmlinux, id);
+ t = btf_type_by_id(btf, id);
off = 0;
break;
default:
@@ -5016,21 +5023,37 @@ int btf_struct_access(struct bpf_verifier_log *log,
return -EINVAL;
}
+/* Check that two BTF types, each specified as a BTF object + id, are exactly
+ * the same. Trivial ID check is not enough due to module BTFs, because we can
+ * end up with two different module BTFs, but IDs point to the common type in
+ * vmlinux BTF.
+ */
+static bool btf_types_are_same(const struct btf *btf1, u32 id1,
+ const struct btf *btf2, u32 id2)
+{
+ if (id1 != id2)
+ return false;
+ if (btf1 == btf2)
+ return true;
+ return btf_type_by_id(btf1, id1) == btf_type_by_id(btf2, id2);
+}
+
bool btf_struct_ids_match(struct bpf_verifier_log *log,
- int off, u32 id, u32 need_type_id)
+ const struct btf *btf, u32 id, int off,
+ const struct btf *need_btf, u32 need_type_id)
{
const struct btf_type *type;
int err;
/* Are we already done? */
- if (need_type_id == id && off == 0)
+ if (off == 0 && btf_types_are_same(btf, id, need_btf, need_type_id))
return true;
again:
- type = btf_type_by_id(btf_vmlinux, id);
+ type = btf_type_by_id(btf, id);
if (!type)
return false;
- err = btf_struct_walk(log, type, off, 1, &id);
+ err = btf_struct_walk(log, btf, type, off, 1, &id);
if (err != WALK_STRUCT)
return false;
@@ -5039,7 +5062,7 @@ again:
* continue the search with offset 0 in the new
* type.
*/
- if (need_type_id != id) {
+ if (!btf_types_are_same(btf, id, need_btf, need_type_id)) {
off = 0;
goto again;
}
@@ -5710,11 +5733,16 @@ int btf_get_fd_by_id(u32 id)
return fd;
}
-u32 btf_id(const struct btf *btf)
+u32 btf_obj_id(const struct btf *btf)
{
return btf->id;
}
+bool btf_is_kernel(const struct btf *btf)
+{
+ return btf->kernel_btf;
+}
+
static int btf_id_cmp_func(const void *a, const void *b)
{
const int *pa = a, *pb = b;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 55454d2278b1..261f8692d0d2 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -77,7 +77,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog_aux *aux;
struct bpf_prog *fp;
@@ -86,7 +86,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
if (fp == NULL)
return NULL;
- aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
+ aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT | gfp_extra_flags);
if (aux == NULL) {
vfree(fp);
return NULL;
@@ -106,7 +106,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog *prog;
int cpu;
@@ -138,7 +138,7 @@ int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo,
sizeof(*prog->aux->jited_linfo),
- GFP_KERNEL | __GFP_NOWARN);
+ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!prog->aux->jited_linfo)
return -ENOMEM;
@@ -219,25 +219,17 @@ void bpf_prog_free_linfo(struct bpf_prog *prog)
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog *fp;
- u32 pages, delta;
- int ret;
+ u32 pages;
size = round_up(size, PAGE_SIZE);
pages = size / PAGE_SIZE;
if (pages <= fp_old->pages)
return fp_old;
- delta = pages - fp_old->pages;
- ret = __bpf_prog_charge(fp_old->aux->user, delta);
- if (ret)
- return NULL;
-
fp = __vmalloc(size, gfp_flags);
- if (fp == NULL) {
- __bpf_prog_uncharge(fp_old->aux->user, delta);
- } else {
+ if (fp) {
memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
fp->pages = pages;
fp->aux->prog = fp;
@@ -2211,6 +2203,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
+const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak;
const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index c61a23b564aa..747313698178 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -84,8 +84,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
u32 value_size = attr->value_size;
struct bpf_cpu_map *cmap;
int err = -ENOMEM;
- u64 cost;
- int ret;
if (!bpf_capable())
return ERR_PTR(-EPERM);
@@ -97,7 +95,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
attr->map_flags & ~BPF_F_NUMA_NODE)
return ERR_PTR(-EINVAL);
- cmap = kzalloc(sizeof(*cmap), GFP_USER);
+ cmap = kzalloc(sizeof(*cmap), GFP_USER | __GFP_ACCOUNT);
if (!cmap)
return ERR_PTR(-ENOMEM);
@@ -109,26 +107,14 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
goto free_cmap;
}
- /* make sure page count doesn't overflow */
- cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
-
- /* Notice returns -EPERM on if map size is larger than memlock limit */
- ret = bpf_map_charge_init(&cmap->map.memory, cost);
- if (ret) {
- err = ret;
- goto free_cmap;
- }
-
/* Alloc array for possible remote "destination" CPUs */
cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
sizeof(struct bpf_cpu_map_entry *),
cmap->map.numa_node);
if (!cmap->cpu_map)
- goto free_charge;
+ goto free_cmap;
return &cmap->map;
-free_charge:
- bpf_map_charge_finish(&cmap->map.memory);
free_cmap:
kfree(cmap);
return ERR_PTR(err);
@@ -412,7 +398,8 @@ static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
}
static struct bpf_cpu_map_entry *
-__cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
+__cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
+ u32 cpu)
{
int numa, err, i, fd = value->bpf_prog.fd;
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
@@ -422,13 +409,13 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
/* Have map->numa_node, but choose node of redirect target CPU */
numa = cpu_to_node(cpu);
- rcpu = kzalloc_node(sizeof(*rcpu), gfp, numa);
+ rcpu = bpf_map_kmalloc_node(map, sizeof(*rcpu), gfp | __GFP_ZERO, numa);
if (!rcpu)
return NULL;
/* Alloc percpu bulkq */
- rcpu->bulkq = __alloc_percpu_gfp(sizeof(*rcpu->bulkq),
- sizeof(void *), gfp);
+ rcpu->bulkq = bpf_map_alloc_percpu(map, sizeof(*rcpu->bulkq),
+ sizeof(void *), gfp);
if (!rcpu->bulkq)
goto free_rcu;
@@ -438,7 +425,8 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
}
/* Alloc queue */
- rcpu->queue = kzalloc_node(sizeof(*rcpu->queue), gfp, numa);
+ rcpu->queue = bpf_map_kmalloc_node(map, sizeof(*rcpu->queue), gfp,
+ numa);
if (!rcpu->queue)
goto free_bulkq;
@@ -447,7 +435,7 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
goto free_queue;
rcpu->cpu = cpu;
- rcpu->map_id = map_id;
+ rcpu->map_id = map->id;
rcpu->value.qsize = value->qsize;
if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd))
@@ -455,7 +443,8 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id)
/* Setup kthread */
rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
- "cpumap/%d/map:%d", cpu, map_id);
+ "cpumap/%d/map:%d", cpu,
+ map->id);
if (IS_ERR(rcpu->kthread))
goto free_prog;
@@ -571,7 +560,7 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
rcpu = NULL; /* Same as deleting */
} else {
/* Updating qsize cause re-allocation of bpf_cpu_map_entry */
- rcpu = __cpu_map_entry_alloc(&cpumap_value, key_cpu, map->id);
+ rcpu = __cpu_map_entry_alloc(map, &cpumap_value, key_cpu);
if (!rcpu)
return -ENOMEM;
rcpu->cmap = cmap;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 2b5ca93c17de..f6e9c68afdd4 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -109,8 +109,6 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
{
u32 valsize = attr->value_size;
- u64 cost = 0;
- int err;
/* check sanity of attributes. 2 value sizes supported:
* 4 bytes: ifindex
@@ -135,21 +133,13 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
if (!dtab->n_buckets) /* Overflow check */
return -EINVAL;
- cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets;
- } else {
- cost += (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
}
- /* if map size is larger than memlock limit, reject it */
- err = bpf_map_charge_init(&dtab->map.memory, cost);
- if (err)
- return -EINVAL;
-
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
dtab->map.numa_node);
if (!dtab->dev_index_head)
- goto free_charge;
+ return -ENOMEM;
spin_lock_init(&dtab->index_lock);
} else {
@@ -157,14 +147,10 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
sizeof(struct bpf_dtab_netdev *),
dtab->map.numa_node);
if (!dtab->netdev_map)
- goto free_charge;
+ return -ENOMEM;
}
return 0;
-
-free_charge:
- bpf_map_charge_finish(&dtab->map.memory);
- return -ENOMEM;
}
static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
@@ -175,7 +161,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
- dtab = kzalloc(sizeof(*dtab), GFP_USER);
+ dtab = kzalloc(sizeof(*dtab), GFP_USER | __GFP_ACCOUNT);
if (!dtab)
return ERR_PTR(-ENOMEM);
@@ -602,8 +588,9 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
struct bpf_prog *prog = NULL;
struct bpf_dtab_netdev *dev;
- dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
- dtab->map.numa_node);
+ dev = bpf_map_kmalloc_node(&dtab->map, sizeof(*dev),
+ GFP_ATOMIC | __GFP_NOWARN,
+ dtab->map.numa_node);
if (!dev)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index ec46266aaf1c..fe7a0733a63a 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -292,7 +292,8 @@ static int prealloc_init(struct bpf_htab *htab)
u32 size = round_up(htab->map.value_size, 8);
void __percpu *pptr;
- pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
+ pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
+ GFP_USER | __GFP_NOWARN);
if (!pptr)
goto free_elems;
htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
@@ -346,8 +347,8 @@ static int alloc_extra_elems(struct bpf_htab *htab)
struct pcpu_freelist_node *l;
int cpu;
- pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
- GFP_USER | __GFP_NOWARN);
+ pptr = bpf_map_alloc_percpu(&htab->map, sizeof(struct htab_elem *), 8,
+ GFP_USER | __GFP_NOWARN);
if (!pptr)
return -ENOMEM;
@@ -442,9 +443,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
struct bpf_htab *htab;
int err, i;
- u64 cost;
- htab = kzalloc(sizeof(*htab), GFP_USER);
+ htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT);
if (!htab)
return ERR_PTR(-ENOMEM);
@@ -480,30 +480,18 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
htab->n_buckets > U32_MAX / sizeof(struct bucket))
goto free_htab;
- cost = (u64) htab->n_buckets * sizeof(struct bucket) +
- (u64) htab->elem_size * htab->map.max_entries;
-
- if (percpu)
- cost += (u64) round_up(htab->map.value_size, 8) *
- num_possible_cpus() * htab->map.max_entries;
- else
- cost += (u64) htab->elem_size * num_possible_cpus();
-
- /* if map size is larger than memlock limit, reject it */
- err = bpf_map_charge_init(&htab->map.memory, cost);
- if (err)
- goto free_htab;
-
err = -ENOMEM;
htab->buckets = bpf_map_area_alloc(htab->n_buckets *
sizeof(struct bucket),
htab->map.numa_node);
if (!htab->buckets)
- goto free_charge;
+ goto free_htab;
for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) {
- htab->map_locked[i] = __alloc_percpu_gfp(sizeof(int),
- sizeof(int), GFP_USER);
+ htab->map_locked[i] = bpf_map_alloc_percpu(&htab->map,
+ sizeof(int),
+ sizeof(int),
+ GFP_USER);
if (!htab->map_locked[i])
goto free_map_locked;
}
@@ -538,8 +526,6 @@ free_map_locked:
for (i = 0; i < HASH