diff options
Diffstat (limited to 'kernel/bpf/btf.c')
| -rw-r--r-- | kernel/bpf/btf.c | 1313 |
1 files changed, 749 insertions, 564 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index eba603cec2c5..f7dd8af06413 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -19,6 +19,7 @@ #include <linux/bpf_verifier.h> #include <linux/btf.h> #include <linux/btf_ids.h> +#include <linux/bpf_lsm.h> #include <linux/skmsg.h> #include <linux/perf_event.h> #include <linux/bsearch.h> @@ -199,11 +200,13 @@ DEFINE_IDR(btf_idr); DEFINE_SPINLOCK(btf_idr_lock); enum btf_kfunc_hook { + BTF_KFUNC_HOOK_COMMON, BTF_KFUNC_HOOK_XDP, BTF_KFUNC_HOOK_TC, BTF_KFUNC_HOOK_STRUCT_OPS, BTF_KFUNC_HOOK_TRACING, BTF_KFUNC_HOOK_SYSCALL, + BTF_KFUNC_HOOK_FMODRET, BTF_KFUNC_HOOK_MAX, }; @@ -237,6 +240,7 @@ struct btf { struct rcu_head rcu; struct btf_kfunc_set_tab *kfunc_set_tab; struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab; + struct btf_struct_metas *struct_meta_tab; /* split BTF support */ struct btf *base_btf; @@ -477,16 +481,6 @@ static bool btf_type_nosize_or_null(const struct btf_type *t) return !t || btf_type_nosize(t); } -static bool __btf_type_is_struct(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; -} - -static bool btf_type_is_array(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; -} - static bool btf_type_is_datasec(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; @@ -1642,8 +1636,30 @@ static void btf_free_dtor_kfunc_tab(struct btf *btf) btf->dtor_kfunc_tab = NULL; } +static void btf_struct_metas_free(struct btf_struct_metas *tab) +{ + int i; + + if (!tab) + return; + for (i = 0; i < tab->cnt; i++) { + btf_record_free(tab->types[i].record); + kfree(tab->types[i].field_offs); + } + kfree(tab); +} + +static void btf_free_struct_meta_tab(struct btf *btf) +{ + struct btf_struct_metas *tab = btf->struct_meta_tab; + + btf_struct_metas_free(tab); + btf->struct_meta_tab = NULL; +} + static void btf_free(struct btf *btf) { + btf_free_struct_meta_tab(btf); btf_free_dtor_kfunc_tab(btf); btf_free_kfunc_set_tab(btf); kvfree(btf->types); @@ -3191,7 +3207,7 @@ static void btf_struct_log(struct btf_verifier_env *env, btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } -enum btf_field_type { +enum btf_field_info_type { BTF_FIELD_SPIN_LOCK, BTF_FIELD_TIMER, BTF_FIELD_KPTR, @@ -3203,18 +3219,28 @@ enum { }; struct btf_field_info { - u32 type_id; + enum btf_field_type type; u32 off; - enum bpf_kptr_type type; + union { + struct { + u32 type_id; + } kptr; + struct { + const char *node_name; + u32 value_btf_id; + } list_head; + }; }; static int btf_find_struct(const struct btf *btf, const struct btf_type *t, - u32 off, int sz, struct btf_field_info *info) + u32 off, int sz, enum btf_field_type field_type, + struct btf_field_info *info) { if (!__btf_type_is_struct(t)) return BTF_FIELD_IGNORE; if (t->size != sz) return BTF_FIELD_IGNORE; + info->type = field_type; info->off = off; return BTF_FIELD_FOUND; } @@ -3222,9 +3248,12 @@ static int btf_find_struct(const struct btf *btf, const struct btf_type *t, static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, u32 off, int sz, struct btf_field_info *info) { - enum bpf_kptr_type type; + enum btf_field_type type; u32 res_id; + /* Permit modifiers on the pointer itself */ + if (btf_type_is_volatile(t)) + t = btf_type_by_id(btf, t->type); /* For PTR, sz is always == 8 */ if (!btf_type_is_ptr(t)) return BTF_FIELD_IGNORE; @@ -3248,28 +3277,135 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, if (!__btf_type_is_struct(t)) return -EINVAL; - info->type_id = res_id; - info->off = off; info->type = type; + info->off = off; + info->kptr.type_id = res_id; + return BTF_FIELD_FOUND; +} + +static const char *btf_find_decl_tag_value(const struct btf *btf, + const struct btf_type *pt, + int comp_idx, const char *tag_key) +{ + int i; + + for (i = 1; i < btf_nr_types(btf); i++) { + const struct btf_type *t = btf_type_by_id(btf, i); + int len = strlen(tag_key); + + if (!btf_type_is_decl_tag(t)) + continue; + if (pt != btf_type_by_id(btf, t->type) || + btf_type_decl_tag(t)->component_idx != comp_idx) + continue; + if (strncmp(__btf_name_by_offset(btf, t->name_off), tag_key, len)) + continue; + return __btf_name_by_offset(btf, t->name_off) + len; + } + return NULL; +} + +static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt, + const struct btf_type *t, int comp_idx, + u32 off, int sz, struct btf_field_info *info) +{ + const char *value_type; + const char *list_node; + s32 id; + + if (!__btf_type_is_struct(t)) + return BTF_FIELD_IGNORE; + if (t->size != sz) + return BTF_FIELD_IGNORE; + value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:"); + if (!value_type) + return -EINVAL; + list_node = strstr(value_type, ":"); + if (!list_node) + return -EINVAL; + value_type = kstrndup(value_type, list_node - value_type, GFP_KERNEL | __GFP_NOWARN); + if (!value_type) + return -ENOMEM; + id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT); + kfree(value_type); + if (id < 0) + return id; + list_node++; + if (str_is_empty(list_node)) + return -EINVAL; + info->type = BPF_LIST_HEAD; + info->off = off; + info->list_head.value_btf_id = id; + info->list_head.node_name = list_node; return BTF_FIELD_FOUND; } -static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t, - const char *name, int sz, int align, - enum btf_field_type field_type, +static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask, + int *align, int *sz) +{ + int type = 0; + + if (field_mask & BPF_SPIN_LOCK) { + if (!strcmp(name, "bpf_spin_lock")) { + if (*seen_mask & BPF_SPIN_LOCK) + return -E2BIG; + *seen_mask |= BPF_SPIN_LOCK; + type = BPF_SPIN_LOCK; + goto end; + } + } + if (field_mask & BPF_TIMER) { + if (!strcmp(name, "bpf_timer")) { + if (*seen_mask & BPF_TIMER) + return -E2BIG; + *seen_mask |= BPF_TIMER; + type = BPF_TIMER; + goto end; + } + } + if (field_mask & BPF_LIST_HEAD) { + if (!strcmp(name, "bpf_list_head")) { + type = BPF_LIST_HEAD; + goto end; + } + } + if (field_mask & BPF_LIST_NODE) { + if (!strcmp(name, "bpf_list_node")) { + type = BPF_LIST_NODE; + goto end; + } + } + /* Only return BPF_KPTR when all other types with matchable names fail */ + if (field_mask & BPF_KPTR) { + type = BPF_KPTR_REF; + goto end; + } + return 0; +end: + *sz = btf_field_type_size(type); + *align = btf_field_type_align(type); + return type; +} + +static int btf_find_struct_field(const struct btf *btf, + const struct btf_type *t, u32 field_mask, struct btf_field_info *info, int info_cnt) { + int ret, idx = 0, align, sz, field_type; const struct btf_member *member; struct btf_field_info tmp; - int ret, idx = 0; - u32 i, off; + u32 i, off, seen_mask = 0; for_each_member(i, t, member) { const struct btf_type *member_type = btf_type_by_id(btf, member->type); - if (name && strcmp(__btf_name_by_offset(btf, member_type->name_off), name)) + field_type = btf_get_field_type(__btf_name_by_offset(btf, member_type->name_off), + field_mask, &seen_mask, &align, &sz); + if (field_type == 0) continue; + if (field_type < 0) + return field_type; off = __btf_member_bit_offset(t, member); if (off % 8) @@ -3277,22 +3413,30 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t return -EINVAL; off /= 8; if (off % align) - return -EINVAL; + continue; switch (field_type) { - case BTF_FIELD_SPIN_LOCK: - case BTF_FIELD_TIMER: - ret = btf_find_struct(btf, member_type, off, sz, + case BPF_SPIN_LOCK: + case BPF_TIMER: + case BPF_LIST_NODE: + ret = btf_find_struct(btf, member_type, off, sz, field_type, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) return ret; break; - case BTF_FIELD_KPTR: + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: ret = btf_find_kptr(btf, member_type, off, sz, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) return ret; break; + case BPF_LIST_HEAD: + ret = btf_find_list_head(btf, t, member_type, i, off, sz, + idx < info_cnt ? &info[idx] : &tmp); + if (ret < 0) + return ret; + break; default: return -EFAULT; } @@ -3307,42 +3451,53 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t } static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, - const char *name, int sz, int align, - enum btf_field_type field_type, - struct btf_field_info *info, int info_cnt) + u32 field_mask, struct btf_field_info *info, + int info_cnt) { + int ret, idx = 0, align, sz, field_type; const struct btf_var_secinfo *vsi; struct btf_field_info tmp; - int ret, idx = 0; - u32 i, off; + u32 i, off, seen_mask = 0; for_each_vsi(i, t, vsi) { const struct btf_type *var = btf_type_by_id(btf, vsi->type); const struct btf_type *var_type = btf_type_by_id(btf, var->type); - off = vsi->offset; - - if (name && strcmp(__btf_name_by_offset(btf, var_type->name_off), name)) + field_type = btf_get_field_type(__btf_name_by_offset(btf, var_type->name_off), + field_mask, &seen_mask, &align, &sz); + if (field_type == 0) continue; + if (field_type < 0) + return field_type; + + off = vsi->offset; if (vsi->size != sz) continue; if (off % align) - return -EINVAL; + continue; switch (field_type) { - case BTF_FIELD_SPIN_LOCK: - case BTF_FIELD_TIMER: - ret = btf_find_struct(btf, var_type, off, sz, + case BPF_SPIN_LOCK: + case BPF_TIMER: + case BPF_LIST_NODE: + ret = btf_find_struct(btf, var_type, off, sz, field_type, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) return ret; break; - case BTF_FIELD_KPTR: + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: ret = btf_find_kptr(btf, var_type, off, sz, idx < info_cnt ? &info[idx] : &tmp); if (ret < 0) return ret; break; + case BPF_LIST_HEAD: + ret = btf_find_list_head(btf, var, var_type, -1, off, sz, + idx < info_cnt ? &info[idx] : &tmp); + if (ret < 0) + return ret; + break; default: return -EFAULT; } @@ -3357,169 +3512,327 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, } static int btf_find_field(const struct btf *btf, const struct btf_type *t, - enum btf_field_type field_type, - struct btf_field_info *info, int info_cnt) + u32 field_mask, struct btf_field_info *info, + int info_cnt) { - const char *name; - int sz, align; - - switch (field_type) { - case BTF_FIELD_SPIN_LOCK: - name = "bpf_spin_lock"; - sz = sizeof(struct bpf_spin_lock); - align = __alignof__(struct bpf_spin_lock); - break; - case BTF_FIELD_TIMER: - name = "bpf_timer"; - sz = sizeof(struct bpf_timer); - align = __alignof__(struct bpf_timer); - break; - case BTF_FIELD_KPTR: - name = NULL; - sz = sizeof(u64); - align = 8; - break; - default: - return -EFAULT; - } - if (__btf_type_is_struct(t)) - return btf_find_struct_field(btf, t, name, sz, align, field_type, info, info_cnt); + return btf_find_struct_field(btf, t, field_mask, info, info_cnt); else if (btf_type_is_datasec(t)) - return btf_find_datasec_var(btf, t, name, sz, align, field_type, info, info_cnt); + return btf_find_datasec_var(btf, t, field_mask, info, info_cnt); return -EINVAL; } -/* find 'struct bpf_spin_lock' in map value. - * return >= 0 offset if found - * and < 0 in case of error - */ -int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t) +static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, + struct btf_field_info *info) { - struct btf_field_info info; + struct module *mod = NULL; + const struct btf_type *t; + struct btf *kernel_btf; int ret; + s32 id; - ret = btf_find_field(btf, t, BTF_FIELD_SPIN_LOCK, &info, 1); - if (ret < 0) - return ret; - if (!ret) - return -ENOENT; - return info.off; + /* Find type in map BTF, and use it to look up the matching type + * in vmlinux or module BTFs, by name and kind. + */ + t = btf_type_by_id(btf, info->kptr.type_id); + id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info), + &kernel_btf); + if (id < 0) + return id; + + /* Find and stash the function pointer for the destruction function that + * needs to be eventually invoked from the map free path. + */ + if (info->type == BPF_KPTR_REF) { + const struct btf_type *dtor_func; + const char *dtor_func_name; + unsigned long addr; + s32 dtor_btf_id; + + /* This call also serves as a whitelist of allowed objects that + * can be used as a referenced pointer and be stored in a map at + * the same time. + */ + dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id); + if (dtor_btf_id < 0) { + ret = dtor_btf_id; + goto end_btf; + } + + dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id); + if (!dtor_func) { + ret = -ENOENT; + goto end_btf; + } + + if (btf_is_module(kernel_btf)) { + mod = btf_try_get_module(kernel_btf); + if (!mod) { + ret = -ENXIO; + goto end_btf; + } + } + + /* We already verified dtor_func to be btf_type_is_func + * in register_btf_id_dtor_kfuncs. + */ + dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off); + addr = kallsyms_lookup_name(dtor_func_name); + if (!addr) { + ret = -EINVAL; + goto end_mod; + } + field->kptr.dtor = (void *)addr; + } + + field->kptr.btf_id = id; + field->kptr.btf = kernel_btf; + field->kptr.module = mod; + return 0; +end_mod: + module_put(mod); +end_btf: + btf_put(kernel_btf); + return ret; } -int btf_find_timer(const struct btf *btf, const struct btf_type *t) +static int btf_parse_list_head(const struct btf *btf, struct btf_field *field, + struct btf_field_info *info) { - struct btf_field_info info; - int ret; + const struct btf_type *t, *n = NULL; + const struct btf_member *member; + u32 offset; + int i; - ret = btf_find_field(btf, t, BTF_FIELD_TIMER, &info, 1); - if (ret < 0) - return ret; - if (!ret) + t = btf_type_by_id(btf, info->list_head.value_btf_id); + /* We've already checked that value_btf_id is a struct type. We + * just need to figure out the offset of the list_node, and + * verify its type. + */ + for_each_member(i, t, member) { + if (strcmp(info->list_head.node_name, __btf_name_by_offset(btf, member->name_off))) + continue; + /* Invalid BTF, two members with same name */ + if (n) + return -EINVAL; + n = btf_type_by_id(btf, member->type); + if (!__btf_type_is_struct(n)) + return -EINVAL; + if (strcmp("bpf_list_node", __btf_name_by_offset(btf, n->name_off))) + return -EINVAL; + offset = __btf_member_bit_offset(n, member); + if (offset % 8) + return -EINVAL; + offset /= 8; + if (offset % __alignof__(struct bpf_list_node)) + return -EINVAL; + + field->list_head.btf = (struct btf *)btf; + field->list_head.value_btf_id = info->list_head.value_btf_id; + field->list_head.node_offset = offset; + } + if (!n) return -ENOENT; - return info.off; + return 0; } -struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf, - const struct btf_type *t) +struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, + u32 field_mask, u32 value_size) { - struct btf_field_info info_arr[BPF_MAP_VALUE_OFF_MAX]; - struct bpf_map_value_off *tab; - struct btf *kernel_btf = NULL; - struct module *mod = NULL; - int ret, i, nr_off; + struct btf_field_info info_arr[BTF_FIELDS_MAX]; + struct btf_record *rec; + u32 next_off = 0; + int ret, i, cnt; - ret = btf_find_field(btf, t, BTF_FIELD_KPTR, info_arr, ARRAY_SIZE(info_arr)); + ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr)); if (ret < 0) return ERR_PTR(ret); if (!ret) return NULL; - nr_off = ret; - tab = kzalloc(offsetof(struct bpf_map_value_off, off[nr_off]), GFP_KERNEL | __GFP_NOWARN); - if (!tab) + cnt = ret; + /* This needs to be kzalloc to zero out padding and unused fields, see + * comment in btf_record_equal. + */ + rec = kzalloc(offsetof(struct btf_record, fields[cnt]), GFP_KERNEL | __GFP_NOWARN); + if (!rec) return ERR_PTR(-ENOMEM); - for (i = 0; i < nr_off; i++) { - const struct btf_type *t; - s32 id; + rec->spin_lock_off = -EINVAL; + rec->timer_off = -EINVAL; + for (i = 0; i < cnt; i++) { + if (info_arr[i].off + btf_field_type_size(info_arr[i].type) > value_size) { + WARN_ONCE(1, "verifier bug off %d size %d", info_arr[i].off, value_size); + ret = -EFAULT; + goto end; + } + if (info_arr[i].off < next_off) { + ret = -EEXIST; + goto end; + } + next_off = info_arr[i].off + btf_field_type_size(info_arr[i].type); - /* Find type in map BTF, and use it to look up the matching type - * in vmlinux or module BTFs, by name and kind. - */ - t = btf_type_by_id(btf, info_arr[i].type_id); - id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info), - &kernel_btf); - if (id < 0) { - ret = id; + rec->field_mask |= info_arr[i].type; + rec->fields[i].offset = info_arr[i].off; + rec->fields[i].type = info_arr[i].type; + + switch (info_arr[i].type) { + case BPF_SPIN_LOCK: + WARN_ON_ONCE(rec->spin_lock_off >= 0); + /* Cache offset for faster lookup at runtime */ + rec->spin_lock_off = rec->fields[i].offset; + break; + case BPF_TIMER: + WARN_ON_ONCE(rec->timer_off >= 0); + /* Cache offset for faster lookup at runtime */ + rec->timer_off = rec->fields[i].offset; + break; + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]); + if (ret < 0) + goto end; + break; + case BPF_LIST_HEAD: + ret = btf_parse_list_head(btf, &rec->fields[i], &info_arr[i]); + if (ret < 0) + goto end; + break; + case BPF_LIST_NODE: + break; + default: + ret = -EFAULT; goto end; } + rec->cnt++; + } - /* Find and stash the function pointer for the destruction function that - * needs to be eventually invoked from the map free path. - */ - if (info_arr[i].type == BPF_KPTR_REF) { - const struct btf_type *dtor_func; - const char *dtor_func_name; - unsigned long addr; - s32 dtor_btf_id; - - /* This call also serves as a whitelist of allowed objects that - * can be used as a referenced pointer and be stored in a map at - * the same time. - */ - dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id); - if (dtor_btf_id < 0) { - ret = dtor_btf_id; - goto end_btf; - } + /* bpf_list_head requires bpf_spin_lock */ + if (btf_record_has_field(rec, BPF_LIST_HEAD) && rec->spin_lock_off < 0) { + ret = -EINVAL; + goto end; + } - dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id); - if (!dtor_func) { - ret = -ENOENT; - goto end_btf; - } + return rec; +end: + btf_record_free(rec); + return ERR_PTR(ret); +} - if (btf_is_module(kernel_btf)) { - mod = btf_try_get_module(kernel_btf); - if (!mod) { - ret = -ENXIO; - goto end_btf; - } - } +int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) +{ + int i; - /* We already verified dtor_func to be btf_type_is_func - * in register_btf_id_dtor_kfuncs. - */ - dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off); - addr = kallsyms_lookup_name(dtor_func_name); - if (!addr) { - ret = -EINVAL; - goto end_mod; - } - tab->off[i].kptr.dtor = (void *)addr; - } + /* There are two owning types, kptr_ref and bpf_list_head. The former + * only supports storing kernel types, which can never store references + * to program allocated local types, atleast not yet. Hence we only need + * to ensure that bpf_list_head ownership does not form cycles. + */ + if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_LIST_HEAD)) + return 0; + for (i = 0; i < rec->cnt; i++) { + struct btf_struct_meta *meta; + u32 btf_id; + + if (!(rec->fields[i].type & BPF_LIST_HEAD)) + continue; + btf_id = rec->fields[i].list_head.value_btf_id; + meta = btf_find_struct_meta(btf, btf_id); + if (!meta) + return -EFAULT; + rec->fields[i].list_head.value_rec = meta->record; - tab->off[i].offset = info_arr[i].off; - tab->off[i].type = info_arr[i].type; - tab->off[i].kptr.btf_id = id; - tab->off[i].kptr.btf = kernel_btf; - tab->off[i].kptr.module = mod; + if (!(rec->field_mask & BPF_LIST_NODE)) + continue; + + /* We need to ensure ownership acyclicity among all types. The + * proper way to do it would be to topologically sort all BTF + * IDs based on the ownership edges, since there can be multiple + * bpf_list_head in a type. Instead, we use the following + * reasoning: + * + * - A type can only be owned by another type in user BTF if it + * has a bpf_list_node. + * - A type can only _own_ another type in user BTF if it has a + * bpf_list_head. + * + * We ensure that if a type has both bpf_list_head and + * bpf_list_node, its element types cannot be owning types. + * + * To ensure acyclicity: + * + * When A only has bpf_list_head, ownership chain can be: + * A -> B -> C + * Where: + * - B has both bpf_list_head and bpf_list_node. + * - C only has bpf_list_node. + * + * When A has both bpf_list_head and bpf_list_node, some other + * type already owns it in the BTF domain, hence it can not own + * another owning type through any of the bpf_list_head edges. + * A -> B + * Where: + * - B only has bpf_list_node. + */ + if (meta->record->field_mask & BPF_LIST_HEAD) + return -ELOOP; } - tab->nr_off = nr_off; - return tab; -end_mod: - module_put(mod); -end_btf: - btf_put(kernel_btf); -end: - while (i--) { - btf_put(tab->off[i].kptr.btf); - if (tab->off[i].kptr.module) - module_put(tab->off[i].kptr.module); + return 0; +} + +static int btf_field_offs_cmp(const void *_a, const void *_b, const void *priv) +{ + const u32 a = *(const u32 *)_a; + const u32 b = *(const u32 *)_b; + + if (a < b) + return -1; + else if (a > b) + return 1; + return 0; +} + +static void btf_field_offs_swap(void *_a, void *_b, int size, const void *priv) +{ + struct btf_field_offs *foffs = (void *)priv; + u32 *off_base = foffs->field_off; + u32 *a = _a, *b = _b; + u8 *sz_a, *sz_b; + + sz_a = foffs->field_sz + (a - off_base); + sz_b = foffs->field_sz + (b - off_base); + + swap(*a, *b); + swap(*sz_a, *sz_b); +} + +struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec) +{ + struct btf_field_offs *foffs; + u32 i, *off; + u8 *sz; + + BUILD_BUG_ON(ARRAY_SIZE(foffs->field_off) != ARRAY_SIZE(foffs->field_sz)); + if (IS_ERR_OR_NULL(rec)) + return NULL; + + foffs = kzalloc(sizeof(*foffs), GFP_KERNEL | __GFP_NOWARN); + if (!foffs) + return ERR_PTR(-ENOMEM); + + off = foffs->field_off; + sz = foffs->field_sz; + for (i = 0; i < rec->cnt; i++) { + off[i] = rec->fields[i].offset; + sz[i] = btf_field_type_size(rec->fields[i].type); } - kfree(tab); - return ERR_PTR(ret); + foffs->cnt = rec->cnt; + + if (foffs->cnt == 1) + return foffs; + sort_r(foffs->field_off, foffs->cnt, sizeof(foffs->field_off[0]), + btf_field_offs_cmp, btf_field_offs_swap, foffs); + return foffs; } static void __btf_struct_show(const struct btf *btf, const struct btf_type *t, @@ -4436,6 +4749,11 @@ static int btf_func_proto_check(struct btf_verifier_env *env, return -EINVAL; } + if (btf_type_is_resolve_source_only(ret_type)) { + btf_verifier_log_type(env, t, "Invalid return type"); + return -EINVAL; + } + if (btf_type_needs_resolve(ret_type) && !env_type_is_resolved(env, ret_type_id)) { err = btf_resolve(env, ret_type, ret_type_id); @@ -4463,7 +4781,6 @@ static int btf_func_proto_check(struct btf_verifier_env *env, nr_args--; } - err = 0; for (i = 0; i < nr_args; i++) { const struct btf_type *arg_type; u32 arg_type_id; @@ -4472,8 +4789,12 @@ static int btf_func_proto_check(struct btf_verifier_env *env, arg_type = btf_type_by_id(btf, arg_type_id); if (!arg_type) { btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); - err = -EINVAL; - break; + return -EINVAL; + } + + if (btf_type_is_resolve_source_only(arg_type)) { + btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); + return -EINVAL; } if (args[i].name_off && @@ -4481,25 +4802,23 @@ static int btf_func_proto_check(struct btf_verifier_env *env, !btf_name_valid_identifier(btf, args[i].name_off))) { btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); - err = -EINVAL; - break; + return -EINVAL; } if (btf_type_needs_resolve(arg_type) && !env_type_is_resolved(env, arg_type_id)) { err = btf_resolve(env, arg_type, arg_type_id); if (err) - break; + return err; } if (!btf_type_id_size(btf, &arg_type_id, NULL)) { btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); - err = -EINVAL; - break; + return -EINVAL; } } - return err; + return 0; } static int btf_func_check(struct btf_verifier_env *env, @@ -4913,6 +5232,119 @@ static int btf_parse_hdr(struct btf_verifier_env *env) return btf_check_sec_info(env, btf_data_size); } +static const char *alloc_obj_fields[] = { + "bpf_spin_lock", + "bpf_list_head", + "bpf_list_node", +}; + +static struct btf_struct_metas * +btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) +{ + union { + struct btf_id_set set; + struct { + u32 _cnt; + u32 _ids[ARRAY_SIZE(alloc_obj_fields)]; + } _arr; + } aof; + struct btf_struct_metas *tab = NULL; + int i, n, id, ret; + + BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0); + BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32)); + + memset(&aof, 0, sizeof(aof)); + for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) { + /* Try to find whether this special type exists in user BTF, and + * if so remember its ID so we can easily find it among members + * of structs that we iterate in the next loop. + */ + id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT); + if (id < 0) + continue; + aof.set.ids[aof.set.cnt++] = id; + } + + if (!aof.set.cnt) + return NULL; + sort(&aof.set.ids, aof.set.cnt, sizeof(aof.set.ids[0]), btf_id_cmp_func, NULL); + + n = btf_nr_types(btf); + for (i = 1; i < n; i++) { + struct btf_struct_metas *new_tab; + const struct btf_member *member; + struct btf_field_offs *foffs; + struct btf_struct_meta *type; + struct btf_record *record; + const struct btf_type *t; + int j, tab_cnt; + + t = btf_type_by_id(btf, i); + if (!t) { + ret = -EINVAL; + goto free; + } + if (!__btf_type_is_struct(t)) + continue; + + cond_resched(); + + for_each_member(j, t, member) { + if (btf_id_set_contains(&aof.set, member->type)) + goto parse; + } + continue; + parse: + tab_cnt = tab ? tab->cnt : 0; + new_tab = krealloc(tab, offsetof(struct btf_struct_metas, types[tab_cnt + 1]), + GFP_KERNEL | __GFP_NOWARN); + if (!new_tab) { + ret = -ENOMEM; + goto free; + } + if (!tab) + new_tab->cnt = 0; + tab = new_tab; + + type = &tab->types[tab->cnt]; + type->btf_id = i; + record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE, t->size); + /* The record cannot be unset, treat it as an error if so */ + if (IS_ERR_OR_NULL(record)) { + ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT; + goto free; + } + foffs = btf_parse_field_offs(record); + /* We need the field_offs to be valid for a valid record, + * either both should be set or both should be unset. + */ + if (IS_ERR_OR_NULL(foffs)) { + btf_record_free(record); + ret = -EFAULT; + goto free; + } + type->record = record; + type->field_offs = foffs; + tab->cnt++; + } + return tab; +free: + btf_struct_metas_free(tab); + return ERR_PTR(ret); +} + +struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id) +{ + struct btf_struct_metas *tab; + + BUILD_BUG_ON(offsetof(struct btf_struct_meta, btf_id) != 0); + tab = btf->struct_meta_tab; + if (!tab) + return NULL; + return bsearch(&btf_id, tab->types, tab->cnt, sizeof(tab->types[0]), btf_id_cmp_func); +} + static int btf_check_type_tags(struct btf_verifier_env *env, struct btf *btf, int start_id) { @@ -4963,6 +5395,7 @@ static int btf_check_type_tags(struct btf_verifier_env *env, static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, u32 log_level, char __user *log_ubuf, u32 log_size) { + struct btf_struct_metas *struct_meta_tab; struct btf_verifier_env *env = NULL; struct bpf_verifier_log *log; struct btf *btf = NULL; @@ -5031,15 +5464,34 @@ static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size, if (err) goto errout; + struct_meta_tab = btf_parse_struct_metas(log, btf); + if (IS_ERR(struct_meta_tab)) { + err = PTR_ERR(struct_meta_tab); + goto errout; + } + btf->struct_meta_tab = struct_meta_tab; + + if (struct_meta_tab) { + int i; + + for (i = 0; i < struct_meta_tab->cnt; i++) { + err = btf_check_and_fixup_fields(btf, struct_meta_tab->types[i].record); + if (err < 0) + goto errout_meta; + } + } + if (log->level && bpf_verifier_log_full(log)) { err = -ENOSPC; - goto errout; + goto errout_meta; } btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); return btf; +errout_meta: + btf_free_struct_meta_tab(btf); errout: btf_verifier_env_free(env); if (btf) @@ -5081,7 +5533,7 @@ static u8 bpf_ctx_convert_map[] = { #undef BPF_MAP_TYPE #undef BPF_LINK_TYPE -static const struct btf_member * +const struct btf_member * btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg) @@ -5154,6 +5606,26 @@ static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, return kern_ctx_type->type; } +int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type) +{ + const struct btf_member *kctx_member; + const struct btf_type *conv_struct; + const struct btf_type *kctx_type; + u32 kctx_type_id; + + conv_struct = bpf_ctx_convert.t; + /* get member for kernel ctx type */ + kctx_member = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2 + 1; + kctx_type_id = kctx_member->type; + kctx_type = btf_type_by_id(btf_vmlinux, kctx_type_id); + if (!btf_type_is_struct(kctx_type)) { + bpf_log(log, "kern ctx type id %u is not a struct\n", kctx_type_id); + return -EINVAL; + } + + return kctx_type_id; +} + BTF_ID_LIST(bpf_ctx_convert_btf_id) BTF_ID(struct, bpf_ctx_convert) @@ -5351,6 +5823,22 @@ static u32 get_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, return nr_args + 1; } +static bool prog_args_trusted(const struct bpf_prog *prog) +{ + enum bpf_attach_type atype = prog->expected_attach_type; + + switch (prog->type) { + case BPF_PROG_TYPE_TRACING: + return atype == BPF_TRACE_RAW_TP || atype == BPF_TRACE_ITER; + case BPF_PROG_TYPE_LSM: + return bpf_lsm_is_trusted(prog); + case BPF_PROG_TYPE_STRUCT_OPS: + return true; + default: + return false; + } +} + bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) @@ -5494,6 +5982,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, } info->reg_type = PTR_TO_BTF_ID; + if (prog_args_trusted(prog)) + info->reg_type |= PTR_TRUSTED; + if (tgt_prog) { enum bpf_prog_type tgt_type; @@ -5760,6 +6251,9 @@ error: /* check __percpu tag */ if (strcmp(tag_value, "percpu") == 0) tmp_flag = MEM_PERCPU; + /* check __rcu tag */ + if (strcmp(tag_value, "rcu") == 0) + tmp_flag = MEM_RCU; } stype = btf_type_skip_modifiers(btf, mtype->type, &id); @@ -5789,20 +6283,50 @@ error: return -EINVAL; } -int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype __maybe_unused, +int btf_str |
