Diffstat (limited to 'tools/lib/bpf/libbpf.c')
 -rw-r--r--   tools/lib/bpf/libbpf.c   2222
1 file changed, 704 insertions, 1518 deletions
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e89cc9c885b3..50d41815f431 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -31,7 +31,6 @@
 #include <linux/bpf.h>
 #include <linux/btf.h>
 #include <linux/filter.h>
-#include <linux/list.h>
 #include <linux/limits.h>
 #include <linux/perf_event.h>
 #include <linux/ring_buffer.h>
@@ -72,6 +71,135 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
 
+static const char * const attach_type_name[] = {
+	[BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
+	[BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
+	[BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
+	[BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
+	[BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
+	[BPF_CGROUP_DEVICE] = "cgroup_device",
+	[BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
+	[BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
+	[BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
+	[BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
+	[BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
+	[BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
+	[BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
+	[BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
+	[BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
+	[BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
+	[BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
+	[BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
+	[BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
+	[BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
+	[BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
+	[BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
+	[BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
+	[BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
+	[BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
+	[BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
+	[BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
+	[BPF_LIRC_MODE2] = "lirc_mode2",
+	[BPF_FLOW_DISSECTOR] = "flow_dissector",
+	[BPF_TRACE_RAW_TP] = "trace_raw_tp",
+	[BPF_TRACE_FENTRY] = "trace_fentry",
+	[BPF_TRACE_FEXIT] = "trace_fexit",
+	[BPF_MODIFY_RETURN] = "modify_return",
+	[BPF_LSM_MAC] = "lsm_mac",
+	[BPF_LSM_CGROUP] = "lsm_cgroup",
+	[BPF_SK_LOOKUP] = "sk_lookup",
+	[BPF_TRACE_ITER] = "trace_iter",
+	[BPF_XDP_DEVMAP] = "xdp_devmap",
+	[BPF_XDP_CPUMAP] = "xdp_cpumap",
+	[BPF_XDP] = "xdp",
+	[BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select",
+	[BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
+	[BPF_PERF_EVENT] = "perf_event",
+	[BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
+};
+
+static const char * const link_type_name[] = {
+	[BPF_LINK_TYPE_UNSPEC] = "unspec",
+	[BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+	[BPF_LINK_TYPE_TRACING] = "tracing",
+	[BPF_LINK_TYPE_CGROUP] = "cgroup",
+	[BPF_LINK_TYPE_ITER] = "iter",
+	[BPF_LINK_TYPE_NETNS] = "netns",
+	[BPF_LINK_TYPE_XDP] = "xdp",
+	[BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
+	[BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
+	[BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
+};
+
+static const char * const map_type_name[] = {
+	[BPF_MAP_TYPE_UNSPEC] = "unspec",
+	[BPF_MAP_TYPE_HASH] = "hash",
+	[BPF_MAP_TYPE_ARRAY] = "array",
+	[BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
+	[BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
+	[BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
+	[BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
+	[BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
+	[BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
+	[BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
+	[BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
+	[BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
+	[BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
+	[BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
+	[BPF_MAP_TYPE_DEVMAP] = "devmap",
+	[BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
+	[BPF_MAP_TYPE_SOCKMAP] = "sockmap",
+	[BPF_MAP_TYPE_CPUMAP] = "cpumap",
+	[BPF_MAP_TYPE_XSKMAP] = "xskmap",
+	[BPF_MAP_TYPE_SOCKHASH] = "sockhash",
+	[BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
+	[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
+	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
+	[BPF_MAP_TYPE_QUEUE] = "queue",
+	[BPF_MAP_TYPE_STACK] = "stack",
+	[BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
+	[BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
+	[BPF_MAP_TYPE_RINGBUF] = "ringbuf",
+	[BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
+	[BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
+	[BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
+};
+
+static const char * const prog_type_name[] = {
+	[BPF_PROG_TYPE_UNSPEC] = "unspec",
+	[BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
+	[BPF_PROG_TYPE_KPROBE] = "kprobe",
+	[BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
+	[BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
+	[BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
+	[BPF_PROG_TYPE_XDP] = "xdp",
+	[BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
+	[BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
+	[BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
+	[BPF_PROG_TYPE_LWT_IN] = "lwt_in",
+	[BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
+	[BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
+	[BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
+	[BPF_PROG_TYPE_SK_SKB] = "sk_skb",
+	[BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
+	[BPF_PROG_TYPE_SK_MSG] = "sk_msg",
+	[BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
+	[BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
+	[BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
+	[BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
+	[BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
+	[BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
+	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
+	[BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
+	[BPF_PROG_TYPE_TRACING] = "tracing",
+	[BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
+	[BPF_PROG_TYPE_EXT] = "ext",
+	[BPF_PROG_TYPE_LSM] = "lsm",
+	[BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
+	[BPF_PROG_TYPE_SYSCALL] = "syscall",
+};
+
 static int __base_pr(enum libbpf_print_level level, const char *format,
 		     va_list args)
 {
@@ -151,12 +279,9 @@ static inline __u64 ptr_to_u64(const void *ptr)
 	return (__u64) (unsigned long) ptr;
 }
 
-/* this goes away in libbpf 1.0 */
-enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE;
-
 int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
 {
-	libbpf_mode = mode;
+	/* as of v1.0 libbpf_set_strict_mode() is a no-op */
 	return 0;
 }
 
@@ -219,12 +344,8 @@ enum sec_def_flags {
 	SEC_ATTACH_BTF = 4,
 	/* BPF program type allows sleeping/blocking in kernel */
 	SEC_SLEEPABLE = 8,
-	/* allow non-strict prefix matching */
-	SEC_SLOPPY_PFX = 16,
 	/* BPF program support non-linear XDP buffer */
-	SEC_XDP_FRAGS = 32,
-	/* deprecated sec definitions not supposed to be used */
-	SEC_DEPRECATED = 64,
+	SEC_XDP_FRAGS = 16,
 };
 
 struct bpf_sec_def {
@@ -244,9 +365,10 @@ struct bpf_sec_def {
  * linux/filter.h.
  */
 struct bpf_program {
-	const struct bpf_sec_def *sec_def;
+	char *name;
 	char *sec_name;
 	size_t sec_idx;
+	const struct bpf_sec_def *sec_def;
 	/* this program's instruction offset (in number of instructions)
 	 * within its containing ELF section
 	 */
@@ -266,12 +388,6 @@ struct bpf_program {
 	 */
 	size_t sub_insn_off;
 
-	char *name;
-	/* name with / replaced by _; makes recursive pinning
-	 * in bpf_object__pin_programs easier
-	 */
-	char *pin_name;
-
 	/* instructions that belong to BPF program; insns[0] is located at
 	 * sec_insn_off instruction within its ELF section in ELF file, so
 	 * when mapping ELF file instruction index to the local instruction,
@@ -292,24 +408,19 @@ struct bpf_program {
 	size_t log_size;
 	__u32 log_level;
 
-	struct {
-		int nr;
-		int *fds;
-	} instances;
-	bpf_program_prep_t preprocessor;
-
 	struct bpf_object *obj;
-	void *priv;
-	bpf_program_clear_priv_t clear_priv;
+
+	int fd;
 	bool autoload;
 	bool mark_btf_static;
 	enum bpf_prog_type type;
 	enum bpf_attach_type expected_attach_type;
+
 	int prog_ifindex;
 	__u32 attach_btf_obj_fd;
 	__u32 attach_btf_id;
 	__u32 attach_prog_fd;
+
 	void *func_info;
 	__u32 func_info_rec_size;
 	__u32 func_info_cnt;
@@ -356,6 +467,14 @@ enum libbpf_map_type {
 	LIBBPF_MAP_KCONFIG,
 };
 
+struct bpf_map_def {
+	unsigned int type;
+	unsigned int key_size;
+	unsigned int value_size;
+	unsigned int max_entries;
+	unsigned int map_flags;
+};
+
 struct bpf_map {
 	struct bpf_object *obj;
 	char *name;
@@ -376,8 +495,6 @@ struct bpf_map {
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
 	__u32 btf_vmlinux_value_type_id;
-	void *priv;
-	bpf_map_clear_priv_t clear_priv;
 	enum libbpf_map_type libbpf_type;
 	void *mmaped;
 	struct bpf_struct_ops *st_ops;
@@ -440,8 +557,6 @@ struct extern_desc {
 	};
 };
 
-static LIST_HEAD(bpf_objects_list);
-
 struct module_btf {
 	struct btf *btf;
 	char *name;
@@ -510,12 +625,6 @@ struct bpf_object {
 	/* Information when doing ELF related work. Only valid if efile.elf is not NULL */
 	struct elf_state efile;
 
-	/*
-	 * All loaded bpf_object are linked in a list, which is
-	 * hidden to caller. bpf_objects__<func> handlers deal with
-	 * all objects.
-	 */
-	struct list_head list;
-
 	struct btf *btf;
 	struct btf_ext *btf_ext;
@@ -541,9 +650,6 @@ struct bpf_object {
 	size_t log_size;
 	__u32 log_level;
 
-	void *priv;
-	bpf_object_clear_priv_t clear_priv;
-
 	int *fd_array;
 	size_t fd_array_cap;
 	size_t fd_array_cnt;
@@ -565,25 +671,10 @@ static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
 
 void bpf_program__unload(struct bpf_program *prog)
 {
-	int i;
-
 	if (!prog)
 		return;
 
-	/*
-	 * If the object is opened but the program was never loaded,
-	 * it is possible that prog->instances.nr == -1.
-	 */
-	if (prog->instances.nr > 0) {
-		for (i = 0; i < prog->instances.nr; i++)
-			zclose(prog->instances.fds[i]);
-	} else if (prog->instances.nr != -1) {
-		pr_warn("Internal error: instances.nr is %d\n",
-			prog->instances.nr);
-	}
-
-	prog->instances.nr = -1;
-	zfree(&prog->instances.fds);
+	zclose(prog->fd);
 
 	zfree(&prog->func_info);
 	zfree(&prog->line_info);
@@ -594,16 +685,9 @@ static void bpf_program__exit(struct bpf_program *prog)
 	if (!prog)
 		return;
 
-	if (prog->clear_priv)
-		prog->clear_priv(prog, prog->priv);
-
-	prog->priv = NULL;
-	prog->clear_priv = NULL;
-
 	bpf_program__unload(prog);
 	zfree(&prog->name);
 	zfree(&prog->sec_name);
-	zfree(&prog->pin_name);
 	zfree(&prog->insns);
 	zfree(&prog->reloc_desc);
 
@@ -612,26 +696,6 @@ static void bpf_program__exit(struct bpf_program *prog)
 	prog->sec_idx = -1;
 }
 
-static char *__bpf_program__pin_name(struct bpf_program *prog)
-{
-	char *name, *p;
-
-	if (libbpf_mode & LIBBPF_STRICT_SEC_NAME)
-		name = strdup(prog->name);
-	else
-		name = strdup(prog->sec_name);
-
-	if (!name)
-		return NULL;
-
-	p = name;
-
-	while ((p = strchr(p, '/')))
-		*p = '_';
-
-	return name;
-}
-
 static bool insn_is_subprog_call(const struct bpf_insn *insn)
 {
 	return BPF_CLASS(insn->code) == BPF_JMP &&
@@ -673,6 +737,7 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 	prog->insns_cnt = prog->sec_insn_cnt;
 
 	prog->type = BPF_PROG_TYPE_UNSPEC;
+	prog->fd = -1;
 
 	/* libbpf's convention for SEC("?abc...") is that it's just like
 	 * SEC("abc...") but the corresponding bpf_program starts out with
@@ -686,9 +751,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 		prog->autoload = true;
 	}
 
-	prog->instances.fds = NULL;
-	prog->instances.nr = -1;
-
 	/* inherit object's log_level */
 	prog->log_level = obj->log_level;
 
@@ -700,10 +762,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 	if (!prog->name)
 		goto errout;
 
-	prog->pin_name = __bpf_program__pin_name(prog);
-	if (!prog->pin_name)
-		goto errout;
-
 	prog->insns = malloc(insn_data_sz);
 	if (!prog->insns)
 		goto errout;
@@ -1185,7 +1243,6 @@ static struct bpf_object *bpf_object__new(const char *path,
 					  size_t obj_buf_sz,
 					  const char *obj_name)
 {
-	bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST);
 	struct bpf_object *obj;
 	char *end;
 
@@ -1223,9 +1280,6 @@ static struct bpf_object *bpf_object__new(const char *path,
 	obj->kern_version = get_kernel_version();
 	obj->loaded = false;
 
-	INIT_LIST_HEAD(&obj->list);
-	if (!strict)
-		list_add(&obj->list, &bpf_objects_list);
 	return obj;
 }
 
@@ -1258,10 +1312,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)
 	}
 
 	if (obj->efile.obj_buf_sz > 0) {
-		/*
-		 * obj_buf should have been validated by
-		 * bpf_object__open_buffer().
-		 */
+		/* obj_buf should have been validated by bpf_object__open_mem(). */
 		elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
 	} else {
 		obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
@@ -1643,7 +1694,7 @@ static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
 	switch (ext->kcfg.type) {
 	case KCFG_BOOL:
 		if (value == 'm') {
-			pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
+			pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
 				ext->name, value);
 			return -EINVAL;
 		}
@@ -1664,7 +1715,7 @@ static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
 	case KCFG_INT:
 	case KCFG_CHAR_ARR:
 	default:
-		pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
+		pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
 			ext->name, value);
 		return -EINVAL;
 	}
@@ -1678,7 +1729,8 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
 	size_t len;
 
 	if (ext->kcfg.type != KCFG_CHAR_ARR) {
-		pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
+		pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
+			ext->name, value);
 		return -EINVAL;
 	}
 
@@ -1692,7 +1744,7 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
 	/* strip quotes */
 	len -= 2;
 	if (len >= ext->kcfg.sz) {
-		pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
+		pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
			ext->name, value, len, ext->kcfg.sz - 1);
 		len = ext->kcfg.sz - 1;
 	}
@@ -1749,13 +1801,20 @@ static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
 			      __u64 value)
 {
-	if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
-		pr_warn("extern (kcfg) %s=%llu should be integer\n",
+	if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
+	    ext->kcfg.type != KCFG_BOOL) {
+		pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
 			ext->name, (unsigned long long)value);
 		return -EINVAL;
 	}
+	if (ext->kcfg.type == KCFG_BOOL && value > 1) {
+		pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
+			ext->name, (unsigned long long)value);
+		return -EINVAL;
+
+	}
 	if (!is_kcfg_value_in_range(ext, value)) {
-		pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
+		pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
 			ext->name, (unsigned long long)value, ext->kcfg.sz);
 		return -ERANGE;
 	}
@@ -1819,16 +1878,19 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj,
 		/* assume integer */
 		err = parse_u64(value, &num);
 		if (err) {
-			pr_warn("extern (kcfg) %s=%s should be integer\n",
-				ext->name, value);
+			pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
 			return err;
 		}
+		if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
+			pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
+			return -EINVAL;
+		}
 		err = set_kcfg_value_num(ext, ext_val, num);
 		break;
 	}
 	if (err)
 		return err;
-	pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
+	pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
 	return 0;
 }
 
@@ -1924,143 +1986,6 @@ static int bpf_object__init_kconfig_map(struct bpf_object *obj)
 	return 0;
 }
 
-static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
-{
-	Elf_Data *symbols = obj->efile.symbols;
-	int i, map_def_sz = 0, nr_maps = 0, nr_syms;
-	Elf_Data *data = NULL;
-	Elf_Scn *scn;
-
-	if (obj->efile.maps_shndx < 0)
-		return 0;
-
-	if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) {
-		pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n");
-		return -EOPNOTSUPP;
-	}
-
-	if (!symbols)
-		return -EINVAL;
-
-	scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
-	data = elf_sec_data(obj, scn);
-	if (!scn || !data) {
-		pr_warn("elf: failed to get legacy map definitions for %s\n",
-			obj->path);
-		return -EINVAL;
-	}
-
-	/*
-	 * Count number of maps. Each map has a name.
-	 * Array of maps is not supported: only the first element is
-	 * considered.
-	 *
-	 * TODO: Detect array of map and report error.
-	 */
-	nr_syms = symbols->d_size / sizeof(Elf64_Sym);
-	for (i = 0; i < nr_syms; i++) {
-		Elf64_Sym *sym = elf_sym_by_idx(obj, i);
-
-		if (sym->st_shndx != obj->efile.maps_shndx)
-			continue;
-		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION)
-			continue;
-		nr_maps++;
-	}
-	/* Assume equally sized map definitions */
-	pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
-		 nr_maps, data->d_size, obj->path);
-
-	if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
-		pr_warn("elf: unable to determine legacy map definition size in %s\n",
-			obj->path);
-		return -EINVAL;
-	}
-	map_def_sz = data->d_size / nr_maps;
-
-	/* Fill obj->maps using data in "maps" section. */
-	for (i = 0; i < nr_syms; i++) {
-		Elf64_Sym *sym = elf_sym_by_idx(obj, i);
-		const char *map_name;
-		struct bpf_map_def *def;
-		struct bpf_map *map;
-
-		if (sym->st_shndx != obj->efile.maps_shndx)
-			continue;
-		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION)
-			continue;
-
-		map = bpf_object__add_map(obj);
-		if (IS_ERR(map))
-			return PTR_ERR(map);
-
-		map_name = elf_sym_str(obj, sym->st_name);
-		if (!map_name) {
-			pr_warn("failed to get map #%d name sym string for obj %s\n",
-				i, obj->path);
-			return -LIBBPF_ERRNO__FORMAT;
-		}
-
-		pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name);
-
-		if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
-			pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
-			return -ENOTSUP;
-		}
-
-		map->libbpf_type = LIBBPF_MAP_UNSPEC;
-		map->sec_idx = sym->st_shndx;
-		map->sec_offset = sym->st_value;
-		pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
-			 map_name, map->sec_idx, map->sec_offset);
-		if (sym->st_value + map_def_sz > data->d_size) {
-			pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
-				obj->path, map_name);
-			return -EINVAL;
-		}
-
-		map->name = strdup(map_name);
-		if (!map->name) {
-			pr_warn("map '%s': failed to alloc map name\n", map_name);
-			return -ENOMEM;
-		}
-		pr_debug("map %d is \"%s\"\n", i, map->name);
-		def = (struct bpf_map_def *)(data->d_buf + sym->st_value);
-		/*
-		 * If the definition of the map in the object file fits in
-		 * bpf_map_def, copy it. Any extra fields in our version
-		 * of bpf_map_def will default to zero as a result of the
-		 * calloc above.
-		 */
-		if (map_def_sz <= sizeof(struct bpf_map_def)) {
-			memcpy(&map->def, def, map_def_sz);
-		} else {
-			/*
-			 * Here the map structure being read is bigger than what
-			 * we expect, truncate if the excess bits are all zero.
-			 * If they are not zero, reject this map as
-			 * incompatible.
-			 */
-			char *b;
-
-			for (b = ((char *)def) + sizeof(struct bpf_map_def);
-			     b < ((char *)def) + map_def_sz; b++) {
-				if (*b != 0) {
-					pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
-						obj->path, map_name);
-					if (strict)
-						return -EINVAL;
-				}
-			}
-			memcpy(&map->def, def, sizeof(struct bpf_map_def));
-		}
-
-		/* btf info may not exist but fill it in if it does exist */
-		(void) bpf_map_find_btf_info(obj, map);
-	}
-	return 0;
-}
-
 const struct btf_type *
 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
 {
@@ -2114,6 +2039,7 @@ static const char *__btf_kind_str(__u16 kind)
 	case BTF_KIND_FLOAT: return "float";
 	case BTF_KIND_DECL_TAG: return "decl_tag";
 	case BTF_KIND_TYPE_TAG: return "type_tag";
+	case BTF_KIND_ENUM64: return "enum64";
 	default: return "unknown";
 	}
 }
@@ -2177,6 +2103,13 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
 	return bpf_map__set_pin_path(map, buf);
 }
 
+/* should match definition in bpf_helpers.h */
+enum libbpf_pin_type {
+	LIBBPF_PIN_NONE,
+	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+	LIBBPF_PIN_BY_NAME,
+};
+
 int parse_btf_map_def(const char *map_name, struct btf *btf,
 		      const struct btf_type *def_t, bool strict,
 		      struct btf_map_def *map_def, struct btf_map_def *inner_def)
@@ -2398,6 +2331,37 @@ int parse_btf_map_def(const char *map_name, struct btf *btf,
 	return 0;
 }
 
+static size_t adjust_ringbuf_sz(size_t sz)
+{
+	__u32 page_sz = sysconf(_SC_PAGE_SIZE);
+	__u32 mul;
+
+	/* if user forgot to set any size, make sure they see error */
+	if (sz == 0)
+		return 0;
+	/* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
+	 * a power-of-2 multiple of kernel's page size. If user diligently
+	 * satisified these conditions, pass the size through.
+	 */
+	if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
+		return sz;
+
+	/* Otherwise find closest (page_sz * power_of_2) product bigger than
+	 * user-set size to satisfy both user size request and kernel
+	 * requirements and substitute correct max_entries for map creation.
+	 */
+	for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
+		if (mul * page_sz > sz)
+			return mul * page_sz;
+	}
+
+	/* if it's impossible to satisfy the conditions (i.e., user size is
+	 * very close to UINT_MAX but is not a power-of-2 multiple of
	 * page_size) then just return original size and let kernel reject it
+	 */
+	return sz;
+}
+
 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
 {
 	map->def.type = def->map_type;
@@ -2411,6 +2375,10 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def
 	map->btf_key_type_id = def->key_type_id;
 	map->btf_value_type_id = def->value_type_id;
 
+	/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
+	if (map->def.type == BPF_MAP_TYPE_RINGBUF)
+		map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
+
 	if (def->parts & MAP_DEF_MAP_TYPE)
 		pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
 
@@ -2609,12 +2577,11 @@ static int bpf_object__init_maps(struct bpf_object *obj,
 {
 	const char *pin_root_path;
 	bool strict;
-	int err;
+	int err = 0;
 
 	strict = !OPTS_GET(opts, relaxed_maps, false);
 	pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
 
-	err = bpf_object__init_user_maps(obj, strict);
 	err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
 	err = err ?: bpf_object__init_global_data_maps(obj);
 	err = err ?: bpf_object__init_kconfig_map(obj);
@@ -2642,12 +2609,13 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
 	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
 	bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
 	bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
+	bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
 
 	return !has_func || !has_datasec || !has_func_global || !has_float ||
-	       !has_decl_tag || !has_type_tag;
+	       !has_decl_tag || !has_type_tag || !has_enum64;
 }
 
-static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
+static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 {
 	bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
 	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
@@ -2655,6 +2623,8 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
 	bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
 	bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
+	bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
+	int enum64_placeholder_id = 0;
 	struct btf_type *t;
 	int i, j, vlen;
 
@@ -2717,8 +2687,32 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 			/* replace TYPE_TAG with a CONST */
 			t->name_off = 0;
 			t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
-		}
+		} else if (!has_enum64 && btf_is_enum(t)) {
+			/* clear the kflag */
+			t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
+		} else if (!has_enum64 && btf_is_enum64(t)) {
+			/* replace ENUM64 with a union */
+			struct btf_member *m;
+
+			if (enum64_placeholder_id == 0) {
+				enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
+				if (enum64_placeholder_id < 0)
+					return enum64_placeholder_id;
+
+				t = (struct btf_type *)btf__type_by_id(btf, i);
+			}
+
+			m = btf_members(t);
+			vlen = btf_vlen(t);
+			t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
+			for (j = 0; j < vlen; j++, m++) {
+				m->type = enum64_placeholder_id;
+				m->offset = 0;
+			}
+		}
 	}
+
+	return 0;
 }
 
 static bool libbpf_needs_btf(const struct bpf_object *obj)
@@ -2905,11 +2899,6 @@ static int btf_finalize_data(struct bpf_object *obj, struct btf *btf)
 	return libbpf_err(err);
 }
 
-int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
-{
-	return btf_finalize_data(obj, btf);
-}
-
 static int bpf_object__finalize_btf(struct bpf_object *obj)
 {
 	int err;
@@ -3056,7 +3045,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 
 		/* enforce 8-byte pointers for BPF-targeted BTFs */
 		btf__set_pointer_size(obj->btf, 8);
-		bpf_object__sanitize_btf(obj, kern_btf);
+		err = bpf_object__sanitize_btf(obj, kern_btf);
+		if (err)
+			return err;
 	}
 
 	if (obj->gen_loader) {
@@ -3563,6 +3554,10 @@ static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
 		if (strcmp(name, "libbpf_tristate"))
 			return KCFG_UNKNOWN;
 		return KCFG_TRISTATE;
+	case BTF_KIND_ENUM64:
+		if (strcmp(name, "libbpf_tristate"))
+			return KCFG_UNKNOWN;
+		return KCFG_TRISTATE;
 	case BTF_KIND_ARRAY:
 		if (btf_array(t)->nelems == 0)
 			return KCFG_UNKNOWN;
@@ -3738,7 +3733,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 			ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
							&ext->kcfg.is_signed);
 			if (ext->kcfg.type == KCFG_UNKNOWN) {
-				pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
+				pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
 				return -ENOTSUP;
 			}
 		} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
@@ -3860,41 +3855,8 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 	return 0;
 }
 
-struct bpf_program *
-bpf_object__find_program_by_title(const struct bpf_object *obj,
-				  const char *title)
+static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
 {
-	struct bpf_program *pos;
-
-	bpf_object__for_each_program(pos, obj) {
-		if (pos->sec_name && !strcmp(pos->sec_name, title))
-			return pos;
-	}
-	return errno = ENOENT, NULL;
-}
-
-static bool prog_is_subprog(const struct bpf_object *obj,
-			    const struct bpf_program *prog)
-{
-	/* For legacy reasons, libbpf supports an entry-point BPF programs
-	 * without SEC() attribute, i.e., those in the .text section. But if
-	 * there are 2 or more such programs in the .text section, they all
-	 * must be subprograms called from entry-point BPF programs in
-	 * designated SEC()'tions, otherwise there is no way to distinguish
-	 * which of those programs should be loaded vs which are a subprogram.
-	 * Similarly, if there is a function/program in .text and at least one
-	 * other BPF program with custom SEC() attribute, then we just assume
-	 * .text programs are subprograms (even if they are not called from
-	 * other programs), because libbpf never explicitly supported mixing
-	 * SEC()-designated BPF programs and .text entry-point BPF programs.
-	 *
-	 * In libbpf 1.0 strict mode, we always consider .text
-	 * programs to be subprograms.
-	 */
-
-	if (libbpf_mode & LIBBPF_STRICT_SEC_NAME)
-		return prog->sec_idx == obj->efile.text_shndx;
-
 	return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
 }
 
@@ -4235,9 +4197,7 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat
 
 static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
 {
-	struct bpf_map_def *def = &map->def;
-	__u32 key_type_id = 0, value_type_id = 0;
-	int ret;
+	int id;
 
 	if (!obj->btf)
 		return -ENOENT;
@@ -4246,31 +4206,22 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
 	 * For struct_ops map, it does not need btf_key_type_id and
 	 * btf_value_type_id.
 	 */
-	if (map->sec_idx == obj->efile.btf_maps_shndx ||
-	    bpf_map__is_struct_ops(map))
+	if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
 		return 0;
 
-	if (!bpf_map__is_internal(map)) {
-		pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n");
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-		ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
-					   def->value_size, &key_type_id,
-					   &value_type_id);
-#pragma GCC diagnostic pop
-	} else {
-		/*
-		 * LLVM annotates global data differently in BTF, that is,
-		 * only as '.data', '.bss' or '.rodata'.
-		 */
-		ret = btf__find_by_name(obj->btf, map->real_name);
-	}
-	if (ret < 0)
-		return ret;
+	/*
+	 * LLVM annotates global data differently in BTF, that is,
	 * only as '.data', '.bss' or '.rodata'.
+	 */
+	if (!bpf_map__is_internal(map))
+		return -ENOENT;
 
-	map->btf_key_type_id = key_type_id;
-	map->btf_value_type_id = bpf_map__is_internal(map) ?
-				 ret : value_type_id;
+	id = btf__find_by_name(obj->btf, map->real_name);
+	if (id < 0)
+		return id;
+
+	map->btf_key_type_id = 0;
+	map->btf_value_type_id = id;
 	return 0;
 }
 
@@ -4327,7 +4278,7 @@ int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
 {
 	struct bpf_map_info info = {};
-	__u32 len = sizeof(info);
+	__u32 len = sizeof(info), name_len;
 	int new_fd, err;
 	char *new_name;
 
@@ -4337,7 +4288,12 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
 	if (err)
 		return libbpf_err(err);
 
-	new_name = strdup(info.name);
+	name_len = strlen(info.name);
+	if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
+		new_name = strdup(map->name);
+	else
+		new_name = strdup(info.name);
+
 	if (!new_name)
 		return libbpf_err(-errno);
 
@@ -4396,18 +4352,16 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
 
 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
 {
-	if (map->fd >= 0)
+	if (map->obj->loaded)
 		return libbpf_err(-EBUSY);
+
 	map->def.max_entries = max_entries;
-	return 0;
-}
 
-int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
-{
-	if (!map || !max_entries)
-		return libbpf_err(-EINVAL);
+	/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
+	if (map->def.type == BPF_MAP_TYPE_RINGBUF)
+		map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
 
-	return bpf_map__set_max_entries(map, max_entries);
+	return 0;
 }
 
 static int
@@ -4746,6 +4700,19 @@ static int probe_kern_bpf_cookie(void)
 	return probe_fd(ret);
 }
 
+static int probe_kern_btf_enum64(void)
+{
+	static const char strs[] = "\0enum64";
+	__u32 types[] = {
+		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+	};
+
+	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+					     strs, sizeof(strs)));
+}
+
+static int probe_kern_syscall_wrapper(void);
+
 enum kern_feature_result {
 	FEAT_UNKNOWN = 0,
 	FEAT_SUPPORTED = 1,
@@ -4811,6 +4778,12 @@ static struct kern_feature_desc {
 	[FEAT_BPF_COOKIE] = {
 		"BPF cookie support", probe_kern_bpf_cookie,
 	},
+	[FEAT_BTF_ENUM64] = {
+		"BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
+	},
+	[FEAT_SYSCALL_WRAPPER] = {
+		"Kernel using syscall wrapper", probe_kern_syscall_wrapper,
+	},
 };
 
 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
@@ -4943,42 +4916,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 
 static void bpf_map__destroy(struct bpf_map *map);
 
-static bool is_pow_of_2(size_t x)
-{
-	return x && (x & (x - 1));
-}
-
-static size_t adjust_ringbuf_sz(size_t sz)
-{
-	__u32 page_sz = sysconf(_SC_PAGE_SIZE);
-	__u32 mul;
-
-	/* if user forgot to set any size, make sure they see error */
-	if (sz == 0)
-		return 0;
-	/* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
-	 * a power-of-2 multiple of kernel's page size. If user diligently
-	 * satisified these conditions, pass the size through.
-	 */
-	if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
-		return sz;
-
-	/* Otherwise find closest (page_sz * power_of_2) product bigger than
-	 * user-set size to satisfy both user size request and kernel
-	 * requirements and substitute correct max_entries for map creation.
-	 */
-	for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
-		if (mul * page_sz > sz)
-			return mul * page_sz;
-	}
-
-	/* if it's impossible to satisfy the conditions (i.e., user size is
-	 * very close to UINT_MAX but is not a power-of-2 multiple of
-	 * page_size) then just return original size and let kernel reject it
-	 */
-	return sz;
-}
-
 static int
 bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_in
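
With the global libbpf_mode variable removed, libbpf_set_strict_mode() is retained purely for backward compatibility: every former LIBBPF_STRICT_* behavior is unconditionally on in v1.0. A minimal caller sketch, not part of this commit, assuming a libbpf v1.0 build (LIBBPF_STRICT_ALL is the real constant from libbpf_legacy.h):

	#include <bpf/libbpf.h>

	int main(void)
	{
		/* Pre-1.0 callers often opted in to strict behavior explicitly.
		 * On v1.0+ this call is a no-op that always returns 0, so it can
		 * stay in place while a codebase straddles both library versions.
		 */
		return libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
	}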
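With bpf_object__init_user_maps() deleted, objects carrying legacy struct bpf_map_def definitions in SEC("maps") are no longer parsed; maps must be BTF-defined in the .maps section. A sketch of the replacement declaration (map name and sizes are illustrative; the __uint/__type macros come from bpf_helpers.h, and the pinning value matches the enum libbpf_pin_type added above):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	/* BTF-defined equivalent of a legacy SEC("maps") hash map */
	struct {
		__uint(type, BPF_MAP_TYPE_HASH);
		__uint(max_entries, 1024);
		__type(key, __u32);
		__type(value, __u64);
		__uint(pinning, LIBBPF_PIN_BY_NAME); /* optional: pin under /sys/fs/bpf */
	} counts SEC(".maps");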
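Because adjust_ringbuf_sz() now runs both when a BTF map definition is parsed and inside bpf_map__set_max_entries(), user space can request an arbitrary ring buffer size and libbpf rounds it up to a power-of-2 multiple of the page size before BPF_MAP_CREATE. A user-space sketch (the object path "prog.bpf.o" and map name "rb" are hypothetical; the map is assumed to be BPF_MAP_TYPE_RINGBUF):

	#include <stdio.h>
	#include <bpf/libbpf.h>

	int main(void)
	{
		struct bpf_object *obj;
		struct bpf_map *rb;

		obj = bpf_object__open_file("prog.bpf.o", NULL); /* hypothetical object */
		if (!obj)
			return 1;

		rb = bpf_object__find_map_by_name(obj, "rb"); /* assumed ringbuf map */
		if (!rb)
			goto out;

		/* 500000 is not a power-of-2 multiple of the page size; libbpf
		 * substitutes the next valid value (524288 with 4 KiB pages)
		 * instead of letting the kernel reject map creation.
		 */
		bpf_map__set_max_entries(rb, 500000);
		printf("max_entries = %u\n", bpf_map__max_entries(rb));
	out:
		bpf_object__close(obj);
		return 0;
	}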
