Diffstat (limited to 'tools/perf/util')
27 files changed, 853 insertions, 374 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index d8fe514c9ec9..9dfae1bda9cc 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -289,6 +289,7 @@ CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET
 CFLAGS_parse-events.o += -Wno-redundant-decls
 CFLAGS_expr.o += -Wno-redundant-decls
 CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE
+CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/

 $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
 	$(call rule_mkdir)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 5e390a1a79ab..091987dd3966 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -220,6 +220,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
 			break;
 		case ARM_SPE_DATA_SOURCE:
+			decoder->record.source = payload;
 			break;
 		case ARM_SPE_BAD:
 			break;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 69b31084d6be..46a61df1145b 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -29,6 +29,17 @@ enum arm_spe_op_type {
 	ARM_SPE_ST		= 1 << 1,
 };

+enum arm_spe_neoverse_data_source {
+	ARM_SPE_NV_L1D		 = 0x0,
+	ARM_SPE_NV_L2		 = 0x8,
+	ARM_SPE_NV_PEER_CORE	 = 0x9,
+	ARM_SPE_NV_LOCAL_CLUSTER = 0xa,
+	ARM_SPE_NV_SYS_CACHE	 = 0xb,
+	ARM_SPE_NV_PEER_CLUSTER	 = 0xc,
+	ARM_SPE_NV_REMOTE	 = 0xd,
+	ARM_SPE_NV_DRAM		 = 0xe,
+};
+
 struct arm_spe_record {
 	enum arm_spe_sample_type type;
 	int err;
@@ -40,6 +51,7 @@ struct arm_spe_record {
 	u64 virt_addr;
 	u64 phys_addr;
 	u64 context_id;
+	u16 source;
 };

 struct arm_spe_insn;
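The decoder change above latches the raw DATA_SOURCE packet payload into record->source, and on Neoverse parts the new enum gives those payload values their meaning. A minimal standalone sketch of that mapping, handy for eyeballing raw payloads (spe_nv_source_name() is a hypothetical helper for illustration, not part of the patch):

#include <stdio.h>

/* Mirrors enum arm_spe_neoverse_data_source from arm-spe-decoder.h */
static const char *spe_nv_source_name(unsigned int source)
{
	switch (source) {
	case 0x0: return "L1D";
	case 0x8: return "L2";
	case 0x9: return "peer core";
	case 0xa: return "local cluster";
	case 0xb: return "system cache";
	case 0xc: return "peer cluster";
	case 0xd: return "remote";
	case 0xe: return "DRAM";
	default:  return "unknown";
	}
}

int main(void)
{
	/* e.g. a record whose DATA_SOURCE payload was 0xb */
	printf("load satisfied from: %s\n", spe_nv_source_name(0xb));
	return 0;
}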
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index d040406f3314..22dcfe07e886 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -34,6 +34,7 @@
 #include "arm-spe-decoder/arm-spe-decoder.h"
 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
+#include "../../arch/arm64/include/asm/cputype.h"

 #define MAX_TIMESTAMP (~0ULL)

 struct arm_spe {
@@ -45,6 +46,7 @@ struct arm_spe {
 	struct perf_session		*session;
 	struct machine			*machine;
 	u32				pmu_type;
+	u64				midr;

 	struct perf_tsc_conversion	tc;
@@ -387,35 +389,128 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 }

-static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
+static const struct midr_range neoverse_spe[] = {
+	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+	{},
+};
+
+static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
+						union perf_mem_data_src *data_src)
 {
-	union perf_mem_data_src	data_src = { 0 };
+	/*
+	 * Even though four levels of cache hierarchy are possible, no known
+	 * production Neoverse systems currently include more than three levels,
+	 * so for the time being we assume three exist. If a production system
+	 * is built with four, this function would have to be changed to
+	 * detect the number of levels for reporting.
+	 */

-	if (record->op == ARM_SPE_LD)
-		data_src.mem_op = PERF_MEM_OP_LOAD;
-	else if (record->op == ARM_SPE_ST)
-		data_src.mem_op = PERF_MEM_OP_STORE;
-	else
-		return 0;
+	/*
+	 * We have no data on the hit level or data source for stores in the
+	 * Neoverse SPE records.
+	 */
+	if (record->op & ARM_SPE_ST) {
+		data_src->mem_lvl = PERF_MEM_LVL_NA;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
+		return;
+	}
+
+	switch (record->source) {
+	case ARM_SPE_NV_L1D:
+		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	case ARM_SPE_NV_L2:
+		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	case ARM_SPE_NV_PEER_CORE:
+		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	/*
+	 * We don't know if this is L1 or L2, but we do know it was a
+	 * cache-to-cache transfer, so set SNOOPX_PEER
+	 */
+	case ARM_SPE_NV_LOCAL_CLUSTER:
+	case ARM_SPE_NV_PEER_CLUSTER:
+		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	/*
+	 * System cache is assumed to be L3
+	 */
+	case ARM_SPE_NV_SYS_CACHE:
+		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
+		break;
+	/*
+	 * We don't know what level it hit in, except it came from the other
+	 * socket
+	 */
+	case ARM_SPE_NV_REMOTE:
+		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+		break;
+	case ARM_SPE_NV_DRAM:
+		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+		break;
+	default:
+		break;
+	}
+}

+static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record,
+					       union perf_mem_data_src *data_src)
+{
 	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
-		data_src.mem_lvl = PERF_MEM_LVL_L3;
+		data_src->mem_lvl = PERF_MEM_LVL_L3;

 		if (record->type & ARM_SPE_LLC_MISS)
-			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
+			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
 		else
-			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
+			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
 	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
-		data_src.mem_lvl = PERF_MEM_LVL_L1;
+		data_src->mem_lvl = PERF_MEM_LVL_L1;

 		if (record->type & ARM_SPE_L1D_MISS)
-			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
+			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
 		else
-			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
+			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
 	}

 	if (record->type & ARM_SPE_REMOTE_ACCESS)
-		data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;
+		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
+}
+
+static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
+{
+	union perf_mem_data_src	data_src = { 0 };
+	bool is_neoverse = is_midr_in_range(midr, neoverse_spe);
+
+	if (record->op == ARM_SPE_LD)
+		data_src.mem_op = PERF_MEM_OP_LOAD;
+	else if (record->op == ARM_SPE_ST)
+		data_src.mem_op = PERF_MEM_OP_STORE;
+	else
+		return 0;
+
+	if (is_neoverse)
+		arm_spe__synth_data_source_neoverse(record, &data_src);
+	else
+		arm_spe__synth_data_source_generic(record, &data_src);

 	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
 		data_src.mem_dtlb = PERF_MEM_TLB_WK;
@@ -436,7 +531,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
 	u64 data_src;
 	int err;

-	data_src = arm_spe__synth_data_source(record);
+	data_src = arm_spe__synth_data_source(record, spe->midr);

 	if (spe->sample_flc) {
 		if (record->type & ARM_SPE_L1D_MISS) {
@@ -1178,6 +1273,8 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
 	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
 	struct perf_record_time_conv *tc = &session->time_conv;
+	const char *cpuid = perf_env__cpuid(session->evlist->env);
+	u64 midr = strtol(cpuid, NULL, 16);
 	struct arm_spe *spe;
 	int err;

@@ -1197,6 +1294,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
 	spe->machine = &session->machines.host; /* No kvm support */
 	spe->auxtrace_type = auxtrace_info->type;
 	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
+	spe->midr = midr;

 	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
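The MIDR reaches the decode side through the perf.data header: perf_env__cpuid() returns the arm64 cpuid string, which holds the MIDR_EL1 value printed in hex, so the strtol(cpuid, NULL, 16) above recovers the register that is_midr_in_range() then matches. A standalone sketch of pulling the fields back out; the bit positions follow the architectural MIDR_EL1 layout, while the example string and the part-number constant are illustrative assumptions:

#include <stdio.h>
#include <stdlib.h>

/* MIDR_EL1: implementer[31:24], variant[23:20], architecture[19:16],
 * part number[15:4], revision[3:0] */
#define MIDR_IMPLEMENTOR(m)	(((m) >> 24) & 0xff)
#define MIDR_PARTNUM(m)		(((m) >> 4) & 0xfff)

#define ARM_IMPL		0x41	/* implementer code 'A' */
#define NEOVERSE_N1_PART	0xd0c	/* assumed Neoverse N1 part number */

int main(void)
{
	/* shape of the cpuid string perf records for arm64 */
	const char *cpuid = "0x00000000410fd0c0";
	unsigned long long midr = strtoull(cpuid, NULL, 16);

	printf("implementer=0x%llx partnum=0x%llx neoverse-n1=%s\n",
	       MIDR_IMPLEMENTOR(midr), MIDR_PARTNUM(midr),
	       (MIDR_IMPLEMENTOR(midr) == ARM_IMPL &&
		MIDR_PARTNUM(midr) == NEOVERSE_N1_PART) ? "yes" : "no");
	return 0;
}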
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index d2c9b09ddb48..e2052f4fed33 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -1879,7 +1879,7 @@ struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name)
 	if (asprintf(&event_definition, "bpf-output/no-inherit=1,name=%s/", name) < 0)
 		return ERR_PTR(-ENOMEM);

-	err = parse_events(evlist, event_definition, NULL);
+	err = parse_event(evlist, event_definition);
 	free(event_definition);

 	if (err) {
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index f289b7713598..c257813e674e 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -11,11 +11,13 @@
 #include "util/cpumap.h"
 #include "util/thread_map.h"
 #include "util/cgroup.h"
+#include "util/strlist.h"
 #include <bpf/bpf.h>

 #include "bpf_skel/off_cpu.skel.h"

 #define MAX_STACKS 32
+#define MAX_PROC 4096
 /* we don't need actual timestamp, just want to put the samples at last */
 #define OFF_CPU_TIMESTAMP (~0ull << 32)

@@ -78,6 +80,7 @@ static void off_cpu_start(void *arg)
 		u8 val = 1;

 		skel->bss->has_task = 1;
+		skel->bss->uses_tgid = 1;
 		fd = bpf_map__fd(skel->maps.task_filter);
 		pid = perf_thread_map__pid(evlist->core.threads, 0);
 		bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
@@ -124,6 +127,8 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
 {
 	int err, fd, i;
 	int ncpus = 1, ntasks = 1, ncgrps = 1;
+	struct strlist *pid_slist = NULL;
+	struct str_node *pos;

 	if (off_cpu_config(evlist) < 0) {
 		pr_err("Failed to config off-cpu BPF event\n");
@@ -142,9 +147,34 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
 		bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
 	}

-	if (target__has_task(target)) {
+	if (target->pid) {
+		pid_slist = strlist__new(target->pid, NULL);
+		if (!pid_slist) {
+			pr_err("Failed to create a strlist for pid\n");
+			return -1;
+		}
+
+		ntasks = 0;
+		strlist__for_each_entry(pos, pid_slist) {
+			char *end_ptr;
+			int pid = strtol(pos->s, &end_ptr, 10);
+
+			if (pid == INT_MIN || pid == INT_MAX ||
+			    (*end_ptr != '\0' && *end_ptr != ','))
+				continue;
+
+			ntasks++;
+		}
+
+		if (ntasks < MAX_PROC)
+			ntasks = MAX_PROC;
+
+		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+	} else if (target__has_task(target)) {
 		ntasks = perf_thread_map__nr(evlist->core.threads);
 		bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+	} else if (target__none(target)) {
+		bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC);
 	}

 	if (evlist__first(evlist)->cgrp) {
@@ -184,7 +214,26 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
 		}
 	}

-	if (target__has_task(target)) {
+	if (target->pid) {
+		u8 val = 1;
+
+		skel->bss->has_task = 1;
+		skel->bss->uses_tgid = 1;
+		fd = bpf_map__fd(skel->maps.task_filter);
+
+		strlist__for_each_entry(pos, pid_slist) {
+			char *end_ptr;
+			u32 tgid;
+			int pid = strtol(pos->s, &end_ptr, 10);
+
+			if (pid == INT_MIN || pid == INT_MAX ||
+			    (*end_ptr != '\0' && *end_ptr != ','))
+				continue;
+
+			tgid = pid;
+			bpf_map_update_elem(fd, &tgid, &val, BPF_ANY);
+		}
+	} else if (target__has_task(target)) {
 		u32 pid;
 		u8 val = 1;
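Both strlist loops above accept a token only when strtol() consumes it completely up to a NUL or the next ',', so a malformed entry in a --pid list is skipped instead of being turned into a garbage tgid. The same validation in a self-contained form (parse_pid_list() is a hypothetical helper, not a perf API):

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Count valid pids in a comma-separated list, mirroring the
 * end_ptr check in off_cpu_prepare() above. */
static int parse_pid_list(const char *list)
{
	int ntasks = 0;
	const char *p = list;

	while (*p) {
		char *end_ptr;
		long pid = strtol(p, &end_ptr, 10);

		/* reject overflow and trailing garbage other than ',' */
		if (pid > 0 && pid < INT_MAX &&
		    (*end_ptr == '\0' || *end_ptr == ','))
			ntasks++;

		p = strchr(p, ',');
		if (!p)
			break;
		p++;
	}
	return ntasks;
}

int main(void)
{
	printf("%d valid pids\n", parse_pid_list("1234,abc,5678")); /* prints 2 */
	return 0;
}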
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
index cc6d7fd55118..c4ba2bcf179f 100644
--- a/tools/perf/util/bpf_skel/off_cpu.bpf.c
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -12,6 +12,9 @@
 #define TASK_INTERRUPTIBLE	0x0001
 #define TASK_UNINTERRUPTIBLE	0x0002

+/* create a new thread */
+#define CLONE_THREAD  0x10000
+
 #define MAX_STACKS   32
 #define MAX_ENTRIES  102400

@@ -85,6 +88,7 @@ int enabled = 0;
 int has_cpu = 0;
 int has_task = 0;
 int has_cgroup = 0;
+int uses_tgid = 0;

 const volatile bool has_prev_state = false;
 const volatile bool needs_cgroup = false;
@@ -144,7 +148,12 @@ static inline int can_record(struct task_struct *t, int state)

 	if (has_task) {
 		__u8 *ok;
-		__u32 pid = t->pid;
+		__u32 pid;
+
+		if (uses_tgid)
+			pid = t->tgid;
+		else
+			pid = t->pid;

 		ok = bpf_map_lookup_elem(&task_filter, &pid);
 		if (!ok)
@@ -214,6 +223,33 @@ next:
 	return 0;
 }

+SEC("tp_btf/task_newtask")
+int on_newtask(u64 *ctx)
+{
+	struct task_struct *task;
+	u64 clone_flags;
+	u32 pid;
+	u8 val = 1;
+
+	if (!uses_tgid)
+		return 0;
+
+	task = (struct task_struct *)bpf_get_current_task();
+
+	pid = BPF_CORE_READ(task, tgid);
+	if (!bpf_map_lookup_elem(&task_filter, &pid))
+		return 0;
+
+	task = (struct task_struct *)ctx[0];
+	clone_flags = ctx[1];
+
+	pid = task->tgid;
+	if (!(clone_flags & CLONE_THREAD))
+		bpf_map_update_elem(&task_filter, &pid, &val, BPF_NOEXIST);
+
+	return 0;
+}
+
 SEC("tp_btf/sched_switch")
 int on_switch(u64 *ctx)
 {
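on_newtask() keeps the tgid filter transitive without flooding the map: it inserts the child only when CLONE_THREAD is clear, i.e. when a new thread group starts, because a new thread inside an already-filtered process shares the parent's tgid and is matched by the can_record() lookup anyway. A userspace sketch of that distinction, assuming glibc's gettid() (2.30+) and compiling with -pthread:

#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

/* A thread shares the creator's tgid; a fork()ed child gets a new one.
 * That is why on_newtask() only has to track the !CLONE_THREAD case. */
static void *thread_fn(void *arg)
{
	(void)arg;
	printf("thread: tgid=%d tid=%ld\n", getpid(), (long)gettid());
	return NULL;
}

int main(void)
{
	pthread_t t;

	printf("parent: tgid=%d tid=%ld\n", getpid(), (long)gettid());

	pthread_create(&t, NULL, thread_fn, NULL);	/* CLONE_THREAD set */
	pthread_join(t, NULL);

	if (fork() == 0) {				/* CLONE_THREAD clear */
		printf("child:  tgid=%d tid=%ld\n", getpid(), (long)gettid());
		_exit(0);
	}
	wait(NULL);
	return 0;
}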
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 9e176146eb10..ec18ed5caf3e 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -652,17 +652,21 @@ static char *build_id_cache__find_debug(const char *sbuild_id,
 	nsinfo__mountns_exit(&nsc);

 #ifdef HAVE_DEBUGINFOD_SUPPORT
-	if (realname == NULL) {
-		debuginfod_client* c = debuginfod_begin();
-		if (c != NULL) {
-			int fd = debuginfod_find_debuginfo(c,
-					(const unsigned char*)sbuild_id, 0,
-					&realname);
-			if (fd >= 0)
-				close(fd); /* retaining reference by realname */
-			debuginfod_end(c);
-		}
-	}
+	if (realname == NULL) {
+		debuginfod_client* c;
+
+		pr_debug("Downloading debug info with build id %s\n", sbuild_id);
+
+		c = debuginfod_begin();
+		if (c != NULL) {
+			int fd = debuginfod_find_debuginfo(c,
+					(const unsigned char*)sbuild_id, 0,
+					&realname);
+			if (fd >= 0)
+				close(fd); /* retaining reference by realname */
+			debuginfod_end(c);
+		}
+	}
 #endif

 out:
diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h
index 1b0006092265..040ab9d0a803 100644
--- a/tools/perf/util/events_stats.h
+++ b/tools/perf/util/events_stats.h
@@ -22,7 +22,7 @@
  *
  * The total_period is needed because by default auto-freq is used, so
  * multiplying nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
- * the total number of low level events, it is necessary to to sum all struct
+ * the total number of low level events, it is necessary to sum all struct
  * perf_record_sample.period and stash the result in total_period.
  */
 struct events_stats {
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index a23255773c60..4e6632203704 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -845,8 +845,13 @@ jit_process(struct perf_session *session,
 	if (jit_detect(filename, pid, nsi)) {
 		nsinfo__put(nsi);

-		// Strip //anon* mmaps if we processed a jitdump for this pid
-		if (jit_has_pid(machine, pid) && (strncmp(filename, "//anon", 6) == 0))
+		/*
+		 * Strip //anon*, [anon:* and /memfd:* mmaps if we processed a jitdump for this pid
+		 */
+		if (jit_has_pid(machine, pid) &&
+		    ((strncmp(filename, "//anon", 6) == 0) ||
+		     (strncmp(filename, "[anon:", 6) == 0) ||
+		     (strncmp(filename, "/memfd:", 7) == 0)))
 			return 1;

 		return 0;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index facc13fbf16e..2a16cae28407 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -236,6 +236,7 @@ void machine__exit(struct machine *machine)
 	zfree(&machine->root_dir);
 	zfree(&machine->mmap_name);
 	zfree(&machine->current_tid);
+	zfree(&machine->kallsyms_filename);

 	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
 		struct threads *threads = &machine->threads[i];
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index c3c21a9c350b..764883183519 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -410,6 +410,11 @@ static const char * const snoop_access[] = {
 	"HitM",
 };

+static const char * const snoopx_access[] = {
+	"Fwd",
+	"Peer",
+};
+
 int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
 {
 	size_t i, l = 0;
@@ -430,13 +435,20 @@ int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
 		}
 		l += scnprintf(out + l, sz - l, snoop_access[i]);
 	}
-	if (mem_info &&
-	    (mem_info->data_src.mem_snoopx & PERF_MEM_SNOOPX_FWD)) {
+
+	m = 0;
+	if (mem_info)
+		m = mem_info->data_src.mem_snoopx;
+
+	for (i = 0; m && i < ARRAY_SIZE(snoopx_access); i++, m >>= 1) {
+		if (!(m & 0x1))
+			continue;
+
 		if (l) {
 			strcat(out, " or ");
 			l += 4;
 		}
-		l += scnprintf(out + l, sz - l, "Fwd");
+		l += scnprintf(out + l, sz - l, snoopx_access[i]);
 	}

 	if (*out == '\0')
@@ -513,6 +525,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
 	u64 op     = data_src->mem_op;
 	u64 lvl    = data_src->mem_lvl;
 	u64 snoop  = data_src->mem_snoop;
+	u64 snoopx = data_src->mem_snoopx;
 	u64 lock   = data_src->mem_lock;
 	u64 blk    = data_src->mem_blk;
 	/*
@@ -532,6 +545,12 @@ do {				\
 		stats->tot_hitm++;	\
 } while (0)

+#define PEER_INC(__f)		\
+do {				\
+	stats->__f++;		\
+	stats->tot_peer++;	\
+} while (0)
+
 #define P(a, b) PERF_MEM_##a##_##b

 	stats->nr_entries++;
@@ -555,12 +574,20 @@ do {				\
 			if (lvl & P(LVL, IO))  stats->ld_io++;
 			if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
 			if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
-			if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
+			if (lvl & P(LVL, L2)) {
+				stats->ld_l2hit++;
+
+				if (snoopx & P(SNOOPX, PEER))
+					PEER_INC(lcl_peer);
+			}
 			if (lvl & P(LVL, L3 )) {
 				if (snoop & P(SNOOP, HITM))
 					HITM_INC(lcl_hitm);
 				else
 					stats->ld_llchit++;
+
+				if (snoopx & P(SNOOPX, PEER))
+					PEER_INC(lcl_peer);
 			}

 			if (lvl & P(LVL, LOC_RAM)) {
@@ -585,10 +612,14 @@ do {				\
 			if ((lvl & P(LVL, REM_CCE1)) ||
 			    (lvl & P(LVL, REM_CCE2)) ||
 			    mrem) {
-				if (snoop & P(SNOOP, HIT))
+				if (snoop & P(SNOOP, HIT)) {
 					stats->rmt_hit++;
-				else if (snoop & P(SNOOP, HITM))
+				} else if (snoop & P(SNOOP, HITM)) {
 					HITM_INC(rmt_hitm);
+				} else if (snoopx & P(SNOOPX, PEER)) {
+					stats->rmt_hit++;
+					PEER_INC(rmt_peer);
+				}
 			}

 			if ((lvl & P(LVL, MISS)))
@@ -652,6 +683,9 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
 	stats->lcl_hitm		+= add->lcl_hitm;
 	stats->rmt_hitm		+= add->rmt_hitm;
 	stats->tot_hitm		+= add->tot_hitm;
+	stats->lcl_peer		+= add->lcl_peer;
+	stats->rmt_peer		+= add->rmt_peer;
+	stats->tot_peer		+= add->tot_peer;
 	stats->rmt_hit		+= add->rmt_hit;
 	stats->lcl_dram		+= add->lcl_dram;
 	stats->rmt_dram		+= add->rmt_dram;
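Walking mem_snoopx as a bit field in perf_mem__snp_scnprintf() means every set bit is now printed and joined with " or ", so the new PEER bit comes out as "Peer" alongside the existing "Fwd". A reduced model of that loop outside the perf data structures; it assumes the snoopx field encodes SNOOPX_FWD as 0x1 and SNOOPX_PEER as 0x2, matching the order of snoopx_access[]:

#include <stdio.h>

static const char * const snoopx_access[] = { "Fwd", "Peer" };

/* Reduced model of the snoopx loop above: walk the bits low to high,
 * joining the set ones with " or ". */
static void snoopx_string(unsigned int snoopx, char *out, size_t sz)
{
	size_t i, l = 0;
	unsigned int m = snoopx;

	out[0] = '\0';
	for (i = 0; m && i < 2; i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (l)
			l += snprintf(out + l, sz - l, " or ");
		l += snprintf(out + l, sz - l, "%s", snoopx_access[i]);
	}
	if (!out[0])
		snprintf(out, sz, "N/A");
}

int main(void)
{
	char buf[32];

	snoopx_string(0x2, buf, sizeof(buf));	/* PEER only  -> "Peer" */
	printf("%s\n", buf);
	snoopx_string(0x3, buf, sizeof(buf));	/* FWD | PEER -> "Fwd or Peer" */
	printf("%s\n", buf);
	return 0;
}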
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 8a8b568baeee..12372309d60e 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -78,6 +78,9 @@ struct c2c_stats {
 	u32	lcl_hitm;            /* count of loads with local HITM */
 	u32	rmt_hitm;            /* count of loads with remote HITM */
 	u32	tot_hitm;            /* count of loads with local and remote HITM */
+	u32	lcl_peer;            /* count of loads with local peer cache */
+	u32	rmt_peer;            /* count of loads with remote peer cache */
+	u32	tot_peer;            /* count of loads with local and remote peer cache */
 	u32	rmt_hit;             /* count of loads with remote hit clean; */
 	u32	lcl_dram;            /* count of loads miss to local DRAM */
 	u32	rmt_dram;            /* count of loads miss to remote DRAM */
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 8f7baeabc5cf..464475fd6b9a 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -502,14 +502,14 @@ struct metricgroup_print_sys_idata {
 	bool details;
 };

-typedef int (*metricgroup_sys_event_iter_fn)(const struct pmu_event *pe, void *);
-
 struct metricgroup_iter_data {
-	metricgroup_sys_event_iter_fn fn;
+	pmu_event_iter_fn fn;
 	void *data;
 };

-static int metricgroup__sys_event_iter(const struct pmu_event *pe, void *data)
+static int metricgroup__sys_event_iter(const struct pmu_event *pe,
+				       const struct pmu_events_table *table,
+				       void *data)
 {
 	struct metricgroup_iter_data *d = data;
 	struct perf_pmu *pmu = NULL;
@@ -522,13 +522,15 @@ static int metricgroup__sys_event_iter(const struct pmu_event *pe, void *data)
 		if (!pmu->id || strcmp(pmu->id, pe->compat))
 			continue;

-		return d->fn(pe, d->data);
+		return d->fn(pe, table, d->data);
 	}

 	return 0;
 }

-static int metricgroup__print_sys_event_iter(const struct pmu_event *pe, void *data)
+static int metricgroup__print_sys_event_iter(const struct pmu_event *pe,
+					     const struct pmu_events_table *table __maybe_unused,
+					     void *data)
 {
 	struct metricgroup_print_sys_idata *d = data;

@@ -536,15 +538,40 @@ static int metricgroup__print_sys_event_iter(const struct pmu_event *pe, void *d
 			d->details, d->groups, d->metriclist);
 }

+struct metricgroup_print_data {
+	const char *pmu_name;
+	struct strlist *metriclist;
+	char *filter;
+	struct rblist *groups;
+	bool metricgroups;
+	bool raw;
+	bool details;
+};
+
+static int metricgroup__print_callback(const struct pmu_event *pe,
+				       const struct pmu_events_table *table __maybe_unused,
+				       void *vdata)
+{
+	struct metricgroup_print_data *data = vdata;
+
+	if (!pe->metric_expr)
+		return 0;
+
+	if (data->pmu_name && perf_pmu__is_hybrid(pe->pmu) && strcmp(data->pmu_name, pe->pmu))
+		return 0;
+
+	return metricgroup__print_pmu_event(pe, data->metricgroups, data->filter,
+					    data->raw, data->details, data->groups,
+					    data->metriclist);
+}
+
 void metricgroup__print(bool metrics, bool metricgroups, char *filter,
 			bool raw, bool details, const char *pmu_name)
 {
-	const struct pmu_events_map *map = pmu_events_map__find();
-	const struct pmu_event *pe;
-	int i;
 	struct rblist groups;
 	struct rb_node *node, *next;
 	struct strlist *metriclist = NULL;
+	const struct pmu_events_table *table;

 	if (!metricgroups) {
 		metriclist = strlist__new(NULL, NULL);
@@ -556,23 +583,22 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
 	groups.node_new = mep_new;
 	groups.node_cmp = mep_cmp;
 	groups.node_delete = mep_delete;
-	for (i = 0; map; i++) {
-		pe = &map->table[i];
+	table = pmu_events_table__find();
+	if (table) {
+		struct metricgroup_print_data data = {
+			.pmu_name = pmu_name,
+			.metriclist = metriclist,
+			.metricgroups = metricgroups,
+			.filter = filter,
+			.raw = raw,
+			.details = details,
+			.groups = &groups,
+		};

-		if (!pe->name && !pe->metric_group && !pe->metric_name)
-			break;
-		if (!pe->metric_expr)
-			continue;
-		if (pmu_name && perf_pmu__is_hybrid(pe->pmu) &&
-		    strcmp(pmu_name, pe->pmu)) {
-			continue;
-		}
-		if (metricgroup__print_pmu_event(pe, metricgroups, filter,
-						 raw, details, &groups,
-						 metriclist) < 0)
-			return;
+		pmu_events_table_for_each_event(table,
+						metricgroup__print_callback,
+						&data);
 	}
-
 	{
 		struct metricgroup_iter_data data = {
 			.fn = metricgroup__print_sys_event_iter,
@@ -850,16 +876,20 @@ struct metricgroup_add_iter_data {
 	bool metric_no_group;
 	struct metric *root_metric;
 	const struct visited_metric *visited;
-	const struct pmu_events_map *map;
+	const struct pmu_events_table *table;
 };

+static bool metricgroup__find_metric(const char *metric,
+				     const struct pmu_events_table *table,
+				     struct pmu_event *pe);
+
 static int add_metric(struct list_head *metric_list,
 		      const struct pmu_event *pe,
 		      const char *modifier,
 		      bool metric_no_group,
 		      struct metric *root_metric,
 		      const struct visited_metric *visited,
-		      const struct pmu_events_map *map);
+		      const struct pmu_events_table *table);

 /**
  * resolve_metric - Locate metrics within the root metric and recursively add
@@ -874,7 +904,7 @@ static int add_metric(struct list_head *metric_list,
  *               metrics. When adding a root this argument is NULL.
  * @visited: A singly linked list of metric names being added that is used to
  *           detect recursion.
- * @map: The map that is searched for metrics, most commonly the table for the
+ * @table: The table that is searched for metrics, most commonly the table for the
  *       architecture perf is running upon.
  */
 static int resolve_metric(struct list_head *metric_list,
@@ -882,13 +912,13 @@ static int resolve_metric(struct list_head *metric_list,
 			  bool metric_no_group,
 			  struct metric *root_metric,
 			  const struct visited_metric *visited,
-			  const struct pmu_events_map *map)
+			  const struct pmu_events_table *table)
 {
 	struct hashmap_entry *cur;
 	size_t bkt;
 	struct to_resolve {
 		/* The metric to resolve. */
-		const struct pmu_event *pe;
+		struct pmu_event pe;
 		/*
 		 * The key in the IDs map, this may differ from in case,
 		 * etc. from pe->metric_name.
 		 */
 		const char *key;
 	} *pending = NULL;
 	int i, ret = 0, pending_cnt = 0;

 	/*
 	 * Iterate all the parsed IDs and if there's a matching metric add it to
 	 * the pending array.
 	 */
 	hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) {
-		const struct pmu_event *pe;
+		struct pmu_event pe;

-		pe = metricgroup__find_metric(cur->key, map);
-		if (pe) {
+		if (metricgroup__find_metric(cur->key, table, &pe)) {
 			pending = realloc(pending,
 					(pending_cnt + 1) * sizeof(struct to_resolve));
 			if (!pending)
 				return -ENOMEM;

-			pending[pending_cnt].pe = pe;
+			memcpy(&pending[pending_cnt].pe, &pe, sizeof(pe));
 			pending[pending_cnt].key = cur->key;
 			pending_cnt++;
 		}
 	}

@@ -926,8 +955,8 @@
 	 * context.
 	 */
 	for (i = 0; i < pending_cnt; i++) {
-		ret = add_metric(metric_list, pending[i].pe, modifier, metric_no_group,
-				 root_metric, visited, map);
+		ret = add_metric(metric_list, &pending[i].pe, modifier, metric_no_group,
+				 root_metric, visited, table);
 		if (ret)
 			break;
 	}
@@ -950,7 +979,7 @@
  *               metrics. When adding a root this argument is NULL.
  * @visited: A singly linked list of metric names being added that is used to
  *           detect recursion.