summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--tools/arch/arm64/include/uapi/asm/perf_regs.h7
-rw-r--r--tools/arch/x86/include/asm/msr-index.h19
-rw-r--r--tools/build/Makefile.feature4
-rw-r--r--tools/build/feature/Makefile20
-rw-r--r--tools/build/feature/test-libbpf-bpf_map_create.c8
-rw-r--r--tools/build/feature/test-libbpf-bpf_object__next_map.c8
-rw-r--r--tools/build/feature/test-libbpf-bpf_object__next_program.c8
-rw-r--r--tools/build/feature/test-libbpf-bpf_prog_load.c9
-rw-r--r--tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c5
-rw-r--r--tools/build/feature/test-libbpf-btf__raw_data.c8
-rw-r--r--tools/lib/perf/evlist.c71
-rw-r--r--tools/lib/perf/include/internal/evsel.h11
-rw-r--r--tools/perf/Documentation/perf-record.txt10
-rw-r--r--tools/perf/Makefile.config25
-rw-r--r--tools/perf/Makefile.perf1
-rw-r--r--tools/perf/arch/arm64/util/perf_regs.c38
-rw-r--r--tools/perf/arch/arm64/util/unwind-libunwind.c73
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c31
-rw-r--r--tools/perf/builtin-c2c.c6
-rw-r--r--tools/perf/builtin-record.c64
-rw-r--r--tools/perf/builtin-stat.c5
-rw-r--r--tools/perf/pmu-events/jevents.c2
-rwxr-xr-xtools/perf/scripts/python/arm-cs-trace-disasm.py272
-rw-r--r--tools/perf/tests/shell/lib/perf_csv_output_lint.py48
-rwxr-xr-xtools/perf/tests/shell/record_offcpu.sh60
-rwxr-xr-xtools/perf/tests/shell/stat+csv_output.sh147
-rwxr-xr-xtools/perf/tests/shell/test_intel_pt.sh71
-rw-r--r--tools/perf/util/Build1
-rw-r--r--tools/perf/util/auxtrace.c15
-rw-r--r--tools/perf/util/auxtrace.h13
-rw-r--r--tools/perf/util/bpf-event.c24
-rw-r--r--tools/perf/util/bpf_counter.c6
-rw-r--r--tools/perf/util/bpf_off_cpu.c338
-rw-r--r--tools/perf/util/bpf_skel/off_cpu.bpf.c229
-rw-r--r--tools/perf/util/evlist.c61
-rw-r--r--tools/perf/util/evlist.h5
-rw-r--r--tools/perf/util/evsel.c7
-rw-r--r--tools/perf/util/libunwind/arm64.c2
-rw-r--r--tools/perf/util/mmap.c4
-rw-r--r--tools/perf/util/off_cpu.h29
-rw-r--r--tools/perf/util/parse-events.c2
-rw-r--r--tools/perf/util/perf_regs.c2
-rw-r--r--tools/perf/util/python-ext-sources1
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c21
44 files changed, 1594 insertions, 197 deletions
diff --git a/tools/arch/arm64/include/uapi/asm/perf_regs.h b/tools/arch/arm64/include/uapi/asm/perf_regs.h
index d54daafa89e3..fd157f46727e 100644
--- a/tools/arch/arm64/include/uapi/asm/perf_regs.h
+++ b/tools/arch/arm64/include/uapi/asm/perf_regs.h
@@ -36,6 +36,11 @@ enum perf_event_arm_regs {
PERF_REG_ARM64_LR,
PERF_REG_ARM64_SP,
PERF_REG_ARM64_PC,
- PERF_REG_ARM64_MAX,
+
+ /* Extended/pseudo registers */
+ PERF_REG_ARM64_VG = 46, // SVE Vector Granule
+
+ PERF_REG_ARM64_MAX = PERF_REG_ARM64_PC + 1,
+ PERF_REG_ARM64_EXTENDED_MAX = PERF_REG_ARM64_VG + 1
};
#endif /* _ASM_ARM64_PERF_REGS_H */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index ee15311b6be1..403e83b4adc8 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -76,6 +76,8 @@
/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */
#define MSR_IA32_CORE_CAPS 0x000000cf
+#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2
+#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS BIT(MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT)
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT)
@@ -154,6 +156,11 @@
#define MSR_IA32_POWER_CTL 0x000001fc
#define MSR_IA32_POWER_CTL_BIT_EE 19
+/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */
+#define MSR_INTEGRITY_CAPS 0x000002d9
+#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4
+#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT)
+
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
@@ -312,6 +319,7 @@
/* Run Time Average Power Limiting (RAPL) Interface */
+#define MSR_VR_CURRENT_CONFIG 0x00000601
#define MSR_RAPL_POWER_UNIT 0x00000606
#define MSR_PKG_POWER_LIMIT 0x00000610
@@ -502,8 +510,10 @@
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
+#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
+#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
@@ -524,6 +534,11 @@
#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
+/* AMD Performance Counter Global Status and Control MSRs */
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
+#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
+
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
@@ -688,6 +703,10 @@
#define MSR_IA32_PERF_CTL 0x00000199
#define INTEL_PERF_CTL_MASK 0xffff
+/* AMD Branch Sampling configuration */
+#define MSR_AMD_DBG_EXTN_CFG 0xc000010f
+#define MSR_AMD_SAMP_BR_FROM 0xc0010300
+
#define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index c6a48d0ef9ff..888a0421d43b 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -99,6 +99,10 @@ FEATURE_TESTS_EXTRA := \
clang \
libbpf \
libbpf-btf__load_from_kernel_by_id \
+ libbpf-bpf_prog_load \
+ libbpf-bpf_object__next_program \
+ libbpf-bpf_object__next_map \
+ libbpf-bpf_map_create \
libpfm4 \
libdebuginfod \
clang-bpf-co-re
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index cb4a2a4fa2e4..7c2a17e23c30 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -58,6 +58,11 @@ FILES= \
test-bpf.bin \
test-libbpf.bin \
test-libbpf-btf__load_from_kernel_by_id.bin \
+ test-libbpf-bpf_prog_load.bin \
+ test-libbpf-bpf_map_create.bin \
+ test-libbpf-bpf_object__next_program.bin \
+ test-libbpf-bpf_object__next_map.bin \
+ test-libbpf-btf__raw_data.bin \
test-get_cpuid.bin \
test-sdt.bin \
test-cxx.bin \
@@ -291,6 +296,21 @@ $(OUTPUT)test-libbpf.bin:
$(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin:
$(BUILD) -lbpf
+$(OUTPUT)test-libbpf-bpf_prog_load.bin:
+ $(BUILD) -lbpf
+
+$(OUTPUT)test-libbpf-bpf_map_create.bin:
+ $(BUILD) -lbpf
+
+$(OUTPUT)test-libbpf-bpf_object__next_program.bin:
+ $(BUILD) -lbpf
+
+$(OUTPUT)test-libbpf-bpf_object__next_map.bin:
+ $(BUILD) -lbpf
+
+$(OUTPUT)test-libbpf-btf__raw_data.bin:
+ $(BUILD) -lbpf
+
$(OUTPUT)test-sdt.bin:
$(BUILD)
diff --git a/tools/build/feature/test-libbpf-bpf_map_create.c b/tools/build/feature/test-libbpf-bpf_map_create.c
new file mode 100644
index 000000000000..b9f550e332c8
--- /dev/null
+++ b/tools/build/feature/test-libbpf-bpf_map_create.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/bpf.h>
+
+int main(void)
+{
+ return bpf_map_create(0 /* map_type */, NULL /* map_name */, 0 /* key_size */,
+ 0 /* value_size */, 0 /* max_entries */, NULL /* opts */);
+}
diff --git a/tools/build/feature/test-libbpf-bpf_object__next_map.c b/tools/build/feature/test-libbpf-bpf_object__next_map.c
new file mode 100644
index 000000000000..64adb519e97e
--- /dev/null
+++ b/tools/build/feature/test-libbpf-bpf_object__next_map.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/libbpf.h>
+
+int main(void)
+{
+ bpf_object__next_map(NULL /* obj */, NULL /* prev */);
+ return 0;
+}
diff --git a/tools/build/feature/test-libbpf-bpf_object__next_program.c b/tools/build/feature/test-libbpf-bpf_object__next_program.c
new file mode 100644
index 000000000000..8bf4fd26b545
--- /dev/null
+++ b/tools/build/feature/test-libbpf-bpf_object__next_program.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/libbpf.h>
+
+int main(void)
+{
+ bpf_object__next_program(NULL /* obj */, NULL /* prev */);
+ return 0;
+}
diff --git a/tools/build/feature/test-libbpf-bpf_prog_load.c b/tools/build/feature/test-libbpf-bpf_prog_load.c
new file mode 100644
index 000000000000..47f516d63ebc
--- /dev/null
+++ b/tools/build/feature/test-libbpf-bpf_prog_load.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/bpf.h>
+
+int main(void)
+{
+ return bpf_prog_load(0 /* prog_type */, NULL /* prog_name */,
+ NULL /* license */, NULL /* insns */,
+ 0 /* insn_cnt */, NULL /* opts */);
+}
diff --git a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
index f7c084428735..a17647f7d5a4 100644
--- a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
+++ b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <bpf/libbpf.h>
+#include <bpf/btf.h>
int main(void)
{
- return btf__load_from_kernel_by_id(20151128, NULL);
+ btf__load_from_kernel_by_id(20151128);
+ return 0;
}
diff --git a/tools/build/feature/test-libbpf-btf__raw_data.c b/tools/build/feature/test-libbpf-btf__raw_data.c
new file mode 100644
index 000000000000..57da31dd7581
--- /dev/null
+++ b/tools/build/feature/test-libbpf-btf__raw_data.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/btf.h>
+
+int main(void)
+{
+ btf__raw_data(NULL /* btf_ro */, NULL /* size */);
+ return 0;
+}
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index ed66f2e38464..e6c98a6e3908 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -23,6 +23,7 @@
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <api/fd/array.h>
+#include "internal.h"
void perf_evlist__init(struct perf_evlist *evlist)
{
@@ -39,10 +40,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
* We already have cpus for evsel (via PMU sysfs) so
* keep it, if there's no target cpu list defined.
*/
- if (!evsel->own_cpus || evlist->has_user_cpus) {
- perf_cpu_map__put(evsel->cpus);
- evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
- } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->user_requested_cpus)) {
+ if (!evsel->own_cpus ||
+ (!evsel->system_wide && evlist->has_user_cpus) ||
+ (!evsel->system_wide &&
+ !evsel->requires_cpu &&
+ perf_cpu_map__empty(evlist->user_requested_cpus))) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
} else if (evsel->cpus != evsel->own_cpus) {
@@ -50,8 +52,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
}
- perf_thread_map__put(evsel->threads);
- evsel->threads = perf_thread_map__get(evlist->threads);
+ if (!evsel->system_wide) {
+ perf_thread_map__put(evsel->threads);
+ evsel->threads = perf_thread_map__get(evlist->threads);
+ }
+
evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
}
@@ -298,7 +303,7 @@ add:
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
- int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus);
+ int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
int nr_threads = perf_thread_map__nr(evlist->threads);
int nfds = 0;
struct perf_evsel *evsel;
@@ -428,9 +433,9 @@ static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_
static int
mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
int idx, struct perf_mmap_param *mp, int cpu_idx,
- int thread, int *_output, int *_output_overwrite)
+ int thread, int *_output, int *_output_overwrite, int *nr_mmaps)
{
- struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->user_requested_cpus, cpu_idx);
+ struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx);
struct perf_evsel *evsel;
int revent;
@@ -484,6 +489,8 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
return -1;
+ *nr_mmaps += 1;
+
if (!idx)
perf_evlist__set_mmap_first(evlist, map, overwrite);
} else {
@@ -513,34 +520,12 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
}
static int
-mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
- struct perf_mmap_param *mp)
-{
- int thread;
- int nr_threads = perf_thread_map__nr(evlist->threads);
-
- for (thread = 0; thread < nr_threads; thread++) {
- int output = -1;
- int output_overwrite = -1;
-
- if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread,
- &output, &output_overwrite))
- goto out_unmap;
- }
-
- return 0;
-
-out_unmap:
- perf_evlist__munmap(evlist);
- return -1;
-}
-
-static int
mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
int nr_threads = perf_thread_map__nr(evlist->threads);
- int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus);
+ int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
+ int nr_mmaps = 0;
int cpu, thread;
for (cpu = 0; cpu < nr_cpus; cpu++) {
@@ -549,11 +534,14 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
for (thread = 0; thread < nr_threads; thread++) {
if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
- thread, &output, &output_overwrite))
+ thread, &output, &output_overwrite, &nr_mmaps))
goto out_unmap;
}
}
+ if (nr_mmaps != evlist->nr_mmaps)
+ pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps);
+
return 0;
out_unmap:
@@ -565,9 +553,14 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
{
int nr_mmaps;
- nr_mmaps = perf_cpu_map__nr(evlist->user_requested_cpus);
- if (perf_cpu_map__empty(evlist->user_requested_cpus))
- nr_mmaps = perf_thread_map__nr(evlist->threads);
+ /* One for each CPU */
+ nr_mmaps = perf_cpu_map__nr(evlist->all_cpus);
+ if (perf_cpu_map__empty(evlist->all_cpus)) {
+ /* Plus one for each thread */
+ nr_mmaps += perf_thread_map__nr(evlist->threads);
+ /* Minus the per-thread CPU (-1) */
+ nr_mmaps -= 1;
+ }
return nr_mmaps;
}
@@ -577,7 +570,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
struct perf_mmap_param *mp)
{
struct perf_evsel *evsel;
- const struct perf_cpu_map *cpus = evlist->user_requested_cpus;
if (!ops || !ops->get || !ops->mmap)
return -EINVAL;
@@ -596,9 +588,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
return -ENOMEM;
- if (perf_cpu_map__empty(cpus))
- return mmap_per_thread(evlist, ops, mp);
-
return mmap_per_cpu(evlist, ops, mp);
}
diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h
index cfc9ebd7968e..2a912a1f1989 100644
--- a/tools/lib/perf/include/internal/evsel.h
+++ b/tools/lib/perf/include/internal/evsel.h
@@ -49,7 +49,18 @@ struct perf_evsel {
/* parse modifier helper */
int nr_members;
+ /*
+ * system_wide is for events that need to be on every CPU, irrespective
+ * of user requested CPUs or threads. Map propagation will set cpus to
+ * this event's own_cpus, whereby they will contribute to evlist
+ * all_cpus.
+ */
bool system_wide;
+ /*
+ * Some events, for example uncore events, require a CPU.
+ * i.e. it cannot be the 'any CPU' value of -1.
+ */
+ bool requires_cpu;
int idx;
};
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 465be4e62a17..b4e9ef7edfef 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -758,6 +758,16 @@ include::intel-hybrid.txt[]
If the URLs is not specified, the value of DEBUGINFOD_URLS
system environment variable is used.
+--off-cpu::
+ Enable off-cpu profiling with BPF. The BPF program will collect
+ task scheduling information with (user) stacktrace and save them
+ as sample data of a software event named "offcpu-time". The
+ sample period will have the time the task slept in nanoseconds.
+
+ Note that BPF can collect stack traces using frame pointer ("fp")
+ only, as of now. So the applications built without the frame
+ pointer might see bogus addresses.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index e0304e70f182..73e0762092fe 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -573,11 +573,36 @@ ifndef NO_LIBELF
ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1)
CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
endif
+ $(call feature_check,libbpf-bpf_prog_load)
+ ifeq ($(feature-libbpf-bpf_prog_load), 1)
+ CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD
+ endif
+ $(call feature_check,libbpf-bpf_object__next_program)
+ ifeq ($(feature-libbpf-bpf_object__next_program), 1)
+ CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
+ endif
+ $(call feature_check,libbpf-bpf_object__next_map)
+ ifeq ($(feature-libbpf-bpf_object__next_map), 1)
+ CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
+ endif
+ $(call feature_check,libbpf-btf__raw_data)
+ ifeq ($(feature-libbpf-btf__raw_data), 1)
+ CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA
+ endif
+ $(call feature_check,libbpf-bpf_map_create)
+ ifeq ($(feature-libbpf-bpf_map_create), 1)
+ CFLAGS += -DHAVE_LIBBPF_BPF_MAP_CREATE
+ endif
else
dummy := $(error Error: No libbpf devel library found, please install libbpf-devel);
endif
else
CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+ CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD
+ CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
+ CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
+ CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA
+ CFLAGS += -DHAVE_LIBBPF_BPF_MAP_CREATE
endif
endif
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 6e5aded855cc..8f738e11356d 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -1038,6 +1038,7 @@ SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
+SKELETONS += $(SKEL_OUT)/off_cpu.skel.h
$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT):
$(Q)$(MKDIR) -p $@
diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c
index 476b037eea1c..006692c9b040 100644
--- a/tools/perf/arch/arm64/util/perf_regs.c
+++ b/tools/perf/arch/arm64/util/perf_regs.c
@@ -2,13 +2,19 @@
#include <errno.h>
#include <regex.h>
#include <string.h>
+#include <sys/auxv.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
+#include "../../../perf-sys.h"
#include "../../../util/debug.h"
#include "../../../util/event.h"
#include "../../../util/perf_regs.h"
+#ifndef HWCAP_SVE
+#define HWCAP_SVE (1 << 22)
+#endif
+
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(x0, PERF_REG_ARM64_X0),
SMPL_REG(x1, PERF_REG_ARM64_X1),
@@ -43,6 +49,7 @@ const struct sample_reg sample_reg_masks[] = {
SMPL_REG(lr, PERF_REG_ARM64_LR),
SMPL_REG(sp, PERF_REG_ARM64_SP),
SMPL_REG(pc, PERF_REG_ARM64_PC),
+ SMPL_REG(vg, PERF_REG_ARM64_VG),
SMPL_REG_END
};
@@ -131,3 +138,34 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
return SDT_ARG_VALID;
}
+
+uint64_t arch__user_reg_mask(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .sample_type = PERF_SAMPLE_REGS_USER,
+ .disabled = 1,
+ .exclude_kernel = 1,
+ .sample_period = 1,
+ .sample_regs_user = PERF_REGS_MASK
+ };
+ int fd;
+
+ if (getauxval(AT_HWCAP) & HWCAP_SVE)
+ attr.sample_regs_user |= SMPL_REG_MASK(PERF_REG_ARM64_VG);
+
+ /*
+ * Check if the pmu supports perf extended regs, before
+ * returning the register mask to sample.
+ */
+ if (attr.sample_regs_user != PERF_REGS_MASK) {
+ event_attr_init(&attr);
+ fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (fd != -1) {
+ close(fd);
+ return attr.sample_regs_user;
+ }
+ }
+ return PERF_REGS_MASK;
+}
diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c
index 5aecf88e3de6..871af5992298 100644
--- a/tools/perf/arch/arm64/util/unwind-libunwind.c
+++ b/tools/perf/arch/arm64/util/unwind-libunwind.c
@@ -10,77 +10,8 @@
int LIBUNWIND__ARCH_REG_ID(int regnum)
{
- switch (regnum) {
- case UNW_AARCH64_X0:
- return PERF_REG_ARM64_X0;
- case UNW_AARCH64_X1:
- return PERF_REG_ARM64_X1;
- case UNW_AARCH64_X2:
- return PERF_REG_ARM64_X2;
- case UNW_AARCH64_X3:
- return PERF_REG_ARM64_X3;
- case UNW_AARCH64_X4:
- return PERF_REG_ARM64_X4;
- case UNW_AARCH64_X5:
- return PERF_REG_ARM64_X5;
- case UNW_AARCH64_X6:
- return PERF_REG_ARM64_X6;
- case UNW_AARCH64_X7:
- return PERF_REG_ARM64_X7;
- case UNW_AARCH64_X8:
- return PERF_REG_ARM64_X8;
- case UNW_AARCH64_X9:
- return PERF_REG_ARM64_X9;
- case UNW_AARCH64_X10:
- return PERF_REG_ARM64_X10;
- case UNW_AARCH64_X11:
- return PERF_REG_ARM64_X11;
- case UNW_AARCH64_X12:
- return PERF_REG_ARM64_X12;
- case UNW_AARCH64_X13:
- return PERF_REG_ARM64_X13;
- case UNW_AARCH64_X14:
- return PERF_REG_ARM64_X14;
- case UNW_AARCH64_X15:
- return PERF_REG_ARM64_X15;
- case UNW_AARCH64_X16:
- return PERF_REG_ARM64_X16;
- case UNW_AARCH64_X17:
- return PERF_REG_ARM64_X17;
- case UNW_AARCH64_X18:
- return PERF_REG_ARM64_X18;
- case UNW_AARCH64_X19:
- return PERF_REG_ARM64_X19;
- case UNW_AARCH64_X20:
- return PERF_REG_ARM64_X20;
- case UNW_AARCH64_X21:
- return PERF_REG_ARM64_X21;
- case UNW_AARCH64_X22:
- return PERF_REG_ARM64_X22;
- case UNW_AARCH64_X23:
- return PERF_REG_ARM64_X23;
- case UNW_AARCH64_X24:
- return PERF_REG_ARM64_X24;
- case UNW_AARCH64_X25:
- return PERF_REG_ARM64_X25;
- case UNW_AARCH64_X26:
- return PERF_REG_ARM64_X26;
- case UNW_AARCH64_X27:
- return PERF_REG_ARM64_X27;
- case UNW_AARCH64_X28:
- return PERF_REG_ARM64_X28;
- case UNW_AARCH64_X29:
- return PERF_REG_ARM64_X29;
- case UNW_AARCH64_X30:
- return PERF_REG_ARM64_LR;
- case UNW_AARCH64_SP:
- return PERF_REG_ARM64_SP;
- case UNW_AARCH64_PC:
- return PERF_REG_ARM64_PC;
- default:
- pr_err("unwind: invalid reg id %d\n", regnum);
+ if (regnum < 0 || regnum >= PERF_REG_ARM64_EXTENDED_MAX)
return -EINVAL;
- }
- return -EINVAL;
+ return regnum;
}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 2eaac4638aab..06c2cdfd8f2f 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -811,18 +811,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
if (!cpu_wide && perf_can_record_cpu_wide()) {
struct evsel *switch_evsel;
- err = parse_events(evlist, "dummy:u", NULL);
- if (err)
- return err;
+ switch_evsel = evlist__add_dummy_on_all_cpus(evlist);
<