summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2016-04-13 09:02:07 +0200
committerIngo Molnar <mingo@kernel.org>2016-04-13 09:02:07 +0200
commitbed9441ba787da1026f95ce6160b38994c510fe3 (patch)
tree204a4a8d1179eb4dd709ae060627304a91c47193
parentaeaae7d612ff2ad647ba422099da56eb3aa89237 (diff)
parent00768a2bd3245eace0690fcf2c02776a256b66d7 (diff)
downloadlinux-bed9441ba787da1026f95ce6160b38994c510fe3.tar.gz
linux-bed9441ba787da1026f95ce6160b38994c510fe3.tar.bz2
linux-bed9441ba787da1026f95ce6160b38994c510fe3.zip
Merge tag 'perf-core-for-mingo-20160411' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements from Arnaldo Carvalho de Melo: User visible changes: - Automagically create a 'bpf-output' event, easing the setup of BPF C "scripts" that produce output via the perf ring buffer. Now it is just a matter of calling any perf tool, such as 'trace', with a C source file that references the __bpf_stdout__ output channel and that channel will be created and connected to the script: # trace -e nanosleep --event test_bpf_stdout.c usleep 1 0.013 ( 0.013 ms): usleep/2818 nanosleep(rqtp: 0x7ffcead45f40 ) ... 0.013 ( ): __bpf_stdout__:Raise a BPF event!..) 0.015 ( ): perf_bpf_probe:func_begin:(ffffffff81112460)) 0.261 ( ): __bpf_stdout__:Raise a BPF event!..) 0.262 ( ): perf_bpf_probe:func_end:(ffffffff81112460 <- ffffffff81003d92)) 0.264 ( 0.264 ms): usleep/2818 ... [continued]: nanosleep()) = 0 # Further work is needed to reduce the number of lines in a perf bpf C source file, this being the part where we greatly reduce the command line setup (Wang Nan) - 'perf trace' now supports callchains, with 'trace --call-graph dwarf' using libunwind, just like 'perf top', to ask the kernel for stack dumps for CFI processing. This reduces the overhead by asking just for userspace callchains and also only for the syscall exit tracepoint (raw_syscalls:sys_exit) (Milian Wolff, Arnaldo Carvalho de Melo) Try it with, for instance: # perf trace --call dwarf ping 127.0.0.1 An excerpt of a system wide 'perf trace --call dwarf' session is at: https://fedorapeople.org/~acme/perf/perf-trace--call-graph-dwarf--all-cpus.txt You may need to bump the number of mmap pages, using -m/--mmap-pages, but on a Broadwell machine the defaults allowed system wide tracing to work without losing that many records; experiment with just some syscalls, like: # perf trace --call dwarf -e nanosleep,futex All the targets available for 'perf record', 'perf top' (--pid, --tid, --cpu, etc) should work. Also --duration may be interesting to try. 
To get filenames from the pointer args of various syscalls (open, etc.), add this to the mix: # perf probe 'vfs_getname=getname_flags:72 pathname=filename:string' Making this work is next in line: # trace --call dwarf --ev sched:sched_switch/call-graph=fp/ usleep 1 I.e. honouring per-tracepoint callchains in 'perf trace' in addition to raw_syscalls:sys_exit. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--tools/perf/Documentation/perf-trace.txt9
-rw-r--r--tools/perf/arch/x86/tests/perf-time-to-tsc.c2
-rw-r--r--tools/perf/arch/x86/util/dwarf-regs.c8
-rw-r--r--tools/perf/builtin-kvm.c2
-rw-r--r--tools/perf/builtin-record.c10
-rw-r--r--tools/perf/builtin-script.c78
-rw-r--r--tools/perf/builtin-top.c2
-rw-r--r--tools/perf/builtin-trace.c65
-rw-r--r--tools/perf/tests/bpf.c2
-rw-r--r--tools/perf/tests/code-reading.c2
-rw-r--r--tools/perf/tests/keep-tracking.c2
-rw-r--r--tools/perf/tests/openat-syscall-tp-fields.c2
-rw-r--r--tools/perf/tests/perf-record.c2
-rw-r--r--tools/perf/tests/switch-tracking.c2
-rw-r--r--tools/perf/util/bpf-loader.c143
-rw-r--r--tools/perf/util/bpf-loader.h19
-rw-r--r--tools/perf/util/event.c12
-rw-r--r--tools/perf/util/evlist.c18
-rw-r--r--tools/perf/util/evlist.h16
-rw-r--r--tools/perf/util/evsel.c16
-rw-r--r--tools/perf/util/evsel.h14
-rw-r--r--tools/perf/util/parse-events.c60
-rw-r--r--tools/perf/util/record.c5
-rw-r--r--tools/perf/util/session.c95
-rw-r--r--tools/perf/util/session.h8
-rw-r--r--tools/perf/util/symbol.c25
-rw-r--r--tools/perf/util/symbol.h6
-rw-r--r--tools/perf/util/thread_map.c8
28 files changed, 487 insertions, 146 deletions
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 13293de8869f..1bbcf305d233 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -117,6 +117,15 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
--syscalls::
Trace system calls. This options is enabled by default.
+--call-graph [mode,type,min[,limit],order[,key][,branch]]::
+ Setup and enable call-graph (stack chain/backtrace) recording.
+ See `--call-graph` section in perf-record and perf-report
+ man pages for details. The ones that are most useful in 'perf trace'
+ are 'dwarf' and 'lbr', where available, try: 'perf trace --call-graph dwarf'.
+
+--kernel-syscall-graph::
+ Show the kernel callchains on the syscall exit path.
+
--event::
Trace other events, see 'perf list' for a complete list.
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index 9d29ee283ac5..d4aa567a29c4 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -71,7 +71,7 @@ int test__perf_time_to_tsc(int subtest __maybe_unused)
CHECK__(parse_events(evlist, "cycles:u", NULL));
- perf_evlist__config(evlist, &opts);
+ perf_evlist__config(evlist, &opts, NULL);
evsel = perf_evlist__first(evlist);
diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c
index 9223c164e545..1f86ee8fb831 100644
--- a/tools/perf/arch/x86/util/dwarf-regs.c
+++ b/tools/perf/arch/x86/util/dwarf-regs.c
@@ -63,6 +63,8 @@ struct pt_regs_offset {
# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)}
#endif
+/* TODO: switching by dwarf address size */
+#ifndef __x86_64__
static const struct pt_regs_offset x86_32_regoffset_table[] = {
REG_OFFSET_NAME_32("%ax", eax),
REG_OFFSET_NAME_32("%cx", ecx),
@@ -75,6 +77,8 @@ static const struct pt_regs_offset x86_32_regoffset_table[] = {
REG_OFFSET_END,
};
+#define regoffset_table x86_32_regoffset_table
+#else
static const struct pt_regs_offset x86_64_regoffset_table[] = {
REG_OFFSET_NAME_64("%ax", rax),
REG_OFFSET_NAME_64("%dx", rdx),
@@ -95,11 +99,7 @@ static const struct pt_regs_offset x86_64_regoffset_table[] = {
REG_OFFSET_END,
};
-/* TODO: switching by dwarf address size */
-#ifdef __x86_64__
#define regoffset_table x86_64_regoffset_table
-#else
-#define regoffset_table x86_32_regoffset_table
#endif
/* Minus 1 for the ending REG_OFFSET_END */
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index bff666458b28..6487c06d2708 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -982,7 +982,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
struct perf_evlist *evlist = kvm->evlist;
char sbuf[STRERR_BUFSIZE];
- perf_evlist__config(evlist, &kvm->opts);
+ perf_evlist__config(evlist, &kvm->opts, NULL);
/*
* Note: exclude_{guest,host} do not apply here.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 410035c6e300..eb6a199a833c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -284,7 +284,7 @@ static int record__open(struct record *rec)
struct record_opts *opts = &rec->opts;
int rc = 0;
- perf_evlist__config(evlist, opts);
+ perf_evlist__config(evlist, opts, &callchain_param);
evlist__for_each(evlist, pos) {
try_again:
@@ -1276,6 +1276,14 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
if (err)
return err;
+ err = bpf__setup_stdout(rec->evlist);
+ if (err) {
+ bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
+ pr_err("ERROR: Setup BPF stdout failed: %s\n",
+ errbuf);
+ return err;
+ }
+
err = -ENOMEM;
symbol__init(NULL);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 59009aa7e2ca..ddd5b79e94c2 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -579,8 +579,8 @@ static void print_sample_bts(struct perf_sample *sample,
print_opts &= ~PRINT_IP_OPT_SRCLINE;
}
}
- perf_evsel__print_ip(evsel, sample, al, print_opts,
- scripting_max_stack);
+ perf_evsel__fprintf_sym(evsel, sample, al, 0, print_opts,
+ scripting_max_stack, stdout);
}
/* print branch_to information */
@@ -788,9 +788,9 @@ static void process_event(struct perf_script *script,
else
printf("\n");
- perf_evsel__print_ip(evsel, sample, al,
- output[attr->type].print_ip_opts,
- scripting_max_stack);
+ perf_evsel__fprintf_sym(evsel, sample, al, 0,
+ output[attr->type].print_ip_opts,
+ scripting_max_stack, stdout);
}
if (PRINT_FIELD(IREGS))
@@ -1415,21 +1415,19 @@ static int is_directory(const char *base_path, const struct dirent *dent)
return S_ISDIR(st.st_mode);
}
-#define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\
- while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) && \
- lang_next) \
- if ((lang_dirent.d_type == DT_DIR || \
- (lang_dirent.d_type == DT_UNKNOWN && \
- is_directory(scripts_path, &lang_dirent))) && \
- (strcmp(lang_dirent.d_name, ".")) && \
- (strcmp(lang_dirent.d_name, "..")))
+#define for_each_lang(scripts_path, scripts_dir, lang_dirent) \
+ while ((lang_dirent = readdir(scripts_dir)) != NULL) \
+ if ((lang_dirent->d_type == DT_DIR || \
+ (lang_dirent->d_type == DT_UNKNOWN && \
+ is_directory(scripts_path, lang_dirent))) && \
+ (strcmp(lang_dirent->d_name, ".")) && \
+ (strcmp(lang_dirent->d_name, "..")))
-#define for_each_script(lang_path, lang_dir, script_dirent, script_next)\
- while (!readdir_r(lang_dir, &script_dirent, &script_next) && \
- script_next) \
- if (script_dirent.d_type != DT_DIR && \
- (script_dirent.d_type != DT_UNKNOWN || \
- !is_directory(lang_path, &script_dirent)))
+#define for_each_script(lang_path, lang_dir, script_dirent) \
+ while ((script_dirent = readdir(lang_dir)) != NULL) \
+ if (script_dirent->d_type != DT_DIR && \
+ (script_dirent->d_type != DT_UNKNOWN || \
+ !is_directory(lang_path, script_dirent)))
#define RECORD_SUFFIX "-record"
@@ -1575,7 +1573,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
const char *s __maybe_unused,
int unset __maybe_unused)
{
- struct dirent *script_next, *lang_next, script_dirent, lang_dirent;
+ struct dirent *script_dirent, *lang_dirent;
char scripts_path[MAXPATHLEN];
DIR *scripts_dir, *lang_dir;
char script_path[MAXPATHLEN];
@@ -1590,19 +1588,19 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
if (!scripts_dir)
return -1;
- for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
+ for_each_lang(scripts_path, scripts_dir, lang_dirent) {
snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
- lang_dirent.d_name);
+ lang_dirent->d_name);
lang_dir = opendir(lang_path);
if (!lang_dir)
continue;
- for_each_script(lang_path, lang_dir, script_dirent, script_next) {
- script_root = get_script_root(&script_dirent, REPORT_SUFFIX);
+ for_each_script(lang_path, lang_dir, script_dirent) {
+ script_root = get_script_root(script_dirent, REPORT_SUFFIX);
if (script_root) {
desc = script_desc__findnew(script_root);
snprintf(script_path, MAXPATHLEN, "%s/%s",
- lang_path, script_dirent.d_name);
+ lang_path, script_dirent->d_name);
read_script_info(desc, script_path);
free(script_root);
}
@@ -1690,7 +1688,7 @@ static int check_ev_match(char *dir_name, char *scriptname,
*/
int find_scripts(char **scripts_array, char **scripts_path_array)
{
- struct dirent *script_next, *lang_next, script_dirent, lang_dirent;
+ struct dirent *script_dirent, *lang_dirent;
char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN];
DIR *scripts_dir, *lang_dir;
struct perf_session *session;
@@ -1713,9 +1711,9 @@ int find_scripts(char **scripts_array, char **scripts_path_array)
return -1;
}
- for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
+ for_each_lang(scripts_path, scripts_dir, lang_dirent) {
snprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path,
- lang_dirent.d_name);
+ lang_dirent->d_name);
#ifdef NO_LIBPERL
if (strstr(lang_path, "perl"))
continue;
@@ -1729,16 +1727,16 @@ int find_scripts(char **scripts_array, char **scripts_path_array)
if (!lang_dir)
continue;
- for_each_script(lang_path, lang_dir, script_dirent, script_next) {
+ for_each_script(lang_path, lang_dir, script_dirent) {
/* Skip those real time scripts: xxxtop.p[yl] */
- if (strstr(script_dirent.d_name, "top."))
+ if (strstr(script_dirent->d_name, "top."))
continue;
sprintf(scripts_path_array[i], "%s/%s", lang_path,
- script_dirent.d_name);
- temp = strchr(script_dirent.d_name, '.');
+ script_dirent->d_name);
+ temp = strchr(script_dirent->d_name, '.');
snprintf(scripts_array[i],
- (temp - script_dirent.d_name) + 1,
- "%s", script_dirent.d_name);
+ (temp - script_dirent->d_name) + 1,
+ "%s", script_dirent->d_name);
if (check_ev_match(lang_path,
scripts_array[i], session))
@@ -1756,7 +1754,7 @@ int find_scripts(char **scripts_array, char **scripts_path_array)
static char *get_script_path(const char *script_root, const char *suffix)
{
- struct dirent *script_next, *lang_next, script_dirent, lang_dirent;
+ struct dirent *script_dirent, *lang_dirent;
char scripts_path[MAXPATHLEN];
char script_path[MAXPATHLEN];
DIR *scripts_dir, *lang_dir;
@@ -1769,21 +1767,21 @@ static char *get_script_path(const char *script_root, const char *suffix)
if (!scripts_dir)
return NULL;
- for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
+ for_each_lang(scripts_path, scripts_dir, lang_dirent) {
snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
- lang_dirent.d_name);
+ lang_dirent->d_name);
lang_dir = opendir(lang_path);
if (!lang_dir)
continue;
- for_each_script(lang_path, lang_dir, script_dirent, script_next) {
- __script_root = get_script_root(&script_dirent, suffix);
+ for_each_script(lang_path, lang_dir, script_dirent) {
+ __script_root = get_script_root(script_dirent, suffix);
if (__script_root && !strcmp(script_root, __script_root)) {
free(__script_root);
closedir(lang_dir);
closedir(scripts_dir);
snprintf(script_path, MAXPATHLEN, "%s/%s",
- lang_path, script_dirent.d_name);
+ lang_path, script_dirent->d_name);
return strdup(script_path);
}
free(__script_root);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 833214979c4f..8846df0ec0c3 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -886,7 +886,7 @@ static int perf_top__start_counters(struct perf_top *top)
struct perf_evlist *evlist = top->evlist;
struct record_opts *opts = &top->record_opts;
- perf_evlist__config(evlist, opts);
+ perf_evlist__config(evlist, opts, &callchain_param);
evlist__for_each(evlist, counter) {
try_again:
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 11290b57ce04..2ec53edcf649 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -34,6 +34,7 @@
#include "trace-event.h"
#include "util/parse-events.h"
#include "util/bpf-loader.h"
+#include "callchain.h"
#include "syscalltbl.h"
#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
@@ -158,6 +159,7 @@ struct trace {
bool show_comm;
bool show_tool_stats;
bool trace_syscalls;
+ bool kernel_syscallchains;
bool force;
bool vfs_getname;
int trace_pgfaults;
@@ -2190,6 +2192,22 @@ signed_print:
goto signed_print;
fputc('\n', trace->output);
+
+ if (sample->callchain) {
+ struct addr_location al;
+ /* TODO: user-configurable print_opts */
+ const unsigned int print_opts = PRINT_IP_OPT_SYM |
+ PRINT_IP_OPT_DSO |
+ PRINT_IP_OPT_UNKNOWN_AS_ADDR;
+
+ if (machine__resolve(trace->host, &al, sample) < 0) {
+ pr_err("problem processing %d event, skipping it.\n",
+ event->header.type);
+ goto out_put;
+ }
+ perf_evsel__fprintf_callchain(evsel, sample, &al, 38, print_opts,
+ scripting_max_stack, trace->output);
+ }
out:
ttrace->entry_pending = false;
err = 0;
@@ -2645,6 +2663,15 @@ static int trace__add_syscall_newtp(struct trace *trace)
perf_evlist__add(evlist, sys_enter);
perf_evlist__add(evlist, sys_exit);
+ if (trace->opts.callgraph_set && !trace->kernel_syscallchains) {
+ /*
+ * We're interested only in the user space callchain
+ * leading to the syscall, allow overriding that for
+ * debugging reasons using --kernel-syscall-graph
+ */
+ sys_exit->attr.exclude_callchain_kernel = 1;
+ }
+
trace->syscalls.events.sys_enter = sys_enter;
trace->syscalls.events.sys_exit = sys_exit;
@@ -2723,7 +2750,27 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
goto out_delete_evlist;
}
- perf_evlist__config(evlist, &trace->opts);
+ perf_evlist__config(evlist, &trace->opts, NULL);
+
+ if (trace->opts.callgraph_set && trace->syscalls.events.sys_exit) {
+ perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
+ &trace->opts, &callchain_param);
+ /*
+ * Now we have evsels with different sample_ids, use
+ * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
+ * from a fixed position in each ring buffer record.
+ *
+ * As of the changeset introducing this comment, this
+ * isn't strictly needed, as the fields that can come before
+ * PERF_SAMPLE_ID are all used, but we'll probably disable
+ * some of those for things like copying the payload of
+ * pointer syscall arguments, and for vfs_getname we don't
+ * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
+ * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
+ */
+ perf_evlist__set_sample_bit(evlist, IDENTIFIER);
+ perf_evlist__reset_sample_bit(evlist, ID);
+ }
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
@@ -3205,6 +3252,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
.output = stderr,
.show_comm = true,
.trace_syscalls = true,
+ .kernel_syscallchains = false,
};
const char *output_name = NULL;
const char *ev_qualifier_str = NULL;
@@ -3250,6 +3298,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
"Trace pagefaults", parse_pagefaults, "maj"),
OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
+ OPT_CALLBACK(0, "call-graph", &trace.opts,
+ "record_mode[,record_size]", record_callchain_help,
+ &record_parse_callchain_opt),
+ OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
+ "Show the kernel callchains on the syscall exit path"),
OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_END()
@@ -3273,11 +3326,21 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+ err = bpf__setup_stdout(trace.evlist);
+ if (err) {
+ bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
+ pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
+ goto out;
+ }
+
if (trace.trace_pgfaults) {
trace.opts.sample_address = true;
trace.opts.sample_time = true;
}
+ if (trace.opts.callgraph_set)
+ symbol_conf.use_callchain = true;
+
if (trace.evlist->nr_entries > 0)
evlist__set_evsel_handler(trace.evlist, trace__event_handler);
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 199501c71e27..f31eed31c1a9 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -138,7 +138,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
perf_evlist__splice_list_tail(evlist, &parse_evlist.list);
evlist->nr_groups = parse_evlist.nr_groups;
- perf_evlist__config(evlist, &opts);
+ perf_evlist__config(evlist, &opts, NULL);
err = perf_evlist__open(evlist);
if (err < 0) {
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index abd3f0ec0c0b..68a69a195545 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -532,7 +532,7 @@ static int do_test_code_reading(bool try_kcore)
goto out_put;
}
- perf_evlist__config(evlist, &opts);
+ perf_evlist__config(evlist, &opts, NULL);
evsel = perf_evlist__first(evlist);
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index ddb78fae064a..614e45a3c603 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -80,7 +80,7 @@ int test__keep_tracking(int subtest __maybe_unused)
CHECK__(parse_events(evlist, "dummy:u", NULL));
CHECK__(parse_events(evlist, "cycles:u", NULL));
- perf_evlist__config(evlist, &opts);
+ perf_evlist__config(evlist, &opts, NULL);
evsel = perf_evlist__first(evlist);
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index eb99a105f31c..4344fe482c1d 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -44,7 +44,7 @@ int test__syscall_openat_tp_fields(int subtest __maybe_unused)
goto out_delete_evlist;
}
- perf_evsel__config(evsel, &opts);
+ perf_evsel__config(evsel, &opts, NULL);
thread_map__set_pid(evlist->threads, 0, getpid());
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 1cc78cefe399..b836ee6a8d9b 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -99,7 +99,7 @@ int test__PERF_RECORD(int subtest __maybe_unused)
perf_evsel__set_sample_bit(evsel, CPU);
perf_evsel__set_sample_bit(evsel, TID);
perf_evsel__set_sample_bit(evsel, TIME);
- perf_evlist__config(evlist, &opts);
+ perf_evlist__config(evlist, &opts, NULL);
err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
if (err < 0) {
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index ebd80168d51e..39a689bf7574 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -417,7 +417,7 @@ int test__switch_tracking(int subtest __maybe_unused)
perf_evsel__set_sample_bit(tracking_evsel, TIME);
/* Config events */
- perf_evlist__config(evlist, &opts);
+ perf_evlist__config(evlist, &opts, NULL);
/* Check moved event is still at the front */
if (cycles_evsel != perf_evlist__first(evlist)) {
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 0967ce601931..493307d1414c 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -842,6 +842,58 @@ bpf_map_op__new(struct parse_events_term *term)
return op;
}
+static struct bpf_map_op *
+bpf_map_op__clone(struct bpf_map_op *op)
+{
+ struct bpf_map_op *newop;
+
+ newop = memdup(op, sizeof(*op));
+ if (!newop) {
+ pr_debug("Failed to alloc bpf_map_op\n");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&newop->list);
+ if (op->key_type == BPF_MAP_KEY_RANGES) {
+ size_t memsz = op->k.array.nr_ranges *
+ sizeof(op->k.array.ranges[0]);
+
+ newop->k.array.ranges = memdup(op->k.array.ranges, memsz);
+ if (!newop->k.array.ranges) {
+ pr_debug("Failed to alloc indices for map\n");
+ free(newop);
+ return NULL;
+ }
+ }
+
+ return newop;
+}
+
+static struct bpf_map_priv *
+bpf_map_priv__clone(struct bpf_map_priv *priv)
+{
+ struct bpf_map_priv *newpriv;
+ struct bpf_map_op *pos, *newop;
+
+ newpriv = zalloc(sizeof(*newpriv));
+ if (!newpriv) {
+ pr_debug("No enough memory to alloc map private\n");
+ return NULL;
+ }
+ INIT_LIST_HEAD(&newpriv->ops_list);
+
+ list_for_each_entry(pos, &priv->ops_list, list) {
+ newop = bpf_map_op__clone(pos);
+ if (!newop) {
+ bpf_map_priv__purge(newpriv);
+ return NULL;
+ }
+ list_add_tail(&newop->list, &newpriv->ops_list);
+ }
+
+ return newpriv;
+}
+
static int
bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
{
@@ -1417,6 +1469,89 @@ int bpf__apply_obj_config(void)
return 0;
}
+#define bpf__for_each_map(pos, obj, objtmp) \
+ bpf_object__for_each_safe(obj, objtmp) \
+ bpf_map__for_each(pos, obj)
+
+#define bpf__for_each_stdout_map(pos, obj, objtmp) \
+ bpf__for_each_map(pos, obj, objtmp) \
+ if (bpf_map__get_name(pos) && \
+ (strcmp("__bpf_stdout__", \
+ bpf_map__get_name(pos)) == 0))
+
+int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
+{
+ struct bpf_map_priv *tmpl_priv = NULL;
+ struct bpf_object *obj, *tmp;
+ struct perf_evsel *evsel = NULL;
+ struct bpf_map *map;
+ int err;
+ bool need_init = false;
+
+ bpf__for_each_stdout_map(map, obj, tmp) {
+ struct bpf_map_priv *priv;
+
+ err = bpf_map__get_private(map, (void **)&priv);
+ if (err)
+ return -BPF_LOADER_ERRNO__INTERNAL;
+
+ /*
+ * No need to check map type: type should have been
+ * verified by kernel.
+ */
+ if (!need_init && !priv)
+ need_init = !priv;
+ if (!tmpl_priv && priv)
+ tmpl_priv = priv;
+ }
+
+ if (!need_init)
+ return 0;
+
+ if (!tmpl_priv) {
+ err = parse_events(evlist, "bpf-output/no-inherit=1,name=__bpf_stdout__/",
+ NULL);
+ if (err) {
+ pr_debug("ERROR: failed to create bpf-output event\n");
+ return -err;
+ }
+
+ evsel = perf_evlist__last(evlist);
+ }
+
+ bpf__for_each_stdout_map(map, obj, tmp) {
+ struct bpf_map_priv *priv;
+
+ err = bpf_map__get_private(map, (void **)&priv);
+ if (err)
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ if (priv)
+ continue;
+
+ if (tmpl_priv) {
+ priv = bpf_map_priv__clone(tmpl_priv);
+ if (!priv)
+ return -ENOMEM;
+
+ err = bpf_map__set_private(map, priv, bpf_map_priv__clear);
+ if (err) {
+ bpf_map_priv__clear(map, priv);
+ return err;
+ }
+ } else if (evsel) {
+ struct bpf_map_op *op;
+
+ op = bpf_map__add_newop(map, NULL);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+ op->op_type = BPF_MAP_OP_SET_EVSEL;
+ op->v.evsel = evsel;
+ }
+ }
+
+ return 0;
+}
+
#define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START)
#define ERRCODE_OFFSET(c) ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
#define NR_ERRNO (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
@@ -1590,3 +1725,11 @@ int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
bpf__strerror_end(buf, size);
return 0;
}
+
+int bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused,
+ int err, char *buf, size_t size)
+{
+ bpf__strerror_head(err, buf, size);
+ bpf__strerror_end(buf, size);
+ return 0;
+}
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index be4311944e3d..941e17275aa7 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -79,6 +79,11 @@ int bpf__strerror_config_obj(struct bpf_object *obj,
size_t size);
int bpf__apply_obj_config(void);
int bpf__strerror_apply_obj_config(int err, char *buf, size_t size);
+
+int bpf__setup_stdout(struct perf_evlist *evlist);
+int bpf__strerror_setup_stdout(struct perf_evlist *evlist, int err,