diff options
author | Ingo Molnar <mingo@kernel.org> | 2018-01-11 06:53:06 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2018-01-11 06:53:06 +0100 |
commit | 1ccb8feda7471efb62ebf68d70055b4c51fa7d92 (patch) | |
tree | b4346248eabef5e345c89688cab8007328a4a0fe | |
parent | 9128d3ed9de3882c83b927eb553d5d44c84505f5 (diff) | |
parent | 5d64db2966e38bfd99114ecf0b54f97d33023dcd (diff) | |
download | linux-1ccb8feda7471efb62ebf68d70055b4c51fa7d92.tar.gz linux-1ccb8feda7471efb62ebf68d70055b4c51fa7d92.tar.bz2 linux-1ccb8feda7471efb62ebf68d70055b4c51fa7d92.zip |
Merge tag 'perf-core-for-mingo-4.16-20180110' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
- The 'perf test bpf' entry hooked a eBPF proggie to the
SyS_epoll_wait() kernel function and expected it to be hit when calling
the epoll_wait() libc wrapper, which changed recently, in systems such
as Fedora 27, with the glibc wrapper calling instead the epoll_pwait()
syscall, so switch to epoll_pwait() for both the kernel and libc
function, getting it to work both in old and new systems (Arnaldo Carvalho de Melo)
- Beautify 'gettid' syscall result in 'perf trace', and in doing so
noticed that we need to handle namespaces in 'perf trace', will be
dealt with in follow up patches where we'll try to figure out if
the recent support for namespace in tools/perf/ can be used for this
purpose as well. (Arnaldo Carvalho de Melo)
- Introduce 'perf report --mmaps' and 'perf report --tasks' to show
info present in 'perf.data' (Jiri Olsa, Arnaldo Carvalho de Melo)
- Synchronize kernel <-> tooling headers wrt meltdown/spectre changes
(Arnaldo Carvalho de Melo)
- Fix a wrong offset issue when using /proc/kcore (Jin Yao)
- Fix bug that prevented annotating symbols in perf.data files
generated with 'perf record --branch-any' (Jin Yao)
- Add infrastructure to record first and last sample time to the
perf.data file header, so that when processing all samples in
a 'perf record' session, such as when doing build-id processing,
or when specifically requesting that that info be recorded, use
that in 'perf report --time', that also got support for percent
slices in addition to absolute ones.
I.e. now it is possible to ask for the samples in the 10%-20%
time slice of a perf.data file (Jin Yao)
- Enable building with libbabeltrace by default (Jiri Olsa)
- Display perf_event_attr::namespaces when duping the attributes
in verbose mode (Jiri Olsa)
- Allocate context task_ctx_data for child event (Jiri Olsa)
- Update comments for PERF_RECORD_ITRACE_START and PERF_RECORD_MISC_* (Jiri Olsa)
- Add support for showing PERF_RECORD_LOST events in 'perf script' (Jiri Olsa)
- Add 'perf report --stats' option to display quick statistics about
metadata events (PERF_RECORD_*) i.e. what we get at the end of 'perf
report -D' (Jiri Olsa)
- Fix compile error with libunwind x86 (Wang Nan)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
36 files changed, 884 insertions, 140 deletions
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b9a4953018ed..c77c9a2ebbbb 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -612,9 +612,12 @@ struct perf_event_mmap_page { */ #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) /* - * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on - * different events so can reuse the same bit position. - * Ditto PERF_RECORD_MISC_SWITCH_OUT. + * Following PERF_RECORD_MISC_* are used on different + * events, so can reuse the same bit position: + * + * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events + * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) @@ -864,6 +867,7 @@ enum perf_event_type { * struct perf_event_header header; * u32 pid; * u32 tid; + * struct sample_id sample_id; * }; */ PERF_RECORD_ITRACE_START = 12, diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 1b2be63c8528..772a43fea825 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -179,21 +179,6 @@ put_callchain_entry(int rctx) } struct perf_callchain_entry * -perf_callchain(struct perf_event *event, struct pt_regs *regs) -{ - bool kernel = !event->attr.exclude_callchain_kernel; - bool user = !event->attr.exclude_callchain_user; - /* Disallow cross-task user callchains. */ - bool crosstask = event->ctx->task && event->ctx->task != current; - const u32 max_stack = event->attr.sample_max_stack; - - if (!kernel && !user) - return NULL; - - return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true); -} - -struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark) { diff --git a/kernel/events/core.c b/kernel/events/core.c index 4df5b695bf0d..4e1a1bf8d867 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5815,19 +5815,11 @@ void perf_output_sample(struct perf_output_handle *handle, perf_output_read(handle, event); if (sample_type & PERF_SAMPLE_CALLCHAIN) { - if (data->callchain) { - int size = 1; - - if (data->callchain) - size += data->callchain->nr; - - size *= sizeof(u64); + int size = 1; - __output_copy(handle, data->callchain, size); - } else { - u64 nr = 0; - perf_output_put(handle, nr); - } + size += data->callchain->nr; + size *= sizeof(u64); + __output_copy(handle, data->callchain, size); } if (sample_type & PERF_SAMPLE_RAW) { @@ -5980,6 +5972,26 @@ static u64 perf_virt_to_phys(u64 virt) return phys_addr; } +static struct perf_callchain_entry __empty_callchain = { .nr = 0, }; + +static struct perf_callchain_entry * +perf_callchain(struct perf_event *event, struct pt_regs *regs) +{ + bool kernel = !event->attr.exclude_callchain_kernel; + bool user = !event->attr.exclude_callchain_user; + /* Disallow cross-task user callchains. */ + bool crosstask = event->ctx->task && event->ctx->task != current; + const u32 max_stack = event->attr.sample_max_stack; + struct perf_callchain_entry *callchain; + + if (!kernel && !user) + return &__empty_callchain; + + callchain = get_perf_callchain(regs, 0, kernel, user, + max_stack, crosstask, true); + return callchain ?: &__empty_callchain; +} + void perf_prepare_sample(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event, @@ -6002,9 +6014,7 @@ void perf_prepare_sample(struct perf_event_header *header, int size = 1; data->callchain = perf_callchain(event, regs); - - if (data->callchain) - size += data->callchain->nr; + size += data->callchain->nr; header->size += size * sizeof(u64); } @@ -10703,6 +10713,19 @@ inherit_event(struct perf_event *parent_event, if (IS_ERR(child_event)) return child_event; + + if ((child_event->attach_state & PERF_ATTACH_TASK_DATA) && + !child_ctx->task_ctx_data) { + struct pmu *pmu = child_event->pmu; + + child_ctx->task_ctx_data = kzalloc(pmu->task_ctx_size, + GFP_KERNEL); + if (!child_ctx->task_ctx_data) { + free_event(child_event); + return NULL; + } + } + /* * is_orphaned_event() and list_add_tail(&parent_event->child_list) * must be under the same lock in order to serialize against @@ -10713,6 +10736,7 @@ inherit_event(struct perf_event *parent_event, if (is_orphaned_event(parent_event) || !atomic_long_inc_not_zero(&parent_event->refcount)) { mutex_unlock(&parent_event->child_mutex); + /* task_ctx_data is freed with child_ctx */ free_event(child_event); return NULL; } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 09b1537ae06c..6dc725a7e7bc 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -201,10 +201,6 @@ arch_perf_out_copy_user(void *dst, const void *src, unsigned long n) DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) -/* Callchain handling */ -extern struct perf_callchain_entry * -perf_callchain(struct perf_event *event, struct pt_regs *regs); - static inline int get_recursion_context(int *recursion) { int rctx; diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 800104c8a3ed..21ac898df2d8 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -197,11 +197,12 @@ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ - +#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ @@ -340,5 +341,6 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 14d6d5007314..b027633e7300 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -50,6 +50,12 @@ # define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) #endif +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define DISABLE_PTI 0 +#else +# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -60,7 +66,7 @@ #define DISABLED_MASK4 (DISABLE_PCID) #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 -#define DISABLED_MASK7 0 +#define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_MPX) #define DISABLED_MASK10 0 diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b9a4953018ed..c77c9a2ebbbb 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -612,9 +612,12 @@ struct perf_event_mmap_page { */ #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) /* - * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on - * different events so can reuse the same bit position. - * Ditto PERF_RECORD_MISC_SWITCH_OUT. + * Following PERF_RECORD_MISC_* are used on different + * events, so can reuse the same bit position: + * + * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events + * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) @@ -864,6 +867,7 @@ enum perf_event_type { * struct perf_event_header header; * u32 pid; * u32 tid; + * struct sample_id sample_id; * }; */ PERF_RECORD_ITRACE_START = 12, diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 5a626ef666c2..3eea6de35a38 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -430,6 +430,9 @@ Configure all used events to run in user space. --timestamp-filename Append timestamp to output file name. +--timestamp-boundary:: +Record timestamp boundary (time of first/last samples). + --switch-output[=mode]:: Generate multiple perf.data files, timestamp prefixed, switching to a new one based on 'mode' value: diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index ddde2b54af57..63d0db3184c9 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -402,6 +402,26 @@ OPTIONS stop time is not given (i.e, time string is 'x.y,') then analysis goes to end of file. + Also support time percent with multiple time range. Time string is + 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. + + For example: + Select the second 10% time slice: + + perf report --time 10%/2 + + Select from 0% to 10% time slice: + + perf report --time 0%-10% + + Select the first and second 10% time slices: + + perf report --time 10%/1,10%/2 + + Select from 0% to 10% and 30% to 40% slices: + + perf report --time 0%-10%,30%-40% + --itrace:: Options for decoding instruction tracing data. The options are: @@ -437,8 +457,23 @@ include::itrace.txt[] will be printed. Each entry is function name or file/line. Enabled by default, disable with --no-inline. +--mmaps:: + Show --tasks output plus mmap information in a format similar to + /proc/<PID>/maps. + + Please note that not all mmaps are stored, options affecting which ones + are include 'perf record --data', for instance. + +--stats:: + Display overall events statistics without any further processing. + (like the one at the end of the perf report -D command) + +--tasks:: + Display monitored tasks stored in perf data. Displaying pid/tid/ppid + plus the command string aligned to distinguish parent and child tasks. + include::callchain-overhead-calculation.txt[] SEE ALSO -------- -linkperf:perf-stat[1], linkperf:perf-annotate[1] +linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 974ceb12c7f3..806ec6391fd6 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -117,7 +117,7 @@ OPTIONS Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, - brstackoff, callindent, insn, insnlen, synth, phys_addr, metric. + brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -225,6 +225,24 @@ OPTIONS that the metric computed is averaged over the whole sampling period, not just for the sample point. + For sample events it's possible to display misc field with -F +misc option, + following letters are displayed for each bit: + + PERF_RECORD_MISC_KERNEL K + PERF_RECORD_MISC_USER U + PERF_RECORD_MISC_HYPERVISOR H + PERF_RECORD_MISC_GUEST_KERNEL G + PERF_RECORD_MISC_GUEST_USER g + PERF_RECORD_MISC_MMAP_DATA* M + PERF_RECORD_MISC_COMM_EXEC E + PERF_RECORD_MISC_SWITCH_OUT S + + $ perf script -F +misc ... + sched-messaging 1414 K 28690.636582: 4590 cycles ... + sched-messaging 1407 U 28690.636600: 325620 cycles ... + sched-messaging 1414 K 28690.636608: 19473 cycles ... + misc field ___________/ + -k:: --vmlinux=<file>:: vmlinux pathname @@ -282,6 +300,9 @@ OPTIONS Display context switch events i.e. events of type PERF_RECORD_SWITCH or PERF_RECORD_SWITCH_CPU_WIDE. +--show-lost-events + Display lost events i.e. events of type PERF_RECORD_LOST. + --demangle:: Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. @@ -329,6 +350,22 @@ include::itrace.txt[] stop time is not given (i.e, time string is 'x.y,') then analysis goes to end of file. + Also support time percent with multipe time range. Time string is + 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. + + For example: + Select the second 10% time slice + perf script --time 10%/2 + + Select from 0% to 10% time slice + perf script --time 0%-10% + + Select the first and second 10% time slices + perf script --time 10%/1,10%/2 + + Select from 0% to 10% and 30% to 40% slices + perf script --time 0%-10%,30%-40% + --max-blocks:: Set the maximum number of program blocks to print with brstackasm for each sample. diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index 15e8b48077ba..f7d85e89a98a 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -261,6 +261,10 @@ struct { struct perf_header_string map; }[number_of_cache_levels]; + HEADER_SAMPLE_TIME = 21, + +Two uint64_t for the time of first sample and the time of last sample. + other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f050f38d8fa3..12dec6ea5ed2 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -780,7 +780,7 @@ else NO_PERF_READ_VDSOX32 := 1 endif -ifdef LIBBABELTRACE +ifndef NO_LIBBABELTRACE $(call feature_check,libbabeltrace) ifeq ($(feature-libbabeltrace), 1) CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 68cf1360a3f3..9fdefd748e2e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -77,7 +77,7 @@ include ../scripts/utilities.mak # # Define NO_ZLIB if you do not want to support compressed kernel modules # -# Define LIBBABELTRACE if you DO want libbabeltrace support +# Define NO_LIBBABELTRACE if you do not want libbabeltrace support # for CTF data format. # # Define NO_LZMA if you do not want to support compressed (xz) kernel modules diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c index 9c917f80c906..05920e3edf7a 100644 --- a/tools/perf/arch/x86/util/unwind-libunwind.c +++ b/tools/perf/arch/x86/util/unwind-libunwind.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#ifndef REMOTE_UNWIND_LIBUNWIND #include <errno.h> +#ifndef REMOTE_UNWIND_LIBUNWIND #include <libunwind.h> #include "perf_regs.h" #include "../../util/unwind.h" diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 50385d89c497..65681a1a292a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -78,6 +78,7 @@ struct record { bool no_buildid_cache_set; bool buildid_all; bool timestamp_filename; + bool timestamp_boundary; struct switch_output switch_output; unsigned long long samples; }; @@ -409,8 +410,15 @@ static int process_sample_event(struct perf_tool *tool, { struct record *rec = container_of(tool, struct record, tool); - rec->samples++; + if (rec->evlist->first_sample_time == 0) + rec->evlist->first_sample_time = sample->time; + + rec->evlist->last_sample_time = sample->time; + if (rec->buildid_all) + return 0; + + rec->samples++; return build_id__mark_dso_hit(tool, event, sample, evsel, machine); } @@ -435,9 +443,11 @@ static int process_buildids(struct record *rec) /* * If --buildid-all is given, it marks all DSO regardless of hits, - * so no need to process samples. + * so no need to process samples. But if timestamp_boundary is enabled, + * it still needs to walk on all samples to get the timestamps of + * first/last samples. */ - if (rec->buildid_all) + if (rec->buildid_all && !rec->timestamp_boundary) rec->tool.sample = NULL; return perf_session__process_events(session); @@ -1621,6 +1631,8 @@ static struct option __record_options[] = { "Record build-id of all DSOs regardless of hits"), OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), + OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, + "Record timestamp boundary (time of first/last samples)"), OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, &record.switch_output.set, "signal,size,time", "Switch output when receive SIGUSR2 or cross size,time threshold", diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index eb9ce6327e71..dd4df9a5cd06 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -15,6 +15,7 @@ #include "util/color.h" #include <linux/list.h> #include <linux/rbtree.h> +#include <linux/err.h> #include "util/symbol.h" #include "util/callchain.h" #include "util/values.h" @@ -51,6 +52,9 @@ #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> +#include <linux/mman.h> + +#define PTIME_RANGE_MAX 10 struct report { struct perf_tool tool; @@ -60,6 +64,9 @@ struct report { bool show_threads; bool inverted_callchain; bool mem_mode; + bool stats_mode; + bool tasks_mode; + bool mmaps_mode; bool header; bool header_only; bool nonany_branch_mode; @@ -69,7 +76,8 @@ struct report { const char *cpu_list; const char *symbol_filter_str; const char *time_str; - struct perf_time_interval ptime; + struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; + int range_num; float min_percent; u64 nr_entries; u64 queue_size; @@ -162,12 +170,28 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter, struct hist_entry *he = iter->he; struct report *rep = arg; struct branch_info *bi; + struct perf_sample *sample = iter->sample; + struct perf_evsel *evsel = iter->evsel; + int err; + + if (!ui__has_annotation()) + return 0; + + hist__account_cycles(sample->branch_stack, al, sample, + rep->nonany_branch_mode); bi = he->branch_info; + err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx); + if (err) + goto out; + + err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); + branch_type_count(&rep->brtype_stat, &bi->flags, bi->from.addr, bi->to.addr); - return 0; +out: + return err; } static int process_sample_event(struct perf_tool *tool, @@ -186,8 +210,10 @@ static int process_sample_event(struct perf_tool *tool, }; int ret = 0; - if (perf_time__skip_sample(&rep->ptime, sample->time)) + if (perf_time__ranges_skip_sample(rep->ptime_range, rep->range_num, + sample->time)) { return 0; + } if (machine__resolve(machine, &al, sample) < 0) { pr_debug("problem processing %d event, skipping it.\n", @@ -567,6 +593,174 @@ static void report__output_resort(struct report *rep) ui_progress__finish(); } +static void stats_setup(struct report *rep) +{ + memset(&rep->tool, 0, sizeof(rep->tool)); + rep->tool.no_warn = true; +} + +static int stats_print(struct report *rep) +{ + struct perf_session *session = rep->session; + + perf_session__fprintf_nr_events(session, stdout); + return 0; +} + +static void tasks_setup(struct report *rep) +{ + memset(&rep->tool, 0, sizeof(rep->tool)); + if (rep->mmaps_mode) { + rep->tool.mmap = perf_event__process_mmap; + rep->tool.mmap2 = perf_event__process_mmap2; + } + rep->tool.comm = perf_event__process_comm; + rep->tool.exit = perf_event__process_exit; + rep->tool.fork = perf_event__process_fork; + rep->tool.no_warn = true; +} + +struct task { + struct thread *thread; + struct list_head list; + struct list_head children; +}; + +static struct task *tasks_list(struct task *task, struct machine *machine) +{ + struct thread *parent_thread, *thread = task->thread; + struct task *parent_task; + + /* Already listed. */ + if (!list_empty(&task->list)) + return NULL; + + /* Last one in the chain. */ + if (thread->ppid == -1) + return task; + + parent_thread = machine__find_thread(machine, -1, thread->ppid); + if (!parent_thread) + return ERR_PTR(-ENOENT); + |