summaryrefslogtreecommitdiff
path: root/tools/perf
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2020-04-04 10:35:15 +0200
committerIngo Molnar <mingo@kernel.org>2020-04-04 10:35:15 +0200
commit7dc41b9b99cd0037a418ac47e342d56a438df649 (patch)
treee257a65d57043746503d55e877697339e0eea80a /tools/perf
parent42595ce90b9d4a6b9d8c5a1ea78da4eeaf7e086a (diff)
parent9ff76cea4e9e6d49a6f764ae114fc0fb8de97816 (diff)
downloadlinux-7dc41b9b99cd0037a418ac47e342d56a438df649.tar.gz
linux-7dc41b9b99cd0037a418ac47e342d56a438df649.tar.bz2
linux-7dc41b9b99cd0037a418ac47e342d56a438df649.zip
Merge tag 'perf-urgent-for-mingo-5.7-20200403' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Pull perf/urgent fixes and improvements from Arnaldo Carvalho de Melo: perf python: Arnaldo Carvalho de Melo: - Fix clang detection to strip out options passed in $CC. build: He Zhe: - Normalize gcc parameter when generating arch errno table, fixing the build by removing options from $(CC). Sam Lunt: - Support Python 3.8+ in Makefile. perf report/top: Arnaldo Carvalho de Melo: - Fix title line formatting. perf script: Andreas Gerstmayr: - Fix SEGFAULT when using DWARF mode. - Fix invalid read of directory entry after closedir(), found with valgrind. Hagen Paul Pfeifer: - Introduce --deltatime option. Stephane Eranian: - Allow --symbol to accept hexadecimal addresses. Ian Rogers: - Add -S/--symbols documentation Namhyung Kim: - Add --show-cgroup-events option. perf python: Arnaldo Carvalho de Melo: - Include rwsem.c in the python binding, needed by the cgroups improvements. build-test: Arnaldo Carvalho de Melo: - Honour JOBS to override detection of number of cores perf top: Jin Yao: - Support --group-sort-idx to change the sort order - perf top: Support hotkey to change sort order perf pmu-events x86: Jin Yao: - Use CPU_CLK_UNHALTED.THREAD in Kernel_Utilization metric perf symbols arm64: Kemeng Shi: - Fix arm64 gap between kernel start and module end kernel perf subsystem: Namhyung Kim: - Add PERF_RECORD_CGROUP event and Add PERF_SAMPLE_CGROUP feature, to allow cgroup tracking, saving a link between cgroup path and its id number. perf cgroup: Namhyung Kim: - Maintain cgroup hierarchy. perf report: Namhyung Kim: - Add 'cgroup' sort key. perf record: Namhyung Kim: - Support synthesizing cgroup events for pre-existing cgroups. - Add --all-cgroups option Documentation: Tony Jones: - Update docs regarding kernel/user space unwinding. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/perf-config.txt14
-rw-r--r--tools/perf/Documentation/perf-record.txt23
-rw-r--r--tools/perf/Documentation/perf-report.txt6
-rw-r--r--tools/perf/Documentation/perf-script.txt14
-rw-r--r--tools/perf/Documentation/perf-top.txt9
-rw-r--r--tools/perf/Makefile.config15
-rw-r--r--tools/perf/Makefile.perf11
-rw-r--r--tools/perf/arch/arm64/util/Build2
-rw-r--r--tools/perf/arch/arm64/util/machine.c27
-rw-r--r--tools/perf/arch/arm64/util/sym-handling.c19
-rw-r--r--tools/perf/arch/powerpc/util/Build1
-rw-r--r--tools/perf/arch/powerpc/util/sym-handling.c10
-rw-r--r--tools/perf/builtin-diff.c1
-rw-r--r--tools/perf/builtin-record.c16
-rw-r--r--tools/perf/builtin-report.c17
-rw-r--r--tools/perf/builtin-script.c60
-rw-r--r--tools/perf/builtin-top.c30
-rw-r--r--tools/perf/pmu-events/arch/test/test_cpu/branch.json (renamed from tools/perf/pmu-events/arch/x86/amdfam17h/branch.json)0
-rw-r--r--tools/perf/pmu-events/arch/test/test_cpu/other.json26
-rw-r--r--tools/perf/pmu-events/arch/test/test_cpu/uncore.json21
-rw-r--r--tools/perf/pmu-events/arch/x86/amdfam17h/cache.json329
-rw-r--r--tools/perf/pmu-events/arch/x86/amdfam17h/other.json65
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/branch.json23
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/cache.json294
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/core.json (renamed from tools/perf/pmu-events/arch/x86/amdfam17h/core.json)15
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json (renamed from tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json)64
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/memory.json (renamed from tools/perf/pmu-events/arch/x86/amdfam17h/memory.json)82
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/other.json56
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/branch.json52
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/cache.json338
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/core.json130
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json140
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/memory.json341
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/other.json115
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv3
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json2
-rw-r--r--tools/perf/pmu-events/jevents.c30
-rw-r--r--tools/perf/tests/Build1
-rw-r--r--tools/perf/tests/builtin-test.c4
-rw-r--r--tools/perf/tests/make10
-rw-r--r--tools/perf/tests/pmu-events.c379
-rw-r--r--tools/perf/tests/sample-parsing.c6
-rw-r--r--tools/perf/tests/tests.h1
-rw-r--r--tools/perf/ui/browsers/hists.c126
-rw-r--r--tools/perf/ui/hist.c93
-rw-r--r--tools/perf/ui/keysyms.h1
-rw-r--r--tools/perf/util/annotate.h1
-rw-r--r--tools/perf/util/cgroup.c80
-rw-r--r--tools/perf/util/cgroup.h17
-rw-r--r--tools/perf/util/cpumap.c10
-rw-r--r--tools/perf/util/dsos.c22
-rw-r--r--tools/perf/util/env.c2
-rw-r--r--tools/perf/util/env.h6
-rw-r--r--tools/perf/util/event.c39
-rw-r--r--tools/perf/util/event.h6
-rw-r--r--tools/perf/util/evsel.c18
-rw-r--r--tools/perf/util/evsel.h1
-rw-r--r--tools/perf/util/hist.c13
-rw-r--r--tools/perf/util/hist.h2
-rw-r--r--tools/perf/util/machine.c19
-rw-r--r--tools/perf/util/machine.h3
-rw-r--r--tools/perf/util/metricgroup.c49
-rw-r--r--tools/perf/util/parse-events.c6
-rw-r--r--tools/perf/util/parse-events.l12
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c2
-rw-r--r--tools/perf/util/pmu.c39
-rw-r--r--tools/perf/util/pmu.h5
-rw-r--r--tools/perf/util/python-ext-sources1
-rw-r--r--tools/perf/util/record.h1
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c3
-rw-r--r--tools/perf/util/session.c4
-rw-r--r--tools/perf/util/setup.py2
-rw-r--r--tools/perf/util/sort.c43
-rw-r--r--tools/perf/util/sort.h2
-rw-r--r--tools/perf/util/stat-display.c6
-rw-r--r--tools/perf/util/symbol-elf.c10
-rw-r--r--tools/perf/util/symbol_conf.h1
-rw-r--r--tools/perf/util/synthetic-events.c130
-rw-r--r--tools/perf/util/synthetic-events.h1
-rw-r--r--tools/perf/util/tool.h2
91 files changed, 2987 insertions, 615 deletions
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 8ead55593984..f16d8a71d3f5 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -405,14 +405,16 @@ ui.*::
This option is only applied to TUI.
call-graph.*::
- When sub-commands 'top' and 'report' work with -g/—-children
- there're options in control of call-graph.
+ The following controls the handling of call-graphs (obtained via the
+ -g/--call-graph options).
call-graph.record-mode::
- The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'.
- The value of 'dwarf' is effective only if perf detect needed library
- (libunwind or a recent version of libdw).
- 'lbr' only work for cpus that support it.
+ The mode for user space can be 'fp' (frame pointer), 'dwarf'
+ and 'lbr'. The value 'dwarf' is effective only if libunwind
+ (or a recent version of libdw) is present on the system;
+ the value 'lbr' only works for certain cpus. The method for
+ kernel space is controlled not by this option but by the
+ kernel config (CONFIG_UNWINDER_*).
call-graph.dump-size::
The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 7f4db7592467..b3f3b3f1c161 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -237,16 +237,22 @@ OPTIONS
option and remains only for backward compatibility. See --event.
-g::
- Enables call-graph (stack chain/backtrace) recording.
+ Enables call-graph (stack chain/backtrace) recording for both
+ kernel space and user space.
--call-graph::
Setup and enable call-graph (stack chain/backtrace) recording,
- implies -g. Default is "fp".
+ implies -g. Default is "fp" (for user space).
- Allows specifying "fp" (frame pointer) or "dwarf"
- (DWARF's CFI - Call Frame Information) or "lbr"
- (Hardware Last Branch Record facility) as the method to collect
- the information used to show the call graphs.
+ The unwinding method used for kernel space is dependent on the
+ unwinder used by the active kernel configuration, i.e
+ CONFIG_UNWINDER_FRAME_POINTER (fp) or CONFIG_UNWINDER_ORC (orc)
+
+ Any option specified here controls the method used for user space.
+
+ Valid options are "fp" (frame pointer), "dwarf" (DWARF's CFI -
+ Call Frame Information) or "lbr" (Hardware Last Branch Record
+ facility).
In some systems, where binaries are build with gcc
--fomit-frame-pointer, using the "fp" method will produce bogus
@@ -385,7 +391,10 @@ displayed with the weight and local_weight sort keys. This currently works for
abort events and some memory events in precise mode on modern Intel CPUs.
--namespaces::
-Record events of type PERF_RECORD_NAMESPACES.
+Record events of type PERF_RECORD_NAMESPACES. This enables 'cgroup_id' sort key.
+
+--all-cgroups::
+Record events of type PERF_RECORD_CGROUP. This enables 'cgroup' sort key.
--transaction::
Record transaction flags for transaction related events.
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index bd0a029d4c08..f569b9ea4002 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -95,6 +95,7 @@ OPTIONS
abort cost. This is the global weight.
- local_weight: Local weight version of the weight above.
- cgroup_id: ID derived from cgroup namespace device and inode numbers.
+ - cgroup: cgroup pathname in the cgroupfs.
- transaction: Transaction abort flags.
- overhead: Overhead percentage of sample
- overhead_sys: Overhead percentage of sample running in system mode
@@ -377,6 +378,11 @@ OPTIONS
Show event group information together. It forces group output also
if there are no groups defined in data file.
+--group-sort-idx::
+ Sort the output by the event at the index n in group. If n is invalid,
+ sort by the first event. It can support multiple groups with different
+ amount of events. WARNING: This should be used on grouped events.
+
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index db6a36aac47e..963487e82edc 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -319,6 +319,9 @@ OPTIONS
--show-bpf-events
Display bpf events i.e. events of type PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT.
+--show-cgroup-events
+ Display cgroup events i.e. events of type PERF_RECORD_CGROUP.
+
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
@@ -390,6 +393,9 @@ include::itrace.txt[]
--reltime::
Print time stamps relative to trace start.
+--deltatime::
+ Print time stamps relative to previous event.
+
--per-event-dump::
Create per event files with a "perf.data.EVENT.dump" name instead of
printing to stdout, useful, for instance, for generating flamegraphs.
@@ -406,6 +412,14 @@ include::itrace.txt[]
--xed::
Run xed disassembler on output. Requires installing the xed disassembler.
+-S::
+--symbols=symbol[,symbol...]::
+ Only consider the listed symbols. Symbols are typically a name
+ but they may also be hexadecimal address.
+
+ For example, to select the symbol noploop or the address 0x4007a0:
+ perf script --symbols=noploop,0x4007a0
+
--call-trace::
Show call stream for intel_pt traces. The CPUs are interleaved, but
can be filtered with -C.
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 324b6b53c86b..487737a725e9 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -53,6 +53,11 @@ Default is to monitor all CPUS.
--group::
Put the counters into a counter group.
+--group-sort-idx::
+ Sort the output by the event at the index n in group. If n is invalid,
+ sort by the first event. It can support multiple groups with different
+ amount of events. WARNING: This should be used on grouped events.
+
-F <freq>::
--freq=<freq>::
Profile at this frequency. Use 'max' to use the currently maximum
@@ -272,6 +277,10 @@ Default is to monitor all CPUS.
Record events of type PERF_RECORD_NAMESPACES and display it with the
'cgroup_id' sort key.
+--all-cgroups::
+ Record events of type PERF_RECORD_CGROUP and display it with the
+ 'cgroup' sort key.
+
--switch-on EVENT_NAME::
Only consider events after this event is found.
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 80e55e796be9..12a8204d63c6 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -228,8 +228,17 @@ strip-libs = $(filter-out -l%,$(1))
PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
+# Python 3.8 changed the output of `python-config --ldflags` to not include the
+# '-lpythonX.Y' flag unless '--embed' is also passed. The feature check for
+# libpython fails if that flag is not included in LDFLAGS
+ifeq ($(shell $(PYTHON_CONFIG_SQ) --ldflags --embed 2>&1 1>/dev/null; echo $$?), 0)
+ PYTHON_CONFIG_LDFLAGS := --ldflags --embed
+else
+ PYTHON_CONFIG_LDFLAGS := --ldflags
+endif
+
ifdef PYTHON_CONFIG
- PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
+ PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null)
PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
@@ -348,6 +357,10 @@ ifeq ($(feature-gettid), 1)
CFLAGS += -DHAVE_GETTID
endif
+ifeq ($(feature-file-handle), 1)
+ CFLAGS += -DHAVE_FILE_HANDLE
+endif
+
ifdef NO_LIBELF
NO_DWARF := 1
NO_DEMANGLE := 1
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 3eda9d4b88e7..d15a311408f1 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -231,6 +231,7 @@ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
BPF_DIR = $(srctree)/tools/lib/bpf/
SUBCMD_DIR = $(srctree)/tools/lib/subcmd/
LIBPERF_DIR = $(srctree)/tools/lib/perf/
+DOC_DIR = $(srctree)/tools/perf/Documentation/
# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
# Without this setting the output feature dump file misses some features, for
@@ -573,7 +574,7 @@ arch_errno_hdr_dir := $(srctree)/tools
arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
$(arch_errno_name_array): $(arch_errno_tbl)
- $(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@
+ $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@
sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c
sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh
@@ -792,7 +793,6 @@ $(LIBSUBCMD): FORCE
$(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a
$(LI