summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2018-10-26 09:22:45 +0200
committerIngo Molnar <mingo@kernel.org>2018-10-26 09:22:45 +0200
commitefe8eaf7b525f1be26fe20d723d2bfbfcd7455fd (patch)
tree79b4182adcaf4506780194347dbad3656f2a08ca /tools
parent034bda1cd5abbe7b170ce76b618768d164030bbd (diff)
parentfe57120e18a1f9124ca758c89cc54f91333d1847 (diff)
downloadlinux-efe8eaf7b525f1be26fe20d723d2bfbfcd7455fd.tar.gz
linux-efe8eaf7b525f1be26fe20d723d2bfbfcd7455fd.tar.bz2
linux-efe8eaf7b525f1be26fe20d723d2bfbfcd7455fd.zip
Merge tag 'perf-core-for-mingo-4.20-20181025' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: - Introduce 'perf trace --max-events' for stopping 'perf trace' when that many syscalls (enter+exit), tracepoints or other events such as page faults take place. Support that as well on a per-event basis, e.g.: perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/ Will stop when 2 context switches, 4 block plugs, 1 block unplug and 3 net_dev_queue tracepoints take place. (Arnaldo Carvalho de Melo) - Poll for monitored tasks being alive in 'perf stat -p/-t', exiting when those tasks all terminate (Jiri Olsa) - Encode -k clockid frequency into perf.data to enable timestamps derived metrics conversion into wall clock time on reporting stage. (Alexey Budankov) - Improve Intel PT call graph from SQL database and GUI python scripts, including adopting the Qt MDI interface to allow for multiple subwindows for all the tables, helping in better visualizing the data in the SQL tables, also uses, when available, the Intel XED disassembler libraries to present the Intel PT data as x86 asm mnemonics. This last feature is not currently working in some cases, fix is being discussed (Adrian Hunter) - Implement a ftrace function_graph view in 'perf script' when processing hardware trace data such as Intel PT (Andi Kleen) - Better integration with the Intel XED disassembler, when available, in 'perf script' (Andi Kleen) - Some 'perf trace' drop refcount fixes (Arnaldo Carvalho de Melo) - Add Sparc support to 'perf annotate', jitdump (David Miller) - Fix PLT symbols entry/header sizes properly on Sparc (David Miller) - Fix generation of system call table failure with /tmp mounted with 'noexec' in arm64 (Hongxu Jia) - Allow extended console debug output in 'perf script' (Milian Wolff) - Flush output stream after events in 'perf script' verbose mode (Milian Wolff) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/lib/subcmd/parse-options.c19
-rw-r--r--tools/lib/subcmd/parse-options.h2
-rw-r--r--tools/perf/Documentation/build-xed.txt19
-rw-r--r--tools/perf/Documentation/intel-pt.txt2
-rw-r--r--tools/perf/Documentation/itrace.txt7
-rw-r--r--tools/perf/Documentation/perf-script.txt18
-rw-r--r--tools/perf/Documentation/perf-trace.txt67
-rwxr-xr-xtools/perf/arch/arm64/entry/syscalls/mksyscalltbl2
-rw-r--r--tools/perf/arch/sparc/Makefile2
-rw-r--r--tools/perf/arch/sparc/annotate/instructions.c169
-rw-r--r--tools/perf/builtin-record.c24
-rw-r--r--tools/perf/builtin-script.c166
-rw-r--r--tools/perf/builtin-stat.c24
-rw-r--r--tools/perf/builtin-trace.c50
-rw-r--r--tools/perf/perf.h1
-rw-r--r--tools/perf/scripts/python/call-graph-from-sql.py339
-rw-r--r--tools/perf/scripts/python/export-to-postgresql.py2
-rw-r--r--tools/perf/scripts/python/export-to-sqlite.py2
-rwxr-xr-xtools/perf/scripts/python/exported-sql-viewer.py2128
-rw-r--r--tools/perf/util/annotate.c8
-rw-r--r--tools/perf/util/auxtrace.c17
-rw-r--r--tools/perf/util/auxtrace.h5
-rw-r--r--tools/perf/util/cs-etm.c3
-rw-r--r--tools/perf/util/env.h1
-rw-r--r--tools/perf/util/evlist.c2
-rw-r--r--tools/perf/util/evsel.c27
-rw-r--r--tools/perf/util/evsel.h5
-rw-r--r--tools/perf/util/genelf.h6
-rw-r--r--tools/perf/util/header.c23
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/intel-bts.c3
-rw-r--r--tools/perf/util/intel-pt.c3
-rw-r--r--tools/perf/util/parse-events.c8
-rw-r--r--tools/perf/util/parse-events.h1
-rw-r--r--tools/perf/util/parse-events.l1
-rw-r--r--tools/perf/util/symbol-elf.c12
-rw-r--r--tools/perf/util/symbol.h3
-rw-r--r--tools/perf/util/thread.h2
38 files changed, 2778 insertions, 396 deletions
diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index cb7154eccbdc..dbb9efbf718a 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -116,6 +116,7 @@ static int get_value(struct parse_opt_ctx_t *p,
case OPTION_INTEGER:
case OPTION_UINTEGER:
case OPTION_LONG:
+ case OPTION_ULONG:
case OPTION_U64:
default:
break;
@@ -166,6 +167,7 @@ static int get_value(struct parse_opt_ctx_t *p,
case OPTION_INTEGER:
case OPTION_UINTEGER:
case OPTION_LONG:
+ case OPTION_ULONG:
case OPTION_U64:
default:
break;
@@ -295,6 +297,22 @@ static int get_value(struct parse_opt_ctx_t *p,
return opterror(opt, "expects a numerical value", flags);
return 0;
+ case OPTION_ULONG:
+ if (unset) {
+ *(unsigned long *)opt->value = 0;
+ return 0;
+ }
+ if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+ *(unsigned long *)opt->value = opt->defval;
+ return 0;
+ }
+ if (get_arg(p, opt, flags, &arg))
+ return -1;
+ *(unsigned long *)opt->value = strtoul(arg, (char **)&s, 10);
+ if (*s)
+ return opterror(opt, "expects a numerical value", flags);
+ return 0;
+
case OPTION_U64:
if (unset) {
*(u64 *)opt->value = 0;
@@ -703,6 +721,7 @@ static void print_option_help(const struct option *opts, int full)
case OPTION_ARGUMENT:
break;
case OPTION_LONG:
+ case OPTION_ULONG:
case OPTION_U64:
case OPTION_INTEGER:
case OPTION_UINTEGER:
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index 92fdbe1519f6..6ca2a8bfe716 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -25,6 +25,7 @@ enum parse_opt_type {
OPTION_STRING,
OPTION_INTEGER,
OPTION_LONG,
+ OPTION_ULONG,
OPTION_CALLBACK,
OPTION_U64,
OPTION_UINTEGER,
@@ -133,6 +134,7 @@ struct option {
#define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
#define OPT_UINTEGER(s, l, v, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) }
#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
+#define OPT_ULONG(s, l, v, h) { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) }
#define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) }
#define OPT_STRING_OPTARG(s, l, v, a, h, d) \
diff --git a/tools/perf/Documentation/build-xed.txt b/tools/perf/Documentation/build-xed.txt
new file mode 100644
index 000000000000..6222c1e7231f
--- /dev/null
+++ b/tools/perf/Documentation/build-xed.txt
@@ -0,0 +1,19 @@
+
+For --xed the xed tool is needed. Here is how to install it:
+
+ $ git clone https://github.com/intelxed/mbuild.git mbuild
+ $ git clone https://github.com/intelxed/xed
+ $ cd xed
+ $ ./mfile.py --share
+ $ ./mfile.py examples
+ $ sudo ./mfile.py --prefix=/usr/local install
+ $ sudo ldconfig
+ $ sudo cp obj/examples/xed /usr/local/bin
+
+Basic xed testing:
+
+ $ xed | head -3
+ ERROR: required argument(s) were missing
+ Copyright (C) 2017, Intel Corporation. All rights reserved.
+ XED version: [v10.0-328-g7d62c8c49b7b]
+ $
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 76971d2e4164..115eaacc455f 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -106,7 +106,7 @@ in transaction, respectively.
While it is possible to create scripts to analyze the data, an alternative
approach is available to export the data to a sqlite or postgresql database.
Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
-and to script call-graph-from-sql.py for an example of using the database.
+and to script exported-sql-viewer.py for an example of using the database.
There is also script intel-pt-events.py which provides an example of how to
unpack the raw data for power events and PTWRITE.
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index a3abe04c779d..c2182cbabde3 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -11,10 +11,11 @@
l synthesize last branch entries (use with i or x)
s skip initial number of events
- The default is all events i.e. the same as --itrace=ibxwpe
+ The default is all events i.e. the same as --itrace=ibxwpe,
+ except for perf script where it is --itrace=ce
- In addition, the period (default 100000) for instructions events
- can be specified in units of:
+ In addition, the period (default 100000, except for perf script where it is 1)
+ for instructions events can be specified in units of:
i instructions
t ticks
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index afdafe2110a1..a2b37ce48094 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -383,6 +383,24 @@ include::itrace.txt[]
will be printed. Each entry has function name and file/line. Enabled by
default, disable with --no-inline.
+--insn-trace::
+ Show instruction stream for intel_pt traces. Combine with --xed to
+ show disassembly.
+
+--xed::
+ Run xed disassembler on output. Requires installing the xed disassembler.
+
+--call-trace::
+ Show call stream for intel_pt traces. The CPUs are interleaved, but
+ can be filtered with -C.
+
+--call-ret-trace::
+ Show call and return stream for intel_pt traces.
+
+--graph-function::
+ For itrace only show specified functions and their callees for
+ itrace. Multiple functions can be separated by comma.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 115db9e06ecd..e113450503d2 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -171,6 +171,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
--kernel-syscall-graph::
Show the kernel callchains on the syscall exit path.
+--max-events=N::
+ Stop after processing N events. Note that strace-like events are considered
+ only at exit time or when a syscall is interrupted, i.e. in those cases this
+ option is equivalent to the number of lines printed.
+
--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. Note that at this point
@@ -238,6 +243,68 @@ Trace syscalls, major and minor pagefaults:
As you can see, there was major pagefault in python process, from
CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so.
+Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here):
+
+ $ perf trace -e open* --max-events 4
+ [root@jouet perf]# trace -e open* --max-events 4
+ 2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31
+ 2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
+ 3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
+ 4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3
+ $
+
+Trace the first minor page fault when running a workload:
+
+ # perf trace -F min --max-stack=7 --max-events 1 sleep 1
+ 0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k)
+ __clear_user ([kernel.kallsyms])
+ load_elf_binary ([kernel.kallsyms])
+ search_binary_handler ([kernel.kallsyms])
+ __do_execve_file.isra.33 ([kernel.kallsyms])
+ __x64_sys_execve ([kernel.kallsyms])
+ do_syscall_64 ([kernel.kallsyms])
+ entry_SYSCALL_64 ([kernel.kallsyms])
+ #
+
+Trace the next min page page fault to take place on the first CPU:
+
+ # perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0
+ 0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.)
+ js::gc::FreeSpan::initAsEmpty (inlined)
+ js::gc::Arena::setAsNotAllocated (inlined)
+ js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so)
+ js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so)
+ js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so)
+ js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so)
+ js::gc::GCRuntime::tryNewTenuredThing<JSString, (js::AllowGC)1> (inlined)
+ js::AllocateString<JSString, (js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
+ js::Allocate<JSThinInlineString, (js::AllowGC)1> (inlined)
+ JSThinInlineString::new_<(js::AllowGC)1> (inlined)
+ AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined)
+ js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
+ [0x18b26e6bc2bd] (/tmp/perf-17136.map)
+ #
+
+Trace the next two sched:sched_switch events, four block:*_plug events, the
+next block:*_unplug and the next three net:*dev_queue events, this last one
+with a backtrace of at most 16 entries, system wide:
+
+ # perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/
+ 0.000 :0/0 sched:sched_switch:swapper/2:0 [120] S ==> rcu_sched:10 [120]
+ 0.015 rcu_sched/10 sched:sched_switch:rcu_sched:10 [120] R ==> swapper/2:0 [120]
+ 254.198 irq/50-iwlwifi/680 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=66
+ __dev_queue_xmit ([kernel.kallsyms])
+ 273.977 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=78
+ __dev_queue_xmit ([kernel.kallsyms])
+ 274.007 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051ff00 len=78
+ __dev_queue_xmit ([kernel.kallsyms])
+ 2930.140 kworker/u16:58/2722 block:block_plug:[kworker/u16:58]
+ 2930.162 kworker/u16:58/2722 block:block_unplug:[kworker/u16:58] 1
+ 4466.094 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8]
+ 8050.123 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
+ 8050.271 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
+ #
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
index 2dbb8cade048..c88fd32563eb 100755
--- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
@@ -23,7 +23,7 @@ create_table_from_c()
{
local sc nr last_sc
- create_table_exe=`mktemp /tmp/create-table-XXXXXX`
+ create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX`
{
diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile
index 7fbca175099e..275dea7ff59a 100644
--- a/tools/perf/arch/sparc/Makefile
+++ b/tools/perf/arch/sparc/Makefile
@@ -1,3 +1,5 @@
ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
endif
+
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/sparc/annotate/instructions.c b/tools/perf/arch/sparc/annotate/instructions.c
new file mode 100644
index 000000000000..2614c010c235
--- /dev/null
+++ b/tools/perf/arch/sparc/annotate/instructions.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+
+static int is_branch_cond(const char *cond)
+{
+ if (cond[0] == '\0')
+ return 1;
+
+ if (cond[0] == 'a' && cond[1] == '\0')
+ return 1;
+
+ if (cond[0] == 'c' &&
+ (cond[1] == 'c' || cond[1] == 's') &&
+ cond[2] == '\0')
+ return 1;
+
+ if (cond[0] == 'e' &&
+ (cond[1] == '\0' ||
+ (cond[1] == 'q' && cond[2] == '\0')))
+ return 1;
+
+ if (cond[0] == 'g' &&
+ (cond[1] == '\0' ||
+ (cond[1] == 't' && cond[2] == '\0') ||
+ (cond[1] == 'e' && cond[2] == '\0') ||
+ (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0')))
+ return 1;
+
+ if (cond[0] == 'l' &&
+ (cond[1] == '\0' ||
+ (cond[1] == 't' && cond[2] == '\0') ||
+ (cond[1] == 'u' && cond[2] == '\0') ||
+ (cond[1] == 'e' && cond[2] == '\0') ||
+ (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0')))
+ return 1;
+
+ if (cond[0] == 'n' &&
+ (cond[1] == '\0' ||
+ (cond[1] == 'e' && cond[2] == '\0') ||
+ (cond[1] == 'z' && cond[2] == '\0') ||
+ (cond[1] == 'e' && cond[2] == 'g' && cond[3] == '\0')))
+ return 1;
+
+ if (cond[0] == 'b' &&
+ cond[1] == 'p' &&
+ cond[2] == 'o' &&
+ cond[3] == 's' &&
+ cond[4] == '\0')
+ return 1;
+
+ if (cond[0] == 'v' &&
+ (cond[1] == 'c' || cond[1] == 's') &&
+ cond[2] == '\0')
+ return 1;
+
+ if (cond[0] == 'b' &&
+ cond[1] == 'z' &&
+ cond[2] == '\0')
+ return 1;
+
+ return 0;
+}
+
+static int is_branch_reg_cond(const char *cond)
+{
+ if ((cond[0] == 'n' || cond[0] == 'l') &&
+ cond[1] == 'z' &&
+ cond[2] == '\0')
+ return 1;
+
+ if (cond[0] == 'z' &&
+ cond[1] == '\0')
+ return 1;
+
+ if ((cond[0] == 'g' || cond[0] == 'l') &&
+ cond[1] == 'e' &&
+ cond[2] == 'z' &&
+ cond[3] == '\0')
+ return 1;
+
+ if (cond[0] == 'g' &&
+ cond[1] == 'z' &&
+ cond[2] == '\0')
+ return 1;
+
+ return 0;
+}
+
+static int is_branch_float_cond(const char *cond)
+{
+ if (cond[0] == '\0')
+ return 1;
+
+ if ((cond[0] == 'a' || cond[0] == 'e' ||
+ cond[0] == 'z' || cond[0] == 'g' ||
+ cond[0] == 'l' || cond[0] == 'n' ||
+ cond[0] == 'o' || cond[0] == 'u') &&
+ cond[1] == '\0')
+ return 1;
+
+ if (((cond[0] == 'g' && cond[1] == 'e') ||
+ (cond[0] == 'l' && (cond[1] == 'e' ||
+ cond[1] == 'g')) ||
+ (cond[0] == 'n' && (cond[1] == 'e' ||
+ cond[1] == 'z')) ||
+ (cond[0] == 'u' && (cond[1] == 'e' ||
+ cond[1] == 'g' ||
+ cond[1] == 'l'))) &&
+ cond[2] == '\0')
+ return 1;
+
+ if (cond[0] == 'u' &&
+ (cond[1] == 'g' || cond[1] == 'l') &&
+ cond[2] == 'e' &&
+ cond[3] == '\0')
+ return 1;
+
+ return 0;
+}
+
+static struct ins_ops *sparc__associate_instruction_ops(struct arch *arch, const char *name)
+{
+ struct ins_ops *ops = NULL;
+
+ if (!strcmp(name, "call") ||
+ !strcmp(name, "jmp") ||
+ !strcmp(name, "jmpl")) {
+ ops = &call_ops;
+ } else if (!strcmp(name, "ret") ||
+ !strcmp(name, "retl") ||
+ !strcmp(name, "return")) {
+ ops = &ret_ops;
+ } else if (!strcmp(name, "mov")) {
+ ops = &mov_ops;
+ } else {
+ if (name[0] == 'c' &&
+ (name[1] == 'w' || name[1] == 'x'))
+ name += 2;
+
+ if (name[0] == 'b') {
+ const char *cond = name + 1;
+
+ if (cond[0] == 'r') {
+ if (is_branch_reg_cond(cond + 1))
+ ops = &jump_ops;
+ } else if (is_branch_cond(cond)) {
+ ops = &jump_ops;
+ }
+ } else if (name[0] == 'f' && name[1] == 'b') {
+ if (is_branch_float_cond(name + 2))
+ ops = &jump_ops;
+ }
+ }
+
+ if (ops)
+ arch__associate_ins_ops(arch, name, ops);
+
+ return ops;
+}
+
+static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+ if (!arch->initialized) {
+ arch->initialized = true;
+ arch->associate_instruction_ops = sparc__associate_instruction_ops;
+ arch->objdump.comment_char = '#';
+ }
+
+ return 0;
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0980dfe3396b..10cf889c6d75 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -592,6 +592,9 @@ static void record__init_features(struct record *rec)
if (!rec->opts.full_auxtrace)
perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
+ if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
+ perf_header__clear_feat(&session->header, HEADER_CLOCKID);
+
perf_header__clear_feat(&session->header, HEADER_STAT);
}
@@ -897,6 +900,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
record__init_features(rec);
+ if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
+ session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
+
if (forks) {
err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
argv, data->is_pipe,
@@ -1337,6 +1343,19 @@ static const struct clockid_map clockids[] = {
CLOCKID_END,
};
+static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
+{
+ struct timespec res;
+
+ *res_ns = 0;
+ if (!clock_getres(clk_id, &res))
+ *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
+ else
+ pr_warning("WARNING: Failed to determine specified clock resolution.\n");
+
+ return 0;
+}
+
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
struct record_opts *opts = (struct record_opts *)opt->value;
@@ -1360,7 +1379,7 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
/* if its a number, we're done */
if (sscanf(str, "%d", &opts->clockid) == 1)
- return 0;
+ return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
/* allow a "CLOCK_" prefix to the name */
if (!strncasecmp(str, "CLOCK_", 6))
@@ -1369,7 +1388,8 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
for (cm = clockids; cm->name; cm++) {
if (!strcasecmp(str, cm->name)) {
opts->clockid = cm->clockid;
- return 0;
+ return get_clockid_res(opts->clockid,
+ &opts->clockid_res_ns);
}
}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 4da5e32b9e03..b5bc85bd0bbe 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -44,6 +44,7 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
+#include <subcmd/pager.h>
#include "sane_ctype.h"
@@ -912,7 +913,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
struct perf_insn *x, u8 *inbuf, int len,
- int insn, FILE *fp)
+ int insn, FILE *fp, int *total_cycles)
{
int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip,
dump_insn(x, ip, inbuf, len, NULL),
@@ -921,7 +922,8 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
en->flags.in_tx ? " INTX" : "",
en->flags.abort ? " ABORT" : "");
if (en->flags.cycles) {
- printed += fprintf(fp, " %d cycles", en->flags.cycles);
+ *total_cycles += en->flags.cycles;
+ printed += fprintf(fp, " %d cycles [%d]", en->flags.cycles, *total_cycles);
if (insn)
printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles);
}
@@ -978,6 +980,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
u8 buffer[MAXBB];
unsigned off;
struct symbol *lastsym = NULL;
+ int total_cycles = 0;
if (!(br && br->nr))
return 0;
@@ -998,7 +1001,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
x.cpumode, x.cpu, &lastsym, attr, fp);
printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
- &x, buffer, len, 0, fp);
+ &x, buffer, len, 0, fp, &total_cycles);
}
/* Print all blocks */
@@ -1026,7 +1029,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
if (ip == end) {
- printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp);
+ printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp,
+ &total_cycles);
break;
} else {
printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
@@ -1104,6 +1108,35 @@ out:
return printed;
}
+static const char *resolve_branch_sym(struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct thread *thread,
+ struct addr_location *al,
+ u64 *ip)
+{
+ struct addr_location addr_al;
+ struct perf_event_attr *attr = &evsel->attr;
+ const char *name = NULL;
+
+ if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
+ if (sample_addr_correlates_sym(attr)) {
+ thread__resolve(thread, &addr_al, sample);
+ if (addr_al.sym)
+ name = addr_al.sym->name;
+ else
+ *ip = sample->addr;
+ } else {
+ *ip = sample->addr;
+ }
+ } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
+ if (al->sym)
+ name = al->sym->name;
+ else
+ *ip = sample->ip;
+ }
+ return name;
+}
+
static int perf_sample__fprintf_callindent(struct perf_sample *sample,
struct perf_evsel *evsel,
struct thread *thread,
@@ -1111,7 +1144,6 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
{
struct perf_event_attr *attr = &evsel->attr;
size_t depth = thread_stack__depth(thread);
- struct addr_location addr_al;
const char *name = NULL;
static int spacing;
int len = 0;
@@ -1125,22 +1157,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
depth += 1;
- if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
- if (sample_addr_correlates_sym(attr)) {
- thread__resolve(thread, &addr_al, sample);
- if (addr_al.sym)
- name = addr_al.sym->name;
- else
- ip = sample->addr;
- } else {
- ip = sample->addr;
- }
- } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
- if (al->sym)
- name = al->sym->name;
- else
- ip = sample->ip;
- }
+ name = resolve_branch_sym(sample, evsel, thread, al, &ip);
if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) {
dlen += fprintf(fp, "(");
@@ -1646,6 +1663,47 @@ static void perf_sample__fprint_metric(struct perf_script *script,
}
}
+