diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-03 12:47:28 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-10-03 12:47:28 -0700 |
| commit | 12b7bcb43e6ea834ab2f5dc52d971e379a0ca109 (patch) | |
| tree | 65218ee4792a1bae88feb75e615c5ec4e602c7a2 /tools/perf/bench/numa.c | |
| parent | 00bcf5cdd6c0e2e92ce3dd852ca68a3b779fa4ec (diff) | |
| parent | 41aad2a6d4fcdda8d73c9739daf7a9f3f49499d6 (diff) | |
| download | linux-12b7bcb43e6ea834ab2f5dc52d971e379a0ca109.tar.gz linux-12b7bcb43e6ea834ab2f5dc52d971e379a0ca109.tar.bz2 linux-12b7bcb43e6ea834ab2f5dc52d971e379a0ca109.zip | |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"The main kernel side changes were:
- uprobes enhancements (Masami Hiramatsu)
- Uncore group events enhancements (David Carrillo-Cisneros)
- x86 Intel: Add support for Skylake server uncore PMUs (Kan Liang)
- x86 Intel: LBR cleanups and enhancements, for better branch
annotation tracking (Peter Zijlstra)
- x86 Intel: Add support for PTWRITE and power event tracing
(Alexander Shishkin)
- ... various fixes, cleanups and smaller enhancements.
Lots of tooling changes - a couple of highlights:
- Support event group view with hierarchy mode in 'perf top' and
'perf report' (Namhyung Kim)
e.g.:
$ perf record -e '{cycles,instructions}' make
$ perf report --hierarchy --stdio
...
# Overhead Command / Shared Object / Symbol
# ...................... ..................................
...
25.74% 27.18% sh
19.96% 24.14% libc-2.24.so
9.55% 14.64% [.] __strcmp_sse2
1.54% 0.00% [.] __tfind
1.07% 1.13% [.] _int_malloc
0.95% 0.00% [.] __strchr_sse2
0.89% 1.39% [.] __tsearch
0.76% 0.00% [.] strlen
- Add branch stack / basic block info to 'perf annotate --stdio',
where for each branch, we add an asm comment after the instruction
with information on how often it was taken and predicted. See
example with color output at:
http://vger.kernel.org/~acme/perf/annotate_basic_blocks.png
(Peter Zijlstra)
- Add support for using symbols in address filters with Intel PT and
ARM CoreSight (hardware assisted tracing facilities) (Adrian
Hunter, Mathieu Poirier)
- Add support for interacting with Coresight PMU ETMs/PTMs, which are
IP blocks that perform hardware assisted tracing on an ARM CPU core
(Mathieu Poirier)
- Support generating cross arch probes, i.e. if you specify a vmlinux
file for a different arch than the one on the host machine,
$ perf probe --definition function_name args
will generate the probe definition string needed to append to the
target machine /sys/kernel/debug/tracing/kprobes_events file, using
scripting (Masami Hiramatsu).
- Allow configuring the default 'perf report -s' sort order in
~/.perfconfig, for instance, "sym,dso" may be more fitting for
kernel developers. (Arnaldo Carvalho de Melo)
- ... plus lots of other changes, refactorings, features and fixes"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (149 commits)
perf tests: Add dwarf unwind test for powerpc
perf probe: Match linkage name with mangled name
perf probe: Fix to cut off incompatible chars from group name
perf probe: Skip if the function address is 0
perf probe: Ignore the error of finding inline instance
perf intel-pt: Fix decoding when there are address filters
perf intel-pt: Enable decoder to handle TIP.PGD with missing IP
perf intel-pt: Read address filter from AUXTRACE_INFO event
perf intel-pt: Record address filter in AUXTRACE_INFO event
perf intel-pt: Add a helper function for processing AUXTRACE_INFO
perf intel-pt: Fix missing error codes processing auxtrace_info
perf intel-pt: Add support for recording the max non-turbo ratio
perf intel-pt: Fix snapshot overlap detection decoder errors
perf probe: Increase debug level of SDT debug messages
perf record: Add support for using symbols in address filters
perf symbols: Add dso__last_symbol()
perf record: Fix error paths
perf record: Rename label 'out_symbol_exit'
perf script: Fix vanished idle symbols
perf evsel: Add support for address filters
...
Diffstat (limited to 'tools/perf/bench/numa.c')
| -rw-r--r-- | tools/perf/bench/numa.c | 53 |
1 files changed, 27 insertions, 26 deletions
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index f7f530081aa9..8efe904e486b 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -30,6 +30,7 @@ #include <sys/wait.h> #include <sys/prctl.h> #include <sys/types.h> +#include <linux/time64.h> #include <numa.h> #include <numaif.h> @@ -1004,7 +1005,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence) if (strong && process_groups == g->p.nr_proc) { if (!*convergence) { *convergence = runtime_ns_max; - tprintf(" (%6.1fs converged)\n", *convergence/1e9); + tprintf(" (%6.1fs converged)\n", *convergence / NSEC_PER_SEC); if (g->p.measure_convergence) { g->all_converged = true; g->stop_work = true; @@ -1012,7 +1013,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence) } } else { if (*convergence) { - tprintf(" (%6.1fs de-converged)", runtime_ns_max/1e9); + tprintf(" (%6.1fs de-converged)", runtime_ns_max / NSEC_PER_SEC); *convergence = 0; } tprintf("\n"); @@ -1022,7 +1023,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence) static void show_summary(double runtime_ns_max, int l, double *convergence) { tprintf("\r # %5.1f%% [%.1f mins]", - (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max/1e9 / 60.0); + (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max / NSEC_PER_SEC / 60.0); calc_convergence(runtime_ns_max, convergence); @@ -1179,8 +1180,8 @@ static void *worker_thread(void *__tdata) if (details >= 3) { timersub(&stop, &start, &diff); - runtime_ns_max = diff.tv_sec * 1000000000; - runtime_ns_max += diff.tv_usec * 1000; + runtime_ns_max = diff.tv_sec * NSEC_PER_SEC; + runtime_ns_max += diff.tv_usec * NSEC_PER_USEC; if (details >= 0) { printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n", @@ -1192,23 +1193,23 @@ static void *worker_thread(void *__tdata) continue; timersub(&stop, &start0, &diff); - runtime_ns_max = diff.tv_sec * 1000000000ULL; - runtime_ns_max += diff.tv_usec * 1000ULL; + runtime_ns_max = 
diff.tv_sec * NSEC_PER_SEC; + runtime_ns_max += diff.tv_usec * NSEC_PER_USEC; show_summary(runtime_ns_max, l, &convergence); } gettimeofday(&stop, NULL); timersub(&stop, &start0, &diff); - td->runtime_ns = diff.tv_sec * 1000000000ULL; - td->runtime_ns += diff.tv_usec * 1000ULL; - td->speed_gbs = bytes_done / (td->runtime_ns / 1e9) / 1e9; + td->runtime_ns = diff.tv_sec * NSEC_PER_SEC; + td->runtime_ns += diff.tv_usec * NSEC_PER_USEC; + td->speed_gbs = bytes_done / (td->runtime_ns / NSEC_PER_SEC) / 1e9; getrusage(RUSAGE_THREAD, &rusage); - td->system_time_ns = rusage.ru_stime.tv_sec * 1000000000ULL; - td->system_time_ns += rusage.ru_stime.tv_usec * 1000ULL; - td->user_time_ns = rusage.ru_utime.tv_sec * 1000000000ULL; - td->user_time_ns += rusage.ru_utime.tv_usec * 1000ULL; + td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC; + td->system_time_ns += rusage.ru_stime.tv_usec * NSEC_PER_USEC; + td->user_time_ns = rusage.ru_utime.tv_sec * NSEC_PER_SEC; + td->user_time_ns += rusage.ru_utime.tv_usec * NSEC_PER_USEC; free_data(thread_data, g->p.bytes_thread); @@ -1469,7 +1470,7 @@ static int __bench_numa(const char *name) } /* Wait for all the threads to start up: */ while (g->nr_tasks_started != g->p.nr_tasks) - usleep(1000); + usleep(USEC_PER_MSEC); BUG_ON(g->nr_tasks_started != g->p.nr_tasks); @@ -1488,9 +1489,9 @@ static int __bench_numa(const char *name) timersub(&stop, &start, &diff); - startup_sec = diff.tv_sec * 1000000000.0; - startup_sec += diff.tv_usec * 1000.0; - startup_sec /= 1e9; + startup_sec = diff.tv_sec * NSEC_PER_SEC; + startup_sec += diff.tv_usec * NSEC_PER_USEC; + startup_sec /= NSEC_PER_SEC; tprintf(" threads initialized in %.6f seconds.\n", startup_sec); tprintf(" #\n"); @@ -1529,14 +1530,14 @@ static int __bench_numa(const char *name) tprintf("\n ###\n"); tprintf("\n"); - runtime_sec_max = diff.tv_sec * 1000000000.0; - runtime_sec_max += diff.tv_usec * 1000.0; - runtime_sec_max /= 1e9; + runtime_sec_max = diff.tv_sec * NSEC_PER_SEC; + 
runtime_sec_max += diff.tv_usec * NSEC_PER_USEC; + runtime_sec_max /= NSEC_PER_SEC; - runtime_sec_min = runtime_ns_min/1e9; + runtime_sec_min = runtime_ns_min / NSEC_PER_SEC; bytes = g->bytes_done; - runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / 1e9; + runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / NSEC_PER_SEC; if (g->p.measure_convergence) { print_res(name, runtime_sec_max, @@ -1562,7 +1563,7 @@ static int __bench_numa(const char *name) print_res(name, bytes / 1e9, "GB,", "data-total", "GB data processed, total"); - print_res(name, runtime_sec_max * 1e9 / (bytes / g->p.nr_tasks), + print_res(name, runtime_sec_max * NSEC_PER_SEC / (bytes / g->p.nr_tasks), "nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime"); print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max, @@ -1581,9 +1582,9 @@ static int __bench_numa(const char *name) snprintf(tname, 32, "process%d:thread%d", p, t); print_res(tname, td->speed_gbs, "GB/sec", "thread-speed", "GB/sec/thread speed"); - print_res(tname, td->system_time_ns / 1e9, + print_res(tname, td->system_time_ns / NSEC_PER_SEC, "secs", "thread-system-time", "system CPU time/thread"); - print_res(tname, td->user_time_ns / 1e9, + print_res(tname, td->user_time_ns / NSEC_PER_SEC, "secs", "thread-user-time", "user CPU time/thread"); } } |
