Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar: "The main kernel side changes were: - uprobes enhancements (Masami Hiramatsu) - Uncore group events enhancements (David Carrillo-Cisneros) - x86 Intel: Add support for Skylake server uncore PMUs (Kan Liang) - x86 Intel: LBR cleanups and enhancements, for better branch annotation tracking (Peter Zijlstra) - x86 Intel: Add support for PTWRITE and power event tracing (Alexander Shishkin) - ... various fixes, cleanups and smaller enhancements. Lots of tooling changes - a couple of highlights: - Support event group view with hierarchy mode in 'perf top' and 'perf report' (Namhyung Kim) e.g.: $ perf record -e '{cycles,instructions}' make $ perf report --hierarchy --stdio ... # Overhead Command / Shared Object / Symbol # ...................... .................................. ... 25.74% 27.18%sh 19.96% 24.14%libc-2.24.so 9.55% 14.64%[.] __strcmp_sse2 1.54% 0.00%[.] __tfind 1.07% 1.13%[.] _int_malloc 0.95% 0.00%[.] __strchr_sse2 0.89% 1.39%[.] __tsearch 0.76% 0.00%[.] strlen - Add branch stack / basic block info to 'perf annotate --stdio', where for each branch, we add an asm comment after the instruction with information on how often it was taken and predicted. See example with color output at: http://vger.kernel.org/~acme/perf/annotate_basic_blocks.png (Peter Zijlstra) - Add support for using symbols in address filters with Intel PT and ARM CoreSight (hardware assisted tracing facilities) (Adrian Hunter, Mathieu Poirier) - Add support for interacting with Coresight PMU ETMs/PTMs, that are IP blocks to perform hardware assisted tracing on a ARM CPU core (Mathieu Poirier) - Support generating cross arch probes, i.e. if you specify a vmlinux file for different arch than the one in the host machine, $ perf probe --definition function_name args will generate the probe definition string needed to append to the target machine /sys/kernel/debug/tracing/kprobes_events file, using scripting (Masami Hiramatsu). - Allow configuring the default 'perf report -s' sort order in ~/.perfconfig, for instance, "sym,dso" may be more fitting for kernel developers. (Arnaldo Carvalho de Melo) - ... plus lots of other changes, refactorings, features and fixes" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (149 commits) perf tests: Add dwarf unwind test for powerpc perf probe: Match linkage name with mangled name perf probe: Fix to cut off incompatible chars from group name perf probe: Skip if the function address is 0 perf probe: Ignore the error of finding inline instance perf intel-pt: Fix decoding when there are address filters perf intel-pt: Enable decoder to handle TIP.PGD with missing IP perf intel-pt: Read address filter from AUXTRACE_INFO event perf intel-pt: Record address filter in AUXTRACE_INFO event perf intel-pt: Add a helper function for processing AUXTRACE_INFO perf intel-pt: Fix missing error codes processing auxtrace_info perf intel-pt: Add support for recording the max non-turbo ratio perf intel-pt: Fix snapshot overlap detection decoder errors perf probe: Increase debug level of SDT debug messages perf record: Add support for using symbols in address filters perf symbols: Add dso__last_symbol() perf record: Fix error paths perf record: Rename label 'out_symbol_exit' perf script: Fix vanished idle symbols perf evsel: Add support for address filters ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-10-03 12:47:28 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-10-03 12:47:28 -0700
commit: 12b7bcb43e6ea834ab2f5dc52d971e379a0ca109 (patch)
tree: 65218ee4792a1bae88feb75e615c5ec4e602c7a2 /tools/perf/bench/numa.c
parent: 00bcf5cdd6c0e2e92ce3dd852ca68a3b779fa4ec (diff)
parent: 41aad2a6d4fcdda8d73c9739daf7a9f3f49499d6 (diff)
download: linux-12b7bcb43e6ea834ab2f5dc52d971e379a0ca109.tar.gz
linux-12b7bcb43e6ea834ab2f5dc52d971e379a0ca109.tar.bz2
linux-12b7bcb43e6ea834ab2f5dc52d971e379a0ca109.zip
1 files changed, 27 insertions, 26 deletions
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index f7f530081aa9..8efe904e486b 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -30,6 +30,7 @@
 #include <sys/wait.h>
 #include <sys/prctl.h>
 #include <sys/types.h>
+#include <linux/time64.h>
 
 #include <numa.h>
 #include <numaif.h>
@@ -1004,7 +1005,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
 	if (strong && process_groups == g->p.nr_proc) {
 		if (!*convergence) {
 			*convergence = runtime_ns_max;
-			tprintf(" (%6.1fs converged)\n", *convergence/1e9);
+			tprintf(" (%6.1fs converged)\n", *convergence / NSEC_PER_SEC);
 			if (g->p.measure_convergence) {
 				g->all_converged = true;
 				g->stop_work = true;
@@ -1012,7 +1013,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
 		}
 	} else {
 		if (*convergence) {
-			tprintf(" (%6.1fs de-converged)", runtime_ns_max/1e9);
+			tprintf(" (%6.1fs de-converged)", runtime_ns_max / NSEC_PER_SEC);
 			*convergence = 0;
 		}
 		tprintf("\n");
@@ -1022,7 +1023,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
 static void show_summary(double runtime_ns_max, int l, double *convergence)
 {
 	tprintf("\r #  %5.1f%%  [%.1f mins]",
-		(double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max/1e9 / 60.0);
+		(double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max / NSEC_PER_SEC / 60.0);
 
 	calc_convergence(runtime_ns_max, convergence);
 
@@ -1179,8 +1180,8 @@ static void *worker_thread(void *__tdata)
 
 		if (details >= 3) {
 			timersub(&stop, &start, &diff);
-			runtime_ns_max = diff.tv_sec * 1000000000;
-			runtime_ns_max += diff.tv_usec * 1000;
+			runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
+			runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
 
 			if (details >= 0) {
 				printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n",
@@ -1192,23 +1193,23 @@ static void *worker_thread(void *__tdata)
 			continue;
 
 		timersub(&stop, &start0, &diff);
-		runtime_ns_max = diff.tv_sec * 1000000000ULL;
-		runtime_ns_max += diff.tv_usec * 1000ULL;
+		runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
+		runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
 
 		show_summary(runtime_ns_max, l, &convergence);
 	}
 
 	gettimeofday(&stop, NULL);
 	timersub(&stop, &start0, &diff);
-	td->runtime_ns = diff.tv_sec * 1000000000ULL;
-	td->runtime_ns += diff.tv_usec * 1000ULL;
-	td->speed_gbs = bytes_done / (td->runtime_ns / 1e9) / 1e9;
+	td->runtime_ns = diff.tv_sec * NSEC_PER_SEC;
+	td->runtime_ns += diff.tv_usec * NSEC_PER_USEC;
+	td->speed_gbs = bytes_done / (td->runtime_ns / NSEC_PER_SEC) / 1e9;
 
 	getrusage(RUSAGE_THREAD, &rusage);
-	td->system_time_ns = rusage.ru_stime.tv_sec * 1000000000ULL;
-	td->system_time_ns += rusage.ru_stime.tv_usec * 1000ULL;
-	td->user_time_ns = rusage.ru_utime.tv_sec * 1000000000ULL;
-	td->user_time_ns += rusage.ru_utime.tv_usec * 1000ULL;
+	td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC;
+	td->system_time_ns += rusage.ru_stime.tv_usec * NSEC_PER_USEC;
+	td->user_time_ns = rusage.ru_utime.tv_sec * NSEC_PER_SEC;
+	td->user_time_ns += rusage.ru_utime.tv_usec * NSEC_PER_USEC;
 
 	free_data(thread_data, g->p.bytes_thread);
 
@@ -1469,7 +1470,7 @@ static int __bench_numa(const char *name)
 	}
 	/* Wait for all the threads to start up: */
 	while (g->nr_tasks_started != g->p.nr_tasks)
-		usleep(1000);
+		usleep(USEC_PER_MSEC);
 
 	BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
 
@@ -1488,9 +1489,9 @@ static int __bench_numa(const char *name)
 
 		timersub(&stop, &start, &diff);
 
-		startup_sec = diff.tv_sec * 1000000000.0;
-		startup_sec += diff.tv_usec * 1000.0;
-		startup_sec /= 1e9;
+		startup_sec = diff.tv_sec * NSEC_PER_SEC;
+		startup_sec += diff.tv_usec * NSEC_PER_USEC;
+		startup_sec /= NSEC_PER_SEC;
 
 		tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
 		tprintf(" #\n");
@@ -1529,14 +1530,14 @@ static int __bench_numa(const char *name)
 	tprintf("\n ###\n");
 	tprintf("\n");
 
-	runtime_sec_max = diff.tv_sec * 1000000000.0;
-	runtime_sec_max += diff.tv_usec * 1000.0;
-	runtime_sec_max /= 1e9;
+	runtime_sec_max = diff.tv_sec * NSEC_PER_SEC;
+	runtime_sec_max += diff.tv_usec * NSEC_PER_USEC;
+	runtime_sec_max /= NSEC_PER_SEC;
 
-	runtime_sec_min = runtime_ns_min/1e9;
+	runtime_sec_min = runtime_ns_min / NSEC_PER_SEC;
 
 	bytes = g->bytes_done;
-	runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / 1e9;
+	runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / NSEC_PER_SEC;
 
 	if (g->p.measure_convergence) {
 		print_res(name, runtime_sec_max,
@@ -1562,7 +1563,7 @@ static int __bench_numa(const char *name)
 	print_res(name, bytes / 1e9,
 		"GB,", "data-total",		"GB data processed, total");
 
-	print_res(name, runtime_sec_max * 1e9 / (bytes / g->p.nr_tasks),
+	print_res(name, runtime_sec_max * NSEC_PER_SEC / (bytes / g->p.nr_tasks),
 		"nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");
 
 	print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
@@ -1581,9 +1582,9 @@ static int __bench_numa(const char *name)
 				snprintf(tname, 32, "process%d:thread%d", p, t);
 				print_res(tname, td->speed_gbs,
 					"GB/sec",	"thread-speed", "GB/sec/thread speed");
-				print_res(tname, td->system_time_ns / 1e9,
+				print_res(tname, td->system_time_ns / NSEC_PER_SEC,
 					"secs",	"thread-system-time", "system CPU time/thread");
-				print_res(tname, td->user_time_ns / 1e9,
+				print_res(tname, td->user_time_ns / NSEC_PER_SEC,
 					"secs",	"thread-user-time", "user CPU time/thread");
 			}
 		}
author	Linus Torvalds <torvalds@linux-foundation.org>	2016-10-03 12:47:28 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-10-03 12:47:28 -0700
commit	12b7bcb43e6ea834ab2f5dc52d971e379a0ca109 (patch)
tree	65218ee4792a1bae88feb75e615c5ec4e602c7a2 /tools/perf/bench/numa.c
parent	00bcf5cdd6c0e2e92ce3dd852ca68a3b779fa4ec (diff)
parent	41aad2a6d4fcdda8d73c9739daf7a9f3f49499d6 (diff)
download	linux-12b7bcb43e6ea834ab2f5dc52d971e379a0ca109.tar.gz linux-12b7bcb43e6ea834ab2f5dc52d971e379a0ca109.tar.bz2 linux-12b7bcb43e6ea834ab2f5dc52d971e379a0ca109.zip