Merge tag 'perf-core-for-mingo-20160908' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

- Add branch stack / basic block info to 'perf annotate --stdio', where for
  each branch, we add an asm comment after the instruction with information
  on how often it was taken and predicted. See example with color output at:

    http://vger.kernel.org/~acme/perf/annotate_basic_blocks.png

  (Peter Zijlstra)

- Only open an evsel in CPUs in its cpu map, fixing some use cases in
  systems with multiple PMUs with different CPU maps (Mark Rutland)

- Fix handling of huge TLB maps, recognizing them as anonymous (Wang Nan)

Infrastructure changes:

- Remove the symbol filtering code, i.e. the callbacks passed to all
  functions that could end up loading a DSO symtab, simplifying the code,
  eventually allowing what we should have had since day one: removing the
  'map' parameter from dso__load() functions (Arnaldo Carvalho de Melo)

Arch specific build fixes:

- Fix detached tarball build on powerpc, where we were still accessing a
  file outside tools/ (Ravi Bangoria)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Ingo Molnar <mingo@kernel.org> 2016-09-09 07:46:13 +0200
committer: Ingo Molnar <mingo@kernel.org> 2016-09-09 07:46:13 +0200
commit: 14520d630adb4314722dd57ccb689ffdc6a31383 (patch)
tree: 315bf902dc1d03622dc43c362b6fc48455713149
parent: c0b172e5b6770048751b2c0a4fe44346c2080c5d (diff)
parent: 25b8592e912f085ce2ff736a2927584ddeab238c (diff)
download: linux-14520d630adb4314722dd57ccb689ffdc6a31383.tar.gz
linux-14520d630adb4314722dd57ccb689ffdc6a31383.tar.bz2
linux-14520d630adb4314722dd57ccb689ffdc6a31383.zip
32 files changed, 817 insertions, 286 deletions
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index ba7094b945ff..f99f49e4a31e 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -34,6 +34,10 @@
 #define TRACEFS_MAGIC          0x74726163
 #endif
 
+#ifndef HUGETLBFS_MAGIC
+#define HUGETLBFS_MAGIC        0x958458f6
+#endif
+
 static const char * const sysfs__fs_known_mountpoints[] = {
 	"/sys",
 	0,
@@ -67,6 +71,10 @@ static const char * const tracefs__known_mountpoints[] = {
 	0,
 };
 
+static const char * const hugetlbfs__known_mountpoints[] = {
+	0,
+};
+
 struct fs {
 	const char		*name;
 	const char * const	*mounts;
@@ -80,6 +88,7 @@ enum {
 	FS__PROCFS  = 1,
 	FS__DEBUGFS = 2,
 	FS__TRACEFS = 3,
+	FS__HUGETLBFS = 4,
 };
 
 #ifndef TRACEFS_MAGIC
@@ -107,6 +116,11 @@ static struct fs fs__entries[] = {
 		.mounts	= tracefs__known_mountpoints,
 		.magic	= TRACEFS_MAGIC,
 	},
+	[FS__HUGETLBFS] = {
+		.name	= "hugetlbfs",
+		.mounts = hugetlbfs__known_mountpoints,
+		.magic	= HUGETLBFS_MAGIC,
+	},
 };
 
 static bool fs__read_mounts(struct fs *fs)
@@ -265,6 +279,7 @@ FS(sysfs,   FS__SYSFS);
 FS(procfs,  FS__PROCFS);
 FS(debugfs, FS__DEBUGFS);
 FS(tracefs, FS__TRACEFS);
+FS(hugetlbfs, FS__HUGETLBFS);
 
 int filename__read_int(const char *filename, int *value)
 {
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 16c9c2ed7c5b..a63269f5d20c 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -21,6 +21,7 @@ FS(sysfs)
 FS(procfs)
 FS(debugfs)
 FS(tracefs)
+FS(hugetlbfs)
 
 #undef FS
 
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index 35745a733100..ed9d5d15d5b6 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -108,7 +108,7 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
 	int i = 0;
 
 	map = get_target_map(pev->target, pev->uprobes);
-	if (!map || map__load(map, NULL) < 0)
+	if (!map || map__load(map) < 0)
 		return;
 
 	for (i = 0; i < ntevs; i++) {
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index f07b23011b22..ebb628332a6e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -30,6 +30,7 @@
 #include "util/tool.h"
 #include "util/data.h"
 #include "arch/common.h"
+#include "util/block-range.h"
 
 #include <dlfcn.h>
 #include <linux/bitmap.h>
@@ -46,6 +47,103 @@ struct perf_annotate {
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
+/*
+ * Given one basic block:
+ *
+ *	from	to		branch_i
+ *	* ----> *
+ *		|
+ *		| block
+ *		v
+ *		* ----> *
+ *		from	to	branch_i+1
+ *
+ * where the horizontal arrows are the branches and the vertical arrow is the
+ * executed block of instructions.
+ *
+ * We count, for each 'instruction', the number of blocks that covered it as
+ * well as count the ratio each branch is taken.
+ *
+ * We can do this without knowing the actual instruction stream by keeping
+ * track of the address ranges. We break down ranges such that there is no
+ * overlap and iterate from the start until the end.
+ *
+ * @acme: once we parse the objdump output _before_ processing the samples,
+ * we can easily fold the branch.cycles IPC bits in.
+ */
+static void process_basic_block(struct addr_map_symbol *start,
+				struct addr_map_symbol *end,
+				struct branch_flags *flags)
+{
+	struct symbol *sym = start->sym;
+	struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
+	struct block_range_iter iter;
+	struct block_range *entry;
+
+	/*
+	 * Sanity; NULL isn't executable and the CPU cannot execute backwards
+	 */
+	if (!start->addr || start->addr > end->addr)
+		return;
+
+	iter = block_range__create(start->addr, end->addr);
+	if (!block_range_iter__valid(&iter))
+		return;
+
+	/*
+	 * First block in range is a branch target.
+	 */
+	entry = block_range_iter(&iter);
+	assert(entry->is_target);
+	entry->entry++;
+
+	do {
+		entry = block_range_iter(&iter);
+
+		entry->coverage++;
+		entry->sym = sym;
+
+		if (notes)
+			notes->max_coverage = max(notes->max_coverage, entry->coverage);
+
+	} while (block_range_iter__next(&iter));
+
+	/*
+	 * Last block in range is a branch.
+	 */
+	entry = block_range_iter(&iter);
+	assert(entry->is_branch);
+	entry->taken++;
+	if (flags->predicted)
+		entry->pred++;
+}
+
+static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
+				 struct perf_sample *sample)
+{
+	struct addr_map_symbol *prev = NULL;
+	struct branch_info *bi;
+	int i;
+
+	if (!bs || !bs->nr)
+		return;
+
+	bi = sample__resolve_bstack(sample, al);
+	if (!bi)
+		return;
+
+	for (i = bs->nr - 1; i >= 0; i--) {
+		/*
+		 * XXX filter against symbol
+		 */
+		if (prev)
+			process_basic_block(prev, &bi[i].from, &bi[i].flags);
+		prev = &bi[i].to;
+	}
+
+	free(bi);
+}
+
 static int perf_evsel__add_sample(struct perf_evsel *evsel,
 				  struct perf_sample *sample,
 				  struct addr_location *al,
@@ -72,6 +170,12 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
 		return 0;
 	}
 
+	/*
+	 * XXX filtered samples can still have branch entries pointing into our
+	 * symbol and are missed.
+	 */
+	process_branch_stack(sample->branch_stack, al, sample);
+
 	sample->period = 1;
 	sample->weight = 1;
 
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 73c1c4cc3600..b9bc7e39833a 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -429,7 +429,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
 	if (al.map != NULL) {
 		if (!al.map->dso->hit) {
 			al.map->dso->hit = 1;
-			if (map__load(al.map, NULL) >= 0) {
+			if (map__load(al.map) >= 0) {
 				dso__inject_build_id(al.map->dso, tool, machine);
 				/*
 				 * If this fails, too bad, let the other side
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index fdde1bd3e306..d426dcb18ce9 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -330,7 +330,7 @@ static int build_alloc_func_list(void)
 	}
 
 	kernel_map = machine__kernel_map(machine);
-	if (map__load(kernel_map, NULL) < 0) {
+	if (map__load(kernel_map) < 0) {
 		pr_err("cannot load kernel map\n");
 		return -ENOENT;
 	}
@@ -979,7 +979,7 @@ static void __print_slab_result(struct rb_root *root,
 		if (is_caller) {
 			addr = data->call_site;
 			if (!raw_ip)
-				sym = machine__find_kernel_function(machine, addr, &map, NULL);
+				sym = machine__find_kernel_function(machine, addr, &map);
 		} else
 			addr = data->ptr;
 
@@ -1043,8 +1043,7 @@ static void __print_page_alloc_result(struct perf_session *session, int n_lines)
 		char *caller = buf;
 
 		data = rb_entry(next, struct page_stat, node);
-		sym = machine__find_kernel_function(machine, data->callsite,
-						    &map, NULL);
+		sym = machine__find_kernel_function(machine, data->callsite, &map);
 		if (sym && sym->name)
 			caller = sym->name;
 		else
@@ -1086,8 +1085,7 @@ static void __print_page_caller_result(struct perf_session *session, int n_lines
 		char *caller = buf;
 
 		data = rb_entry(next, struct page_stat, node);
-		sym = machine__find_kernel_function(machine, data->callsite,
-						    &map, NULL);
+		sym = machine__find_kernel_function(machine, data->callsite, &map);
 		if (sym && sym->name)
 			caller = sym->name;
 		else
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 6b3c8b0d3276..7228d141a789 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -522,11 +522,11 @@ static void print_sample_brstacksym(struct perf_sample *sample,
 
 		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
 		if (alf.map)
-			alf.sym = map__find_symbol(alf.map, alf.addr, NULL);
+			alf.sym = map__find_symbol(alf.map, alf.addr);
 
 		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
 		if (alt.map)
-			alt.sym = map__find_symbol(alt.map, alt.addr, NULL);
+			alt.sym = map__find_symbol(alt.map, alt.addr);
 
 		symbol__fprintf_symname_offs(alf.sym, &alf, stdout);
 		putchar('/');
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index e0919006fcba..400785702566 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -657,34 +657,6 @@ repeat:
 	return NULL;
 }
 
-static int symbol_filter(struct map *map, struct symbol *sym)
-{
-	const char *name = sym->name;
-
-	if (!__map__is_kernel(map))
-		return 0;
-	/*
-	 * ppc64 uses function descriptors and appends a '.' to the
-	 * start of every instruction address. Remove it.
-	 */
-	if (name[0] == '.')
-		name++;
-
-	if (!strcmp(name, "_text") ||
-	    !strcmp(name, "_etext") ||
-	    !strcmp(name, "_sinittext") ||
-	    !strncmp("init_module", name, 11) ||
-	    !strncmp("cleanup_module", name, 14) ||
-	    strstr(name, "_text_start") ||
-	    strstr(name, "_text_end"))
-		return 1;
-
-	if (symbol__is_idle(sym))
-		sym->idle = 1;
-
-	return 0;
-}
-
 static int hist_iter__top_callback(struct hist_entry_iter *iter,
 				   struct addr_location *al, bool single,
 				   void *arg)
@@ -949,8 +921,6 @@ static int __cmd_top(struct perf_top *top)
 	if (top->session == NULL)
 		return -1;
 
-	machines__set_symbol_filter(&top->session->machines, symbol_filter);
-
 	if (!objdump_path) {
 		ret = perf_env__lookup_objdump(&top->session->header.env);
 		if (ret)
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 7ed72a475c57..e4b717e9eb6c 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -20,7 +20,6 @@
 #endif
 
 #ifdef __powerpc__
-#include "../../arch/powerpc/include/uapi/asm/unistd.h"
 #define CPUINFO_PROC	{"cpu"}
 #endif
 
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 2af156a8d4e5..ff5bc6363a79 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -263,7 +263,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 	 * Converting addresses for use by objdump requires more information.
 	 * map__load() does that.  See map__rip_2objdump() for details.
 	 */
-	if (map__load(al.map, NULL))
+	if (map__load(al.map))
 		return -1;
 
 	/* objdump struggles with kcore - try each map only once */
@@ -511,7 +511,7 @@ static int do_test_code_reading(bool try_kcore)
 
 	/* Load kernel map */
 	map = machine__kernel_map(machine);
-	ret = map__load(map, NULL);
+	ret = map__load(map);
 	if (ret < 0) {
 		pr_debug("map__load failed\n");
 		goto out_err;
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 77513bf99d1b..a5082331f246 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -8,14 +8,6 @@
 #include "debug.h"
 #include "machine.h"
 
-static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
-					   struct symbol *sym)
-{
-	bool *visited = symbol__priv(sym);
-	*visited = true;
-	return 0;
-}
-
 #define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x))
 
 int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
@@ -62,7 +54,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
 	 * be compacted against the list of modules found in the "vmlinux"
 	 * code and with the one got from /proc/modules from the "kallsyms" code.
 	 */
-	if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true, NULL) <= 0) {
+	if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true) <= 0) {
 		pr_debug("dso__load_kallsyms ");
 		goto out;
 	}
@@ -100,8 +92,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
 	 * maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines
 	 * to fixup the symbols.
 	 */
-	if (machine__load_vmlinux_path(&vmlinux, type,
-				       vmlinux_matches_kallsyms_filter) <= 0) {
+	if (machine__load_vmlinux_path(&vmlinux, type) <= 0) {
 		pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n");
 		err = TEST_SKIP;
 		goto out;
@@ -127,7 +118,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
 		mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end);
 
 		first_pair = machine__find_kernel_symbol(&kallsyms, type,
-							 mem_start, NULL, NULL);
+							 mem_start, NULL);
 		pair = first_pair;
 
 		if (pair && UM(pair->start) == mem_start) {
@@ -156,7 +147,7 @@ next_pair:
 				 */
 				continue;
 			} else {
-				pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL);
+				pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL);
 				if (pair) {
 					if (UM(pair->start) == mem_start)
 						goto next_pair;
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 2e2d10022355..4c18271c71c9 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -495,7 +495,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
 	if (!ins__is_call(dl->ins))
 		return false;
 
-	if (map_groups__find_ams(&target, NULL) ||
+	if (map_groups__find_ams(&target) ||
 	    map__rip_2objdump(target.map, target.map->map_ip(target.map,
 							     target.addr)) !=
 	    dl->ops.target.addr) {
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index 80912778bb6d..98a34664bb7e 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -52,9 +52,9 @@ static int map_browser__search(struct map_browser *browser)
 
 	if (target[0] == '0' && tolower(target[1]) == 'x') {
 		u64 addr = strtoull(target, NULL, 16);
-		sym = map__find_symbol(browser->map, addr, NULL);
+		sym = map__find_symbol(browser->map, addr);
 	} else
-		sym = map__find_symbol_by_name(browser->map, target, NULL);
+		sym = map__find_symbol_by_name(browser->map, target);
 
 	if (sym != NULL) {
 		u32 *idx = symbol__browser_index(sym);
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index f1a6d17c5a37..96f99d608d00 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,5 +1,6 @@
 libperf-y += alias.o
 libperf-y += annotate.o
+libperf-y += block-range.o
 libperf-y += build-id.o
 libperf-y += config.o
 libperf-y += ctype.o
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 1b59e3129216..7a80c7362a03 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -17,6 +17,7 @@
 #include "debug.h"
 #include "annotate.h"
 #include "evsel.h"
+#include "block-range.h"
 #include <regex.h>
 #include <pthread.h>
 #include <linux/bitops.h>
@@ -859,6 +860,89 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
 	return percent;
 }
 
+static const char *annotate__address_color(struct block_range *br)
+{
+	double cov = block_range__coverage(br);
+
+	if (cov >= 0) {
+		/* mark red for >75% coverage */
+		if (cov > 0.75)
+			return PERF_COLOR_RED;
+
+		/* mark dull for <1% coverage */
+		if (cov < 0.01)
+			return PERF_COLOR_NORMAL;
+	}
+
+	return PERF_COLOR_MAGENTA;
+}
+
+static const char *annotate__asm_color(struct block_range *br)
+{
+	double cov = block_range__coverage(br);
+
+	if (cov >= 0) {
+		/* mark dull for <1% coverage */
+		if (cov < 0.01)
+			return PERF_COLOR_NORMAL;
+	}
+
+	return PERF_COLOR_BLUE;
+}
+
+static void annotate__branch_printf(struct block_range *br, u64 addr)
+{
+	bool emit_comment = true;
+
+	if (!br)
+		return;
+
+#if 1
+	if (br->is_target && br->start == addr) {
+		struct block_range *branch = br;
+		double p;
+
+		/*
+		 * Find matching branch to our target.
+		 */
+		while (!branch->is_branch)
+			branch = block_range__next(branch);
+
+		p = 100 *(double)br->entry / branch->coverage;
+
+		if (p > 0.1) {
+			if (emit_comment) {
+				emit_comment = false;
+				printf("\t#");
+			}
+
+			/*
+			 * The percentage of coverage joined at this target in relation
+			 * to the next branch.
+			 */
+			printf(" +%.2f%%", p);
+		}
+	}
+#endif
+	if (br->is_branch && br->end == addr) {
+		double p = 100*(double)br->taken / br->coverage;
+
+		if (p > 0.1) {
+			if (emit_comment) {
+				emit_comment = false;
+				printf("\t#");
+			}
+
+			/*
+			 * The percentage of coverage leaving at this branch, and
+			 * its prediction ratio.
+			 */
+			printf(" -%.2f%% (p:%.2f%%)", p, 100*(double)br->pred  / br->taken);
+		}
+	}
+}
+
+
 static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 start,
 		      struct perf_evsel *evsel, u64 len, int min_pcnt, int printed,
 		      int max_lines, struct disasm_line *queue)
@@ -878,6 +962,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 		s64 offset = dl->offset;
 		const u64 addr = start + offset;
 		struct disasm_line *next;
+		struct block_range *br;
 
 		next = disasm__get_next_ip_line(&notes->src->source, dl);
 
@@ -947,8 +1032,12 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 		}
 
 		printf(" :	");
-		color_fprintf(stdout, PERF_COLOR_MAGENTA, "  %" PRIx64 ":", addr);
-		color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", dl->line);
+
+		br = block_range__find(addr);
+		color_fprintf(stdout, annotate__address_color(br), "  %" PRIx64 ":", addr);
+		color_fprintf(stdout, annotate__asm_color(br), "%s", dl->line);
+		annotate__branch_printf(br, addr);
+		printf("\n");
 
 		if (ppercents != &percent)
 			free(ppercents);
@@ -1077,7 +1166,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
 			.addr = dl->ops.target.addr,
 		};
 
-		if (!map_groups__find_ams(&target, NULL) &&
+		if (!map_groups__find_ams(&target) &&
 		    target.sym->start == target.al_addr)
 			dl->ops.target.name = strdup(target.sym->name);
 	}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index e96f4daed9b9..ea44e4ff19c6 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -130,6 +130,7 @@ struct annotated_source {
 
 struct annotation {
 	pthread_mutex_t		lock;
+	u64			max_coverage;
 	struct annotated_source *src;
 };
 
diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c
new file mode 100644
index 000000000000..7b3e1d75d803
--- /dev/null
+++ b/tools/perf/util/block-range.c
@@ -0,0 +1,328 @@
+#include "block-range.h"
+#include "annotate.h"
+
+struct {
+	struct rb_root root;
+	u64 blocks;
+} block_ranges;
+
+static void block_range__debug(void)
+{
+	/*
+	 * XXX still paranoid for now; see if we can make this depend on
+	 * DEBUG=1 builds.
+	 */
+#if 1
+	struct rb_node *rb;
+	u64 old = 0; /* NULL isn't executable */
+
+	for (rb = rb_first(&block_ranges.root); rb; rb = rb_next(rb)) {
+		struct block_range *entry = rb_entry(rb, struct block_range, node);
+
+		assert(old < entry->start);
+		assert(entry->start <= entry->end); /* single instruction block; jump to a jump */
+
+		old = entry->end;
+	}
+#endif
+}
+
+struct block_range *block_range__find(u64 addr)
+{
+	struct rb_node **p = &block_ranges.root.rb_node;
+	struct rb_node *parent = NULL;
+	struct block_range *entry;
+
+	while (*p != NULL) {
+		parent = *p;
+		entry = rb_entry(parent, struct block_range, node);
+
+		if (addr < entry->start)
+			p = &parent->rb_left;
+		else if (addr > entry->end)
+			p = &parent->rb_right;
+		else
+			return entry;
+	}
+
+	return NULL;
+}
+
+static inline void rb_link_left_of_node(struct rb_node *left, struct rb_node *node)
+{
+	struct rb_node **p = &node->rb_left;
+	while (*p) {
+		node = *p;
+		p = &node->rb_right;
+	}
+	rb_link_node(left, node, p);
+}
+
+static inline void rb_link_right_of_node(struct rb_node *right, struct rb_node *node)
+{
+	struct rb_node **p = &node->rb_right;
+	while (*p) {
+		node = *p;
+		p = &node->rb_left;
+	}
+	rb_link_node(right, node, p);
+}
+
+/**
+ * block_range__create
+ * @start: branch target starting this basic block
+ * @end:   branch ending this basic block
+ *
+ * Create all the required block ranges to precisely span the given range.
+ */
+struct block_range_iter block_range__create(u64 start, u64 end)
+{
+	struct rb_node **p = &block_ranges.root.rb_node;
+	struct rb_node *n, *parent = NULL;
+	struct block_range *next, *entry = NULL;
+	struct block_range_iter iter = { NULL, NULL };
+
+	while (*p != NULL) {
+		parent = *p;
+		entry = rb_entry(parent, struct block_range, node);
+
+		if (start < entry->start)
+			p = &parent->rb_left;
+		else if (start > entry->end)
+			p = &parent->rb_right;
+		else
+			break;
+	}
+
+	/*
+	 * Didn't find anything.. there's a hole at @start, however @end might
+	 * be inside/behind the next range.
+	 */
+	if (!*p) {
+		if (!entry) /* tree empty */
+			goto do_whole;
+
+		/*
+		 * If the last node is before, advance one to find the next.
+		 */
+		n = parent;
+		if (entry->end < start) {
+			n = rb_next(n);
+			if (!n)
+				goto do_whole;
+		}
+		next = rb_entry(n, struct block_range, node);
+
+		if (next->start <= end) { /* add head: [start...][n->start...] */
+			struct block_range *head = malloc(sizeof(struct block_range));
+			if (!head)
+				return iter;
+
+			*head = (struct block_range){
+				.start		= start,
+				.end		= next->start - 1,
+				.is_target	= 1,
+				.is_branch	= 0,
+			};
+
+			rb_link_left_of_node(&head->node, &next->node);
+			rb_insert_color(&head->node, &block_ranges.root);
+			block_range__debug();
+
+			iter.start = head;
+			goto do_tail;
+		}
+
+do_whole:
+		/*
+		 * The whole [start..end] range is non-overlapping.
+		 */
+		entry = malloc(sizeof(struct block_range));
+		if (!entry)
+			return iter;
+
+		*entry = (struct block_range){
+			.start		= start,
+			.end		= end,
+			.is_target	= 1,
+			.is_branch	= 1,
+		};
+
+		rb_link_node(&entry->node, parent, p);
+		rb_insert_color(&entry->node, &block_ranges.root);
+		block_range__debug();
+
+		iter.start = entry;
+		iter.end   = entry;
+		goto done;
+	}
+
+	/*
+	 * We found a range that overlapped with ours, split if needed.
+	 */
+	if (entry->start < start) { /* split: [e->start...][start...] */
+		struct block_range *head = malloc(sizeof(struct block_range));
+		if (!head)
+			return iter;
+
+		*head = (struct block_range){
+			.start		= entry->start,
+			.end		= start - 1,
+			.is_target	= entry->is_target,
+			.is_branch	= 0,
+
+			.coverage	= entry->coverage,
+			.entry		= entry->entry,
+		};
+
+		entry->start		= start;
+		entry->is_target	= 1;
+		entry->entry		= 0;
+
+		rb_link_left_of_node(&head->node, &entry->node);
+		rb_insert_color(&head->node, &block_ranges.root);
+		block_range__debug();
+
+	} else if (entry->start == start)
+		entry->is_target = 1;
+
+	iter.start = entry;
+
+do_tail:
+	/*
+	 * At this point we've got: @iter.start = [@start...] but @end can still be
+	 * inside or beyond it.
+	 */
+	entry = iter.start;
+	for (;;) {
author	Ingo Molnar <mingo@kernel.org>	2016-09-09 07:46:13 +0200
committer	Ingo Molnar <mingo@kernel.org>	2016-09-09 07:46:13 +0200
commit	14520d630adb4314722dd57ccb689ffdc6a31383 (patch)
tree	315bf902dc1d03622dc43c362b6fc48455713149
parent	c0b172e5b6770048751b2c0a4fe44346c2080c5d (diff)
parent	25b8592e912f085ce2ff736a2927584ddeab238c (diff)
download	linux-14520d630adb4314722dd57ccb689ffdc6a31383.tar.gz linux-14520d630adb4314722dd57ccb689ffdc6a31383.tar.bz2 linux-14520d630adb4314722dd57ccb689ffdc6a31383.zip