summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2016-09-09 07:46:13 +0200
committerIngo Molnar <mingo@kernel.org>2016-09-09 07:46:13 +0200
commit14520d630adb4314722dd57ccb689ffdc6a31383 (patch)
tree315bf902dc1d03622dc43c362b6fc48455713149
parentc0b172e5b6770048751b2c0a4fe44346c2080c5d (diff)
parent25b8592e912f085ce2ff736a2927584ddeab238c (diff)
downloadlinux-14520d630adb4314722dd57ccb689ffdc6a31383.tar.gz
linux-14520d630adb4314722dd57ccb689ffdc6a31383.tar.bz2
linux-14520d630adb4314722dd57ccb689ffdc6a31383.zip
Merge tag 'perf-core-for-mingo-20160908' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: User visible changes: - Add branch stack / basic block info to 'perf annotate --stdio', where for each branch, we add an asm comment after the instruction with information on how often it was taken and predicted. See example with color output at: http://vger.kernel.org/~acme/perf/annotate_basic_blocks.png (Peter Zijlstra) - Only open an evsel in CPUs in its cpu map, fixing some use cases in systems with multiple PMUs with different CPU maps (Mark Rutland) - Fix handling of huge TLB maps, recognizing it as anonymous (Wang Nan) Infrastructure changes: - Remove the symbol filtering code, i.e. the callbacks passed to all functions that could end up loading a DSO symtab, simplifying the code, eventually allowing what we should have had since day one: removing the 'map' parameter from dso__load() functions (Arnaldo Carvalho de Melo) Arch specific build fixes: - Fix detached tarball build on powerpc, where we were still accessing a file outside tools/ (Ravi Bangoria) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--tools/lib/api/fs/fs.c15
-rw-r--r--tools/lib/api/fs/fs.h1
-rw-r--r--tools/perf/arch/powerpc/util/sym-handling.c2
-rw-r--r--tools/perf/builtin-annotate.c104
-rw-r--r--tools/perf/builtin-inject.c2
-rw-r--r--tools/perf/builtin-kmem.c10
-rw-r--r--tools/perf/builtin-script.c4
-rw-r--r--tools/perf/builtin-top.c30
-rw-r--r--tools/perf/perf-sys.h1
-rw-r--r--tools/perf/tests/code-reading.c4
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c17
-rw-r--r--tools/perf/ui/browsers/annotate.c2
-rw-r--r--tools/perf/ui/browsers/map.c4
-rw-r--r--tools/perf/util/Build1
-rw-r--r--tools/perf/util/annotate.c95
-rw-r--r--tools/perf/util/annotate.h1
-rw-r--r--tools/perf/util/block-range.c328
-rw-r--r--tools/perf/util/block-range.h71
-rw-r--r--tools/perf/util/event.c21
-rw-r--r--tools/perf/util/evlist.c8
-rw-r--r--tools/perf/util/intel-bts.c2
-rw-r--r--tools/perf/util/intel-pt.c4
-rw-r--r--tools/perf/util/machine.c38
-rw-r--r--tools/perf/util/machine.h34
-rw-r--r--tools/perf/util/map.c50
-rw-r--r--tools/perf/util/map.h32
-rw-r--r--tools/perf/util/pmu.c15
-rw-r--r--tools/perf/util/probe-event.c17
-rw-r--r--tools/perf/util/symbol-elf.c32
-rw-r--r--tools/perf/util/symbol-minimal.c4
-rw-r--r--tools/perf/util/symbol.c134
-rw-r--r--tools/perf/util/symbol.h20
32 files changed, 817 insertions, 286 deletions
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index ba7094b945ff..f99f49e4a31e 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -34,6 +34,10 @@
#define TRACEFS_MAGIC 0x74726163
#endif
+#ifndef HUGETLBFS_MAGIC
+#define HUGETLBFS_MAGIC 0x958458f6
+#endif
+
static const char * const sysfs__fs_known_mountpoints[] = {
"/sys",
0,
@@ -67,6 +71,10 @@ static const char * const tracefs__known_mountpoints[] = {
0,
};
+static const char * const hugetlbfs__known_mountpoints[] = {
+ 0,
+};
+
struct fs {
const char *name;
const char * const *mounts;
@@ -80,6 +88,7 @@ enum {
FS__PROCFS = 1,
FS__DEBUGFS = 2,
FS__TRACEFS = 3,
+ FS__HUGETLBFS = 4,
};
#ifndef TRACEFS_MAGIC
@@ -107,6 +116,11 @@ static struct fs fs__entries[] = {
.mounts = tracefs__known_mountpoints,
.magic = TRACEFS_MAGIC,
},
+ [FS__HUGETLBFS] = {
+ .name = "hugetlbfs",
+ .mounts = hugetlbfs__known_mountpoints,
+ .magic = HUGETLBFS_MAGIC,
+ },
};
static bool fs__read_mounts(struct fs *fs)
@@ -265,6 +279,7 @@ FS(sysfs, FS__SYSFS);
FS(procfs, FS__PROCFS);
FS(debugfs, FS__DEBUGFS);
FS(tracefs, FS__TRACEFS);
+FS(hugetlbfs, FS__HUGETLBFS);
int filename__read_int(const char *filename, int *value)
{
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 16c9c2ed7c5b..a63269f5d20c 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -21,6 +21,7 @@ FS(sysfs)
FS(procfs)
FS(debugfs)
FS(tracefs)
+FS(hugetlbfs)
#undef FS
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index 35745a733100..ed9d5d15d5b6 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -108,7 +108,7 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
int i = 0;
map = get_target_map(pev->target, pev->uprobes);
- if (!map || map__load(map, NULL) < 0)
+ if (!map || map__load(map) < 0)
return;
for (i = 0; i < ntevs; i++) {
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index f07b23011b22..ebb628332a6e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -30,6 +30,7 @@
#include "util/tool.h"
#include "util/data.h"
#include "arch/common.h"
+#include "util/block-range.h"
#include <dlfcn.h>
#include <linux/bitmap.h>
@@ -46,6 +47,103 @@ struct perf_annotate {
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
+/*
+ * Given one basic block:
+ *
+ * from to branch_i
+ * * ----> *
+ * |
+ * | block
+ * v
+ * * ----> *
+ * from to branch_i+1
+ *
+ * where the horizontal are the branches and the vertical is the executed
+ * block of instructions.
+ *
+ * We count, for each 'instruction', the number of blocks that covered it as
+ * well as count the ratio each branch is taken.
+ *
+ * We can do this without knowing the actual instruction stream by keeping
+ * track of the address ranges. We break down ranges such that there is no
+ * overlap and iterate from the start until the end.
+ *
+ * @acme: once we parse the objdump output _before_ processing the samples,
+ * we can easily fold the branch.cycles IPC bits in.
+ */
+static void process_basic_block(struct addr_map_symbol *start,
+ struct addr_map_symbol *end,
+ struct branch_flags *flags)
+{
+ struct symbol *sym = start->sym;
+ struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
+ struct block_range_iter iter;
+ struct block_range *entry;
+
+ /*
+ * Sanity; NULL isn't executable and the CPU cannot execute backwards
+ */
+ if (!start->addr || start->addr > end->addr)
+ return;
+
+ iter = block_range__create(start->addr, end->addr);
+ if (!block_range_iter__valid(&iter))
+ return;
+
+ /*
+ * First block in range is a branch target.
+ */
+ entry = block_range_iter(&iter);
+ assert(entry->is_target);
+ entry->entry++;
+
+ do {
+ entry = block_range_iter(&iter);
+
+ entry->coverage++;
+ entry->sym = sym;
+
+ if (notes)
+ notes->max_coverage = max(notes->max_coverage, entry->coverage);
+
+ } while (block_range_iter__next(&iter));
+
+ /*
+ * Last block in rage is a branch.
+ */
+ entry = block_range_iter(&iter);
+ assert(entry->is_branch);
+ entry->taken++;
+ if (flags->predicted)
+ entry->pred++;
+}
+
+static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
+ struct perf_sample *sample)
+{
+ struct addr_map_symbol *prev = NULL;
+ struct branch_info *bi;
+ int i;
+
+ if (!bs || !bs->nr)
+ return;
+
+ bi = sample__resolve_bstack(sample, al);
+ if (!bi)
+ return;
+
+ for (i = bs->nr - 1; i >= 0; i--) {
+ /*
+ * XXX filter against symbol
+ */
+ if (prev)
+ process_basic_block(prev, &bi[i].from, &bi[i].flags);
+ prev = &bi[i].to;
+ }
+
+ free(bi);
+}
+
static int perf_evsel__add_sample(struct perf_evsel *evsel,
struct perf_sample *sample,
struct addr_location *al,
@@ -72,6 +170,12 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return 0;
}
+ /*
+ * XXX filtered samples can still have branch entires pointing into our
+ * symbol and are missed.
+ */
+ process_branch_stack(sample->branch_stack, al, sample);
+
sample->period = 1;
sample->weight = 1;
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 73c1c4cc3600..b9bc7e39833a 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -429,7 +429,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
if (al.map != NULL) {
if (!al.map->dso->hit) {
al.map->dso->hit = 1;
- if (map__load(al.map, NULL) >= 0) {
+ if (map__load(al.map) >= 0) {
dso__inject_build_id(al.map->dso, tool, machine);
/*
* If this fails, too bad, let the other side
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index fdde1bd3e306..d426dcb18ce9 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -330,7 +330,7 @@ static int build_alloc_func_list(void)
}
kernel_map = machine__kernel_map(machine);
- if (map__load(kernel_map, NULL) < 0) {
+ if (map__load(kernel_map) < 0) {
pr_err("cannot load kernel map\n");
return -ENOENT;
}
@@ -979,7 +979,7 @@ static void __print_slab_result(struct rb_root *root,
if (is_caller) {
addr = data->call_site;
if (!raw_ip)
- sym = machine__find_kernel_function(machine, addr, &map, NULL);
+ sym = machine__find_kernel_function(machine, addr, &map);
} else
addr = data->ptr;
@@ -1043,8 +1043,7 @@ static void __print_page_alloc_result(struct perf_session *session, int n_lines)
char *caller = buf;
data = rb_entry(next, struct page_stat, node);
- sym = machine__find_kernel_function(machine, data->callsite,
- &map, NULL);
+ sym = machine__find_kernel_function(machine, data->callsite, &map);
if (sym && sym->name)
caller = sym->name;
else
@@ -1086,8 +1085,7 @@ static void __print_page_caller_result(struct perf_session *session, int n_lines
char *caller = buf;
data = rb_entry(next, struct page_stat, node);
- sym = machine__find_kernel_function(machine, data->callsite,
- &map, NULL);
+ sym = machine__find_kernel_function(machine, data->callsite, &map);
if (sym && sym->name)
caller = sym->name;
else
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 6b3c8b0d3276..7228d141a789 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -522,11 +522,11 @@ static void print_sample_brstacksym(struct perf_sample *sample,
thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
if (alf.map)
- alf.sym = map__find_symbol(alf.map, alf.addr, NULL);
+ alf.sym = map__find_symbol(alf.map, alf.addr);
thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
if (alt.map)
- alt.sym = map__find_symbol(alt.map, alt.addr, NULL);
+ alt.sym = map__find_symbol(alt.map, alt.addr);
symbol__fprintf_symname_offs(alf.sym, &alf, stdout);
putchar('/');
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index e0919006fcba..400785702566 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -657,34 +657,6 @@ repeat:
return NULL;
}
-static int symbol_filter(struct map *map, struct symbol *sym)
-{
- const char *name = sym->name;
-
- if (!__map__is_kernel(map))
- return 0;
- /*
- * ppc64 uses function descriptors and appends a '.' to the
- * start of every instruction address. Remove it.
- */
- if (name[0] == '.')
- name++;
-
- if (!strcmp(name, "_text") ||
- !strcmp(name, "_etext") ||
- !strcmp(name, "_sinittext") ||
- !strncmp("init_module", name, 11) ||
- !strncmp("cleanup_module", name, 14) ||
- strstr(name, "_text_start") ||
- strstr(name, "_text_end"))
- return 1;
-
- if (symbol__is_idle(sym))
- sym->idle = 1;
-
- return 0;
-}
-
static int hist_iter__top_callback(struct hist_entry_iter *iter,
struct addr_location *al, bool single,
void *arg)
@@ -949,8 +921,6 @@ static int __cmd_top(struct perf_top *top)
if (top->session == NULL)
return -1;
- machines__set_symbol_filter(&top->session->machines, symbol_filter);
-
if (!objdump_path) {
ret = perf_env__lookup_objdump(&top->session->header.env);
if (ret)
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 7ed72a475c57..e4b717e9eb6c 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -20,7 +20,6 @@
#endif
#ifdef __powerpc__
-#include "../../arch/powerpc/include/uapi/asm/unistd.h"
#define CPUINFO_PROC {"cpu"}
#endif
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 2af156a8d4e5..ff5bc6363a79 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -263,7 +263,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
* Converting addresses for use by objdump requires more information.
* map__load() does that. See map__rip_2objdump() for details.
*/
- if (map__load(al.map, NULL))
+ if (map__load(al.map))
return -1;
/* objdump struggles with kcore - try each map only once */
@@ -511,7 +511,7 @@ static int do_test_code_reading(bool try_kcore)
/* Load kernel map */
map = machine__kernel_map(machine);
- ret = map__load(map, NULL);
+ ret = map__load(map);
if (ret < 0) {
pr_debug("map__load failed\n");
goto out_err;
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 77513bf99d1b..a5082331f246 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -8,14 +8,6 @@
#include "debug.h"
#include "machine.h"
-static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
- struct symbol *sym)
-{
- bool *visited = symbol__priv(sym);
- *visited = true;
- return 0;
-}
-
#define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x))
int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
@@ -62,7 +54,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
* be compacted against the list of modules found in the "vmlinux"
* code and with the one got from /proc/modules from the "kallsyms" code.
*/
- if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true, NULL) <= 0) {
+ if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true) <= 0) {
pr_debug("dso__load_kallsyms ");
goto out;
}
@@ -100,8 +92,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
* maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines
* to fixup the symbols.
*/
- if (machine__load_vmlinux_path(&vmlinux, type,
- vmlinux_matches_kallsyms_filter) <= 0) {
+ if (machine__load_vmlinux_path(&vmlinux, type) <= 0) {
pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n");
err = TEST_SKIP;
goto out;
@@ -127,7 +118,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end);
first_pair = machine__find_kernel_symbol(&kallsyms, type,
- mem_start, NULL, NULL);
+ mem_start, NULL);
pair = first_pair;
if (pair && UM(pair->start) == mem_start) {
@@ -156,7 +147,7 @@ next_pair:
*/
continue;
} else {
- pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL);
+ pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL);
if (pair) {
if (UM(pair->start) == mem_start)
goto next_pair;
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 2e2d10022355..4c18271c71c9 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -495,7 +495,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
if (!ins__is_call(dl->ins))
return false;
- if (map_groups__find_ams(&target, NULL) ||
+ if (map_groups__find_ams(&target) ||
map__rip_2objdump(target.map, target.map->map_ip(target.map,
target.addr)) !=
dl->ops.target.addr) {
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index 80912778bb6d..98a34664bb7e 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -52,9 +52,9 @@ static int map_browser__search(struct map_browser *browser)
if (target[0] == '0' && tolower(target[1]) == 'x') {
u64 addr = strtoull(target, NULL, 16);
- sym = map__find_symbol(browser->map, addr, NULL);
+ sym = map__find_symbol(browser->map, addr);
} else
- sym = map__find_symbol_by_name(browser->map, target, NULL);
+ sym = map__find_symbol_by_name(browser->map, target);
if (sym != NULL) {
u32 *idx = symbol__browser_index(sym);
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index f1a6d17c5a37..96f99d608d00 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,5 +1,6 @@
libperf-y += alias.o
libperf-y += annotate.o
+libperf-y += block-range.o
libperf-y += build-id.o
libperf-y += config.o
libperf-y += ctype.o
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 1b59e3129216..7a80c7362a03 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -17,6 +17,7 @@
#include "debug.h"
#include "annotate.h"
#include "evsel.h"
+#include "block-range.h"
#include <regex.h>
#include <pthread.h>
#include <linux/bitops.h>
@@ -859,6 +860,89 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
return percent;
}
+static const char *annotate__address_color(struct block_range *br)
+{
+ double cov = block_range__coverage(br);
+
+ if (cov >= 0) {
+ /* mark red for >75% coverage */
+ if (cov > 0.75)
+ return PERF_COLOR_RED;
+
+ /* mark dull for <1% coverage */
+ if (cov < 0.01)
+ return PERF_COLOR_NORMAL;
+ }
+
+ return PERF_COLOR_MAGENTA;
+}
+
+static const char *annotate__asm_color(struct block_range *br)
+{
+ double cov = block_range__coverage(br);
+
+ if (cov >= 0) {
+ /* mark dull for <1% coverage */
+ if (cov < 0.01)
+ return PERF_COLOR_NORMAL;
+ }
+
+ return PERF_COLOR_BLUE;
+}
+
+static void annotate__branch_printf(struct block_range *br, u64 addr)
+{
+ bool emit_comment = true;
+
+ if (!br)
+ return;
+
+#if 1
+ if (br->is_target && br->start == addr) {
+ struct block_range *branch = br;
+ double p;
+
+ /*
+ * Find matching branch to our target.
+ */
+ while (!branch->is_branch)
+ branch = block_range__next(branch);
+
+ p = 100 *(double)br->entry / branch->coverage;
+
+ if (p > 0.1) {
+ if (emit_comment) {
+ emit_comment = false;
+ printf("\t#");
+ }
+
+ /*
+ * The percentage of coverage joined at this target in relation
+ * to the next branch.
+ */
+ printf(" +%.2f%%", p);
+ }
+ }
+#endif
+ if (br->is_branch && br->end == addr) {
+ double p = 100*(double)br->taken / br->coverage;
+
+ if (p > 0.1) {
+ if (emit_comment) {
+ emit_comment = false;
+ printf("\t#");
+ }
+
+ /*
+ * The percentage of coverage leaving at this branch, and
+ * its prediction ratio.
+ */
+ printf(" -%.2f%% (p:%.2f%%)", p, 100*(double)br->pred / br->taken);
+ }
+ }
+}
+
+
static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 start,
struct perf_evsel *evsel, u64 len, int min_pcnt, int printed,
int max_lines, struct disasm_line *queue)
@@ -878,6 +962,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
s64 offset = dl->offset;
const u64 addr = start + offset;
struct disasm_line *next;
+ struct block_range *br;
next = disasm__get_next_ip_line(&notes->src->source, dl);
@@ -947,8 +1032,12 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
}
printf(" : ");
- color_fprintf(stdout, PERF_COLOR_MAGENTA, " %" PRIx64 ":", addr);
- color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", dl->line);
+
+ br = block_range__find(addr);
+ color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr);
+ color_fprintf(stdout, annotate__asm_color(br), "%s", dl->line);
+ annotate__branch_printf(br, addr);
+ printf("\n");
if (ppercents != &percent)
free(ppercents);
@@ -1077,7 +1166,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
.addr = dl->ops.target.addr,
};
- if (!map_groups__find_ams(&target, NULL) &&
+ if (!map_groups__find_ams(&target) &&
target.sym->start == target.al_addr)
dl->ops.target.name = strdup(target.sym->name);
}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index e96f4daed9b9..ea44e4ff19c6 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -130,6 +130,7 @@ struct annotated_source {
struct annotation {
pthread_mutex_t lock;
+ u64 max_coverage;
struct annotated_source *src;
};
diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c
new file mode 100644
index 000000000000..7b3e1d75d803
--- /dev/null
+++ b/tools/perf/util/block-range.c
@@ -0,0 +1,328 @@
+#include "block-range.h"
+#include "annotate.h"
+
+struct {
+ struct rb_root root;
+ u64 blocks;
+} block_ranges;
+
+static void block_range__debug(void)
+{
+ /*
+ * XXX still paranoid for now; see if we can make this depend on
+ * DEBUG=1 builds.
+ */
+#if 1
+ struct rb_node *rb;
+ u64 old = 0; /* NULL isn't executable */
+
+ for (rb = rb_first(&block_ranges.root); rb; rb = rb_next(rb)) {
+ struct block_range *entry = rb_entry(rb, struct block_range, node);
+
+ assert(old < entry->start);
+ assert(entry->start <= entry->end); /* single instruction block; jump to a jump */
+
+ old = entry->end;
+ }
+#endif
+}
+
+struct block_range *block_range__find(u64 addr)
+{
+ struct rb_node **p = &block_ranges.root.rb_node;
+ struct rb_node *parent = NULL;
+ struct block_range *entry;
+
+ while (*p != NULL) {
+ parent = *p;
+ entry = rb_entry(parent, struct block_range, node);
+
+ if (addr < entry->start)
+ p = &parent->rb_left;
+ else if (addr > entry->end)
+ p = &parent->rb_right;
+ else
+ return entry;
+ }
+
+ return NULL;
+}
+
+static inline void rb_link_left_of_node(struct rb_node *left, struct rb_node *node)
+{
+ struct rb_node **p = &node->rb_left;
+ while (*p) {
+ node = *p;
+ p = &node->rb_right;
+ }
+ rb_link_node(left, node, p);
+}
+
+static inline void rb_link_right_of_node(struct rb_node *right, struct rb_node *node)
+{
+ struct rb_node **p = &node->rb_right;
+ while (*p) {
+ node = *p;
+ p = &node->rb_left;
+ }
+ rb_link_node(right, node, p);
+}
+
+/**
+ * block_range__create
+ * @start: branch target starting this basic block
+ * @end: branch ending this basic block
+ *
+ * Create all the required block ranges to precisely span the given range.
+ */
+struct block_range_iter block_range__create(u64 start, u64 end)
+{
+ struct rb_node **p = &block_ranges.root.rb_node;
+ struct rb_node *n, *parent = NULL;
+ struct block_range *next, *entry = NULL;
+ struct block_range_iter iter = { NULL, NULL };
+
+ while (*p != NULL) {
+ parent = *p;
+ entry = rb_entry(parent, struct block_range, node);
+
+ if (start < entry->start)
+ p = &parent->rb_left;
+ else if (start > entry->end)
+ p = &parent->rb_right;
+ else
+ break;
+ }
+
+ /*
+ * Didn't find anything.. there's a hole at @start, however @end might
+ * be inside/behind the next range.
+ */
+ if (!*p) {
+ if (!entry) /* tree empty */
+ goto do_whole;
+
+ /*
+ * If the last node is before, advance one to find the next.
+ */
+ n = parent;
+ if (entry->end < start) {
+ n = rb_next(n);
+ if (!n)
+ goto do_whole;
+ }
+ next = rb_entry(n, struct block_range, node);
+
+ if (next->start <= end) { /* add head: [start...][n->start...] */
+ struct block_range *head = malloc(sizeof(struct block_range));
+ if (!head)
+ return iter;
+
+ *head = (struct block_range){
+ .start = start,
+ .end = next->start - 1,
+ .is_target = 1,
+ .is_branch = 0,
+ };
+
+ rb_link_left_of_node(&head->node, &next->node);
+ rb_insert_color(&head->node, &block_ranges.root);
+ block_range__debug();
+
+ iter.start = head;
+ goto do_tail;
+ }
+
+do_whole:
+ /*
+ * The whole [start..end] range is non-overlapping.
+ */
+ entry = malloc(sizeof(struct block_range));
+ if (!entry)
+ return iter;
+
+ *entry = (struct block_range){
+ .start = start,
+ .end = end,
+ .is_target = 1,
+ .is_branch = 1,
+ };
+
+ rb_link_node(&entry->node, parent, p);
+ rb_insert_color(&entry->node, &block_ranges.root);
+ block_range__debug();
+
+ iter.start = entry;
+ iter.end = entry;
+ goto done;
+ }
+
+ /*
+ * We found a range that overlapped with ours, split if needed.
+ */
+ if (entry->start < start) { /* split: [e->start...][start...] */
+ struct block_range *head = malloc(sizeof(struct block_range));
+ if (!head)
+ return iter;
+
+ *head = (struct block_range){
+ .start = entry->start,
+ .end = start - 1,
+ .is_target = entry->is_target,
+ .is_branch = 0,
+
+ .coverage = entry->coverage,
+ .entry = entry->entry,
+ };
+
+ entry->start = start;
+ entry->is_target = 1;
+ entry->entry = 0;
+
+ rb_link_left_of_node(&head->node, &entry->node);
+ rb_insert_color(&head->node, &block_ranges.root);
+ block_range__debug();
+
+ } else if (entry->start == start)
+ entry->is_target = 1;
+
+ iter.start = entry;
+
+do_tail:
+ /*
+ * At this point we've got: @iter.start = [@start...] but @end can still be
+ * inside or beyond it.
+ */
+ entry = iter.start;
+ for (;;) {