summaryrefslogtreecommitdiff
path: root/tools/perf/builtin-annotate.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2016-09-09 07:46:13 +0200
committerIngo Molnar <mingo@kernel.org>2016-09-09 07:46:13 +0200
commit14520d630adb4314722dd57ccb689ffdc6a31383 (patch)
tree315bf902dc1d03622dc43c362b6fc48455713149 /tools/perf/builtin-annotate.c
parentc0b172e5b6770048751b2c0a4fe44346c2080c5d (diff)
parent25b8592e912f085ce2ff736a2927584ddeab238c (diff)
downloadlinux-14520d630adb4314722dd57ccb689ffdc6a31383.tar.gz
linux-14520d630adb4314722dd57ccb689ffdc6a31383.tar.bz2
linux-14520d630adb4314722dd57ccb689ffdc6a31383.zip
Merge tag 'perf-core-for-mingo-20160908' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: User visible changes: - Add branch stack / basic block info to 'perf annotate --stdio', where for each branch, we add an asm comment after the instruction with information on how often it was taken and predicted. See example with color output at: http://vger.kernel.org/~acme/perf/annotate_basic_blocks.png (Peter Zijlstra) - Only open an evsel in CPUs in its cpu map, fixing some use cases in systems with multiple PMUs with different CPU maps (Mark Rutland) - Fix handling of huge TLB maps, recognizing it as anonymous (Wang Nan) Infrastructure changes: - Remove the symbol filtering code, i.e. the callbacks passed to all functions that could end up loading a DSO symtab, simplifying the code, eventually allowing what we should have had since day one: removing the 'map' parameter from dso__load() functions (Arnaldo Carvalho de Melo) Arch specific build fixes: - Fix detached tarball build on powerpc, where we were still accessing a file outside tools/ (Ravi Bangoria) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/builtin-annotate.c')
-rw-r--r--tools/perf/builtin-annotate.c104
1 files changed, 104 insertions, 0 deletions
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index f07b23011b22..ebb628332a6e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -30,6 +30,7 @@
#include "util/tool.h"
#include "util/data.h"
#include "arch/common.h"
+#include "util/block-range.h"
#include <dlfcn.h>
#include <linux/bitmap.h>
@@ -46,6 +47,103 @@ struct perf_annotate {
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
+/*
+ * Given one basic block:
+ *
+ * from to branch_i
+ * * ----> *
+ * |
+ * | block
+ * v
+ * * ----> *
+ * from to branch_i+1
+ *
+ * where the horizontal are the branches and the vertical is the executed
+ * block of instructions.
+ *
+ * We count, for each 'instruction', the number of blocks that covered it as
+ * well as count the ratio each branch is taken.
+ *
+ * We can do this without knowing the actual instruction stream by keeping
+ * track of the address ranges. We break down ranges such that there is no
+ * overlap and iterate from the start until the end.
+ *
+ * @acme: once we parse the objdump output _before_ processing the samples,
+ * we can easily fold the branch.cycles IPC bits in.
+ */
+static void process_basic_block(struct addr_map_symbol *start,
+ struct addr_map_symbol *end,
+ struct branch_flags *flags)
+{
+ struct symbol *sym = start->sym;
+ struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
+ struct block_range_iter iter;
+ struct block_range *entry;
+
+ /*
+ * Sanity; NULL isn't executable and the CPU cannot execute backwards
+ */
+ if (!start->addr || start->addr > end->addr)
+ return;
+
+ iter = block_range__create(start->addr, end->addr);
+ if (!block_range_iter__valid(&iter))
+ return;
+
+ /*
+ * First block in range is a branch target.
+ */
+ entry = block_range_iter(&iter);
+ assert(entry->is_target);
+ entry->entry++;
+
+ do {
+ entry = block_range_iter(&iter);
+
+ entry->coverage++;
+ entry->sym = sym;
+
+ if (notes)
+ notes->max_coverage = max(notes->max_coverage, entry->coverage);
+
+ } while (block_range_iter__next(&iter));
+
+ /*
+ * Last block in rage is a branch.
+ */
+ entry = block_range_iter(&iter);
+ assert(entry->is_branch);
+ entry->taken++;
+ if (flags->predicted)
+ entry->pred++;
+}
+
+static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
+ struct perf_sample *sample)
+{
+ struct addr_map_symbol *prev = NULL;
+ struct branch_info *bi;
+ int i;
+
+ if (!bs || !bs->nr)
+ return;
+
+ bi = sample__resolve_bstack(sample, al);
+ if (!bi)
+ return;
+
+ for (i = bs->nr - 1; i >= 0; i--) {
+ /*
+ * XXX filter against symbol
+ */
+ if (prev)
+ process_basic_block(prev, &bi[i].from, &bi[i].flags);
+ prev = &bi[i].to;
+ }
+
+ free(bi);
+}
+
static int perf_evsel__add_sample(struct perf_evsel *evsel,
struct perf_sample *sample,
struct addr_location *al,
@@ -72,6 +170,12 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return 0;
}
+ /*
+ * XXX filtered samples can still have branch entires pointing into our
+ * symbol and are missed.
+ */
+ process_branch_stack(sample->branch_stack, al, sample);
+
sample->period = 1;
sample->weight = 1;