diff options
Diffstat (limited to 'kernel/trace')
| -rw-r--r-- | kernel/trace/Kconfig | 26 | ||||
| -rw-r--r-- | kernel/trace/Makefile | 2 | ||||
| -rw-r--r-- | kernel/trace/blktrace.c | 2 | ||||
| -rw-r--r-- | kernel/trace/bpf_trace.c | 129 | ||||
| -rw-r--r-- | kernel/trace/ftrace.c | 31 | ||||
| -rw-r--r-- | kernel/trace/power-traces.c | 1 | ||||
| -rw-r--r-- | kernel/trace/ring_buffer.c | 35 | ||||
| -rw-r--r-- | kernel/trace/trace.c | 275 | ||||
| -rw-r--r-- | kernel/trace/trace.h | 190 | ||||
| -rw-r--r-- | kernel/trace/trace_event_perf.c | 40 | ||||
| -rw-r--r-- | kernel/trace/trace_events.c | 347 | ||||
| -rw-r--r-- | kernel/trace/trace_events_filter.c | 77 | ||||
| -rw-r--r-- | kernel/trace/trace_events_hist.c | 1755 | ||||
| -rw-r--r-- | kernel/trace/trace_events_trigger.c | 215 | ||||
| -rw-r--r-- | kernel/trace/trace_kprobe.c | 10 | ||||
| -rw-r--r-- | kernel/trace/trace_syscalls.c | 13 | ||||
| -rw-r--r-- | kernel/trace/trace_uprobe.c | 5 | ||||
| -rw-r--r-- | kernel/trace/tracing_map.c | 1062 | ||||
| -rw-r--r-- | kernel/trace/tracing_map.h | 283 |
19 files changed, 4156 insertions, 342 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e45db6b0d878..fafeaf803bd0 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -528,6 +528,32 @@ config MMIOTRACE See Documentation/trace/mmiotrace.txt. If you are not helping to develop drivers, say N. +config TRACING_MAP + bool + depends on ARCH_HAVE_NMI_SAFE_CMPXCHG + help + tracing_map is a special-purpose lock-free map for tracing, + separated out as a stand-alone facility in order to allow it + to be shared between multiple tracers. It isn't meant to be + generally used outside of that context, and is normally + selected by tracers that use it. + +config HIST_TRIGGERS + bool "Histogram triggers" + depends on ARCH_HAVE_NMI_SAFE_CMPXCHG + select TRACING_MAP + default n + help + Hist triggers allow one or more arbitrary trace event fields + to be aggregated into hash tables and dumped to stdout by + reading a debugfs/tracefs file. They're useful for + gathering quick and dirty (though precise) summaries of + event activity as an initial guide for further investigation + using more advanced tools. + + See Documentation/trace/events.txt. + If in doubt, say N. + config MMIOTRACE_TEST tristate "Test module for mmiotrace" depends on MMIOTRACE && m diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 9b1044e936a6..979e7bfbde7a 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_seq.o obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o +obj-$(CONFIG_TRACING_MAP) += tracing_map.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o @@ -53,6 +54,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o +obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o obj-$(CONFIG_TRACEPOINTS) += power-traces.o diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index f94e7a21f52d..9aef8654e90d 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1349,6 +1349,7 @@ static enum print_line_t print_one_line(struct trace_iterator *iter, if (t->action == BLK_TN_MESSAGE) { log_action(iter, long_act ? "message" : "m"); blk_log_msg(s, iter->ent); + return trace_handle_return(s); } if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) @@ -1551,6 +1552,7 @@ static const struct { { BLK_TC_COMPLETE, "complete" }, { BLK_TC_FS, "fs" }, { BLK_TC_PC, "pc" }, + { BLK_TC_NOTIFY, "notify" }, { BLK_TC_AHEAD, "ahead" }, { BLK_TC_META, "meta" }, { BLK_TC_DISCARD, "discard" }, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 3e4ffb3ace5f..780bcbe1d4de 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -62,17 +62,21 @@ EXPORT_SYMBOL_GPL(trace_call_bpf); static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { void *dst = (void *) (long) r1; - int size = (int) r2; + int ret, size = (int) r2; void *unsafe_ptr = (void *) (long) r3; - return probe_kernel_read(dst, unsafe_ptr, size); + ret = probe_kernel_read(dst, unsafe_ptr, size); + if (unlikely(ret < 0)) + memset(dst, 0, size); + + return ret; } static const struct bpf_func_proto bpf_probe_read_proto = { .func = bpf_probe_read, .gpl_only = true, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_STACK, + .arg1_type = ARG_PTR_TO_RAW_STACK, .arg2_type = ARG_CONST_STACK_SIZE, .arg3_type = ARG_ANYTHING, }; @@ -221,11 +225,12 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = { .arg2_type = ARG_ANYTHING, }; -static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size) +static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) { struct pt_regs *regs = (struct pt_regs *) (long) r1; struct bpf_map *map = (struct bpf_map *) (long) r2; struct bpf_array *array = container_of(map, struct bpf_array, map); + u64 index = flags & BPF_F_INDEX_MASK; void *data = (void *) (long) r4; struct perf_sample_data sample_data; struct perf_event *event; @@ -235,6 +240,10 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size) .data = data, }; + if (unlikely(flags & ~(BPF_F_INDEX_MASK))) + return -EINVAL; + if (index == BPF_F_CURRENT_CPU) + index = raw_smp_processor_id(); if (unlikely(index >= array->map.max_entries)) return -E2BIG; @@ -268,7 +277,34 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { .arg5_type = ARG_CONST_STACK_SIZE, }; -static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) +static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); + +static u64 bpf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) +{ + struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); + + perf_fetch_caller_regs(regs); + + return bpf_perf_event_output((long)regs, r2, flags, r4, size); +} + +static const struct bpf_func_proto bpf_event_output_proto = { + .func = bpf_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_STACK, + .arg5_type = ARG_CONST_STACK_SIZE, +}; + +const struct bpf_func_proto *bpf_get_event_output_proto(void) +{ + return &bpf_event_output_proto; +} + +static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) { switch (func_id) { case BPF_FUNC_map_lookup_elem: @@ -295,12 +331,20 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_smp_processor_id_proto; case BPF_FUNC_perf_event_read: return &bpf_perf_event_read_proto; + default: + return NULL; + } +} + +static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { case BPF_FUNC_perf_event_output: return &bpf_perf_event_output_proto; case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto; default: - return NULL; + return tracing_func_proto(func_id); } } @@ -332,9 +376,82 @@ static struct bpf_prog_type_list kprobe_tl = { .type = BPF_PROG_TYPE_KPROBE, }; +static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size) +{ + /* + * r1 points to perf tracepoint buffer where first 8 bytes are hidden + * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it + * from there and call the same bpf_perf_event_output() helper + */ + u64 ctx = *(long *)(uintptr_t)r1; + + return bpf_perf_event_output(ctx, r2, index, r4, size); +} + +static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { + .func = bpf_perf_event_output_tp, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_STACK, + .arg5_type = ARG_CONST_STACK_SIZE, +}; + +static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + u64 ctx = *(long *)(uintptr_t)r1; + + return bpf_get_stackid(ctx, r2, r3, r4, r5); +} + +static const struct bpf_func_proto bpf_get_stackid_proto_tp = { + .func = bpf_get_stackid_tp, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + +static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_perf_event_output: + return &bpf_perf_event_output_proto_tp; + case BPF_FUNC_get_stackid: + return &bpf_get_stackid_proto_tp; + default: + return tracing_func_proto(func_id); + } +} + +static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type) +{ + if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) + return false; + if (type != BPF_READ) + return false; + if (off % size != 0) + return false; + return true; +} + +static const struct bpf_verifier_ops tracepoint_prog_ops = { + .get_func_proto = tp_prog_func_proto, + .is_valid_access = tp_prog_is_valid_access, +}; + +static struct bpf_prog_type_list tracepoint_tl = { + .ops = &tracepoint_prog_ops, + .type = BPF_PROG_TYPE_TRACEPOINT, +}; + static int __init register_kprobe_prog_ops(void) { bpf_register_prog_type(&kprobe_tl); + bpf_register_prog_type(&tracepoint_tl); return 0; } late_initcall(register_kprobe_prog_ops); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b1870fbd2b67..900dbb1efff2 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1530,7 +1530,19 @@ static int ftrace_cmp_recs(const void *a, const void *b) return 0; } -static unsigned long ftrace_location_range(unsigned long start, unsigned long end) +/** + * ftrace_location_range - return the first address of a traced location + * if it touches the given ip range + * @start: start of range to search. + * @end: end of range to search (inclusive). @end points to the last byte + * to check. + * + * Returns rec->ip if the related ftrace location is a least partly within + * the given address range. That is, the first address of the instruction + * that is either a NOP or call to the function tracer. It checks the ftrace + * internal tables to determine if the address belongs or not. + */ +unsigned long ftrace_location_range(unsigned long start, unsigned long end) { struct ftrace_page *pg; struct dyn_ftrace *rec; @@ -3444,11 +3456,23 @@ struct ftrace_glob { int type; }; +/* + * If symbols in an architecture don't correspond exactly to the user-visible + * name of what they represent, it is possible to define this function to + * perform the necessary adjustments. +*/ +char * __weak arch_ftrace_match_adjust(char *str, const char *search) +{ + return str; +} + static int ftrace_match(char *str, struct ftrace_glob *g) { int matched = 0; int slen; + str = arch_ftrace_match_adjust(str, g->search); + switch (g->type) { case MATCH_FULL: if (strcmp(str, g->search) == 0) @@ -5713,7 +5737,6 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) { int i; int ret = 0; - unsigned long flags; int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE; struct task_struct *g, *t; @@ -5729,7 +5752,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) } } - read_lock_irqsave(&tasklist_lock, flags); + read_lock(&tasklist_lock); do_each_thread(g, t) { if (start == end) { ret = -EAGAIN; @@ -5747,7 +5770,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) } while_each_thread(g, t); unlock: - read_unlock_irqrestore(&tasklist_lock, flags); + read_unlock(&tasklist_lock); free: for (i = start; i < end; i++) kfree(ret_stack_list[i]); diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index 81b87451c0ea..0c7dee221dca 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -15,5 +15,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume); EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); +EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_frequency); EXPORT_TRACEPOINT_SYMBOL_GPL(powernv_throttle); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 95181e36891a..9c143739b8d7 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -437,7 +437,7 @@ struct ring_buffer_per_cpu { raw_spinlock_t reader_lock; /* serialize readers */ arch_spinlock_t lock; struct lock_class_key lock_key; - unsigned int nr_pages; + unsigned long nr_pages; unsigned int current_context; struct list_head *pages; struct buffer_page *head_page; /* read from head */ @@ -458,7 +458,7 @@ struct ring_buffer_per_cpu { u64 write_stamp; u64 read_stamp; /* ring buffer pages to update, > 0 to add, < 0 to remove */ - int nr_pages_to_update; + long nr_pages_to_update; struct list_head new_pages; /* new pages to add */ struct work_struct update_pages_work; struct completion update_done; @@ -1128,10 +1128,10 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) return 0; } -static int __rb_allocate_pages(int nr_pages, struct list_head *pages, int cpu) +static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu) { - int i; struct buffer_page *bpage, *tmp; + long i; for (i = 0; i < nr_pages; i++) { struct page *page; @@ -1168,7 +1168,7 @@ free_pages: } static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, - unsigned nr_pages) + unsigned long nr_pages) { LIST_HEAD(pages); @@ -1193,7 +1193,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, } static struct ring_buffer_per_cpu * -rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu) +rb_allocate_cpu_buffer(struct ring_buffer *buffer, long nr_pages, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; struct buffer_page *bpage; @@ -1293,8 +1293,9 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key) { struct ring_buffer *buffer; + long nr_pages; int bsize; - int cpu, nr_pages; + int cpu; /* keep it in its own cache line */ buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), @@ -1420,12 +1421,12 @@ static inline unsigned long rb_page_write(struct buffer_page *bpage) } static int -rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned int nr_pages) +rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) { struct list_head *tail_page, *to_remove, *next_page; struct buffer_page *to_remove_page, *tmp_iter_page; struct buffer_page *last_page, *first_page; - unsigned int nr_removed; + unsigned long nr_removed; unsigned long head_bit; int page_entries; @@ -1642,7 +1643,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, int cpu_id) { struct ring_buffer_per_cpu *cpu_buffer; - unsigned nr_pages; + unsigned long nr_pages; int cpu, err = 0; /* @@ -1656,14 +1657,13 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, !cpumask_test_cpu(cpu_id, buffer->cpumask)) return size; - size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); - size *= BUF_PAGE_SIZE; + nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); /* we need a minimum of two pages */ - if (size < BUF_PAGE_SIZE * 2) - size = BUF_PAGE_SIZE * 2; + if (nr_pages < 2) + nr_pages = 2; - nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); + size = nr_pages * BUF_PAGE_SIZE; /* * Don't succeed if resizing is disabled, as a reader might be @@ -4640,8 +4640,9 @@ static int rb_cpu_notify(struct notifier_block *self, struct ring_buffer *buffer = container_of(self, struct ring_buffer, cpu_notify); long cpu = (long)hcpu; - int cpu_i, nr_pages_same; - unsigned int nr_pages; + long nr_pages_same; + int cpu_i; + unsigned long nr_pages; switch (action) { case CPU_UP_PREPARE: diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a2f0b9f33e9b..8a4bd6b68a0b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -253,6 +253,9 @@ unsigned long long ns2usecs(cycle_t nsec) #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \ TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD) +/* trace_flags that are default zero for instances */ +#define ZEROED_TRACE_FLAGS \ + TRACE_ITER_EVENT_FORK /* * The global_trace is the descriptor that holds the tracing @@ -303,33 +306,18 @@ void trace_array_put(struct trace_array *this_tr) mutex_unlock(&trace_types_lock); } -int filter_check_discard(struct trace_event_file *file, void *rec, - struct ring_buffer *buffer, - struct ring_buffer_event *event) -{ - if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && - !filter_match_preds(file->filter, rec)) { - ring_buffer_discard_commit(buffer, event); - return 1; - } - - return 0; -} -EXPORT_SYMBOL_GPL(filter_check_discard); - int call_filter_check_discard(struct trace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event) { if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && !filter_match_preds(call->filter, rec)) { - ring_buffer_discard_commit(buffer, event); + __trace_event_discard_commit(buffer, event); return 1; } return 0; } -EXPORT_SYMBOL_GPL(call_filter_check_discard); static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu) { @@ -1672,6 +1660,16 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, } EXPORT_SYMBOL_GPL(tracing_generic_entry_update); +static __always_inline void +trace_event_setup(struct ring_buffer_event *event, + int type, unsigned long flags, int pc) +{ + struct trace_entry *ent = ring_buffer_event_data(event); + + tracing_generic_entry_update(ent, flags, pc); + ent->type = type; +} + struct ring_buffer_event * trace_buffer_lock_reserve(struct ring_buffer *buffer, int type, @@ -1681,34 +1679,137 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer, struct ring_buffer_event *event; event = ring_buffer_lock_reserve(buffer, len); - if (event != NULL) { - struct trace_entry *ent = ring_buffer_event_data(event); + if (event != NULL) + trace_event_setup(event, type, flags, pc); + + return event; +} + +DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); +DEFINE_PER_CPU(int, trace_buffered_event_cnt); +static int trace_buffered_event_ref; + +/** + * trace_buffered_event_enable - enable buffering events + * + * When events are being filtered, it is quicker to use a temporary + * buffer to write the event data into if there's a likely chance + * that it will not be committed. The discard of the ring buffer + * is not as fast as committing, and is much slower than copying + * a commit. + * + * When an event is to be filtered, allocate per cpu buffers to + * write the event data into, and if the event is filtered and discarded + * it is simply dropped, otherwise, the entire data is to be committed + * in one shot. + */ +void trace_buffered_event_enable(void) +{ + struct ring_buffer_event *event; + struct page *page; + int cpu; - tracing_generic_entry_update(ent, flags, pc); - ent->type = type; + WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); + + if (trace_buffered_event_ref++) + return; + + for_each_tracing_cpu(cpu) { + page = alloc_pages_node(cpu_to_node(cpu), + GFP_KERNEL | __GFP_NORETRY, 0); + if (!page) + goto failed; + + event = page_address(page); + memset(event, 0, sizeof(*event)); + + per_cpu(trace_buffered_event, cpu) = event; + + preempt_disable(); + if (cpu == smp_processor_id() && + this_cpu_read(trace_buffered_event) != + per_cpu(trace_buffered_event, cpu)) + WARN_ON_ONCE(1); + preempt_enable(); } - return event; + return; + failed: + trace_buffered_event_disable(); } -void -__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) +static void enable_trace_buffered_event(void *data) { - __this_cpu_write(trace_cmdline_save, true); - ring_buffer_unlock_commit(buffer, event); + /* Probably not needed, but do it anyway */ + smp_rmb(); + this_cpu_dec(trace_buffered_event_cnt); } -void trace_buffer_unlock_commit(struct trace_array *tr, - struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc) +static void disable_trace_buffered_event(void *data) { - __buffer_unlock_commit(buffer, event); + this_cpu_inc(trace_buffered_event_cnt); +} - ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL); - ftrace_trace_userstack(buffer, flags, pc); +/** + * trace_buffered_event_disable - disable buffering events + * + * When a filter is removed, it is faster to not use the buffered + * events, and to commit directly into the ring buffer. Free up + * the temp buffers when there are no more users. This requires + * special synchronization with current events. + */ +void trace_buffered_event_disable(void) +{ + int cpu; + + WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); + + if (WARN_ON_ONCE(!trace_buffered_event_ref)) + return; + + if (--trace_buffered_event_ref) + return; + + preempt_disable(); + /* For each CPU, set the buffer as used. */ + smp_call_function_many(tracing_buffer_mask, + disable_trace_buffered_event, NULL, 1); + preempt_enable(); + + /* Wait for all current users to finish */ + synchronize_sched(); + + for_each_tracing_cpu(cpu) { + free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); + per_cpu(trace_buffered_event, cpu) = NULL; + } + /* + * Make sure trace_buffered_event is NULL before clearing + * trace_buffered_event_cnt. + */ + smp_wmb(); + + preempt_disable(); + /* Do the work on each cpu */ + smp_call_function_many(tracing_buffer_mask, + enable_trace_buffered_event, NULL, 1); + preempt_enable(); +} + +void +__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) +{ + __this_cpu_write(trace_cmdline_save, true); + + /* If this is the temp buffer, we need to commit fully */ + if (this_cpu_read(trace_buffered_event) == event) { + /* Length is in event->array[0] */ + ring_buffer_write(buffer, event->array[0], &event->array[1]); + /* Release the temp buffer */ + this_cpu_dec(trace_buffered_event_cnt); + } else + ring_buffer_unlock_commit(buffer, event); } -EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); static struct ring_buffer *temp_buffer; @@ -1719,8 +1820,23 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, unsigned long flags, int pc) { struct ring_buffer_event *entry; + int val; *current_rb = trace_file->tr->trace_buffer.buffer; + + if ((trace_file->flags & + (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && + (entry = this_cpu_read(trace_buffered_event))) { + /* Try to use the per cpu buffer first */ + val = this_cpu_inc_return(trace_buffered_event_cnt); + if (val == 1) { + trace_event_setup(entry, type, flags, pc); + entry->array[0] = len; + return entry; + } + this_cpu_dec(trace_buffered_event_cnt); + } + entry = trace_buffer_lock_reserve(*current_rb, type, len, flags, pc); /* @@ -1738,17 +1854,6 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, } EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); -struct ring_buffer_event * -trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, - int type, unsigned long len, - unsigned long flags, int pc) -{ - *current_rb = global_trace.trace_buffer.buffer; - return trace_buffer_lock_reserve(*current_rb, - type, len, flags, pc); -} -EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); - void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct ring_buffer *buffer, struct ring_buffer_event *event, @@ -1760,14 +1865,6 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, ftrace_trace_stack(tr, buffer, flags, 0, pc, regs); ftrace_trace_userstack(buffer, flags, pc); } -EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs); - -void trace_current_buffer_discard_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event) -{ - ring_buffer_discard_commit(buffer, event); -} -EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit); void trace_function(struct trace_array *tr, @@ -3571,6 +3668,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) if (mask == TRACE_ITER_RECORD_CMD) trace_event_enable_cmd_record(enabled); + if (mask == TRACE_ITER_EVENT_FORK) + trace_event_follow_fork(tr, enabled); + if (mask == TRACE_ITER_OVERWRITE) { ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled); #ifdef CONFIG_TRACER_MAX_TRACE @@ -3658,7 +3758,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, if (cnt >= sizeof(buf)) return -EINVAL; - if (copy_from_user(&buf, ubuf, cnt)) + if (copy_from_user(buf, ubuf, cnt)) return -EFAULT; buf[cnt] = 0; @@ -3804,12 +3904,19 @@ static const char readme_msg[] = "\t trigger: traceon, traceoff\n" "\t enable_event:<system>:<event>\n" "\t disable_event:<system>:<event>\n" +#ifdef CONFIG_HIST_TRIGGERS + "\t enable_hist:<system>:<event>\n" + "\t disable_hist:<system>:<event>\n" +#endif #ifdef CONFIG_STACKTRACE "\t\t stacktrace\n" #endif #ifdef CONFIG_TRACER_SNAPSHOT "\t\t snapshot\n" #endif +#ifdef CONFIG_HIST_TRIGGERS + "\t\t hist (see below)\n" +#endif "\t example: echo traceoff > events/block/block_unplug/trigger\n" "\t echo traceoff:3 > events/block/block_unplug/trigger\n" "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n" @@ -3825,6 +3932,56 @@ static const char readme_msg[] = "\t To remove a trigger with a count:\n" "\t echo '!<trigger>:0 > <system>/<event>/trigger\n" "\t Filters can be ignored when removing a trigger.\n" +#ifdef CONFIG_HIST_TRIGGERS + " hist trigger\t- If set, event hits are aggregated into a hash table\n" + "\t Format: hist:keys=<field1[,field2,...]>\n" + "\t [:values=<field1[,field2,...]>]\n" + "\t [:sort=<field1[,field2,...]>]\n" + "\t [:size=#entries]\n" + "\t [:pause][:continue][:clear]\n" + "\t [:name=histname1]\n" + "\t [if <filter>]\n\n" + "\t When a matching event is hit, an entry is added to a hash\n" + "\t table using the key(s) and value(s) named, and the value of a\n" + "\t sum called 'hitcount' is incremented. Keys and values\n" + "\t correspond to fields in the event's format description. Keys\n" + "\t can be any field, or the special string 'stacktrace'.\n" + "\t Compound keys consisting of up to two fields can be specified\n" + "\t by the 'keys' keyword. Values must correspond to numeric\n" + "\t fields. Sort keys consisting of up to two fields can be\n" + "\t specified using the 'sort' keyword. The sort direction can\n" + "\t be modified by appending '.descending' or '.ascending' to a\n" + "\t sort field. The 'size' parameter can be used to specify more\n" + "\t or fewer than the default 2048 entries for the hashtable size.\n" + "\t If a hist trigger is given a name using the 'name' parameter,\n" + "\t its histogram data will be shared with other triggers of the\n" + "\t same name, and trigger hits will update this common data.\n\n" + "\t Reading the 'hist' file for the event will dump the hash\n" + "\t table in its entirety to stdout. If there are multiple hist\n" + "\t triggers attached to an event, there will be a table for each\n" + "\t trigger in the output. The table displayed for a named\n" + "\t trigger will be the same as any other instance having the\n" + "\t same name. The default format used to display a given field\n" + "\t can be modified by appending any of the following modifiers\n" + "\t to the field name, as applicable:\n\n" + "\t .hex display a number as a hex value\n" + "\t .sym display an address as a symbol\n" + "\t .sym-offset display an address as a symbol and offset\n" + "\t .execname display a common_pid as a program name\n" + "\t .syscall display a syscall id as a syscall name\n\n" + "\t .log2 display log2 value rather than raw number\n\n" + "\t The 'pause' parameter can be used to pause an existing hist\n" + "\t trigger or to start a hist trigger but not log any events\n" + "\t until told to do so. 'continue' can be used to start or\n" + "\t restart a paused hist trigger.\n\n" + "\t The 'clear' parameter will clear the contents of a running\n" + "\t hist trigger and leave its current paused/active state\n" + "\t unchanged.\n\n" + "\t The enable_hist and disable_hist triggers can be used to\n" + "\t have one event conditionally start and stop another event's\n" + "\t already-attached hist trigger. The syntax is analagous to\n" + "\t the enable_event and disable_event triggers.\n" +#endif ; static ssize_t @@ -4474,7 +4631,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, if (cnt > MAX_TRACER_SIZE) cnt = MAX_TRACER_SIZE; - if (copy_from_user(&buf, ubuf, cnt)) + if (copy_from_user(buf, ubuf, cnt)) return -EFAULT; buf[cnt] = 0; @@ -5264,7 +5421,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, if (cnt >= sizeof(buf)) return -EINVAL; - if (copy_from_user(&buf, ubuf, cnt)) + if (copy_from_user(buf, ubuf, cnt)) return -EFAULT; buf[cnt] = 0; @@ -6650,7 +6807,7 @@ static int instance_mkdir(const char *name) if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) goto out_free_tr; - tr->trace_flags = global_trace.trace_flags; + tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; cpumask_copy(tr->tracing_cpumask, cpu_all_mask); @@ -6724,6 +6881,12 @@ static int instance_rmdir(const char *name) list_del(&tr->list); + /* Disable all the flags that were enabled coming in */ + for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { + if ((1 << i) & ZEROED_TRACE_FLAGS) + set_tracer_flag(tr, 1 << i, 0); + } + tracing_set_nop(tr); event_trace_del_tracer(tr); ftrace_destroy_function_files(tr); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3fff4adfd431..5167c366d6b7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -177,9 +177,8 @@ struct trace_options { }; struct trace_pid_list { - unsigned int nr_pids; - int order; - pid_t *pids; + int pid_max; + unsigned long *pids; }; /* @@ -656,6 +655,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags, extern cycle_t ftrace_now(int cpu); extern void trace_find_cmdline(int pid, char comm[]); |
