diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-17 14:58:01 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-17 14:58:01 -0800 |
commit | 2dcd9c71c1ffa9a036e09047f60e08383bb0abb6 (patch) | |
tree | 8caabaf493288a3d5adb46ac49ad5097d0c907c7 /kernel/trace | |
parent | b1c2a344cc19b40d2f1e7cbd9c2f4f205ae6d650 (diff) | |
parent | a96a5037ed0f52e2d86739f4a1ef985bd036e575 (diff) | |
download | linux-2dcd9c71c1ffa9a036e09047f60e08383bb0abb6.tar.gz linux-2dcd9c71c1ffa9a036e09047f60e08383bb0abb6.tar.bz2 linux-2dcd9c71c1ffa9a036e09047f60e08383bb0abb6.zip |
Merge tag 'trace-v4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
Pull tracing updates from
- allow module init functions to be traced
- clean up some unused or not used by config events (saves space)
- clean up of trace histogram code
- add support for preempt and interrupt enabled/disable events
- other various clean ups
* tag 'trace-v4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (30 commits)
tracing, thermal: Hide cpu cooling trace events when not in use
tracing, thermal: Hide devfreq trace events when not in use
ftrace: Kill FTRACE_OPS_FL_PER_CPU
perf/ftrace: Small cleanup
perf/ftrace: Fix function trace events
perf/ftrace: Revert ("perf/ftrace: Fix double traces of perf on ftrace:function")
tracing, dma-buf: Remove unused trace event dma_fence_annotate_wait_on
tracing, memcg, vmscan: Hide trace events when not in use
tracing/xen: Hide events that are not used when X86_PAE is not defined
tracing: mark trace_test_buffer as __maybe_unused
printk: Remove superfluous memory barriers from printk_safe
ftrace: Clear hashes of stale ips of init memory
tracing: Add support for preempt and irq enable/disable events
tracing: Prepare to add preempt and irq trace events
ftrace/kallsyms: Have /proc/kallsyms show saved mod init functions
ftrace: Add freeing algorithm to free ftrace_mod_maps
ftrace: Save module init functions kallsyms symbols for tracing
ftrace: Allow module init functions to be traced
ftrace: Add a ftrace_free_mem() function for modules to use
tracing: Reimplement log2
...
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/Kconfig | 11 | ||||
-rw-r--r-- | kernel/trace/Makefile | 1 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 354 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 64 | ||||
-rw-r--r-- | kernel/trace/trace.c | 91 | ||||
-rw-r--r-- | kernel/trace/trace.h | 9 | ||||
-rw-r--r-- | kernel/trace/trace_event_perf.c | 82 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 31 | ||||
-rw-r--r-- | kernel/trace/trace_events_hist.c | 128 | ||||
-rw-r--r-- | kernel/trace/trace_irqsoff.c | 133 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 22 | ||||
-rw-r--r-- | kernel/trace/trace_probe.c | 86 | ||||
-rw-r--r-- | kernel/trace/trace_probe.h | 7 | ||||
-rw-r--r-- | kernel/trace/trace_selftest.c | 34 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 4 | ||||
-rw-r--r-- | kernel/trace/tracing_map.c | 3 | ||||
-rw-r--r-- | kernel/trace/tracing_map.h | 2 |
18 files changed, 703 insertions, 363 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index f54b7b6b4a4b..af7dad126c13 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -160,6 +160,17 @@ config FUNCTION_GRAPH_TRACER address on the current task structure into a stack of calls. +config PREEMPTIRQ_EVENTS + bool "Enable trace events for preempt and irq disable/enable" + select TRACE_IRQFLAGS + depends on DEBUG_PREEMPT || !PROVE_LOCKING + default n + help + Enable tracing of disable and enable events for preemption and irqs. + For tracing preempt disable/enable events, DEBUG_PREEMPT must be + enabled. For tracing irq disable/enable events, PROVE_LOCKING must + be disabled. + config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 19a15b2f1190..e2538c7638d4 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_TRACING) += trace_printk.o obj-$(CONFIG_TRACING_MAP) += tracing_map.o obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o +obj-$(CONFIG_PREEMPTIRQ_EVENTS) += trace_irqsoff.o obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8319e09e15b9..ccdf3664e4a9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -203,30 +203,6 @@ void clear_ftrace_function(void) ftrace_trace_function = ftrace_stub; } -static void per_cpu_ops_disable_all(struct ftrace_ops *ops) -{ - int cpu; - - for_each_possible_cpu(cpu) - *per_cpu_ptr(ops->disabled, cpu) = 1; -} - -static int per_cpu_ops_alloc(struct ftrace_ops *ops) -{ - int __percpu *disabled; - - if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) - return -EINVAL; - - disabled = alloc_percpu(int); - if (!disabled) - return -ENOMEM; - - ops->disabled = disabled; - per_cpu_ops_disable_all(ops); - return 0; -} - static void ftrace_sync(struct work_struct *work) { /* @@ -262,8 +238,8 @@ static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) * If this is a dynamic, RCU, or per CPU ops, or we force list func, * then it needs to call the list anyway. */ - if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU | - FTRACE_OPS_FL_RCU) || FTRACE_FORCE_LIST_FUNC) + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_RCU) || + FTRACE_FORCE_LIST_FUNC) return ftrace_ops_list_func; return ftrace_ops_get_func(ops); @@ -422,11 +398,6 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; - if (ops->flags & FTRACE_OPS_FL_PER_CPU) { - if (per_cpu_ops_alloc(ops)) - return -ENOMEM; - } - add_ftrace_ops(&ftrace_ops_list, ops); /* Always save the function, and reset at unregistering */ @@ -2727,11 +2698,6 @@ void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops) { } -static void per_cpu_ops_free(struct ftrace_ops *ops) -{ - free_percpu(ops->disabled); -} - static void ftrace_startup_enable(int command) { if (saved_ftrace_func != ftrace_trace_function) { @@ -2833,7 +2799,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * not currently active, we can just free them * without synchronizing all CPUs. */ - if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) + if (ops->flags & FTRACE_OPS_FL_DYNAMIC) goto free_ops; return 0; @@ -2880,7 +2846,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * The same goes for freeing the per_cpu data of the per_cpu * ops. */ - if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) { + if (ops->flags & FTRACE_OPS_FL_DYNAMIC) { /* * We need to do a hard force of sched synchronization. * This is because we use preempt_disable() to do RCU, but @@ -2903,9 +2869,6 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) free_ops: arch_ftrace_trampoline_free(ops); - - if (ops->flags & FTRACE_OPS_FL_PER_CPU) - per_cpu_ops_free(ops); } return 0; @@ -5672,10 +5635,29 @@ static int ftrace_process_locs(struct module *mod, return ret; } +struct ftrace_mod_func { + struct list_head list; + char *name; + unsigned long ip; + unsigned int size; +}; + +struct ftrace_mod_map { + struct rcu_head rcu; + struct list_head list; + struct module *mod; + unsigned long start_addr; + unsigned long end_addr; + struct list_head funcs; + unsigned int num_funcs; +}; + #ifdef CONFIG_MODULES #define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) +static LIST_HEAD(ftrace_mod_maps); + static int referenced_filters(struct dyn_ftrace *rec) { struct ftrace_ops *ops; @@ -5729,8 +5711,26 @@ static void clear_mod_from_hashes(struct ftrace_page *pg) mutex_unlock(&trace_types_lock); } +static void ftrace_free_mod_map(struct rcu_head *rcu) +{ + struct ftrace_mod_map *mod_map = container_of(rcu, struct ftrace_mod_map, rcu); + struct ftrace_mod_func *mod_func; + struct ftrace_mod_func *n; + + /* All the contents of mod_map are now not visible to readers */ + list_for_each_entry_safe(mod_func, n, &mod_map->funcs, list) { + kfree(mod_func->name); + list_del(&mod_func->list); + kfree(mod_func); + } + + kfree(mod_map); +} + void ftrace_release_mod(struct module *mod) { + struct ftrace_mod_map *mod_map; + struct ftrace_mod_map *n; struct dyn_ftrace *rec; struct ftrace_page **last_pg; struct ftrace_page *tmp_page = NULL; @@ -5742,6 +5742,14 @@ void ftrace_release_mod(struct module *mod) if (ftrace_disabled) goto out_unlock; + list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) { + if (mod_map->mod == mod) { + list_del_rcu(&mod_map->list); + call_rcu_sched(&mod_map->rcu, ftrace_free_mod_map); + break; + } + } + /* * Each module has its own ftrace_pages, remove * them from the list. @@ -5749,7 +5757,8 @@ void ftrace_release_mod(struct module *mod) last_pg = &ftrace_pages_start; for (pg = ftrace_pages_start; pg; pg = *last_pg) { rec = &pg->records[0]; - if (within_module_core(rec->ip, mod)) { + if (within_module_core(rec->ip, mod) || + within_module_init(rec->ip, mod)) { /* * As core pages are first, the first * page should never be a module page. @@ -5818,7 +5827,8 @@ void ftrace_module_enable(struct module *mod) * not part of this module, then skip this pg, * which the "break" will do. */ - if (!within_module_core(rec->ip, mod)) + if (!within_module_core(rec->ip, mod) && + !within_module_init(rec->ip, mod)) break; cnt = 0; @@ -5863,23 +5873,245 @@ void ftrace_module_init(struct module *mod) ftrace_process_locs(mod, mod->ftrace_callsites, mod->ftrace_callsites + mod->num_ftrace_callsites); } + +static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, + struct dyn_ftrace *rec) +{ + struct ftrace_mod_func *mod_func; + unsigned long symsize; + unsigned long offset; + char str[KSYM_SYMBOL_LEN]; + char *modname; + const char *ret; + + ret = kallsyms_lookup(rec->ip, &symsize, &offset, &modname, str); + if (!ret) + return; + + mod_func = kmalloc(sizeof(*mod_func), GFP_KERNEL); + if (!mod_func) + return; + + mod_func->name = kstrdup(str, GFP_KERNEL); + if (!mod_func->name) { + kfree(mod_func); + return; + } + + mod_func->ip = rec->ip - offset; + mod_func->size = symsize; + + mod_map->num_funcs++; + + list_add_rcu(&mod_func->list, &mod_map->funcs); +} + +static struct ftrace_mod_map * +allocate_ftrace_mod_map(struct module *mod, + unsigned long start, unsigned long end) +{ + struct ftrace_mod_map *mod_map; + + mod_map = kmalloc(sizeof(*mod_map), GFP_KERNEL); + if (!mod_map) + return NULL; + + mod_map->mod = mod; + mod_map->start_addr = start; + mod_map->end_addr = end; + mod_map->num_funcs = 0; + + INIT_LIST_HEAD_RCU(&mod_map->funcs); + + list_add_rcu(&mod_map->list, &ftrace_mod_maps); + + return mod_map; +} + +static const char * +ftrace_func_address_lookup(struct ftrace_mod_map *mod_map, + unsigned long addr, unsigned long *size, + unsigned long *off, char *sym) +{ + struct ftrace_mod_func *found_func = NULL; + struct ftrace_mod_func *mod_func; + + list_for_each_entry_rcu(mod_func, &mod_map->funcs, list) { + if (addr >= mod_func->ip && + addr < mod_func->ip + mod_func->size) { + found_func = mod_func; + break; + } + } + + if (found_func) { + if (size) + *size = found_func->size; + if (off) + *off = addr - found_func->ip; + if (sym) + strlcpy(sym, found_func->name, KSYM_NAME_LEN); + + return found_func->name; + } + + return NULL; +} + +const char * +ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym) +{ + struct ftrace_mod_map *mod_map; + const char *ret = NULL; + + /* mod_map is freed via call_rcu_sched() */ + preempt_disable(); + list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { + ret = ftrace_func_address_lookup(mod_map, addr, size, off, sym); + if (ret) { + if (modname) + *modname = mod_map->mod->name; + break; + } + } + preempt_enable(); + + return ret; +} + +int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *name, + char *module_name, int *exported) +{ + struct ftrace_mod_map *mod_map; + struct ftrace_mod_func *mod_func; + + preempt_disable(); + list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { + + if (symnum >= mod_map->num_funcs) { + symnum -= mod_map->num_funcs; + continue; + } + + list_for_each_entry_rcu(mod_func, &mod_map->funcs, list) { + if (symnum > 1) { + symnum--; + continue; + } + + *value = mod_func->ip; + *type = 'T'; + strlcpy(name, mod_func->name, KSYM_NAME_LEN); + strlcpy(module_name, mod_map->mod->name, MODULE_NAME_LEN); + *exported = 1; + preempt_enable(); + return 0; + } + WARN_ON(1); + break; + } + preempt_enable(); + return -ERANGE; +} + +#else +static void save_ftrace_mod_rec(struct ftrace_mod_map *mod_map, + struct dyn_ftrace *rec) { } +static inline struct ftrace_mod_map * +allocate_ftrace_mod_map(struct module *mod, + unsigned long start, unsigned long end) +{ + return NULL; +} #endif /* CONFIG_MODULES */ -void __init ftrace_free_init_mem(void) +struct ftrace_init_func { + struct list_head list; + unsigned long ip; +}; + +/* Clear any init ips from hashes */ +static void +clear_func_from_hash(struct ftrace_init_func *func, struct ftrace_hash *hash) +{ + struct ftrace_func_entry *entry; + + if (ftrace_hash_empty(hash)) + return; + + entry = __ftrace_lookup_ip(hash, func->ip); + + /* + * Do not allow this rec to match again. + * Yeah, it may waste some memory, but will be removed + * if/when the hash is modified again. + */ + if (entry) + entry->ip = 0; +} + +static void +clear_func_from_hashes(struct ftrace_init_func *func) +{ + struct trace_array *tr; + + mutex_lock(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (!tr->ops || !tr->ops->func_hash) + continue; + mutex_lock(&tr->ops->func_hash->regex_lock); + clear_func_from_hash(func, tr->ops->func_hash->filter_hash); + clear_func_from_hash(func, tr->ops->func_hash->notrace_hash); + mutex_unlock(&tr->ops->func_hash->regex_lock); + } + mutex_unlock(&trace_types_lock); +} + +static void add_to_clear_hash_list(struct list_head *clear_list, + struct dyn_ftrace *rec) +{ + struct ftrace_init_func *func; + + func = kmalloc(sizeof(*func), GFP_KERNEL); + if (!func) { + WARN_ONCE(1, "alloc failure, ftrace filter could be stale\n"); + return; + } + + func->ip = rec->ip; + list_add(&func->list, clear_list); +} + +void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) { - unsigned long start = (unsigned long)(&__init_begin); - unsigned long end = (unsigned long)(&__init_end); + unsigned long start = (unsigned long)(start_ptr); + unsigned long end = (unsigned long)(end_ptr); struct ftrace_page **last_pg = &ftrace_pages_start; struct ftrace_page *pg; struct dyn_ftrace *rec; struct dyn_ftrace key; + struct ftrace_mod_map *mod_map = NULL; + struct ftrace_init_func *func, *func_next; + struct list_head clear_hash; int order; + INIT_LIST_HEAD(&clear_hash); + key.ip = start; key.flags = end; /* overload flags, as it is unsigned long */ mutex_lock(&ftrace_lock); + /* + * If we are freeing module init memory, then check if + * any tracer is active. If so, we need to save a mapping of + * the module functions being freed with the address. + */ + if (mod && ftrace_ops_list != &ftrace_list_end) + mod_map = allocate_ftrace_mod_map(mod, start, end); + for (pg = ftrace_pages_start; pg; last_pg = &pg->next, pg = *last_pg) { if (end < pg->records[0].ip || start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) @@ -5890,6 +6122,13 @@ void __init ftrace_free_init_mem(void) ftrace_cmp_recs); if (!rec) continue; + + /* rec will be cleared from hashes after ftrace_lock unlock */ + add_to_clear_hash_list(&clear_hash, rec); + + if (mod_map) + save_ftrace_mod_rec(mod_map, rec); + pg->index--; ftrace_update_tot_cnt--; if (!pg->index) { @@ -5908,6 +6147,19 @@ void __init ftrace_free_init_mem(void) goto again; } mutex_unlock(&ftrace_lock); + + list_for_each_entry_safe(func, func_next, &clear_hash, list) { + clear_func_from_hashes(func); + kfree(func); + } +} + +void __init ftrace_free_init_mem(void) +{ + void *start = (void *)(&__init_begin); + void *end = (void *)(&__init_end); + + ftrace_free_mem(NULL, start, end); } void __init ftrace_init(void) @@ -6063,10 +6315,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, * If any of the above fails then the op->func() is not executed. */ if ((!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) && - (!(op->flags & FTRACE_OPS_FL_PER_CPU) || - !ftrace_function_local_disabled(op)) && ftrace_ops_test(op, ip, regs)) { - if (FTRACE_WARN_ON(!op->func)) { pr_warn("op=%p %pS\n", op, op); goto out; @@ -6124,10 +6373,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, preempt_disable_notrace(); - if (!(op->flags & FTRACE_OPS_FL_PER_CPU) || - !ftrace_function_local_disabled(op)) { - op->func(ip, parent_ip, op, regs); - } + op->func(ip, parent_ip, op, regs); preempt_enable_notrace(); trace_clear_recursion(bit); @@ -6151,7 +6397,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) * or does per cpu logic, then we need to call the assist handler. */ if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE) || - ops->flags & (FTRACE_OPS_FL_RCU | FTRACE_OPS_FL_PER_CPU)) + ops->flags & FTRACE_OPS_FL_RCU) return ftrace_ops_assist_func; return ops->func; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d57fede84b38..91874a95060d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2536,61 +2536,29 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * The lock and unlock are done within a preempt disable section. * The current_context per_cpu variable can only be modified * by the current task between lock and unlock. But it can - * be modified more than once via an interrupt. To pass this - * information from the lock to the unlock without having to - * access the 'in_interrupt()' functions again (which do show - * a bit of overhead in something as critical as function tracing, - * we use a bitmask trick. + * be modified more than once via an interrupt. There are four + * different contexts that we need to consider. * - * bit 0 = NMI context - * bit 1 = IRQ context - * bit 2 = SoftIRQ context - * bit 3 = normal context. + * Normal context. + * SoftIRQ context + * IRQ context + * NMI context * - * This works because this is the order of contexts that can - * preempt other contexts. A SoftIRQ never preempts an IRQ - * context. - * - * When the context is determined, the corresponding bit is - * checked and set (if it was set, then a recursion of that context - * happened). - * - * On unlock, we need to clear this bit. To do so, just subtract - * 1 from the current_context and AND it to itself. - * - * (binary) - * 101 - 1 = 100 - * 101 & 100 = 100 (clearing bit zero) - * - * 1010 - 1 = 1001 - * 1010 & 1001 = 1000 (clearing bit 1) - * - * The least significant bit can be cleared this way, and it - * just so happens that it is the same bit corresponding to - * the current context. + * If for some reason the ring buffer starts to recurse, we + * only allow that to happen at most 4 times (one for each + * context). If it happens 5 times, then we consider this a + * recusive loop and do not let it go further. */ static __always_inline int trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) { - unsigned int val = cpu_buffer->current_context; - int bit; - - if (in_interrupt()) { - if (in_nmi()) - bit = RB_CTX_NMI; - else if (in_irq()) - bit = RB_CTX_IRQ; - else - bit = RB_CTX_SOFTIRQ; - } else - bit = RB_CTX_NORMAL; - - if (unlikely(val & (1 << bit))) + if (cpu_buffer->current_context >= 4) return 1; - val |= (1 << bit); - cpu_buffer->current_context = val; + cpu_buffer->current_context++; + /* Interrupts must see this update */ + barrier(); return 0; } @@ -2598,7 +2566,9 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) static __always_inline void trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) { - cpu_buffer->current_context &= cpu_buffer->current_context - 1; + /* Don't let the dec leak out */ + barrier(); + cpu_buffer->current_context--; } /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 752e5daf0896..73e67b68c53b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7687,6 +7687,7 @@ static int instance_mkdir(const char *name) struct trace_array *tr; int ret; + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = -EEXIST; @@ -7742,6 +7743,7 @@ static int instance_mkdir(const char *name) list_add(&tr->list, &ftrace_trace_arrays); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return 0; @@ -7753,6 +7755,7 @@ static int instance_mkdir(const char *name) out_unlock: mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; @@ -7765,6 +7768,7 @@ static int instance_rmdir(const char *name) int ret; int i; + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = -ENODEV; @@ -7810,6 +7814,7 @@ static int instance_rmdir(const char *name) out_unlock: mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); return ret; } @@ -8276,6 +8281,92 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) } EXPORT_SYMBOL_GPL(ftrace_dump); +int trace_run_command(const char *buf, int (*createfn)(int, char **)) +{ + char **argv; + int argc, ret; + + argc = 0; + ret = 0; + argv = argv_split(GFP_KERNEL, buf, &argc); + if (!argv) + return -ENOMEM; + + if (argc) + ret = createfn(argc, argv); + + argv_free(argv); + + return ret; +} + +#define WRITE_BUFSIZE 4096 + +ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos, + int (*createfn)(int, char **)) +{ + char *kbuf, *buf, *tmp; + int ret = 0; + size_t done = 0; + size_t size; + + kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + while (done < count) { + size = count - done; + + if (size >= WRITE_BUFSIZE) + size = WRITE_BUFSIZE - 1; + + if (copy_from_user(kbuf, buffer + done, size)) { + ret = -EFAULT; + goto out; + } + kbuf[size] = '\0'; + buf = kbuf; + do { + tmp = strchr(buf, '\n'); + if (tmp) { + *tmp = '\0'; + size = tmp - buf + 1; + } else { + size = strlen(buf); + if (done + size < count) { + if (buf != kbuf) + break; + /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */ + pr_warn("Line length is too long: Should be less than %d\n", + WRITE_BUFSIZE - 2); + ret = -EINVAL; + goto out; + } + } + done += size; + + /* Remove comments */ + tmp = strchr(buf, '#'); + + if (tmp) + *tmp = '\0'; + + ret = trace_run_command(buf, createfn); + if (ret) + goto out; + buf += size; + + } while (done < count); + } + ret = done; + +out: + kfree(kbuf); + + return ret; +} + __init static int tracer_alloc_buffers(void) { int ring_buf_size; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6b0b343a36a2..2a6d0325a761 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -739,8 +739,6 @@ extern int trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr); -extern int trace_selftest_startup_sched_switch(struct tracer *trace, - struct trace_array *tr); extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); /* @@ -1755,6 +1753,13 @@ void trace_printk_start_comm(void); int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); +#define MAX_EVENT_NAME_LEN 64 + +extern int trace_run_command(const char *buf, int (*createfn)(int, char**)); +extern ssize_t trace_parse_run_command(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos, + int (*createfn)(int, char**)); + /* * Normal trace_printk() and friends allocates special buffers * to do the manipulation, as well as saves the print formats diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 13ba2d3f6a91..55d6dff37daf 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -240,27 +240,41 @@ void perf_trace_destroy(struct perf_event *p_event) int perf_trace_add(struct perf_event *p_event, int flags) { struct trace_event_call *tp_event = p_event->tp_event; - struct hlist_head __percpu *pcpu_list; - struct hlist_head *list; - - pcpu_list = tp_event->perf_events; - if (WARN_ON_ONCE(!pcpu_list)) - return -EINVAL; if (!(flags & PERF_EF_START)) p_event->hw.state = PERF_HES_STOPPED; - list = this_cpu_ptr(pcpu_list); - hlist_add_head_rcu(&p_event->hlist_entry, list); + /* + * If TRACE_REG_PERF_ADD returns false; no custom action was performed + * and we need to take the default action of enqueueing our event on + * the right per-cpu hlist. + */ + if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event)) { + struct hlist_head __percpu *pcpu_list; + struct hlist_head *list; + + pcpu_list = tp_event->perf_events; + if (WARN_ON_ONCE(!pcpu_list)) + return -EINVAL; + + list = this_cpu_ptr(pcpu_list); + hlist_add_head_rcu(&p_event->hlist_entry, list); + } - return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event); + return 0; } void perf_trace_del(struct perf_event *p_event, int flags) { struct trace_event_call *tp_event = p_event->tp_event; - hlist_del_rcu(&p_event->hlist_entry); - tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); + + /* + * If TRACE_REG_PERF_DEL returns false; no custom action was performed + * and we need to take the default action of dequeueing our event from + * the right per-cpu hlist. + */ + if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event)) + hlist_del_rcu(&p_event->hlist_entry); } void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp) @@ -306,16 +320,25 @@ static void perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *pt_regs) { - struct perf_event *event; struct ftrace_entry *entry; - struct hlist_head *head; + struct perf_event *event; + struct hlist_head head; struct pt_regs regs; int rctx; - head = this_cpu_ptr(event_function.perf_events); - if (hlist_empty(head)) + if ((unsigned long)ops->private != smp_processor_id()) return; + event = container_of(ops, struct perf_event, ftrace_ops); + + /* + * @event->hlist entry is NULL (per INIT_HLIST_NODE), and all + * the perf code does is hlist_for_each_entry_rcu(), so we can + * get away with simply setting the @head.first pointer in order + * to create a singular list. + */ + head.first = &event->hlist_entry; + #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ sizeof(u64)) - sizeof(u32)) @@ -330,9 +353,8 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, entry->ip = ip; entry->parent_ip = parent_ip; - event = container_of(ops, struct perf_event, ftrace_ops); perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN, - 1, ®s, head, NULL, event); + 1, ®s, &head, NULL); #undef ENTRY_SIZE } @@ -341,8 +363,10 @@ static int perf_ftrace_function_register(struct perf_event *event) { struct ftrace_ops *ops = &event->ftrace_ops; - ops->flags |= FTRACE_OPS_FL_PER_CPU | FTRACE_OPS_FL_RCU; - ops->func = perf_ftrace_function_call; + ops->flags = FTRACE_OPS_FL_RCU; + ops->func = perf_ftrace_function_call; + ops->private = (void *)(unsigned long)nr_cpu_ids; + return register_ftrace_function(ops); } @@ -354,19 +378,11 @@ static int perf_ftrace_function_unregister(struct perf_event *event) return ret; } -static void perf_ftrace_function_enable(struct perf_event *event) -{ - ftrace_function_local_enable(&event->ftrace_ops); -} - -static void perf_ftrace_function_disable(struct perf_event *event) -{ - ftrace_function_local_disable(&event->ftrace_ops); -} - int perf_ftrace_event_register(struct trace_event_call *call, enum trace_reg type, void *data) { + struct perf_event *event = data; + switch (type) { case TRACE_REG_REGISTER: case TRACE_REG_UNREGISTER: @@ -379,11 +395,11 @@ int perf_ftrace_event_register(struct trace_event_call *call, case TRACE_REG_PERF_CLOSE: return perf_ftrace_function_unregister(data); case TRACE_REG_PERF_ADD: - perf_ftrace_function_enable(data); - return 0; + event->ftrace_ops.private = (void *)(unsigned long)smp_processor_id(); |