summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/trace/Kconfig26
-rw-r--r--kernel/trace/Makefile2
-rw-r--r--kernel/trace/trace.c275
-rw-r--r--kernel/trace/trace.h190
-rw-r--r--kernel/trace/trace_events.c329
-rw-r--r--kernel/trace/trace_events_filter.c77
-rw-r--r--kernel/trace/trace_events_hist.c1755
-rw-r--r--kernel/trace/trace_events_trigger.c215
-rw-r--r--kernel/trace/tracing_map.c1062
-rw-r--r--kernel/trace/tracing_map.h283
10 files changed, 3930 insertions, 284 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e45db6b0d878..fafeaf803bd0 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -528,6 +528,32 @@ config MMIOTRACE
See Documentation/trace/mmiotrace.txt.
If you are not helping to develop drivers, say N.
+config TRACING_MAP
+ bool
+ depends on ARCH_HAVE_NMI_SAFE_CMPXCHG
+ help
+ tracing_map is a special-purpose lock-free map for tracing,
+ separated out as a stand-alone facility in order to allow it
+ to be shared between multiple tracers. It isn't meant to be
+ generally used outside of that context, and is normally
+ selected by tracers that use it.
+
+config HIST_TRIGGERS
+ bool "Histogram triggers"
+ depends on ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select TRACING_MAP
+ default n
+ help
+ Hist triggers allow one or more arbitrary trace event fields
+ to be aggregated into hash tables and dumped to stdout by
+ reading a debugfs/tracefs file. They're useful for
+ gathering quick and dirty (though precise) summaries of
+ event activity as an initial guide for further investigation
+ using more advanced tools.
+
+ See Documentation/trace/events.txt.
+ If in doubt, say N.
+
config MMIOTRACE_TEST
tristate "Test module for mmiotrace"
depends on MMIOTRACE && m
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 9b1044e936a6..979e7bfbde7a 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_TRACING) += trace_output.o
obj-$(CONFIG_TRACING) += trace_seq.o
obj-$(CONFIG_TRACING) += trace_stat.o
obj-$(CONFIG_TRACING) += trace_printk.o
+obj-$(CONFIG_TRACING_MAP) += tracing_map.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
@@ -53,6 +54,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
+obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_TRACEPOINTS) += power-traces.o
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a2f0b9f33e9b..8a4bd6b68a0b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -253,6 +253,9 @@ unsigned long long ns2usecs(cycle_t nsec)
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
+/* trace_flags that are default zero for instances */
+#define ZEROED_TRACE_FLAGS \
+ TRACE_ITER_EVENT_FORK
/*
* The global_trace is the descriptor that holds the tracing
@@ -303,33 +306,18 @@ void trace_array_put(struct trace_array *this_tr)
mutex_unlock(&trace_types_lock);
}
-int filter_check_discard(struct trace_event_file *file, void *rec,
- struct ring_buffer *buffer,
- struct ring_buffer_event *event)
-{
- if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
- !filter_match_preds(file->filter, rec)) {
- ring_buffer_discard_commit(buffer, event);
- return 1;
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(filter_check_discard);
-
int call_filter_check_discard(struct trace_event_call *call, void *rec,
struct ring_buffer *buffer,
struct ring_buffer_event *event)
{
if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
!filter_match_preds(call->filter, rec)) {
- ring_buffer_discard_commit(buffer, event);
+ __trace_event_discard_commit(buffer, event);
return 1;
}
return 0;
}
-EXPORT_SYMBOL_GPL(call_filter_check_discard);
static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
{
@@ -1672,6 +1660,16 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
}
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
+static __always_inline void
+trace_event_setup(struct ring_buffer_event *event,
+ int type, unsigned long flags, int pc)
+{
+ struct trace_entry *ent = ring_buffer_event_data(event);
+
+ tracing_generic_entry_update(ent, flags, pc);
+ ent->type = type;
+}
+
struct ring_buffer_event *
trace_buffer_lock_reserve(struct ring_buffer *buffer,
int type,
@@ -1681,34 +1679,137 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer,
struct ring_buffer_event *event;
event = ring_buffer_lock_reserve(buffer, len);
- if (event != NULL) {
- struct trace_entry *ent = ring_buffer_event_data(event);
+ if (event != NULL)
+ trace_event_setup(event, type, flags, pc);
+
+ return event;
+}
+
+DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
+DEFINE_PER_CPU(int, trace_buffered_event_cnt);
+static int trace_buffered_event_ref;
+
+/**
+ * trace_buffered_event_enable - enable buffering events
+ *
+ * When events are being filtered, it is quicker to use a temporary
+ * buffer to write the event data into if there's a likely chance
+ * that it will not be committed. The discard of the ring buffer
+ * is not as fast as committing, and is much slower than copying
+ * a commit.
+ *
+ * When an event is to be filtered, allocate per cpu buffers to
+ * write the event data into, and if the event is filtered and discarded
+ * it is simply dropped, otherwise, the entire data is to be committed
+ * in one shot.
+ */
+void trace_buffered_event_enable(void)
+{
+ struct ring_buffer_event *event;
+ struct page *page;
+ int cpu;
- tracing_generic_entry_update(ent, flags, pc);
- ent->type = type;
+ WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
+
+ if (trace_buffered_event_ref++)
+ return;
+
+ for_each_tracing_cpu(cpu) {
+ page = alloc_pages_node(cpu_to_node(cpu),
+ GFP_KERNEL | __GFP_NORETRY, 0);
+ if (!page)
+ goto failed;
+
+ event = page_address(page);
+ memset(event, 0, sizeof(*event));
+
+ per_cpu(trace_buffered_event, cpu) = event;
+
+ preempt_disable();
+ if (cpu == smp_processor_id() &&
+ this_cpu_read(trace_buffered_event) !=
+ per_cpu(trace_buffered_event, cpu))
+ WARN_ON_ONCE(1);
+ preempt_enable();
}
- return event;
+ return;
+ failed:
+ trace_buffered_event_disable();
}
-void
-__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
+static void enable_trace_buffered_event(void *data)
{
- __this_cpu_write(trace_cmdline_save, true);
- ring_buffer_unlock_commit(buffer, event);
+ /* Probably not needed, but do it anyway */
+ smp_rmb();
+ this_cpu_dec(trace_buffered_event_cnt);
}
-void trace_buffer_unlock_commit(struct trace_array *tr,
- struct ring_buffer *buffer,
- struct ring_buffer_event *event,
- unsigned long flags, int pc)
+static void disable_trace_buffered_event(void *data)
{
- __buffer_unlock_commit(buffer, event);
+ this_cpu_inc(trace_buffered_event_cnt);
+}
- ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
- ftrace_trace_userstack(buffer, flags, pc);
+/**
+ * trace_buffered_event_disable - disable buffering events
+ *
+ * When a filter is removed, it is faster to not use the buffered
+ * events, and to commit directly into the ring buffer. Free up
+ * the temp buffers when there are no more users. This requires
+ * special synchronization with current events.
+ */
+void trace_buffered_event_disable(void)
+{
+ int cpu;
+
+ WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
+
+ if (WARN_ON_ONCE(!trace_buffered_event_ref))
+ return;
+
+ if (--trace_buffered_event_ref)
+ return;
+
+ preempt_disable();
+ /* For each CPU, set the buffer as used. */
+ smp_call_function_many(tracing_buffer_mask,
+ disable_trace_buffered_event, NULL, 1);
+ preempt_enable();
+
+ /* Wait for all current users to finish */
+ synchronize_sched();
+
+ for_each_tracing_cpu(cpu) {
+ free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
+ per_cpu(trace_buffered_event, cpu) = NULL;
+ }
+ /*
+ * Make sure trace_buffered_event is NULL before clearing
+ * trace_buffered_event_cnt.
+ */
+ smp_wmb();
+
+ preempt_disable();
+ /* Do the work on each cpu */
+ smp_call_function_many(tracing_buffer_mask,
+ enable_trace_buffered_event, NULL, 1);
+ preempt_enable();
+}
+
+void
+__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
+{
+ __this_cpu_write(trace_cmdline_save, true);
+
+ /* If this is the temp buffer, we need to commit fully */
+ if (this_cpu_read(trace_buffered_event) == event) {
+ /* Length is in event->array[0] */
+ ring_buffer_write(buffer, event->array[0], &event->array[1]);
+ /* Release the temp buffer */
+ this_cpu_dec(trace_buffered_event_cnt);
+ } else
+ ring_buffer_unlock_commit(buffer, event);
}
-EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
static struct ring_buffer *temp_buffer;
@@ -1719,8 +1820,23 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
unsigned long flags, int pc)
{
struct ring_buffer_event *entry;
+ int val;
*current_rb = trace_file->tr->trace_buffer.buffer;
+
+ if ((trace_file->flags &
+ (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
+ (entry = this_cpu_read(trace_buffered_event))) {
+ /* Try to use the per cpu buffer first */
+ val = this_cpu_inc_return(trace_buffered_event_cnt);
+ if (val == 1) {
+ trace_event_setup(entry, type, flags, pc);
+ entry->array[0] = len;
+ return entry;
+ }
+ this_cpu_dec(trace_buffered_event_cnt);
+ }
+
entry = trace_buffer_lock_reserve(*current_rb,
type, len, flags, pc);
/*
@@ -1738,17 +1854,6 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
-struct ring_buffer_event *
-trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
- int type, unsigned long len,
- unsigned long flags, int pc)
-{
- *current_rb = global_trace.trace_buffer.buffer;
- return trace_buffer_lock_reserve(*current_rb,
- type, len, flags, pc);
-}
-EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
-
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
struct ring_buffer *buffer,
struct ring_buffer_event *event,
@@ -1760,14 +1865,6 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
ftrace_trace_userstack(buffer, flags, pc);
}
-EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
-
-void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
- struct ring_buffer_event *event)
-{
- ring_buffer_discard_commit(buffer, event);
-}
-EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
void
trace_function(struct trace_array *tr,
@@ -3571,6 +3668,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
if (mask == TRACE_ITER_RECORD_CMD)
trace_event_enable_cmd_record(enabled);
+ if (mask == TRACE_ITER_EVENT_FORK)
+ trace_event_follow_fork(tr, enabled);
+
if (mask == TRACE_ITER_OVERWRITE) {
ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -3658,7 +3758,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
if (cnt >= sizeof(buf))
return -EINVAL;
- if (copy_from_user(&buf, ubuf, cnt))
+ if (copy_from_user(buf, ubuf, cnt))
return -EFAULT;
buf[cnt] = 0;
@@ -3804,12 +3904,19 @@ static const char readme_msg[] =
"\t trigger: traceon, traceoff\n"
"\t enable_event:<system>:<event>\n"
"\t disable_event:<system>:<event>\n"
+#ifdef CONFIG_HIST_TRIGGERS
+ "\t enable_hist:<system>:<event>\n"
+ "\t disable_hist:<system>:<event>\n"
+#endif
#ifdef CONFIG_STACKTRACE
"\t\t stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
"\t\t snapshot\n"
#endif
+#ifdef CONFIG_HIST_TRIGGERS
+ "\t\t hist (see below)\n"
+#endif
"\t example: echo traceoff > events/block/block_unplug/trigger\n"
"\t echo traceoff:3 > events/block/block_unplug/trigger\n"
"\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
@@ -3825,6 +3932,56 @@ static const char readme_msg[] =
"\t To remove a trigger with a count:\n"
"\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
"\t Filters can be ignored when removing a trigger.\n"
+#ifdef CONFIG_HIST_TRIGGERS
+ " hist trigger\t- If set, event hits are aggregated into a hash table\n"
+ "\t Format: hist:keys=<field1[,field2,...]>\n"
+ "\t [:values=<field1[,field2,...]>]\n"
+ "\t [:sort=<field1[,field2,...]>]\n"
+ "\t [:size=#entries]\n"
+ "\t [:pause][:continue][:clear]\n"
+ "\t [:name=histname1]\n"
+ "\t [if <filter>]\n\n"
+ "\t When a matching event is hit, an entry is added to a hash\n"
+ "\t table using the key(s) and value(s) named, and the value of a\n"
+ "\t sum called 'hitcount' is incremented. Keys and values\n"
+ "\t correspond to fields in the event's format description. Keys\n"
+ "\t can be any field, or the special string 'stacktrace'.\n"
+ "\t Compound keys consisting of up to two fields can be specified\n"
+ "\t by the 'keys' keyword. Values must correspond to numeric\n"
+ "\t fields. Sort keys consisting of up to two fields can be\n"
+ "\t specified using the 'sort' keyword. The sort direction can\n"
+ "\t be modified by appending '.descending' or '.ascending' to a\n"
+ "\t sort field. The 'size' parameter can be used to specify more\n"
+ "\t or fewer than the default 2048 entries for the hashtable size.\n"
+ "\t If a hist trigger is given a name using the 'name' parameter,\n"
+ "\t its histogram data will be shared with other triggers of the\n"
+ "\t same name, and trigger hits will update this common data.\n\n"
+ "\t Reading the 'hist' file for the event will dump the hash\n"
+ "\t table in its entirety to stdout. If there are multiple hist\n"
+ "\t triggers attached to an event, there will be a table for each\n"
+ "\t trigger in the output. The table displayed for a named\n"
+ "\t trigger will be the same as any other instance having the\n"
+ "\t same name. The default format used to display a given field\n"
+ "\t can be modified by appending any of the following modifiers\n"
+ "\t to the field name, as applicable:\n\n"
+ "\t .hex display a number as a hex value\n"
+ "\t .sym display an address as a symbol\n"
+ "\t .sym-offset display an address as a symbol and offset\n"
+ "\t .execname display a common_pid as a program name\n"
+ "\t .syscall display a syscall id as a syscall name\n\n"
+ "\t .log2 display log2 value rather than raw number\n\n"
+ "\t The 'pause' parameter can be used to pause an existing hist\n"
+ "\t trigger or to start a hist trigger but not log any events\n"
+ "\t until told to do so. 'continue' can be used to start or\n"
+ "\t restart a paused hist trigger.\n\n"
+ "\t The 'clear' parameter will clear the contents of a running\n"
+ "\t hist trigger and leave its current paused/active state\n"
+ "\t unchanged.\n\n"
+ "\t The enable_hist and disable_hist triggers can be used to\n"
+ "\t have one event conditionally start and stop another event's\n"
+ "\t already-attached hist trigger. The syntax is analagous to\n"
+ "\t the enable_event and disable_event triggers.\n"
+#endif
;
static ssize_t
@@ -4474,7 +4631,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
if (cnt > MAX_TRACER_SIZE)
cnt = MAX_TRACER_SIZE;
- if (copy_from_user(&buf, ubuf, cnt))
+ if (copy_from_user(buf, ubuf, cnt))
return -EFAULT;
buf[cnt] = 0;
@@ -5264,7 +5421,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
if (cnt >= sizeof(buf))
return -EINVAL;
- if (copy_from_user(&buf, ubuf, cnt))
+ if (copy_from_user(buf, ubuf, cnt))
return -EFAULT;
buf[cnt] = 0;
@@ -6650,7 +6807,7 @@ static int instance_mkdir(const char *name)
if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
goto out_free_tr;
- tr->trace_flags = global_trace.trace_flags;
+ tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
@@ -6724,6 +6881,12 @@ static int instance_rmdir(const char *name)
list_del(&tr->list);
+ /* Disable all the flags that were enabled coming in */
+ for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
+ if ((1 << i) & ZEROED_TRACE_FLAGS)
+ set_tracer_flag(tr, 1 << i, 0);
+ }
+
tracing_set_nop(tr);
event_trace_del_tracer(tr);
ftrace_destroy_function_files(tr);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3fff4adfd431..5167c366d6b7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -177,9 +177,8 @@ struct trace_options {
};
struct trace_pid_list {
- unsigned int nr_pids;
- int order;
- pid_t *pids;
+ int pid_max;
+ unsigned long *pids;
};
/*
@@ -656,6 +655,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
extern cycle_t ftrace_now(int cpu);
extern void trace_find_cmdline(int pid, char comm[]);
+extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
#ifdef CONFIG_DYNAMIC_FTRACE
extern unsigned long ftrace_update_tot_cnt;
@@ -967,6 +967,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
C(STOP_ON_FREE, "disable_on_free"), \
C(IRQ_INFO, "irq-info"), \
C(MARKERS, "markers"), \
+ C(EVENT_FORK, "event-fork"), \
FUNCTION_FLAGS \
FGRAPH_FLAGS \
STACK_FLAGS \
@@ -1064,6 +1065,137 @@ struct trace_subsystem_dir {
int nr_events;
};
+extern int call_filter_check_discard(struct trace_event_call *call, void *rec,
+ struct ring_buffer *buffer,
+ struct ring_buffer_event *event);
+
+void trace_buffer_unlock_commit_regs(struct trace_array *tr,
+ struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
+ unsigned long flags, int pc,
+ struct pt_regs *regs);
+
+static inline void trace_buffer_unlock_commit(struct trace_array *tr,
+ struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
+ unsigned long flags, int pc)
+{
+ trace_buffer_unlock_commit_regs(tr, buffer, event, flags, pc, NULL);
+}
+
+DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
+DECLARE_PER_CPU(int, trace_buffered_event_cnt);
+void trace_buffered_event_disable(void);
+void trace_buffered_event_enable(void);
+
+static inline void
+__trace_event_discard_commit(struct ring_buffer *buffer,
+ struct ring_buffer_event *event)
+{
+ if (this_cpu_read(trace_buffered_event) == event) {
+ /* Simply release the temp buffer */
+ this_cpu_dec(trace_buffered_event_cnt);
+ return;
+ }
+ ring_buffer_discard_commit(buffer, event);
+}
+
+/*
+ * Helper function for event_trigger_unlock_commit{_regs}().
+ * If there are event triggers attached to this event that requires
+ * filtering against its fields, then they wil be called as the
+ * entry already holds the field information of the current event.
+ *
+ * It also checks if the event should be discarded or not.
+ * It is to be discarded if the event is soft disabled and the
+ * event was only recorded to process triggers, or if the event
+ * filter is active and this event did not match the filters.
+ *
+ * Returns true if the event is discarded, false otherwise.
+ */
+static inline bool
+__event_trigger_test_discard(struct trace_event_file *file,
+ struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
+ void *entry,
+ enum event_trigger_type *tt)
+{
+ unsigned long eflags = file->flags;
+
+ if (eflags & EVENT_FILE_FL_TRIGGER_COND)
+ *tt = event_triggers_call(file, entry);
+
+ if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
+ (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
+ !filter_match_preds(file->filter, entry))) {
+ __trace_event_discard_commit(buffer, event);
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * event_trigger_unlock_commit - handle triggers and finish event commit
+ * @file: The file pointer assoctiated to the event
+ * @buffer: The ring buffer that the event is being written to
+ * @event: The event meta data in the ring buffer
+ * @entry: The event itself
+ * @irq_flags: The state of the interrupts at the start of the event
+ * @pc: The state of the preempt count at the start of the event.
+ *
+ * This is a helper function to handle triggers that require data
+ * from the event itself. It also tests the event against filters and
+ * if the event is soft disabled and should be discarded.
+ */
+static inline void
+event_trigger_unlock_commit(struct trace_event_file *file,
+ struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
+ void *entry, unsigned long irq_flags, int pc)
+{
+ enum event_trigger_type tt = ETT_NONE;
+
+ if (!__event_trigger_test_discard(file, buffer, event, entry, &tt))
+ trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc);
+
+ if (tt)
+ event_triggers_post_call(file, tt, entry);
+}
+
+/**
+ * event_trigger_unlock_commit_regs - handle triggers and finish event commit
+ * @file: The file pointer assoctiated to the event
+ * @buffer: The ring buffer that the event is being written to
+ * @event: The event meta data in the ring buffer
+ * @entry: The event itself
+ * @irq_flags: The state of the interrupts at the start of the event
+ * @pc: The state of the preempt count at the start of the event.
+ *
+ * This is a helper function to handle triggers that require data
+ * from the event itself. It also tests the event against filters and
+ * if the event is soft disabled and should be discarded.
+ *
+ * Same as event_trigger_unlock_commit() but calls
+ * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit().
+ */
+static inline void
+event_trigger_unlock_commit_regs(struct trace_event_file *file,
+ struct ring_buffer *buffer,
+ struct ring_buffer_event *event,
+ void *entry, unsigned long irq_flags, int pc,
+ struct pt_regs *regs)
+{
+ enum event_trigger_type tt = ETT_NONE;
+
+ if (!__event_trigger_test_discard(file, buffer, event, entry, &tt))
+ trace_buffer_unlock_commit_regs(file->tr, buffer, event,
+ irq_flags, pc, regs);
+
+ if (tt)
+ event_triggers_post_call(file, tt, entry);
+}
+
#define FILTER_PRED_INVALID ((unsigned short)-1)
#define FILTER_PRED_IS_RIGHT (1 << 15)
#define FILTER_PRED_FOLD (1 << 15)
@@ -1161,6 +1293,15 @@ extern struct mutex event_mutex;
extern struct list_head ftrace_events;
extern const struct file_operations event_trigger_fops;
+extern const struct file_operations event_hist_fops;
+
+#ifdef CONFIG_HIST_TRIGGERS
+extern int register_trigger_hist_cmd(void);
+extern int register_trigger_hist_enable_disable_cmds(void);
+#else
+static inline int register_trigger_hist_cmd(void) { return 0; }
+static inline int register_trigger_hist_enable_disable_cmds(void) { return 0; }
+#endif
extern int register_trigger_cmds(void);
extern void clear_event_triggers(struct trace_array *tr);
@@ -1174,9 +1315,41 @@ struct event_trigger_data {
char *filter_str;
void *private_data;
bool paused;
+ bool paused_tmp;
struct list_head list;
+ char *name;
+ struct list_head named_list;
+ struct event_trigger_data *named_data;
+};
+
+/* Avoid typos */
+#define ENABLE_EVENT_STR "enable_event"
+#define DISABLE_EVENT_STR "disable_event"
+#define ENABLE_HIST_STR "enable_hist"
+#define DISABLE_HIST_STR "disable_hist"
+
+struct enable_trigger_data {
+ struct trace_event_file *file;
+ bool enable;
+ bool hist;
};
+extern int event_enable_trigger_print(struct seq_file *m,
+ struct event_trigger_ops *ops,
+ struct event_trigger_data *data);
+extern void event_enable_trigger_free(struct event_trigger_ops *ops,
+ struct event_trigger_data *data);
+extern int event_enable_trigger_func(struct event_command *cmd_ops,
+ struct trace_event_file *file,
+ char *glob, char *cmd, char *param);
+extern int event_enable_register_trigger(char *glob,
+ struct event_trigger_ops *ops,
+ struct event_trigger_data *data,
+ struct trace_event_file *file);
+extern void event_enable_unregister_trigger(char *glob,
+ struct event_trigger_ops *ops,
+ struct event_trigger_data *test,
+ struct trace_event_file *file);
extern void trigger_data_free(struct event_trigger_data *data);
extern int event_trigger_init(struct event_trigger_ops *ops,
struct event_trigger_data *data);
@@ -1189,7 +1362,18 @@ extern void unregister_trigger(char *glob, struct event_trigger_ops *ops,
extern int set_trigger_filter(char *filter_str,
struct event_trigger_data *trigger_data,
struct trace_event_file *file);
+extern struct event_trigger_data *find_named_trigger(const char *name);
+extern bool is_named_trigger(struct event_trigger_data *test);
+extern int save_named_trigger(const char *name,
+ struct event_trigger_data *data);
+extern void del_named_trigger(struct event_trigger_data *data);
+extern void pause_named_trigger(struct event_trigger_data *data);
+extern void unpause_named_trigger(struct event_trigger_data *data);
+extern void set_named_trigger_data(struct event_trigger_data *data,
+ struct event_trigger_data *named_data);
extern int register_event_command(struct event_command *cmd);
+extern int unregister_event_command(struct event_command *cmd);
+extern int register_trigger_hist_enable_disable_cmds(void);
/**
* struct event_trigger_ops - callbacks for trace event triggers
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index b7b0760ba6ee..3d4155892a1e 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -15,7 +15,7 @@
#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
-#include <linux/bsearch.h>
+#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/sort.h>
@@ -381,6 +381,7 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
{
struct trace_event_call *call = file->event_call;
struct trace_array *tr = file->tr;
+ unsigned long file_flags = file->flags;
int ret = 0;
int disable;
@@ -463,6 +464,15 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
break;
}
+ /* Enable or disable use of trace_buffered_event */
+ if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) !=
+ (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) {
+ if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
+ trace_buffered_event_enable();
+ else
+ trace_buffered_event_disable();
+ }
+
return ret;
}
@@ -489,24 +499,26 @@ static void ftrace_clear_events(struct trace_array *tr)
mutex_unlock(&event_mutex);
}
-static int cmp_pid(const void *key, const void *elt)
+/* Shouldn't this be in a header? */
+extern int pid_max;
+
+/* Returns true if found in filter */
+static bool
+find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
{
- const pid_t *search_pid = key;
- const pid_t *pid = elt;
+ /*
+ * If pid_max changed after filtered_pids was created, we
+ * by default ignore all pids greater than the previous pid_max.
+ */
+ if (search_pid >= filtered_pids->pid_max)
+ return false;
- if (*search_pid == *pid)
- return 0;
- if (*search_pid < *pid)
- return -1;
- return 1;
+ return test_bit(search_pid, filtered_pids->pids);
}
static bool
-check_ignore_pid(struct trace_pid_list *filtered_pids, struct task_struct *task)
+ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
{
- pid_t search_pid;
- pid_t *pid;
-
/*
* Return false, because if filtered_pids does not exist,
* all pids are good to trace.
@@ -514,15 +526,68 @@ check_ignore_pid(struct trace_pid_list *filtered_pids, struct task_struct *task)
if (!filtered_pids)
return false;
- search_pid = task->pid;
+ return !find_filtered_pid(filtered_pids, task->pid);
+}
+
+static void filter_add_remove_task(struct trace_pid_list *pid_list,
+ struct task_struct *self,
+ struct task_struct *task)
+{
+ if (!pid_list)
+ return;
+
+ /* For forks, we only add if the forking task is listed */
+ if (self) {
+ if (!find_filtered_pid(pid_list, self->pid))
+ return;
+ }
- pid = bsearch(&search_pid, filtered_pids->pids,
- filtered_pids->nr_pids, sizeof(pid_t),
- cmp_pid);
- if (!pid)
- return true;
+ /* Sorry, but we don't support pid_max changing after setting */
+ if (task->pid >= pid_list->pid_max)
+ return;
- return false;
+ /* "self" is set for forks, and NULL for exits */
+ if (self)
+ set_bit(task->pid, pid_list->pids);
+ else
+ clear_bit(task->pid, pid_list->pids);
+}
+
+static void
+event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
+{
+ struct trace_pid_list *pid_list;
+ struct trace_array *tr = data;
+
+ pid_list = rcu_dereference_sched(tr->filtered_pids);
+ filter_add_remove_task(pid_list, NULL, task);
+}
+
+static void
+event_filter_pid_sched_process_fork(void *data,
+ struct task_struct *self,
+ struct task_struct *task)
+{
+ struct trace_pid_list *pid_list;
+ struct trace_array *tr = data;
+
+ pid_list = rcu_dereference_sched(tr->filtered_pids);
+ filter_add_remove_task(pid_list, self, task);
+}
+
+void trace_event_follow_fork(struct trace_array *tr, bool enable)
+{
+ if (enable) {
+ register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork,
+ tr, INT_MIN);
+ register_trace_prio_sched_process_exit(event_filter_pid_sched_process_exit,
+ tr, INT_MAX);
+ } else {
+ unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork,
+ tr);
+ unregister_trace_sched_process_exit(event_filter_pid_sched_process_exit,
+ tr);
+ }
}
static void
@@ -535,8 +600,8 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
pid_list = rcu_dereference_sched(tr->filtered_pids);
this_cpu_write(tr->trace_buffer.data->ignore_pid,
- check_ignore_pid(pid_list, prev) &&
- check_ignore_pid(pid_list, next));
+ ignore_this_task(pid_list, prev) &&
+ ignore_this_task(pid_list, next));
}
static void
@@ -549,7 +614,7 @@ event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
pid_list = rcu_dereference_sched(tr->filtered_pids);
this_cpu_write(tr->trace_buffer.data->ignore_pid,
- check_ignore_pid(pid_list, next));
+ ignore_this_task(pid_list, next));
}
static void
@@ -565,7 +630,7 @@ event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
pid_list = rcu_dereference_sched(tr->filtered_pids);
this_cpu_write(tr->trace_buffer.data->ignore_pid,
- check_ignore_pid(pid_list, task));
+ ignore_this_task(pid_list, task));
}
static void
@@ -582,7 +647,7 @@ event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
/* Set tracing if current is enabled */
this_cpu_write(tr->trace_buffer.data->ignore_pid,
- check_ignore_pid(pid_list, current));
+ ignore_this_task(pid_list, current));
}
static void __ftrace_clear_event_pids(struct trace_array *tr)
@@ -620,7 +685,7 @@ static void __ftrace_clear_event_pids(struct trace_array *tr)
/* Wait till all users are no longer using pid filtering */
synchronize_sched();
- free_pages((unsigned long)pid_list->pids, pid_list->order);
+ vfree(pid_list->pids);
kfree(pid_list);
}
@@ -964,11 +1029,32 @@ static void t_stop(struct seq_file *m, void *p)
mutex_unlock(&event_mutex);
}
+static void *
+p_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct trace_array *tr = m->private;
+ struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
+ unsigned long pid = (unsigned long)v;
+
+ (*pos)++;
+
+ /* pid already is +1 of the actual prevous bit */
+ pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
+
+ /* Return pid + 1 to allow zero to be represented */
+ if (pid < pid_list->pid_max)
+ return (void *)(pid + 1);
+
+ return NULL;
+}
+
static void *p_start(struct seq_file *m, loff_t *pos)
__acquires(RCU)
{
struct trace_pid_list *pid_list;
struct trace_array *tr = m->private;
+ unsigned long pid;
+ loff_t l = 0;
/*
* Grab the mutex, to keep calls to p_next() having the same
@@ -981,10 +1067,18 @@ static void *p_start(struct seq_file *m, loff_t *pos)
pid_list = rcu_dereference_sched(tr->filtered_pids);
- if (!pid_list || *pos >= pid_list->nr_pids)
+ if (!pid_list)
+ return NULL;
+
+ pid = find_first_bit(pid_list->pids, pid_list->pid_max);
+ if (pid >= pid_list->pid_max)
return NULL;
- return (void *)&pid_list-&g