From 58b9987de86cc5f154b5e91923676f952fcf8a93 Mon Sep 17 00:00:00 2001 From: Ed Tsai Date: Thu, 22 Apr 2021 20:22:26 +0800 Subject: sched/tracing: Remove the redundant 'success' in the sched tracepoint 'success' is left here for a long time and also it is meaningless for the upper user. Just remove it. [ There were some tools expecting this, and this may break them. But hopefully they've been fixed in the mean time. Otherwise this may be likely reverted - SDR ] Link: https://lkml.kernel.org/r/20210422122226.9415-1-ed.tsai@mediatek.com Cc: Peter Zijlstra Signed-off-by: Ed Tsai Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 1eca2305ca42..94640482cfe7 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -148,7 +148,6 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( int, prio ) - __field( int, success ) __field( int, target_cpu ) ), @@ -156,7 +155,6 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ - __entry->success = 1; /* rudiment, kill when possible */ __entry->target_cpu = task_cpu(p); ), -- cgit v1.2.3 From 3d3d9c072e377a0c5fa0a1f661a95134e787db3a Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Sun, 2 May 2021 08:12:27 +0800 Subject: trace: replace WB_REASON_FOREIGN_FLUSH with a string Now WB_REASON_FOREIGN_FLUSH is displayed as a number, maybe a string is better. v2: replace some space with tab. Link: https://lkml.kernel.org/r/1619914347-21904-1-git-send-email-brookxu.cn@gmail.com Signed-off-by: Chunguang Xu Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/writeback.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 1efa463c4979..840d1ba84cf5 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -36,7 +36,8 @@ EM( WB_REASON_PERIODIC, "periodic") \ EM( WB_REASON_LAPTOP_TIMER, "laptop_timer") \ EM( WB_REASON_FS_FREE_SPACE, "fs_free_space") \ - EMe(WB_REASON_FORKER_THREAD, "forker_thread") + EM( WB_REASON_FORKER_THREAD, "forker_thread") \ + EMe(WB_REASON_FOREIGN_FLUSH, "foreign_flush") WB_WORK_REASON -- cgit v1.2.3 From ca24306d83a125df187ad53eddb038fe0cffb8ca Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 2 Jun 2021 17:18:58 +0900 Subject: bootconfig: Change array value to use child node It is not possible to put an array value with subkeys under a key node, because both of subkeys and the array elements are using "next" field of the xbc_node. Thus this changes the array values to use "child" field in the array case. The reason why split this change is to test it easily. Link: https://lkml.kernel.org/r/162262193838.264090.16044473274501498656.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/bootconfig.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 2696eb0fc149..3178a31fdabc 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -71,7 +71,7 @@ static inline __init bool xbc_node_is_key(struct xbc_node *node) */ static inline __init bool xbc_node_is_array(struct xbc_node *node) { - return xbc_node_is_value(node) && node->next != 0; + return xbc_node_is_value(node) && node->child != 0; } /** @@ -140,7 +140,7 @@ static inline struct xbc_node * __init xbc_find_node(const char *key) */ #define xbc_array_for_each_value(anode, value) \ for (value = xbc_node_get_data(anode); anode != NULL ; \ - anode = xbc_node_get_next(anode), \ + anode = xbc_node_get_child(anode), \ value = anode ? xbc_node_get_data(anode) : NULL) /** @@ -171,7 +171,7 @@ static inline struct xbc_node * __init xbc_find_node(const char *key) */ #define xbc_node_for_each_array_value(node, key, anode, value) \ for (value = xbc_node_find_value(node, key, &anode); value != NULL; \ - anode = xbc_node_get_next(anode), \ + anode = xbc_node_get_child(anode), \ value = anode ? xbc_node_get_data(anode) : NULL) /** -- cgit v1.2.3 From e5efaeb8a8f527d6e91289ff1f67fbcae452b2ca Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 2 Jun 2021 17:19:07 +0900 Subject: bootconfig: Support mixing a value and subkeys under a key Support mixing a value and subkeys under a key. Since kernel cmdline options will support "aaa.bbb=value1 aaa.bbb.ccc=value2", it is better that the bootconfig supports such configuration too. Note that this does not change syntax itself but just accepts mixed value and subkeys e.g. key = value1 key.subkey = value2 But this is not accepted; key { value1 subkey = value2 } That will make value1 as a subkey. Also, the order of the value node under a key is fixed. If there are a value and subkeys, the value is always the first child node of the key. Thus if user specifies subkeys first, e.g. key.subkey = value1 key = value2 In the program (and /proc/bootconfig), it will be shown as below key = value2 key.subkey = value1 Link: https://lkml.kernel.org/r/162262194685.264090.7738574774030567419.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/bootconfig.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 3178a31fdabc..e49043ac77c9 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -80,6 +80,8 @@ static inline __init bool xbc_node_is_array(struct xbc_node *node) * * Test the @node is a leaf key node which is a key node and has a value node * or no child. Returns true if it is a leaf node, or false if not. + * Note that the leaf node can have subkey nodes in addition to the + * value node. */ static inline __init bool xbc_node_is_leaf(struct xbc_node *node) { @@ -129,6 +131,23 @@ static inline struct xbc_node * __init xbc_find_node(const char *key) return xbc_node_find_child(NULL, key); } +/** + * xbc_node_get_subkey() - Return the first subkey node if exists + * @node: Parent node + * + * Return the first subkey node of the @node. If the @node has no child + * or only value node, this will return NULL. + */ +static inline struct xbc_node * __init xbc_node_get_subkey(struct xbc_node *node) +{ + struct xbc_node *child = xbc_node_get_child(node); + + if (child && xbc_node_is_value(child)) + return xbc_node_get_next(child); + else + return child; +} + /** * xbc_array_for_each_value() - Iterate value nodes on an array * @anode: An XBC arraied value node @@ -149,11 +168,24 @@ static inline struct xbc_node * __init xbc_find_node(const char *key) * @child: Iterated XBC node. * * Iterate child nodes of @parent. Each child nodes are stored to @child. + * The @child can be mixture of a value node and subkey nodes. */ #define xbc_node_for_each_child(parent, child) \ for (child = xbc_node_get_child(parent); child != NULL ; \ child = xbc_node_get_next(child)) +/** + * xbc_node_for_each_subkey() - Iterate child subkey nodes + * @parent: An XBC node. + * @child: Iterated XBC node. + * + * Iterate subkey nodes of @parent. Each child nodes are stored to @child. + * The @child is only the subkey node. + */ +#define xbc_node_for_each_subkey(parent, child) \ + for (child = xbc_node_get_subkey(parent); child != NULL ; \ + child = xbc_node_get_next(child)) + /** * xbc_node_for_each_array_value() - Iterate array entries of geven key * @node: An XBC node. -- cgit v1.2.3 From 99f4f5d62338cab9dcf45735344541574daedd20 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 2 Jun 2021 17:19:34 +0900 Subject: bootconfig: Share the checksum function with tools Move the checksum calculation function into the header for sharing it with tools/bootconfig. Link: https://lkml.kernel.org/r/162262197470.264090.16325743685807878807.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- include/linux/bootconfig.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index e49043ac77c9..6bdd94cff4e2 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -16,6 +16,26 @@ #define BOOTCONFIG_ALIGN (1 << BOOTCONFIG_ALIGN_SHIFT) #define BOOTCONFIG_ALIGN_MASK (BOOTCONFIG_ALIGN - 1) +/** + * xbc_calc_checksum() - Calculate checksum of bootconfig + * @data: Bootconfig data. + * @size: The size of the bootconfig data. + * + * Calculate the checksum value of the bootconfig data. + * The checksum will be used with the BOOTCONFIG_MAGIC and the size for + * embedding the bootconfig in the initrd image. + */ +static inline __init u32 xbc_calc_checksum(void *data, u32 size) +{ + unsigned char *p = data; + u32 ret = 0; + + while (size--) + ret += *p++; + + return ret; +} + /* XBC tree node */ struct xbc_node { u16 next; -- cgit v1.2.3 From 62de4f29e9174e67beb8d34ef5ced6730e087a31 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 22 Jun 2021 16:42:26 +0200 Subject: trace: Add __print_ns_to_secs() and __print_ns_without_secs() helpers To have nanosecond output displayed in a more human readable format, its nicer to convert it to a seconds format (XXX.YYYYYYYYY). The problem is that to do so, the numbers must be divided by NSEC_PER_SEC, and moded too. But as these numbers are 64 bit, this can not be done simply with '/' and '%' operators, but must use do_div() instead. Instead of performing the expensive do_div() in the hot path of the tracepoint, it is more efficient to perform it during the output phase. But passing in do_div() can confuse the parser, and do_div() doesn't work exactly like a normal C function. It modifies the number in place, and we don't want to modify the actual values in the ring buffer. Two helper functions are now created: __print_ns_to_secs() and __print_ns_without_secs() They both take a value of nanoseconds, and the former will return that number divided by NSEC_PER_SEC, and the latter will mod it with NSEC_PER_SEC giving a way to print a nice human readable format: __print_fmt("time=%llu.%09u", __print_ns_to_secs(REC->nsec_val), __print_ns_without_secs(REC->nsec_val)) Link: https://lkml.kernel.org/r/e503b903045496c4ccde52843e1e318b422f7a56.1624372313.git.bristot@redhat.com Cc: Phil Auld Cc: Sebastian Andrzej Siewior Cc: Kate Carcia Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Alexandre Chartre Cc: Clark Willaims Cc: John Kacur Cc: Juri Lelli Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Steven Rostedt Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (VMware) --- include/trace/trace_events.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 8268bf747d6f..acc17194c160 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -358,6 +358,21 @@ TRACE_MAKE_SYSTEM_STR(); trace_print_hex_dump_seq(p, prefix_str, prefix_type, \ rowsize, groupsize, buf, len, ascii) +#undef __print_ns_to_secs +#define __print_ns_to_secs(value) \ + ({ \ + u64 ____val = (u64)(value); \ + do_div(____val, NSEC_PER_SEC); \ + ____val; \ + }) + +#undef __print_ns_without_secs +#define __print_ns_without_secs(value) \ + ({ \ + u64 ____val = (u64)(value); \ + (u32) do_div(____val, NSEC_PER_SEC); \ + }) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static notrace enum print_line_t \ @@ -736,6 +751,16 @@ static inline void ftrace_test_probe_##call(void) \ #undef __print_array #undef __print_hex_dump +/* + * The below is not executed in the kernel. It is only what is + * displayed in the print format for userspace to parse. + */ +#undef __print_ns_to_secs +#define __print_ns_to_secs(val) (val) / 1000000000UL + +#undef __print_ns_without_secs +#define __print_ns_without_secs(val) (val) % 1000000000UL + #undef TP_printk #define TP_printk(fmt, args...) "\"" fmt "\", " __stringify(args) -- cgit v1.2.3 From bce29ac9ce0bb0b0b146b687ab978378c21e9078 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 22 Jun 2021 16:42:27 +0200 Subject: trace: Add osnoise tracer In the context of high-performance computing (HPC), the Operating System Noise (*osnoise*) refers to the interference experienced by an application due to activities inside the operating system. In the context of Linux, NMIs, IRQs, SoftIRQs, and any other system thread can cause noise to the system. Moreover, hardware-related jobs can also cause noise, for example, via SMIs. The osnoise tracer leverages the hwlat_detector by running a similar loop with preemption, SoftIRQs and IRQs enabled, thus allowing all the sources of *osnoise* during its execution. Using the same approach of hwlat, osnoise takes note of the entry and exit point of any source of interferences, increasing a per-cpu interference counter. The osnoise tracer also saves an interference counter for each source of interference. The interference counter for NMI, IRQs, SoftIRQs, and threads is increased anytime the tool observes these interferences' entry events. When a noise happens without any interference from the operating system level, the hardware noise counter increases, pointing to a hardware-related noise. In this way, osnoise can account for any source of interference. At the end of the period, the osnoise tracer prints the sum of all noise, the max single noise, the percentage of CPU available for the thread, and the counters for the noise sources. Usage Write the ASCII text "osnoise" into the current_tracer file of the tracing system (generally mounted at /sys/kernel/tracing). For example:: [root@f32 ~]# cd /sys/kernel/tracing/ [root@f32 tracing]# echo osnoise > current_tracer It is possible to follow the trace by reading the trace trace file:: [root@f32 tracing]# cat trace # tracer: osnoise # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth MAX # || / SINGLE Interference counters: # |||| RUNTIME NOISE % OF CPU NOISE +-----------------------------+ # TASK-PID CPU# |||| TIMESTAMP IN US IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD # | | | |||| | | | | | | | | | | <...>-859 [000] .... 81.637220: 1000000 190 99.98100 9 18 0 1007 18 1 <...>-860 [001] .... 81.638154: 1000000 656 99.93440 74 23 0 1006 16 3 <...>-861 [002] .... 81.638193: 1000000 5675 99.43250 202 6 0 1013 25 21 <...>-862 [003] .... 81.638242: 1000000 125 99.98750 45 1 0 1011 23 0 <...>-863 [004] .... 81.638260: 1000000 1721 99.82790 168 7 0 1002 49 41 <...>-864 [005] .... 81.638286: 1000000 263 99.97370 57 6 0 1006 26 2 <...>-865 [006] .... 81.638302: 1000000 109 99.98910 21 3 0 1006 18 1 <...>-866 [007] .... 81.638326: 1000000 7816 99.21840 107 8 0 1016 39 19 In addition to the regular trace fields (from TASK-PID to TIMESTAMP), the tracer prints a message at the end of each period for each CPU that is running an osnoise/CPU thread. The osnoise specific fields report: - The RUNTIME IN USE reports the amount of time in microseconds that the osnoise thread kept looping reading the time. - The NOISE IN US reports the sum of noise in microseconds observed by the osnoise tracer during the associated runtime. - The % OF CPU AVAILABLE reports the percentage of CPU available for the osnoise thread during the runtime window. - The MAX SINGLE NOISE IN US reports the maximum single noise observed during the runtime window. - The Interference counters display how many each of the respective interference happened during the runtime window. Note that the example above shows a high number of HW noise samples. The reason being is that this sample was taken on a virtual machine, and the host interference is detected as a hardware interference. Tracer options The tracer has a set of options inside the osnoise directory, they are: - osnoise/cpus: CPUs at which a osnoise thread will execute. - osnoise/period_us: the period of the osnoise thread. - osnoise/runtime_us: how long an osnoise thread will look for noise. - osnoise/stop_tracing_us: stop the system tracing if a single noise higher than the configured value happens. Writing 0 disables this option. - osnoise/stop_tracing_total_us: stop the system tracing if total noise higher than the configured value happens. Writing 0 disables this option. - tracing_threshold: the minimum delta between two time() reads to be considered as noise, in us. When set to 0, the default value will be used, which is currently 5 us. Additional Tracing In addition to the tracer, a set of tracepoints were added to facilitate the identification of the osnoise source. - osnoise:sample_threshold: printed anytime a noise is higher than the configurable tolerance_ns. - osnoise:nmi_noise: noise from NMI, including the duration. - osnoise:irq_noise: noise from an IRQ, including the duration. - osnoise:softirq_noise: noise from a SoftIRQ, including the duration. - osnoise:thread_noise: noise from a thread, including the duration. Note that all the values are *net values*. For example, if while osnoise is running, another thread preempts the osnoise thread, it will start a thread_noise duration at the start. Then, an IRQ takes place, preempting the thread_noise, starting a irq_noise. When the IRQ ends its execution, it will compute its duration, and this duration will be subtracted from the thread_noise, in such a way as to avoid the double accounting of the IRQ execution. This logic is valid for all sources of noise. Here is one example of the usage of these tracepoints:: osnoise/8-961 [008] d.h. 5789.857532: irq_noise: local_timer:236 start 5789.857529929 duration 1845 ns osnoise/8-961 [008] dNh. 5789.858408: irq_noise: local_timer:236 start 5789.858404871 duration 2848 ns migration/8-54 [008] d... 5789.858413: thread_noise: migration/8:54 start 5789.858409300 duration 3068 ns osnoise/8-961 [008] .... 5789.858413: sample_threshold: start 5789.858404555 duration 8723 ns interferences 2 In this example, a noise sample of 8 microseconds was reported in the last line, pointing to two interferences. Looking backward in the trace, the two previous entries were about the migration thread running after a timer IRQ execution. The first event is not part of the noise because it took place one millisecond before. It is worth noticing that the sum of the duration reported in the tracepoints is smaller than eight us reported in the sample_threshold. The reason roots in the overhead of the entry and exit code that happens before and after any interference execution. This justifies the dual approach: measuring thread and tracing. Link: https://lkml.kernel.org/r/e649467042d60e7b62714c9c6751a56299d15119.1624372313.git.bristot@redhat.com Cc: Phil Auld Cc: Sebastian Andrzej Siewior Cc: Kate Carcia Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Alexandre Chartre Cc: Clark Willaims Cc: John Kacur Cc: Juri Lelli Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira [ Made the following functions static: trace_irqentry_callback() trace_irqexit_callback() trace_intel_irqentry_callback() trace_intel_irqexit_callback() Added to include/trace.h: osnoise_arch_register() osnoise_arch_unregister() Fixed define logic for LATENCY_FS_NOTIFY Reported-by: kernel test robot ] Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace_irq.h | 13 ++++ include/linux/trace.h | 5 ++ include/trace/events/osnoise.h | 142 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 160 insertions(+) create mode 100644 include/trace/events/osnoise.h (limited to 'include') diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index 0abd9a1d2852..f6faa31289ba 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -7,12 +7,21 @@ extern bool trace_hwlat_callback_enabled; extern void trace_hwlat_callback(bool enter); #endif +#ifdef CONFIG_OSNOISE_TRACER +extern bool trace_osnoise_callback_enabled; +extern void trace_osnoise_callback(bool enter); +#endif + static inline void ftrace_nmi_enter(void) { #ifdef CONFIG_HWLAT_TRACER if (trace_hwlat_callback_enabled) trace_hwlat_callback(true); #endif +#ifdef CONFIG_OSNOISE_TRACER + if (trace_osnoise_callback_enabled) + trace_osnoise_callback(true); +#endif } static inline void ftrace_nmi_exit(void) @@ -21,6 +30,10 @@ static inline void ftrace_nmi_exit(void) if (trace_hwlat_callback_enabled) trace_hwlat_callback(false); #endif +#ifdef CONFIG_OSNOISE_TRACER + if (trace_osnoise_callback_enabled) + trace_osnoise_callback(false); +#endif } #endif /* _LINUX_FTRACE_IRQ_H */ diff --git a/include/linux/trace.h b/include/linux/trace.h index be1e130ed87c..4e3858640c47 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -41,6 +41,11 @@ int trace_array_init_printk(struct trace_array *tr); void trace_array_put(struct trace_array *tr); struct trace_array *trace_array_get_by_name(const char *name); int trace_array_destroy(struct trace_array *tr); + +/* For osnoise tracer */ +int osnoise_arch_register(void); +void osnoise_arch_unregister(void); + #endif /* CONFIG_TRACING */ #endif /* _LINUX_TRACE_H */ diff --git a/include/trace/events/osnoise.h b/include/trace/events/osnoise.h new file mode 100644 index 000000000000..28762c69f6c9 --- /dev/null +++ b/include/trace/events/osnoise.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM osnoise + +#if !defined(_OSNOISE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _OSNOISE_TRACE_H + +#include +TRACE_EVENT(thread_noise, + + TP_PROTO(struct task_struct *t, u64 start, u64 duration), + + TP_ARGS(t, start, duration), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN) + __field( u64, start ) + __field( u64, duration) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, t->comm, TASK_COMM_LEN); + __entry->pid = t->pid; + __entry->start = start; + __entry->duration = duration; + ), + + TP_printk("%8s:%d start %llu.%09u duration %llu ns", + __entry->comm, + __entry->pid, + __print_ns_to_secs(__entry->start), + __print_ns_without_secs(__entry->start), + __entry->duration) +); + +TRACE_EVENT(softirq_noise, + + TP_PROTO(int vector, u64 start, u64 duration), + + TP_ARGS(vector, start, duration), + + TP_STRUCT__entry( + __field( u64, start ) + __field( u64, duration) + __field( int, vector ) + ), + + TP_fast_assign( + __entry->vector = vector; + __entry->start = start; + __entry->duration = duration; + ), + + TP_printk("%8s:%d start %llu.%09u duration %llu ns", + show_softirq_name(__entry->vector), + __entry->vector, + __print_ns_to_secs(__entry->start), + __print_ns_without_secs(__entry->start), + __entry->duration) +); + +TRACE_EVENT(irq_noise, + + TP_PROTO(int vector, const char *desc, u64 start, u64 duration), + + TP_ARGS(vector, desc, start, duration), + + TP_STRUCT__entry( + __field( u64, start ) + __field( u64, duration) + __string( desc, desc ) + __field( int, vector ) + + ), + + TP_fast_assign( + __assign_str(desc, desc); + __entry->vector = vector; + __entry->start = start; + __entry->duration = duration; + ), + + TP_printk("%s:%d start %llu.%09u duration %llu ns", + __get_str(desc), + __entry->vector, + __print_ns_to_secs(__entry->start), + __print_ns_without_secs(__entry->start), + __entry->duration) +); + +TRACE_EVENT(nmi_noise, + + TP_PROTO(u64 start, u64 duration), + + TP_ARGS(start, duration), + + TP_STRUCT__entry( + __field( u64, start ) + __field( u64, duration) + ), + + TP_fast_assign( + __entry->start = start; + __entry->duration = duration; + ), + + TP_printk("start %llu.%09u duration %llu ns", + __print_ns_to_secs(__entry->start), + __print_ns_without_secs(__entry->start), + __entry->duration) +); + +TRACE_EVENT(sample_threshold, + + TP_PROTO(u64 start, u64 duration, u64 interference), + + TP_ARGS(start, duration, interference), + + TP_STRUCT__entry( + __field( u64, start ) + __field( u64, duration) + __field( u64, interference) + ), + + TP_fast_assign( + __entry->start = start; + __entry->duration = duration; + __entry->interference = interference; + ), + + TP_printk("start %llu.%09u duration %llu ns interferences %llu", + __print_ns_to_secs(__entry->start), + __print_ns_without_secs(__entry->start), + __entry->duration, + __entry->interference) +); + +#endif /* _TRACE_OSNOISE_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From f7d9f6370e006400655ff96cb148f56598492d91 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Mon, 28 Jun 2021 11:45:47 +0200 Subject: trace/osnoise: Fix 'no previous prototype' warnings kernel test robot reported some osnoise functions with "no previous prototype." Fix these warnings by making local functions static, and by adding: void osnoise_trace_irq_entry(int id); void osnoise_trace_irq_exit(int id, const char *desc); to include/linux/trace.h. Link: https://lkml.kernel.org/r/e40d3cb4be8bde921f4b40fa6a095cf85ab807bd.1624872608.git.bristot@redhat.com Fixes: bce29ac9ce0b ("trace: Add osnoise tracer") Reported-by: kernel test robot Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/trace.h b/include/linux/trace.h index 4e3858640c47..bf169612ffe1 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -45,6 +45,8 @@ int trace_array_destroy(struct trace_array *tr); /* For osnoise tracer */ int osnoise_arch_register(void); void osnoise_arch_unregister(void); +void osnoise_trace_irq_entry(int id); +void osnoise_trace_irq_exit(int id, const char *desc); #endif /* CONFIG_TRACING */ -- cgit v1.2.3 From b62613b431bdababc90bf1440b2c7427172d94f4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 28 Jun 2021 13:55:22 +0100 Subject: tracing: Fix spelling in osnoise tracer "interferences" -> "interference" There is a spelling mistake in a TP_printk message, the word interferences is not the plural of interference. Fix this. Link: https://lkml.kernel.org/r/20210628125522.56361-1-colin.king@canonical.com Reviewed-by: Daniel Bristot de Oliveira Signed-off-by: Colin Ian King Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/osnoise.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/osnoise.h b/include/trace/events/osnoise.h index 28762c69f6c9..82f741ec0f57 100644 --- a/include/trace/events/osnoise.h +++ b/include/trace/events/osnoise.h @@ -129,7 +129,7 @@ TRACE_EVENT(sample_threshold, __entry->interference = interference; ), - TP_printk("start %llu.%09u duration %llu ns interferences %llu", + TP_printk("start %llu.%09u duration %llu ns interference %llu", __print_ns_to_secs(__entry->start), __print_ns_without_secs(__entry->start), __entry->duration, -- cgit v1.2.3 From 9913d5745bd720c4266805c8d29952a3702e4eca Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 29 Jun 2021 09:40:10 -0400 Subject: tracepoint: Add tracepoint_probe_register_may_exist() for BPF tracing All internal use cases for tracepoint_probe_register() is set to not ever be called with the same function and data. If it is, it is considered a bug, as that means the accounting of handling tracepoints is corrupted. If the function and data for a tracepoint is already registered when tracepoint_probe_register() is called, it will call WARN_ON_ONCE() and return with EEXISTS. The BPF system call can end up calling tracepoint_probe_register() with the same data, which now means that this can trigger the warning because of a user space process. As WARN_ON_ONCE() should not be called because user space called a system call with bad data, there needs to be a way to register a tracepoint without triggering a warning. Enter tracepoint_probe_register_may_exist(), which can be called, but will not cause a WARN_ON() if the probe already exists. It will still error out with EEXIST, which will then be sent to the user space that performed the BPF system call. This keeps the previous testing for issues with other users of the tracepoint code, while letting BPF call it with duplicated data and not warn about it. Link: https://lore.kernel.org/lkml/20210626135845.4080-1-penguin-kernel@I-love.SAKURA.ne.jp/ Link: https://syzkaller.appspot.com/bug?id=41f4318cf01762389f4d1c1c459da4f542fe5153 Cc: stable@vger.kernel.org Fixes: c4f6699dfcb85 ("bpf: introduce BPF_RAW_TRACEPOINT") Reported-by: syzbot Reported-by: Tetsuo Handa Tested-by: syzbot+721aa903751db87aa244@syzkaller.appspotmail.com Signed-off-by: Steven Rostedt (VMware) --- include/linux/tracepoint.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 13f65420f188..ab58696d0ddd 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -41,7 +41,17 @@ extern int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data, int prio); extern int +tracepoint_probe_register_prio_may_exist(struct tracepoint *tp, void *probe, void *data, + int prio); +extern int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data); +static inline int +tracepoint_probe_register_may_exist(struct tracepoint *tp, void *probe, + void *data) +{ + return tracepoint_probe_register_prio_may_exist(tp, probe, data, + TRACEPOINT_DEFAULT_PRIO); +} extern void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), void *priv); -- cgit v1.2.3 From 78c14b385c195d4f25ab7c19186b8897a5b9ae3f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 12 Jun 2021 08:42:27 -0700 Subject: treewide: Add missing semicolons to __assign_str uses The __assign_str macro has an unusual ending semicolon but the vast majority of uses of the macro already have semicolon termination. $ git grep -P '\b__assign_str\b' | wc -l 551 $ git grep -P '\b__assign_str\b.*;' | wc -l 480 Add semicolons to the __assign_str() uses without semicolon termination and all the other uses without semicolon termination via additional defines that are equivalent to __assign_str() with the eventual goal of removing the semicolon from the __assign_str() macro definition. Link: https://lore.kernel.org/lkml/1e068d21106bb6db05b735b4916bb420e6c9842a.camel@perches.com/ Link: https://lkml.kernel.org/r/48a056adabd8f70444475352f617914cef504a45.camel@perches.com Signed-off-by: Joe Perches Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/btrfs.h | 2 +- include/trace/events/dma_fence.h | 4 ++-- include/trace/events/rpcgss.h | 4 ++-- include/trace/events/sunrpc.h | 40 ++++++++++++++++++++-------------------- 4 files changed, 25 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index a41dd8a0c730..2b2d8bc66901 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1097,7 +1097,7 @@ TRACE_EVENT(btrfs_trigger_flush, __entry->flags = flags; __entry->bytes = bytes; __entry->flush = flush; - __assign_str(reason, reason) + __assign_str(reason, reason); ), TP_printk_btrfs("%s: flush=%d(%s) flags=%llu(%s) bytes=%llu", diff --git a/include/trace/events/dma_fence.h b/include/trace/events/dma_fence.h index 64e92d56c6a8..3963e79ca7b4 100644 --- a/include/trace/events/dma_fence.h +++ b/include/trace/events/dma_fence.h @@ -23,8 +23,8 @@ DECLARE_EVENT_CLASS(dma_fence, ), TP_fast_assign( - __assign_str(driver, fence->ops->get_driver_name(fence)) - __assign_str(timeline, fence->ops->get_timeline_name(fence)) + __assign_str(driver, fence->ops->get_driver_name(fence)); + __assign_str(timeline, fence->ops->get_timeline_name(fence)); __entry->context = fence->context; __entry->seqno = fence->seqno; ), diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index ffdbe6f85da8..b2a2672e6632 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -152,7 +152,7 @@ DECLARE_EVENT_CLASS(rpcgss_ctx_class, TP_fast_assign( __entry->cred = gc; __entry->service = gc->gc_service; - __assign_str(principal, gc->gc_principal) + __assign_str(principal, gc->gc_principal); ), TP_printk("cred=%p service=%s principal='%s'", @@ -535,7 +535,7 @@ TRACE_EVENT(rpcgss_upcall_msg, ), TP_fast_assign( - __assign_str(msg, buf) + __assign_str(msg, buf); ), TP_printk("msg='%s'", __get_str(msg)) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index d02e01a27b69..861f199896c6 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -154,8 +154,8 @@ TRACE_EVENT(rpc_clnt_new, __entry->client_id = clnt->cl_clid; __assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]); __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]); - __assign_str(program, program) - __assign_str(server, server) + __assign_str(program, program); + __assign_str(server, server); ), TP_printk("client=%u peer=[%s]:%s program=%s server=%s", @@ -180,8 +180,8 @@ TRACE_EVENT(rpc_clnt_new_err, TP_fast_assign( __entry->error = error; - __assign_str(program, program) - __assign_str(server, server) + __assign_str(program, program); + __assign_str(server, server); ), TP_printk("program=%s server=%s error=%d", @@ -284,8 +284,8 @@ TRACE_EVENT(rpc_request, __entry->client_id = task->tk_client->cl_clid; __entry->version = task->tk_client->cl_vers; __entry->async = RPC_IS_ASYNC(task); - __assign_str(progname, task->tk_client->cl_program->name) - __assign_str(procname, rpc_proc_name(task)) + __assign_str(progname, task->tk_client->cl_program->name); + __assign_str(procname, rpc_proc_name(task)); ), TP_printk("task:%u@%u %sv%d %s (%ssync)", @@ -494,10 +494,10 @@ DECLARE_EVENT_CLASS(rpc_reply_event, __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); - __assign_str(progname, task->tk_client->cl_program->name) + __assign_str(progname, task->tk_client->cl_program->name); __entry->version = task->tk_client->cl_vers; - __assign_str(procname, rpc_proc_name(task)) - __assign_str(servername, task->tk_xprt->servername) + __assign_str(procname, rpc_proc_name(task)); + __assign_str(servername, task->tk_xprt->servername); ), TP_printk("task:%u@%d server=%s xid=0x%08x %sv%d %s", @@ -622,8 +622,8 @@ TRACE_EVENT(rpc_stats_latency, __entry->task_id = task->tk_pid; __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); __entry->version = task->tk_client->cl_vers; - __assign_str(progname, task->tk_client->cl_program->name) - __assign_str(procname, rpc_proc_name(task)) + __assign_str(progname, task->tk_client->cl_program->name); + __assign_str(procname, rpc_proc_name(task)); __entry->backlog = ktime_to_us(backlog); __entry->rtt = ktime_to_us(rtt); __entry->execute = ktime_to_us(execute); @@ -669,15 +669,15 @@ TRACE_EVENT(rpc_xdr_overflow, __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; __assign_str(progname, - task->tk_client->cl_program->name) + task->tk_client->cl_program->name); __entry->version = task->tk_client->cl_vers; - __assign_str(procedure, task->tk_msg.rpc_proc->p_name) + __assign_str(procedure, task->tk_msg.rpc_proc->p_name); } else { __entry->task_id = 0; __entry->client_id = 0; - __assign_str(progname, "unknown") + __assign_str(progname, "unknown"); __entry->version = 0; - __assign_str(procedure, "unknown") + __assign_str(procedure, "unknown"); } __entry->requested = requested; __entry->end = xdr->end; @@ -735,9 +735,9 @@ TRACE_EVENT(rpc_xdr_alignment, __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; __assign_str(progname, - task->tk_client->cl_program->name) + task->tk_client->cl_program->name); __entry->version = task->tk_client->cl_vers; - __assign_str(procedure, task->tk_msg.rpc_proc->p_name) + __assign_str(procedure, task->tk_msg.rpc_proc->p_name); __entry->offset = offset; __entry->copied = copied; @@ -1107,9 +1107,9 @@ TRACE_EVENT(xprt_retransmit, __entry->xid = be32_to_cpu(rqst->rq_xid); __entry->ntrans = rqst->rq_ntrans; __assign_str(progname, - task->tk_client->cl_program->name) + task->tk_client->cl_program->name); __entry->version = task->tk_client->cl_vers; - __assign_str(procedure, task->tk_msg.rpc_proc->p_name) + __assign_str(procedure, task->tk_msg.rpc_proc->p_name); ), TP_printk( @@ -1842,7 +1842,7 @@ TRACE_EVENT(svc_xprt_accept, TP_fast_assign( __assign_str(addr, xprt->xpt_remotebuf); - __assign_str(protocol, xprt->xpt_class->xcl_name) + __assign_str(protocol, xprt->xpt_class->xcl_name); __assign_str(service, service); ), -- cgit v1.2.3