author	Linus Torvalds <torvalds@linux-foundation.org>	2022-10-10 12:20:55 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-10-10 12:20:55 -0700
commit	cdf072acb5baa18e5b05bdf3f13d6481f62396fc (patch)
tree	9e6e6dc4c5adb79b1babab8d4c08cc86f1fe222b
parent	dc55342867c9cf2295f4330420461361a9c8117d (diff)
parent	4f881a696484a7d31a4d1b12547615b1a3ee5771 (diff)
download	linux-cdf072acb5baa18e5b05bdf3f13d6481f62396fc.tar.gz
	linux-cdf072acb5baa18e5b05bdf3f13d6481f62396fc.tar.bz2
	linux-cdf072acb5baa18e5b05bdf3f13d6481f62396fc.zip
Merge tag 'trace-v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull tracing updates from Steven Rostedt:
 "Major changes:

   - Changed location of tracing repo from personal git repo to:
     git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git

   - Added Masami Hiramatsu as co-maintainer

   - Updated MAINTAINERS file to separate out FTRACE as it is more than
     just TRACING.

  Minor changes:

   - Added Mark Rutland as FTRACE reviewer

   - Updated user_events on its way to removing the BROKEN tag. The
     changes should now be acceptable, but we will run them through a
     cycle and hopefully remove the BROKEN tag next release.

   - Added filtering to eprobes

   - Added a delta time to the benchmark trace event

   - Have the histogram and filter callbacks called via a switch
     statement instead of indirect functions. This speeds things up by
     avoiding retpolines.

   - Add a way to wake up ring buffer waiters waiting for the ring
     buffer to fill up to its watermark.

   - New ioctl() on the trace_pipe_raw file to wake up ring buffer
     waiters.

   - Wake up waiters when the ring buffer is disabled. A reader may
     block while the ring buffer is disabled, but if it was already
     blocked when the ring buffer gets disabled, it should then wake up.

  Fixes:

   - Allow splice to read partially read ring buffer pages. This fixes
     splice never moving forward.

   - Fix an inverted compare that made the "shortest" ring buffer wait
     queue actually the longest.

   - Fix a race in the ring buffer between resetting a page when a
     writer goes to another page, and the reader.

   - Fix an ftrace accounting bug when function hooks are added at boot
     up before the weak functions are set to "disabled".

   - Fix a bug that freed a user allocated snapshot buffer when
     enabling a tracer.

   - Fix possible recursive locks in the osnoise tracer

   - Fix recursive locking of direct functions

   - Other minor clean ups and fixes"

* tag 'trace-v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: (44 commits)
  ftrace: Create separate entry in MAINTAINERS for function hooks
  tracing: Update MAINTAINERS to reflect new tracing git repo
  tracing: Do not free snapshot if tracer is on cmdline
  ftrace: Still disable enabled records marked as disabled
  tracing/user_events: Move pages/locks into groups to prepare for namespaces
  tracing: Add Masami Hiramatsu as co-maintainer
  tracing: Remove unused variable 'dups'
  MAINTAINERS: add myself as a tracing reviewer
  ring-buffer: Fix race between reset page and reading page
  tracing/user_events: Update ABI documentation to align to bits vs bytes
  tracing/user_events: Use bits vs bytes for enabled status page data
  tracing/user_events: Use refcount instead of atomic for ref tracking
  tracing/user_events: Ensure user provided strings are safely formatted
  tracing/user_events: Use WRITE instead of READ for io vector import
  tracing/user_events: Use NULL for strstr checks
  tracing: Fix spelling mistake "preapre" -> "prepare"
  tracing: Wake up waiters when tracing is disabled
  tracing: Add ioctl() to force ring buffer waiters to wake up
  tracing: Wake up ring buffer waiters on closing of the file
  ring-buffer: Add ring_buffer_wake_waiters()
  ...
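One of the minor changes above swaps indirect callback calls in the histogram and filter code for direct switch dispatch. The following is an illustrative sketch of that general pattern only; the enum and function names are made up, and the real code lives in trace_events_filter.c and trace_events_hist.c:

	/*
	 * An indirect call such as pred->fn(pred, record) goes through a
	 * retpoline on affected CPUs. Switching on an enum instead lets the
	 * compiler emit direct, predictable branches (or a jump table).
	 */
	enum filter_pred_fn {
		FILTER_PRED_EQ,
		FILTER_PRED_NE,
		FILTER_PRED_GT,
	};

	static int filter_pred_call(enum filter_pred_fn fn, long a, long b)
	{
		switch (fn) {
		case FILTER_PRED_EQ:
			return a == b;
		case FILTER_PRED_NE:
			return a != b;
		case FILTER_PRED_GT:
			return a > b;
		}
		return 0;
	}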
-rw-r--r--	Documentation/trace/user_events.rst	86
-rw-r--r--	MAINTAINERS	26
-rw-r--r--	arch/x86/include/asm/ftrace.h	1
-rw-r--r--	arch/x86/include/asm/kprobes.h	2
-rw-r--r--	arch/x86/kernel/kprobes/core.c	2
-rw-r--r--	include/linux/ftrace.h	41
-rw-r--r--	include/linux/ring_buffer.h	2
-rw-r--r--	include/linux/sched.h	3
-rw-r--r--	include/linux/trace_events.h	1
-rw-r--r--	include/linux/user_events.h	15
-rw-r--r--	kernel/trace/ftrace.c	34
-rw-r--r--	kernel/trace/kprobe_event_gen_test.c	49
-rw-r--r--	kernel/trace/ring_buffer.c	87
-rw-r--r--	kernel/trace/rv/monitors/wip/wip.c	8
-rw-r--r--	kernel/trace/rv/monitors/wwnr/wwnr.c	8
-rw-r--r--	kernel/trace/trace.c	78
-rw-r--r--	kernel/trace/trace.h	13
-rw-r--r--	kernel/trace/trace_benchmark.c	2
-rw-r--r--	kernel/trace/trace_benchmark.h	8
-rw-r--r--	kernel/trace/trace_eprobe.c	107
-rw-r--r--	kernel/trace/trace_events_filter.c	239
-rw-r--r--	kernel/trace/trace_events_hist.c	246
-rw-r--r--	kernel/trace/trace_events_user.c	568
-rw-r--r--	kernel/trace/trace_osnoise.c	3
-rw-r--r--	kernel/trace/trace_probe.h	3
-rw-r--r--	kernel/trace/tracing_map.c	5
-rw-r--r--	kernel/tracepoint.c	14
-rw-r--r--	samples/user_events/example.c	25
-rw-r--r--	tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc	27
-rw-r--r--	tools/testing/selftests/user_events/ftrace_test.c	47
-rw-r--r--	tools/testing/selftests/user_events/perf_test.c	11
-rw-r--r--	tools/verification/dot2/dot2k_templates/main_global.c	8
-rw-r--r--	tools/verification/dot2/dot2k_templates/main_per_cpu.c	8
-rw-r--r--	tools/verification/dot2/dot2k_templates/main_per_task.c	8
34 files changed, 1299 insertions, 486 deletions
diff --git a/Documentation/trace/user_events.rst b/Documentation/trace/user_events.rst
index c180936f49fc..9f181f342a70 100644
--- a/Documentation/trace/user_events.rst
+++ b/Documentation/trace/user_events.rst
@@ -20,14 +20,14 @@ dynamic_events is the same as the ioctl with the u: prefix applied.
Typically programs will register a set of events that they wish to expose to
tools that can read trace_events (such as ftrace and perf). The registration
-process gives back two ints to the program for each event. The first int is the
-status index. This index describes which byte in the
+process gives back two ints to the program for each event. The first int is
+the status bit. This describes which bit, in little-endian order, within the
/sys/kernel/debug/tracing/user_events_status file represents this event. The
-second int is the write index. This index describes the data when a write() or
+second int is the write index, which describes the data when a write() or
writev() is called on the /sys/kernel/debug/tracing/user_events_data file.
-The structures referenced in this document are contained with the
-/include/uap/linux/user_events.h file in the source tree.
+The structures referenced in this document are contained within the
+/include/uapi/linux/user_events.h file in the source tree.
**NOTE:** *Both user_events_status and user_events_data are under the tracefs
filesystem and may be mounted at different paths than above.*
@@ -38,18 +38,18 @@ Registering within a user process is done via ioctl() out to the
/sys/kernel/debug/tracing/user_events_data file. The command to issue is
DIAG_IOCSREG.
-This command takes a struct user_reg as an argument::
+This command takes a packed struct user_reg as an argument::
struct user_reg {
u32 size;
u64 name_args;
- u32 status_index;
+ u32 status_bit;
u32 write_index;
};
The struct user_reg requires two inputs: the first is the size of the structure
to ensure forward and backward compatibility. The second is the command string
-to issue for registering. Upon success two outputs are set, the status index
+to issue for registering. Upon success, two outputs are set: the status bit
and the write index.
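As a sketch of the registration flow just described (illustrative only, not part of this patch; it omits error handling and assumes the uapi header from this tree is installed as <linux/user_events.h>, which provides struct user_reg and DIAG_IOCSREG)::

  #include <fcntl.h>
  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <linux/user_events.h>

  int main(void)
  {
  	struct user_reg reg = {0};
  	int data_fd;

  	data_fd = open("/sys/kernel/debug/tracing/user_events_data", O_RDWR);

  	reg.size = sizeof(reg);
  	/* Hypothetical event: name "test" with a single u32 payload */
  	reg.name_args = (__u64)(unsigned long)"test u32 count";

  	if (ioctl(data_fd, DIAG_IOCSREG, &reg) == -1)
  		return 1;

  	/* On success the kernel fills in both outputs */
  	printf("status_bit=%u write_index=%u\n",
  	       reg.status_bit, reg.write_index);
  	return 0;
  }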
User based events show up under tracefs like any other event under the
@@ -111,15 +111,56 @@ in realtime. This allows user programs to only incur the cost of the write() or
writev() calls when something is actively attached to the event.
User programs call mmap() on /sys/kernel/debug/tracing/user_events_status to
-check the status for each event that is registered. The byte to check in the
-file is given back after the register ioctl() via user_reg.status_index.
+check the status for each event that is registered. The bit to check in the
+file is given back after the register ioctl() via user_reg.status_bit. The bit
+is always in little-endian format. Programs can check if the bit is set either
+using a byte-wise index with a mask or a long-wise index with a little-endian
+mask.
+
Currently the size of user_events_status is a single page; however, custom
kernel configurations can change this size to allow more user based events. In
all cases the size of the file is a multiple of the page size.
-For example, if the register ioctl() gives back a status_index of 3 you would
-check byte 3 of the returned mmap data to see if anything is attached to that
-event.
+For example, if the register ioctl() gives back a status_bit of 3, you would
+check byte 0 (3 / 8) of the returned mmap data and then AND the result with 8
+(1 << (3 % 8)) to see if anything is attached to that event.
+
+A byte-wise index check is performed as follows::
+
+ int index, mask;
+ char *status_page;
+
+ index = status_bit / 8;
+ mask = 1 << (status_bit % 8);
+
+ ...
+
+ if (status_page[index] & mask) {
+ /* Enabled */
+ }
+
+A long-wise index check is performed as follows::
+
+ #include <asm/bitsperlong.h>
+ #include <endian.h>
+
+ #if __BITS_PER_LONG == 64
+ #define endian_swap(x) htole64(x)
+ #else
+ #define endian_swap(x) htole32(x)
+ #endif
+
+ long index, mask, *status_page;
+
+ index = status_bit / __BITS_PER_LONG;
+ mask = 1L << (status_bit % __BITS_PER_LONG);
+ mask = endian_swap(mask);
+
+ ...
+
+ if (status_page[index] & mask) {
+ /* Enabled */
+ }
Administrators can easily check the status of all registered events by reading
the user_events_status file directly via a terminal. The output is as follows::
@@ -137,7 +178,7 @@ For example, on a system that has a single event the output looks like this::
Active: 1
Busy: 0
- Max: 4096
+ Max: 32768
If a user enables the user event via ftrace, the output would change to this::
@@ -145,21 +186,10 @@ If a user enables the user event via ftrace, the output would change to this::
Active: 1
Busy: 1
- Max: 4096
-
-**NOTE:** *A status index of 0 will never be returned. This allows user
-programs to have an index that can be used on error cases.*
-
-Status Bits
-^^^^^^^^^^^
-The byte being checked will be non-zero if anything is attached. Programs can
-check specific bits in the byte to see what mechanism has been attached.
-
-The following values are defined to aid in checking what has been attached:
-
-**EVENT_STATUS_FTRACE** - Bit set if ftrace has been attached (Bit 0).
+ Max: 32768
-**EVENT_STATUS_PERF** - Bit set if perf has been attached (Bit 1).
+**NOTE:** *A status bit of 0 will never be returned. This allows user programs
+to have a bit that can be used in error cases.*
Writing Data
------------
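As a hedged sketch of how the write_index returned at registration is then used (patterned after samples/user_events/example.c in this tree; treat the payload layout as illustrative, since it must match whatever format string was registered)::

  #include <sys/uio.h>
  #include <linux/types.h>

  /*
   * The first iovec carries the write_index returned by DIAG_IOCSREG;
   * the remaining iovecs carry the payload matching the registered
   * format (here a single u32 named "count").
   */
  static ssize_t write_event(int data_fd, __u32 write_index, __u32 count)
  {
  	struct iovec io[2];

  	io[0].iov_base = &write_index;
  	io[0].iov_len = sizeof(write_index);
  	io[1].iov_base = &count;
  	io[1].iov_len = sizeof(count);

  	return writev(data_fd, io, 2);
  }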
diff --git a/MAINTAINERS b/MAINTAINERS
index 0dc4a769216b..bf933661fb36 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8433,6 +8433,19 @@ L: platform-driver-x86@vger.kernel.org
S: Maintained
F: drivers/platform/x86/fujitsu-tablet.c
+FUNCTION HOOKS (FTRACE)
+M: Steven Rostedt <rostedt@goodmis.org>
+M: Masami Hiramatsu <mhiramat@kernel.org>
+R: Mark Rutland <mark.rutland@arm.com>
+S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
+F: Documentation/trace/ftrace*
+F: kernel/trace/ftrace*
+F: kernel/trace/fgraph.c
+F: arch/*/*/*/*ftrace*
+F: arch/*/*/*ftrace*
+F: include/*/ftrace.h
+
FUNGIBLE ETHERNET DRIVERS
M: Dimitris Michailidis <dmichail@fungible.com>
L: netdev@vger.kernel.org
@@ -11422,7 +11435,7 @@ M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
M: "David S. Miller" <davem@davemloft.net>
M: Masami Hiramatsu <mhiramat@kernel.org>
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
F: Documentation/trace/kprobes.rst
F: include/asm-generic/kprobes.h
F: include/linux/kprobes.h
@@ -20771,14 +20784,11 @@ F: drivers/hwmon/pmbus/tps546d24.c
TRACING
M: Steven Rostedt <rostedt@goodmis.org>
-M: Ingo Molnar <mingo@redhat.com>
+M: Masami Hiramatsu <mhiramat@kernel.org>
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
-F: Documentation/trace/ftrace.rst
-F: arch/*/*/*/*ftrace*
-F: arch/*/*/*ftrace*
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
+F: Documentation/trace/*
F: fs/tracefs/
-F: include/*/ftrace.h
F: include/linux/trace*.h
F: include/trace/
F: kernel/trace/
@@ -20787,7 +20797,7 @@ F: tools/testing/selftests/ftrace/
TRACING MMIO ACCESSES (MMIOTRACE)
M: Steven Rostedt <rostedt@goodmis.org>
-M: Ingo Molnar <mingo@kernel.org>
+M: Masami Hiramatsu <mhiramat@kernel.org>
R: Karol Herbst <karolherbst@gmail.com>
R: Pekka Paalanen <ppaalanen@gmail.com>
L: linux-kernel@vger.kernel.org
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index b5ef474be858..908d99b127d3 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -23,7 +23,6 @@
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
#ifndef __ASSEMBLY__
-extern atomic_t modifying_ftrace_code;
extern void __fentry__(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 71ea2eab43d5..a2e9317aad49 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -50,8 +50,6 @@ extern const int kretprobe_blacklist_size;
void arch_remove_kprobe(struct kprobe *p);
-extern void arch_kprobe_override_function(struct pt_regs *regs);
-
/* Architecture specific copy of original instruction*/
struct arch_specific_insn {
/* copy of the original instruction */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 4c3c27b6aea3..eb8bc82846b9 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -59,8 +59,6 @@
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
-#define stack_addr(regs) ((unsigned long *)regs->sp)
-
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
(b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0b61371e287b..62557d4bffc2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1122,47 +1122,6 @@ static inline void unpause_graph_tracing(void) { }
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#ifdef CONFIG_TRACING
-
-/* flags for current->trace */
-enum {
- TSK_TRACE_FL_TRACE_BIT = 0,
- TSK_TRACE_FL_GRAPH_BIT = 1,
-};
-enum {
- TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT,
- TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT,
-};
-
-static inline void set_tsk_trace_trace(struct task_struct *tsk)
-{
- set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
-}
-
-static inline void clear_tsk_trace_trace(struct task_struct *tsk)
-{
- clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace);
-}
-
-static inline int test_tsk_trace_trace(struct task_struct *tsk)
-{
- return tsk->trace & TSK_TRACE_FL_TRACE;
-}
-
-static inline void set_tsk_trace_graph(struct task_struct *tsk)
-{
- set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
-}
-
-static inline void clear_tsk_trace_graph(struct task_struct *tsk)
-{
- clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace);
-}
-
-static inline int test_tsk_trace_graph(struct task_struct *tsk)
-{
- return tsk->trace & TSK_TRACE_FL_GRAPH;
-}
-
enum ftrace_dump_mode;
extern enum ftrace_dump_mode ftrace_dump_on_oops;
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index dac53fd3afea..2504df9a0453 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -101,7 +101,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full);
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table);
-
+void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu);
#define RING_BUFFER_ALL_CPUS -1
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 68ec44d6b962..11a317810566 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1390,9 +1390,6 @@ struct task_struct {
#endif
#ifdef CONFIG_TRACING
- /* State flags for use by tracers: */
- unsigned long trace;
-
/* Bitmask and counter of trace recursion: */
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 8401dec93c15..20749bd9db71 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -92,6 +92,7 @@ struct trace_iterator {
unsigned int temp_size;
char *fmt; /* modified format holder */
unsigned int fmt_size;
+ long wait_index;
/* trace_seq for __print_flags() and __print_symbolic() etc. */
struct trace_seq tmp_seq;
diff --git a/include/linux/user_events.h b/include/linux/user_events.h
index 736e05603463..592a3fbed98e 100644
--- a/include/linux/user_events.h
+++ b/include/linux/user_events.h
@@ -20,15 +20,6 @@
#define USER_EVENTS_SYSTEM "user_events"
#define USER_EVENTS_PREFIX "u:"
-/* Bits 0-6 are for known probe types, Bit 7 is for unknown probes */
-#define EVENT_BIT_FTRACE 0
-#define EVENT_BIT_PERF 1
-#define EVENT_BIT_OTHER 7
-
-#define EVENT_STATUS_FTRACE (1 << EVENT_BIT_FTRACE)
-#define EVENT_STATUS_PERF (1 << EVENT_BIT_PERF)
-#define EVENT_STATUS_OTHER (1 << EVENT_BIT_OTHER)
-
/* Create dynamic location entry within a 32-bit value */
#define DYN_LOC(offset, size) ((size) << 16 | (offset))
@@ -45,12 +36,12 @@ struct user_reg {
/* Input: Pointer to string with event name, description and flags */
__u64 name_args;
- /* Output: Byte index of the event within the status page */
- __u32 status_index;
+ /* Output: Bitwise index of the event within the status page */
+ __u32 status_bit;
/* Output: Index of the event to use when writing data */
__u32 write_index;
-};
+} __attribute__((__packed__));
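For illustration only (not part of the patch): the packed attribute matters here because, without it, the alignment of the __u64 member would insert padding after size on most 64-bit ABIs and change the struct layout between architectures. A minimal sketch, assuming a typical x86-64 ABI:

	#include <stdio.h>
	#include <stddef.h>
	#include <linux/types.h>

	/* Same members as user_reg, but without the packed attribute */
	struct user_reg_unpacked {
		__u32 size;
		__u64 name_args;
		__u32 status_bit;
		__u32 write_index;
	};

	int main(void)
	{
		/* Typically prints size 24, offset 8 on x86-64; the packed
		 * uapi struct is 20 bytes with name_args at offset 4. */
		printf("unpacked size = %zu, name_args offset = %zu\n",
		       sizeof(struct user_reg_unpacked),
		       offsetof(struct user_reg_unpacked, name_args));
		return 0;
	}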
#define DIAG_IOC_MAGIC '*'
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 447d2e2a8549..02e18c436439 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1644,6 +1644,18 @@ ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_ex
static struct ftrace_ops *
ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
+static bool skip_record(struct dyn_ftrace *rec)
+{
+ /*
+ * At boot up, weak functions are set to be disabled. Function tracing
+ * can be enabled before they are, in which case they still need to be
+ * disabled now. If the record is disabled, still continue if it is
+ * marked as already enabled (this is needed to keep the accounting working).
+ */
+ return rec->flags & FTRACE_FL_DISABLED &&
+ !(rec->flags & FTRACE_FL_ENABLED);
+}
+
static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
int filter_hash,
bool inc)
@@ -1693,7 +1705,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
int in_hash = 0;
int match = 0;
- if (rec->flags & FTRACE_FL_DISABLED)
+ if (skip_record(rec))
continue;
if (all) {
@@ -2126,7 +2138,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
ftrace_bug_type = FTRACE_BUG_UNKNOWN;
- if (rec->flags & FTRACE_FL_DISABLED)
+ if (skip_record(rec))
return FTRACE_UPDATE_IGNORE;
/*
@@ -2241,7 +2253,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
if (update) {
/* If there's no more users, clear all flags */
if (!ftrace_rec_count(rec))
- rec->flags = 0;
+ rec->flags &= FTRACE_FL_DISABLED;
else
/*
* Just disable the record, but keep the ops TRAMP
@@ -2634,7 +2646,7 @@ void __weak ftrace_replace_code(int mod_flags)
do_for_each_ftrace_rec(pg, rec) {
- if (rec->flags & FTRACE_FL_DISABLED)
+ if (skip_record(rec))
continue;
failed = __ftrace_replace_code(rec, enable);
@@ -5427,6 +5439,8 @@ static struct ftrace_ops stub_ops = {
* it is safe to modify the ftrace record, where it should be
* currently calling @old_addr directly, to call @new_addr.
*
+ * This is called with direct_mutex locked.
+ *
* Safety checks should be made to make sure that the code at
* @rec->ip is currently calling @old_addr. And this must
* also update entry->direct to @new_addr.
@@ -5439,6 +5453,8 @@ int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
unsigned long ip = rec->ip;
int ret;
+ lockdep_assert_held(&direct_mutex);
+
/*
* The ftrace_lock was used to determine if the record
* had more than one registered user to it. If it did,
@@ -5461,7 +5477,7 @@ int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
if (ret)
goto out_lock;
- ret = register_ftrace_function(&stub_ops);
+ ret = register_ftrace_function_nolock(&stub_ops);
if (ret) {
ftrace_set_filter_ip(&stub_ops, ip, 1, 0);
goto out_lock;
@@ -6081,8 +6097,12 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
if (filter_hash) {
orig_hash = &iter->ops->func_hash->filter_hash;
- if (iter->tr && !list_empty(&iter->tr->mod_trace))
- iter->hash->flags |= FTRACE_HASH_FL_MOD;
+ if (iter->tr) {
+ if (list_empty(&iter->tr->mod_trace))
+ iter->hash->flags &= ~FTRACE_HASH_FL_MOD;
+ else
+ iter->hash->flags |= FTRACE_HASH_FL_MOD;
+ }
} else
orig_hash = &iter->ops->func_hash->notrace_hash;
diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c
index 18b0f1cbb947..80e04a1e1977 100644
--- a/kernel/trace/kprobe_event_gen_test.c
+++ b/kernel/trace/kprobe_event_gen_test.c
@@ -35,6 +35,45 @@
static struct trace_event_file *gen_kprobe_test;
static struct trace_event_file *gen_kretprobe_test;
+#define KPROBE_GEN_TEST_FUNC "do_sys_open"
+
+/* X86 */
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_32)
+#define KPROBE_GEN_TEST_ARG0 "dfd=%ax"
+#define KPROBE_GEN_TEST_ARG1 "filename=%dx"
+#define KPROBE_GEN_TEST_ARG2 "flags=%cx"
+#define KPROBE_GEN_TEST_ARG3 "mode=+4($stack)"
+
+/* ARM64 */
+#elif defined(CONFIG_ARM64)
+#define KPROBE_GEN_TEST_ARG0 "dfd=%x0"
+#define KPROBE_GEN_TEST_ARG1 "filename=%x1"
+#define KPROBE_GEN_TEST_ARG2 "flags=%x2"
+#define KPROBE_GEN_TEST_ARG3 "mode=%x3"
+
+/* ARM */
+#elif defined(CONFIG_ARM)
+#define KPROBE_GEN_TEST_ARG0 "dfd=%r0"
+#define KPROBE_GEN_TEST_ARG1 "filename=%r1"
+#define KPROBE_GEN_TEST_ARG2 "flags=%r2"
+#define KPROBE_GEN_TEST_ARG3 "mode=%r3"
+
+/* RISCV */
+#elif defined(CONFIG_RISCV)
+#define KPROBE_GEN_TEST_ARG0 "dfd=%a0"
+#define KPROBE_GEN_TEST_ARG1 "filename=%a1"
+#define KPROBE_GEN_TEST_ARG2 "flags=%a2"
+#define KPROBE_GEN_TEST_ARG3 "mode=%a3"
+
+/* others */
+#else
+#define KPROBE_GEN_TEST_ARG0 NULL
+#define KPROBE_GEN_TEST_ARG1 NULL
+#define KPROBE_GEN_TEST_ARG2 NULL
+#define KPROBE_GEN_TEST_ARG3 NULL
+#endif
+
+
/*
* Test to make sure we can create a kprobe event, then add more
* fields.
@@ -58,14 +97,14 @@ static int __init test_gen_kprobe_cmd(void)
* fields.
*/
ret = kprobe_event_gen_cmd_start(&cmd, "gen_kprobe_test",
- "do_sys_open",
- "dfd=%ax", "filename=%dx");
+ KPROBE_GEN_TEST_FUNC,
+ KPROBE_GEN_TEST_ARG0, KPROBE_GEN_TEST_ARG1);
if (ret)
goto free;
/* Use kprobe_event_add_fields to add the rest of the fields */
- ret = kprobe_event_add_fields(&cmd, "flags=%cx", "mode=+4($stack)");
+ ret = kprobe_event_add_fields(&cmd, KPROBE_GEN_TEST_ARG2, KPROBE_GEN_TEST_ARG3);
if (ret)
goto free;
@@ -128,7 +167,7 @@ static int __init test_gen_kretprobe_cmd(void)
* Define the kretprobe event.
*/
ret = kretprobe_event_gen_cmd_start(&cmd, "gen_kretprobe_test",
- "do_sys_open",
+ KPROBE_GEN_TEST_FUNC,
"$retval");
if (ret)
goto free;
@@ -206,7 +245,7 @@ static void __exit kprobe_event_gen_test_exit(void)
WARN_ON(kprobe_event_delete("gen_kprobe_test"));
/* Disable the event or you can't remove it */
- WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr,
+ WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr,
"kprobes",
"gen_kretprobe_test", false));
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index d59b6a328b7f..c3f354cfc5ba 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -413,6 +413,7 @@ struct rb_irq_work {
struct irq_work work;
wait_queue_head_t waiters;
wait_queue_head_t full_waiters;
+ long wait_index;
bool waiters_pending;
bool full_waiters_pending;
bool wakeup_full;
@@ -917,13 +918,45 @@ static void rb_wake_up_waiters(struct irq_work *work)
struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
wake_up_all(&rbwork->waiters);
- if (rbwork->wakeup_full) {
+ if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
rbwork->wakeup_full = false;
+ rbwork->full_waiters_pending = false;
wake_up_all(&rbwork->full_waiters);
}
}
/**
+ * ring_buffer_wake_waiters - wake up any waiters on this ring buffer
+ * @buffer: The ring buffer to wake waiters on
+ * @cpu: The CPU buffer to wake waiters on, or RING_BUFFER_ALL_CPUS
+ *
+ * When a file that represents a ring buffer is closing,
+ * it is prudent to wake up any waiters that are waiting on it.
+ */
+void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct rb_irq_work *rbwork;
+
+ if (cpu == RING_BUFFER_ALL_CPUS) {
+
+ /* Wake up individual ones too. One level of recursion */
+ for_each_buffer_cpu(buffer, cpu)
+ ring_buffer_wake_waiters(buffer, cpu);
+
+ rbwork = &buffer->irq_work;
+ } else {
+ cpu_buffer = buffer->buffers[cpu];
+ rbwork = &cpu_buffer->irq_work;
+ }
+
+ rbwork->wait_index++;
+ /* make sure the waiters see the new index */
+ smp_wmb();
+
+ rb_wake_up_waiters(&rbwork->work);
+}
+
+/**
* ring_buffer_wait - wait for input to the ring buffer
* @buffer: buffer to wait on
* @cpu: the cpu buffer to wait on
@@ -938,6 +971,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
struct ring_buffer_per_cpu *cpu_buffer;
DEFINE_WAIT(wait);
struct rb_irq_work *work;
+ long wait_index;
int ret = 0;
/*
@@ -956,6 +990,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
work = &cpu_buffer->irq_work;
}
+ wait_index = READ_ONCE(work->wait_index);
while (true) {
if (full)
@@ -1011,7 +1046,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
nr_pages = cpu_buffer->nr_pages;
dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
if (!cpu_buffer->shortest_full ||
- cpu_buffer->shortest_full < full)
+ cpu_buffer->shortest_full > full)
cpu_buffer->shortest_full = full;
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
if (!pagebusy &&
@@ -1020,6 +1055,11 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
}
schedule();
+
+ /* Make sure to see the new wait index */
+ smp_rmb();
+ if (wait_index != work->wait_index)
+ break;