summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/trace/fprobe.rst16
-rw-r--r--Documentation/trace/ftrace.rst6
-rw-r--r--Documentation/trace/user_events.rst167
-rw-r--r--fs/exec.c2
-rw-r--r--include/linux/fprobe.h10
-rw-r--r--include/linux/ftrace.h5
-rw-r--r--include/linux/sched.h5
-rw-r--r--include/linux/seq_buf.h2
-rw-r--r--include/linux/user_events.h101
-rw-r--r--include/uapi/linux/user_events.h81
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/trace/Kconfig6
-rw-r--r--kernel/trace/bpf_trace.c17
-rw-r--r--kernel/trace/fprobe.c32
-rw-r--r--kernel/trace/ftrace.c51
-rw-r--r--kernel/trace/ring_buffer.c102
-rw-r--r--kernel/trace/trace.c7
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_events_user.c1034
-rw-r--r--kernel/trace/trace_output.c175
-rw-r--r--kernel/trace/trace_output.h2
-rw-r--r--lib/seq_buf.c32
-rw-r--r--lib/test_fprobe.c105
-rw-r--r--samples/fprobe/fprobe_example.c7
-rw-r--r--samples/user_events/example.c45
-rwxr-xr-xscripts/leaking_addresses.pl1
-rw-r--r--scripts/recordmcount.c6
-rwxr-xr-xtools/kvm/kvm_stat/kvm_stat2
-rw-r--r--tools/testing/selftests/mm/protection_keys.c4
-rw-r--r--tools/testing/selftests/user_events/Makefile2
-rw-r--r--tools/testing/selftests/user_events/abi_test.c229
-rw-r--r--tools/testing/selftests/user_events/dyn_test.c2
-rw-r--r--tools/testing/selftests/user_events/ftrace_test.c176
-rw-r--r--tools/testing/selftests/user_events/perf_test.c39
35 files changed, 1959 insertions, 518 deletions
diff --git a/Documentation/trace/fprobe.rst b/Documentation/trace/fprobe.rst
index b64bec1ce144..40dd2fbce861 100644
--- a/Documentation/trace/fprobe.rst
+++ b/Documentation/trace/fprobe.rst
@@ -87,14 +87,16 @@ returns as same as unregister_ftrace_function().
The fprobe entry/exit handler
=============================
-The prototype of the entry/exit callback function is as follows:
+The prototypes of the entry/exit callback functions are as follows:
.. code-block:: c
- void callback_func(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs);
+ int entry_callback(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs, void *entry_data);
-Note that both entry and exit callbacks have same ptototype. The @entry_ip is
-saved at function entry and passed to exit handler.
+ void exit_callback(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs, void *entry_data);
+
+Note that the @entry_ip is saved at function entry and passed to exit handler.
+If the entry callback function returns !0, the corresponding exit callback will be cancelled.
@fp
This is the address of `fprobe` data structure related to this handler.
@@ -113,6 +115,12 @@ saved at function entry and passed to exit handler.
to use @entry_ip. On the other hand, in the exit_handler, the instruction
pointer of @regs is set to the currect return address.
+@entry_data
+ This is a local storage to share the data between entry and exit handlers.
+ This storage is NULL by default. If the user specifies the `exit_handler` field
+ and the `entry_data_size` field when registering the fprobe, the storage is
+ allocated and passed to both `entry_handler` and `exit_handler`.
+
Share the callbacks with kprobes
================================
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index e8bca5fea7cc..a9c8bce4bc7b 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -1027,6 +1027,7 @@ To see what is available, simply cat the file::
nohex
nobin
noblock
+ nofields
trace_printk
annotate
nouserstacktrace
@@ -1110,6 +1111,11 @@ Here are the available options:
block
When set, reading trace_pipe will not block when polled.
+ fields
+ Print the fields as described by their types. This is a better
+ option than using hex, bin or raw, as it gives a better parsing
+ of the content of the event.
+
trace_printk
Can disable trace_printk() from writing into the buffer.
diff --git a/Documentation/trace/user_events.rst b/Documentation/trace/user_events.rst
index 422802ef4025..f79987e16cf4 100644
--- a/Documentation/trace/user_events.rst
+++ b/Documentation/trace/user_events.rst
@@ -20,11 +20,10 @@ dynamic_events is the same as the ioctl with the u: prefix applied.
Typically programs will register a set of events that they wish to expose to
tools that can read trace_events (such as ftrace and perf). The registration
-process gives back two ints to the program for each event. The first int is
-the status bit. This describes which bit in little-endian format in the
-/sys/kernel/tracing/user_events_status file represents this event. The
-second int is the write index which describes the data when a write() or
-writev() is called on the /sys/kernel/tracing/user_events_data file.
+process tells the kernel which address and bit to reflect if any tool has
+enabled the event and data should be written. The registration will give back
+a write index which describes the data when a write() or writev() is called
+on the /sys/kernel/tracing/user_events_data file.
The structures referenced in this document are contained within the
/include/uapi/linux/user_events.h file in the source tree.
@@ -41,23 +40,64 @@ DIAG_IOCSREG.
This command takes a packed struct user_reg as an argument::
struct user_reg {
- u32 size;
- u64 name_args;
- u32 status_bit;
- u32 write_index;
- };
+ /* Input: Size of the user_reg structure being used */
+ __u32 size;
+
+ /* Input: Bit in enable address to use */
+ __u8 enable_bit;
+
+ /* Input: Enable size in bytes at address */
+ __u8 enable_size;
+
+ /* Input: Flags for future use, set to 0 */
+ __u16 flags;
+
+ /* Input: Address to update when enabled */
+ __u64 enable_addr;
+
+ /* Input: Pointer to string with event name, description and flags */
+ __u64 name_args;
+
+ /* Output: Index of the event to use when writing data */
+ __u32 write_index;
+ } __attribute__((__packed__));
+
+The struct user_reg requires all the above inputs to be set appropriately.
+
++ size: This must be set to sizeof(struct user_reg).
-The struct user_reg requires two inputs, the first is the size of the structure
-to ensure forward and backward compatibility. The second is the command string
-to issue for registering. Upon success two outputs are set, the status bit
-and the write index.
++ enable_bit: The bit to reflect the event status at the address specified by
+ enable_addr.
+
++ enable_size: The size of the value specified by enable_addr.
+ This must be 4 (32-bit) or 8 (64-bit). 64-bit values are only allowed to be
+ used on 64-bit kernels; however, 32-bit values can be used on all kernels.
+
++ flags: The flags to use, if any. For the initial version this must be 0.
+ Callers should first attempt to use flags and retry without flags to ensure
+ support for lower versions of the kernel. If a flag is not supported -EINVAL
+ is returned.
+
++ enable_addr: The address of the value to use to reflect event status. This
+ must be naturally aligned and write accessible within the user program.
+
++ name_args: The name and arguments to describe the event, see command format
+ for details.
+
+Upon successful registration the following is set.
+
++ write_index: The index to use for this file descriptor that represents this
+ event when writing out data. The index is unique to this instance of the file
+ descriptor that was used for the registration. See writing data for details.
User based events show up under tracefs like any other event under the
subsystem named "user_events". This means tools that wish to attach to the
events need to use /sys/kernel/tracing/events/user_events/[name]/enable
or perf record -e user_events:[name] when attaching/recording.
-**NOTE:** *The write_index returned is only valid for the FD that was used*
+**NOTE:** The event subsystem name by default is "user_events". Callers should
+not assume it will always be "user_events". Operators reserve the right in the
+future to change the subsystem name per-process to accommodate event isolation.
Command Format
^^^^^^^^^^^^^^
@@ -94,7 +134,7 @@ Would be represented by the following field::
struct mytype myname 20
Deleting
------------
+--------
Deleting an event from within a user process is done via ioctl() out to the
/sys/kernel/tracing/user_events_data file. The command to issue is
DIAG_IOCSDEL.
@@ -104,92 +144,79 @@ its name. Delete will only succeed if there are no references left to the
event (in both user and kernel space). User programs should use a separate file
to request deletes than the one used for registration due to this.
-Status
-------
-When tools attach/record user based events the status of the event is updated
-in realtime. This allows user programs to only incur the cost of the write() or
-writev() calls when something is actively attached to the event.
-
-User programs call mmap() on /sys/kernel/tracing/user_events_status to
-check the status for each event that is registered. The bit to check in the
-file is given back after the register ioctl() via user_reg.status_bit. The bit
-is always in little-endian format. Programs can check if the bit is set either
-using a byte-wise index with a mask or a long-wise index with a little-endian
-mask.
+Unregistering
+-------------
+If a process no longer wants updates for an event it has registered, it can
+disable them via ioctl() out to the /sys/kernel/tracing/user_events_data file.
+The command to issue is DIAG_IOCSUNREG. This is different than deleting, where
+deleting actually removes the event from the system. Unregistering simply tells
+the kernel your process is no longer interested in updates to the event.
-Currently the size of user_events_status is a single page, however, custom
-kernel configurations can change this size to allow more user based events. In
-all cases the size of the file is a multiple of a page size.
+This command takes a packed struct user_unreg as an argument::
-For example, if the register ioctl() gives back a status_bit of 3 you would
-check byte 0 (3 / 8) of the returned mmap data and then AND the result with 8
-(1 << (3 % 8)) to see if anything is attached to that event.
+ struct user_unreg {
+ /* Input: Size of the user_unreg structure being used */
+ __u32 size;
-A byte-wise index check is performed as follows::
+ /* Input: Bit to unregister */
+ __u8 disable_bit;
- int index, mask;
- char *status_page;
+ /* Input: Reserved, set to 0 */
+ __u8 __reserved;
- index = status_bit / 8;
- mask = 1 << (status_bit % 8);
-
- ...
+ /* Input: Reserved, set to 0 */
+ __u16 __reserved2;
- if (status_page[index] & mask) {
- /* Enabled */
- }
+ /* Input: Address to unregister */
+ __u64 disable_addr;
+ } __attribute__((__packed__));
-A long-wise index check is performed as follows::
+The struct user_unreg requires all the above inputs to be set appropriately.
- #include <asm/bitsperlong.h>
- #include <endian.h>
++ size: This must be set to sizeof(struct user_unreg).
- #if __BITS_PER_LONG == 64
- #define endian_swap(x) htole64(x)
- #else
- #define endian_swap(x) htole32(x)
- #endif
++ disable_bit: This must be set to the bit to disable (same bit that was
+ previously registered via enable_bit).
- long index, mask, *status_page;
++ disable_addr: This must be set to the address to disable (same address that was
+ previously registered via enable_addr).
- index = status_bit / __BITS_PER_LONG;
- mask = 1L << (status_bit % __BITS_PER_LONG);
- mask = endian_swap(mask);
+**NOTE:** Events are automatically unregistered when execve() is invoked. During
+fork() the registered events will be retained and must be unregistered manually
+in each process if wanted.
- ...
+Status
+------
+When tools attach/record user based events the status of the event is updated
+in realtime. This allows user programs to only incur the cost of the write() or
+writev() calls when something is actively attached to the event.
- if (status_page[index] & mask) {
- /* Enabled */
- }
+The kernel will update the specified bit that was registered for the event as
+tools attach/detach from the event. User programs simply check if the bit is set
+to see if something is attached or not.
Administrators can easily check the status of all registered events by reading
the user_events_status file directly via a terminal. The output is as follows::
- Byte:Name [# Comments]
+ Name [# Comments]
...
Active: ActiveCount
Busy: BusyCount
- Max: MaxCount
For example, on a system that has a single event the output looks like this::
- 1:test
+ test
Active: 1
Busy: 0
- Max: 32768
If a user enables the user event via ftrace, the output would change to this::
- 1:test # Used by ftrace
+ test # Used by ftrace
Active: 1
Busy: 1
- Max: 32768
-
-**NOTE:** *A status bit of 0 will never be returned. This allows user programs
-to have a bit that can be used on error cases.*
Writing Data
------------
@@ -217,7 +244,7 @@ For example, if I have a struct like this::
int src;
int dst;
int flags;
- };
+ } __attribute__((__packed__));
It's advised for user programs to do the following::
diff --git a/fs/exec.c b/fs/exec.c
index 87cf3a2f0e9a..a466e797c8e2 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -65,6 +65,7 @@
#include <linux/syscall_user_dispatch.h>
#include <linux/coredump.h>
#include <linux/time_namespace.h>
+#include <linux/user_events.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -1859,6 +1860,7 @@ static int bprm_execve(struct linux_binprm *bprm,
current->fs->in_exec = 0;
current->in_execve = 0;
rseq_execve(current);
+ user_events_execve(current);
acct_update_integrals(current);
task_numa_free(current, false);
return retval;
diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index 1c2bde0ead73..47fefc7f363b 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -13,6 +13,8 @@
* @nmissed: The counter for missing events.
* @flags: The status flag.
* @rethook: The rethook data structure. (internal data)
+ * @entry_data_size: The private data storage size.
+ * @nr_maxactive: The max number of active functions.
* @entry_handler: The callback function for function entry.
* @exit_handler: The callback function for function exit.
*/
@@ -29,9 +31,13 @@ struct fprobe {
unsigned long nmissed;
unsigned int flags;
struct rethook *rethook;
+ size_t entry_data_size;
+ int nr_maxactive;
- void (*entry_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs);
- void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs);
+ int (*entry_handler)(struct fprobe *fp, unsigned long entry_ip,
+ struct pt_regs *regs, void *entry_data);
+ void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip,
+ struct pt_regs *regs, void *entry_data);
};
/* This fprobe is soft-disabled. */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 3e56cb6f40d1..6954e4ed5bbf 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -548,6 +548,7 @@ bool is_ftrace_trampoline(unsigned long addr);
* DIRECT - there is a direct function to call
* CALL_OPS - the record can use callsite-specific ops
* CALL_OPS_EN - the function is set up to use callsite-specific ops
+ * TOUCHED - A callback was added since boot up
*
* When a new ftrace_ops is registered and wants a function to save
* pt_regs, the rec->flags REGS is set. When the function has been
@@ -567,9 +568,10 @@ enum {
FTRACE_FL_DIRECT_EN = (1UL << 23),
FTRACE_FL_CALL_OPS = (1UL << 22),
FTRACE_FL_CALL_OPS_EN = (1UL << 21),
+ FTRACE_FL_TOUCHED = (1UL << 20),
};
-#define FTRACE_REF_MAX_SHIFT 21
+#define FTRACE_REF_MAX_SHIFT 20
#define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1)
#define ftrace_rec_count(rec) ((rec)->flags & FTRACE_REF_MAX)
@@ -628,6 +630,7 @@ enum {
FTRACE_ITER_PROBE = (1 << 4),
FTRACE_ITER_MOD = (1 << 5),
FTRACE_ITER_ENABLED = (1 << 6),
+ FTRACE_ITER_TOUCHED = (1 << 7),
};
void arch_ftrace_update_code(int command);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dc4ad4c58fae..eed5d65b8d1f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -70,6 +70,7 @@ struct sighand_struct;
struct signal_struct;
struct task_delay_info;
struct task_group;
+struct user_event_mm;
/*
* Task state bitmask. NOTE! These bits are also
@@ -1529,6 +1530,10 @@ struct task_struct {
union rv_task_monitor rv[RV_PER_TASK_MONITORS];
#endif
+#ifdef CONFIG_USER_EVENTS
+ struct user_event_mm *user_event_mm;
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h
index 5b31c5147969..515d7fcb9634 100644
--- a/include/linux/seq_buf.h
+++ b/include/linux/seq_buf.h
@@ -159,4 +159,6 @@ extern int
seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary);
#endif
+void seq_buf_do_printk(struct seq_buf *s, const char *lvl);
+
#endif /* _LINUX_SEQ_BUF_H */
diff --git a/include/linux/user_events.h b/include/linux/user_events.h
index 592a3fbed98e..2847f5a18a86 100644
--- a/include/linux/user_events.h
+++ b/include/linux/user_events.h
@@ -1,54 +1,83 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (c) 2021, Microsoft Corporation.
+ * Copyright (c) 2022, Microsoft Corporation.
*
* Authors:
* Beau Belgrave <beaub@linux.microsoft.com>
*/
-#ifndef _UAPI_LINUX_USER_EVENTS_H
-#define _UAPI_LINUX_USER_EVENTS_H
-#include <linux/types.h>
-#include <linux/ioctl.h>
+#ifndef _LINUX_USER_EVENTS_H
+#define _LINUX_USER_EVENTS_H
-#ifdef __KERNEL__
-#include <linux/uio.h>
-#else
-#include <sys/uio.h>
-#endif
+#include <linux/list.h>
+#include <linux/refcount.h>
+#include <linux/mm_types.h>
+#include <linux/workqueue.h>
+#include <uapi/linux/user_events.h>
-#define USER_EVENTS_SYSTEM "user_events"
-#define USER_EVENTS_PREFIX "u:"
+#ifdef CONFIG_USER_EVENTS
+struct user_event_mm {
+ struct list_head link;
+ struct list_head enablers;
+ struct mm_struct *mm;
+ struct user_event_mm *next;
+ refcount_t refcnt;
+ refcount_t tasks;
+ struct rcu_work put_rwork;
+};
-/* Create dynamic location entry within a 32-bit value */
-#define DYN_LOC(offset, size) ((size) << 16 | (offset))
+extern void user_event_mm_dup(struct task_struct *t,
+ struct user_event_mm *old_mm);
-/*
- * Describes an event registration and stores the results of the registration.
- * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum
- * must set the size and name_args before invocation.
- */
-struct user_reg {
+extern void user_event_mm_remove(struct task_struct *t);
+
+static inline void user_events_fork(struct task_struct *t,
+ unsigned long clone_flags)
+{
+ struct user_event_mm *old_mm;
- /* Input: Size of the user_reg structure being used */
- __u32 size;
+ if (!t || !current->user_event_mm)
+ return;
- /* Input: Pointer to string with event name, description and flags */
- __u64 name_args;
+ old_mm = current->user_event_mm;
- /* Output: Bitwise index of the event within the status page */
- __u32 status_bit;
+ if (clone_flags & CLONE_VM) {
+ t->user_event_mm = old_mm;
+ refcount_inc(&old_mm->tasks);
+ return;
+ }
- /* Output: Index of the event to use when writing data */
- __u32 write_index;
-} __attribute__((__packed__));
+ user_event_mm_dup(t, old_mm);
+}
-#define DIAG_IOC_MAGIC '*'
+static inline void user_events_execve(struct task_struct *t)
+{
+ if (!t || !t->user_event_mm)
+ return;
+
+ user_event_mm_remove(t);
+}
+
+static inline void user_events_exit(struct task_struct *t)
+{
+ if (!t || !t->user_event_mm)
+ return;
+
+ user_event_mm_remove(t);
+}
+#else
+static inline void user_events_fork(struct task_struct *t,
+ unsigned long clone_flags)
+{
+}
-/* Requests to register a user_event */
-#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg*)
+static inline void user_events_execve(struct task_struct *t)
+{
+}
-/* Requests to delete a user_event */
-#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*)
+static inline void user_events_exit(struct task_struct *t)
+{
+}
+#endif /* CONFIG_USER_EVENTS */
-#endif /* _UAPI_LINUX_USER_EVENTS_H */
+#endif /* _LINUX_USER_EVENTS_H */
diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h
new file mode 100644
index 000000000000..2984aae4a2b4
--- /dev/null
+++ b/include/uapi/linux/user_events.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2021-2022, Microsoft Corporation.
+ *
+ * Authors:
+ * Beau Belgrave <beaub@linux.microsoft.com>
+ */
+#ifndef _UAPI_LINUX_USER_EVENTS_H
+#define _UAPI_LINUX_USER_EVENTS_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define USER_EVENTS_SYSTEM "user_events"
+#define USER_EVENTS_PREFIX "u:"
+
+/* Create dynamic location entry within a 32-bit value */
+#define DYN_LOC(offset, size) ((size) << 16 | (offset))
+
+/*
+ * Describes an event registration and stores the results of the registration.
+ * This structure is passed to the DIAG_IOCSREG ioctl; callers must set every
+ * field marked "Input" below (flags must be 0) before invocation.
+ */
+struct user_reg {
+
+ /* Input: Size of the user_reg structure being used */
+ __u32 size;
+
+ /* Input: Bit in enable address to use */
+ __u8 enable_bit;
+
+ /* Input: Enable size in bytes at address */
+ __u8 enable_size;
+
+ /* Input: Flags for future use, set to 0 */
+ __u16 flags;
+
+ /* Input: Address to update when enabled */
+ __u64 enable_addr;
+
+ /* Input: Pointer to string with event name, description and flags */
+ __u64 name_args;
+
+ /* Output: Index of the event to use when writing data */
+ __u32 write_index;
+} __attribute__((__packed__));
+
+/*
+ * Describes an event unregister, callers must set the size, address and bit.
+ * This structure is passed to the DIAG_IOCSUNREG ioctl to disable bit updates.
+ */
+struct user_unreg {
+ /* Input: Size of the user_unreg structure being used */
+ __u32 size;
+
+ /* Input: Bit to unregister */
+ __u8 disable_bit;
+
+ /* Input: Reserved, set to 0 */
+ __u8 __reserved;
+
+ /* Input: Reserved, set to 0 */
+ __u16 __reserved2;
+
+ /* Input: Address to unregister */
+ __u64 disable_addr;
+} __attribute__((__packed__));
+
+#define DIAG_IOC_MAGIC '*'
+
+/* Request to register a user_event */
+#define DIAG_IOCSREG _IOWR(DIAG_IOC_MAGIC, 0, struct user_reg *)
+
+/* Request to delete a user_event */
+#define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char *)
+
+/* Request to unregister a user_event */
+#define DIAG_IOCSUNREG _IOW(DIAG_IOC_MAGIC, 2, struct user_unreg*)
+
+#endif /* _UAPI_LINUX_USER_EVENTS_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index 86902cb5ab78..34b90e2e7cf7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -68,6 +68,7 @@
#include <linux/kprobes.h>
#include <linux/rethook.h>
#include <linux/sysfs.h>
+#include <linux/user_events.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -818,6 +819,7 @@ void __noreturn do_exit(long code)
coredump_task_exit(tsk);
ptrace_event(PTRACE_EVENT_EXIT, code);
+ user_events_exit(tsk);
validate_creds_for_do_exit(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index eccb35a85216..735d9f4f5acf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -97,6 +97,7 @@
#include <linux/io_uring.h>
#include <linux/bpf.h>
#include <linux/stackprotector.h>
+#include <linux/user_events.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -2735,6 +2736,7 @@ __latent_entropy struct task_struct *copy_process(
trace_task_newtask(p, clone_flags);
uprobe_copy_process(p, clone_flags);
+ user_events_fork(p, clone_flags);
copy_oom_score_adj(clone_flags, p);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5b1e7fa41ca8..8cf97fa4a4b3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -792,15 +792,15 @@ config USER_EVENTS
bool "User trace events"
select TRACING
select DYNAMIC_EVENTS
- depends on BROKEN || COMPILE_TEST # API needs to be straighten out
help
User trace events are user-defined trace events that
can be used like an existing kernel trace event. User trace
events are generated by writing to a tracefs file. User
processes can determine if their tracing events should be
- generated by memory mapping a tracefs file and checking for
- an associated byte being non-zero.
+ generated by registering a value and bit with the kernel
+ that reflects when it is enabled or not.
+ See Documentation/trace/user_events.rst.
If in doubt, say N.
config HIST_TRIGGERS
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index bcf91bc7bf71..9a050e36dc6c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2640,9 +2640,20 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
return err;
}
-static void
+static int
kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
- struct pt_regs *regs)
+ struct pt_regs *regs, void *data)
+{
+ struct bpf_kprobe_multi_link *link;
+
+ link = container_of(fp, struct bpf_kprobe_multi_link, fp);
+ kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs);
+ return 0;
+}
+
+static void
+kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip,
+ struct pt_regs *regs, void *data)
{
struct bpf_kprobe_multi_link *link;
@@ -2844,7 +2855,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
goto error;
if (flags & BPF_F_KPROBE_MULTI_RETURN)
- link->fp.exit_handler = kprobe_multi_link_handler;
+ link->fp.exit_handler = kprobe_multi_link_exit_handler;
else
link->fp.entry_handler = kprobe_multi_link_handler;
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index e8143e368074..9abb3905bc8e 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -17,15 +17,17 @@
struct fprobe_rethook_node {
struct rethook_node node;
unsigned long entry_ip;
+ char data[];
};
static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
struct fprobe_rethook_node *fpr;
- stru