summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/trace/kprobetrace.rst8
-rw-r--r--arch/csky/kernel/probes/ftrace.c3
-rw-r--r--arch/loongarch/kernel/ftrace_dyn.c3
-rw-r--r--arch/parisc/kernel/ftrace.c3
-rw-r--r--arch/powerpc/kernel/kprobes-ftrace.c3
-rw-r--r--arch/riscv/kernel/probes/ftrace.c3
-rw-r--r--arch/s390/kernel/ftrace.c3
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c3
-rw-r--r--include/linux/fprobe.h18
-rw-r--r--include/linux/kprobes.h7
-rw-r--r--include/linux/objpool.h105
-rw-r--r--include/linux/trace_recursion.h2
-rw-r--r--kernel/events/uprobes.c22
-rw-r--r--kernel/kprobes.c6
-rw-r--r--kernel/trace/Kconfig13
-rw-r--r--kernel/trace/ftrace.c1
-rw-r--r--kernel/trace/rethook.c4
-rw-r--r--kernel/trace/trace.c2
-rw-r--r--kernel/trace/trace_fprobe.c6
-rw-r--r--kernel/trace/trace_kprobe.c6
-rw-r--r--kernel/trace/trace_probe.c63
-rw-r--r--kernel/trace/trace_probe.h2
-rw-r--r--kernel/trace/trace_uprobe.c103
-rw-r--r--lib/objpool.c112
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc41
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc40
26 files changed, 406 insertions, 176 deletions
diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst
index 69cb7776ae99..3b6791c17e9b 100644
--- a/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@ -58,8 +58,9 @@ Synopsis of kprobe_events
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
(u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
- (x8/x16/x32/x64), "char", "string", "ustring", "symbol", "symstr"
- and bitfield are supported.
+ (x8/x16/x32/x64), VFS layer common type(%pd/%pD), "char",
+ "string", "ustring", "symbol", "symstr" and bitfield are
+ supported.
(\*1) only for the probe on function entry (offs == 0). Note, this argument access
is best effort, because depending on the argument type, it may be passed on
@@ -122,6 +123,9 @@ With 'symstr' type, you can filter the event with wildcard pattern of the
symbols, and you don't need to solve symbol name by yourself.
For $comm, the default type is "string"; any other type is invalid.
+VFS layer common type(%pd/%pD) is a special type, which fetches dentry's or
+file's name from struct dentry's address or struct file's address.
+
.. _user_mem_access:
User Memory Access
diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c
index 834cffcfbce3..7ba4b98076de 100644
--- a/arch/csky/kernel/probes/ftrace.c
+++ b/arch/csky/kernel/probes/ftrace.c
@@ -12,6 +12,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe_ctlblk *kcb;
struct pt_regs *regs;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
index 73858c9029cc..bff058317062 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -287,6 +287,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe *p;
struct kprobe_ctlblk *kcb;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 621a4b386ae4..c91f9c2e61ed 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -206,6 +206,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe *p;
int bit;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index 072ebe7f290b..f8208c027148 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -21,6 +21,9 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
struct pt_regs *regs;
int bit;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(nip, parent_nip);
if (bit < 0)
return;
diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c
index 7142ec42e889..a69dfa610aa8 100644
--- a/arch/riscv/kernel/probes/ftrace.c
+++ b/arch/riscv/kernel/probes/ftrace.c
@@ -11,6 +11,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe_ctlblk *kcb;
int bit;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 798249ef5646..ddf2ee47cb87 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -296,6 +296,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe *p;
int bit;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index dd2ec14adb77..15af7e98e161 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -21,6 +21,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct kprobe_ctlblk *kcb;
int bit;
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
bit = ftrace_test_recursion_trylock(ip, parent_ip);
if (bit < 0)
return;
diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index 3e03758151f4..f39869588117 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -7,6 +7,16 @@
#include <linux/ftrace.h>
#include <linux/rethook.h>
+struct fprobe;
+
+typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned long entry_ip,
+ unsigned long ret_ip, struct pt_regs *regs,
+ void *entry_data);
+
+typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip,
+ unsigned long ret_ip, struct pt_regs *regs,
+ void *entry_data);
+
/**
* struct fprobe - ftrace based probe.
* @ops: The ftrace_ops.
@@ -34,12 +44,8 @@ struct fprobe {
size_t entry_data_size;
int nr_maxactive;
- int (*entry_handler)(struct fprobe *fp, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
- void *entry_data);
- void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
- void *entry_data);
+ fprobe_entry_cb entry_handler;
+ fprobe_exit_cb exit_handler;
};
/* This fprobe is soft-disabled. */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 0ff44d6633e3..5fcbc254d186 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -378,11 +378,15 @@ static inline void wait_for_kprobe_optimizer(void) { }
extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct ftrace_regs *fregs);
extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
+/* Set when ftrace has been killed: kprobes on ftrace must be disabled for safety */
+extern bool kprobe_ftrace_disabled __read_mostly;
+extern void kprobe_ftrace_kill(void);
#else
static inline int arch_prepare_kprobe_ftrace(struct kprobe *p)
{
return -EINVAL;
}
+static inline void kprobe_ftrace_kill(void) {}
#endif /* CONFIG_KPROBES_ON_FTRACE */
/* Get the kprobe at this addr (if any) - called with preemption disabled */
@@ -495,6 +499,9 @@ static inline void kprobe_flush_task(struct task_struct *tk)
static inline void kprobe_free_init_mem(void)
{
}
+static inline void kprobe_ftrace_kill(void)
+{
+}
static inline int disable_kprobe(struct kprobe *kp)
{
return -EOPNOTSUPP;
diff --git a/include/linux/objpool.h b/include/linux/objpool.h
index 15aff4a17f0c..cb1758eaa2d3 100644
--- a/include/linux/objpool.h
+++ b/include/linux/objpool.h
@@ -5,6 +5,10 @@
#include <linux/types.h>
#include <linux/refcount.h>
+#include <linux/atomic.h>
+#include <linux/cpumask.h>
+#include <linux/irqflags.h>
+#include <linux/smp.h>
/*
* objpool: ring-array based lockless MPMC queue
@@ -69,7 +73,7 @@ typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context);
* struct objpool_head - object pooling metadata
* @obj_size: object size, aligned to sizeof(void *)
* @nr_objs: total objs (to be pre-allocated with objpool)
- * @nr_cpus: local copy of nr_cpu_ids
+ * @nr_possible_cpus: cached value of num_possible_cpus()
* @capacity: max objs can be managed by one objpool_slot
* @gfp: gfp flags for kmalloc & vmalloc
* @ref: refcount of objpool
@@ -81,7 +85,7 @@ typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context);
struct objpool_head {
int obj_size;
int nr_objs;
- int nr_cpus;
+ int nr_possible_cpus;
int capacity;
gfp_t gfp;
refcount_t ref;
@@ -118,13 +122,94 @@ int objpool_init(struct objpool_head *pool, int nr_objs, int object_size,
gfp_t gfp, void *context, objpool_init_obj_cb objinit,
objpool_fini_cb release);
+/* try to retrieve object from slot */
+static inline void *__objpool_try_get_slot(struct objpool_head *pool, int cpu)
+{
+ struct objpool_slot *slot = pool->cpu_slots[cpu];
+ /* load head snapshot, other cpus may change it */
+ uint32_t head = smp_load_acquire(&slot->head);
+
+ while (head != READ_ONCE(slot->last)) {
+ void *obj;
+
+ /*
+ * data visibility of 'last' and 'head' could be out of
+ * order since memory updating of 'last' and 'head' are
+ * performed in push() and pop() independently
+ *
+ * before any retrieving attempts, pop() must guarantee
+ * 'last' is behind 'head', that is to say, there must
+ * be available objects in slot, which could be ensured
+ * by condition 'last != head && last - head <= nr_objs'
+ * that is equivalent to 'last - head - 1 < nr_objs' as
+ * 'last' and 'head' are both unsigned int32
+ */
+ if (READ_ONCE(slot->last) - head - 1 >= pool->nr_objs) {
+ head = READ_ONCE(slot->head);
+ continue;
+ }
+
+ /* obj must be retrieved before moving forward head */
+ obj = READ_ONCE(slot->entries[head & slot->mask]);
+
+ /* move head forward to mark it's consumption */
+ if (try_cmpxchg_release(&slot->head, &head, head + 1))
+ return obj;
+ }
+
+ return NULL;
+}
+
/**
* objpool_pop() - allocate an object from objpool
* @pool: object pool
*
* return value: object ptr or NULL if failed
*/
-void *objpool_pop(struct objpool_head *pool);
+static inline void *objpool_pop(struct objpool_head *pool)
+{
+ void *obj = NULL;
+ unsigned long flags;
+ int i, cpu;
+
+ /* disable local irq to avoid preemption & interruption */
+ raw_local_irq_save(flags);
+
+ cpu = raw_smp_processor_id();
+ for (i = 0; i < pool->nr_possible_cpus; i++) {
+ obj = __objpool_try_get_slot(pool, cpu);
+ if (obj)
+ break;
+ cpu = cpumask_next_wrap(cpu, cpu_possible_mask, -1, 1);
+ }
+ raw_local_irq_restore(flags);
+
+ return obj;
+}
+
+/* adding object to slot, abort if the slot was already full */
+static inline int
+__objpool_try_add_slot(void *obj, struct objpool_head *pool, int cpu)
+{
+ struct objpool_slot *slot = pool->cpu_slots[cpu];
+ uint32_t head, tail;
+
+ /* loading tail and head as a local snapshot, tail first */
+ tail = READ_ONCE(slot->tail);
+
+ do {
+ head = READ_ONCE(slot->head);
+ /* fault caught: something must be wrong */
+ WARN_ON_ONCE(tail - head > pool->nr_objs);
+ } while (!try_cmpxchg_acquire(&slot->tail, &tail, tail + 1));
+
+ /* now the tail position is reserved for the given obj */
+ WRITE_ONCE(slot->entries[tail & slot->mask], obj);
+ /* update sequence to make this obj available for pop() */
+ smp_store_release(&slot->last, tail + 1);
+
+ return 0;
+}
/**
* objpool_push() - reclaim the object and return back to objpool
@@ -134,7 +219,19 @@ void *objpool_pop(struct objpool_head *pool);
* return: 0 or error code (it fails only when user tries to push
* the same object multiple times or wrong "objects" into objpool)
*/
-int objpool_push(void *obj, struct objpool_head *pool);
+static inline int objpool_push(void *obj, struct objpool_head *pool)
+{
+ unsigned long flags;
+ int rc;
+
+ /* disable local irq to avoid preemption & interruption */
+ raw_local_irq_save(flags);
+ rc = __objpool_try_add_slot(obj, pool, raw_smp_processor_id());
+ raw_local_irq_restore(flags);
+
+ return rc;
+}
+
/**
* objpool_drop() - discard the object and deref objpool
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index d48cd92d2364..24ea8ac049b4 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -135,7 +135,7 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
# define do_ftrace_record_recursion(ip, pip) do { } while (0)
#endif
-#ifdef CONFIG_ARCH_WANTS_NO_INSTR
+#ifdef CONFIG_FTRACE_VALIDATE_RCU_IS_WATCHING
# define trace_warn_on_no_rcu(ip) \
({ \
bool __ret = !rcu_is_watching(); \
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 1215bc299390..2c83ba776fc7 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -39,7 +39,7 @@ static struct rb_root uprobes_tree = RB_ROOT;
*/
#define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree)
-static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
+static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */
#define UPROBES_HASH_SZ 13
/* serialize uprobe->pending_list */
@@ -669,9 +669,9 @@ static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
{
struct uprobe *uprobe;
- spin_lock(&uprobes_treelock);
+ read_lock(&uprobes_treelock);
uprobe = __find_uprobe(inode, offset);
- spin_unlock(&uprobes_treelock);
+ read_unlock(&uprobes_treelock);
return uprobe;
}
@@ -701,9 +701,9 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
{
struct uprobe *u;
- spin_lock(&uprobes_treelock);
+ write_lock(&uprobes_treelock);
u = __insert_uprobe(uprobe);
- spin_unlock(&uprobes_treelock);
+ write_unlock(&uprobes_treelock);
return u;
}
@@ -935,9 +935,9 @@ static void delete_uprobe(struct uprobe *uprobe)
if (WARN_ON(!uprobe_is_active(uprobe)))
return;
- spin_lock(&uprobes_treelock);
+ write_lock(&uprobes_treelock);
rb_erase(&uprobe->rb_node, &uprobes_tree);
- spin_unlock(&uprobes_treelock);
+ write_unlock(&uprobes_treelock);
RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */
put_uprobe(uprobe);
}
@@ -1298,7 +1298,7 @@ static void build_probe_list(struct inode *inode,
min = vaddr_to_offset(vma, start);
max = min + (end - start) - 1;
- spin_lock(&uprobes_treelock);
+ read_lock(&uprobes_treelock);
n = find_node_in_range(inode, min, max);
if (n) {
for (t = n; t; t = rb_prev(t)) {
@@ -1316,7 +1316,7 @@ static void build_probe_list(struct inode *inode,
get_uprobe(u);
}
}
- spin_unlock(&uprobes_treelock);
+ read_unlock(&uprobes_treelock);
}
/* @vma contains reference counter, not the probed instruction. */
@@ -1407,9 +1407,9 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e
min = vaddr_to_offset(vma, start);
max = min + (end - start) - 1;
- spin_lock(&uprobes_treelock);
+ read_lock(&uprobes_treelock);
n = find_node_in_range(inode, min, max);
- spin_unlock(&uprobes_treelock);
+ read_unlock(&uprobes_treelock);
return !!n;
}
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 48b49925f7ff..f2cea3c80cb5 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1067,6 +1067,7 @@ static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = {
static int kprobe_ipmodify_enabled;
static int kprobe_ftrace_enabled;
+bool kprobe_ftrace_disabled;
static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
int *cnt)
@@ -1135,6 +1136,11 @@ static int disarm_kprobe_ftrace(struct kprobe *p)
ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
}
+
+void kprobe_ftrace_kill()
+{
+ kprobe_ftrace_disabled = true;
+}
#else /* !CONFIG_KPROBES_ON_FTRACE */
static inline int arm_kprobe_ftrace(struct kprobe *p)
{
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index b3d7f62ac581..166ad5444eea 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -974,6 +974,19 @@ config FTRACE_RECORD_RECURSION_SIZE
This file can be reset, but the limit can not change in
size at runtime.
+config FTRACE_VALIDATE_RCU_IS_WATCHING
+ bool "Validate RCU is on during ftrace execution"
+ depends on FUNCTION_TRACER
+ depends on ARCH_WANTS_NO_INSTR
+ help
+ All callbacks that attach to the function tracing have some sort of
+ protection against recursion. This option is only to verify that
+ ftrace (and other users of ftrace_test_recursion_trylock()) are not
+ called outside of RCU, as if they are, it can cause a race. But it
+ also has a noticeable overhead when enabled.
+
+ If unsure, say N
+
config RING_BUFFER_RECORD_RECURSION
bool "Record functions that recurse in the ring buffer"
depends on FTRACE_RECORD_RECURSION
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 50ca4d4f8840..d4efdab463b8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -7894,6 +7894,7 @@ void ftrace_kill(void)
ftrace_disabled = 1;
ftrace_enabled = 0;
ftrace_trace_function = ftrace_stub;
+ kprobe_ftrace_kill();
}
/**
diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c
index fa03094e9e69..30d224946881 100644
--- a/kernel/trace/rethook.c
+++ b/kernel/trace/rethook.c
@@ -166,6 +166,7 @@ struct rethook_node *rethook_try_get(struct rethook *rh)
if (unlikely(!handler))
return NULL;
+#if defined(CONFIG_FTRACE_VALIDATE_RCU_IS_WATCHING) || defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE)
/*
* This expects the caller will set up a rethook on a function entry.
* When the function returns, the rethook will eventually be reclaimed
@@ -174,6 +175,7 @@ struct rethook_node *rethook_try_get(struct rethook *rh)
*/
if (unlikely(!rcu_is_watching()))
return NULL;
+#endif
return (struct rethook_node *)objpool_pop(&rh->pool);
}
@@ -248,7 +250,7 @@ unsigned long rethook_find_ret_addr(struct task_struct *tsk, unsigned long frame
if (WARN_ON_ONCE(!cur))
return 0;
- if (WARN_ON_ONCE(tsk != current && task_is_running(tsk)))
+ if (tsk != current && task_is_running(tsk))
return 0;
do {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 233d1af39fff..7fe4b539a423 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5540,7 +5540,7 @@ static const char readme_msg[] =
"\t kernel return probes support: $retval, $arg<N>, $comm\n"
"\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
"\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
- "\t symstr, <type>\\[<array-size>\\]\n"
+ "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
"\t field: <stype> <name>;\n"
"\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 4f4280815522..62e6a8f4aae9 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -994,6 +994,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
char gbuf[MAX_EVENT_NAME_LEN];
char sbuf[KSYM_NAME_LEN];
char abuf[MAX_BTF_ARGS_LEN];
+ char *dbuf = NULL;
bool is_tracepoint = false;
struct tracepoint *tpoint = NULL;
struct traceprobe_parse_context ctx = {
@@ -1104,6 +1105,10 @@ static int __trace_fprobe_create(int argc, const char *argv[])
argv = new_argv;
}
+ ret = traceprobe_expand_dentry_args(argc, argv, &dbuf);
+ if (ret)
+ goto out;
+
/* setup a probe */
tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive,
argc, is_return);
@@ -1154,6 +1159,7 @@ out:
trace_probe_log_clear();
kfree(new_argv);
kfree(symbol);
+ kfree(dbuf);
return ret;
parse_error:
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 2cb2a3951b4f..16383247bdbf 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -800,6 +800,7 @@ static int __trace_kprobe_create(int argc, const char *argv[])
char buf[MAX_EVENT_NAME_LEN];
char gbuf[MAX_EVENT_NAME_LEN];
char abuf[MAX_BTF_ARGS_LEN];
+ char *dbuf = NULL;
struct traceprobe_parse_context ctx = { .flags = TPARG_FL_KERNEL };
switch (argv[0][0]) {
@@ -951,6 +952,10 @@ static int __trace_kprobe_create(int argc, const char *argv[])
argv = new_argv;
}
+ ret = traceprobe_expand_dentry_args(argc, argv, &dbuf);
+ if (ret)
+ goto out;
+
/* setup a probe */
tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
argc, is_return);
@@ -997,6 +1002,7 @@ out:
trace_probe_log_clear();
kfree(new_argv);
kfree(symbol);
+ kfree(dbuf);
return ret;
parse_error:
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index c3f2937b434a..5e263c141574 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -12,6 +12,7 @@
#define pr_fmt(fmt) "trace_probe: " fmt
#include <linux/bpf.h>
+#include <linux/fs.h>
#include "trace_btf.h"
#include "trace_probe.h"
@@ -1737,6 +1738,68 @@ error:
return ERR_PTR(ret);
}
+/* @buf: *buf must be equal to NULL. Caller must to free *buf */
+int traceprobe_expand_dentry_args(int argc, const char *argv[], char **buf)
+{
+ int i, used, ret;
+ const int bufsize = MAX_DENTRY_ARGS_LEN;
+ char *tmpbuf = NULL;
+
+ if (*buf)
+ return -EINVAL;
+
+ used = 0;
+ for (i = 0; i < argc; i++) {
+ char *tmp;
+ char *equal;
+ size_t arg_len;
+
+ if (!glob_match("*:%p[dD]", argv[i]))
+ continue;
+
+ if (!tmpbuf) {
+ tmpbuf = kmalloc(bufsize, GFP_KERNEL);
+ if (!tmpbuf)
+ return -ENOMEM;
+ }
+
+ tmp = kstrdup(argv[i], GFP_KERNEL);
+ if (!tmp)
+ goto nomem;
+
+ equal = strchr(tmp, '=');
+ if (equal)
+ *equal = '\0';
+ arg_len = strlen(argv[i]);
+ tmp[arg_len - 4] = '\0';
+ if (argv[i][arg_len - 1] == 'd')
+ ret = snprintf(tmpbuf + used, bufsize - used,
+ "%s%s+0x0(+0x%zx(%s)):string",
+ equal ? tmp : "", equal ? "=" : "",
+ offsetof(struct dentry, d_name.name),
+ equal ? equal + 1 : tmp);
+ else
+ ret = snprintf(tmpbuf + used, bufsize - used,
+ "%s%s+0x0(+0x%zx(+0x%zx(%s))):string",
+ equal ? tmp : "", equal ? "=" : "",
+ offsetof(struct dentry, d_name.name),
+ offsetof(struct file, f_path.dentry),
+ equal ? equal + 1 : tmp);
+
+ kfree(tmp);
+ if (ret >= bufsize - used)
+ goto nomem;
+ argv[i] = tmpbuf + used;
+ used += ret + 1;
+ }
+
+ *buf = tmpbuf;
+ return 0;
+nomem:
+ kfree(tmpbuf);
+ return -ENOMEM;
+}
+
void traceprobe_finish_parse(struct traceprobe_parse_context *ctx)
{
clear_btf_context(ctx);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index cef3a50628a3..5803e6a41570 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -34,6 +34,7 @@
#define MAX_ARRAY_LEN 64
#define MAX_ARG_NAME_LEN 32
#define MAX_BTF_ARGS_LEN 128
+#define MAX_DENTRY_ARGS_LEN 256
#define MAX_STRING_SIZE PATH_MAX
#define MAX_ARG_BUF_LEN (MAX_TRACE_ARGS * MAX_ARG_NAME_LEN)
@@ -428,6 +429,7 @@ extern int traceprobe_parse_probe_arg(struct trace_probe *tp, int i,
const char **traceprobe_expand_meta_args(int argc, const char *argv[],
int *new_argc, char *buf, int bufsize,
struct traceprobe_parse_context *ctx);
+extern int traceprobe_expand_dentry_args(int argc, const char *argv[], char **buf);
extern int traceprobe_update_arg(struct probe_arg *arg);
extern void traceprobe_free_probe_arg(struct probe_arg *arg);
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 9e461362450a..8541fa1494ae 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -854,6 +854,7 @@ static const struct file_operations uprobe_profile_ops = {
struct uprobe_cpu_buffer {
struct mutex mutex;
void *buf;
+ int dsize;
};
static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer;
static int uprobe_buffer_refcnt;
@@ -940,30 +941,56 @@ static struct uprobe_cpu_buffer *uprobe_buffer_get(void)
static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb)
{
+ if (!ucb)
+ return;
mutex_unlock(&ucb->mutex);
}
+static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu,
+ struct pt_regs *regs,
+ struct uprobe_cpu_buffer **ucbp)
+{
+ struct uprobe_cpu_buffer *ucb;
+ int dsize, esize;
+
+ if (*ucbp)
+ return *ucbp;
+
+ esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+ dsize = __get_data_size(&tu->tp, regs, NULL);
+
+ ucb = uprobe_buffer_get();
+ ucb->dsize = tu->tp.size + dsize;
+
+ store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize);
+
+ *ucbp = ucb;
+ return ucb;
+}
+
static void __uprobe_trace_func(struct trace_uprobe *tu,
unsigned long func, struct pt_regs *regs,
- struct uprobe_cpu_buffer *ucb, int dsize,
+ struct uprobe_cpu_buffer **ucbp,
struct trace_event_file *trace_file)
{
struct uprobe_trace_entry_head *entry;
struct trace_event_buffer fbuffer;
+ struct uprobe_cpu_buffer *ucb;
void *data;
int size, esize;
struct trace_event_call *call = trace_probe_event_call(&tu->tp);
WARN_ON(call != trace_file->event_call);
- if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE))
+ ucb = prepare_uprobe_buffer(tu, regs, ucbp);
+ if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE))
return;
if (trace_trigger_soft_disabled(trace_file))
return;
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
- size = esize + tu->tp.size + dsize;
+ size = esize + ucb->dsize;
entry = trace_event_buffer_reserve(&fbuffer, trace_file, size);
if (!entry)
return;
@@ -977,14 +1004,14 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
data = DATAOF_TRACE_ENTRY(entry, false);
}
- memcpy(data, ucb->buf, tu->tp.size + dsize);
+ memcpy(data, ucb->buf, ucb->dsize);
trace_event_buffer_commit(&fbuffer);
}
/* uprobe handler */
static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
- struct uprobe_cpu_buffer *ucb, int dsize)
+ struct uprobe_cpu_buffer **ucbp)
{
struct event_file_link *link;
@@ -993,7 +1020,7 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
rcu_read_lock();
trace_probe_for_each_link_rcu(link, &tu->tp)
- __uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file);
+ __uprobe_trace_func(tu, 0, regs, ucbp, link->file);
rcu_read_unlock();
return 0;
@@ -1001,13 +1028,13 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
struct pt_regs *regs,
- struct uprobe_cpu_buffer *ucb, int dsize)
+ struct uprobe_cpu_buffer **ucbp)
{
struct event_file_link *link;
rcu_read_lock();
trace_probe_for_each_link_rcu(link, &tu->tp)<