path: root/include/linux
author		Linus Torvalds <torvalds@linux-foundation.org>	2025-03-24 21:46:36 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2025-03-24 21:46:36 -0700
commit		327ecdbc0fda28da3563a34426f63d80384062be (patch)
tree		181628cc4556034104b938859a8e01b17360e374 /include/linux
parent		32b22538bea83bd48f00ab1403e5a4dbce41f0d0 (diff)
parent		12e766d16814808b6a581597cef6ce9fc029e917 (diff)
download	linux-327ecdbc0fda28da3563a34426f63d80384062be.tar.gz
		linux-327ecdbc0fda28da3563a34426f63d80384062be.tar.bz2
		linux-327ecdbc0fda28da3563a34426f63d80384062be.zip
Merge tag 'perf-core-2025-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull performance events updates from Ingo Molnar:

 "Core:
   - Move perf_event sysctls into kernel/events/ (Joel Granados)
   - Use POLLHUP for pinned events in error (Namhyung Kim)
   - Avoid the read if the count is already updated (Peter Zijlstra)
   - Allow the EPOLLRDNORM flag for poll (Tao Chen)
   - locking/percpu-rwsem: Add guard support [ NOTE: this got
     (mis-)merged into the perf tree due to related work ] (Peter Zijlstra)

  perf_pmu_unregister() related improvements: (Peter Zijlstra)
   - Simplify the perf_event_alloc() error path
   - Simplify the perf_pmu_register() error path
   - Simplify perf_pmu_register()
   - Simplify perf_init_event()
   - Simplify perf_event_alloc()
   - Merge struct pmu::pmu_disable_count into
     struct perf_cpu_pmu_context::pmu_disable_count
   - Add this_cpc() helper
   - Introduce perf_free_addr_filters()
   - Robustify perf_event_free_bpf_prog()
   - Simplify the perf_mmap() control flow
   - Further simplify perf_mmap()
   - Remove retry loop from perf_mmap()
   - Lift event->mmap_mutex in perf_mmap()
   - Detach 'struct perf_cpu_pmu_context' and 'struct pmu' lifetimes
   - Fix perf_mmap() failure path

  Uprobes:
   - Harden x86 uretprobe syscall trampoline check (Jiri Olsa)
   - Remove redundant spinlock in uprobe_deny_signal() (Liao Chang)
   - Remove the spinlock within handle_singlestep() (Liao Chang)

  x86 Intel PMU enhancements:
   - Support PEBS counters snapshotting (Kan Liang)
   - Fix intel_pmu_read_event() (Kan Liang)
   - Extend per event callchain limit to branch stack (Kan Liang)
   - Fix system-wide LBR profiling (Kan Liang)
   - Allocate bts_ctx only if necessary (Li RongQing)
   - Apply static call for drain_pebs (Peter Zijlstra)

  x86 AMD PMU enhancements: (Ravi Bangoria)
   - Remove pointless sample period check
   - Fix ->config to sample period calculation for OP PMU
   - Fix perf_ibs_op.cnt_mask for CurCnt
   - Don't allow freq mode event creation through ->config interface
   - Add PMU specific minimum period
   - Add ->check_period() callback
   - Ceil sample_period to min_period
   - Add support for OP Load Latency Filtering
   - Update DTLB/PageSize decode logic

  Hardware breakpoints:
   - Return EOPNOTSUPP for unsupported breakpoint type (Saket Kumar Bhaskar)

  Hardlockup detector improvements: (Li Huafei)
   - perf_event memory leak
   - Warn if watchdog_ev is leaked

  Fixes and cleanups:
   - Misc fixes and cleanups (Andy Shevchenko, Kan Liang, Peter Zijlstra,
     Ravi Bangoria, Thorsten Blum, XieLudan)"

* tag 'perf-core-2025-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (55 commits)
  perf: Fix __percpu annotation
  perf: Clean up pmu specific data
  perf/x86: Remove swap_task_ctx()
  perf/x86/lbr: Fix shorter LBRs call stacks for the system-wide mode
  perf: Supply task information to sched_task()
  perf: attach/detach PMU specific data
  locking/percpu-rwsem: Add guard support
  perf: Save PMU specific data in task_struct
  perf: Extend per event callchain limit to branch stack
  perf/ring_buffer: Allow the EPOLLRDNORM flag for poll
  perf/core: Use POLLHUP for pinned events in error
  perf/core: Use sysfs_emit() instead of scnprintf()
  perf/core: Remove optional 'size' arguments from strscpy() calls
  perf/x86/intel/bts: Check if bts_ctx is allocated when calling BTS functions
  uprobes/x86: Harden uretprobe syscall trampoline check
  watchdog/hardlockup/perf: Warn if watchdog_ev is leaked
  watchdog/hardlockup/perf: Fix perf_event memory leak
  perf/x86: Annotate struct bts_buffer::buf with __counted_by()
  perf/core: Clean up perf_try_init_event()
  perf/core: Fix perf_mmap() failure path
  ...
Diffstat (limited to 'include/linux')
-rw-r--r--	include/linux/idr.h		17
-rw-r--r--	include/linux/nmi.h		 4
-rw-r--r--	include/linux/percpu-rwsem.h	 8
-rw-r--r--	include/linux/perf_event.h	92
-rw-r--r--	include/linux/sched.h		 2
-rw-r--r--	include/linux/uprobes.h		 3
6 files changed, 89 insertions(+), 37 deletions(-)
diff --git a/include/linux/idr.h b/include/linux/idr.h
index da5f5fa4a3a6..cd729be369b3 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -15,6 +15,7 @@
#include <linux/radix-tree.h>
#include <linux/gfp.h>
#include <linux/percpu.h>
+#include <linux/cleanup.h>
struct idr {
struct radix_tree_root idr_rt;
@@ -124,6 +125,22 @@ void *idr_get_next_ul(struct idr *, unsigned long *nextid);
void *idr_replace(struct idr *, void *, unsigned long id);
void idr_destroy(struct idr *);
+struct __class_idr {
+ struct idr *idr;
+ int id;
+};
+
+#define idr_null ((struct __class_idr){ NULL, -1 })
+#define take_idr_id(id) __get_and_null(id, idr_null)
+
+DEFINE_CLASS(idr_alloc, struct __class_idr,
+ if (_T.id >= 0) idr_remove(_T.idr, _T.id),
+ ((struct __class_idr){
+ .idr = idr,
+ .id = idr_alloc(idr, ptr, start, end, gfp),
+ }),
+ struct idr *idr, void *ptr, int start, int end, gfp_t gfp);
+
/**
* idr_init_base() - Initialise an IDR.
* @idr: IDR handle.
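
[A minimal usage sketch of the new idr_alloc scope class added above. The
IDR "my_idr" and the setup_obj() helper are illustrative, not part of this
diff; the error path relies on the class destructor to undo the allocation.]

#include <linux/idr.h>

static DEFINE_IDR(my_idr);

static int setup_obj(void *obj)
{
	return obj ? 0 : -EINVAL;	/* stub for illustration */
}

static int register_obj(void *obj)
{
	/* The constructor runs idr_alloc(); the destructor calls
	 * idr_remove() automatically if the id is still owned at
	 * scope exit. */
	CLASS(idr_alloc, id)(&my_idr, obj, 0, 0, GFP_KERNEL);

	if (id.id < 0)
		return id.id;		/* allocation failed, nothing to undo */

	if (setup_obj(obj))		/* a later failure ... */
		return -EINVAL;		/* ... auto idr_remove() on return */

	take_idr_id(id);		/* success: keep the id, disarm cleanup */
	return 0;
}
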
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index a8dfb38c9bb6..e78fa535f61d 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -17,7 +17,6 @@
void lockup_detector_init(void);
void lockup_detector_retry_init(void);
void lockup_detector_soft_poweroff(void);
-void lockup_detector_cleanup(void);
extern int watchdog_user_enabled;
extern int watchdog_thresh;
@@ -37,7 +36,6 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
static inline void lockup_detector_init(void) { }
static inline void lockup_detector_retry_init(void) { }
static inline void lockup_detector_soft_poweroff(void) { }
-static inline void lockup_detector_cleanup(void) { }
#endif /* !CONFIG_LOCKUP_DETECTOR */
#ifdef CONFIG_SOFTLOCKUP_DETECTOR
@@ -104,12 +102,10 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs);
#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
extern void hardlockup_detector_perf_stop(void);
extern void hardlockup_detector_perf_restart(void);
-extern void hardlockup_detector_perf_cleanup(void);
extern void hardlockup_config_perf_event(const char *str);
#else
static inline void hardlockup_detector_perf_stop(void) { }
static inline void hardlockup_detector_perf_restart(void) { }
-static inline void hardlockup_detector_perf_cleanup(void) { }
static inline void hardlockup_config_perf_event(const char *str) { }
#endif
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index c012df33a9f0..af7d75ede619 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -8,6 +8,7 @@
#include <linux/wait.h>
#include <linux/rcu_sync.h>
#include <linux/lockdep.h>
+#include <linux/cleanup.h>
struct percpu_rw_semaphore {
struct rcu_sync rss;
@@ -125,6 +126,13 @@ extern bool percpu_is_read_locked(struct percpu_rw_semaphore *);
extern void percpu_down_write(struct percpu_rw_semaphore *);
extern void percpu_up_write(struct percpu_rw_semaphore *);
+DEFINE_GUARD(percpu_read, struct percpu_rw_semaphore *,
+ percpu_down_read(_T), percpu_up_read(_T))
+DEFINE_GUARD_COND(percpu_read, _try, percpu_down_read_trylock(_T))
+
+DEFINE_GUARD(percpu_write, struct percpu_rw_semaphore *,
+ percpu_down_write(_T), percpu_up_write(_T))
+
static inline bool percpu_is_write_locked(struct percpu_rw_semaphore *sem)
{
return atomic_read(&sem->block);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8333f132f4a9..63dddb3b54f0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -343,8 +343,7 @@ struct pmu {
*/
unsigned int scope;
- int __percpu *pmu_disable_count;
- struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
+ struct perf_cpu_pmu_context * __percpu *cpu_pmu_context;
atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
int task_ctx_nr;
int hrtimer_interval_ms;
@@ -495,7 +494,7 @@ struct pmu {
* context-switches callback
*/
void (*sched_task) (struct perf_event_pmu_context *pmu_ctx,
- bool sched_in);
+ struct task_struct *task, bool sched_in);
/*
* Kmem cache of PMU specific data
@@ -503,16 +502,6 @@ struct pmu {
struct kmem_cache *task_ctx_cache;
/*
- * PMU specific parts of task perf event context (i.e. ctx->task_ctx_data)
- * can be synchronized using this function. See Intel LBR callstack support
- * implementation and Perf core context switch handling callbacks for usage
- * examples.
- */
- void (*swap_task_ctx) (struct perf_event_pmu_context *prev_epc,
- struct perf_event_pmu_context *next_epc);
- /* optional */
-
- /*
* Set up pmu-private data structures for an AUX area
*/
void *(*setup_aux) (struct perf_event *event, void **pages,
@@ -673,13 +662,16 @@ struct swevent_hlist {
struct rcu_head rcu_head;
};
-#define PERF_ATTACH_CONTEXT 0x01
-#define PERF_ATTACH_GROUP 0x02
-#define PERF_ATTACH_TASK 0x04
-#define PERF_ATTACH_TASK_DATA 0x08
-#define PERF_ATTACH_ITRACE 0x10
-#define PERF_ATTACH_SCHED_CB 0x20
-#define PERF_ATTACH_CHILD 0x40
+#define PERF_ATTACH_CONTEXT 0x0001
+#define PERF_ATTACH_GROUP 0x0002
+#define PERF_ATTACH_TASK 0x0004
+#define PERF_ATTACH_TASK_DATA 0x0008
+#define PERF_ATTACH_GLOBAL_DATA 0x0010
+#define PERF_ATTACH_SCHED_CB 0x0020
+#define PERF_ATTACH_CHILD 0x0040
+#define PERF_ATTACH_EXCLUSIVE 0x0080
+#define PERF_ATTACH_CALLCHAIN 0x0100
+#define PERF_ATTACH_ITRACE 0x0200
struct bpf_prog;
struct perf_cgroup;
@@ -921,7 +913,7 @@ struct perf_event_pmu_context {
struct list_head pinned_active;
struct list_head flexible_active;
- /* Used to avoid freeing per-cpu perf_event_pmu_context */
+ /* Used to identify the per-cpu perf_event_pmu_context */
unsigned int embedded : 1;
unsigned int nr_events;
@@ -931,7 +923,6 @@ struct perf_event_pmu_context {
atomic_t refcount; /* event <-> epc */
struct rcu_head rcu_head;
- void *task_ctx_data; /* pmu specific data */
/*
* Set when one or more (plausibly active) event can't be scheduled
* due to pmu overcommit or pmu constraints, except tolerant to
@@ -979,7 +970,6 @@ struct perf_event_context {
int nr_user;
int is_active;
- int nr_task_data;
int nr_stat;
int nr_freq;
int rotate_disable;
@@ -1020,6 +1010,41 @@ struct perf_event_context {
local_t nr_no_switch_fast;
};
+/**
+ * struct perf_ctx_data - PMU specific data for a task
+ * @rcu_head: To avoid races when freeing the PMU specific data
+ * @refcount: To track users
+ * @global: To track system-wide users
+ * @ctx_cache: Kmem cache of PMU specific data
+ * @data: PMU specific data
+ *
+ * Currently, the struct is only used in Intel LBR call stack mode to
+ * save/restore the call stack of a task on context switches.
+ *
+ * The rcu_head is used to prevent races when freeing the data.
+ * The data is only allocated when Intel LBR call stack mode is enabled.
+ * It is freed when the mode is disabled.
+ * The content of the data is only accessed during context switches, which
+ * should be protected by rcu_read_lock().
+ *
+ * Because of the alignment requirement of Intel Arch LBR, a Kmem cache
+ * is used to allocate the PMU specific data. The ctx_cache field tracks
+ * that Kmem cache.
+ *
+ * Careful: struct perf_ctx_data is added as a pointer in struct task_struct.
+ * When system-wide Intel LBR call stack mode is enabled, a fixed-size
+ * buffer will be allocated for each task.
+ * System memory consumption also grows further if the size of
+ * struct perf_ctx_data increases.
+ */
+struct perf_ctx_data {
+ struct rcu_head rcu_head;
+ refcount_t refcount;
+ int global;
+ struct kmem_cache *ctx_cache;
+ void *data;
+};
+
struct perf_cpu_pmu_context {
struct perf_event_pmu_context epc;
struct perf_event_pmu_context *task_epc;
@@ -1029,6 +1054,7 @@ struct perf_cpu_pmu_context {
int active_oncpu;
int exclusive;
+ int pmu_disable_count;
raw_spinlock_t hrtimer_lock;
struct hrtimer hrtimer;
@@ -1062,7 +1088,13 @@ struct perf_output_handle {
struct perf_buffer *rb;
unsigned long wakeup;
unsigned long size;
- u64 aux_flags;
+ union {
+ u64 flags; /* perf_output*() */
+ u64 aux_flags; /* perf_aux_output*() */
+ struct {
+ u64 skip_read : 1;
+ };
+ };
union {
void *addr;
unsigned long head;
@@ -1339,6 +1371,9 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data,
if (branch_sample_hw_index(event))
size += sizeof(u64);
+
+ brs->nr = min_t(u16, event->attr.sample_max_stack, brs->nr);
+
size += brs->nr * sizeof(struct perf_branch_entry);
/*
@@ -1646,19 +1681,10 @@ static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64
}
extern int sysctl_perf_event_paranoid;
-extern int sysctl_perf_event_mlock;
extern int sysctl_perf_event_sample_rate;
-extern int sysctl_perf_cpu_time_max_percent;
extern void perf_sample_event_took(u64 sample_len_ns);
-int perf_event_max_sample_rate_handler(const struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
-int perf_cpu_time_max_percent_handler(const struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
-int perf_event_max_stack_handler(const struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos);
-
/* Access to perf_event_open(2) syscall. */
#define PERF_SECURITY_OPEN 0
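
[Per the sched_task() signature change above, PMU callbacks now receive the
task being switched directly. A hedged sketch of the resulting callback
shape; the function name "my_pmu_sched_task" is illustrative, not from this
diff.]

#include <linux/perf_event.h>

static void my_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
			      struct task_struct *task, bool sched_in)
{
	if (sched_in) {
		/* e.g. restore per-task PMU state (such as an LBR call
		 * stack) that was saved when "task" was scheduled out */
	} else {
		/* e.g. save per-task PMU state for "task" */
	}
}
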
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4659898c0299..6e5c38718ff5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -65,6 +65,7 @@ struct mempolicy;
struct nameidata;
struct nsproxy;
struct perf_event_context;
+struct perf_ctx_data;
struct pid_namespace;
struct pipe_inode_info;
struct rcu_node;
@@ -1316,6 +1317,7 @@ struct task_struct {
struct perf_event_context *perf_event_ctxp;
struct mutex perf_event_mutex;
struct list_head perf_event_list;
+ struct perf_ctx_data __rcu *perf_ctx_data;
#endif
#ifdef CONFIG_DEBUG_PREEMPT
unsigned long preempt_disable_ip;
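
[Given the locking rules documented for struct perf_ctx_data and the new
__rcu pointer in task_struct above, a reader would look roughly like this
sketch; my_pmu_restore() is a hypothetical helper, not from this diff.]

#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/perf_event.h>

static void my_pmu_restore(void *data)
{
	/* stub: consume the PMU specific payload, e.g. an LBR call stack */
}

static void restore_task_data(struct task_struct *task)
{
	struct perf_ctx_data *cd;

	/* The payload must only be touched under rcu_read_lock(), per the
	 * struct perf_ctx_data documentation. */
	rcu_read_lock();
	cd = rcu_dereference(task->perf_ctx_data);
	if (cd)
		my_pmu_restore(cd->data);
	rcu_read_unlock();
}
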
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index b1df7d792fa1..2e46b69ff0a6 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -39,6 +39,8 @@ struct page;
#define MAX_URETPROBE_DEPTH 64
+#define UPROBE_NO_TRAMPOLINE_VADDR (~0UL)
+
struct uprobe_consumer {
/*
* handler() can return UPROBE_HANDLER_REMOVE to signal the need to
@@ -143,6 +145,7 @@ struct uprobe_task {
struct uprobe *active_uprobe;
unsigned long xol_vaddr;
+ bool signal_denied;
struct arch_uprobe *auprobe;
};