diff options
| author | Thaumy Cheng <thaumy.love@gmail.com> | 2025-12-09 12:16:00 +0800 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2025-12-18 14:03:40 +0100 |
| commit | da07fa8730596596f0156900337444806c52ef45 (patch) | |
| tree | 2bbe3456e4536052344bc8c214235d6e53f25045 | |
| parent | 35287d31e0834c7be21146ba4560d15da8a450f3 (diff) | |
| download | linux-da07fa8730596596f0156900337444806c52ef45.tar.gz linux-da07fa8730596596f0156900337444806c52ef45.tar.bz2 linux-da07fa8730596596f0156900337444806c52ef45.zip | |
perf/core: Fix missing read event generation on task exit
[ Upstream commit c418d8b4d7a43a86b82ee39cb52ece3034383530 ]
For events with inherit_stat enabled, a "read" event will be generated
to collect per task event counts on task exit.
The call chain is as follows:
do_exit
-> perf_event_exit_task
-> perf_event_exit_task_context
-> perf_event_exit_event
-> perf_remove_from_context
-> perf_child_detach
-> sync_child_event
-> perf_event_read_event
However, the child event context detaches the task too early in
perf_event_exit_task_context, which causes sync_child_event to never
generate the read event in this case, since child_event->ctx->task is
always set to TASK_TOMBSTONE. Fix that by moving context lock section
backward to ensure ctx->task is not set to TASK_TOMBSTONE before
generating the read event.
Because perf_event_free_task calls perf_event_exit_task_context with
exit = false to tear down all child events from the context, and the
task never lived, accessing the task PID can lead to a use-after-free.
To fix that, let sync_child_event read task from argument and move the
call to the only place it should be triggered to avoid the effect of
setting ctx->task to TASK_TOMESTONE, and add a task parameter to
perf_event_exit_event to trigger the sync_child_event properly when
needed.
This bug can be reproduced by running "perf record -s" and attaching to
any program that generates perf events in its child tasks. If we check
the result with "perf report -T", the last line of the report will leave
an empty table like "# PID TID", which is expected to contain the
per-task event counts by design.
Fixes: ef54c1a476ae ("perf: Rework perf_event_exit_event()")
Signed-off-by: Thaumy Cheng <thaumy.love@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: linux-perf-users@vger.kernel.org
Link: https://patch.msgid.link/20251209041600.963586-1-thaumy.love@gmail.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
| -rw-r--r-- | kernel/events/core.c | 22 |
1 files changed, 12 insertions, 10 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index 2c35acc2722b..413b88a4e00f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2316,8 +2316,6 @@ out: perf_event__header_size(leader); } -static void sync_child_event(struct perf_event *child_event); - static void perf_child_detach(struct perf_event *event) { struct perf_event *parent_event = event->parent; @@ -2336,7 +2334,6 @@ static void perf_child_detach(struct perf_event *event) lockdep_assert_held(&parent_event->child_mutex); */ - sync_child_event(event); list_del_init(&event->child_list); } @@ -4587,6 +4584,7 @@ out: static void perf_remove_from_owner(struct perf_event *event); static void perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx, + struct task_struct *task, bool revoke); /* @@ -4614,7 +4612,7 @@ static void perf_event_remove_on_exec(struct perf_event_context *ctx) modified = true; - perf_event_exit_event(event, ctx, false); + perf_event_exit_event(event, ctx, ctx->task, false); } raw_spin_lock_irqsave(&ctx->lock, flags); @@ -12447,7 +12445,7 @@ static void __pmu_detach_event(struct pmu *pmu, struct perf_event *event, /* * De-schedule the event and mark it REVOKED. */ - perf_event_exit_event(event, ctx, true); + perf_event_exit_event(event, ctx, ctx->task, true); /* * All _free_event() bits that rely on event->pmu: @@ -14004,14 +14002,13 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) } EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); -static void sync_child_event(struct perf_event *child_event) +static void sync_child_event(struct perf_event *child_event, + struct task_struct *task) { struct perf_event *parent_event = child_event->parent; u64 child_val; if (child_event->attr.inherit_stat) { - struct task_struct *task = child_event->ctx->task; - if (task && task != TASK_TOMBSTONE) perf_event_read_event(child_event, task); } @@ -14030,7 +14027,9 @@ static void sync_child_event(struct perf_event *child_event) static void perf_event_exit_event(struct perf_event *event, - struct perf_event_context *ctx, bool revoke) + struct perf_event_context *ctx, + struct task_struct *task, + bool revoke) { struct perf_event *parent_event = event->parent; unsigned long detach_flags = DETACH_EXIT; @@ -14053,6 +14052,9 @@ perf_event_exit_event(struct perf_event *event, mutex_lock(&parent_event->child_mutex); /* PERF_ATTACH_ITRACE might be set concurrently */ attach_state = READ_ONCE(event->attach_state); + + if (attach_state & PERF_ATTACH_CHILD) + sync_child_event(event, task); } if (revoke) @@ -14144,7 +14146,7 @@ static void perf_event_exit_task_context(struct task_struct *task, bool exit) perf_event_task(task, ctx, 0); list_for_each_entry_safe(child_event, next, &ctx->event_list, event_entry) - perf_event_exit_event(child_event, ctx, false); + perf_event_exit_event(child_event, ctx, exit ? task : NULL, false); mutex_unlock(&ctx->mutex); |
