author		Peter Zijlstra <peterz@infradead.org>	2024-12-20 06:32:19 +0000
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2025-02-21 13:48:54 +0100
commit		a18682ccd2ec97e55f5a2c210b3bd2b4b7940a15 (patch)
tree		3b2546ddec03c81c4c70164ac7756cec27a2dc17 /kernel
parent		371e1a0e3839b5e065f1f5f6d4aa11c4f5f33f17 (diff)
sched/fair: Fix value reported by hot tasks pulled in /proc/schedstat
[ Upstream commit a430d99e349026d53e2557b7b22bd2ebd61fe12a ]

In /proc/schedstat, lb_hot_gained reports the number of hot tasks pulled
during load balance. This value is incremented in can_migrate_task() if
the task is migratable and hot. After incrementing the value, the load
balancer can still decide not to migrate this task, leading to wrong
accounting. Fix this by incrementing the stat when hot tasks are
detached. This issue only exists in detach_tasks(), where we can decide
not to migrate a hot task even if it is migratable. In detach_one_task(),
however, we migrate it unconditionally.

[Swapnil: Handled the case where nr_failed_migrations_hot was not
accounted properly and wrote commit log]

Fixes: d31980846f96 ("sched: Move up affinity check to mitigate useless redoing overhead")
Reported-by: "Gautham R. Shenoy" <gautham.shenoy@amd.com>
Not-yet-signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20241220063224.17767-2-swapnil.sapkal@amd.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
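The accounting pattern the patch switches to can be summarised as: remember at the decision point that a hot task was cleared for migration, but only charge lb_hot_gained/nr_forced_migrations once the task is actually detached; if the caller later skips the task, charge nr_failed_migrations_hot instead. Below is a minimal, self-contained userspace sketch of that pattern. It is illustrative only: the struct, counter, and function names are simplified stand-ins, not the kernel's task_struct, sched_domain, or detach_tasks() code.

	/*
	 * Illustrative model of the deferred accounting in this patch.
	 * Types and counters here are simplified stand-ins, not kernel objects.
	 */
	#include <stdbool.h>
	#include <stdio.h>

	struct stats {
		unsigned long lb_hot_gained;
		unsigned long nr_forced_migrations;
		unsigned long nr_failed_migrations_hot;
	};

	struct task {
		bool cache_hot;
		bool sched_task_hot;	/* set when a hot task is deemed migratable */
	};

	/* Decision point: the caller may still decide not to migrate. */
	static bool can_migrate(struct task *p, bool many_balance_failures)
	{
		p->sched_task_hot = false;

		if (!p->cache_hot || many_balance_failures) {
			if (p->cache_hot)
				p->sched_task_hot = true;	/* remember, do not count yet */
			return true;
		}
		return false;
	}

	/* The migration actually happens: account the hot pull here. */
	static void detach(struct task *p, struct stats *st)
	{
		if (p->sched_task_hot) {
			p->sched_task_hot = false;
			st->lb_hot_gained++;
			st->nr_forced_migrations++;
		}
	}

	/* The caller decided not to migrate after all: count the failure. */
	static void skip(struct task *p, struct stats *st)
	{
		if (p->sched_task_hot)
			st->nr_failed_migrations_hot++;
	}

	int main(void)
	{
		struct stats st = { 0 };
		struct task hot = { .cache_hot = true };

		if (can_migrate(&hot, true))
			skip(&hot, &st);	/* e.g. load-balance quota already met */

		if (can_migrate(&hot, true))
			detach(&hot, &st);

		printf("hot_gained=%lu forced=%lu failed_hot=%lu\n",
		       st.lb_hot_gained, st.nr_forced_migrations,
		       st.nr_failed_migrations_hot);
		return 0;
	}

With the old scheme, both calls in main() would have bumped the "gained" counters at decision time; with the deferred scheme only the detached task does, which is exactly the discrepancy the commit message describes.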
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched/fair.c	17
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cf3bbddd4b7f..eedbe66e0527 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8317,6 +8317,8 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	int tsk_cache_hot;
 
 	lockdep_assert_rq_held(env->src_rq);
+	if (p->sched_task_hot)
+		p->sched_task_hot = 0;
 
 	/*
 	 * We do not migrate tasks that are:
@@ -8389,10 +8391,8 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 
 	if (tsk_cache_hot <= 0 ||
 	    env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
-		if (tsk_cache_hot == 1) {
-			schedstat_inc(env->sd->lb_hot_gained[env->idle]);
-			schedstat_inc(p->stats.nr_forced_migrations);
-		}
+		if (tsk_cache_hot == 1)
+			p->sched_task_hot = 1;
 		return 1;
 	}
 
@@ -8407,6 +8407,12 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
 {
 	lockdep_assert_rq_held(env->src_rq);
 
+	if (p->sched_task_hot) {
+		p->sched_task_hot = 0;
+		schedstat_inc(env->sd->lb_hot_gained[env->idle]);
+		schedstat_inc(p->stats.nr_forced_migrations);
+	}
+
 	deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
 	set_task_cpu(p, env->dst_cpu);
 }
@@ -8567,6 +8573,9 @@ static int detach_tasks(struct lb_env *env)
 
 		continue;
 next:
+		if (p->sched_task_hot)
+			schedstat_inc(p->stats.nr_failed_migrations_hot);
+
 		list_move(&p->se.group_node, tasks);
 	}
 
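The counters touched here are exported through the per-domain lines of /proc/schedstat; lb_hot_gained is one of the per-idle-type load-balance fields documented in Documentation/scheduler/sched-stats.rst, and its exact column depends on the schedstat version reported on the file's first line, so the sketch below deliberately avoids hard-coding any column numbers and just dumps the raw version and domain lines for inspection.

	/*
	 * Print the schedstat version line and the per-domain stat lines.
	 * Field interpretation (including lb_hot_gained) is left to
	 * Documentation/scheduler/sched-stats.rst for the reported version.
	 */
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[4096];
		FILE *f = fopen("/proc/schedstat", "r");

		if (!f) {
			perror("fopen /proc/schedstat");
			return 1;
		}

		while (fgets(line, sizeof(line), f)) {
			if (!strncmp(line, "version", 7) ||
			    !strncmp(line, "domain", 6))
				fputs(line, stdout);
		}

		fclose(f);
		return 0;
	}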