From 214c1b7f13954559cf09d5d04b934bf32ba4d618 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 11:58:52 +0100 Subject: sched/balancing: Switch the 'DEFINE_SPINLOCK(balancing)' spinlock into an 'atomic_t sched_balance_running' flag The 'balancing' spinlock added in: 08c183f31bdb ("[PATCH] sched: add option to serialize load balancing") ... is taken when the SD_SERIALIZE flag is set in a domain, but in reality it is a glorified global atomic flag serializing the load-balancing of those domains. It doesn't have any explicit locking semantics per se: we just spin_trylock() it. Turn it into a ... global atomic flag. This makes it more clear what is going on here, and reduces overhead and code size a bit: # kernel/sched/fair.o: [x86-64 defconfig] text data bss dec hex filename 60730 2721 104 63555 f843 fair.o.before 60718 2721 104 63543 f837 fair.o.after Also document the flag a bit. No change in functionality intended. Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Cc: Shrikanth Hegde Link: https://lore.kernel.org/r/20240308105901.1096078-2-mingo@kernel.org --- kernel/sched/fair.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6a16129f9a5c..2ef89b36aed1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -11633,7 +11633,20 @@ out_unlock: return 0; } -static DEFINE_SPINLOCK(balancing); +/* + * This flag serializes load-balancing passes over large domains + * (above the NODE topology level) - only one load-balancing instance + * may run at a time, to reduce overhead on very large systems with + * lots of CPUs and large NUMA distances. + * + * - Note that load-balancing passes triggered while another one + * is executing are skipped and not re-tried. + * + * - Also note that this does not serialize rebalance_domains() + * execution, as non-SD_SERIALIZE domains will still be + * load-balanced in parallel. + */ +static atomic_t sched_balance_running = ATOMIC_INIT(0); /* * Scale the max load_balance interval with the number of CPUs in the system. @@ -11711,7 +11724,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) need_serialize = sd->flags & SD_SERIALIZE; if (need_serialize) { - if (!spin_trylock(&balancing)) + if (atomic_cmpxchg_acquire(&sched_balance_running, 0, 1)) goto out; } @@ -11729,7 +11742,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) interval = get_sd_balance_interval(sd, busy); } if (need_serialize) - spin_unlock(&balancing); + atomic_set_release(&sched_balance_running, 0); out: if (time_after(next_balance, sd->last_balance + interval)) { next_balance = sd->last_balance + interval; -- cgit v1.2.3 From 02a61f325a8e62a7c76479c5f2f7ddcba16877e8 Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde Date: Fri, 8 Mar 2024 11:58:53 +0100 Subject: sched/balancing: Remove reliance on 'enum cpu_idle_type' ordering when iterating [CPU_MAX_IDLE_TYPES] arrays in show_schedstat() show_schedstat() output breaks and doesn't print all entries if the ordering of the definitions in 'enum cpu_idle_type' is changed, because show_schedstat() assumes that 'CPU_IDLE' is 0. Fix it before we change the definition order & values. [ mingo: Added changelog. 
] Signed-off-by: Shrikanth Hegde Signed-off-by: Ingo Molnar Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20240308105901.1096078-3-mingo@kernel.org --- kernel/sched/stats.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 857f837f52cb..85277953cc72 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -150,8 +150,7 @@ static int show_schedstat(struct seq_file *seq, void *v) seq_printf(seq, "domain%d %*pb", dcount++, cpumask_pr_args(sched_domain_span(sd))); - for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; - itype++) { + for (itype = 0; itype < CPU_MAX_IDLE_TYPES; itype++) { seq_printf(seq, " %u %u %u %u %u %u %u %u", sd->lb_count[itype], sd->lb_balanced[itype], -- cgit v1.2.3 From 38d707c54df4ca58cd9ceae2ddcbd6f606b99e9f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 11:58:54 +0100 Subject: sched/balancing: Change 'enum cpu_idle_type' to have more natural definitions The cpu_idle_type enum has the confusingly inverted property that 'not idle' is 1, and 'idle' is '0'. This resulted in a number of unnecessary complications in the code. Reverse the order, remove the CPU_NOT_IDLE type, and convert all code to a natural boolean form. It's much more readable: - enum cpu_idle_type idle = this_rq->idle_balance ? - CPU_IDLE : CPU_NOT_IDLE; - + enum cpu_idle_type idle = this_rq->idle_balance; -------------------------------- - if (env->idle == CPU_NOT_IDLE || !busiest->sum_nr_running) + if (!env->idle || !busiest->sum_nr_running) -------------------------------- And gets rid of the double negation in these usages: - if (env->idle != CPU_NOT_IDLE && env->src_rq->nr_running <= 1) + if (env->idle && env->src_rq->nr_running <= 1) Furthermore, this makes code much more obvious where there's differentiation between CPU_IDLE and CPU_NEWLY_IDLE. Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Reviewed-by: Vincent Guittot Cc: "Gautham R. Shenoy" Link: https://lore.kernel.org/r/20240308105901.1096078-4-mingo@kernel.org --- kernel/sched/fair.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2ef89b36aed1..3a510cf1fb00 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9070,7 +9070,7 @@ static int detach_tasks(struct lb_env *env) * We don't want to steal all, otherwise we may be treated likewise, * which could at worst lead to a livelock crash. 
*/ - if (env->idle != CPU_NOT_IDLE && env->src_rq->nr_running <= 1) + if (env->idle && env->src_rq->nr_running <= 1) break; env->loop++; @@ -9803,7 +9803,7 @@ static inline bool smt_vs_nonsmt_groups(struct sched_group *sg1, static inline bool smt_balance(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group *group) { - if (env->idle == CPU_NOT_IDLE) + if (!env->idle) return false; /* @@ -9827,7 +9827,7 @@ static inline long sibling_imbalance(struct lb_env *env, int ncores_busiest, ncores_local; long imbalance; - if (env->idle == CPU_NOT_IDLE || !busiest->sum_nr_running) + if (!env->idle || !busiest->sum_nr_running) return 0; ncores_busiest = sds->busiest->cores; @@ -9927,8 +9927,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, sgs->group_misfit_task_load = rq->misfit_task_load; *sg_status |= SG_OVERLOAD; } - } else if ((env->idle != CPU_NOT_IDLE) && - sched_reduced_capacity(rq, env->sd)) { + } else if (env->idle && sched_reduced_capacity(rq, env->sd)) { /* Check for a task running on a CPU with reduced capacity */ if (sgs->group_misfit_task_load < load) sgs->group_misfit_task_load = load; @@ -9940,7 +9939,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, sgs->group_weight = group->group_weight; /* Check if dst CPU is idle and preferred to this group */ - if (!local_group && env->idle != CPU_NOT_IDLE && sgs->sum_h_nr_running && + if (!local_group && env->idle && sgs->sum_h_nr_running && sched_group_asym(env, sgs, group)) sgs->group_asym_packing = 1; @@ -10698,7 +10697,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s * waiting task in this overloaded busiest group. Let's * try to pull it. */ - if (env->idle != CPU_NOT_IDLE && env->imbalance == 0) { + if (env->idle && env->imbalance == 0) { env->migration_type = migrate_task; env->imbalance = 1; } @@ -10913,7 +10912,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) goto force_balance; if (busiest->group_type != group_overloaded) { - if (env->idle == CPU_NOT_IDLE) { + if (!env->idle) { /* * If the busiest group is not overloaded (and as a * result the local one too) but this CPU is already @@ -11121,7 +11120,7 @@ asym_active_balance(struct lb_env *env) * the lower priority @env::dst_cpu help it. Do not follow * CPU priority. */ - return env->idle != CPU_NOT_IDLE && sched_use_asym_prio(env->sd, env->dst_cpu) && + return env->idle && sched_use_asym_prio(env->sd, env->dst_cpu) && (sched_asym_prefer(env->dst_cpu, env->src_cpu) || !sched_use_asym_prio(env->sd, env->src_cpu)); } @@ -11159,7 +11158,7 @@ static int need_active_balance(struct lb_env *env) * because of other sched_class or IRQs if more capacity stays * available on dst_cpu. */ - if ((env->idle != CPU_NOT_IDLE) && + if (env->idle && (env->src_rq->cfs.h_nr_running == 1)) { if ((check_cpu_capacity(env->src_rq, sd)) && (capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100)) @@ -11735,8 +11734,8 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) * env->dst_cpu, so we can't know our idle * state even if we migrated tasks. Update it. */ - idle = idle_cpu(cpu) ? 
CPU_IDLE : CPU_NOT_IDLE; - busy = idle != CPU_IDLE && !sched_idle_cpu(cpu); + idle = idle_cpu(cpu); + busy = !idle && !sched_idle_cpu(cpu); } sd->last_balance = jiffies; interval = get_sd_balance_interval(sd, busy); @@ -12416,9 +12415,7 @@ out: static __latent_entropy void run_rebalance_domains(struct softirq_action *h) { struct rq *this_rq = this_rq(); - enum cpu_idle_type idle = this_rq->idle_balance ? - CPU_IDLE : CPU_NOT_IDLE; - + enum cpu_idle_type idle = this_rq->idle_balance; /* * If this CPU has a pending nohz_balance_kick, then do the * balancing on behalf of the other idle CPUs whose ticks are -- cgit v1.2.3 From 11b0bfa5d463b17cac5bf6b94fea4921713530c3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 11:58:55 +0100 Subject: sched/debug: Increase SCHEDSTAT_VERSION to 16 We changed the order of definitions within 'enum cpu_idle_type', which changed the order of [CPU_MAX_IDLE_TYPES] columns in show_schedstat(). Suggested-by: Shrikanth Hegde Signed-off-by: Ingo Molnar Cc: "Gautham R. Shenoy" Link: https://lore.kernel.org/r/20240308105901.1096078-5-mingo@kernel.org --- kernel/sched/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 85277953cc72..78e48f5426ee 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -113,7 +113,7 @@ void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p, * Bump this up when changing the output format or the meaning of an existing * format, so that tools can adapt (or abort) */ -#define SCHEDSTAT_VERSION 15 +#define SCHEDSTAT_VERSION 16 static int show_schedstat(struct seq_file *seq, void *v) { -- cgit v1.2.3 From be8858dba9a2c3aec454a6b382671101fd0dc3b7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 11:58:57 +0100 Subject: sched/balancing: Change comment formatting to not overlap Git conflict marker lines So the scheduler has two such comment blocks, with '=' used as a double underline: /* * VRUNTIME * ======== * '========' also happens to be a Git conflict marker, throwing off a simple search in an editor for this pattern. Change them to '-------' type of underline instead - it looks just as good. Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20240308105901.1096078-7-mingo@kernel.org --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3a510cf1fb00..84d4791cf628 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3679,7 +3679,7 @@ static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se, /* * VRUNTIME - * ======== + * -------- * * COROLLARY #1: The virtual runtime of the entity needs to be * adjusted if re-weight at !0-lag point. @@ -3762,7 +3762,7 @@ static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se, /* * DEADLINE - * ======== + * -------- * * When the weight changes, the virtual time slope changes and * we should adjust the relative virtual deadline accordingly. -- cgit v1.2.3 From 3a5fe9305719c680ccf63216781a4d4068c8e3f3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 11:58:58 +0100 Subject: sched/balancing: Fix comments (trying to) refer to NOHZ_BALANCE_KICK Fix two typos: - There's no such thing as 'nohz_balancing_kick', the flag is named 'BALANCE' and is capitalized: NOHZ_BALANCE_KICK. 
- Likewise there's no such thing as a 'pending nohz_balance_kick' either, the NOHZ_BALANCE_KICK flag is all upper-case. Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20240308105901.1096078-8-mingo@kernel.org --- kernel/sched/fair.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 84d4791cf628..f3c03c6db3c8 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -12409,15 +12409,16 @@ out: } /* - * run_rebalance_domains is triggered when needed from the scheduler tick. - * Also triggered for nohz idle balancing (with nohz_balancing_kick set). + * This softirq may be triggered from the scheduler tick, or by + * any of the flags in NOHZ_KICK_MASK: NOHZ_BALANCE_KICK, + * NOHZ_STATS_KICK or NOHZ_NEXT_KICK. */ static __latent_entropy void run_rebalance_domains(struct softirq_action *h) { struct rq *this_rq = this_rq(); enum cpu_idle_type idle = this_rq->idle_balance; /* - * If this CPU has a pending nohz_balance_kick, then do the + * If this CPU has a pending NOHZ_BALANCE_KICK, then do the * balancing on behalf of the other idle CPUs whose ticks are * stopped. Do nohz_idle_balance *before* rebalance_domains to * give the idle CPUs a chance to load balance. Else we may -- cgit v1.2.3 From 3dc6f6c8efe2d9148865664fffbe9dd89fb0d157 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 11:58:59 +0100 Subject: sched/balancing: Update run_rebalance_domains() comments The first sentence of the comment explaining run_rebalance_domains() is historic and not true anymore: * run_rebalance_domains is triggered when needed from the scheduler tick. ... contradicted/modified by the second sentence: * Also triggered for NOHZ idle balancing (with NOHZ_BALANCE_KICK set). Avoid that kind of confusion straight away and explain from what places sched_balance_softirq() is triggered. Signed-off-by: Ingo Molnar Reviewed-by: Vincent Guittot Acked-by: Valentin Schneider Link: https://lore.kernel.org/r/20240308105901.1096078-9-mingo@kernel.org --- kernel/sched/fair.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f3c03c6db3c8..b567c0790f44 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -12409,9 +12409,12 @@ out: } /* - * This softirq may be triggered from the scheduler tick, or by - * any of the flags in NOHZ_KICK_MASK: NOHZ_BALANCE_KICK, - * NOHZ_STATS_KICK or NOHZ_NEXT_KICK. + * This softirq handler is triggered via SCHED_SOFTIRQ from two places: + * + * - directly from the local scheduler_tick() for periodic load balancing + * + * - indirectly from a remote scheduler_tick() for NOHZ idle balancing + * through the SMP cross-call nohz_csd_func() */ static __latent_entropy void run_rebalance_domains(struct softirq_action *h) { -- cgit v1.2.3 From e492e1b0e0721f3929ef9d9708d029144b396dd7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 11:59:00 +0100 Subject: sched/balancing: Vertically align the comments of 'struct sg_lb_stats' and 'struct sd_lb_stats' Make them easier to read. 
Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20240308105901.1096078-10-mingo@kernel.org --- kernel/sched/fair.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b567c0790f44..40b98e43d794 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9436,19 +9436,19 @@ static void update_blocked_averages(int cpu) * sg_lb_stats - stats of a sched_group required for load_balancing */ struct sg_lb_stats { - unsigned long avg_load; /*Avg load across the CPUs of the group */ - unsigned long group_load; /* Total load over the CPUs of the group */ + unsigned long avg_load; /* Avg load across the CPUs of the group */ + unsigned long group_load; /* Total load over the CPUs of the group */ unsigned long group_capacity; - unsigned long group_util; /* Total utilization over the CPUs of the group */ - unsigned long group_runnable; /* Total runnable time over the CPUs of the group */ - unsigned int sum_nr_running; /* Nr of tasks running in the group */ - unsigned int sum_h_nr_running; /* Nr of CFS tasks running in the group */ + unsigned long group_util; /* Total utilization over the CPUs of the group */ + unsigned long group_runnable; /* Total runnable time over the CPUs of the group */ + unsigned int sum_nr_running; /* Nr of tasks running in the group */ + unsigned int sum_h_nr_running; /* Nr of CFS tasks running in the group */ unsigned int idle_cpus; unsigned int group_weight; enum group_type group_type; - unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */ - unsigned int group_smt_balance; /* Task on busy SMT be moved */ - unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */ + unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */ + unsigned int group_smt_balance; /* Task on busy SMT be moved */ + unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */ #ifdef CONFIG_NUMA_BALANCING unsigned int nr_numa_running; unsigned int nr_preferred_running; @@ -9460,15 +9460,15 @@ struct sg_lb_stats { * during load balancing. 
*/ struct sd_lb_stats { - struct sched_group *busiest; /* Busiest group in this sd */ - struct sched_group *local; /* Local group in this sd */ - unsigned long total_load; /* Total load of all groups in sd */ - unsigned long total_capacity; /* Total capacity of all groups in sd */ - unsigned long avg_load; /* Average load across all groups in sd */ - unsigned int prefer_sibling; /* tasks should go to sibling first */ - - struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */ - struct sg_lb_stats local_stat; /* Statistics of the local group */ + struct sched_group *busiest; /* Busiest group in this sd */ + struct sched_group *local; /* Local group in this sd */ + unsigned long total_load; /* Total load of all groups in sd */ + unsigned long total_capacity; /* Total capacity of all groups in sd */ + unsigned long avg_load; /* Average load across all groups in sd */ + unsigned int prefer_sibling; /* tasks should go to sibling first */ + + struct sg_lb_stats busiest_stat; /* Statistics of the busiest group */ + struct sg_lb_stats local_stat; /* Statistics of the local group */ }; static inline void init_sd_lb_stats(struct sd_lb_stats *sds) -- cgit v1.2.3 From 33928ed8bde066316dc3cb6ccf7f74400073652f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 11:59:01 +0100 Subject: sched/balancing: Update comments in 'struct sg_lb_stats' and 'struct sd_lb_stats' - Align for readability - Capitalize consistently Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20240308105901.1096078-11-mingo@kernel.org --- kernel/sched/fair.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 40b98e43d794..116a640534b9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9433,17 +9433,17 @@ static void update_blocked_averages(int cpu) /********** Helpers for find_busiest_group ************************/ /* - * sg_lb_stats - stats of a sched_group required for load_balancing + * sg_lb_stats - stats of a sched_group required for load-balancing: */ struct sg_lb_stats { - unsigned long avg_load; /* Avg load across the CPUs of the group */ - unsigned long group_load; /* Total load over the CPUs of the group */ - unsigned long group_capacity; - unsigned long group_util; /* Total utilization over the CPUs of the group */ + unsigned long avg_load; /* Avg load over the CPUs of the group */ + unsigned long group_load; /* Total load over the CPUs of the group */ + unsigned long group_capacity; /* Capacity over the CPUs of the group */ + unsigned long group_util; /* Total utilization over the CPUs of the group */ unsigned long group_runnable; /* Total runnable time over the CPUs of the group */ - unsigned int sum_nr_running; /* Nr of tasks running in the group */ + unsigned int sum_nr_running; /* Nr of all tasks running in the group */ unsigned int sum_h_nr_running; /* Nr of CFS tasks running in the group */ - unsigned int idle_cpus; + unsigned int idle_cpus; /* Nr of idle CPUs in the group */ unsigned int group_weight; enum group_type group_type; unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */ @@ -9456,8 +9456,7 @@ struct sg_lb_stats { }; /* - * sd_lb_stats - Structure to store the statistics of a sched_domain - * during load balancing. 
+ * sd_lb_stats - stats of a sched_domain required for load-balancing: */ struct sd_lb_stats { struct sched_group *busiest; /* Busiest group in this sd */ @@ -9465,7 +9464,7 @@ struct sd_lb_stats { unsigned long total_load; /* Total load of all groups in sd */ unsigned long total_capacity; /* Total capacity of all groups in sd */ unsigned long avg_load; /* Average load across all groups in sd */ - unsigned int prefer_sibling; /* tasks should go to sibling first */ + unsigned int prefer_sibling; /* Tasks should go to sibling first */ struct sg_lb_stats busiest_stat; /* Statistics of the busiest group */ struct sg_lb_stats local_stat; /* Statistics of the local group */ -- cgit v1.2.3 From 70a27d6d1b19392a23bb4a41de7788fbc539f18d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 12:18:07 +0100 Subject: sched/balancing: Rename run_rebalance_domains() => sched_balance_softirq() run_rebalance_domains() is a misnomer, as it doesn't only run rebalance_domains(), but since the introduction of the NOHZ code it also runs nohz_idle_balance(). Rename it to sched_balance_softirq(), reflecting its more generic purpose and that it's a softirq handler. Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Reviewed-by: Shrikanth Hegde Link: https://lore.kernel.org/r/20240308111819.1101550-2-mingo@kernel.org --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 116a640534b9..953f39deb68e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -12415,7 +12415,7 @@ out: * - indirectly from a remote scheduler_tick() for NOHZ idle balancing * through the SMP cross-call nohz_csd_func() */ -static __latent_entropy void run_rebalance_domains(struct softirq_action *h) +static __latent_entropy void sched_balance_softirq(struct softirq_action *h) { struct rq *this_rq = this_rq(); enum cpu_idle_type idle = this_rq->idle_balance; @@ -13216,7 +13216,7 @@ __init void init_sched_fair_class(void) #endif } - open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); + open_softirq(SCHED_SOFTIRQ, sched_balance_softirq); #ifdef CONFIG_NO_HZ_COMMON nohz.next_balance = jiffies; -- cgit v1.2.3 From 86dd6c04ef9f213e14d60c9f64bce1cc019f816e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 12:18:08 +0100 Subject: sched/balancing: Rename scheduler_tick() => sched_tick() - Standardize on prefixing scheduler-internal functions defined in with sched_*() prefix. scheduler_tick() was the only function using the scheduler_ prefix. Harmonize it. - The other reason to rename it is the NOHZ scheduler tick handling functions are already named sched_tick_*(). Make the 'git grep sched_tick' more meaningful. Signed-off-by: Ingo Molnar Acked-by: Valentin Schneider Reviewed-by: Shrikanth Hegde Link: https://lore.kernel.org/r/20240308111819.1101550-3-mingo@kernel.org --- kernel/sched/core.c | 4 ++-- kernel/sched/loadavg.c | 2 +- kernel/time/timer.c | 2 +- kernel/workqueue.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d44efa0d0611..71b7a08a6502 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5662,7 +5662,7 @@ static inline u64 cpu_resched_latency(struct rq *rq) { return 0; } * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. 
*/ -void scheduler_tick(void) +void sched_tick(void) { int cpu = smp_processor_id(); struct rq *rq = cpu_rq(cpu); @@ -6585,7 +6585,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) * paths. For example, see arch/x86/entry_64.S. * * To drive preemption between tasks, the scheduler sets the flag in timer - * interrupt handler scheduler_tick(). + * interrupt handler sched_tick(). * * 3. Wakeups don't really cause entry into schedule(). They add a * task to the run-queue and that's it. diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index 52c8f8226b0d..ca9da66cc894 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -379,7 +379,7 @@ void calc_global_load(void) } /* - * Called from scheduler_tick() to periodically update this CPU's + * Called from sched_tick() to periodically update this CPU's * active count. */ void calc_global_load_tick(struct rq *this_rq) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index e69e75d3858c..ff49ddcc9800 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -2478,7 +2478,7 @@ void update_process_times(int user_tick) if (in_irq()) irq_work_tick(); #endif - scheduler_tick(); + sched_tick(); if (IS_ENABLED(CONFIG_POSIX_TIMERS)) run_posix_cpu_timers(); } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index bf2bdac46843..8fbb0ec39079 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1464,7 +1464,7 @@ void wq_worker_sleeping(struct task_struct *task) * wq_worker_tick - a scheduler tick occurred while a kworker is running * @task: task currently running * - * Called from scheduler_tick(). We're in the IRQ context and the current + * Called from sched_tick(). We're in the IRQ context and the current * worker's fields which follow the 'K' locking rule can be accessed safely. */ void wq_worker_tick(struct task_struct *task) -- cgit v1.2.3 From 983be0628c061989b6cc175d2f5e429b40699fbb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 12:18:09 +0100 Subject: sched/balancing: Rename trigger_load_balance() => sched_balance_trigger() Standardize scheduler load-balancing function names on the sched_balance_() prefix. Signed-off-by: Ingo Molnar Reviewed-by: Shrikanth Hegde Link: https://lore.kernel.org/r/20240308111819.1101550-4-mingo@kernel.org --- kernel/sched/core.c | 2 +- kernel/sched/fair.c | 2 +- kernel/sched/sched.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 71b7a08a6502..929fce69f555 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5700,7 +5700,7 @@ void sched_tick(void) #ifdef CONFIG_SMP rq->idle_balance = idle_cpu(cpu); - trigger_load_balance(rq); + sched_balance_trigger(rq); #endif } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 953f39deb68e..e377b675920a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -12438,7 +12438,7 @@ static __latent_entropy void sched_balance_softirq(struct softirq_action *h) /* * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing. 
*/ -void trigger_load_balance(struct rq *rq) +void sched_balance_trigger(struct rq *rq) { /* * Don't need to rebalance while attached to NULL domain or diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index d2242679239e..5b0ddb0e6017 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2397,7 +2397,7 @@ extern struct task_struct *pick_next_task_idle(struct rq *rq); extern void update_group_capacity(struct sched_domain *sd, int cpu); -extern void trigger_load_balance(struct rq *rq); +extern void sched_balance_trigger(struct rq *rq); extern void set_cpus_allowed_common(struct task_struct *p, struct affinity_context *ctx); -- cgit v1.2.3 From 14ff4dbd34f46cc6b6105f549983321241ccbba9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 12:18:10 +0100 Subject: sched/balancing: Rename rebalance_domains() => sched_balance_domains() Standardize scheduler load-balancing function names on the sched_balance_() prefix. Signed-off-by: Ingo Molnar Reviewed-by: Shrikanth Hegde Link: https://lore.kernel.org/r/20240308111819.1101550-5-mingo@kernel.org --- kernel/sched/fair.c | 8 ++++---- kernel/sched/sched.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e377b675920a..330788b0c617 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -11685,7 +11685,7 @@ static inline bool update_newidle_cost(struct sched_domain *sd, u64 cost) * * Balancing parameters are set up in init_sched_domains. */ -static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) +static void sched_balance_domains(struct rq *rq, enum cpu_idle_type idle) { int continue_balancing = 1; int cpu = rq->cpu; @@ -12161,7 +12161,7 @@ static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags) rq_unlock_irqrestore(rq, &rf); if (flags & NOHZ_BALANCE_KICK) - rebalance_domains(rq, CPU_IDLE); + sched_balance_domains(rq, CPU_IDLE); } if (time_after(next_balance, rq->next_balance)) { @@ -12422,7 +12422,7 @@ static __latent_entropy void sched_balance_softirq(struct softirq_action *h) /* * If this CPU has a pending NOHZ_BALANCE_KICK, then do the * balancing on behalf of the other idle CPUs whose ticks are - * stopped. Do nohz_idle_balance *before* rebalance_domains to + * stopped. Do nohz_idle_balance *before* sched_balance_domains to * give the idle CPUs a chance to load balance. Else we may * load balance only within the local sched_domain hierarchy * and abort nohz_idle_balance altogether if we pull some load. @@ -12432,7 +12432,7 @@ static __latent_entropy void sched_balance_softirq(struct softirq_action *h) /* normal load balance */ update_blocked_averages(this_rq->cpu); - rebalance_domains(this_rq, idle); + sched_balance_domains(this_rq, idle); } /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 5b0ddb0e6017..41024c1c49b4 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2904,7 +2904,7 @@ extern void cfs_bandwidth_usage_dec(void); #define NOHZ_NEWILB_KICK_BIT 2 #define NOHZ_NEXT_KICK_BIT 3 -/* Run rebalance_domains() */ +/* Run sched_balance_domains() */ #define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) /* Update blocked load */ #define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) -- cgit v1.2.3 From 4c3e509ea9f249458e8692f8298cceac73105948 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 12:18:11 +0100 Subject: sched/balancing: Rename load_balance() => sched_balance_rq() Standardize scheduler load-balancing function names on the sched_balance_() prefix. 
Also load_balance() has become somewhat of a misnomer: historically it was the first and primary load-balancing function that was called, but with the introduction of sched domains, it's become a lower layer function that balances runqueues. Rename it to sched_balance_rq() accordingly. Signed-off-by: Ingo Molnar Reviewed-by: Shrikanth Hegde Link: https://lore.kernel.org/r/20240308111819.1101550-6-mingo@kernel.org --- kernel/sched/fair.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 330788b0c617..0d2753c50be9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6866,7 +6866,7 @@ dequeue_throttle: #ifdef CONFIG_SMP -/* Working cpumask for: load_balance, load_balance_newidle. */ +/* Working cpumask for: sched_balance_rq, load_balance_newidle. */ static DEFINE_PER_CPU(cpumask_var_t, load_balance_mask); static DEFINE_PER_CPU(cpumask_var_t, select_rq_mask); static DEFINE_PER_CPU(cpumask_var_t, should_we_balance_tmpmask); @@ -11242,7 +11242,7 @@ static int should_we_balance(struct lb_env *env) * Check this_cpu to ensure it is balanced within domain. Attempt to move * tasks if there is an imbalance. */ -static int load_balance(int this_cpu, struct rq *this_rq, +static int sched_balance_rq(int this_cpu, struct rq *this_rq, struct sched_domain *sd, enum cpu_idle_type idle, int *continue_balancing) { @@ -11647,7 +11647,7 @@ out_unlock: static atomic_t sched_balance_running = ATOMIC_INIT(0); /* - * Scale the max load_balance interval with the number of CPUs in the system. + * Scale the max sched_balance_rq interval with the number of CPUs in the system. * This trades load-balance latency on larger machines for less cross talk. */ void update_max_interval(void) @@ -11727,7 +11727,7 @@ static void sched_balance_domains(struct rq *rq, enum cpu_idle_type idle) } if (time_after_eq(jiffies, sd->last_balance + interval)) { - if (load_balance(cpu, rq, sd, idle, &continue_balancing)) { + if (sched_balance_rq(cpu, rq, sd, idle, &continue_balancing)) { /* * The LBF_DST_PINNED logic could have changed * env->dst_cpu, so we can't know our idle @@ -12353,7 +12353,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) if (sd->flags & SD_BALANCE_NEWIDLE) { - pulled_task = load_balance(this_cpu, this_rq, + pulled_task = sched_balance_rq(this_cpu, this_rq, sd, CPU_NEWLY_IDLE, &continue_balancing); -- cgit v1.2.3 From f1cd2e2e79d283e315356bd403c7f928e994f057 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 23 Oct 2023 13:04:12 +0200 Subject: sched/balancing: Rename find_busiest_queue() => sched_balance_find_src_rq() The find_busiest_queue() naming has two small quirks: - Scheduler functions that deal with runqueues usually have a rq_ prefix or _rq postfix, but this function has neither. - Plus the 'busiest' qualifier to this function was historically correct, but has become somewhat of a misnomer: in quite a few cases we will not pick the busiest runqueue - but the best (possible) runqueue we can balance tasks from. So name it a bit more neutrally, similar to the 'src/dst' nomenclature we are already using when moving tasks between runqueues. To fix both quirks, and to standardize scheduler load-balancing function names on the sched_balance_() prefix, rename the function to sched_balance_find_src_rq(). 
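[ Editor's aside: to keep the renamed call chain straight, below is a minimal, self-contained userspace C sketch of the periodic load-balancing path as it reads after this series. It is not kernel code: the stub bodies, the missing parameters and the direct call in place of raising SCHED_SOFTIRQ are simplifications, and only the function names correspond to the kernel symbols, including names introduced by adjacent patches in this series. ]

#include <stdio.h>

/* Lowest layer: pick where to pull tasks from (stubbed out here). */
static void sched_balance_find_src_rq(void)    { puts("    pick the source runqueue"); }
static void sched_balance_find_src_group(void) { puts("    pick the source group"); }

static void sched_balance_rq(void)
{
	/* The real function builds a struct lb_env and detaches/attaches tasks. */
	sched_balance_find_src_group();
	sched_balance_find_src_rq();
}

static void sched_balance_domains(void)
{
	/* The real function iterates every sched_domain of the CPU. */
	puts("  sched_balance_domains()");
	sched_balance_rq();
}

static void sched_balance_update_blocked_averages(void)
{
	puts("  sched_balance_update_blocked_averages()");
}

static void sched_balance_softirq(void)
{
	/* SCHED_SOFTIRQ handler; the real one runs NOHZ idle balancing first. */
	puts(" sched_balance_softirq()");
	sched_balance_update_blocked_averages();
	sched_balance_domains();
}

static void sched_balance_trigger(void)
{
	/* The real function raises SCHED_SOFTIRQ; the handler runs later. */
	sched_balance_softirq();
}

static void sched_tick(void)
{
	puts("sched_tick()");
	sched_balance_trigger();
}

int main(void)
{
	sched_tick();
	return 0;
}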
Signed-off-by: Ingo Molnar Reviewed-by: Shrikanth Hegde Link: https://lore.kernel.org/r/20240308111819.1101550-7-mingo@kernel.org --- kernel/sched/fair.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0d2753c50be9..1cd9a18b35e0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -10959,9 +10959,9 @@ out_balanced: } /* - * find_busiest_queue - find the busiest runqueue among the CPUs in the group. + * sched_balance_find_src_rq - find the busiest runqueue among the CPUs in the group. */ -static struct rq *find_busiest_queue(struct lb_env *env, +static struct rq *sched_balance_find_src_rq(struct lb_env *env, struct sched_group *group) { struct rq *busiest = NULL, *rq; @@ -11280,7 +11280,7 @@ redo: goto out_balanced; } - busiest = find_busiest_queue(&env, group); + busiest = sched_balance_find_src_rq(&env, group); if (!busiest) { schedstat_inc(sd->lb_nobusyq[idle]); goto out_balanced; -- cgit v1.2.3 From 82cf921432fc184adbbb9c1bced182564876ec5e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 12:18:14 +0100 Subject: sched/balancing: Rename find_busiest_group() => sched_balance_find_src_group() Make two naming changes: 1) Standardize scheduler load-balancing function names on the sched_balance_() prefix. 2) Similar to find_busiest_queue(), the find_busiest_group() naming has become a bit of a misnomer: the 'busiest' qualifier to this function was historically correct but in the current code in quite a few cases we will not pick the 'busiest' group - but the best (possible) group we can balance from based on a complex set of constraints. So name it a bit more neutrally, similar to the 'src/dst' nomenclature we are already using when moving tasks between runqueues, and also use the sched_balance_ prefix: sched_balance_find_src_group(). Signed-off-by: Ingo Molnar Reviewed-by: Shrikanth Hegde Link: https://lore.kernel.org/r/20240308111819.1101550-9-mingo@kernel.org --- kernel/sched/fair.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1cd9a18b35e0..96a81b2fa281 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9430,7 +9430,7 @@ static void update_blocked_averages(int cpu) rq_unlock_irqrestore(rq, &rf); } -/********** Helpers for find_busiest_group ************************/ +/********** Helpers for sched_balance_find_src_group ************************/ /* * sg_lb_stats - stats of a sched_group required for load-balancing: @@ -9637,7 +9637,7 @@ static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd) * * When this is so detected; this group becomes a candidate for busiest; see * update_sd_pick_busiest(). And calculate_imbalance() and - * find_busiest_group() avoid some of the usual balance conditions to allow it + * sched_balance_find_src_group() avoid some of the usual balance conditions to allow it * to create an effective group imbalance. 
* * This is a somewhat tricky proposition since the next run might not find the @@ -10788,7 +10788,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s ) / SCHED_CAPACITY_SCALE; } -/******* find_busiest_group() helpers end here *********************/ +/******* sched_balance_find_src_group() helpers end here *********************/ /* * Decision matrix according to the local and busiest group type: @@ -10811,7 +10811,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s */ /** - * find_busiest_group - Returns the busiest group within the sched_domain + * sched_balance_find_src_group - Returns the busiest group within the sched_domain * if there is an imbalance. * @env: The load balancing environment. * @@ -10820,7 +10820,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s * * Return: - The busiest group if imbalance exists. */ -static struct sched_group *find_busiest_group(struct lb_env *env) +static struct sched_group *sched_balance_find_src_group(struct lb_env *env) { struct sg_lb_stats *local, *busiest; struct sd_lb_stats sds; @@ -11274,7 +11274,7 @@ redo: goto out_balanced; } - group = find_busiest_group(&env); + group = sched_balance_find_src_group(&env); if (!group) { schedstat_inc(sd->lb_nobusyg[idle]); goto out_balanced; @@ -11298,7 +11298,7 @@ redo: env.flags |= LBF_ALL_PINNED; if (busiest->nr_running > 1) { /* - * Attempt to move tasks. If find_busiest_group has found + * Attempt to move tasks. If sched_balance_find_src_group has found * an imbalance but busiest->nr_running <= 1, the group is * still unbalanced. ld_moved simply stays zero, so it is * correctly treated as an imbalance. -- cgit v1.2.3 From 391b7a5335c45b2bafe535cb440836ccd17515aa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 12:18:15 +0100 Subject: sched/balancing: Rename update_blocked_averages() => sched_balance_update_blocked_averages() Standardize scheduler load-balancing function names on the sched_balance_() prefix. 
Signed-off-by: Ingo Molnar Reviewed-by: Shrikanth Hegde Link: https://lore.kernel.org/r/20240308111819.1101550-10-mingo@kernel.org --- kernel/sched/fair.c | 8 ++++---- kernel/sched/pelt.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 96a81b2fa281..95f7092043f3 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9411,7 +9411,7 @@ static unsigned long task_h_load(struct task_struct *p) } #endif -static void update_blocked_averages(int cpu) +static void sched_balance_update_blocked_averages(int cpu) { bool decayed = false, done = true; struct rq *rq = cpu_rq(cpu); @@ -12079,7 +12079,7 @@ static bool update_nohz_stats(struct rq *rq) if (!time_after(jiffies, READ_ONCE(rq->last_blocked_load_update_tick))) return true; - update_blocked_averages(cpu); + sched_balance_update_blocked_averages(cpu); return rq->has_blocked_load; } @@ -12339,7 +12339,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) raw_spin_rq_unlock(this_rq); t0 = sched_clock_cpu(this_cpu); - update_blocked_averages(this_cpu); + sched_balance_update_blocked_averages(this_cpu); rcu_read_lock(); for_each_domain(this_cpu, sd) { @@ -12431,7 +12431,7 @@ static __latent_entropy void sched_balance_softirq(struct softirq_action *h) return; /* normal load balance */ - update_blocked_averages(this_rq->cpu); + sched_balance_update_blocked_averages(this_rq->cpu); sched_balance_domains(this_rq, idle); } diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c index 63b6cf898220..f80955ecdce6 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -209,7 +209,7 @@ ___update_load_sum(u64 now, struct sched_avg *sa, * This means that weight will be 0 but not running for a sched_entity * but also for a cfs_rq if the latter becomes idle. As an example, * this happens during idle_balance() which calls - * update_blocked_averages(). + * sched_balance_update_blocked_averages(). * * Also see the comment in accumulate_sum(). */ -- cgit v1.2.3 From 7d058285cd77cc1411c91efd1b1673530bb1bee8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 12:18:16 +0100 Subject: sched/balancing: Rename newidle_balance() => sched_balance_newidle() Standardize scheduler load-balancing function names on the sched_balance_() prefix. 
Signed-off-by: Ingo Molnar Reviewed-by: Shrikanth Hegde Link: https://lore.kernel.org/r/20240308111819.1101550-11-mingo@kernel.org --- kernel/sched/fair.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 95f7092043f3..aa5ff0efcca8 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4816,7 +4816,7 @@ static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq) return cfs_rq->avg.load_avg; } -static int newidle_balance(struct rq *this_rq, struct rq_flags *rf); +static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf); static inline unsigned long task_util(struct task_struct *p) { @@ -5136,7 +5136,7 @@ attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {} static inline void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {} -static inline int newidle_balance(struct rq *rq, struct rq_flags *rf) +static inline int sched_balance_newidle(struct rq *rq, struct rq_flags *rf) { return 0; } @@ -8253,7 +8253,7 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) if (rq->nr_running) return 1; - return newidle_balance(rq, rf) != 0; + return sched_balance_newidle(rq, rf) != 0; } #endif /* CONFIG_SMP */ @@ -8505,10 +8505,10 @@ idle: if (!rf) return NULL; - new_tasks = newidle_balance(rq, rf); + new_tasks = sched_balance_newidle(rq, rf); /* - * Because newidle_balance() releases (and re-acquires) rq->lock, it is + * Because sched_balance_newidle() releases (and re-acquires) rq->lock, it is * possible for any higher priority task to appear. In that case we * must re-start the pick_next_entity() loop. */ @@ -11493,7 +11493,7 @@ out_one_pinned: ld_moved = 0; /* - * newidle_balance() disregards balance intervals, so we could + * sched_balance_newidle() disregards balance intervals, so we could * repeatedly reach this code, which would lead to balance_interval * skyrocketing in a short amount of time. Skip the balance_interval * increase logic to avoid that. @@ -12277,7 +12277,7 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { } #endif /* CONFIG_NO_HZ_COMMON */ /* - * newidle_balance is called by schedule() if this_cpu is about to become + * sched_balance_newidle is called by schedule() if this_cpu is about to become * idle. Attempts to pull tasks from other CPUs. * * Returns: @@ -12285,7 +12285,7 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { } * 0 - failed, no new tasks * > 0 - success, new (fair) tasks present */ -static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) +static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf) { unsigned long next_balance = jiffies + HZ; int this_cpu = this_rq->cpu; -- cgit v1.2.3 From 646ebaf51c64c6416ca89765c20041363fc1b518 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 12:18:17 +0100 Subject: sched/balancing: Rename find_idlest_group_cpu() => sched_balance_find_dst_group_cpu() Standardize scheduler load-balancing function names on the sched_balance_() prefix. Also use 'dst' instead of 'idlest': while historically correct, today it's not really true anymore that we return the 'idlest' group or CPU, we sort by idle-exit latency and only return the idlest CPUs from the lowest-latency set of CPUs. The true 'idlest' CPUs often remain idle for a long time and are never returned as long as the system is under-loaded. 
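[ Editor's aside: a minimal sketch of the destination-CPU policy described above: prefer the idle candidate whose idle state has the smallest exit latency, and only fall back to the least-loaded CPU when no candidate is idle. This is plain userspace C with invented types and numbers, not the kernel implementation; the real sched_balance_find_dst_group_cpu() also considers how recently each CPU went idle and reads its data from struct rq and cpuidle state. ]

#include <stdbool.h>
#include <stdio.h>

struct toy_cpu {
	int		id;
	bool		idle;
	unsigned int	exit_latency;	/* cost of leaving its idle state */
	unsigned long	load;		/* only meaningful when !idle */
};

static int toy_find_dst_cpu(const struct toy_cpu *cpus, int nr)
{
	unsigned int min_exit_latency = ~0u;
	unsigned long min_load = ~0ul;
	int shallowest_idle = -1, least_loaded = -1;

	for (int i = 0; i < nr; i++) {
		if (cpus[i].idle) {
			/* Track the idle CPU that is cheapest to wake up. */
			if (cpus[i].exit_latency < min_exit_latency) {
				min_exit_latency = cpus[i].exit_latency;
				shallowest_idle = cpus[i].id;
			}
		} else if (shallowest_idle == -1 && cpus[i].load < min_load) {
			/* Only used when no idle candidate exists at all. */
			min_load = cpus[i].load;
			least_loaded = cpus[i].id;
		}
	}
	return shallowest_idle != -1 ? shallowest_idle : least_loaded;
}

int main(void)
{
	const struct toy_cpu cpus[] = {
		{ .id = 0, .idle = false, .load = 512 },
		{ .id = 1, .idle = true,  .exit_latency = 400 },	/* deep idle    */
		{ .id = 2, .idle = true,  .exit_latency = 20  },	/* shallow idle */
	};

	/* Prints 2: idle and cheapest to wake, even though CPU 1 is "idler". */
	printf("dst CPU = %d\n", toy_find_dst_cpu(cpus, 3));
	return 0;
}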
Signed-off-by: Ingo Molnar Reviewed-by: Shrikanth Hegde Link: https://lore.kernel.org/r/20240308111819.1101550-12-mingo@kernel.org --- kernel/sched/fair.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index aa5ff0efcca8..02ff0272b2e4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7101,10 +7101,10 @@ static struct sched_group * find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu); /* - * find_idlest_group_cpu - find the idlest CPU among the CPUs in the group. + * sched_balance_find_dst_group_cpu - find the idlest CPU among the CPUs in the group. */ static int -find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) +sched_balance_find_dst_group_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) { unsigned long load, min_load = ULONG_MAX; unsigned int min_exit_latency = UINT_MAX; @@ -7191,7 +7191,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p continue; } - new_cpu = find_idlest_group_cpu(group, p, cpu); + new_cpu = sched_balance_find_dst_group_cpu(group, p, cpu); if (new_cpu == cpu) { /* Now try balancing at a lower domain level of 'cpu': */ sd = sd->child; -- cgit v1.2.3 From a88b17080294f735c4124acccfa2d803a6a7d46f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 12:18:18 +0100 Subject: sched/balancing: Rename find_idlest_group() => sched_balance_find_dst_group() Standardize scheduler load-balancing function names on the sched_balance_() prefix. Also use 'dst' instead of 'idlest', because it's not really true that we return the 'idlest' group or CPU, we sort by idle-exit latency and only return the idlest CPUs from the lowest-latency set of CPUs. The true 'idlest' CPUs often remain idle for a long time and are never returned as long as the system is under-loaded. Signed-off-by: Ingo Molnar Reviewed-by: Shrikanth Hegde Link: https://lore.kernel.org/r/20240308111819.1101550-13-mingo@kernel.org --- kernel/sched/fair.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 02ff0272b2e4..d0c3a091d7d1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7098,7 +7098,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, } static struct sched_group * -find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu); +sched_balance_find_dst_group(struct sched_domain *sd, struct task_struct *p, int this_cpu); /* * sched_balance_find_dst_group_cpu - find the idlest CPU among the CPUs in the group. @@ -7185,7 +7185,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p continue; } - group = find_idlest_group(sd, p, cpu); + group = sched_balance_find_dst_group(sd, p, cpu); if (!group) { sd = sd->child; continue; @@ -10296,13 +10296,13 @@ static bool update_pick_idlest(struct sched_group *idlest, } /* - * find_idlest_group() finds and returns the least busy CPU group within the + * sched_balance_find_dst_group() finds and returns the least busy CPU group within the * domain. * * Assumes p is allowed on at least one CPU in sd. 
*/ static struct sched_group * -find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) +sched_balance_find_dst_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) { struct sched_group *idlest = NULL, *local = NULL, *group = sd->groups; struct sg_lb_stats local_sgs, tmp_sgs; -- cgit v1.2.3 From 686d148cbb5a1c2891914b8d11147d3c5556a29a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 8 Mar 2024 12:18:19 +0100 Subject: sched/balancing: Rename find_idlest_cpu() => sched_balance_find_dst_cpu() Standardize scheduler load-balancing function names on the sched_balance_() prefix. Also use 'dst' instead of 'idlest', because it's not really true that we return the 'idlest' group or CPU, we sort by idle-exit latency and only return the idlest CPUs from the lowest-latency set of CPUs. The true 'idlest' CPUs often remain idle for a long time and are never returned as long as the system is under-loaded. Signed-off-by: Ingo Molnar Reviewed-by: Shrikanth Hegde Link: https://lore.kernel.org/r/20240308111819.1101550-14-mingo@kernel.org --- kernel/sched/fair.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d0c3a091d7d1..4b3c4a181a91 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7160,7 +7160,7 @@ sched_balance_find_dst_group_cpu(struct sched_group *group, struct task_struct * return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu; } -static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p, +static inline int sched_balance_find_dst_cpu(struct sched_domain *sd, struct task_struct *p, int cpu, int prev_cpu, int sd_flag) { int new_cpu = cpu; @@ -7936,7 +7936,7 @@ compute_energy(struct energy_env *eenv, struct perf_domain *pd, * NOTE: Forkees are not accepted in the energy-aware wake-up path because * they don't have any useful utilization data yet and it's not possible to * forecast their impact on energy consumption. Consequently, they will be - * placed by find_idlest_cpu() on the least loaded CPU, which might turn out + * placed by sched_balance_find_dst_cpu() on the least loaded CPU, which might turn out * to be energy-inefficient in some use-cases. The alternative would be to * bias new tasks towards specific types of CPUs first, or to try to infer * their util_avg from the parent task, but those heuristics could hurt @@ -8201,7 +8201,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) if (unlikely(sd)) { /* Slow path */ - new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag); + new_cpu = sched_balance_find_dst_cpu(sd, p, cpu, prev_cpu, sd_flag); } else if (wake_flags & WF_TTWU) { /* XXX always ? */ /* Fast path */ new_cpu = select_idle_sibling(p, prev_cpu, new_cpu); -- cgit v1.2.3 From d72cf62438d67b911212f8d4cf65d6167c1541ba Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 12 Mar 2024 11:33:50 +0100 Subject: sched/balancing: Fix a couple of outdated function names in comments The 'idle_balance()' function hasn't existed for years, and there's no load_balance_newidle() either - both are sched_balance_newidle() today. 
Reported-by: Honglei Wang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/ZfAwNufbiyt/5biu@gmail.com --- kernel/sched/fair.c | 2 +- kernel/sched/pelt.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4b3c4a181a91..a19ea290b790 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6866,7 +6866,7 @@ dequeue_throttle: #ifdef CONFIG_SMP -/* Working cpumask for: sched_balance_rq, load_balance_newidle. */ +/* Working cpumask for: sched_balance_rq(), sched_balance_newidle(). */ static DEFINE_PER_CPU(cpumask_var_t, load_balance_mask); static DEFINE_PER_CPU(cpumask_var_t, select_rq_mask); static DEFINE_PER_CPU(cpumask_var_t, should_we_balance_tmpmask); diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c index f80955ecdce6..3a96da25b67c 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -208,7 +208,7 @@ ___update_load_sum(u64 now, struct sched_avg *sa, * se has been already dequeued but cfs_rq->curr still points to it. * This means that weight will be 0 but not running for a sched_entity * but also for a cfs_rq if the latter becomes idle. As an example, - * this happens during idle_balance() which calls + * this happens during sched_balance_newidle() which calls * sched_balance_update_blocked_averages(). * * Also see the comment in accumulate_sum(). -- cgit v1.2.3 From b9e6e28663928cab836a19abbdec3d036a07db3b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Mar 2024 12:06:03 +0100 Subject: sched/fair: Fix typos in comments So I made all speling mistakes / typos red in my editor. Big mistake... Signed-off-by: Ingo Molnar Cc: linux-kernel@vger.kernel.org --- kernel/sched/fair.c | 68 ++++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 34 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a19ea290b790..c8e50fbac345 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -388,8 +388,8 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) /* * With cfs_rq being unthrottled/throttled during an enqueue, - * it can happen the tmp_alone_branch points the a leaf that - * we finally want to del. In this case, tmp_alone_branch moves + * it can happen the tmp_alone_branch points to the leaf that + * we finally want to delete. In this case, tmp_alone_branch moves * to the prev element but it will point to rq->leaf_cfs_rq_list * at the end of the enqueue. */ @@ -406,7 +406,7 @@ static inline void assert_list_leaf_cfs_rq(struct rq *rq) SCHED_WARN_ON(rq->tmp_alone_branch != &rq->leaf_cfs_rq_list); } -/* Iterate thr' all leaf cfs_rq's on a runqueue */ +/* Iterate through all leaf cfs_rq's on a runqueue */ #define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) \ list_for_each_entry_safe(cfs_rq, pos, &rq->leaf_cfs_rq_list, \ leaf_cfs_rq_list) @@ -595,13 +595,13 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) * * [[ NOTE: this is only equal to the ideal scheduler under the condition * that join/leave operations happen at lag_i = 0, otherwise the - * virtual time has non-continguous motion equivalent to: + * virtual time has non-contiguous motion equivalent to: * * V +-= lag_i / W * * Also see the comment in place_entity() that deals with this. 
]] * - * However, since v_i is u64, and the multiplcation could easily overflow + * However, since v_i is u64, and the multiplication could easily overflow * transform it into a relative form that uses smaller quantities: * * Substitute: v_i == (v_i - v0) + v0 @@ -671,7 +671,7 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) } if (load) { - /* sign flips effective floor / ceil */ + /* sign flips effective floor / ceiling */ if (avg < 0) avg -= (load - 1); avg = div_s64(avg, load); @@ -721,7 +721,7 @@ static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) * * lag_i >= 0 -> \Sum (v_i - v)*w_i >= (v_i - v)*(\Sum w_i) * - * Note: using 'avg_vruntime() > se->vruntime' is inacurate due + * Note: using 'avg_vruntime() > se->vruntime' is inaccurate due * to the loss in precision caused by the division. */ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime) @@ -1024,7 +1024,7 @@ void init_entity_runnable_average(struct sched_entity *se) if (entity_is_task(se)) sa->load_avg = scale_load_down(se->load.weight); - /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */ + /* when this task is enqueued, it will contribute to its cfs_rq's load_avg */ } /* @@ -1616,7 +1616,7 @@ static unsigned long score_nearby_nodes(struct task_struct *p, int nid, max_dist = READ_ONCE(sched_max_numa_distance); /* * This code is called for each node, introducing N^2 complexity, - * which should be ok given the number of nodes rarely exceeds 8. + * which should be OK given the number of nodes rarely exceeds 8. */ for_each_online_node(node) { unsigned long faults; @@ -3284,7 +3284,7 @@ retry_pids: /* * Shared library pages mapped by multiple processes are not * migrated as it is expected they are cache replicated. Avoid - * hinting faults in read-only file-backed mappings or the vdso + * hinting faults in read-only file-backed mappings or the vDSO * as migrating the pages will be of marginal benefit. */ if (!vma->vm_mm || @@ -3295,7 +3295,7 @@ retry_pids: /* * Skip inaccessible VMAs to avoid any confusion between - * PROT_NONE and NUMA hinting ptes + * PROT_NONE and NUMA hinting PTEs */ if (!vma_is_accessible(vma)) { trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_INACCESSIBLE); @@ -3327,7 +3327,7 @@ retry_pids: } /* - * Scanning the VMA's of short lived tasks add more overhead. So + * Scanning the VMAs of short lived tasks add more overhead. So * delay the scan for new VMAs. */ if (mm->numa_scan_seq && time_before(jiffies, @@ -3371,7 +3371,7 @@ retry_pids: /* * Try to scan sysctl_numa_balancing_size worth of * hpages that have at least one present PTE that - * is not already pte-numa. If the VMA contains + * is not already PTE-numa. If the VMA contains * areas that are unused or already full of prot_numa * PTEs, scan up to virtpages, to skip through those * areas faster. 
@@ -4733,7 +4733,7 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s /* * Track task load average for carrying it to new CPU after migrated, and - * track group sched_entity load average for task_h_load calc in migration + * track group sched_entity load average for task_h_load calculation in migration */ if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD)) __update_load_avg_se(now, cfs_rq, se); @@ -5014,14 +5014,14 @@ static inline int util_fits_cpu(unsigned long util, * | | | | | | | * | | | | | | | * +---------------------------------------- - * cpu0 cpu1 cpu2 + * CPU0 CPU1 CPU2 * * In the above example if a task is capped to a specific performance * point, y, then when: * - * * util = 80% of x then it does not fit on cpu0 and should migrate - * to cpu1 - * * util = 80% of y then it is forced to fit on cpu1 to honour + * * util = 80% of x then it does not fit on CPU0 and should migrate + * to CPU1 + * * util = 80% of y then it is forced to fit on CPU1 to honour * uclamp_max request. * * which is what we're enforcing here. A task always fits if @@ -5052,7 +5052,7 @@ static inline int util_fits_cpu(unsigned long util, * | | | | | | | * | | | | | | | (region c, boosted, util < uclamp_min) * +---------------------------------------- - * cpu0 cpu1 cpu2 + * CPU0 CPU1 CPU2 * * a) If util > uclamp_max, then we're capped, we don't care about * actual fitness value here. We only care if uclamp_max fits @@ -5242,7 +5242,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) se->vruntime = vruntime - lag; /* - * When joining the competition; the exisiting tasks will be, + * When joining the competition; the existing tasks will be, * on average, halfway through their slice, as such start tasks * off with half a slice to ease into the competition. */ @@ -5391,7 +5391,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * Now advance min_vruntime if @se was the entity holding it back, * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be * put back on, and if we advance min_vruntime, we'll be placed back - * further than we started -- ie. we'll be penalized. + * further than we started -- i.e. we'll be penalized. */ if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE) update_min_vruntime(cfs_rq); @@ -5427,7 +5427,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) /* * Track our maximum slice length, if the CPU's load is at - * least twice that of our own weight (i.e. dont track it + * least twice that of our own weight (i.e. don't track it * when there are only lesser-weight tasks around): */ if (schedsta