summaryrefslogtreecommitdiff
path: root/kernel/sched/core.c
diff options
context:
space:
mode:
authorPhil Auld <pauld@redhat.com>2023-07-12 09:33:57 -0400
committerPeter Zijlstra <peterz@infradead.org>2023-08-02 16:19:26 +0200
commit88c56cfeaec4642aee8aac58b38d5708c6aae0d3 (patch)
treea931bedfa512ff5828f15bf56d50158b030a1b60 /kernel/sched/core.c
parentc98c18270be115678f4295b10a5af5dcc9c4efa0 (diff)
downloadlinux-88c56cfeaec4642aee8aac58b38d5708c6aae0d3.tar.gz
linux-88c56cfeaec4642aee8aac58b38d5708c6aae0d3.tar.bz2
linux-88c56cfeaec4642aee8aac58b38d5708c6aae0d3.zip
sched/fair: Block nohz tick_stop when cfs bandwidth in use
CFS bandwidth limits and NOHZ full don't play well together. Tasks can easily run well past their quotas before a remote tick does accounting. This leads to long, multi-period stalls before such tasks can run again. Currently, when presented with these conflicting requirements the scheduler is favoring nohz_full and letting the tick be stopped. However, nohz tick stopping is already best-effort, there are a number of conditions that can prevent it, whereas cfs runtime bandwidth is expected to be enforced. Make the scheduler favor bandwidth over stopping the tick by setting TICK_DEP_BIT_SCHED when the only running task is a cfs task with runtime limit enabled. We use cfs_b->hierarchical_quota to determine if the task requires the tick. Add check in pick_next_task_fair() as well since that is where we have a handle on the task that is actually going to be running. Add check in sched_can_stop_tick() to cover some edge cases such as nr_running going from 2->1 and the 1 remains the running task. Reviewed-By: Ben Segall <bsegall@google.com> Signed-off-by: Phil Auld <pauld@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/r/20230712133357.381137-3-pauld@redhat.com
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--kernel/sched/core.c26
1 files changed, 26 insertions, 0 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3af25caf6343..614271a75525 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1194,6 +1194,20 @@ static void nohz_csd_func(void *info)
#endif /* CONFIG_NO_HZ_COMMON */
#ifdef CONFIG_NO_HZ_FULL
+static inline bool __need_bw_check(struct rq *rq, struct task_struct *p)
+{
+ if (rq->nr_running != 1)
+ return false;
+
+ if (p->sched_class != &fair_sched_class)
+ return false;
+
+ if (!task_on_rq_queued(p))
+ return false;
+
+ return true;
+}
+
bool sched_can_stop_tick(struct rq *rq)
{
int fifo_nr_running;
@@ -1229,6 +1243,18 @@ bool sched_can_stop_tick(struct rq *rq)
if (rq->nr_running > 1)
return false;
+ /*
+ * If there is one task and it has CFS runtime bandwidth constraints
+ * and it's on the cpu now we don't want to stop the tick.
+ * This check prevents clearing the bit if a newly enqueued task here is
+ * dequeued by migrating while the constrained task continues to run.
+ * E.g. going from 2->1 without going through pick_next_task().
+ */
+ if (sched_feat(HZ_BW) && __need_bw_check(rq, rq->curr)) {
+ if (cfs_task_bw_constrained(rq->curr))
+ return false;
+ }
+
return true;
}
#endif /* CONFIG_NO_HZ_FULL */