Diffstat (limited to 'kernel/sched/ext.c')
-rw-r--r--  kernel/sched/ext.c  63
1 file changed, 37 insertions(+), 26 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index b6e8c224f894..8e060a6d4945 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1411,9 +1411,9 @@ static bool scx_ops_tryset_enable_state(enum scx_ops_enable_state to,
return atomic_try_cmpxchg(&scx_ops_enable_state_var, &from_v, to);
}
-static bool scx_ops_bypassing(void)
+static bool scx_rq_bypassing(struct rq *rq)
{
- return unlikely(atomic_read(&scx_ops_bypass_depth));
+ return unlikely(rq->scx.flags & SCX_RQ_BYPASSING);
}
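
(Aside, not part of the patch: a minimal userspace model of what this first hunk changes. The old helper answered "is anyone bypassing?" from a single global atomic depth counter; the new one answers "is this runqueue bypassing?" from a flag stored in the rq the caller is already operating on. The names below are simplified stand-ins, not the kernel definitions.)

/* bypass_model.c - illustrative only; struct rq and the flag bit are
 * simplified stand-ins for struct rq / rq->scx.flags in ext.c.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define SCX_RQ_BYPASSING (1u << 0)	/* hypothetical bit value */

struct rq {				/* toy per-CPU runqueue */
	unsigned int scx_flags;
};

/* Old scheme: one global depth counter, visible from every CPU. */
static atomic_int scx_ops_bypass_depth;

static bool scx_ops_bypassing_old(void)
{
	return atomic_load(&scx_ops_bypass_depth) != 0;
}

/* New scheme: each rq carries its own flag, toggled under the rq lock. */
static bool scx_rq_bypassing(struct rq *rq)
{
	return rq->scx_flags & SCX_RQ_BYPASSING;
}

int main(void)
{
	struct rq rq = { .scx_flags = 0 };

	atomic_fetch_add(&scx_ops_bypass_depth, 1);	/* old: global toggle */
	rq.scx_flags |= SCX_RQ_BYPASSING;		/* new: per-rq toggle */

	printf("old=%d new=%d\n",
	       scx_ops_bypassing_old(), scx_rq_bypassing(&rq));
	return 0;
}
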
/**
@@ -1948,7 +1948,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
if (!scx_rq_online(rq))
goto local;
- if (scx_ops_bypassing())
+ if (scx_rq_bypassing(rq))
goto global;
if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
@@ -2612,7 +2612,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
* bypassing test.
*/
if ((prev->scx.flags & SCX_TASK_QUEUED) &&
- prev->scx.slice && !scx_ops_bypassing()) {
+ prev->scx.slice && !scx_rq_bypassing(rq)) {
rq->scx.flags |= SCX_RQ_BAL_KEEP;
goto has_tasks;
}
@@ -2625,7 +2625,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
if (consume_dispatch_q(rq, &scx_dsq_global))
goto has_tasks;
- if (!SCX_HAS_OP(dispatch) || scx_ops_bypassing() || !scx_rq_online(rq))
+ if (!SCX_HAS_OP(dispatch) || scx_rq_bypassing(rq) || !scx_rq_online(rq))
goto no_tasks;
dspc->rq = rq;
@@ -2671,7 +2671,8 @@ no_tasks:
* %SCX_OPS_ENQ_LAST is in effect.
*/
if ((prev->scx.flags & SCX_TASK_QUEUED) &&
- (!static_branch_unlikely(&scx_ops_enq_last) || scx_ops_bypassing())) {
+ (!static_branch_unlikely(&scx_ops_enq_last) ||
+ scx_rq_bypassing(rq))) {
rq->scx.flags |= SCX_RQ_BAL_KEEP;
goto has_tasks;
}
@@ -2863,7 +2864,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
* forcing a different task. Leave it at the head of the local
* DSQ.
*/
- if (p->scx.slice && !scx_ops_bypassing()) {
+ if (p->scx.slice && !scx_rq_bypassing(rq)) {
dispatch_enqueue(&rq->scx.local_dsq, p, SCX_ENQ_HEAD);
return;
}
@@ -2928,7 +2929,7 @@ static struct task_struct *pick_task_scx(struct rq *rq)
return NULL;
if (unlikely(!p->scx.slice)) {
- if (!scx_ops_bypassing() && !scx_warned_zero_slice) {
+ if (!scx_rq_bypassing(rq) && !scx_warned_zero_slice) {
printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in pick_next_task_scx()\n",
p->comm, p->pid);
scx_warned_zero_slice = true;
@@ -2966,7 +2967,7 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
* calling ops.core_sched_before(). Accesses are controlled by the
* verifier.
*/
- if (SCX_HAS_OP(core_sched_before) && !scx_ops_bypassing())
+ if (SCX_HAS_OP(core_sched_before) && !scx_rq_bypassing(task_rq(a)))
return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, core_sched_before,
(struct task_struct *)a,
(struct task_struct *)b);
@@ -3325,7 +3326,7 @@ static void task_tick_scx(struct rq *rq, struct task_struct *curr, int queued)
* While disabling, always resched and refresh core-sched timestamp as
* we can't trust the slice management or ops.core_sched_before().
*/
- if (scx_ops_bypassing()) {
+ if (scx_rq_bypassing(rq)) {
curr->scx.slice = 0;
touch_core_sched(rq, curr);
} else if (SCX_HAS_OP(tick)) {
@@ -3664,7 +3665,7 @@ bool scx_can_stop_tick(struct rq *rq)
{
struct task_struct *p = rq->curr;
- if (scx_ops_bypassing())
+ if (scx_rq_bypassing(rq))
return false;
if (p->sched_class != &ext_sched_class)
@@ -4257,16 +4258,8 @@ static void scx_ops_bypass(bool bypass)
}
/*
- * We need to guarantee that no tasks are on the BPF scheduler while
- * bypassing. Either we see enabled or the enable path sees the
- * increased bypass_depth before moving tasks to SCX.
- */
- if (!scx_enabled())
- return;
-
- /*
* No task property is changing. We just need to make sure all currently
- * queued tasks are re-queued according to the new scx_ops_bypassing()
+ * queued tasks are re-queued according to the new scx_rq_bypassing()
* state. As an optimization, walk each rq's runnable_list instead of
* the scx_tasks list.
*
@@ -4280,6 +4273,24 @@ static void scx_ops_bypass(bool bypass)
rq_lock_irqsave(rq, &rf);
+ if (bypass) {
+ WARN_ON_ONCE(rq->scx.flags & SCX_RQ_BYPASSING);
+ rq->scx.flags |= SCX_RQ_BYPASSING;
+ } else {
+ WARN_ON_ONCE(!(rq->scx.flags & SCX_RQ_BYPASSING));
+ rq->scx.flags &= ~SCX_RQ_BYPASSING;
+ }
+
+ /*
+ * We need to guarantee that no tasks are on the BPF scheduler
+ * while bypassing. Either we see enabled or the enable path
+ * sees scx_rq_bypassing() before moving tasks to SCX.
+ */
+ if (!scx_enabled()) {
+ rq_unlock_irqrestore(rq, &rf);
+ continue;
+ }
+
/*
* The use of list_for_each_entry_safe_reverse() is required
* because each task is going to be removed from and added back
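
(Aside, not part of the patch: the hunk above also reorders the !scx_enabled() early return. scx_ops_bypass() used to return before touching any runqueue when SCX was not enabled; now every rq's SCX_RQ_BYPASSING bit is flipped under the rq lock first and only the re-queue walk is skipped, so an enable that races with bypass is guaranteed to observe the per-rq flag. Continuing the toy model from above, with per-rq mutexes standing in for rq locks and the runnable_list walk reduced to a counter; the WARN_ON_ONCE sanity checks are dropped.)

/* bypass_toggle.c - toy model of the reordered scx_ops_bypass() flow.
 * NR_CPUS_TOY, scx_enabled_toy and the requeued counter are illustrative
 * stand-ins, not kernel API.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define SCX_RQ_BYPASSING (1u << 0)
#define NR_CPUS_TOY 4

struct rq {
	pthread_mutex_t lock;	/* stands in for the rq lock */
	unsigned int scx_flags;
	int requeued;		/* stands in for the runnable_list walk */
};

static struct rq rqs[NR_CPUS_TOY];
static bool scx_enabled_toy;

static void scx_ops_bypass_toy(bool bypass)
{
	for (int cpu = 0; cpu < NR_CPUS_TOY; cpu++) {
		struct rq *rq = &rqs[cpu];

		pthread_mutex_lock(&rq->lock);

		/* Always flip the per-rq state ... */
		if (bypass)
			rq->scx_flags |= SCX_RQ_BYPASSING;
		else
			rq->scx_flags &= ~SCX_RQ_BYPASSING;

		/* ... but only walk/re-queue tasks while SCX is enabled. */
		if (!scx_enabled_toy) {
			pthread_mutex_unlock(&rq->lock);
			continue;
		}

		rq->requeued++;
		pthread_mutex_unlock(&rq->lock);
	}
}

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS_TOY; cpu++)
		pthread_mutex_init(&rqs[cpu].lock, NULL);

	scx_ops_bypass_toy(true);	/* flags get set even while disabled */
	scx_enabled_toy = true;
	scx_ops_bypass_toy(false);	/* now also walks each rq */

	printf("rq0 flags=%#x requeued=%d\n",
	       rqs[0].scx_flags, rqs[0].requeued);
	return 0;
}
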
@@ -6397,17 +6408,17 @@ __bpf_kfunc void scx_bpf_kick_cpu(s32 cpu, u64 flags)
if (!ops_cpu_valid(cpu, NULL))
return;
+ local_irq_save(irq_flags);
+
+ this_rq = this_rq();
+
/*
* While bypassing for PM ops, IRQ handling may not be online which can
* lead to irq_work_queue() malfunction such as infinite busy wait for
* IRQ status update. Suppress kicking.
*/
- if (scx_ops_bypassing())
- return;
-
- local_irq_save(irq_flags);
-
- this_rq = this_rq();
+ if (scx_rq_bypassing(this_rq))
+ goto out;
/*
* Actual kicking is bounced to kick_cpus_irq_workfn() to avoid nesting