32 files changed, 1695 insertions, 747 deletions
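(Illustrative aside, not part of the commit below.) Two of the hunks in this diff concern SCHED_DEADLINE parameter units: the sched-deadline.rst example switches from schedtool-dl to chrt, and the uapi comment now spells out that sched_runtime/sched_deadline/sched_period are in nanoseconds. The following minimal userspace sketch sets up the same 10ms/100ms reservation programmatically. It assumes recent uapi headers that export struct sched_attr from <linux/sched/types.h> and a libc that defines SYS_sched_setattr; the sys_sched_setattr() helper is a local wrapper, not a glibc function.

/* Illustrative sketch only -- not part of this diff. */
#define _GNU_SOURCE
#include <linux/sched.h>        /* SCHED_DEADLINE */
#include <linux/sched/types.h>  /* struct sched_attr (recent uapi headers) */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

/* No glibc wrapper is assumed here; invoke the raw syscall. */
static int sys_sched_setattr(pid_t pid, const struct sched_attr *attr,
			     unsigned int flags)
{
	return syscall(SYS_sched_setattr, pid, attr, flags);
}

int main(void)
{
	struct sched_attr attr = {
		.size		= sizeof(attr),
		.sched_policy	= SCHED_DEADLINE,
		/* All three values are in nanoseconds: 10ms runtime every 100ms. */
		.sched_runtime	=  10 * 1000 * 1000ULL,
		.sched_deadline	= 100 * 1000 * 1000ULL,
		.sched_period	= 100 * 1000 * 1000ULL,
	};

	if (sys_sched_setattr(0, &attr, 0)) {	/* pid 0: calling thread */
		perror("sched_setattr");
		return 1;
	}

	/* CPU-bound work here would run inside the 10ms/100ms reservation. */
	return 0;
}

This mirrors what "chrt -d -T 10000000 -D 100000000 0 ./my_cpuhog_app" does from the command line in the updated documentation.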
diff --git a/Documentation/scheduler/sched-deadline.rst b/Documentation/scheduler/sched-deadline.rst
index 9fe4846079bb..22838ed8e13a 100644
--- a/Documentation/scheduler/sched-deadline.rst
+++ b/Documentation/scheduler/sched-deadline.rst
@@ -749,21 +749,19 @@ Appendix A. Test suite
    of the command line options. Please refer to rt-app documentation for more
    details (`<rt-app-sources>/doc/*.json`).
 
-   The second testing application is a modification of schedtool, called
-   schedtool-dl, which can be used to setup SCHED_DEADLINE parameters for a
-   certain pid/application. schedtool-dl is available at:
-   https://github.com/scheduler-tools/schedtool-dl.git.
+   The second testing application is done using chrt which has support
+   for SCHED_DEADLINE.
 
    The usage is straightforward::
 
-    # schedtool -E -t 10000000:100000000 -e ./my_cpuhog_app
+    # chrt -d -T 10000000 -D 100000000 0 ./my_cpuhog_app
 
    With this, my_cpuhog_app is put to run inside a SCHED_DEADLINE reservation
-   of 10ms every 100ms (note that parameters are expressed in microseconds).
-   You can also use schedtool to create a reservation for an already running
+   of 10ms every 100ms (note that parameters are expressed in nanoseconds).
+   You can also use chrt to create a reservation for an already running
    application, given that you know its pid::
 
-    # schedtool -E -t 10000000:100000000 my_app_pid
+    # chrt -d -T 10000000 -D 100000000 -p 0 my_app_pid
 
 Appendix B. Minimal main()
 ==========================
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index bafa32dd375d..1a5ad184d28f 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -224,9 +224,9 @@ static void __init cppc_freq_invariance_init(void)
		 * Fake (unused) bandwidth; workaround to "fix"
		 * priority inheritance.
		 */
-		.sched_runtime	= 1000000,
-		.sched_deadline = 10000000,
-		.sched_period	= 10000000,
+		.sched_runtime	= NSEC_PER_MSEC,
+		.sched_deadline = 10 * NSEC_PER_MSEC,
+		.sched_period	= 10 * NSEC_PER_MSEC,
	};
	int ret;
 
diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c
index 3a494c5d1247..9cbd3c14c94f 100644
--- a/fs/bcachefs/six.c
+++ b/fs/bcachefs/six.c
@@ -335,7 +335,7 @@ static inline bool six_owner_running(struct six_lock *lock)
	 */
	rcu_read_lock();
	struct task_struct *owner = READ_ONCE(lock->owner);
-	bool ret = owner ? owner_on_cpu(owner) : !rt_task(current);
+	bool ret = owner ? owner_on_cpu(owner) : !rt_or_dl_task(current);
	rcu_read_unlock();
 
	return ret;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e7810f3bd522..b31283d81c52 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2626,7 +2626,7 @@ static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
	}
 
	task_lock(p);
-	if (task_is_realtime(p))
+	if (rt_or_dl_task_policy(p))
		slack_ns = 0;
	else if (slack_ns == 0)
		slack_ns = p->default_timer_slack_ns;
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index db1249cd9692..b25377b6ea98 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -40,7 +40,7 @@ static inline int task_nice_ioclass(struct task_struct *task)
 {
	if (task->policy == SCHED_IDLE)
		return IOPRIO_CLASS_IDLE;
-	else if (task_is_realtime(task))
+	else if (rt_or_dl_task_policy(task))
		return IOPRIO_CLASS_RT;
	else
		return IOPRIO_CLASS_BE;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3773c1c8f099..a1d0c7cab25c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -149,8 +149,9 @@ struct user_event_mm;
 * Special states are those that do not use the normal wait-loop pattern. See
 * the comment with set_special_state().
 */
-#define is_special_task_state(state)				\
-	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))
+#define is_special_task_state(state)					\
+	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED |	\
+		    TASK_DEAD | TASK_FROZEN))
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 # define debug_normal_state_change(state_value)			\
@@ -541,9 +542,14 @@ struct sched_entity {
	struct rb_node			run_node;
	u64				deadline;
	u64				min_vruntime;
+	u64				min_slice;
 
	struct list_head		group_node;
-	unsigned int			on_rq;
+	unsigned char			on_rq;
+	unsigned char			sched_delayed;
+	unsigned char			rel_deadline;
+	unsigned char			custom_slice;
+					/* hole */
 
	u64				exec_start;
	u64				sum_exec_runtime;
@@ -639,12 +645,26 @@ struct sched_dl_entity {
	 *
	 * @dl_overrun tells if the task asked to be informed about runtime
	 * overruns.
+	 *
+	 * @dl_server tells if this is a server entity.
+	 *
+	 * @dl_defer tells if this is a deferred or regular server. For
+	 * now only defer server exists.
+	 *
+	 * @dl_defer_armed tells if the deferrable server is waiting
+	 * for the replenishment timer to activate it.
+	 *
+	 * @dl_defer_running tells if the deferrable server is actually
+	 * running, skipping the defer phase.
	 */
	unsigned int			dl_throttled      : 1;
	unsigned int			dl_yielded        : 1;
	unsigned int			dl_non_contending : 1;
	unsigned int			dl_overrun	  : 1;
	unsigned int			dl_server         : 1;
+	unsigned int			dl_defer	  : 1;
+	unsigned int			dl_defer_armed	  : 1;
+	unsigned int			dl_defer_running  : 1;
 
	/*
	 * Bandwidth enforcement timer. Each -deadline task has its
@@ -672,7 +692,7 @@ struct sched_dl_entity {
	 */
	struct rq			*rq;
	dl_server_has_tasks_f		server_has_tasks;
-	dl_server_pick_f		server_pick;
+	dl_server_pick_f		server_pick_task;
 
 #ifdef CONFIG_RT_MUTEXES
	/*
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index df3aca89d4f5..3a912ab42bb5 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -10,16 +10,16 @@
 
 #include <linux/sched.h>
 
-#define MAX_DL_PRIO		0
-
-static inline int dl_prio(int prio)
+static inline bool dl_prio(int prio)
 {
-	if (unlikely(prio < MAX_DL_PRIO))
-		return 1;
-	return 0;
+	return unlikely(prio < MAX_DL_PRIO);
 }
 
-static inline int dl_task(struct task_struct *p)
+/*
+ * Returns true if a task has a priority that belongs to DL class. PI-boosted
+ * tasks will return true. Use dl_policy() to ignore PI-boosted tasks.
+ */
+static inline bool dl_task(struct task_struct *p)
 {
	return dl_prio(p->prio);
 }
diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h
index ab83d85e1183..6ab43b4f72f9 100644
--- a/include/linux/sched/prio.h
+++ b/include/linux/sched/prio.h
@@ -14,6 +14,7 @@
  */
 
 #define MAX_RT_PRIO		100
+#define MAX_DL_PRIO		0
 
 #define MAX_PRIO		(MAX_RT_PRIO + NICE_WIDTH)
 #define DEFAULT_PRIO		(MAX_RT_PRIO + NICE_WIDTH / 2)
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index b2b9e6eb9683..4e3338103654 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -6,19 +6,40 @@
 
 struct task_struct;
 
-static inline int rt_prio(int prio)
+static inline bool rt_prio(int prio)
 {
-	if (unlikely(prio < MAX_RT_PRIO))
-		return 1;
-	return 0;
+	return unlikely(prio < MAX_RT_PRIO && prio >= MAX_DL_PRIO);
 }
 
-static inline int rt_task(struct task_struct *p)
+static inline bool rt_or_dl_prio(int prio)
+{
+	return unlikely(prio < MAX_RT_PRIO);
+}
+
+/*
+ * Returns true if a task has a priority that belongs to RT class. PI-boosted
+ * tasks will return true. Use rt_policy() to ignore PI-boosted tasks.
+ */
+static inline bool rt_task(struct task_struct *p)
 {
	return rt_prio(p->prio);
 }
 
-static inline bool task_is_realtime(struct task_struct *tsk)
+/*
+ * Returns true if a task has a priority that belongs to RT or DL classes.
+ * PI-boosted tasks will return true. Use rt_or_dl_task_policy() to ignore
+ * PI-boosted tasks.
+ */
+static inline bool rt_or_dl_task(struct task_struct *p)
+{
+	return rt_or_dl_prio(p->prio);
+}
+
+/*
+ * Returns true if a task has a policy that belongs to RT or DL classes.
+ * PI-boosted tasks will return false.
+ */
+static inline bool rt_or_dl_task_policy(struct task_struct *tsk)
 {
	int policy = tsk->policy;
 
diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h
index 90662385689b..bf6e9ae031c1 100644
--- a/include/uapi/linux/sched/types.h
+++ b/include/uapi/linux/sched/types.h
@@ -58,9 +58,9 @@
 *
 * This is reflected by the following fields of the sched_attr structure:
 *
- *  @sched_deadline	representative of the task's deadline
- *  @sched_runtime	representative of the task's runtime
- *  @sched_period	representative of the task's period
+ *  @sched_deadline	representative of the task's deadline in nanoseconds
+ *  @sched_runtime	representative of the task's runtime in nanoseconds
+ *  @sched_period	representative of the task's period in nanoseconds
 *
 * Given this task model, there are a multiplicity of scheduling algorithms
 * and policies, that can be used to ensure all the tasks will make their
diff --git a/kernel/freezer.c b/kernel/freezer.c
index f57aaf96b829..44bbd7dbd2c8 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -72,7 +72,7 @@ bool __refrigerator(bool check_kthr_stop)
		bool freeze;
 
		raw_spin_lock_irq(&current->pi_lock);
-		set_current_state(TASK_FROZEN);
+		WRITE_ONCE(current->__state, TASK_FROZEN);
		/* unstale saved_state so that __thaw_task() will wake us up */
		current->saved_state = TASK_RUNNING;
		raw_spin_unlock_irq(&current->pi_lock);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index f7be976ff88a..db4ceb0f503c 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -845,8 +845,16 @@ repeat:
		 * event only cares about the address.
		 */
		trace_sched_kthread_work_execute_end(work, func);
-	} else if (!freezing(current))
+	} else if (!freezing(current)) {
		schedule();
+	} else {
+		/*
+		 * Handle the case where the current remains
+		 * TASK_INTERRUPTIBLE. try_to_freeze() expects
+		 * the current to be TASK_RUNNING.
+		 */
+		__set_current_state(TASK_RUNNING);
+	}
 
	try_to_freeze();
	cond_resched();
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index fba1229f1de6..ebebd0eec7f6 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -347,7 +347,7 @@ static __always_inline int __waiter_prio(struct task_struct *task)
 {
	int prio = task->prio;
 
-	if (!rt_prio(prio))
+	if (!rt_or_dl_prio(prio))
		return DEFAULT_PRIO;
 
	return prio;
@@ -435,7 +435,7 @@ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
	 * Note that RT tasks are excluded from same priority (lateral)
	 * steals to prevent the introduction of an unbounded latency.
	 */
-	if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio))
+	if (rt_or_dl_prio(waiter->tree.prio))
		return false;
 
	return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 33cac79e3994..5ded7dff46ef 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -631,7 +631,7 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
			 * if it is an RT task or wait in the wait queue
			 * for too long.
			 */
-			if (has_handoff || (!rt_task(waiter->task) &&
+			if (has_handoff || (!rt_or_dl_task(waiter->task) &&
					    !time_after(jiffies, waiter->timeout)))
				return false;
 
@@ -914,7 +914,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
		if (owner_state != OWNER_WRITER) {
			if (need_resched())
				break;
-			if (rt_task(current) &&
+			if (rt_or_dl_task(current) &&
			    (prev_owner_state != OWNER_WRITER))
				break;
		}
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
index 3ad2cc4823e5..76d204b7d29c 100644
--- a/kernel/locking/ww_mutex.h
+++ b/kernel/locking/ww_mutex.h
@@ -237,7 +237,7 @@ __ww_ctx_less(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
	int a_prio = a->task->prio;
	int b_prio = b->task->prio;
 
-	if (rt_prio(a_prio) || rt_prio(b_prio)) {
+	if (rt_or_dl_prio(a_prio) || rt_or_dl_prio(b_prio)) {
 
		if (a_prio > b_prio)
			return true;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1d7f5941bcdc..a7af49b3a337 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -163,7 +163,10 @@ static inline int __task_prio(const struct task_struct *p)
	if (p->sched_class == &stop_sched_class) /* trumps deadline */
		return -2;
 
-	if (rt_prio(p->prio)) /* includes deadline */
+	if (p->dl_server)
+		return -1; /* deadline */
+
+	if (rt_or_dl_prio(p->prio))
		return p->prio; /* [-1, 99] */
 
	if (p->sched_class == &idle_sched_class)
@@ -192,8 +195,24 @@ static inline bool prio_less(const struct task_struct *a,
	if (-pb < -pa)
		return false;
 
-	if (pa == -1) /* dl_prio() doesn't work because of stop_class above */
-		return !dl_time_before(a->dl.deadline, b->dl.deadline);
+	if (pa == -1) { /* dl_prio() doesn't work because of stop_class above */
+		const struct sched_dl_entity *a_dl, *b_dl;
+
+		a_dl = &a->dl;
+		/*
+		 * Since, 'a' and 'b' can be CFS tasks served by DL server,
+		 * __task_prio() can return -1 (for DL) even for those. In that
+		 * case, get to the dl_server's DL entity.
+		 */
+		if (a->dl_server)
+			a_dl = a->dl_server;
+
+		b_dl = &b->dl;
+		if (b->dl_server)
+			b_dl = b->dl_server;
+
+		return !dl_time_before(a_dl->deadline, b_dl->deadline);
+	}
 
	if (pa == MAX_RT_PRIO + MAX_NICE)	/* fair */
		return cfs_prio_less(a, b, in_fi);
@@ -240,6 +259,9 @@ static inline int rb_sched_core_cmp(const void *key, const struct rb_node *node)
 
 void sched_core_enqueue(struct rq *rq, struct task_struct *p)
 {
+	if (p->se.sched_delayed)
+		return;
+
	rq->core->core_task_seq++;
 
	if (!p->core_cookie)
@@ -250,6 +272,9 @@
 
 void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags)
 {
+	if (p->se.sched_delayed)
+		return;
+
	rq->core->core_task_seq++;
 
	if (sched_core_enqueued(p)) {
@@ -1269,7 +1294,7 @@ bool sched_can_stop_tick(struct rq *rq)
	 * dequeued by migrating while the constrained task continues to run.
	 * E.g. going from 2->1 without going through pick_next_task().
	 */
-	if (sched_feat(HZ_BW) && __need_bw_check(rq, rq->curr)) {
+	if (__need_bw_check(rq, rq->curr)) {
		if (cfs_task_bw_constrained(rq->curr))
			return false;
	}
@@ -1672,6 +1697,9 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
	if (unlikely(!p->sched_class->uclamp_enabled))
		return;
 
+	if (p->se.sched_delayed)
+		return;
+
	for_each_clamp_id(clamp_id)
		uclamp_rq_inc_id(rq, p, clamp_id);
 
@@ -1696,6 +1724,9 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
	if (unlikely(!p->sched_class->uclamp_enabled))
		return;
 
+	if (p->se.sched_delayed)
+		return;
+
	for_each_clamp_id(clamp_id)
		uclamp_rq_dec_id(rq, p, clamp_id);
 }
@@ -1975,14 +2006,21 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
		psi_enqueue(p, (flags & ENQUEUE_WAKEUP) && !(flags & ENQUEUE_MIGRATED));
	}
 
-	uclamp_rq_inc(rq, p);
	p->sched_class->enqueue_task(rq, p, flags);
+	/*
+	 * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
+	 * ->sched_delayed.
+	 */
+	uclamp_rq_inc(rq, p);
 
	if (sched_core_enabled(rq))
		sched_core_enqueue(rq, p);
 }
 
-void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
+/*
+ * Must only return false when DEQUEUE_SLEEP.
+ */
+inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 {
	if (sched_core_enabled(rq))
		sched_core_dequeue(rq, p, flags);
@@ -1995,8 +2033,12 @@
		psi_dequeue(p, flags & DEQUEUE_SLEEP);
	}
 
+	/*
+	 * Must be before ->dequeue_task() because ->dequeue_task() can 'fail'
+	 * and mark the task ->sched_delayed.
+	 */
	uclamp_rq_dec(rq, p);
-	p->sched_class->dequeue_task(rq, p, flags);
+	return p->sched_class->dequeue_task(rq, p, flags);
 }
 
 void activate_task(struct rq *rq, struct task_struct *p, int flags)
@@ -2014,12 +2056,25 @@
 
 void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 {
-	WRITE_ONCE(p->on_rq, (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING);
+	SCHED_WARN_ON(flags & DEQUEUE_SLEEP);
+
+	WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
	ASSERT_EXCLUSIVE_WRITER(p->on_rq);
 
+	/*
+	 * Code explicitly relies on TASK_ON_RQ_MIGRATING begin set *before*
+	 * dequeue_task() and cleared *after* enqueue_task().
+	 */
	dequeue_task(rq, p, flags);
 }
 
+static void block_task(struct rq *rq, struct task_struct *p, int flags)
+{
+	if (dequeue_task(rq, p, DEQUEUE_SLEEP | flags))
+		__block_task(rq, p);
+}
+
 /**
  * task_curr - is this task currently executing on a CPU?
  * @p: the task in question.
@@ -2233,6 +2288,12 @@ void migrate_disable(void)
	struct task_struct *p = current;
 
	if (p->migration_disabled) {
+#ifdef CONFIG_DEBUG_PREEMPT
+		/*
+		 *Warn about overflow half-way through the range.
+		 */
+		WARN_ON_ONCE((s16)p->migration_disabled < 0);
+#endif
		p->migration_disabled++;
		return;
	}
@@ -2251,14 +2312,20 @@ void migrate_enable(void)
		.flags = SCA_MIGRATE_ENABLE,
	};
 
+#ifdef CONFIG_DEBUG_PREEMPT
+	/*
+	 * Check both overflow from migrate_disable() and superfluous
+	 * migrate_enable().
+	 */
+	if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
+		return;
+#endif
+
	if (p->migration_disabled > 1) {
		p->migration_disabled--;
		return;
	}
 
-	if (WARN_ON_ONCE(!p->migration_disabled))
-		return;
-
	/*
	 * Ensure stop_task runs either before or after this, and that
	 * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
@@ -3607,8 +3674,6 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
			rq->idle_stamp = 0;
	}
 #endif
-
-	p->dl_server = NULL;
 }
 
 /*
@@ -3644,12 +3709,14 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
 
	rq = __task_rq_lock(p, &rf);
	if (task_on_rq_queued(p)) {
+		update_rq_clock(rq);
+		if (p->se.sched_delayed)
+			enqueue_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_DELAYED);
		if (!task_on_cpu(rq, p)) {
			/*
			 * When on_rq && !on_cpu the task is preempted, see if
			 * it should preempt the task that is current now.
			 */
-			update_rq_clock(rq);
			wakeup_preempt(rq, p, wake_flags);
		}
		ttwu_do_wakeup(p);
@@ -4029,11 +4096,16 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
	 * case the whole 'p->on_rq && ttwu_runnable()' case below
	 * without taking any locks.
	 *
+	 * Specifically, given current runs ttwu() we must be before
+	 * schedule()'s block_task(), as such this must not observe
+	 * sched_delayed.
+	 *
	 * In particular:
	 *  - we rely on Program-Order guarantees for all the ordering,
	 *  - we're serialized against set_special_state() by virtue of
	 *    it disabling IRQs (this allows not taking ->pi_lock).
	 */
+	SCHED_WARN_ON(p->se.sched_delayed);
	if (!ttwu_state_match(p, state, &success))
		goto out;
 
@@ -4322,9 +4394,11 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
	p->se.nr_migrations		= 0;
	p->se.vruntime			= 0;
	p->se.vlag			= 0;
-	p->se.slice			= sysctl_sched_base_slice;
	INIT_LIST_HEAD(&p->se.group_node);
 
+	/* A delayed task cannot be in clone(). */
+	SCHED_WARN_ON(p->se.sched_delayed);
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
	p->se.cfs_rq			= NULL;
 #endif
@@ -4572,6 +4646,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 
		p->prio = p->normal_prio = p->static_prio;
		set_load_weight(p, false);
+		p->se.custom_slice = 0;
+		p->se.slice = sysctl_sched_base_slice;
 
		/*
		 * We don't need the reset flag anymore after the fork. It has
@@ -4686,7 +4762,7 @@ void wake_up_new_task(struct task_struct *p)
	update_rq_clock(rq);
	post_init_entity_util_avg(p);
 
-	activate_task(rq, p, ENQUEUE_NOCLOCK);
+	activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL);
	trace_sched_wakeup_new(p);
	wakeup_preempt(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
@@ -5769,8 +5845,8 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
	schedstat_inc(this_rq()->sched_count);
 }
 
-static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
-				  struct rq_flags *rf)
+static void prev_balance(struct rq *rq, struct task_struct *prev,
+			 struct rq_flags *rf)
 {
 #ifdef CONFIG_SMP
	const struct sched_class *class;
@@ -5787,8 +5863,6 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
			break;
	}
 #endif
-
-	put_prev_task(rq, prev);
 }
 
 /*
@@ -5800,6 +5874,8 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
	const struct sched_class *class;
	struct task_struct *p;
 
+	rq->dl_server = NULL;
+
	/*
	 * Optimization: we know that if all tasks are in the fair class we can
	 * call that function directly, but only if the @prev task wasn't of a
@@ -5815,35 +5891,28 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
		/* Assume the next prioritized class is idle_sched_class */
		if (!p) {
-			put_prev_task(rq, prev);
-			p = pick_next_task_idle(rq);
+			p = pick_task_idle(rq);
+			put_prev_set_next_task(rq, prev, p);
		}
 
-		/*
-		 * This is the fast path; it cannot be a DL server pick;
-		 * therefore even if @p == @prev, ->dl_server must be NULL.
-		 */
-		if (p->dl_server)
-			p->dl_server = NULL;
-
		return p;
	}
 
 restart:
-	put_prev_task_balance(rq, prev, rf);
-
-	/*
-	 * We've updated @prev and no longer need the server link, clear it.
-	 * Must be done before ->pick_next_task() because that can (re)set
-	 * ->dl_server.
-	 */
-	if (prev->dl_server)
-		prev->dl_server = NULL;
+	prev_balance(rq, prev, rf);
 
	for_each_class(class) {
-		p = class->pick_next_task(rq);
-		if (p)
-			return p;
+		if (class->pick_next_task) {
+			p = class->pick_next_task(rq, prev);
+			if (p)
+				return p;
+		} else {
+			p = class->pick_task(rq);
+			if (p) {
+				put_prev_set_next_task(rq, prev, p);
+				return p;
+			}
+		}
	}
 
	BUG(); /* The idle class should always have a runnable task. */
@@ -5873,6 +5942,8 @@ static inline struct task_struct *pick_task(struct rq *rq)
	const struct sched_class *class;
	struct task_struct *p;
 
+	rq->dl_server = NULL;
+
	for_each_class(class) {
		p = class->pick_task(rq);
		if (p)
@@ -5911,6 +5982,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
		 * another cpu during offline.
		 */
		rq->core_pick = NULL;
+		rq->core_dl_server = NULL;
		return __pick_next_task(rq, prev, rf);
	}
 
@@ -5929,16 +6001,13 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
		WRITE_ONCE(rq->core_sched_seq, rq->core->core_pick_seq);
 
		next = rq->core_pick;
-		if (next != prev) {
-			put_prev_task(rq, prev);
-			set_next_task(rq, next);
-		}
-
+		rq->dl_server = rq->core_dl_server;
		rq->core_pick = NULL;
-		goto out;
+		rq->core_dl_server = NULL;
+		goto out_set_next;
	}
 
-	put_prev_task_balance(rq, prev, rf);
+	prev_balance(rq, prev, rf);
 
	smt_mask = cpu_smt_mask(cpu);
	need_sync = !!rq->core->core_cookie;
@@ -5979,6 +6048,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
		next = pick_task(rq);
		if (!next->core_cookie) {
			rq->core_pick = NULL;
+			rq->core_dl_server = NULL;
			/*
			 * For robustness, update the min_vruntime_fi for
			 * unconstrained picks as well.
@@ -6006,7 +6076,9 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
		if (i != cpu && (rq_i != rq->core || !core_clock_updated))
			update_rq_clock(rq_i);
 
-		p = rq_i->core_pick = pick_task(rq_i);
+		rq_i->core_pick = p = pick_task(rq_i);
+		rq_i->core_dl_server = rq_i->dl_server;
+
		if (!max || prio_less(max, p, fi_before))
			max = p;
	}
@@ -6030,6 +6102,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
		}
 
		rq_i->core_pick = p;
+		rq_i->core_dl_server = NULL;
 
		if (p == rq_i->idle) {
			if (rq_i->nr_running) {
@@ -6090,6 +6163,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
		if (i == cpu) {
			rq_i->core_pick = NULL;
+			rq_i->core_dl_server = NULL;
			continue;
		}
 
@@ -6098,6 +6172,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
		if (rq_i->curr == rq_i->core_pick) {
			rq_i->core_pick = NULL;
+			rq_i->core_dl_server = NULL;
			continue;
		}
 
@@ -6105,8 +6180,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
	}
 
 out_set_next:
-	set_next_task(rq, next);
-out:
+	put_prev_set_next_task(rq, prev, next);
	if (rq->core->core_forceidle_count && next == rq->idle)
		queue_core_balance(rq);
 
@@ -6342,19 +6416,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 * Constants for the sched_mode argument of __schedule().
 *
 * The mode argument allows RT enabled kernels to differentiate a
