-rw-r--r--  Documentation/scheduler/sched-deadline.rst |  14
-rw-r--r--  drivers/cpufreq/cppc_cpufreq.c             |   6
-rw-r--r--  fs/bcachefs/six.c                          |   2
-rw-r--r--  fs/proc/base.c                             |   2
-rw-r--r--  include/linux/ioprio.h                     |   2
-rw-r--r--  include/linux/sched.h                      |  28
-rw-r--r--  include/linux/sched/deadline.h             |  14
-rw-r--r--  include/linux/sched/prio.h                 |   1
-rw-r--r--  include/linux/sched/rt.h                   |  33
-rw-r--r--  include/uapi/linux/sched/types.h           |   6
-rw-r--r--  kernel/freezer.c                           |   2
-rw-r--r--  kernel/kthread.c                           |  10
-rw-r--r--  kernel/locking/rtmutex.c                   |   4
-rw-r--r--  kernel/locking/rwsem.c                     |   4
-rw-r--r--  kernel/locking/ww_mutex.h                  |   2
-rw-r--r--  kernel/sched/core.c                        | 248
-rw-r--r--  kernel/sched/cpufreq_schedutil.c           |   6
-rw-r--r--  kernel/sched/deadline.c                    | 503
-rw-r--r--  kernel/sched/debug.c                       | 198
-rw-r--r--  kernel/sched/fair.c                        | 770
-rw-r--r--  kernel/sched/features.h                    |  30
-rw-r--r--  kernel/sched/idle.c                        |  23
-rw-r--r--  kernel/sched/rt.c                          | 261
-rw-r--r--  kernel/sched/sched.h                       | 101
-rw-r--r--  kernel/sched/stop_task.c                   |  18
-rw-r--r--  kernel/sched/syscalls.c                    | 134
-rw-r--r--  kernel/sched/topology.c                    |   8
-rw-r--r--  kernel/sys.c                               |   2
-rw-r--r--  kernel/time/hrtimer.c                      |   2
-rw-r--r--  kernel/trace/trace_sched_wakeup.c          |   2
-rw-r--r--  mm/page-writeback.c                        |   4
-rw-r--r--  mm/page_alloc.c                            |   2
32 files changed, 1695 insertions, 747 deletions
diff --git a/Documentation/scheduler/sched-deadline.rst b/Documentation/scheduler/sched-deadline.rst
index 9fe4846079bb..22838ed8e13a 100644
--- a/Documentation/scheduler/sched-deadline.rst
+++ b/Documentation/scheduler/sched-deadline.rst
@@ -749,21 +749,19 @@ Appendix A. Test suite
of the command line options. Please refer to rt-app documentation for more
details (`<rt-app-sources>/doc/*.json`).
- The second testing application is a modification of schedtool, called
- schedtool-dl, which can be used to setup SCHED_DEADLINE parameters for a
- certain pid/application. schedtool-dl is available at:
- https://github.com/scheduler-tools/schedtool-dl.git.
+ The second test can be performed using chrt, which has support
+ for SCHED_DEADLINE.
The usage is straightforward::
- # schedtool -E -t 10000000:100000000 -e ./my_cpuhog_app
+ # chrt -d -T 10000000 -D 100000000 0 ./my_cpuhog_app
With this, my_cpuhog_app is put to run inside a SCHED_DEADLINE reservation
- of 10ms every 100ms (note that parameters are expressed in microseconds).
- You can also use schedtool to create a reservation for an already running
+ of 10ms every 100ms (note that parameters are expressed in nanoseconds).
+ You can also use chrt to create a reservation for an already running
application, given that you know its pid::
- # schedtool -E -t 10000000:100000000 my_app_pid
+ # chrt -d -T 10000000 -D 100000000 -p 0 my_app_pid
Appendix B. Minimal main()
==========================
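For reference, the chrt invocation above corresponds to a sched_setattr() call along the lines of the following minimal sketch, in the spirit of Appendix B (the userspace mirror of struct sched_attr and the SYS_sched_setattr constant are assumptions of this sketch; error handling is reduced to perror()):

	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	#define SCHED_DEADLINE	6

	/* userspace mirror of the kernel's struct sched_attr, as in Appendix B */
	struct sched_attr {
		uint32_t size;
		uint32_t sched_policy;
		uint64_t sched_flags;
		int32_t  sched_nice;
		uint32_t sched_priority;
		uint64_t sched_runtime;		/* nanoseconds */
		uint64_t sched_deadline;	/* nanoseconds */
		uint64_t sched_period;		/* nanoseconds */
	};

	int main(void)
	{
		struct sched_attr attr = {
			.size           = sizeof(attr),
			.sched_policy   = SCHED_DEADLINE,
			.sched_runtime  = 10000000,	/* 10 ms, matches chrt -T */
			.sched_deadline = 100000000,	/* 100 ms, matches chrt -D */
			.sched_period   = 100000000,	/* defaults to deadline if 0 */
		};

		/* pid 0 addresses the calling thread; flags are currently unused */
		if (syscall(SYS_sched_setattr, 0, &attr, 0))
			perror("sched_setattr");

		for (;;)
			;	/* CPU hog now runs inside the 10ms/100ms reservation */
	}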
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index bafa32dd375d..1a5ad184d28f 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -224,9 +224,9 @@ static void __init cppc_freq_invariance_init(void)
* Fake (unused) bandwidth; workaround to "fix"
* priority inheritance.
*/
- .sched_runtime = 1000000,
- .sched_deadline = 10000000,
- .sched_period = 10000000,
+ .sched_runtime = NSEC_PER_MSEC,
+ .sched_deadline = 10 * NSEC_PER_MSEC,
+ .sched_period = 10 * NSEC_PER_MSEC,
};
int ret;
diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c
index 3a494c5d1247..9cbd3c14c94f 100644
--- a/fs/bcachefs/six.c
+++ b/fs/bcachefs/six.c
@@ -335,7 +335,7 @@ static inline bool six_owner_running(struct six_lock *lock)
*/
rcu_read_lock();
struct task_struct *owner = READ_ONCE(lock->owner);
- bool ret = owner ? owner_on_cpu(owner) : !rt_task(current);
+ bool ret = owner ? owner_on_cpu(owner) : !rt_or_dl_task(current);
rcu_read_unlock();
return ret;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e7810f3bd522..b31283d81c52 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2626,7 +2626,7 @@ static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
}
task_lock(p);
- if (task_is_realtime(p))
+ if (rt_or_dl_task_policy(p))
slack_ns = 0;
else if (slack_ns == 0)
slack_ns = p->default_timer_slack_ns;
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index db1249cd9692..b25377b6ea98 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -40,7 +40,7 @@ static inline int task_nice_ioclass(struct task_struct *task)
{
if (task->policy == SCHED_IDLE)
return IOPRIO_CLASS_IDLE;
- else if (task_is_realtime(task))
+ else if (rt_or_dl_task_policy(task))
return IOPRIO_CLASS_RT;
else
return IOPRIO_CLASS_BE;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3773c1c8f099..a1d0c7cab25c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -149,8 +149,9 @@ struct user_event_mm;
* Special states are those that do not use the normal wait-loop pattern. See
* the comment with set_special_state().
*/
-#define is_special_task_state(state) \
- ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))
+#define is_special_task_state(state) \
+ ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | \
+ TASK_DEAD | TASK_FROZEN))
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
# define debug_normal_state_change(state_value) \
@@ -541,9 +542,14 @@ struct sched_entity {
struct rb_node run_node;
u64 deadline;
u64 min_vruntime;
+ u64 min_slice;
struct list_head group_node;
- unsigned int on_rq;
+ unsigned char on_rq;
+ unsigned char sched_delayed;
+ unsigned char rel_deadline;
+ unsigned char custom_slice;
+ /* hole */
u64 exec_start;
u64 sum_exec_runtime;
@@ -639,12 +645,26 @@ struct sched_dl_entity {
*
* @dl_overrun tells if the task asked to be informed about runtime
* overruns.
+ *
+ * @dl_server tells if this is a server entity.
+ *
+ * @dl_defer tells if this is a deferred or regular server. For
+ * now, only the deferred server exists.
+ *
+ * @dl_defer_armed tells if the deferrable server is waiting
+ * for the replenishment timer to activate it.
+ *
+ * @dl_defer_running tells if the deferrable server is actually
+ * running, skipping the defer phase.
*/
unsigned int dl_throttled : 1;
unsigned int dl_yielded : 1;
unsigned int dl_non_contending : 1;
unsigned int dl_overrun : 1;
unsigned int dl_server : 1;
+ unsigned int dl_defer : 1;
+ unsigned int dl_defer_armed : 1;
+ unsigned int dl_defer_running : 1;
/*
* Bandwidth enforcement timer. Each -deadline task has its
@@ -672,7 +692,7 @@ struct sched_dl_entity {
*/
struct rq *rq;
dl_server_has_tasks_f server_has_tasks;
- dl_server_pick_f server_pick;
+ dl_server_pick_f server_pick_task;
#ifdef CONFIG_RT_MUTEXES
/*
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index df3aca89d4f5..3a912ab42bb5 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -10,16 +10,16 @@
#include <linux/sched.h>
-#define MAX_DL_PRIO 0
-
-static inline int dl_prio(int prio)
+static inline bool dl_prio(int prio)
{
- if (unlikely(prio < MAX_DL_PRIO))
- return 1;
- return 0;
+ return unlikely(prio < MAX_DL_PRIO);
}
-static inline int dl_task(struct task_struct *p)
+/*
+ * Returns true if a task has a priority that belongs to the DL class.
+ * PI-boosted tasks will return true. Use dl_policy() to ignore PI-boosted tasks.
+ */
+static inline bool dl_task(struct task_struct *p)
{
return dl_prio(p->prio);
}
diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h
index ab83d85e1183..6ab43b4f72f9 100644
--- a/include/linux/sched/prio.h
+++ b/include/linux/sched/prio.h
@@ -14,6 +14,7 @@
*/
#define MAX_RT_PRIO 100
+#define MAX_DL_PRIO 0
#define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH)
#define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2)
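With MAX_DL_PRIO now defined alongside MAX_RT_PRIO, the predicates in this series partition the kernel's unified priority scale cleanly. An illustrative sketch of the resulting ranges (the calls are hypothetical; the values follow from MAX_DL_PRIO == 0, MAX_RT_PRIO == 100 and NICE_WIDTH == 40):

	dl_prio(-1);	/* true:  prio < MAX_DL_PRIO is the deadline range */
	dl_prio(0);	/* false: 0 is already the highest RT priority */
	rt_prio(0);	/* true:  MAX_DL_PRIO <= prio < MAX_RT_PRIO */
	rt_prio(99);	/* true:  lowest RT priority */
	rt_prio(100);	/* false: 100..139 is the fair range (nice -20..19) */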
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index b2b9e6eb9683..4e3338103654 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -6,19 +6,40 @@
struct task_struct;
-static inline int rt_prio(int prio)
+static inline bool rt_prio(int prio)
{
- if (unlikely(prio < MAX_RT_PRIO))
- return 1;
- return 0;
+ return unlikely(prio < MAX_RT_PRIO && prio >= MAX_DL_PRIO);
}
-static inline int rt_task(struct task_struct *p)
+static inline bool rt_or_dl_prio(int prio)
+{
+ return unlikely(prio < MAX_RT_PRIO);
+}
+
+/*
+ * Returns true if a task has a priority that belongs to the RT class.
+ * PI-boosted tasks will return true. Use rt_policy() to ignore PI-boosted tasks.
+ */
+static inline bool rt_task(struct task_struct *p)
{
return rt_prio(p->prio);
}
-static inline bool task_is_realtime(struct task_struct *tsk)
+/*
+ * Returns true if a task has a priority that belongs to the RT or DL classes.
+ * PI-boosted tasks will return true. Use rt_or_dl_task_policy() to ignore
+ * PI-boosted tasks.
+ */
+static inline bool rt_or_dl_task(struct task_struct *p)
+{
+ return rt_or_dl_prio(p->prio);
+}
+
+/*
+ * Returns true if a task has a policy that belongs to the RT or DL classes.
+ * PI-boosted tasks will return false.
+ */
+static inline bool rt_or_dl_task_policy(struct task_struct *tsk)
{
int policy = tsk->policy;
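The practical difference between the prio-based and policy-based helpers is PI boosting: p->prio can be temporarily boosted into the RT/DL range by rt_mutex inheritance while p->policy stays put. A hedged sketch of a hypothetical caller choosing between them (skip_spin() is made up for illustration):

	if (rt_or_dl_task(p))		/* also true while PI-boosted */
		skip_spin();		/* e.g. the rwsem spinning heuristic below */

	if (rt_or_dl_task_policy(p))	/* ignores boosting; true only for tasks
					 * whose own policy is SCHED_FIFO,
					 * SCHED_RR or SCHED_DEADLINE */
		slack_ns = 0;		/* as in the fs/proc/base.c hunk above */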
diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h
index 90662385689b..bf6e9ae031c1 100644
--- a/include/uapi/linux/sched/types.h
+++ b/include/uapi/linux/sched/types.h
@@ -58,9 +58,9 @@
*
* This is reflected by the following fields of the sched_attr structure:
*
- * @sched_deadline representative of the task's deadline
- * @sched_runtime representative of the task's runtime
- * @sched_period representative of the task's period
+ * @sched_deadline representative of the task's deadline in nanoseconds
+ * @sched_runtime representative of the task's runtime in nanoseconds
+ * @sched_period representative of the task's period in nanoseconds
*
* Given this task model, there are a multiplicity of scheduling algorithms
* and policies, that can be used to ensure all the tasks will make their
diff --git a/kernel/freezer.c b/kernel/freezer.c
index f57aaf96b829..44bbd7dbd2c8 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -72,7 +72,7 @@ bool __refrigerator(bool check_kthr_stop)
bool freeze;
raw_spin_lock_irq(&current->pi_lock);
- set_current_state(TASK_FROZEN);
+ WRITE_ONCE(current->__state, TASK_FROZEN);
/* unstale saved_state so that __thaw_task() will wake us up */
current->saved_state = TASK_RUNNING;
raw_spin_unlock_irq(&current->pi_lock);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index f7be976ff88a..db4ceb0f503c 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -845,8 +845,16 @@ repeat:
* event only cares about the address.
*/
trace_sched_kthread_work_execute_end(work, func);
- } else if (!freezing(current))
+ } else if (!freezing(current)) {
schedule();
+ } else {
+ /*
+ * Handle the case where current remains
+ * TASK_INTERRUPTIBLE. try_to_freeze() expects
+ * current to be TASK_RUNNING.
+ */
+ __set_current_state(TASK_RUNNING);
+ }
try_to_freeze();
cond_resched();
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index fba1229f1de6..ebebd0eec7f6 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -347,7 +347,7 @@ static __always_inline int __waiter_prio(struct task_struct *task)
{
int prio = task->prio;
- if (!rt_prio(prio))
+ if (!rt_or_dl_prio(prio))
return DEFAULT_PRIO;
return prio;
@@ -435,7 +435,7 @@ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
* Note that RT tasks are excluded from same priority (lateral)
* steals to prevent the introduction of an unbounded latency.
*/
- if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio))
+ if (rt_or_dl_prio(waiter->tree.prio))
return false;
return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 33cac79e3994..5ded7dff46ef 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -631,7 +631,7 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
* if it is an RT task or wait in the wait queue
* for too long.
*/
- if (has_handoff || (!rt_task(waiter->task) &&
+ if (has_handoff || (!rt_or_dl_task(waiter->task) &&
!time_after(jiffies, waiter->timeout)))
return false;
@@ -914,7 +914,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
if (owner_state != OWNER_WRITER) {
if (need_resched())
break;
- if (rt_task(current) &&
+ if (rt_or_dl_task(current) &&
(prev_owner_state != OWNER_WRITER))
break;
}
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
index 3ad2cc4823e5..76d204b7d29c 100644
--- a/kernel/locking/ww_mutex.h
+++ b/kernel/locking/ww_mutex.h
@@ -237,7 +237,7 @@ __ww_ctx_less(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
int a_prio = a->task->prio;
int b_prio = b->task->prio;
- if (rt_prio(a_prio) || rt_prio(b_prio)) {
+ if (rt_or_dl_prio(a_prio) || rt_or_dl_prio(b_prio)) {
if (a_prio > b_prio)
return true;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1d7f5941bcdc..a7af49b3a337 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -163,7 +163,10 @@ static inline int __task_prio(const struct task_struct *p)
if (p->sched_class == &stop_sched_class) /* trumps deadline */
return -2;
- if (rt_prio(p->prio)) /* includes deadline */
+ if (p->dl_server)
+ return -1; /* deadline */
+
+ if (rt_or_dl_prio(p->prio))
return p->prio; /* [-1, 99] */
if (p->sched_class == &idle_sched_class)
@@ -192,8 +195,24 @@ static inline bool prio_less(const struct task_struct *a,
if (-pb < -pa)
return false;
- if (pa == -1) /* dl_prio() doesn't work because of stop_class above */
- return !dl_time_before(a->dl.deadline, b->dl.deadline);
+ if (pa == -1) { /* dl_prio() doesn't work because of stop_class above */
+ const struct sched_dl_entity *a_dl, *b_dl;
+
+ a_dl = &a->dl;
+ /*
+ * Since 'a' and 'b' can be CFS tasks served by a DL server,
+ * __task_prio() can return -1 (for DL) even for those. In that
+ * case, get to the dl_server's DL entity.
+ */
+ if (a->dl_server)
+ a_dl = a->dl_server;
+
+ b_dl = &b->dl;
+ if (b->dl_server)
+ b_dl = b->dl_server;
+
+ return !dl_time_before(a_dl->deadline, b_dl->deadline);
+ }
if (pa == MAX_RT_PRIO + MAX_NICE) /* fair */
return cfs_prio_less(a, b, in_fi);
@@ -240,6 +259,9 @@ static inline int rb_sched_core_cmp(const void *key, const struct rb_node *node)
void sched_core_enqueue(struct rq *rq, struct task_struct *p)
{
+ if (p->se.sched_delayed)
+ return;
+
rq->core->core_task_seq++;
if (!p->core_cookie)
@@ -250,6 +272,9 @@ void sched_core_enqueue(struct rq *rq, struct task_struct *p)
void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags)
{
+ if (p->se.sched_delayed)
+ return;
+
rq->core->core_task_seq++;
if (sched_core_enqueued(p)) {
@@ -1269,7 +1294,7 @@ bool sched_can_stop_tick(struct rq *rq)
* dequeued by migrating while the constrained task continues to run.
* E.g. going from 2->1 without going through pick_next_task().
*/
- if (sched_feat(HZ_BW) && __need_bw_check(rq, rq->curr)) {
+ if (__need_bw_check(rq, rq->curr)) {
if (cfs_task_bw_constrained(rq->curr))
return false;
}
@@ -1672,6 +1697,9 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
if (unlikely(!p->sched_class->uclamp_enabled))
return;
+ if (p->se.sched_delayed)
+ return;
+
for_each_clamp_id(clamp_id)
uclamp_rq_inc_id(rq, p, clamp_id);
@@ -1696,6 +1724,9 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
if (unlikely(!p->sched_class->uclamp_enabled))
return;
+ if (p->se.sched_delayed)
+ return;
+
for_each_clamp_id(clamp_id)
uclamp_rq_dec_id(rq, p, clamp_id);
}
@@ -1975,14 +2006,21 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
psi_enqueue(p, (flags & ENQUEUE_WAKEUP) && !(flags & ENQUEUE_MIGRATED));
}
- uclamp_rq_inc(rq, p);
p->sched_class->enqueue_task(rq, p, flags);
+ /*
+ * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
+ * ->sched_delayed.
+ */
+ uclamp_rq_inc(rq, p);
if (sched_core_enabled(rq))
sched_core_enqueue(rq, p);
}
-void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
+/*
+ * Must only return false when DEQUEUE_SLEEP.
+ */
+inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
{
if (sched_core_enabled(rq))
sched_core_dequeue(rq, p, flags);
@@ -1995,8 +2033,12 @@ void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
psi_dequeue(p, flags & DEQUEUE_SLEEP);
}
+ /*
+ * Must be before ->dequeue_task() because ->dequeue_task() can 'fail'
+ * and mark the task ->sched_delayed.
+ */
uclamp_rq_dec(rq, p);
- p->sched_class->dequeue_task(rq, p, flags);
+ return p->sched_class->dequeue_task(rq, p, flags);
}
void activate_task(struct rq *rq, struct task_struct *p, int flags)
@@ -2014,12 +2056,25 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
{
- WRITE_ONCE(p->on_rq, (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING);
+ SCHED_WARN_ON(flags & DEQUEUE_SLEEP);
+
+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
ASSERT_EXCLUSIVE_WRITER(p->on_rq);
+ /*
+ * Code explicitly relies on TASK_ON_RQ_MIGRATING being set *before*
+ * dequeue_task() and cleared *after* enqueue_task().
+ */
+
dequeue_task(rq, p, flags);
}
+static void block_task(struct rq *rq, struct task_struct *p, int flags)
+{
+ if (dequeue_task(rq, p, DEQUEUE_SLEEP | flags))
+ __block_task(rq, p);
+}
+
/**
* task_curr - is this task currently executing on a CPU?
* @p: the task in question.
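The dequeue_task() and block_task() changes above pin down a new contract: dequeue_task() may return false only for a DEQUEUE_SLEEP dequeue, in which case the task stays queued but is marked sched_delayed, and block_task() completes the block only when the dequeue actually happened. A condensed restatement as hypothetical assertions (not part of the patch):

	bool done = dequeue_task(rq, p, flags);
	if (!done) {
		/* only a sleeping dequeue may "fail", leaving a delayed task */
		SCHED_WARN_ON(!(flags & DEQUEUE_SLEEP));
		SCHED_WARN_ON(!p->se.sched_delayed);
	}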
@@ -2233,6 +2288,12 @@ void migrate_disable(void)
struct task_struct *p = current;
if (p->migration_disabled) {
+#ifdef CONFIG_DEBUG_PREEMPT
+ /*
+ * Warn about overflow half-way through the range.
+ */
+ WARN_ON_ONCE((s16)p->migration_disabled < 0);
+#endif
p->migration_disabled++;
return;
}
@@ -2251,14 +2312,20 @@ void migrate_enable(void)
.flags = SCA_MIGRATE_ENABLE,
};
+#ifdef CONFIG_DEBUG_PREEMPT
+ /*
+ * Check both overflow from migrate_disable() and superfluous
+ * migrate_enable().
+ */
+ if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
+ return;
+#endif
+
if (p->migration_disabled > 1) {
p->migration_disabled--;
return;
}
- if (WARN_ON_ONCE(!p->migration_disabled))
- return;
-
/*
* Ensure stop_task runs either before or after this, and that
* __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
@@ -3607,8 +3674,6 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
rq->idle_stamp = 0;
}
#endif
-
- p->dl_server = NULL;
}
/*
@@ -3644,12 +3709,14 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
rq = __task_rq_lock(p, &rf);
if (task_on_rq_queued(p)) {
+ update_rq_clock(rq);
+ if (p->se.sched_delayed)
+ enqueue_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_DELAYED);
if (!task_on_cpu(rq, p)) {
/*
* When on_rq && !on_cpu the task is preempted, see if
* it should preempt the task that is current now.
*/
- update_rq_clock(rq);
wakeup_preempt(rq, p, wake_flags);
}
ttwu_do_wakeup(p);
@@ -4029,11 +4096,16 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
* case the whole 'p->on_rq && ttwu_runnable()' case below
* without taking any locks.
*
+ * Specifically, given that current runs ttwu(), we must be before
+ * schedule()'s block_task(); as such, this must not observe
+ * sched_delayed.
+ *
* In particular:
* - we rely on Program-Order guarantees for all the ordering,
* - we're serialized against set_special_state() by virtue of
* it disabling IRQs (this allows not taking ->pi_lock).
*/
+ SCHED_WARN_ON(p->se.sched_delayed);
if (!ttwu_state_match(p, state, &success))
goto out;
@@ -4322,9 +4394,11 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.nr_migrations = 0;
p->se.vruntime = 0;
p->se.vlag = 0;
- p->se.slice = sysctl_sched_base_slice;
INIT_LIST_HEAD(&p->se.group_node);
+ /* A delayed task cannot be in clone(). */
+ SCHED_WARN_ON(p->se.sched_delayed);
+
#ifdef CONFIG_FAIR_GROUP_SCHED
p->se.cfs_rq = NULL;
#endif
@@ -4572,6 +4646,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->prio = p->normal_prio = p->static_prio;
set_load_weight(p, false);
+ p->se.custom_slice = 0;
+ p->se.slice = sysctl_sched_base_slice;
/*
* We don't need the reset flag anymore after the fork. It has
@@ -4686,7 +4762,7 @@ void wake_up_new_task(struct task_struct *p)
update_rq_clock(rq);
post_init_entity_util_avg(p);
- activate_task(rq, p, ENQUEUE_NOCLOCK);
+ activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL);
trace_sched_wakeup_new(p);
wakeup_preempt(rq, p, WF_FORK);
#ifdef CONFIG_SMP
@@ -5769,8 +5845,8 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
schedstat_inc(this_rq()->sched_count);
}
-static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
- struct rq_flags *rf)
+static void prev_balance(struct rq *rq, struct task_struct *prev,
+ struct rq_flags *rf)
{
#ifdef CONFIG_SMP
const struct sched_class *class;
@@ -5787,8 +5863,6 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
break;
}
#endif
-
- put_prev_task(rq, prev);
}
/*
@@ -5800,6 +5874,8 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
const struct sched_class *class;
struct task_struct *p;
+ rq->dl_server = NULL;
+
/*
* Optimization: we know that if all tasks are in the fair class we can
* call that function directly, but only if the @prev task wasn't of a
@@ -5815,35 +5891,28 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
/* Assume the next prioritized class is idle_sched_class */
if (!p) {
- put_prev_task(rq, prev);
- p = pick_next_task_idle(rq);
+ p = pick_task_idle(rq);
+ put_prev_set_next_task(rq, prev, p);
}
- /*
- * This is the fast path; it cannot be a DL server pick;
- * therefore even if @p == @prev, ->dl_server must be NULL.
- */
- if (p->dl_server)
- p->dl_server = NULL;
-
return p;
}
restart:
- put_prev_task_balance(rq, prev, rf);
-
- /*
- * We've updated @prev and no longer need the server link, clear it.
- * Must be done before ->pick_next_task() because that can (re)set
- * ->dl_server.
- */
- if (prev->dl_server)
- prev->dl_server = NULL;
+ prev_balance(rq, prev, rf);
for_each_class(class) {
- p = class->pick_next_task(rq);
- if (p)
- return p;
+ if (class->pick_next_task) {
+ p = class->pick_next_task(rq, prev);
+ if (p)
+ return p;
+ } else {
+ p = class->pick_task(rq);
+ if (p) {
+ put_prev_set_next_task(rq, prev, p);
+ return p;
+ }
+ }
}
BUG(); /* The idle class should always have a runnable task. */
@@ -5873,6 +5942,8 @@ static inline struct task_struct *pick_task(struct rq *rq)
const struct sched_class *class;
struct task_struct *p;
+ rq->dl_server = NULL;
+
for_each_class(class) {
p = class->pick_task(rq);
if (p)
@@ -5911,6 +5982,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
* another cpu during offline.
*/
rq->core_pick = NULL;
+ rq->core_dl_server = NULL;
return __pick_next_task(rq, prev, rf);
}
@@ -5929,16 +6001,13 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
WRITE_ONCE(rq->core_sched_seq, rq->core->core_pick_seq);
next = rq->core_pick;
- if (next != prev) {
- put_prev_task(rq, prev);
- set_next_task(rq, next);
- }
-
+ rq->dl_server = rq->core_dl_server;
rq->core_pick = NULL;
- goto out;
+ rq->core_dl_server = NULL;
+ goto out_set_next;
}
- put_prev_task_balance(rq, prev, rf);
+ prev_balance(rq, prev, rf);
smt_mask = cpu_smt_mask(cpu);
need_sync = !!rq->core->core_cookie;
@@ -5979,6 +6048,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
next = pick_task(rq);
if (!next->core_cookie) {
rq->core_pick = NULL;
+ rq->core_dl_server = NULL;
/*
* For robustness, update the min_vruntime_fi for
* unconstrained picks as well.
@@ -6006,7 +6076,9 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
if (i != cpu && (rq_i != rq->core || !core_clock_updated))
update_rq_clock(rq_i);
- p = rq_i->core_pick = pick_task(rq_i);
+ rq_i->core_pick = p = pick_task(rq_i);
+ rq_i->core_dl_server = rq_i->dl_server;
+
if (!max || prio_less(max, p, fi_before))
max = p;
}
@@ -6030,6 +6102,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}
rq_i->core_pick = p;
+ rq_i->core_dl_server = NULL;
if (p == rq_i->idle) {
if (rq_i->nr_running) {
@@ -6090,6 +6163,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
if (i == cpu) {
rq_i->core_pick = NULL;
+ rq_i->core_dl_server = NULL;
continue;
}
@@ -6098,6 +6172,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
if (rq_i->curr == rq_i->core_pick) {
rq_i->core_pick = NULL;
+ rq_i->core_dl_server = NULL;
continue;
}
@@ -6105,8 +6180,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}
out_set_next:
- set_next_task(rq, next);
-out:
+ put_prev_set_next_task(rq, prev, next);
if (rq->core->core_forceidle_count && next == rq->idle)
queue_core_balance(rq);
@@ -6342,19 +6416,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
* Constants for the sched_mode argument of __schedule().
*
* The mode argument allows RT enabled kernels to differentiate a