summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CREDITS3
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt4
-rw-r--r--MAINTAINERS1
-rw-r--r--include/linux/preempt.h41
-rw-r--r--include/linux/sched.h41
-rw-r--r--include/linux/spinlock.h14
-rw-r--r--kernel/sched/build_policy.c1
-rw-r--r--kernel/sched/clock.c4
-rw-r--r--kernel/sched/core.c1874
-rw-r--r--kernel/sched/core_sched.c2
-rw-r--r--kernel/sched/cputime.c14
-rw-r--r--kernel/sched/deadline.c8
-rw-r--r--kernel/sched/fair.c18
-rw-r--r--kernel/sched/idle.c12
-rw-r--r--kernel/sched/loadavg.c4
-rw-r--r--kernel/sched/pelt.c4
-rw-r--r--kernel/sched/psi.c60
-rw-r--r--kernel/sched/rt.c22
-rw-r--r--kernel/sched/sched.h434
-rw-r--r--kernel/sched/stats.h2
-rw-r--r--kernel/sched/syscalls.c1699
-rw-r--r--kernel/sched/topology.c12
-rw-r--r--kernel/sched/wait_bit.c4
23 files changed, 2183 insertions, 2095 deletions
diff --git a/CREDITS b/CREDITS
index 1a762cf46b49..d6cbd4c792a1 100644
--- a/CREDITS
+++ b/CREDITS
@@ -271,6 +271,9 @@ D: Driver for WaveFront soundcards (Turtle Beach Maui, Tropez, Tropez+)
D: Various bugfixes and changes to sound drivers
S: USA
+N: Daniel Bristot de Oliveira
+D: Scheduler contributions, notably: SCHED_DEADLINE
+
N: Carlos Henrique Bauer
E: chbauer@acm.org
E: bauer@atlas.unisinos.br
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 1bd225eca34c..740ca2bc2822 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4728,7 +4728,9 @@
none - Limited to cond_resched() calls
voluntary - Limited to cond_resched() and might_sleep() calls
full - Any section that isn't explicitly preempt disabled
- can be preempted anytime.
+ can be preempted anytime. Tasks will also yield
+ contended spinlocks (if the critical section isn't
+ explicitly preempt disabled beyond the lock itself).
print-fatal-signals=
[KNL] debug: print fatal signals
diff --git a/MAINTAINERS b/MAINTAINERS
index 73d6d4979bc9..8377529c9b95 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -20047,7 +20047,6 @@ R: Dietmar Eggemann <dietmar.eggemann@arm.com> (SCHED_NORMAL)
R: Steven Rostedt <rostedt@goodmis.org> (SCHED_FIFO/SCHED_RR)
R: Ben Segall <bsegall@google.com> (CONFIG_CFS_BANDWIDTH)
R: Mel Gorman <mgorman@suse.de> (CONFIG_NUMA_BALANCING)
-R: Daniel Bristot de Oliveira <bristot@redhat.com> (SCHED_DEADLINE)
R: Valentin Schneider <vschneid@redhat.com> (TOPOLOGY)
L: linux-kernel@vger.kernel.org
S: Maintained
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 7233e9cf1bab..ce76f1a45722 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -481,4 +481,45 @@ DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable())
DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace())
DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
+extern bool preempt_model_none(void);
+extern bool preempt_model_voluntary(void);
+extern bool preempt_model_full(void);
+
+#else
+
+static inline bool preempt_model_none(void)
+{
+ return IS_ENABLED(CONFIG_PREEMPT_NONE);
+}
+static inline bool preempt_model_voluntary(void)
+{
+ return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY);
+}
+static inline bool preempt_model_full(void)
+{
+ return IS_ENABLED(CONFIG_PREEMPT);
+}
+
+#endif
+
+static inline bool preempt_model_rt(void)
+{
+ return IS_ENABLED(CONFIG_PREEMPT_RT);
+}
+
+/*
+ * Does the preemption model allow non-cooperative preemption?
+ *
+ * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with
+ * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the
+ * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the
+ * PREEMPT_NONE model.
+ */
+static inline bool preempt_model_preemptible(void)
+{
+ return preempt_model_full() || preempt_model_rt();
+}
+
#endif /* __LINUX_PREEMPT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a5f4b48fca18..76214d7c819d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2064,47 +2064,6 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock);
__cond_resched_rwlock_write(lock); \
})
-#ifdef CONFIG_PREEMPT_DYNAMIC
-
-extern bool preempt_model_none(void);
-extern bool preempt_model_voluntary(void);
-extern bool preempt_model_full(void);
-
-#else
-
-static inline bool preempt_model_none(void)
-{
- return IS_ENABLED(CONFIG_PREEMPT_NONE);
-}
-static inline bool preempt_model_voluntary(void)
-{
- return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY);
-}
-static inline bool preempt_model_full(void)
-{
- return IS_ENABLED(CONFIG_PREEMPT);
-}
-
-#endif
-
-static inline bool preempt_model_rt(void)
-{
- return IS_ENABLED(CONFIG_PREEMPT_RT);
-}
-
-/*
- * Does the preemption model allow non-cooperative preemption?
- *
- * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with
- * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the
- * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the
- * PREEMPT_NONE model.
- */
-static inline bool preempt_model_preemptible(void)
-{
- return preempt_model_full() || preempt_model_rt();
-}
-
static __always_inline bool need_resched(void)
{
return unlikely(tif_need_resched());
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 3fcd20de6ca8..63dd8cf3c3c2 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -462,11 +462,10 @@ static __always_inline int spin_is_contended(spinlock_t *lock)
*/
static inline int spin_needbreak(spinlock_t *lock)
{
-#ifdef CONFIG_PREEMPTION
+ if (!preempt_model_preemptible())
+ return 0;
+
return spin_is_contended(lock);
-#else
- return 0;
-#endif
}
/*
@@ -479,11 +478,10 @@ static inline int spin_needbreak(spinlock_t *lock)
*/
static inline int rwlock_needbreak(rwlock_t *lock)
{
-#ifdef CONFIG_PREEMPTION
+ if (!preempt_model_preemptible())
+ return 0;
+
return rwlock_is_contended(lock);
-#else
- return 0;
-#endif
}
/*
diff --git a/kernel/sched/build_policy.c b/kernel/sched/build_policy.c
index d9dc9ab3773f..39c315182b35 100644
--- a/kernel/sched/build_policy.c
+++ b/kernel/sched/build_policy.c
@@ -52,3 +52,4 @@
#include "cputime.c"
#include "deadline.c"
+#include "syscalls.c"
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index 3c6193de9cde..a09655b48140 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -340,7 +340,7 @@ again:
this_clock = sched_clock_local(my_scd);
/*
* We must enforce atomic readout on 32-bit, otherwise the
- * update on the remote CPU can hit inbetween the readout of
+ * update on the remote CPU can hit in between the readout of
* the low 32-bit and the high 32-bit portion.
*/
remote_clock = cmpxchg64(&scd->clock, 0, 0);
@@ -444,7 +444,7 @@ notrace void sched_clock_tick_stable(void)
}
/*
- * We are going deep-idle (irqs are disabled):
+ * We are going deep-idle (IRQs are disabled):
*/
notrace void sched_clock_idle_sleep_event(void)
{
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 35a35e36024b..ae5ef3013a55 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2,9 +2,10 @@
/*
* kernel/sched/core.c
*
- * Core kernel scheduler code and related syscalls
+ * Core kernel CPU scheduler code
*
* Copyright (C) 1991-2002 Linus Torvalds
+ * Copyright (C) 1998-2024 Ingo Molnar, Red Hat
*/
#include <linux/highmem.h>
#include <linux/hrtimer_api.h>
@@ -706,14 +707,14 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
/*
* Since irq_time is only updated on {soft,}irq_exit, we might run into
* this case when a previous update_rq_clock() happened inside a
- * {soft,}irq region.
+ * {soft,}IRQ region.
*
* When this happens, we stop ->clock_task and only update the
* prev_irq_time stamp to account for the part that fit, so that a next
* update will consume the rest. This ensures ->clock_task is
* monotonic.
*
- * It does however cause some slight miss-attribution of {soft,}irq
+ * It does however cause some slight miss-attribution of {soft,}IRQ
* time, a more accurate solution would be to update the irq_time using
* the current rq->clock timestamp, except that would require using
* atomic ops.
@@ -825,7 +826,7 @@ static void __hrtick_start(void *arg)
/*
* Called to set the hrtick timer state.
*
- * called with rq->lock held and irqs disabled
+ * called with rq->lock held and IRQs disabled
*/
void hrtick_start(struct rq *rq, u64 delay)
{
@@ -849,7 +850,7 @@ void hrtick_start(struct rq *rq, u64 delay)
/*
* Called to set the hrtick timer state.
*
- * called with rq->lock held and irqs disabled
+ * called with rq->lock held and IRQs disabled
*/
void hrtick_start(struct rq *rq, u64 delay)
{
@@ -883,7 +884,7 @@ static inline void hrtick_rq_init(struct rq *rq)
#endif /* CONFIG_SCHED_HRTICK */
/*
- * cmpxchg based fetch_or, macro so it works for different integer types
+ * try_cmpxchg based fetch_or() macro so it works for different integer types:
*/
#define fetch_or(ptr, mask) \
({ \
@@ -1080,7 +1081,7 @@ void resched_cpu(int cpu)
*
* We don't do similar optimization for completely idle system, as
* selecting an idle CPU will add more delays to the timers than intended
- * (as that CPU's timer base may not be uptodate wrt jiffies etc).
+ * (as that CPU's timer base may not be up to date wrt jiffies etc).
*/
int get_nohz_timer_target(void)
{
@@ -1140,7 +1141,7 @@ static void wake_up_idle_cpu(int cpu)
* nohz functions that would need to follow TIF_NR_POLLING
* clearing:
*
- * - On most archs, a simple fetch_or on ti::flags with a
+ * - On most architectures, a simple fetch_or on ti::flags with a
* "0" value would be enough to know if an IPI needs to be sent.
*
* - x86 needs to perform a last need_resched() check between
@@ -1323,30 +1324,27 @@ int tg_nop(struct task_group *tg, void *data)
}
#endif
-static void set_load_weight(struct task_struct *p, bool update_load)
+void set_load_weight(struct task_struct *p, bool update_load)
{
int prio = p->static_prio - MAX_RT_PRIO;
- struct load_weight *load = &p->se.load;
+ struct load_weight lw;
- /*
- * SCHED_IDLE tasks get minimal weight:
- */
if (task_has_idle_policy(p)) {
- load->weight = scale_load(WEIGHT_IDLEPRIO);
- load->inv_weight = WMULT_IDLEPRIO;
- return;
+ lw.weight = scale_load(WEIGHT_IDLEPRIO);
+ lw.inv_weight = WMULT_IDLEPRIO;
+ } else {
+ lw.weight = scale_load(sched_prio_to_weight[prio]);
+ lw.inv_weight = sched_prio_to_wmult[prio];
}
/*
* SCHED_OTHER tasks have to update their load when changing their
* weight
*/
- if (update_load && p->sched_class == &fair_sched_class) {
- reweight_task(p, prio);
- } else {
- load->weight = scale_load(sched_prio_to_weight[prio]);
- load->inv_weight = sched_prio_to_wmult[prio];
- }
+ if (update_load && p->sched_class == &fair_sched_class)
+ reweight_task(p, &lw);
+ else
+ p->se.load = lw;
}
#ifdef CONFIG_UCLAMP_TASK
@@ -1383,7 +1381,7 @@ static unsigned int __maybe_unused sysctl_sched_uclamp_util_max = SCHED_CAPACITY
* This knob will not override the system default sched_util_clamp_min defined
* above.
*/
-static unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE;
+unsigned int sysctl_sched_uclamp_util_min_rt_default = SCHED_CAPACITY_SCALE;
/* All clamps are required to be less or equal than these values */
static struct uclamp_se uclamp_default[UCLAMP_CNT];
@@ -1408,32 +1406,6 @@ static struct uclamp_se uclamp_default[UCLAMP_CNT];
*/
DEFINE_STATIC_KEY_FALSE(sched_uclamp_used);
-/* Integer rounded range for each bucket */
-#define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)
-
-#define for_each_clamp_id(clamp_id) \
- for ((clamp_id) = 0; (clamp_id) < UCLAMP_CNT; (clamp_id)++)
-
-static inline unsigned int uclamp_bucket_id(unsigned int clamp_value)
-{
- return min_t(unsigned int, clamp_value / UCLAMP_BUCKET_DELTA, UCLAMP_BUCKETS - 1);
-}
-
-static inline unsigned int uclamp_none(enum uclamp_id clamp_id)
-{
- if (clamp_id == UCLAMP_MIN)
- return 0;
- return SCHED_CAPACITY_SCALE;
-}
-
-static inline void uclamp_se_set(struct uclamp_se *uc_se,
- unsigned int value, bool user_defined)
-{
- uc_se->value = value;
- uc_se->bucket_id = uclamp_bucket_id(value);
- uc_se->user_defined = user_defined;
-}
-
static inline unsigned int
uclamp_idle_value(struct rq *rq, enum uclamp_id clamp_id,
unsigned int clamp_value)
@@ -1675,7 +1647,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
rq_clamp = uclamp_rq_get(rq, clamp_id);
/*
* Defensive programming: this should never happen. If it happens,
- * e.g. due to future modification, warn and fixup the expected value.
+ * e.g. due to future modification, warn and fix up the expected value.
*/
SCHED_WARN_ON(bucket->value > rq_clamp);
if (bucket->value >= rq_clamp) {
@@ -1897,107 +1869,6 @@ undo:
}
#endif
-static int uclamp_validate(struct task_struct *p,
- const struct sched_attr *attr)
-{
- int util_min = p->uclamp_req[UCLAMP_MIN].value;
- int util_max = p->uclamp_req[UCLAMP_MAX].value;
-
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
- util_min = attr->sched_util_min;
-
- if (util_min + 1 > SCHED_CAPACITY_SCALE + 1)
- return -EINVAL;
- }
-
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
- util_max = attr->sched_util_max;
-
- if (util_max + 1 > SCHED_CAPACITY_SCALE + 1)
- return -EINVAL;
- }
-
- if (util_min != -1 && util_max != -1 && util_min > util_max)
- return -EINVAL;
-
- /*
- * We have valid uclamp attributes; make sure uclamp is enabled.
- *
- * We need to do that here, because enabling static branches is a
- * blocking operation which obviously cannot be done while holding
- * scheduler locks.
- */
- static_branch_enable(&sched_uclamp_used);
-
- return 0;
-}
-
-static bool uclamp_reset(const struct sched_attr *attr,
- enum uclamp_id clamp_id,
- struct uclamp_se *uc_se)
-{
- /* Reset on sched class change for a non user-defined clamp value. */
- if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) &&
- !uc_se->user_defined)
- return true;
-
- /* Reset on sched_util_{min,max} == -1. */
- if (clamp_id == UCLAMP_MIN &&
- attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN &&
- attr->sched_util_min == -1) {
- return true;
- }
-
- if (clamp_id == UCLAMP_MAX &&
- attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX &&
- attr->sched_util_max == -1) {
- return true;
- }
-
- return false;
-}
-
-static void __setscheduler_uclamp(struct task_struct *p,
- const struct sched_attr *attr)
-{
- enum uclamp_id clamp_id;
-
- for_each_clamp_id(clamp_id) {
- struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
- unsigned int value;
-
- if (!uclamp_reset(attr, clamp_id, uc_se))
- continue;
-
- /*
- * RT by default have a 100% boost value that could be modified
- * at runtime.
- */
- if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN))
- value = sysctl_sched_uclamp_util_min_rt_default;
- else
- value = uclamp_none(clamp_id);
-
- uclamp_se_set(uc_se, value, false);
-
- }
-
- if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
- return;
-
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN &&
- attr->sched_util_min != -1) {
- uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
- attr->sched_util_min, true);
- }
-
- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX &&
- attr->sched_util_max != -1) {
- uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
- attr->sched_util_max, true);
- }
-}
-
static void uclamp_fork(struct task_struct *p)
{
enum uclamp_id clamp_id;
@@ -2065,13 +1936,6 @@ static void __init init_uclamp(void)
#else /* !CONFIG_UCLAMP_TASK */
static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
-static inline int uclamp_validate(struct task_struct *p,
- const struct sched_attr *attr)
-{
- return -EOPNOTSUPP;
-}
-static void __setscheduler_uclamp(struct task_struct *p,
- const struct sched_attr *attr) { }
static inline void uclamp_fork(struct task_struct *p) { }
static inline void uclamp_post_fork(struct task_struct *p) { }
static inline void init_uclamp(void) { }
@@ -2101,7 +1965,7 @@ unsigned long get_wchan(struct task_struct *p)
return ip;
}
-static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
+void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
if (!(flags & ENQUEUE_NOCLOCK))
update_rq_clock(rq);
@@ -2118,7 +1982,7 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
sched_core_enqueue(rq, p);
}
-static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
+void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
{
if (sched_core_enabled(rq))
sched_core_dequeue(rq, p, flags);
@@ -2156,52 +2020,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
dequeue_task(rq, p, flags);
}
-static inline int __normal_prio(int policy, int rt_prio, int nice)
-{
- int prio;
-
- if (dl_policy(policy))
- prio = MAX_DL_PRIO - 1;
- else if (rt_policy(policy))
- prio = MAX_RT_PRIO - 1 - rt_prio;
- else
- prio = NICE_TO_PRIO(nice);
-
- return prio;
-}
-
-/*
- * Calculate the expected normal priority: i.e. priority
- * without taking RT-inheritance into account. Might be
- * boosted by interactivity modifiers. Changes upon fork,
- * setprio syscalls, and whenever the interactivity
- * estimator recalculates.
- */
-static inline int normal_prio(struct task_struct *p)
-{
- return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio));
-}
-
-/*
- * Calculate the current priority, i.e. the priority
- * taken into account by the scheduler. This value might
- * be boosted by RT tasks, or might be boosted by
- * interactivity modifiers. Will be RT if the task got
- * RT-boosted. If not then it returns p->normal_prio.
- */
-static int effective_prio(struct task_struct *p)
-{
- p->normal_prio = normal_prio(p);
- /*
- * If we are RT tasks or we were boosted to RT priority,
- * keep the priority unchanged. Otherwise, update priority
- * to the normal priority:
- */
- if (!rt_prio(p->prio))
- return p->normal_prio;
- return p->prio;
-}
-
/**
* task_curr - is this task currently executing on a CPU?
* @p: the task in question.
@@ -2220,9 +2038,9 @@ inline int task_curr(const struct task_struct *p)
* this means any call to check_class_changed() must be followed by a call to
* balance_callback().
*/
-static inline void check_class_changed(struct rq *rq, struct task_struct *p,
- const struct sched_class *prev_class,
- int oldprio)
+void check_class_changed(struct rq *rq, struct task_struct *p,
+ const struct sched_class *prev_class,
+ int oldprio)
{
if (prev_class != p->sched_class) {
if (prev_class->switched_from)
@@ -2391,9 +2209,6 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
static void
__do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx);
-static int __set_cpus_allowed_ptr(struct task_struct *p,
- struct affinity_context *ctx);
-
static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
{
struct affinity_context ac = {
@@ -2408,7 +2223,7 @@ static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
return;
/*
- * Violates locking rules! see comment in __do_set_cpus_allowed().
+ * Violates locking rules! See comment in __do_set_cpus_allowed().
*/
__do_set_cpus_allowed(p, &ac);
}
@@ -2575,7 +2390,7 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
}
/*
- * migration_cpu_stop - this will be executed by a highprio stopper thread
+ * migration_cpu_stop - this will be executed by a high-prio stopper thread
* and performs thread migration by bumping thread off CPU then
* 'pushing' onto another runqueue.
*/
@@ -2820,16 +2635,6 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
kfree_rcu((union cpumask_rcuhead *)ac.user_mask, rcu);
}
-static cpumask_t *alloc_user_cpus_ptr(int node)
-{
- /*
- * See do_set_cpus_allowed() above for the rcu_head usage.
- */
- int size = max_t(int, cpumask_size(), sizeof(struct rcu_head));
-
- return kmalloc_node(size, GFP_KERNEL, node);
-}
-
int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src,
int node)
{
@@ -3198,8 +3003,7 @@ out:
* task must not exit() & deallocate itself prematurely. The
* call is not atomic; no spinlocks may be held.
*/
-static int __set_cpus_allowed_ptr(struct task_struct *p,
- struct affinity_context *ctx)
+int __set_cpus_allowed_ptr(struct task_struct *p, struct affinity_context *ctx)
{
struct rq_flags rf;
struct rq *rq;
@@ -3318,9 +3122,6 @@ out_free_mask:
free_cpumask_var(new_mask);
}
-static int
-__sched_setaffinity(struct task_struct *p, struct affinity_context *ctx);
-
/*
* Restore the affinity of a task @p which was previously restricted by a
* call to force_compatible_cpus_allowed_ptr().
@@ -3700,12 +3501,6 @@ void sched_set_stop_task(int cpu, struct task_struct *stop)
#else /* CONFIG_SMP */
-static inline int __set_cpus_allowed_ptr(struct task_struct *p,
- struct affinity_context *ctx)
-{
- return set_cpus_allowed_ptr(p, ctx->new_mask);
-}
-
static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { }
static inline bool rq_has_pinned_tasks(struct rq *rq)
@@ -3713,11 +3508,6 @@ static inline bool rq_has_pinned_tasks(struct rq *rq)
return false;
}
-static inline cpumask_t *alloc_user_cpus_ptr(int node)
-{
- return NULL;
-}
-
#endif /* !CONFIG_SMP */
static void
@@ -3900,8 +3690,8 @@ void sched_ttwu_pending(void *arg)
* it is possible for select_idle_siblings() to stack a number
* of tasks on this CPU during that window.
*
- * It is ok to clear ttwu_pending when another task pending.
- * We will receive IPI after local irq enabled and then enqueue it.
+ * It is OK to clear ttwu_pending when another task pending.
+ * We will receive IPI after local IRQ enabled and then enqueue it.
* Since now nr_running > 0, idle_cpu() will always get correct result.
*/
WRITE_ONCE(rq->ttwu_pending, 0);
@@ -5094,7 +4884,7 @@ __splice_balance_callbacks(struct rq *rq, bool split)
return head;
}
-static inline struct balance_callback *splice_balance_callbacks(struct rq *rq)
+struct balance_callback *splice_balance_callbacks(struct rq *rq)
{
return __splice_balance_callbacks(rq, true);
}
@@ -5104,7 +4894,7 @@ static void __balance_callbacks(struct rq *rq)
do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
}
-static inline void balance_callbacks(struct rq *rq, struct balance_callback *head)
+void balance_callbacks(struct rq *rq, struct balance_callback *head)
{
unsigned long flags;
@@ -5121,15 +4911,6 @@ static inline void __balance_callbacks(struct rq *rq)
{
}
-static inline struct balance_callback *splice_balance_callbacks(struct rq *rq)
-{
- return NULL;
-}
-
-static inline void balance_callbacks(struct rq *rq, struct balance_callback *head)
-{
-}
-
#endif
static inline void
@@ -5232,7 +5013,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
*
* The context switch have flipped the stack from under us and restored the
* local variables which were saved when this task called schedule() in the
- * past. prev == current is still correct but we need to recalculate this_rq
+ * past. 'prev == current' is still correct but we need to recalculate this_rq
* because prev may have moved to another CPU.
*/
static struct rq *finish_task_switch(struct task_struct *prev)
@@ -5555,9 +5336,9 @@ EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
static inline void prefetch_curr_exec_start(struct task_struct *p)
{
#ifdef CONFIG_FAIR_GROUP_SCHED
- struct sched_entity *curr = (&p->se)->cfs_rq->curr;
+ struct sched_entity *curr = p->se.cfs_rq->curr;
#else
- struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
+ struct sched_entity *curr = task_rq(p)->cfs.curr;
#endif
prefetch(curr);
prefetch(&curr->exec_start);
@@ -5578,7 +5359,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
/*
* 64-bit doesn't need locks to atomically read a 64-bit value.
* So we have a optimization chance when the task's delta_exec is 0.
- * Reading ->on_cpu is racy, but this is ok.
+ * Reading ->on_cpu is racy, but this is OK.
*
* If we race with it leaving CPU, we'll take a lock. So we're correct.
* If we race with it entering CPU, unaccounted time is 0. This is
@@ -6856,7 +6637,7 @@ void __sched schedule_idle(void)
{
/*
* As this skips calling sched_submit_work(), which the idle task does
- * regardless because that function is a nop when the task is in a
+ * regardless because that function is a NOP when the task is in a
* TASK_RUNNING state, make sure this isn't used someplace that the
* current task can be in any other state. Note, idle is always in the
* TASK_RUNNING state.
@@ -7051,9 +6832,9 @@ EXPORT_SYMBOL(dynamic_preempt_schedule_notrace);
/*
* This is the entry point to schedule() from kernel preemption
- * off of irq context.
- * Note, that this is called and return with irqs disabled. This will
- * protect us against recursive calling from irq.
+ * off of IRQ context.
+ * Note, that this is called and return with IRQs disabled. This will
+ * protect us against recursive calling from IRQ contexts.
*/
asmlinkage __visible void __sched preempt_schedule_irq(void)
{
@@ -7083,7 +6864,7 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag
}
EXPORT_SYMBOL(default_wake_function);
-static void __setscheduler_prio(struct task_struct *p, int prio)
+void __setscheduler_prio(struct task_struct *p, int prio)
{
if (dl_prio(prio))
p->sched_class = &dl_sched_class;
@@ -7123,21 +6904,6 @@ void rt_mutex_post_schedule(void)
lockdep_assert(fetch_and_set(current->sched_rt_mutex, 0));
}
-static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
-{
- if (pi_task)
- prio = min(prio, pi_task->prio);
-
- return prio;
-}
-
-static inline int rt_effective_prio(struct task_struct *p, int prio)
-{
- struct task_struct *pi_task = rt_mutex_get_top_task(p);
-
- return __rt_effective_prio(pi_task, prio);
-}
-
/*
* rt_mutex_setprio - set the current priority of a task
* @p: task to boost
@@ -7187,7 +6953,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
goto out_unlock;
/*
- * Idle task boosting is a nono in general. There is one
+ * Idle task boosting is a no-no in general. There is one
* exception, when PREEMPT_RT and NOHZ is active:
*
* The idle task calls get_next_timer_interrupt() and holds
@@ -7266,1325 +7032,8 @@ out_unlock:
preempt_enable();
}
-#else
-static inline int rt_effective_prio(struct task_struct *p, int prio)
-{
- return prio;
-}
-#endif
-
-void set_user_nice(struct task_struct *p, long nice)
-{
- bool queued, running;
- struct rq *rq;
- int old_prio;
-
- if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
- return;
- /*
- * We have to be careful, if called from sys_setpriority(),
- * the task might be in the middle of scheduling on another CPU.
- */
- CLASS(task_rq_lock, rq_guard)(p);
- rq = rq_guard.rq;
-
- update_rq_clock(rq);
-
- /*
- * The RT priorities are set via sched_setscheduler(), but we still
- * allow the 'normal' nice value to be set - but as expected
- * it won't have any effect on scheduling until the task is
- * SCHED_DEADLINE, SCHED_FIFO or SCHED_RR:
- */
- if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
- p->static_prio = NICE_TO_PRIO(nice);
- return;
- }
-
- queued = task_on_rq_queued(p);
- running = task_current(rq, p);
- if (queued)
- dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
- if (running)
- put_prev_task(rq, p);
-
- p->static_prio = NICE_TO_PRIO(nice);
- set_load_weight(p, true);
- old_prio = p->prio;
- p->prio = effective_prio(p);
-
- if (queued)
- enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
- if (running)
- set_next_task(rq, p);
-
- /*
- * If the task increased its priority or is running and
- * lowered its priority, then reschedule its CPU:
- */
- p->sched_class->prio_changed(rq, p, old_prio);
-}
-EXPORT_SYMBOL(set_user_nice);
-
-/*
- * is_nice_reduction - check if nice value is an actual reduction
- *
- * Similar to can_nice() but does not perform a capability check.
- *
- * @p: task
- * @nice: nice value
- */
-static bool is_nice_reduction(const struct task_struct *p, const int nice)
-{
- /* Convert nice value [19,-20] to rlimit style value [1,40]: */
- int nice_rlim = nice_to_rlimit(nice);
-
- return (nice_rlim <= task_rlimit(p, RLIMIT_NICE));
-}
-
-/*
- * can_nice - check if a task can reduce its nice value
- * @p: task
- * @nice: nice value
- */
-int can_nice(const struct task_struct *p, const int nice)
-{
- return is_nice_reduction(p, nice) || capable(CAP_SYS_NICE);
-}
-
-#ifdef __ARCH_WANT_SYS_NICE
-
-/*
- * sys_nice - change the priority of the current process.
- * @increment: priority increment
- *
- * sys_setpriority is a more generic, but much slower function that
- * does similar things.
- */
-SYSCALL_DEFINE1(nice, int, increment)
-{
- long nice, retval;
-
- /*
- * Setpriority might change our priority at the same moment.
- * We don't