summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/mips/kvm/mips.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv.c6
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/x86/kernel/kvm.c4
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/nmi.h10
-rw-r--r--include/linux/sched.h1
-rw-r--r--include/linux/sched/sysctl.h1
-rw-r--r--include/linux/smpboot.h15
-rw-r--r--include/linux/swait.h36
-rw-r--r--kernel/cpu.c5
-rw-r--r--kernel/kthread.c6
-rw-r--r--kernel/power/suspend.c4
-rw-r--r--kernel/rcu/srcutiny.c4
-rw-r--r--kernel/rcu/tree.c8
-rw-r--r--kernel/rcu/tree_exp.h4
-rw-r--r--kernel/rcu/tree_plugin.h12
-rw-r--r--kernel/sched/Makefile2
-rw-r--r--kernel/sched/core.c72
-rw-r--r--kernel/sched/cpufreq_schedutil.c103
-rw-r--r--kernel/sched/deadline.c8
-rw-r--r--kernel/sched/debug.c35
-rw-r--r--kernel/sched/fair.c663
-rw-r--r--kernel/sched/pelt.c399
-rw-r--r--kernel/sched/pelt.h72
-rw-r--r--kernel/sched/rt.c15
-rw-r--r--kernel/sched/sched.h87
-rw-r--r--kernel/sched/swait.c32
-rw-r--r--kernel/smpboot.c54
-rw-r--r--kernel/stop_machine.c41
-rw-r--r--kernel/sysctl.c8
-rw-r--r--kernel/watchdog.c147
-rw-r--r--kernel/watchdog_hld.c4
-rw-r--r--virt/kvm/arm/arm.c4
-rw-r--r--virt/kvm/arm/psci.c2
-rw-r--r--virt/kvm/async_pf.c2
-rw-r--r--virt/kvm/kvm_main.c4
38 files changed, 1009 insertions, 870 deletions
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 7cd76f93a438..f7ea8e21656b 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -515,7 +515,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
dvcpu->arch.wait = 0;
if (swq_has_sleeper(&dvcpu->wq))
- swake_up(&dvcpu->wq);
+ swake_up_one(&dvcpu->wq);
return 0;
}
@@ -1204,7 +1204,7 @@ static void kvm_mips_comparecount_func(unsigned long data)
vcpu->arch.wait = 0;
if (swq_has_sleeper(&vcpu->wq))
- swake_up(&vcpu->wq);
+ swake_up_one(&vcpu->wq);
}
/* low level hrtimer wake routine */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index de686b340f4a..ee4a8854985e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -216,7 +216,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
wqp = kvm_arch_vcpu_wq(vcpu);
if (swq_has_sleeper(wqp)) {
- swake_up(wqp);
+ swake_up_one(wqp);
++vcpu->stat.halt_wakeup;
}
@@ -3188,7 +3188,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
}
}
- prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE);
if (kvmppc_vcore_check_block(vc)) {
finish_swait(&vc->wq, &wait);
@@ -3311,7 +3311,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
- swake_up(&vc->wq);
+ swake_up_one(&vc->wq);
}
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index daa09f89ca2d..fcb55b02990e 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1145,7 +1145,7 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
* yield-candidate.
*/
vcpu->preempted = true;
- swake_up(&vcpu->wq);
+ swake_up_one(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
/*
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5b2300b818af..a37bda38d205 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -154,7 +154,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
for (;;) {
if (!n.halted)
- prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
if (hlist_unhashed(&n.link))
break;
@@ -188,7 +188,7 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n)
if (n->halted)
smp_send_reschedule(n->cpu);
else if (swq_has_sleeper(&n->wq))
- swake_up(&n->wq);
+ swake_up_one(&n->wq);
}
static void apf_task_wake_all(void)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b5cd8465d44f..d536d457517b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1379,7 +1379,7 @@ static void apic_timer_expired(struct kvm_lapic *apic)
* using swait_active() is safe.
*/
if (swait_active(q))
- swake_up(q);
+ swake_up_one(q);
if (apic_lvtt_tscdeadline(apic))
ktimer->expired_tscdeadline = ktimer->tscdeadline;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8796ba387152..4cf06a64bc02 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -164,6 +164,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
+ CPUHP_AP_WATCHDOG_ONLINE,
CPUHP_AP_WORKQUEUE_ONLINE,
CPUHP_AP_RCUTREE_ONLINE,
CPUHP_AP_ONLINE_DYN,
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index b8d868d23e79..08f9247e9827 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -45,12 +45,18 @@ extern void touch_softlockup_watchdog(void);
extern void touch_softlockup_watchdog_sync(void);
extern void touch_all_softlockup_watchdogs(void);
extern unsigned int softlockup_panic;
-#else
+
+extern int lockup_detector_online_cpu(unsigned int cpu);
+extern int lockup_detector_offline_cpu(unsigned int cpu);
+#else /* CONFIG_SOFTLOCKUP_DETECTOR */
static inline void touch_softlockup_watchdog_sched(void) { }
static inline void touch_softlockup_watchdog(void) { }
static inline void touch_softlockup_watchdog_sync(void) { }
static inline void touch_all_softlockup_watchdogs(void) { }
-#endif
+
+#define lockup_detector_online_cpu NULL
+#define lockup_detector_offline_cpu NULL
+#endif /* CONFIG_SOFTLOCKUP_DETECTOR */
#ifdef CONFIG_DETECT_HUNG_TASK
void reset_hung_task_detector(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43731fe51c97..e0f4f56c9310 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1017,7 +1017,6 @@ struct task_struct {
u64 last_sum_exec_runtime;
struct callback_head numa_work;
- struct list_head numa_entry;
struct numa_group *numa_group;
/*
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 1c1a1512ec55..913488d828cb 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -40,7 +40,6 @@ extern unsigned int sysctl_numa_balancing_scan_size;
#ifdef CONFIG_SCHED_DEBUG
extern __read_mostly unsigned int sysctl_sched_migration_cost;
extern __read_mostly unsigned int sysctl_sched_nr_migrate;
-extern __read_mostly unsigned int sysctl_sched_time_avg;
int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length,
diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
index c174844cf663..d0884b525001 100644
--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -25,8 +25,6 @@ struct smpboot_thread_data;
* parked (cpu offline)
* @unpark: Optional unpark function, called when the thread is
* unparked (cpu online)
- * @cpumask: Internal state. To update which threads are unparked,
- * call smpboot_update_cpumask_percpu_thread().
* @selfparking: Thread is not parked by the park function.
* @thread_comm: The base name of the thread
*/
@@ -40,23 +38,12 @@ struct smp_hotplug_thread {
void (*cleanup)(unsigned int cpu, bool online);
void (*park)(unsigned int cpu);
void (*unpark)(unsigned int cpu);
- cpumask_var_t cpumask;
bool selfparking;
const char *thread_comm;
};
-int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread,
- const struct cpumask *cpumask);
-
-static inline int
-smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
-{
- return smpboot_register_percpu_thread_cpumask(plug_thread,
- cpu_possible_mask);
-}
+int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread);
void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread);
-void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
- const struct cpumask *);
#endif
diff --git a/include/linux/swait.h b/include/linux/swait.h
index bf8cb0dee23c..73e06e9986d4 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -16,7 +16,7 @@
* wait-queues, but the semantics are actually completely different, and
* every single user we have ever had has been buggy (or pointless).
*
- * A "swake_up()" only wakes up _one_ waiter, which is not at all what
+ * A "swake_up_one()" only wakes up _one_ waiter, which is not at all what
* "wake_up()" does, and has led to problems. In other cases, it has
* been fine, because there's only ever one waiter (kvm), but in that
* case gthe whole "simple" wait-queue is just pointless to begin with,
@@ -38,8 +38,8 @@
* all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
* sleeper state.
*
- * - the exclusive mode; because this requires preserving the list order
- * and this is hard.
+ * - the !exclusive mode; because that leads to O(n) wakeups, everything is
+ * exclusive.
*
* - custom wake callback functions; because you cannot give any guarantees
* about random code. This also allows swait to be used in RT, such that
@@ -115,7 +115,7 @@ extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name
* CPU0 - waker CPU1 - waiter
*
* for (;;) {
- * @cond = true; prepare_to_swait(&wq_head, &wait, state);
+ * @cond = true; prepare_to_swait_exclusive(&wq_head, &wait, state);
* smp_mb(); // smp_mb() from set_current_state()
* if (swait_active(wq_head)) if (@cond)
* wake_up(wq_head); break;
@@ -157,20 +157,20 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq)
return swait_active(wq);
}
-extern void swake_up(struct swait_queue_head *q);
+extern void swake_up_one(struct swait_queue_head *q);
extern void swake_up_all(struct swait_queue_head *q);
extern void swake_up_locked(struct swait_queue_head *q);
-extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
-extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
+extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state);
extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
-/* as per ___wait_event() but for swait, therefore "exclusive == 0" */
+/* as per ___wait_event() but for swait, therefore "exclusive == 1" */
#define ___swait_event(wq, condition, state, ret, cmd) \
({ \
+ __label__ __out; \
struct swait_queue __wait; \
long __ret = ret; \
\
@@ -183,20 +183,20 @@ extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
\
if (___wait_is_interruptible(state) && __int) { \
__ret = __int; \
- break; \
+ goto __out; \
} \
\
cmd; \
} \
finish_swait(&wq, &__wait); \
- __ret; \
+__out: __ret; \
})
#define __swait_event(wq, condition) \
(void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \
schedule())
-#define swait_event(wq, condition) \
+#define swait_event_exclusive(wq, condition) \
do { \
if (condition) \
break; \
@@ -208,7 +208,7 @@ do { \
TASK_UNINTERRUPTIBLE, timeout, \
__ret = schedule_timeout(__ret))
-#define swait_event_timeout(wq, condition, timeout) \
+#define swait_event_timeout_exclusive(wq, condition, timeout) \
({ \
long __ret = timeout; \
if (!___wait_cond_timeout(condition)) \
@@ -220,7 +220,7 @@ do { \
___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0, \
schedule())
-#define swait_event_interruptible(wq, condition) \
+#define swait_event_interruptible_exclusive(wq, condition) \
({ \
int __ret = 0; \
if (!(condition)) \
@@ -233,7 +233,7 @@ do { \
TASK_INTERRUPTIBLE, timeout, \
__ret = schedule_timeout(__ret))
-#define swait_event_interruptible_timeout(wq, condition, timeout) \
+#define swait_event_interruptible_timeout_exclusive(wq, condition, timeout)\
({ \
long __ret = timeout; \
if (!___wait_cond_timeout(condition)) \
@@ -246,7 +246,7 @@ do { \
(void)___swait_event(wq, condition, TASK_IDLE, 0, schedule())
/**
- * swait_event_idle - wait without system load contribution
+ * swait_event_idle_exclusive - wait without system load contribution
* @wq: the waitqueue to wait on
* @condition: a C expression for the event to wait for
*
@@ -257,7 +257,7 @@ do { \
* condition and doesn't want to contribute to system load. Signals are
* ignored.
*/
-#define swait_event_idle(wq, condition) \
+#define swait_event_idle_exclusive(wq, condition) \
do { \
if (condition) \
break; \
@@ -270,7 +270,7 @@ do { \
__ret = schedule_timeout(__ret))
/**
- * swait_event_idle_timeout - wait up to timeout without load contribution
+ * swait_event_idle_timeout_exclusive - wait up to timeout without load contribution
* @wq: the waitqueue to wait on
* @condition: a C expression for the event to wait for
* @timeout: timeout at which we'll give up in jiffies
@@ -288,7 +288,7 @@ do { \
* or the remaining jiffies (at least 1) if the @condition evaluated
* to %true before the @timeout elapsed.
*/
-#define swait_event_idle_timeout(wq, condition, timeout) \
+#define swait_event_idle_timeout_exclusive(wq, condition, timeout) \
({ \
long __ret = timeout; \
if (!___wait_cond_timeout(condition)) \
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2f8f338e77cf..15be70aae8ac 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1344,6 +1344,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = perf_event_init_cpu,
.teardown.single = perf_event_exit_cpu,
},
+ [CPUHP_AP_WATCHDOG_ONLINE] = {
+ .name = "lockup_detector:online",
+ .startup.single = lockup_detector_online_cpu,
+ .teardown.single = lockup_detector_offline_cpu,
+ },
[CPUHP_AP_WORKQUEUE_ONLINE] = {
.name = "workqueue:online",
.startup.single = workqueue_online_cpu,
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 486dedbd9af5..087d18d771b5 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -190,7 +190,7 @@ static void __kthread_parkme(struct kthread *self)
if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
break;
- complete_all(&self->parked);
+ complete(&self->parked);
schedule();
}
__set_current_state(TASK_RUNNING);
@@ -471,7 +471,6 @@ void kthread_unpark(struct task_struct *k)
if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
__kthread_bind(k, kthread->cpu, TASK_PARKED);
- reinit_completion(&kthread->parked);
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
/*
* __kthread_parkme() will either see !SHOULD_PARK or get the wakeup.
@@ -499,6 +498,9 @@ int kthread_park(struct task_struct *k)
if (WARN_ON(k->flags & PF_EXITING))
return -ENOSYS;
+ if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
+ return -EBUSY;
+
set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
if (k != current) {
wake_up_process(k);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 87331565e505..70178f6ffdc4 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -92,7 +92,7 @@ static void s2idle_enter(void)
/* Push all the CPUs into the idle loop. */
wake_up_all_idle_cpus();
/* Make the current CPU wait so it can enter the idle loop too. */
- swait_event(s2idle_wait_head,
+ swait_event_exclusive(s2idle_wait_head,
s2idle_state == S2IDLE_STATE_WAKE);
cpuidle_pause();
@@ -160,7 +160,7 @@ void s2idle_wake(void)
raw_spin_lock_irqsave(&s2idle_lock, flags);
if (s2idle_state > S2IDLE_STATE_NONE) {
s2idle_state = S2IDLE_STATE_WAKE;
- swake_up(&s2idle_wait_head);
+ swake_up_one(&s2idle_wait_head);
}
raw_spin_unlock_irqrestore(&s2idle_lock, flags);
}
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 622792abe41a..04fc2ed71af8 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -110,7 +110,7 @@ void __srcu_read_unlock(struct srcu_struct *sp, int idx)
WRITE_ONCE(sp->srcu_lock_nesting[idx], newval);
if (!newval && READ_ONCE(sp->srcu_gp_waiting))
- swake_up(&sp->srcu_wq);
+ swake_up_one(&sp->srcu_wq);
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
@@ -140,7 +140,7 @@ void srcu_drive_gp(struct work_struct *wp)
idx = sp->srcu_idx;
WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx);
WRITE_ONCE(sp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */
- swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx]));
+ swait_event_exclusive(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx]));
WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
/* Invoke the callbacks we removed above. */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 6930934e8b9f..0b760c1369f7 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1701,7 +1701,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
!READ_ONCE(rsp->gp_flags) ||
!rsp->gp_kthread)
return;
- swake_up(&rsp->gp_wq);
+ swake_up_one(&rsp->gp_wq);
}
/*
@@ -2015,7 +2015,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
}
/*
- * Helper function for swait_event_idle() wakeup at force-quiescent-state
+ * Helper function for swait_event_idle_exclusive() wakeup at force-quiescent-state
* time.
*/
static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
@@ -2163,7 +2163,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
READ_ONCE(rsp->gp_seq),
TPS("reqwait"));
rsp->gp_state = RCU_GP_WAIT_GPS;
- swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
+ swait_event_idle_exclusive(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
RCU_GP_FLAG_INIT);
rsp->gp_state = RCU_GP_DONE_GPS;
/* Locking provides needed memory barrier. */
@@ -2191,7 +2191,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
READ_ONCE(rsp->gp_seq),
TPS("fqswait"));
rsp->gp_state = RCU_GP_WAIT_FQS;
- ret = swait_event_idle_timeout(rsp->gp_wq,
+ ret = swait_event_idle_timeout_exclusive(rsp->gp_wq,
rcu_gp_fqs_check_wake(rsp, &gf), j);
rsp->gp_state = RCU_GP_DOING_FQS;
/* Locking provides needed memory barriers. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index b3df3b770afb..0b2c2ad69629 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -212,7 +212,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
if (wake) {
smp_mb(); /* EGP done before wake_up(). */
- swake_up(&rsp->expedited_wq);
+ swake_up_one(&rsp->expedited_wq);
}
break;
}
@@ -526,7 +526,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
jiffies_start = jiffies;
for (;;) {
- ret = swait_event_timeout(
+ ret = swait_event_timeout_exclusive(
rsp->expedited_wq,
sync_rcu_preempt_exp_done_unlocked(rnp_root),
jiffies_stall);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index c1b17f5b9361..a97c20ea9bce 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1926,8 +1926,8 @@ static void __wake_nocb_leader(struct rcu_data *rdp, bool force,
WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
del_timer(&rdp->nocb_timer);
raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
- smp_mb(); /* ->nocb_leader_sleep before swake_up(). */
- swake_up(&rdp_leader->nocb_wq);
+ smp_mb(); /* ->nocb_leader_sleep before swake_up_one(). */
+ swake_up_one(&rdp_leader->nocb_wq);
} else {
raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
}
@@ -2159,7 +2159,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
*/
trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait"));
for (;;) {
- swait_event_interruptible(
+ swait_event_interruptible_exclusive(
rnp->nocb_gp_wq[rcu_seq_ctr(c) & 0x1],
(d = rcu_seq_done(&rnp->gp_seq, c)));
if (likely(d))
@@ -2188,7 +2188,7 @@ wait_again:
/* Wait for callbacks to appear. */
if (!rcu_nocb_poll) {
trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Sleep"));
- swait_event_interruptible(my_rdp->nocb_wq,
+ swait_event_interruptible_exclusive(my_rdp->nocb_wq,
!READ_ONCE(my_rdp->nocb_leader_sleep));
raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
my_rdp->nocb_leader_sleep = true;
@@ -2253,7 +2253,7 @@ wait_again:
raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
/* List was empty, so wake up the follower. */
- swake_up(&rdp->nocb_wq);
+ swake_up_one(&rdp->nocb_wq);
}
}
@@ -2270,7 +2270,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
{
for (;;) {
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("FollowerSleep"));
- swait_event_interruptible(rdp->nocb_wq,
+ swait_event_interruptible_exclusive(rdp->nocb_wq,
READ_ONCE(rdp->nocb_follower_head));
if (smp_load_acquire(&rdp->nocb_follower_head)) {
/* ^^^ Ensure CB invocation follows _head test. */
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index d9a02b318108..7fe183404c38 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle.o fair.o rt.o deadline.o
obj-y += wait.o wait_bit.o swait.o completion.o
-obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o
+obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe365c9a08e9..deafa9fe602b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -17,6 +17,8 @@
#include "../workqueue_internal.h"
#include "../smpboot.h"
+#include "pelt.h"
+
#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
@@ -45,14 +47,6 @@ const_debug unsigned int sysctl_sched_features =
const_debug unsigned int sysctl_sched_nr_migrate = 32;
/*
- * period over which we average the RT time consumption, measured
- * in ms.
- *
- * default: 1s
- */
-const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;
-
-/*
* period over which we measure -rt task CPU usage in us.
* default: 1s
*/
@@ -183,9 +177,9 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
rq->clock_task += delta;
-#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+#ifdef HAVE_SCHED_AVG_IRQ
if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
- sched_rt_avg_update(rq, irq_delta + steal);
+ update_irq_load_avg(rq, irq_delta + steal);
#endif
}
@@ -649,23 +643,6 @@ bool sched_can_stop_tick(struct rq *rq)
return true;
}
#endif /* CONFIG_NO_HZ_FULL */
-
-void sched_avg_update(struct rq *rq)
-{
- s64 period = sched_avg_period();
-
- while ((s64)(rq_clock(rq) - rq->age_stamp) > period) {
- /*
- * Inline assembly required to prevent the compiler
- * optimising this loop into a divmod call.
- * See __iter_div_u64_rem() for another example of this.
- */
- asm("" : "+rm" (rq->age_stamp));
- rq->age_stamp += period;
- rq->rt_avg /= 2;
- }
-}
-
#endif /* CONFIG_SMP */
#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
@@ -1199,6 +1176,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
__set_task_cpu(p, new_cpu);
}
+#ifdef CONFIG_NUMA_BALANCING
static void __migrate_swap_task(struct task_struct *p, int cpu)
{
if (task_on_rq_queued(p)) {
@@ -1280,16 +1258,17 @@ unlock:
/*
* Cross migrate two tasks
*/
-int migrate_swap(struct task_struct *cur, struct task_struct *p)
+int migrate_swap(struct task_struct *cur, struct task_struct *p,
+ int target_cpu, int curr_cpu)
{
struct migration_swap_arg arg;
int ret = -EINVAL;
arg = (struct migration_swap_arg){
.src_task = cur,
- .src_cpu = task_cpu(cur),
+ .src_cpu = curr_cpu,
.dst_task = p,
- .dst_cpu = task_cpu(p),
+ .dst_cpu = target_cpu,
};
if (arg.src_cpu == arg.dst_cpu)
@@ -1314,6 +1293,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
out:
return ret;
}
+#endif /* CONFIG_NUMA_BALANCING */
/*
* wait_task_inactive - wait for a thread to unschedule.
@@ -2317,7 +2297,6 @@ static inline void init_schedstats(void) {}
int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
unsigned long flags;
- int cpu = get_cpu();
__sched_fork(clone_flags, p);
/*
@@ -2353,14 +2332,12 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->sched_reset_on_fork = 0;
}
- if (dl_prio(p->prio)) {
- put_cpu();
+ if (dl_prio(p->prio))
return -EAGAIN;
- } else if (rt_prio(p->prio)) {
+ else if (rt_prio(p->prio))
p->sched_class = &rt_sched_class;
- } else {
+ else
p->sched_class = &fair_sched_class;
- }
init_entity_runnable_average(&p->se);
@@ -2376,7 +2353,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
* We're setting the CPU for the first time, we don't migrate,
* so use __set_task_cpu().
*/
- __set_task_cpu(p, cpu);
+ __set_task_cpu(p, smp_processor_id());
if (p->sched_class->task_fork)
p->sched_class->task_fork(p);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
@@ -2393,8 +2370,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
plist_node_init(&p->pushable_tasks, MAX_PRIO);
RB_CLEAR_NODE(&p->pushable_dl_tasks);
#endif
-
- put_cpu();
return 0;
}
@@ -5714,13 +5689,6 @@ void set_rq_offline(struct rq *rq)
}
}
-static void set_cpu_rq_start_time(unsigned int cpu)
-{
- struct rq *rq = cpu_rq(cpu);
-
- rq->age_stamp = sched_clock_cpu(cpu);
-}
-
/*
* used to mark begin/end of suspend/resume:
*/
@@ -5838,7 +5806,6 @@ static void sched_rq_cpu_starting(unsigned int cpu)
int sched_cpu_starting(unsigned int cpu)
{
- set_cpu_rq_start_time(cpu);