diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-20 12:52:55 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-20 12:52:55 -0800 |
commit | 828cad8ea05d194d8a9452e0793261c2024c23a2 (patch) | |
tree | 0ad7c7e044cdcfe75d78da0b52eb2358d4686e02 /kernel/sched | |
parent | 60c906bab124a0627fba04c9ca5e61bba4747c0c (diff) | |
parent | bb3bac2ca9a3a5b7fa601781adf70167a0449d75 (diff) | |
download | linux-828cad8ea05d194d8a9452e0793261c2024c23a2.tar.gz linux-828cad8ea05d194d8a9452e0793261c2024c23a2.tar.bz2 linux-828cad8ea05d194d8a9452e0793261c2024c23a2.zip |
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"The main changes in this (fairly busy) cycle were:
- There was a class of scheduler bugs related to forgetting to update
the rq-clock timestamp which can cause weird and hard to debug
problems, so there's a new debug facility for this: which uncovered
a whole lot of bugs which convinced us that we want to keep the
debug facility.
(Peter Zijlstra, Matt Fleming)
- Various cputime related updates: eliminate cputime and use u64
nanoseconds directly, simplify and improve the arch interfaces,
implement delayed accounting more widely, etc. - (Frederic
Weisbecker)
- Move code around for better structure plus cleanups (Ingo Molnar)
- Move IO schedule accounting deeper into the scheduler plus related
changes to improve the situation (Tejun Heo)
- ... plus a round of sched/rt and sched/deadline fixes, plus other
fixes, updats and cleanups"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (85 commits)
sched/core: Remove unlikely() annotation from sched_move_task()
sched/autogroup: Rename auto_group.[ch] to autogroup.[ch]
sched/topology: Split out scheduler topology code from core.c into topology.c
sched/core: Remove unnecessary #include headers
sched/rq_clock: Consolidate the ordering of the rq_clock methods
delayacct: Include <uapi/linux/taskstats.h>
sched/core: Clean up comments
sched/rt: Show the 'sched_rr_timeslice' SCHED_RR timeslice tuning knob in milliseconds
sched/clock: Add dummy clear_sched_clock_stable() stub function
sched/cputime: Remove generic asm headers
sched/cputime: Remove unused nsec_to_cputime()
s390, sched/cputime: Remove unused cputime definitions
powerpc, sched/cputime: Remove unused cputime definitions
s390, sched/cputime: Make arch_cpu_idle_time() to return nsecs
ia64, sched/cputime: Remove unused cputime definitions
ia64: Convert vtime to use nsec units directly
ia64, sched/cputime: Move the nsecs based cputime headers to the last arch using it
sched/cputime: Remove jiffies based cputime
sched/cputime, vtime: Return nsecs instead of cputime_t to account
sched/cputime: Complete nsec conversion of tick based accounting
...
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/Makefile | 4 | ||||
-rw-r--r-- | kernel/sched/autogroup.c (renamed from kernel/sched/auto_group.c) | 0 | ||||
-rw-r--r-- | kernel/sched/autogroup.h (renamed from kernel/sched/auto_group.h) | 0 | ||||
-rw-r--r-- | kernel/sched/clock.c | 158 | ||||
-rw-r--r-- | kernel/sched/completion.c | 10 | ||||
-rw-r--r-- | kernel/sched/core.c | 2333 | ||||
-rw-r--r-- | kernel/sched/cpuacct.c | 2 | ||||
-rw-r--r-- | kernel/sched/cputime.c | 178 | ||||
-rw-r--r-- | kernel/sched/deadline.c | 13 | ||||
-rw-r--r-- | kernel/sched/debug.c | 4 | ||||
-rw-r--r-- | kernel/sched/fair.c | 94 | ||||
-rw-r--r-- | kernel/sched/idle_task.c | 2 | ||||
-rw-r--r-- | kernel/sched/rt.c | 10 | ||||
-rw-r--r-- | kernel/sched/sched.h | 137 | ||||
-rw-r--r-- | kernel/sched/stats.h | 4 | ||||
-rw-r--r-- | kernel/sched/stop_task.c | 2 | ||||
-rw-r--r-- | kernel/sched/topology.c | 1658 |
17 files changed, 2406 insertions, 2203 deletions
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 5e59b832ae2b..89ab6758667b 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -18,8 +18,8 @@ endif obj-y += core.o loadavg.o clock.o cputime.o obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o obj-y += wait.o swait.o completion.o idle.o -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o -obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o +obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o diff --git a/kernel/sched/auto_group.c b/kernel/sched/autogroup.c index da39489d2d80..da39489d2d80 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/autogroup.c diff --git a/kernel/sched/auto_group.h b/kernel/sched/autogroup.h index 890c95f2587a..890c95f2587a 100644 --- a/kernel/sched/auto_group.h +++ b/kernel/sched/autogroup.h diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index e85a725e5c34..ad64efe41722 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -77,41 +77,88 @@ EXPORT_SYMBOL_GPL(sched_clock); __read_mostly int sched_clock_running; +void sched_clock_init(void) +{ + sched_clock_running = 1; +} + #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -static struct static_key __sched_clock_stable = STATIC_KEY_INIT; -static int __sched_clock_stable_early; +/* + * We must start with !__sched_clock_stable because the unstable -> stable + * transition is accurate, while the stable -> unstable transition is not. + * + * Similarly we start with __sched_clock_stable_early, thereby assuming we + * will become stable, such that there's only a single 1 -> 0 transition. + */ +static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable); +static int __sched_clock_stable_early = 1; -int sched_clock_stable(void) +/* + * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset + */ +static __read_mostly u64 raw_offset; +static __read_mostly u64 gtod_offset; + +struct sched_clock_data { + u64 tick_raw; + u64 tick_gtod; + u64 clock; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data); + +static inline struct sched_clock_data *this_scd(void) { - return static_key_false(&__sched_clock_stable); + return this_cpu_ptr(&sched_clock_data); } -static void __set_sched_clock_stable(void) +static inline struct sched_clock_data *cpu_sdc(int cpu) { - if (!sched_clock_stable()) - static_key_slow_inc(&__sched_clock_stable); + return &per_cpu(sched_clock_data, cpu); +} - tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); +int sched_clock_stable(void) +{ + return static_branch_likely(&__sched_clock_stable); } -void set_sched_clock_stable(void) +static void __set_sched_clock_stable(void) { - __sched_clock_stable_early = 1; + struct sched_clock_data *scd = this_scd(); - smp_mb(); /* matches sched_clock_init() */ + /* + * Attempt to make the (initial) unstable->stable transition continuous. + */ + raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw); - if (!sched_clock_running) - return; + printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n", + scd->tick_gtod, gtod_offset, + scd->tick_raw, raw_offset); - __set_sched_clock_stable(); + static_branch_enable(&__sched_clock_stable); + tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); } static void __clear_sched_clock_stable(struct work_struct *work) { - /* XXX worry about clock continuity */ - if (sched_clock_stable()) - static_key_slow_dec(&__sched_clock_stable); + struct sched_clock_data *scd = this_scd(); + + /* + * Attempt to make the stable->unstable transition continuous. + * + * Trouble is, this is typically called from the TSC watchdog + * timer, which is late per definition. This means the tick + * values can already be screwy. + * + * Still do what we can. + */ + gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod); + + printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n", + scd->tick_gtod, gtod_offset, + scd->tick_raw, raw_offset); + static_branch_disable(&__sched_clock_stable); tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE); } @@ -121,47 +168,15 @@ void clear_sched_clock_stable(void) { __sched_clock_stable_early = 0; - smp_mb(); /* matches sched_clock_init() */ - - if (!sched_clock_running) - return; + smp_mb(); /* matches sched_clock_init_late() */ - schedule_work(&sched_clock_work); + if (sched_clock_running == 2) + schedule_work(&sched_clock_work); } -struct sched_clock_data { - u64 tick_raw; - u64 tick_gtod; - u64 clock; -}; - -static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data); - -static inline struct sched_clock_data *this_scd(void) +void sched_clock_init_late(void) { - return this_cpu_ptr(&sched_clock_data); -} - -static inline struct sched_clock_data *cpu_sdc(int cpu) -{ - return &per_cpu(sched_clock_data, cpu); -} - -void sched_clock_init(void) -{ - u64 ktime_now = ktime_to_ns(ktime_get()); - int cpu; - - for_each_possible_cpu(cpu) { - struct sched_clock_data *scd = cpu_sdc(cpu); - - scd->tick_raw = 0; - scd->tick_gtod = ktime_now; - scd->clock = ktime_now; - } - - sched_clock_running = 1; - + sched_clock_running = 2; /* * Ensure that it is impossible to not do a static_key update. * @@ -173,8 +188,6 @@ void sched_clock_init(void) if (__sched_clock_stable_early) __set_sched_clock_stable(); - else - __clear_sched_clock_stable(NULL); } /* @@ -216,7 +229,7 @@ again: * scd->tick_gtod + TICK_NSEC); */ - clock = scd->tick_gtod + delta; + clock = scd->tick_gtod + gtod_offset + delta; min_clock = wrap_max(scd->tick_gtod, old_clock); max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC); @@ -302,7 +315,7 @@ u64 sched_clock_cpu(int cpu) u64 clock; if (sched_clock_stable()) - return sched_clock(); + return sched_clock() + raw_offset; if (unlikely(!sched_clock_running)) return 0ull; @@ -323,23 +336,22 @@ EXPORT_SYMBOL_GPL(sched_clock_cpu); void sched_clock_tick(void) { struct sched_clock_data *scd; - u64 now, now_gtod; - - if (sched_clock_stable()) - return; - - if (unlikely(!sched_clock_running)) - return; WARN_ON_ONCE(!irqs_disabled()); + /* + * Update these values even if sched_clock_stable(), because it can + * become unstable at any point in time at which point we need some + * values to fall back on. + * + * XXX arguably we can skip this if we expose tsc_clocksource_reliable + */ scd = this_scd(); - now_gtod = ktime_to_ns(ktime_get()); - now = sched_clock(); + scd->tick_raw = sched_clock(); + scd->tick_gtod = ktime_get_ns(); - scd->tick_raw = now; - scd->tick_gtod = now_gtod; - sched_clock_local(scd); + if (!sched_clock_stable() && likely(sched_clock_running)) + sched_clock_local(scd); } /* @@ -366,11 +378,6 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ -void sched_clock_init(void) -{ - sched_clock_running = 1; -} - u64 sched_clock_cpu(int cpu) { if (unlikely(!sched_clock_running)) @@ -378,6 +385,7 @@ u64 sched_clock_cpu(int cpu) return sched_clock(); } + #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ /* diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index 8d0f35debf35..f063a25d4449 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -31,7 +31,8 @@ void complete(struct completion *x) unsigned long flags; spin_lock_irqsave(&x->wait.lock, flags); - x->done++; + if (x->done != UINT_MAX) + x->done++; __wake_up_locked(&x->wait, TASK_NORMAL, 1); spin_unlock_irqrestore(&x->wait.lock, flags); } @@ -51,7 +52,7 @@ void complete_all(struct completion *x) unsigned long flags; spin_lock_irqsave(&x->wait.lock, flags); - x->done += UINT_MAX/2; + x->done = UINT_MAX; __wake_up_locked(&x->wait, TASK_NORMAL, 0); spin_unlock_irqrestore(&x->wait.lock, flags); } @@ -79,7 +80,8 @@ do_wait_for_common(struct completion *x, if (!x->done) return timeout; } - x->done--; + if (x->done != UINT_MAX) + x->done--; return timeout ?: 1; } @@ -280,7 +282,7 @@ bool try_wait_for_completion(struct completion *x) spin_lock_irqsave(&x->wait.lock, flags); if (!x->done) ret = 0; - else + else if (x->done != UINT_MAX) x->done--; spin_unlock_irqrestore(&x->wait.lock, flags); return ret; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c56fb57f2991..34e2291a9a6c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1,88 +1,28 @@ /* * kernel/sched/core.c * - * Kernel scheduler and related syscalls + * Core kernel scheduler code and related syscalls * * Copyright (C) 1991-2002 Linus Torvalds - * - * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and - * make semaphores SMP safe - * 1998-11-19 Implemented schedule_timeout() and related stuff - * by Andrea Arcangeli - * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar: - * hybrid priority-list and round-robin design with - * an array-switch method of distributing timeslices - * and per-CPU runqueues. Cleanups and useful suggestions - * by Davide Libenzi, preemptible kernel bits by Robert Love. - * 2003-09-03 Interactivity tuning by Con Kolivas. - * 2004-04-02 Scheduler domains code by Nick Piggin - * 2007-04-15 Work begun on replacing all interactivity tuning with a - * fair scheduling design by Con Kolivas. - * 2007-05-05 Load balancing (smp-nice) and other improvements - * by Peter Williams - * 2007-05-06 Interactivity improvements to CFS by Mike Galbraith - * 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri - * 2007-11-29 RT balancing improvements by Steven Rostedt, Gregory Haskins, - * Thomas Gleixner, Mike Kravetz */ - -#include <linux/kasan.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/nmi.h> -#include <linux/init.h> -#include <linux/uaccess.h> -#include <linux/highmem.h> -#include <linux/mmu_context.h> -#include <linux/interrupt.h> -#include <linux/capability.h> -#include <linux/completion.h> -#include <linux/kernel_stat.h> -#include <linux/debug_locks.h> -#include <linux/perf_event.h> -#include <linux/security.h> -#include <linux/notifier.h> -#include <linux/profile.h> -#include <linux/freezer.h> -#include <linux/vmalloc.h> -#include <linux/blkdev.h> -#include <linux/delay.h> -#include <linux/pid_namespace.h> -#include <linux/smp.h> -#include <linux/threads.h> -#include <linux/timer.h> -#include <linux/rcupdate.h> -#include <linux/cpu.h> +#include <linux/sched.h> #include <linux/cpuset.h> -#include <linux/percpu.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/sysctl.h> -#include <linux/syscalls.h> -#include <linux/times.h> -#include <linux/tsacct_kern.h> -#include <linux/kprobes.h> #include <linux/delayacct.h> -#include <linux/unistd.h> -#include <linux/pagemap.h> -#include <linux/hrtimer.h> -#include <linux/tick.h> -#include <linux/ctype.h> -#include <linux/ftrace.h> -#include <linux/slab.h> #include <linux/init_task.h> #include <linux/context_tracking.h> -#include <linux/compiler.h> -#include <linux/frame.h> + +#include <linux/blkdev.h> +#include <linux/kprobes.h> +#include <linux/mmu_context.h> +#include <linux/module.h> +#include <linux/nmi.h> #include <linux/prefetch.h> -#include <linux/mutex.h> +#include <linux/profile.h> +#include <linux/security.h> +#include <linux/syscalls.h> #include <asm/switch_to.h> #include <asm/tlb.h> -#include <asm/irq_regs.h> -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#endif #include "sched.h" #include "../workqueue_internal.h" @@ -91,27 +31,8 @@ #define CREATE_TRACE_POINTS #include <trace/events/sched.h> -DEFINE_MUTEX(sched_domains_mutex); DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -static void update_rq_clock_task(struct rq *rq, s64 delta); - -void update_rq_clock(struct rq *rq) -{ - s64 delta; - - lockdep_assert_held(&rq->lock); - - if (rq->clock_skip_update & RQCF_ACT_SKIP) - return; - - delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; - if (delta < 0) - return; - rq->clock += delta; - update_rq_clock_task(rq, delta); -} - /* * Debugging: various feature bits */ @@ -140,7 +61,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC; /* - * period over which we measure -rt task cpu usage in us. + * period over which we measure -rt task CPU usage in us. * default: 1s */ unsigned int sysctl_sched_rt_period = 1000000; @@ -153,7 +74,7 @@ __read_mostly int scheduler_running; */ int sysctl_sched_rt_runtime = 950000; -/* cpus with isolated domains */ +/* CPUs with isolated domains */ cpumask_var_t cpu_isolated_map; /* @@ -185,7 +106,7 @@ struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) rq = task_rq(p); raw_spin_lock(&rq->lock); if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { - rf->cookie = lockdep_pin_lock(&rq->lock); + rq_pin_lock(rq, rf); return rq; } raw_spin_unlock(&rq->lock); @@ -221,11 +142,11 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) * If we observe the old cpu in task_rq_lock, the acquire of * the old rq->lock will fully serialize against the stores. * - * If we observe the new cpu in task_rq_lock, the acquire will + * If we observe the new CPU in task_rq_lock, the acquire will * pair with the WMB to ensure we must then also see migrating. */ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { - rf->cookie = lockdep_pin_lock(&rq->lock); + rq_pin_lock(rq, rf); return rq; } raw_spin_unlock(&rq->lock); @@ -236,6 +157,84 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) } } +/* + * RQ-clock updating methods: + */ + +static void update_rq_clock_task(struct rq *rq, s64 delta) +{ +/* + * In theory, the compile should just see 0 here, and optimize out the call + * to sched_rt_avg_update. But I don't trust it... + */ +#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) + s64 steal = 0, irq_delta = 0; +#endif +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; + + /* + * Since irq_time is only updated on {soft,}irq_exit, we might run into + * this case when a previous update_rq_clock() happened inside a + * {soft,}irq region. + * + * When this happens, we stop ->clock_task and only update the + * prev_irq_time stamp to account for the part that fit, so that a next + * update will consume the rest. This ensures ->clock_task is + * monotonic. + * + * It does however cause some slight miss-attribution of {soft,}irq + * time, a more accurate solution would be to update the irq_time using + * the current rq->clock timestamp, except that would require using + * atomic ops. + */ + if (irq_delta > delta) + irq_delta = delta; + + rq->prev_irq_time += irq_delta; + delta -= irq_delta; +#endif +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING + if (static_key_false((¶virt_steal_rq_enabled))) { + steal = paravirt_steal_clock(cpu_of(rq)); + steal -= rq->prev_steal_time_rq; + + if (unlikely(steal > delta)) + steal = delta; + + rq->prev_steal_time_rq += steal; + delta -= steal; + } +#endif + + rq->clock_task += delta; + +#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) + if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY)) + sched_rt_avg_update(rq, irq_delta + steal); +#endif +} + +void update_rq_clock(struct rq *rq) +{ + s64 delta; + + lockdep_assert_held(&rq->lock); + + if (rq->clock_update_flags & RQCF_ACT_SKIP) + return; + +#ifdef CONFIG_SCHED_DEBUG + rq->clock_update_flags |= RQCF_UPDATED; +#endif + delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; + if (delta < 0) + return; + rq->clock += delta; + update_rq_clock_task(rq, delta); +} + + #ifdef CONFIG_SCHED_HRTICK /* * Use HR-timers to deliver accurate preemption points. @@ -458,7 +457,7 @@ void wake_up_q(struct wake_q_head *head) task = container_of(node, struct task_struct, wake_q); BUG_ON(!task); - /* task can safely be re-inserted now */ + /* Task can safely be re-inserted now: */ node = node->next; task->wake_q.next = NULL; @@ -516,12 +515,12 @@ void resched_cpu(int cpu) #ifdef CONFIG_SMP #ifdef CONFIG_NO_HZ_COMMON /* - * In the semi idle case, use the nearest busy cpu for migrating timers - * from an idle cpu. This is good for power-savings. + * In the semi idle case, use the nearest busy CPU for migrating timers + * from an idle CPU. This is good for power-savings. * * We don't do similar optimization for completely idle system, as - * selecting an idle cpu will add more delays to the timers than intended - * (as that cpu's timer base may not be uptodate wrt jiffies etc). + * selecting an idle CPU will add more delays to the timers than intended + * (as that CPU's timer base may not be uptodate wrt jiffies etc). */ int get_nohz_timer_target(void) { @@ -550,6 +549,7 @@ unlock: rcu_read_unlock(); return cpu; } + /* * When add_timer_on() enqueues a timer into the timer wheel of an * idle CPU then this timer might expire before the next timer event @@ -784,60 +784,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags) dequeue_task(rq, p, flags); } -static void update_rq_clock_task(struct rq *rq, s64 delta) -{ -/* - * In theory, the compile should just see 0 here, and optimize out the call - * to sched_rt_avg_update. But I don't trust it... - */ -#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) - s64 steal = 0, irq_delta = 0; -#endif -#ifdef CONFIG_IRQ_TIME_ACCOUNTING - irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; - - /* - * Since irq_time is only updated on {soft,}irq_exit, we might run into - * this case when a previous update_rq_clock() happened inside a - * {soft,}irq region. - * - * When this happens, we stop ->clock_task and only update the - * prev_irq_time stamp to account for the part that fit, so that a next - * update will consume the rest. This ensures ->clock_task is - * monotonic. - * - * It does however cause some slight miss-attribution of {soft,}irq - * time, a more accurate solution would be to update the irq_time using - * the current rq->clock timestamp, except that would require using - * atomic ops. - */ - if (irq_delta > delta) - irq_delta = delta; - - rq->prev_irq_time += irq_delta; - delta -= irq_delta; -#endif -#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING - if (static_key_false((¶virt_steal_rq_enabled))) { - steal = paravirt_steal_clock(cpu_of(rq)); - steal -= rq->prev_steal_time_rq; - - if (unlikely(steal > delta)) - steal = delta; - - rq->prev_steal_time_rq += steal; - delta -= steal; - } -#endif - - rq->clock_task += delta; - -#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) - if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY)) - sched_rt_avg_update(rq, irq_delta + steal); -#endif -} - void sched_set_stop_task(int cpu, struct task_struct *stop) { struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; @@ -1018,7 +964,7 @@ struct migration_arg { }; /* - * Move (not current) task off this cpu, onto dest cpu. We're doing + * Move (not current) task off this CPU, onto the destination CPU. We're doing * this because either it can't run here any more (set_cpus_allowed() * away from this CPU, or CPU going down), or because we're * attempting to rebalance this task on exec (sched_exec). @@ -1052,8 +998,8 @@ static int migration_cpu_stop(void *data) struct rq *rq = this_rq(); /* - * The original target cpu might have gone down and we might - * be on another cpu but it doesn't matter. + * The original target CPU might have gone down and we might + * be on another CPU but it doesn't matter. */ local_irq_disable(); /* @@ -1171,7 +1117,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, if (p->flags & PF_KTHREAD) { /* * For kernel threads that do indeed end up on online && - * !active we want to ensure they are strict per-cpu threads. + * !active we want to ensure they are strict per-CPU threads. */ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && !cpumask_intersects(new_mask, cpu_active_mask) && @@ -1195,9 +1141,9 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, * OK, since we're going to drop the lock immediately * afterwards anyway. */ - lockdep_unpin_lock(&rq->lock, rf.cookie); + rq_unpin_lock(rq, &rf); rq = move_queued_task(rq, p, dest_cpu); - lockdep_repin_lock(&rq->lock, rf.cookie); + rq_repin_lock(rq, &rf); } out: task_rq_unlock(rq, p, &rf); @@ -1276,7 +1222,7 @@ static void __migrate_swap_task(struct task_struct *p, int cpu) /* * Task isn't running anymore; make it appear like we migrated * it before it went to sleep. This means on wakeup we make the - * previous cpu our target instead of where it really is. + * previous CPU our target instead of where it really is. */ p->wake_cpu = cpu; } @@ -1508,12 +1454,12 @@ EXPORT_SYMBOL_GPL(kick_process); * * - on cpu-up we allow per-cpu kthreads on the online && !active cpu, * see __set_cpus_allowed_ptr(). At this point the newly online - * cpu isn't yet part of the sched domains, and balancing will not + * CPU isn't yet part of the sched domains, and balancing will not * see it. * - * - on cpu-down we clear cpu_active() to mask the sched domains and + * - on CPU-down we clear cpu_active() to mask the sched domains and * avoid the load balancer to place new tasks on the to be removed - * cpu. Existing tasks will remain running there and will be taken + * CPU. Existing tasks will remain running there and will be taken * off. * * This means that fallback selection must not select !active CPUs. @@ -1529,9 +1475,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p) int dest_cpu; /* - * If the node that the cpu is on has been offlined, cpu_to_node() - * will return -1. There is no cpu on the node, and we should - * select the cpu on the other node. + * If the node that the CPU is on has been offlined, cpu_to_node() + * will return -1. There is no CPU on the node, and we should + * select the CPU on the other node. */ if (nid != -1) { nodemask = cpumask_of_node(nid); @@ -1563,7 +1509,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) state = possible; break; } - /* fall-through */ + /* Fall-through */ case possible: do_set_cpus_allowed(p, cpu_possible_mask); state = fail; @@ -1607,7 +1553,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) /* * In order not to call set_task_cpu() on a blocking task we need * to rely on ttwu() to place the task on a valid ->cpus_allowed - * cpu. + * CPU. * * Since this is common to all placement strategies, this lives here. * @@ -1681,7 +1627,7 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl activate_task(rq, p, en_flags); p->on_rq = TASK_ON_RQ_QUEUED; - /* if a worker is waking up, notify workqueue */ + /* If a worker is waking up, notify the workqueue: */ if (p->flags & PF_WQ_WORKER) wq_worker_waking_up(p, cpu_of(rq)); } @@ -1690,7 +1636,7 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl * Mark the task runnable and perform wakeup-preemption. */ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags, - struct pin_cookie cookie) + struct rq_flags *rf) { check_preempt_curr(rq, p, wake_flags); p->state = TASK_RUNNING; @@ -1702,9 +1648,9 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags, * Our task @p is fully woken up and running; so its safe to * drop the rq->lock, hereafter rq is only used for statistics. */ - lockdep_unpin_lock(&rq->lock, cookie); + rq_unpin_lock(rq, rf); p->sched_class->task_woken(rq, p); - lockdep_repin_lock(&rq->lock, cookie); + rq_repin_lock(rq, rf); } if (rq->idle_stamp) { @@ -1723,7 +1669,7 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags, static void ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, - struct pin_cookie cookie) + struct rq_flags *rf) { int en_flags = ENQUEUE_WAKEUP; @@ -1738,7 +1684,7 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, #endif ttwu_activate(rq, p, en_flags); - ttwu_do_wakeup(rq, p, wake_flags, cookie); + ttwu_do_wakeup(rq, p, wake_flags, rf); } /* @@ -1757,7 +1703,7 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) if (task_on_rq_queued(p)) { /* check_preempt_curr() may use rq clock */ update_rq_clock(rq); - ttwu_do_wakeup(rq, p, wake_flags, rf.cookie); + ttwu_do_wakeup(rq, p, wake_flags, &rf); ret = 1; } __task_rq_unlock(rq, &rf); @@ -1770,15 +1716,15 @@ void sched_ttwu_pending(void) { struct rq *rq = this_rq(); struct llist_node *llist = llist_del_all(&rq->wake_list); - struct pin_cookie cookie; struct task_struct *p; unsigned long flags; + struct rq_flags rf; if (!llist) return; raw_spin_lock_irqsave(&rq->lock, flags); - cookie = lockdep_pin_lock(&rq->lock); + rq_pin_lock(rq, &rf); while (llist) { int wake_flags = 0; @@ -1789,10 +1735,10 @@ void sched_ttwu_pending(void) if (p->sched_remote_wakeup) wake_flags = WF_MIGRATED; - ttwu_do_activate(rq, p, wake_flags, cookie); + ttwu_do_activate(rq, p, wake_flags, &rf); } - lockdep_unpin_lock(&rq->lock, cookie); + rq_unpin_lock(rq, &rf); raw_spin_unlock_irqrestore(&rq->lock, flags); } @@ -1864,7 +1810,7 @@ void wake_up_if_idle(int cpu) raw_spin_lock_irqsave(&rq->lock, flags); if (is_idle_task(rq->curr)) smp_send_reschedule(cpu); - /* Else cpu is not in idle, do nothing here */ + /* Else CPU is not idle, do nothing here: */ raw_spin_unlock_irqrestore(&rq->lock, flags); } @@ -1881,20 +1827,20 |