diff options
Diffstat (limited to 'kernel')
32 files changed, 2579 insertions, 2406 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 74963d192c5d..ca9cb55b5855 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -453,8 +453,8 @@ static void fill_ac(acct_t *ac) spin_lock_irq(¤t->sighand->siglock); tty = current->signal->tty; /* Safe as we hold the siglock */ ac->ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0; - ac->ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); - ac->ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime))); + ac->ac_utime = encode_comp_t(nsec_to_AHZ(pacct->ac_utime)); + ac->ac_stime = encode_comp_t(nsec_to_AHZ(pacct->ac_stime)); ac->ac_flag = pacct->ac_flag; ac->ac_mem = encode_comp_t(pacct->ac_mem); ac->ac_minflt = encode_comp_t(pacct->ac_minflt); @@ -530,7 +530,7 @@ out: void acct_collect(long exitcode, int group_dead) { struct pacct_struct *pacct = ¤t->signal->pacct; - cputime_t utime, stime; + u64 utime, stime; unsigned long vsize = 0; if (group_dead && current->mm) { @@ -559,6 +559,7 @@ void acct_collect(long exitcode, int group_dead) pacct->ac_flag |= ACORE; if (current->flags & PF_SIGNALED) pacct->ac_flag |= AXSIG; + task_cputime(current, &utime, &stime); pacct->ac_utime += utime; pacct->ac_stime += stime; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 435c14a45118..660549656991 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -82,19 +82,19 @@ void __delayacct_blkio_end(void) int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) { - cputime_t utime, stime, stimescaled, utimescaled; + u64 utime, stime, stimescaled, utimescaled; unsigned long long t2, t3; unsigned long flags, t1; s64 tmp; task_cputime(tsk, &utime, &stime); tmp = (s64)d->cpu_run_real_total; - tmp += cputime_to_nsecs(utime + stime); + tmp += utime + stime; d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; task_cputime_scaled(tsk, &utimescaled, &stimescaled); tmp = (s64)d->cpu_scaled_run_real_total; - tmp += cputime_to_nsecs(utimescaled + stimescaled); + tmp += utimescaled + stimescaled; d->cpu_scaled_run_real_total = (tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp; diff --git a/kernel/exit.c b/kernel/exit.c index 8f14b866f9f6..8e5e21338b3a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -86,7 +86,7 @@ static void __exit_signal(struct task_struct *tsk) bool group_dead = thread_group_leader(tsk); struct sighand_struct *sighand; struct tty_struct *uninitialized_var(tty); - cputime_t utime, stime; + u64 utime, stime; sighand = rcu_dereference_check(tsk->sighand, lockdep_tasklist_lock_is_held()); @@ -1091,7 +1091,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) struct signal_struct *sig = p->signal; struct signal_struct *psig = current->signal; unsigned long maxrss; - cputime_t tgutime, tgstime; + u64 tgutime, tgstime; /* * The resource counters for the group leader are in its diff --git a/kernel/fork.c b/kernel/fork.c index 105c6676d93b..f6995cdfe714 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1314,7 +1314,7 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); if (cpu_limit != RLIM_INFINITY) { - sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit); + sig->cputime_expires.prof_exp = cpu_limit * NSEC_PER_SEC; sig->cputimer.running = true; } diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 9b349619f431..8464a5cbab97 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -783,6 +783,20 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) } EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); +void __sched +mutex_lock_io_nested(struct mutex *lock, unsigned int subclass) +{ + int token; + + might_sleep(); + + token = io_schedule_prepare(); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, + subclass, NULL, _RET_IP_, NULL, 0); + io_schedule_finish(token); +} +EXPORT_SYMBOL_GPL(mutex_lock_io_nested); + static inline int ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { @@ -950,6 +964,16 @@ int __sched mutex_lock_killable(struct mutex *lock) } EXPORT_SYMBOL(mutex_lock_killable); +void __sched mutex_lock_io(struct mutex *lock) +{ + int token; + + token = io_schedule_prepare(); + mutex_lock(lock); + io_schedule_finish(token); +} +EXPORT_SYMBOL_GPL(mutex_lock_io); + static noinline void __sched __mutex_lock_slowpath(struct mutex *lock) { diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 5e59b832ae2b..89ab6758667b 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -18,8 +18,8 @@ endif obj-y += core.o loadavg.o clock.o cputime.o obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o obj-y += wait.o swait.o completion.o idle.o -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o -obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o +obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o diff --git a/kernel/sched/auto_group.c b/kernel/sched/autogroup.c index da39489d2d80..da39489d2d80 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/autogroup.c diff --git a/kernel/sched/auto_group.h b/kernel/sched/autogroup.h index 890c95f2587a..890c95f2587a 100644 --- a/kernel/sched/auto_group.h +++ b/kernel/sched/autogroup.h diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index e85a725e5c34..ad64efe41722 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -77,41 +77,88 @@ EXPORT_SYMBOL_GPL(sched_clock); __read_mostly int sched_clock_running; +void sched_clock_init(void) +{ + sched_clock_running = 1; +} + #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -static struct static_key __sched_clock_stable = STATIC_KEY_INIT; -static int __sched_clock_stable_early; +/* + * We must start with !__sched_clock_stable because the unstable -> stable + * transition is accurate, while the stable -> unstable transition is not. + * + * Similarly we start with __sched_clock_stable_early, thereby assuming we + * will become stable, such that there's only a single 1 -> 0 transition. + */ +static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable); +static int __sched_clock_stable_early = 1; -int sched_clock_stable(void) +/* + * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset + */ +static __read_mostly u64 raw_offset; +static __read_mostly u64 gtod_offset; + +struct sched_clock_data { + u64 tick_raw; + u64 tick_gtod; + u64 clock; +}; + +static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data); + +static inline struct sched_clock_data *this_scd(void) { - return static_key_false(&__sched_clock_stable); + return this_cpu_ptr(&sched_clock_data); } -static void __set_sched_clock_stable(void) +static inline struct sched_clock_data *cpu_sdc(int cpu) { - if (!sched_clock_stable()) - static_key_slow_inc(&__sched_clock_stable); + return &per_cpu(sched_clock_data, cpu); +} - tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); +int sched_clock_stable(void) +{ + return static_branch_likely(&__sched_clock_stable); } -void set_sched_clock_stable(void) +static void __set_sched_clock_stable(void) { - __sched_clock_stable_early = 1; + struct sched_clock_data *scd = this_scd(); - smp_mb(); /* matches sched_clock_init() */ + /* + * Attempt to make the (initial) unstable->stable transition continuous. + */ + raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw); - if (!sched_clock_running) - return; + printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n", + scd->tick_gtod, gtod_offset, + scd->tick_raw, raw_offset); - __set_sched_clock_stable(); + static_branch_enable(&__sched_clock_stable); + tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); } static void __clear_sched_clock_stable(struct work_struct *work) { - /* XXX worry about clock continuity */ - if (sched_clock_stable()) - static_key_slow_dec(&__sched_clock_stable); + struct sched_clock_data *scd = this_scd(); + + /* + * Attempt to make the stable->unstable transition continuous. + * + * Trouble is, this is typically called from the TSC watchdog + * timer, which is late per definition. This means the tick + * values can already be screwy. + * + * Still do what we can. + */ + gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod); + + printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n", + scd->tick_gtod, gtod_offset, + scd->tick_raw, raw_offset); + static_branch_disable(&__sched_clock_stable); tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE); } @@ -121,47 +168,15 @@ void clear_sched_clock_stable(void) { __sched_clock_stable_early = 0; - smp_mb(); /* matches sched_clock_init() */ - - if (!sched_clock_running) - return; + smp_mb(); /* matches sched_clock_init_late() */ - schedule_work(&sched_clock_work); + if (sched_clock_running == 2) + schedule_work(&sched_clock_work); } -struct sched_clock_data { - u64 tick_raw; - u64 tick_gtod; - u64 clock; -}; - -static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data); - -static inline struct sched_clock_data *this_scd(void) +void sched_clock_init_late(void) { - return this_cpu_ptr(&sched_clock_data); -} - -static inline struct sched_clock_data *cpu_sdc(int cpu) -{ - return &per_cpu(sched_clock_data, cpu); -} - -void sched_clock_init(void) -{ - u64 ktime_now = ktime_to_ns(ktime_get()); - int cpu; - - for_each_possible_cpu(cpu) { - struct sched_clock_data *scd = cpu_sdc(cpu); - - scd->tick_raw = 0; - scd->tick_gtod = ktime_now; - scd->clock = ktime_now; - } - - sched_clock_running = 1; - + sched_clock_running = 2; /* * Ensure that it is impossible to not do a static_key update. * @@ -173,8 +188,6 @@ void sched_clock_init(void) if (__sched_clock_stable_early) __set_sched_clock_stable(); - else - __clear_sched_clock_stable(NULL); } /* @@ -216,7 +229,7 @@ again: * scd->tick_gtod + TICK_NSEC); */ - clock = scd->tick_gtod + delta; + clock = scd->tick_gtod + gtod_offset + delta; min_clock = wrap_max(scd->tick_gtod, old_clock); max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC); @@ -302,7 +315,7 @@ u64 sched_clock_cpu(int cpu) u64 clock; if (sched_clock_stable()) - return sched_clock(); + return sched_clock() + raw_offset; if (unlikely(!sched_clock_running)) return 0ull; @@ -323,23 +336,22 @@ EXPORT_SYMBOL_GPL(sched_clock_cpu); void sched_clock_tick(void) { struct sched_clock_data *scd; - u64 now, now_gtod; - - if (sched_clock_stable()) - return; - - if (unlikely(!sched_clock_running)) - return; WARN_ON_ONCE(!irqs_disabled()); + /* + * Update these values even if sched_clock_stable(), because it can + * become unstable at any point in time at which point we need some + * values to fall back on. + * + * XXX arguably we can skip this if we expose tsc_clocksource_reliable + */ scd = this_scd(); - now_gtod = ktime_to_ns(ktime_get()); - now = sched_clock(); + scd->tick_raw = sched_clock(); + scd->tick_gtod = ktime_get_ns(); - scd->tick_raw = now; - scd->tick_gtod = now_gtod; - sched_clock_local(scd); + if (!sched_clock_stable() && likely(sched_clock_running)) + sched_clock_local(scd); } /* @@ -366,11 +378,6 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ -void sched_clock_init(void) -{ - sched_clock_running = 1; -} - u64 sched_clock_cpu(int cpu) { if (unlikely(!sched_clock_running)) @@ -378,6 +385,7 @@ u64 sched_clock_cpu(int cpu) return sched_clock(); } + #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ /* diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index 8d0f35debf35..f063a25d4449 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -31,7 +31,8 @@ void complete(struct completion *x) unsigned long flags; spin_lock_irqsave(&x->wait.lock, flags); - x->done++; + if (x->done != UINT_MAX) + x->done++; __wake_up_locked(&x->wait, TASK_NORMAL, 1); spin_unlock_irqrestore(&x->wait.lock, flags); } @@ -51,7 +52,7 @@ void complete_all(struct completion *x) unsigned long flags; spin_lock_irqsave(&x->wait.lock, flags); - x->done += UINT_MAX/2; + x->done = UINT_MAX; __wake_up_locked(&x->wait, TASK_NORMAL, 0); spin_unlock_irqrestore(&x->wait.lock, flags); } @@ -79,7 +80,8 @@ do_wait_for_common(struct completion *x, if (!x->done) return timeout; } - x->done--; + if (x->done != UINT_MAX) + x->done--; return timeout ?: 1; } @@ -280,7 +282,7 @@ bool try_wait_for_completion(struct completion *x) spin_lock_irqsave(&x->wait.lock, flags); if (!x->done) ret = 0; - else + else if (x->done != UINT_MAX) x->done--; spin_unlock_irqrestore(&x->wait.lock, flags); return ret; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c56fb57f2991..34e2291a9a6c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1,88 +1,28 @@ /* * kernel/sched/core.c * - * Kernel scheduler and related syscalls + * Core kernel scheduler code and related syscalls * * Copyright (C) 1991-2002 Linus Torvalds - * - * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and - * make semaphores SMP safe - * 1998-11-19 Implemented schedule_timeout() and related stuff - * by Andrea Arcangeli - * 2002-01-04 New ultra-scalable O(1) scheduler by Ingo Molnar: - * hybrid priority-list and round-robin design with - * an array-switch method of distributing timeslices - * and per-CPU runqueues. Cleanups and useful suggestions - * by Davide Libenzi, preemptible kernel bits by Robert Love. - * 2003-09-03 Interactivity tuning by Con Kolivas. - * 2004-04-02 Scheduler domains code by Nick Piggin - * 2007-04-15 Work begun on replacing all interactivity tuning with a - * fair scheduling design by Con Kolivas. - * 2007-05-05 Load balancing (smp-nice) and other improvements - * by Peter Williams - * 2007-05-06 Interactivity improvements to CFS by Mike Galbraith - * 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri - * 2007-11-29 RT balancing improvements by Steven Rostedt, Gregory Haskins, - * Thomas Gleixner, Mike Kravetz */ - -#include <linux/kasan.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/nmi.h> -#include <linux/init.h> -#include <linux/uaccess.h> -#include <linux/highmem.h> -#include <linux/mmu_context.h> -#include <linux/interrupt.h> -#include <linux/capability.h> -#include <linux/completion.h> -#include <linux/kernel_stat.h> -#include <linux/debug_locks.h> -#include <linux/perf_event.h> -#include <linux/security.h> -#include <linux/notifier.h> -#include <linux/profile.h> -#include <linux/freezer.h> -#include <linux/vmalloc.h> -#include <linux/blkdev.h> -#include <linux/delay.h> -#include <linux/pid_namespace.h> -#include <linux/smp.h> -#include <linux/threads.h> -#include <linux/timer.h> -#include <linux/rcupdate.h> -#include <linux/cpu.h> +#include <linux/sched.h> #include <linux/cpuset.h> -#include <linux/percpu.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/sysctl.h> -#include <linux/syscalls.h> -#include <linux/times.h> -#include <linux/tsacct_kern.h> -#include <linux/kprobes.h> #include <linux/delayacct.h> -#include <linux/unistd.h> -#include <linux/pagemap.h> -#include <linux/hrtimer.h> -#include <linux/tick.h> -#include <linux/ctype.h> -#include <linux/ftrace.h> -#include <linux/slab.h> #include <linux/init_task.h> #include <linux/context_tracking.h> -#include <linux/compiler.h> -#include <linux/frame.h> + +#include <linux/blkdev.h> +#include <linux/kprobes.h> +#include <linux/mmu_context.h> +#include <linux/module.h> +#include <linux/nmi.h> #include <linux/prefetch.h> -#include <linux/mutex.h> +#include <linux/profile.h> +#include <linux/security.h> +#include <linux/syscalls.h> #include <asm/switch_to.h> #include <asm/tlb.h> -#include <asm/irq_regs.h> -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#endif #include "sched.h" #include "../workqueue_internal.h" @@ -91,27 +31,8 @@ #define CREATE_TRACE_POINTS #include <trace/events/sched.h> -DEFINE_MUTEX(sched_domains_mutex); DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -static void update_rq_clock_task(struct rq *rq, s64 delta); - -void update_rq_clock(struct rq *rq) -{ - s64 delta; - - lockdep_assert_held(&rq->lock); - - if (rq->clock_skip_update & RQCF_ACT_SKIP) - return; - - delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; - if (delta < 0) - return; - rq->clock += delta; - update_rq_clock_task(rq, delta); -} - /* * Debugging: various feature bits */ @@ -140,7 +61,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC; /* - * period over which we measure -rt task cpu usage in us. + * period over which we measure -rt task CPU usage in us. * default: 1s */ unsigned int sysctl_sched_rt_period = 1000000; @@ -153,7 +74,7 @@ __read_mostly int scheduler_running; */ int sysctl_sched_rt_runtime = 950000; -/* cpus with isolated domains */ +/* CPUs with isolated domains */ cpumask_var_t cpu_isolated_map; /* @@ -185,7 +106,7 @@ struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) rq = task_rq(p); raw_spin_lock(&rq->lock); if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { - rf->cookie = lockdep_pin_lock(&rq->lock); + rq_pin_lock(rq, rf); return rq; } raw_spin_unlock(&rq->lock); @@ -221,11 +142,11 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) * If we observe the old cpu in task_rq_lock, the acquire of * the old rq->lock will fully serialize against the stores. * - * If we observe the new cpu in task_rq_lock, the acquire will + * If we observe the new CPU in task_rq_lock, the acquire will * pair with the WMB to ensure we must then also see migrating. */ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { - rf->cookie = lockdep_pin_lock(&rq->lock); + rq_pin_lock(rq, rf); return rq; } raw_spin_unlock(&rq->lock); @@ -236,6 +157,84 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) } } +/* + * RQ-clock updating methods: + */ + +static void update_rq_clock_task(struct rq *rq, s64 delta) +{ +/* + * In theory, the compile should just see 0 here, and optimize out the call + * to sched_rt_avg_update. But I don't trust it... + */ +#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) + s64 steal = 0, irq_delta = 0; +#endif +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; + + /* + * Since irq_time is only updated on {soft,}irq_exit, we might run into + * this case when a previous update_rq_clock() happened inside a + * {soft,}irq region. + * + * When this happens, we stop ->clock_task and only update the + * prev_irq_time stamp to account for the part that fit, so that a next + * update will consume the rest. This ensures ->clock_task is + * monotonic. + * + * It does however cause some slight miss-attribution of {soft,}irq + * time, a more accurate solution would be to update the irq_time using + * the current rq->clock timestamp, except that would require using + * atomic ops. + */ + if (irq_delta > delta) + irq_delta = delta; + + rq->prev_irq_time += irq_delta; + delta -= irq_delta; +#endif +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING + if (static_key_false((¶virt_steal_rq_enabled))) { + steal = paravirt_steal_clock(cpu_of(rq)); + steal -= rq->prev_steal_time_rq; + + if (unlikely(steal > delta)) + steal = delta; + + rq->prev_steal_time_rq += steal; + delta -= steal; + } +#endif + + rq->clock_task += delta; + +#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) + if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY)) + sched_rt_avg_update(rq, irq_delta + steal); +#endif +} + +void update_rq_clock(struct rq *rq) +{ + s64 delta; + + lockdep_assert_held(&rq->lock); + + if (rq->clock_update_flags & RQCF_ACT_SKIP) + return; + +#ifdef CONFIG_SCHED_DEBUG + rq->clock_update_flags |= RQCF_UPDATED; +#endif + delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; + if (delta < 0) + return; + rq->clock += delta; + update_rq_clock_task(rq, delta); +} + + #ifdef CONFIG_SCHED_HRTICK /* * Use HR-timers to deliver accurate preemption points. @@ -458,7 +457,7 @@ void wake_up_q(struct wake_q_head *head) task = container_of(node, struct task_struct, wake_q); BUG_ON(!task); - /* task can safely be re-inserted now */ + /* Task can safely be re-inserted now: */ node = node->next; task->wake_q.next = NULL; @@ -516,12 +515,12 @@ void resched_cpu(int cpu) #ifdef CONFIG_SMP #ifdef CONFIG_NO_HZ_COMMON /* - * In the semi idle case, use the nearest busy cpu for migrating timers - * from an idle cpu. This is good for power-savings. + * In the semi idle case, use the nearest busy CPU for migrating timers + * from an idle CPU. This is good for power-savings. * * We don't do similar optimization for completely idle system, as - * selecting an idle cpu will add more delays to the timers than intended - * (as that cpu's timer base may not be uptodate wrt jiffies etc). + * selecting an idle CPU will add more delays to the timers than intended + * (as that CPU's timer base may not be uptodate wrt jiffies etc). */ int get_nohz_timer_target(void) { @@ -550,6 +549,7 @@ unlock: rcu_read_unlock(); return cpu; } + /* * When add_timer_on() enqueues a timer into the timer wheel of an * idle CPU then this timer might expire before the next timer event @@ -784,60 +784,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags) dequeue_task(rq, p, flags); } -static void update_rq_clock_task(struct rq *rq, s64 delta) -{ -/* - * In theory, the compile should just see 0 here, and optimize out the call - * to sched_rt_avg_update. But I don't trust it... - */ -#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) - s64 steal = 0, irq_delta = 0; -#endif -#ifdef CONFIG_IRQ_TIME_ACCOUNTING - irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; - - /* - * Since irq_time is only updated on {soft,}irq_exit, we might run into - * this case when a previous update_rq_clock() happened inside a - * {soft,}irq region. - * - * When this happens, we stop ->clock_task and only update the - * prev_irq_time stamp to account for the part that fit, so that a next - * update will consume the rest. This ensures ->clock_task is - * monotonic. - * - * It does however cause some slight miss-attribution of {soft,}irq - * time, a more accurate solution would be to update the irq_time using - * the current rq->clock timestamp, except that would require using - * atomic ops. - */ - if (irq_delta > delta) - irq_delta = delta; - - rq->prev_irq_time += irq_delta; - delta -= irq_delta; -#endif -#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING - if (static_key_false((¶virt_steal_rq_enabled))) { - steal = paravirt_steal_clock(cpu_of(rq)); - steal -= rq->prev_steal_time_rq; - - if (unlikely(steal > delta)) - steal = delta; - - rq->prev_steal_time_rq += steal; - delta -= steal; - } -#endif - - rq->clock_task += delta; - -#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) - if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY)) - sched_rt_avg_update(rq, irq_delta + steal); -#endif -} - void sched_set_stop_task(int cpu, struct task_struct *stop) { struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; @@ -1018,7 +964,7 @@ struct migration_arg { }; /* - * Move (not current) task off this cpu, onto dest cpu. We're doing + * Move (not current) task off this CPU, onto the destination CPU. We're doing * this because either it can't run here any more (set_cpus_allowed() * away from this CPU, or CPU going down), or because we're * attempting to rebalance this task on exec (sched_exec). @@ -1052,8 +998,8 @@ static int migration_cpu_stop(void *data) struct rq *rq = this_rq(); /* - * The original target cpu might have gone down and we might - * be on another cpu but it doesn't matter. + * The original target CPU might have gone down and we might + * be on another CPU but it doesn't matter. */ local_irq_disable(); /* @@ -1171,7 +1117,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, if (p->flags & PF_KTHREAD) { /* * For kernel threads that do indeed end up on online && - * !active we want to ensure they are strict per-cpu threads. + * !active we want to ensure they are strict per-CPU threads. */ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && !cpumask_intersects(new_mask, cpu_active_mask) && @@ -1195,9 +1141,9 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, * OK, since we're going to drop the lock immediately * afterwards anyway. */ - lockdep_unpin_lock(&rq->lock, rf.cookie); + rq_unpin_lock(rq, &rf); rq = move_queued_task(rq, p, dest_cpu); - lockdep_repin_lock(&rq->lock, rf.cookie); + rq_repin_lock(rq, &rf); } out: task_rq_unlock(rq, p, &rf); @@ -1276,7 +1222,7 @@ static void __migrate_swap_task(struct task_struct *p, int cpu) /* * Task isn't running anymore; make it appear like we migrated * it before it went to sleep. This means on wakeup we make the - * previous cpu our target instead of where it really is. + * previous CPU our target instead of where it really is. */ p->wake_cpu = cpu; } @@ -1508,12 +1454,12 @@ EXPORT_SYMBOL_GPL(kick_process); * * - on cpu-up we allow per-cpu kthreads on the online && !active cpu, * see __set_cpus_allowed_ptr |