diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-09 14:28:42 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-09 14:28:42 -0800 |
| commit | 23e8fe2e16587494268510c1bc9f6952f50f0311 (patch) | |
| tree | 32618fbd16f87ea39c303021479eaac336bb815a /kernel | |
| parent | 30d46827c2744f56bb31460007f2d16455f10720 (diff) | |
| parent | f49028292c13b958fdf4f36c8cc8119d0dde187b (diff) | |
| download | linux-23e8fe2e16587494268510c1bc9f6952f50f0311.tar.gz linux-23e8fe2e16587494268510c1bc9f6952f50f0311.tar.bz2 linux-23e8fe2e16587494268510c1bc9f6952f50f0311.zip | |
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU updates from Ingo Molnar:
"The main RCU changes in this cycle are:
- Documentation updates.
- Miscellaneous fixes.
- Preemptible-RCU fixes, including fixing an old bug in the
interaction of RCU priority boosting and CPU hotplug.
- SRCU updates.
- RCU CPU stall-warning updates.
- RCU torture-test updates"
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (54 commits)
rcu: Initialize tiny RCU stall-warning timeouts at boot
rcu: Fix RCU CPU stall detection in tiny implementation
rcu: Add GP-kthread-starvation checks to CPU stall warnings
rcu: Make cond_resched_rcu_qs() apply to normal RCU flavors
rcu: Optionally run grace-period kthreads at real-time priority
ksoftirqd: Use new cond_resched_rcu_qs() function
ksoftirqd: Enable IRQs and call cond_resched() before poking RCU
rcutorture: Add more diagnostics in rcu_barrier() test failure case
torture: Flag console.log file to prevent holdovers from earlier runs
torture: Add "-enable-kvm -soundhw pcspk" to qemu command line
rcutorture: Handle different mpstat versions
rcutorture: Check from beginning to end of grace period
rcu: Remove redundant rcu_batches_completed() declaration
rcutorture: Drop rcu_torture_completed() and friends
rcu: Provide rcu_batches_completed_sched() for TINY_RCU
rcutorture: Use unsigned for Reader Batch computations
rcutorture: Make build-output parsing correctly flag RCU's warnings
rcu: Make _batches_completed() functions return unsigned long
rcutorture: Issue warnings on close calls due to Reader Batch blows
documentation: Fix smp typo in memory-barriers.txt
...
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/cpu.c | 56 | ||||
| -rw-r--r-- | kernel/notifier.c | 3 | ||||
| -rw-r--r-- | kernel/power/Kconfig | 1 | ||||
| -rw-r--r-- | kernel/rcu/Makefile | 3 | ||||
| -rw-r--r-- | kernel/rcu/rcu.h | 6 | ||||
| -rw-r--r-- | kernel/rcu/rcutorture.c | 66 | ||||
| -rw-r--r-- | kernel/rcu/srcu.c | 2 | ||||
| -rw-r--r-- | kernel/rcu/tiny.c | 113 | ||||
| -rw-r--r-- | kernel/rcu/tiny_plugin.h | 9 | ||||
| -rw-r--r-- | kernel/rcu/tree.c | 355 | ||||
| -rw-r--r-- | kernel/rcu/tree.h | 62 | ||||
| -rw-r--r-- | kernel/rcu/tree_plugin.h | 271 | ||||
| -rw-r--r-- | kernel/rcu/tree_trace.c | 8 | ||||
| -rw-r--r-- | kernel/softirq.c | 3 |
14 files changed, 440 insertions, 518 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c index 5d220234b3ca..1972b161c61e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -58,22 +58,23 @@ static int cpu_hotplug_disabled; static struct { struct task_struct *active_writer; - struct mutex lock; /* Synchronizes accesses to refcount, */ + /* wait queue to wake up the active_writer */ + wait_queue_head_t wq; + /* verifies that no writer will get active while readers are active */ + struct mutex lock; /* * Also blocks the new readers during * an ongoing cpu hotplug operation. */ - int refcount; - /* And allows lockless put_online_cpus(). */ - atomic_t puts_pending; + atomic_t refcount; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif } cpu_hotplug = { .active_writer = NULL, + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), - .refcount = 0, #ifdef CONFIG_DEBUG_LOCK_ALLOC .dep_map = {.name = "cpu_hotplug.lock" }, #endif @@ -86,15 +87,6 @@ static struct { #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) -static void apply_puts_pending(int max) -{ - int delta; - - if (atomic_read(&cpu_hotplug.puts_pending) >= max) { - delta = atomic_xchg(&cpu_hotplug.puts_pending, 0); - cpu_hotplug.refcount -= delta; - } -} void get_online_cpus(void) { @@ -103,8 +95,7 @@ void get_online_cpus(void) return; cpuhp_lock_acquire_read(); mutex_lock(&cpu_hotplug.lock); - apply_puts_pending(65536); - cpu_hotplug.refcount++; + atomic_inc(&cpu_hotplug.refcount); mutex_unlock(&cpu_hotplug.lock); } EXPORT_SYMBOL_GPL(get_online_cpus); @@ -116,8 +107,7 @@ bool try_get_online_cpus(void) if (!mutex_trylock(&cpu_hotplug.lock)) return false; cpuhp_lock_acquire_tryread(); - apply_puts_pending(65536); - cpu_hotplug.refcount++; + atomic_inc(&cpu_hotplug.refcount); mutex_unlock(&cpu_hotplug.lock); return true; } @@ -125,20 +115,18 @@ EXPORT_SYMBOL_GPL(try_get_online_cpus); void put_online_cpus(void) { + int refcount; + if (cpu_hotplug.active_writer == current) return; - if (!mutex_trylock(&cpu_hotplug.lock)) { - atomic_inc(&cpu_hotplug.puts_pending); - cpuhp_lock_release(); - return; - } - if (WARN_ON(!cpu_hotplug.refcount)) - cpu_hotplug.refcount++; /* try to fix things up */ + refcount = atomic_dec_return(&cpu_hotplug.refcount); + if (WARN_ON(refcount < 0)) /* try to fix things up */ + atomic_inc(&cpu_hotplug.refcount); + + if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq)) + wake_up(&cpu_hotplug.wq); - if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer)) - wake_up_process(cpu_hotplug.active_writer); - mutex_unlock(&cpu_hotplug.lock); cpuhp_lock_release(); } @@ -168,18 +156,20 @@ EXPORT_SYMBOL_GPL(put_online_cpus); */ void cpu_hotplug_begin(void) { - cpu_hotplug.active_writer = current; + DEFINE_WAIT(wait); + cpu_hotplug.active_writer = current; cpuhp_lock_acquire(); + for (;;) { mutex_lock(&cpu_hotplug.lock); - apply_puts_pending(1); - if (likely(!cpu_hotplug.refcount)) - break; - __set_current_state(TASK_UNINTERRUPTIBLE); + prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE); + if (likely(!atomic_read(&cpu_hotplug.refcount))) + break; mutex_unlock(&cpu_hotplug.lock); schedule(); } + finish_wait(&cpu_hotplug.wq, &wait); } void cpu_hotplug_done(void) diff --git a/kernel/notifier.c b/kernel/notifier.c index 4803da6eab62..ae9fc7cc360e 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -402,6 +402,7 @@ int raw_notifier_call_chain(struct raw_notifier_head *nh, } EXPORT_SYMBOL_GPL(raw_notifier_call_chain); +#ifdef CONFIG_SRCU /* * SRCU notifier chain routines. Registration and unregistration * use a mutex, and call_chain is synchronized by SRCU (no locks). @@ -528,6 +529,8 @@ void srcu_init_notifier_head(struct srcu_notifier_head *nh) } EXPORT_SYMBOL_GPL(srcu_init_notifier_head); +#endif /* CONFIG_SRCU */ + static ATOMIC_NOTIFIER_HEAD(die_chain); int notrace notify_die(enum die_val val, const char *str, diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 48b28d387c7f..7e01f78f0417 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -251,6 +251,7 @@ config APM_EMULATION config PM_OPP bool + select SRCU ---help--- SOCs have a standard set of tuples consisting of frequency and voltage pairs that the device will support per voltage domain. This diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index e6fae503d1bc..50a808424b06 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile @@ -1,4 +1,5 @@ -obj-y += update.o srcu.o +obj-y += update.o +obj-$(CONFIG_SRCU) += srcu.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += tree.o obj-$(CONFIG_PREEMPT_RCU) += tree.o diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 07bb02eda844..80adef7d4c3d 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -137,4 +137,10 @@ int rcu_jiffies_till_stall_check(void); void rcu_early_boot_tests(void); +/* + * This function really isn't for public consumption, but RCU is special in + * that context switches can allow the state machine to make progress. + */ +extern void resched_cpu(int cpu); + #endif /* __LINUX_RCU_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 4d559baf06e0..30d42aa55d83 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -244,7 +244,8 @@ struct rcu_torture_ops { int (*readlock)(void); void (*read_delay)(struct torture_random_state *rrsp); void (*readunlock)(int idx); - int (*completed)(void); + unsigned long (*started)(void); + unsigned long (*completed)(void); void (*deferred_free)(struct rcu_torture *p); void (*sync)(void); void (*exp_sync)(void); @@ -296,11 +297,6 @@ static void rcu_torture_read_unlock(int idx) __releases(RCU) rcu_read_unlock(); } -static int rcu_torture_completed(void) -{ - return rcu_batches_completed(); -} - /* * Update callback in the pipe. This should be invoked after a grace period. */ @@ -356,7 +352,7 @@ rcu_torture_cb(struct rcu_head *p) cur_ops->deferred_free(rp); } -static int rcu_no_completed(void) +static unsigned long rcu_no_completed(void) { return 0; } @@ -377,7 +373,8 @@ static struct rcu_torture_ops rcu_ops = { .readlock = rcu_torture_read_lock, .read_delay = rcu_read_delay, .readunlock = rcu_torture_read_unlock, - .completed = rcu_torture_completed, + .started = rcu_batches_started, + .completed = rcu_batches_completed, .deferred_free = rcu_torture_deferred_free, .sync = synchronize_rcu, .exp_sync = synchronize_rcu_expedited, @@ -407,11 +404,6 @@ static void rcu_bh_torture_read_unlock(int idx) __releases(RCU_BH) rcu_read_unlock_bh(); } -static int rcu_bh_torture_completed(void) -{ - return rcu_batches_completed_bh(); -} - static void rcu_bh_torture_deferred_free(struct rcu_torture *p) { call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); @@ -423,7 +415,8 @@ static struct rcu_torture_ops rcu_bh_ops = { .readlock = rcu_bh_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = rcu_bh_torture_read_unlock, - .completed = rcu_bh_torture_completed, + .started = rcu_batches_started_bh, + .completed = rcu_batches_completed_bh, .deferred_free = rcu_bh_torture_deferred_free, .sync = synchronize_rcu_bh, .exp_sync = synchronize_rcu_bh_expedited, @@ -466,6 +459,7 @@ static struct rcu_torture_ops rcu_busted_ops = { .readlock = rcu_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = rcu_torture_read_unlock, + .started = rcu_no_completed, .completed = rcu_no_completed, .deferred_free = rcu_busted_torture_deferred_free, .sync = synchronize_rcu_busted, @@ -510,7 +504,7 @@ static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) srcu_read_unlock(&srcu_ctl, idx); } -static int srcu_torture_completed(void) +static unsigned long srcu_torture_completed(void) { return srcu_batches_completed(&srcu_ctl); } @@ -564,6 +558,7 @@ static struct rcu_torture_ops srcu_ops = { .readlock = srcu_torture_read_lock, .read_delay = srcu_read_delay, .readunlock = srcu_torture_read_unlock, + .started = NULL, .completed = srcu_torture_completed, .deferred_free = srcu_torture_deferred_free, .sync = srcu_torture_synchronize, @@ -600,7 +595,8 @@ static struct rcu_torture_ops sched_ops = { .readlock = sched_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = sched_torture_read_unlock, - .completed = rcu_no_completed, + .started = rcu_batches_started_sched, + .completed = rcu_batches_completed_sched, .deferred_free = rcu_sched_torture_deferred_free, .sync = synchronize_sched, .exp_sync = synchronize_sched_expedited, @@ -638,6 +634,7 @@ static struct rcu_torture_ops tasks_ops = { .readlock = tasks_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = tasks_torture_read_unlock, + .started = rcu_no_completed, .completed = rcu_no_completed, .deferred_free = rcu_tasks_torture_deferred_free, .sync = synchronize_rcu_tasks, @@ -1015,8 +1012,8 @@ static void rcutorture_trace_dump(void) static void rcu_torture_timer(unsigned long unused) { int idx; - int completed; - int completed_end; + unsigned long started; + unsigned long completed; static DEFINE_TORTURE_RANDOM(rand); static DEFINE_SPINLOCK(rand_lock); struct rcu_torture *p; @@ -1024,7 +1021,10 @@ static void rcu_torture_timer(unsigned long unused) unsigned long long ts; idx = cur_ops->readlock(); - completed = cur_ops->completed(); + if (cur_ops->started) + started = cur_ops->started(); + else + started = cur_ops->completed(); ts = rcu_trace_clock_local(); p = rcu_dereference_check(rcu_torture_current, rcu_read_lock_bh_held() || @@ -1047,14 +1047,16 @@ static void rcu_torture_timer(unsigned long unused) /* Should not happen, but... */ pipe_count = RCU_TORTURE_PIPE_LEN; } - completed_end = cur_ops->completed(); + completed = cur_ops->completed(); if (pipe_count > 1) { do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts, - completed, completed_end); + started, completed); rcutorture_trace_dump(); } __this_cpu_inc(rcu_torture_count[pipe_count]); - completed = completed_end - completed; + completed = completed - started; + if (cur_ops->started) + completed++; if (completed > RCU_TORTURE_PIPE_LEN) { /* Should not happen, but... */ completed = RCU_TORTURE_PIPE_LEN; @@ -1073,8 +1075,8 @@ static void rcu_torture_timer(unsigned long unused) static int rcu_torture_reader(void *arg) { - int completed; - int completed_end; + unsigned long started; + unsigned long completed; int idx; DEFINE_TORTURE_RANDOM(rand); struct rcu_torture *p; @@ -1093,7 +1095,10 @@ rcu_torture_reader(void *arg) mod_timer(&t, jiffies + 1); } idx = cur_ops->readlock(); - completed = cur_ops->completed(); + if (cur_ops->started) + started = cur_ops->started(); + else + started = cur_ops->completed(); ts = rcu_trace_clock_local(); p = rcu_dereference_check(rcu_torture_current, rcu_read_lock_bh_held() || @@ -1114,14 +1119,16 @@ rcu_torture_reader(void *arg) /* Should not happen, but... */ pipe_count = RCU_TORTURE_PIPE_LEN; } - completed_end = cur_ops->completed(); + completed = cur_ops->completed(); if (pipe_count > 1) { do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, - ts, completed, completed_end); + ts, started, completed); rcutorture_trace_dump(); } __this_cpu_inc(rcu_torture_count[pipe_count]); - completed = completed_end - completed; + completed = completed - started; + if (cur_ops->started) + completed++; if (completed > RCU_TORTURE_PIPE_LEN) { /* Should not happen, but... */ completed = RCU_TORTURE_PIPE_LEN; @@ -1420,6 +1427,9 @@ static int rcu_torture_barrier(void *arg) cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */ if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) { n_rcu_torture_barrier_error++; + pr_err("barrier_cbs_invoked = %d, n_barrier_cbs = %d\n", + atomic_read(&barrier_cbs_invoked), + n_barrier_cbs); WARN_ON_ONCE(1); } n_barrier_successes++; diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index e037f3eb2f7b..445bf8ffe3fb 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -546,7 +546,7 @@ EXPORT_SYMBOL_GPL(srcu_barrier); * Report the number of batches, correlated with, but not necessarily * precisely the same as, the number of grace periods that have elapsed. */ -long srcu_batches_completed(struct srcu_struct *sp) +unsigned long srcu_batches_completed(struct srcu_struct *sp) { return sp->completed; } diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 0db5649f8817..cc9ceca7bde1 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -47,54 +47,14 @@ static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_ctrlblk *rcp); -static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; - #include "tiny_plugin.h" -/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */ -static void rcu_idle_enter_common(long long newval) -{ - if (newval) { - RCU_TRACE(trace_rcu_dyntick(TPS("--="), - rcu_dynticks_nesting, newval)); - rcu_dynticks_nesting = newval; - return; - } - RCU_TRACE(trace_rcu_dyntick(TPS("Start"), - rcu_dynticks_nesting, newval)); - if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - - RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"), - rcu_dynticks_nesting, newval)); - ftrace_dump(DUMP_ALL); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! */ - } - rcu_sched_qs(); /* implies rcu_bh_inc() */ - barrier(); - rcu_dynticks_nesting = newval; -} - /* * Enter idle, which is an extended quiescent state if we have fully - * entered that mode (i.e., if the new value of dynticks_nesting is zero). + * entered that mode. */ void rcu_idle_enter(void) { - unsigned long flags; - long long newval; - - local_irq_save(flags); - WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); - if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == - DYNTICK_TASK_NEST_VALUE) - newval = 0; - else - newval = rcu_dynticks_nesting - DYNTICK_TASK_NEST_VALUE; - rcu_idle_enter_common(newval); - local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_enter); @@ -103,55 +63,14 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter); */ void rcu_irq_exit(void) { - unsigned long flags; - long long newval; - - local_irq_save(flags); - newval = rcu_dynticks_nesting - 1; - WARN_ON_ONCE(newval < 0); - rcu_idle_enter_common(newval); - local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_irq_exit); -/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */ -static void rcu_idle_exit_common(long long oldval) -{ - if (oldval) { - RCU_TRACE(trace_rcu_dyntick(TPS("++="), - oldval, rcu_dynticks_nesting)); - return; - } - RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting)); - if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - - RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"), - oldval, rcu_dynticks_nesting)); - ftrace_dump(DUMP_ALL); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! */ - } -} - /* * Exit idle, so that we are no longer in an extended quiescent state. */ void rcu_idle_exit(void) { - unsigned long flags; - long long oldval; - - local_irq_save(flags); - oldval = rcu_dynticks_nesting; - WARN_ON_ONCE(rcu_dynticks_nesting < 0); - if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) - rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE; - else - rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; - rcu_idle_exit_common(oldval); - local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_exit); @@ -160,15 +79,6 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit); */ void rcu_irq_enter(void) { - unsigned long flags; - long long oldval; - - local_irq_save(flags); - oldval = rcu_dynticks_nesting; - rcu_dynticks_nesting++; - WARN_ON_ONCE(rcu_dynticks_nesting == 0); - rcu_idle_exit_common(oldval); - local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_irq_enter); @@ -179,23 +89,13 @@ EXPORT_SYMBOL_GPL(rcu_irq_enter); */ bool notrace __rcu_is_watching(void) { - return rcu_dynticks_nesting; + return true; } EXPORT_SYMBOL(__rcu_is_watching); #endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ /* - * Test whether the current CPU was interrupted from idle. Nested - * interrupts don't count, we must be running at the first interrupt - * level. - */ -static int rcu_is_cpu_rrupt_from_idle(void) -{ - return rcu_dynticks_nesting <= 1; -} - -/* * Helper function for rcu_sched_qs() and rcu_bh_qs(). * Also irqs are disabled to avoid confusion due to interrupt handlers * invoking call_rcu(). @@ -250,7 +150,7 @@ void rcu_bh_qs(void) void rcu_check_callbacks(int user) { RCU_TRACE(check_cpu_stalls()); - if (user || rcu_is_cpu_rrupt_from_idle()) + if (user) rcu_sched_qs(); else if (!in_softirq()) rcu_bh_qs(); @@ -357,6 +257,11 @@ static void __call_rcu(struct rcu_head *head, rcp->curtail = &head->next; RCU_TRACE(rcp->qlen++); local_irq_restore(flags); + + if (unlikely(is_idle_task(current))) { + /* force scheduling for rcu_sched_qs() */ + resched_cpu(0); + } } /* @@ -383,6 +288,8 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); void __init rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); + RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk)); + RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk)); rcu_early_boot_tests(); } diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index 858c56569127..f94e209a10d6 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h @@ -145,17 +145,16 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp) rcp->ticks_this_gp++; j = jiffies; js = ACCESS_ONCE(rcp->jiffies_stall); - if (*rcp->curtail && ULONG_CMP_GE(j, js)) { + if (rcp->rcucblist && ULONG_CMP_GE(j, js)) { pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n", - rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting, + rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE, jiffies - rcp->gp_start, rcp->qlen); dump_stack(); - } - if (*rcp->curtail && ULONG_CMP_GE(j, js)) ACCESS_ONCE(rcp->jiffies_stall) = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; - else if (ULONG_CMP_GE(j, js)) + } else if (ULONG_CMP_GE(j, js)) { ACCESS_ONCE(rcp->jiffies_stall) = jiffies + rcu_jiffies_till_stall_check(); + } } static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 7680fc275036..48d640ca1a05 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -156,6 +156,10 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) static void invoke_rcu_core(void); static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); +/* rcuc/rcub kthread realtime priority */ +static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; +module_param(kthread_prio, int, 0644); + /* * Track the rcutorture test sequence number and the update version * number within a given test. The rcutorture_testseq is incremented @@ -215,6 +219,9 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { #endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ }; +DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr); +EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr); + /* * Let the RCU core know that this CPU has gone through the scheduler, * which is a quiescent state. This is called when the need for a @@ -284,6 +291,22 @@ void rcu_note_context_switch(void) } EXPORT_SYMBOL_GPL(rcu_note_context_switch); +/* + * Register a quiesecent state for all RCU flavors. If there is an + * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight + * dyntick-idle quiescent state visible to other CPUs (but only for those + * RCU flavors in desparate need of a quiescent state, which will normally + * be none of them). Either way, do a lightweight quiescent state for + * all RCU flavors. + */ +void rcu_all_qs(void) +{ + if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) + rcu_momentary_dyntick_idle(); + this_cpu_inc(rcu_qs_ctr); +} +EXPORT_SYMBOL_GPL(rcu_all_qs); + static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ static long qhimark = 10000; /* If this many pending, ignore blimit. */ static long qlowmark = 100; /* Once only this many pending, use blimit. */ @@ -315,18 +338,54 @@ static void force_quiescent_state(struct rcu_state *rsp); static int rcu_pending(void); /* - * Return the number of RCU-sched batches processed thus far for debug & stats. + * Return the number of RCU batches started thus far for debug & stats. + */ +unsigned long rcu_batches_started(void) +{ + return rcu_state_p->gpnum; +} +EXPORT_SYMBOL_GPL(rcu_batches_started); + +/* + * Return the number of RCU-sched batches started thus far for debug & stats. + */ +unsigned long rcu_batches_started_sched(void) +{ + return rcu_sched_state.gpnum; +} +EXPORT_SYMBOL_GPL(rcu_batches_started_sched); + +/* + * Return the number of RCU BH batches started thus far for debug & stats. */ -long rcu_batches_completed_sched(void) +unsigned long rcu_batches_started_bh(void) +{ + return rcu_bh_state.gpnum; +} +EXPORT_SYMBOL_GPL(rcu_batches_started_bh); + +/* + * Return the number of RCU batches completed thus far for debug & stats. + */ +unsigned long rcu_batches_completed(void) +{ + return rcu_state_p->completed; +} +EXPORT_SYMBOL_GPL(rcu_batches_completed); + +/* + * Return the number of RCU-sched batches completed thus far for debug & stats. + */ +unsigned long rcu_batches_completed_sched(void) { return rcu_sched_state.completed; } EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); /* - * Return the number of RCU BH batches processed thus far for debug & stats. + * Return the number of RCU BH batches completed thus far for debug & stats. */ -long rcu_batches_completed_bh(void) +unsigned long rcu_batches_completed_bh(void) { return rcu_bh_state.completed; } @@ -759,39 +818,71 @@ void rcu_irq_enter(void) /** * rcu_nmi_enter - inform RCU of entry to NMI context * - * If the CPU was idle with dynamic ticks active, and there is no - * irq handler running, this updates rdtp->dynticks_nmi to let the - * RCU grace-period handling know that the CPU is active. + * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and + * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know + * that the CPU is active. This implementation permits nested NMIs, as + * long as the nesting level does not overflow an int. (You will probably + * run out of stack space first.) */ void rcu_nmi_enter(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + int incby = 2; - if (rdtp->dynticks_nmi_nesting == 0 && - (atomic_read(&rdtp->dynticks) & 0x1)) - return; - rdtp->dynticks_nmi_nesting++; - smp_mb__before_atomic(); /* Force delay from prior write. */ - atomic_inc(&rdtp->dynticks); - /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ - smp_mb__after_atomic(); /* See above. */ - WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); + /* Complain about underflow. */ + WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); + + /* + * If idle from RCU viewpoint, atomically increment ->dynticks + * to mark non-idle and increment ->dynticks_nmi_nesting by one. + * Otherwise, increment ->dynticks_nmi_nesting by two. This means + * if ->dynticks_nmi_nesting is equal to one, we are guaranteed + * to be in the outermost NMI handler that interrupted an RCU-idle + * period (observation due to Andy Lutomirski). + */ + if (!(atomic_read(&rdtp->dynticks) & 0x1)) { + smp_mb__before_atomic(); /* Force delay from prior write. */ + atomic_inc(&rdtp->dynticks); + /* atomic_inc() before later RCU read-side crit sects */ + smp_mb__after_atomic(); /* See above. */ + WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); + incby = 1; + } + rdtp->dynticks_nmi_nesting += incby; + barrier(); } /** * rcu_nmi_exit - inform RCU of exit from NMI context * - * If the CPU was idle with dynamic ticks active, and there is no - * irq handler running, this updates rdtp->dynticks_nmi to let the - * RCU grace-period handling know that the CPU is no longer active. + * If we are returning from the outermost NMI handler that interrupted an + * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting + * to let the RCU grace-period handling know that the CPU is back to + * being RCU-idle. */ void rcu_nmi_exit(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - if (rdtp->dynticks_nmi_nesting == 0 || - --rdtp->dynticks_nmi_nesting != 0) + /* + * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. + * (We are exiting an NMI handler, so RCU better be paying attention + * to us!) + */ + WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0); + WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); + + /* + * If the nesting level is not 1, the CPU wasn't RCU-idle, so + * leave it in non-RCU-idle state. + */ + if (rdtp->dynticks_nmi_nesting != 1) { + rdtp->dynticks_nmi_nesting -= 2; return; + } + + /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ + rdtp->dynticks_nmi_nesting = 0; /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ smp_mb__before_atomic(); /* See above. */ atomic_inc(&rdtp->dynticks); @@ -898,17 +989,14 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp, trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); return 1; } else { + if (ULONG_CMP_LT(ACCESS_ONCE(rdp->gpnum) + ULONG_MAX / 4, + rdp->mynode->gpnum)) + ACCESS_ONCE(rdp->gpwrap) = true; return 0; } } /* - * This function really isn't for public consumption, but RCU is special in - * that context switches can allow the state machine to make progress. - */ -extern void resched_cpu(int cpu); - -/* * Return true if the specified CPU has passed through a quiescent * state by virtue of being in or having passed through an dynticks * idle state since the last call to dyntick_save_progress_counter() @@ -1011,6 +1099,22 @@ static void record_gp_stall_check_time(struct rcu_state *rsp) j1 = rcu_jiffies_till_stall_check(); ACCESS_ONCE(rsp->jiffies_stall) = j + j1; rsp->jiffies_resched = j + j1 / 2; + rsp->n_force_qs_gpstart = ACCESS_ONCE(rsp->n_force_qs); +} + +/* + * Complain about starvation of grace-period kthread. + */ +static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp) +{ + unsigned long gpa; + unsigned long j; + + j = jiffies; + gpa = ACCESS_ONCE(rsp->gp_activity); + if (j - gpa > 2 * HZ) + pr_err("%s kthread starved for %ld jiffies!\n", + rsp->name, j - gpa); } /* @@ -1033,11 +1137,13 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp) } } -static void print_other_cpu_stall(struct rcu_state *rsp) +static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) { int cpu; long delta; unsigned long flags; + unsigned long gpa; + unsigned long j; int ndetected = 0; struct rcu_node *rnp = rcu_get_root(rsp); long totqlen = 0; @@ -1075,30 +1181,34 @@ static void print_other_cpu_stall(struct rcu_state *rsp) raw_spin_unlock_irqrestore(&rnp->lock, flags); } - /* - * Now rat on any tasks that got kicked up to the root rcu_node - * due to CPU offlining. - */ - rnp = rcu_get_root(rsp); - raw_spin_lock_irqsave(&rnp->lock, flags); - ndetected += rcu_print_task_stall(rnp); - raw_spin_unlock_irqrestore(&rnp->lock, flags); - print_cpu_stall_info_end(); for_each_possible_cpu(cpu) totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", smp_processor_id(), (long)(jiffies - rsp->gp_start), (long)rsp->gpnum, (long)rsp->completed, totqlen); - if (ndetected == 0) - pr_err("INFO: Stall ended before state dump start\n"); - else + if (ndetected) { rcu_dump_cpu |
