diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/locking/locktorture.c | 25 | ||||
| -rw-r--r-- | kernel/rcu/rcuscale.c | 4 | ||||
| -rw-r--r-- | kernel/rcu/rcutorture.c | 7 | ||||
| -rw-r--r-- | kernel/rcu/refscale.c | 36 | ||||
| -rw-r--r-- | kernel/rcu/srcutiny.c | 2 | ||||
| -rw-r--r-- | kernel/rcu/tasks.h | 36 | ||||
| -rw-r--r-- | kernel/rcu/tree.c | 103 | ||||
| -rw-r--r-- | kernel/rcu/tree_plugin.h | 19 | ||||
| -rw-r--r-- | kernel/rcu/tree_stall.h | 111 | ||||
| -rw-r--r-- | kernel/scftorture.c | 78 | ||||
| -rw-r--r-- | kernel/sched/core.c | 11 | ||||
| -rw-r--r-- | kernel/torture.c | 6 |
12 files changed, 270 insertions, 168 deletions
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index b3adb40549bf..7c5a4a087cc7 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -59,7 +59,7 @@ static struct task_struct **writer_tasks; static struct task_struct **reader_tasks; static bool lock_is_write_held; -static bool lock_is_read_held; +static atomic_t lock_is_read_held; static unsigned long last_lock_release; struct lock_stress_stats { @@ -682,7 +682,7 @@ static int lock_torture_writer(void *arg) if (WARN_ON_ONCE(lock_is_write_held)) lwsp->n_lock_fail++; lock_is_write_held = true; - if (WARN_ON_ONCE(lock_is_read_held)) + if (WARN_ON_ONCE(atomic_read(&lock_is_read_held))) lwsp->n_lock_fail++; /* rare, but... */ lwsp->n_lock_acquired++; @@ -717,13 +717,13 @@ static int lock_torture_reader(void *arg) schedule_timeout_uninterruptible(1); cxt.cur_ops->readlock(tid); - lock_is_read_held = true; + atomic_inc(&lock_is_read_held); if (WARN_ON_ONCE(lock_is_write_held)) lrsp->n_lock_fail++; /* rare, but... */ lrsp->n_lock_acquired++; cxt.cur_ops->read_delay(&rand); - lock_is_read_held = false; + atomic_dec(&lock_is_read_held); cxt.cur_ops->readunlock(tid); stutter_wait("lock_torture_reader"); @@ -738,20 +738,22 @@ static int lock_torture_reader(void *arg) static void __torture_print_stats(char *page, struct lock_stress_stats *statp, bool write) { + long cur; bool fail = false; int i, n_stress; - long max = 0, min = statp ? statp[0].n_lock_acquired : 0; + long max = 0, min = statp ? data_race(statp[0].n_lock_acquired) : 0; long long sum = 0; n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress; for (i = 0; i < n_stress; i++) { - if (statp[i].n_lock_fail) + if (data_race(statp[i].n_lock_fail)) fail = true; - sum += statp[i].n_lock_acquired; - if (max < statp[i].n_lock_acquired) - max = statp[i].n_lock_acquired; - if (min > statp[i].n_lock_acquired) - min = statp[i].n_lock_acquired; + cur = data_race(statp[i].n_lock_acquired); + sum += cur; + if (max < cur) + max = cur; + if (min > cur) + min = cur; } page += sprintf(page, "%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n", @@ -996,7 +998,6 @@ static int __init lock_torture_init(void) } if (nreaders_stress) { - lock_is_read_held = false; cxt.lrsa = kmalloc_array(cxt.nrealreaders_stress, sizeof(*cxt.lrsa), GFP_KERNEL); diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index dca51fe9c73f..2cc34a22a506 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -487,7 +487,7 @@ retry: if (gp_async) { cur_ops->gp_barrier(); } - writer_n_durations[me] = i_max; + writer_n_durations[me] = i_max + 1; torture_kthread_stopping("rcu_scale_writer"); return 0; } @@ -561,7 +561,7 @@ rcu_scale_cleanup(void) wdpp = writer_durations[i]; if (!wdpp) continue; - for (j = 0; j <= writer_n_durations[i]; j++) { + for (j = 0; j < writer_n_durations[i]; j++) { wdp = &wdpp[j]; pr_alert("%s%s %4d writer-duration: %5d %llu\n", scale_type, SCALE_FLAG, diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 40ef5417d954..ab4215266ebe 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2022,8 +2022,13 @@ static int rcu_torture_stall(void *args) __func__, raw_smp_processor_id()); while (ULONG_CMP_LT((unsigned long)ktime_get_seconds(), stop_at)) - if (stall_cpu_block) + if (stall_cpu_block) { +#ifdef CONFIG_PREEMPTION + preempt_schedule(); +#else schedule_timeout_uninterruptible(HZ); +#endif + } if (stall_cpu_irqsoff) local_irq_enable(); else if (!stall_cpu_block) diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index d998a76fb542..66dc14cf5687 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -467,6 +467,40 @@ static struct ref_scale_ops acqrel_ops = { .name = "acqrel" }; +static volatile u64 stopopts; + +static void ref_clock_section(const int nloops) +{ + u64 x = 0; + int i; + + preempt_disable(); + for (i = nloops; i >= 0; i--) + x += ktime_get_real_fast_ns(); + preempt_enable(); + stopopts = x; +} + +static void ref_clock_delay_section(const int nloops, const int udl, const int ndl) +{ + u64 x = 0; + int i; + + preempt_disable(); + for (i = nloops; i >= 0; i--) { + x += ktime_get_real_fast_ns(); + un_delay(udl, ndl); + } + preempt_enable(); + stopopts = x; +} + +static struct ref_scale_ops clock_ops = { + .readsection = ref_clock_section, + .delaysection = ref_clock_delay_section, + .name = "clock" +}; + static void rcu_scale_one_reader(void) { if (readdelay <= 0) @@ -759,7 +793,7 @@ ref_scale_init(void) int firsterr = 0; static struct ref_scale_ops *scale_ops[] = { &rcu_ops, &srcu_ops, &rcu_trace_ops, &rcu_tasks_ops, &refcnt_ops, &rwlock_ops, - &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, + &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops, }; if (!torture_init_begin(scale_type, verbose)) diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 26344dc6483b..a0ba2ed49bc6 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -96,7 +96,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct); */ void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { - int newval = ssp->srcu_lock_nesting[idx] - 1; + int newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1; WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval); if (!newval && READ_ONCE(ssp->srcu_gp_waiting)) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 8536c55df514..806160c44b17 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -643,8 +643,8 @@ void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); } // // "Rude" variant of Tasks RCU, inspired by Steve Rostedt's trick of // passing an empty function to schedule_on_each_cpu(). This approach -// provides an asynchronous call_rcu_tasks_rude() API and batching -// of concurrent calls to the synchronous synchronize_rcu_rude() API. +// provides an asynchronous call_rcu_tasks_rude() API and batching of +// concurrent calls to the synchronous synchronize_rcu_tasks_rude() API. // This invokes schedule_on_each_cpu() in order to send IPIs far and wide // and induces otherwise unnecessary context switches on all online CPUs, // whether idle or not. @@ -785,7 +785,10 @@ EXPORT_SYMBOL_GPL(show_rcu_tasks_rude_gp_kthread); // set that task's .need_qs flag so that task's next outermost // rcu_read_unlock_trace() will report the quiescent state (in which // case the count of readers is incremented). If both attempts fail, -// the task is added to a "holdout" list. +// the task is added to a "holdout" list. Note that IPIs are used +// to invoke trc_read_check_handler() in the context of running tasks +// in order to avoid ordering overhead on common-case shared-variable +// accessses. // rcu_tasks_trace_postscan(): // Initialize state and attempt to identify an immediate quiescent // state as above (but only for idle tasks), unblock CPU-hotplug @@ -847,7 +850,7 @@ static DEFINE_IRQ_WORK(rcu_tasks_trace_iw, rcu_read_unlock_iw); /* If we are the last reader, wake up the grace-period kthread. */ void rcu_read_unlock_trace_special(struct task_struct *t, int nesting) { - int nq = t->trc_reader_special.b.need_qs; + int nq = READ_ONCE(t->trc_reader_special.b.need_qs); if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && t->trc_reader_special.b.need_mb) @@ -894,7 +897,7 @@ static void trc_read_check_handler(void *t_in) // If the task is not in a read-side critical section, and // if this is the last reader, awaken the grace-period kthread. - if (likely(!t->trc_reader_nesting)) { + if (likely(!READ_ONCE(t->trc_reader_nesting))) { if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) wake_up(&trc_wait); // Mark as checked after decrement to avoid false @@ -903,7 +906,7 @@ static void trc_read_check_handler(void *t_in) goto reset_ipi; } // If we are racing with an rcu_read_unlock_trace(), try again later. - if (unlikely(t->trc_reader_nesting < 0)) { + if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) { if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) wake_up(&trc_wait); goto reset_ipi; @@ -913,14 +916,14 @@ static void trc_read_check_handler(void *t_in) // Get here if the task is in a read-side critical section. Set // its state so that it will awaken the grace-period kthread upon // exit from that critical section. - WARN_ON_ONCE(t->trc_reader_special.b.need_qs); + WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)); WRITE_ONCE(t->trc_reader_special.b.need_qs, true); reset_ipi: // Allow future IPIs to be sent on CPU and for task. // Also order this IPI handler against any later manipulations of // the intended task. - smp_store_release(&per_cpu(trc_ipi_to_cpu, smp_processor_id()), false); // ^^^ + smp_store_release(per_cpu_ptr(&trc_ipi_to_cpu, smp_processor_id()), false); // ^^^ smp_store_release(&texp->trc_ipi_to_cpu, -1); // ^^^ } @@ -950,6 +953,7 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg) n_heavy_reader_ofl_updates++; in_qs = true; } else { + // The task is not running, so C-language access is safe. in_qs = likely(!t->trc_reader_nesting); } @@ -964,7 +968,7 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg) // state so that it will awaken the grace-period kthread upon exit // from that critical section. atomic_inc(&trc_n_readers_need_end); // One more to wait on. - WARN_ON_ONCE(t->trc_reader_special.b.need_qs); + WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)); WRITE_ONCE(t->trc_reader_special.b.need_qs, true); return true; } @@ -982,7 +986,7 @@ static void trc_wait_for_one_reader(struct task_struct *t, // The current task had better be in a quiescent state. if (t == current) { t->trc_reader_checked = true; - WARN_ON_ONCE(t->trc_reader_nesting); + WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting)); return; } @@ -994,6 +998,12 @@ static void trc_wait_for_one_reader(struct task_struct *t, } put_task_struct(t); + // If this task is not yet on the holdout list, then we are in + // an RCU read-side critical section. Otherwise, the invocation of + // rcu_add_holdout() that added it to the list did the necessary + // get_task_struct(). Either way, the task cannot be freed out + // from under this code. + // If currently running, send an IPI, either way, add to list. trc_add_holdout(t, bhp); if (task_curr(t) && @@ -1092,8 +1102,8 @@ static void show_stalled_task_trace(struct task_struct *t, bool *firstreport) ".I"[READ_ONCE(t->trc_ipi_to_cpu) > 0], ".i"[is_idle_task(t)], ".N"[cpu > 0 && tick_nohz_full_cpu(cpu)], - t->trc_reader_nesting, - " N"[!!t->trc_reader_special.b.need_qs], + READ_ONCE(t->trc_reader_nesting), + " N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)], cpu); sched_show_task(t); } @@ -1187,7 +1197,7 @@ static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp) static void exit_tasks_rcu_finish_trace(struct task_struct *t) { WRITE_ONCE(t->trc_reader_checked, true); - WARN_ON_ONCE(t->trc_reader_nesting); + WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting)); WRITE_ONCE(t->trc_reader_nesting, 0); if (WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs))) rcu_read_unlock_trace_special(t, 0); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 0fda98a0d12e..bce848e50512 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -74,17 +74,10 @@ /* Data structures. */ -/* - * Steal a bit from the bottom of ->dynticks for idle entry/exit - * control. Initially this is for TLB flushing. - */ -#define RCU_DYNTICK_CTRL_MASK 0x1 -#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1) - static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = { .dynticks_nesting = 1, .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, - .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), + .dynticks = ATOMIC_INIT(1), #ifdef CONFIG_RCU_NOCB_CPU .cblist.flags = SEGCBLIST_SOFTIRQ_ONLY, #endif @@ -259,6 +252,15 @@ void rcu_softirq_qs(void) } /* + * Increment the current CPU's rcu_data structure's ->dynticks field + * with ordering. Return the new value. + */ +static noinline noinstr unsigned long rcu_dynticks_inc(int incby) +{ + return arch_atomic_add_return(incby, this_cpu_ptr(&rcu_data.dynticks)); +} + +/* * Record entry into an extended quiescent state. This is only to be * called when not already in an extended quiescent state, that is, * RCU is watching prior to the call to this function and is no longer @@ -266,7 +268,6 @@ void rcu_softirq_qs(void) */ static noinstr void rcu_dynticks_eqs_enter(void) { - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); int seq; /* @@ -275,13 +276,9 @@ static noinstr void rcu_dynticks_eqs_enter(void) * next idle sojourn. */ rcu_dynticks_task_trace_enter(); // Before ->dynticks update! - seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); + seq = rcu_dynticks_inc(1); // RCU is no longer watching. Better be in extended quiescent state! - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - (seq & RCU_DYNTICK_CTRL_CTR)); - /* Better not have special action (TLB flush) pending! */ - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - (seq & RCU_DYNTICK_CTRL_MASK)); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1)); } /* @@ -291,7 +288,6 @@ static noinstr void rcu_dynticks_eqs_enter(void) */ static noinstr void rcu_dynticks_eqs_exit(void) { - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); int seq; /* @@ -299,15 +295,10 @@ static noinstr void rcu_dynticks_eqs_exit(void) * and we also must force ordering with the next RCU read-side * critical section. */ - seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); + seq = rcu_dynticks_inc(1); // RCU is now watching. Better not be in an extended quiescent state! rcu_dynticks_task_trace_exit(); // After ->dynticks update! - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - !(seq & RCU_DYNTICK_CTRL_CTR)); - if (seq & RCU_DYNTICK_CTRL_MASK) { - arch_atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks); - smp_mb__after_atomic(); /* _exit after clearing mask. */ - } + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1)); } /* @@ -324,9 +315,9 @@ static void rcu_dynticks_eqs_online(void) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - if (atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR) + if (atomic_read(&rdp->dynticks) & 0x1) return; - atomic_add(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks); + rcu_dynticks_inc(1); } /* @@ -336,9 +327,7 @@ static void rcu_dynticks_eqs_online(void) */ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) { - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - - return !(arch_atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR); + return !(atomic_read(this_cpu_ptr(&rcu_data.dynticks)) & 0x1); } /* @@ -347,9 +336,8 @@ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) */ static int rcu_dynticks_snap(struct rcu_data *rdp) { - int snap = atomic_add_return(0, &rdp->dynticks); - - return snap & ~RCU_DYNTICK_CTRL_MASK; + smp_mb(); // Fundamental RCU ordering guarantee. + return atomic_read_acquire(&rdp->dynticks); } /* @@ -358,7 +346,7 @@ static int rcu_dynticks_snap(struct rcu_data *rdp) */ static bool rcu_dynticks_in_eqs(int snap) { - return !(snap & RCU_DYNTICK_CTRL_CTR); + return !(snap & 0x1); } /* Return true if the specified CPU is currently idle from an RCU viewpoint. */ @@ -389,8 +377,7 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) int snap; // If not quiescent, force back to earlier extended quiescent state. - snap = atomic_read(&rdp->dynticks) & ~(RCU_DYNTICK_CTRL_MASK | - RCU_DYNTICK_CTRL_CTR); + snap = atomic_read(&rdp->dynticks) & ~0x1; smp_rmb(); // Order ->dynticks and *vp reads. if (READ_ONCE(*vp)) @@ -398,32 +385,7 @@ bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) smp_rmb(); // Order *vp read and ->dynticks re-read. // If still in the same extended quiescent state, we are good! - return snap == (atomic_read(&rdp->dynticks) & ~RCU_DYNTICK_CTRL_MASK); -} - -/* - * Set the special (bottom) bit of the specified CPU so that it - * will take special action (such as flushing its TLB) on the - * next exit from an extended quiescent state. Returns true if - * the bit was successfully set, or false if the CPU was not in - * an extended quiescent state. - */ -bool rcu_eqs_special_set(int cpu) -{ - int old; - int new; - int new_old; - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - - new_old = atomic_read(&rdp->dynticks); - do { - old = new_old; - if (old & RCU_DYNTICK_CTRL_CTR) - return false; - new = old | RCU_DYNTICK_CTRL_MASK; - new_old = atomic_cmpxchg(&rdp->dynticks, old, new); - } while (new_old != old); - return true; + return snap == atomic_read(&rdp->dynticks); } /* @@ -439,13 +401,12 @@ bool rcu_eqs_special_set(int cpu) */ notrace void rcu_momentary_dyntick_idle(void) { - int special; + int seq; raw_cpu_write(rcu_data.rcu_need_heavy_qs, false); - special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR, - &this_cpu_ptr(&rcu_data)->dynticks); + seq = rcu_dynticks_inc(2); /* It is illegal to call this from idle state. */ - WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR)); + WARN_ON_ONCE(!(seq & 0x1)); rcu_preempt_deferred_qs(current); } EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle); @@ -1325,7 +1286,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) */ jtsq = READ_ONCE(jiffies_to_sched_qs); ruqp = per_cpu_ptr(&rcu_data.rcu_urgent_qs, rdp->cpu); - rnhqp = &per_cpu(rcu_data.rcu_need_heavy_qs, rdp->cpu); + rnhqp = per_cpu_ptr(&rcu_data.rcu_need_heavy_qs, rdp->cpu); if (!READ_ONCE(*rnhqp) && (time_after(jiffies, rcu_state.gp_start + jtsq * 2) || time_after(jiffies, rcu_state.jiffies_resched) || @@ -1772,7 +1733,7 @@ static void rcu_strict_gp_boundary(void *unused) /* * Initialize a new grace period. Return false if no grace period required. */ -static bool rcu_gp_init(void) +static noinline_for_stack bool rcu_gp_init(void) { unsigned long firstseq; unsigned long flags; @@ -1966,7 +1927,7 @@ static void rcu_gp_fqs(bool first_time) /* * Loop doing repeated quiescent-state forcing until the grace period ends. */ -static void rcu_gp_fqs_loop(void) +static noinline_for_stack void rcu_gp_fqs_loop(void) { bool first_gp_fqs; int gf = 0; @@ -1993,8 +1954,8 @@ static void rcu_gp_fqs_loop(void) trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("fqswait")); WRITE_ONCE(rcu_state.gp_state, RCU_GP_WAIT_FQS); - ret = swait_event_idle_timeout_exclusive( - rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j); + (void)swait_event_idle_timeout_exclusive(rcu_state.gp_wq, + rcu_gp_fqs_check_wake(&gf), j); rcu_gp_torture_wait(); WRITE_ONCE(rcu_state.gp_state, RCU_GP_DOING_FQS); /* Locking provides needed memory barriers. */ @@ -4047,7 +4008,7 @@ void rcu_barrier(void) */ init_completion(&rcu_state.barrier_completion); atomic_set(&rcu_state.barrier_cpu_count, 2); - get_online_cpus(); + cpus_read_lock(); /* * Force each CPU with callbacks to register a new callback. @@ -4078,7 +4039,7 @@ void rcu_barrier(void) rcu_state.barrier_sequence); } } - put_online_cpus(); + cpus_read_unlock(); /* * Now that we have an rcu_barrier_callback() callback on each diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index aec5075c60b3..7a4876a3a882 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -313,7 +313,7 @@ void rcu_note_context_switch(bool preempt) trace_rcu_utilization(TPS("Start context switch")); lockdep_assert_irqs_disabled(); - WARN_ON_ONCE(!preempt && rcu_preempt_depth() > 0); + WARN_ONCE(!preempt && rcu_preempt_depth() > 0, "Voluntary context switch within RCU read-side critical section!"); if (rcu_preempt_depth() > 0 && !t->rcu_read_unlock_special.b.blocked) { @@ -372,17 +372,20 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) static void rcu_preempt_read_enter(void) { - current->rcu_read_lock_nesting++; + WRITE_ONCE(current->rcu_read_lock_nesting, READ_ONCE(current->rcu_read_lock_nesting) + 1); } static int rcu_preempt_read_exit(void) { - return --current->rcu_read_lock_nesting; + int ret = READ_ONCE(current->rcu_read_lock_nesting) - 1; + + WRITE_ONCE(current->rcu_read_lock_nesting, ret); + return ret; } static void rcu_preempt_depth_set(int val) { - current->rcu_read_lock_nesting = val; + WRITE_ONCE(current->rcu_read_lock_nesting, val); } /* @@ -1495,17 +1498,17 @@ static void noinstr rcu_dynticks_task_exit(void) /* Turn on heavyweight RCU tasks trace readers on idle/user entry. */ static void rcu_dynticks_task_trace_enter(void) { -#ifdef CONFIG_TASKS_RCU_TRACE +#ifdef CONFIG_TASKS_TRACE_RCU if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) current->trc_reader_special.b.need_mb = true; -#endif /* #ifdef CONFIG_TASKS_RCU_TRACE */ +#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ } /* Turn off heavyweight RCU tasks trace readers on idle/user exit. */ static void rcu_dynticks_task_trace_exit(void) { -#ifdef CONFIG_TASKS_RCU_TRACE +#ifdef CONFIG_TASKS_TRACE_RCU if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) current->trc_reader_special.b.need_mb = false; -#endif /* #ifdef CONFIG_TASKS_RCU_TRACE */ +#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ } diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 6c76988cc019..677ee3d8671b 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -7,6 +7,8 @@ * Author: Paul E. McKenney <paulmck@linux.ibm.com> */ +#include <linux/kvm_para.h> + ////////////////////////////////////////////////////////////////////////////// // // Controlling CPU stall warnings, including delay calculation. @@ -117,17 +119,14 @@ static void panic_on_rcu_stall(void) } /** - * rcu_cpu_stall_reset - prevent further stall warnings in current grace period - * - * Set the stall-warning timeout way off into the future, thus preventing - * any RCU CPU stall-warning messages from appearing in the current set of - * RCU grace periods. + * rcu_cpu_stall_reset - restart stall-warning timeout for current grace period * * The caller must disable hard irqs. */ void rcu_cpu_stall_reset(void) { - WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2); + WRITE_ONCE(rcu_state.jiffies_stall, + jiffies + rcu_jiffies_till_stall_check()); } ////////////////////////////////////////////////////////////////////////////// @@ -267,8 +266,10 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) struct task_struct *ts[8]; lockdep_assert_irqs_disabled(); - if (!rcu_preempt_blocked_readers_cgp(rnp)) + if (!rcu_preempt_blocked_readers_cgp(rnp)) { + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return 0; + } pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", rnp->level, rnp->grplo, rnp->grphi); t = list_entry(rnp->gp_tasks->prev, @@ -280,8 +281,8 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags) break; } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - for (i--; i; i--) { - t = ts[i]; + while (i) { + t = ts[--i]; if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr)) pr_cont(" P%d", t->pid); else @@ -350,7 +351,7 @@ static void rcu_dump_cpu_stacks(void) static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); sprintf(cp, "last_accelerate: %04lx/%04lx dyntick_enabled: %d", rdp->last_accelerate & 0xffff, jiffies & 0xffff, @@ -464,9 +465,10 @@ static void rcu_check_gp_kthread_starvation(void) pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x ->cpu=%d\n", rcu_state.name, j, (long)rcu_seq_current(&rcu_state.gp_seq), - data_race(rcu_state.gp_flags), - gp_state_getname(rcu_state.gp_state), rcu_state.gp_state, - gpk ? gpk->__state : ~0, cpu); + data_race(READ_ONCE(rcu_state.gp_flags)), + gp_state_getname(rcu_state.gp_state), + data_race(READ_ONCE(rcu_state.gp_state)), + gpk ? data_race(READ_ONCE(gpk->__state)) : ~0, cpu); if (gpk) { pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name); pr_err("RCU grace-period kthread stack dump:\n"); @@ -509,7 +511,7 @@ static void rcu_check_gp_kthread_expired_fqs_timer(void) (long)rcu_seq_current(&rcu_state.gp_seq), data_race(rcu_state.gp_flags), gp_state_getname(RCU_GP_WAIT_FQS), RCU_GP_WAIT_FQS, - gpk->__state); + data_race(READ_ONCE(gpk->__state))); pr_err("\tPossible timer handling issue on cpu=%d timer-softirq=%u\n", cpu, kstat_softirqs_cpu(TIMER_SOFTIRQ, cpu)); } @@ -568,11 +570,11 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) pr_err("INFO: Stall ended before state dump start\n"); } else { j = jiffies; - gpa = data_race(rcu_state.gp_activity); + gpa = data_race(READ_ONCE(rcu_state.gp_activity)); pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", rcu_state.name, j - gpa, j, gpa, - data_race(jiffies_till_next_fqs), - rcu_get_root()->qsmask); + data_race(READ_ONCE(jiffies_till_next_fqs)), + data_race(READ_ONCE(rcu_get_root()->qsmask))); } } /* Rewrite if needed in case of slow consoles. */ @@ -646,6 +648,7 @@ static void print_cpu_stall(unsigned long gps) static void check_cpu_stall(struct rcu_data *rdp) { + bool didstall = false; unsigned long gs1; unsigned long gs2; unsigned long gps; @@ -691,24 +694,46 @@ static void check_cpu_stall(struct rcu_data *rdp) ULONG_CMP_GE(gps, js)) return; /* No stall or GP completed since entering function. */ rnp = rdp->mynode; - jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; + jn = jiffies + ULONG_MAX / 2; if (rcu_gp_in_progress() && (READ_ONCE(rnp->qsmask) & rdp->grpmask) && cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { + /* + * If a virtual machine is stopped by the host it can look to + * the watchdog like an RCU stall. Check to see if the host + * stopped the vm. + */ + if (kvm_check_and_clear_guest_paused()) + return; + /* We haven't checked in, so go dump stack. */ print_cpu_stall(gps); if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) rcu_ftrace_dump(DUMP_ALL); + didstall = true; } else if (rcu_gp_in_progress() && ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) && cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) { + /* + * If a virtual machine is stopped by the host it can look to + * the watchdog like an RCU stall. Check to see if the host + * stopped the vm. + */ + if (kvm_check_and_clear_guest_paused()) + return; + /* They had a few time units to dump stack, so complain. */ print_other_cpu_stall(gs2, gps); if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) rcu_ftrace_dump(DUMP_ALL); + didstall = true; + } + if (didstall && READ_ONCE(rcu_state.jiffies_stall) == jn) { + jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; + WRITE_ONCE(rcu_state.jiffies_stall, jn); } } @@ -742,7 +767,7 @@ bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) rcu_for_each_leaf_node(rnp) { if (!cpup) { - if (READ_ONCE(rnp->qsmask)) { + if (data_race(READ_ONCE(rnp->qsmask))) { return false; } else { if (READ_ONCE(rnp->gp_tasks)) @@ -791,32 +816,34 @@ void show_rcu_gp_kthreads(void) struct task_struct *t = READ_ONCE(rcu_state.gp_kthread); j = jiffies; - ja = j - data_race(rcu_state.gp_activity); - jr = j - data_race(rcu_state.gp_req_activity); - js = j - data_race(rcu_state.gp_start); - jw = j - data_race(rcu_state.gp_wake_time); + ja = j - data_race(READ_ONCE(rcu_state.gp_activity)); + jr = j - data_race(READ_ONCE(rcu_state.gp_req_activity)); + js = j - data_race(READ_ONCE(rcu_state.gp_start)); + jw = j - data_race(READ_ONCE(rcu_state.gp_wake_time)); pr_info("%s: wait state: %s(%d) ->state: %#x ->rt_priority %u delta ->gp_start %lu ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_max %lu ->gp_flags %#x\n", rcu_state.name, gp_state_getname(rcu_state.gp_state), - rcu_state.gp_state, t ? t->__state : 0x1ffff, t ? t->rt_priority : 0xffU, - js, ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq), - (long)data_race(rcu_state.gp_seq), - (long)data_race(rcu_get_root()->gp_seq_needed), - data_race(rcu_state.gp_max), - data_race(rcu_state.gp_flags)); + data_race(READ_ONCE(rcu_state.gp_state)), + t ? data_race(READ_ONCE(t->__state)) : 0x1ffff, t ? t->rt_priority : 0xffU, + js, ja, jr, jw, (long)data_race(READ_ONCE(rcu_state.gp_wake_seq)), + (long)data_race(READ_ONCE(rcu_state.gp_seq)), + (long)data_race(READ_ONCE(rcu_get_root()->gp_seq_needed)), + data_race(READ_ONCE(rcu_state.gp_max)), + data_race(READ_ONCE(rcu_state.gp_flags))); rcu_for_each_node_breadth_first(rnp) { if (ULONG_CMP_GE(READ_ONCE(rcu_state.gp_seq), READ_ONCE(rnp->gp_seq_needed)) && - !data_race(rnp->qsmask) && !data_race(rnp->boost_tasks) && - !data_race(rnp->exp_tasks) && !data_race(rnp->gp_tasks)) + !data_race(READ_ONCE(rnp->qsmask)) && !data_race(READ_ONCE(rnp->boost_tasks)) && + !data_race(READ_ONCE(rnp->exp_tasks)) && !data_race(READ_ONCE(rnp->gp_tasks))) continue; pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld ->qsmask %#lx %c%c%c%c ->n_boosts %ld\n", rnp->grplo, rnp->grphi, - (long)data_race(rnp->gp_seq), (long)data_race(rnp->gp_seq_needed), - data_race(rnp->qsmask), - ".b"[!!data_race(rnp->boost_kthread_task)], - ".B"[!!data_race(rnp->boost_tasks)], - ".E"[!!data_race(rnp->exp_tasks)], - ".G"[!!data_race(rnp->gp_tasks)], - data_race(rnp->n_boosts)); + (long)data_race(READ_ONCE(rnp->gp_seq)), + (long)data_race(READ_ONCE(rnp->gp_seq_needed)), + data_race(READ_ONCE(rnp->qsmask)), + ".b"[!!data_race(READ_ONCE(rnp->boost_kthread_task))], + ".B"[!!data_race(READ_ONCE(rnp->boost_tasks))], + ".E"[!!data_race(READ_ONCE(rnp->exp_tasks))], + ".G"[!!data_race(READ_ONCE(rnp->gp_tasks))], + data_race(READ_ONCE(rnp->n_boosts))); if (!rcu_is_leaf_node(rnp)) continue; for_each_leaf_node_possible_cpu(rnp, cpu) { @@ -826,12 +853,12 @@ void show_rcu_gp_kthreads(void) READ_ONCE(rdp->gp_seq_needed))) continue; pr_info("\tcpu %d ->gp_seq_needed %ld\n", - cpu, (long)data_race(rdp->gp_seq_needed)); + cpu, (long)data_race(READ_ONCE(rdp->gp_seq_needed))); } } for_each_possible_cpu(cpu) { rdp = per_cpu_ptr(&rcu_data, cpu); - cbs += data_race(rdp->n_cbs_invoked); + cbs += data_race(READ_ONCE(rdp->n_cbs_invoked)); if (rcu_segcblist_is_offloaded(&rdp->cblist)) show_rcu_nocb_state(rdp); } @@ -913,11 +940,11 @@ void rcu_fwd_progress_check(unsigned long j) if (rcu_gp_in_progress()) { pr_info("%s: GP age %lu jiffies\n", - __func__, jiffies - rcu_state.gp_start); + __func__, jiffies - data_race(READ_ONCE(rcu_state.gp_start))); show_rcu_gp_kthreads(); } else { pr_info("%s: Last GP end %lu jiffies ago\n", - __func__, jiffies - rcu_state.gp_end); + __func__, jiffies - data_race(READ_ONCE(rcu_state.gp_end))); preempt_disable(); rdp = this_cpu_ptr(&rcu_data); rcu_check_gp_start_stall(rdp->mynode, rdp, j); diff --git a/kernel/scftorture.c b/kernel/scftorture.c index 29e8fc5d91a7..64a08288b1a6 100644 --- a/kernel/scftorture.c +++ b/kernel/scftorture.c @@ -64,6 +64,7 @@ torture_param(bool |
