diff options
Diffstat (limited to 'kernel/rcu')
| -rw-r--r-- | kernel/rcu/Kconfig | 3 | ||||
| -rw-r--r-- | kernel/rcu/rcu.h | 43 | ||||
| -rw-r--r-- | kernel/rcu/rcuscale.c | 9 | ||||
| -rw-r--r-- | kernel/rcu/rcutorture.c | 234 | ||||
| -rw-r--r-- | kernel/rcu/refscale.c | 2 | ||||
| -rw-r--r-- | kernel/rcu/srcutiny.c | 2 | ||||
| -rw-r--r-- | kernel/rcu/srcutree.c | 438 | ||||
| -rw-r--r-- | kernel/rcu/tasks.h | 33 | ||||
| -rw-r--r-- | kernel/rcu/tree.c | 18 | ||||
| -rw-r--r-- | kernel/rcu/tree_exp.h | 16 | ||||
| -rw-r--r-- | kernel/rcu/tree_nocb.h | 4 |
11 files changed, 573 insertions, 229 deletions
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index ab62074174c3..9071182b1284 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -53,9 +53,6 @@ config RCU_EXPERT Say N if you are unsure. -config SRCU - def_bool y - config TINY_SRCU bool default y if TINY_RCU diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 115616ac3bfa..4a1b9622598b 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -14,6 +14,43 @@ /* * Grace-period counter management. + * + * The two least significant bits contain the control flags. + * The most significant bits contain the grace-period sequence counter. + * + * When both control flags are zero, no grace period is in progress. + * When either bit is non-zero, a grace period has started and is in + * progress. When the grace period completes, the control flags are reset + * to 0 and the grace-period sequence counter is incremented. + * + * However some specific RCU usages make use of custom values. + * + * SRCU special control values: + * + * SRCU_SNP_INIT_SEQ : Invalid/init value set when SRCU node + * is initialized. + * + * SRCU_STATE_IDLE : No SRCU gp is in progress + * + * SRCU_STATE_SCAN1 : State set by rcu_seq_start(). Indicates + * we are scanning the readers on the slot + * defined as inactive (there might well + * be pending readers that will use that + * index, but their number is bounded). + * + * SRCU_STATE_SCAN2 : State set manually via rcu_seq_set_state() + * Indicates we are flipping the readers + * index and then scanning the readers on the + * slot newly designated as inactive (again, + * the number of pending readers that will use + * this inactive index is bounded). + * + * RCU polled GP special control value: + * + * RCU_GET_STATE_COMPLETED : State value indicating an already-completed + * polled GP has completed. This value covers + * both the state and the counter of the + * grace-period sequence number. */ #define RCU_SEQ_CTR_SHIFT 2 @@ -341,11 +378,13 @@ extern void rcu_init_geometry(void); * specified state structure (for SRCU) or the only rcu_state structure * (for RCU). */ -#define srcu_for_each_node_breadth_first(sp, rnp) \ +#define _rcu_for_each_node_breadth_first(sp, rnp) \ for ((rnp) = &(sp)->node[0]; \ (rnp) < &(sp)->node[rcu_num_nodes]; (rnp)++) #define rcu_for_each_node_breadth_first(rnp) \ - srcu_for_each_node_breadth_first(&rcu_state, rnp) + _rcu_for_each_node_breadth_first(&rcu_state, rnp) +#define srcu_for_each_node_breadth_first(ssp, rnp) \ + _rcu_for_each_node_breadth_first(ssp->srcu_sup, rnp) /* * Scan the leaves of the rcu_node hierarchy for the rcu_state structure. diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 91fb5905a008..e82ec9f9a5d8 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -631,8 +631,7 @@ static int compute_real(int n) static int rcu_scale_shutdown(void *arg) { - wait_event(shutdown_wq, - atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters); + wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters); smp_mb(); /* Wake before output. */ rcu_scale_cleanup(); kernel_power_off(); @@ -716,7 +715,7 @@ kfree_scale_thread(void *arg) // is tested. if ((kfree_rcu_test_single && !kfree_rcu_test_double) || (kfree_rcu_test_both && torture_random(&tr) & 0x800)) - kfree_rcu(alloc_ptr); + kfree_rcu_mightsleep(alloc_ptr); else kfree_rcu(alloc_ptr, rh); } @@ -771,8 +770,8 @@ kfree_scale_cleanup(void) static int kfree_scale_shutdown(void *arg) { - wait_event(shutdown_wq, - atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads); + wait_event_idle(shutdown_wq, + atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads); smp_mb(); /* Wake before output. */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 8e6c023212cb..147551c23baf 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -119,7 +119,9 @@ torture_param(int, stutter, 5, "Number of seconds to run/halt test"); torture_param(int, test_boost, 1, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); torture_param(int, test_boost_duration, 4, "Duration of each boost test, seconds."); torture_param(int, test_boost_interval, 7, "Interval between boost tests, seconds."); +torture_param(int, test_nmis, 0, "End-test NMI tests, 0 to disable."); torture_param(bool, test_no_idle_hz, true, "Test support for tickless idle CPUs"); +torture_param(int, test_srcu_lockdep, 0, "Test specified SRCU deadlock scenario."); torture_param(int, verbose, 1, "Enable verbose debugging printk()s"); static char *torture_type = "rcu"; @@ -179,7 +181,6 @@ static atomic_t n_rcu_torture_mbchk_tries; static atomic_t n_rcu_torture_error; static long n_rcu_torture_barrier_error; static long n_rcu_torture_boost_ktrerror; -static long n_rcu_torture_boost_rterror; static long n_rcu_torture_boost_failure; static long n_rcu_torture_boosts; static atomic_long_t n_rcu_torture_timers; @@ -2194,12 +2195,11 @@ rcu_torture_stats_print(void) atomic_read(&n_rcu_torture_alloc), atomic_read(&n_rcu_torture_alloc_fail), atomic_read(&n_rcu_torture_free)); - pr_cont("rtmbe: %d rtmbkf: %d/%d rtbe: %ld rtbke: %ld rtbre: %ld ", + pr_cont("rtmbe: %d rtmbkf: %d/%d rtbe: %ld rtbke: %ld ", atomic_read(&n_rcu_torture_mberror), atomic_read(&n_rcu_torture_mbchk_fail), atomic_read(&n_rcu_torture_mbchk_tries), n_rcu_torture_barrier_error, - n_rcu_torture_boost_ktrerror, - n_rcu_torture_boost_rterror); + n_rcu_torture_boost_ktrerror); pr_cont("rtbf: %ld rtb: %ld nt: %ld ", n_rcu_torture_boost_failure, n_rcu_torture_boosts, @@ -2217,15 +2217,13 @@ rcu_torture_stats_print(void) if (atomic_read(&n_rcu_torture_mberror) || atomic_read(&n_rcu_torture_mbchk_fail) || n_rcu_torture_barrier_error || n_rcu_torture_boost_ktrerror || - n_rcu_torture_boost_rterror || n_rcu_torture_boost_failure || - i > 1) { + n_rcu_torture_boost_failure || i > 1) { pr_cont("%s", "!!! "); atomic_inc(&n_rcu_torture_error); WARN_ON_ONCE(atomic_read(&n_rcu_torture_mberror)); WARN_ON_ONCE(atomic_read(&n_rcu_torture_mbchk_fail)); WARN_ON_ONCE(n_rcu_torture_barrier_error); // rcu_barrier() WARN_ON_ONCE(n_rcu_torture_boost_ktrerror); // no boost kthread - WARN_ON_ONCE(n_rcu_torture_boost_rterror); // can't set RT prio WARN_ON_ONCE(n_rcu_torture_boost_failure); // boost failed (TIMER_SOFTIRQ RT prio?) WARN_ON_ONCE(i > 1); // Too-short grace period } @@ -2358,7 +2356,8 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) "n_barrier_cbs=%d " "onoff_interval=%d onoff_holdoff=%d " "read_exit_delay=%d read_exit_burst=%d " - "nocbs_nthreads=%d nocbs_toggle=%d\n", + "nocbs_nthreads=%d nocbs_toggle=%d " + "test_nmis=%d\n", torture_type, tag, nrealreaders, nfakewriters, stat_interval, verbose, test_no_idle_hz, shuffle_interval, stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, @@ -2369,7 +2368,8 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) n_barrier_cbs, onoff_interval, onoff_holdoff, read_exit_delay, read_exit_burst, - nocbs_nthreads, nocbs_toggle); + nocbs_nthreads, nocbs_toggle, + test_nmis); } static int rcutorture_booster_cleanup(unsigned int cpu) @@ -3273,6 +3273,29 @@ static void rcu_torture_read_exit_cleanup(void) torture_stop_kthread(rcutorture_read_exit, read_exit_task); } +static void rcutorture_test_nmis(int n) +{ +#if IS_BUILTIN(CONFIG_RCU_TORTURE_TEST) + int cpu; + int dumpcpu; + int i; + + for (i = 0; i < n; i++) { + preempt_disable(); + cpu = smp_processor_id(); + dumpcpu = cpu + 1; + if (dumpcpu >= nr_cpu_ids) + dumpcpu = 0; + pr_alert("%s: CPU %d invoking dump_cpu_task(%d)\n", __func__, cpu, dumpcpu); + dump_cpu_task(dumpcpu); + preempt_enable(); + schedule_timeout_uninterruptible(15 * HZ); + } +#else // #if IS_BUILTIN(CONFIG_RCU_TORTURE_TEST) + WARN_ONCE(n, "Non-zero rcutorture.test_nmis=%d permitted only when rcutorture is built in.\n", test_nmis); +#endif // #else // #if IS_BUILTIN(CONFIG_RCU_TORTURE_TEST) +} + static enum cpuhp_state rcutor_hp; static void @@ -3297,6 +3320,8 @@ rcu_torture_cleanup(void) return; } + rcutorture_test_nmis(test_nmis); + if (cur_ops->gp_kthread_dbg) cur_ops->gp_kthread_dbg(); rcu_torture_read_exit_cleanup(); @@ -3463,6 +3488,188 @@ static void rcutorture_sync(void) cur_ops->sync(); } +static DEFINE_MUTEX(mut0); +static DEFINE_MUTEX(mut1); +static DEFINE_MUTEX(mut2); +static DEFINE_MUTEX(mut3); +static DEFINE_MUTEX(mut4); +static DEFINE_MUTEX(mut5); +static DEFINE_MUTEX(mut6); +static DEFINE_MUTEX(mut7); +static DEFINE_MUTEX(mut8); +static DEFINE_MUTEX(mut9); + +static DECLARE_RWSEM(rwsem0); +static DECLARE_RWSEM(rwsem1); +static DECLARE_RWSEM(rwsem2); +static DECLARE_RWSEM(rwsem3); +static DECLARE_RWSEM(rwsem4); +static DECLARE_RWSEM(rwsem5); +static DECLARE_RWSEM(rwsem6); +static DECLARE_RWSEM(rwsem7); +static DECLARE_RWSEM(rwsem8); +static DECLARE_RWSEM(rwsem9); + +DEFINE_STATIC_SRCU(srcu0); +DEFINE_STATIC_SRCU(srcu1); +DEFINE_STATIC_SRCU(srcu2); +DEFINE_STATIC_SRCU(srcu3); +DEFINE_STATIC_SRCU(srcu4); +DEFINE_STATIC_SRCU(srcu5); +DEFINE_STATIC_SRCU(srcu6); +DEFINE_STATIC_SRCU(srcu7); +DEFINE_STATIC_SRCU(srcu8); +DEFINE_STATIC_SRCU(srcu9); + +static int srcu_lockdep_next(const char *f, const char *fl, const char *fs, const char *fu, int i, + int cyclelen, int deadlock) +{ + int j = i + 1; + + if (j >= cyclelen) + j = deadlock ? 0 : -1; + if (j >= 0) + pr_info("%s: %s(%d), %s(%d), %s(%d)\n", f, fl, i, fs, j, fu, i); + else + pr_info("%s: %s(%d), %s(%d)\n", f, fl, i, fu, i); + return j; +} + +// Test lockdep on SRCU-based deadlock scenarios. +static void rcu_torture_init_srcu_lockdep(void) +{ + int cyclelen; + int deadlock; + bool err = false; + int i; + int j; + int idx; + struct mutex *muts[] = { &mut0, &mut1, &mut2, &mut3, &mut4, + &mut5, &mut6, &mut7, &mut8, &mut9 }; + struct rw_semaphore *rwsems[] = { &rwsem0, &rwsem1, &rwsem2, &rwsem3, &rwsem4, + &rwsem5, &rwsem6, &rwsem7, &rwsem8, &rwsem9 }; + struct srcu_struct *srcus[] = { &srcu0, &srcu1, &srcu2, &srcu3, &srcu4, + &srcu5, &srcu6, &srcu7, &srcu8, &srcu9 }; + int testtype; + + if (!test_srcu_lockdep) + return; + + deadlock = test_srcu_lockdep / 1000; + testtype = (test_srcu_lockdep / 10) % 100; + cyclelen = test_srcu_lockdep % 10; + WARN_ON_ONCE(ARRAY_SIZE(muts) != ARRAY_SIZE(srcus)); + if (WARN_ONCE(deadlock != !!deadlock, + "%s: test_srcu_lockdep=%d and deadlock digit %d must be zero or one.\n", + __func__, test_srcu_lockdep, deadlock)) + err = true; + if (WARN_ONCE(cyclelen <= 0, + "%s: test_srcu_lockdep=%d and cycle-length digit %d must be greater than zero.\n", + __func__, test_srcu_lockdep, cyclelen)) + err = true; + if (err) + goto err_out; + + if (testtype == 0) { + pr_info("%s: test_srcu_lockdep = %05d: SRCU %d-way %sdeadlock.\n", + __func__, test_srcu_lockdep, cyclelen, deadlock ? "" : "non-"); + if (deadlock && cyclelen == 1) + pr_info("%s: Expect hang.\n", __func__); + for (i = 0; i < cyclelen; i++) { + j = srcu_lockdep_next(__func__, "srcu_read_lock", "synchronize_srcu", + "srcu_read_unlock", i, cyclelen, deadlock); + idx = srcu_read_lock(srcus[i]); + if (j >= 0) + synchronize_srcu(srcus[j]); + srcu_read_unlock(srcus[i], idx); + } + return; + } + + if (testtype == 1) { + pr_info("%s: test_srcu_lockdep = %05d: SRCU/mutex %d-way %sdeadlock.\n", + __func__, test_srcu_lockdep, cyclelen, deadlock ? "" : "non-"); + for (i = 0; i < cyclelen; i++) { + pr_info("%s: srcu_read_lock(%d), mutex_lock(%d), mutex_unlock(%d), srcu_read_unlock(%d)\n", + __func__, i, i, i, i); + idx = srcu_read_lock(srcus[i]); + mutex_lock(muts[i]); + mutex_unlock(muts[i]); + srcu_read_unlock(srcus[i], idx); + + j = srcu_lockdep_next(__func__, "mutex_lock", "synchronize_srcu", + "mutex_unlock", i, cyclelen, deadlock); + mutex_lock(muts[i]); + if (j >= 0) + synchronize_srcu(srcus[j]); + mutex_unlock(muts[i]); + } + return; + } + + if (testtype == 2) { + pr_info("%s: test_srcu_lockdep = %05d: SRCU/rwsem %d-way %sdeadlock.\n", + __func__, test_srcu_lockdep, cyclelen, deadlock ? "" : "non-"); + for (i = 0; i < cyclelen; i++) { + pr_info("%s: srcu_read_lock(%d), down_read(%d), up_read(%d), srcu_read_unlock(%d)\n", + __func__, i, i, i, i); + idx = srcu_read_lock(srcus[i]); + down_read(rwsems[i]); + up_read(rwsems[i]); + srcu_read_unlock(srcus[i], idx); + + j = srcu_lockdep_next(__func__, "down_write", "synchronize_srcu", + "up_write", i, cyclelen, deadlock); + down_write(rwsems[i]); + if (j >= 0) + synchronize_srcu(srcus[j]); + up_write(rwsems[i]); + } + return; + } + +#ifdef CONFIG_TASKS_TRACE_RCU + if (testtype == 3) { + pr_info("%s: test_srcu_lockdep = %05d: SRCU and Tasks Trace RCU %d-way %sdeadlock.\n", + __func__, test_srcu_lockdep, cyclelen, deadlock ? "" : "non-"); + if (deadlock && cyclelen == 1) + pr_info("%s: Expect hang.\n", __func__); + for (i = 0; i < cyclelen; i++) { + char *fl = i == 0 ? "rcu_read_lock_trace" : "srcu_read_lock"; + char *fs = i == cyclelen - 1 ? "synchronize_rcu_tasks_trace" + : "synchronize_srcu"; + char *fu = i == 0 ? "rcu_read_unlock_trace" : "srcu_read_unlock"; + + j = srcu_lockdep_next(__func__, fl, fs, fu, i, cyclelen, deadlock); + if (i == 0) + rcu_read_lock_trace(); + else + idx = srcu_read_lock(srcus[i]); + if (j >= 0) { + if (i == cyclelen - 1) + synchronize_rcu_tasks_trace(); + else + synchronize_srcu(srcus[j]); + } + if (i == 0) + rcu_read_unlock_trace(); + else + srcu_read_unlock(srcus[i], idx); + } + return; + } +#endif // #ifdef CONFIG_TASKS_TRACE_RCU + +err_out: + pr_info("%s: test_srcu_lockdep = %05d does nothing.\n", __func__, test_srcu_lockdep); + pr_info("%s: test_srcu_lockdep = DNNL.\n", __func__); + pr_info("%s: D: Deadlock if nonzero.\n", __func__); + pr_info("%s: NN: Test number, 0=SRCU, 1=SRCU/mutex, 2=SRCU/rwsem, 3=SRCU/Tasks Trace RCU.\n", __func__); + pr_info("%s: L: Cycle length.\n", __func__); + if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU)) + pr_info("%s: NN=3 disallowed because kernel is built with CONFIG_TASKS_TRACE_RCU=n\n", __func__); +} + static int __init rcu_torture_init(void) { @@ -3501,9 +3708,17 @@ rcu_torture_init(void) pr_alert("rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); fqs_duration = 0; } + if (nocbs_nthreads != 0 && (cur_ops != &rcu_ops || + !IS_ENABLED(CONFIG_RCU_NOCB_CPU))) { + pr_alert("rcu-torture types: %s and CONFIG_RCU_NOCB_CPU=%d, nocb toggle disabled.\n", + cur_ops->name, IS_ENABLED(CONFIG_RCU_NOCB_CPU)); + nocbs_nthreads = 0; + } if (cur_ops->init) cur_ops->init(); + rcu_torture_init_srcu_lockdep(); + if (nreaders >= 0) { nrealreaders = nreaders; } else { @@ -3540,7 +3755,6 @@ rcu_torture_init(void) atomic_set(&n_rcu_torture_error, 0); n_rcu_torture_barrier_error = 0; n_rcu_torture_boost_ktrerror = 0; - n_rcu_torture_boost_rterror = 0; n_rcu_torture_boost_failure = 0; n_rcu_torture_boosts = 0; for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index afa3e1a2f690..1970ce5f22d4 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -1031,7 +1031,7 @@ ref_scale_cleanup(void) static int ref_scale_shutdown(void *arg) { - wait_event(shutdown_wq, shutdown_start); + wait_event_idle(shutdown_wq, shutdown_start); smp_mb(); // Wake before output. ref_scale_cleanup(); diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index b12fb0cec44d..336af24e0fe3 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -197,6 +197,8 @@ void synchronize_srcu(struct srcu_struct *ssp) { struct rcu_synchronize rs; + srcu_lock_sync(&ssp->dep_map); + RCU_LOCKDEP_WARN(lockdep_is_held(ssp) || lock_is_held(&rcu_bh_lock_map) || lock_is_held(&rcu_lock_map) || diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index ab4ee58af84b..20d7a238d675 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -103,7 +103,7 @@ do { \ #define spin_trylock_irqsave_rcu_node(p, flags) \ ({ \ - bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ + bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ \ if (___locked) \ smp_mb__after_unlock_lock(); \ @@ -135,8 +135,8 @@ static void init_srcu_struct_data(struct srcu_struct *ssp) spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); rcu_segcblist_init(&sdp->srcu_cblist); sdp->srcu_cblist_invoking = false; - sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq; - sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq; + sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq; + sdp->srcu_gp_seq_needed_exp = ssp->srcu_sup->srcu_gp_seq; sdp->mynode = NULL; sdp->cpu = cpu; INIT_WORK(&sdp->work, srcu_invoke_callbacks); @@ -173,14 +173,14 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) /* Initialize geometry if it has not already been initialized. */ rcu_init_geometry(); - ssp->node = kcalloc(rcu_num_nodes, sizeof(*ssp->node), gfp_flags); - if (!ssp->node) + ssp->srcu_sup->node = kcalloc(rcu_num_nodes, sizeof(*ssp->srcu_sup->node), gfp_flags); + if (!ssp->srcu_sup->node) return false; /* Work out the overall tree geometry. */ - ssp->level[0] = &ssp->node[0]; + ssp->srcu_sup->level[0] = &ssp->srcu_sup->node[0]; for (i = 1; i < rcu_num_lvls; i++) - ssp->level[i] = ssp->level[i - 1] + num_rcu_lvl[i - 1]; + ssp->srcu_sup->level[i] = ssp->srcu_sup->level[i - 1] + num_rcu_lvl[i - 1]; rcu_init_levelspread(levelspread, num_rcu_lvl); /* Each pass through this loop initializes one srcu_node structure. */ @@ -195,17 +195,17 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) snp->srcu_gp_seq_needed_exp = SRCU_SNP_INIT_SEQ; snp->grplo = -1; snp->grphi = -1; - if (snp == &ssp->node[0]) { + if (snp == &ssp->srcu_sup->node[0]) { /* Root node, special case. */ snp->srcu_parent = NULL; continue; } /* Non-root node. */ - if (snp == ssp->level[level + 1]) + if (snp == ssp->srcu_sup->level[level + 1]) level++; - snp->srcu_parent = ssp->level[level - 1] + - (snp - ssp->level[level]) / + snp->srcu_parent = ssp->srcu_sup->level[level - 1] + + (snp - ssp->srcu_sup->level[level]) / levelspread[level - 1]; } @@ -214,7 +214,7 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) * leaves of the srcu_node tree. */ level = rcu_num_lvls - 1; - snp_first = ssp->level[level]; + snp_first = ssp->srcu_sup->level[level]; for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(ssp->sda, cpu); sdp->mynode = &snp_first[cpu / levelspread[level]]; @@ -225,7 +225,7 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) } sdp->grpmask = 1 << (cpu - sdp->mynode->grplo); } - smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_WAIT_BARRIER); + smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_WAIT_BARRIER); return true; } @@ -236,36 +236,47 @@ static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) */ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) { - ssp->srcu_size_state = SRCU_SIZE_SMALL; - ssp->node = NULL; - mutex_init(&ssp->srcu_cb_mutex); - mutex_init(&ssp->srcu_gp_mutex); + if (!is_static) + ssp->srcu_sup = kzalloc(sizeof(*ssp->srcu_sup), GFP_KERNEL); + if (!ssp->srcu_sup) + return -ENOMEM; + if (!is_static) + spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); + ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL; + ssp->srcu_sup->node = NULL; + mutex_init(&ssp->srcu_sup->srcu_cb_mutex); + mutex_init(&ssp->srcu_sup->srcu_gp_mutex); ssp->srcu_idx = 0; - ssp->srcu_gp_seq = 0; - ssp->srcu_barrier_seq = 0; - mutex_init(&ssp->srcu_barrier_mutex); - atomic_set(&ssp->srcu_barrier_cpu_cnt, 0); - INIT_DELAYED_WORK(&ssp->work, process_srcu); - ssp->sda_is_static = is_static; + ssp->srcu_sup->srcu_gp_seq = 0; + ssp->srcu_sup->srcu_barrier_seq = 0; + mutex_init(&ssp->srcu_sup->srcu_barrier_mutex); + atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0); + INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu); + ssp->srcu_sup->sda_is_static = is_static; if (!is_static) ssp->sda = alloc_percpu(struct srcu_data); - if (!ssp->sda) + if (!ssp->sda) { + if (!is_static) + kfree(ssp->srcu_sup); return -ENOMEM; + } init_srcu_struct_data(ssp); - ssp->srcu_gp_seq_needed_exp = 0; - ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); - if (READ_ONCE(ssp->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) { + ssp->srcu_sup->srcu_gp_seq_needed_exp = 0; + ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns(); + if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) { if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) { - if (!ssp->sda_is_static) { + if (!ssp->srcu_sup->sda_is_static) { free_percpu(ssp->sda); ssp->sda = NULL; + kfree(ssp->srcu_sup); return -ENOMEM; } } else { - WRITE_ONCE(ssp->srcu_size_state, SRCU_SIZE_BIG); + WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG); } } - smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */ + ssp->srcu_sup->srcu_ssp = ssp; + smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, 0); /* Init done. */ return 0; } @@ -277,7 +288,6 @@ int __init_srcu_struct(struct srcu_struct *ssp, const char *name, /* Don't re-initialize a lock while it is held. */ debug_check_no_locks_freed((void *)ssp, sizeof(*ssp)); lockdep_init_map(&ssp->dep_map, name, key, 0); - spin_lock_init(&ACCESS_PRIVATE(ssp, lock)); return init_srcu_struct_fields(ssp, false); } EXPORT_SYMBOL_GPL(__init_srcu_struct); @@ -294,7 +304,6 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct); */ int init_srcu_struct(struct srcu_struct *ssp) { - spin_lock_init(&ACCESS_PRIVATE(ssp, lock)); return init_srcu_struct_fields(ssp, false); } EXPORT_SYMBOL_GPL(init_srcu_struct); @@ -306,8 +315,8 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); */ static void __srcu_transition_to_big(struct srcu_struct *ssp) { - lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock)); - smp_store_release(&ssp->srcu_size_state, SRCU_SIZE_ALLOC); + lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); + smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_ALLOC); } /* @@ -318,15 +327,15 @@ static void srcu_transition_to_big(struct srcu_struct *ssp) unsigned long flags; /* Double-checked locking on ->srcu_size-state. */ - if (smp_load_acquire(&ssp->srcu_size_state) != SRCU_SIZE_SMALL) + if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) return; - spin_lock_irqsave_rcu_node(ssp, flags); - if (smp_load_acquire(&ssp->srcu_size_state) != SRCU_SIZE_SMALL) { - spin_unlock_irqrestore_rcu_node(ssp, flags); + spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); + if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) { + spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); return; } __srcu_transition_to_big(ssp); - spin_unlock_irqrestore_rcu_node(ssp, flags); + spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* @@ -337,14 +346,14 @@ static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp) { unsigned long j; - if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_size_state) + if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_sup->srcu_size_state) return; j = jiffies; - if (ssp->srcu_size_jiffies != j) { - ssp->srcu_size_jiffies = j; - ssp->srcu_n_lock_retries = 0; + if (ssp->srcu_sup->srcu_size_jiffies != j) { + ssp->srcu_sup->srcu_size_jiffies = j; + ssp->srcu_sup->srcu_n_lock_retries = 0; } - if (++ssp->srcu_n_lock_retries <= small_contention_lim) + if (++ssp->srcu_sup->srcu_n_lock_retries <= small_contention_lim) return; __srcu_transition_to_big(ssp); } @@ -361,9 +370,9 @@ static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned lon if (spin_trylock_irqsave_rcu_node(sdp, *flags)) return; - spin_lock_irqsave_rcu_node(ssp, *flags); + spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); spin_lock_irqsave_check_contention(ssp); - spin_unlock_irqrestore_rcu_node(ssp, *flags); + spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags); spin_lock_irqsave_rcu_node(sdp, *flags); } @@ -375,9 +384,9 @@ static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned lon */ static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags) { - if (spin_trylock_irqsave_rcu_node(ssp, *flags)) + if (spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags)) return; - spin_lock_irqsave_rcu_node(ssp, *flags); + spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); spin_lock_irqsave_check_contention(ssp); } @@ -394,15 +403,15 @@ static void check_init_srcu_struct(struct srcu_struct *ssp) unsigned long flags; /* The smp_load_acquire() pairs with the smp_store_release(). */ - if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed))) /*^^^*/ + if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/ return; /* Already initialized. */ - spin_lock_irqsave_rcu_node(ssp, flags); - if (!rcu_seq_state(ssp->srcu_gp_seq_needed)) { - spin_unlock_irqrestore_rcu_node(ssp, flags); + spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); + if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) { + spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); return; } init_srcu_struct_fields(ssp, true); - spin_unlock_irqrestore_rcu_node(ssp, flags); + spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); } /* @@ -607,17 +616,18 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp) unsigned long gpstart; unsigned long j; unsigned long jbase = SRCU_INTERVAL; + struct srcu_usage *sup = ssp->srcu_sup; - if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp))) + if (ULONG_CMP_LT(READ_ONCE(sup->srcu_gp_seq), READ_ONCE(sup->srcu_gp_seq_needed_exp))) jbase = 0; - if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) { + if (rcu_seq_state(READ_ONCE(sup->srcu_gp_seq))) { j = jiffies - 1; - gpstart = READ_ONCE(ssp->srcu_gp_start); + gpstart = READ_ONCE(sup->srcu_gp_start); if (time_after(j, gpstart)) jbase += j - gpstart; if (!jbase) { - WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1); - if (READ_ONCE(ssp->srcu_n_exp_nodelay) > srcu_max_nodelay_phase) + WRITE_ONCE(sup->srcu_n_exp_nodelay, READ_ONCE(sup->srcu_n_exp_nodelay) + 1); + if (READ_ONCE(sup->srcu_n_exp_nodelay) > srcu_max_nodelay_phase) jbase = 1; } } @@ -634,12 +644,13 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp) void cleanup_srcu_struct(struct srcu_struct *ssp) { int cpu; + struct srcu_usage *sup = ssp->srcu_sup; if (WARN_ON(!srcu_get_delay(ssp))) return; /* Just leak it! */ if (WARN_ON(srcu_readers_active(ssp))) return; /* Just leak it! */ - flush_delayed_work(&ssp->work); + flush_delayed_work(&sup->work); for_each_possible_cpu(cpu) { struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); @@ -648,21 +659,23 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist))) return; /* Forgot srcu_barrier(), so just leak it! */ } - if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) || - WARN_ON(rcu_seq_current(&ssp->srcu_gp_seq) != ssp->srcu_gp_seq_needed) || + if (WARN_ON(rcu_seq_state(READ_ONCE(sup->srcu_gp_seq)) != SRCU_STATE_IDLE) || + WARN_ON(rcu_seq_current(&sup->srcu_gp_seq) != sup->srcu_gp_seq_needed) || WARN_ON(srcu_readers_active(ssp))) { pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n", - __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)), - rcu_seq_current(&ssp->srcu_gp_seq), ssp->srcu_gp_seq_needed); + __func__, ssp, rcu_seq_state(READ_ONCE(sup->srcu_gp_seq)), + rcu_seq_current(&sup->srcu_gp_seq), sup->srcu_gp_seq_needed); return; /* Caller forgot to stop doing call_srcu()? */ } - if (!ssp->sda_is_static) { + kfree(sup->node); + sup->node = NULL; + sup->srcu_size_state = SRCU_SIZE_SMALL; + if (!sup->sda_is_static) { free_percpu(ssp->sda); ssp->sda = NULL; + kfree(sup); + ssp->srcu_sup = NULL; } - kfree(ssp->node); - ssp->node = NULL; - ssp->srcu_size_state = SRCU_SIZE_SMALL; } EXPORT_SYMBOL_GPL(cleanup_srcu_struct); @@ -760,23 +773,23 @@ static void srcu_gp_start(struct srcu_struct *ssp) struct srcu_data *sdp; int state; - if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) + if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id()); else sdp = this_cpu_ptr(ssp->sda); - lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock)); - WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)); + lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); + WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)); spin_lock_rcu_node(sdp); /* Interrupts already disabled. */ rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&ssp->srcu_gp_seq)); + rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, - rcu_seq_snap(&ssp->srcu_gp_seq)); + rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq)); spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */ - WRITE_ONCE(ssp->srcu_gp_start, jiffies); - WRITE_ONCE(ssp->srcu_n_exp_nodelay, 0); + WRITE_ONCE(ssp->srcu_sup->srcu_gp_start, jiffies); + WRITE_ONCE(ssp->srcu_sup->srcu_n_exp_nodelay, 0); smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */ - rcu_seq_start(&ssp->srcu_gp_seq); - state = rcu_seq_state(ssp->srcu_gp_seq); + rcu_seq_start(&ssp->srcu_sup->srcu_gp_seq); + state = rcu_seq_state(ssp->srcu_sup->srcu_gp_seq); WARN_ON_ONCE(state != SRCU_STATE_SCAN1); } @@ -849,28 +862,29 @@ static void srcu_gp_end(struct srcu_struct *ssp) unsigned long sgsne; struct srcu_node *snp; int ss_state; + struct srcu_usage *sup = ssp->srcu_sup; /* Prevent more than one additional grace period. */ - mutex_lock(&ssp->srcu_cb_mutex); + mutex_lock(&sup->srcu_cb_mutex); /* End the current grace period. */ - spin_lock_irq_rcu_node(ssp); - idx = rcu_seq_state(ssp->srcu_gp_seq); + spin_lock_irq_rcu_node(sup); + idx = rcu_seq_state(sup->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); - if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp))) + if (ULONG_CMP_LT(READ_ONCE(sup->srcu_gp_seq), READ_ONCE(sup->srcu_gp_seq_needed_exp))) cbdelay = 0; - WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns()); - rcu_seq_end(&ssp->srcu_gp_seq); - gpseq = rcu_seq_current(&ssp->srcu_gp_seq); - if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq)) - WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, gpseq); - spin_unlock_irq_rcu_node(ssp); - mutex_unlock(&ssp->srcu_gp_mutex); + WRITE_ONCE(sup->srcu_last_gp_end, ktime_get_mono_fast_ns()); + rcu_seq_end(&sup->srcu_gp_seq); + gpseq = rcu_seq_current(&sup->srcu_gp_seq); + if (ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, gpseq)) + WRITE_ONCE(sup->srcu_gp_seq_needed_exp, gpseq); + spin_unlock_irq_rcu_node(sup); + mutex_unlock(&sup->srcu_gp_mutex); /* A new grace period can start at this point. But only one. */ /* Initiate callback invocation as needed. */ - ss_state = smp_load_acquire(&ssp->srcu_size_state); + ss_state = smp_load_acquire(&sup->srcu_size_state); if (ss_state < SRCU_SIZE_WAIT_BARRIER) { srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, get_boot_cpu_id()), cbdelay); @@ -879,7 +893,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) srcu_for_each_node_breadth_first(ssp, snp) { spin_lock_irq_rcu_node(snp); cbs = false; - last_lvl = snp >= ssp->level[rcu_num_lvls - 1]; + last_lvl = snp >= sup->level[rcu_num_lvls - 1]; if (last_lvl) cbs = ss_state < SRCU_SIZE_BIG || snp->srcu_have_cbs[idx] == gpseq; snp->srcu_have_cbs[idx] = gpseq; @@ -911,18 +925,18 @@ static void srcu_gp_end(struct srcu_struct *ssp) } /* Callback initiation done, allow grace periods after next. */ - mutex_unlock(&ssp->srcu_cb_mutex); + mutex_unlock(&sup->srcu_cb_mutex); /* Start a new grace period if needed. */ - spin_lock_irq_rcu_node(ssp); - gpseq = rcu_seq_current(&ssp->srcu_gp_seq); + spin_lock_irq_rcu_node(sup); + gpseq = rcu_seq_current(&sup->srcu_gp_seq); if (!rcu_seq_state(gpseq) && - ULONG_CMP_LT(gpseq, ssp->srcu_gp_seq_needed)) { + ULONG_CMP_LT(gpseq, sup->srcu_gp_seq_needed)) { |
