diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/fork.c | 1 | ||||
-rw-r--r-- | kernel/rcu/Kconfig | 29 | ||||
-rw-r--r-- | kernel/rcu/rcu.h | 15 | ||||
-rw-r--r-- | kernel/rcu/rcuscale.c | 1 | ||||
-rw-r--r-- | kernel/rcu/rcutorture.c | 247 | ||||
-rw-r--r-- | kernel/rcu/refscale.c | 18 | ||||
-rw-r--r-- | kernel/rcu/tasks.h | 541 | ||||
-rw-r--r-- | kernel/rcu/tiny.c | 11 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 129 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 13 | ||||
-rw-r--r-- | kernel/rcu/tree_exp.h | 113 | ||||
-rw-r--r-- | kernel/rcu/tree_nocb.h | 266 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 35 | ||||
-rw-r--r-- | kernel/rcu/update.c | 13 | ||||
-rw-r--r-- | kernel/sched/core.c | 32 |
15 files changed, 1026 insertions, 438 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 9d44f2d46c69..1950eb870244 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1814,6 +1814,7 @@ static inline void rcu_copy_process(struct task_struct *p) p->trc_reader_nesting = 0; p->trc_reader_special.s = 0; INIT_LIST_HEAD(&p->trc_holdout_list); + INIT_LIST_HEAD(&p->trc_blkd_node); #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ } diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 1c630e573548..c05ca52cdf64 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -262,6 +262,35 @@ config RCU_NOCB_CPU Say Y here if you need reduced OS jitter, despite added overhead. Say N here if you are unsure. +config RCU_NOCB_CPU_DEFAULT_ALL + bool "Offload RCU callback processing from all CPUs by default" + depends on RCU_NOCB_CPU + default n + help + Use this option to offload callback processing from all CPUs + by default, in the absence of the rcu_nocbs or nohz_full boot + parameter. This also avoids the need to use any boot parameters + to achieve the effect of offloading all CPUs on boot. + + Say Y here if you want offload all CPUs by default on boot. + Say N here if you are unsure. + +config RCU_NOCB_CPU_CB_BOOST + bool "Offload RCU callback from real-time kthread" + depends on RCU_NOCB_CPU && RCU_BOOST + default y if PREEMPT_RT + help + Use this option to invoke offloaded callbacks as SCHED_FIFO + to avoid starvation by heavy SCHED_OTHER background load. + Of course, running as SCHED_FIFO during callback floods will + cause the rcuo[ps] kthreads to monopolize the CPU for hundreds + of milliseconds or more. Therefore, when enabling this option, + it is your responsibility to ensure that latency-sensitive + tasks either run with higher priority or run on some other CPU. + + Say Y here if you want to set RT priority for offloading kthreads. + Say N here if you are building a !PREEMPT_RT kernel and are unsure. + config TASKS_TRACE_RCU_READ_MB bool "Tasks Trace RCU readers use memory barriers in user and idle" depends on RCU_EXPERT && TASKS_TRACE_RCU diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 4916077119f3..32291f4eefde 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -23,6 +23,9 @@ #define RCU_SEQ_CTR_SHIFT 2 #define RCU_SEQ_STATE_MASK ((1 << RCU_SEQ_CTR_SHIFT) - 1) +/* Low-order bit definition for polled grace-period APIs. */ +#define RCU_GET_STATE_COMPLETED 0x1 + extern int sysctl_sched_rt_runtime; /* @@ -120,6 +123,18 @@ static inline bool rcu_seq_done(unsigned long *sp, unsigned long s) } /* + * Given a snapshot from rcu_seq_snap(), determine whether or not a + * full update-side operation has occurred, but do not allow the + * (ULONG_MAX / 2) safety-factor/guard-band. + */ +static inline bool rcu_seq_done_exact(unsigned long *sp, unsigned long s) +{ + unsigned long cur_s = READ_ONCE(*sp); + + return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (2 * RCU_SEQ_STATE_MASK + 1)); +} + +/* * Has a grace period completed since the time the old gp_seq was collected? */ static inline bool rcu_seq_completed_gp(unsigned long old, unsigned long new) diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 277a5bfb37d4..3ef02d4a8108 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -419,6 +419,7 @@ rcu_scale_writer(void *arg) VERBOSE_SCALEOUT_STRING("rcu_scale_writer task started"); WARN_ON(!wdpp); set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); + current->flags |= PF_NO_SETAFFINITY; sched_set_fifo_low(current); if (holdoff) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 7120165a9342..d8e1b270a065 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -75,62 +75,47 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@ torture_param(int, extendables, RCUTORTURE_MAX_EXTEND, "Extend readers by disabling bh (1), irqs (2), or preempt (4)"); -torture_param(int, fqs_duration, 0, - "Duration of fqs bursts (us), 0 to disable"); +torture_param(int, fqs_duration, 0, "Duration of fqs bursts (us), 0 to disable"); torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)"); torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)"); -torture_param(int, fwd_progress, 1, "Test grace-period forward progress"); +torture_param(int, fwd_progress, 1, "Number of grace-period forward progress tasks (0 to disable)"); torture_param(int, fwd_progress_div, 4, "Fraction of CPU stall to wait"); -torture_param(int, fwd_progress_holdoff, 60, - "Time between forward-progress tests (s)"); -torture_param(bool, fwd_progress_need_resched, 1, - "Hide cond_resched() behind need_resched()"); +torture_param(int, fwd_progress_holdoff, 60, "Time between forward-progress tests (s)"); +torture_param(bool, fwd_progress_need_resched, 1, "Hide cond_resched() behind need_resched()"); torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives"); +torture_param(bool, gp_cond_exp, false, "Use conditional/async expedited GP wait primitives"); torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); -torture_param(bool, gp_normal, false, - "Use normal (non-expedited) GP wait primitives"); +torture_param(bool, gp_normal, false, "Use normal (non-expedited) GP wait primitives"); torture_param(bool, gp_poll, false, "Use polling GP wait primitives"); +torture_param(bool, gp_poll_exp, false, "Use polling expedited GP wait primitives"); torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives"); torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers"); torture_param(int, leakpointer, 0, "Leak pointer dereferences from readers"); -torture_param(int, n_barrier_cbs, 0, - "# of callbacks/kthreads for barrier testing"); +torture_param(int, n_barrier_cbs, 0, "# of callbacks/kthreads for barrier testing"); torture_param(int, nfakewriters, 4, "Number of RCU fake writer threads"); torture_param(int, nreaders, -1, "Number of RCU reader threads"); -torture_param(int, object_debug, 0, - "Enable debug-object double call_rcu() testing"); +torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing"); torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); -torture_param(int, onoff_interval, 0, - "Time between CPU hotplugs (jiffies), 0=disable"); +torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable"); torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable"); torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)"); -torture_param(int, read_exit_delay, 13, - "Delay between read-then-exit episodes (s)"); -torture_param(int, read_exit_burst, 16, - "# of read-then-exit bursts per episode, zero to disable"); +torture_param(int, read_exit_delay, 13, "Delay between read-then-exit episodes (s)"); +torture_param(int, read_exit_burst, 16, "# of read-then-exit bursts per episode, zero to disable"); torture_param(int, shuffle_interval, 3, "Number of seconds between shuffles"); torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable."); torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable."); -torture_param(int, stall_cpu_holdoff, 10, - "Time to wait before starting stall (s)."); -torture_param(bool, stall_no_softlockup, false, - "Avoid softlockup warning during cpu stall."); +torture_param(int, stall_cpu_holdoff, 10, "Time to wait before starting stall (s)."); +torture_param(bool, stall_no_softlockup, false, "Avoid softlockup warning during cpu stall."); torture_param(int, stall_cpu_irqsoff, 0, "Disable interrupts while stalling."); torture_param(int, stall_cpu_block, 0, "Sleep while stalling."); -torture_param(int, stall_gp_kthread, 0, - "Grace-period kthread stall duration (s)."); -torture_param(int, stat_interval, 60, - "Number of seconds between stats printk()s"); +torture_param(int, stall_gp_kthread, 0, "Grace-period kthread stall duration (s)."); +torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s"); torture_param(int, stutter, 5, "Number of seconds to run/halt test"); torture_param(int, test_boost, 1, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); -torture_param(int, test_boost_duration, 4, - "Duration of each boost test, seconds."); -torture_param(int, test_boost_interval, 7, - "Interval between boost tests, seconds."); -torture_param(bool, test_no_idle_hz, true, - "Test support for tickless idle CPUs"); -torture_param(int, verbose, 1, - "Enable verbose debugging printk()s"); +torture_param(int, test_boost_duration, 4, "Duration of each boost test, seconds."); +torture_param(int, test_boost_interval, 7, "Interval between boost tests, seconds."); +torture_param(bool, test_no_idle_hz, true, "Test support for tickless idle CPUs"); +torture_param(int, verbose, 1, "Enable verbose debugging printk()s"); static char *torture_type = "rcu"; module_param(torture_type, charp, 0444); @@ -209,12 +194,16 @@ static int rcu_torture_writer_state; #define RTWS_DEF_FREE 3 #define RTWS_EXP_SYNC 4 #define RTWS_COND_GET 5 -#define RTWS_COND_SYNC 6 -#define RTWS_POLL_GET 7 -#define RTWS_POLL_WAIT 8 -#define RTWS_SYNC 9 -#define RTWS_STUTTER 10 -#define RTWS_STOPPING 11 +#define RTWS_COND_GET_EXP 6 +#define RTWS_COND_SYNC 7 +#define RTWS_COND_SYNC_EXP 8 +#define RTWS_POLL_GET 9 +#define RTWS_POLL_GET_EXP 10 +#define RTWS_POLL_WAIT 11 +#define RTWS_POLL_WAIT_EXP 12 +#define RTWS_SYNC 13 +#define RTWS_STUTTER 14 +#define RTWS_STOPPING 15 static const char * const rcu_torture_writer_state_names[] = { "RTWS_FIXED_DELAY", "RTWS_DELAY", @@ -222,9 +211,13 @@ static const char * const rcu_torture_writer_state_names[] = { "RTWS_DEF_FREE", "RTWS_EXP_SYNC", "RTWS_COND_GET", + "RTWS_COND_GET_EXP", "RTWS_COND_SYNC", + "RTWS_COND_SYNC_EXP", "RTWS_POLL_GET", + "RTWS_POLL_GET_EXP", "RTWS_POLL_WAIT", + "RTWS_POLL_WAIT_EXP", "RTWS_SYNC", "RTWS_STUTTER", "RTWS_STOPPING", @@ -337,7 +330,12 @@ struct rcu_torture_ops { void (*deferred_free)(struct rcu_torture *p); void (*sync)(void); void (*exp_sync)(void); + unsigned long (*get_gp_state_exp)(void); + unsigned long (*start_gp_poll_exp)(void); + bool (*poll_gp_state_exp)(unsigned long oldstate); + void (*cond_sync_exp)(unsigned long oldstate); unsigned long (*get_gp_state)(void); + unsigned long (*get_gp_completed)(void); unsigned long (*start_gp_poll)(void); bool (*poll_gp_state)(unsigned long oldstate); void (*cond_sync)(unsigned long oldstate); @@ -504,9 +502,14 @@ static struct rcu_torture_ops rcu_ops = { .sync = synchronize_rcu, .exp_sync = synchronize_rcu_expedited, .get_gp_state = get_state_synchronize_rcu, + .get_gp_completed = get_completed_synchronize_rcu, .start_gp_poll = start_poll_synchronize_rcu, .poll_gp_state = poll_state_synchronize_rcu, .cond_sync = cond_synchronize_rcu, + .get_gp_state_exp = get_state_synchronize_rcu, + .start_gp_poll_exp = start_poll_synchronize_rcu_expedited, + .poll_gp_state_exp = poll_state_synchronize_rcu, + .cond_sync_exp = cond_synchronize_rcu_expedited, .call = call_rcu, .cb_barrier = rcu_barrier, .fqs = rcu_force_quiescent_state, @@ -1136,9 +1139,8 @@ rcu_torture_fqs(void *arg) return 0; } -// Used by writers to randomly choose from the available grace-period -// primitives. The only purpose of the initialization is to size the array. -static int synctype[] = { RTWS_DEF_FREE, RTWS_EXP_SYNC, RTWS_COND_GET, RTWS_POLL_GET, RTWS_SYNC }; +// Used by writers to randomly choose from the available grace-period primitives. +static int synctype[ARRAY_SIZE(rcu_torture_writer_state_names)] = { }; static int nsynctypes; /* @@ -1146,18 +1148,27 @@ static int nsynctypes; */ static void rcu_torture_write_types(void) { - bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal; - bool gp_poll1 = gp_poll, gp_sync1 = gp_sync; + bool gp_cond1 = gp_cond, gp_cond_exp1 = gp_cond_exp, gp_exp1 = gp_exp; + bool gp_poll_exp1 = gp_poll_exp, gp_normal1 = gp_normal, gp_poll1 = gp_poll; + bool gp_sync1 = gp_sync; /* Initialize synctype[] array. If none set, take default. */ - if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_poll1 && !gp_sync1) - gp_cond1 = gp_exp1 = gp_normal1 = gp_poll1 = gp_sync1 = true; + if (!gp_cond1 && !gp_cond_exp1 && !gp_exp1 && !gp_poll_exp && + !gp_normal1 && !gp_poll1 && !gp_sync1) + gp_cond1 = gp_cond_exp1 = gp_exp1 = gp_poll_exp1 = + gp_normal1 = gp_poll1 = gp_sync1 = true; if (gp_cond1 && cur_ops->get_gp_state && cur_ops->cond_sync) { synctype[nsynctypes++] = RTWS_COND_GET; pr_info("%s: Testing conditional GPs.\n", __func__); } else if (gp_cond && (!cur_ops->get_gp_state || !cur_ops->cond_sync)) { pr_alert("%s: gp_cond without primitives.\n", __func__); } + if (gp_cond_exp1 && cur_ops->get_gp_state_exp && cur_ops->cond_sync_exp) { + synctype[nsynctypes++] = RTWS_COND_GET_EXP; + pr_info("%s: Testing conditional expedited GPs.\n", __func__); + } else if (gp_cond_exp && (!cur_ops->get_gp_state_exp || !cur_ops->cond_sync_exp)) { + pr_alert("%s: gp_cond_exp without primitives.\n", __func__); + } if (gp_exp1 && cur_ops->exp_sync) { synctype[nsynctypes++] = RTWS_EXP_SYNC; pr_info("%s: Testing expedited GPs.\n", __func__); @@ -1176,6 +1187,12 @@ static void rcu_torture_write_types(void) } else if (gp_poll && (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)) { pr_alert("%s: gp_poll without primitives.\n", __func__); } + if (gp_poll_exp1 && cur_ops->start_gp_poll_exp && cur_ops->poll_gp_state_exp) { + synctype[nsynctypes++] = RTWS_POLL_GET_EXP; + pr_info("%s: Testing polling expedited GPs.\n", __func__); + } else if (gp_poll_exp && (!cur_ops->start_gp_poll_exp || !cur_ops->poll_gp_state_exp)) { + pr_alert("%s: gp_poll_exp without primitives.\n", __func__); + } if (gp_sync1 && cur_ops->sync) { synctype[nsynctypes++] = RTWS_SYNC; pr_info("%s: Testing normal GPs.\n", __func__); @@ -1254,6 +1271,10 @@ rcu_torture_writer(void *arg) rcu_torture_writer_state_getname(), rcu_torture_writer_state, cookie, cur_ops->get_gp_state()); + if (cur_ops->get_gp_completed) { + cookie = cur_ops->get_gp_completed(); + WARN_ON_ONCE(!cur_ops->poll_gp_state(cookie)); + } cur_ops->readunlock(idx); } switch (synctype[torture_random(&rand) % nsynctypes]) { @@ -1263,7 +1284,12 @@ rcu_torture_writer(void *arg) break; case RTWS_EXP_SYNC: rcu_torture_writer_state = RTWS_EXP_SYNC; + if (cur_ops->get_gp_state && cur_ops->poll_gp_state) + cookie = cur_ops->get_gp_state(); cur_ops->exp_sync(); + cur_ops->exp_sync(); + if (cur_ops->get_gp_state && cur_ops->poll_gp_state) + WARN_ON_ONCE(!cur_ops->poll_gp_state(cookie)); rcu_torture_pipe_update(old_rp); break; case RTWS_COND_GET: @@ -1274,6 +1300,14 @@ rcu_torture_writer(void *arg) cur_ops->cond_sync(gp_snap); rcu_torture_pipe_update(old_rp); break; + case RTWS_COND_GET_EXP: + rcu_torture_writer_state = RTWS_COND_GET_EXP; + gp_snap = cur_ops->get_gp_state_exp(); + torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + rcu_torture_writer_state = RTWS_COND_SYNC_EXP; + cur_ops->cond_sync_exp(gp_snap); + rcu_torture_pipe_update(old_rp); + break; case RTWS_POLL_GET: rcu_torture_writer_state = RTWS_POLL_GET; gp_snap = cur_ops->start_gp_poll(); @@ -1283,9 +1317,23 @@ rcu_torture_writer(void *arg) &rand); rcu_torture_pipe_update(old_rp); break; + case RTWS_POLL_GET_EXP: + rcu_torture_writer_state = RTWS_POLL_GET_EXP; + gp_snap = cur_ops->start_gp_poll_exp(); + rcu_torture_writer_state = RTWS_POLL_WAIT_EXP; + while (!cur_ops->poll_gp_state_exp(gp_snap)) + torture_hrtimeout_jiffies(torture_random(&rand) % 16, + &rand); + rcu_torture_pipe_update(old_rp); + break; case RTWS_SYNC: rcu_torture_writer_state = RTWS_SYNC; + if (cur_ops->get_gp_state && cur_ops->poll_gp_state) + cookie = cur_ops->get_gp_state(); cur_ops->sync(); + cur_ops->sync(); + if (cur_ops->get_gp_state && cur_ops->poll_gp_state) + WARN_ON_ONCE(!cur_ops->poll_gp_state(cookie)); rcu_torture_pipe_update(old_rp); break; default: @@ -1321,8 +1369,9 @@ rcu_torture_writer(void *arg) if (list_empty(&rcu_tortures[i].rtort_free) && rcu_access_pointer(rcu_torture_current) != &rcu_tortures[i]) { - rcu_ftrace_dump(DUMP_ALL); + tracing_off(); WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count); + rcu_ftrace_dump(DUMP_ALL); } if (stutter_waited) sched_set_normal(current, oldnice); @@ -1384,6 +1433,11 @@ rcu_torture_fakewriter(void *arg) torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); cur_ops->cond_sync(gp_snap); break; + case RTWS_COND_GET_EXP: + gp_snap = cur_ops->get_gp_state_exp(); + torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + cur_ops->cond_sync_exp(gp_snap); + break; case RTWS_POLL_GET: gp_snap = cur_ops->start_gp_poll(); while (!cur_ops->poll_gp_state(gp_snap)) { @@ -1391,6 +1445,13 @@ rcu_torture_fakewriter(void *arg) &rand); } break; + case RTWS_POLL_GET_EXP: + gp_snap = cur_ops->start_gp_poll_exp(); + while (!cur_ops->poll_gp_state_exp(gp_snap)) { + torture_hrtimeout_jiffies(torture_random(&rand) % 16, + &rand); + } + break; case RTWS_SYNC: cur_ops->sync(); break; @@ -1868,7 +1929,7 @@ rcu_torture_stats_print(void) batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]); } } - for (i = RCU_TORTURE_PIPE_LEN - 1; i >= 0; i--) { + for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) { if (pipesummary[i] != 0) break; } @@ -1990,7 +2051,13 @@ static void rcu_torture_mem_dump_obj(void) static int z; kcp = kmem_cache_create("rcuscale", 136, 8, SLAB_STORE_USER, NULL); + if (WARN_ON_ONCE(!kcp)) + return; rhp = kmem_cache_alloc(kcp, GFP_KERNEL); + if (WARN_ON_ONCE(!rhp)) { + kmem_cache_destroy(kcp); + return; + } pr_alert("mem_dump_obj() slab test: rcu_torture_stats = %px, &rhp = %px, rhp = %px, &z = %px\n", stats_task, &rhp, rhp, &z); pr_alert("mem_dump_obj(ZERO_SIZE_PTR):"); mem_dump_obj(ZERO_SIZE_PTR); @@ -2007,6 +2074,8 @@ static void rcu_torture_mem_dump_obj(void) kmem_cache_free(kcp, rhp); kmem_cache_destroy(kcp); rhp = kmalloc(sizeof(*rhp), GFP_KERNEL); + if (WARN_ON_ONCE(!rhp)) + return; pr_alert("mem_dump_obj() kmalloc test: rcu_torture_stats = %px, &rhp = %px, rhp = %px\n", stats_task, &rhp, rhp); pr_alert("mem_dump_obj(kmalloc %px):", rhp); mem_dump_obj(rhp); @@ -2014,6 +2083,8 @@ static void rcu_torture_mem_dump_obj(void) mem_dump_obj(&rhp->func); kfree(rhp); rhp = vmalloc(4096); + if (WARN_ON_ONCE(!rhp)) + return; pr_alert("mem_dump_obj() vmalloc test: rcu_torture_stats = %px, &rhp = %px, rhp = %px\n", stats_task, &rhp, rhp); pr_alert("mem_dump_obj(vmalloc %px):", rhp); mem_dump_obj(rhp); @@ -2075,6 +2146,19 @@ static int rcutorture_booster_init(unsigned int cpu) if (boost_tasks[cpu] != NULL) return 0; /* Already created, nothing more to do. */ + // Testing RCU priority boosting requires rcutorture do + // some serious abuse. Counter this by running ksoftirqd + // at higher priority. + if (IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) { + struct sched_param sp; + struct task_struct *t; + + t = per_cpu(ksoftirqd, cpu); + WARN_ON_ONCE(!t); + sp.sched_priority = 2; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + } + /* Don't allow time recalculation while creating a new task. */ mutex_lock(&boost_mutex); rcu_torture_disable_rt_throttle(); @@ -2873,7 +2957,6 @@ static int rcu_torture_read_exit_child(void *trsp_in) // Parent kthread which creates and destroys read-exit child kthreads. static int rcu_torture_read_exit(void *unused) { - int count = 0; bool errexit = false; int i; struct task_struct *tsp; @@ -2885,34 +2968,28 @@ static int rcu_torture_read_exit(void *unused) // Each pass through this loop does one read-exit episode. do { - if (++count > read_exit_burst) { - VERBOSE_TOROUT_STRING("rcu_torture_read_exit: End of episode"); - rcu_barrier(); // Wait for task_struct free, avoid OOM. - for (i = 0; i < read_exit_delay; i++) { - schedule_timeout_uninterruptible(HZ); - if (READ_ONCE(read_exit_child_stop)) - break; + VERBOSE_TOROUT_STRING("rcu_torture_read_exit: Start of episode"); + for (i = 0; i < read_exit_burst; i++) { + if (READ_ONCE(read_exit_child_stop)) + break; + stutter_wait("rcu_torture_read_exit"); + // Spawn child. + tsp = kthread_run(rcu_torture_read_exit_child, + &trs, "%s", "rcu_torture_read_exit_child"); + if (IS_ERR(tsp)) { + TOROUT_ERRSTRING("out of memory"); + errexit = true; + break; } - if (!READ_ONCE(read_exit_child_stop)) - VERBOSE_TOROUT_STRING("rcu_torture_read_exit: Start of episode"); - count = 0; - } - if (READ_ONCE(read_exit_child_stop)) - break; - // Spawn child. - tsp = kthread_run(rcu_torture_read_exit_child, - &trs, "%s", - "rcu_torture_read_exit_child"); - if (IS_ERR(tsp)) { - TOROUT_ERRSTRING("out of memory"); - errexit = true; - tsp = NULL; - break; + cond_resched(); + kthread_stop(tsp); + n_read_exits++; } - cond_resched(); - kthread_stop(tsp); - n_read_exits ++; - stutter_wait("rcu_torture_read_exit"); + VERBOSE_TOROUT_STRING("rcu_torture_read_exit: End of episode"); + rcu_barrier(); // Wait for task_struct free, avoid OOM. + i = 0; + for (; !errexit && !READ_ONCE(read_exit_child_stop) && i < read_exit_delay; i++) + schedule_timeout_uninterruptible(HZ); } while (!errexit && !READ_ONCE(read_exit_child_stop)); // Clean up and exit. @@ -3122,6 +3199,7 @@ static void rcu_test_debug_objects(void) pr_alert("%s: WARN: Duplicate call_rcu() test complete.\n", KBUILD_MODNAME); destroy_rcu_head_on_stack(&rh1); destroy_rcu_head_on_stack(&rh2); + kfree(rhp); #else /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ pr_alert("%s: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n", KBUILD_MODNAME); #endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ @@ -3329,21 +3407,6 @@ rcu_torture_init(void) rcutor_hp = firsterr; if (torture_init_error(firsterr)) goto unwind; - - // Testing RCU priority boosting requires rcutorture do - // some serious abuse. Counter this by running ksoftirqd - // at higher priority. - if (IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) { - for_each_online_cpu(cpu) { - struct sched_param sp; - struct task_struct *t; - - t = per_cpu(ksoftirqd, cpu); - WARN_ON_ONCE(!t); - sp.sched_priority = 2; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - } - } } shutdown_jiffies = jiffies + shutdown_secs * HZ; firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup); diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index 909644abee67..435c884c02b5 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -385,7 +385,7 @@ static struct ref_scale_ops rwsem_ops = { }; // Definitions for global spinlock -static DEFINE_SPINLOCK(test_lock); +static DEFINE_RAW_SPINLOCK(test_lock); static void ref_lock_section(const int nloops) { @@ -393,8 +393,8 @@ static void ref_lock_section(const int nloops) preempt_disable(); for (i = nloops; i >= 0; i--) { - spin_lock(&test_lock); - spin_unlock(&test_lock); + raw_spin_lock(&test_lock); + raw_spin_unlock(&test_lock); } preempt_enable(); } @@ -405,9 +405,9 @@ static void ref_lock_delay_section(const int nloops, const int udl, const int nd preempt_disable(); for (i = nloops; i >= 0; i--) { - spin_lock(&test_lock); + raw_spin_lock(&test_lock); un_delay(udl, ndl); - spin_unlock(&test_lock); + raw_spin_unlock(&test_lock); } preempt_enable(); } @@ -427,8 +427,8 @@ static void ref_lock_irq_section(const int nloops) preempt_disable(); for (i = nloops; i >= 0; i--) { - spin_lock_irqsave(&test_lock, flags); - spin_unlock_irqrestore(&test_lock, flags); + raw_spin_lock_irqsave(&test_lock, flags); + raw_spin_unlock_irqrestore(&test_lock, flags); } preempt_enable(); } @@ -440,9 +440,9 @@ static void ref_lock_irq_delay_section(const int nloops, const int udl, const in preempt_disable(); for (i = nloops; i >= 0; i--) { - spin_lock_irqsave(&test_lock, flags); + raw_spin_lock_irqsave(&test_lock, flags); un_delay(udl, ndl); - spin_unlock_irqrestore(&test_lock, flags); + raw_spin_unlock_irqrestore(&test_lock, flags); } preempt_enable(); } diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 3925e32159b5..83c7e6620d40 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -14,7 +14,7 @@ struct rcu_tasks; typedef void (*rcu_tasks_gp_func_t)(struct rcu_tasks *rtp); -typedef void (*pregp_func_t)(void); +typedef void (*pregp_func_t)(struct list_head *hop); typedef void (*pertask_func_t)(struct task_struct *t, struct list_head *hop); typedef void (*postscan_func_t)(struct list_head *hop); typedef void (*holdouts_func_t)(struct list_head *hop, bool ndrpt, bool *frptp); @@ -29,6 +29,7 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp); * @rtp_work: Work queue for invoking callbacks. * @rtp_irq_work: IRQ work queue for deferred wakeups. * @barrier_q_head: RCU callback for barrier operation. + * @rtp_blkd_tasks: List of tasks blocked as readers. * @cpu: CPU number corresponding to this entry. * @rtpp: Pointer to the rcu_tasks structure. */ @@ -40,6 +41,7 @@ struct rcu_tasks_percpu { struct work_struct rtp_work; struct irq_work rtp_irq_work; struct rcu_head barrier_q_head; + struct list_head rtp_blkd_tasks; int cpu; struct rcu_tasks *rtpp; }; @@ -48,6 +50,7 @@ struct rcu_tasks_percpu { * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism. * @cbs_wait: RCU wait allowing a new callback to get kthread's attention. * @cbs_gbl_lock: Lock protecting callback list. + * @tasks_gp_mutex: Mutex protecting grace period, needed during mid-boot dead zone. * @kthread_ptr: This flavor's grace-period/callback-invocation kthread. * @gp_func: This flavor's grace-period-wait function. * @gp_state: Grace period's most recent state transition (debugging). @@ -79,6 +82,7 @@ struct rcu_tasks_percpu { struct rcu_tasks { struct rcuwait cbs_wait; raw_spinlock_t cbs_gbl_lock; + struct mutex tasks_gp_mutex; int gp_state; int gp_sleep; int init_fract; @@ -119,6 +123,7 @@ static struct rcu_tasks rt_name = \ { \ .cbs_wait = __RCUWAIT_INITIALIZER(rt_name.wait), \ .cbs_gbl_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_gbl_lock), \ + .tasks_gp_mutex = __MUTEX_INITIALIZER(rt_name.tasks_gp_mutex), \ .gp_func = gp, \ .call_func = call, \ .rtpcpu = &rt_name ## __percpu, \ @@ -140,6 +145,7 @@ static int rcu_task_ipi_delay __read_mostly = RCU_TASK_IPI_DELAY; module_param(rcu_task_ipi_delay, int, 0644); /* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */ +#define RCU_TASK_BOOT_STALL_TIMEOUT (HZ * 30) #define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10) static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT; module_param(rcu_task_stall_timeout, int, 0644); @@ -253,6 +259,8 @@ static void cblist_init_generic(struct rcu_tasks *rtp) INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq); rtpcp->cpu = cpu; rtpcp->rtpp = rtp; + if (!rtpcp->rtp_blkd_tasks.next) + INIT_LIST_HEAD(&rtpcp->rtp_blkd_tasks); raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled. } raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags); @@ -323,17 +331,6 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, irq_work_queue(&rtpcp->rtp_irq_work); } -// Wait for a grace period for the specified flavor of Tasks RCU. -static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp) -{ - /* Complain if the scheduler has not started. */ - RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, - "synchronize_rcu_tasks called too soon"); - - /* Wait for the grace period. */ - wait_rcu_gp(rtp->call_func); -} - // RCU callback function for rcu_barrier_tasks_generic(). static void rcu_barrier_tasks_generic_cb(struct rcu_head *rhp) { @@ -439,6 +436,11 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp) WRITE_ONCE(rtp->percpu_dequeue_lim, 1); pr_info("Completing switch %s to CPU-0 callback queuing.\n", rtp->name); } + for (cpu = rtp->percpu_dequeue_lim; cpu < nr_cpu_ids; cpu++) { + struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu); + + WARN_ON_ONCE(rcu_segcblist_n_cbs(&rtpcp->cblist)); + } raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags); } @@ -497,10 +499,41 @@ static void rcu_tasks_invoke_cbs_wq(struct work_struct *wp) rcu_tasks_invoke_cbs(rtp, rtpcp); } -/* RCU-tasks kthread that detects grace periods and invokes callbacks. */ -static int __noreturn rcu_tasks_kthread(void *arg) +// Wait for one grace period. +static void rcu_tasks_one_gp(struct rcu_tasks *rtp, bool midboot) { int needgpcb; + + mutex_lock(&rtp->tasks_gp_mutex); + + // If there were none, wait a bit and start over. + if (unlikely(midboot)) { + needgpcb = 0x2; + } else { + set_tasks_gp_state(rtp, RTGS_WAIT_CBS); + rcuwait_wait_event(&rtp->cbs_wait, + (needgpcb = rcu_tasks_need_gpcb(rtp)), + TASK_IDLE); + } + + if (needgpcb & 0x2) { + // Wait for one grace period. + set_tasks_gp_state(rtp, RTGS_WAIT_GP); + rtp->gp_start = jiffies; + rcu_seq_start(&rtp->tasks_gp_seq); + rtp->gp_func(rtp); + rcu_seq_end(&rtp->tasks_gp_seq); + } + + // Invoke callbacks. + set_tasks_gp_state(rtp, RTGS_INVOKE_CBS); + rcu_tasks_invoke_cbs(rtp, per_cpu_ptr(rtp->rtpcpu, 0)); + mutex_unlock(&rtp->tasks_gp_mutex); +} + +// RCU-tasks kthread that detects grace periods and invokes callbacks. +static int __noreturn rcu_tasks_kthread(void *arg) +{ struct rcu_tasks *rtp = arg; /* Run on housekeeping CPUs by default. Sysadm can move if desired. */ @@ -514,29 +547,28 @@ static int __noreturn rcu_tasks_kthread(void *arg) * This loop is terminated by the system going down. ;-) */ for (;;) { - set_tasks_gp_state(rtp, RTGS_WAIT_CBS); + // Wait for one grace period and invoke any callbacks + // that are ready. + rcu_tasks_one_gp(rtp, false); - /* If there were none, wait a bit and start over. */ - rcuwait_wait_event(&rtp->cbs_wait, - (needgpcb = rcu_tasks_need_gpcb(rtp)), - TASK_IDLE); - - if (needgpcb & 0x2) { - // Wait for one grace period. - set_tasks_gp_state(rtp, RTGS_WAIT_GP); - rtp->gp_start = jiffies; - rcu_seq_start(&rtp->tasks_gp_seq); - rtp->gp_func(rtp); - rcu_seq_end(&rtp->tasks_gp_seq); - } + // Paranoid sleep to keep this from entering a tight loop. + schedule_timeout_idle(rtp->gp_sleep); + } +} - /* Invoke callbacks. */ - set_tasks_gp_state(rtp, RTGS_INVOKE_CBS); - rcu_tasks_invoke_cbs(rtp, per_cpu_ptr(rtp->rtpcpu, 0)); +// Wait for a grace period for the specified flavor of Tasks RCU. +static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp) +{ + /* Complain if the scheduler has not started. */ + RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, + "synchronize_rcu_tasks called too soon"); - /* Paranoid sleep to keep this from entering a tight loop */ - schedule_timeout_idle(rtp->gp_sleep); + // If the grace-period kthread is running, use it. + if (READ_ONCE(rtp->kthread_ptr)) { + wait_rcu_gp(rtp->call_func); + return; } + rcu_tasks_one_gp(rtp, true); } /* Spawn RCU-tasks grace-period kthread. */ @@ -630,7 +662,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) struct task_struct *t; set_tasks_gp_state(rtp, RTGS_PRE_WAIT_GP); - rtp->pregp_func(); + rtp->pregp_func(&holdouts); /* * There were callbacks, so we need to wait for an RCU-tasks @@ -639,10 +671,12 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) * and make a list of them in holdouts. */ set_tasks_gp_state(rtp, RTGS_SCAN_TASKLIST); - rcu_read_lock(); - for_each_process_thread(g, t) - rtp->pertask_func(t, &holdouts); - rcu_read_unlock(); + if (rtp->pertask_func) { + rcu_read_lock(); + for_each_process_thread(g, t) + rtp->pertask_func(t, &holdouts); + rcu_read_unlock(); + } set_tasks_gp_state(rtp, RTGS_POST_SCAN_TASKLIST); rtp->postscan_func(&holdouts); @@ -760,7 +794,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) |