15 files changed, 1026 insertions, 438 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 9d44f2d46c69..1950eb870244 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1814,6 +1814,7 @@ static inline void rcu_copy_process(struct task_struct *p)
 	p->trc_reader_nesting = 0;
 	p->trc_reader_special.s = 0;
 	INIT_LIST_HEAD(&p->trc_holdout_list);
+	INIT_LIST_HEAD(&p->trc_blkd_node);
 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
 }
 
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 1c630e573548..c05ca52cdf64 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -262,6 +262,35 @@ config RCU_NOCB_CPU
 	  Say Y here if you need reduced OS jitter, despite added overhead.
 	  Say N here if you are unsure.
 
+config RCU_NOCB_CPU_DEFAULT_ALL
+	bool "Offload RCU callback processing from all CPUs by default"
+	depends on RCU_NOCB_CPU
+	default n
+	help
+	  Use this option to offload callback processing from all CPUs
+	  by default, in the absence of the rcu_nocbs or nohz_full boot
+	  parameter. This also avoids the need to use any boot parameters
+	  to achieve the effect of offloading all CPUs on boot.
+
+	  Say Y here if you want offload all CPUs by default on boot.
+	  Say N here if you are unsure.
+
+config RCU_NOCB_CPU_CB_BOOST
+	bool "Offload RCU callback from real-time kthread"
+	depends on RCU_NOCB_CPU && RCU_BOOST
+	default y if PREEMPT_RT
+	help
+	  Use this option to invoke offloaded callbacks as SCHED_FIFO
+	  to avoid starvation by heavy SCHED_OTHER background load.
+	  Of course, running as SCHED_FIFO during callback floods will
+	  cause the rcuo[ps] kthreads to monopolize the CPU for hundreds
+	  of milliseconds or more.  Therefore, when enabling this option,
+	  it is your responsibility to ensure that latency-sensitive
+	  tasks either run with higher priority or run on some other CPU.
+
+	  Say Y here if you want to set RT priority for offloading kthreads.
+	  Say N here if you are building a !PREEMPT_RT kernel and are unsure.
+
 config TASKS_TRACE_RCU_READ_MB
 	bool "Tasks Trace RCU readers use memory barriers in user and idle"
 	depends on RCU_EXPERT && TASKS_TRACE_RCU
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 4916077119f3..32291f4eefde 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -23,6 +23,9 @@
 #define RCU_SEQ_CTR_SHIFT	2
 #define RCU_SEQ_STATE_MASK	((1 << RCU_SEQ_CTR_SHIFT) - 1)
 
+/* Low-order bit definition for polled grace-period APIs. */
+#define RCU_GET_STATE_COMPLETED	0x1
+
 extern int sysctl_sched_rt_runtime;
 
 /*
@@ -120,6 +123,18 @@ static inline bool rcu_seq_done(unsigned long *sp, unsigned long s)
 }
 
 /*
+ * Given a snapshot from rcu_seq_snap(), determine whether or not a
+ * full update-side operation has occurred, but do not allow the
+ * (ULONG_MAX / 2) safety-factor/guard-band.
+ */
+static inline bool rcu_seq_done_exact(unsigned long *sp, unsigned long s)
+{
+	unsigned long cur_s = READ_ONCE(*sp);
+
+	return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (2 * RCU_SEQ_STATE_MASK + 1));
+}
+
+/*
  * Has a grace period completed since the time the old gp_seq was collected?
  */
 static inline bool rcu_seq_completed_gp(unsigned long old, unsigned long new)
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 277a5bfb37d4..3ef02d4a8108 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -419,6 +419,7 @@ rcu_scale_writer(void *arg)
 	VERBOSE_SCALEOUT_STRING("rcu_scale_writer task started");
 	WARN_ON(!wdpp);
 	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
+	current->flags |= PF_NO_SETAFFINITY;
 	sched_set_fifo_low(current);
 
 	if (holdoff)
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 7120165a9342..d8e1b270a065 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -75,62 +75,47 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@
 
 torture_param(int, extendables, RCUTORTURE_MAX_EXTEND,
 	      "Extend readers by disabling bh (1), irqs (2), or preempt (4)");
-torture_param(int, fqs_duration, 0,
-	      "Duration of fqs bursts (us), 0 to disable");
+torture_param(int, fqs_duration, 0, "Duration of fqs bursts (us), 0 to disable");
 torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
 torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)");
-torture_param(int, fwd_progress, 1, "Test grace-period forward progress");
+torture_param(int, fwd_progress, 1, "Number of grace-period forward progress tasks (0 to disable)");
 torture_param(int, fwd_progress_div, 4, "Fraction of CPU stall to wait");
-torture_param(int, fwd_progress_holdoff, 60,
-	      "Time between forward-progress tests (s)");
-torture_param(bool, fwd_progress_need_resched, 1,
-	      "Hide cond_resched() behind need_resched()");
+torture_param(int, fwd_progress_holdoff, 60, "Time between forward-progress tests (s)");
+torture_param(bool, fwd_progress_need_resched, 1, "Hide cond_resched() behind need_resched()");
 torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives");
+torture_param(bool, gp_cond_exp, false, "Use conditional/async expedited GP wait primitives");
 torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
-torture_param(bool, gp_normal, false,
-	     "Use normal (non-expedited) GP wait primitives");
+torture_param(bool, gp_normal, false, "Use normal (non-expedited) GP wait primitives");
 torture_param(bool, gp_poll, false, "Use polling GP wait primitives");
+torture_param(bool, gp_poll_exp, false, "Use polling expedited GP wait primitives");
 torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives");
 torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers");
 torture_param(int, leakpointer, 0, "Leak pointer dereferences from readers");
-torture_param(int, n_barrier_cbs, 0,
-	     "# of callbacks/kthreads for barrier testing");
+torture_param(int, n_barrier_cbs, 0, "# of callbacks/kthreads for barrier testing");
 torture_param(int, nfakewriters, 4, "Number of RCU fake writer threads");
 torture_param(int, nreaders, -1, "Number of RCU reader threads");
-torture_param(int, object_debug, 0,
-	     "Enable debug-object double call_rcu() testing");
+torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing");
 torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
-torture_param(int, onoff_interval, 0,
-	     "Time between CPU hotplugs (jiffies), 0=disable");
+torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable");
 torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
 torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
-torture_param(int, read_exit_delay, 13,
-	      "Delay between read-then-exit episodes (s)");
-torture_param(int, read_exit_burst, 16,
-	      "# of read-then-exit bursts per episode, zero to disable");
+torture_param(int, read_exit_delay, 13, "Delay between read-then-exit episodes (s)");
+torture_param(int, read_exit_burst, 16, "# of read-then-exit bursts per episode, zero to disable");
 torture_param(int, shuffle_interval, 3, "Number of seconds between shuffles");
 torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable.");
 torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable.");
-torture_param(int, stall_cpu_holdoff, 10,
-	     "Time to wait before starting stall (s).");
-torture_param(bool, stall_no_softlockup, false,
-	     "Avoid softlockup warning during cpu stall.");
+torture_param(int, stall_cpu_holdoff, 10, "Time to wait before starting stall (s).");
+torture_param(bool, stall_no_softlockup, false, "Avoid softlockup warning during cpu stall.");
 torture_param(int, stall_cpu_irqsoff, 0, "Disable interrupts while stalling.");
 torture_param(int, stall_cpu_block, 0, "Sleep while stalling.");
-torture_param(int, stall_gp_kthread, 0,
-	      "Grace-period kthread stall duration (s).");
-torture_param(int, stat_interval, 60,
-	     "Number of seconds between stats printk()s");
+torture_param(int, stall_gp_kthread, 0, "Grace-period kthread stall duration (s).");
+torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s");
 torture_param(int, stutter, 5, "Number of seconds to run/halt test");
 torture_param(int, test_boost, 1, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
-torture_param(int, test_boost_duration, 4,
-	     "Duration of each boost test, seconds.");
-torture_param(int, test_boost_interval, 7,
-	     "Interval between boost tests, seconds.");
-torture_param(bool, test_no_idle_hz, true,
-	     "Test support for tickless idle CPUs");
-torture_param(int, verbose, 1,
-	     "Enable verbose debugging printk()s");
+torture_param(int, test_boost_duration, 4, "Duration of each boost test, seconds.");
+torture_param(int, test_boost_interval, 7, "Interval between boost tests, seconds.");
+torture_param(bool, test_no_idle_hz, true, "Test support for tickless idle CPUs");
+torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
 
 static char *torture_type = "rcu";
 module_param(torture_type, charp, 0444);
@@ -209,12 +194,16 @@ static int rcu_torture_writer_state;
 #define RTWS_DEF_FREE		3
 #define RTWS_EXP_SYNC		4
 #define RTWS_COND_GET		5
-#define RTWS_COND_SYNC		6
-#define RTWS_POLL_GET		7
-#define RTWS_POLL_WAIT		8
-#define RTWS_SYNC		9
-#define RTWS_STUTTER		10
-#define RTWS_STOPPING		11
+#define RTWS_COND_GET_EXP	6
+#define RTWS_COND_SYNC		7
+#define RTWS_COND_SYNC_EXP	8
+#define RTWS_POLL_GET		9
+#define RTWS_POLL_GET_EXP	10
+#define RTWS_POLL_WAIT		11
+#define RTWS_POLL_WAIT_EXP	12
+#define RTWS_SYNC		13
+#define RTWS_STUTTER		14
+#define RTWS_STOPPING		15
 static const char * const rcu_torture_writer_state_names[] = {
 	"RTWS_FIXED_DELAY",
 	"RTWS_DELAY",
@@ -222,9 +211,13 @@ static const char * const rcu_torture_writer_state_names[] = {
 	"RTWS_DEF_FREE",
 	"RTWS_EXP_SYNC",
 	"RTWS_COND_GET",
+	"RTWS_COND_GET_EXP",
 	"RTWS_COND_SYNC",
+	"RTWS_COND_SYNC_EXP",
 	"RTWS_POLL_GET",
+	"RTWS_POLL_GET_EXP",
 	"RTWS_POLL_WAIT",
+	"RTWS_POLL_WAIT_EXP",
 	"RTWS_SYNC",
 	"RTWS_STUTTER",
 	"RTWS_STOPPING",
@@ -337,7 +330,12 @@ struct rcu_torture_ops {
 	void (*deferred_free)(struct rcu_torture *p);
 	void (*sync)(void);
 	void (*exp_sync)(void);
+	unsigned long (*get_gp_state_exp)(void);
+	unsigned long (*start_gp_poll_exp)(void);
+	bool (*poll_gp_state_exp)(unsigned long oldstate);
+	void (*cond_sync_exp)(unsigned long oldstate);
 	unsigned long (*get_gp_state)(void);
+	unsigned long (*get_gp_completed)(void);
 	unsigned long (*start_gp_poll)(void);
 	bool (*poll_gp_state)(unsigned long oldstate);
 	void (*cond_sync)(unsigned long oldstate);
@@ -504,9 +502,14 @@ static struct rcu_torture_ops rcu_ops = {
 	.sync			= synchronize_rcu,
 	.exp_sync		= synchronize_rcu_expedited,
 	.get_gp_state		= get_state_synchronize_rcu,
+	.get_gp_completed	= get_completed_synchronize_rcu,
 	.start_gp_poll		= start_poll_synchronize_rcu,
 	.poll_gp_state		= poll_state_synchronize_rcu,
 	.cond_sync		= cond_synchronize_rcu,
+	.get_gp_state_exp	= get_state_synchronize_rcu,
+	.start_gp_poll_exp	= start_poll_synchronize_rcu_expedited,
+	.poll_gp_state_exp	= poll_state_synchronize_rcu,
+	.cond_sync_exp		= cond_synchronize_rcu_expedited,
 	.call			= call_rcu,
 	.cb_barrier		= rcu_barrier,
 	.fqs			= rcu_force_quiescent_state,
@@ -1136,9 +1139,8 @@ rcu_torture_fqs(void *arg)
 	return 0;
 }
 
-// Used by writers to randomly choose from the available grace-period
-// primitives.  The only purpose of the initialization is to size the array.
-static int synctype[] = { RTWS_DEF_FREE, RTWS_EXP_SYNC, RTWS_COND_GET, RTWS_POLL_GET, RTWS_SYNC };
+// Used by writers to randomly choose from the available grace-period primitives.
+static int synctype[ARRAY_SIZE(rcu_torture_writer_state_names)] = { };
 static int nsynctypes;
 
 /*
@@ -1146,18 +1148,27 @@ static int nsynctypes;
  */
 static void rcu_torture_write_types(void)
 {
-	bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal;
-	bool gp_poll1 = gp_poll, gp_sync1 = gp_sync;
+	bool gp_cond1 = gp_cond, gp_cond_exp1 = gp_cond_exp, gp_exp1 = gp_exp;
+	bool gp_poll_exp1 = gp_poll_exp, gp_normal1 = gp_normal, gp_poll1 = gp_poll;
+	bool gp_sync1 = gp_sync;
 
 	/* Initialize synctype[] array.  If none set, take default. */
-	if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_poll1 && !gp_sync1)
-		gp_cond1 = gp_exp1 = gp_normal1 = gp_poll1 = gp_sync1 = true;
+	if (!gp_cond1 && !gp_cond_exp1 && !gp_exp1 && !gp_poll_exp &&
+	    !gp_normal1 && !gp_poll1 && !gp_sync1)
+		gp_cond1 = gp_cond_exp1 = gp_exp1 = gp_poll_exp1 =
+			   gp_normal1 = gp_poll1 = gp_sync1 = true;
 	if (gp_cond1 && cur_ops->get_gp_state && cur_ops->cond_sync) {
 		synctype[nsynctypes++] = RTWS_COND_GET;
 		pr_info("%s: Testing conditional GPs.\n", __func__);
 	} else if (gp_cond && (!cur_ops->get_gp_state || !cur_ops->cond_sync)) {
 		pr_alert("%s: gp_cond without primitives.\n", __func__);
 	}
+	if (gp_cond_exp1 && cur_ops->get_gp_state_exp && cur_ops->cond_sync_exp) {
+		synctype[nsynctypes++] = RTWS_COND_GET_EXP;
+		pr_info("%s: Testing conditional expedited GPs.\n", __func__);
+	} else if (gp_cond_exp && (!cur_ops->get_gp_state_exp || !cur_ops->cond_sync_exp)) {
+		pr_alert("%s: gp_cond_exp without primitives.\n", __func__);
+	}
 	if (gp_exp1 && cur_ops->exp_sync) {
 		synctype[nsynctypes++] = RTWS_EXP_SYNC;
 		pr_info("%s: Testing expedited GPs.\n", __func__);
@@ -1176,6 +1187,12 @@ static void rcu_torture_write_types(void)
 	} else if (gp_poll && (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)) {
 		pr_alert("%s: gp_poll without primitives.\n", __func__);
 	}
+	if (gp_poll_exp1 && cur_ops->start_gp_poll_exp && cur_ops->poll_gp_state_exp) {
+		synctype[nsynctypes++] = RTWS_POLL_GET_EXP;
+		pr_info("%s: Testing polling expedited GPs.\n", __func__);
+	} else if (gp_poll_exp && (!cur_ops->start_gp_poll_exp || !cur_ops->poll_gp_state_exp)) {
+		pr_alert("%s: gp_poll_exp without primitives.\n", __func__);
+	}
 	if (gp_sync1 && cur_ops->sync) {
 		synctype[nsynctypes++] = RTWS_SYNC;
 		pr_info("%s: Testing normal GPs.\n", __func__);
@@ -1254,6 +1271,10 @@ rcu_torture_writer(void *arg)
 					  rcu_torture_writer_state_getname(),
 					  rcu_torture_writer_state,
 					  cookie, cur_ops->get_gp_state());
+				if (cur_ops->get_gp_completed) {
+					cookie = cur_ops->get_gp_completed();
+					WARN_ON_ONCE(!cur_ops->poll_gp_state(cookie));
+				}
 				cur_ops->readunlock(idx);
 			}
 			switch (synctype[torture_random(&rand) % nsynctypes]) {
@@ -1263,7 +1284,12 @@ rcu_torture_writer(void *arg)
 				break;
 			case RTWS_EXP_SYNC:
 				rcu_torture_writer_state = RTWS_EXP_SYNC;
+				if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
+					cookie = cur_ops->get_gp_state();
 				cur_ops->exp_sync();
+				cur_ops->exp_sync();
+				if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
+					WARN_ON_ONCE(!cur_ops->poll_gp_state(cookie));
 				rcu_torture_pipe_update(old_rp);
 				break;
 			case RTWS_COND_GET:
@@ -1274,6 +1300,14 @@ rcu_torture_writer(void *arg)
 				cur_ops->cond_sync(gp_snap);
 				rcu_torture_pipe_update(old_rp);
 				break;
+			case RTWS_COND_GET_EXP:
+				rcu_torture_writer_state = RTWS_COND_GET_EXP;
+				gp_snap = cur_ops->get_gp_state_exp();
+				torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
+				rcu_torture_writer_state = RTWS_COND_SYNC_EXP;
+				cur_ops->cond_sync_exp(gp_snap);
+				rcu_torture_pipe_update(old_rp);
+				break;
 			case RTWS_POLL_GET:
 				rcu_torture_writer_state = RTWS_POLL_GET;
 				gp_snap = cur_ops->start_gp_poll();
@@ -1283,9 +1317,23 @@ rcu_torture_writer(void *arg)
 								  &rand);
 				rcu_torture_pipe_update(old_rp);
 				break;
+			case RTWS_POLL_GET_EXP:
+				rcu_torture_writer_state = RTWS_POLL_GET_EXP;
+				gp_snap = cur_ops->start_gp_poll_exp();
+				rcu_torture_writer_state = RTWS_POLL_WAIT_EXP;
+				while (!cur_ops->poll_gp_state_exp(gp_snap))
+					torture_hrtimeout_jiffies(torture_random(&rand) % 16,
+								  &rand);
+				rcu_torture_pipe_update(old_rp);
+				break;
 			case RTWS_SYNC:
 				rcu_torture_writer_state = RTWS_SYNC;
+				if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
+					cookie = cur_ops->get_gp_state();
 				cur_ops->sync();
+				cur_ops->sync();
+				if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
+					WARN_ON_ONCE(!cur_ops->poll_gp_state(cookie));
 				rcu_torture_pipe_update(old_rp);
 				break;
 			default:
@@ -1321,8 +1369,9 @@ rcu_torture_writer(void *arg)
 				if (list_empty(&rcu_tortures[i].rtort_free) &&
 				    rcu_access_pointer(rcu_torture_current) !=
 				    &rcu_tortures[i]) {
-					rcu_ftrace_dump(DUMP_ALL);
+					tracing_off();
 					WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
+					rcu_ftrace_dump(DUMP_ALL);
 				}
 		if (stutter_waited)
 			sched_set_normal(current, oldnice);
@@ -1384,6 +1433,11 @@ rcu_torture_fakewriter(void *arg)
 				torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
 				cur_ops->cond_sync(gp_snap);
 				break;
+			case RTWS_COND_GET_EXP:
+				gp_snap = cur_ops->get_gp_state_exp();
+				torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
+				cur_ops->cond_sync_exp(gp_snap);
+				break;
 			case RTWS_POLL_GET:
 				gp_snap = cur_ops->start_gp_poll();
 				while (!cur_ops->poll_gp_state(gp_snap)) {
@@ -1391,6 +1445,13 @@ rcu_torture_fakewriter(void *arg)
 								  &rand);
 				}
 				break;
+			case RTWS_POLL_GET_EXP:
+				gp_snap = cur_ops->start_gp_poll_exp();
+				while (!cur_ops->poll_gp_state_exp(gp_snap)) {
+					torture_hrtimeout_jiffies(torture_random(&rand) % 16,
+								  &rand);
+				}
+				break;
 			case RTWS_SYNC:
 				cur_ops->sync();
 				break;
@@ -1868,7 +1929,7 @@ rcu_torture_stats_print(void)
 			batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
 		}
 	}
-	for (i = RCU_TORTURE_PIPE_LEN - 1; i >= 0; i--) {
+	for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
 		if (pipesummary[i] != 0)
 			break;
 	}
@@ -1990,7 +2051,13 @@ static void rcu_torture_mem_dump_obj(void)
 	static int z;
 
 	kcp = kmem_cache_create("rcuscale", 136, 8, SLAB_STORE_USER, NULL);
+	if (WARN_ON_ONCE(!kcp))
+		return;
 	rhp = kmem_cache_alloc(kcp, GFP_KERNEL);
+	if (WARN_ON_ONCE(!rhp)) {
+		kmem_cache_destroy(kcp);
+		return;
+	}
 	pr_alert("mem_dump_obj() slab test: rcu_torture_stats = %px, &rhp = %px, rhp = %px, &z = %px\n", stats_task, &rhp, rhp, &z);
 	pr_alert("mem_dump_obj(ZERO_SIZE_PTR):");
 	mem_dump_obj(ZERO_SIZE_PTR);
@@ -2007,6 +2074,8 @@ static void rcu_torture_mem_dump_obj(void)
 	kmem_cache_free(kcp, rhp);
 	kmem_cache_destroy(kcp);
 	rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
+	if (WARN_ON_ONCE(!rhp))
+		return;
 	pr_alert("mem_dump_obj() kmalloc test: rcu_torture_stats = %px, &rhp = %px, rhp = %px\n", stats_task, &rhp, rhp);
 	pr_alert("mem_dump_obj(kmalloc %px):", rhp);
 	mem_dump_obj(rhp);
@@ -2014,6 +2083,8 @@ static void rcu_torture_mem_dump_obj(void)
 	mem_dump_obj(&rhp->func);
 	kfree(rhp);
 	rhp = vmalloc(4096);
+	if (WARN_ON_ONCE(!rhp))
+		return;
 	pr_alert("mem_dump_obj() vmalloc test: rcu_torture_stats = %px, &rhp = %px, rhp = %px\n", stats_task, &rhp, rhp);
 	pr_alert("mem_dump_obj(vmalloc %px):", rhp);
 	mem_dump_obj(rhp);
@@ -2075,6 +2146,19 @@ static int rcutorture_booster_init(unsigned int cpu)
 	if (boost_tasks[cpu] != NULL)
 		return 0;  /* Already created, nothing more to do. */
 
+	// Testing RCU priority boosting requires rcutorture do
+	// some serious abuse.  Counter this by running ksoftirqd
+	// at higher priority.
+	if (IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) {
+		struct sched_param sp;
+		struct task_struct *t;
+
+		t = per_cpu(ksoftirqd, cpu);
+		WARN_ON_ONCE(!t);
+		sp.sched_priority = 2;
+		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+	}
+
 	/* Don't allow time recalculation while creating a new task. */
 	mutex_lock(&boost_mutex);
 	rcu_torture_disable_rt_throttle();
@@ -2873,7 +2957,6 @@ static int rcu_torture_read_exit_child(void *trsp_in)
 // Parent kthread which creates and destroys read-exit child kthreads.
 static int rcu_torture_read_exit(void *unused)
 {
-	int count = 0;
 	bool errexit = false;
 	int i;
 	struct task_struct *tsp;
@@ -2885,34 +2968,28 @@ static int rcu_torture_read_exit(void *unused)
 
 	// Each pass through this loop does one read-exit episode.
 	do {
-		if (++count > read_exit_burst) {
-			VERBOSE_TOROUT_STRING("rcu_torture_read_exit: End of episode");
-			rcu_barrier(); // Wait for task_struct free, avoid OOM.
-			for (i = 0; i < read_exit_delay; i++) {
-				schedule_timeout_uninterruptible(HZ);
-				if (READ_ONCE(read_exit_child_stop))
-					break;
+		VERBOSE_TOROUT_STRING("rcu_torture_read_exit: Start of episode");
+		for (i = 0; i < read_exit_burst; i++) {
+			if (READ_ONCE(read_exit_child_stop))
+				break;
+			stutter_wait("rcu_torture_read_exit");
+			// Spawn child.
+			tsp = kthread_run(rcu_torture_read_exit_child,
+					  &trs, "%s", "rcu_torture_read_exit_child");
+			if (IS_ERR(tsp)) {
+				TOROUT_ERRSTRING("out of memory");
+				errexit = true;
+				break;
 			}
-			if (!READ_ONCE(read_exit_child_stop))
-				VERBOSE_TOROUT_STRING("rcu_torture_read_exit: Start of episode");
-			count = 0;
-		}
-		if (READ_ONCE(read_exit_child_stop))
-			break;
-		// Spawn child.
-		tsp = kthread_run(rcu_torture_read_exit_child,
-				     &trs, "%s",
-				     "rcu_torture_read_exit_child");
-		if (IS_ERR(tsp)) {
-			TOROUT_ERRSTRING("out of memory");
-			errexit = true;
-			tsp = NULL;
-			break;
+			cond_resched();
+			kthread_stop(tsp);
+			n_read_exits++;
 		}
-		cond_resched();
-		kthread_stop(tsp);
-		n_read_exits ++;
-		stutter_wait("rcu_torture_read_exit");
+		VERBOSE_TOROUT_STRING("rcu_torture_read_exit: End of episode");
+		rcu_barrier(); // Wait for task_struct free, avoid OOM.
+		i = 0;
+		for (; !errexit && !READ_ONCE(read_exit_child_stop) && i < read_exit_delay; i++)
+			schedule_timeout_uninterruptible(HZ);
 	} while (!errexit && !READ_ONCE(read_exit_child_stop));
 
 	// Clean up and exit.
@@ -3122,6 +3199,7 @@ static void rcu_test_debug_objects(void)
 	pr_alert("%s: WARN: Duplicate call_rcu() test complete.\n", KBUILD_MODNAME);
 	destroy_rcu_head_on_stack(&rh1);
 	destroy_rcu_head_on_stack(&rh2);
+	kfree(rhp);
 #else /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 	pr_alert("%s: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n", KBUILD_MODNAME);
 #endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
@@ -3329,21 +3407,6 @@ rcu_torture_init(void)
 		rcutor_hp = firsterr;
 		if (torture_init_error(firsterr))
 			goto unwind;
-
-		// Testing RCU priority boosting requires rcutorture do
-		// some serious abuse.  Counter this by running ksoftirqd
-		// at higher priority.
-		if (IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) {
-			for_each_online_cpu(cpu) {
-				struct sched_param sp;
-				struct task_struct *t;
-
-				t = per_cpu(ksoftirqd, cpu);
-				WARN_ON_ONCE(!t);
-				sp.sched_priority = 2;
-				sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
-			}
-		}
 	}
 	shutdown_jiffies = jiffies + shutdown_secs * HZ;
 	firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup);
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index 909644abee67..435c884c02b5 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -385,7 +385,7 @@ static struct ref_scale_ops rwsem_ops = {
 };
 
 // Definitions for global spinlock
-static DEFINE_SPINLOCK(test_lock);
+static DEFINE_RAW_SPINLOCK(test_lock);
 
 static void ref_lock_section(const int nloops)
 {
@@ -393,8 +393,8 @@ static void ref_lock_section(const int nloops)
 
 	preempt_disable();
 	for (i = nloops; i >= 0; i--) {
-		spin_lock(&test_lock);
-		spin_unlock(&test_lock);
+		raw_spin_lock(&test_lock);
+		raw_spin_unlock(&test_lock);
 	}
 	preempt_enable();
 }
@@ -405,9 +405,9 @@ static void ref_lock_delay_section(const int nloops, const int udl, const int nd
 
 	preempt_disable();
 	for (i = nloops; i >= 0; i--) {
-		spin_lock(&test_lock);
+		raw_spin_lock(&test_lock);
 		un_delay(udl, ndl);
-		spin_unlock(&test_lock);
+		raw_spin_unlock(&test_lock);
 	}
 	preempt_enable();
 }
@@ -427,8 +427,8 @@ static void ref_lock_irq_section(const int nloops)
 
 	preempt_disable();
 	for (i = nloops; i >= 0; i--) {
-		spin_lock_irqsave(&test_lock, flags);
-		spin_unlock_irqrestore(&test_lock, flags);
+		raw_spin_lock_irqsave(&test_lock, flags);
+		raw_spin_unlock_irqrestore(&test_lock, flags);
 	}
 	preempt_enable();
 }
@@ -440,9 +440,9 @@ static void ref_lock_irq_delay_section(const int nloops, const int udl, const in
 
 	preempt_disable();
 	for (i = nloops; i >= 0; i--) {
-		spin_lock_irqsave(&test_lock, flags);
+		raw_spin_lock_irqsave(&test_lock, flags);
 		un_delay(udl, ndl);
-		spin_unlock_irqrestore(&test_lock, flags);
+		raw_spin_unlock_irqrestore(&test_lock, flags);
 	}
 	preempt_enable();
 }
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 3925e32159b5..83c7e6620d40 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -14,7 +14,7 @@
 
 struct rcu_tasks;
 typedef void (*rcu_tasks_gp_func_t)(struct rcu_tasks *rtp);
-typedef void (*pregp_func_t)(void);
+typedef void (*pregp_func_t)(struct list_head *hop);
 typedef void (*pertask_func_t)(struct task_struct *t, struct list_head *hop);
 typedef void (*postscan_func_t)(struct list_head *hop);
 typedef void (*holdouts_func_t)(struct list_head *hop, bool ndrpt, bool *frptp);
@@ -29,6 +29,7 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
  * @rtp_work: Work queue for invoking callbacks.
  * @rtp_irq_work: IRQ work queue for deferred wakeups.
  * @barrier_q_head: RCU callback for barrier operation.
+ * @rtp_blkd_tasks: List of tasks blocked as readers.
  * @cpu: CPU number corresponding to this entry.
  * @rtpp: Pointer to the rcu_tasks structure.
  */
@@ -40,6 +41,7 @@ struct rcu_tasks_percpu {
 	struct work_struct rtp_work;
 	struct irq_work rtp_irq_work;
 	struct rcu_head barrier_q_head;
+	struct list_head rtp_blkd_tasks;
 	int cpu;
 	struct rcu_tasks *rtpp;
 };
@@ -48,6 +50,7 @@ struct rcu_tasks_percpu {
  * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
  * @cbs_wait: RCU wait allowing a new callback to get kthread's attention.
  * @cbs_gbl_lock: Lock protecting callback list.
+ * @tasks_gp_mutex: Mutex protecting grace period, needed during mid-boot dead zone.
  * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
  * @gp_func: This flavor's grace-period-wait function.
  * @gp_state: Grace period's most recent state transition (debugging).
@@ -79,6 +82,7 @@ struct rcu_tasks_percpu {
 struct rcu_tasks {
 	struct rcuwait cbs_wait;
 	raw_spinlock_t cbs_gbl_lock;
+	struct mutex tasks_gp_mutex;
 	int gp_state;
 	int gp_sleep;
 	int init_fract;
@@ -119,6 +123,7 @@ static struct rcu_tasks rt_name =							\
 {											\
 	.cbs_wait = __RCUWAIT_INITIALIZER(rt_name.wait),				\
 	.cbs_gbl_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_gbl_lock),			\
+	.tasks_gp_mutex = __MUTEX_INITIALIZER(rt_name.tasks_gp_mutex),			\
 	.gp_func = gp,									\
 	.call_func = call,								\
 	.rtpcpu = &rt_name ## __percpu,							\
@@ -140,6 +145,7 @@ static int rcu_task_ipi_delay __read_mostly = RCU_TASK_IPI_DELAY;
 module_param(rcu_task_ipi_delay, int, 0644);
 
 /* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
+#define RCU_TASK_BOOT_STALL_TIMEOUT (HZ * 30)
 #define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
 static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
 module_param(rcu_task_stall_timeout, int, 0644);
@@ -253,6 +259,8 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
 		INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq);
 		rtpcp->cpu = cpu;
 		rtpcp->rtpp = rtp;
+		if (!rtpcp->rtp_blkd_tasks.next)
+			INIT_LIST_HEAD(&rtpcp->rtp_blkd_tasks);
 		raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
 	}
 	raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
@@ -323,17 +331,6 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
 		irq_work_queue(&rtpcp->rtp_irq_work);
 }
 
-// Wait for a grace period for the specified flavor of Tasks RCU.
-static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
-{
-	/* Complain if the scheduler has not started.  */
-	RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
-			 "synchronize_rcu_tasks called too soon");
-
-	/* Wait for the grace period. */
-	wait_rcu_gp(rtp->call_func);
-}
-
 // RCU callback function for rcu_barrier_tasks_generic().
 static void rcu_barrier_tasks_generic_cb(struct rcu_head *rhp)
 {
@@ -439,6 +436,11 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
 			WRITE_ONCE(rtp->percpu_dequeue_lim, 1);
 			pr_info("Completing switch %s to CPU-0 callback queuing.\n", rtp->name);
 		}
+		for (cpu = rtp->percpu_dequeue_lim; cpu < nr_cpu_ids; cpu++) {
+			struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
+
+			WARN_ON_ONCE(rcu_segcblist_n_cbs(&rtpcp->cblist));
+		}
 		raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
 	}
 
@@ -497,10 +499,41 @@ static void rcu_tasks_invoke_cbs_wq(struct work_struct *wp)
 	rcu_tasks_invoke_cbs(rtp, rtpcp);
 }
 
-/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
-static int __noreturn rcu_tasks_kthread(void *arg)
+// Wait for one grace period.
+static void rcu_tasks_one_gp(struct rcu_tasks *rtp, bool midboot)
 {
 	int needgpcb;
+
+	mutex_lock(&rtp->tasks_gp_mutex);
+
+	// If there were none, wait a bit and start over.
+	if (unlikely(midboot)) {
+		needgpcb = 0x2;
+	} else {
+		set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
+		rcuwait_wait_event(&rtp->cbs_wait,
+				   (needgpcb = rcu_tasks_need_gpcb(rtp)),
+				   TASK_IDLE);
+	}
+
+	if (needgpcb & 0x2) {
+		// Wait for one grace period.
+		set_tasks_gp_state(rtp, RTGS_WAIT_GP);
+		rtp->gp_start = jiffies;
+		rcu_seq_start(&rtp->tasks_gp_seq);
+		rtp->gp_func(rtp);
+		rcu_seq_end(&rtp->tasks_gp_seq);
+	}
+
+	// Invoke callbacks.
+	set_tasks_gp_state(rtp, RTGS_INVOKE_CBS);
+	rcu_tasks_invoke_cbs(rtp, per_cpu_ptr(rtp->rtpcpu, 0));
+	mutex_unlock(&rtp->tasks_gp_mutex);
+}
+
+// RCU-tasks kthread that detects grace periods and invokes callbacks.
+static int __noreturn rcu_tasks_kthread(void *arg)
+{
 	struct rcu_tasks *rtp = arg;
 
 	/* Run on housekeeping CPUs by default.  Sysadm can move if desired. */
@@ -514,29 +547,28 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 	 * This loop is terminated by the system going down.  ;-)
 	 */
 	for (;;) {
-		set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
+		// Wait for one grace period and invoke any callbacks
+		// that are ready.
+		rcu_tasks_one_gp(rtp, false);
 
-		/* If there were none, wait a bit and start over. */
-		rcuwait_wait_event(&rtp->cbs_wait,
-				   (needgpcb = rcu_tasks_need_gpcb(rtp)),
-				   TASK_IDLE);
-
-		if (needgpcb & 0x2) {
-			// Wait for one grace period.
-			set_tasks_gp_state(rtp, RTGS_WAIT_GP);
-			rtp->gp_start = jiffies;
-			rcu_seq_start(&rtp->tasks_gp_seq);
-			rtp->gp_func(rtp);
-			rcu_seq_end(&rtp->tasks_gp_seq);
-		}
+		// Paranoid sleep to keep this from entering a tight loop.
+		schedule_timeout_idle(rtp->gp_sleep);
+	}
+}
 
-		/* Invoke callbacks. */
-		set_tasks_gp_state(rtp, RTGS_INVOKE_CBS);
-		rcu_tasks_invoke_cbs(rtp, per_cpu_ptr(rtp->rtpcpu, 0));
+// Wait for a grace period for the specified flavor of Tasks RCU.
+static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
+{
+	/* Complain if the scheduler has not started.  */
+	RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
+			 "synchronize_rcu_tasks called too soon");
 
-		/* Paranoid sleep to keep this from entering a tight loop */
-		schedule_timeout_idle(rtp->gp_sleep);
+	// If the grace-period kthread is running, use it.
+	if (READ_ONCE(rtp->kthread_ptr)) {
+		wait_rcu_gp(rtp->call_func);
+		return;
 	}
+	rcu_tasks_one_gp(rtp, true);
 }
 
 /* Spawn RCU-tasks grace-period kthread. */
@@ -630,7 +662,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
 	struct task_struct *t;
 
 	set_tasks_gp_state(rtp, RTGS_PRE_WAIT_GP);
-	rtp->pregp_func();
+	rtp->pregp_func(&holdouts);
 
 	/*
 	 * There were callbacks, so we need to wait for an RCU-tasks
@@ -639,10 +671,12 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
 	 * and make a list of them in holdouts.
 	 */
 	set_tasks_gp_state(rtp, RTGS_SCAN_TASKLIST);
-	rcu_read_lock();
-	for_each_process_thread(g, t)
-		rtp->pertask_func(t, &holdouts);
-	rcu_read_unlock();
+	if (rtp->pertask_func) {
+		rcu_read_lock();
+		for_each_process_thread(g, t)
+			rtp->pertask_func(t, &holdouts);
+		rcu_read_unlock();
+	}
 
 	set_tasks_gp_state(rtp, RTGS_POST_SCAN_TASKLIST);
 	rtp->postscan_func(&holdouts);
@@ -760,7 +794,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)