Diffstat (limited to 'kernel')
-rw-r--r--  kernel/context_tracking.c     | 140
-rw-r--r--  kernel/entry/common.c         |   2
-rw-r--r--  kernel/rcu/rcu.h              |  12
-rw-r--r--  kernel/rcu/rcu_segcblist.c    |  11
-rw-r--r--  kernel/rcu/rcu_segcblist.h    |  11
-rw-r--r--  kernel/rcu/rcuscale.c         | 214
-rw-r--r--  kernel/rcu/rcutorture.c       | 121
-rw-r--r--  kernel/rcu/refscale.c         |  67
-rw-r--r--  kernel/rcu/srcutree.c         |  11
-rw-r--r--  kernel/rcu/tasks.h            | 214
-rw-r--r--  kernel/rcu/tree.c             | 174
-rw-r--r--  kernel/rcu/tree.h             |  10
-rw-r--r--  kernel/rcu/tree_exp.h         | 121
-rw-r--r--  kernel/rcu/tree_nocb.h        | 279
-rw-r--r--  kernel/rcu/tree_plugin.h      |  11
-rw-r--r--  kernel/rcu/tree_stall.h       |  16
-rw-r--r--  kernel/sched/core.c           |   6
-rw-r--r--  kernel/smp.c                  |  38
-rw-r--r--  kernel/stop_machine.c         |   2
-rw-r--r--  kernel/trace/trace_osnoise.c  |   4
20 files changed, 847 insertions, 617 deletions
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 24b1e1143260..938c48952d26 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -28,34 +28,34 @@
DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
- .dynticks_nesting = 1,
- .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
+ .nesting = 1,
+ .nmi_nesting = CT_NESTING_IRQ_NONIDLE,
#endif
- .state = ATOMIC_INIT(RCU_DYNTICKS_IDX),
+ .state = ATOMIC_INIT(CT_RCU_WATCHING),
};
EXPORT_SYMBOL_GPL(context_tracking);
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
#define TPS(x) tracepoint_string(x)
-/* Record the current task on dyntick-idle entry. */
-static __always_inline void rcu_dynticks_task_enter(void)
+/* Record the current task on exiting RCU-tasks (dyntick-idle entry). */
+static __always_inline void rcu_task_exit(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}
-/* Record no current task on dyntick-idle exit. */
-static __always_inline void rcu_dynticks_task_exit(void)
+/* Record no current task on entering RCU-tasks (dyntick-idle exit). */
+static __always_inline void rcu_task_enter(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}
-/* Turn on heavyweight RCU tasks trace readers on idle/user entry. */
-static __always_inline void rcu_dynticks_task_trace_enter(void)
+/* Turn on heavyweight RCU tasks trace readers on kernel exit. */
+static __always_inline void rcu_task_trace_heavyweight_enter(void)
{
#ifdef CONFIG_TASKS_TRACE_RCU
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
@@ -63,8 +63,8 @@ static __always_inline void rcu_dynticks_task_trace_enter(void)
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}
-/* Turn off heavyweight RCU tasks trace readers on idle/user exit. */
-static __always_inline void rcu_dynticks_task_trace_exit(void)
+/* Turn off heavyweight RCU tasks trace readers on kernel entry. */
+static __always_inline void rcu_task_trace_heavyweight_exit(void)
{
#ifdef CONFIG_TASKS_TRACE_RCU
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
@@ -87,10 +87,10 @@ static noinstr void ct_kernel_exit_state(int offset)
* critical sections, and we also must force ordering with the
* next idle sojourn.
*/
- rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
+ rcu_task_trace_heavyweight_enter(); // Before CT state update!
seq = ct_state_inc(offset);
// RCU is no longer watching. Better be in extended quiescent state!
- WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & RCU_DYNTICKS_IDX));
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & CT_RCU_WATCHING));
}
/*
@@ -109,15 +109,15 @@ static noinstr void ct_kernel_enter_state(int offset)
*/
seq = ct_state_inc(offset);
// RCU is now watching. Better not be in an extended quiescent state!
- rcu_dynticks_task_trace_exit(); // After ->dynticks update!
- WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & RCU_DYNTICKS_IDX));
+ rcu_task_trace_heavyweight_exit(); // After CT state update!
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & CT_RCU_WATCHING));
}
/*
* Enter an RCU extended quiescent state, which can be either the
* idle loop or adaptive-tickless usermode execution.
*
- * We crowbar the ->dynticks_nmi_nesting field to zero to allow for
+ * We crowbar the ->nmi_nesting field to zero to allow for
* the possibility of usermode upcalls having messed up our count
* of interrupt nesting level during the prior busy period.
*/
@@ -125,19 +125,19 @@ static void noinstr ct_kernel_exit(bool user, int offset)
{
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
- WARN_ON_ONCE(ct_dynticks_nmi_nesting() != DYNTICK_IRQ_NONIDLE);
- WRITE_ONCE(ct->dynticks_nmi_nesting, 0);
+ WARN_ON_ONCE(ct_nmi_nesting() != CT_NESTING_IRQ_NONIDLE);
+ WRITE_ONCE(ct->nmi_nesting, 0);
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
- ct_dynticks_nesting() == 0);
- if (ct_dynticks_nesting() != 1) {
+ ct_nesting() == 0);
+ if (ct_nesting() != 1) {
// RCU will still be watching, so just do accounting and leave.
- ct->dynticks_nesting--;
+ ct->nesting--;
return;
}
instrumentation_begin();
lockdep_assert_irqs_disabled();
- trace_rcu_dyntick(TPS("Start"), ct_dynticks_nesting(), 0, ct_dynticks());
+ trace_rcu_watching(TPS("End"), ct_nesting(), 0, ct_rcu_watching());
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
rcu_preempt_deferred_qs(current);
@@ -145,18 +145,18 @@ static void noinstr ct_kernel_exit(bool user, int offset)
instrument_atomic_write(&ct->state, sizeof(ct->state));
instrumentation_end();
- WRITE_ONCE(ct->dynticks_nesting, 0); /* Avoid irq-access tearing. */
+ WRITE_ONCE(ct->nesting, 0); /* Avoid irq-access tearing. */
// RCU is watching here ...
ct_kernel_exit_state(offset);
// ... but is no longer watching here.
- rcu_dynticks_task_enter();
+ rcu_task_exit();
}
/*
* Exit an RCU extended quiescent state, which can be either the
* idle loop or adaptive-tickless usermode execution.
*
- * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to
+ * We crowbar the ->nmi_nesting field to CT_NESTING_IRQ_NONIDLE to
* allow for the possibility of usermode upcalls messing up our count of
* interrupt nesting level during the busy period that is just now starting.
*/
@@ -166,14 +166,14 @@ static void noinstr ct_kernel_enter(bool user, int offset)
long oldval;
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
- oldval = ct_dynticks_nesting();
+ oldval = ct_nesting();
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
if (oldval) {
// RCU was already watching, so just do accounting and leave.
- ct->dynticks_nesting++;
+ ct->nesting++;
return;
}
- rcu_dynticks_task_exit();
+ rcu_task_enter();
// RCU is not watching here ...
ct_kernel_enter_state(offset);
// ... but is watching here.
@@ -182,11 +182,11 @@ static void noinstr ct_kernel_enter(bool user, int offset)
// instrumentation for the noinstr ct_kernel_enter_state()
instrument_atomic_write(&ct->state, sizeof(ct->state));
- trace_rcu_dyntick(TPS("End"), ct_dynticks_nesting(), 1, ct_dynticks());
+ trace_rcu_watching(TPS("Start"), ct_nesting(), 1, ct_rcu_watching());
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
- WRITE_ONCE(ct->dynticks_nesting, 1);
- WARN_ON_ONCE(ct_dynticks_nmi_nesting());
- WRITE_ONCE(ct->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
+ WRITE_ONCE(ct->nesting, 1);
+ WARN_ON_ONCE(ct_nmi_nesting());
+ WRITE_ONCE(ct->nmi_nesting, CT_NESTING_IRQ_NONIDLE);
instrumentation_end();
}
@@ -194,7 +194,7 @@ static void noinstr ct_kernel_enter(bool user, int offset)
* ct_nmi_exit - inform RCU of exit from NMI context
*
* If we are returning from the outermost NMI handler that interrupted an
- * RCU-idle period, update ct->state and ct->dynticks_nmi_nesting
+ * RCU-idle period, update ct->state and ct->nmi_nesting
* to let the RCU grace-period handling know that the CPU is back to
* being RCU-idle.
*
@@ -207,47 +207,47 @@ void noinstr ct_nmi_exit(void)
instrumentation_begin();
/*
- * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
+ * Check for ->nmi_nesting underflow and bad CT state.
* (We are exiting an NMI handler, so RCU better be paying attention
* to us!)
*/
- WARN_ON_ONCE(ct_dynticks_nmi_nesting() <= 0);
- WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
+ WARN_ON_ONCE(ct_nmi_nesting() <= 0);
+ WARN_ON_ONCE(!rcu_is_watching_curr_cpu());
/*
* If the nesting level is not 1, the CPU wasn't RCU-idle, so
* leave it in non-RCU-idle state.
*/
- if (ct_dynticks_nmi_nesting() != 1) {
- trace_rcu_dyntick(TPS("--="), ct_dynticks_nmi_nesting(), ct_dynticks_nmi_nesting() - 2,
- ct_dynticks());
- WRITE_ONCE(ct->dynticks_nmi_nesting, /* No store tearing. */
- ct_dynticks_nmi_nesting() - 2);
+ if (ct_nmi_nesting() != 1) {
+ trace_rcu_watching(TPS("--="), ct_nmi_nesting(), ct_nmi_nesting() - 2,
+ ct_rcu_watching());
+ WRITE_ONCE(ct->nmi_nesting, /* No store tearing. */
+ ct_nmi_nesting() - 2);
instrumentation_end();
return;
}
/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
- trace_rcu_dyntick(TPS("Startirq"), ct_dynticks_nmi_nesting(), 0, ct_dynticks());
- WRITE_ONCE(ct->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
+ trace_rcu_watching(TPS("Endirq"), ct_nmi_nesting(), 0, ct_rcu_watching());
+ WRITE_ONCE(ct->nmi_nesting, 0); /* Avoid store tearing. */
// instrumentation for the noinstr ct_kernel_exit_state()
instrument_atomic_write(&ct->state, sizeof(ct->state));
instrumentation_end();
// RCU is watching here ...
- ct_kernel_exit_state(RCU_DYNTICKS_IDX);
+ ct_kernel_exit_state(CT_RCU_WATCHING);
// ... but is no longer watching here.
if (!in_nmi())
- rcu_dynticks_task_enter();
+ rcu_task_exit();
}
/**
* ct_nmi_enter - inform RCU of entry to NMI context
*
* If the CPU was idle from RCU's viewpoint, update ct->state and
- * ct->dynticks_nmi_nesting to let the RCU grace-period handling know
+ * ct->nmi_nesting to let the RCU grace-period handling know
* that the CPU is active. This implementation permits nested NMIs, as
* long as the nesting level does not overflow an int. (You will probably
* run out of stack space first.)
@@ -261,27 +261,27 @@ void noinstr ct_nmi_enter(void)
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
/* Complain about underflow. */
- WARN_ON_ONCE(ct_dynticks_nmi_nesting() < 0);
+ WARN_ON_ONCE(ct_nmi_nesting() < 0);
/*
- * If idle from RCU viewpoint, atomically increment ->dynticks
- * to mark non-idle and increment ->dynticks_nmi_nesting by one.
- * Otherwise, increment ->dynticks_nmi_nesting by two. This means
- * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
+ * If idle from RCU viewpoint, atomically increment CT state
+ * to mark non-idle and increment ->nmi_nesting by one.
+ * Otherwise, increment ->nmi_nesting by two. This means
+ * if ->nmi_nesting is equal to one, we are guaranteed
* to be in the outermost NMI handler that interrupted an RCU-idle
* period (observation due to Andy Lutomirski).
*/
- if (rcu_dynticks_curr_cpu_in_eqs()) {
+ if (!rcu_is_watching_curr_cpu()) {
if (!in_nmi())
- rcu_dynticks_task_exit();
+ rcu_task_enter();
// RCU is not watching here ...
- ct_kernel_enter_state(RCU_DYNTICKS_IDX);
+ ct_kernel_enter_state(CT_RCU_WATCHING);
// ... but is watching here.
instrumentation_begin();
- // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
+ // instrumentation for the noinstr rcu_is_watching_curr_cpu()
instrument_atomic_read(&ct->state, sizeof(ct->state));
// instrumentation for the noinstr ct_kernel_enter_state()
instrument_atomic_write(&ct->state, sizeof(ct->state));
@@ -294,12 +294,12 @@ void noinstr ct_nmi_enter(void)
instrumentation_begin();
}
- trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
- ct_dynticks_nmi_nesting(),
- ct_dynticks_nmi_nesting() + incby, ct_dynticks());
+ trace_rcu_watching(incby == 1 ? TPS("Startirq") : TPS("++="),
+ ct_nmi_nesting(),
+ ct_nmi_nesting() + incby, ct_rcu_watching());
instrumentation_end();
- WRITE_ONCE(ct->dynticks_nmi_nesting, /* Prevent store tearing. */
- ct_dynticks_nmi_nesting() + incby);
+ WRITE_ONCE(ct->nmi_nesting, /* Prevent store tearing. */
+ ct_nmi_nesting() + incby);
barrier();
}
@@ -317,7 +317,7 @@ void noinstr ct_nmi_enter(void)
void noinstr ct_idle_enter(void)
{
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
- ct_kernel_exit(false, RCU_DYNTICKS_IDX + CONTEXT_IDLE);
+ ct_kernel_exit(false, CT_RCU_WATCHING + CT_STATE_IDLE);
}
EXPORT_SYMBOL_GPL(ct_idle_enter);
@@ -335,7 +335,7 @@ void noinstr ct_idle_exit(void)
unsigned long flags;
raw_local_irq_save(flags);
- ct_kernel_enter(false, RCU_DYNTICKS_IDX - CONTEXT_IDLE);
+ ct_kernel_enter(false, CT_RCU_WATCHING - CT_STATE_IDLE);
raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ct_idle_exit);
@@ -485,7 +485,7 @@ void noinstr __ct_user_enter(enum ctx_state state)
* user_exit() or ct_irq_enter(). Let's remove RCU's dependency
* on the tick.
*/
- if (state == CONTEXT_USER) {
+ if (state == CT_STATE_USER) {
instrumentation_begin();
trace_user_enter(0);
vtime_user_enter(current);
@@ -504,7 +504,7 @@ void noinstr __ct_user_enter(enum ctx_state state)
* CPU doesn't need to maintain the tick for RCU maintenance purposes
* when the CPU runs in userspace.
*/
- ct_kernel_exit(true, RCU_DYNTICKS_IDX + state);
+ ct_kernel_exit(true, CT_RCU_WATCHING + state);
/*
* Special case if we only track user <-> kernel transitions for tickless
@@ -534,7 +534,7 @@ void noinstr __ct_user_enter(enum ctx_state state)
/*
* Tracking for vtime and RCU EQS. Make sure we don't race
* with NMIs. OTOH we don't care about ordering here since
- * RCU only requires RCU_DYNTICKS_IDX increments to be fully
+ * RCU only requires CT_RCU_WATCHING increments to be fully
* ordered.
*/
raw_atomic_add(state, &ct->state);
@@ -620,8 +620,8 @@ void noinstr __ct_user_exit(enum ctx_state state)
* Exit RCU idle mode while entering the kernel because it can
* run a RCU read side critical section anytime.
*/
- ct_kernel_enter(true, RCU_DYNTICKS_IDX - state);
- if (state == CONTEXT_USER) {
+ ct_kernel_enter(true, CT_RCU_WATCHING - state);
+ if (state == CT_STATE_USER) {
instrumentation_begin();
vtime_user_exit(current);
trace_user_exit(0);
@@ -634,17 +634,17 @@ void noinstr __ct_user_exit(enum ctx_state state)
* In this we case we don't care about any concurrency/ordering.
*/
if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
- raw_atomic_set(&ct->state, CONTEXT_KERNEL);
+ raw_atomic_set(&ct->state, CT_STATE_KERNEL);
} else {
if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
/* Tracking for vtime only, no concurrent RCU EQS accounting */
- raw_atomic_set(&ct->state, CONTEXT_KERNEL);
+ raw_atomic_set(&ct->state, CT_STATE_KERNEL);
} else {
/*
* Tracking for vtime and RCU EQS. Make sure we don't race
* with NMIs. OTOH we don't care about ordering here since
- * RCU only requires RCU_DYNTICKS_IDX increments to be fully
+ * RCU only requires CT_RCU_WATCHING increments to be fully
* ordered.
*/
raw_atomic_sub(state, &ct->state);
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 90843cc38588..5b6934e23c21 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -182,7 +182,7 @@ static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
unsigned long nr = syscall_get_nr(current, regs);
- CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
+ CT_WARN_ON(ct_state() != CT_STATE_KERNEL);
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr))
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 38238e595a61..feb3ac1dc5d5 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -54,9 +54,6 @@
* grace-period sequence number.
*/
-#define RCU_SEQ_CTR_SHIFT 2
-#define RCU_SEQ_STATE_MASK ((1 << RCU_SEQ_CTR_SHIFT) - 1)
-
/* Low-order bit definition for polled grace-period APIs. */
#define RCU_GET_STATE_COMPLETED 0x1
@@ -255,6 +252,11 @@ static inline void debug_rcu_head_callback(struct rcu_head *rhp)
kmem_dump_obj(rhp);
}
+static inline bool rcu_barrier_cb_is_done(struct rcu_head *rhp)
+{
+ return rhp->next == rhp;
+}
+
extern int rcu_cpu_stall_suppress_at_boot;
static inline bool rcu_stall_is_suppressed_at_boot(void)
@@ -606,7 +608,7 @@ void srcutorture_get_gp_data(struct srcu_struct *sp, int *flags,
#endif
#ifdef CONFIG_TINY_RCU
-static inline bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) { return false; }
+static inline bool rcu_watching_zero_in_eqs(int cpu, int *vp) { return false; }
static inline unsigned long rcu_get_gp_seq(void) { return 0; }
static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
static inline unsigned long
@@ -619,7 +621,7 @@ static inline void rcu_fwd_progress_check(unsigned long j) { }
static inline void rcu_gp_slow_register(atomic_t *rgssp) { }
static inline void rcu_gp_slow_unregister(atomic_t *rgssp) { }
#else /* #ifdef CONFIG_TINY_RCU */
-bool rcu_dynticks_zero_in_eqs(int cpu, int *vp);
+bool rcu_watching_zero_in_eqs(int cpu, int *vp);
unsigned long rcu_get_gp_seq(void);
unsigned long rcu_exp_batches_completed(void);
unsigned long srcu_batches_completed(struct srcu_struct *sp);
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 1693ea22ef1b..298a2c573f02 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -261,17 +261,6 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
}
/*
- * Mark the specified rcu_segcblist structure as offloaded (or not)
- */
-void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload)
-{
- if (offload)
- rcu_segcblist_set_flags(rsclp, SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED);
- else
- rcu_segcblist_clear_flags(rsclp, SEGCBLIST_OFFLOADED);
-}
-
-/*
* Does the specified rcu_segcblist structure contain callbacks that
* are ready to be invoked?
*/
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 4fe877f5f654..259904075636 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -89,16 +89,7 @@ static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp)
{
if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
- rcu_segcblist_test_flags(rsclp, SEGCBLIST_LOCKING))
- return true;
-
- return false;
-}
-
-static inline bool rcu_segcblist_completely_offloaded(struct rcu_segcblist *rsclp)
-{
- if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
- !rcu_segcblist_test_flags(rsclp, SEGCBLIST_RCU_CORE))
+ rcu_segcblist_test_flags(rsclp, SEGCBLIST_OFFLOADED))
return true;
return false;
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index b53a9e8f5904..6d37596deb1f 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -39,6 +39,7 @@
#include <linux/torture.h>
#include <linux/vmalloc.h>
#include <linux/rcupdate_trace.h>
+#include <linux/sched/debug.h>
#include "rcu.h"
@@ -104,6 +105,20 @@ static char *scale_type = "rcu";
module_param(scale_type, charp, 0444);
MODULE_PARM_DESC(scale_type, "Type of RCU to scalability-test (rcu, srcu, ...)");
+// Structure definitions for custom fixed-per-task allocator.
+struct writer_mblock {
+ struct rcu_head wmb_rh;
+ struct llist_node wmb_node;
+ struct writer_freelist *wmb_wfl;
+};
+
+struct writer_freelist {
+ struct llist_head ws_lhg;
+ atomic_t ws_inflight;
+ struct llist_head ____cacheline_internodealigned_in_smp ws_lhp;
+ struct writer_mblock *ws_mblocks;
+};
+
static int nrealreaders;
static int nrealwriters;
static struct task_struct **writer_tasks;
@@ -111,6 +126,8 @@ static struct task_struct **reader_tasks;
static struct task_struct *shutdown_task;
static u64 **writer_durations;
+static bool *writer_done;
+static struct writer_freelist *writer_freelists;
static int *writer_n_durations;
static atomic_t n_rcu_scale_reader_started;
static atomic_t n_rcu_scale_writer_started;
@@ -120,7 +137,6 @@ static u64 t_rcu_scale_writer_started;
static u64 t_rcu_scale_writer_finished;
static unsigned long b_rcu_gp_test_started;
static unsigned long b_rcu_gp_test_finished;
-static DEFINE_PER_CPU(atomic_t, n_async_inflight);
#define MAX_MEAS 10000
#define MIN_MEAS 100
@@ -143,6 +159,7 @@ struct rcu_scale_ops {
void (*sync)(void);
void (*exp_sync)(void);
struct task_struct *(*rso_gp_kthread)(void);
+ void (*stats)(void);
const char *name;
};
@@ -224,6 +241,11 @@ static void srcu_scale_synchronize(void)
synchronize_srcu(srcu_ctlp);
}
+static void srcu_scale_stats(void)
+{
+ srcu_torture_stats_print(srcu_ctlp, scale_type, SCALE_FLAG);
+}
+
static void srcu_scale_synchronize_expedited(void)
{
synchronize_srcu_expedited(srcu_ctlp);
@@ -241,6 +263,7 @@ static struct rcu_scale_ops srcu_ops = {
.gp_barrier = srcu_rcu_barrier,
.sync = srcu_scale_synchronize,
.exp_sync = srcu_scale_synchronize_expedited,
+ .stats = srcu_scale_stats,
.name = "srcu"
};
@@ -270,6 +293,7 @@ static struct rcu_scale_ops srcud_ops = {
.gp_barrier = srcu_rcu_barrier,
.sync = srcu_scale_synchronize,
.exp_sync = srcu_scale_synchronize_expedited,
+ .stats = srcu_scale_stats,
.name = "srcud"
};
@@ -288,6 +312,11 @@ static void tasks_scale_read_unlock(int idx)
{
}
+static void rcu_tasks_scale_stats(void)
+{
+ rcu_tasks_torture_stats_print(scale_type, SCALE_FLAG);
+}
+
static struct rcu_scale_ops tasks_ops = {
.ptype = RCU_TASKS_FLAVOR,
.init = rcu_sync_scale_init,
@@ -300,6 +329,7 @@ static struct rcu_scale_ops tasks_ops = {
.sync = synchronize_rcu_tasks,
.exp_sync = synchronize_rcu_tasks,
.rso_gp_kthread = get_rcu_tasks_gp_kthread,
+ .stats = IS_ENABLED(CONFIG_TINY_RCU) ? NULL : rcu_tasks_scale_stats,
.name = "tasks"
};
@@ -326,6 +356,11 @@ static void tasks_rude_scale_read_unlock(int idx)
{
}
+static void rcu_tasks_rude_scale_stats(void)
+{
+ rcu_tasks_rude_torture_stats_print(scale_type, SCALE_FLAG);
+}
+
static struct rcu_scale_ops tasks_rude_ops = {
.ptype = RCU_TASKS_RUDE_FLAVOR,
.init = rcu_sync_scale_init,
@@ -333,11 +368,10 @@ static struct rcu_scale_ops tasks_rude_ops = {
.readunlock = tasks_rude_scale_read_unlock,
.get_gp_seq = rcu_no_completed,
.gp_diff = rcu_seq_diff,
- .async = call_rcu_tasks_rude,
- .gp_barrier = rcu_barrier_tasks_rude,
.sync = synchronize_rcu_tasks_rude,
.exp_sync = synchronize_rcu_tasks_rude,
.rso_gp_kthread = get_rcu_tasks_rude_gp_kthread,
+ .stats = IS_ENABLED(CONFIG_TINY_RCU) ? NULL : rcu_tasks_rude_scale_stats,
.name = "tasks-rude"
};
@@ -366,6 +400,11 @@ static void tasks_trace_scale_read_unlock(int idx)
rcu_read_unlock_trace();
}
+static void rcu_tasks_trace_scale_stats(void)
+{
+ rcu_tasks_trace_torture_stats_print(scale_type, SCALE_FLAG);
+}
+
static struct rcu_scale_ops tasks_tracing_ops = {
.ptype = RCU_TASKS_FLAVOR,
.init = rcu_sync_scale_init,
@@ -378,6 +417,7 @@ static struct rcu_scale_ops tasks_tracing_ops = {
.sync = synchronize_rcu_tasks_trace,
.exp_sync = synchronize_rcu_tasks_trace,
.rso_gp_kthread = get_rcu_tasks_trace_gp_kthread,
+ .stats = IS_ENABLED(CONFIG_TINY_RCU) ? NULL : rcu_tasks_trace_scale_stats,
.name = "tasks-tracing"
};
@@ -438,12 +478,52 @@ rcu_scale_reader(void *arg)
}
/*
+ * Allocate a writer_mblock structure for the specified rcu_scale_writer
+ * task.
+ */
+static struct writer_mblock *rcu_scale_alloc(long me)
+{
+ struct llist_node *llnp;
+ struct writer_freelist *wflp;
+ struct writer_mblock *wmbp;
+
+ if (WARN_ON_ONCE(!writer_freelists))
+ return NULL;
+ wflp = &writer_freelists[me];
+ if (llist_empty(&wflp->ws_lhp)) {
+ // ->ws_lhp is private to its rcu_scale_writer task.
+ wmbp = container_of(llist_del_all(&wflp->ws_lhg), struct writer_mblock, wmb_node);
+ wflp->ws_lhp.first = &wmbp->wmb_node;
+ }
+ llnp = llist_del_first(&wflp->ws_lhp);
+ if (!llnp)
+ return NULL;
+ return container_of(llnp, struct writer_mblock, wmb_node);
+}
+
+/*
+ * Free a writer_mblock structure to its rcu_scale_writer task.
+ */
+static void rcu_scale_free(struct writer_mblock *wmbp)
+{
+ struct writer_freelist *wflp;
+
+ if (!wmbp)
+ return;
+ wflp = wmbp->wmb_wfl;
+ llist_add(&wmbp->wmb_node, &wflp->ws_lhg);
+}
+
+/*
* Callback function for asynchronous grace periods from rcu_scale_writer().
*/
static void rcu_scale_async_cb(struct rcu_head *rhp)
{
- atomic_dec(this_cpu_ptr(&n_async_inflight));
- kfree(rhp);
+ struct writer_mblock *wmbp = container_of(rhp, struct writer_mblock, wmb_rh);
+ struct writer_freelist *wflp = wmbp->wmb_wfl;
+
+ atomic_dec(&wflp->ws_inflight);
+ rcu_scale_free(wmbp);
}
/*
@@ -456,12 +536,14 @@ rcu_scale_writer(void *arg)
int i_max;
unsigned long jdone;
long me = (long)arg;
- struct rcu_head *rhp = NULL;
+ bool selfreport = false;
bool started = false, done = false, alldone = false;
u64 t;
DEFINE_TORTURE_RANDOM(tr);
u64 *wdp;
u64 *wdpp = writer_durations[me];
+ struct writer_freelist *wflp = &writer_freelists[me];
+ struct writer_mblock *wmbp = NULL;
VERBOSE_SCALEOUT_STRING("rcu_scale_writer task started");
WARN_ON(!wdpp);
@@ -493,30 +575,34 @@ rcu_scale_writer(void *arg)
jdone = jiffies + minruntime * HZ;
do {
+ bool gp_succeeded = false;
+
if (writer_holdoff)
udelay(writer_holdoff);
if (writer_holdoff_jiffies)
schedule_timeout_idle(torture_random(&tr) % writer_holdoff_jiffies + 1);
wdp = &wdpp[i];
*wdp = ktime_get_mono_fast_ns();
- if (gp_async) {
-retry:
- if (!rhp)
- rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
- if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
- atomic_inc(this_cpu_ptr(&n_async_inflight));
- cur_ops->async(rhp, rcu_scale_async_cb);
- rhp = NULL;
+ if (gp_async && !WARN_ON_ONCE(!cur_ops->async)) {
+ if (!wmbp)
+ wmbp = rcu_scale_alloc(me);
+ if (wmbp && atomic_read(&wflp->ws_inflight) < gp_async_max) {
+ atomic_inc(&wflp->ws_inflight);
+ cur_ops->async(&wmbp->wmb_rh, rcu_scale_async_cb);
+ wmbp = NULL;
+ gp_succeeded = true;
} else if (!kthread_should_stop()) {
cur_ops->gp_barrier();
- goto retry;
} else {
- kfree(rhp); /* Because we are stopping. */
+ rcu_scale_free(wmbp); /* Because we are stopping. */
+ wmbp = NULL;
}
} else if (gp_exp) {
cur_ops->exp_sync();
+ gp_succeeded = true;
} else {
cur_ops->sync();
+ gp_succeeded = true;
}
t = ktime_get_mono_fast_ns();
*wdp = t - *wdp;
@@ -526,6 +612,7 @@ retry:
started = true;
if (!done && i >= MIN_MEAS && time_after(jiffies, jdone)) {
done = true;
+ WRITE_ONCE(writer_done[me], true);
sched_set_normal(current, 0);
pr_alert("%s%s rcu_scale_writer %ld has %d measurements\n",
scale_type, SCALE_FLAG, me, MIN_MEAS);
@@ -551,11 +638,32 @@ retry:
if (done && !alldone &&
atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters)
alldone = true;
- if (started && !alldone && i < MAX_MEAS - 1)
+ if (done && !alldone && time_after(jiffies, jdone + HZ * 60)) {
+ static atomic_t dumped;
+ int i;
+
+ if (!atomic_xchg(&dumped, 1)) {
+ for (i = 0; i < nrealwriters; i++) {
+ if (writer_done[i])
+ continue;
+ pr_info("%s: Task %ld flags writer %d:\n", __func__, me, i);
+ sched_show_task(writer_tasks[i]);
+ }
+ if (cur_ops->stats)
+ cur_ops->stats();
+ }
+ }
+ if (!selfreport && time_after(jiffies, jdone + HZ * (70 + me))) {
+ pr_info("%s: Writer %ld self-report: started %d done %d/%d->%d i %d jdone %lu.\n",
+ __func__, me, started, done, writer_done[me], atomic_read(&n_rcu_scale_writer_finished), i, jiffies - jdone);
+ selfreport = true;
+ }
+ if (gp_succeeded && started && !alldone && i < MAX_MEAS - 1)
i++;
rcu_scale_wait_shutdown();
} while (!torture_must_stop());
- if (gp_async) {
+ if (gp_async && cur_ops->async) {
+ rcu_scale_free(wmbp);
cur_ops->gp_barrier();
}
writer_n_durations[me] = i_max + 1;
@@ -713,6 +821,7 @@ kfree_scale_cleanup(void)
torture_stop_kthread(kfree_scale_thread,
kfree_reader_tasks[i]);
kfree(kfree_reader_tasks);
+ kfree_reader_tasks = NULL;
}
torture_cleanup_end();
@@ -881,6 +990,7 @@ rcu_scale_cleanup(void)
torture_stop_kthread(rcu_scale_reader,
reader_tasks[i]);
kfree(reader_tasks);
+ reader_tasks = NULL;
}
if (writer_tasks) {
@@ -919,10 +1029,33 @@ rcu_scale_cleanup(void)
schedule_timeout_uninterruptible(1);
}
kfree(writer_durations[i]);
+ if (writer_freelists) {
+ int ctr = 0;
+ struct llist_node *llnp;
+ struct writer_freelist *wflp = &writer_freelists[i];
+
+ if (wflp->ws_mblocks) {
+ llist_for_each(llnp, wflp->ws_lhg.first)
+ ctr++;
+ llist_for_each(llnp, wflp->ws_lhp.first)
+ ctr++;
+ WARN_ONCE(ctr != gp_async_max,
+ "%s: ctr = %d gp_async_max = %d\n",
+ __func__, ctr, gp_async_max);
+ kfree(wflp->ws_mblocks);
+ }
+ }
}
kfree(writer_tasks);
+ writer_tasks = NULL;
kfree(writer_durations);
+ writer_durations = NULL;
kfree(writer_n_durations);
+ writer_n_durations = NULL;
+ kfree(writer_done);
+ writer_done = NULL;
+ kfree(writer_freelists);
+ writer_freelists = NULL;
}
/* Do torture-type-specific cleanup operations. */
@@ -949,8 +1082,9 @@ rcu_scale_shutdown(void *arg)
static int __init
rcu_scale_init(void)
{
- long i;
int firsterr = 0;
+ long i;
+ long j;
static struct rcu_scale_ops *scale_ops[] = {
&rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
};
@@ -1017,14 +1151,22 @@ rcu_scale_init(void)
}
while (atomic_read(&n_rcu_scale_reader_started) < nrealreaders)
schedule_timeout_uninterruptible(1);
- writer_tasks = kcalloc(nrealwriters, sizeof(reader_tasks[0]),
- GFP_KERNEL);
- writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations),
- GFP_KERNEL);
- writer_n_durations =
- kcalloc(nrealwriters, sizeof(*writer_n_durations),
- GFP_KERNEL);
- if (!writer_tasks || !writer_durations || !writer_n_durations) {
+ writer_tasks = kcalloc(nrealwriters, sizeof(writer_tasks[0]), GFP_KERNEL);
+ writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations), GFP_KERNEL);
+ writer_n_durations = kcalloc(nrealwriters, sizeof(*writer_n_durations), GFP_KERNEL);
+ writer_done = kcalloc(nrealwriters, sizeof(writer_done[0]), GFP_KERNEL);
+ if (gp_async) {
+ if (gp_async_max <= 0) {
+ pr_warn("%s: gp_async_max = %d must be greater than zero.\n",
+ __func__, gp_async_max);
+ WARN_ON_ONCE(IS_BUILTIN(CONFIG_RCU_TORTURE_TEST));
+ firsterr = -EINVAL;
+ goto unwind;
+ }
+ writer_freelists = kcalloc(nrealwriters, sizeof(writer_freelists[0]