/* SPDX-License-Identifier: GPL-2.0+ */
/*
* RCU expedited grace periods
*
* Copyright IBM Corporation, 2016
*
* Authors: Paul E. McKenney <paulmck@linux.ibm.com>
*/
#include <linux/lockdep.h>
static void rcu_exp_handler(void *unused);
static int rcu_print_task_exp_stall(struct rcu_node *rnp);
/*
* Record the start of an expedited grace period.
*/
static void rcu_exp_gp_seq_start(void)
{
rcu_seq_start(&rcu_state.expedited_sequence);
rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_exp_snap);
}
/*
* Return the value that the expedited-grace-period counter will have
* at the end of the current grace period.
*/
static __maybe_unused unsigned long rcu_exp_gp_seq_endval(void)
{
return rcu_seq_endval(&rcu_state.expedited_sequence);
}
/*
* Record the end of an expedited grace period.
*/
static void rcu_exp_gp_seq_end(void)
{
rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_exp_snap);
rcu_seq_end(&rcu_state.expedited_sequence);
smp_mb(); /* Ensure that consecutive grace periods serialize. */
}
/*
* Take a snapshot of the expedited-grace-period counter, which is the
* earliest value that will indicate that a full grace period has
* elapsed since the current time.
*/
static unsigned long rcu_exp_gp_seq_snap(void)
{
unsigned long s;
smp_mb(); /* Caller's modifications seen first by other CPUs. */
s = rcu_seq_snap(&rcu_state.expedited_sequence);
trace_rcu_exp_grace_period(rcu_state.name, s, TPS("snap"));
return s;
}
/*
* Given a counter snapshot from rcu_exp_gp_seq_snap(), return true
* if a full expedited grace period has elapsed since that snapshot
* was taken.
*/
static bool rcu_exp_gp_seq_done(unsigned long s)
{
return rcu_seq_done(&rcu_state.expedited_sequence, s);
}
/*
* Reset the ->expmaskinit values in the rcu_node tree to reflect any
* recent CPU-online activity. Note that these masks are not cleared
* when CPUs go offline, so they reflect the union of all CPUs that have
* ever been online. This means that this function normally takes its
* no-work-to-do fastpath.
*/
static void sync_exp_reset_tree_hotplug(void)
{
bool done;
unsigned long flags;
unsigned long mask;
unsigned long oldmask;
int ncpus = smp_load_acquire(&rcu_state.ncpus); /* Order vs. locking. */
struct rcu_node *rnp;
struct rcu_node *rnp_up;
/* If no new CPUs onlined since last time, nothing to do. */
if (likely(ncpus == rcu_state.ncpus_snap))
return;
rcu_state.ncpus_snap = ncpus;
/*
* Each pass through the following loop propagates newly onlined
* CPUs for the current rcu_node structure up the rcu_node tree.
*/
rcu_for_each_leaf_node(rnp) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->expmaskinit == rnp->expmaskinitnext) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
continue; /* No new CPUs, nothing to do. */
}
/* Update this node's mask, track old value for propagation. */
oldmask = rnp->expmaskinit;
rnp->expmaskinit = rnp->expmaskinitnext;
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
/* If was already nonzero, nothing to propagate. */
if (oldmask)
continue;
/* Propagate the new CPU up the tree. */
mask = rnp->grpmask;
rnp_up = rnp->parent;
done = false;
while (rnp_up) {
raw_spin_lock_irqsave_rcu_node(rnp_up, flags);
if (rnp_up->expmaskinit)
done = true;
rnp_up->expmaskinit |= mask;
raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags);
if (done)
break;
mask = rnp_up->grpmask;
rnp_up = rnp_up->parent;
}
}
}
/*
* Reset the ->expmask values in the rcu_node tree in preparation for
* a new expedited grace period.
*/
static void __maybe_unused sync_exp_reset_tree(void)
{
unsigned long flags;
struct rcu_node *rnp;
sync_exp_reset_tree_hotplug();
rcu_for_each_node_breadth_first(rnp) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
WARN_ON_ONCE(rnp->expmask);
WRITE_ONCE(rnp->expmask, rnp->expmaskinit);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
}
/*
* Return non-zero if there is no RCU expedited grace period in progress
* for the specified rcu_node structure, in other words, if all CPUs and
* tasks covered by the specified rcu_node structure have done their bit
* for the current expedited grace period.
*/
static bool sync_rcu_exp_done(struct rcu_node *rnp)
{
raw_lockdep_assert_held_rcu_node(rnp);
return READ_ONCE(rnp->exp_tasks) == NULL &&
READ_ONCE(rnp->expmask) == 0;
}
/*
* Like sync_rcu_exp_done(), but where the caller does not hold the
* rcu_node's ->lock.
*/
static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp)
{
unsigned long flags;
bool ret;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
ret = sync_rcu_exp_done(rnp);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return ret;
}
/*
* Report the exit from RCU read-side critical section for the last task
* that queued itself during or before the current expedited preemptible-RCU
* grace period. This event is reported either to the rcu_node structure on
* which the task was queued or to one of that rcu_node structure's ancestors,
* recursively up the tree. (Calm down, calm down, we do the recursion
* iteratively!)
*/
static void __rcu_report_exp_rnp(struct rcu_node *rnp,
bool wake, unsigned long flags)
__releases(rnp->lock)
{
unsigned long mask;
raw_lockdep_assert_held_rcu_node(rnp);
for (;;) {
if (!sync_rcu_exp_done(rnp)) {
if (!rnp->expma