From 2e672ab2d491713541963afca3a5967ccc2376e9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Oct 2017 16:54:49 -0700 Subject: rcu: Avoid ->dynticks_nmi_nesting store tearing NMIs can nest, and store tearing could in theory happen on carries from one byte to the next. This commit therefore adds the WRITE_ONCE() macros preventing this. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f9c0ca2ccf0c..c5d960f86cf8 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1103,7 +1103,8 @@ void rcu_nmi_enter(void) rcu_dynticks_eqs_exit(); incby = 1; } - rdtp->dynticks_nmi_nesting += incby; + WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ + rdtp->dynticks_nmi_nesting + incby); barrier(); } @@ -1135,12 +1136,13 @@ void rcu_nmi_exit(void) * leave it in non-RCU-idle state. */ if (rdtp->dynticks_nmi_nesting != 1) { - rdtp->dynticks_nmi_nesting -= 2; + WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ + rdtp->dynticks_nmi_nesting - 2); return; } /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ - rdtp->dynticks_nmi_nesting = 0; + WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ rcu_dynticks_eqs_enter(); } -- cgit v1.2.3 From a0eb22bf64a755bb162b421120b9fbe7d012b85f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Oct 2017 19:45:10 -0700 Subject: rcu: Reduce dyntick-idle state space Both extended-quiescent-state entry and exit first update the nesting counter and then adjust the dyntick-idle state. This means that there are four states: (1) Both nesting and dyntick idle indicate idle, (2) Nesting indicates idle but dyntick idle does not, (3) Nesting indicates non-idle and dyntick idle does not, and (4) Both nesting and dyntick idle indicate non-idle. This commit simplifies the state space by eliminating #3, reversing the order of updates on exit from extended quiescent state. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c5d960f86cf8..49f661bb8ffe 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -928,21 +928,21 @@ void rcu_irq_exit_irqson(void) * we really have exited idle, and must do the appropriate accounting. * The caller must have disabled interrupts. */ -static void rcu_eqs_exit_common(long long oldval, int user) +static void rcu_eqs_exit_common(long long newval, int user) { RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);) rcu_dynticks_task_exit(); rcu_dynticks_eqs_exit(); rcu_cleanup_after_idle(); - trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); + trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); trace_rcu_dyntick(TPS("Error on exit: not idle task"), - oldval, rdtp->dynticks_nesting); + rdtp->dynticks_nesting, newval); rcu_ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, @@ -967,8 +967,8 @@ static void rcu_eqs_exit(bool user) rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; } else { __this_cpu_inc(disable_rcu_irq_enter); + rcu_eqs_exit_common(DYNTICK_TASK_EXIT_IDLE, user); rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; - rcu_eqs_exit_common(oldval, user); __this_cpu_dec(disable_rcu_irq_enter); } } @@ -1037,7 +1037,7 @@ void rcu_user_exit(void) void rcu_irq_enter(void) { struct rcu_dynticks *rdtp; - long long oldval; + long long newval; lockdep_assert_irqs_disabled(); rdtp = this_cpu_ptr(&rcu_dynticks); @@ -1046,14 +1046,13 @@ void rcu_irq_enter(void) if (rdtp->dynticks_nmi_nesting) return; - oldval = rdtp->dynticks_nesting; - rdtp->dynticks_nesting++; - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - rdtp->dynticks_nesting == 0); - if (oldval) - trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting); + newval = rdtp->dynticks_nesting + 1; + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && newval == 0); + if (rdtp->dynticks_nesting) + trace_rcu_dyntick(TPS("++="), rdtp->dynticks_nesting, newval); else - rcu_eqs_exit_common(oldval, true); + rcu_eqs_exit_common(newval, true); + rdtp->dynticks_nesting++; } /* -- cgit v1.2.3 From fd581a91ac16187625ec509414d08d37827472c4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Oct 2017 21:56:20 -0700 Subject: rcu: Move rcu_nmi_{enter,exit}() to prepare for consolidation This is a code-motion-only commit that prepares to define rcu_irq_enter() in terms of rcu_nmi_enter() and rcu_irq_exit() in terms of rcu_irq_exit(). Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 150 +++++++++++++++++++++++++++--------------------------- 1 file changed, 75 insertions(+), 75 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 49f661bb8ffe..419f3c38e1b6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -866,6 +866,44 @@ void rcu_user_enter(void) } #endif /* CONFIG_NO_HZ_FULL */ +/** + * rcu_nmi_exit - inform RCU of exit from NMI context + * + * If we are returning from the outermost NMI handler that interrupted an + * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting + * to let the RCU grace-period handling know that the CPU is back to + * being RCU-idle. + * + * If you add or remove a call to rcu_nmi_exit(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. + */ +void rcu_nmi_exit(void) +{ + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + + /* + * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. + * (We are exiting an NMI handler, so RCU better be paying attention + * to us!) + */ + WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0); + WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()); + + /* + * If the nesting level is not 1, the CPU wasn't RCU-idle, so + * leave it in non-RCU-idle state. + */ + if (rdtp->dynticks_nmi_nesting != 1) { + WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ + rdtp->dynticks_nmi_nesting - 2); + return; + } + + /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ + WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ + rcu_dynticks_eqs_enter(); +} + /** * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle * @@ -1012,6 +1050,43 @@ void rcu_user_exit(void) } #endif /* CONFIG_NO_HZ_FULL */ +/** + * rcu_nmi_enter - inform RCU of entry to NMI context + * + * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and + * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know + * that the CPU is active. This implementation permits nested NMIs, as + * long as the nesting level does not overflow an int. (You will probably + * run out of stack space first.) + * + * If you add or remove a call to rcu_nmi_enter(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. + */ +void rcu_nmi_enter(void) +{ + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + int incby = 2; + + /* Complain about underflow. */ + WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); + + /* + * If idle from RCU viewpoint, atomically increment ->dynticks + * to mark non-idle and increment ->dynticks_nmi_nesting by one. + * Otherwise, increment ->dynticks_nmi_nesting by two. This means + * if ->dynticks_nmi_nesting is equal to one, we are guaranteed + * to be in the outermost NMI handler that interrupted an RCU-idle + * period (observation due to Andy Lutomirski). + */ + if (rcu_dynticks_curr_cpu_in_eqs()) { + rcu_dynticks_eqs_exit(); + incby = 1; + } + WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ + rdtp->dynticks_nmi_nesting + incby); + barrier(); +} + /** * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle * @@ -1070,81 +1145,6 @@ void rcu_irq_enter_irqson(void) local_irq_restore(flags); } -/** - * rcu_nmi_enter - inform RCU of entry to NMI context - * - * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and - * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know - * that the CPU is active. This implementation permits nested NMIs, as - * long as the nesting level does not overflow an int. (You will probably - * run out of stack space first.) - * - * If you add or remove a call to rcu_nmi_enter(), be sure to test - * with CONFIG_RCU_EQS_DEBUG=y. - */ -void rcu_nmi_enter(void) -{ - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - int incby = 2; - - /* Complain about underflow. */ - WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); - - /* - * If idle from RCU viewpoint, atomically increment ->dynticks - * to mark non-idle and increment ->dynticks_nmi_nesting by one. - * Otherwise, increment ->dynticks_nmi_nesting by two. This means - * if ->dynticks_nmi_nesting is equal to one, we are guaranteed - * to be in the outermost NMI handler that interrupted an RCU-idle - * period (observation due to Andy Lutomirski). - */ - if (rcu_dynticks_curr_cpu_in_eqs()) { - rcu_dynticks_eqs_exit(); - incby = 1; - } - WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ - rdtp->dynticks_nmi_nesting + incby); - barrier(); -} - -/** - * rcu_nmi_exit - inform RCU of exit from NMI context - * - * If we are returning from the outermost NMI handler that interrupted an - * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting - * to let the RCU grace-period handling know that the CPU is back to - * being RCU-idle. - * - * If you add or remove a call to rcu_nmi_exit(), be sure to test - * with CONFIG_RCU_EQS_DEBUG=y. - */ -void rcu_nmi_exit(void) -{ - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - - /* - * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. - * (We are exiting an NMI handler, so RCU better be paying attention - * to us!) - */ - WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0); - WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()); - - /* - * If the nesting level is not 1, the CPU wasn't RCU-idle, so - * leave it in non-RCU-idle state. - */ - if (rdtp->dynticks_nmi_nesting != 1) { - WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ - rdtp->dynticks_nmi_nesting - 2); - return; - } - - /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ - WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ - rcu_dynticks_eqs_enter(); -} - /** * rcu_is_watching - see if RCU thinks that the current CPU is idle * -- cgit v1.2.3 From 6136d6e48a0138f6be5bb3427dbeb0ba07a546a4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Oct 2017 08:28:04 -0700 Subject: rcu: Clamp ->dynticks_nmi_nesting at eqs entry/exit In preparation for merging dyntick-idle irq handling into the NMI algorithm, clamp ->dynticks_nmi_nesting value to allow for interrupts that enter but never leave and vice versa. It is important that the clamping happen outside of the extended quiescent state. Otherwise, there will be short windows where irqs and NMIs fail to convince RCU to start watching. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcu.h | 2 ++ kernel/rcu/tree.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 59c471de342a..f4a411964c41 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -56,6 +56,8 @@ #define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \ DYNTICK_TASK_FLAG) +#define DYNTICK_IRQ_NONIDLE ((INT_MAX / 2) + 1) + /* * Grace-period counter management. diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 419f3c38e1b6..142cdd4a50c9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -818,6 +818,7 @@ static void rcu_eqs_enter(bool user) struct rcu_dynticks *rdtp; rdtp = this_cpu_ptr(&rcu_dynticks); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); if ((rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) @@ -1008,6 +1009,7 @@ static void rcu_eqs_exit(bool user) rcu_eqs_exit_common(DYNTICK_TASK_EXIT_IDLE, user); rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; __this_cpu_dec(disable_rcu_irq_enter); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } } -- cgit v1.2.3 From 58721f5da4bcd5187566f4159a4fc88f70bf74f6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Oct 2017 10:42:22 -0700 Subject: rcu: Define rcu_irq_{enter,exit}() in terms of rcu_nmi_{enter,exit}() RCU currently uses two different mechanisms for tracking irqs and NMIs. This is unnecessary complexity: Given that NMIs can nest and given that RCU's tracking handles such nesting, the NMI tracking mechanism can also be used to track irqs. This commit therefore defines rcu_irq_enter() in terms of rcu_nmi_enter() and rcu_irq_exit() in terms of rcu_nmi_exit(). Unfortunately, callers must still distinguish between the irq and NMI functions because additional actions are taken when an irq interrupts idle or nohz_full usermode execution, and these actions cannot always be taken from NMI handlers. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 59 ++++++++++++++++++++----------------------------------- 1 file changed, 21 insertions(+), 38 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 142cdd4a50c9..fde0e840563f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -266,6 +266,7 @@ void rcu_bh_qs(void) static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, + .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), }; @@ -914,8 +915,8 @@ void rcu_nmi_exit(void) * * This code assumes that the idle loop never does anything that might * result in unbalanced calls to irq_enter() and irq_exit(). If your - * architecture violates this assumption, RCU will give you what you - * deserve, good and hard. But very infrequently and irreproducibly. + * architecture's idle loop violates this assumption, RCU will give you what + * you deserve, good and hard. But very infrequently and irreproducibly. * * Use things like work queues to work around this limitation. * @@ -926,23 +927,14 @@ void rcu_nmi_exit(void) */ void rcu_irq_exit(void) { - struct rcu_dynticks *rdtp; + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); lockdep_assert_irqs_disabled(); - rdtp = this_cpu_ptr(&rcu_dynticks); - - /* Page faults can happen in NMI handlers, so check... */ - if (rdtp->dynticks_nmi_nesting) - return; - - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - rdtp->dynticks_nesting < 1); - if (rdtp->dynticks_nesting <= 1) { - rcu_eqs_enter_common(true); - } else { - trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1); - rdtp->dynticks_nesting--; - } + if (rdtp->dynticks_nmi_nesting == 1) + rcu_prepare_for_idle(); + rcu_nmi_exit(); + if (rdtp->dynticks_nmi_nesting == 0) + rcu_dynticks_task_enter(); } /* @@ -1097,12 +1089,12 @@ void rcu_nmi_enter(void) * sections can occur. The caller must have disabled interrupts. * * Note that the Linux kernel is fully capable of entering an interrupt - * handler that it never exits, for example when doing upcalls to - * user mode! This code assumes that the idle loop never does upcalls to - * user mode. If your architecture does do upcalls from the idle loop (or - * does anything else that results in unbalanced calls to the irq_enter() - * and irq_exit() functions), RCU will give you what you deserve, good - * and hard. But very infrequently and irreproducibly. + * handler that it never exits, for example when doing upcalls to user mode! + * This code assumes that the idle loop never does upcalls to user mode. + * If your architecture's idle loop does do upcalls to user mode (or does + * anything else that results in unbalanced calls to the irq_enter() and + * irq_exit() functions), RCU will give you what you deserve, good and hard. + * But very infrequently and irreproducibly. * * Use things like work queues to work around this limitation. * @@ -1113,23 +1105,14 @@ void rcu_nmi_enter(void) */ void rcu_irq_enter(void) { - struct rcu_dynticks *rdtp; - long long newval; + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); lockdep_assert_irqs_disabled(); - rdtp = this_cpu_ptr(&rcu_dynticks); - - /* Page faults can happen in NMI handlers, so check... */ - if (rdtp->dynticks_nmi_nesting) - return; - - newval = rdtp->dynticks_nesting + 1; - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && newval == 0); - if (rdtp->dynticks_nesting) - trace_rcu_dyntick(TPS("++="), rdtp->dynticks_nesting, newval); - else - rcu_eqs_exit_common(newval, true); - rdtp->dynticks_nesting++; + if (rdtp->dynticks_nmi_nesting == 0) + rcu_dynticks_task_exit(); + rcu_nmi_enter(); + if (rdtp->dynticks_nmi_nesting == 1) + rcu_cleanup_after_idle(); } /* -- cgit v1.2.3 From 51a1fd30f13090be7750fed86cf3728afaf4e394 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Oct 2017 14:43:40 -0700 Subject: rcu: Make ->dynticks_nesting be a simple counter Now that ->dynticks_nesting counts only process-level dyntick-idle entry and exit, there is no need for the elaborate segmented counter with its guard fields and overflow checking. This commit therefore makes ->dynticks_nesting be a simple counter. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcu.h | 27 +-------------------------- kernel/rcu/tree.c | 40 ++++++++++++++++++++-------------------- kernel/rcu/tree.h | 1 - 3 files changed, 21 insertions(+), 47 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index f4a411964c41..afe0559d1867 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -30,32 +30,7 @@ #define RCU_TRACE(stmt) #endif /* #else #ifdef CONFIG_RCU_TRACE */ -/* - * Process-level increment to ->dynticks_nesting field. This allows for - * architectures that use half-interrupts and half-exceptions from - * process context. - * - * DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH - * that counts the number of process-based reasons why RCU cannot - * consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE - * is the value used to increment or decrement this field. - * - * The rest of the bits could in principle be used to count interrupts, - * but this would mean that a negative-one value in the interrupt - * field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field. - * We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK - * that is set to DYNTICK_TASK_FLAG upon initial exit from idle. - * The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon - * initial exit from idle. - */ -#define DYNTICK_TASK_NEST_WIDTH 7 -#define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1) -#define DYNTICK_TASK_NEST_MASK (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1) -#define DYNTICK_TASK_FLAG ((DYNTICK_TASK_NEST_VALUE / 8) * 2) -#define DYNTICK_TASK_MASK ((DYNTICK_TASK_NEST_VALUE / 8) * 3) -#define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \ - DYNTICK_TASK_FLAG) - +/* Offset to allow for unmatched rcu_irq_{enter,exit}(). */ #define DYNTICK_IRQ_NONIDLE ((INT_MAX / 2) + 1) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index fde0e840563f..d123474fe829 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -265,7 +265,7 @@ void rcu_bh_qs(void) #endif static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { - .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, + .dynticks_nesting = 1, .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), }; @@ -813,6 +813,10 @@ static void rcu_eqs_enter_common(bool user) /* * Enter an RCU extended quiescent state, which can be either the * idle loop or adaptive-tickless usermode execution. + * + * We crowbar the ->dynticks_nmi_nesting field to zero to allow for + * the possibility of usermode upcalls having messed up our count + * of interrupt nesting level during the prior busy period. */ static void rcu_eqs_enter(bool user) { @@ -821,11 +825,11 @@ static void rcu_eqs_enter(bool user) rdtp = this_cpu_ptr(&rcu_dynticks); WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - (rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); - if ((rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) + rdtp->dynticks_nesting == 0); + if (rdtp->dynticks_nesting == 1) rcu_eqs_enter_common(user); else - rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; + rdtp->dynticks_nesting--; } /** @@ -836,10 +840,6 @@ static void rcu_eqs_enter(bool user) * critical sections can occur in irq handlers in idle, a possibility * handled by irq_enter() and irq_exit().) * - * We crowbar the ->dynticks_nesting field to zero to allow for - * the possibility of usermode upcalls having messed up our count - * of interrupt nesting level during the prior busy period. - * * If you add or remove a call to rcu_idle_enter(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. */ @@ -984,6 +984,10 @@ static void rcu_eqs_exit_common(long long newval, int user) /* * Exit an RCU extended quiescent state, which can be either the * idle loop or adaptive-tickless usermode execution. + * + * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to + * allow for the possibility of usermode upcalls messing up our count of + * interrupt nesting level during the busy period that is just now starting. */ static void rcu_eqs_exit(bool user) { @@ -994,12 +998,12 @@ static void rcu_eqs_exit(bool user) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); - if (oldval & DYNTICK_TASK_NEST_MASK) { - rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; + if (oldval) { + rdtp->dynticks_nesting++; } else { __this_cpu_inc(disable_rcu_irq_enter); - rcu_eqs_exit_common(DYNTICK_TASK_EXIT_IDLE, user); - rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; + rcu_eqs_exit_common(1, user); + rdtp->dynticks_nesting = 1; __this_cpu_dec(disable_rcu_irq_enter); WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } @@ -1011,11 +1015,6 @@ static void rcu_eqs_exit(bool user) * Exit idle mode, in other words, -enter- the mode in which RCU * read-side critical sections can occur. * - * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to - * allow for the possibility of usermode upcalls messing up our count - * of interrupt nesting level during the busy period that is just - * now starting. - * * If you add or remove a call to rcu_idle_exit(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. */ @@ -1219,7 +1218,8 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); */ static int rcu_is_cpu_rrupt_from_idle(void) { - return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1; + return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 && + __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1; } /* @@ -3709,7 +3709,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) raw_spin_lock_irqsave_rcu_node(rnp, flags); rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu); rdp->dynticks = &per_cpu(rcu_dynticks, cpu); - WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); + WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1); WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks))); rdp->cpu = cpu; rdp->rsp = rsp; @@ -3738,7 +3738,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */ !init_nocb_callback_list(rdp)) rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */ - rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; + rdp->dynticks->dynticks_nesting = 1; rcu_dynticks_eqs_online(); raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 46a5d1991450..dbd7e3753bed 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -39,7 +39,6 @@ */ struct rcu_dynticks { long long dynticks_nesting; /* Track irq/process nesting level. */ - /* Process level is worth LLONG_MAX/2. */ int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ -- cgit v1.2.3 From 844ccdd7dce2c1a6ea9b437fcf8c3265b136e4a5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Oct 2017 16:51:47 -0700 Subject: rcu: Eliminate rcu_irq_enter_disabled() Now that the irq path uses the rcu_nmi_{enter,exit}() algorithm, rcu_irq_enter() and rcu_irq_exit() may be used from any context. There is thus no need for rcu_irq_enter_disabled() and for the checks using it. This commit therefore eliminates rcu_irq_enter_disabled(). Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 22 ++-------------------- kernel/trace/trace.c | 11 ----------- 2 files changed, 2 insertions(+), 31 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d123474fe829..444aa2b3f24d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -270,20 +270,6 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), }; -/* - * There's a few places, currently just in the tracing infrastructure, - * that uses rcu_irq_enter() to make sure RCU is watching. But there's - * a small location where that will not even work. In those cases - * rcu_irq_enter_disabled() needs to be checked to make sure rcu_irq_enter() - * can be called. - */ -static DEFINE_PER_CPU(bool, disable_rcu_irq_enter); - -bool rcu_irq_enter_disabled(void) -{ - return this_cpu_read(disable_rcu_irq_enter); -} - /* * Record entry into an extended quiescent state. This is only to be * called when not already in an extended quiescent state. @@ -792,10 +778,8 @@ static void rcu_eqs_enter_common(bool user) do_nocb_deferred_wakeup(rdp); } rcu_prepare_for_idle(); - __this_cpu_inc(disable_rcu_irq_enter); - rdtp->dynticks_nesting = 0; /* Breaks tracing momentarily. */ - rcu_dynticks_eqs_enter(); /* After this, tracing works again. */ - __this_cpu_dec(disable_rcu_irq_enter); + rdtp->dynticks_nesting = 0; + rcu_dynticks_eqs_enter(); rcu_dynticks_task_enter(); /* @@ -1001,10 +985,8 @@ static void rcu_eqs_exit(bool user) if (oldval) { rdtp->dynticks_nesting++; } else { - __this_cpu_inc(disable_rcu_irq_enter); rcu_eqs_exit_common(1, user); rdtp->dynticks_nesting = 1; - __this_cpu_dec(disable_rcu_irq_enter); WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 73e67b68c53b..dbce1be3bab8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2682,17 +2682,6 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, if (unlikely(in_nmi())) return; - /* - * It is possible that a function is being traced in a - * location that RCU is not watching. A call to - * rcu_irq_enter() will make sure that it is, but there's - * a few internal rcu functions that could be traced - * where that wont work either. In those cases, we just - * do nothing. - */ - if (unlikely(rcu_irq_enter_disabled())) - return; - rcu_irq_enter_irqson(); __ftrace_trace_stack(buffer, flags, skip, pc, NULL); rcu_irq_exit_irqson(); -- cgit v1.2.3 From aa24163b2ee5c92120e32e99b5a93143a0f4258e Mon Sep 17 00:00:00 2001 From: Prateek Sood Date: Wed, 15 Nov 2017 19:50:14 +0530 Subject: cgroup/cpuset: remove circular dependency deadlock Remove circular dependency deadlock in a scenario where hotplug of CPU is being done while there is updation in cgroup and cpuset triggered from userspace. Process A => kthreadd => Process B => Process C => Process A Process A cpu_subsys_offline(); cpu_down(); _cpu_down(); percpu_down_write(&cpu_hotplug_lock); //held cpuhp_invoke_callback(); workqueue_offline_cpu(); queue_work_on(); // unbind_work on system_highpri_wq __queue_work(); insert_work(); wake_up_worker(); flush_work(); wait_for_completion(); worker_thread(); manage_workers(); create_worker(); kthread_create_on_node(); wake_up_process(kthreadd_task); kthreadd kthreadd(); kernel_thread(); do_fork(); copy_process(); percpu_down_read(&cgroup_threadgroup_rwsem); __rwsem_down_read_failed_common(); //waiting Process B kernfs_fop_write(); cgroup_file_write(); cgroup_procs_write(); percpu_down_write(&cgroup_threadgroup_rwsem); //held cgroup_attach_task(); cgroup_migrate(); cgroup_migrate_execute(); cpuset_can_attach(); mutex_lock(&cpuset_mutex); //waiting Process C kernfs_fop_write(); cgroup_file_write(); cpuset_write_resmask(); mutex_lock(&cpuset_mutex); //held update_cpumask(); update_cpumasks_hier(); rebuild_sched_domains_locked(); get_online_cpus(); percpu_down_read(&cpu_hotplug_lock); //waiting Eliminating deadlock by reversing the locking order for cpuset_mutex and cpu_hotplug_lock. Signed-off-by: Prateek Sood Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 53 ++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 23 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index f7efa7b4d825..cab5fd1ee767 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -812,6 +812,18 @@ done: return ndoms; } +static void cpuset_sched_change_begin(void) +{ + cpus_read_lock(); + mutex_lock(&cpuset_mutex); +} + +static void cpuset_sched_change_end(void) +{ + mutex_unlock(&cpuset_mutex); + cpus_read_unlock(); +} + /* * Rebuild scheduler domains. * @@ -821,16 +833,14 @@ done: * 'cpus' is removed, then call this routine to rebuild the * scheduler's dynamic sched domains. * - * Call with cpuset_mutex held. Takes get_online_cpus(). */ -static void rebuild_sched_domains_locked(void) +static void rebuild_sched_domains_cpuslocked(void) { struct sched_domain_attr *attr; cpumask_var_t *doms; int ndoms; lockdep_assert_held(&cpuset_mutex); - get_online_cpus(); /* * We have raced with CPU hotplug. Don't do anything to avoid @@ -838,27 +848,25 @@ static void rebuild_sched_domains_locked(void) * Anyways, hotplug work item will rebuild sched domains. */ if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) - goto out; + return; /* Generate domain masks and attrs */ ndoms = generate_sched_domains(&doms, &attr); /* Have scheduler rebuild the domains */ partition_sched_domains(ndoms, doms, attr); -out: - put_online_cpus(); } #else /* !CONFIG_SMP */ -static void rebuild_sched_domains_locked(void) +static void rebuild_sched_domains_cpuslocked(void) { } #endif /* CONFIG_SMP */ void rebuild_sched_domains(void) { - mutex_lock(&cpuset_mutex); - rebuild_sched_domains_locked(); - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_begin(); + rebuild_sched_domains_cpuslocked(); + cpuset_sched_change_end(); } /** @@ -944,7 +952,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) rcu_read_unlock(); if (need_rebuild_sched_domains) - rebuild_sched_domains_locked(); + rebuild_sched_domains_cpuslocked(); } /** @@ -1276,7 +1284,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) cs->relax_domain_level = val; if (!cpumask_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) - rebuild_sched_domains_locked(); + rebuild_sched_domains_cpuslocked(); } return 0; @@ -1309,7 +1317,6 @@ static void update_tasks_flags(struct cpuset *cs) * * Call with cpuset_mutex held. */ - static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on) { @@ -1342,7 +1349,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, spin_unlock_irq(&callback_lock); if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) - rebuild_sched_domains_locked(); + rebuild_sched_domains_cpuslocked(); if (spread_flag_changed) update_tasks_flags(cs); @@ -1610,7 +1617,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, cpuset_filetype_t type = cft->private; int retval = 0; - mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin(); if (!is_cpuset_online(cs)) { retval = -ENODEV; goto out_unlock; @@ -1646,7 +1653,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); return retval; } @@ -1657,7 +1664,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, cpuset_filetype_t type = cft->private; int retval = -ENODEV; - mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin(); if (!is_cpuset_online(cs)) goto out_unlock; @@ -1670,7 +1677,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); return retval; } @@ -1709,7 +1716,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, kernfs_break_active_protection(of->kn); flush_work(&cpuset_hotplug_work); - mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin(); if (!is_cpuset_online(cs)) goto out_unlock; @@ -1733,7 +1740,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, free_trial_cpuset(trialcs); out_unlock: - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); flush_workqueue(cpuset_migrate_mm_wq); @@ -2034,14 +2041,14 @@ out_unlock: /* * If the cpuset being removed has its flag 'sched_load_balance' * enabled, then simulate turning sched_load_balance off, which - * will call rebuild_sched_domains_locked(). + * will call rebuild_sched_domains_cpuslocked(). */ static void cpuset_css_offline(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); - mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin(); if (is_sched_load_balance(cs)) update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); @@ -2049,7 +2056,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) cpuset_dec(); clear_bit(CS_ONLINE, &cs->flags); - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); } static void cpuset_css_free(struct cgroup_subsys_state *css) -- cgit v1.2.3 From 1599a185f0e6113be185b9fb809c621c73865829 Mon Sep 17 00:00:00 2001 From: Prateek Sood Date: Wed, 15 Nov 2017 19:50:15 +0530 Subject: cpuset: Make cpuset hotplug synchronous Convert cpuset_hotplug_workfn() into synchronous call for cpu hotplug path. For memory hotplug path it still gets queued as a work item. Since cpuset_hotplug_workfn() can be made synchronous for cpu hotplug path, it is not required to wait for cpuset hotplug while thawing processes. Signed-off-by: Prateek Sood Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 41 ++++++++++++++++++++--------------------- kernel/power/process.c | 2 -- kernel/sched/core.c | 1 - 3 files changed, 20 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index cab5fd1ee767..227bc25d951d 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2277,15 +2277,8 @@ retry: mutex_unlock(&cpuset_mutex); } -static bool force_rebuild; - -void cpuset_force_rebuild(void) -{ - force_rebuild = true; -} - /** - * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset + * cpuset_hotplug - handle CPU/memory hotunplug for a cpuset * * This function is called after either CPU or memory configuration has * changed and updates cpuset accordingly. The top_cpuset is always @@ -2300,7 +2293,7 @@ void cpuset_force_rebuild(void) * Note that CPU offlining during suspend is ignored. We don't modify * cpusets across suspend/resume cycles at all. */ -static void cpuset_hotplug_workfn(struct work_struct *work) +static void cpuset_hotplug(bool use_cpu_hp_lock) { static cpumask_t new_cpus; static nodemask_t new_mems; @@ -2358,25 +2351,31 @@ static void cpuset_hotplug_workfn(struct work_struct *work) } /* rebuild sched domains if cpus_allowed has changed */ - if (cpus_updated || force_rebuild) { - force_rebuild = false; - rebuild_sched_domains(); + if (cpus_updated) { + if (use_cpu_hp_lock) + rebuild_sched_domains(); + else { + /* Acquiring cpu_hotplug_lock is not required. + * When cpuset_hotplug() is called in hotplug path, + * cpu_hotplug_lock is held by the hotplug context + * which is waiting for cpuhp_thread_fun to indicate + * completion of callback. + */ + mutex_lock(&cpuset_mutex); + rebuild_sched_domains_cpuslocked(); + mutex_unlock(&cpuset_mutex); + } } } -void cpuset_update_active_cpus(void) +static void cpuset_hotplug_workfn(struct work_struct *work) { - /* - * We're inside cpu hotplug critical region which usually nests - * inside cgroup synchronization. Bounce actual hotplug processing - * to a work item to avoid reverse locking order. - */ - schedule_work(&cpuset_hotplug_work); + cpuset_hotplug(true); } -void cpuset_wait_for_hotplug(void) +void cpuset_update_active_cpus(void) { - flush_work(&cpuset_hotplug_work); + cpuset_hotplug(false); } /* diff --git a/kernel/power/process.c b/kernel/power/process.c index 7381d49a44db..c326d7235c5f 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -204,8 +204,6 @@ void thaw_processes(void) __usermodehelper_set_disable_depth(UMH_FREEZING); thaw_workqueues(); - cpuset_wait_for_hotplug(); - read_lock(&tasklist_lock); for_each_process_thread(g, p) { /* No other threads should have PF_SUSPEND_TASK set */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 75554f366fd3..88b3450b29ab 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5624,7 +5624,6 @@ static void cpuset_cpu_active(void) * restore the original sched domains by considering the * cpuset configurations. */ - cpuset_force_rebuild(); } cpuset_update_active_cpus(); } -- cgit v1.2.3 From c98a9805096460567404799a7bd3149826affde7 Mon Sep 17 00:00:00 2001 From: Tal Shorer Date: Fri, 3 Nov 2017 17:27:50 +0200 Subject: workqueue: respect isolated cpus when queueing an unbound work Initialize wq_unbound_cpumask to exclude cpus that were isolated by the cmdline's isolcpus parameter. Signed-off-by: Tal Shorer Signed-off-by: Tejun Heo --- kernel/workqueue.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8fdb710bfdd7..6a5658cb46da 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "workqueue_internal.h" @@ -4957,6 +4958,10 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) return -ENOMEM; + /* + * Not excluding isolated cpus on purpose. + * If the user wishes to include them, we allow that. + */ cpumask_and(cpumask, cpumask, cpu_possible_mask); if (!cpumask_empty(cpumask)) { apply_wqattrs_lock(); @@ -5555,7 +5560,7 @@ int __init workqueue_init_early(void) WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL)); - cpumask_copy(wq_unbound_cpumask, cpu_possible_mask); + cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_FLAG_DOMAIN)); pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); -- cgit v1.2.3 From ddf7005f32212f28669032651e09bd8d2245c35d Mon Sep 17 00:00:00 2001 From: Wang Long Date: Sun, 19 Nov 2017 16:08:37 -0500 Subject: debug cgroup: use task_css_set instead of rcu_dereference This macro `task_css_set` verifies that the caller is inside proper critical section if the kernel set CONFIG_PROVE_RCU=y. Signed-off-by: Wang Long Signed-off-by: Tejun Heo --- kernel/cgroup/debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c index 5f780d8f6a9d..9caeda610249 100644 --- a/kernel/cgroup/debug.c +++ b/kernel/cgroup/debug.c @@ -50,7 +50,7 @@ static int current_css_set_read(struct seq_file *seq, void *v) spin_lock_irq(&css_set_lock); rcu_read_lock(); - cset = rcu_dereference(current->cgroups); + cset = task_css_set(current); refcnt = refcount_read(&cset->refcount); seq_printf(seq, "css_set %pK %d", cset, refcnt); if (refcnt > cset->nr_tasks) @@ -96,7 +96,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v) spin_lock_irq(&css_set_lock); rcu_read_lock(); - cset = rcu_dereference(current->cgroups); + cset = task_css_set(current); list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { struct cgroup *c = link->cgrp; -- cgit v1.2.3 From 52cf373c37a684f8fc279d541307fad39d206376 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 28 Nov 2017 13:59:25 +0100 Subject: cgroup: properly init u64_stats Lockdep complains that the stats update is trying to register a non-static key. This is because u64_stats are using a seqlock on 32bit arches, which needs to be initialized before usage. Fixes: 041cd640b2f3 (cgroup: Implement cgroup2 basic CPU usage accounting) Signed-off-by: Lucas Stach Signed-off-by: Tejun Heo --- kernel/cgroup/stat.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/stat.c b/kernel/cgroup/stat.c index 133b465691d6..1e111dd455c4 100644 --- a/kernel/cgroup/stat.c +++ b/kernel/cgroup/stat.c @@ -296,8 +296,12 @@ int cgroup_stat_init(struct cgroup *cgrp) } /* ->updated_children list is self terminated */ - for_each_possible_cpu(cpu) - cgroup_cpu_stat(cgrp, cpu)->updated_children = cgrp; + for_each_possible_cpu(cpu) { + struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu); + + cstat->updated_children = cgrp; + u64_stats_init(&cstat->sync); + } prev_cputime_init(&cgrp->stat.prev_cputime); -- cgit v1.2.3 From bd2b879a1ca55486fdb9dcac691bfd3dd79c83d6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Oct 2017 12:29:01 -0700 Subject: rcu: Add tracing to irq/NMI dyntick-idle transitions Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 444aa2b3f24d..d069ba2d8412 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -880,12 +880,15 @@ void rcu_nmi_exit(void) * leave it in non-RCU-idle state. */ if (rdtp->dynticks_nmi_nesting != 1) { + trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, + rdtp->dynticks_nmi_nesting - 2); WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ rdtp->dynticks_nmi_nesting - 2); return; } /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ + trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0); WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ rcu_dynticks_eqs_enter(); } @@ -1057,6 +1060,9 @@ void rcu_nmi_enter(void) rcu_dynticks_eqs_exit(); incby = 1; } + trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="), + rdtp->dynticks_nmi_nesting, + rdtp->dynticks_nmi_nesting + incby); WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ rdtp->dynticks_nmi_nesting + incby); barrier(); -- cgit v1.2.3 From 84585aa8b6ad24e5bdfba9db4a320a6aeed192ab Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Oct 2017 15:55:16 -0700 Subject: rcu: Shrink ->dynticks_{nmi_,}nesting from long long to long Because the ->dynticks_nesting field now only contains the process-based nesting level instead of a value encoding both the process nesting level and the irq "nesting" level, we no longer need a long long, even on 32-bit systems. This commit therefore changes both the ->dynticks_nesting and ->dynticks_nmi_nesting fields to long. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcu.h | 2 +- kernel/rcu/tree.c | 6 +++--- kernel/rcu/tree.h | 4 ++-- kernel/rcu/tree_plugin.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index afe0559d1867..6334f2c1abd0 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -31,7 +31,7 @@ #endif /* #else #ifdef CONFIG_RCU_TRACE */ /* Offset to allow for unmatched rcu_irq_{enter,exit}(). */ -#define DYNTICK_IRQ_NONIDLE ((INT_MAX / 2) + 1) +#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d069ba2d8412..92de3bacda07 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -946,7 +946,7 @@ void rcu_irq_exit_irqson(void) * we really have exited idle, and must do the appropriate accounting. * The caller must have disabled interrupts. */ -static void rcu_eqs_exit_common(long long newval, int user) +static void rcu_eqs_exit_common(long newval, int user) { RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);) @@ -979,7 +979,7 @@ static void rcu_eqs_exit_common(long long newval, int user) static void rcu_eqs_exit(bool user) { struct rcu_dynticks *rdtp; - long long oldval; + long oldval; lockdep_assert_irqs_disabled(); rdtp = this_cpu_ptr(&rcu_dynticks); @@ -1043,7 +1043,7 @@ void rcu_user_exit(void) void rcu_nmi_enter(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - int incby = 2; + long incby = 2; /* Complain about underflow. */ WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index dbd7e3753bed..6488a3b0e729 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -38,8 +38,8 @@ * Dynticks per-CPU state. */ struct rcu_dynticks { - long long dynticks_nesting; /* Track irq/process nesting level. */ - int dynticks_nmi_nesting; /* Track NMI nesting level. */ + long dynticks_nesting; /* Track process nesting level. */ + long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index db85ca3975f1..e94e754464cd 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1687,7 +1687,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) } print_cpu_stall_fast_no_hz(fast_no_hz, cpu); delta = rdp->mynode->gpnum - rdp->rcu_iw_gpnum; - pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n", + pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%ld softirq=%u/%u fqs=%ld %s\n", cpu, "O."[!!cpu_online(cpu)], "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], -- cgit v1.2.3 From dec98900eae1e22467182e58688abe5fae98bd5f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Oct 2017 16:24:29 -0700 Subject: rcu: Add ->dynticks field to rcu_dyntick trace event Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 92de3bacda07..5febb76809f6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -761,13 +761,13 @@ static void rcu_eqs_enter_common(bool user) struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); lockdep_assert_irqs_disabled(); - trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0); + trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0); + trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0, rdtp->dynticks); rcu_ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, @@ -880,15 +880,14 @@ void rcu_nmi_exit(void) * leave it in non-RCU-idle state. */ if (rdtp->dynticks_nmi_nesting != 1) { - trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, - rdtp->dynticks_nmi_nesting - 2); + trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, rdtp->dynticks_nmi_nesting - 2, rdtp->dynticks); WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ rdtp->dynticks_nmi_nesting - 2); return; } /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ - trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0); + trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0, rdtp->dynticks); WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ rcu_dynticks_eqs_enter(); } @@ -953,14 +952,13 @@ static void rcu_eqs_exit_common(long newval, int user) rcu_dynticks_task_exit(); rcu_dynticks_eqs_exit(); rcu_cleanup_after_idle(); - trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval); + trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval, rdtp->dynticks); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - trace_rcu_dyntick(TPS("Error on exit: not idle task"), - rdtp->dynticks_nesting, newval); + trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, newval, rdtp->dynticks); rcu_ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, @@ -1062,7 +1060,7 @@ void rcu_nmi_enter(void) } trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="), rdtp->dynticks_nmi_nesting, - rdtp->dynticks_nmi_nesting + incby); + rdtp->dynticks_nmi_nesting + incby, rdtp->dynticks); WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ rdtp->dynticks_nmi_nesting + incby); barrier(); -- cgit v1.2.3 From 914955e18ca09fc404d7fc3614bb04c96a03692c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 13:50:57 -0700 Subject: rcu: Stop duplicating lockdep checks in RCU's idle-entry code The three RCU_LOCKDEP_WARN() calls in rcu_eqs_enter_common() are redundant with other lockdep checks, so this commit removes them. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5febb76809f6..80cada11f544 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -781,17 +781,6 @@ static void rcu_eqs_enter_common(bool user) rdtp->dynticks_nesting = 0; rcu_dynticks_eqs_enter(); rcu_dynticks_task_enter(); - - /* - * It is illegal to enter an extended quiescent state while - * in an RCU read-side critical section. - */ - RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map), - "Illegal idle entry in RCU read-side critical section."); - RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), - "Illegal idle entry in RCU-bh read-side critical section."); - RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), - "Illegal idle entry in RCU-sched read-side critical section."); } /* -- cgit v1.2.3 From 2342172fd6c148506456862d795c7f155baf6797 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 15:03:10 -0700 Subject: rcu: Avoid ->dynticks_nesting store tearing Although ->dynticks_nesting is updated only by process level, it is accessed from hardirq to check for interrupt-from-idle quiescent states. Store tearing is thus possible, so this commit applies WRITE_ONCE() to ->dynticks_nesting stores. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 80cada11f544..b2ded4d436c6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -778,7 +778,7 @@ static void rcu_eqs_enter_common(bool user) do_nocb_deferred_wakeup(rdp); } rcu_prepare_for_idle(); - rdtp->dynticks_nesting = 0; + WRITE_ONCE(rdtp->dynticks_nesting, 0); /* Avoid irq-access tearing. */ rcu_dynticks_eqs_enter(); rcu_dynticks_task_enter(); } @@ -976,7 +976,7 @@ static void rcu_eqs_exit(bool user) rdtp->dynticks_nesting++; } else { rcu_eqs_exit_common(1, user); - rdtp->dynticks_nesting = 1; + WRITE_ONCE(rdtp->dynticks_nesting, 1); WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } } @@ -3713,7 +3713,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */ !init_nocb_callback_list(rdp)) rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */ - rdp->dynticks->dynticks_nesting = 1; + rdp->dynticks->dynticks_nesting = 1; /* CPU not up, no tearing. */ rcu_dynticks_eqs_online(); raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ -- cgit v1.2.3 From 215bba9f59e35e64b9936da62632b2fa3ede647c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 16:37:03 -0700 Subject: rcu: Fold rcu_eqs_enter_common() into rcu_eqs_enter() There is now only one call to rcu_eqs_enter_common() and there is no other reason to keep it separate. This commit therefore inlines it into its sole call site, saving a few lines of code in the process. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 43 ++++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b2ded4d436c6..5c8a5796c71f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -749,16 +749,27 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) } /* - * rcu_eqs_enter_common - current CPU is entering an extended quiescent state + * Enter an RCU extended quiescent state, which can be either the + * idle loop or adaptive-tickless usermode execution. * - * Enter idle, doing appropriate accounting. The caller must have - * disabled interrupts. + * We crowbar the ->dynticks_nmi_nesting field to zero to allow for + * the possibility of usermode upcalls having messed up our count + * of interrupt nesting level during the prior busy period. */ -static void rcu_eqs_enter_common(bool user) +static void rcu_eqs_enter(bool user) { struct rcu_state *rsp; struct rcu_data *rdp; - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + struct rcu_dynticks *rdtp; + + rdtp = this_cpu_ptr(&rcu_dynticks); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + rdtp->dynticks_nesting == 0); + if (rdtp->dynticks_nesting != 1) { + rdtp->dynticks_nesting--; + return; + } lockdep_assert_irqs_disabled(); trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks); @@ -783,28 +794,6 @@ static void rcu_eqs_enter_common(bool user) rcu_dynticks_task_enter(); } -/* - * Enter an RCU extended quiescent state, which can be either the - * idle loop or adaptive-tickless usermode execution. - * - * We crowbar the ->dynticks_nmi_nesting field to zero to allow for - * the possibility of usermode upcalls having messed up our count - * of interrupt nesting level during the prior busy period. - */ -static void rcu_eqs_enter(bool user) -{ - struct rcu_dynticks *rdtp; - - rdtp = this_cpu_ptr(&rcu_dynticks); - WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - rdtp->dynticks_nesting == 0); - if (rdtp->dynticks_nesting == 1) - rcu_eqs_enter_common(user); - else - rdtp->dynticks_nesting--; -} - /** * rcu_idle_enter - inform RCU that current CPU is entering idle * -- cgit v1.2.3 From 9dd238e28640d5514bbd0ff2d425f32409981d85 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 16:56:26 -0700 Subject: rcu: Fold rcu_eqs_exit_common() into rcu_eqs_exit() There is now only one call to rcu_eqs_exit_common() and there is no other reason to keep it separate. This commit therefore inlines it into its sole call site, saving a few lines of code in the process. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 50 ++++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 32 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5c8a5796c71f..46a8e06bf03e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -916,34 +916,6 @@ void rcu_irq_exit_irqson(void) local_irq_restore(flags); } -/* - * rcu_eqs_exit_common - current CPU moving away from extended quiescent state - * - * If the new value of the ->dynticks_nesting counter was previously zero, - * we really have exited idle, and must do the appropriate accounting. - * The caller must have disabled interrupts. - */ -static void rcu_eqs_exit_common(long newval, int user) -{ - RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);) - - rcu_dynticks_task_exit(); - rcu_dynticks_eqs_exit(); - rcu_cleanup_after_idle(); - trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval, rdtp->dynticks); - if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - !user && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = - idle_task(smp_processor_id()); - - trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, newval, rdtp->dynticks); - rcu_ftrace_dump(DUMP_ORIG); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! */ - } -} - /* * Exit an RCU extended quiescent state, which can be either the * idle loop or adaptive-tickless usermode execution. @@ -963,11 +935,25 @@ static void rcu_eqs_exit(bool user) WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); if (oldval) { rdtp->dynticks_nesting++; - } else { - rcu_eqs_exit_common(1, user); - WRITE_ONCE(rdtp->dynticks_nesting, 1); - WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); + return; + } + rcu_dynticks_task_exit(); + rcu_dynticks_eqs_exit(); + rcu_cleanup_after_idle(); + trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, 1, rdtp->dynticks); + if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + !user && !is_idle_task(current)) { + struct task_struct *idle __maybe_unused = + idle_task(smp_processor_id()); + + trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, 1, rdtp->dynticks); + rcu_ftrace_dump(DUMP_ORIG); + WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", + current->pid, current->comm, + idle->pid, idle->comm); /* must be idle task! */ } + WRITE_ONCE(rdtp->dynticks_nesting, 1); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } /** -- cgit v1.2.3 From e68bbb266dcfed201d8d54a2828ef820d747f083 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 19:55:31 -0700 Subject: rcu: Simplify rcu_eqs_{enter,exit}() non-idle task debug code The code that checks for non-idle non-nohz_idle-usermode tasks invoking rcu_eqs_enter() and rcu_eqs_exit() prints a considerable quantity of helpful information. However, these checks fire rarely, so the extra complexity is no longer worth it. This commit therefore replaces this debug code with simple WARN_ON_ONCE() statements. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 46a8e06bf03e..4d374d2bc925 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -773,17 +773,7 @@ static void rcu_eqs_enter(bool user) lockdep_assert_irqs_disabled(); trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks); - if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - !user && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = - idle_task(smp_processor_id()); - - trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0, rdtp->dynticks); - rcu_ftrace_dump(DUMP_ORIG); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! */ - } + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); for_each_rcu_flavor(rsp) { rdp = this_cpu_ptr(rsp->rda); do_nocb_deferred_wakeup(rdp); @@ -941,17 +931,7 @@ static void rcu_eqs_exit(bool user) rcu_dynticks_eqs_exit(); rcu_cleanup_after_idle(); trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, 1, rdtp->dynticks); - if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - !user && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = - idle_task(smp_processor_id()); - - trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, 1, rdtp->dynticks); - rcu_ftrace_dump(DUMP_ORIG); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! */ - } + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); WRITE_ONCE(rdtp->dynticks_nesting, 1); WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } -- cgit v1.2.3 From d633198088bd9e358566c470ed182994403acc7a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Oct 2017 13:52:30 -0700 Subject: srcu: Prohibit call_srcu() use under raw spinlocks Invoking queue_delayed_work() while holding a raw spinlock is forbidden in -rt kernels, which is exactly what __call_srcu() does, indirectly via srcu_funnel_gp_start(). This commit therefore downgrades Tree SRCU's locking from raw to non-raw spinlocks, which works because call_srcu() is not ever called while holding a raw spinlock. Reported-by: Sebastian Andrzej Siewior Signed-off-by: Paul E. McKenney --- kernel/rcu/srcutree.c | 109 +++++++++++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 41 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 6d5880089ff6..d5cea81378cc 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -53,6 +53,33 @@ static void srcu_invoke_callbacks(struct work_struct *work); static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); static void process_srcu(struct work_struct *work); +/* Wrappers for lock acquisition and r