diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/fork.c | 4 | ||||
-rw-r--r-- | kernel/locking/lockdep.c | 86 | ||||
-rw-r--r-- | kernel/locking/rtmutex-debug.c | 9 | ||||
-rw-r--r-- | kernel/rcu/Makefile | 4 | ||||
-rw-r--r-- | kernel/rcu/rcu.h | 153 | ||||
-rw-r--r-- | kernel/rcu/rcutorture.c | 35 | ||||
-rw-r--r-- | kernel/rcu/srcu.c | 12 | ||||
-rw-r--r-- | kernel/rcu/srcutiny.c | 215 | ||||
-rw-r--r-- | kernel/rcu/srcutree.c | 996 | ||||
-rw-r--r-- | kernel/rcu/tiny.c | 20 | ||||
-rw-r--r-- | kernel/rcu/tiny_plugin.h | 13 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 696 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 164 | ||||
-rw-r--r-- | kernel/rcu/tree_exp.h | 25 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 64 | ||||
-rw-r--r-- | kernel/rcu/tree_trace.c | 26 | ||||
-rw-r--r-- | kernel/rcu/update.c | 53 | ||||
-rw-r--r-- | kernel/sched/core.c | 2 | ||||
-rw-r--r-- | kernel/signal.c | 2 |
19 files changed, 1871 insertions, 708 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 6c463c80e93d..9330ce24f1bb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1313,7 +1313,7 @@ void __cleanup_sighand(struct sighand_struct *sighand) if (atomic_dec_and_test(&sighand->count)) { signalfd_cleanup(sighand); /* - * sighand_cachep is SLAB_DESTROY_BY_RCU so we can free it + * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it * without an RCU grace period, see __lock_task_sighand(). */ kmem_cache_free(sighand_cachep, sighand); @@ -2144,7 +2144,7 @@ void __init proc_caches_init(void) { sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU| + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor); signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index a95e5d1f4a9c..e9d4f85b290c 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1144,10 +1144,10 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, return 0; printk("\n"); - printk("======================================================\n"); - printk("[ INFO: possible circular locking dependency detected ]\n"); + pr_warn("======================================================\n"); + pr_warn("WARNING: possible circular locking dependency detected\n"); print_kernel_ident(); - printk("-------------------------------------------------------\n"); + pr_warn("------------------------------------------------------\n"); printk("%s/%d is trying to acquire lock:\n", curr->comm, task_pid_nr(curr)); print_lock(check_src); @@ -1482,11 +1482,11 @@ print_bad_irq_dependency(struct task_struct *curr, return 0; printk("\n"); - printk("======================================================\n"); - printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", + pr_warn("=====================================================\n"); + pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n", irqclass, irqclass); print_kernel_ident(); - printk("------------------------------------------------------\n"); + pr_warn("-----------------------------------------------------\n"); printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", curr->comm, task_pid_nr(curr), curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, @@ -1711,10 +1711,10 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, return 0; printk("\n"); - printk("=============================================\n"); - printk("[ INFO: possible recursive locking detected ]\n"); + pr_warn("============================================\n"); + pr_warn("WARNING: possible recursive locking detected\n"); print_kernel_ident(); - printk("---------------------------------------------\n"); + pr_warn("--------------------------------------------\n"); printk("%s/%d is trying to acquire lock:\n", curr->comm, task_pid_nr(curr)); print_lock(next); @@ -2061,10 +2061,10 @@ static void print_collision(struct task_struct *curr, struct lock_chain *chain) { printk("\n"); - printk("======================\n"); - printk("[chain_key collision ]\n"); + pr_warn("============================\n"); + pr_warn("WARNING: chain_key collision\n"); print_kernel_ident(); - printk("----------------------\n"); + pr_warn("----------------------------\n"); printk("%s/%d: ", current->comm, task_pid_nr(current)); printk("Hash chain already cached but the contents don't match!\n"); @@ -2360,10 +2360,10 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, return 0; printk("\n"); - printk("=================================\n"); - printk("[ INFO: inconsistent lock state ]\n"); + pr_warn("================================\n"); + pr_warn("WARNING: inconsistent lock state\n"); print_kernel_ident(); - printk("---------------------------------\n"); + pr_warn("--------------------------------\n"); printk("inconsistent {%s} -> {%s} usage.\n", usage_str[prev_bit], usage_str[new_bit]); @@ -2425,10 +2425,10 @@ print_irq_inversion_bug(struct task_struct *curr, return 0; printk("\n"); - printk("=========================================================\n"); - printk("[ INFO: possible irq lock inversion dependency detected ]\n"); + pr_warn("========================================================\n"); + pr_warn("WARNING: possible irq lock inversion dependency detected\n"); print_kernel_ident(); - printk("---------------------------------------------------------\n"); + pr_warn("--------------------------------------------------------\n"); printk("%s/%d just changed the state of lock:\n", curr->comm, task_pid_nr(curr)); print_lock(this); @@ -3170,10 +3170,10 @@ print_lock_nested_lock_not_held(struct task_struct *curr, return 0; printk("\n"); - printk("==================================\n"); - printk("[ BUG: Nested lock was not taken ]\n"); + pr_warn("==================================\n"); + pr_warn("WARNING: Nested lock was not taken\n"); print_kernel_ident(); - printk("----------------------------------\n"); + pr_warn("----------------------------------\n"); printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr)); print_lock(hlock); @@ -3383,10 +3383,10 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock, return 0; printk("\n"); - printk("=====================================\n"); - printk("[ BUG: bad unlock balance detected! ]\n"); + pr_warn("=====================================\n"); + pr_warn("WARNING: bad unlock balance detected!\n"); print_kernel_ident(); - printk("-------------------------------------\n"); + pr_warn("-------------------------------------\n"); printk("%s/%d is trying to release lock (", curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); @@ -3880,10 +3880,10 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, return 0; printk("\n"); - printk("=================================\n"); - printk("[ BUG: bad contention detected! ]\n"); + pr_warn("=================================\n"); + pr_warn("WARNING: bad contention detected!\n"); print_kernel_ident(); - printk("---------------------------------\n"); + pr_warn("---------------------------------\n"); printk("%s/%d is trying to contend lock (", curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); @@ -4244,10 +4244,10 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, return; printk("\n"); - printk("=========================\n"); - printk("[ BUG: held lock freed! ]\n"); + pr_warn("=========================\n"); + pr_warn("WARNING: held lock freed!\n"); print_kernel_ident(); - printk("-------------------------\n"); + pr_warn("-------------------------\n"); printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", curr->comm, task_pid_nr(curr), mem_from, mem_to-1); print_lock(hlock); @@ -4302,11 +4302,11 @@ static void print_held_locks_bug(void) return; printk("\n"); - printk("=====================================\n"); - printk("[ BUG: %s/%d still has locks held! ]\n", + pr_warn("====================================\n"); + pr_warn("WARNING: %s/%d still has locks held!\n", current->comm, task_pid_nr(current)); print_kernel_ident(); - printk("-------------------------------------\n"); + pr_warn("------------------------------------\n"); lockdep_print_held_locks(current); printk("\nstack backtrace:\n"); dump_stack(); @@ -4371,7 +4371,7 @@ retry: } while_each_thread(g, p); printk("\n"); - printk("=============================================\n\n"); + pr_warn("=============================================\n\n"); if (unlock) read_unlock(&tasklist_lock); @@ -4401,10 +4401,10 @@ asmlinkage __visible void lockdep_sys_exit(void) if (!debug_locks_off()) return; printk("\n"); - printk("================================================\n"); - printk("[ BUG: lock held when returning to user space! ]\n"); + pr_warn("================================================\n"); + pr_warn("WARNING: lock held when returning to user space!\n"); print_kernel_ident(); - printk("------------------------------------------------\n"); + pr_warn("------------------------------------------------\n"); printk("%s/%d is leaving the kernel with locks still held!\n", curr->comm, curr->pid); lockdep_print_held_locks(curr); @@ -4421,13 +4421,13 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ /* Note: the following can be executed concurrently, so be careful. */ printk("\n"); - pr_err("===============================\n"); - pr_err("[ ERR: suspicious RCU usage. ]\n"); + pr_warn("=============================\n"); + pr_warn("WARNING: suspicious RCU usage\n"); print_kernel_ident(); - pr_err("-------------------------------\n"); - pr_err("%s:%d %s!\n", file, line, s); - pr_err("\nother info that might help us debug this:\n\n"); - pr_err("\n%srcu_scheduler_active = %d, debug_locks = %d\n", + pr_warn("-----------------------------\n"); + printk("%s:%d %s!\n", file, line, s); + printk("\nother info that might help us debug this:\n\n"); + printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n", !rcu_lockdep_current_cpu_online() ? "RCU used illegally from offline CPU!\n" : !rcu_is_watching() diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c index 97ee9df32e0f..db4f55211b04 100644 --- a/kernel/locking/rtmutex-debug.c +++ b/kernel/locking/rtmutex-debug.c @@ -102,10 +102,11 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) return; } - printk("\n============================================\n"); - printk( "[ BUG: circular locking deadlock detected! ]\n"); - printk("%s\n", print_tainted()); - printk( "--------------------------------------------\n"); + pr_warn("\n"); + pr_warn("============================================\n"); + pr_warn("WARNING: circular locking deadlock detected!\n"); + pr_warn("%s\n", print_tainted()); + pr_warn("--------------------------------------------\n"); printk("%s/%d is deadlocking current task %s/%d\n\n", task->comm, task_pid_nr(task), current->comm, task_pid_nr(current)); diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index 18dfc485225c..158e6593d58c 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile @@ -3,7 +3,9 @@ KCOV_INSTRUMENT := n obj-y += update.o sync.o -obj-$(CONFIG_SRCU) += srcu.o +obj-$(CONFIG_CLASSIC_SRCU) += srcu.o +obj-$(CONFIG_TREE_SRCU) += srcutree.o +obj-$(CONFIG_TINY_SRCU) += srcutiny.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o obj-$(CONFIG_TREE_RCU) += tree.o diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 0d6ff3e471be..73e16ec4054b 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -56,6 +56,83 @@ #define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \ DYNTICK_TASK_FLAG) + +/* + * Grace-period counter management. + */ + +#define RCU_SEQ_CTR_SHIFT 2 +#define RCU_SEQ_STATE_MASK ((1 << RCU_SEQ_CTR_SHIFT) - 1) + +/* + * Return the counter portion of a sequence number previously returned + * by rcu_seq_snap() or rcu_seq_current(). + */ +static inline unsigned long rcu_seq_ctr(unsigned long s) +{ + return s >> RCU_SEQ_CTR_SHIFT; +} + +/* + * Return the state portion of a sequence number previously returned + * by rcu_seq_snap() or rcu_seq_current(). + */ +static inline int rcu_seq_state(unsigned long s) +{ + return s & RCU_SEQ_STATE_MASK; +} + +/* + * Set the state portion of the pointed-to sequence number. + * The caller is responsible for preventing conflicting updates. + */ +static inline void rcu_seq_set_state(unsigned long *sp, int newstate) +{ + WARN_ON_ONCE(newstate & ~RCU_SEQ_STATE_MASK); + WRITE_ONCE(*sp, (*sp & ~RCU_SEQ_STATE_MASK) + newstate); +} + +/* Adjust sequence number for start of update-side operation. */ +static inline void rcu_seq_start(unsigned long *sp) +{ + WRITE_ONCE(*sp, *sp + 1); + smp_mb(); /* Ensure update-side operation after counter increment. */ + WARN_ON_ONCE(rcu_seq_state(*sp) != 1); +} + +/* Adjust sequence number for end of update-side operation. */ +static inline void rcu_seq_end(unsigned long *sp) +{ + smp_mb(); /* Ensure update-side operation before counter increment. */ + WARN_ON_ONCE(!rcu_seq_state(*sp)); + WRITE_ONCE(*sp, (*sp | RCU_SEQ_STATE_MASK) + 1); +} + +/* Take a snapshot of the update side's sequence number. */ +static inline unsigned long rcu_seq_snap(unsigned long *sp) +{ + unsigned long s; + + s = (READ_ONCE(*sp) + 2 * RCU_SEQ_STATE_MASK + 1) & ~RCU_SEQ_STATE_MASK; + smp_mb(); /* Above access must not bleed into critical section. */ + return s; +} + +/* Return the current value the update side's sequence number, no ordering. */ +static inline unsigned long rcu_seq_current(unsigned long *sp) +{ + return READ_ONCE(*sp); +} + +/* + * Given a snapshot from rcu_seq_snap(), determine whether or not a + * full update-side operation has occurred. + */ +static inline bool rcu_seq_done(unsigned long *sp, unsigned long s) +{ + return ULONG_CMP_GE(READ_ONCE(*sp), s); +} + /* * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally * by call_rcu() and rcu callback execution, and are therefore not part of the @@ -109,12 +186,12 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) rcu_lock_acquire(&rcu_callback_map); if (__is_kfree_rcu_offset(offset)) { - RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset)); + RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset);) kfree((void *)head - offset); rcu_lock_release(&rcu_callback_map); return true; } else { - RCU_TRACE(trace_rcu_invoke_callback(rn, head)); + RCU_TRACE(trace_rcu_invoke_callback(rn, head);) head->func(head); rcu_lock_release(&rcu_callback_map); return false; @@ -144,4 +221,76 @@ void rcu_test_sync_prims(void); */ extern void resched_cpu(int cpu); +#if defined(SRCU) || !defined(TINY_RCU) + +#include <linux/rcu_node_tree.h> + +extern int rcu_num_lvls; +extern int num_rcu_lvl[]; +extern int rcu_num_nodes; +static bool rcu_fanout_exact; +static int rcu_fanout_leaf; + +/* + * Compute the per-level fanout, either using the exact fanout specified + * or balancing the tree, depending on the rcu_fanout_exact boot parameter. + */ +static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt) +{ + int i; + + if (rcu_fanout_exact) { + levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; + for (i = rcu_num_lvls - 2; i >= 0; i--) + levelspread[i] = RCU_FANOUT; + } else { + int ccur; + int cprv; + + cprv = nr_cpu_ids; + for (i = rcu_num_lvls - 1; i >= 0; i--) { + ccur = levelcnt[i]; + levelspread[i] = (cprv + ccur - 1) / ccur; + cprv = ccur; + } + } +} + +/* + * Do a full breadth-first scan of the rcu_node structures for the + * specified rcu_state structure. + */ +#define rcu_for_each_node_breadth_first(rsp, rnp) \ + for ((rnp) = &(rsp)->node[0]; \ + (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) + +/* + * Do a breadth-first scan of the non-leaf rcu_node structures for the + * specified rcu_state structure. Note that if there is a singleton + * rcu_node tree with but one rcu_node structure, this loop is a no-op. + */ +#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ + for ((rnp) = &(rsp)->node[0]; \ + (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++) + +/* + * Scan the leaves of the rcu_node hierarchy for the specified rcu_state + * structure. Note that if there is a singleton rcu_node tree with but + * one rcu_node structure, this loop -will- visit the rcu_node structure. + * It is still a leaf node, even if it is also the root node. + */ +#define rcu_for_each_leaf_node(rsp, rnp) \ + for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ + (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) + +/* + * Iterate over all possible CPUs in a leaf RCU node. + */ +#define for_each_leaf_node_possible_cpu(rnp, cpu) \ + for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \ + cpu <= rnp->grphi; \ + cpu = cpumask_next((cpu), cpu_possible_mask)) + +#endif /* #if defined(SRCU) || !defined(TINY_RCU) */ + #endif /* __LINUX_RCU_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index cccc417a8135..e9d4527cdd43 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -559,19 +559,34 @@ static void srcu_torture_barrier(void) static void srcu_torture_stats(void) { - int cpu; - int idx = srcu_ctlp->completed & 0x1; + int __maybe_unused cpu; + int idx; - pr_alert("%s%s per-CPU(idx=%d):", +#if defined(CONFIG_TREE_SRCU) || defined(CONFIG_CLASSIC_SRCU) +#ifdef CONFIG_TREE_SRCU + idx = srcu_ctlp->srcu_idx & 0x1; +#else /* #ifdef CONFIG_TREE_SRCU */ + idx = srcu_ctlp->completed & 0x1; +#endif /* #else #ifdef CONFIG_TREE_SRCU */ + pr_alert("%s%s Tree SRCU per-CPU(idx=%d):", torture_type, TORTURE_FLAG, idx); for_each_possible_cpu(cpu) { unsigned long l0, l1; unsigned long u0, u1; long c0, c1; - struct srcu_array *counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu); +#ifdef CONFIG_TREE_SRCU + struct srcu_data *counts; + counts = per_cpu_ptr(srcu_ctlp->sda, cpu); + u0 = counts->srcu_unlock_count[!idx]; + u1 = counts->srcu_unlock_count[idx]; +#else /* #ifdef CONFIG_TREE_SRCU */ + struct srcu_array *counts; + + counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu); u0 = counts->unlock_count[!idx]; u1 = counts->unlock_count[idx]; +#endif /* #else #ifdef CONFIG_TREE_SRCU */ /* * Make sure that a lock is always counted if the corresponding @@ -579,14 +594,26 @@ static void srcu_torture_stats(void) */ smp_rmb(); +#ifdef CONFIG_TREE_SRCU + l0 = counts->srcu_lock_count[!idx]; + l1 = counts->srcu_lock_count[idx]; +#else /* #ifdef CONFIG_TREE_SRCU */ l0 = counts->lock_count[!idx]; l1 = counts->lock_count[idx]; +#endif /* #else #ifdef CONFIG_TREE_SRCU */ c0 = l0 - u0; c1 = l1 - u1; pr_cont(" %d(%ld,%ld)", cpu, c0, c1); } pr_cont("\n"); +#elif defined(CONFIG_TINY_SRCU) + idx = READ_ONCE(srcu_ctlp->srcu_idx) & 0x1; + pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%d,%d)\n", + torture_type, TORTURE_FLAG, idx, + READ_ONCE(srcu_ctlp->srcu_lock_nesting[!idx]), + READ_ONCE(srcu_ctlp->srcu_lock_nesting[idx])); +#endif } static void srcu_torture_synchronize_expedited(void) diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index ef3bcfb15b39..584d8a983883 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -22,7 +22,7 @@ * Lai Jiangshan <laijs@cn.fujitsu.com> * * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU/ *.txt + * Documentation/RCU/ *.txt * */ @@ -243,8 +243,14 @@ static bool srcu_readers_active(struct srcu_struct *sp) * cleanup_srcu_struct - deconstruct a sleep-RCU structure * @sp: structure to clean up. * - * Must invoke this after you are finished using a given srcu_struct that - * was initialized via init_srcu_struct(), else you leak memory. + * Must invoke this only after you are finished using a given srcu_struct + * that was initialized via init_srcu_struct(). This code does some + * probabalistic checking, spotting late uses of srcu_read_lock(), + * synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu(). + * If any such late uses are detected, the per-CPU memory associated with + * the srcu_struct is simply leaked and WARN_ON() is invoked. If the + * caller frees the srcu_struct itself, a use-after-free crash will likely + * ensue, but at least there will be a warning printed. */ void cleanup_srcu_struct(struct srcu_struct *sp) { diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c new file mode 100644 index 000000000000..b8293527ee18 --- /dev/null +++ b/kernel/rcu/srcutiny.c @@ -0,0 +1,215 @@ +/* + * Sleepable Read-Copy Update mechanism for mutual exclusion, + * tiny version for non-preemptible single-CPU use. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (C) IBM Corporation, 2017 + * + * Author: Paul McKenney <paulmck@us.ibm.com> + */ + +#include <linux/export.h> +#include <linux/mutex.h> +#include <linux/preempt.h> +#include <linux/rcupdate_wait.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/srcu.h> + +#include <linux/rcu_node_tree.h> +#include "rcu.h" + +static int init_srcu_struct_fields(struct srcu_struct *sp) +{ + sp->srcu_lock_nesting[0] = 0; + sp->srcu_lock_nesting[1] = 0; + init_swait_queue_head(&sp->srcu_wq); + sp->srcu_gp_seq = 0; + rcu_segcblist_init(&sp->srcu_cblist); + sp->srcu_gp_running = false; + sp->srcu_gp_waiting = false; + sp->srcu_idx = 0; + INIT_WORK(&sp->srcu_work, srcu_drive_gp); + return 0; +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +int __init_srcu_struct(struct srcu_struct *sp, const char *name, + struct lock_class_key *key) +{ + /* Don't re-initialize a lock while it is held. */ + debug_check_no_locks_freed((void *)sp, sizeof(*sp)); + lockdep_init_map(&sp->dep_map, name, key, 0); + return init_srcu_struct_fields(sp); +} +EXPORT_SYMBOL_GPL(__init_srcu_struct); + +#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +/* + * init_srcu_struct - initialize a sleep-RCU structure + * @sp: structure to initialize. + * + * Must invoke this on a given srcu_struct before passing that srcu_struct + * to any other function. Each srcu_struct represents a separate domain + * of SRCU protection. + */ +int init_srcu_struct(struct srcu_struct *sp) +{ + return init_srcu_struct_fields(sp); +} +EXPORT_SYMBOL_GPL(init_srcu_struct); + +#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +/* + * cleanup_srcu_struct - deconstruct a sleep-RCU structure + * @sp: structure to clean up. + * + * Must invoke this after you are finished using a given srcu_struct that + * was initialized via init_srcu_struct(), else you leak memory. + */ +void cleanup_srcu_struct(struct srcu_struct *sp) +{ + WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]); + flush_work(&sp->srcu_work); + WARN_ON(rcu_seq_state(sp->srcu_gp_seq)); + WARN_ON(sp->srcu_gp_running); + WARN_ON(sp->srcu_gp_waiting); + WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist)); +} +EXPORT_SYMBOL_GPL(cleanup_srcu_struct); + +/* + * Counts the new reader in the appropriate per-CPU element of the + * srcu_struct. Must be called from process context. + * Returns an index that must be passed to the matching srcu_read_unlock(). + */ +int __srcu_read_lock(struct srcu_struct *sp) +{ + int idx; + + idx = READ_ONCE(sp->srcu_idx); + WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1); + return idx; +} +EXPORT_SYMBOL_GPL(__srcu_read_lock); + +/* + * Removes the count for the old reader from the appropriate element of + * the srcu_struct. Must be called from process context. + */ +void __srcu_read_unlock(struct srcu_struct *sp, int idx) +{ + int newval = sp->srcu_lock_nesting[idx] - 1; + + WRITE_ONCE(sp->srcu_lock_nesting[idx], newval); + if (!newval && READ_ONCE(sp->srcu_gp_waiting)) + swake_up(&sp->srcu_wq); +} +EXPORT_SYMBOL_GPL(__srcu_read_unlock); + +/* + * Workqueue handler to drive one grace period and invoke any callbacks + * that become ready as a result. Single-CPU and !PREEMPT operation + * means that we get away with murder on synchronization. ;-) + */ +void srcu_drive_gp(struct work_struct *wp) +{ + int idx; + struct rcu_cblist ready_cbs; + struct srcu_struct *sp; + struct rcu_head *rhp; + + sp = container_of(wp, struct srcu_struct, srcu_work); + if (sp->srcu_gp_running || rcu_segcblist_empty(&sp->srcu_cblist)) + return; /* Already running or nothing to do. */ + + /* Tag recently arrived callbacks and wait for readers. */ + WRITE_ONCE(sp->srcu_gp_running, true); + rcu_segcblist_accelerate(&sp->srcu_cblist, + rcu_seq_snap(&sp->srcu_gp_seq)); + rcu_seq_start(&sp->srcu_gp_seq); + idx = sp->srcu_idx; + WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx); + WRITE_ONCE(sp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ + swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx])); + WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ + rcu_seq_end(&sp->srcu_gp_seq); + + /* Update callback list based on GP, and invoke ready callbacks. */ + rcu_segcblist_advance(&sp->srcu_cblist, + rcu_seq_current(&sp->srcu_gp_seq)); + if (rcu_segcblist_ready_cbs(&sp->srcu_cblist)) { + rcu_cblist_init(&ready_cbs); + local_irq_disable(); + rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs); + local_irq_enable(); + rhp = rcu_cblist_dequeue(&ready_cbs); + for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { + local_bh_disable(); + rhp->func(rhp); + local_bh_enable(); + } + local_irq_disable(); + rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs); + local_irq_enable(); + } + WRITE_ONCE(sp->srcu_gp_running, false); + + /* + * If more callbacks, reschedule ourselves. This can race with + * a call_srcu() at interrupt level, but the ->srcu_gp_running + * checks will straighten that out. + */ + if (!rcu_segcblist_empty(&sp->srcu_cblist)) + schedule_work(&sp->srcu_work); +} +EXPORT_SYMBOL_GPL(srcu_drive_gp); + +/* + * Enqueue an SRCU callback on the specified srcu_struct structure, + * initiating grace-period processing if it is not already running. + */ +void call_srcu(struct srcu_struct *sp, struct rcu_head *head, + rcu_callback_t func) +{ + unsigned long flags; + + head->func = func; + local_irq_save(flags); + rcu_segcblist_enqueue(&sp->srcu_cblist, head, false); + local_irq_restore(flags); + if (!READ_ONCE(sp->srcu_gp_running)) + schedule_work(&sp->srcu_work); +} +EXPORT_SYMBOL_GPL(call_srcu); + +/* + * synchronize_srcu - wait for prior SRCU read-side critical-section completion + */ +void synchronize_srcu(struct srcu_struct *sp) +{ + struct rcu_synchronize rs; + + init_rcu_head_on_stack(&rs.head); + init_completion(&rs.completion); + call_srcu(sp, &rs.head, wakeme_after_rcu); + wait_for_completion(&rs.completion); + destroy_rcu_head_on_stack(&rs.head); +} +EXPORT_SYMBOL_GPL(synchronize_srcu); diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c new file mode 100644 index 000000000000..9ecf0acc18eb --- /dev/null +++ b/kernel/rcu/srcutree.c @@ -0,0 +1,996 @@ +/* + * Sleepable Read-Copy Update mechanism for mutual exclusion. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (C) IBM Corporation, 2006 + * Copyright (C) Fujitsu, 2012 + * + * Author: Paul McKenney <paulmck@us.ibm.com> + * Lai Jiangshan <laijs@cn.fujitsu.com> + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU/ *.txt + * + */ + +#include <linux/export.h> +#include <linux/mutex.h> +#include <linux/percpu.h> +#include <linux/preempt.h> +#include <linux/rcupdate_wait.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/delay.h> +#include <linux/srcu.h> + +#include "rcu.h" + +static void srcu_invoke_callbacks(struct work_struct *work); +static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); + +/* + * Initialize SRCU combining tree. Note that statically allocated + * srcu_struct structures might already have srcu_read_lock() and + * srcu_read_unlock() running against them. So if the is_static parameter + * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[]. + */ +static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) +{ + int cpu; + int i; + int level = 0; + int levelspread[RCU_NUM_LVLS]; + struct srcu_data *sdp; + struct srcu_node *snp; + struct srcu_node *snp_first; + + /* Work out the overall tree geometry. */ + sp->level[0] = &sp->node[0]; + for (i = 1; i < rcu_num_lvls; i++) + sp->level[i] = sp->level[i - 1] + num_rcu_lvl[i - 1]; + rcu_init_levelspread(levelspread, num_rcu_lvl); + + /* Each pass through this loop initializes one srcu_node structure. */ + rcu_for_each_node_breadth_first(sp, snp) { + spin_lock_init(&snp->lock); + for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) + snp->srcu_have_cbs[i] = 0; + snp->grplo = -1; + snp->grphi = -1; + if (snp == &sp->node[0]) { + /* Root node, special case. */ + snp->srcu_parent = NULL; + continue; + } + + /* Non-root node. */ + if (snp == sp->level[level + 1]) + level++; + snp->srcu_parent = sp->level[level - 1] + + (snp - sp->level[level]) / + levelspread[level - 1]; + } + + /* + * Initialize the per-CPU srcu_data array, which feeds into the + * leaves of the srcu_node tree. + */ + WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) != + ARRAY_SIZE(sdp->srcu_unlock_count)); + level = rcu_num_lvls - 1; + snp_first = sp->level[level]; + for_each_possible_cpu(cpu) { + sdp = per_cpu_ptr(sp->sda, cpu); + spin_lock_init(&sdp->lock); + rcu_segcblist_init(&sdp->srcu_cblist); + sdp->srcu_cblist_invoking = false; + sdp->srcu_gp_seq_needed = sp->srcu_gp_seq; + sdp->mynode = &snp_first[cpu / levelspread[level]]; + for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) { + if (snp->grplo < 0) + snp->grplo = cpu; + snp->grphi = cpu; + } + sdp->cpu = cpu; + INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks); + sdp->sp = sp; + if (is_static) + continue; + + /* Dynamically allocated, better be no srcu_read_locks()! */ + for (i = 0; i < ARRAY_SIZE(sdp->srcu_lock_count); i++) { + sdp->srcu_lock_count[i] = 0; + sdp->srcu_unlock_count[i] = 0; + } + } +} + +/* + * Initialize non-compile-time initialized fields, including the + * associated srcu_node and srcu_data structures. The is_static + * parameter is passed through to init_srcu_struct_nodes(), and + * also |