From 01768b42dc97a67b4fb33a2535c49fc1969880df Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 18:11:53 +0100 Subject: locking: Move the mutex code to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-1ditvncg30dgbpvrz2bxfmke@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/Makefile | 9 + kernel/locking/mutex-debug.c | 110 +++++ kernel/locking/mutex-debug.h | 55 +++ kernel/locking/mutex.c | 960 +++++++++++++++++++++++++++++++++++++++++++ kernel/locking/mutex.h | 48 +++ 5 files changed, 1182 insertions(+) create mode 100644 kernel/locking/Makefile create mode 100644 kernel/locking/mutex-debug.c create mode 100644 kernel/locking/mutex-debug.h create mode 100644 kernel/locking/mutex.c create mode 100644 kernel/locking/mutex.h (limited to 'kernel/locking') diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile new file mode 100644 index 000000000000..fe8bd58b22f8 --- /dev/null +++ b/kernel/locking/Makefile @@ -0,0 +1,9 @@ + +obj-y += mutex.o + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_mutex-debug.o = -pg +CFLAGS_REMOVE_rtmutex-debug.o = -pg +endif + +obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c new file mode 100644 index 000000000000..7e3443fe1f48 --- /dev/null +++ b/kernel/locking/mutex-debug.c @@ -0,0 +1,110 @@ +/* + * kernel/mutex-debug.c + * + * Debugging code for mutexes + * + * Started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * lock debugging, locking tree, deadlock detection started by: + * + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey + * Released under the General Public License (GPL). + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mutex-debug.h" + +/* + * Must be called with lock->wait_lock held. + */ +void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) +{ + memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); + waiter->magic = waiter; + INIT_LIST_HEAD(&waiter->list); +} + +void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) +{ + SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); + DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list)); + DEBUG_LOCKS_WARN_ON(waiter->magic != waiter); + DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); +} + +void debug_mutex_free_waiter(struct mutex_waiter *waiter) +{ + DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list)); + memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter)); +} + +void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, + struct thread_info *ti) +{ + SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); + + /* Mark the current thread as blocked on the lock: */ + ti->task->blocked_on = waiter; +} + +void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, + struct thread_info *ti) +{ + DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); + DEBUG_LOCKS_WARN_ON(waiter->task != ti->task); + DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter); + ti->task->blocked_on = NULL; + + list_del_init(&waiter->list); + waiter->task = NULL; +} + +void debug_mutex_unlock(struct mutex *lock) +{ + if (unlikely(!debug_locks)) + return; + + DEBUG_LOCKS_WARN_ON(lock->magic != lock); + DEBUG_LOCKS_WARN_ON(lock->owner != current); + DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); + mutex_clear_owner(lock); +} + +void debug_mutex_init(struct mutex *lock, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key, 0); +#endif + lock->magic = lock; +} + +/*** + * mutex_destroy - mark a mutex unusable + * @lock: the mutex to be destroyed + * + * This function marks the mutex uninitialized, and any subsequent + * use of the mutex is forbidden. The mutex must not be locked when + * this function is called. + */ +void mutex_destroy(struct mutex *lock) +{ + DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock)); + lock->magic = NULL; +} + +EXPORT_SYMBOL_GPL(mutex_destroy); diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h new file mode 100644 index 000000000000..0799fd3e4cfa --- /dev/null +++ b/kernel/locking/mutex-debug.h @@ -0,0 +1,55 @@ +/* + * Mutexes: blocking mutual exclusion locks + * + * started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * This file contains mutex debugging related internal declarations, + * prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case. + * More details are in kernel/mutex-debug.c. + */ + +/* + * This must be called with lock->wait_lock held. + */ +extern void debug_mutex_lock_common(struct mutex *lock, + struct mutex_waiter *waiter); +extern void debug_mutex_wake_waiter(struct mutex *lock, + struct mutex_waiter *waiter); +extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); +extern void debug_mutex_add_waiter(struct mutex *lock, + struct mutex_waiter *waiter, + struct thread_info *ti); +extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, + struct thread_info *ti); +extern void debug_mutex_unlock(struct mutex *lock); +extern void debug_mutex_init(struct mutex *lock, const char *name, + struct lock_class_key *key); + +static inline void mutex_set_owner(struct mutex *lock) +{ + lock->owner = current; +} + +static inline void mutex_clear_owner(struct mutex *lock) +{ + lock->owner = NULL; +} + +#define spin_lock_mutex(lock, flags) \ + do { \ + struct mutex *l = container_of(lock, struct mutex, wait_lock); \ + \ + DEBUG_LOCKS_WARN_ON(in_interrupt()); \ + local_irq_save(flags); \ + arch_spin_lock(&(lock)->rlock.raw_lock);\ + DEBUG_LOCKS_WARN_ON(l->magic != l); \ + } while (0) + +#define spin_unlock_mutex(lock, flags) \ + do { \ + arch_spin_unlock(&(lock)->rlock.raw_lock); \ + local_irq_restore(flags); \ + preempt_check_resched(); \ + } while (0) diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c new file mode 100644 index 000000000000..d24105b1b794 --- /dev/null +++ b/kernel/locking/mutex.c @@ -0,0 +1,960 @@ +/* + * kernel/mutex.c + * + * Mutexes: blocking mutual exclusion locks + * + * Started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and + * David Howells for suggestions and improvements. + * + * - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline + * from the -rt tree, where it was originally implemented for rtmutexes + * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale + * and Sven Dietrich. + * + * Also see Documentation/mutex-design.txt. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * In the DEBUG case we are using the "NULL fastpath" for mutexes, + * which forces all calls into the slowpath: + */ +#ifdef CONFIG_DEBUG_MUTEXES +# include "mutex-debug.h" +# include +#else +# include "mutex.h" +# include +#endif + +/* + * A negative mutex count indicates that waiters are sleeping waiting for the + * mutex. + */ +#define MUTEX_SHOW_NO_WAITER(mutex) (atomic_read(&(mutex)->count) >= 0) + +void +__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) +{ + atomic_set(&lock->count, 1); + spin_lock_init(&lock->wait_lock); + INIT_LIST_HEAD(&lock->wait_list); + mutex_clear_owner(lock); +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER + lock->spin_mlock = NULL; +#endif + + debug_mutex_init(lock, name, key); +} + +EXPORT_SYMBOL(__mutex_init); + +#ifndef CONFIG_DEBUG_LOCK_ALLOC +/* + * We split the mutex lock/unlock logic into separate fastpath and + * slowpath functions, to reduce the register pressure on the fastpath. + * We also put the fastpath first in the kernel image, to make sure the + * branch is predicted by the CPU as default-untaken. + */ +static __used noinline void __sched +__mutex_lock_slowpath(atomic_t *lock_count); + +/** + * mutex_lock - acquire the mutex + * @lock: the mutex to be acquired + * + * Lock the mutex exclusively for this task. If the mutex is not + * available right now, it will sleep until it can get it. + * + * The mutex must later on be released by the same task that + * acquired it. Recursive locking is not allowed. The task + * may not exit without first unlocking the mutex. Also, kernel + * memory where the mutex resides mutex must not be freed with + * the mutex still locked. The mutex must first be initialized + * (or statically defined) before it can be locked. memset()-ing + * the mutex to 0 is not allowed. + * + * ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging + * checks that will enforce the restrictions and will also do + * deadlock debugging. ) + * + * This function is similar to (but not equivalent to) down(). + */ +void __sched mutex_lock(struct mutex *lock) +{ + might_sleep(); + /* + * The locking fastpath is the 1->0 transition from + * 'unlocked' into 'locked' state. + */ + __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); + mutex_set_owner(lock); +} + +EXPORT_SYMBOL(mutex_lock); +#endif + +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER +/* + * In order to avoid a stampede of mutex spinners from acquiring the mutex + * more or less simultaneously, the spinners need to acquire a MCS lock + * first before spinning on the owner field. + * + * We don't inline mspin_lock() so that perf can correctly account for the + * time spent in this lock function. + */ +struct mspin_node { + struct mspin_node *next ; + int locked; /* 1 if lock acquired */ +}; +#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) + +static noinline +void mspin_lock(struct mspin_node **lock, struct mspin_node *node) +{ + struct mspin_node *prev; + + /* Init node */ + node->locked = 0; + node->next = NULL; + + prev = xchg(lock, node); + if (likely(prev == NULL)) { + /* Lock acquired */ + node->locked = 1; + return; + } + ACCESS_ONCE(prev->next) = node; + smp_wmb(); + /* Wait until the lock holder passes the lock down */ + while (!ACCESS_ONCE(node->locked)) + arch_mutex_cpu_relax(); +} + +static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) +{ + struct mspin_node *next = ACCESS_ONCE(node->next); + + if (likely(!next)) { + /* + * Release the lock by setting it to NULL + */ + if (cmpxchg(lock, node, NULL) == node) + return; + /* Wait until the next pointer is set */ + while (!(next = ACCESS_ONCE(node->next))) + arch_mutex_cpu_relax(); + } + ACCESS_ONCE(next->locked) = 1; + smp_wmb(); +} + +/* + * Mutex spinning code migrated from kernel/sched/core.c + */ + +static inline bool owner_running(struct mutex *lock, struct task_struct *owner) +{ + if (lock->owner != owner) + return false; + + /* + * Ensure we emit the owner->on_cpu, dereference _after_ checking + * lock->owner still matches owner, if that fails, owner might + * point to free()d memory, if it still matches, the rcu_read_lock() + * ensures the memory stays valid. + */ + barrier(); + + return owner->on_cpu; +} + +/* + * Look out! "owner" is an entirely speculative pointer + * access and not reliable. + */ +static noinline +int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) +{ + rcu_read_lock(); + while (owner_running(lock, owner)) { + if (need_resched()) + break; + + arch_mutex_cpu_relax(); + } + rcu_read_unlock(); + + /* + * We break out the loop above on need_resched() and when the + * owner changed, which is a sign for heavy contention. Return + * success only when lock->owner is NULL. + */ + return lock->owner == NULL; +} + +/* + * Initial check for entering the mutex spinning loop + */ +static inline int mutex_can_spin_on_owner(struct mutex *lock) +{ + struct task_struct *owner; + int retval = 1; + + rcu_read_lock(); + owner = ACCESS_ONCE(lock->owner); + if (owner) + retval = owner->on_cpu; + rcu_read_unlock(); + /* + * if lock->owner is not set, the mutex owner may have just acquired + * it and not set the owner yet or the mutex has been released. + */ + return retval; +} +#endif + +static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); + +/** + * mutex_unlock - release the mutex + * @lock: the mutex to be released + * + * Unlock a mutex that has been locked by this task previously. + * + * This function must not be used in interrupt context. Unlocking + * of a not locked mutex is not allowed. + * + * This function is similar to (but not equivalent to) up(). + */ +void __sched mutex_unlock(struct mutex *lock) +{ + /* + * The unlocking fastpath is the 0->1 transition from 'locked' + * into 'unlocked' state: + */ +#ifndef CONFIG_DEBUG_MUTEXES + /* + * When debugging is enabled we must not clear the owner before time, + * the slow path will always be taken, and that clears the owner field + * after verifying that it was indeed current. + */ + mutex_clear_owner(lock); +#endif + __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); +} + +EXPORT_SYMBOL(mutex_unlock); + +/** + * ww_mutex_unlock - release the w/w mutex + * @lock: the mutex to be released + * + * Unlock a mutex that has been locked by this task previously with any of the + * ww_mutex_lock* functions (with or without an acquire context). It is + * forbidden to release the locks after releasing the acquire context. + * + * This function must not be used in interrupt context. Unlocking + * of a unlocked mutex is not allowed. + */ +void __sched ww_mutex_unlock(struct ww_mutex *lock) +{ + /* + * The unlocking fastpath is the 0->1 transition from 'locked' + * into 'unlocked' state: + */ + if (lock->ctx) { +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); +#endif + if (lock->ctx->acquired > 0) + lock->ctx->acquired--; + lock->ctx = NULL; + } + +#ifndef CONFIG_DEBUG_MUTEXES + /* + * When debugging is enabled we must not clear the owner before time, + * the slow path will always be taken, and that clears the owner field + * after verifying that it was indeed current. + */ + mutex_clear_owner(&lock->base); +#endif + __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath); +} +EXPORT_SYMBOL(ww_mutex_unlock); + +static inline int __sched +__mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) +{ + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); + + if (!hold_ctx) + return 0; + + if (unlikely(ctx == hold_ctx)) + return -EALREADY; + + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(ctx->contending_lock); + ctx->contending_lock = ww; +#endif + return -EDEADLK; + } + + return 0; +} + +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, + struct ww_acquire_ctx *ww_ctx) +{ +#ifdef CONFIG_DEBUG_MUTEXES + /* + * If this WARN_ON triggers, you used ww_mutex_lock to acquire, + * but released with a normal mutex_unlock in this call. + * + * This should never happen, always use ww_mutex_unlock. + */ + DEBUG_LOCKS_WARN_ON(ww->ctx); + + /* + * Not quite done after calling ww_acquire_done() ? + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); + + if (ww_ctx->contending_lock) { + /* + * After -EDEADLK you tried to + * acquire a different ww_mutex? Bad! + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); + + /* + * You called ww_mutex_lock after receiving -EDEADLK, + * but 'forgot' to unlock everything else first? + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); + ww_ctx->contending_lock = NULL; + } + + /* + * Naughty, using a different class will lead to undefined behavior! + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); +#endif + ww_ctx->acquired++; +} + +/* + * after acquiring lock with fastpath or when we lost out in contested + * slowpath, set ctx and wake up any waiters so they can recheck. + * + * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set, + * as the fastpath and opportunistic spinning are disabled in that case. + */ +static __always_inline void +ww_mutex_set_context_fastpath(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx) +{ + unsigned long flags; + struct mutex_waiter *cur; + + ww_mutex_lock_acquired(lock, ctx); + + lock->ctx = ctx; + + /* + * The lock->ctx update should be visible on all cores before + * the atomic read is done, otherwise contended waiters might be + * missed. The contended waiters will either see ww_ctx == NULL + * and keep spinning, or it will acquire wait_lock, add itself + * to waiter list and sleep. + */ + smp_mb(); /* ^^^ */ + + /* + * Check if lock is contended, if not there is nobody to wake up + */ + if (likely(atomic_read(&lock->base.count) == 0)) + return; + + /* + * Uh oh, we raced in fastpath, wake up everyone in this case, + * so they can see the new lock->ctx. + */ + spin_lock_mutex(&lock->base.wait_lock, flags); + list_for_each_entry(cur, &lock->base.wait_list, list) { + debug_mutex_wake_waiter(&lock->base, cur); + wake_up_process(cur->task); + } + spin_unlock_mutex(&lock->base.wait_lock, flags); +} + +/* + * Lock a mutex (possibly interruptible), slowpath: + */ +static __always_inline int __sched +__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, + struct lockdep_map *nest_lock, unsigned long ip, + struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) +{ + struct task_struct *task = current; + struct mutex_waiter waiter; + unsigned long flags; + int ret; + + preempt_disable(); + mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); + +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER + /* + * Optimistic spinning. + * + * We try to spin for acquisition when we find that there are no + * pending waiters and the lock owner is currently running on a + * (different) CPU. + * + * The rationale is that if the lock owner is running, it is likely to + * release the lock soon. + * + * Since this needs the lock owner, and this mutex implementation + * doesn't track the owner atomically in the lock field, we need to + * track it non-atomically. + * + * We can't do this for DEBUG_MUTEXES because that relies on wait_lock + * to serialize everything. + * + * The mutex spinners are queued up using MCS lock so that only one + * spinner can compete for the mutex. However, if mutex spinning isn't + * going to happen, there is no point in going through the lock/unlock + * overhead. + */ + if (!mutex_can_spin_on_owner(lock)) + goto slowpath; + + for (;;) { + struct task_struct *owner; + struct mspin_node node; + + if (use_ww_ctx && ww_ctx->acquired > 0) { + struct ww_mutex *ww; + + ww = container_of(lock, struct ww_mutex, base); + /* + * If ww->ctx is set the contents are undefined, only + * by acquiring wait_lock there is a guarantee that + * they are not invalid when reading. + * + * As such, when deadlock detection needs to be + * performed the optimistic spinning cannot be done. + */ + if (ACCESS_ONCE(ww->ctx)) + goto slowpath; + } + + /* + * If there's an owner, wait for it to either + * release the lock or go to sleep. + */ + mspin_lock(MLOCK(lock), &node); + owner = ACCESS_ONCE(lock->owner); + if (owner && !mutex_spin_on_owner(lock, owner)) { + mspin_unlock(MLOCK(lock), &node); + goto slowpath; + } + + if ((atomic_read(&lock->count) == 1) && + (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { + lock_acquired(&lock->dep_map, ip); + if (use_ww_ctx) { + struct ww_mutex *ww; + ww = container_of(lock, struct ww_mutex, base); + + ww_mutex_set_context_fastpath(ww, ww_ctx); + } + + mutex_set_owner(lock); + mspin_unlock(MLOCK(lock), &node); + preempt_enable(); + return 0; + } + mspin_unlock(MLOCK(lock), &node); + + /* + * When there's no owner, we might have preempted between the + * owner acquiring the lock and setting the owner field. If + * we're an RT task that will live-lock because we won't let + * the owner complete. + */ + if (!owner && (need_resched() || rt_task(task))) + goto slowpath; + + /* + * The cpu_relax() call is a compiler barrier which forces + * everything in this loop to be re-loaded. We don't need + * memory barriers as we'll eventually observe the right + * values at the cost of a few extra spins. + */ + arch_mutex_cpu_relax(); + } +slowpath: +#endif + spin_lock_mutex(&lock->wait_lock, flags); + + /* once more, can we acquire the lock? */ + if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, 0) == 1)) + goto skip_wait; + + debug_mutex_lock_common(lock, &waiter); + debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); + + /* add waiting tasks to the end of the waitqueue (FIFO): */ + list_add_tail(&waiter.list, &lock->wait_list); + waiter.task = task; + + lock_contended(&lock->dep_map, ip); + + for (;;) { + /* + * Lets try to take the lock again - this is needed even if + * we get here for the first time (shortly after failing to + * acquire the lock), to make sure that we get a wakeup once + * it's unlocked. Later on, if we sleep, this is the + * operation that gives us the lock. We xchg it to -1, so + * that when we release the lock, we properly wake up the + * other waiters: + */ + if (MUTEX_SHOW_NO_WAITER(lock) && + (atomic_xchg(&lock->count, -1) == 1)) + break; + + /* + * got a signal? (This code gets eliminated in the + * TASK_UNINTERRUPTIBLE case.) + */ + if (unlikely(signal_pending_state(state, task))) { + ret = -EINTR; + goto err; + } + + if (use_ww_ctx && ww_ctx->acquired > 0) { + ret = __mutex_lock_check_stamp(lock, ww_ctx); + if (ret) + goto err; + } + + __set_task_state(task, state); + + /* didn't get the lock, go to sleep: */ + spin_unlock_mutex(&lock->wait_lock, flags); + schedule_preempt_disabled(); + spin_lock_mutex(&lock->wait_lock, flags); + } + mutex_remove_waiter(lock, &waiter, current_thread_info()); + /* set it to 0 if there are no waiters left: */ + if (likely(list_empty(&lock->wait_list))) + atomic_set(&lock->count, 0); + debug_mutex_free_waiter(&waiter); + +skip_wait: + /* got the lock - cleanup and rejoice! */ + lock_acquired(&lock->dep_map, ip); + mutex_set_owner(lock); + + if (use_ww_ctx) { + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); + struct mutex_waiter *cur; + + /* + * This branch gets optimized out for the common case, + * and is only important for ww_mutex_lock. + */ + ww_mutex_lock_acquired(ww, ww_ctx); + ww->ctx = ww_ctx; + + /* + * Give any possible sleeping processes the chance to wake up, + * so they can recheck if they have to back off. + */ + list_for_each_entry(cur, &lock->wait_list, list) { + debug_mutex_wake_waiter(lock, cur); + wake_up_process(cur->task); + } + } + + spin_unlock_mutex(&lock->wait_lock, flags); + preempt_enable(); + return 0; + +err: + mutex_remove_waiter(lock, &waiter, task_thread_info(task)); + spin_unlock_mutex(&lock->wait_lock, flags); + debug_mutex_free_waiter(&waiter); + mutex_release(&lock->dep_map, 1, ip); + preempt_enable(); + return ret; +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void __sched +mutex_lock_nested(struct mutex *lock, unsigned int subclass) +{ + might_sleep(); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, + subclass, NULL, _RET_IP_, NULL, 0); +} + +EXPORT_SYMBOL_GPL(mutex_lock_nested); + +void __sched +_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) +{ + might_sleep(); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, + 0, nest, _RET_IP_, NULL, 0); +} + +EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); + +int __sched +mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) +{ + might_sleep(); + return __mutex_lock_common(lock, TASK_KILLABLE, + subclass, NULL, _RET_IP_, NULL, 0); +} +EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); + +int __sched +mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) +{ + might_sleep(); + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, + subclass, NULL, _RET_IP_, NULL, 0); +} + +EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); + +static inline int +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH + unsigned tmp; + + if (ctx->deadlock_inject_countdown-- == 0) { + tmp = ctx->deadlock_inject_interval; + if (tmp > UINT_MAX/4) + tmp = UINT_MAX; + else + tmp = tmp*2 + tmp + tmp/2; + + ctx->deadlock_inject_interval = tmp; + ctx->deadlock_inject_countdown = tmp; + ctx->contending_lock = lock; + + ww_mutex_unlock(lock); + + return -EDEADLK; + } +#endif + + return 0; +} + +int __sched +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + int ret; + + might_sleep(); + ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, + 0, &ctx->dep_map, _RET_IP_, ctx, 1); + if (!ret && ctx->acquired > 1) + return ww_mutex_deadlock_injection(lock, ctx); + + return ret; +} +EXPORT_SYMBOL_GPL(__ww_mutex_lock); + +int __sched +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + int ret; + + might_sleep(); + ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, + 0, &ctx->dep_map, _RET_IP_, ctx, 1); + + if (!ret && ctx->acquired > 1) + return ww_mutex_deadlock_injection(lock, ctx); + + return ret; +} +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); + +#endif + +/* + * Release the lock, slowpath: + */ +static inline void +__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) +{ + struct mutex *lock = container_of(lock_count, struct mutex, count); + unsigned long flags; + + spin_lock_mutex(&lock->wait_lock, flags); + mutex_release(&lock->dep_map, nested, _RET_IP_); + debug_mutex_unlock(lock); + + /* + * some architectures leave the lock unlocked in the fastpath failure + * case, others need to leave it locked. In the later case we have to + * unlock it here + */ + if (__mutex_slowpath_needs_to_unlock()) + atomic_set(&lock->count, 1); + + if (!list_empty(&lock->wait_list)) { + /* get the first entry from the wait-list: */ + struct mutex_waiter *waiter = + list_entry(lock->wait_list.next, + struct mutex_waiter, list); + + debug_mutex_wake_waiter(lock, waiter); + + wake_up_process(waiter->task); + } + + spin_unlock_mutex(&lock->wait_lock, flags); +} + +/* + * Release the lock, slowpath: + */ +static __used noinline void +__mutex_unlock_slowpath(atomic_t *lock_count) +{ + __mutex_unlock_common_slowpath(lock_count, 1); +} + +#ifndef CONFIG_DEBUG_LOCK_ALLOC +/* + * Here come the less common (and hence less performance-critical) APIs: + * mutex_lock_interruptible() and mutex_trylock(). + */ +static noinline int __sched +__mutex_lock_killable_slowpath(struct mutex *lock); + +static noinline int __sched +__mutex_lock_interruptible_slowpath(struct mutex *lock); + +/** + * mutex_lock_interruptible - acquire the mutex, interruptible + * @lock: the mutex to be acquired + * + * Lock the mutex like mutex_lock(), and return 0 if the mutex has + * been acquired or sleep until the mutex becomes available. If a + * signal arrives while waiting for the lock then this function + * returns -EINTR. + * + * This function is similar to (but not equivalent to) down_interruptible(). + */ +int __sched mutex_lock_interruptible(struct mutex *lock) +{ + int ret; + + might_sleep(); + ret = __mutex_fastpath_lock_retval(&lock->count); + if (likely(!ret)) { + mutex_set_owner(lock); + return 0; + } else + return __mutex_lock_interruptible_slowpath(lock); +} + +EXPORT_SYMBOL(mutex_lock_interruptible); + +int __sched mutex_lock_killable(struct mutex *lock) +{ + int ret; + + might_sleep(); + ret = __mutex_fastpath_lock_retval(&lock->count); + if (likely(!ret)) { + mutex_set_owner(lock); + return 0; + } else + return __mutex_lock_killable_slowpath(lock); +} +EXPORT_SYMBOL(mutex_lock_killable); + +static __used noinline void __sched +__mutex_lock_slowpath(atomic_t *lock_count) +{ + struct mutex *lock = container_of(lock_count, struct mutex, count); + + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, + NULL, _RET_IP_, NULL, 0); +} + +static noinline int __sched +__mutex_lock_killable_slowpath(struct mutex *lock) +{ + return __mutex_lock_common(lock, TASK_KILLABLE, 0, + NULL, _RET_IP_, NULL, 0); +} + +static noinline int __sched +__mutex_lock_interruptible_slowpath(struct mutex *lock) +{ + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, + NULL, _RET_IP_, NULL, 0); +} + +static noinline int __sched +__ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0, + NULL, _RET_IP_, ctx, 1); +} + +static noinline int __sched +__ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx) +{ + return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0, + NULL, _RET_IP_, ctx, 1); +} + +#endif + +/* + * Spinlock based trylock, we take the spinlock and check whether we + * can get the lock: + */ +static inline int __mutex_trylock_slowpath(atomic_t *lock_count) +{ + struct mutex *lock = container_of(lock_count, struct mutex, count); + unsigned long flags; + int prev; + + spin_lock_mutex(&lock->wait_lock, flags); + + prev = atomic_xchg(&lock->count, -1); + if (likely(prev == 1)) { + mutex_set_owner(lock); + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); + } + + /* Set it back to 0 if there are no waiters: */ + if (likely(list_empty(&lock->wait_list))) + atomic_set(&lock->count, 0); + + spin_unlock_mutex(&lock->wait_lock, flags); + + return prev == 1; +} + +/** + * mutex_trylock - try to acquire the mutex, without waiting + * @lock: the mutex to be acquired + * + * Try to acquire the mutex atomically. Returns 1 if the mutex + * has been acquired successfully, and 0 on contention. + * + * NOTE: this function follows the spin_trylock() convention, so + * it is negated from the down_trylock() return values! Be careful + * about this when converting semaphore users to mutexes. + * + * This function must not be used in interrupt context. The + * mutex must be released by the same task that acquired it. + */ +int __sched mutex_trylock(struct mutex *lock) +{ + int ret; + + ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath); + if (ret) + mutex_set_owner(lock); + + return ret; +} +EXPORT_SYMBOL(mutex_trylock); + +#ifndef CONFIG_DEBUG_LOCK_ALLOC +int __sched +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + int ret; + + might_sleep(); + + ret = __mutex_fastpath_lock_retval(&lock->base.count); + + if (likely(!ret)) { + ww_mutex_set_context_fastpath(lock, ctx); + mutex_set_owner(&lock->base); + } else + ret = __ww_mutex_lock_slowpath(lock, ctx); + return ret; +} +EXPORT_SYMBOL(__ww_mutex_lock); + +int __sched +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + int ret; + + might_sleep(); + + ret = __mutex_fastpath_lock_retval(&lock->base.count); + + if (likely(!ret)) { + ww_mutex_set_context_fastpath(lock, ctx); + mutex_set_owner(&lock->base); + } else + ret = __ww_mutex_lock_interruptible_slowpath(lock, ctx); + return ret; +} +EXPORT_SYMBOL(__ww_mutex_lock_interruptible); + +#endif + +/** + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 + * @cnt: the atomic which we are to dec + * @lock: the mutex to return holding if we dec to 0 + * + * return true and hold lock if we dec to 0, return false otherwise + */ +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) +{ + /* dec if we can't possibly hit 0 */ + if (atomic_add_unless(cnt, -1, 1)) + return 0; + /* we might hit 0, so take the lock */ + mutex_lock(lock); + if (!atomic_dec_and_test(cnt)) { + /* when we actually did the dec, we didn't hit 0 */ + mutex_unlock(lock); + return 0; + } + /* we hit 0, and we hold the lock */ + return 1; +} +EXPORT_SYMBOL(atomic_dec_and_mutex_lock); diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h new file mode 100644 index 000000000000..4115fbf83b12 --- /dev/null +++ b/kernel/locking/mutex.h @@ -0,0 +1,48 @@ +/* + * Mutexes: blocking mutual exclusion locks + * + * started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * This file contains mutex debugging related internal prototypes, for the + * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: + */ + +#define spin_lock_mutex(lock, flags) \ + do { spin_lock(lock); (void)(flags); } while (0) +#define spin_unlock_mutex(lock, flags) \ + do { spin_unlock(lock); (void)(flags); } while (0) +#define mutex_remove_waiter(lock, waiter, ti) \ + __list_del((waiter)->list.prev, (waiter)->list.next) + +#ifdef CONFIG_SMP +static inline void mutex_set_owner(struct mutex *lock) +{ + lock->owner = current; +} + +static inline void mutex_clear_owner(struct mutex *lock) +{ + lock->owner = NULL; +} +#else +static inline void mutex_set_owner(struct mutex *lock) +{ +} + +static inline void mutex_clear_owner(struct mutex *lock) +{ +} +#endif + +#define debug_mutex_wake_waiter(lock, waiter) do { } while (0) +#define debug_mutex_free_waiter(waiter) do { } while (0) +#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) +#define debug_mutex_unlock(lock) do { } while (0) +#define debug_mutex_init(lock, name, key) do { } while (0) + +static inline void +debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) +{ +} -- cgit v1.2.3 From 8eddac3f103736163f49255bcb109edadea167f6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 Oct 2013 18:14:17 +0100 Subject: locking: Move the lockdep code to kernel/locking/ Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-wl7s3tta5isufzfguc23et06@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/Makefile | 6 + kernel/locking/lockdep.c | 4257 ++++++++++++++++++++++++++++++++++++ kernel/locking/lockdep_internals.h | 170 ++ kernel/locking/lockdep_proc.c | 683 ++++++ kernel/locking/lockdep_states.h | 9 + 5 files changed, 5125 insertions(+) create mode 100644 kernel/locking/lockdep.c create mode 100644 kernel/locking/lockdep_internals.h create mode 100644 kernel/locking/lockdep_proc.c create mode 100644 kernel/locking/lockdep_states.h (limited to 'kernel/locking') diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index fe8bd58b22f8..c103599fc1ba 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -2,8 +2,14 @@ obj-y += mutex.o ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_lockdep.o = -pg +CFLAGS_REMOVE_lockdep_proc.o = -pg CFLAGS_REMOVE_mutex-debug.o = -pg CFLAGS_REMOVE_rtmutex-debug.o = -pg endif obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o +obj-$(CONFIG_LOCKDEP) += lockdep.o +ifeq ($(CONFIG_PROC_FS),y) +obj-$(CONFIG_LOCKDEP) += lockdep_proc.o +endif diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c new file mode 100644 index 000000000000..4e8e14c34e42 --- /dev/null +++ b/kernel/locking/lockdep.c @@ -0,0 +1,4257 @@ +/* + * kernel/lockdep.c + * + * Runtime locking correctness validator + * + * Started by Ingo Molnar: + * + * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * + * this code maps all the lock dependencies as they occur in a live kernel + * and will warn about the following classes of locking bugs: + * + * - lock inversion scenarios + * - circular lock dependencies + * - hardirq/softirq safe/unsafe locking bugs + * + * Bugs are reported even if the current locking scenario does not cause + * any deadlock at this point. + * + * I.e. if anytime in the past two locks were taken in a different order, + * even if it happened for another task, even if those were different + * locks (but of the same class as this lock), this code will detect it. + * + * Thanks to Arjan van de Ven for coming up with the initial idea of + * mapping lock dependencies runtime. + */ +#define DISABLE_BRANCH_PROFILING +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "lockdep_internals.h" + +#define CREATE_TRACE_POINTS +#include + +#ifdef CONFIG_PROVE_LOCKING +int prove_locking = 1; +module_param(prove_locking, int, 0644); +#else +#define prove_locking 0 +#endif + +#ifdef CONFIG_LOCK_STAT +int lock_stat = 1; +module_param(lock_stat, int, 0644); +#else +#define lock_stat 0 +#endif + +/* + * lockdep_lock: protects the lockdep graph, the hashes and the + * class/list/hash allocators. + * + * This is one of the rare exceptions where it's justified + * to use a raw spinlock - we really dont want the spinlock + * code to recurse back into the lockdep code... + */ +static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; + +static int graph_lock(void) +{ + arch_spin_lock(&lockdep_lock); + /* + * Make sure that if another CPU detected a bug while + * walking the graph we dont change it (while the other + * CPU is busy printing out stuff with the graph lock + * dropped already) + */ + if (!debug_locks) { + arch_spin_unlock(&lockdep_lock); + return 0; + } + /* prevent any recursions within lockdep from causing deadlocks */ + current->lockdep_recursion++; + return 1; +} + +static inline int graph_unlock(void) +{ + if (debug_locks && !arch_spin_is_locked(&lockdep_lock)) { + /* + * The lockdep graph lock isn't locked while we expect it to + * be, we're confused now, bye! + */ + return DEBUG_LOCKS_WARN_ON(1); + } + + current->lockdep_recursion--; + arch_spin_unlock(&lockdep_lock); + return 0; +} + +/* + * Turn lock debugging off and return with 0 if it was off already, + * and also release the graph lock: + */ +static inline int debug_locks_off_graph_unlock(void) +{ + int ret = debug_locks_off(); + + arch_spin_unlock(&lockdep_lock); + + return ret; +} + +static int lockdep_initialized; + +unsigned long nr_list_entries; +static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; + +/* + * All data structures here are protected by the global debug_lock. + * + * Mutex key structs only get allocated, once during bootup, and never + * get freed - this significantly simplifies the debugging code. + */ +unsigned long nr_lock_classes; +static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; + +static inline struct lock_class *hlock_class(struct held_lock *hlock) +{ + if (!hlock->class_idx) { + /* + * Someone passed in garbage, we give up. + */ + DEBUG_LOCKS_WARN_ON(1); + return NULL; + } + return lock_classes + hlock->class_idx - 1; +} + +#ifdef CONFIG_LOCK_STAT +static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], + cpu_lock_stats); + +static inline u64 lockstat_clock(void) +{ + return local_clock(); +} + +static int lock_point(unsigned long points[], unsigned long ip) +{ + int i; + + for (i = 0; i < LOCKSTAT_POINTS; i++) { + if (points[i] == 0) { + points[i] = ip; + break; + } + if (points[i] == ip) + break; + } + + return i; +} + +static void lock_time_inc(struct lock_time *lt, u64 time) +{ + if (time > lt->max) + lt->max = time; + + if (time < lt->min || !lt->nr) + lt->min = time; + + lt->total += time; + lt->nr++; +} + +static inline void lock_time_add(struct lock_time *src, struct lock_time *dst) +{ + if (!src->nr) + return; + + if (src->max > dst->max) + dst->max = src->max; + + if (src->min < dst->min || !dst->nr) + dst->min = src->min; + + dst->total += src->total; + dst->nr += src->nr; +} + +struct lock_class_stats lock_stats(struct lock_class *class) +{ + struct lock_class_stats stats; + int cpu, i; + + memset(&stats, 0, sizeof(struct lock_class_stats)); + for_each_possible_cpu(cpu) { + struct lock_class_stats *pcs = + &per_cpu(cpu_lock_stats, cpu)[class - lock_classes]; + + for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) + stats.contention_point[i] += pcs->contention_point[i]; + + for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++) + stats.contending_point[i] += pcs->contending_point[i]; + + lock_time_add(&pcs->read_waittime, &stats.read_waittime); + lock_time_add(&pcs->write_waittime, &stats.write_waittime); + + lock_time_add(&pcs->read_holdtime, &stats.read_holdtime); + lock_time_add(&pcs->write_holdtime, &stats.write_holdtime); + + for (i = 0; i < ARRAY_SIZE(stats.bounces); i++) + stats.bounces[i] += pcs->bounces[i]; + } + + return stats; +} + +void clear_lock_stats(struct lock_class *class) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct lock_class_stats *cpu_stats = + &per_cpu(cpu_lock_stats, cpu)[class - lock_classes]; + + memset(cpu_stats, 0, sizeof(struct lock_class_stats)); + } + memset(class->contention_point, 0, sizeof(class->contention_point)); + memset(class->contending_point, 0, sizeof(class->contending_point)); +} + +static struct lock_class_stats *get_lock_stats(struct lock_class *class) +{ + return &get_cpu_var(cpu_lock_stats)[class - lock_classes]; +} + +static void put_lock_stats(struct lock_class_stats *stats) +{ + put_cpu_var(cpu_lock_stats); +} + +static void lock_release_holdtime(struct held_lock *hlock) +{ + struct lock_class_stats *stats; + u64 holdtime; + + if (!lock_stat) + return; + + holdtime = lockstat_clock() - hlock->holdtime_stamp; + + stats = get_lock_stats(hlock_class(hlock)); + if (hlock->read) + lock_time_inc(&stats->read_holdtime, holdtime); + else + lock_time_inc(&stats->write_holdtime, holdtime); + put_lock_stats(stats); +} +#else +static inline void lock_release_holdtime(struct held_lock *hlock) +{ +} +#endif + +/* + * We keep a global list of all lock classes. The list only grows, + * never shrinks. The list is only accessed with the lockdep + * spinlock lock held. + */ +LIST_HEAD(all_lock_classes); + +/* + * The lockdep classes are in a hash-table as well, for fast lookup: + */ +#define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1) +#define CLASSHASH_SIZE (1UL << CLASSHASH_BITS) +#define __classhashfn(key) hash_long((unsigned long)key, CLASSHASH_BITS) +#define classhashentry(key) (classhash_table + __classhashfn((key))) + +static struct list_head classhash_table[CLASSHASH_SIZE]; + +/* + * We put the lock dependency chains into a hash-table as well, to cache + * their existence: + */ +#define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1) +#define CHAINHASH_SIZE (1UL << CHAINHASH_BITS) +#define __chainhashfn(chain) hash_long(chain, CHAINHASH_BITS) +#define chainhashentry(chain) (chainhash_table + __chainhashfn((chain))) + +static struct list_head chainhash_table[CHAINHASH_SIZE]; + +/* + * The hash key of the lock dependency chains is a hash itself too: + * it's a hash of all locks taken up to that lock, including that lock. + * It's a 64-bit hash, because it's important for the keys to be + * unique. + */ +#define iterate_chain_key(key1, key2) \ + (((key1) << MAX_LOCKDEP_KEYS_BITS) ^ \ + ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS)) ^ \ + (key2)) + +void lockdep_off(void) +{ + current->lockdep_recursion++; +} +EXPORT_SYMBOL(lockdep_off); + +void lockdep_on(void) +{ + current->lockdep_recursion--; +} +EXPORT_SYMBOL(lockdep_on); + +/* + * Debugging switches: + */ + +#define VERBOSE 0 +#define VERY_VERBOSE 0 + +#if VERBOSE +# define HARDIRQ_VERBOSE 1 +# define SOFTIRQ_VERBOSE 1 +# define RECLAIM_VERBOSE 1 +#else +# define HARDIRQ_VERBOSE 0 +# define SOFTIRQ_VERBOSE 0 +# define RECLAIM_VERBOSE 0 +#endif + +#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE +/* + * Quick filtering for interesting events: + */ +static int class_filter(struct lock_class *class) +{ +#if 0 + /* Example */ + if (class->name_version == 1 && + !strcmp(class->name, "lockname")) + return 1; + if (class->name_version == 1 && + !strcmp(class->name, "&struct->lockfield")) + return 1; +#endif + /* Filter everything else. 1 would be to allow everything else */ + return 0; +} +#endif + +static int verbose(struct lock_class *class) +{ +#if VERBOSE + return class_filter(class); +#endif + return 0; +} + +/* + * Stack-trace: tightly packed array of stack backtrace + * addresses. Protected by the graph_lock. + */ +unsigned long nr_stack_trace_entries; +static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES]; + +static void print_lockdep_off(const char *bug_msg) +{ + printk(KERN_DEBUG "%s\n", bug_msg); + printk(KERN_DEBUG "turning off the locking correctness validator.\n"); + printk(KERN_DEBUG "Please attach the output of /proc/lock_stat to the bug report\n"); +} + +static int save_trace(struct stack_trace *trace) +{ + trace->nr_entries = 0; + trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; + trace->entries = stack_trace + nr_stack_trace_entries; + + trace->skip = 3; + + save_stack_trace(trace); + + /* + * Some daft arches put -1 at the end to indicate its a full trace. + * + * this is buggy anyway, since it takes a whole extra entry so a + * complete trace that maxes out the entries provided will be reported + * as incomplete, friggin useless + */ + if (trace->nr_entries != 0 && + trace->entries[trace->nr_entries-1] == ULONG_MAX) + trace->nr_entries--; + + trace->max_entries = trace->nr_entries; + + nr_stack_trace_entries += trace->nr_entries; + + if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) { + if (!debug_locks_off_graph_unlock()) + return 0; + + print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!"); + dump_stack(); + + return 0; + } + + return 1; +} + +unsigned int nr_hardirq_chains; +unsigned int nr_softirq_chains; +unsigned int nr_process_chains; +unsigned int max_lockdep_depth; + +#ifdef CONFIG_DEBUG_LOCKDEP +/* + * We cannot printk in early bootup code. Not even early_printk() + * might work. So we mark any initialization errors and printk + * about it later on, in lockdep_info(). + */ +static int lockdep_init_error; +static const char *lock_init_error; +static unsigned long lockdep_init_trace_data[20]; +static struct stack_trace lockdep_init_trace = { + .max_entries = ARRAY_SIZE(lockdep_init_trace_data), + .entries = lockdep_init_trace_data, +}; + +/* + * Various lockdep statistics: + */ +DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats); +#endif + +/* + * Locking printouts: + */ + +#define __USAGE(__STATE) \ + [LOCK_USED_IN_##__STATE] = "IN-"__stringify(__STATE)"-W", \ + [LOCK_ENABLED_##__STATE] = __stringify(__STATE)"-ON-W", \ + [LOCK_USED_IN_##__STATE##_READ] = "IN-"__stringify(__STATE)"-R",\ + [LOCK_ENABLED_##__STATE##_READ] = __stringify(__STATE)"-ON-R", + +static const char *usage_str[] = +{ +#define LOCKDEP_STATE(__STATE) __USAGE(__STATE) +#include "lockdep_states.h" +#undef LOCKDEP_STATE + [LOCK_USED] = "INITIAL USE", +}; + +const char * __get_key_name(struct lockdep_subclass_key *key, char *str) +{ + return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str); +} + +static inline unsigned long lock_flag(enum lock_usage_bit bit) +{ + return 1UL << bit; +} + +static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit) +{ + char c = '.'; + + if (class->usage_mask & lock_flag(bit + 2)) + c = '+'; + if (class->usage_mask & lock_flag(bit)) { + c = '-'; + if (class->usage_mask & lock_flag(bit + 2)) + c = '?'; + } + + return c; +} + +void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS]) +{ + int i = 0; + +#define LOCKDEP_STATE(__STATE) \ + usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE); \ + usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE##_READ); +#include "lockdep_states.h" +#undef LOCKDEP_STATE + + usage[i] = '\0'; +} + +static void __print_lock_name(struct lock_class *class) +{ + char str[KSYM_NAME_LEN]; + const char *name; + + name = class->name; + if (!name) { + name = __get_key_name(class->key, str); + printk("%s", name); + } else { + printk("%s", name); + if (class->name_version > 1) + printk("#%d", class->name_version); + if (class->subclass) + printk("/%d", class->subclass); + } +} + +static void print_lock_name(struct lock_class *class) +{ + char usage[LOCK_USAGE_CHARS]; + + get_usage_chars(class, usage); + + printk(" ("); + __print_lock_name(class); + printk("){%s}", usage); +} + +static void print_lockdep_cache(struct lockdep_map *lock) +{ + const char *name; + char str[KSYM_NAME_LEN]; + + name = lock->name; + if (!name) + name = __get_key_name(lock->key->subkeys, str); + + printk("%s", name); +} + +static void print_lock(struct held_lock *hlock) +{ + print_lock_name(hlock_class(hlock)); + printk(", at: "); + print_ip_sym(hlock->acquire_ip); +} + +static void lockdep_print_held_locks(struct task_struct *curr) +{ + int i, depth = curr->lockdep_depth; + + if (!depth) { + printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr)); + return; + } + printk("%d lock%s held by %s/%d:\n", + depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr)); + + for (i = 0; i < depth; i++) { + printk(" #%d: ", i); + print_lock(curr->held_locks + i); + } +} + +static void print_kernel_ident(void) +{ + printk("%s %.*s %s\n", init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version, + print_tainted()); +} + +static int very_verbose(struct lock_class *class) +{ +#if VERY_VERBOSE + return class_filter(class); +#endif + return 0; +} + +/* + * Is this the address of a static object: + */ +static int static_obj(void *obj) +{ + unsigned long start = (unsigned long) &_stext, + end = (unsigned long) &_end, + addr = (unsigned long) obj; + + /* + * static variable? + */ + if ((addr >= start) && (addr < end)) + return 1; + + if (arch_is_kernel_data(addr)) + return 1; + + /* + * in-kernel percpu var? + */ + if (is_kernel_percpu_address(addr)) + return 1; + + /* + * module static or percpu var? + */ + return is_module_address(addr) || is_module_percpu_address(addr); +} + +/* + * To make lock name printouts unique, we calculate a unique + * class->name_version generation counter: + */ +static int count_matching_names(struct lock_class *new_class) +{ + struct lock_class *class; + int count = 0; + + if (!new_class->name) + return 0; + + list_for_each_entry(class, &all_lock_classes, lock_entry) { + if (new_class->key - new_class->subclass == class->key) + return class->name_version; + if (class->name && !strcmp(class->name, new_class->name)) + count = max(count, class->name_version); + } + + return count + 1; +} + +/* + * Register a lock's class in the hash-table, if the class is not present + * yet. Otherwise we look it up. We cache the result in the lock object + * itself, so actual lookup of the hash should be once per lock object. + */ +static inline struct lock_class * +look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) +{ + struct lockdep_subclass_key *key; + struct list_head *hash_head; + struct lock_class *class; + +#ifdef CONFIG_DEBUG_LOCKDEP + /* + * If the architecture calls into lockdep before initializing + * the hashes then we'll warn about it later. (we cannot printk + * right now) + */ + if (unlikely(!lockdep_initialized)) { + lockdep_init(); + lockdep_init_error = 1; + lock_init_error = lock->name; + save_stack_trace(&lockdep_init_trace); + } +#endif + + if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { + debug_locks_off(); + printk(KERN_ERR + "BUG: looking up invalid subclass: %u\n", subclass); + printk(KERN_ERR + "turning off the locking correctness validator.\n"); + dump_stack(); + return NULL; + } + + /* + * Static locks do not have their class-keys yet - for them the key + * is the lock object itself: + */ + if (unlikely(!lock->key)) + lock->key = (void *)lock; + + /* + * NOTE: the class-key must be unique. For dynamic locks, a static + * lock_class_key variable is passed in through the mutex_init() + * (or spin_lock_init()) call - which acts as the key. For static + * locks we use the lock object itself as the key. + */ + BUILD_BUG_ON(sizeof(struct lock_class_key) > + sizeof(struct lockdep_map)); + + key = lock->key->subkeys + subclass; + + hash_head = classhashentry(key); + + /* + * We can walk the hash lockfree, because the hash only + * grows, and we are careful when adding entries to the end: + */ + list_for_each_entry(class, hash_head, hash_entry) { + if (class->key == key) { + /* + * Huh! same key, different name? Did someone trample + * on some memory? We're most confused. + */ + WARN_ON_ONCE(class->name != lock->name); + return class; + } + } + + return NULL; +} + +/* + * Register a lock's class in the hash-table, if the class is not present + * yet. Otherwise we look it up. We cache the result in the lock object + * itself, so actual lookup of the hash should be once per lock object. + */ +static inline struct lock_class * +register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) +{ + struct lockdep_subclass_key *key; + struct list_head *hash_head; + struct lock_class *class; + unsigned long flags; + + class = look_up_lock_class(lock, subclass); + if (likely(class)) + goto out_set_class_cache; + + /* + * Debug-check: all keys must be persistent! + */ + if (!static_obj(lock->key)) { + debug_locks_off(); + printk("INFO: trying to register non-static key.\n"); + printk("the code is fine but needs lockdep annotation.\n"); + printk("turning off the locking correctness validator.\n"); + dump_stack(); + + return NULL; + } + + key = lock->key->subkeys + subclass; + hash_head = classhashentry(key); + + raw_local_irq_save(flags); + if (!graph_lock()) { + raw_local_irq_restore(flags); + return NULL; + } + /* + * We have to do the hash-walk again, to avoid races + * with another CPU: + */ + list_for_each_entry(class, hash_head, hash_entry) + if (class->key == key) + goto out_unlock_set; + /* + * Allocate a new key from the static array, and add it to + * the hash: + */ + if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { + if (!debug_locks_off_graph_unlock()) { + raw_local_irq_restore(flags); + return NULL; + } + raw_local_irq_restore(flags); + + print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!"); + dump_stack(); + return NULL; + } + class = lock_classes + nr_lock_classes++; + debug_atomic_inc(nr_unused_locks); + class->key = key; + class->name = lock->name; + class->subclass = subclass; + INIT_LIST_HEAD(&class->lock_entry); + INIT_LIST_HEAD(&class->locks_before); + INIT_LIST_HEAD(&class->locks_after); + class->name_version = count_matching_names(class); + /* + * We use RCU's safe list-add method to make + * parallel walking of the hash-list safe: + */ + list_add_tail_rcu(&class->hash_entry, hash_head); + /* + * Add it to the global list of classes: + */ + list_add_tail_rcu(&class->lock_entry, &all_lock_classes); + + if (verbose(class)) { + graph_unlock(); + raw_local_irq_restore(flags); + + printk("\nnew class %p: %s", class->key, class->name); + if (class->name_version > 1) + printk("#%d", class->name_version); + printk("\n"); + dump_stack(); + + raw_local_irq_save(flags); + if (!graph_lock()) { + raw_local_irq_restore(flags); + return NULL; + } + } +out_unlock_set: + graph_unlock(); + raw_local_irq_restore(flags); + +out_set_class_cache: + if (!subclass || force) + lock->class_cache[0] = class; + else if (subclass < NR_LOCKDEP_CACHING_CLASSES) + lock->class_cache[subclass] = class; + + /* + * Hash collision, did we smoke some? We found a class with a matching + * hash but the subclass -- which is hashed in -- didn't match. + */ + if (DEBUG_LOCKS_WARN_ON(class->subclass != subclass)) + return NULL; + + return class; +} + +#ifdef CONFIG_PROVE_LOCKING +/* + * Allocate a lockdep entry. (assumes the graph_lock held, returns + * with NULL on failure) + */ +static struct lock_list *alloc_list_entry(void) +{ + if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { + if (!debug_locks_off_graph_unlock()) + return NULL; + + print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!"); + dump_stack(); + return NULL; + } + return list_entries + nr_list_entries++; +} + +/* + * Add a new dependency to the head of the list: + */ +static int add_lock_to_list(struct lock_class *class, struct lock_class *this, + struct list_head *head, unsigned long ip, + int distance, struct stack_trace *trace) +{ + struct lock_list *entry; + /* + * Lock not present yet - get a new dependency struct and + * add it to the list: + */ + entry = alloc_list_entry(); + if (!entry) + return 0; + + entry->class = this; + entry->distance = distance; + entry->trace = *trace; + /* + * Since we never remove from the dependency list, the list can + * be walked lockless by other CPUs, it's only allocation + * that must be protected by the spinlock. But this also means + * we must make new entries visible only once writes to the + * entry become visible - hence the RCU op: + */ + list_add_tail_rcu(&entry->entry, head); + + return 1; +} + +/* + * For good efficiency of modular, we use power of 2 + */ +#define MAX_CIRCULAR_QUEUE_SIZE 4096UL +#define CQ_MASK (MAX_CIRCULAR_QUEUE_SIZE-1) + +/* + * The circular_queue and helpers is used to implement the + * breadth-first search(BFS)algorithem, by which we can build + * the shortest path from the next lock to be acquired to the + * previous held lock if there is a circular between them. + */ +struct circular_queue { + unsigned long element[MAX_CIRCULAR_QUEUE_SIZE]; + unsigned int front, rear; +}; + +static struct circular_queue lock_cq; + +unsigned int max_bfs_queue_depth; + +static unsigned int lockdep_dependency_gen_id; + +static inline void __cq_init(struct circular_queue *cq) +{ + cq->front = cq->rear = 0; + lockdep_dependency_gen_id++; +} + +static inline int __cq_empty(struct circular_queue *cq) +{ + return (cq->front == cq->rear); +} + +static inline int __cq_full(struct circular_queue *cq) +{ + return ((cq->rear + 1) & CQ_MASK) == cq->front; +} + +static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem) +{ + if (__cq_full(cq)) + return -1; + + cq->element[cq->rear] = elem; + cq->rear = (cq->rear + 1) & CQ_MASK; + return 0; +} + +static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem) +{ + if (__cq_empty(cq)) + return -1; + + *elem = cq->element[cq->front]; + cq->front = (cq->front + 1) & CQ_MASK; + return 0; +} + +static inline unsigned int __cq_get_elem_count(struct circular_queue *cq) +{ + return (cq->rear - cq->front) & CQ_MASK; +} + +static inline void mark_lock_accessed(struct lock_list *lock, + struct lock_list *parent) +{ + unsigned long nr; + + nr = lock - list_entries; + WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */ + lock->parent = parent; + lock->class->dep_gen_id = lockdep_dependency_gen_id; +} + +static inline unsigned long lock_accessed(struct lock_list *lock) +{ + unsigned long nr; + + nr = lock - list_entries; + WARN_ON(nr >= nr_list_entries); /* Out-of-bounds, input fail */ + return lock->class->dep_gen_id == lockdep_dependency_gen_id; +} + +static inline struct lock_list *get_lock_parent(struct lock_list *child) +{ + return child->parent; +} + +static inline int get_lock_depth(struct lock_list *child) +{ + int depth = 0; + struct lock_list *parent; + + while ((parent = get_lock_parent(child))) { + child = parent; + depth++; + } + return depth; +} + +static int __bfs(struct lock_list *source_entry, + void *data, + int (*match)(struct lock_list *entry, void *data), + struct lock_list **target_entry, + int forward) +{ + struct lock_list *entry; + struct list_head *head; + struct circular_queue *cq = &lock_cq; + int ret = 1; + + if (match(source_entry, data)) { + *target_entry = source_entry; + ret = 0; + goto exit; + } + + if (forward) + head = &source_entry->class->locks_after; + else + head = &source_entry->class->locks_before; + + if (list_empty(head)) + goto exit; + + __cq_init(cq); + __cq_enqueue(cq, (unsigned long)source_entry); + + while (!__cq_empty(cq)) { + struct lock_list *lock; + + __cq_dequeue(cq, (unsigned long *)&lock); + + if (!lock->class) { + ret = -2; + goto exit; + } + + if (forward) + head = &lock->class->locks_after; + else + head = &lock->class->locks_before; + + list_for_each_entry(entry, head, entry) { + if (!lock_accessed(entry)) { + unsigned int cq_depth; + mark_lock_accessed(entry, lock); + if (match(entry, data)) { + *target_entry = entry; + ret = 0; + goto exit; + } + + if (__cq_enqueue(cq, (unsigned long)entry)) { + ret = -1; + goto exit; + } + cq_depth = __cq_get_elem_count(cq); + if (max_bfs_queue_depth < cq_depth) + max_bfs_queue_depth = cq_depth; + } + } + } +exit: + return ret; +} + +static inline int __bfs_forwards(struct lock_list *src_entry, + void *data, + int (*match)(struct lock_list *entry, void *data), + struct lock_list **target_entry) +{ + return __bfs(src_entry, data, match, target_entry, 1); + +} + +static inline int __bfs_backwards(struct lock_list *src_entry, + void *data, + int (*match)(struct lock_list *entry, void *data), + struct lock_list **target_entry) +{ + return __bfs(src_entry, data, match, target_entry, 0); + +} + +/* + * Recursive, forwards-direction lock-dependency checking, used for + * both noncyclic checking and for hardirq-unsafe/softirq-unsafe + * checking. + */ + +/* + * Print a dependency chain entry (this is only done when a deadlock + * has been detected): + */ +static noinline int +print_circular_bug_entry(struct lock_list *target, int depth) +{ + if (debug_locks_silent) + return 0; + printk("\n-> #%u", depth); + print_lock_name(target->class); + printk(":\n"); + print_stack_trace(&target->trace, 6); + + return 0; +} + +static void +print_circular_lock_scenario(struct held_lock *src, + struct held_lock *tgt, + struct lock_list *prt) +{ + struct lock_class *source = hlock_class(src); + struct lock_class *target = hlock_class(tgt); + struct lock_class *parent = prt->class; + + /* + * A direct locking problem where unsafe_class lock is taken + * directly by safe_class lock, then all we need to show + * is the deadlock scenario, as it is obvious that the + * unsafe lock is taken under the safe lock. + * + * But if there is a chain instead, where the safe lock takes + * an intermediate lock (middle_class) where this lock is + * not the same as the safe lock, then the lock chain is + * used to describe the problem. Otherwise we would need + * to show a different CPU case for each link in the chain + * from the safe_class lock to the unsafe_class lock. + */ + if (parent != source) { + printk("Chain exists of:\n "); + __print_lock_name(source); + printk(" --> "); + __print_lock_name(parent); + printk(" --> "); + __print_lock_name(target); + printk("\n\n"); + } + + printk(" Possible unsafe locking scenario:\n\n"); + printk(" CPU0 CPU1\n"); + printk(" ---- ----\n"); + printk(" lock("); + __print_lock_name(target); + printk(");\n"); + printk(" lock("); + __print_lock_name(parent); + printk(");\n"); + printk(" lock("); + __print_lock_name(target); + printk(");\n"); + printk(" lock("); + __print_lock_name(source); + printk(");\n"); + printk("\n *** DEADLOCK ***\n\n"); +} + +/* + * When a circular dependency is detected, print the + * header first: + */ +static noinline int +print_circular_bug_header(struct lock_list *entry, unsigned int depth, + struct held_lock *check_src, + struct held_lock *check_tgt) +{ + struct task_struct *curr = current; + + if (debug_locks_silent) + return 0; + + printk("\n"); + printk("======================================================\n"); + printk("[ INFO: possible