From a9d6938ddb7f892552013b93e4842fc1a538628d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Mar 2019 13:30:01 -0700 Subject: locktorture: NULL cxt.lwsa and cxt.lrsa to allow bad-arg detection Currently, lock_torture_cleanup() uses the values of cxt.lwsa and cxt.lrsa to detect bad parameters that prevented locktorture from initializing, let alone running. In this case, lock_torture_cleanup() does no cleanup aside from invoking torture_cleanup_begin() and torture_cleanup_end(), as required to permit future torture tests to run. However, this heuristic fails if the run with bad parameters was preceded by a previous run that actually ran: In this case, both cxt.lwsa and cxt.lrsa will remain non-zero, which means that the current lock_torture_cleanup() invocation will be unable to detect the fact that it should skip cleanup, which can result in charming outcomes such as double frees. This commit therefore NULLs out both cxt.lwsa and cxt.lrsa at the end of any run that actually ran. Signed-off-by: Paul E. McKenney Cc: Davidlohr Bueso Cc: Josh Triplett --- kernel/locking/locktorture.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index ad40a2617063..80a463d31a8d 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -829,7 +829,9 @@ static void lock_torture_cleanup(void) "End of test: SUCCESS"); kfree(cxt.lwsa); + cxt.lwsa = NULL; kfree(cxt.lrsa); + cxt.lrsa = NULL; end: torture_cleanup_end(); -- cgit v1.2.3 From 46ad0840b1584b92b5ff2cc3ed0b011dd6b8e0f1 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 22 Mar 2019 10:30:06 -0400 Subject: locking/rwsem: Remove arch specific rwsem files As the generic rwsem-xadd code is using the appropriate acquire and release versions of the atomic operations, the arch specific rwsem.h files will not be that much faster than the generic code as long as the atomic functions are properly implemented. So we can remove those arch specific rwsem.h and stop building asm/rwsem.h to reduce maintenance effort. Currently, only x86, alpha and ia64 have implemented architecture specific fast paths. I don't have access to alpha and ia64 systems for testing, but they are legacy systems that are not likely to be updated to the latest kernel anyway. By using a rwsem microbenchmark, the total locking rates on a 4-socket 56-core 112-thread x86-64 system before and after the patch were as follows (mixed means equal # of read and write locks): Before Patch After Patch # of Threads wlock rlock mixed wlock rlock mixed ------------ ----- ----- ----- ----- ----- ----- 1 29,201 30,143 29,458 28,615 30,172 29,201 2 6,807 13,299 1,171 7,725 15,025 1,804 4 6,504 12,755 1,520 7,127 14,286 1,345 8 6,762 13,412 764 6,826 13,652 726 16 6,693 15,408 662 6,599 15,938 626 32 6,145 15,286 496 5,549 15,487 511 64 5,812 15,495 60 5,858 15,572 60 There were some run-to-run variations for the multi-thread tests. For x86-64, using the generic C code fast path seems to be a little bit faster than the assembly version with low lock contention. Looking at the assembly version of the fast paths, there are assembly to/from C code wrappers that save and restore all the callee-clobbered registers (7 registers on x86-64). The assembly generated from the generic C code doesn't need to do that. That may explain the slight performance gain here. The generic asm rwsem.h can also be merged into kernel/locking/rwsem.h with no code change as no other code other than those under kernel/locking needs to access the internal rwsem macros and functions. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: H. Peter Anvin Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: linux-m68k@lists.linux-m68k.org Cc: linux-riscv@lists.infradead.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linuxppc-dev@lists.ozlabs.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: uclinux-h8-devel@lists.sourceforge.jp Link: https://lkml.kernel.org/r/20190322143008.21313-2-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/percpu-rwsem.c | 2 + kernel/locking/rwsem.h | 130 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) (limited to 'kernel/locking') diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 883cf1b92d90..f17dad99eec8 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -7,6 +7,8 @@ #include #include +#include "rwsem.h" + int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, const char *name, struct lock_class_key *rwsem_key) { diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index bad2bca0268b..067e265fa5c1 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -32,6 +32,26 @@ # define DEBUG_RWSEMS_WARN_ON(c) #endif +/* + * R/W semaphores originally for PPC using the stuff in lib/rwsem.c. + * Adapted largely from include/asm-i386/rwsem.h + * by Paul Mackerras . + */ + +/* + * the semaphore definition + */ +#ifdef CONFIG_64BIT +# define RWSEM_ACTIVE_MASK 0xffffffffL +#else +# define RWSEM_ACTIVE_MASK 0x0000ffffL +#endif + +#define RWSEM_ACTIVE_BIAS 0x00000001L +#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) +#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + #ifdef CONFIG_RWSEM_SPIN_ON_OWNER /* * All writes to owner are protected by WRITE_ONCE() to make sure that @@ -132,3 +152,113 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) { } #endif + +#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM +/* + * lock for reading + */ +static inline void __down_read(struct rw_semaphore *sem) +{ + if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) + rwsem_down_read_failed(sem); +} + +static inline int __down_read_killable(struct rw_semaphore *sem) +{ + if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { + if (IS_ERR(rwsem_down_read_failed_killable(sem))) + return -EINTR; + } + + return 0; +} + +static inline int __down_read_trylock(struct rw_semaphore *sem) +{ + long tmp; + + while ((tmp = atomic_long_read(&sem->count)) >= 0) { + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, + tmp + RWSEM_ACTIVE_READ_BIAS)) { + return 1; + } + } + return 0; +} + +/* + * lock for writing + */ +static inline void __down_write(struct rw_semaphore *sem) +{ + long tmp; + + tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, + &sem->count); + if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) + rwsem_down_write_failed(sem); +} + +static inline int __down_write_killable(struct rw_semaphore *sem) +{ + long tmp; + + tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, + &sem->count); + if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) + if (IS_ERR(rwsem_down_write_failed_killable(sem))) + return -EINTR; + return 0; +} + +static inline int __down_write_trylock(struct rw_semaphore *sem) +{ + long tmp; + + tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, + RWSEM_ACTIVE_WRITE_BIAS); + return tmp == RWSEM_UNLOCKED_VALUE; +} + +/* + * unlock after reading + */ +static inline void __up_read(struct rw_semaphore *sem) +{ + long tmp; + + tmp = atomic_long_dec_return_release(&sem->count); + if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) + rwsem_wake(sem); +} + +/* + * unlock after writing + */ +static inline void __up_write(struct rw_semaphore *sem) +{ + if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, + &sem->count) < 0)) + rwsem_wake(sem); +} + +/* + * downgrade write lock to read lock + */ +static inline void __downgrade_write(struct rw_semaphore *sem) +{ + long tmp; + + /* + * When downgrading from exclusive to shared ownership, + * anything inside the write-locked region cannot leak + * into the read side. In contrast, anything in the + * read-locked region is ok to be re-ordered into the + * write side. As such, rely on RELEASE semantics. + */ + tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); + if (tmp < 0) + rwsem_downgrade_wake(sem); +} + +#endif /* CONFIG_RWSEM_XCHGADD_ALGORITHM */ -- cgit v1.2.3 From 390a0c62c23cb026cd4664a66f6f45fed3a215f6 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 22 Mar 2019 10:30:07 -0400 Subject: locking/rwsem: Remove rwsem-spinlock.c & use rwsem-xadd.c for all archs Currently, we have two different implementation of rwsem: 1) CONFIG_RWSEM_GENERIC_SPINLOCK (rwsem-spinlock.c) 2) CONFIG_RWSEM_XCHGADD_ALGORITHM (rwsem-xadd.c) As we are going to use a single generic implementation for rwsem-xadd.c and no architecture-specific code will be needed, there is no point in keeping two different implementations of rwsem. In most cases, the performance of rwsem-spinlock.c will be worse. It also doesn't get all the performance tuning and optimizations that had been implemented in rwsem-xadd.c over the years. For simplication, we are going to remove rwsem-spinlock.c and make all architectures use a single implementation of rwsem - rwsem-xadd.c. All references to RWSEM_GENERIC_SPINLOCK and RWSEM_XCHGADD_ALGORITHM in the code are removed. Suggested-by: Peter Zijlstra Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: H. Peter Anvin Cc: Paul E. McKenney Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: linux-m68k@lists.linux-m68k.org Cc: linux-riscv@lists.infradead.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linuxppc-dev@lists.ozlabs.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: uclinux-h8-devel@lists.sourceforge.jp Link: https://lkml.kernel.org/r/20190322143008.21313-3-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/Makefile | 4 +- kernel/locking/rwsem-spinlock.c | 339 ---------------------------------------- kernel/locking/rwsem.h | 3 - 3 files changed, 1 insertion(+), 345 deletions(-) delete mode 100644 kernel/locking/rwsem-spinlock.c (limited to 'kernel/locking') diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 392c7f23af76..1af83e9ce57d 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -3,7 +3,7 @@ # and is generally not a function of system call inputs. KCOV_INSTRUMENT := n -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o +obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o rwsem-xadd.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) @@ -25,8 +25,6 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o -obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o -obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c deleted file mode 100644 index a7ffb2a96ede..000000000000 --- a/kernel/locking/rwsem-spinlock.c +++ /dev/null @@ -1,339 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* rwsem-spinlock.c: R/W semaphores: contention handling functions for - * generic spinlock implementation - * - * Copyright (c) 2001 David Howells (dhowells@redhat.com). - * - Derived partially from idea by Andrea Arcangeli - * - Derived also from comments by Linus - */ -#include -#include -#include -#include - -enum rwsem_waiter_type { - RWSEM_WAITING_FOR_WRITE, - RWSEM_WAITING_FOR_READ -}; - -struct rwsem_waiter { - struct list_head list; - struct task_struct *task; - enum rwsem_waiter_type type; -}; - -int rwsem_is_locked(struct rw_semaphore *sem) -{ - int ret = 1; - unsigned long flags; - - if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { - ret = (sem->count != 0); - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - } - return ret; -} -EXPORT_SYMBOL(rwsem_is_locked); - -/* - * initialise the semaphore - */ -void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key) -{ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* - * Make sure we are not reinitializing a held semaphore: - */ - debug_check_no_locks_freed((void *)sem, sizeof(*sem)); - lockdep_init_map(&sem->dep_map, name, key, 0); -#endif - sem->count = 0; - raw_spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -} -EXPORT_SYMBOL(__init_rwsem); - -/* - * handle the lock release when processes blocked on it that can now run - * - if we come here, then: - * - the 'active count' _reached_ zero - * - the 'waiting count' is non-zero - * - the spinlock must be held by the caller - * - woken process blocks are discarded from the list after having task zeroed - * - writers are only woken if wakewrite is non-zero - */ -static inline struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) -{ - struct rwsem_waiter *waiter; - struct task_struct *tsk; - int woken; - - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - - if (waiter->type == RWSEM_WAITING_FOR_WRITE) { - if (wakewrite) - /* Wake up a writer. Note that we do not grant it the - * lock - it will have to acquire it when it runs. */ - wake_up_process(waiter->task); - goto out; - } - - /* grant an infinite number of read locks to the front of the queue */ - woken = 0; - do { - struct list_head *next = waiter->list.next; - - list_del(&waiter->list); - tsk = waiter->task; - /* - * Make sure we do not wakeup the next reader before - * setting the nil condition to grant the next reader; - * otherwise we could miss the wakeup on the other - * side and end up sleeping again. See the pairing - * in rwsem_down_read_failed(). - */ - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); - woken++; - if (next == &sem->wait_list) - break; - waiter = list_entry(next, struct rwsem_waiter, list); - } while (waiter->type != RWSEM_WAITING_FOR_WRITE); - - sem->count += woken; - - out: - return sem; -} - -/* - * wake a single writer - */ -static inline struct rw_semaphore * -__rwsem_wake_one_writer(struct rw_semaphore *sem) -{ - struct rwsem_waiter *waiter; - - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - wake_up_process(waiter->task); - - return sem; -} - -/* - * get a read lock on the semaphore - */ -int __sched __down_read_common(struct rw_semaphore *sem, int state) -{ - struct rwsem_waiter waiter; - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (sem->count >= 0 && list_empty(&sem->wait_list)) { - /* granted */ - sem->count++; - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - goto out; - } - - /* set up my own style of waitqueue */ - waiter.task = current; - waiter.type = RWSEM_WAITING_FOR_READ; - get_task_struct(current); - - list_add_tail(&waiter.list, &sem->wait_list); - - /* wait to be given the lock */ - for (;;) { - if (!waiter.task) - break; - if (signal_pending_state(state, current)) - goto out_nolock; - set_current_state(state); - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - schedule(); - raw_spin_lock_irqsave(&sem->wait_lock, flags); - } - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - out: - return 0; - -out_nolock: - /* - * We didn't take the lock, so that there is a writer, which - * is owner or the first waiter of the sem. If it's a waiter, - * it will be woken by current owner. Not need to wake anybody. - */ - list_del(&waiter.list); - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - return -EINTR; -} - -void __sched __down_read(struct rw_semaphore *sem) -{ - __down_read_common(sem, TASK_UNINTERRUPTIBLE); -} - -int __sched __down_read_killable(struct rw_semaphore *sem) -{ - return __down_read_common(sem, TASK_KILLABLE); -} - -/* - * trylock for reading -- returns 1 if successful, 0 if contention - */ -int __down_read_trylock(struct rw_semaphore *sem) -{ - unsigned long flags; - int ret = 0; - - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (sem->count >= 0 && list_empty(&sem->wait_list)) { - /* granted */ - sem->count++; - ret = 1; - } - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return ret; -} - -/* - * get a write lock on the semaphore - */ -int __sched __down_write_common(struct rw_semaphore *sem, int state) -{ - struct rwsem_waiter waiter; - unsigned long flags; - int ret = 0; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - /* set up my own style of waitqueue */ - waiter.task = current; - waiter.type = RWSEM_WAITING_FOR_WRITE; - list_add_tail(&waiter.list, &sem->wait_list); - - /* wait for someone to release the lock */ - for (;;) { - /* - * That is the key to support write lock stealing: allows the - * task already on CPU to get the lock soon rather than put - * itself into sleep and waiting for system woke it or someone - * else in the head of the wait list up. - */ - if (sem->count == 0) - break; - if (signal_pending_state(state, current)) - goto out_nolock; - - set_current_state(state); - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - schedule(); - raw_spin_lock_irqsave(&sem->wait_lock, flags); - } - /* got the lock */ - sem->count = -1; - list_del(&waiter.list); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return ret; - -out_nolock: - list_del(&waiter.list); - if (!list_empty(&sem->wait_list) && sem->count >= 0) - __rwsem_do_wake(sem, 0); - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return -EINTR; -} - -void __sched __down_write(struct rw_semaphore *sem) -{ - __down_write_common(sem, TASK_UNINTERRUPTIBLE); -} - -int __sched __down_write_killable(struct rw_semaphore *sem) -{ - return __down_write_common(sem, TASK_KILLABLE); -} - -/* - * trylock for writing -- returns 1 if successful, 0 if contention - */ -int __down_write_trylock(struct rw_semaphore *sem) -{ - unsigned long flags; - int ret = 0; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (sem->count == 0) { - /* got the lock */ - sem->count = -1; - ret = 1; - } - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return ret; -} - -/* - * release a read lock on the semaphore - */ -void __up_read(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (--sem->count == 0 && !list_empty(&sem->wait_list)) - sem = __rwsem_wake_one_writer(sem); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -} - -/* - * release a write lock on the semaphore - */ -void __up_write(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - sem->count = 0; - if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, 1); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -} - -/* - * downgrade a write lock into a read lock - * - just wake up any readers at the front of the queue - */ -void __downgrade_write(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - sem->count = 1; - if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, 0); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -} - diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 067e265fa5c1..45ee00236e03 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -153,7 +153,6 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) } #endif -#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM /* * lock for reading */ @@ -260,5 +259,3 @@ static inline void __downgrade_write(struct rw_semaphore *sem) if (tmp < 0) rwsem_downgrade_wake(sem); } - -#endif /* CONFIG_RWSEM_XCHGADD_ALGORITHM */ -- cgit v1.2.3 From ddb20d1d3aed8f130519c0a29cd5392efcc067b8 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 22 Mar 2019 10:30:08 -0400 Subject: locking/rwsem: Optimize down_read_trylock() Modify __down_read_trylock() to optimize for an unlocked rwsem and make it generate slightly better code. Before this patch, down_read_trylock: 0x0000000000000000 <+0>: callq 0x5 0x0000000000000005 <+5>: jmp 0x18 0x0000000000000007 <+7>: lea 0x1(%rdx),%rcx 0x000000000000000b <+11>: mov %rdx,%rax 0x000000000000000e <+14>: lock cmpxchg %rcx,(%rdi) 0x0000000000000013 <+19>: cmp %rax,%rdx 0x0000000000000016 <+22>: je 0x23 0x0000000000000018 <+24>: mov (%rdi),%rdx 0x000000000000001b <+27>: test %rdx,%rdx 0x000000000000001e <+30>: jns 0x7 0x0000000000000020 <+32>: xor %eax,%eax 0x0000000000000022 <+34>: retq 0x0000000000000023 <+35>: mov %gs:0x0,%rax 0x000000000000002c <+44>: or $0x3,%rax 0x0000000000000030 <+48>: mov %rax,0x20(%rdi) 0x0000000000000034 <+52>: mov $0x1,%eax 0x0000000000000039 <+57>: retq After patch, down_read_trylock: 0x0000000000000000 <+0>: callq 0x5 0x0000000000000005 <+5>: xor %eax,%eax 0x0000000000000007 <+7>: lea 0x1(%rax),%rdx 0x000000000000000b <+11>: lock cmpxchg %rdx,(%rdi) 0x0000000000000010 <+16>: jne 0x29 0x0000000000000012 <+18>: mov %gs:0x0,%rax 0x000000000000001b <+27>: or $0x3,%rax 0x000000000000001f <+31>: mov %rax,0x20(%rdi) 0x0000000000000023 <+35>: mov $0x1,%eax 0x0000000000000028 <+40>: retq 0x0000000000000029 <+41>: test %rax,%rax 0x000000000000002c <+44>: jns 0x7 0x000000000000002e <+46>: xor %eax,%eax 0x0000000000000030 <+48>: retq By using a rwsem microbenchmark, the down_read_trylock() rate (with a load of 10 to lengthen the lock critical section) on a x86-64 system before and after the patch were: Before Patch After Patch # of Threads rlock rlock ------------ ----- ----- 1 14,496 14,716 2 8,644 8,453 4 6,799 6,983 8 5,664 7,190 On a ARM64 system, the performance results were: Before Patch After Patch # of Threads rlock rlock ------------ ----- ----- 1 23,676 24,488 2 7,697 9,502 4 4,945 3,440 8 2,641 1,603 For the uncontended case (1 thread), the new down_read_trylock() is a little bit faster. For the contended cases, the new down_read_trylock() perform pretty well in x86-64, but performance degrades at high contention level on ARM64. Suggested-by: Linus Torvalds Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Acked-by: Linus Torvalds Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: H. Peter Anvin Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-c6x-dev@linux-c6x.org Cc: linux-m68k@lists.linux-m68k.org Cc: linux-riscv@lists.infradead.org Cc: linux-um@lists.infradead.org Cc: linux-xtensa@linux-xtensa.org Cc: linuxppc-dev@lists.ozlabs.org Cc: nios2-dev@lists.rocketboards.org Cc: openrisc@lists.librecores.org Cc: uclinux-h8-devel@lists.sourceforge.jp Link: https://lkml.kernel.org/r/20190322143008.21313-4-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 45ee00236e03..1f5775aa6a1d 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -174,14 +174,17 @@ static inline int __down_read_killable(struct rw_semaphore *sem) static inline int __down_read_trylock(struct rw_semaphore *sem) { - long tmp; + /* + * Optimize for the case when the rwsem is not locked at all. + */ + long tmp = RWSEM_UNLOCKED_VALUE; - while ((tmp = atomic_long_read(&sem->count)) >= 0) { - if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, - tmp + RWSEM_ACTIVE_READ_BIAS)) { + do { + if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, + tmp + RWSEM_ACTIVE_READ_BIAS)) { return 1; } - } + } while (tmp >= 0); return 0; } -- cgit v1.2.3 From d1be6a28b13ce6d1bc42bf9b6a9454c65839225b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 12:48:44 +0000 Subject: asm-generic/mmiowb: Add generic implementation of mmiowb() tracking In preparation for removing all explicit mmiowb() calls from driver code, implement a tracking system in asm-generic based loosely on the PowerPC implementation. This allows architectures with a non-empty mmiowb() definition to have the barrier automatically inserted in spin_unlock() following a critical section containing an I/O write. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- kernel/locking/spinlock.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/locking') diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index 936f3d14dd6b..0ff08380f531 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -22,6 +22,13 @@ #include #include +#ifdef CONFIG_MMIOWB +#ifndef arch_mmiowb_state +DEFINE_PER_CPU(struct mmiowb_state, __mmiowb_state); +EXPORT_PER_CPU_SYMBOL(__mmiowb_state); +#endif +#endif + /* * If lockdep is enabled then we use the non-preemption spin-ops * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are -- cgit v1.2.3 From 60ca1e5a200cd294a12907fa36dece4241db4ab8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Feb 2019 12:59:59 +0000 Subject: mmiowb: Hook up mmiowb helpers to spinlocks and generic I/O accessors Removing explicit calls to mmiowb() from driver code means that we must now call into the generic mmiowb_spin_{lock,unlock}() functions from the core spinlock code. In order to elide barriers following critical sections without any I/O writes, we also hook into the asm-generic I/O routines. Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- kernel/locking/spinlock_debug.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/locking') diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c index 9aa0fccd5d43..399669f7eba8 100644 --- a/kernel/locking/spinlock_debug.c +++ b/kernel/locking/spinlock_debug.c @@ -111,6 +111,7 @@ void do_raw_spin_lock(raw_spinlock_t *lock) { debug_spin_lock_before(lock); arch_spin_lock(&lock->raw_lock); + mmiowb_spin_lock(); debug_spin_lock_after(lock); } @@ -118,8 +119,10 @@ int do_raw_spin_trylock(raw_spinlock_t *lock) { int ret = arch_spin_trylock(&lock->raw_lock); - if (ret) + if (ret) { + mmiowb_spin_lock(); debug_spin_lock_after(lock); + } #ifndef CONFIG_SMP /* * Must not happen on UP: @@ -131,6 +134,7 @@ int do_raw_spin_trylock(raw_spinlock_t *lock) void do_raw_spin_unlock(raw_spinlock_t *lock) { + mmiowb_spin_unlock(); debug_spin_unlock(lock); arch_spin_unlock(&lock->raw_lock); } -- cgit v1.2.3 From eecec78f777742903ec9167490c625661284155d Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:10 -0400 Subject: locking/rwsem: Relocate rwsem_down_read_failed() The rwsem_down_read_failed*() functions were relocated from above the optimistic spinning section to below that section. This enables the reader functions to use optimisitic spinning in future patches. There is no code change. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Will Deacon Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Link: http://lkml.kernel.org/r/20190404174320.22416-2-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 172 ++++++++++++++++++++++---------------------- 1 file changed, 86 insertions(+), 86 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index fbe96341beee..0d518b52ade4 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -224,92 +224,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, atomic_long_add(adjustment, &sem->count); } -/* - * Wait for the read lock to be granted - */ -static inline struct rw_semaphore __sched * -__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) -{ - long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; - struct rwsem_waiter waiter; - DEFINE_WAKE_Q(wake_q); - - waiter.task = current; - waiter.type = RWSEM_WAITING_FOR_READ; - - raw_spin_lock_irq(&sem->wait_lock); - if (list_empty(&sem->wait_list)) { - /* - * In case the wait queue is empty and the lock isn't owned - * by a writer, this reader can exit the slowpath and return - * immediately as its RWSEM_ACTIVE_READ_BIAS has already - * been set in the count. - */ - if (atomic_long_read(&sem->count) >= 0) { - raw_spin_unlock_irq(&sem->wait_lock); - return sem; - } - adjustment += RWSEM_WAITING_BIAS; - } - list_add_tail(&waiter.list, &sem->wait_list); - - /* we're now waiting on the lock, but no longer actively locking */ - count = atomic_long_add_return(adjustment, &sem->count); - - /* - * If there are no active locks, wake the front queued process(es). - * - * If there are no writers and we are first in the queue, - * wake our own waiter to join the existing active readers ! - */ - if (count == RWSEM_WAITING_BIAS || - (count > RWSEM_WAITING_BIAS && - adjustment != -RWSEM_ACTIVE_READ_BIAS)) - __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); - - raw_spin_unlock_irq(&sem->wait_lock); - wake_up_q(&wake_q); - - /* wait to be given the lock */ - while (true) { - set_current_state(state); - if (!waiter.task) - break; - if (signal_pending_state(state, current)) { - raw_spin_lock_irq(&sem->wait_lock); - if (waiter.task) - goto out_nolock; - raw_spin_unlock_irq(&sem->wait_lock); - break; - } - schedule(); - } - - __set_current_state(TASK_RUNNING); - return sem; -out_nolock: - list_del(&waiter.list); - if (list_empty(&sem->wait_list)) - atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); - raw_spin_unlock_irq(&sem->wait_lock); - __set_current_state(TASK_RUNNING); - return ERR_PTR(-EINTR); -} - -__visible struct rw_semaphore * __sched -rwsem_down_read_failed(struct rw_semaphore *sem) -{ - return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE); -} -EXPORT_SYMBOL(rwsem_down_read_failed); - -__visible struct rw_semaphore * __sched -rwsem_down_read_failed_killable(struct rw_semaphore *sem) -{ - return __rwsem_down_read_failed_common(sem, TASK_KILLABLE); -} -EXPORT_SYMBOL(rwsem_down_read_failed_killable); - /* * This function must be called with the sem->wait_lock held to prevent * race conditions between checking the rwsem wait list and setting the @@ -504,6 +418,92 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem) } #endif +/* + * Wait for the read lock to be granted + */ +static inline struct rw_semaphore __sched * +__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) +{ + long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; + struct rwsem_waiter waiter; + DEFINE_WAKE_Q(wake_q); + + waiter.task = current; + waiter.type = RWSEM_WAITING_FOR_READ; + + raw_spin_lock_irq(&sem->wait_lock); + if (list_empty(&sem->wait_list)) { + /* + * In case the wait queue is empty and the lock isn't owned + * by a writer, this reader can exit the slowpath and return + * immediately as its RWSEM_ACTIVE_READ_BIAS has already + * been set in the count. + */ + if (atomic_long_read(&sem->count) >= 0) { + raw_spin_unlock_irq(&sem->wait_lock); + return sem; + } + adjustment += RWSEM_WAITING_BIAS; + } + list_add_tail(&waiter.list, &sem->wait_list); + + /* we're now waiting on the lock, but no longer actively locking */ + count = atomic_long_add_return(adjustment, &sem->count); + + /* + * If there are no active locks, wake the front queued process(es). + * + * If there are no writers and we are first in the queue, + * wake our own waiter to join the existing active readers ! + */ + if (count == RWSEM_WAITING_BIAS || + (count > RWSEM_WAITING_BIAS && + adjustment != -RWSEM_ACTIVE_READ_BIAS)) + __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); + + raw_spin_unlock_irq(&sem->wait_lock); + wake_up_q(&wake_q); + + /* wait to be given the lock */ + while (true) { + set_current_state(state); + if (!waiter.task) + break; + if (signal_pending_state(state, current)) { + raw_spin_lock_irq(&sem->wait_lock); + if (waiter.task) + goto out_nolock; + raw_spin_unlock_irq(&sem->wait_lock); + break; + } + schedule(); + } + + __set_current_state(TASK_RUNNING); + return sem; +out_nolock: + list_del(&waiter.list); + if (list_empty(&sem->wait_list)) + atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); + raw_spin_unlock_irq(&sem->wait_lock); + __set_current_state(TASK_RUNNING); + return ERR_PTR(-EINTR); +} + +__visible struct rw_semaphore * __sched +rwsem_down_read_failed(struct rw_semaphore *sem) +{ + return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(rwsem_down_read_failed); + +__visible struct rw_semaphore * __sched +rwsem_down_read_failed_killable(struct rw_semaphore *sem) +{ + return __rwsem_down_read_failed_common(sem, TASK_KILLABLE); +} +EXPORT_SYMBOL(rwsem_down_read_failed_killable); + /* * Wait until we successfully acquire the write lock */ -- cgit v1.2.3 From c7580c1e84435c9ccc6c612d9fee8e71811f7be6 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:11 -0400 Subject: locking/rwsem: Move owner setting code from rwsem.c to rwsem.h Move all the owner setting code closer to the rwsem-xadd fast paths directly within rwsem.h file as well as in the slowpaths where owner setting is done after acquring the lock. This will enable us to add DEBUG_RWSEMS check in a later patch to make sure that read lock is really acquired when rwsem_down_read_failed() returns, for instance. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-3-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 6 +++--- kernel/locking/rwsem.c | 19 ++----------------- kernel/locking/rwsem.h | 17 +++++++++++++++-- 3 files changed, 20 insertions(+), 22 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 0d518b52ade4..c213869e1aa7 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -176,9 +176,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, goto try_reader_grant; } /* - * It is not really necessary to set it to reader-owned here, - * but it gives the spinners an early indication that the - * readers now have the lock. + * Set it to reader-owned to give spinners an early + * indication that readers now have the lock. */ __rwsem_set_reader_owned(sem, waiter->task); } @@ -441,6 +440,7 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) */ if (atomic_long_read(&sem->count) >= 0) { raw_spin_unlock_irq(&sem->wait_lock); + rwsem_set_reader_owned(sem); return sem; } adjustment += RWSEM_WAITING_BIAS; diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index e586f0d03ad3..59e584895532 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -24,7 +24,6 @@ void __sched down_read(struct rw_semaphore *sem) rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_read_trylock, __down_read); - rwsem_set_reader_owned(sem); } EXPORT_SYMBOL(down_read); @@ -39,7 +38,6 @@ int __sched down_read_killable(struct rw_semaphore *sem) return -EINTR; } - rwsem_set_reader_owned(sem); return 0; } @@ -52,10 +50,8 @@ int down_read_trylock(struct rw_semaphore *sem) { int ret = __down_read_trylock(sem); - if (ret == 1) { + if (ret == 1) rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); - rwsem_set_reader_owned(sem); - } return ret; } @@ -70,7 +66,6 @@ void __sched down_write(struct rw_semaphore *sem) rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_write_trylock, __down_write); - rwsem_set_owner(sem); } EXPORT_SYMBOL(down_write); @@ -88,7 +83,6 @@ int __sched down_write_killable(struct rw_semaphore *sem) return -EINTR; } - rwsem_set_owner(sem); return 0; } @@ -101,10 +95,8 @@ int down_write_trylock(struct rw_semaphore *sem) { int ret = __down_write_trylock(sem); - if (ret == 1) { + if (ret == 1) rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); - rwsem_set_owner(sem); - } return ret; } @@ -119,7 +111,6 @@ void up_read(struct rw_semaphore *sem) rwsem_release(&sem->dep_map, 1, _RET_IP_); DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); - rwsem_clear_reader_owned(sem); __up_read(sem); } @@ -133,7 +124,6 @@ void up_write(struct rw_semaphore *sem) rwsem_release(&sem->dep_map, 1, _RET_IP_); DEBUG_RWSEMS_WARN_ON(sem->owner != current); - rwsem_clear_owner(sem); __up_write(sem); } @@ -147,7 +137,6 @@ void downgrade_write(struct rw_semaphore *sem) lock_downgrade(&sem->dep_map, _RET_IP_); DEBUG_RWSEMS_WARN_ON(sem->owner != current); - rwsem_set_reader_owned(sem); __downgrade_write(sem); } @@ -161,7 +150,6 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_read_trylock, __down_read); - rwsem_set_reader_owned(sem); } EXPORT_SYMBOL(down_read_nested); @@ -172,7 +160,6 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); LOCK_CONTENDED(sem, __down_write_trylock, __down_write); - rwsem_set_owner(sem); } EXPORT_SYMBOL(_down_write_nest_lock); @@ -193,7 +180,6 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_write_trylock, __down_write); - rwsem_set_owner(sem); } EXPORT_SYMBOL(down_write_nested); @@ -208,7 +194,6 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) return -EINTR; } - rwsem_set_owner(sem); return 0; } diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 1f5775aa6a1d..ee24c4f257a5 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -160,6 +160,8 @@ static inline void __down_read(struct rw_semaphore *sem) { if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) rwsem_down_read_failed(sem); + else + rwsem_set_reader_owned(sem); } static inline int __down_read_killable(struct rw_semaphore *sem) @@ -167,8 +169,9 @@ static inline int __down_read_killable(struct rw_semaphore *sem) if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { if (IS_ERR(rwsem_down_read_failed_killable(sem))) return -EINTR; + } else { + rwsem_set_reader_owned(sem); } - return 0; } @@ -182,6 +185,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) do { if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, tmp + RWSEM_ACTIVE_READ_BIAS)) { + rwsem_set_reader_owned(sem); return 1; } } while (tmp >= 0); @@ -199,6 +203,7 @@ static inline void __down_write(struct rw_semaphore *sem) &sem->count); if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) rwsem_down_write_failed(sem); + rwsem_set_owner(sem); } static inline int __down_write_killable(struct rw_semaphore *sem) @@ -210,6 +215,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem) if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) if (IS_ERR(rwsem_down_write_failed_killable(sem))) return -EINTR; + rwsem_set_owner(sem); return 0; } @@ -219,7 +225,11 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); - return tmp == RWSEM_UNLOCKED_VALUE; + if (tmp == RWSEM_UNLOCKED_VALUE) { + rwsem_set_owner(sem); + return true; + } + return false; } /* @@ -229,6 +239,7 @@ static inline void __up_read(struct rw_semaphore *sem) { long tmp; + rwsem_clear_reader_owned(sem); tmp = atomic_long_dec_return_release(&sem->count); if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) rwsem_wake(sem); @@ -239,6 +250,7 @@ static inline void __up_read(struct rw_semaphore *sem) */ static inline void __up_write(struct rw_semaphore *sem) { + rwsem_clear_owner(sem); if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, &sem->count) < 0)) rwsem_wake(sem); @@ -259,6 +271,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) * write side. As such, rely on RELEASE semantics. */ tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); + rwsem_set_reader_owned(sem); if (tmp < 0) rwsem_downgrade_wake(sem); } -- cgit v1.2.3 From 12a30a7fc142a123c61da9623bd824d95d36c12e Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:12 -0400 Subject: locking/rwsem: Move rwsem internal function declarations to rwsem-xadd.h We don't need to expose rwsem internal functions which are not supposed to be called directly from other kernel code. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Will Deacon Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Link: http://lkml.kernel.org/r/20190404174320.22416-4-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/locking') diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index ee24c4f257a5..19997c82270b 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -153,6 +153,13 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) } #endif +extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); + /* * lock for reading */ -- cgit v1.2.3 From a338ecb07a338c9a8b0ca0010e862ebe598b1551 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:13 -0400 Subject: locking/rwsem: Micro-optimize rwsem_try_read_lock_unqueued() The atomic_long_cmpxchg_acquire() in rwsem_try_read_lock_unqueued() is replaced by atomic_long_try_cmpxchg_acquire() to simpify the code and generate slightly better assembly code. There is no functional change. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Will Deacon Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Link: http://lkml.kernel.org/r/20190404174320.22416-5-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index c213869e1aa7..f6198e1a58f6 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -259,21 +259,16 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) */ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) { - long old, count = atomic_long_read(&sem->count); + long count = atomic_long_read(&sem->count); - while (true) { - if (!(count == 0 || count == RWSEM_WAITING_BIAS)) - return false; - - old = atomic_long_cmpxchg_acquire(&sem->count, count, - count + RWSEM_ACTIVE_WRITE_BIAS); - if (old == count) { + while (!count || count == RWSEM_WAITING_BIAS) { + if (atomic_long_try_cmpxchg_acquire(&sem->count, &count, + count + RWSEM_ACTIVE_WRITE_BIAS)) { rwsem_set_owner(sem); return true; } - - count = old; } + return false; } static inline bool owner_on_cpu(struct task_struct *owner) -- cgit v1.2.3 From a68e2c4c637918da47b3aa270051545cff7d8245 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:14 -0400 Subject: locking/rwsem: Add debug check for __down_read*() When rwsem_down_read_failed*() return, the read lock is acquired indirectly by others. So debug checks are added in __down_read() and __down_read_killable() to make sure the rwsem is really reader-owned. The other debug check calls in kernel/locking/rwsem.c except the one in up_read_non_owner() are also moved over to rwsem-xadd.h. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-6-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.c | 3 --- kernel/locking/rwsem.h | 12 ++++++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 59e584895532..90de5f1780ba 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -109,7 +109,6 @@ EXPORT_SYMBOL(down_write_trylock); void up_read(struct rw_semaphore *sem) { rwsem_release(&sem->dep_map, 1, _RET_IP_); - DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); __up_read(sem); } @@ -122,7 +121,6 @@ EXPORT_SYMBOL(up_read); void up_write(struct rw_semaphore *sem) { rwsem_release(&sem->dep_map, 1, _RET_IP_); - DEBUG_RWSEMS_WARN_ON(sem->owner != current); __up_write(sem); } @@ -135,7 +133,6 @@ EXPORT_SYMBOL(up_write); void downgrade_write(struct rw_semaphore *sem) { lock_downgrade(&sem->dep_map, _RET_IP_); - DEBUG_RWSEMS_WARN_ON(sem->owner != current); __downgrade_write(sem); } diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 19997c82270b..1d8f722a6761 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -165,10 +165,13 @@ extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); */ static inline void __down_read(struct rw_semaphore *sem) { - if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) + if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { rwsem_down_read_failed(sem); - else + DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & + RWSEM_READER_OWNED)); + } else { rwsem_set_reader_owned(sem); + } } static inline int __down_read_killable(struct rw_semaphore *sem) @@ -176,6 +179,8 @@ static inline int __down_read_killable(struct rw_semaphore *sem) if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { if (IS_ERR(rwsem_down_read_failed_killable(sem))) return -EINTR; + DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & + RWSEM_READER_OWNED)); } else { rwsem_set_reader_owned(sem); } @@ -246,6 +251,7 @@ static inline void __up_read(struct rw_semaphore *sem) { long tmp; + DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); rwsem_clear_reader_owned(sem); tmp = atomic_long_dec_return_release(&sem->count); if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) @@ -257,6 +263,7 @@ static inline void __up_read(struct rw_semaphore *sem) */ static inline void __up_write(struct rw_semaphore *sem) { + DEBUG_RWSEMS_WARN_ON(sem->owner != current); rwsem_clear_owner(sem); if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, &sem->count) < 0)) @@ -277,6 +284,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) * read-locked region is ok to be re-ordered into the * write side. As such, rely on RELEASE semantics. */ + DEBUG_RWSEMS_WARN_ON(sem->owner != current); tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); rwsem_set_reader_owned(sem); if (tmp < 0) -- cgit v1.2.3 From 3b4ba6643d26a95e08067fca9a5da1828f9afabf Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:15 -0400 Subject: locking/rwsem: Enhance DEBUG_RWSEMS_WARN_ON() macro Currently, the DEBUG_RWSEMS_WARN_ON() macro just dumps a stack trace when the rwsem isn't in the right state. It does not show the actual states of the rwsem. This may not be that helpful in the debugging process. Enhance the DEBUG_RWSEMS_WARN_ON() macro to also show the current content of the rwsem count and owner fields to give more information about what is wrong with the rwsem. The debug_locks_off() function is called as is done inside DEBUG_LOCKS_WARN_ON(). Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-7-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem.c | 3 ++- kernel/locking/rwsem.h | 21 ++++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 90de5f1780ba..ccbf18f560ff 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -198,7 +198,8 @@ EXPORT_SYMBOL(down_write_killable_nested); void up_read_non_owner(struct rw_semaphore *sem) { - DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); + DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED), + sem); __up_read(sem); } diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 1d8f722a6761..3059a2dc39f8 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -27,9 +27,15 @@ #define RWSEM_ANONYMOUSLY_OWNED (1UL << 1) #ifdef CONFIG_DEBUG_RWSEMS -# define DEBUG_RWSEMS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c) +# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \ + if (WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\ + #c, atomic_long_read(&(sem)->count), \ + (long)((sem)->owner), (long)current, \ + list_empty(&(sem)->wait_list) ? "" : "not ")) \ + debug_locks_off(); \ + } while (0) #else -# define DEBUG_RWSEMS_WARN_ON(c) +# define DEBUG_RWSEMS_WARN_ON(c, sem) #endif /* @@ -168,7 +174,7 @@ static inline void __down_read(struct rw_semaphore *sem) if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { rwsem_down_read_failed(sem); DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & - RWSEM_READER_OWNED)); + RWSEM_READER_OWNED), sem); } else { rwsem_set_reader_owned(sem); } @@ -180,7 +186,7 @@ static inline int __down_read_killable(struct rw_semaphore *sem) if (IS_ERR(rwsem_down_read_failed_killable(sem))) return -EINTR; DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & - RWSEM_READER_OWNED)); + RWSEM_READER_OWNED), sem); } else { rwsem_set_reader_owned(sem); } @@ -251,7 +257,8 @@ static inline void __up_read(struct rw_semaphore *sem) { long tmp; - DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); + DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED), + sem); rwsem_clear_reader_owned(sem); tmp = atomic_long_dec_return_release(&sem->count); if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) @@ -263,7 +270,7 @@ static inline void __up_read(struct rw_semaphore *sem) */ static inline void __up_write(struct rw_semaphore *sem) { - DEBUG_RWSEMS_WARN_ON(sem->owner != current); + DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem); rwsem_clear_owner(sem); if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, &sem->count) < 0)) @@ -284,7 +291,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) * read-locked region is ok to be re-ordered into the * write side. As such, rely on RELEASE semantics. */ - DEBUG_RWSEMS_WARN_ON(sem->owner != current); + DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem); tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); rwsem_set_reader_owned(sem); if (tmp < 0) -- cgit v1.2.3 From ad53fa10fa9e816067bbae7109845940f5e6df50 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 4 Apr 2019 13:43:16 -0400 Subject: locking/qspinlock_stat: Introduce generic lockevent_*() counting APIs The percpu event counts used by qspinlock code can be useful for other locking code as well. So a new set of lockevent_* counting APIs is introduced with the lock event names extracted out into the new lock_events_list.h header file for easier addition in the future. The existing qstat_inc() calls are replaced by either lockevent_inc() or lockevent_cond_inc() calls. The qstat_hop() call is renamed to lockevent_pv_hop(). The "reset_counters" debugfs file is also renamed to ".reset_counts". Signed-off-by: Waiman Long Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tim Chen Cc: Will Deacon Link: http://lkml.kernel.org/r/20190404174320.22416-8-longman@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/lock_events.h | 55 ++++++++++++ kernel/locking/lock_events_list.h | 50 +++++++++++ kernel/locking/qspinlock.c | 8 +- kernel/locking/qspinlock_paravirt.h | 19 +++-- kernel/locking/qspinlock_stat.h | 163 ++++++++++++++---------------------- 5 files changed, 181 insertions(+), 114 deletions(-) create mode 100644 kernel/locking/lock_events.h create mode 100644 kernel/locking/lock_events_list.h (limited to 'kernel/locking') diff --git a/kernel/locking/lock_events.h b/kernel/locking/lock_events.h new file mode 100644 index 000000000000..4009e07b474a --- /dev/null +++ b/kernel/locking/lock_events.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: Waiman Long + */ + +enum lock_events { + +#include "lock_events_list.h" + + lockevent_num, /* Total number of lock event counts */ + LOCKEVENT_reset_cnts = lockevent_num, +}; + +#ifdef CONFIG_QUEUED_LOCK_STAT +/* + * Per-cpu counters + */ +DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]); + +/* + * Increment the PV qspinlock statistical counters + */ +static inline void __lockevent_inc(enum lock_events event, bool cond) +{ + if (cond) + __this_cpu_inc(lockevents[event]); +} + +#define lockevent_inc(ev) __lockevent_inc(LOCKEVENT_ ##ev, true) +#define lockevent_cond_inc(ev, c) __lockevent_inc(LOCKEVENT_ ##ev, c) + +static inline void __lockevent_add(enum lock_events event, int inc) +{ + __this_cpu_add(lockevents[event], inc); +} + +#define lockevent_add(ev, c) __lockevent_add(LOCKEVENT_ ##ev, c) + +#else /* CONFIG_QUEUED_LOCK_STAT */ + +#define lockevent_inc(ev) +#define lockevent_add(ev, c) +#define lockevent_cond_inc(ev, c) + +#endif /* CONFIG_QUEUED_LOCK_STAT */ diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h new file mode 100644 index 000000000000..8b4d2e180475 --- /dev/null +++ b/kernel/locking/lock_events_list.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: Waiman Long + */ + +#ifndef LOCK_EVENT +#define LOCK_EVENT(name) LOCKEVENT_ ## name, +#endif + +#ifdef CONFIG_QUEUED_SPINLOCKS +#ifdef CONFIG_PARAVIRT_SPINLOCKS +/* + * Locking events for PV qspinlock. + */ +LOCK_EVENT(pv_hash_hops) /* Average # of hops per hashing operation */ +LOCK_EVENT(pv_kick_unlock) /* # of vCPU kicks issued at unlock time */ +LOCK_EVENT(pv_kick_wake) /* # of vCPU kicks for pv_latency_wake */ +LOCK_EVENT(pv_latency_kick) /* Average latency (ns) of vCPU kick */ +LOCK_EVENT(pv_latency_wake) /* Average latency (ns) of kick-to-wakeup */ +LOCK_EVENT(pv_lock_stealing) /* # of lock stealing operations */ +LOCK_EVENT(pv_spurious_wakeup) /* # of spurious wakeups in non-head vCPUs */ +LOCK_EVENT(pv_wait_again) /* # of wait's after queue head vCPU kick */ +LOCK_EVENT(pv_wait_early) /* # of early vCPU wait's */ +LOCK_EVENT(pv_wait_head) /* # of vCPU wait's at the queue head */ +LOCK_EVENT(pv_wait_node) /* # of vCPU wait's at non-head queue node */ +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + +/* + * Locking events for qspinlock + * + * Subtracting lock_use_node[234] from lock_slowpath will give you + * lock_use_node1. + */ +LOCK_EVENT(lock_pending) /* # of locking ops via pending code */ +LOCK_EVENT(lock_slowpath) /* # of locking ops via MCS lock queue */ +LOCK_EVENT(lock_use_node2) /* # of locking ops that use 2nd percpu node */ +LOCK_EVENT(lock_use_node3) /* # of locking ops that use 3rd percpu node */ +LOCK_EVENT(lock_use_node4) /* # of locking ops that use 4th percpu node */ +LOCK_EVENT(lock_no_node) /* # of locking ops w/o using percpu node */ +#endif /* CONFIG_QUEUED_SPINLOCKS */ diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 5e9247dc2515..e14b32c69639 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -395,7 +395,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) * 0,1,0 -> 0,0,1 */ clear_pending_set_locked(lock); - qstat_inc(qstat_lock_pending, true); + lockevent_inc(lock_pending); return; /* @@ -403,7 +403,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) * queuing. */ queue: - qstat_inc(qstat_lock_slowpath, true); + lockevent_inc(lock_slowpath); pv_queue: node = this_cpu_ptr(&qnodes[0].mcs); idx = node->count++; @@ -419,7 +419,7 @@ pv_queue: * simple enough. */ if (unlikely(idx >= MAX_NODES)) { - qstat_inc(qstat_lock_no_node, true); + lockevent_inc(lock_no_node); while (!queued_spin_trylock(lock)) cpu_relax(); goto release; @@ -430,7 +430,7 @@ pv_queue: /* * Keep counts of non-zero index values: */ - qstat_inc(qstat_lock_use_node2 + idx - 1, idx); + lockevent_cond_inc(lock_use_node2 + idx - 1, idx); /* * Ensure that we increment the head node->count before initialising diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 8f36c27c1794..89bab079e7a4 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -89,7 +89,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) if (!(val & _Q_LOCKED_PENDING_MASK) && (cmpxchg_acquire(&lock->locked, 0, _Q