author     Linus Torvalds <torvalds@linux-foundation.org>	2023-06-27 14:14:30 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>	2023-06-27 14:14:30 -0700
commit     bc6cb4d5bc3a44197de30784eae71d8ba28483eb (patch)
tree       fdd00391c6068c217eeb8a4a06afc40cc1fc6853 /arch
parent     ed3b7923a816ded62dccef377c9ee346c7d3b1b4 (diff)
parent     b33eb50a92b0a298fa8a6ac350e741c3ec100f6d (diff)
download   linux-bc6cb4d5bc3a44197de30784eae71d8ba28483eb.tar.gz
           linux-bc6cb4d5bc3a44197de30784eae71d8ba28483eb.tar.bz2
           linux-bc6cb4d5bc3a44197de30784eae71d8ba28483eb.zip
Merge tag 'locking-core-2023-06-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:

 - Introduce cmpxchg128() -- aka. the demise of cmpxchg_double()

   The cmpxchg128() family of functions is basically & functionally the
   same as cmpxchg_double(), but with a saner interface.

   Instead of a 6-parameter horror that forced u128 - u64/u64-halves
   layout details on the interface and exposed users to complexity,
   fragility & bugs, use a natural 3-parameter interface with u128
   types.

 - Restructure the generated atomic headers, and add kerneldoc comments
   for all of the generic atomic{,64,_long}_t operations.

   The generated definitions are much cleaner now, and come with
   documentation.

 - Implement lock_set_cmp_fn() on lockdep, for defining an ordering
   when taking multiple locks of the same type.

   This gets rid of one use of lockdep_set_novalidate_class() in the
   bcache code.

 - Fix a raw_cpu_generic_try_cmpxchg() bug due to an unintended
   variable shadowing generating garbage code on Clang on certain ARM
   builds.

* tag 'locking-core-2023-06-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (43 commits)
  locking/atomic: scripts: fix ${atomic}_dec_if_positive() kerneldoc
  percpu: Fix self-assignment of __old in raw_cpu_generic_try_cmpxchg()
  locking/atomic: treewide: delete arch_atomic_*() kerneldoc
  locking/atomic: docs: Add atomic operations to the driver basic API documentation
  locking/atomic: scripts: generate kerneldoc comments
  docs: scripts: kernel-doc: accept bitwise negation like ~@var
  locking/atomic: scripts: simplify raw_atomic*() definitions
  locking/atomic: scripts: simplify raw_atomic_long*() definitions
  locking/atomic: scripts: split pfx/name/sfx/order
  locking/atomic: scripts: restructure fallback ifdeffery
  locking/atomic: scripts: build raw_atomic_long*() directly
  locking/atomic: treewide: use raw_atomic*_<op>()
  locking/atomic: scripts: add trivial raw_atomic*_<op>()
  locking/atomic: scripts: factor out order template generation
  locking/atomic: scripts: remove leftover "${mult}"
  locking/atomic: scripts: remove bogus order parameter
  locking/atomic: xtensa: add preprocessor symbols
  locking/atomic: x86: add preprocessor symbols
  locking/atomic: sparc: add preprocessor symbols
  locking/atomic: sh: add preprocessor symbols
  ...
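The interface change in the first item above is easiest to see side by side. The sketch below is illustrative only, not an actual kernel call site: the struct, field and function names (slot, update_old, update_new) are hypothetical, and it simply contrasts the 6-parameter cmpxchg_double() shape, as it existed before this series, with the 3-parameter, u128-typed cmpxchg128() shape the log describes.

#include <linux/types.h>	/* u64; the u128 type is introduced by this series */
#include <linux/atomic.h>

/* A 16-byte location updated locklessly: a pointer word plus an ABA tag. */
struct slot {
	u64	ptr;
	u64	seq;
} __aligned(16);

/*
 * Before this series: cmpxchg_double() took six parameters and two
 * pointers to adjacent u64 halves, so the 128-bit layout leaked into
 * every call site.  It returned non-zero on success.
 */
static bool update_old(struct slot *s, struct slot old, struct slot new)
{
	return cmpxchg_double(&s->ptr, &s->seq,
			      old.ptr, old.seq,
			      new.ptr, new.seq);
}

/*
 * After this series: cmpxchg128() takes one u128 location plus u128
 * old/new values and returns the previous contents -- the same shape
 * as cmpxchg() on smaller types.
 */
static bool update_new(u128 *s, u128 old, u128 new)
{
	return cmpxchg128(s, old, new) == old;
}

The gain is visible at the call sites: the new form no longer forces every caller to spell out the u64/u64-halves layout of the 128-bit location.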
Diffstat (limited to 'arch')
-rw-r--r--  arch/alpha/include/asm/atomic.h | 35
-rw-r--r--  arch/arc/include/asm/atomic-spinlock.h | 9
-rw-r--r--  arch/arc/include/asm/atomic.h | 24
-rw-r--r--  arch/arc/include/asm/atomic64-arcv2.h | 19
-rw-r--r--  arch/arm/include/asm/assembler.h | 17
-rw-r--r--  arch/arm/include/asm/atomic.h | 15
-rw-r--r--  arch/arm/include/asm/sync_bitops.h | 29
-rw-r--r--  arch/arm/lib/bitops.h | 14
-rw-r--r--  arch/arm/lib/testchangebit.S | 4
-rw-r--r--  arch/arm/lib/testclearbit.S | 4
-rw-r--r--  arch/arm/lib/testsetbit.S | 4
-rw-r--r--  arch/arm64/include/asm/atomic.h | 28
-rw-r--r--  arch/arm64/include/asm/atomic_ll_sc.h | 56
-rw-r--r--  arch/arm64/include/asm/atomic_lse.h | 39
-rw-r--r--  arch/arm64/include/asm/cmpxchg.h | 48
-rw-r--r--  arch/arm64/include/asm/percpu.h | 30
-rw-r--r--  arch/csky/include/asm/atomic.h | 35
-rw-r--r--  arch/hexagon/include/asm/atomic.h | 69
-rw-r--r--  arch/ia64/include/asm/atomic.h | 7
-rw-r--r--  arch/loongarch/include/asm/atomic.h | 56
-rw-r--r--  arch/m68k/include/asm/atomic.h | 18
-rw-r--r--  arch/mips/include/asm/atomic.h | 11
-rw-r--r--  arch/openrisc/include/asm/atomic.h | 3
-rw-r--r--  arch/parisc/include/asm/atomic.h | 27
-rw-r--r--  arch/powerpc/include/asm/atomic.h | 24
-rw-r--r--  arch/powerpc/kernel/smp.c | 12
-rw-r--r--  arch/riscv/include/asm/atomic.h | 72
-rw-r--r--  arch/s390/include/asm/cmpxchg.h | 32
-rw-r--r--  arch/s390/include/asm/cpu_mf.h | 2
-rw-r--r--  arch/s390/include/asm/percpu.h | 34
-rw-r--r--  arch/s390/kernel/perf_cpum_sf.c | 16
-rw-r--r--  arch/sh/include/asm/atomic-grb.h | 9
-rw-r--r--  arch/sh/include/asm/atomic-irq.h | 9
-rw-r--r--  arch/sh/include/asm/atomic-llsc.h | 9
-rw-r--r--  arch/sh/include/asm/atomic.h | 3
-rw-r--r--  arch/sparc/include/asm/atomic_32.h | 18
-rw-r--r--  arch/sparc/include/asm/atomic_64.h | 29
-rw-r--r--  arch/x86/include/asm/atomic.h | 87
-rw-r--r--  arch/x86/include/asm/atomic64_32.h | 76
-rw-r--r--  arch/x86/include/asm/atomic64_64.h | 81
-rw-r--r--  arch/x86/include/asm/cmpxchg.h | 25
-rw-r--r--  arch/x86/include/asm/cmpxchg_32.h | 2
-rw-r--r--  arch/x86/include/asm/cmpxchg_64.h | 67
-rw-r--r--  arch/x86/include/asm/percpu.h | 102
-rw-r--r--  arch/x86/kernel/alternative.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mce/core.c | 16
-rw-r--r--  arch/x86/kernel/nmi.c | 2
-rw-r--r--  arch/x86/kernel/pvclock.c | 4
-rw-r--r--  arch/x86/kvm/x86.c | 2
-rw-r--r--  arch/x86/lib/Makefile | 3
-rw-r--r--  arch/x86/lib/cmpxchg16b_emu.S | 43
-rw-r--r--  arch/x86/lib/cmpxchg8b_emu.S | 67
-rw-r--r--  arch/xtensa/include/asm/atomic.h | 12
53 files changed, 533 insertions(+), 930 deletions(-)
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index f2861a43a61e..cbd9244571af 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -200,25 +200,6 @@ ATOMIC_OPS(xor, xor)
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
-#define arch_atomic64_cmpxchg(v, old, new) \
- (arch_cmpxchg(&((v)->counter), old, new))
-#define arch_atomic64_xchg(v, new) \
- (arch_xchg(&((v)->counter), new))
-
-#define arch_atomic_cmpxchg(v, old, new) \
- (arch_cmpxchg(&((v)->counter), old, new))
-#define arch_atomic_xchg(v, new) \
- (arch_xchg(&((v)->counter), new))
-
-/**
- * arch_atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
{
int c, new, old;
@@ -242,15 +223,6 @@ static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
}
#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
-/**
- * arch_atomic64_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
s64 c, new, old;
@@ -274,13 +246,6 @@ static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u
}
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
-/*
- * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
s64 old, tmp;
diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h
index 2c830347bfb4..89d12a60f84c 100644
--- a/arch/arc/include/asm/atomic-spinlock.h
+++ b/arch/arc/include/asm/atomic-spinlock.h
@@ -81,6 +81,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
@@ -92,7 +97,11 @@ ATOMIC_OPS(or, |=, or)
ATOMIC_OPS(xor, ^=, xor)
#define arch_atomic_andnot arch_atomic_andnot
+
+#define arch_atomic_fetch_and arch_atomic_fetch_and
#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 52ee51e1ff7c..592d7fffc223 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -22,30 +22,6 @@
#include <asm/atomic-spinlock.h>
#endif
-#define arch_atomic_cmpxchg(v, o, n) \
-({ \
- arch_cmpxchg(&((v)->counter), (o), (n)); \
-})
-
-#ifdef arch_cmpxchg_relaxed
-#define arch_atomic_cmpxchg_relaxed(v, o, n) \
-({ \
- arch_cmpxchg_relaxed(&((v)->counter), (o), (n)); \
-})
-#endif
-
-#define arch_atomic_xchg(v, n) \
-({ \
- arch_xchg(&((v)->counter), (n)); \
-})
-
-#ifdef arch_xchg_relaxed
-#define arch_atomic_xchg_relaxed(v, n) \
-({ \
- arch_xchg_relaxed(&((v)->counter), (n)); \
-})
-#endif
-
/*
* 64-bit atomics
*/
diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h
index c5a8010fdc97..6b6db981967a 100644
--- a/arch/arc/include/asm/atomic64-arcv2.h
+++ b/arch/arc/include/asm/atomic64-arcv2.h
@@ -159,6 +159,7 @@ arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
return prev;
}
+#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
{
@@ -179,14 +180,7 @@ static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
return prev;
}
-
-/**
- * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic64_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
+#define arch_atomic64_xchg arch_atomic64_xchg
static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
@@ -212,15 +206,6 @@ static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
}
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
-/**
- * arch_atomic64_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, if it was not @u.
- * Returns the old value of @v
- */
static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
s64 old, temp;
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 505a306e0271..aebe2c8f6a68 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -394,6 +394,23 @@ ALT_UP_B(.L0_\@)
#endif
.endm
+/*
+ * Raw SMP data memory barrier
+ */
+ .macro __smp_dmb mode
+#if __LINUX_ARM_ARCH__ >= 7
+ .ifeqs "\mode","arm"
+ dmb ish
+ .else
+ W(dmb) ish
+ .endif
+#elif __LINUX_ARM_ARCH__ == 6
+ mcr p15, 0, r0, c7, c10, 5 @ dmb
+#else
+ .error "Incompatible SMP platform"
+#endif
+ .endm
+
#if defined(CONFIG_CPU_V7M)
/*
* setmode is used to assert to be in svc mode during boot. For v7-M
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index db8512d9a918..f0e3b01afa74 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -197,6 +197,16 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
return val; \
}
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
int ret;
@@ -210,8 +220,7 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
return ret;
}
-
-#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
+#define arch_atomic_cmpxchg arch_atomic_cmpxchg
#endif /* __LINUX_ARM_ARCH__ */
@@ -240,8 +249,6 @@ ATOMIC_OPS(xor, ^=, eor)
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
-#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-
#ifndef CONFIG_GENERIC_ATOMIC64
typedef struct {
s64 counter;
diff --git a/arch/arm/include/asm/sync_bitops.h b/arch/arm/include/asm/sync_bitops.h
index 6f5d627c44a3..f46b3c570f92 100644
--- a/arch/arm/include/asm/sync_bitops.h
+++ b/arch/arm/include/asm/sync_bitops.h
@@ -14,14 +14,35 @@
* ops which are SMP safe even on a UP kernel.
*/
+/*
+ * Unordered
+ */
+
#define sync_set_bit(nr, p) _set_bit(nr, p)
#define sync_clear_bit(nr, p) _clear_bit(nr, p)
#define sync_change_bit(nr, p) _change_bit(nr, p)
-#define sync_test_and_set_bit(nr, p) _test_and_set_bit(nr, p)
-#define sync_test_and_clear_bit(nr, p) _test_and_clear_bit(nr, p)
-#define sync_test_and_change_bit(nr, p) _test_and_change_bit(nr, p)
#define sync_test_bit(nr, addr) test_bit(nr, addr)
-#define arch_sync_cmpxchg arch_cmpxchg
+/*
+ * Fully ordered
+ */
+
+int _sync_test_and_set_bit(int nr, volatile unsigned long * p);
+#define sync_test_and_set_bit(nr, p) _sync_test_and_set_bit(nr, p)
+
+int _sync_test_and_clear_bit(int nr, volatile unsigned long * p);
+#define sync_test_and_clear_bit(nr, p) _sync_test_and_clear_bit(nr, p)
+
+int _sync_test_and_change_bit(int nr, volatile unsigned long * p);
+#define sync_test_and_change_bit(nr, p) _sync_test_and_change_bit(nr, p)
+
+#define arch_sync_cmpxchg(ptr, old, new) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __smp_mb__before_atomic(); \
+ __ret = arch_cmpxchg_relaxed((ptr), (old), (new)); \
+ __smp_mb__after_atomic(); \
+ __ret; \
+})
#endif
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 95bd35991288..f069d1b2318e 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -28,7 +28,7 @@ UNWIND( .fnend )
ENDPROC(\name )
.endm
- .macro testop, name, instr, store
+ .macro __testop, name, instr, store, barrier
ENTRY( \name )
UNWIND( .fnstart )
ands ip, r1, #3
@@ -38,7 +38,7 @@ UNWIND( .fnstart )
mov r0, r0, lsr #5
add r1, r1, r0, lsl #2 @ Get word offset
mov r3, r2, lsl r3 @ create mask
- smp_dmb
+ \barrier
#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
.arch_extension mp
ALT_SMP(W(pldw) [r1])
@@ -50,13 +50,21 @@ UNWIND( .fnstart )
strex ip, r2, [r1]
cmp ip, #0
bne 1b
- smp_dmb
+ \barrier
cmp r0, #0
movne r0, #1
2: bx lr
UNWIND( .fnend )
ENDPROC(\name )
.endm
+
+ .macro testop, name, instr, store
+ __testop \name, \instr, \store, smp_dmb
+ .endm
+
+ .macro sync_testop, name, instr, store
+ __testop \name, \instr, \store, __smp_dmb
+ .endm
#else
.macro bitop, name, instr
ENTRY( \name )
diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S
index 4ebecc67e6e0..f13fe9bc2399 100644
--- a/arch/arm/lib/testchangebit.S
+++ b/arch/arm/lib/testchangebit.S
@@ -10,3 +10,7 @@
.text
testop _test_and_change_bit, eor, str
+
+#if __LINUX_ARM_ARCH__ >= 6
+sync_testop _sync_test_and_change_bit, eor, str
+#endif
diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S
index 009afa0f5b4a..4d2c5ca620eb 100644
--- a/arch/arm/lib/testclearbit.S
+++ b/arch/arm/lib/testclearbit.S
@@ -10,3 +10,7 @@
.text
testop _test_and_clear_bit, bicne, strne
+
+#if __LINUX_ARM_ARCH__ >= 6
+sync_testop _sync_test_and_clear_bit, bicne, strne
+#endif
diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S
index f3192e55acc8..649dbab65d8d 100644
--- a/arch/arm/lib/testsetbit.S
+++ b/arch/arm/lib/testsetbit.S
@@ -10,3 +10,7 @@
.text
testop _test_and_set_bit, orreq, streq
+
+#if __LINUX_ARM_ARCH__ >= 6
+sync_testop _sync_test_and_set_bit, orreq, streq
+#endif
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index c9979273d389..400d279e0f8d 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -142,24 +142,6 @@ static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v)
#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release
#define arch_atomic_fetch_xor arch_atomic_fetch_xor
-#define arch_atomic_xchg_relaxed(v, new) \
- arch_xchg_relaxed(&((v)->counter), (new))
-#define arch_atomic_xchg_acquire(v, new) \
- arch_xchg_acquire(&((v)->counter), (new))
-#define arch_atomic_xchg_release(v, new) \
- arch_xchg_release(&((v)->counter), (new))
-#define arch_atomic_xchg(v, new) \
- arch_xchg(&((v)->counter), (new))
-
-#define arch_atomic_cmpxchg_relaxed(v, old, new) \
- arch_cmpxchg_relaxed(&((v)->counter), (old), (new))
-#define arch_atomic_cmpxchg_acquire(v, old, new) \
- arch_cmpxchg_acquire(&((v)->counter), (old), (new))
-#define arch_atomic_cmpxchg_release(v, old, new) \
- arch_cmpxchg_release(&((v)->counter), (old), (new))
-#define arch_atomic_cmpxchg(v, old, new) \
- arch_cmpxchg(&((v)->counter), (old), (new))
-
#define arch_atomic_andnot arch_atomic_andnot
/*
@@ -209,16 +191,6 @@ static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v)
#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
-#define arch_atomic64_xchg_relaxed arch_atomic_xchg_relaxed
-#define arch_atomic64_xchg_acquire arch_atomic_xchg_acquire
-#define arch_atomic64_xchg_release arch_atomic_xchg_release
-#define arch_atomic64_xchg arch_atomic_xchg
-
-#define arch_atomic64_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed
-#define arch_atomic64_cmpxchg_acquire arch_atomic_cmpxchg_acquire
-#define arch_atomic64_cmpxchg_release arch_atomic_cmpxchg_release
-#define arch_atomic64_cmpxchg arch_atomic_cmpxchg
-
#define arch_atomic64_andnot arch_atomic64_andnot
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index cbb3d961123b..89d2ba272359 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -294,38 +294,46 @@ __CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory", L)
#undef __CMPXCHG_CASE
-#define __CMPXCHG_DBL(name, mb, rel, cl) \
-static __always_inline long \
-__ll_sc__cmpxchg_double##name(unsigned long old1, \
- unsigned long old2, \
- unsigned long new1, \
- unsigned long new2, \
- volatile void *ptr) \
+union __u128_halves {
+ u128 full;
+ struct {
+ u64 low, high;
+ };
+};
+
+#define __CMPXCHG128(name, mb, rel, cl...) \
+static __always_inline u128 \
+__ll_sc__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \
{ \
- unsigned long tmp, ret; \
+ union __u128_halves r, o = { .full = (old) }, \
+ n = { .full = (new) }; \
+ unsigned int tmp; \
\
- asm volatile("// __cmpxchg_double" #name "\n" \
- " prfm pstl1strm, %2\n" \
- "1: ldxp %0, %1, %2\n" \
- " eor %0, %0, %3\n" \
- " eor %1, %1, %4\n" \
- " orr %1, %0, %1\n" \
- " cbnz %1, 2f\n" \
- " st" #rel "xp %w0, %5, %6, %2\n" \
- " cbnz %w0, 1b\n" \
+ asm volatile("// __cmpxchg128" #name "\n" \
+ " prfm pstl1strm, %[v]\n" \
+ "1: ldxp %[rl], %[rh], %[v]\n" \
+ " cmp %[rl], %[ol]\n" \
+ " ccmp %[rh], %[oh], 0, eq\n" \
+ " b.ne 2f\n" \
+ " st" #rel "xp %w[tmp], %[nl], %[nh], %[v]\n" \
+ " cbnz %w[tmp], 1b\n" \
" " #mb "\n" \
"2:" \
- : "=&r" (tmp), "=&r" (ret), "+Q" (*(__uint128_t *)ptr) \
- : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \
- : cl); \
+ : [v] "+Q" (*(u128 *)ptr), \
+ [rl] "=&r" (r.low), [rh] "=&r" (r.high), \
+ [tmp] "=&r" (tmp) \
+ : [ol] "r" (o.low), [oh] "r" (o.high), \
+ [nl] "r" (n.low), [nh] "r" (n.high) \
+ : "cc", ##cl); \
\
- return ret; \
+ return r.full; \
}
-__CMPXCHG_DBL( , , , )
-__CMPXCHG_DBL(_mb, dmb ish, l, "memory")
+__CMPXCHG128( , , )
+__CMPXCHG128(_mb, dmb ish, l, "memory")
+
+#undef __CMPXCHG128
-#undef __CMPXCHG_DBL
#undef K
#endif /* __ASM_ATOMIC_LL_SC_H */
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 319958b95cfd..87f568a94e55 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -281,40 +281,35 @@ __CMPXCHG_CASE(x, , mb_, 64, al, "memory")
#undef __CMPXCHG_CASE
-#define __CMPXCHG_DBL(name, mb, cl...) \
-static __always_inline long \
-__lse__cmpxchg_double##name(unsigned long old1, \
- unsigned long old2, \
- unsigned long new1, \
- unsigned long new2, \
- volatile void *ptr) \
+#define __CMPXCHG128(name, mb, cl...) \
+static __always_inline u128 \
+__lse__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \
{ \
- unsigned long oldval1 = old1; \
- unsigned long oldval2 = old2; \
- register unsigned long x0 asm ("x0") = old1; \
- register unsigned long x1 asm ("x1") = old2; \
- register unsigned long x2 asm ("x2") = new1; \
- register unsigned long x3 asm ("x3") = new2; \
+ union __u128_halves r, o = { .full = (old) }, \
+ n = { .full = (new) }; \
+ register unsigned long x0 asm ("x0") = o.low; \
+ register unsigned long x1 asm ("x1") = o.high; \
+ register unsigned long x2 asm ("x2") = n.low; \
+ register unsigned long x3 asm ("x3") = n.high; \
register unsigned long x4 asm ("x4") = (unsigned long)ptr; \
\
asm volatile( \
__LSE_PREAMBLE \
" casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
- " eor %[old1], %[old1], %[oldval1]\n" \
- " eor %[old2], %[old2], %[oldval2]\n" \
- " orr %[old1], %[old1], %[old2]" \
: [old1] "+&r" (x0), [old2] "+&r" (x1), \
- [v] "+Q" (*(__uint128_t *)ptr) \
+ [v] "+Q" (*(u128 *)ptr) \
: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
- [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \
+ [oldval1] "r" (o.low), [oldval2] "r" (o.high) \
: cl); \
\
- return x0; \
+ r.low = x0; r.high = x1; \
+ \
+ return r.full; \
}
-__CMPXCHG_DBL( , )
-__CMPXCHG_DBL(_mb, al, "memory")
+__CMPXCHG128( , )
+__CMPXCHG128(_mb, al, "memory")
-#undef __CMPXCHG_DBL
+#undef __CMPXCHG128
#endif /* __ASM_ATOMIC_LSE_H */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index c6bc5d8ec3ca..d7a540736741 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -130,21 +130,18 @@ __CMPXCHG_CASE(mb_, 64)
#undef __CMPXCHG_CASE