summaryrefslogtreecommitdiff
path: root/arch/x86/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/atomic64_386_32.S86
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S16
-rw-r--r--arch/x86/lib/checksum_32.S27
-rw-r--r--arch/x86/lib/clear_page_64.S6
-rw-r--r--arch/x86/lib/cmpxchg16b_emu.S4
-rw-r--r--arch/x86/lib/cmpxchg8b_emu.S4
-rw-r--r--arch/x86/lib/copy_mc_64.S18
-rw-r--r--arch/x86/lib/copy_page_64.S4
-rw-r--r--arch/x86/lib/copy_user_64.S42
-rw-r--r--arch/x86/lib/csum-copy_64.S2
-rw-r--r--arch/x86/lib/csum-partial_64.c183
-rw-r--r--arch/x86/lib/error-inject.c3
-rw-r--r--arch/x86/lib/getuser.S22
-rw-r--r--arch/x86/lib/hweight.S6
-rw-r--r--arch/x86/lib/insn-eval.c71
-rw-r--r--arch/x86/lib/iomap_copy_64.S2
-rw-r--r--arch/x86/lib/memcpy_32.c4
-rw-r--r--arch/x86/lib/memcpy_64.S12
-rw-r--r--arch/x86/lib/memmove_64.S4
-rw-r--r--arch/x86/lib/memset_64.S6
-rw-r--r--arch/x86/lib/mmx_32.c388
-rw-r--r--arch/x86/lib/msr-reg.S4
-rw-r--r--arch/x86/lib/putuser.S6
-rw-r--r--arch/x86/lib/retpoline.S4
-rw-r--r--arch/x86/lib/usercopy_32.c67
-rw-r--r--arch/x86/lib/usercopy_64.c8
27 files changed, 295 insertions, 705 deletions
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index c6506c6a7092..f76747862bd2 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -63,7 +63,6 @@ ifeq ($(CONFIG_X86_32),y)
ifneq ($(CONFIG_X86_CMPXCHG64),y)
lib-y += cmpxchg8b_emu.o atomic64_386_32.o
endif
- lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
else
obj-y += iomap_copy_64.o
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index 16bc9130e7a5..e768815e58ae 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -9,81 +9,83 @@
#include <asm/alternative.h>
/* if you want SMP support, implement these with real spinlocks */
-.macro LOCK reg
+.macro IRQ_SAVE reg
pushfl
cli
.endm
-.macro UNLOCK reg
+.macro IRQ_RESTORE reg
popfl
.endm
-#define BEGIN(op) \
+#define BEGIN_IRQ_SAVE(op) \
.macro endp; \
SYM_FUNC_END(atomic64_##op##_386); \
.purgem endp; \
.endm; \
SYM_FUNC_START(atomic64_##op##_386); \
- LOCK v;
+ IRQ_SAVE v;
#define ENDP endp
-#define RET \
- UNLOCK v; \
- ret
-
-#define RET_ENDP \
- RET; \
- ENDP
+#define RET_IRQ_RESTORE \
+ IRQ_RESTORE v; \
+ RET
#define v %ecx
-BEGIN(read)
+BEGIN_IRQ_SAVE(read)
movl (v), %eax
movl 4(v), %edx
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %esi
-BEGIN(set)
+BEGIN_IRQ_SAVE(set)
movl %ebx, (v)
movl %ecx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %esi
-BEGIN(xchg)
+BEGIN_IRQ_SAVE(xchg)
movl (v), %eax
movl 4(v), %edx
movl %ebx, (v)
movl %ecx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %ecx
-BEGIN(add)
+BEGIN_IRQ_SAVE(add)
addl %eax, (v)
adcl %edx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %ecx
-BEGIN(add_return)
+BEGIN_IRQ_SAVE(add_return)
addl (v), %eax
adcl 4(v), %edx
movl %eax, (v)
movl %edx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %ecx
-BEGIN(sub)
+BEGIN_IRQ_SAVE(sub)
subl %eax, (v)
sbbl %edx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %ecx
-BEGIN(sub_return)
+BEGIN_IRQ_SAVE(sub_return)
negl %edx
negl %eax
sbbl $0, %edx
@@ -91,47 +93,52 @@ BEGIN(sub_return)
adcl 4(v), %edx
movl %eax, (v)
movl %edx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %esi
-BEGIN(inc)
+BEGIN_IRQ_SAVE(inc)
addl $1, (v)
adcl $0, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %esi
-BEGIN(inc_return)
+BEGIN_IRQ_SAVE(inc_return)
movl (v), %eax
movl 4(v), %edx
addl $1, %eax
adcl $0, %edx
movl %eax, (v)
movl %edx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %esi
-BEGIN(dec)
+BEGIN_IRQ_SAVE(dec)
subl $1, (v)
sbbl $0, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %esi
-BEGIN(dec_return)
+BEGIN_IRQ_SAVE(dec_return)
movl (v), %eax
movl 4(v), %edx
subl $1, %eax
sbbl $0, %edx
movl %eax, (v)
movl %edx, 4(v)
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
#define v %esi
-BEGIN(add_unless)
+BEGIN_IRQ_SAVE(add_unless)
addl %eax, %ecx
adcl %edx, %edi
addl (v), %eax
@@ -143,7 +150,7 @@ BEGIN(add_unless)
movl %edx, 4(v)
movl $1, %eax
2:
- RET
+ RET_IRQ_RESTORE
3:
cmpl %edx, %edi
jne 1b
@@ -153,7 +160,7 @@ ENDP
#undef v
#define v %esi
-BEGIN(inc_not_zero)
+BEGIN_IRQ_SAVE(inc_not_zero)
movl (v), %eax
movl 4(v), %edx
testl %eax, %eax
@@ -165,7 +172,7 @@ BEGIN(inc_not_zero)
movl %edx, 4(v)
movl $1, %eax
2:
- RET
+ RET_IRQ_RESTORE
3:
testl %edx, %edx
jne 1b
@@ -174,7 +181,7 @@ ENDP
#undef v
#define v %esi
-BEGIN(dec_if_positive)
+BEGIN_IRQ_SAVE(dec_if_positive)
movl (v), %eax
movl 4(v), %edx
subl $1, %eax
@@ -183,5 +190,6 @@ BEGIN(dec_if_positive)
movl %eax, (v)
movl %edx, 4(v)
1:
-RET_ENDP
+ RET_IRQ_RESTORE
+ENDP
#undef v
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index ce6935690766..90afb488b396 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -18,7 +18,7 @@
SYM_FUNC_START(atomic64_read_cx8)
read64 %ecx
- ret
+ RET
SYM_FUNC_END(atomic64_read_cx8)
SYM_FUNC_START(atomic64_set_cx8)
@@ -28,7 +28,7 @@ SYM_FUNC_START(atomic64_set_cx8)
cmpxchg8b (%esi)
jne 1b
- ret
+ RET
SYM_FUNC_END(atomic64_set_cx8)
SYM_FUNC_START(atomic64_xchg_cx8)
@@ -37,7 +37,7 @@ SYM_FUNC_START(atomic64_xchg_cx8)
cmpxchg8b (%esi)
jne 1b
- ret
+ RET
SYM_FUNC_END(atomic64_xchg_cx8)
.macro addsub_return func ins insc
@@ -68,7 +68,7 @@ SYM_FUNC_START(atomic64_\func\()_return_cx8)
popl %esi
popl %ebx
popl %ebp
- ret
+ RET
SYM_FUNC_END(atomic64_\func\()_return_cx8)
.endm
@@ -93,7 +93,7 @@ SYM_FUNC_START(atomic64_\func\()_return_cx8)
movl %ebx, %eax
movl %ecx, %edx
popl %ebx
- ret
+ RET
SYM_FUNC_END(atomic64_\func\()_return_cx8)
.endm
@@ -118,7 +118,7 @@ SYM_FUNC_START(atomic64_dec_if_positive_cx8)
movl %ebx, %eax
movl %ecx, %edx
popl %ebx
- ret
+ RET
SYM_FUNC_END(atomic64_dec_if_positive_cx8)
SYM_FUNC_START(atomic64_add_unless_cx8)
@@ -149,7 +149,7 @@ SYM_FUNC_START(atomic64_add_unless_cx8)
addl $8, %esp
popl %ebx
popl %ebp
- ret
+ RET
4:
cmpl %edx, 4(%esp)
jne 2b
@@ -176,5 +176,5 @@ SYM_FUNC_START(atomic64_inc_not_zero_cx8)
movl $1, %eax
3:
popl %ebx
- ret
+ RET
SYM_FUNC_END(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4304320e51f4..23318c338db0 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -127,7 +127,7 @@ SYM_FUNC_START(csum_partial)
8:
popl %ebx
popl %esi
- ret
+ RET
SYM_FUNC_END(csum_partial)
#else
@@ -245,7 +245,7 @@ SYM_FUNC_START(csum_partial)
90:
popl %ebx
popl %esi
- ret
+ RET
SYM_FUNC_END(csum_partial)
#endif
@@ -260,9 +260,9 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
* Copy from ds while checksumming, otherwise like csum_partial
*/
-#define EXC(y...) \
- 9999: y; \
- _ASM_EXTABLE_UA(9999b, 6001f)
+#define EXC(y...) \
+ 9999: y; \
+ _ASM_EXTABLE_TYPE(9999b, 7f, EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX)
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
@@ -358,20 +358,11 @@ EXC( movb %cl, (%edi) )
adcl $0, %eax
7:
-# Exception handler:
-.section .fixup, "ax"
-
-6001:
- xorl %eax, %eax
- jmp 7b
-
-.previous
-
popl %ebx
popl %esi
popl %edi
popl %ecx # equivalent to addl $4,%esp
- ret
+ RET
SYM_FUNC_END(csum_partial_copy_generic)
#else
@@ -439,15 +430,11 @@ EXC( movb %dl, (%edi) )
6: addl %edx, %eax
adcl $0, %eax
7:
-.section .fixup, "ax"
-6001: xorl %eax, %eax
- jmp 7b
-.previous
popl %esi
popl %edi
popl %ebx
- ret
+ RET
SYM_FUNC_END(csum_partial_copy_generic)
#undef ROUND
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index c4c7dd115953..fe59b8ac4fcc 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -17,7 +17,7 @@ SYM_FUNC_START(clear_page_rep)
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
- ret
+ RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
@@ -39,7 +39,7 @@ SYM_FUNC_START(clear_page_orig)
leaq 64(%rdi),%rdi
jnz .Lloop
nop
- ret
+ RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
@@ -47,6 +47,6 @@ SYM_FUNC_START(clear_page_erms)
movl $4096,%ecx
xorl %eax,%eax
rep stosb
- ret
+ RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 3542502faa3b..33c70c0160ea 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -37,11 +37,11 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
popfq
mov $1, %al
- ret
+ RET
.Lnot_same:
popfq
xor %al,%al
- ret
+ RET
SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
index ca01ed6029f4..6a912d58fecc 100644
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -32,7 +32,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
movl %ecx, 4(%esi)
popfl
- ret
+ RET
.Lnot_same:
movl (%esi), %eax
@@ -40,7 +40,7 @@ SYM_FUNC_START(cmpxchg8b_emu)
movl 4(%esi), %edx
popfl
- ret
+ RET
SYM_FUNC_END(cmpxchg8b_emu)
EXPORT_SYMBOL(cmpxchg8b_emu)
diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
index 7334055157ba..c859a8a09860 100644
--- a/arch/x86/lib/copy_mc_64.S
+++ b/arch/x86/lib/copy_mc_64.S
@@ -77,10 +77,8 @@ SYM_FUNC_START(copy_mc_fragile)
.L_done_memcpy_trap:
xorl %eax, %eax
.L_done:
- ret
-SYM_FUNC_END(copy_mc_fragile)
+ RET
- .section .fixup, "ax"
/*
* Return number of bytes not copied for any failure. Note that
* there is no "tail" handling since the source buffer is 8-byte
@@ -105,14 +103,14 @@ SYM_FUNC_END(copy_mc_fragile)
movl %ecx, %edx
jmp copy_mc_fragile_handle_tail
- .previous
-
_ASM_EXTABLE_TYPE(.L_read_leading_bytes, .E_leading_bytes, EX_TYPE_DEFAULT_MCE_SAFE)
_ASM_EXTABLE_TYPE(.L_read_words, .E_read_words, EX_TYPE_DEFAULT_MCE_SAFE)
_ASM_EXTABLE_TYPE(.L_read_trailing_bytes, .E_trailing_bytes, EX_TYPE_DEFAULT_MCE_SAFE)
_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE(.L_write_words, .E_write_words)
_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
+
+SYM_FUNC_END(copy_mc_fragile)
#endif /* CONFIG_X86_MCE */
/*
@@ -132,10 +130,8 @@ SYM_FUNC_START(copy_mc_enhanced_fast_string)
rep movsb
/* Copy successful. Return zero */
xorl %eax, %eax
- ret
-SYM_FUNC_END(copy_mc_enhanced_fast_string)
+ RET
- .section .fixup, "ax"
.E_copy:
/*
* On fault %rcx is updated such that the copy instruction could
@@ -145,9 +141,9 @@ SYM_FUNC_END(copy_mc_enhanced_fast_string)
* user-copy routines.
*/
movq %rcx, %rax
- ret
-
- .previous
+ RET
_ASM_EXTABLE_TYPE(.L_copy, .E_copy, EX_TYPE_DEFAULT_MCE_SAFE)
+
+SYM_FUNC_END(copy_mc_enhanced_fast_string)
#endif /* !CONFIG_UML */
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index db4b4f9197c7..30ea644bf446 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -17,7 +17,7 @@ SYM_FUNC_START(copy_page)
ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
movl $4096/8, %ecx
rep movsq
- ret
+ RET
SYM_FUNC_END(copy_page)
EXPORT_SYMBOL(copy_page)
@@ -85,5 +85,5 @@ SYM_FUNC_START_LOCAL(copy_page_regs)
movq (%rsp), %rbx
movq 1*8(%rsp), %r12
addq $2*8, %rsp
- ret
+ RET
SYM_FUNC_END(copy_page_regs)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index a2cbeae4b180..8ca5ecf16dc4 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -32,14 +32,10 @@
decl %ecx
jnz 100b
102:
- .section .fixup,"ax"
-103: addl %ecx,%edx /* ecx is zerorest also */
- jmp .Lcopy_user_handle_tail
- .previous
- _ASM_EXTABLE_CPY(100b, 103b)
- _ASM_EXTABLE_CPY(101b, 103b)
- .endm
+ _ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align)
+ _ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align)
+.endm
/*
* copy_user_generic_unrolled - memory copy with exception handling.
@@ -105,9 +101,8 @@ SYM_FUNC_START(copy_user_generic_unrolled)
jnz 21b
23: xor %eax,%eax
ASM_CLAC
- ret
+ RET
- .section .fixup,"ax"
30: shll $6,%ecx
addl %ecx,%edx
jmp 60f
@@ -115,7 +110,6 @@ SYM_FUNC_START(copy_user_generic_unrolled)
jmp 60f
50: movl %ecx,%edx
60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
- .previous
_ASM_EXTABLE_CPY(1b, 30b)
_ASM_EXTABLE_CPY(2b, 30b)
@@ -166,20 +160,16 @@ SYM_FUNC_START(copy_user_generic_string)
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
-1: rep
- movsq
+1: rep movsq
2: movl %edx,%ecx
-3: rep
- movsb
+3: rep movsb
xorl %eax,%eax
ASM_CLAC
- ret
+ RET
- .section .fixup,"ax"
11: leal (%rdx,%rcx,8),%ecx
12: movl %ecx,%edx /* ecx is zerorest also */
jmp .Lcopy_user_handle_tail
- .previous
_ASM_EXTABLE_CPY(1b, 11b)
_ASM_EXTABLE_CPY(3b, 12b)
@@ -203,16 +193,13 @@ SYM_FUNC_START(copy_user_enhanced_fast_string)
/* CPUs without FSRM should avoid rep movsb for short copies */
ALTERNATIVE "cmpl $64, %edx; jb .L_copy_short_string", "", X86_FEATURE_FSRM
movl %edx,%ecx
-1: rep
- movsb
+1: rep movsb
xorl %eax,%eax
ASM_CLAC
- ret
+ RET
- .section .fixup,"ax"
12: movl %ecx,%edx /* ecx is zerorest also */
jmp .Lcopy_user_handle_tail
- .previous
_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
@@ -241,7 +228,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
1: rep movsb
2: mov %ecx,%eax
ASM_CLAC
- ret
+ RET
3:
movl %edx,%eax
@@ -249,6 +236,11 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
RET
_ASM_EXTABLE_CPY(1b, 2b)
+
+.Lcopy_user_handle_align:
+ addl %ecx,%edx /* ecx is zerorest also */
+ jmp .Lcopy_user_handle_tail
+
SYM_CODE_END(.Lcopy_user_handle_tail)
/*
@@ -357,9 +349,8 @@ SYM_FUNC_START(__copy_user_nocache)
xorl %eax,%eax
ASM_CLAC
sfence
- ret
+ RET
- .section .fixup,"ax"
.L_fixup_4x8b_copy:
shll $6,%ecx
addl %ecx,%edx
@@ -375,7 +366,6 @@ SYM_FUNC_START(__copy_user_nocache)
.L_fixup_handle_tail:
sfence
jmp .Lcopy_user_handle_tail
- .previous
_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 1fbd8ee9642d..d9e16a2cf285 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -201,7 +201,7 @@ SYM_FUNC_START(csum_partial_copy_generic)
movq 3*8(%rsp), %r13
movq 4*8(%rsp), %r15
addq $5*8, %rsp
- ret
+ RET
.Lshort:
movl %ecx, %r10d
jmp .L1
diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c
index e7925d668b68..1f8a8f895173 100644
--- a/arch/x86/lib/csum-partial_64.c
+++ b/arch/x86/lib/csum-partial_64.c
@@ -9,6 +9,7 @@
#include <linux/compiler.h>
#include <linux/export.h>
#include <asm/checksum.h>
+#include <asm/word-at-a-time.h>
static inline unsigned short from32to16(unsigned a)
{
@@ -21,120 +22,119 @@ static inline unsigned short from32to16(unsigned a)
}
/*
- * Do a 64-bit checksum on an arbitrary memory area.
+ * Do a checksum on an arbitrary memory area.
* Returns a 32bit checksum.
*
* This isn't as time critical as it used to be because many NICs
* do hardware checksumming these days.
- *
- * Things tried and found to not make it faster:
- * Manual Prefetching
- * Unrolling to an 128 bytes inner loop.
- * Using interleaving with more registers to break the carry chains.
+ *
+ * Still, with CHECKSUM_COMPLETE this is called to compute
+ * checksums on IPv6 headers (40 bytes) and other small parts.
+ * it's best to have buff aligned on a 64-bit boundary
*/
-static unsigned do_csum(const unsigned char *buff, unsigned len)
+__wsum csum_partial(const void *buff, int len, __wsum sum)
{
- unsigned odd, count;
- unsigned long result = 0;
+ u64 temp64 = (__force u64)sum;
+ unsigned odd, result;
- if (unlikely(len == 0))
- return result;
odd = 1 & (unsigned long) buff;
if (unlikely(odd)) {
- result = *buff << 8;
+ if (unlikely(len == 0))
+ return sum;
+ temp64 = ror32((__force u32)sum, 8);
+ temp64 += (*(unsigned char *)buff << 8);
len--;
buff++;
}
- count = len >> 1; /* nr of 16-bit words.. */
- if (count) {
- if (2 & (unsigned long) buff) {
- result += *(unsigned short *)buff;
- count--;
- len -= 2;
- buff += 2;
- }
- count >>= 1; /* nr of 32-bit words.. */
- if (count) {
- unsigned long zero;
- unsigned count64;
- if (4 & (unsigned long) buff) {
- result += *(unsigned int *) buff;
- count--;
- len -= 4;
- buff += 4;
- }
- count >>= 1; /* nr of 64-bit words.. */
- /* main loop using 64byte blocks */
- zero = 0;
- count64 = count >> 3;
- while (count64) {
- asm("addq 0*8(%[src]),%[res]\n\t"
- "adcq 1*8(%[src]),%[res]\n\t"
- "adcq 2*8(%[src]),%[res]\n\t"
- "adcq 3*8(%[src]),%[res]\n\t"
- "adcq 4*8(%[src]),%[res]\n\t"
- "adcq 5*8(%[src]),%[res]\n\t"
- "adcq 6*8(%[src]),%[res]\n\t"
- "adcq 7*8(%[src]),%[res]\n\t"
- "adcq %[zero],%[res]"
- : [res] "=r" (result)
- : [src] "r" (buff), [zero] "r" (zero),
- "[res]" (result));
- buff += 64;
- count64--;
- }
+ while (unlikely(len >= 64)) {
+ asm("addq 0*8(%[src]),%[res]\n\t"
+ "adcq 1*8(%[src]),%[res]\n\t"
+ "adcq 2*8(%[src]),%[res]\n\t"
+ "adcq 3*8(%[src]),%[res]\n\t"
+ "adcq 4*8(%[src]),%[res]\n\t"
+ "adcq 5*8(%[src]),%[res]\n\t"
+ "adcq 6*8(%[src]),%[res]\n\t"
+ "adcq 7*8(%[src]),%[res]\n\t"
+ "adcq $0,%[res]"
+ : [res] "+r" (temp64)
+ : [src] "r" (buff)
+ : "memory");
+ buff += 64;
+ len -= 64;
+ }
+
+ if (len & 32) {
+ asm("addq 0*8(%[src]),%[res]\n\t"
+ "adcq 1*8(%[src]),%[res]\n\t"
+ "adcq 2*8(%[src]),%[res]\n\t"
+ "adcq 3*8(%[src]),%[res]\n\t"
+ "adcq $0,%[res]"
+ : [res] "+r" (temp64)
+ : [src] "r" (buff)
+ : "memory");
+ buff += 32;
+ }
+ if (len & 16) {
+ asm("addq 0*8(%[src]),%[res]\n\t"
+ "adcq 1*8(%[src]),%[res]\n\t"
+ "adcq $0,%[res]"
+ : [res] "+r" (temp64)
+ : [src] "r" (buff)
+ : "memory");
+ buff += 16;
+ }
+ if (len & 8) {
+ asm("addq 0*8(%[src]),%[res]\n\t"
+ "adcq $0,%[res]"
+ : [res] "+r" (temp64)
+ : [src] "r" (buff)
+ : "memory");
+ buff += 8;
+ }
+ if (len & 7) {
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+ unsigned int shift = (8 - (len & 7)) * 8;
+ unsigned long trail;
- /* last up to 7 8byte blocks */
- count %= 8;
- while (count) {
- asm("addq %1,%0\n\t"
- "adcq %2,%0\n"
- : "=r" (result)
- : "m" (*(unsigned long *)buff),
- "r" (zero), "0" (result));
- --count;
- buff += 8;
- }
- result = add32_with_carry(result>>32,
- result&0xffffffff);
+ trail = (load_unaligned_zeropad(buff) << shift) >> shift;
- if (len & 4) {
- result += *(unsigned int *) buff;
- buff += 4;
- }
+ asm("addq %[trail],%[res]\n\t"
+ "adcq $0,%[res]"
+ : [res] "+r" (temp64)
+ : [trail] "r" (trail));
+#else
+ if (len & 4) {
+ asm("addq %[val],%[res]\n\t"
+ "adcq $0,%[res]"
+ : [res] "+r" (temp64)
+ : [val] "r" ((u64)*(u32 *)buff)
+ : "memory");
+ buff += 4;
}
if (len & 2) {
- result += *(unsigned short *) buff;
+ asm("addq %[val],%[res]\n\t"
+ "adcq $0,%[res]"
+ : [res] "+r" (temp64)
+ : [val] "r" ((u64)*(u16 *)buff)
+ : "memory");
buff += 2;
}
+ if (len & 1) {
+ asm("addq %[val],%[res]\n\t"
+ "adcq $0,%[res]"
+ : [res] "+r" (temp64)
+ : [val] "r" ((u64)*(u8 *)buff)
+ : "memory");
+ }
+#endif
}
- if (len & 1)
- result += *buff;
- result = add32_with_carry(result>>32, result & 0xffffffff);
- if (unlikely(odd)) {
+ result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff);
+ if (unlikely(odd)) {
result = from32to16(result);
result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
}
- return result;
-}
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
- *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
- *
- * it's best to have buff aligned on a 64-bit boundary
- */
-__wsum csum_partial(const void *buff, int len, __wsum sum)
-{
- return (__force __wsum)add32_with_carry(do_csum(buff, len),
- (__force u32)sum);
+ return (__force __wsum)result;
}
EXPORT_SYMBOL(csum_partial);
@@ -147,4 +147,3 @@ __sum16 ip_compute_csum(const void *buff, int len)
return csum_fold(csum_partial(buff,len,0));
}
EXPORT_SYMBOL(ip_compute_csum);
-
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
index be5b5fb1598b..520897061ee0 100644
--- a/arch/x86/lib/error-inject.c
+++ b/arch/x86/lib/error-inject.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/linkage.h>
#include <linux/error-injection.h>
#include <linux/kprobes.h>
@@ -10,7 +11,7 @@ asm(
".type just_return_func, @function\n"
".globl just_return_func\n"
"just_return_func:\n"
- " ret\n"
+ ASM_RET
".size just_return_func, .-just_return_func\n"
);
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index fa1bc2104b32..b70d98d79a9d 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -57,7 +57,7 @@ SYM_FUNC_START(__get_user_1)
1: movzbl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__get_user_1)
EXPORT_SYMBOL(__get_user_1)
@@ -71,7 +71,7 @@ SYM_FUNC_START(__get_user_2)
2: movzwl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__get_user_2)
EXPORT_SYMBOL(__get_user_2)
@@ -85,7 +85,7 @@ SYM_FUNC_START(__get_user_4)
3: movl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__get_user_4)
EXPORT_SYMBOL(__get_user_4)
@@ -100,7 +100,7 @@ SYM_FUNC_START(__get_user_8)
4: movq (%_ASM_AX),%rdx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
#else
LOAD_TASK_SIZE_MINUS_N(7)
cmp %_ASM_DX,%_ASM_AX
@@ -112,7 +112,7 @@ SYM_FUNC_START(__get_user_8)
5: movl 4(%_ASM_AX),%ecx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
#endif
SYM_FUNC_END(__get_user_8)
EXPORT_SYMBOL(__get_user_8)
@@ -124,7 +124,7 @@ SYM_FUNC_START(__get_user_nocheck_1)
6: movzbl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__get_user_nocheck_1)
EXPORT_SYMBOL(__get_user_nocheck_1)
@@ -134,7 +134,7 @@ SYM_FUNC_START(__get_user_nocheck_2)
7: movzwl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__get_user_nocheck_2)
EXPORT_SYMBOL(__get_user_nocheck_2)
@@ -144,7 +144,7 @@ SYM_FUNC_START(__get_user_nocheck_4)
8: movl (%_ASM_AX),%edx
xor %eax,%eax
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__get_user_nocheck_4)
EXPORT_SYMBOL(__get_user_nocheck_4)
@@ -159,7 +159,7 @@ SYM_FUNC_START(__get_user_nocheck_8)
#endif
xor %eax,%eax
ASM_CLAC
- ret
+ RET
SYM_FUNC_END(__get_user_nocheck_8)
EXPORT_SYMBOL(__get_user_nocheck_8)
@@ -169,7 +169,7 @@ SYM_CODE_START_LOCAL(.Lbad_get_user_clac)
bad_get_user:
xor %edx,%edx
mov $(-EFAULT),%_ASM_AX
- ret
+ RET
SYM_CODE_END(.Lbad_get_user_clac)
#ifdef CONFIG_X86_32
@@ -179,7 +179,7 @@ bad_get_user_8:
xor %edx,%edx
xor %ecx,%ecx
mov $(-EFAULT),%_ASM_AX
- ret
+ RET
SYM_CODE_END(.Lbad_get_user_8_clac)
#endif
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
index dbf8cc97b7f5..12c16c6aa44a 100644
--- a/arch/x86/lib/hweight.S
+++ b/arch/x86/lib/hweight.S
@@ -32,7 +32,7 @@ SYM_FUNC_START(__sw_hweight32)
imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101
shrl $24, %eax # w = w_tmp >> 24
__ASM_SIZE(pop,) %__ASM_REG(dx)
- ret
+ RET
SYM_FUNC_END(__sw_hweight32)
EXPORT_SYMBOL(__sw_hweight32)
@@ -65,7 +65,7 @@ SYM_FUNC_START(__sw_hweight64)
popq %rdx
popq %rdi
- ret
+ RET
#else /* CONFIG_X86_32 */
/* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
pushl %ecx
@@ -77,7 +77,7 @@ SYM_FUNC_START(__sw_hweight64)
addl %ecx, %eax # result