summaryrefslogtreecommitdiff
path: root/lib/raid6/recov_neon_inner.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-03-10 10:17:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-03-10 10:17:23 -0700
commit3d8dfe75ef69f4dd4ba35c09b20a5aa58b4a5078 (patch)
treee5dd2ba86a027007610df67331304e083fe560ec /lib/raid6/recov_neon_inner.c
parentd6075262969321bcb5d795de25595fc2a141ac02 (diff)
parentb855b58ac1b7891b219e1d9ef60c45c774cadefe (diff)
downloadlinux-3d8dfe75ef69f4dd4ba35c09b20a5aa58b4a5078.tar.gz
linux-3d8dfe75ef69f4dd4ba35c09b20a5aa58b4a5078.tar.bz2
linux-3d8dfe75ef69f4dd4ba35c09b20a5aa58b4a5078.zip
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas: - Pseudo NMI support for arm64 using GICv3 interrupt priorities - uaccess macros clean-up (unsafe user accessors also merged but reverted, waiting for objtool support on arm64) - ptrace regsets for Pointer Authentication (ARMv8.3) key management - inX() ordering w.r.t. delay() on arm64 and riscv (acks in place by the riscv maintainers) - arm64/perf updates: PMU bindings converted to json-schema, unused variable and misleading comment removed - arm64/debug fixes to ensure checking of the triggering exception level and to avoid the propagation of the UNKNOWN FAR value into the si_code for debug signals - Workaround for Fujitsu A64FX erratum 010001 - lib/raid6 ARM NEON optimisations - NR_CPUS now defaults to 256 on arm64 - Minor clean-ups (documentation/comments, Kconfig warning, unused asm-offsets, clang warnings) - MAINTAINERS update for list information to the ARM64 ACPI entry * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (54 commits) arm64: mmu: drop paging_init comments arm64: debug: Ensure debug handlers check triggering exception level arm64: debug: Don't propagate UNKNOWN FAR into si_code for debug signals Revert "arm64: uaccess: Implement unsafe accessors" arm64: avoid clang warning about self-assignment arm64: Kconfig.platforms: fix warning unmet direct dependencies lib/raid6: arm: optimize away a mask operation in NEON recovery routine lib/raid6: use vdupq_n_u8 to avoid endianness warnings arm64: io: Hook up __io_par() for inX() ordering riscv: io: Update __io_[p]ar() macros to take an argument asm-generic/io: Pass result of I/O accessor to __io_[p]ar() arm64: Add workaround for Fujitsu A64FX erratum 010001 arm64: Rename get_thread_info() arm64: Remove documentation about TIF_USEDFPU arm64: irqflags: Fix clang build warnings arm64: Enable the support of pseudo-NMIs arm64: Skip irqflags tracing for NMI in IRQs disabled context arm64: Skip preemption when exiting an NMI arm64: Handle serror in NMI context irqchip/gic-v3: Allow interrupts to be set as pseudo-NMI ...
Diffstat (limited to 'lib/raid6/recov_neon_inner.c')
-rw-r--r--lib/raid6/recov_neon_inner.c19
1 files changed, 8 insertions, 11 deletions
diff --git a/lib/raid6/recov_neon_inner.c b/lib/raid6/recov_neon_inner.c
index 8cd20c9f834a..f13c07f82297 100644
--- a/lib/raid6/recov_neon_inner.c
+++ b/lib/raid6/recov_neon_inner.c
@@ -10,11 +10,6 @@
#include <arm_neon.h>
-static const uint8x16_t x0f = {
- 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
- 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-};
-
#ifdef CONFIG_ARM
/*
* AArch32 does not provide this intrinsic natively because it does not
@@ -41,6 +36,7 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
uint8x16_t pm1 = vld1q_u8(pbmul + 16);
uint8x16_t qm0 = vld1q_u8(qmul);
uint8x16_t qm1 = vld1q_u8(qmul + 16);
+ uint8x16_t x0f = vdupq_n_u8(0x0f);
/*
* while ( bytes-- ) {
@@ -60,14 +56,14 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
px = veorq_u8(vld1q_u8(p), vld1q_u8(dp));
vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq));
- vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4);
+ vy = vshrq_n_u8(vx, 4);
vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f));
- vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f));
+ vy = vqtbl1q_u8(qm1, vy);
qx = veorq_u8(vx, vy);
- vy = (uint8x16_t)vshrq_n_s16((int16x8_t)px, 4);
+ vy = vshrq_n_u8(px, 4);
vx = vqtbl1q_u8(pm0, vandq_u8(px, x0f));
- vy = vqtbl1q_u8(pm1, vandq_u8(vy, x0f));
+ vy = vqtbl1q_u8(pm1, vy);
vx = veorq_u8(vx, vy);
db = veorq_u8(vx, qx);
@@ -87,6 +83,7 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,
{
uint8x16_t qm0 = vld1q_u8(qmul);
uint8x16_t qm1 = vld1q_u8(qmul + 16);
+ uint8x16_t x0f = vdupq_n_u8(0x0f);
/*
* while (bytes--) {
@@ -100,9 +97,9 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,
vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq));
- vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4);
+ vy = vshrq_n_u8(vx, 4);
vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f));
- vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f));
+ vy = vqtbl1q_u8(qm1, vy);
vx = veorq_u8(vx, vy);
vy = veorq_u8(vx, vld1q_u8(p));