/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Bit sliced AES using NEON instructions
*
* Copyright (C) 2017 Linaro Ltd.
* Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
*/
/*
* The algorithm implemented here is described in detail by the paper
* 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
* Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
*
* This implementation is based primarily on the OpenSSL implementation
* for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
.fpu neon
/*
 * Symbolic names for core registers.
 * NOTE(review): the names suggest the AES round count (held in ip, i.e. r12)
 * and a pointer to the bit-sliced key schedule (held in r4); their uses lie
 * outside this section of the file -- confirm against the callers.
 */
rounds .req ip
bskey .req r4
/*
 * Aliases for the two 64-bit halves of each 128-bit NEON register:
 * qNl names the D register holding the low half of qN (d(2N)) and qNh the
 * one holding the high half (d(2N+1)).  Macros such as __tbl and __ldr build
 * these names by appending 'l' or 'h' to a Q register name passed as a
 * macro argument.
 */
q0l .req d0
q0h .req d1
q1l .req d2
q1h .req d3
q2l .req d4
q2h .req d5
q3l .req d6
q3h .req d7
q4l .req d8
q4h .req d9
q5l .req d10
q5h .req d11
q6l .req d12
q6h .req d13
q7l .req d14
q7h .req d15
q8l .req d16
q8h .req d17
q9l .req d18
q9h .req d19
q10l .req d20
q10h .req d21
q11l .req d22
q11h .req d23
q12l .req d24
q12h .req d25
q13l .req d26
q13h .req d27
q14l .req d28
q14h .req d29
q15l .req d30
q15h .req d31
.macro __tbl, out, tbl, in, tmp
.ifc \out, \tbl
.ifb \tmp
.error __tbl needs temp register if out == tbl
.endif
vmov \tmp, \out
.endif
vtbl.8 \out\()l, {\tbl}, \in\()l
.ifc \out, \tbl
vtbl.8 \out\()h, {\tmp}, \in\()h
.else
vtbl.8 \out\()h, {\tbl}, \in\()h
.endif
.endm
/*
 * __ldr - load a 128-bit value from a symbol into a Q register
 *
 * out: Q register name; its halves are addressed as out-l / out-h through
 *      the qNl/qNh aliases
 * sym: address expression of the 16-byte value
 *
 * Issues two 64-bit vldr loads: the low half from sym and the high half
 * from sym + 8.
 */
.macro __ldr, out, sym
vldr \out\()l, \sym
vldr \out\()h, \sym + 8
.endm
/*
 * in_bs_ch - in-place XOR-only (linear) transform of eight registers
 *
 * b0..b7: the eight registers operated on.  b1..b7 are overwritten; b0 is
 *         only read.
 *
 * NOTE(review): the name suggests this is the input basis change of the
 * bit-sliced S-box from the paper cited at the top of the file -- the
 * callers are outside this section, confirm there.
 *
 * The instruction order is significant: later XORs consume values that
 * were updated by earlier ones (e.g. b6 ^= b2 uses the already-updated b2).
 */
.macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
veor \b2, \b2, \b1
veor \b5, \b5, \b6
veor \b3, \b3, \b0
veor \b6, \b6, \b2
veor \b5, \b5, \b0
veor \b6, \b6, \b3
veor \b3, \b3, \b7
veor \b7, \b7, \b5
veor \b3, \b3, \b4
veor \b4, \b4, \b5
veor \b2, \b2, \b7
veor \b3, \b3, \b1
veor \b1, \b1, \b5
.endm
/*
 * out_bs_ch - in-place XOR-only (linear) transform of eight registers
 *
 * b0..b7: the eight registers operated on; every one of them is
 *         overwritten.
 *
 * NOTE(review): the name suggests this is the output basis change of the
 * bit-sliced S-box, the counterpart of in_bs_ch -- confirm against the
 * callers, which are outside this section.
 *
 * The instruction order is significant: later XORs consume values that
 * were updated by earlier ones (e.g. b2 ^= b0 uses the already-updated b0).
 */
.macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
veor \b0, \b0, \b6
veor \b1, \b1, \b4
veor \b4, \b4, \b6
veor \b2, \b2, \b0
veor \b6, \b6, \b1
veor \b1, \b1, \b5
veor \b5, \b5, \b3
veor \b3, \b3, \b7
veor \b7, \b7, \b5
veor \b2, \b2, \b5
veor \b4, \b4, \b7
.endm
/*
 * inv_in_bs_ch - in-place XOR-only (linear) transform of eight registers
 *
 * The formal parameters are declared in the permuted order
 * b6, b1, b2, b4, b7, b0, b3, b5: the first positional argument is referred
 * to as b6 in the body, the second as b1, the third as b2, and so on.
 * All eight registers are overwritten.
 *
 * NOTE(review): the name suggests this is the input basis change used for
 * the inverse S-box -- confirm against the callers, which are outside this
 * section.
 *
 * The instruction order is significant: later XORs consume values that
 * were updated by earlier ones (e.g. b6 ^= b1 uses the already-updated b1).
 */
.macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
veor \b1, \b1, \b7
veor \b4, \b4, \b7
veor \b7, \b7, \b5
veor \b1, \b1, \b3
veor \b2, \b2, \b5
veor \b3, \b3, \b7
veor \b6, \b6, \b1
veor \b2, \b2, \b0
veor \b5, \b5, \b3
veor \b4, \b4, \b6
veor \b0, \b0, \b6
veor \b1, \b1, \b4
.endm
/*
 * inv_out_bs_ch - in-place XOR-only (linear) transform of eight registers
 *
 * The formal parameters are declared in the permuted order
 * b6, b5, b0, b3, b7, b1, b4, b2: the first positional argument is referred
 * to as b6 in the body, the second as b5, the third as b0, and so on.
 * The register bound to b0 is only read; the other seven are overwritten.
 *
 * NOTE(review): the name suggests this is the output basis change used for
 * the inverse S-box -- confirm against the callers, which are outside this
 * section.
 *
 * The instruction order is significant: later XORs consume values that
 * were updated by earlier ones (e.g. b3 ^= b1 uses the already-updated b1).
 */
.macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
veor \b1, \b1, \b5
veor \b2, \b2, \b7
veor \b3, \b3, \b1
veor \b4, \b4, \b5
veor \b7, \b7, \b5
veor \b3, \b3, \b4
veor \b5, \b5, \b0
veor \b3, \b3, \b7
veor \b6, \b6, \b2
veor \b2, \b2, \b1
veor \b6, \b6, \b3
veor \b3, \b3, \b0
veor \b5, \b5, \b6
.endm
.macro mul_gf4, x0, x1, y0, y1, t0, t1
veor \t0, \y0, \y1
vand \t0, \t0, \x0
veor \x0, \x0,<