#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
#
# ====================================================================
# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL
# project.
# ====================================================================
# Poly1305 hash for MIPS.
#
# May 2016
#
# Numbers are cycles per processed byte with poly1305_blocks alone.
#
# IALU/gcc
# R1x000 ~5.5/+130% (big-endian)
# Octeon II 2.50/+70% (little-endian)
#
# March 2019
#
# Add 32-bit code path.
#
# October 2019
#
# Modulo-scheduling reduction allows omitting the dependency chain at
# the end of the inner loop, improving performance. Also optimize the
# MIPS32R2 code path for the MIPS 1004K core. Per René von Dorst's
# suggestions.
#
# IALU/gcc
# R1x000 ~9.8/? (big-endian)
# Octeon II 3.65/+140% (little-endian)
# MT7621/1004K 4.75/? (little-endian)
#
######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in an ABI-neutral
# manner. Therefore let's stick to the NUBI register layout:
#
($zero, $at, $t0, $t1, $t2) = map { '$' . $_ } (0 .. 2, 24, 25);    # $0-$2, $24, $25
($a0, $a1, $a2, $a3,
 $a4, $a5, $a6, $a7)        = map { '$' . $_ } (4 .. 11);           # $4-$11
($s0, $s1, $s2, $s3, $s4, $s5,
 $s6, $s7, $s8, $s9, $s10, $s11)
                            = map { '$' . $_ } (12 .. 23);          # $12-$23
($gp, $tp, $sp, $fp, $ra)   = map { '$' . $_ } (3, 28 .. 31);       # $3, $28-$31
#
# The return value is placed in $a0. The following coding rules
# facilitate interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp [o32 can be
# excluded from the rule, because it's specified volatile];
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
# old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
# <appro@openssl.org>
#
######################################################################
# Target ABI flavour is taken from the first command-line argument and
# falls back to "64"; supported flavours are o32,n32,64,nubi32,nubi64.
$flavour = shift;
$flavour = "64" unless $flavour;
# Register that carries the return value: $a0 for the NUBI flavours,
# $t0 for every other flavour.
if ($flavour =~ /nubi/i) {
    $v0 = $a0;
} else {
    $v0 = $t0;
}
if ($flavour =~ /64|n32/i) {{{
######################################################################
# 64-bit code path
#
my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);
$code.=<<___;
#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
defined(_MIPS_ARCH_MIPS64R6)) \\
&& !defined(_MIPS_ARCH_MIPS64R2)
# define _MIPS_ARCH_MIPS64R2
#endif
#if defined(_MIPS_ARCH_MIPS64R6)
# define dmultu(rs,rt)
# define mflo(rd,rs,rt) dmulu rd,rs,rt
# define mfhi(rd,rs,rt) dmuhu rd,rs,rt
#else
# define dmultu(rs,rt) dmultu rs,rt
# define mflo(rd,rs,rt) mflo rd
# define mfhi(rd,rs,rt) mfhi rd
#endif
#ifdef __KERNEL__
# define poly1305_init poly1305_init_mips
# define poly1305_blocks poly1305_blocks_mips
# define poly1305_emit poly1305_emit_mips
#endif
#if defined(__MIPSEB__) && !defined(MIPSEB)
# define MIPSEB
#endif
#ifdef MIPSEB
# define MSB 0
# define LSB 7
#else
# define MSB 7
# define LSB 0
#endif
.text
.set noat
.set noreorder
.align 5
.globl poly1305_init
.ent poly1305_init
poly1305_init:
.frame $sp,0,$ra
.set reorder
sd $zero,0($ctx)
sd $zero,8($ctx)
sd $zero,16($ctx)
beqz $inp,.Lno_key
#if defined(_MIPS_ARCH_MIPS64R6)
andi $tmp0,$inp,7 # $inp % 8
dsubu $inp,$inp,$tmp0 # align $inp
sll $tmp0,$tmp0,3 # byte to bit offset
ld $in0,0($inp)
ld $in1,8($inp)
beqz $tmp0,.Laligned_key
ld $tmp2,16($inp)
subu $tmp1,$zero,$tmp0
# ifdef MIPSEB
dsllv $in0,$in0,$tmp0
dsrlv $tmp3,$in1,$tmp1
dsllv $in1,$in1,$tmp0
dsrlv $tmp2,$tmp2,$tmp1
# else
dsrlv $in0,$in0,$tmp0
dsllv $tmp3,$in1,$tmp1
dsrlv $in1,$in1,$tmp0
dsllv $tmp2,$tmp2,$tmp1
# endif
or $in0,$in0,$tmp3
or $in1,$in1,$tmp2
.Laligned_key:
#else
ldl $in0,0+MSB($inp)
ldl $in1,8+MSB($inp)
ldr $in0,0+LSB($inp)
ldr $in1,8+LSB($inp)
#endif
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
dsbh $in0,$in0 # byte swap
dsbh $in1,$in1
dshd $in0,$in0
dshd $in1,$in1
# else
ori $tmp0,$zero,0xFF
dsll $tmp2,$tmp0,32
or $tmp0,$tmp2 # 0x000000FF000000FF
and $tmp1,$in0,$tmp0 # byte swap
and $tmp3,$in1,$tmp0
dsrl $tmp2,$in0,24
dsrl $tmp4,$in1,24
dsll $tmp1,24
dsll $tmp3,24
and $tmp2,$tmp0
and $tmp4,$tmp0
dsll $tmp0,8 # 0x0000FF000000FF00
or $tmp1,$tmp2
or $tmp3,$tmp4
and $tmp2,$in0,$tmp0
and $tmp4,$in1,$tmp0
d