#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
#
# ====================================================================
# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL
# project.
# ====================================================================

# Poly1305 hash for MIPS.
#
# May 2016
#
# Numbers are cycles per processed byte with poly1305_blocks alone.
#
#		IALU/gcc
# R1x000	~5.5/+130%	(big-endian)
# Octeon II	2.50/+70%	(little-endian)
#
# March 2019
#
# Add 32-bit code path.
#
# October 2019
#
# Modulo-scheduling reduction allows to omit dependency chain at the
# end of inner loop and improve performance. Also optimize MIPS32R2
# code path for MIPS 1004K core. Per René von Dorst's suggestions.
#
#		IALU/gcc
# R1x000	~9.8/?		(big-endian)
# Octeon II	3.65/+140%	(little-endian)
# MT7621/1004K	4.75/?		(little-endian)
#
######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in ABI neutral
# manner. Therefore let's stick to NUBI register layout:
#
# Each variable below holds the assembler spelling of one numbered
# register (e.g. $a0 is the string "\$4"), so the assembly templates
# further down can refer to registers by role instead of by number.
#
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. Following coding rules facilitate
# interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp [o32 can be
#   excluded from the rule, because it's specified volatile];
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
# <appro@openssl.org>
#
######################################################################

# The flavour is taken from the first command-line argument and falls
# back to "64" when that argument is absent or false.
$flavour = shift || "64";	# supported flavours are o32,n32,64,nubi32,nubi64
6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0; 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ciif ($flavour =~ /64|n32/i) {{{ 7162306a36Sopenharmony_ci###################################################################### 7262306a36Sopenharmony_ci# 64-bit code path 7362306a36Sopenharmony_ci# 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_cimy ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); 7662306a36Sopenharmony_cimy ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1); 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci$code.=<<___; 7962306a36Sopenharmony_ci#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\ 8062306a36Sopenharmony_ci defined(_MIPS_ARCH_MIPS64R6)) \\ 8162306a36Sopenharmony_ci && !defined(_MIPS_ARCH_MIPS64R2) 8262306a36Sopenharmony_ci# define _MIPS_ARCH_MIPS64R2 8362306a36Sopenharmony_ci#endif 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS64R6) 8662306a36Sopenharmony_ci# define dmultu(rs,rt) 8762306a36Sopenharmony_ci# define mflo(rd,rs,rt) dmulu rd,rs,rt 8862306a36Sopenharmony_ci# define mfhi(rd,rs,rt) dmuhu rd,rs,rt 8962306a36Sopenharmony_ci#else 9062306a36Sopenharmony_ci# define dmultu(rs,rt) dmultu rs,rt 9162306a36Sopenharmony_ci# define mflo(rd,rs,rt) mflo rd 9262306a36Sopenharmony_ci# define mfhi(rd,rs,rt) mfhi rd 9362306a36Sopenharmony_ci#endif 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci#ifdef __KERNEL__ 9662306a36Sopenharmony_ci# define poly1305_init poly1305_init_mips 9762306a36Sopenharmony_ci# define poly1305_blocks poly1305_blocks_mips 9862306a36Sopenharmony_ci# define poly1305_emit poly1305_emit_mips 9962306a36Sopenharmony_ci#endif 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci#if defined(__MIPSEB__) && !defined(MIPSEB) 10262306a36Sopenharmony_ci# define MIPSEB 10362306a36Sopenharmony_ci#endif 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci#ifdef MIPSEB 10662306a36Sopenharmony_ci# define MSB 0 
10762306a36Sopenharmony_ci# define LSB 7 10862306a36Sopenharmony_ci#else 10962306a36Sopenharmony_ci# define MSB 7 11062306a36Sopenharmony_ci# define LSB 0 11162306a36Sopenharmony_ci#endif 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci.text 11462306a36Sopenharmony_ci.set noat 11562306a36Sopenharmony_ci.set noreorder 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci.align 5 11862306a36Sopenharmony_ci.globl poly1305_init 11962306a36Sopenharmony_ci.ent poly1305_init 12062306a36Sopenharmony_cipoly1305_init: 12162306a36Sopenharmony_ci .frame $sp,0,$ra 12262306a36Sopenharmony_ci .set reorder 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci sd $zero,0($ctx) 12562306a36Sopenharmony_ci sd $zero,8($ctx) 12662306a36Sopenharmony_ci sd $zero,16($ctx) 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci beqz $inp,.Lno_key 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS64R6) 13162306a36Sopenharmony_ci andi $tmp0,$inp,7 # $inp % 8 13262306a36Sopenharmony_ci dsubu $inp,$inp,$tmp0 # align $inp 13362306a36Sopenharmony_ci sll $tmp0,$tmp0,3 # byte to bit offset 13462306a36Sopenharmony_ci ld $in0,0($inp) 13562306a36Sopenharmony_ci ld $in1,8($inp) 13662306a36Sopenharmony_ci beqz $tmp0,.Laligned_key 13762306a36Sopenharmony_ci ld $tmp2,16($inp) 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci subu $tmp1,$zero,$tmp0 14062306a36Sopenharmony_ci# ifdef MIPSEB 14162306a36Sopenharmony_ci dsllv $in0,$in0,$tmp0 14262306a36Sopenharmony_ci dsrlv $tmp3,$in1,$tmp1 14362306a36Sopenharmony_ci dsllv $in1,$in1,$tmp0 14462306a36Sopenharmony_ci dsrlv $tmp2,$tmp2,$tmp1 14562306a36Sopenharmony_ci# else 14662306a36Sopenharmony_ci dsrlv $in0,$in0,$tmp0 14762306a36Sopenharmony_ci dsllv $tmp3,$in1,$tmp1 14862306a36Sopenharmony_ci dsrlv $in1,$in1,$tmp0 14962306a36Sopenharmony_ci dsllv $tmp2,$tmp2,$tmp1 15062306a36Sopenharmony_ci# endif 15162306a36Sopenharmony_ci or $in0,$in0,$tmp3 15262306a36Sopenharmony_ci or $in1,$in1,$tmp2 
15362306a36Sopenharmony_ci.Laligned_key: 15462306a36Sopenharmony_ci#else 15562306a36Sopenharmony_ci ldl $in0,0+MSB($inp) 15662306a36Sopenharmony_ci ldl $in1,8+MSB($inp) 15762306a36Sopenharmony_ci ldr $in0,0+LSB($inp) 15862306a36Sopenharmony_ci ldr $in1,8+LSB($inp) 15962306a36Sopenharmony_ci#endif 16062306a36Sopenharmony_ci#ifdef MIPSEB 16162306a36Sopenharmony_ci# if defined(_MIPS_ARCH_MIPS64R2) 16262306a36Sopenharmony_ci dsbh $in0,$in0 # byte swap 16362306a36Sopenharmony_ci dsbh $in1,$in1 16462306a36Sopenharmony_ci dshd $in0,$in0 16562306a36Sopenharmony_ci dshd $in1,$in1 16662306a36Sopenharmony_ci# else 16762306a36Sopenharmony_ci ori $tmp0,$zero,0xFF 16862306a36Sopenharmony_ci dsll $tmp2,$tmp0,32 16962306a36Sopenharmony_ci or $tmp0,$tmp2 # 0x000000FF000000FF 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci and $tmp1,$in0,$tmp0 # byte swap 17262306a36Sopenharmony_ci and $tmp3,$in1,$tmp0 17362306a36Sopenharmony_ci dsrl $tmp2,$in0,24 17462306a36Sopenharmony_ci dsrl $tmp4,$in1,24 17562306a36Sopenharmony_ci dsll $tmp1,24 17662306a36Sopenharmony_ci dsll $tmp3,24 17762306a36Sopenharmony_ci and $tmp2,$tmp0 17862306a36Sopenharmony_ci and $tmp4,$tmp0 17962306a36Sopenharmony_ci dsll $tmp0,8 # 0x0000FF000000FF00 18062306a36Sopenharmony_ci or $tmp1,$tmp2 18162306a36Sopenharmony_ci or $tmp3,$tmp4 18262306a36Sopenharmony_ci and $tmp2,$in0,$tmp0 18362306a36Sopenharmony_ci and $tmp4,$in1,$tmp0 18462306a36Sopenharmony_ci dsrl $in0,8 18562306a36Sopenharmony_ci dsrl $in1,8 18662306a36Sopenharmony_ci dsll $tmp2,8 18762306a36Sopenharmony_ci dsll $tmp4,8 18862306a36Sopenharmony_ci and $in0,$tmp0 18962306a36Sopenharmony_ci and $in1,$tmp0 19062306a36Sopenharmony_ci or $tmp1,$tmp2 19162306a36Sopenharmony_ci or $tmp3,$tmp4 19262306a36Sopenharmony_ci or $in0,$tmp1 19362306a36Sopenharmony_ci or $in1,$tmp3 19462306a36Sopenharmony_ci dsrl $tmp1,$in0,32 19562306a36Sopenharmony_ci dsrl $tmp3,$in1,32 19662306a36Sopenharmony_ci dsll $in0,32 19762306a36Sopenharmony_ci dsll $in1,32 
19862306a36Sopenharmony_ci or $in0,$tmp1 19962306a36Sopenharmony_ci or $in1,$tmp3 20062306a36Sopenharmony_ci# endif 20162306a36Sopenharmony_ci#endif 20262306a36Sopenharmony_ci li $tmp0,1 20362306a36Sopenharmony_ci dsll $tmp0,32 # 0x0000000100000000 20462306a36Sopenharmony_ci daddiu $tmp0,-63 # 0x00000000ffffffc1 20562306a36Sopenharmony_ci dsll $tmp0,28 # 0x0ffffffc10000000 20662306a36Sopenharmony_ci daddiu $tmp0,-1 # 0x0ffffffc0fffffff 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci and $in0,$tmp0 20962306a36Sopenharmony_ci daddiu $tmp0,-3 # 0x0ffffffc0ffffffc 21062306a36Sopenharmony_ci and $in1,$tmp0 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci sd $in0,24($ctx) 21362306a36Sopenharmony_ci dsrl $tmp0,$in1,2 21462306a36Sopenharmony_ci sd $in1,32($ctx) 21562306a36Sopenharmony_ci daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2) 21662306a36Sopenharmony_ci sd $tmp0,40($ctx) 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci.Lno_key: 21962306a36Sopenharmony_ci li $v0,0 # return 0 22062306a36Sopenharmony_ci jr $ra 22162306a36Sopenharmony_ci.end poly1305_init 22262306a36Sopenharmony_ci___ 22362306a36Sopenharmony_ci{ 22462306a36Sopenharmony_cimy $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 
"0x0003f000" : "0x00030000"; 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_cimy ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) = 22762306a36Sopenharmony_ci ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2); 22862306a36Sopenharmony_cimy ($shr,$shl) = ($s6,$s7); # used on R6 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci$code.=<<___; 23162306a36Sopenharmony_ci.align 5 23262306a36Sopenharmony_ci.globl poly1305_blocks 23362306a36Sopenharmony_ci.ent poly1305_blocks 23462306a36Sopenharmony_cipoly1305_blocks: 23562306a36Sopenharmony_ci .set noreorder 23662306a36Sopenharmony_ci dsrl $len,4 # number of complete blocks 23762306a36Sopenharmony_ci bnez $len,poly1305_blocks_internal 23862306a36Sopenharmony_ci nop 23962306a36Sopenharmony_ci jr $ra 24062306a36Sopenharmony_ci nop 24162306a36Sopenharmony_ci.end poly1305_blocks 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci.align 5 24462306a36Sopenharmony_ci.ent poly1305_blocks_internal 24562306a36Sopenharmony_cipoly1305_blocks_internal: 24662306a36Sopenharmony_ci .set noreorder 24762306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS64R6) 24862306a36Sopenharmony_ci .frame $sp,8*8,$ra 24962306a36Sopenharmony_ci .mask $SAVED_REGS_MASK|0x000c0000,-8 25062306a36Sopenharmony_ci dsubu $sp,8*8 25162306a36Sopenharmony_ci sd $s7,56($sp) 25262306a36Sopenharmony_ci sd $s6,48($sp) 25362306a36Sopenharmony_ci#else 25462306a36Sopenharmony_ci .frame $sp,6*8,$ra 25562306a36Sopenharmony_ci .mask $SAVED_REGS_MASK,-8 25662306a36Sopenharmony_ci dsubu $sp,6*8 25762306a36Sopenharmony_ci#endif 25862306a36Sopenharmony_ci sd $s5,40($sp) 25962306a36Sopenharmony_ci sd $s4,32($sp) 26062306a36Sopenharmony_ci___ 26162306a36Sopenharmony_ci$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue 26262306a36Sopenharmony_ci sd $s3,24($sp) 26362306a36Sopenharmony_ci sd $s2,16($sp) 26462306a36Sopenharmony_ci sd $s1,8($sp) 26562306a36Sopenharmony_ci sd $s0,0($sp) 26662306a36Sopenharmony_ci___ 26762306a36Sopenharmony_ci$code.=<<___; 26862306a36Sopenharmony_ci 
.set reorder 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS64R6) 27162306a36Sopenharmony_ci andi $shr,$inp,7 27262306a36Sopenharmony_ci dsubu $inp,$inp,$shr # align $inp 27362306a36Sopenharmony_ci sll $shr,$shr,3 # byte to bit offset 27462306a36Sopenharmony_ci subu $shl,$zero,$shr 27562306a36Sopenharmony_ci#endif 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci ld $h0,0($ctx) # load hash value 27862306a36Sopenharmony_ci ld $h1,8($ctx) 27962306a36Sopenharmony_ci ld $h2,16($ctx) 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci ld $r0,24($ctx) # load key 28262306a36Sopenharmony_ci ld $r1,32($ctx) 28362306a36Sopenharmony_ci ld $rs1,40($ctx) 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci dsll $len,4 28662306a36Sopenharmony_ci daddu $len,$inp # end of buffer 28762306a36Sopenharmony_ci b .Loop 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci.align 4 29062306a36Sopenharmony_ci.Loop: 29162306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS64R6) 29262306a36Sopenharmony_ci ld $in0,0($inp) # load input 29362306a36Sopenharmony_ci ld $in1,8($inp) 29462306a36Sopenharmony_ci beqz $shr,.Laligned_inp 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci ld $tmp2,16($inp) 29762306a36Sopenharmony_ci# ifdef MIPSEB 29862306a36Sopenharmony_ci dsllv $in0,$in0,$shr 29962306a36Sopenharmony_ci dsrlv $tmp3,$in1,$shl 30062306a36Sopenharmony_ci dsllv $in1,$in1,$shr 30162306a36Sopenharmony_ci dsrlv $tmp2,$tmp2,$shl 30262306a36Sopenharmony_ci# else 30362306a36Sopenharmony_ci dsrlv $in0,$in0,$shr 30462306a36Sopenharmony_ci dsllv $tmp3,$in1,$shl 30562306a36Sopenharmony_ci dsrlv $in1,$in1,$shr 30662306a36Sopenharmony_ci dsllv $tmp2,$tmp2,$shl 30762306a36Sopenharmony_ci# endif 30862306a36Sopenharmony_ci or $in0,$in0,$tmp3 30962306a36Sopenharmony_ci or $in1,$in1,$tmp2 31062306a36Sopenharmony_ci.Laligned_inp: 31162306a36Sopenharmony_ci#else 31262306a36Sopenharmony_ci ldl $in0,0+MSB($inp) # load input 31362306a36Sopenharmony_ci ldl $in1,8+MSB($inp) 
31462306a36Sopenharmony_ci ldr $in0,0+LSB($inp) 31562306a36Sopenharmony_ci ldr $in1,8+LSB($inp) 31662306a36Sopenharmony_ci#endif 31762306a36Sopenharmony_ci daddiu $inp,16 31862306a36Sopenharmony_ci#ifdef MIPSEB 31962306a36Sopenharmony_ci# if defined(_MIPS_ARCH_MIPS64R2) 32062306a36Sopenharmony_ci dsbh $in0,$in0 # byte swap 32162306a36Sopenharmony_ci dsbh $in1,$in1 32262306a36Sopenharmony_ci dshd $in0,$in0 32362306a36Sopenharmony_ci dshd $in1,$in1 32462306a36Sopenharmony_ci# else 32562306a36Sopenharmony_ci ori $tmp0,$zero,0xFF 32662306a36Sopenharmony_ci dsll $tmp2,$tmp0,32 32762306a36Sopenharmony_ci or $tmp0,$tmp2 # 0x000000FF000000FF 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci and $tmp1,$in0,$tmp0 # byte swap 33062306a36Sopenharmony_ci and $tmp3,$in1,$tmp0 33162306a36Sopenharmony_ci dsrl $tmp2,$in0,24 33262306a36Sopenharmony_ci dsrl $tmp4,$in1,24 33362306a36Sopenharmony_ci dsll $tmp1,24 33462306a36Sopenharmony_ci dsll $tmp3,24 33562306a36Sopenharmony_ci and $tmp2,$tmp0 33662306a36Sopenharmony_ci and $tmp4,$tmp0 33762306a36Sopenharmony_ci dsll $tmp0,8 # 0x0000FF000000FF00 33862306a36Sopenharmony_ci or $tmp1,$tmp2 33962306a36Sopenharmony_ci or $tmp3,$tmp4 34062306a36Sopenharmony_ci and $tmp2,$in0,$tmp0 34162306a36Sopenharmony_ci and $tmp4,$in1,$tmp0 34262306a36Sopenharmony_ci dsrl $in0,8 34362306a36Sopenharmony_ci dsrl $in1,8 34462306a36Sopenharmony_ci dsll $tmp2,8 34562306a36Sopenharmony_ci dsll $tmp4,8 34662306a36Sopenharmony_ci and $in0,$tmp0 34762306a36Sopenharmony_ci and $in1,$tmp0 34862306a36Sopenharmony_ci or $tmp1,$tmp2 34962306a36Sopenharmony_ci or $tmp3,$tmp4 35062306a36Sopenharmony_ci or $in0,$tmp1 35162306a36Sopenharmony_ci or $in1,$tmp3 35262306a36Sopenharmony_ci dsrl $tmp1,$in0,32 35362306a36Sopenharmony_ci dsrl $tmp3,$in1,32 35462306a36Sopenharmony_ci dsll $in0,32 35562306a36Sopenharmony_ci dsll $in1,32 35662306a36Sopenharmony_ci or $in0,$tmp1 35762306a36Sopenharmony_ci or $in1,$tmp3 35862306a36Sopenharmony_ci# endif 
35962306a36Sopenharmony_ci#endif 36062306a36Sopenharmony_ci dsrl $tmp1,$h2,2 # modulo-scheduled reduction 36162306a36Sopenharmony_ci andi $h2,$h2,3 36262306a36Sopenharmony_ci dsll $tmp0,$tmp1,2 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci daddu $d0,$h0,$in0 # accumulate input 36562306a36Sopenharmony_ci daddu $tmp1,$tmp0 36662306a36Sopenharmony_ci sltu $tmp0,$d0,$h0 36762306a36Sopenharmony_ci daddu $d0,$d0,$tmp1 # ... and residue 36862306a36Sopenharmony_ci sltu $tmp1,$d0,$tmp1 36962306a36Sopenharmony_ci daddu $d1,$h1,$in1 37062306a36Sopenharmony_ci daddu $tmp0,$tmp1 37162306a36Sopenharmony_ci sltu $tmp1,$d1,$h1 37262306a36Sopenharmony_ci daddu $d1,$tmp0 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_ci dmultu ($r0,$d0) # h0*r0 37562306a36Sopenharmony_ci daddu $d2,$h2,$padbit 37662306a36Sopenharmony_ci sltu $tmp0,$d1,$tmp0 37762306a36Sopenharmony_ci mflo ($h0,$r0,$d0) 37862306a36Sopenharmony_ci mfhi ($h1,$r0,$d0) 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci dmultu ($rs1,$d1) # h1*5*r1 38162306a36Sopenharmony_ci daddu $d2,$tmp1 38262306a36Sopenharmony_ci daddu $d2,$tmp0 38362306a36Sopenharmony_ci mflo ($tmp0,$rs1,$d1) 38462306a36Sopenharmony_ci mfhi ($tmp1,$rs1,$d1) 38562306a36Sopenharmony_ci 38662306a36Sopenharmony_ci dmultu ($r1,$d0) # h0*r1 38762306a36Sopenharmony_ci mflo ($tmp2,$r1,$d0) 38862306a36Sopenharmony_ci mfhi ($h2,$r1,$d0) 38962306a36Sopenharmony_ci daddu $h0,$tmp0 39062306a36Sopenharmony_ci daddu $h1,$tmp1 39162306a36Sopenharmony_ci sltu $tmp0,$h0,$tmp0 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci dmultu ($r0,$d1) # h1*r0 39462306a36Sopenharmony_ci daddu $h1,$tmp0 39562306a36Sopenharmony_ci daddu $h1,$tmp2 39662306a36Sopenharmony_ci mflo ($tmp0,$r0,$d1) 39762306a36Sopenharmony_ci mfhi ($tmp1,$r0,$d1) 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci dmultu ($rs1,$d2) # h2*5*r1 40062306a36Sopenharmony_ci sltu $tmp2,$h1,$tmp2 40162306a36Sopenharmony_ci daddu $h2,$tmp2 40262306a36Sopenharmony_ci mflo ($tmp2,$rs1,$d2) 
40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci dmultu ($r0,$d2) # h2*r0 40562306a36Sopenharmony_ci daddu $h1,$tmp0 40662306a36Sopenharmony_ci daddu $h2,$tmp1 40762306a36Sopenharmony_ci mflo ($tmp3,$r0,$d2) 40862306a36Sopenharmony_ci sltu $tmp0,$h1,$tmp0 40962306a36Sopenharmony_ci daddu $h2,$tmp0 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_ci daddu $h1,$tmp2 41262306a36Sopenharmony_ci sltu $tmp2,$h1,$tmp2 41362306a36Sopenharmony_ci daddu $h2,$tmp2 41462306a36Sopenharmony_ci daddu $h2,$tmp3 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci bne $inp,$len,.Loop 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_ci sd $h0,0($ctx) # store hash value 41962306a36Sopenharmony_ci sd $h1,8($ctx) 42062306a36Sopenharmony_ci sd $h2,16($ctx) 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ci .set noreorder 42362306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS64R6) 42462306a36Sopenharmony_ci ld $s7,56($sp) 42562306a36Sopenharmony_ci ld $s6,48($sp) 42662306a36Sopenharmony_ci#endif 42762306a36Sopenharmony_ci ld $s5,40($sp) # epilogue 42862306a36Sopenharmony_ci ld $s4,32($sp) 42962306a36Sopenharmony_ci___ 43062306a36Sopenharmony_ci$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue 43162306a36Sopenharmony_ci ld $s3,24($sp) 43262306a36Sopenharmony_ci ld $s2,16($sp) 43362306a36Sopenharmony_ci ld $s1,8($sp) 43462306a36Sopenharmony_ci ld $s0,0($sp) 43562306a36Sopenharmony_ci___ 43662306a36Sopenharmony_ci$code.=<<___; 43762306a36Sopenharmony_ci jr $ra 43862306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS64R6) 43962306a36Sopenharmony_ci daddu $sp,8*8 44062306a36Sopenharmony_ci#else 44162306a36Sopenharmony_ci daddu $sp,6*8 44262306a36Sopenharmony_ci#endif 44362306a36Sopenharmony_ci.end poly1305_blocks_internal 44462306a36Sopenharmony_ci___ 44562306a36Sopenharmony_ci} 44662306a36Sopenharmony_ci{ 44762306a36Sopenharmony_cimy ($ctx,$mac,$nonce) = ($a0,$a1,$a2); 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci$code.=<<___; 
45062306a36Sopenharmony_ci.align 5 45162306a36Sopenharmony_ci.globl poly1305_emit 45262306a36Sopenharmony_ci.ent poly1305_emit 45362306a36Sopenharmony_cipoly1305_emit: 45462306a36Sopenharmony_ci .frame $sp,0,$ra 45562306a36Sopenharmony_ci .set reorder 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_ci ld $tmp2,16($ctx) 45862306a36Sopenharmony_ci ld $tmp0,0($ctx) 45962306a36Sopenharmony_ci ld $tmp1,8($ctx) 46062306a36Sopenharmony_ci 46162306a36Sopenharmony_ci li $in0,-4 # final reduction 46262306a36Sopenharmony_ci dsrl $in1,$tmp2,2 46362306a36Sopenharmony_ci and $in0,$tmp2 46462306a36Sopenharmony_ci andi $tmp2,$tmp2,3 46562306a36Sopenharmony_ci daddu $in0,$in1 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci daddu $tmp0,$tmp0,$in0 46862306a36Sopenharmony_ci sltu $in1,$tmp0,$in0 46962306a36Sopenharmony_ci daddiu $in0,$tmp0,5 # compare to modulus 47062306a36Sopenharmony_ci daddu $tmp1,$tmp1,$in1 47162306a36Sopenharmony_ci sltiu $tmp3,$in0,5 47262306a36Sopenharmony_ci sltu $tmp4,$tmp1,$in1 47362306a36Sopenharmony_ci daddu $in1,$tmp1,$tmp3 47462306a36Sopenharmony_ci daddu $tmp2,$tmp2,$tmp4 47562306a36Sopenharmony_ci sltu $tmp3,$in1,$tmp3 47662306a36Sopenharmony_ci daddu $tmp2,$tmp2,$tmp3 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_ci dsrl $tmp2,2 # see if it carried/borrowed 47962306a36Sopenharmony_ci dsubu $tmp2,$zero,$tmp2 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci xor $in0,$tmp0 48262306a36Sopenharmony_ci xor $in1,$tmp1 48362306a36Sopenharmony_ci and $in0,$tmp2 48462306a36Sopenharmony_ci and $in1,$tmp2 48562306a36Sopenharmony_ci xor $in0,$tmp0 48662306a36Sopenharmony_ci xor $in1,$tmp1 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci lwu $tmp0,0($nonce) # load nonce 48962306a36Sopenharmony_ci lwu $tmp1,4($nonce) 49062306a36Sopenharmony_ci lwu $tmp2,8($nonce) 49162306a36Sopenharmony_ci lwu $tmp3,12($nonce) 49262306a36Sopenharmony_ci dsll $tmp1,32 49362306a36Sopenharmony_ci dsll $tmp3,32 49462306a36Sopenharmony_ci or $tmp0,$tmp1 
49562306a36Sopenharmony_ci or $tmp2,$tmp3 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_ci daddu $in0,$tmp0 # accumulate nonce 49862306a36Sopenharmony_ci daddu $in1,$tmp2 49962306a36Sopenharmony_ci sltu $tmp0,$in0,$tmp0 50062306a36Sopenharmony_ci daddu $in1,$tmp0 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci dsrl $tmp0,$in0,8 # write mac value 50362306a36Sopenharmony_ci dsrl $tmp1,$in0,16 50462306a36Sopenharmony_ci dsrl $tmp2,$in0,24 50562306a36Sopenharmony_ci sb $in0,0($mac) 50662306a36Sopenharmony_ci dsrl $tmp3,$in0,32 50762306a36Sopenharmony_ci sb $tmp0,1($mac) 50862306a36Sopenharmony_ci dsrl $tmp0,$in0,40 50962306a36Sopenharmony_ci sb $tmp1,2($mac) 51062306a36Sopenharmony_ci dsrl $tmp1,$in0,48 51162306a36Sopenharmony_ci sb $tmp2,3($mac) 51262306a36Sopenharmony_ci dsrl $tmp2,$in0,56 51362306a36Sopenharmony_ci sb $tmp3,4($mac) 51462306a36Sopenharmony_ci dsrl $tmp3,$in1,8 51562306a36Sopenharmony_ci sb $tmp0,5($mac) 51662306a36Sopenharmony_ci dsrl $tmp0,$in1,16 51762306a36Sopenharmony_ci sb $tmp1,6($mac) 51862306a36Sopenharmony_ci dsrl $tmp1,$in1,24 51962306a36Sopenharmony_ci sb $tmp2,7($mac) 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_ci sb $in1,8($mac) 52262306a36Sopenharmony_ci dsrl $tmp2,$in1,32 52362306a36Sopenharmony_ci sb $tmp3,9($mac) 52462306a36Sopenharmony_ci dsrl $tmp3,$in1,40 52562306a36Sopenharmony_ci sb $tmp0,10($mac) 52662306a36Sopenharmony_ci dsrl $tmp0,$in1,48 52762306a36Sopenharmony_ci sb $tmp1,11($mac) 52862306a36Sopenharmony_ci dsrl $tmp1,$in1,56 52962306a36Sopenharmony_ci sb $tmp2,12($mac) 53062306a36Sopenharmony_ci sb $tmp3,13($mac) 53162306a36Sopenharmony_ci sb $tmp0,14($mac) 53262306a36Sopenharmony_ci sb $tmp1,15($mac) 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci jr $ra 53562306a36Sopenharmony_ci.end poly1305_emit 53662306a36Sopenharmony_ci.rdata 53762306a36Sopenharmony_ci.asciiz "Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm" 53862306a36Sopenharmony_ci.align 2 53962306a36Sopenharmony_ci___ 
54062306a36Sopenharmony_ci} 54162306a36Sopenharmony_ci}}} else {{{ 54262306a36Sopenharmony_ci###################################################################### 54362306a36Sopenharmony_ci# 32-bit code path 54462306a36Sopenharmony_ci# 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_cimy ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); 54762306a36Sopenharmony_cimy ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) = 54862306a36Sopenharmony_ci ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2); 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_ci$code.=<<___; 55162306a36Sopenharmony_ci#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\ 55262306a36Sopenharmony_ci defined(_MIPS_ARCH_MIPS32R6)) \\ 55362306a36Sopenharmony_ci && !defined(_MIPS_ARCH_MIPS32R2) 55462306a36Sopenharmony_ci# define _MIPS_ARCH_MIPS32R2 55562306a36Sopenharmony_ci#endif 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS32R6) 55862306a36Sopenharmony_ci# define multu(rs,rt) 55962306a36Sopenharmony_ci# define mflo(rd,rs,rt) mulu rd,rs,rt 56062306a36Sopenharmony_ci# define mfhi(rd,rs,rt) muhu rd,rs,rt 56162306a36Sopenharmony_ci#else 56262306a36Sopenharmony_ci# define multu(rs,rt) multu rs,rt 56362306a36Sopenharmony_ci# define mflo(rd,rs,rt) mflo rd 56462306a36Sopenharmony_ci# define mfhi(rd,rs,rt) mfhi rd 56562306a36Sopenharmony_ci#endif 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_ci#ifdef __KERNEL__ 56862306a36Sopenharmony_ci# define poly1305_init poly1305_init_mips 56962306a36Sopenharmony_ci# define poly1305_blocks poly1305_blocks_mips 57062306a36Sopenharmony_ci# define poly1305_emit poly1305_emit_mips 57162306a36Sopenharmony_ci#endif 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci#if defined(__MIPSEB__) && !defined(MIPSEB) 57462306a36Sopenharmony_ci# define MIPSEB 57562306a36Sopenharmony_ci#endif 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci#ifdef MIPSEB 57862306a36Sopenharmony_ci# define MSB 0 57962306a36Sopenharmony_ci# define LSB 3 
58062306a36Sopenharmony_ci#else 58162306a36Sopenharmony_ci# define MSB 3 58262306a36Sopenharmony_ci# define LSB 0 58362306a36Sopenharmony_ci#endif 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_ci.text 58662306a36Sopenharmony_ci.set noat 58762306a36Sopenharmony_ci.set noreorder 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci.align 5 59062306a36Sopenharmony_ci.globl poly1305_init 59162306a36Sopenharmony_ci.ent poly1305_init 59262306a36Sopenharmony_cipoly1305_init: 59362306a36Sopenharmony_ci .frame $sp,0,$ra 59462306a36Sopenharmony_ci .set reorder 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci sw $zero,0($ctx) 59762306a36Sopenharmony_ci sw $zero,4($ctx) 59862306a36Sopenharmony_ci sw $zero,8($ctx) 59962306a36Sopenharmony_ci sw $zero,12($ctx) 60062306a36Sopenharmony_ci sw $zero,16($ctx) 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci beqz $inp,.Lno_key 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS32R6) 60562306a36Sopenharmony_ci andi $tmp0,$inp,3 # $inp % 4 60662306a36Sopenharmony_ci subu $inp,$inp,$tmp0 # align $inp 60762306a36Sopenharmony_ci sll $tmp0,$tmp0,3 # byte to bit offset 60862306a36Sopenharmony_ci lw $in0,0($inp) 60962306a36Sopenharmony_ci lw $in1,4($inp) 61062306a36Sopenharmony_ci lw $in2,8($inp) 61162306a36Sopenharmony_ci lw $in3,12($inp) 61262306a36Sopenharmony_ci beqz $tmp0,.Laligned_key 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci lw $tmp2,16($inp) 61562306a36Sopenharmony_ci subu $tmp1,$zero,$tmp0 61662306a36Sopenharmony_ci# ifdef MIPSEB 61762306a36Sopenharmony_ci sllv $in0,$in0,$tmp0 61862306a36Sopenharmony_ci srlv $tmp3,$in1,$tmp1 61962306a36Sopenharmony_ci sllv $in1,$in1,$tmp0 62062306a36Sopenharmony_ci or $in0,$in0,$tmp3 62162306a36Sopenharmony_ci srlv $tmp3,$in2,$tmp1 62262306a36Sopenharmony_ci sllv $in2,$in2,$tmp0 62362306a36Sopenharmony_ci or $in1,$in1,$tmp3 62462306a36Sopenharmony_ci srlv $tmp3,$in3,$tmp1 62562306a36Sopenharmony_ci sllv $in3,$in3,$tmp0 62662306a36Sopenharmony_ci 
or $in2,$in2,$tmp3 62762306a36Sopenharmony_ci srlv $tmp2,$tmp2,$tmp1 62862306a36Sopenharmony_ci or $in3,$in3,$tmp2 62962306a36Sopenharmony_ci# else 63062306a36Sopenharmony_ci srlv $in0,$in0,$tmp0 63162306a36Sopenharmony_ci sllv $tmp3,$in1,$tmp1 63262306a36Sopenharmony_ci srlv $in1,$in1,$tmp0 63362306a36Sopenharmony_ci or $in0,$in0,$tmp3 63462306a36Sopenharmony_ci sllv $tmp3,$in2,$tmp1 63562306a36Sopenharmony_ci srlv $in2,$in2,$tmp0 63662306a36Sopenharmony_ci or $in1,$in1,$tmp3 63762306a36Sopenharmony_ci sllv $tmp3,$in3,$tmp1 63862306a36Sopenharmony_ci srlv $in3,$in3,$tmp0 63962306a36Sopenharmony_ci or $in2,$in2,$tmp3 64062306a36Sopenharmony_ci sllv $tmp2,$tmp2,$tmp1 64162306a36Sopenharmony_ci or $in3,$in3,$tmp2 64262306a36Sopenharmony_ci# endif 64362306a36Sopenharmony_ci.Laligned_key: 64462306a36Sopenharmony_ci#else 64562306a36Sopenharmony_ci lwl $in0,0+MSB($inp) 64662306a36Sopenharmony_ci lwl $in1,4+MSB($inp) 64762306a36Sopenharmony_ci lwl $in2,8+MSB($inp) 64862306a36Sopenharmony_ci lwl $in3,12+MSB($inp) 64962306a36Sopenharmony_ci lwr $in0,0+LSB($inp) 65062306a36Sopenharmony_ci lwr $in1,4+LSB($inp) 65162306a36Sopenharmony_ci lwr $in2,8+LSB($inp) 65262306a36Sopenharmony_ci lwr $in3,12+LSB($inp) 65362306a36Sopenharmony_ci#endif 65462306a36Sopenharmony_ci#ifdef MIPSEB 65562306a36Sopenharmony_ci# if defined(_MIPS_ARCH_MIPS32R2) 65662306a36Sopenharmony_ci wsbh $in0,$in0 # byte swap 65762306a36Sopenharmony_ci wsbh $in1,$in1 65862306a36Sopenharmony_ci wsbh $in2,$in2 65962306a36Sopenharmony_ci wsbh $in3,$in3 66062306a36Sopenharmony_ci rotr $in0,$in0,16 66162306a36Sopenharmony_ci rotr $in1,$in1,16 66262306a36Sopenharmony_ci rotr $in2,$in2,16 66362306a36Sopenharmony_ci rotr $in3,$in3,16 66462306a36Sopenharmony_ci# else 66562306a36Sopenharmony_ci srl $tmp0,$in0,24 # byte swap 66662306a36Sopenharmony_ci srl $tmp1,$in0,8 66762306a36Sopenharmony_ci andi $tmp2,$in0,0xFF00 66862306a36Sopenharmony_ci sll $in0,$in0,24 66962306a36Sopenharmony_ci andi $tmp1,0xFF00 
67062306a36Sopenharmony_ci sll $tmp2,$tmp2,8 67162306a36Sopenharmony_ci or $in0,$tmp0 67262306a36Sopenharmony_ci srl $tmp0,$in1,24 67362306a36Sopenharmony_ci or $tmp1,$tmp2 67462306a36Sopenharmony_ci srl $tmp2,$in1,8 67562306a36Sopenharmony_ci or $in0,$tmp1 67662306a36Sopenharmony_ci andi $tmp1,$in1,0xFF00 67762306a36Sopenharmony_ci sll $in1,$in1,24 67862306a36Sopenharmony_ci andi $tmp2,0xFF00 67962306a36Sopenharmony_ci sll $tmp1,$tmp1,8 68062306a36Sopenharmony_ci or $in1,$tmp0 68162306a36Sopenharmony_ci srl $tmp0,$in2,24 68262306a36Sopenharmony_ci or $tmp2,$tmp1 68362306a36Sopenharmony_ci srl $tmp1,$in2,8 68462306a36Sopenharmony_ci or $in1,$tmp2 68562306a36Sopenharmony_ci andi $tmp2,$in2,0xFF00 68662306a36Sopenharmony_ci sll $in2,$in2,24 68762306a36Sopenharmony_ci andi $tmp1,0xFF00 68862306a36Sopenharmony_ci sll $tmp2,$tmp2,8 68962306a36Sopenharmony_ci or $in2,$tmp0 69062306a36Sopenharmony_ci srl $tmp0,$in3,24 69162306a36Sopenharmony_ci or $tmp1,$tmp2 69262306a36Sopenharmony_ci srl $tmp2,$in3,8 69362306a36Sopenharmony_ci or $in2,$tmp1 69462306a36Sopenharmony_ci andi $tmp1,$in3,0xFF00 69562306a36Sopenharmony_ci sll $in3,$in3,24 69662306a36Sopenharmony_ci andi $tmp2,0xFF00 69762306a36Sopenharmony_ci sll $tmp1,$tmp1,8 69862306a36Sopenharmony_ci or $in3,$tmp0 69962306a36Sopenharmony_ci or $tmp2,$tmp1 70062306a36Sopenharmony_ci or $in3,$tmp2 70162306a36Sopenharmony_ci# endif 70262306a36Sopenharmony_ci#endif 70362306a36Sopenharmony_ci lui $tmp0,0x0fff 70462306a36Sopenharmony_ci ori $tmp0,0xffff # 0x0fffffff 70562306a36Sopenharmony_ci and $in0,$in0,$tmp0 70662306a36Sopenharmony_ci subu $tmp0,3 # 0x0ffffffc 70762306a36Sopenharmony_ci and $in1,$in1,$tmp0 70862306a36Sopenharmony_ci and $in2,$in2,$tmp0 70962306a36Sopenharmony_ci and $in3,$in3,$tmp0 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci sw $in0,20($ctx) 71262306a36Sopenharmony_ci sw $in1,24($ctx) 71362306a36Sopenharmony_ci sw $in2,28($ctx) 71462306a36Sopenharmony_ci sw $in3,32($ctx) 71562306a36Sopenharmony_ci 
71662306a36Sopenharmony_ci srl $tmp1,$in1,2 71762306a36Sopenharmony_ci srl $tmp2,$in2,2 71862306a36Sopenharmony_ci srl $tmp3,$in3,2 71962306a36Sopenharmony_ci addu $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2) 72062306a36Sopenharmony_ci addu $in2,$in2,$tmp2 72162306a36Sopenharmony_ci addu $in3,$in3,$tmp3 72262306a36Sopenharmony_ci sw $in1,36($ctx) 72362306a36Sopenharmony_ci sw $in2,40($ctx) 72462306a36Sopenharmony_ci sw $in3,44($ctx) 72562306a36Sopenharmony_ci.Lno_key: 72662306a36Sopenharmony_ci li $v0,0 72762306a36Sopenharmony_ci jr $ra 72862306a36Sopenharmony_ci.end poly1305_init 72962306a36Sopenharmony_ci___ 73062306a36Sopenharmony_ci{ 73162306a36Sopenharmony_cimy $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x00fff000" : "0x00ff0000"; 73262306a36Sopenharmony_ci 73362306a36Sopenharmony_cimy ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) = 73462306a36Sopenharmony_ci ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11); 73562306a36Sopenharmony_cimy ($d0,$d1,$d2,$d3) = 73662306a36Sopenharmony_ci ($a4,$a5,$a6,$a7); 73762306a36Sopenharmony_cimy $shr = $t2; # used on R6 73862306a36Sopenharmony_cimy $one = $t2; # used on R2 73962306a36Sopenharmony_ci 74062306a36Sopenharmony_ci$code.=<<___; 74162306a36Sopenharmony_ci.globl poly1305_blocks 74262306a36Sopenharmony_ci.align 5 74362306a36Sopenharmony_ci.ent poly1305_blocks 74462306a36Sopenharmony_cipoly1305_blocks: 74562306a36Sopenharmony_ci .frame $sp,16*4,$ra 74662306a36Sopenharmony_ci .mask $SAVED_REGS_MASK,-4 74762306a36Sopenharmony_ci .set noreorder 74862306a36Sopenharmony_ci subu $sp, $sp,4*12 74962306a36Sopenharmony_ci sw $s11,4*11($sp) 75062306a36Sopenharmony_ci sw $s10,4*10($sp) 75162306a36Sopenharmony_ci sw $s9, 4*9($sp) 75262306a36Sopenharmony_ci sw $s8, 4*8($sp) 75362306a36Sopenharmony_ci sw $s7, 4*7($sp) 75462306a36Sopenharmony_ci sw $s6, 4*6($sp) 75562306a36Sopenharmony_ci sw $s5, 4*5($sp) 75662306a36Sopenharmony_ci sw $s4, 4*4($sp) 75762306a36Sopenharmony_ci___ 75862306a36Sopenharmony_ci$code.=<<___ if 
($flavour =~ /nubi/i); # optimize non-nubi prologue 75962306a36Sopenharmony_ci sw $s3, 4*3($sp) 76062306a36Sopenharmony_ci sw $s2, 4*2($sp) 76162306a36Sopenharmony_ci sw $s1, 4*1($sp) 76262306a36Sopenharmony_ci sw $s0, 4*0($sp) 76362306a36Sopenharmony_ci___ 76462306a36Sopenharmony_ci$code.=<<___; 76562306a36Sopenharmony_ci .set reorder 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ci srl $len,4 # number of complete blocks 76862306a36Sopenharmony_ci li $one,1 76962306a36Sopenharmony_ci beqz $len,.Labort 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS32R6) 77262306a36Sopenharmony_ci andi $shr,$inp,3 77362306a36Sopenharmony_ci subu $inp,$inp,$shr # align $inp 77462306a36Sopenharmony_ci sll $shr,$shr,3 # byte to bit offset 77562306a36Sopenharmony_ci#endif 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ci lw $h0,0($ctx) # load hash value 77862306a36Sopenharmony_ci lw $h1,4($ctx) 77962306a36Sopenharmony_ci lw $h2,8($ctx) 78062306a36Sopenharmony_ci lw $h3,12($ctx) 78162306a36Sopenharmony_ci lw $h4,16($ctx) 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_ci lw $r0,20($ctx) # load key 78462306a36Sopenharmony_ci lw $r1,24($ctx) 78562306a36Sopenharmony_ci lw $r2,28($ctx) 78662306a36Sopenharmony_ci lw $r3,32($ctx) 78762306a36Sopenharmony_ci lw $rs1,36($ctx) 78862306a36Sopenharmony_ci lw $rs2,40($ctx) 78962306a36Sopenharmony_ci lw $rs3,44($ctx) 79062306a36Sopenharmony_ci 79162306a36Sopenharmony_ci sll $len,4 79262306a36Sopenharmony_ci addu $len,$len,$inp # end of buffer 79362306a36Sopenharmony_ci b .Loop 79462306a36Sopenharmony_ci 79562306a36Sopenharmony_ci.align 4 79662306a36Sopenharmony_ci.Loop: 79762306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS32R6) 79862306a36Sopenharmony_ci lw $d0,0($inp) # load input 79962306a36Sopenharmony_ci lw $d1,4($inp) 80062306a36Sopenharmony_ci lw $d2,8($inp) 80162306a36Sopenharmony_ci lw $d3,12($inp) 80262306a36Sopenharmony_ci beqz $shr,.Laligned_inp 80362306a36Sopenharmony_ci 
80462306a36Sopenharmony_ci lw $t0,16($inp) 80562306a36Sopenharmony_ci subu $t1,$zero,$shr 80662306a36Sopenharmony_ci# ifdef MIPSEB 80762306a36Sopenharmony_ci sllv $d0,$d0,$shr 80862306a36Sopenharmony_ci srlv $at,$d1,$t1 80962306a36Sopenharmony_ci sllv $d1,$d1,$shr 81062306a36Sopenharmony_ci or $d0,$d0,$at 81162306a36Sopenharmony_ci srlv $at,$d2,$t1 81262306a36Sopenharmony_ci sllv $d2,$d2,$shr 81362306a36Sopenharmony_ci or $d1,$d1,$at 81462306a36Sopenharmony_ci srlv $at,$d3,$t1 81562306a36Sopenharmony_ci sllv $d3,$d3,$shr 81662306a36Sopenharmony_ci or $d2,$d2,$at 81762306a36Sopenharmony_ci srlv $t0,$t0,$t1 81862306a36Sopenharmony_ci or $d3,$d3,$t0 81962306a36Sopenharmony_ci# else 82062306a36Sopenharmony_ci srlv $d0,$d0,$shr 82162306a36Sopenharmony_ci sllv $at,$d1,$t1 82262306a36Sopenharmony_ci srlv $d1,$d1,$shr 82362306a36Sopenharmony_ci or $d0,$d0,$at 82462306a36Sopenharmony_ci sllv $at,$d2,$t1 82562306a36Sopenharmony_ci srlv $d2,$d2,$shr 82662306a36Sopenharmony_ci or $d1,$d1,$at 82762306a36Sopenharmony_ci sllv $at,$d3,$t1 82862306a36Sopenharmony_ci srlv $d3,$d3,$shr 82962306a36Sopenharmony_ci or $d2,$d2,$at 83062306a36Sopenharmony_ci sllv $t0,$t0,$t1 83162306a36Sopenharmony_ci or $d3,$d3,$t0 83262306a36Sopenharmony_ci# endif 83362306a36Sopenharmony_ci.Laligned_inp: 83462306a36Sopenharmony_ci#else 83562306a36Sopenharmony_ci lwl $d0,0+MSB($inp) # load input 83662306a36Sopenharmony_ci lwl $d1,4+MSB($inp) 83762306a36Sopenharmony_ci lwl $d2,8+MSB($inp) 83862306a36Sopenharmony_ci lwl $d3,12+MSB($inp) 83962306a36Sopenharmony_ci lwr $d0,0+LSB($inp) 84062306a36Sopenharmony_ci lwr $d1,4+LSB($inp) 84162306a36Sopenharmony_ci lwr $d2,8+LSB($inp) 84262306a36Sopenharmony_ci lwr $d3,12+LSB($inp) 84362306a36Sopenharmony_ci#endif 84462306a36Sopenharmony_ci#ifdef MIPSEB 84562306a36Sopenharmony_ci# if defined(_MIPS_ARCH_MIPS32R2) 84662306a36Sopenharmony_ci wsbh $d0,$d0 # byte swap 84762306a36Sopenharmony_ci wsbh $d1,$d1 84862306a36Sopenharmony_ci wsbh $d2,$d2 
84962306a36Sopenharmony_ci wsbh $d3,$d3 85062306a36Sopenharmony_ci rotr $d0,$d0,16 85162306a36Sopenharmony_ci rotr $d1,$d1,16 85262306a36Sopenharmony_ci rotr $d2,$d2,16 85362306a36Sopenharmony_ci rotr $d3,$d3,16 85462306a36Sopenharmony_ci# else 85562306a36Sopenharmony_ci srl $at,$d0,24 # byte swap 85662306a36Sopenharmony_ci srl $t0,$d0,8 85762306a36Sopenharmony_ci andi $t1,$d0,0xFF00 85862306a36Sopenharmony_ci sll $d0,$d0,24 85962306a36Sopenharmony_ci andi $t0,0xFF00 86062306a36Sopenharmony_ci sll $t1,$t1,8 86162306a36Sopenharmony_ci or $d0,$at 86262306a36Sopenharmony_ci srl $at,$d1,24 86362306a36Sopenharmony_ci or $t0,$t1 86462306a36Sopenharmony_ci srl $t1,$d1,8 86562306a36Sopenharmony_ci or $d0,$t0 86662306a36Sopenharmony_ci andi $t0,$d1,0xFF00 86762306a36Sopenharmony_ci sll $d1,$d1,24 86862306a36Sopenharmony_ci andi $t1,0xFF00 86962306a36Sopenharmony_ci sll $t0,$t0,8 87062306a36Sopenharmony_ci or $d1,$at 87162306a36Sopenharmony_ci srl $at,$d2,24 87262306a36Sopenharmony_ci or $t1,$t0 87362306a36Sopenharmony_ci srl $t0,$d2,8 87462306a36Sopenharmony_ci or $d1,$t1 87562306a36Sopenharmony_ci andi $t1,$d2,0xFF00 87662306a36Sopenharmony_ci sll $d2,$d2,24 87762306a36Sopenharmony_ci andi $t0,0xFF00 87862306a36Sopenharmony_ci sll $t1,$t1,8 87962306a36Sopenharmony_ci or $d2,$at 88062306a36Sopenharmony_ci srl $at,$d3,24 88162306a36Sopenharmony_ci or $t0,$t1 88262306a36Sopenharmony_ci srl $t1,$d3,8 88362306a36Sopenharmony_ci or $d2,$t0 88462306a36Sopenharmony_ci andi $t0,$d3,0xFF00 88562306a36Sopenharmony_ci sll $d3,$d3,24 88662306a36Sopenharmony_ci andi $t1,0xFF00 88762306a36Sopenharmony_ci sll $t0,$t0,8 88862306a36Sopenharmony_ci or $d3,$at 88962306a36Sopenharmony_ci or $t1,$t0 89062306a36Sopenharmony_ci or $d3,$t1 89162306a36Sopenharmony_ci# endif 89262306a36Sopenharmony_ci#endif 89362306a36Sopenharmony_ci srl $t0,$h4,2 # modulo-scheduled reduction 89462306a36Sopenharmony_ci andi $h4,$h4,3 89562306a36Sopenharmony_ci sll $at,$t0,2 89662306a36Sopenharmony_ci 
89762306a36Sopenharmony_ci addu $d0,$d0,$h0 # accumulate input 89862306a36Sopenharmony_ci addu $t0,$t0,$at 89962306a36Sopenharmony_ci sltu $h0,$d0,$h0 90062306a36Sopenharmony_ci addu $d0,$d0,$t0 # ... and residue 90162306a36Sopenharmony_ci sltu $at,$d0,$t0 90262306a36Sopenharmony_ci 90362306a36Sopenharmony_ci addu $d1,$d1,$h1 90462306a36Sopenharmony_ci addu $h0,$h0,$at # carry 90562306a36Sopenharmony_ci sltu $h1,$d1,$h1 90662306a36Sopenharmony_ci addu $d1,$d1,$h0 90762306a36Sopenharmony_ci sltu $h0,$d1,$h0 90862306a36Sopenharmony_ci 90962306a36Sopenharmony_ci addu $d2,$d2,$h2 91062306a36Sopenharmony_ci addu $h1,$h1,$h0 # carry 91162306a36Sopenharmony_ci sltu $h2,$d2,$h2 91262306a36Sopenharmony_ci addu $d2,$d2,$h1 91362306a36Sopenharmony_ci sltu $h1,$d2,$h1 91462306a36Sopenharmony_ci 91562306a36Sopenharmony_ci addu $d3,$d3,$h3 91662306a36Sopenharmony_ci addu $h2,$h2,$h1 # carry 91762306a36Sopenharmony_ci sltu $h3,$d3,$h3 91862306a36Sopenharmony_ci addu $d3,$d3,$h2 91962306a36Sopenharmony_ci 92062306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6) 92162306a36Sopenharmony_ci multu $r0,$d0 # d0*r0 92262306a36Sopenharmony_ci sltu $h2,$d3,$h2 92362306a36Sopenharmony_ci maddu $rs3,$d1 # d1*s3 92462306a36Sopenharmony_ci addu $h3,$h3,$h2 # carry 92562306a36Sopenharmony_ci maddu $rs2,$d2 # d2*s2 92662306a36Sopenharmony_ci addu $h4,$h4,$padbit 92762306a36Sopenharmony_ci maddu $rs1,$d3 # d3*s1 92862306a36Sopenharmony_ci addu $h4,$h4,$h3 92962306a36Sopenharmony_ci mfhi $at 93062306a36Sopenharmony_ci mflo $h0 93162306a36Sopenharmony_ci 93262306a36Sopenharmony_ci multu $r1,$d0 # d0*r1 93362306a36Sopenharmony_ci maddu $r0,$d1 # d1*r0 93462306a36Sopenharmony_ci maddu $rs3,$d2 # d2*s3 93562306a36Sopenharmony_ci maddu $rs2,$d3 # d3*s2 93662306a36Sopenharmony_ci maddu $rs1,$h4 # h4*s1 93762306a36Sopenharmony_ci maddu $at,$one # hi*1 93862306a36Sopenharmony_ci mfhi $at 93962306a36Sopenharmony_ci mflo $h1 94062306a36Sopenharmony_ci 
94162306a36Sopenharmony_ci multu $r2,$d0 # d0*r2 94262306a36Sopenharmony_ci maddu $r1,$d1 # d1*r1 94362306a36Sopenharmony_ci maddu $r0,$d2 # d2*r0 94462306a36Sopenharmony_ci maddu $rs3,$d3 # d3*s3 94562306a36Sopenharmony_ci maddu $rs2,$h4 # h4*s2 94662306a36Sopenharmony_ci maddu $at,$one # hi*1 94762306a36Sopenharmony_ci mfhi $at 94862306a36Sopenharmony_ci mflo $h2 94962306a36Sopenharmony_ci 95062306a36Sopenharmony_ci mul $t0,$r0,$h4 # h4*r0 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci multu $r3,$d0 # d0*r3 95362306a36Sopenharmony_ci maddu $r2,$d1 # d1*r2 95462306a36Sopenharmony_ci maddu $r1,$d2 # d2*r1 95562306a36Sopenharmony_ci maddu $r0,$d3 # d3*r0 95662306a36Sopenharmony_ci maddu $rs3,$h4 # h4*s3 95762306a36Sopenharmony_ci maddu $at,$one # hi*1 95862306a36Sopenharmony_ci mfhi $at 95962306a36Sopenharmony_ci mflo $h3 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci addiu $inp,$inp,16 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_ci addu $h4,$t0,$at 96462306a36Sopenharmony_ci#else 96562306a36Sopenharmony_ci multu ($r0,$d0) # d0*r0 96662306a36Sopenharmony_ci mflo ($h0,$r0,$d0) 96762306a36Sopenharmony_ci mfhi ($h1,$r0,$d0) 96862306a36Sopenharmony_ci 96962306a36Sopenharmony_ci sltu $h2,$d3,$h2 97062306a36Sopenharmony_ci addu $h3,$h3,$h2 # carry 97162306a36Sopenharmony_ci 97262306a36Sopenharmony_ci multu ($rs3,$d1) # d1*s3 97362306a36Sopenharmony_ci mflo ($at,$rs3,$d1) 97462306a36Sopenharmony_ci mfhi ($t0,$rs3,$d1) 97562306a36Sopenharmony_ci 97662306a36Sopenharmony_ci addu $h4,$h4,$padbit 97762306a36Sopenharmony_ci addiu $inp,$inp,16 97862306a36Sopenharmony_ci addu $h4,$h4,$h3 97962306a36Sopenharmony_ci 98062306a36Sopenharmony_ci multu ($rs2,$d2) # d2*s2 98162306a36Sopenharmony_ci mflo ($a3,$rs2,$d2) 98262306a36Sopenharmony_ci mfhi ($t1,$rs2,$d2) 98362306a36Sopenharmony_ci addu $h0,$h0,$at 98462306a36Sopenharmony_ci addu $h1,$h1,$t0 98562306a36Sopenharmony_ci multu ($rs1,$d3) # d3*s1 98662306a36Sopenharmony_ci sltu $at,$h0,$at 
98762306a36Sopenharmony_ci addu $h1,$h1,$at 98862306a36Sopenharmony_ci 98962306a36Sopenharmony_ci mflo ($at,$rs1,$d3) 99062306a36Sopenharmony_ci mfhi ($t0,$rs1,$d3) 99162306a36Sopenharmony_ci addu $h0,$h0,$a3 99262306a36Sopenharmony_ci addu $h1,$h1,$t1 99362306a36Sopenharmony_ci multu ($r1,$d0) # d0*r1 99462306a36Sopenharmony_ci sltu $a3,$h0,$a3 99562306a36Sopenharmony_ci addu $h1,$h1,$a3 99662306a36Sopenharmony_ci 99762306a36Sopenharmony_ci 99862306a36Sopenharmony_ci mflo ($a3,$r1,$d0) 99962306a36Sopenharmony_ci mfhi ($h2,$r1,$d0) 100062306a36Sopenharmony_ci addu $h0,$h0,$at 100162306a36Sopenharmony_ci addu $h1,$h1,$t0 100262306a36Sopenharmony_ci multu ($r0,$d1) # d1*r0 100362306a36Sopenharmony_ci sltu $at,$h0,$at 100462306a36Sopenharmony_ci addu $h1,$h1,$at 100562306a36Sopenharmony_ci 100662306a36Sopenharmony_ci mflo ($at,$r0,$d1) 100762306a36Sopenharmony_ci mfhi ($t0,$r0,$d1) 100862306a36Sopenharmony_ci addu $h1,$h1,$a3 100962306a36Sopenharmony_ci sltu $a3,$h1,$a3 101062306a36Sopenharmony_ci multu ($rs3,$d2) # d2*s3 101162306a36Sopenharmony_ci addu $h2,$h2,$a3 101262306a36Sopenharmony_ci 101362306a36Sopenharmony_ci mflo ($a3,$rs3,$d2) 101462306a36Sopenharmony_ci mfhi ($t1,$rs3,$d2) 101562306a36Sopenharmony_ci addu $h1,$h1,$at 101662306a36Sopenharmony_ci addu $h2,$h2,$t0 101762306a36Sopenharmony_ci multu ($rs2,$d3) # d3*s2 101862306a36Sopenharmony_ci sltu $at,$h1,$at 101962306a36Sopenharmony_ci addu $h2,$h2,$at 102062306a36Sopenharmony_ci 102162306a36Sopenharmony_ci mflo ($at,$rs2,$d3) 102262306a36Sopenharmony_ci mfhi ($t0,$rs2,$d3) 102362306a36Sopenharmony_ci addu $h1,$h1,$a3 102462306a36Sopenharmony_ci addu $h2,$h2,$t1 102562306a36Sopenharmony_ci multu ($rs1,$h4) # h4*s1 102662306a36Sopenharmony_ci sltu $a3,$h1,$a3 102762306a36Sopenharmony_ci addu $h2,$h2,$a3 102862306a36Sopenharmony_ci 102962306a36Sopenharmony_ci mflo ($a3,$rs1,$h4) 103062306a36Sopenharmony_ci addu $h1,$h1,$at 103162306a36Sopenharmony_ci addu $h2,$h2,$t0 103262306a36Sopenharmony_ci multu 
($r2,$d0) # d0*r2 103362306a36Sopenharmony_ci sltu $at,$h1,$at 103462306a36Sopenharmony_ci addu $h2,$h2,$at 103562306a36Sopenharmony_ci 103662306a36Sopenharmony_ci 103762306a36Sopenharmony_ci mflo ($at,$r2,$d0) 103862306a36Sopenharmony_ci mfhi ($h3,$r2,$d0) 103962306a36Sopenharmony_ci addu $h1,$h1,$a3 104062306a36Sopenharmony_ci sltu $a3,$h1,$a3 104162306a36Sopenharmony_ci multu ($r1,$d1) # d1*r1 104262306a36Sopenharmony_ci addu $h2,$h2,$a3 104362306a36Sopenharmony_ci 104462306a36Sopenharmony_ci mflo ($a3,$r1,$d1) 104562306a36Sopenharmony_ci mfhi ($t1,$r1,$d1) 104662306a36Sopenharmony_ci addu $h2,$h2,$at 104762306a36Sopenharmony_ci sltu $at,$h2,$at 104862306a36Sopenharmony_ci multu ($r0,$d2) # d2*r0 104962306a36Sopenharmony_ci addu $h3,$h3,$at 105062306a36Sopenharmony_ci 105162306a36Sopenharmony_ci mflo ($at,$r0,$d2) 105262306a36Sopenharmony_ci mfhi ($t0,$r0,$d2) 105362306a36Sopenharmony_ci addu $h2,$h2,$a3 105462306a36Sopenharmony_ci addu $h3,$h3,$t1 105562306a36Sopenharmony_ci multu ($rs3,$d3) # d3*s3 105662306a36Sopenharmony_ci sltu $a3,$h2,$a3 105762306a36Sopenharmony_ci addu $h3,$h3,$a3 105862306a36Sopenharmony_ci 105962306a36Sopenharmony_ci mflo ($a3,$rs3,$d3) 106062306a36Sopenharmony_ci mfhi ($t1,$rs3,$d3) 106162306a36Sopenharmony_ci addu $h2,$h2,$at 106262306a36Sopenharmony_ci addu $h3,$h3,$t0 106362306a36Sopenharmony_ci multu ($rs2,$h4) # h4*s2 106462306a36Sopenharmony_ci sltu $at,$h2,$at 106562306a36Sopenharmony_ci addu $h3,$h3,$at 106662306a36Sopenharmony_ci 106762306a36Sopenharmony_ci mflo ($at,$rs2,$h4) 106862306a36Sopenharmony_ci addu $h2,$h2,$a3 106962306a36Sopenharmony_ci addu $h3,$h3,$t1 107062306a36Sopenharmony_ci multu ($r3,$d0) # d0*r3 107162306a36Sopenharmony_ci sltu $a3,$h2,$a3 107262306a36Sopenharmony_ci addu $h3,$h3,$a3 107362306a36Sopenharmony_ci 107462306a36Sopenharmony_ci 107562306a36Sopenharmony_ci mflo ($a3,$r3,$d0) 107662306a36Sopenharmony_ci mfhi ($t1,$r3,$d0) 107762306a36Sopenharmony_ci addu $h2,$h2,$at 107862306a36Sopenharmony_ci 
sltu $at,$h2,$at 107962306a36Sopenharmony_ci multu ($r2,$d1) # d1*r2 108062306a36Sopenharmony_ci addu $h3,$h3,$at 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_ci mflo ($at,$r2,$d1) 108362306a36Sopenharmony_ci mfhi ($t0,$r2,$d1) 108462306a36Sopenharmony_ci addu $h3,$h3,$a3 108562306a36Sopenharmony_ci sltu $a3,$h3,$a3 108662306a36Sopenharmony_ci multu ($r0,$d3) # d3*r0 108762306a36Sopenharmony_ci addu $t1,$t1,$a3 108862306a36Sopenharmony_ci 108962306a36Sopenharmony_ci mflo ($a3,$r0,$d3) 109062306a36Sopenharmony_ci mfhi ($d3,$r0,$d3) 109162306a36Sopenharmony_ci addu $h3,$h3,$at 109262306a36Sopenharmony_ci addu $t1,$t1,$t0 109362306a36Sopenharmony_ci multu ($r1,$d2) # d2*r1 109462306a36Sopenharmony_ci sltu $at,$h3,$at 109562306a36Sopenharmony_ci addu $t1,$t1,$at 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_ci mflo ($at,$r1,$d2) 109862306a36Sopenharmony_ci mfhi ($t0,$r1,$d2) 109962306a36Sopenharmony_ci addu $h3,$h3,$a3 110062306a36Sopenharmony_ci addu $t1,$t1,$d3 110162306a36Sopenharmony_ci multu ($rs3,$h4) # h4*s3 110262306a36Sopenharmony_ci sltu $a3,$h3,$a3 110362306a36Sopenharmony_ci addu $t1,$t1,$a3 110462306a36Sopenharmony_ci 110562306a36Sopenharmony_ci mflo ($a3,$rs3,$h4) 110662306a36Sopenharmony_ci addu $h3,$h3,$at 110762306a36Sopenharmony_ci addu $t1,$t1,$t0 110862306a36Sopenharmony_ci multu ($r0,$h4) # h4*r0 110962306a36Sopenharmony_ci sltu $at,$h3,$at 111062306a36Sopenharmony_ci addu $t1,$t1,$at 111162306a36Sopenharmony_ci 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_ci mflo ($h4,$r0,$h4) 111462306a36Sopenharmony_ci addu $h3,$h3,$a3 111562306a36Sopenharmony_ci sltu $a3,$h3,$a3 111662306a36Sopenharmony_ci addu $t1,$t1,$a3 111762306a36Sopenharmony_ci addu $h4,$h4,$t1 111862306a36Sopenharmony_ci 111962306a36Sopenharmony_ci li $padbit,1 # if we loop, padbit is 1 112062306a36Sopenharmony_ci#endif 112162306a36Sopenharmony_ci bne $inp,$len,.Loop 112262306a36Sopenharmony_ci 112362306a36Sopenharmony_ci sw $h0,0($ctx) # store hash value 
112462306a36Sopenharmony_ci sw $h1,4($ctx) 112562306a36Sopenharmony_ci sw $h2,8($ctx) 112662306a36Sopenharmony_ci sw $h3,12($ctx) 112762306a36Sopenharmony_ci sw $h4,16($ctx) 112862306a36Sopenharmony_ci 112962306a36Sopenharmony_ci .set noreorder 113062306a36Sopenharmony_ci.Labort: 113162306a36Sopenharmony_ci lw $s11,4*11($sp) 113262306a36Sopenharmony_ci lw $s10,4*10($sp) 113362306a36Sopenharmony_ci lw $s9, 4*9($sp) 113462306a36Sopenharmony_ci lw $s8, 4*8($sp) 113562306a36Sopenharmony_ci lw $s7, 4*7($sp) 113662306a36Sopenharmony_ci lw $s6, 4*6($sp) 113762306a36Sopenharmony_ci lw $s5, 4*5($sp) 113862306a36Sopenharmony_ci lw $s4, 4*4($sp) 113962306a36Sopenharmony_ci___ 114062306a36Sopenharmony_ci$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue 114162306a36Sopenharmony_ci lw $s3, 4*3($sp) 114262306a36Sopenharmony_ci lw $s2, 4*2($sp) 114362306a36Sopenharmony_ci lw $s1, 4*1($sp) 114462306a36Sopenharmony_ci lw $s0, 4*0($sp) 114562306a36Sopenharmony_ci___ 114662306a36Sopenharmony_ci$code.=<<___; 114762306a36Sopenharmony_ci jr $ra 114862306a36Sopenharmony_ci addu $sp,$sp,4*12 114962306a36Sopenharmony_ci.end poly1305_blocks 115062306a36Sopenharmony_ci___ 115162306a36Sopenharmony_ci} 115262306a36Sopenharmony_ci{ 115362306a36Sopenharmony_cimy ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3); 115462306a36Sopenharmony_ci 115562306a36Sopenharmony_ci$code.=<<___; 115662306a36Sopenharmony_ci.align 5 115762306a36Sopenharmony_ci.globl poly1305_emit 115862306a36Sopenharmony_ci.ent poly1305_emit 115962306a36Sopenharmony_cipoly1305_emit: 116062306a36Sopenharmony_ci .frame $sp,0,$ra 116162306a36Sopenharmony_ci .set reorder 116262306a36Sopenharmony_ci 116362306a36Sopenharmony_ci lw $tmp4,16($ctx) 116462306a36Sopenharmony_ci lw $tmp0,0($ctx) 116562306a36Sopenharmony_ci lw $tmp1,4($ctx) 116662306a36Sopenharmony_ci lw $tmp2,8($ctx) 116762306a36Sopenharmony_ci lw $tmp3,12($ctx) 116862306a36Sopenharmony_ci 116962306a36Sopenharmony_ci li $in0,-4 # final reduction 
117062306a36Sopenharmony_ci srl $ctx,$tmp4,2 117162306a36Sopenharmony_ci and $in0,$in0,$tmp4 117262306a36Sopenharmony_ci andi $tmp4,$tmp4,3 117362306a36Sopenharmony_ci addu $ctx,$ctx,$in0 117462306a36Sopenharmony_ci 117562306a36Sopenharmony_ci addu $tmp0,$tmp0,$ctx 117662306a36Sopenharmony_ci sltu $ctx,$tmp0,$ctx 117762306a36Sopenharmony_ci addiu $in0,$tmp0,5 # compare to modulus 117862306a36Sopenharmony_ci addu $tmp1,$tmp1,$ctx 117962306a36Sopenharmony_ci sltiu $in1,$in0,5 118062306a36Sopenharmony_ci sltu $ctx,$tmp1,$ctx 118162306a36Sopenharmony_ci addu $in1,$in1,$tmp1 118262306a36Sopenharmony_ci addu $tmp2,$tmp2,$ctx 118362306a36Sopenharmony_ci sltu $in2,$in1,$tmp1 118462306a36Sopenharmony_ci sltu $ctx,$tmp2,$ctx 118562306a36Sopenharmony_ci addu $in2,$in2,$tmp2 118662306a36Sopenharmony_ci addu $tmp3,$tmp3,$ctx 118762306a36Sopenharmony_ci sltu $in3,$in2,$tmp2 118862306a36Sopenharmony_ci sltu $ctx,$tmp3,$ctx 118962306a36Sopenharmony_ci addu $in3,$in3,$tmp3 119062306a36Sopenharmony_ci addu $tmp4,$tmp4,$ctx 119162306a36Sopenharmony_ci sltu $ctx,$in3,$tmp3 119262306a36Sopenharmony_ci addu $ctx,$tmp4 119362306a36Sopenharmony_ci 119462306a36Sopenharmony_ci srl $ctx,2 # see if it carried/borrowed 119562306a36Sopenharmony_ci subu $ctx,$zero,$ctx 119662306a36Sopenharmony_ci 119762306a36Sopenharmony_ci xor $in0,$tmp0 119862306a36Sopenharmony_ci xor $in1,$tmp1 119962306a36Sopenharmony_ci xor $in2,$tmp2 120062306a36Sopenharmony_ci xor $in3,$tmp3 120162306a36Sopenharmony_ci and $in0,$ctx 120262306a36Sopenharmony_ci and $in1,$ctx 120362306a36Sopenharmony_ci and $in2,$ctx 120462306a36Sopenharmony_ci and $in3,$ctx 120562306a36Sopenharmony_ci xor $in0,$tmp0 120662306a36Sopenharmony_ci xor $in1,$tmp1 120762306a36Sopenharmony_ci xor $in2,$tmp2 120862306a36Sopenharmony_ci xor $in3,$tmp3 120962306a36Sopenharmony_ci 121062306a36Sopenharmony_ci lw $tmp0,0($nonce) # load nonce 121162306a36Sopenharmony_ci lw $tmp1,4($nonce) 121262306a36Sopenharmony_ci lw $tmp2,8($nonce) 
121362306a36Sopenharmony_ci lw $tmp3,12($nonce) 121462306a36Sopenharmony_ci 121562306a36Sopenharmony_ci addu $in0,$tmp0 # accumulate nonce 121662306a36Sopenharmony_ci sltu $ctx,$in0,$tmp0 121762306a36Sopenharmony_ci 121862306a36Sopenharmony_ci addu $in1,$tmp1 121962306a36Sopenharmony_ci sltu $tmp1,$in1,$tmp1 122062306a36Sopenharmony_ci addu $in1,$ctx 122162306a36Sopenharmony_ci sltu $ctx,$in1,$ctx 122262306a36Sopenharmony_ci addu $ctx,$tmp1 122362306a36Sopenharmony_ci 122462306a36Sopenharmony_ci addu $in2,$tmp2 122562306a36Sopenharmony_ci sltu $tmp2,$in2,$tmp2 122662306a36Sopenharmony_ci addu $in2,$ctx 122762306a36Sopenharmony_ci sltu $ctx,$in2,$ctx 122862306a36Sopenharmony_ci addu $ctx,$tmp2 122962306a36Sopenharmony_ci 123062306a36Sopenharmony_ci addu $in3,$tmp3 123162306a36Sopenharmony_ci addu $in3,$ctx 123262306a36Sopenharmony_ci 123362306a36Sopenharmony_ci srl $tmp0,$in0,8 # write mac value 123462306a36Sopenharmony_ci srl $tmp1,$in0,16 123562306a36Sopenharmony_ci srl $tmp2,$in0,24 123662306a36Sopenharmony_ci sb $in0, 0($mac) 123762306a36Sopenharmony_ci sb $tmp0,1($mac) 123862306a36Sopenharmony_ci srl $tmp0,$in1,8 123962306a36Sopenharmony_ci sb $tmp1,2($mac) 124062306a36Sopenharmony_ci srl $tmp1,$in1,16 124162306a36Sopenharmony_ci sb $tmp2,3($mac) 124262306a36Sopenharmony_ci srl $tmp2,$in1,24 124362306a36Sopenharmony_ci sb $in1, 4($mac) 124462306a36Sopenharmony_ci sb $tmp0,5($mac) 124562306a36Sopenharmony_ci srl $tmp0,$in2,8 124662306a36Sopenharmony_ci sb $tmp1,6($mac) 124762306a36Sopenharmony_ci srl $tmp1,$in2,16 124862306a36Sopenharmony_ci sb $tmp2,7($mac) 124962306a36Sopenharmony_ci srl $tmp2,$in2,24 125062306a36Sopenharmony_ci sb $in2, 8($mac) 125162306a36Sopenharmony_ci sb $tmp0,9($mac) 125262306a36Sopenharmony_ci srl $tmp0,$in3,8 125362306a36Sopenharmony_ci sb $tmp1,10($mac) 125462306a36Sopenharmony_ci srl $tmp1,$in3,16 125562306a36Sopenharmony_ci sb $tmp2,11($mac) 125662306a36Sopenharmony_ci srl $tmp2,$in3,24 125762306a36Sopenharmony_ci sb $in3, 12($mac) 
125862306a36Sopenharmony_ci sb $tmp0,13($mac) 125962306a36Sopenharmony_ci sb $tmp1,14($mac) 126062306a36Sopenharmony_ci sb $tmp2,15($mac) 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci jr $ra 126362306a36Sopenharmony_ci.end poly1305_emit 126462306a36Sopenharmony_ci.rdata 126562306a36Sopenharmony_ci.asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm" 126662306a36Sopenharmony_ci.align 2 126762306a36Sopenharmony_ci___ 126862306a36Sopenharmony_ci} 126962306a36Sopenharmony_ci}}} 127062306a36Sopenharmony_ci 127162306a36Sopenharmony_ci$output=pop and open STDOUT,">$output"; 127262306a36Sopenharmony_ciprint $code; 127362306a36Sopenharmony_ciclose STDOUT; 1274