#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
#
# ====================================================================
# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL
# project.
# ====================================================================

# Poly1305 hash for MIPS.
#
# May 2016
#
# Numbers are cycles per processed byte with poly1305_blocks alone.
#
#		IALU/gcc
# R1x000	~5.5/+130%	(big-endian)
# Octeon II	2.50/+70%	(little-endian)
#
# March 2019
#
# Add 32-bit code path.
#
# October 2019
#
# Modulo-scheduling the reduction allows omitting the dependency chain
# at the end of the inner loop and improves performance. Also optimize
# the MIPS32R2 code path for the MIPS 1004K core. Per René von Dorst's
# suggestions.
#
#		IALU/gcc
# R1x000	~9.8/?		(big-endian)
# Octeon II	3.65/+140%	(little-endian)
# MT7621/1004K	4.75/?		(little-endian)
#
######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in an ABI-neutral
# manner. Therefore let's stick to the NUBI register layout:
#
# NUBI register layout.  Each variable holds the assembler spelling of one
# general-purpose register ("$" followed by its number), so the heredocs
# that build $code can interpolate symbolic names instead of raw numbers.
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));	# $0-$2, $24, $25
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));	# $4-$11
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));	# $12-$23
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));		# $3, $28-$31
#
# The return value is placed in $a0. The following coding rules
# facilitate interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp [o32 can be
#   excluded from the rule, because it's specified volatile];
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
# <appro@openssl.org>
#
######################################################################

# Target ABI flavour: the first command-line argument, or "64" when that
# argument is missing (or otherwise false).
$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64

# Register that carries the return value: $a0 for NUBI flavours, otherwise
# $t0, which in the layout above is register $2 (the traditional $v0).
$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0;

7062306a36Sopenharmony_ciif ($flavour =~ /64|n32/i) {{{
######################################################################
# 64-bit code path
#

# Register aliases shared by the 64-bit entry points.  The four arguments
# arrive in $a0-$a3; $in0/$in1 receive two 64-bit input words and
# $tmp0-$tmp4 are scratch.  $tmp2 is $at, which is why the emitted code
# starts with ".set noat".
my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);

# Emit the assembler preamble followed by poly1305_init(ctx, key).
#
# Preamble (C preprocessor glue):
#  - MIPS64 R3/R5/R6 builds also get _MIPS_ARCH_MIPS64R2 defined, so the
#    R2-only byte-swap path below is selected for them as well;
#  - dmultu/mflo/mfhi are macros: on R6 dmultu expands to nothing and
#    mflo/mfhi become the three-operand dmulu/dmuhu, elsewhere they are the
#    classic HI/LO instructions;
#  - under __KERNEL__ the three entry points are renamed to *_mips;
#  - MSB/LSB are the byte offsets passed to ldl/ldr for the current
#    endianness.
#
# poly1305_init:
#  - zeroes the 24 bytes of hash state at ctx+0 .. ctx+23;
#  - if the key pointer is NULL, skips straight to the return;
#  - otherwise loads 16 key bytes (aligned ld plus shifts on R6, ldl/ldr
#    elsewhere), byte-swaps both words on big-endian targets, and clamps
#    them with 0x0ffffffc0fffffff and 0x0ffffffc0ffffffc respectively;
#  - stores r0 at ctx+24, r1 at ctx+32 and s1 = r1 + (r1 >> 2) at ctx+40;
#  - always returns 0.
$code.=<<___;
#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
     defined(_MIPS_ARCH_MIPS64R6)) \\
     && !defined(_MIPS_ARCH_MIPS64R2)
# define _MIPS_ARCH_MIPS64R2
#endif

#if defined(_MIPS_ARCH_MIPS64R6)
# define dmultu(rs,rt)
# define mflo(rd,rs,rt)	dmulu	rd,rs,rt
# define mfhi(rd,rs,rt)	dmuhu	rd,rs,rt
#else
# define dmultu(rs,rt)		dmultu	rs,rt
# define mflo(rd,rs,rt)	mflo	rd
# define mfhi(rd,rs,rt)	mfhi	rd
#endif

#ifdef	__KERNEL__
# define poly1305_init   poly1305_init_mips
# define poly1305_blocks poly1305_blocks_mips
# define poly1305_emit   poly1305_emit_mips
#endif

#if defined(__MIPSEB__) && !defined(MIPSEB)
# define MIPSEB
#endif

#ifdef MIPSEB
# define MSB 0
# define LSB 7
#else
# define MSB 7
# define LSB 0
#endif

.text
.set	noat
.set	noreorder

.align	5
.globl	poly1305_init
.ent	poly1305_init
poly1305_init:
	.frame	$sp,0,$ra
	.set	reorder

	sd	$zero,0($ctx)
	sd	$zero,8($ctx)
	sd	$zero,16($ctx)

	beqz	$inp,.Lno_key

#if defined(_MIPS_ARCH_MIPS64R6)
	andi	$tmp0,$inp,7		# $inp % 8
	dsubu	$inp,$inp,$tmp0		# align $inp
	sll	$tmp0,$tmp0,3		# byte to bit offset
	ld	$in0,0($inp)
	ld	$in1,8($inp)
	beqz	$tmp0,.Laligned_key
	ld	$tmp2,16($inp)

	subu	$tmp1,$zero,$tmp0
# ifdef	MIPSEB
	dsllv	$in0,$in0,$tmp0
	dsrlv	$tmp3,$in1,$tmp1
	dsllv	$in1,$in1,$tmp0
	dsrlv	$tmp2,$tmp2,$tmp1
# else
	dsrlv	$in0,$in0,$tmp0
	dsllv	$tmp3,$in1,$tmp1
	dsrlv	$in1,$in1,$tmp0
	dsllv	$tmp2,$tmp2,$tmp1
# endif
	or	$in0,$in0,$tmp3
	or	$in1,$in1,$tmp2
.Laligned_key:
#else
	ldl	$in0,0+MSB($inp)
	ldl	$in1,8+MSB($inp)
	ldr	$in0,0+LSB($inp)
	ldr	$in1,8+LSB($inp)
#endif
#ifdef	MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	$in0,$in0		# byte swap
	 dsbh	$in1,$in1
	dshd	$in0,$in0
	 dshd	$in1,$in1
# else
	ori	$tmp0,$zero,0xFF
	dsll	$tmp2,$tmp0,32
	or	$tmp0,$tmp2		# 0x000000FF000000FF

	and	$tmp1,$in0,$tmp0	# byte swap
	 and	$tmp3,$in1,$tmp0
	dsrl	$tmp2,$in0,24
	 dsrl	$tmp4,$in1,24
	dsll	$tmp1,24
	 dsll	$tmp3,24
	and	$tmp2,$tmp0
	 and	$tmp4,$tmp0
	dsll	$tmp0,8			# 0x0000FF000000FF00
	or	$tmp1,$tmp2
	 or	$tmp3,$tmp4
	and	$tmp2,$in0,$tmp0
	 and	$tmp4,$in1,$tmp0
	dsrl	$in0,8
	 dsrl	$in1,8
	dsll	$tmp2,8
	 dsll	$tmp4,8
	and	$in0,$tmp0
	 and	$in1,$tmp0
	or	$tmp1,$tmp2
	 or	$tmp3,$tmp4
	or	$in0,$tmp1
	 or	$in1,$tmp3
	dsrl	$tmp1,$in0,32
	 dsrl	$tmp3,$in1,32
	dsll	$in0,32
	 dsll	$in1,32
	or	$in0,$tmp1
	 or	$in1,$tmp3
# endif
#endif
	li	$tmp0,1
	dsll	$tmp0,32		# 0x0000000100000000
	daddiu	$tmp0,-63		# 0x00000000ffffffc1
	dsll	$tmp0,28		# 0x0ffffffc10000000
	daddiu	$tmp0,-1		# 0x0ffffffc0fffffff

	and	$in0,$tmp0
	daddiu	$tmp0,-3		# 0x0ffffffc0ffffffc
	and	$in1,$tmp0

	sd	$in0,24($ctx)
	dsrl	$tmp0,$in1,2
	sd	$in1,32($ctx)
	daddu	$tmp0,$in1		# s1 = r1 + (r1 >> 2)
	sd	$tmp0,40($ctx)

.Lno_key:
	li	$v0,0			# return 0
	jr	$ra
.end	poly1305_init
___
{
# Emit poly1305_blocks(ctx, inp, len, padbit) for 64-bit targets.
#
# poly1305_blocks shifts len right by 4 to get the number of complete
# 16-byte blocks and returns at once when that is zero; otherwise it
# branches to poly1305_blocks_internal, which
#  - allocates a 6*8-byte frame (8*8 on R6) and saves $s4/$s5, plus
#    $s6/$s7 on R6 and $s0-$s3 for NUBI flavours;
#  - loads h0..h2 from ctx+0/8/16 and r0, r1, s1 from ctx+24/32/40;
#  - for each block: loads 16 input bytes (aligned ld plus shifts on R6,
#    ldl/ldr elsewhere), byte-swaps them on big-endian targets, keeps only
#    the low two bits of h2 while adding 5 * (h2 >> 2) to the low limb,
#    adds the input and padbit, and multiplies by the key;
#  - stores h0..h2 back to ctx, restores the saved registers and returns.

# Value for the .mask directive: bits 12-17 ($s0-$s5) for NUBI flavours,
# bits 16-17 ($s4/$s5) otherwise.  The R6 prologue ORs in 0x000c0000
# (bits 18-19, i.e. $s6/$s7).
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";

# Hash limbs, key words and product accumulators.  $d0/$d1 reuse the input
# registers $in0/$in1.
my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
   ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
my ($shr,$shl) = ($s6,$s7);		# used on R6

$code.=<<___;
.align	5
.globl	poly1305_blocks
.ent	poly1305_blocks
poly1305_blocks:
	.set	noreorder
	dsrl	$len,4			# number of complete blocks
	bnez	$len,poly1305_blocks_internal
	nop
	jr	$ra
	nop
.end	poly1305_blocks

.align	5
.ent	poly1305_blocks_internal
poly1305_blocks_internal:
	.set	noreorder
#if defined(_MIPS_ARCH_MIPS64R6)
	.frame	$sp,8*8,$ra
	.mask	$SAVED_REGS_MASK|0x000c0000,-8
	dsubu	$sp,8*8
	sd	$s7,56($sp)
	sd	$s6,48($sp)
#else
	.frame	$sp,6*8,$ra
	.mask	$SAVED_REGS_MASK,-8
	dsubu	$sp,6*8
#endif
	sd	$s5,40($sp)
	sd	$s4,32($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	sd	$s3,24($sp)
	sd	$s2,16($sp)
	sd	$s1,8($sp)
	sd	$s0,0($sp)
___
$code.=<<___;
	.set	reorder

#if defined(_MIPS_ARCH_MIPS64R6)
	andi	$shr,$inp,7
	dsubu	$inp,$inp,$shr		# align $inp
	sll	$shr,$shr,3		# byte to bit offset
	subu	$shl,$zero,$shr
#endif

	ld	$h0,0($ctx)		# load hash value
	ld	$h1,8($ctx)
	ld	$h2,16($ctx)

	ld	$r0,24($ctx)		# load key
	ld	$r1,32($ctx)
	ld	$rs1,40($ctx)

	dsll	$len,4
	daddu	$len,$inp		# end of buffer
	b	.Loop

.align	4
.Loop:
#if defined(_MIPS_ARCH_MIPS64R6)
	ld	$in0,0($inp)		# load input
	ld	$in1,8($inp)
	beqz	$shr,.Laligned_inp

	ld	$tmp2,16($inp)
# ifdef	MIPSEB
	dsllv	$in0,$in0,$shr
	dsrlv	$tmp3,$in1,$shl
	dsllv	$in1,$in1,$shr
	dsrlv	$tmp2,$tmp2,$shl
# else
	dsrlv	$in0,$in0,$shr
	dsllv	$tmp3,$in1,$shl
	dsrlv	$in1,$in1,$shr
	dsllv	$tmp2,$tmp2,$shl
# endif
	or	$in0,$in0,$tmp3
	or	$in1,$in1,$tmp2
.Laligned_inp:
#else
	ldl	$in0,0+MSB($inp)	# load input
	ldl	$in1,8+MSB($inp)
	ldr	$in0,0+LSB($inp)
	ldr	$in1,8+LSB($inp)
#endif
	daddiu	$inp,16
#ifdef	MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	$in0,$in0		# byte swap
	 dsbh	$in1,$in1
	dshd	$in0,$in0
	 dshd	$in1,$in1
# else
	ori	$tmp0,$zero,0xFF
	dsll	$tmp2,$tmp0,32
	or	$tmp0,$tmp2		# 0x000000FF000000FF

	and	$tmp1,$in0,$tmp0	# byte swap
	 and	$tmp3,$in1,$tmp0
	dsrl	$tmp2,$in0,24
	 dsrl	$tmp4,$in1,24
	dsll	$tmp1,24
	 dsll	$tmp3,24
	and	$tmp2,$tmp0
	 and	$tmp4,$tmp0
	dsll	$tmp0,8			# 0x0000FF000000FF00
	or	$tmp1,$tmp2
	 or	$tmp3,$tmp4
	and	$tmp2,$in0,$tmp0
	 and	$tmp4,$in1,$tmp0
	dsrl	$in0,8
	 dsrl	$in1,8
	dsll	$tmp2,8
	 dsll	$tmp4,8
	and	$in0,$tmp0
	 and	$in1,$tmp0
	or	$tmp1,$tmp2
	 or	$tmp3,$tmp4
	or	$in0,$tmp1
	 or	$in1,$tmp3
	dsrl	$tmp1,$in0,32
	 dsrl	$tmp3,$in1,32
	dsll	$in0,32
	 dsll	$in1,32
	or	$in0,$tmp1
	 or	$in1,$tmp3
# endif
#endif
	dsrl	$tmp1,$h2,2		# modulo-scheduled reduction
	andi	$h2,$h2,3
	dsll	$tmp0,$tmp1,2

	daddu	$d0,$h0,$in0		# accumulate input
	 daddu	$tmp1,$tmp0
	sltu	$tmp0,$d0,$h0
	daddu	$d0,$d0,$tmp1		# ... and residue
	sltu	$tmp1,$d0,$tmp1
	daddu	$d1,$h1,$in1
	daddu	$tmp0,$tmp1
	sltu	$tmp1,$d1,$h1
	daddu	$d1,$tmp0

	dmultu	($r0,$d0)		# h0*r0
	 daddu	$d2,$h2,$padbit
	 sltu	$tmp0,$d1,$tmp0
	mflo	($h0,$r0,$d0)
	mfhi	($h1,$r0,$d0)

	dmultu	($rs1,$d1)		# h1*5*r1
	 daddu	$d2,$tmp1
	 daddu	$d2,$tmp0
	mflo	($tmp0,$rs1,$d1)
	mfhi	($tmp1,$rs1,$d1)

	dmultu	($r1,$d0)		# h0*r1
	mflo	($tmp2,$r1,$d0)
	mfhi	($h2,$r1,$d0)
	 daddu	$h0,$tmp0
	 daddu	$h1,$tmp1
	 sltu	$tmp0,$h0,$tmp0

	dmultu	($r0,$d1)		# h1*r0
	 daddu	$h1,$tmp0
	 daddu	$h1,$tmp2
	mflo	($tmp0,$r0,$d1)
	mfhi	($tmp1,$r0,$d1)

	dmultu	($rs1,$d2)		# h2*5*r1
	 sltu	$tmp2,$h1,$tmp2
	 daddu	$h2,$tmp2
	mflo	($tmp2,$rs1,$d2)

	dmultu	($r0,$d2)		# h2*r0
	 daddu	$h1,$tmp0
	 daddu	$h2,$tmp1
	mflo	($tmp3,$r0,$d2)
	 sltu	$tmp0,$h1,$tmp0
	 daddu	$h2,$tmp0

	daddu	$h1,$tmp2
	sltu	$tmp2,$h1,$tmp2
	daddu	$h2,$tmp2
	daddu	$h2,$tmp3

	bne	$inp,$len,.Loop

	sd	$h0,0($ctx)		# store hash value
	sd	$h1,8($ctx)
	sd	$h2,16($ctx)

	.set	noreorder
#if defined(_MIPS_ARCH_MIPS64R6)
	ld	$s7,56($sp)
	ld	$s6,48($sp)
#endif
	ld	$s5,40($sp)		# epilogue
	ld	$s4,32($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi epilogue
	ld	$s3,24($sp)
	ld	$s2,16($sp)
	ld	$s1,8($sp)
	ld	$s0,0($sp)
___
$code.=<<___;
	jr	$ra
#if defined(_MIPS_ARCH_MIPS64R6)
	daddu	$sp,8*8
#else
	daddu	$sp,6*8
#endif
.end	poly1305_blocks_internal
___
}
{
# Emit poly1305_emit(ctx, mac, nonce) for 64-bit targets, followed by the
# identification string in .rdata.
#
# poly1305_emit:
#  - loads h0..h2 from ctx+0/8/16 and finishes the reduction by adding
#    5 * (h2 >> 2) to the low limb, keeping the low two bits of h2;
#  - computes h + 5 and selects it instead of h when that sum carries
#    past bit 129 of the three-limb value (branch-free, via an all-ones or
#    all-zeros mask);
#  - adds the nonce, read with lwu as four 32-bit words at nonce+0/4/8/12,
#    as a 128-bit number;
#  - writes the 16 result bytes to mac one at a time, least significant
#    byte first.
my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);

$code.=<<___;
.align	5
.globl	poly1305_emit
.ent	poly1305_emit
poly1305_emit:
	.frame	$sp,0,$ra
	.set	reorder

	ld	$tmp2,16($ctx)
	ld	$tmp0,0($ctx)
	ld	$tmp1,8($ctx)

	li	$in0,-4			# final reduction
	dsrl	$in1,$tmp2,2
	and	$in0,$tmp2
	andi	$tmp2,$tmp2,3
	daddu	$in0,$in1

	daddu	$tmp0,$tmp0,$in0
	sltu	$in1,$tmp0,$in0
	 daddiu	$in0,$tmp0,5		# compare to modulus
	daddu	$tmp1,$tmp1,$in1
	 sltiu	$tmp3,$in0,5
	sltu	$tmp4,$tmp1,$in1
	 daddu	$in1,$tmp1,$tmp3
	daddu	$tmp2,$tmp2,$tmp4
	 sltu	$tmp3,$in1,$tmp3
	 daddu	$tmp2,$tmp2,$tmp3

	dsrl	$tmp2,2			# see if it carried/borrowed
	dsubu	$tmp2,$zero,$tmp2

	xor	$in0,$tmp0
	xor	$in1,$tmp1
	and	$in0,$tmp2
	and	$in1,$tmp2
	xor	$in0,$tmp0
	xor	$in1,$tmp1

	lwu	$tmp0,0($nonce)		# load nonce
	lwu	$tmp1,4($nonce)
	lwu	$tmp2,8($nonce)
	lwu	$tmp3,12($nonce)
	dsll	$tmp1,32
	dsll	$tmp3,32
	or	$tmp0,$tmp1
	or	$tmp2,$tmp3

	daddu	$in0,$tmp0		# accumulate nonce
	daddu	$in1,$tmp2
	sltu	$tmp0,$in0,$tmp0
	daddu	$in1,$tmp0

	dsrl	$tmp0,$in0,8		# write mac value
	dsrl	$tmp1,$in0,16
	dsrl	$tmp2,$in0,24
	sb	$in0,0($mac)
	dsrl	$tmp3,$in0,32
	sb	$tmp0,1($mac)
	dsrl	$tmp0,$in0,40
	sb	$tmp1,2($mac)
	dsrl	$tmp1,$in0,48
	sb	$tmp2,3($mac)
	dsrl	$tmp2,$in0,56
	sb	$tmp3,4($mac)
	dsrl	$tmp3,$in1,8
	sb	$tmp0,5($mac)
	dsrl	$tmp0,$in1,16
	sb	$tmp1,6($mac)
	dsrl	$tmp1,$in1,24
	sb	$tmp2,7($mac)

	sb	$in1,8($mac)
	dsrl	$tmp2,$in1,32
	sb	$tmp3,9($mac)
	dsrl	$tmp3,$in1,40
	sb	$tmp0,10($mac)
	dsrl	$tmp0,$in1,48
	sb	$tmp1,11($mac)
	dsrl	$tmp1,$in1,56
	sb	$tmp2,12($mac)
	sb	$tmp3,13($mac)
	sb	$tmp0,14($mac)
	sb	$tmp1,15($mac)

	jr	$ra
.end	poly1305_emit
.rdata
.asciiz	"Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm"
.align	2
___
}
54162306a36Sopenharmony_ci}}} else {{{
######################################################################
# 32-bit code path
#

# Register aliases shared by the 32-bit entry points.  The four arguments
# arrive in $a0-$a3; $in0-$in3 receive four 32-bit input words and
# $tmp0-$tmp3 are scratch.  $tmp0 is $at, which is why the emitted code
# starts with ".set noat".
my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) =
   ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2);

# Emit the 32-bit assembler preamble followed by poly1305_init(ctx, key).
#
# Preamble (C preprocessor glue):
#  - MIPS32 R3/R5/R6 builds also get _MIPS_ARCH_MIPS32R2 defined, so the
#    R2-only byte-swap path below is selected for them as well;
#  - multu/mflo/mfhi are macros: on R6 multu expands to nothing and
#    mflo/mfhi become the three-operand mulu/muhu, elsewhere they are the
#    classic HI/LO instructions;
#  - under __KERNEL__ the three entry points are renamed to *_mips;
#  - MSB/LSB are the byte offsets passed to lwl/lwr for the current
#    endianness.
#
# poly1305_init:
#  - zeroes the five 32-bit hash words at ctx+0 .. ctx+19;
#  - if the key pointer is NULL, skips straight to the return;
#  - otherwise loads four key words (aligned lw plus shifts on R6, lwl/lwr
#    elsewhere), byte-swaps them on big-endian targets, and clamps word 0
#    with 0x0fffffff and words 1-3 with 0x0ffffffc;
#  - stores r0..r3 at ctx+20/24/28/32 and s1..s3, where
#    s_i = r_i + (r_i >> 2), at ctx+36/40/44;
#  - always returns 0.
$code.=<<___;
#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\
     defined(_MIPS_ARCH_MIPS32R6)) \\
     && !defined(_MIPS_ARCH_MIPS32R2)
# define _MIPS_ARCH_MIPS32R2
#endif

#if defined(_MIPS_ARCH_MIPS32R6)
# define multu(rs,rt)
# define mflo(rd,rs,rt)	mulu	rd,rs,rt
# define mfhi(rd,rs,rt)	muhu	rd,rs,rt
#else
# define multu(rs,rt)	multu	rs,rt
# define mflo(rd,rs,rt)	mflo	rd
# define mfhi(rd,rs,rt)	mfhi	rd
#endif

#ifdef	__KERNEL__
# define poly1305_init   poly1305_init_mips
# define poly1305_blocks poly1305_blocks_mips
# define poly1305_emit   poly1305_emit_mips
#endif

#if defined(__MIPSEB__) && !defined(MIPSEB)
# define MIPSEB
#endif

#ifdef MIPSEB
# define MSB 0
# define LSB 3
#else
# define MSB 3
# define LSB 0
#endif

.text
.set	noat
.set	noreorder

.align	5
.globl	poly1305_init
.ent	poly1305_init
poly1305_init:
	.frame	$sp,0,$ra
	.set	reorder

	sw	$zero,0($ctx)
	sw	$zero,4($ctx)
	sw	$zero,8($ctx)
	sw	$zero,12($ctx)
	sw	$zero,16($ctx)

	beqz	$inp,.Lno_key

#if defined(_MIPS_ARCH_MIPS32R6)
	andi	$tmp0,$inp,3		# $inp % 4
	subu	$inp,$inp,$tmp0		# align $inp
	sll	$tmp0,$tmp0,3		# byte to bit offset
	lw	$in0,0($inp)
	lw	$in1,4($inp)
	lw	$in2,8($inp)
	lw	$in3,12($inp)
	beqz	$tmp0,.Laligned_key

	lw	$tmp2,16($inp)
	subu	$tmp1,$zero,$tmp0
# ifdef	MIPSEB
	sllv	$in0,$in0,$tmp0
	srlv	$tmp3,$in1,$tmp1
	sllv	$in1,$in1,$tmp0
	or	$in0,$in0,$tmp3
	srlv	$tmp3,$in2,$tmp1
	sllv	$in2,$in2,$tmp0
	or	$in1,$in1,$tmp3
	srlv	$tmp3,$in3,$tmp1
	sllv	$in3,$in3,$tmp0
	or	$in2,$in2,$tmp3
	srlv	$tmp2,$tmp2,$tmp1
	or	$in3,$in3,$tmp2
# else
	srlv	$in0,$in0,$tmp0
	sllv	$tmp3,$in1,$tmp1
	srlv	$in1,$in1,$tmp0
	or	$in0,$in0,$tmp3
	sllv	$tmp3,$in2,$tmp1
	srlv	$in2,$in2,$tmp0
	or	$in1,$in1,$tmp3
	sllv	$tmp3,$in3,$tmp1
	srlv	$in3,$in3,$tmp0
	or	$in2,$in2,$tmp3
	sllv	$tmp2,$tmp2,$tmp1
	or	$in3,$in3,$tmp2
# endif
.Laligned_key:
#else
	lwl	$in0,0+MSB($inp)
	lwl	$in1,4+MSB($inp)
	lwl	$in2,8+MSB($inp)
	lwl	$in3,12+MSB($inp)
	lwr	$in0,0+LSB($inp)
	lwr	$in1,4+LSB($inp)
	lwr	$in2,8+LSB($inp)
	lwr	$in3,12+LSB($inp)
#endif
#ifdef	MIPSEB
# if defined(_MIPS_ARCH_MIPS32R2)
	wsbh	$in0,$in0		# byte swap
	wsbh	$in1,$in1
	wsbh	$in2,$in2
	wsbh	$in3,$in3
	rotr	$in0,$in0,16
	rotr	$in1,$in1,16
	rotr	$in2,$in2,16
	rotr	$in3,$in3,16
# else
	srl	$tmp0,$in0,24		# byte swap
	srl	$tmp1,$in0,8
	andi	$tmp2,$in0,0xFF00
	sll	$in0,$in0,24
	andi	$tmp1,0xFF00
	sll	$tmp2,$tmp2,8
	or	$in0,$tmp0
	 srl	$tmp0,$in1,24
	or	$tmp1,$tmp2
	 srl	$tmp2,$in1,8
	or	$in0,$tmp1
	 andi	$tmp1,$in1,0xFF00
	 sll	$in1,$in1,24
	 andi	$tmp2,0xFF00
	 sll	$tmp1,$tmp1,8
	 or	$in1,$tmp0
	srl	$tmp0,$in2,24
	 or	$tmp2,$tmp1
	srl	$tmp1,$in2,8
	 or	$in1,$tmp2
	andi	$tmp2,$in2,0xFF00
	sll	$in2,$in2,24
	andi	$tmp1,0xFF00
	sll	$tmp2,$tmp2,8
	or	$in2,$tmp0
	 srl	$tmp0,$in3,24
	or	$tmp1,$tmp2
	 srl	$tmp2,$in3,8
	or	$in2,$tmp1
	 andi	$tmp1,$in3,0xFF00
	 sll	$in3,$in3,24
	 andi	$tmp2,0xFF00
	 sll	$tmp1,$tmp1,8
	 or	$in3,$tmp0
	 or	$tmp2,$tmp1
	 or	$in3,$tmp2
# endif
#endif
	lui	$tmp0,0x0fff
	ori	$tmp0,0xffff		# 0x0fffffff
	and	$in0,$in0,$tmp0
	subu	$tmp0,3			# 0x0ffffffc
	and	$in1,$in1,$tmp0
	and	$in2,$in2,$tmp0
	and	$in3,$in3,$tmp0

	sw	$in0,20($ctx)
	sw	$in1,24($ctx)
	sw	$in2,28($ctx)
	sw	$in3,32($ctx)

	srl	$tmp1,$in1,2
	srl	$tmp2,$in2,2
	srl	$tmp3,$in3,2
	addu	$in1,$in1,$tmp1		# s1 = r1 + (r1 >> 2)
	addu	$in2,$in2,$tmp2
	addu	$in3,$in3,$tmp3
	sw	$in1,36($ctx)
	sw	$in2,40($ctx)
	sw	$in3,44($ctx)
.Lno_key:
	li	$v0,0
	jr	$ra
.end	poly1305_init
___
73062306a36Sopenharmony_ci{
# poly1305_blocks, 32-bit code path: appends the assembly of the block
# function to $code.  The generated routine loads the five hash words from
# offsets 0..16 of the context, the key words r0..r3 from offsets 20..32 and
# the s1..s3 words from offsets 36..44 (stored by poly1305_init above as
# r + (r >> 2)).  For every complete 16-byte block it adds the input words
# and the pad bit to the hash and multiplies by the key; the hash is written
# back to the context once the end of the buffer is reached.
#
# Value handed to the .mask directive; nubi flavours get the wider constant,
# matching the extra $s0..$s3 spills emitted for them below.
73162306a36Sopenharmony_cimy $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x00fff000" : "0x00ff0000";
73262306a36Sopenharmony_ci
# Register allocation: hash words h0..h4, key words r0..r3 and the s1..s3
# words (rs1..rs3) occupy $s0..$s11.
73362306a36Sopenharmony_cimy ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) =
73462306a36Sopenharmony_ci   ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11);
# The four words of the current input block.
73562306a36Sopenharmony_cimy ($d0,$d1,$d2,$d3) =
73662306a36Sopenharmony_ci   ($a4,$a5,$a6,$a7);
# $shr and $one deliberately share $t2: "li $one,1" is emitted for every
# build, the R6-only alignment code then overwrites the register as $shr, and
# the multiply path that reads $one is assembled only when R2 is defined and
# R6 is not.
73762306a36Sopenharmony_cimy $shr = $t2;		# used on R6
73862306a36Sopenharmony_cimy $one = $t2;		# used on R2
73962306a36Sopenharmony_ci
# Prologue: reserve 12 words of stack and save $s4..$s11.
# NOTE(review): .frame announces 16*4 bytes while the code adjusts $sp by
# 4*12 -- confirm whether the directive is meant to differ.
74062306a36Sopenharmony_ci$code.=<<___;
74162306a36Sopenharmony_ci.globl	poly1305_blocks
74262306a36Sopenharmony_ci.align	5
74362306a36Sopenharmony_ci.ent	poly1305_blocks
74462306a36Sopenharmony_cipoly1305_blocks:
74562306a36Sopenharmony_ci	.frame	$sp,16*4,$ra
74662306a36Sopenharmony_ci	.mask	$SAVED_REGS_MASK,-4
74762306a36Sopenharmony_ci	.set	noreorder
74862306a36Sopenharmony_ci	subu	$sp, $sp,4*12
74962306a36Sopenharmony_ci	sw	$s11,4*11($sp)
75062306a36Sopenharmony_ci	sw	$s10,4*10($sp)
75162306a36Sopenharmony_ci	sw	$s9, 4*9($sp)
75262306a36Sopenharmony_ci	sw	$s8, 4*8($sp)
75362306a36Sopenharmony_ci	sw	$s7, 4*7($sp)
75462306a36Sopenharmony_ci	sw	$s6, 4*6($sp)
75562306a36Sopenharmony_ci	sw	$s5, 4*5($sp)
75662306a36Sopenharmony_ci	sw	$s4, 4*4($sp)
75762306a36Sopenharmony_ci___
# $s0..$s3 are spilled only when generating for a nubi flavour.
75862306a36Sopenharmony_ci$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
75962306a36Sopenharmony_ci	sw	$s3, 4*3($sp)
76062306a36Sopenharmony_ci	sw	$s2, 4*2($sp)
76162306a36Sopenharmony_ci	sw	$s1, 4*1($sp)
76262306a36Sopenharmony_ci	sw	$s0, 4*0($sp)
76362306a36Sopenharmony_ci___
# Main body.  $len is turned into a block count (and the routine bails out to
# .Labort when it is zero), then into an end-of-buffer pointer that terminates
# .Loop.  Input is fetched with lwl/lwr, or on R6 with aligned loads merged by
# variable shifts, and byte-swapped on big-endian targets.
# NOTE(review): multu/mflo/mfhi written with parenthesised operand lists in
# the second multiply path are presumably preprocessor macros defined earlier
# in the file -- confirm.
# NOTE(review): that path uses $a3 as scratch and re-seeds $padbit with 1
# before looping, presumably because $padbit shares that register -- confirm
# against its declaration earlier in the file.
76462306a36Sopenharmony_ci$code.=<<___;
76562306a36Sopenharmony_ci	.set	reorder
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci	srl	$len,4			# number of complete blocks
76862306a36Sopenharmony_ci	li	$one,1
76962306a36Sopenharmony_ci	beqz	$len,.Labort
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS32R6)
77262306a36Sopenharmony_ci	andi	$shr,$inp,3
77362306a36Sopenharmony_ci	subu	$inp,$inp,$shr		# align $inp
77462306a36Sopenharmony_ci	sll	$shr,$shr,3		# byte to bit offset
77562306a36Sopenharmony_ci#endif
77662306a36Sopenharmony_ci
77762306a36Sopenharmony_ci	lw	$h0,0($ctx)		# load hash value
77862306a36Sopenharmony_ci	lw	$h1,4($ctx)
77962306a36Sopenharmony_ci	lw	$h2,8($ctx)
78062306a36Sopenharmony_ci	lw	$h3,12($ctx)
78162306a36Sopenharmony_ci	lw	$h4,16($ctx)
78262306a36Sopenharmony_ci
78362306a36Sopenharmony_ci	lw	$r0,20($ctx)		# load key
78462306a36Sopenharmony_ci	lw	$r1,24($ctx)
78562306a36Sopenharmony_ci	lw	$r2,28($ctx)
78662306a36Sopenharmony_ci	lw	$r3,32($ctx)
78762306a36Sopenharmony_ci	lw	$rs1,36($ctx)
78862306a36Sopenharmony_ci	lw	$rs2,40($ctx)
78962306a36Sopenharmony_ci	lw	$rs3,44($ctx)
79062306a36Sopenharmony_ci
79162306a36Sopenharmony_ci	sll	$len,4
79262306a36Sopenharmony_ci	addu	$len,$len,$inp		# end of buffer
79362306a36Sopenharmony_ci	b	.Loop
79462306a36Sopenharmony_ci
79562306a36Sopenharmony_ci.align	4
79662306a36Sopenharmony_ci.Loop:
79762306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS32R6)
79862306a36Sopenharmony_ci	lw	$d0,0($inp)		# load input
79962306a36Sopenharmony_ci	lw	$d1,4($inp)
80062306a36Sopenharmony_ci	lw	$d2,8($inp)
80162306a36Sopenharmony_ci	lw	$d3,12($inp)
80262306a36Sopenharmony_ci	beqz	$shr,.Laligned_inp
80362306a36Sopenharmony_ci
80462306a36Sopenharmony_ci	lw	$t0,16($inp)
80562306a36Sopenharmony_ci	subu	$t1,$zero,$shr
80662306a36Sopenharmony_ci# ifdef	MIPSEB
80762306a36Sopenharmony_ci	sllv	$d0,$d0,$shr
80862306a36Sopenharmony_ci	srlv	$at,$d1,$t1
80962306a36Sopenharmony_ci	sllv	$d1,$d1,$shr
81062306a36Sopenharmony_ci	or	$d0,$d0,$at
81162306a36Sopenharmony_ci	srlv	$at,$d2,$t1
81262306a36Sopenharmony_ci	sllv	$d2,$d2,$shr
81362306a36Sopenharmony_ci	or	$d1,$d1,$at
81462306a36Sopenharmony_ci	srlv	$at,$d3,$t1
81562306a36Sopenharmony_ci	sllv	$d3,$d3,$shr
81662306a36Sopenharmony_ci	or	$d2,$d2,$at
81762306a36Sopenharmony_ci	srlv	$t0,$t0,$t1
81862306a36Sopenharmony_ci	or	$d3,$d3,$t0
81962306a36Sopenharmony_ci# else
82062306a36Sopenharmony_ci	srlv	$d0,$d0,$shr
82162306a36Sopenharmony_ci	sllv	$at,$d1,$t1
82262306a36Sopenharmony_ci	srlv	$d1,$d1,$shr
82362306a36Sopenharmony_ci	or	$d0,$d0,$at
82462306a36Sopenharmony_ci	sllv	$at,$d2,$t1
82562306a36Sopenharmony_ci	srlv	$d2,$d2,$shr
82662306a36Sopenharmony_ci	or	$d1,$d1,$at
82762306a36Sopenharmony_ci	sllv	$at,$d3,$t1
82862306a36Sopenharmony_ci	srlv	$d3,$d3,$shr
82962306a36Sopenharmony_ci	or	$d2,$d2,$at
83062306a36Sopenharmony_ci	sllv	$t0,$t0,$t1
83162306a36Sopenharmony_ci	or	$d3,$d3,$t0
83262306a36Sopenharmony_ci# endif
83362306a36Sopenharmony_ci.Laligned_inp:
83462306a36Sopenharmony_ci#else
83562306a36Sopenharmony_ci	lwl	$d0,0+MSB($inp)		# load input
83662306a36Sopenharmony_ci	lwl	$d1,4+MSB($inp)
83762306a36Sopenharmony_ci	lwl	$d2,8+MSB($inp)
83862306a36Sopenharmony_ci	lwl	$d3,12+MSB($inp)
83962306a36Sopenharmony_ci	lwr	$d0,0+LSB($inp)
84062306a36Sopenharmony_ci	lwr	$d1,4+LSB($inp)
84162306a36Sopenharmony_ci	lwr	$d2,8+LSB($inp)
84262306a36Sopenharmony_ci	lwr	$d3,12+LSB($inp)
84362306a36Sopenharmony_ci#endif
84462306a36Sopenharmony_ci#ifdef	MIPSEB
84562306a36Sopenharmony_ci# if defined(_MIPS_ARCH_MIPS32R2)
84662306a36Sopenharmony_ci	wsbh	$d0,$d0			# byte swap
84762306a36Sopenharmony_ci	wsbh	$d1,$d1
84862306a36Sopenharmony_ci	wsbh	$d2,$d2
84962306a36Sopenharmony_ci	wsbh	$d3,$d3
85062306a36Sopenharmony_ci	rotr	$d0,$d0,16
85162306a36Sopenharmony_ci	rotr	$d1,$d1,16
85262306a36Sopenharmony_ci	rotr	$d2,$d2,16
85362306a36Sopenharmony_ci	rotr	$d3,$d3,16
85462306a36Sopenharmony_ci# else
85562306a36Sopenharmony_ci	srl	$at,$d0,24		# byte swap
85662306a36Sopenharmony_ci	srl	$t0,$d0,8
85762306a36Sopenharmony_ci	andi	$t1,$d0,0xFF00
85862306a36Sopenharmony_ci	sll	$d0,$d0,24
85962306a36Sopenharmony_ci	andi	$t0,0xFF00
86062306a36Sopenharmony_ci	sll	$t1,$t1,8
86162306a36Sopenharmony_ci	or	$d0,$at
86262306a36Sopenharmony_ci	 srl	$at,$d1,24
86362306a36Sopenharmony_ci	or	$t0,$t1
86462306a36Sopenharmony_ci	 srl	$t1,$d1,8
86562306a36Sopenharmony_ci	or	$d0,$t0
86662306a36Sopenharmony_ci	 andi	$t0,$d1,0xFF00
86762306a36Sopenharmony_ci	 sll	$d1,$d1,24
86862306a36Sopenharmony_ci	 andi	$t1,0xFF00
86962306a36Sopenharmony_ci	 sll	$t0,$t0,8
87062306a36Sopenharmony_ci	 or	$d1,$at
87162306a36Sopenharmony_ci	srl	$at,$d2,24
87262306a36Sopenharmony_ci	 or	$t1,$t0
87362306a36Sopenharmony_ci	srl	$t0,$d2,8
87462306a36Sopenharmony_ci	 or	$d1,$t1
87562306a36Sopenharmony_ci	andi	$t1,$d2,0xFF00
87662306a36Sopenharmony_ci	sll	$d2,$d2,24
87762306a36Sopenharmony_ci	andi	$t0,0xFF00
87862306a36Sopenharmony_ci	sll	$t1,$t1,8
87962306a36Sopenharmony_ci	or	$d2,$at
88062306a36Sopenharmony_ci	 srl	$at,$d3,24
88162306a36Sopenharmony_ci	or	$t0,$t1
88262306a36Sopenharmony_ci	 srl	$t1,$d3,8
88362306a36Sopenharmony_ci	or	$d2,$t0
88462306a36Sopenharmony_ci	 andi	$t0,$d3,0xFF00
88562306a36Sopenharmony_ci	 sll	$d3,$d3,24
88662306a36Sopenharmony_ci	 andi	$t1,0xFF00
88762306a36Sopenharmony_ci	 sll	$t0,$t0,8
88862306a36Sopenharmony_ci	 or	$d3,$at
88962306a36Sopenharmony_ci	 or	$t1,$t0
89062306a36Sopenharmony_ci	 or	$d3,$t1
89162306a36Sopenharmony_ci# endif
89262306a36Sopenharmony_ci#endif
89362306a36Sopenharmony_ci	srl	$t0,$h4,2		# modulo-scheduled reduction
89462306a36Sopenharmony_ci	andi	$h4,$h4,3
89562306a36Sopenharmony_ci	sll	$at,$t0,2
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_ci	addu	$d0,$d0,$h0		# accumulate input
89862306a36Sopenharmony_ci	 addu	$t0,$t0,$at
89962306a36Sopenharmony_ci	sltu	$h0,$d0,$h0
90062306a36Sopenharmony_ci	addu	$d0,$d0,$t0		# ... and residue
90162306a36Sopenharmony_ci	sltu	$at,$d0,$t0
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci	addu	$d1,$d1,$h1
90462306a36Sopenharmony_ci	 addu	$h0,$h0,$at		# carry
90562306a36Sopenharmony_ci	sltu	$h1,$d1,$h1
90662306a36Sopenharmony_ci	addu	$d1,$d1,$h0
90762306a36Sopenharmony_ci	sltu	$h0,$d1,$h0
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci	addu	$d2,$d2,$h2
91062306a36Sopenharmony_ci	 addu	$h1,$h1,$h0		# carry
91162306a36Sopenharmony_ci	sltu	$h2,$d2,$h2
91262306a36Sopenharmony_ci	addu	$d2,$d2,$h1
91362306a36Sopenharmony_ci	sltu	$h1,$d2,$h1
91462306a36Sopenharmony_ci
91562306a36Sopenharmony_ci	addu	$d3,$d3,$h3
91662306a36Sopenharmony_ci	 addu	$h2,$h2,$h1		# carry
91762306a36Sopenharmony_ci	sltu	$h3,$d3,$h3
91862306a36Sopenharmony_ci	addu	$d3,$d3,$h2
91962306a36Sopenharmony_ci
92062306a36Sopenharmony_ci#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6)
92162306a36Sopenharmony_ci	multu	$r0,$d0			# d0*r0
92262306a36Sopenharmony_ci	 sltu	$h2,$d3,$h2
92362306a36Sopenharmony_ci	maddu	$rs3,$d1		# d1*s3
92462306a36Sopenharmony_ci	 addu	$h3,$h3,$h2		# carry
92562306a36Sopenharmony_ci	maddu	$rs2,$d2		# d2*s2
92662306a36Sopenharmony_ci	 addu	$h4,$h4,$padbit
92762306a36Sopenharmony_ci	maddu	$rs1,$d3		# d3*s1
92862306a36Sopenharmony_ci	 addu	$h4,$h4,$h3
92962306a36Sopenharmony_ci	mfhi	$at
93062306a36Sopenharmony_ci	mflo	$h0
93162306a36Sopenharmony_ci
93262306a36Sopenharmony_ci	multu	$r1,$d0			# d0*r1
93362306a36Sopenharmony_ci	maddu	$r0,$d1			# d1*r0
93462306a36Sopenharmony_ci	maddu	$rs3,$d2		# d2*s3
93562306a36Sopenharmony_ci	maddu	$rs2,$d3		# d3*s2
93662306a36Sopenharmony_ci	maddu	$rs1,$h4		# h4*s1
93762306a36Sopenharmony_ci	maddu	$at,$one		# hi*1
93862306a36Sopenharmony_ci	mfhi	$at
93962306a36Sopenharmony_ci	mflo	$h1
94062306a36Sopenharmony_ci
94162306a36Sopenharmony_ci	multu	$r2,$d0			# d0*r2
94262306a36Sopenharmony_ci	maddu	$r1,$d1			# d1*r1
94362306a36Sopenharmony_ci	maddu	$r0,$d2			# d2*r0
94462306a36Sopenharmony_ci	maddu	$rs3,$d3		# d3*s3
94562306a36Sopenharmony_ci	maddu	$rs2,$h4		# h4*s2
94662306a36Sopenharmony_ci	maddu	$at,$one		# hi*1
94762306a36Sopenharmony_ci	mfhi	$at
94862306a36Sopenharmony_ci	mflo	$h2
94962306a36Sopenharmony_ci
95062306a36Sopenharmony_ci	mul	$t0,$r0,$h4		# h4*r0
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_ci	multu	$r3,$d0			# d0*r3
95362306a36Sopenharmony_ci	maddu	$r2,$d1			# d1*r2
95462306a36Sopenharmony_ci	maddu	$r1,$d2			# d2*r1
95562306a36Sopenharmony_ci	maddu	$r0,$d3			# d3*r0
95662306a36Sopenharmony_ci	maddu	$rs3,$h4		# h4*s3
95762306a36Sopenharmony_ci	maddu	$at,$one		# hi*1
95862306a36Sopenharmony_ci	mfhi	$at
95962306a36Sopenharmony_ci	mflo	$h3
96062306a36Sopenharmony_ci
96162306a36Sopenharmony_ci	 addiu	$inp,$inp,16
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_ci	addu	$h4,$t0,$at
96462306a36Sopenharmony_ci#else
96562306a36Sopenharmony_ci	multu	($r0,$d0)		# d0*r0
96662306a36Sopenharmony_ci	mflo	($h0,$r0,$d0)
96762306a36Sopenharmony_ci	mfhi	($h1,$r0,$d0)
96862306a36Sopenharmony_ci
96962306a36Sopenharmony_ci	 sltu	$h2,$d3,$h2
97062306a36Sopenharmony_ci	 addu	$h3,$h3,$h2		# carry
97162306a36Sopenharmony_ci
97262306a36Sopenharmony_ci	multu	($rs3,$d1)		# d1*s3
97362306a36Sopenharmony_ci	mflo	($at,$rs3,$d1)
97462306a36Sopenharmony_ci	mfhi	($t0,$rs3,$d1)
97562306a36Sopenharmony_ci
97662306a36Sopenharmony_ci	 addu	$h4,$h4,$padbit
97762306a36Sopenharmony_ci	 addiu	$inp,$inp,16
97862306a36Sopenharmony_ci	 addu	$h4,$h4,$h3
97962306a36Sopenharmony_ci
98062306a36Sopenharmony_ci	multu	($rs2,$d2)		# d2*s2
98162306a36Sopenharmony_ci	mflo	($a3,$rs2,$d2)
98262306a36Sopenharmony_ci	mfhi	($t1,$rs2,$d2)
98362306a36Sopenharmony_ci	 addu	$h0,$h0,$at
98462306a36Sopenharmony_ci	 addu	$h1,$h1,$t0
98562306a36Sopenharmony_ci	multu	($rs1,$d3)		# d3*s1
98662306a36Sopenharmony_ci	 sltu	$at,$h0,$at
98762306a36Sopenharmony_ci	 addu	$h1,$h1,$at
98862306a36Sopenharmony_ci
98962306a36Sopenharmony_ci	mflo	($at,$rs1,$d3)
99062306a36Sopenharmony_ci	mfhi	($t0,$rs1,$d3)
99162306a36Sopenharmony_ci	 addu	$h0,$h0,$a3
99262306a36Sopenharmony_ci	 addu	$h1,$h1,$t1
99362306a36Sopenharmony_ci	multu	($r1,$d0)		# d0*r1
99462306a36Sopenharmony_ci	 sltu	$a3,$h0,$a3
99562306a36Sopenharmony_ci	 addu	$h1,$h1,$a3
99662306a36Sopenharmony_ci
99762306a36Sopenharmony_ci
99862306a36Sopenharmony_ci	mflo	($a3,$r1,$d0)
99962306a36Sopenharmony_ci	mfhi	($h2,$r1,$d0)
100062306a36Sopenharmony_ci	 addu	$h0,$h0,$at
100162306a36Sopenharmony_ci	 addu	$h1,$h1,$t0
100262306a36Sopenharmony_ci	multu	($r0,$d1)		# d1*r0
100362306a36Sopenharmony_ci	 sltu	$at,$h0,$at
100462306a36Sopenharmony_ci	 addu	$h1,$h1,$at
100562306a36Sopenharmony_ci
100662306a36Sopenharmony_ci	mflo	($at,$r0,$d1)
100762306a36Sopenharmony_ci	mfhi	($t0,$r0,$d1)
100862306a36Sopenharmony_ci	 addu	$h1,$h1,$a3
100962306a36Sopenharmony_ci	 sltu	$a3,$h1,$a3
101062306a36Sopenharmony_ci	multu	($rs3,$d2)		# d2*s3
101162306a36Sopenharmony_ci	 addu	$h2,$h2,$a3
101262306a36Sopenharmony_ci
101362306a36Sopenharmony_ci	mflo	($a3,$rs3,$d2)
101462306a36Sopenharmony_ci	mfhi	($t1,$rs3,$d2)
101562306a36Sopenharmony_ci	 addu	$h1,$h1,$at
101662306a36Sopenharmony_ci	 addu	$h2,$h2,$t0
101762306a36Sopenharmony_ci	multu	($rs2,$d3)		# d3*s2
101862306a36Sopenharmony_ci	 sltu	$at,$h1,$at
101962306a36Sopenharmony_ci	 addu	$h2,$h2,$at
102062306a36Sopenharmony_ci
102162306a36Sopenharmony_ci	mflo	($at,$rs2,$d3)
102262306a36Sopenharmony_ci	mfhi	($t0,$rs2,$d3)
102362306a36Sopenharmony_ci	 addu	$h1,$h1,$a3
102462306a36Sopenharmony_ci	 addu	$h2,$h2,$t1
102562306a36Sopenharmony_ci	multu	($rs1,$h4)		# h4*s1
102662306a36Sopenharmony_ci	 sltu	$a3,$h1,$a3
102762306a36Sopenharmony_ci	 addu	$h2,$h2,$a3
102862306a36Sopenharmony_ci
102962306a36Sopenharmony_ci	mflo	($a3,$rs1,$h4)
103062306a36Sopenharmony_ci	 addu	$h1,$h1,$at
103162306a36Sopenharmony_ci	 addu	$h2,$h2,$t0
103262306a36Sopenharmony_ci	multu	($r2,$d0)		# d0*r2
103362306a36Sopenharmony_ci	 sltu	$at,$h1,$at
103462306a36Sopenharmony_ci	 addu	$h2,$h2,$at
103562306a36Sopenharmony_ci
103662306a36Sopenharmony_ci
103762306a36Sopenharmony_ci	mflo	($at,$r2,$d0)
103862306a36Sopenharmony_ci	mfhi	($h3,$r2,$d0)
103962306a36Sopenharmony_ci	 addu	$h1,$h1,$a3
104062306a36Sopenharmony_ci	 sltu	$a3,$h1,$a3
104162306a36Sopenharmony_ci	multu	($r1,$d1)		# d1*r1
104262306a36Sopenharmony_ci	 addu	$h2,$h2,$a3
104362306a36Sopenharmony_ci
104462306a36Sopenharmony_ci	mflo	($a3,$r1,$d1)
104562306a36Sopenharmony_ci	mfhi	($t1,$r1,$d1)
104662306a36Sopenharmony_ci	 addu	$h2,$h2,$at
104762306a36Sopenharmony_ci	 sltu	$at,$h2,$at
104862306a36Sopenharmony_ci	multu	($r0,$d2)		# d2*r0
104962306a36Sopenharmony_ci	 addu	$h3,$h3,$at
105062306a36Sopenharmony_ci
105162306a36Sopenharmony_ci	mflo	($at,$r0,$d2)
105262306a36Sopenharmony_ci	mfhi	($t0,$r0,$d2)
105362306a36Sopenharmony_ci	 addu	$h2,$h2,$a3
105462306a36Sopenharmony_ci	 addu	$h3,$h3,$t1
105562306a36Sopenharmony_ci	multu	($rs3,$d3)		# d3*s3
105662306a36Sopenharmony_ci	 sltu	$a3,$h2,$a3
105762306a36Sopenharmony_ci	 addu	$h3,$h3,$a3
105862306a36Sopenharmony_ci
105962306a36Sopenharmony_ci	mflo	($a3,$rs3,$d3)
106062306a36Sopenharmony_ci	mfhi	($t1,$rs3,$d3)
106162306a36Sopenharmony_ci	 addu	$h2,$h2,$at
106262306a36Sopenharmony_ci	 addu	$h3,$h3,$t0
106362306a36Sopenharmony_ci	multu	($rs2,$h4)		# h4*s2
106462306a36Sopenharmony_ci	 sltu	$at,$h2,$at
106562306a36Sopenharmony_ci	 addu	$h3,$h3,$at
106662306a36Sopenharmony_ci
106762306a36Sopenharmony_ci	mflo	($at,$rs2,$h4)
106862306a36Sopenharmony_ci	 addu	$h2,$h2,$a3
106962306a36Sopenharmony_ci	 addu	$h3,$h3,$t1
107062306a36Sopenharmony_ci	multu	($r3,$d0)		# d0*r3
107162306a36Sopenharmony_ci	 sltu	$a3,$h2,$a3
107262306a36Sopenharmony_ci	 addu	$h3,$h3,$a3
107362306a36Sopenharmony_ci
107462306a36Sopenharmony_ci
107562306a36Sopenharmony_ci	mflo	($a3,$r3,$d0)
107662306a36Sopenharmony_ci	mfhi	($t1,$r3,$d0)
107762306a36Sopenharmony_ci	 addu	$h2,$h2,$at
107862306a36Sopenharmony_ci	 sltu	$at,$h2,$at
107962306a36Sopenharmony_ci	multu	($r2,$d1)		# d1*r2
108062306a36Sopenharmony_ci	 addu	$h3,$h3,$at
108162306a36Sopenharmony_ci
108262306a36Sopenharmony_ci	mflo	($at,$r2,$d1)
108362306a36Sopenharmony_ci	mfhi	($t0,$r2,$d1)
108462306a36Sopenharmony_ci	 addu	$h3,$h3,$a3
108562306a36Sopenharmony_ci	 sltu	$a3,$h3,$a3
108662306a36Sopenharmony_ci	multu	($r0,$d3)		# d3*r0
108762306a36Sopenharmony_ci	 addu	$t1,$t1,$a3
108862306a36Sopenharmony_ci
108962306a36Sopenharmony_ci	mflo	($a3,$r0,$d3)
109062306a36Sopenharmony_ci	mfhi	($d3,$r0,$d3)
109162306a36Sopenharmony_ci	 addu	$h3,$h3,$at
109262306a36Sopenharmony_ci	 addu	$t1,$t1,$t0
109362306a36Sopenharmony_ci	multu	($r1,$d2)		# d2*r1
109462306a36Sopenharmony_ci	 sltu	$at,$h3,$at
109562306a36Sopenharmony_ci	 addu	$t1,$t1,$at
109662306a36Sopenharmony_ci
109762306a36Sopenharmony_ci	mflo	($at,$r1,$d2)
109862306a36Sopenharmony_ci	mfhi	($t0,$r1,$d2)
109962306a36Sopenharmony_ci	 addu	$h3,$h3,$a3
110062306a36Sopenharmony_ci	 addu	$t1,$t1,$d3
110162306a36Sopenharmony_ci	multu	($rs3,$h4)		# h4*s3
110262306a36Sopenharmony_ci	 sltu	$a3,$h3,$a3
110362306a36Sopenharmony_ci	 addu	$t1,$t1,$a3
110462306a36Sopenharmony_ci
110562306a36Sopenharmony_ci	mflo	($a3,$rs3,$h4)
110662306a36Sopenharmony_ci	 addu	$h3,$h3,$at
110762306a36Sopenharmony_ci	 addu	$t1,$t1,$t0
110862306a36Sopenharmony_ci	multu	($r0,$h4)		# h4*r0
110962306a36Sopenharmony_ci	 sltu	$at,$h3,$at
111062306a36Sopenharmony_ci	 addu	$t1,$t1,$at
111162306a36Sopenharmony_ci
111262306a36Sopenharmony_ci
111362306a36Sopenharmony_ci	mflo	($h4,$r0,$h4)
111462306a36Sopenharmony_ci	 addu	$h3,$h3,$a3
111562306a36Sopenharmony_ci	 sltu	$a3,$h3,$a3
111662306a36Sopenharmony_ci	 addu	$t1,$t1,$a3
111762306a36Sopenharmony_ci	addu	$h4,$h4,$t1
111862306a36Sopenharmony_ci
111962306a36Sopenharmony_ci	li	$padbit,1		# if we loop, padbit is 1
112062306a36Sopenharmony_ci#endif
112162306a36Sopenharmony_ci	bne	$inp,$len,.Loop
112262306a36Sopenharmony_ci
112362306a36Sopenharmony_ci	sw	$h0,0($ctx)		# store hash value
112462306a36Sopenharmony_ci	sw	$h1,4($ctx)
112562306a36Sopenharmony_ci	sw	$h2,8($ctx)
112662306a36Sopenharmony_ci	sw	$h3,12($ctx)
112762306a36Sopenharmony_ci	sw	$h4,16($ctx)
112862306a36Sopenharmony_ci
112962306a36Sopenharmony_ci	.set	noreorder
113062306a36Sopenharmony_ci.Labort:
113162306a36Sopenharmony_ci	lw	$s11,4*11($sp)
113262306a36Sopenharmony_ci	lw	$s10,4*10($sp)
113362306a36Sopenharmony_ci	lw	$s9, 4*9($sp)
113462306a36Sopenharmony_ci	lw	$s8, 4*8($sp)
113562306a36Sopenharmony_ci	lw	$s7, 4*7($sp)
113662306a36Sopenharmony_ci	lw	$s6, 4*6($sp)
113762306a36Sopenharmony_ci	lw	$s5, 4*5($sp)
113862306a36Sopenharmony_ci	lw	$s4, 4*4($sp)
113962306a36Sopenharmony_ci___
# Epilogue counterpart of the conditional spills: $s0..$s3 are reloaded only
# for nubi flavours.
114062306a36Sopenharmony_ci$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi epilogue
114162306a36Sopenharmony_ci	lw	$s3, 4*3($sp)
114262306a36Sopenharmony_ci	lw	$s2, 4*2($sp)
114362306a36Sopenharmony_ci	lw	$s1, 4*1($sp)
114462306a36Sopenharmony_ci	lw	$s0, 4*0($sp)
114562306a36Sopenharmony_ci___
# Return.  .set noreorder is still in effect here, so the stack release that
# follows "jr $ra" occupies the branch delay slot.
114662306a36Sopenharmony_ci$code.=<<___;
114762306a36Sopenharmony_ci	jr	$ra
114862306a36Sopenharmony_ci	addu	$sp,$sp,4*12
114962306a36Sopenharmony_ci.end	poly1305_blocks
115062306a36Sopenharmony_ci___
115162306a36Sopenharmony_ci}
115262306a36Sopenharmony_ci{
# poly1305_emit, 32-bit code path: appends the finalisation routine to $code.
# The generated code loads the five hash words from the context, performs the
# final reduction (the part of h4 above its two low bits is folded back into
# h0, then h+5 is computed and selected by mask when it carries out), adds the
# four nonce words with carry propagation and writes the 16-byte tag to $mac
# one byte at a time, least-significant byte of each word first.
#
# Argument registers for this routine; $tmp4 names $a3 as a fifth temporary.
# The $ctx register is reused as a scratch/carry register once the hash words
# have been loaded.
# NOTE(review): $in0..$in3 and $tmp0..$tmp3 are not declared in this block --
# presumably inherited from the enclosing scope; confirm.
115362306a36Sopenharmony_cimy ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3);
115462306a36Sopenharmony_ci
# The tag is written with byte stores (sb); the nonce is read with word
# loads (lw).
115562306a36Sopenharmony_ci$code.=<<___;
115662306a36Sopenharmony_ci.align	5
115762306a36Sopenharmony_ci.globl	poly1305_emit
115862306a36Sopenharmony_ci.ent	poly1305_emit
115962306a36Sopenharmony_cipoly1305_emit:
116062306a36Sopenharmony_ci	.frame	$sp,0,$ra
116162306a36Sopenharmony_ci	.set	reorder
116262306a36Sopenharmony_ci
116362306a36Sopenharmony_ci	lw	$tmp4,16($ctx)
116462306a36Sopenharmony_ci	lw	$tmp0,0($ctx)
116562306a36Sopenharmony_ci	lw	$tmp1,4($ctx)
116662306a36Sopenharmony_ci	lw	$tmp2,8($ctx)
116762306a36Sopenharmony_ci	lw	$tmp3,12($ctx)
116862306a36Sopenharmony_ci
116962306a36Sopenharmony_ci	li	$in0,-4			# final reduction
117062306a36Sopenharmony_ci	srl	$ctx,$tmp4,2
117162306a36Sopenharmony_ci	and	$in0,$in0,$tmp4
117262306a36Sopenharmony_ci	andi	$tmp4,$tmp4,3
117362306a36Sopenharmony_ci	addu	$ctx,$ctx,$in0
117462306a36Sopenharmony_ci
117562306a36Sopenharmony_ci	addu	$tmp0,$tmp0,$ctx
117662306a36Sopenharmony_ci	sltu	$ctx,$tmp0,$ctx
117762306a36Sopenharmony_ci	 addiu	$in0,$tmp0,5		# compare to modulus
117862306a36Sopenharmony_ci	addu	$tmp1,$tmp1,$ctx
117962306a36Sopenharmony_ci	 sltiu	$in1,$in0,5
118062306a36Sopenharmony_ci	sltu	$ctx,$tmp1,$ctx
118162306a36Sopenharmony_ci	 addu	$in1,$in1,$tmp1
118262306a36Sopenharmony_ci	addu	$tmp2,$tmp2,$ctx
118362306a36Sopenharmony_ci	 sltu	$in2,$in1,$tmp1
118462306a36Sopenharmony_ci	sltu	$ctx,$tmp2,$ctx
118562306a36Sopenharmony_ci	 addu	$in2,$in2,$tmp2
118662306a36Sopenharmony_ci	addu	$tmp3,$tmp3,$ctx
118762306a36Sopenharmony_ci	 sltu	$in3,$in2,$tmp2
118862306a36Sopenharmony_ci	sltu	$ctx,$tmp3,$ctx
118962306a36Sopenharmony_ci	 addu	$in3,$in3,$tmp3
119062306a36Sopenharmony_ci	addu	$tmp4,$tmp4,$ctx
119162306a36Sopenharmony_ci	 sltu	$ctx,$in3,$tmp3
119262306a36Sopenharmony_ci	 addu	$ctx,$tmp4
119362306a36Sopenharmony_ci
119462306a36Sopenharmony_ci	srl	$ctx,2			# see if it carried/borrowed
119562306a36Sopenharmony_ci	subu	$ctx,$zero,$ctx
119662306a36Sopenharmony_ci
119762306a36Sopenharmony_ci	xor	$in0,$tmp0
119862306a36Sopenharmony_ci	xor	$in1,$tmp1
119962306a36Sopenharmony_ci	xor	$in2,$tmp2
120062306a36Sopenharmony_ci	xor	$in3,$tmp3
120162306a36Sopenharmony_ci	and	$in0,$ctx
120262306a36Sopenharmony_ci	and	$in1,$ctx
120362306a36Sopenharmony_ci	and	$in2,$ctx
120462306a36Sopenharmony_ci	and	$in3,$ctx
120562306a36Sopenharmony_ci	xor	$in0,$tmp0
120662306a36Sopenharmony_ci	xor	$in1,$tmp1
120762306a36Sopenharmony_ci	xor	$in2,$tmp2
120862306a36Sopenharmony_ci	xor	$in3,$tmp3
120962306a36Sopenharmony_ci
121062306a36Sopenharmony_ci	lw	$tmp0,0($nonce)		# load nonce
121162306a36Sopenharmony_ci	lw	$tmp1,4($nonce)
121262306a36Sopenharmony_ci	lw	$tmp2,8($nonce)
121362306a36Sopenharmony_ci	lw	$tmp3,12($nonce)
121462306a36Sopenharmony_ci
121562306a36Sopenharmony_ci	addu	$in0,$tmp0		# accumulate nonce
121662306a36Sopenharmony_ci	sltu	$ctx,$in0,$tmp0
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ci	addu	$in1,$tmp1
121962306a36Sopenharmony_ci	sltu	$tmp1,$in1,$tmp1
122062306a36Sopenharmony_ci	addu	$in1,$ctx
122162306a36Sopenharmony_ci	sltu	$ctx,$in1,$ctx
122262306a36Sopenharmony_ci	addu	$ctx,$tmp1
122362306a36Sopenharmony_ci
122462306a36Sopenharmony_ci	addu	$in2,$tmp2
122562306a36Sopenharmony_ci	sltu	$tmp2,$in2,$tmp2
122662306a36Sopenharmony_ci	addu	$in2,$ctx
122762306a36Sopenharmony_ci	sltu	$ctx,$in2,$ctx
122862306a36Sopenharmony_ci	addu	$ctx,$tmp2
122962306a36Sopenharmony_ci
123062306a36Sopenharmony_ci	addu	$in3,$tmp3
123162306a36Sopenharmony_ci	addu	$in3,$ctx
123262306a36Sopenharmony_ci
123362306a36Sopenharmony_ci	srl	$tmp0,$in0,8		# write mac value
123462306a36Sopenharmony_ci	srl	$tmp1,$in0,16
123562306a36Sopenharmony_ci	srl	$tmp2,$in0,24
123662306a36Sopenharmony_ci	sb	$in0, 0($mac)
123762306a36Sopenharmony_ci	sb	$tmp0,1($mac)
123862306a36Sopenharmony_ci	srl	$tmp0,$in1,8
123962306a36Sopenharmony_ci	sb	$tmp1,2($mac)
124062306a36Sopenharmony_ci	srl	$tmp1,$in1,16
124162306a36Sopenharmony_ci	sb	$tmp2,3($mac)
124262306a36Sopenharmony_ci	srl	$tmp2,$in1,24
124362306a36Sopenharmony_ci	sb	$in1, 4($mac)
124462306a36Sopenharmony_ci	sb	$tmp0,5($mac)
124562306a36Sopenharmony_ci	srl	$tmp0,$in2,8
124662306a36Sopenharmony_ci	sb	$tmp1,6($mac)
124762306a36Sopenharmony_ci	srl	$tmp1,$in2,16
124862306a36Sopenharmony_ci	sb	$tmp2,7($mac)
124962306a36Sopenharmony_ci	srl	$tmp2,$in2,24
125062306a36Sopenharmony_ci	sb	$in2, 8($mac)
125162306a36Sopenharmony_ci	sb	$tmp0,9($mac)
125262306a36Sopenharmony_ci	srl	$tmp0,$in3,8
125362306a36Sopenharmony_ci	sb	$tmp1,10($mac)
125462306a36Sopenharmony_ci	srl	$tmp1,$in3,16
125562306a36Sopenharmony_ci	sb	$tmp2,11($mac)
125662306a36Sopenharmony_ci	srl	$tmp2,$in3,24
125762306a36Sopenharmony_ci	sb	$in3, 12($mac)
125862306a36Sopenharmony_ci	sb	$tmp0,13($mac)
125962306a36Sopenharmony_ci	sb	$tmp1,14($mac)
126062306a36Sopenharmony_ci	sb	$tmp2,15($mac)
126162306a36Sopenharmony_ci
126262306a36Sopenharmony_ci	jr	$ra
126362306a36Sopenharmony_ci.end	poly1305_emit
126462306a36Sopenharmony_ci.rdata
126562306a36Sopenharmony_ci.asciiz	"Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm"
126662306a36Sopenharmony_ci.align	2
126762306a36Sopenharmony_ci___
126862306a36Sopenharmony_ci}
126962306a36Sopenharmony_ci}}}
127062306a36Sopenharmony_ci
# Emit the generated assembly.  The last command-line argument, when present,
# names the output file; without one the code goes to the inherited STDOUT.
if ($output = pop) {
	# Three-argument open keeps the file name from being parsed as part of
	# the mode, and a failure now aborts instead of silently writing the
	# code to the original STDOUT.
	open STDOUT, '>', $output or die "can't open $output: $!";
}
print $code;
# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close must be checked.
close STDOUT or die "error closing STDOUT: $!";
1274