1e1051a39Sopenharmony_ci#! /usr/bin/env perl
2e1051a39Sopenharmony_ci# Copyright 2018-2020 The OpenSSL Project Authors. All Rights Reserved.
3e1051a39Sopenharmony_ci#
4e1051a39Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License").  You may not use
5e1051a39Sopenharmony_ci# this file except in compliance with the License.  You can obtain a copy
6e1051a39Sopenharmony_ci# in the file LICENSE in the source distribution or at
7e1051a39Sopenharmony_ci# https://www.openssl.org/source/license.html
8e1051a39Sopenharmony_ci#
9e1051a39Sopenharmony_ci# ====================================================================
10e1051a39Sopenharmony_ci# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11e1051a39Sopenharmony_ci# project. The module is, however, dual licensed under OpenSSL and
12e1051a39Sopenharmony_ci# CRYPTOGAMS licenses depending on where you obtain it. For further
13e1051a39Sopenharmony_ci# details see http://www.openssl.org/~appro/cryptogams/.
14e1051a39Sopenharmony_ci# ====================================================================
15e1051a39Sopenharmony_ci#
16e1051a39Sopenharmony_ci# X25519 lower-level primitives for PPC64.
17e1051a39Sopenharmony_ci#
18e1051a39Sopenharmony_ci# July 2018.
19e1051a39Sopenharmony_ci#
20e1051a39Sopenharmony_ci# Base 2^64 is faster than base 2^51 on pre-POWER8, most notably ~15%
21e1051a39Sopenharmony_ci# faster on PPC970/G5. POWER8 on the other hand seems to trip on own
22e1051a39Sopenharmony_ci# shoelaces when handling longer carry chains. As base 2^51 has just
23e1051a39Sopenharmony_ci# single-carry pairs, it's 25% faster than base 2^64. Since PPC970 is
24e1051a39Sopenharmony_ci# pretty old, base 2^64 implementation is not engaged. Comparison to
25e1051a39Sopenharmony_ci# compiler-generated code is complicated by the fact that not all
26e1051a39Sopenharmony_ci# compilers support 128-bit integers. When compiler doesn't, like xlc,
27e1051a39Sopenharmony_ci# this module delivers more than 2x improvement, and when it does,
28e1051a39Sopenharmony_ci# from 12% to 30% improvement was measured...
29e1051a39Sopenharmony_ci
30e1051a39Sopenharmony_ci# $output is the last argument if it looks like a file (it has an extension)
31e1051a39Sopenharmony_ci# $flavour is the first argument if it doesn't look like a file
32e1051a39Sopenharmony_ci$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
33e1051a39Sopenharmony_ci$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
34e1051a39Sopenharmony_ci
# Locate the ppc-xlate.pl translator: first next to this script, then under
# ../../perlasm/ relative to it.  $dir is the directory part of $0 and keeps
# its trailing path separator (either / or \).
35e1051a39Sopenharmony_ci$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
36e1051a39Sopenharmony_ci( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
37e1051a39Sopenharmony_ci( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
38e1051a39Sopenharmony_cidie "can't locate ppc-xlate.pl";
39e1051a39Sopenharmony_ci
# Pipe everything this script prints through the translator.  The translator
# path is quoted in the same way as the interpreter and output paths, so a
# source tree that lives under a directory containing spaces still works.
40e1051a39Sopenharmony_ciopen OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
41e1051a39Sopenharmony_ci    or die "can't call $xlate: $!";
# From here on STDOUT is an alias for the pipe opened above.
42e1051a39Sopenharmony_ci*STDOUT=*OUT;
43e1051a39Sopenharmony_ci
# Fixed register names shared by every routine below: r1 is used as the stack
# pointer and r3-r5 hold the three pointer arguments (result, first operand,
# second operand).
44e1051a39Sopenharmony_cimy $sp = "r1";
45e1051a39Sopenharmony_cimy ($rp,$ap,$bp) = map("r$_",3..5);
46e1051a39Sopenharmony_ci
47e1051a39Sopenharmony_ci####################################################### base 2^64
# Base 2^64 code path.  The constant-false guard means that none of the
# heredocs up to the matching closing brace are appended to $code.
48e1051a39Sopenharmony_ciif (0) {
# Register map for this section: the first seven names ($bi, $a0-$a3, $t0,
# $t1) become r6-r12 and the remaining ten ($t2, $t3, $acc0-$acc7) r22-r31.
49e1051a39Sopenharmony_cimy ($bi,$a0,$a1,$a2,$a3,$t0,$t1, $t2,$t3,
50e1051a39Sopenharmony_ci    $acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7) =
51e1051a39Sopenharmony_ci    map("r$_",(6..12,22..31));
# r0; the routines that use it clear it with xor before the first use.
52e1051a39Sopenharmony_cimy $zero = "r0";
# Stack frame size in bytes; r22-r31 are saved in its top ten doublewords.
53e1051a39Sopenharmony_cimy $FRAME = 16*8;
54e1051a39Sopenharmony_ci
55e1051a39Sopenharmony_ci$code.=<<___;
56e1051a39Sopenharmony_ci.text
57e1051a39Sopenharmony_ci
# x25519_fe64_mul: r3 = result pointer, r4 and r5 = operand pointers.
# Operands and result are four 64-bit words each, least significant word
# at offset 0.  The 512-bit schoolbook product is folded back to 256 bits
# using 2^256 = 38 (mod 2^255-19).  No comparison against the modulus is
# made here, so the stored value is not necessarily the canonical residue.
58e1051a39Sopenharmony_ci.globl	x25519_fe64_mul
59e1051a39Sopenharmony_ci.type	x25519_fe64_mul,\@function
60e1051a39Sopenharmony_ci.align	5
61e1051a39Sopenharmony_cix25519_fe64_mul:
	# allocate the frame and save r22-r31 (restored before returning)
62e1051a39Sopenharmony_ci	stdu	$sp,-$FRAME($sp)
63e1051a39Sopenharmony_ci	std	r22,`$FRAME-8*10`($sp)
64e1051a39Sopenharmony_ci	std	r23,`$FRAME-8*9`($sp)
65e1051a39Sopenharmony_ci	std	r24,`$FRAME-8*8`($sp)
66e1051a39Sopenharmony_ci	std	r25,`$FRAME-8*7`($sp)
67e1051a39Sopenharmony_ci	std	r26,`$FRAME-8*6`($sp)
68e1051a39Sopenharmony_ci	std	r27,`$FRAME-8*5`($sp)
69e1051a39Sopenharmony_ci	std	r28,`$FRAME-8*4`($sp)
70e1051a39Sopenharmony_ci	std	r29,`$FRAME-8*3`($sp)
71e1051a39Sopenharmony_ci	std	r30,`$FRAME-8*2`($sp)
72e1051a39Sopenharmony_ci	std	r31,`$FRAME-8*1`($sp)
73e1051a39Sopenharmony_ci
	# load b[0] and all four words of a; r0 is cleared to serve as zero
74e1051a39Sopenharmony_ci	ld	$bi,0($bp)
75e1051a39Sopenharmony_ci	ld	$a0,0($ap)
76e1051a39Sopenharmony_ci	xor	$zero,$zero,$zero
77e1051a39Sopenharmony_ci	ld	$a1,8($ap)
78e1051a39Sopenharmony_ci	ld	$a2,16($ap)
79e1051a39Sopenharmony_ci	ld	$a3,24($ap)
80e1051a39Sopenharmony_ci
	# first row: low halves land in acc0-acc3, high halves wait in t0-t3
81e1051a39Sopenharmony_ci	mulld	$acc0,$a0,$bi		# a[0]*b[0]
82e1051a39Sopenharmony_ci	mulhdu	$t0,$a0,$bi
83e1051a39Sopenharmony_ci	mulld	$acc1,$a1,$bi		# a[1]*b[0]
84e1051a39Sopenharmony_ci	mulhdu	$t1,$a1,$bi
85e1051a39Sopenharmony_ci	mulld	$acc2,$a2,$bi		# a[2]*b[0]
86e1051a39Sopenharmony_ci	mulhdu	$t2,$a2,$bi
87e1051a39Sopenharmony_ci	mulld	$acc3,$a3,$bi		# a[3]*b[0]
88e1051a39Sopenharmony_ci	mulhdu	$t3,$a3,$bi
89e1051a39Sopenharmony_ci___
# Rows 1..3 of the product.  Each pass loads b[i], adds the high halves left
# over from the previous row, then the low halves of the new row, and leaves
# the new high halves in $t0-$t3 for the next pass.  @acc is shifted by one
# register per pass so that @acc[1] is always the lowest word still changing.
# On the first pass @acc[4] holds no value yet, so $zero is used in its place.
90e1051a39Sopenharmony_cifor(my @acc=($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7),
91e1051a39Sopenharmony_ci    my $i=1; $i<4; shift(@acc), $i++) {
92e1051a39Sopenharmony_cimy $acc4 = $i==1? $zero : @acc[4];
93e1051a39Sopenharmony_ci
94e1051a39Sopenharmony_ci$code.=<<___;
95e1051a39Sopenharmony_ci	ld	$bi,`8*$i`($bp)
96e1051a39Sopenharmony_ci	addc	@acc[1],@acc[1],$t0	# accumulate high parts
97e1051a39Sopenharmony_ci	mulld	$t0,$a0,$bi
98e1051a39Sopenharmony_ci	adde	@acc[2],@acc[2],$t1
99e1051a39Sopenharmony_ci	mulld	$t1,$a1,$bi
100e1051a39Sopenharmony_ci	adde	@acc[3],@acc[3],$t2
101e1051a39Sopenharmony_ci	mulld	$t2,$a2,$bi
102e1051a39Sopenharmony_ci	adde	@acc[4],$acc4,$t3
103e1051a39Sopenharmony_ci	mulld	$t3,$a3,$bi
104e1051a39Sopenharmony_ci	addc	@acc[1],@acc[1],$t0	# accumulate low parts
105e1051a39Sopenharmony_ci	mulhdu	$t0,$a0,$bi
106e1051a39Sopenharmony_ci	adde	@acc[2],@acc[2],$t1
107e1051a39Sopenharmony_ci	mulhdu	$t1,$a1,$bi
108e1051a39Sopenharmony_ci	adde	@acc[3],@acc[3],$t2
109e1051a39Sopenharmony_ci	mulhdu	$t2,$a2,$bi
110e1051a39Sopenharmony_ci	adde	@acc[4],@acc[4],$t3
111e1051a39Sopenharmony_ci	mulhdu	$t3,$a3,$bi
112e1051a39Sopenharmony_ci	adde	@acc[5],$zero,$zero
113e1051a39Sopenharmony_ci___
114e1051a39Sopenharmony_ci}
115e1051a39Sopenharmony_ci$code.=<<___;
	# add the pending high halves into acc4-acc7; as soon as each upper
	# word is final it is multiplied by 38 (2^256 = 38 mod 2^255-19)
116e1051a39Sopenharmony_ci	li	$bi,38
117e1051a39Sopenharmony_ci	addc	$acc4,$acc4,$t0
118e1051a39Sopenharmony_ci	mulld	$t0,$acc4,$bi
119e1051a39Sopenharmony_ci	adde	$acc5,$acc5,$t1
120e1051a39Sopenharmony_ci	mulld	$t1,$acc5,$bi
121e1051a39Sopenharmony_ci	adde	$acc6,$acc6,$t2
122e1051a39Sopenharmony_ci	mulld	$t2,$acc6,$bi
123e1051a39Sopenharmony_ci	adde	$acc7,$acc7,$t3
124e1051a39Sopenharmony_ci	mulld	$t3,$acc7,$bi
125e1051a39Sopenharmony_ci
	# fold: add the low halves of the 38*acc products to acc0-acc3,
	# then reuse acc4 to catch the carry out of the low four words
126e1051a39Sopenharmony_ci	addc	$acc0,$acc0,$t0
127e1051a39Sopenharmony_ci	mulhdu	$t0,$acc4,$bi
128e1051a39Sopenharmony_ci	adde	$acc1,$acc1,$t1
129e1051a39Sopenharmony_ci	mulhdu	$t1,$acc5,$bi
130e1051a39Sopenharmony_ci	adde	$acc2,$acc2,$t2
131e1051a39Sopenharmony_ci	mulhdu	$t2,$acc6,$bi
132e1051a39Sopenharmony_ci	adde	$acc3,$acc3,$t3
133e1051a39Sopenharmony_ci	mulhdu	$t3,$acc7,$bi
134e1051a39Sopenharmony_ci	adde	$acc4,$zero,$zero
135e1051a39Sopenharmony_ci
	# the high halves of those products belong one word higher
136e1051a39Sopenharmony_ci	addc	$acc1,$acc1,$t0
137e1051a39Sopenharmony_ci	adde	$acc2,$acc2,$t1
138e1051a39Sopenharmony_ci	adde	$acc3,$acc3,$t2
139e1051a39Sopenharmony_ci	adde	$acc4,$acc4,$t3
140e1051a39Sopenharmony_ci
	# what spilled past 2^256 is folded once more as 38*acc4
141e1051a39Sopenharmony_ci	mulld	$acc4,$acc4,$bi
142e1051a39Sopenharmony_ci
143e1051a39Sopenharmony_ci	addc	$acc0,$acc0,$acc4
144e1051a39Sopenharmony_ci	addze	$acc1,$acc1
145e1051a39Sopenharmony_ci	addze	$acc2,$acc2
146e1051a39Sopenharmony_ci	addze	$acc3,$acc3
147e1051a39Sopenharmony_ci
	# a carry out of that addition is folded by adding 38 to word 0;
	# the stores are interleaved with the mask computation
148e1051a39Sopenharmony_ci	subfe	$acc4,$acc4,$acc4	# carry -> ~mask
149e1051a39Sopenharmony_ci	std	$acc1,8($rp)
150e1051a39Sopenharmony_ci	andc	$acc4,$bi,$acc4
151e1051a39Sopenharmony_ci	std	$acc2,16($rp)
152e1051a39Sopenharmony_ci	add	$acc0,$acc0,$acc4
153e1051a39Sopenharmony_ci	std	$acc3,24($rp)
154e1051a39Sopenharmony_ci	std	$acc0,0($rp)
155e1051a39Sopenharmony_ci
	# restore r22-r31 and release the frame
156e1051a39Sopenharmony_ci	ld	r22,`$FRAME-8*10`($sp)
157e1051a39Sopenharmony_ci	ld	r23,`$FRAME-8*9`($sp)
158e1051a39Sopenharmony_ci	ld	r24,`$FRAME-8*8`($sp)
159e1051a39Sopenharmony_ci	ld	r25,`$FRAME-8*7`($sp)
160e1051a39Sopenharmony_ci	ld	r26,`$FRAME-8*6`($sp)
161e1051a39Sopenharmony_ci	ld	r27,`$FRAME-8*5`($sp)
162e1051a39Sopenharmony_ci	ld	r28,`$FRAME-8*4`($sp)
163e1051a39Sopenharmony_ci	ld	r29,`$FRAME-8*3`($sp)
164e1051a39Sopenharmony_ci	ld	r30,`$FRAME-8*2`($sp)
165e1051a39Sopenharmony_ci	ld	r31,`$FRAME-8*1`($sp)
166e1051a39Sopenharmony_ci	addi	$sp,$sp,$FRAME
167e1051a39Sopenharmony_ci	blr
168e1051a39Sopenharmony_ci	.long	0
169e1051a39Sopenharmony_ci	.byte	0,12,4,0,0x80,10,3,0
170e1051a39Sopenharmony_ci	.long	0
171e1051a39Sopenharmony_ci.size	x25519_fe64_mul,.-x25519_fe64_mul
172e1051a39Sopenharmony_ci
# x25519_fe64_sqr: r3 = result pointer, r4 = operand pointer.
# Squares a four-word (4 x 64-bit, least significant word at offset 0)
# value.  The six cross products are summed once and doubled, the four
# squares are added on top, and the 512-bit result is folded to 256 bits
# with the same multiply-by-38 steps as x25519_fe64_mul.
173e1051a39Sopenharmony_ci.globl	x25519_fe64_sqr
174e1051a39Sopenharmony_ci.type	x25519_fe64_sqr,\@function
175e1051a39Sopenharmony_ci.align	5
176e1051a39Sopenharmony_cix25519_fe64_sqr:
	# allocate the frame and save r22-r31 (restored before returning)
177e1051a39Sopenharmony_ci	stdu	$sp,-$FRAME($sp)
178e1051a39Sopenharmony_ci	std	r22,`$FRAME-8*10`($sp)
179e1051a39Sopenharmony_ci	std	r23,`$FRAME-8*9`($sp)
180e1051a39Sopenharmony_ci	std	r24,`$FRAME-8*8`($sp)
181e1051a39Sopenharmony_ci	std	r25,`$FRAME-8*7`($sp)
182e1051a39Sopenharmony_ci	std	r26,`$FRAME-8*6`($sp)
183e1051a39Sopenharmony_ci	std	r27,`$FRAME-8*5`($sp)
184e1051a39Sopenharmony_ci	std	r28,`$FRAME-8*4`($sp)
185e1051a39Sopenharmony_ci	std	r29,`$FRAME-8*3`($sp)
186e1051a39Sopenharmony_ci	std	r30,`$FRAME-8*2`($sp)
187e1051a39Sopenharmony_ci	std	r31,`$FRAME-8*1`($sp)
188e1051a39Sopenharmony_ci
	# load the operand; r0 is cleared to serve as constant zero
189e1051a39Sopenharmony_ci	ld	$a0,0($ap)
190e1051a39Sopenharmony_ci	xor	$zero,$zero,$zero
191e1051a39Sopenharmony_ci	ld	$a1,8($ap)
192e1051a39Sopenharmony_ci	ld	$a2,16($ap)
193e1051a39Sopenharmony_ci	ld	$a3,24($ap)
194e1051a39Sopenharmony_ci
195e1051a39Sopenharmony_ci	################################
196e1051a39Sopenharmony_ci	#  |  |  |  |  |  |a1*a0|  |
197e1051a39Sopenharmony_ci	#  |  |  |  |  |a2*a0|  |  |
198e1051a39Sopenharmony_ci	#  |  |a3*a2|a3*a0|  |  |  |
199e1051a39Sopenharmony_ci	#  |  |  |  |a2*a1|  |  |  |
200e1051a39Sopenharmony_ci	#  |  |  |a3*a1|  |  |  |  |
201e1051a39Sopenharmony_ci	# *|  |  |  |  |  |  |  | 2|
202e1051a39Sopenharmony_ci	# +|a3*a3|a2*a2|a1*a1|a0*a0|
203e1051a39Sopenharmony_ci	#  |--+--+--+--+--+--+--+--|
204e1051a39Sopenharmony_ci	#  |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is $accx, i.e. follow $accx
205e1051a39Sopenharmony_ci	#
206e1051a39Sopenharmony_ci	#  "can't overflow" below mark carrying into high part of
207e1051a39Sopenharmony_ci	#  multiplication result, which can't overflow, because it
208e1051a39Sopenharmony_ci	#  can never be all ones.
209e1051a39Sopenharmony_ci
210e1051a39Sopenharmony_ci	mulld	$acc1,$a1,$a0		# a[1]*a[0]
211e1051a39Sopenharmony_ci	mulhdu	$t1,$a1,$a0
212e1051a39Sopenharmony_ci	mulld	$acc2,$a2,$a0		# a[2]*a[0]
213e1051a39Sopenharmony_ci	mulhdu	$t2,$a2,$a0
214e1051a39Sopenharmony_ci	mulld	$acc3,$a3,$a0		# a[3]*a[0]
215e1051a39Sopenharmony_ci	mulhdu	$acc4,$a3,$a0
216e1051a39Sopenharmony_ci
217e1051a39Sopenharmony_ci	addc	$acc2,$acc2,$t1		# accumulate high parts of multiplication
218e1051a39Sopenharmony_ci	 mulld	$t0,$a2,$a1		# a[2]*a[1]
219e1051a39Sopenharmony_ci	 mulhdu	$t1,$a2,$a1
220e1051a39Sopenharmony_ci	adde	$acc3,$acc3,$t2
221e1051a39Sopenharmony_ci	 mulld	$t2,$a3,$a1		# a[3]*a[1]
222e1051a39Sopenharmony_ci	 mulhdu	$t3,$a3,$a1
223e1051a39Sopenharmony_ci	addze	$acc4,$acc4		# can't overflow
224e1051a39Sopenharmony_ci
225e1051a39Sopenharmony_ci	mulld	$acc5,$a3,$a2		# a[3]*a[2]
226e1051a39Sopenharmony_ci	mulhdu	$acc6,$a3,$a2
227e1051a39Sopenharmony_ci
228e1051a39Sopenharmony_ci	addc	$t1,$t1,$t2		# accumulate high parts of multiplication
229e1051a39Sopenharmony_ci	 mulld	$acc0,$a0,$a0		# a[0]*a[0]
230e1051a39Sopenharmony_ci	addze	$t2,$t3			# can't overflow
231e1051a39Sopenharmony_ci
	# from here on each a register is overwritten with the high half
	# of its own square; the low halves go to acc0 and t1-t3
232e1051a39Sopenharmony_ci	addc	$acc3,$acc3,$t0		# accumulate low parts of multiplication
233e1051a39Sopenharmony_ci	 mulhdu	$a0,$a0,$a0
234e1051a39Sopenharmony_ci	adde	$acc4,$acc4,$t1
235e1051a39Sopenharmony_ci	 mulld	$t1,$a1,$a1		# a[1]*a[1]
236e1051a39Sopenharmony_ci	adde	$acc5,$acc5,$t2
237e1051a39Sopenharmony_ci	 mulhdu	$a1,$a1,$a1
238e1051a39Sopenharmony_ci	addze	$acc6,$acc6		# can't overflow
239e1051a39Sopenharmony_ci
	# double the cross-product sum; the bit shifted out goes to acc7
240e1051a39Sopenharmony_ci	addc	$acc1,$acc1,$acc1	# acc[1-6]*=2
241e1051a39Sopenharmony_ci	 mulld	$t2,$a2,$a2		# a[2]*a[2]
242e1051a39Sopenharmony_ci	adde	$acc2,$acc2,$acc2
243e1051a39Sopenharmony_ci	 mulhdu	$a2,$a2,$a2
244e1051a39Sopenharmony_ci	adde	$acc3,$acc3,$acc3
245e1051a39Sopenharmony_ci	 mulld	$t3,$a3,$a3		# a[3]*a[3]
246e1051a39Sopenharmony_ci	adde	$acc4,$acc4,$acc4
247e1051a39Sopenharmony_ci	 mulhdu	$a3,$a3,$a3
248e1051a39Sopenharmony_ci	adde	$acc5,$acc5,$acc5
249e1051a39Sopenharmony_ci	adde	$acc6,$acc6,$acc6
250e1051a39Sopenharmony_ci	addze	$acc7,$zero
251e1051a39Sopenharmony_ci
	# add the squares: high halves come from the a registers, low
	# halves from t1-t3; 38 is loaded for the fold that follows
252e1051a39Sopenharmony_ci	addc	$acc1,$acc1,$a0		# +a[i]*a[i]
253e1051a39Sopenharmony_ci	 li	$bi,38
254e1051a39Sopenharmony_ci	adde	$acc2,$acc2,$t1
255e1051a39Sopenharmony_ci	adde	$acc3,$acc3,$a1
256e1051a39Sopenharmony_ci	adde	$acc4,$acc4,$t2
257e1051a39Sopenharmony_ci	adde	$acc5,$acc5,$a2
258e1051a39Sopenharmony_ci	adde	$acc6,$acc6,$t3
259e1051a39Sopenharmony_ci	adde	$acc7,$acc7,$a3
260e1051a39Sopenharmony_ci
	# fold the upper four words: 2^256 = 38 (mod 2^255-19)
261e1051a39Sopenharmony_ci	mulld	$t0,$acc4,$bi
262e1051a39Sopenharmony_ci	mulld	$t1,$acc5,$bi
263e1051a39Sopenharmony_ci	mulld	$t2,$acc6,$bi
264e1051a39Sopenharmony_ci	mulld	$t3,$acc7,$bi
265e1051a39Sopenharmony_ci
	# add the low halves of the products, then reuse acc4 to catch
	# the carry out of the low four words
266e1051a39Sopenharmony_ci	addc	$acc0,$acc0,$t0
267e1051a39Sopenharmony_ci	mulhdu	$t0,$acc4,$bi
268e1051a39Sopenharmony_ci	adde	$acc1,$acc1,$t1
269e1051a39Sopenharmony_ci	mulhdu	$t1,$acc5,$bi
270e1051a39Sopenharmony_ci	adde	$acc2,$acc2,$t2
271e1051a39Sopenharmony_ci	mulhdu	$t2,$acc6,$bi
272e1051a39Sopenharmony_ci	adde	$acc3,$acc3,$t3
273e1051a39Sopenharmony_ci	mulhdu	$t3,$acc7,$bi
274e1051a39Sopenharmony_ci	addze	$acc4,$zero
275e1051a39Sopenharmony_ci
	# the high halves of those products belong one word higher
276e1051a39Sopenharmony_ci	addc	$acc1,$acc1,$t0
277e1051a39Sopenharmony_ci	adde	$acc2,$acc2,$t1
278e1051a39Sopenharmony_ci	adde	$acc3,$acc3,$t2
279e1051a39Sopenharmony_ci	adde	$acc4,$acc4,$t3
280e1051a39Sopenharmony_ci
	# what spilled past 2^256 is folded once more as 38*acc4
281e1051a39Sopenharmony_ci	mulld	$acc4,$acc4,$bi
282e1051a39Sopenharmony_ci
283e1051a39Sopenharmony_ci	addc	$acc0,$acc0,$acc4
284e1051a39Sopenharmony_ci	addze	$acc1,$acc1
285e1051a39Sopenharmony_ci	addze	$acc2,$acc2
286e1051a39Sopenharmony_ci	addze	$acc3,$acc3
287e1051a39Sopenharmony_ci
	# a carry out of that addition is folded by adding 38 to word 0
288e1051a39Sopenharmony_ci	subfe	$acc4,$acc4,$acc4	# carry -> ~mask
289e1051a39Sopenharmony_ci	std	$acc1,8($rp)
290e1051a39Sopenharmony_ci	andc	$acc4,$bi,$acc4
291e1051a39Sopenharmony_ci	std	$acc2,16($rp)
292e1051a39Sopenharmony_ci	add	$acc0,$acc0,$acc4
293e1051a39Sopenharmony_ci	std	$acc3,24($rp)
294e1051a39Sopenharmony_ci	std	$acc0,0($rp)
295e1051a39Sopenharmony_ci
	# restore r22-r31 and release the frame
296e1051a39Sopenharmony_ci	ld	r22,`$FRAME-8*10`($sp)
297e1051a39Sopenharmony_ci	ld	r23,`$FRAME-8*9`($sp)
298e1051a39Sopenharmony_ci	ld	r24,`$FRAME-8*8`($sp)
299e1051a39Sopenharmony_ci	ld	r25,`$FRAME-8*7`($sp)
300e1051a39Sopenharmony_ci	ld	r26,`$FRAME-8*6`($sp)
301e1051a39Sopenharmony_ci	ld	r27,`$FRAME-8*5`($sp)
302e1051a39Sopenharmony_ci	ld	r28,`$FRAME-8*4`($sp)
303e1051a39Sopenharmony_ci	ld	r29,`$FRAME-8*3`($sp)
304e1051a39Sopenharmony_ci	ld	r30,`$FRAME-8*2`($sp)
305e1051a39Sopenharmony_ci	ld	r31,`$FRAME-8*1`($sp)
306e1051a39Sopenharmony_ci	addi	$sp,$sp,$FRAME
307e1051a39Sopenharmony_ci	blr
308e1051a39Sopenharmony_ci	.long	0
309e1051a39Sopenharmony_ci	.byte	0,12,4,0,0x80,10,2,0
310e1051a39Sopenharmony_ci	.long	0
311e1051a39Sopenharmony_ci.size	x25519_fe64_sqr,.-x25519_fe64_sqr
312e1051a39Sopenharmony_ci
# x25519_fe64_mul121666: r3 = result pointer, r4 = operand pointer.
# Multiplies a four-word (4 x 64-bit, least significant word at offset 0)
# value by the constant 121666 and folds the overflow back in using
# 2^256 = 38 (mod 2^255-19).  No stack frame is used; the r4 and r5
# argument registers are reused as scratch once the operand is loaded.
313e1051a39Sopenharmony_ci.globl	x25519_fe64_mul121666
314e1051a39Sopenharmony_ci.type	x25519_fe64_mul121666,\@function
315e1051a39Sopenharmony_ci.align	5
316e1051a39Sopenharmony_cix25519_fe64_mul121666:
	# build 121666 as (1 << 16) | (121666 - 65536)
317e1051a39Sopenharmony_ci	lis	$bi,`65536>>16`
318e1051a39Sopenharmony_ci	ori	$bi,$bi,`121666-65536`
319e1051a39Sopenharmony_ci
	# a[0..3]; the last load overwrites the operand pointer itself
320e1051a39Sopenharmony_ci	ld	$t0,0($ap)
321e1051a39Sopenharmony_ci	ld	$t1,8($ap)
322e1051a39Sopenharmony_ci	ld	$bp,16($ap)
323e1051a39Sopenharmony_ci	ld	$ap,24($ap)
324e1051a39Sopenharmony_ci
	# low halves go to the a registers, high halves replace the inputs
325e1051a39Sopenharmony_ci	mulld	$a0,$t0,$bi
326e1051a39Sopenharmony_ci	mulhdu	$t0,$t0,$bi
327e1051a39Sopenharmony_ci	mulld	$a1,$t1,$bi
328e1051a39Sopenharmony_ci	mulhdu	$t1,$t1,$bi
329e1051a39Sopenharmony_ci	mulld	$a2,$bp,$bi
330e1051a39Sopenharmony_ci	mulhdu	$bp,$bp,$bi
331e1051a39Sopenharmony_ci	mulld	$a3,$ap,$bi
332e1051a39Sopenharmony_ci	mulhdu	$ap,$ap,$bi
333e1051a39Sopenharmony_ci
	# each high half is added one word up; the top one ends up in r4
334e1051a39Sopenharmony_ci	addc	$a1,$a1,$t0
335e1051a39Sopenharmony_ci	adde	$a2,$a2,$t1
336e1051a39Sopenharmony_ci	adde	$a3,$a3,$bp
337e1051a39Sopenharmony_ci	addze	$ap,    $ap
338e1051a39Sopenharmony_ci
	# fold the word above 2^256 back in as 38 times its value
339e1051a39Sopenharmony_ci	mulli	$ap,$ap,38
340e1051a39Sopenharmony_ci
341e1051a39Sopenharmony_ci	addc	$a0,$a0,$ap
342e1051a39Sopenharmony_ci	addze	$a1,$a1
343e1051a39Sopenharmony_ci	addze	$a2,$a2
344e1051a39Sopenharmony_ci	addze	$a3,$a3
345e1051a39Sopenharmony_ci
	# A carry out of the addition above must be folded by adding 38 to
	# word 0.  At this point t0 still holds the high half of the first
	# product, so the constant has to be loaded before it is masked;
	# li leaves the carry bit untouched, so the subfe below still sees
	# the carry produced by the addze chain.
	li	$t0,38
346e1051a39Sopenharmony_ci	subfe	$t1,$t1,$t1		# carry -> ~mask
347e1051a39Sopenharmony_ci	std	$a1,8($rp)
348e1051a39Sopenharmony_ci	andc	$t0,$t0,$t1
349e1051a39Sopenharmony_ci	std	$a2,16($rp)
350e1051a39Sopenharmony_ci	add	$a0,$a0,$t0
351e1051a39Sopenharmony_ci	std	$a3,24($rp)
352e1051a39Sopenharmony_ci	std	$a0,0($rp)
353e1051a39Sopenharmony_ci
354e1051a39Sopenharmony_ci	blr
355e1051a39Sopenharmony_ci	.long	0
356e1051a39Sopenharmony_ci	.byte	0,12,0x14,0,0,0,2,0
357e1051a39Sopenharmony_ci	.long	0
358e1051a39Sopenharmony_ci.size	x25519_fe64_mul121666,.-x25519_fe64_mul121666
359e1051a39Sopenharmony_ci
# x25519_fe64_add: r3 = result pointer, r4 and r5 = operand pointers.
# Adds two four-word (4 x 64-bit, least significant word at offset 0)
# values.  A carry out of 2^256 is folded back in by adding 38, and a
# carry out of that correction is folded the same way once more.
# No stack frame is used; r5 and r6 double as scratch for b[3] and b[2].
360e1051a39Sopenharmony_ci.globl	x25519_fe64_add
361e1051a39Sopenharmony_ci.type	x25519_fe64_add,\@function
362e1051a39Sopenharmony_ci.align	5
363e1051a39Sopenharmony_cix25519_fe64_add:
364e1051a39Sopenharmony_ci	ld	$a0,0($ap)
365e1051a39Sopenharmony_ci	ld	$t0,0($bp)
366e1051a39Sopenharmony_ci	ld	$a1,8($ap)
367e1051a39Sopenharmony_ci	ld	$t1,8($bp)
368e1051a39Sopenharmony_ci	ld	$a2,16($ap)
369e1051a39Sopenharmony_ci	ld	$bi,16($bp)
370e1051a39Sopenharmony_ci	ld	$a3,24($ap)
371e1051a39Sopenharmony_ci	ld	$bp,24($bp)
372e1051a39Sopenharmony_ci
	# 256-bit addition with carry propagation
373e1051a39Sopenharmony_ci	addc	$a0,$a0,$t0
374e1051a39Sopenharmony_ci	adde	$a1,$a1,$t1
375e1051a39Sopenharmony_ci	adde	$a2,$a2,$bi
376e1051a39Sopenharmony_ci	adde	$a3,$a3,$bp
377e1051a39Sopenharmony_ci
	# t1 becomes 38 if the addition carried out, 0 otherwise
378e1051a39Sopenharmony_ci	li	$t0,38
379e1051a39Sopenharmony_ci	subfe	$t1,$t1,$t1		# carry -> ~mask
380e1051a39Sopenharmony_ci	andc	$t1,$t0,$t1
381e1051a39Sopenharmony_ci
382e1051a39Sopenharmony_ci	addc	$a0,$a0,$t1
383e1051a39Sopenharmony_ci	addze	$a1,$a1
384e1051a39Sopenharmony_ci	addze	$a2,$a2
385e1051a39Sopenharmony_ci	addze	$a3,$a3
386e1051a39Sopenharmony_ci
	# second conditional fold, applied to word 0 only; the stores are
	# interleaved with the mask computation
387e1051a39Sopenharmony_ci	subfe	$t1,$t1,$t1		# carry -> ~mask
388e1051a39Sopenharmony_ci	std	$a1,8($rp)
389e1051a39Sopenharmony_ci	andc	$t0,$t0,$t1
390e1051a39Sopenharmony_ci	std	$a2,16($rp)
391e1051a39Sopenharmony_ci	add	$a0,$a0,$t0
392e1051a39Sopenharmony_ci	std	$a3,24($rp)
393e1051a39Sopenharmony_ci	std	$a0,0($rp)
394e1051a39Sopenharmony_ci
395e1051a39Sopenharmony_ci	blr
396e1051a39Sopenharmony_ci	.long	0
397e1051a39Sopenharmony_ci	.byte	0,12,0x14,0,0,0,3,0
398e1051a39Sopenharmony_ci	.long	0
399e1051a39Sopenharmony_ci.size	x25519_fe64_add,.-x25519_fe64_add
400e1051a39Sopenharmony_ci
# x25519_fe64_sub: r3 = result pointer, r4 = minuend pointer,
# r5 = subtrahend pointer.  Subtracts two four-word (4 x 64-bit, least
# significant word at offset 0) values.  A borrow out of the top word is
# compensated by subtracting 38, and a borrow out of that correction is
# compensated the same way once more.  No stack frame is used.
401e1051a39Sopenharmony_ci.globl	x25519_fe64_sub
402e1051a39Sopenharmony_ci.type	x25519_fe64_sub,\@function
403e1051a39Sopenharmony_ci.align	5
404e1051a39Sopenharmony_cix25519_fe64_sub:
405e1051a39Sopenharmony_ci	ld	$a0,0($ap)
406e1051a39Sopenharmony_ci	ld	$t0,0($bp)
407e1051a39Sopenharmony_ci	ld	$a1,8($ap)
408e1051a39Sopenharmony_ci	ld	$t1,8($bp)
409e1051a39Sopenharmony_ci	ld	$a2,16($ap)
410e1051a39Sopenharmony_ci	ld	$bi,16($bp)
411e1051a39Sopenharmony_ci	ld	$a3,24($ap)
412e1051a39Sopenharmony_ci	ld	$bp,24($bp)
413e1051a39Sopenharmony_ci
	# 256-bit a - b (subf computes second source minus first source)
414e1051a39Sopenharmony_ci	subfc	$a0,$t0,$a0
415e1051a39Sopenharmony_ci	subfe	$a1,$t1,$a1
416e1051a39Sopenharmony_ci	subfe	$a2,$bi,$a2
417e1051a39Sopenharmony_ci	subfe	$a3,$bp,$a3
418e1051a39Sopenharmony_ci
	# t1 becomes 38 if the subtraction borrowed, 0 otherwise;
	# r0 is cleared to serve as zero in the borrow propagation below
419e1051a39Sopenharmony_ci	li	$t0,38
420e1051a39Sopenharmony_ci	subfe	$t1,$t1,$t1		# borrow -> mask
421e1051a39Sopenharmony_ci	xor	$zero,$zero,$zero
422e1051a39Sopenharmony_ci	and	$t1,$t0,$t1
423e1051a39Sopenharmony_ci
424e1051a39Sopenharmony_ci	subfc	$a0,$t1,$a0
425e1051a39Sopenharmony_ci	subfe	$a1,$zero,$a1
426e1051a39Sopenharmony_ci	subfe	$a2,$zero,$a2
427e1051a39Sopenharmony_ci	subfe	$a3,$zero,$a3
428e1051a39Sopenharmony_ci
	# second conditional correction, applied to word 0 only; the
	# stores are interleaved with the mask computation
429e1051a39Sopenharmony_ci	subfe	$t1,$t1,$t1		# borrow -> mask
430e1051a39Sopenharmony_ci	std	$a1,8($rp)
431e1051a39Sopenharmony_ci	and	$t0,$t0,$t1
432e1051a39Sopenharmony_ci	std	$a2,16($rp)
433e1051a39Sopenharmony_ci	subf	$a0,$t0,$a0
434e1051a39Sopenharmony_ci	std	$a3,24($rp)
435e1051a39Sopenharmony_ci	std	$a0,0($rp)
436e1051a39Sopenharmony_ci
437e1051a39Sopenharmony_ci	blr
438e1051a39Sopenharmony_ci	.long	0
439e1051a39Sopenharmony_ci	.byte	0,12,0x14,0,0,0,3,0
440e1051a39Sopenharmony_ci	.long	0
441e1051a39Sopenharmony_ci.size	x25519_fe64_sub,.-x25519_fe64_sub
442e1051a39Sopenharmony_ci
# x25519_fe64_tobytes: r3 = 32-byte output pointer, r4 = operand pointer.
# Reduces a four-word (4 x 64-bit, least significant word at offset 0)
# value modulo 2^255-19 and writes it out one byte at a time, least
# significant byte first.  No stack frame is used.
443e1051a39Sopenharmony_ci.globl	x25519_fe64_tobytes
444e1051a39Sopenharmony_ci.type	x25519_fe64_tobytes,\@function
445e1051a39Sopenharmony_ci.align	5
446e1051a39Sopenharmony_cix25519_fe64_tobytes:
447e1051a39Sopenharmony_ci	ld	$a3,24($ap)
448e1051a39Sopenharmony_ci	ld	$a0,0($ap)
449e1051a39Sopenharmony_ci	ld	$a1,8($ap)
450e1051a39Sopenharmony_ci	ld	$a2,16($ap)
451e1051a39Sopenharmony_ci
	# Bit 255 is cleared and replaced by +19 (2^255 = 19 mod 2^255-19).
	# A further 19 is added unconditionally as a trial: bit 255 of the
	# sum then tells whether the value reached the modulus.
452e1051a39Sopenharmony_ci	sradi	$t0,$a3,63		# most significant bit -> mask
453e1051a39Sopenharmony_ci	li	$t1,19
454e1051a39Sopenharmony_ci	and	$t0,$t0,$t1
455e1051a39Sopenharmony_ci	sldi	$a3,$a3,1
456e1051a39Sopenharmony_ci	add	$t0,$t0,$t1		# compare to modulus in the same go
457e1051a39Sopenharmony_ci	srdi	$a3,$a3,1		# most significant bit cleared
458e1051a39Sopenharmony_ci
459e1051a39Sopenharmony_ci	addc	$a0,$a0,$t0
460e1051a39Sopenharmony_ci	addze	$a1,$a1
461e1051a39Sopenharmony_ci	addze	$a2,$a2
462e1051a39Sopenharmony_ci	addze	$a3,$a3
463e1051a39Sopenharmony_ci
	# If bit 255 is now set the trial 19 stays and clearing the bit
	# completes the subtraction of the modulus; otherwise t0 = 19 and
	# the trial addition is taken back below.
464e1051a39Sopenharmony_ci	xor	$zero,$zero,$zero
465e1051a39Sopenharmony_ci	sradi	$t0,$a3,63		# most significant bit -> mask
466e1051a39Sopenharmony_ci	sldi	$a3,$a3,1
467e1051a39Sopenharmony_ci	andc	$t0,$t1,$t0
468e1051a39Sopenharmony_ci	srdi	$a3,$a3,1		# most significant bit cleared
469e1051a39Sopenharmony_ci
	# the output pointer is backed up by one because every byte is
	# written with an update-form store at displacement 1
470e1051a39Sopenharmony_ci	subi	$rp,$rp,1
471e1051a39Sopenharmony_ci	subfc	$a0,$t0,$a0
472e1051a39Sopenharmony_ci	subfe	$a1,$zero,$a1
473e1051a39Sopenharmony_ci	subfe	$a2,$zero,$a2
474e1051a39Sopenharmony_ci	subfe	$a3,$zero,$a3
475e1051a39Sopenharmony_ci
476e1051a39Sopenharmony_ci___
# One pass per 64-bit word, lowest word first.  Each pass emits eight
# update-form byte stores, low byte first, shifting the word right as it goes;
# $t0 carries the odd-numbered bytes so the word is shifted 16 bits at a time.
477e1051a39Sopenharmony_cifor (my @a=($a0,$a1,$a2,$a3), my $i=0; $i<4; shift(@a), $i++) {
478e1051a39Sopenharmony_ci$code.=<<___;
479e1051a39Sopenharmony_ci	srdi	$t0,@a[0],8
480e1051a39Sopenharmony_ci	stbu	@a[0],1($rp)
481e1051a39Sopenharmony_ci	srdi	@a[0],@a[0],16
482e1051a39Sopenharmony_ci	stbu	$t0,1($rp)
483e1051a39Sopenharmony_ci	srdi	$t0,@a[0],8
484e1051a39Sopenharmony_ci	stbu	@a[0],1($rp)
485e1051a39Sopenharmony_ci	srdi	@a[0],@a[0],16
486e1051a39Sopenharmony_ci	stbu	$t0,1($rp)
487e1051a39Sopenharmony_ci	srdi	$t0,@a[0],8
488e1051a39Sopenharmony_ci	stbu	@a[0],1($rp)
489e1051a39Sopenharmony_ci	srdi	@a[0],@a[0],16
490e1051a39Sopenharmony_ci	stbu	$t0,1($rp)
491e1051a39Sopenharmony_ci	srdi	$t0,@a[0],8
492e1051a39Sopenharmony_ci	stbu	@a[0],1($rp)
493e1051a39Sopenharmony_ci	stbu	$t0,1($rp)
494e1051a39Sopenharmony_ci___
495e1051a39Sopenharmony_ci}
496e1051a39Sopenharmony_ci$code.=<<___;
497e1051a39Sopenharmony_ci	blr
498e1051a39Sopenharmony_ci	.long	0
499e1051a39Sopenharmony_ci	.byte	0,12,0x14,0,0,0,2,0
500e1051a39Sopenharmony_ci	.long	0
501e1051a39Sopenharmony_ci.size	x25519_fe64_tobytes,.-x25519_fe64_tobytes
502e1051a39Sopenharmony_ci___
# End of the disabled base 2^64 section (closes the constant-false guard).
503e1051a39Sopenharmony_ci}
504e1051a39Sopenharmony_ci####################################################### base 2^51
# Base 2^51 code path; unlike the base 2^64 section it is not guarded, so its
# heredocs are appended to $code.
505e1051a39Sopenharmony_ci{
# Register map for this section: the first seven names ($bi, $a0-$a4, $t0)
# become r6-r12 and the remaining eleven ($t1 and the five lo/hi accumulator
# pairs $h0lo..$h4hi) r21-r31.
506e1051a39Sopenharmony_cimy ($bi,$a0,$a1,$a2,$a3,$a4,$t0, $t1,
507e1051a39Sopenharmony_ci    $h0lo,$h0hi,$h1lo,$h1hi,$h2lo,$h2hi,$h3lo,$h3hi,$h4lo,$h4hi) =
508e1051a39Sopenharmony_ci    map("r$_",(6..12,21..31));
# r0 holds the 51-bit limb mask during the carry pass.
509e1051a39Sopenharmony_cimy $mask = "r0";
# Stack frame size in bytes; r21-r31 are saved in its top eleven doublewords.
510e1051a39Sopenharmony_cimy $FRAME = 18*8;
511e1051a39Sopenharmony_ci
512e1051a39Sopenharmony_ci$code.=<<___;
513e1051a39Sopenharmony_ci.text
514e1051a39Sopenharmony_ci
# x25519_fe51_mul: r3 = result pointer, r4 and r5 = operand pointers.
# Operands and result are five 64-bit words at offsets 0-32, treated as
# base 2^51 limbs.  Each of the five column sums h0-h4 is accumulated as
# a 128-bit lo/hi register pair.  Products that would land at or above
# 2^255 are pre-multiplied by 19 (2^255 = 19 mod 2^255-19) so they can be
# added straight into the low columns.  A carry pass then brings the sums
# back to limbs; the final carries added into limbs 1 and 3 are not
# masked again.
515e1051a39Sopenharmony_ci.globl	x25519_fe51_mul
516e1051a39Sopenharmony_ci.type	x25519_fe51_mul,\@function
517e1051a39Sopenharmony_ci.align	5
518e1051a39Sopenharmony_cix25519_fe51_mul:
	# allocate the frame and save r21-r31 (restored before returning)
519e1051a39Sopenharmony_ci	stdu	$sp,-$FRAME($sp)
520e1051a39Sopenharmony_ci	std	r21,`$FRAME-8*11`($sp)
521e1051a39Sopenharmony_ci	std	r22,`$FRAME-8*10`($sp)
522e1051a39Sopenharmony_ci	std	r23,`$FRAME-8*9`($sp)
523e1051a39Sopenharmony_ci	std	r24,`$FRAME-8*8`($sp)
524e1051a39Sopenharmony_ci	std	r25,`$FRAME-8*7`($sp)
525e1051a39Sopenharmony_ci	std	r26,`$FRAME-8*6`($sp)
526e1051a39Sopenharmony_ci	std	r27,`$FRAME-8*5`($sp)
527e1051a39Sopenharmony_ci	std	r28,`$FRAME-8*4`($sp)
528e1051a39Sopenharmony_ci	std	r29,`$FRAME-8*3`($sp)
529e1051a39Sopenharmony_ci	std	r30,`$FRAME-8*2`($sp)
530e1051a39Sopenharmony_ci	std	r31,`$FRAME-8*1`($sp)
531e1051a39Sopenharmony_ci
532e1051a39Sopenharmony_ci	ld	$bi,0($bp)
533e1051a39Sopenharmony_ci	ld	$a0,0($ap)
534e1051a39Sopenharmony_ci	ld	$a1,8($ap)
535e1051a39Sopenharmony_ci	ld	$a2,16($ap)
536e1051a39Sopenharmony_ci	ld	$a3,24($ap)
537e1051a39Sopenharmony_ci	ld	$a4,32($ap)
538e1051a39Sopenharmony_ci
	# First row: every a[i]*b[0] initialises column i.  Once all of a
	# is in registers the pointer register r4 is free, so b[1] is
	# loaded into it, and a[4] is scaled by 19 for the later rows.
539e1051a39Sopenharmony_ci	mulld	$h0lo,$a0,$bi		# a[0]*b[0]
540e1051a39Sopenharmony_ci	mulhdu	$h0hi,$a0,$bi
541e1051a39Sopenharmony_ci
542e1051a39Sopenharmony_ci	mulld	$h1lo,$a1,$bi		# a[1]*b[0]
543e1051a39Sopenharmony_ci	mulhdu	$h1hi,$a1,$bi
544e1051a39Sopenharmony_ci
545e1051a39Sopenharmony_ci	 mulld	$h4lo,$a4,$bi		# a[4]*b[0]
546e1051a39Sopenharmony_ci	 mulhdu	$h4hi,$a4,$bi
547e1051a39Sopenharmony_ci	 ld	$ap,8($bp)
548e1051a39Sopenharmony_ci	 mulli	$a4,$a4,19
549e1051a39Sopenharmony_ci
550e1051a39Sopenharmony_ci	mulld	$h2lo,$a2,$bi		# a[2]*b[0]
551e1051a39Sopenharmony_ci	mulhdu	$h2hi,$a2,$bi
552e1051a39Sopenharmony_ci
553e1051a39Sopenharmony_ci	mulld	$h3lo,$a3,$bi		# a[3]*b[0]
554e1051a39Sopenharmony_ci	mulhdu	$h3hi,$a3,$bi
555e1051a39Sopenharmony_ci___
# Rows 1..3.  The Perl names $ap and $bi trade registers on every pass, so the
# word of b fetched during the previous row becomes the current multiplier
# while the next word is loaded into the other register.  @a is rotated right
# after each pass, so @a[0] always lines up with column 1 and @a[4] (already
# scaled by 19) with column 0.  @a[3] is scaled by 19 right after its last
# unscaled use, ready to wrap around in the next row.
556e1051a39Sopenharmony_cifor(my @a=($a0,$a1,$a2,$a3,$a4),
557e1051a39Sopenharmony_ci    my $i=1; $i<4; $i++) {
558e1051a39Sopenharmony_ci	($ap,$bi) = ($bi,$ap);
559e1051a39Sopenharmony_ci$code.=<<___;
560e1051a39Sopenharmony_ci	mulld	$t0,@a[4],$bi
561e1051a39Sopenharmony_ci	mulhdu	$t1,@a[4],$bi
562e1051a39Sopenharmony_ci	addc	$h0lo,$h0lo,$t0
563e1051a39Sopenharmony_ci	adde	$h0hi,$h0hi,$t1
564e1051a39Sopenharmony_ci
565e1051a39Sopenharmony_ci	mulld	$t0,@a[0],$bi
566e1051a39Sopenharmony_ci	mulhdu	$t1,@a[0],$bi
567e1051a39Sopenharmony_ci	addc	$h1lo,$h1lo,$t0
568e1051a39Sopenharmony_ci	adde	$h1hi,$h1hi,$t1
569e1051a39Sopenharmony_ci
570e1051a39Sopenharmony_ci	 mulld	$t0,@a[3],$bi
571e1051a39Sopenharmony_ci	 mulhdu	$t1,@a[3],$bi
572e1051a39Sopenharmony_ci	 ld	$ap,`8*($i+1)`($bp)
573e1051a39Sopenharmony_ci	 mulli	@a[3],@a[3],19
574e1051a39Sopenharmony_ci	 addc	$h4lo,$h4lo,$t0
575e1051a39Sopenharmony_ci	 adde	$h4hi,$h4hi,$t1
576e1051a39Sopenharmony_ci
577e1051a39Sopenharmony_ci	mulld	$t0,@a[1],$bi
578e1051a39Sopenharmony_ci	mulhdu	$t1,@a[1],$bi
579e1051a39Sopenharmony_ci	addc	$h2lo,$h2lo,$t0
580e1051a39Sopenharmony_ci	adde	$h2hi,$h2hi,$t1
581e1051a39Sopenharmony_ci
582e1051a39Sopenharmony_ci	mulld	$t0,@a[2],$bi
583e1051a39Sopenharmony_ci	mulhdu	$t1,@a[2],$bi
584e1051a39Sopenharmony_ci	addc	$h3lo,$h3lo,$t0
585e1051a39Sopenharmony_ci	adde	$h3hi,$h3hi,$t1
586e1051a39Sopenharmony_ci___
587e1051a39Sopenharmony_ci	unshift(@a,pop(@a));
588e1051a39Sopenharmony_ci}
# Last row (b[4]).  This is the fourth swap in total, so $ap and $bi name
# their original registers again for the code that follows.  By now $a1-$a4
# have all been scaled by 19; only $a0 is still unscaled.
589e1051a39Sopenharmony_ci	($ap,$bi) = ($bi,$ap);
590e1051a39Sopenharmony_ci$code.=<<___;
591e1051a39Sopenharmony_ci	mulld	$t0,$a1,$bi
592e1051a39Sopenharmony_ci	mulhdu	$t1,$a1,$bi
593e1051a39Sopenharmony_ci	addc	$h0lo,$h0lo,$t0
594e1051a39Sopenharmony_ci	adde	$h0hi,$h0hi,$t1
595e1051a39Sopenharmony_ci
596e1051a39Sopenharmony_ci	mulld	$t0,$a2,$bi
597e1051a39Sopenharmony_ci	mulhdu	$t1,$a2,$bi
598e1051a39Sopenharmony_ci	addc	$h1lo,$h1lo,$t0
599e1051a39Sopenharmony_ci	adde	$h1hi,$h1hi,$t1
600e1051a39Sopenharmony_ci
601e1051a39Sopenharmony_ci	mulld	$t0,$a3,$bi
602e1051a39Sopenharmony_ci	mulhdu	$t1,$a3,$bi
603e1051a39Sopenharmony_ci	addc	$h2lo,$h2lo,$t0
604e1051a39Sopenharmony_ci	adde	$h2hi,$h2hi,$t1
605e1051a39Sopenharmony_ci
606e1051a39Sopenharmony_ci	mulld	$t0,$a4,$bi
607e1051a39Sopenharmony_ci	mulhdu	$t1,$a4,$bi
608e1051a39Sopenharmony_ci	addc	$h3lo,$h3lo,$t0
609e1051a39Sopenharmony_ci	adde	$h3hi,$h3hi,$t1
610e1051a39Sopenharmony_ci
611e1051a39Sopenharmony_ci	mulld	$t0,$a0,$bi
612e1051a39Sopenharmony_ci	mulhdu	$t1,$a0,$bi
613e1051a39Sopenharmony_ci	addc	$h4lo,$h4lo,$t0
614e1051a39Sopenharmony_ci	adde	$h4hi,$h4hi,$t1
615e1051a39Sopenharmony_ci
	# Carry pass over the five 128-bit column sums.  Two chains are
	# interleaved: h2 -> h3 -> h4 and h0 -> h1 -> limb 2.  The part of
	# h4 above bit 51 is multiplied by 19 and added to limb 0, and
	# limbs 2 and 0 are then carried once more into limbs 3 and 1.
	# NOTE(review): the local label is presumably a shared entry point
	# for other fe51 routines; no branch to it is visible in this part
	# of the file, so confirm before moving or renaming it.
616e1051a39Sopenharmony_ci.Lfe51_reduce:
617e1051a39Sopenharmony_ci	li	$mask,-1
618e1051a39Sopenharmony_ci	srdi	$mask,$mask,13		# 0x7ffffffffffff
619e1051a39Sopenharmony_ci
620e1051a39Sopenharmony_ci	srdi	$t0,$h2lo,51
621e1051a39Sopenharmony_ci	and	$a2,$h2lo,$mask
622e1051a39Sopenharmony_ci	insrdi	$t0,$h2hi,51,0		# h2>>51
623e1051a39Sopenharmony_ci	 srdi	$t1,$h0lo,51
624e1051a39Sopenharmony_ci	 and	$a0,$h0lo,$mask
625e1051a39Sopenharmony_ci	 insrdi	$t1,$h0hi,51,0		# h0>>51
626e1051a39Sopenharmony_ci	addc	$h3lo,$h3lo,$t0
627e1051a39Sopenharmony_ci	addze	$h3hi,$h3hi
628e1051a39Sopenharmony_ci	 addc	$h1lo,$h1lo,$t1
629e1051a39Sopenharmony_ci	 addze	$h1hi,$h1hi
630e1051a39Sopenharmony_ci
631e1051a39Sopenharmony_ci	srdi	$t0,$h3lo,51
632e1051a39Sopenharmony_ci	and	$a3,$h3lo,$mask
633e1051a39Sopenharmony_ci	insrdi	$t0,$h3hi,51,0		# h3>>51
634e1051a39Sopenharmony_ci	 srdi	$t1,$h1lo,51
635e1051a39Sopenharmony_ci	 and	$a1,$h1lo,$mask
636e1051a39Sopenharmony_ci	 insrdi	$t1,$h1hi,51,0		# h1>>51
637e1051a39Sopenharmony_ci	addc	$h4lo,$h4lo,$t0
638e1051a39Sopenharmony_ci	addze	$h4hi,$h4hi
639e1051a39Sopenharmony_ci	 add	$a2,$a2,$t1
640e1051a39Sopenharmony_ci
641e1051a39Sopenharmony_ci	srdi	$t0,$h4lo,51
642e1051a39Sopenharmony_ci	and	$a4,$h4lo,$mask
643e1051a39Sopenharmony_ci	insrdi	$t0,$h4hi,51,0
644e1051a39Sopenharmony_ci	mulli	$t0,$t0,19		# (h4 >> 51) * 19
645e1051a39Sopenharmony_ci
646e1051a39Sopenharmony_ci	add	$a0,$a0,$t0
647e1051a39Sopenharmony_ci
648e1051a39Sopenharmony_ci	srdi	$t1,$a2,51
649e1051a39Sopenharmony_ci	and	$a2,$a2,$mask
650e1051a39Sopenharmony_ci	add	$a3,$a3,$t1
651e1051a39Sopenharmony_ci
652e1051a39Sopenharmony_ci	srdi	$t0,$a0,51
653e1051a39Sopenharmony_ci	and	$a0,$a0,$mask
654e1051a39Sopenharmony_ci	add	$a1,$a1,$t0
655e1051a39Sopenharmony_ci
656e1051a39Sopenharmony_ci	std	$a2,16($rp)
657e1051a39Sopenharmony_ci	std	$a3,24($rp)
658e1051a39Sopenharmony_ci	std	$a4,32($rp)
659e1051a39Sopenharmony_ci	std	$a0,0($rp)
660e1051a39Sopenharmony_ci	std	$a1,8($rp)
661e1051a39Sopenharmony_ci
	# restore r21-r31 and release the frame
662e1051a39Sopenharmony_ci	ld	r21,`$FRAME-8*11`($sp)
663e1051a39Sopenharmony_ci	ld	r22,`$FRAME-8*10`($sp)
664e1051a39Sopenharmony_ci	ld	r23,`$FRAME-8*9`($sp)
665e1051a39Sopenharmony_ci	ld	r24,`$FRAME-8*8`($sp)
666e1051a39Sopenharmony_ci	ld	r25,`$FRAME-8*7`($sp)
667e1051a39Sopenharmony_ci	ld	r26,`$FRAME-8*6`($sp)
668e1051a39Sopenharmony_ci	ld	r27,`$FRAME-8*5`($sp)
669e1051a39Sopenharmony_ci	ld	r28,`$FRAME-8*4`($sp)
670e1051a39Sopenharmony_ci	ld	r29,`$FRAME-8*3`($sp)
671e1051a39Sopenharmony_ci	ld	r30,`$FRAME-8*2`($sp)
672e1051a39Sopenharmony_ci	ld	r31,`$FRAME-8*1`($sp)
673e1051a39Sopenharmony_ci	addi	$sp,$sp,$FRAME
674e1051a39Sopenharmony_ci	blr
675e1051a39Sopenharmony_ci	.long	0
676e1051a39Sopenharmony_ci	.byte	0,12,4,0,0x80,11,3,0
677e1051a39Sopenharmony_ci	.long	0
678e1051a39Sopenharmony_ci.size	x25519_fe51_mul,.-x25519_fe51_mul
679e1051a39Sopenharmony_ci___
# x25519_fe51_sqr: appends to $code the PPC64 routine that squares the
# five-doubleword value a[0..4] loaded from $ap.
#
# The emitted code saves r21-r31 in a $FRAME-byte stack frame, then builds
# five 128-bit column sums, each kept as a lo/hi register pair (mulld gives
# the low 64 bits, mulhdu the high 64 bits, addc/adde accumulate with carry):
#
#   h0 = a0*a0      + (19*a4)*(2*a1) + (19*a3)*(2*a2)
#   h1 = a1*(2*a0)  + a3*(19*a3)     + (19*a4)*(2*a2)
#   h2 = a2*(2*a0)  + a1*a1          + (2*a3)*(19*a4)
#   h3 = a3*(2*a0)  + a4*(19*a4)     + a2*(2*a1)
#   h4 = a4*(2*a0)  + a3*(2*a1)      + a2*a2
#
# Nothing is stored and no registers are restored in this block; it ends by
# branching to .Lfe51_reduce, a label defined outside this block.
# NOTE(review): presumably that is the reduction/store/epilogue tail shared
# with x25519_fe51_mul - confirm against the label's definition.
680e1051a39Sopenharmony_ci{
# Block-local copies of the register-name variables: the name swaps performed
# further down must not leak into code generated outside this scope.
681e1051a39Sopenharmony_cimy ($a0,$a1,$a2,$a3,$a4,$t0,$t1) = ($a0,$a1,$a2,$a3,$a4,$t0,$t1);
682e1051a39Sopenharmony_ci$code.=<<___;
683e1051a39Sopenharmony_ci.globl	x25519_fe51_sqr
684e1051a39Sopenharmony_ci.type	x25519_fe51_sqr,\@function
685e1051a39Sopenharmony_ci.align	5
686e1051a39Sopenharmony_cix25519_fe51_sqr:
687e1051a39Sopenharmony_ci	stdu	$sp,-$FRAME($sp)
688e1051a39Sopenharmony_ci	std	r21,`$FRAME-8*11`($sp)
689e1051a39Sopenharmony_ci	std	r22,`$FRAME-8*10`($sp)
690e1051a39Sopenharmony_ci	std	r23,`$FRAME-8*9`($sp)
691e1051a39Sopenharmony_ci	std	r24,`$FRAME-8*8`($sp)
692e1051a39Sopenharmony_ci	std	r25,`$FRAME-8*7`($sp)
693e1051a39Sopenharmony_ci	std	r26,`$FRAME-8*6`($sp)
694e1051a39Sopenharmony_ci	std	r27,`$FRAME-8*5`($sp)
695e1051a39Sopenharmony_ci	std	r28,`$FRAME-8*4`($sp)
696e1051a39Sopenharmony_ci	std	r29,`$FRAME-8*3`($sp)
697e1051a39Sopenharmony_ci	std	r30,`$FRAME-8*2`($sp)
698e1051a39Sopenharmony_ci	std	r31,`$FRAME-8*1`($sp)
699e1051a39Sopenharmony_ci
700e1051a39Sopenharmony_ci	ld	$a0,0($ap)
701e1051a39Sopenharmony_ci	ld	$a1,8($ap)
702e1051a39Sopenharmony_ci	ld	$a2,16($ap)
703e1051a39Sopenharmony_ci	ld	$a3,24($ap)
704e1051a39Sopenharmony_ci	ld	$a4,32($ap)
705e1051a39Sopenharmony_ci
706e1051a39Sopenharmony_ci	add	$bi,$a0,$a0		# a[0]*2
707e1051a39Sopenharmony_ci	mulli	$t1,$a4,19		# a[4]*19
708e1051a39Sopenharmony_ci
709e1051a39Sopenharmony_ci	mulld	$h0lo,$a0,$a0
710e1051a39Sopenharmony_ci	mulhdu	$h0hi,$a0,$a0
711e1051a39Sopenharmony_ci	mulld	$h1lo,$a1,$bi
712e1051a39Sopenharmony_ci	mulhdu	$h1hi,$a1,$bi
713e1051a39Sopenharmony_ci	mulld	$h2lo,$a2,$bi
714e1051a39Sopenharmony_ci	mulhdu	$h2hi,$a2,$bi
715e1051a39Sopenharmony_ci	mulld	$h3lo,$a3,$bi
716e1051a39Sopenharmony_ci	mulhdu	$h3hi,$a3,$bi
717e1051a39Sopenharmony_ci	mulld	$h4lo,$a4,$bi
718e1051a39Sopenharmony_ci	mulhdu	$h4hi,$a4,$bi
719e1051a39Sopenharmony_ci	add	$bi,$a1,$a1		# a[1]*2
720e1051a39Sopenharmony_ci___
# Swap the Perl-level names, not the registers: from here on $a4 names the
# register that holds a[4]*19 (computed by the mulli above) and $t1 names the
# register that still holds the original a[4].  The next two instructions
# multiply those two values, and the mulhdu then overwrites the original a[4]
# with the high half, turning that register into an ordinary temporary.
721e1051a39Sopenharmony_ci	($a4,$t1) = ($t1,$a4);
722e1051a39Sopenharmony_ci$code.=<<___;
723e1051a39Sopenharmony_ci	mulld	$t0,$t1,$a4
724e1051a39Sopenharmony_ci	mulhdu	$t1,$t1,$a4
725e1051a39Sopenharmony_ci	addc	$h3lo,$h3lo,$t0
726e1051a39Sopenharmony_ci	adde	$h3hi,$h3hi,$t1
727e1051a39Sopenharmony_ci
728e1051a39Sopenharmony_ci	mulli	$bp,$a3,19		# a[3]*19
729e1051a39Sopenharmony_ci
730e1051a39Sopenharmony_ci	mulld	$t0,$a1,$a1
731e1051a39Sopenharmony_ci	mulhdu	$t1,$a1,$a1
732e1051a39Sopenharmony_ci	addc	$h2lo,$h2lo,$t0
733e1051a39Sopenharmony_ci	adde	$h2hi,$h2hi,$t1
734e1051a39Sopenharmony_ci	mulld	$t0,$a2,$bi
735e1051a39Sopenharmony_ci	mulhdu	$t1,$a2,$bi
736e1051a39Sopenharmony_ci	addc	$h3lo,$h3lo,$t0
737e1051a39Sopenharmony_ci	adde	$h3hi,$h3hi,$t1
738e1051a39Sopenharmony_ci	mulld	$t0,$a3,$bi
739e1051a39Sopenharmony_ci	mulhdu	$t1,$a3,$bi
740e1051a39Sopenharmony_ci	addc	$h4lo,$h4lo,$t0
741e1051a39Sopenharmony_ci	adde	$h4hi,$h4hi,$t1
742e1051a39Sopenharmony_ci	mulld	$t0,$a4,$bi
743e1051a39Sopenharmony_ci	mulhdu	$t1,$a4,$bi
744e1051a39Sopenharmony_ci	add	$bi,$a3,$a3		# a[3]*2
745e1051a39Sopenharmony_ci	addc	$h0lo,$h0lo,$t0
746e1051a39Sopenharmony_ci	adde	$h0hi,$h0hi,$t1
747e1051a39Sopenharmony_ci___
# Same renaming trick for a[3]: $a3 now names the register loaded with
# a[3]*19 (the one called $bp above) and $t1 names the register holding the
# original a[3].  a[3]*2 was already taken into $bi before this point, so the
# original a[3] is only needed for the single a[3]*(a[3]*19) product below.
748e1051a39Sopenharmony_ci	($a3,$t1) = ($bp,$a3);
749e1051a39Sopenharmony_ci$code.=<<___;
750e1051a39Sopenharmony_ci	mulld	$t0,$t1,$a3
751e1051a39Sopenharmony_ci	mulhdu	$t1,$t1,$a3
752e1051a39Sopenharmony_ci	addc	$h1lo,$h1lo,$t0
753e1051a39Sopenharmony_ci	adde	$h1hi,$h1hi,$t1
754e1051a39Sopenharmony_ci	mulld	$t0,$bi,$a4
755e1051a39Sopenharmony_ci	mulhdu	$t1,$bi,$a4
756e1051a39Sopenharmony_ci	add	$bi,$a2,$a2		# a[2]*2
757e1051a39Sopenharmony_ci	addc	$h2lo,$h2lo,$t0
758e1051a39Sopenharmony_ci	adde	$h2hi,$h2hi,$t1
759e1051a39Sopenharmony_ci
760e1051a39Sopenharmony_ci	mulld	$t0,$a2,$a2
761e1051a39Sopenharmony_ci	mulhdu	$t1,$a2,$a2
762e1051a39Sopenharmony_ci	addc	$h4lo,$h4lo,$t0
763e1051a39Sopenharmony_ci	adde	$h4hi,$h4hi,$t1
764e1051a39Sopenharmony_ci	mulld	$t0,$a3,$bi
765e1051a39Sopenharmony_ci	mulhdu	$t1,$a3,$bi
766e1051a39Sopenharmony_ci	addc	$h0lo,$h0lo,$t0
767e1051a39Sopenharmony_ci	adde	$h0hi,$h0hi,$t1
768e1051a39Sopenharmony_ci	mulld	$t0,$a4,$bi
769e1051a39Sopenharmony_ci	mulhdu	$t1,$a4,$bi
770e1051a39Sopenharmony_ci	addc	$h1lo,$h1lo,$t0
771e1051a39Sopenharmony_ci	adde	$h1hi,$h1hi,$t1
772e1051a39Sopenharmony_ci
773e1051a39Sopenharmony_ci	b	.Lfe51_reduce
774e1051a39Sopenharmony_ci	.long	0
775e1051a39Sopenharmony_ci	.byte	0,12,4,0,0x80,11,2,0
776e1051a39Sopenharmony_ci	.long	0
777e1051a39Sopenharmony_ci.size	x25519_fe51_sqr,.-x25519_fe51_sqr
778e1051a39Sopenharmony_ci___
779e1051a39Sopenharmony_ci}
# x25519_fe51_mul121666: appends to $code the PPC64 routine that multiplies
# each of the five doublewords loaded from $ap by the constant 121666.
#
# The constant is assembled in $bi in two steps because it does not fit in a
# single 16-bit immediate: lis supplies 65536>>16 (= 1) as the upper halfword,
# i.e. 65536, and ori ORs in the remaining 121666-65536 (= 56130).  The
# backtick-quoted expressions are plain Perl arithmetic, evaluated by the
# substitution applied to $code at the end of this script.
#
# Each product is kept as a 128-bit lo/hi pair (mulld = low 64 bits, mulhdu =
# high 64 bits) in h0..h4.  As with the prologue-only shape above, r21-r31 are
# saved here but nothing is stored or restored in this block; control
# branches to .Lfe51_reduce, a label defined outside this block.
# NOTE(review): presumably the shared reduction/store/epilogue tail - confirm.
780e1051a39Sopenharmony_ci$code.=<<___;
781e1051a39Sopenharmony_ci.globl	x25519_fe51_mul121666
782e1051a39Sopenharmony_ci.type	x25519_fe51_mul121666,\@function
783e1051a39Sopenharmony_ci.align	5
784e1051a39Sopenharmony_cix25519_fe51_mul121666:
785e1051a39Sopenharmony_ci	stdu	$sp,-$FRAME($sp)
786e1051a39Sopenharmony_ci	std	r21,`$FRAME-8*11`($sp)
787e1051a39Sopenharmony_ci	std	r22,`$FRAME-8*10`($sp)
788e1051a39Sopenharmony_ci	std	r23,`$FRAME-8*9`($sp)
789e1051a39Sopenharmony_ci	std	r24,`$FRAME-8*8`($sp)
790e1051a39Sopenharmony_ci	std	r25,`$FRAME-8*7`($sp)
791e1051a39Sopenharmony_ci	std	r26,`$FRAME-8*6`($sp)
792e1051a39Sopenharmony_ci	std	r27,`$FRAME-8*5`($sp)
793e1051a39Sopenharmony_ci	std	r28,`$FRAME-8*4`($sp)
794e1051a39Sopenharmony_ci	std	r29,`$FRAME-8*3`($sp)
795e1051a39Sopenharmony_ci	std	r30,`$FRAME-8*2`($sp)
796e1051a39Sopenharmony_ci	std	r31,`$FRAME-8*1`($sp)
797e1051a39Sopenharmony_ci
798e1051a39Sopenharmony_ci	lis	$bi,`65536>>16`
799e1051a39Sopenharmony_ci	ori	$bi,$bi,`121666-65536`
800e1051a39Sopenharmony_ci	ld	$a0,0($ap)
801e1051a39Sopenharmony_ci	ld	$a1,8($ap)
802e1051a39Sopenharmony_ci	ld	$a2,16($ap)
803e1051a39Sopenharmony_ci	ld	$a3,24($ap)
804e1051a39Sopenharmony_ci	ld	$a4,32($ap)
805e1051a39Sopenharmony_ci
806e1051a39Sopenharmony_ci	mulld	$h0lo,$a0,$bi		# a[0]*121666
807e1051a39Sopenharmony_ci	mulhdu	$h0hi,$a0,$bi
808e1051a39Sopenharmony_ci	mulld	$h1lo,$a1,$bi		# a[1]*121666
809e1051a39Sopenharmony_ci	mulhdu	$h1hi,$a1,$bi
810e1051a39Sopenharmony_ci	mulld	$h2lo,$a2,$bi		# a[2]*121666
811e1051a39Sopenharmony_ci	mulhdu	$h2hi,$a2,$bi
812e1051a39Sopenharmony_ci	mulld	$h3lo,$a3,$bi		# a[3]*121666
813e1051a39Sopenharmony_ci	mulhdu	$h3hi,$a3,$bi
814e1051a39Sopenharmony_ci	mulld	$h4lo,$a4,$bi		# a[4]*121666
815e1051a39Sopenharmony_ci	mulhdu	$h4hi,$a4,$bi
816e1051a39Sopenharmony_ci
817e1051a39Sopenharmony_ci	b	.Lfe51_reduce
818e1051a39Sopenharmony_ci	.long	0
819e1051a39Sopenharmony_ci	.byte	0,12,4,0,0x80,11,2,0
820e1051a39Sopenharmony_ci	.long	0
821e1051a39Sopenharmony_ci.size	x25519_fe51_mul121666,.-x25519_fe51_mul121666
822e1051a39Sopenharmony_ci___
823e1051a39Sopenharmony_ci}
824e1051a39Sopenharmony_ci
# Final pass over the accumulated assembly text: every backtick-quoted
# fragment (for example a `$FRAME-8*11` stack offset) is evaluated as a Perl
# expression and replaced by its value, for all occurrences in $code.
825e1051a39Sopenharmony_ci$code =~ s/\`([^\`]*)\`/eval $1/gem;
# Write the finished assembly to STDOUT.
826e1051a39Sopenharmony_ciprint $code;
# Close explicitly and die on failure so that an error reported at close time
# (e.g. a failed flush of buffered output) is not silently ignored.
827e1051a39Sopenharmony_ciclose STDOUT or die "error closing STDOUT: $!";
828