#! /usr/bin/env perl
# Copyright 2021-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Amitay Isaacs <amitay@ozlabs.org> and Martin Schwenke
# <martin@meltin.net> for the OpenSSL project.
# ====================================================================
#
# p521 lower-level primitives for PPC64 using vector instructions.
#

use strict;
use warnings;

# Command line: <flavour> [other args ...] [<output file>]
#
# The first argument is the flavour string handed on to ppc-xlate.pl.  Of the
# remaining arguments, the first one that looks like "name.ext" is taken as
# the output file; anything before it is skipped.  If no such argument is
# found, "-" is passed on instead.
my $flavour = shift;
my $output = "";
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
if (!$output) {
	$output = "-";
}

# Locate ppc-xlate.pl, first next to this script and then in
# ../../perlasm/ relative to it.  When $0 carries no directory component
# the prefix is the empty string, so the lookup is relative to the current
# directory (and no "uninitialized value" warning is raised).
my ($xlate, $dir);
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through ppc-xlate.pl.
# Fail loudly if the pipe cannot be started rather than silently printing
# to a closed handle.
open OUT,"| \"$^X\" $xlate $flavour $output"
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Accumulates the generated assembly text; it is printed in one go at the
# end of the script.
my $code = "";

# General-purpose register names used by the helpers below:
#   $sp     - stack pointer
#   $outp   - pointer to the output limbs (used by store_vrs callers)
#   $savelr - not referenced anywhere in this file
#   $savesp - copy of the stack pointer taken by push_vrs/pop_vrs
my ($sp, $outp, $savelr, $savesp) = ("r1", "r3", "r10", "r12");

# Vector register that each routine clears (vspltisw ...,0) and then uses
# as the zero addend for the first vmsumudm of every accumulator.
my $vzero = "v32";

# startproc(NAME) - append the prologue of an assembly routine to $code:
# a .globl directive for NAME, a ".align 5" directive and the NAME: label.
sub startproc($)
{
    my ($name) = @_;

    $code.=<<___;
    .globl ${name}
    .align 5
${name}:

___
}

# endproc(NAME) - append the epilogue of an assembly routine to $code:
# the returning "blr" followed by a .size directive covering everything
# from the NAME label up to this point.
sub endproc($)
{
    my ($name) = @_;

    $code.=<<___;
	blr
	    .size	${name},.-${name}

___
}


# push_vrs(MIN, MAX) - append code that saves registers MIN..MAX on the stack.
#
# The emitted code copies $sp to $savesp, then lowers $sp with stdu by
# 16 bytes per saved register plus one extra 16-byte slot.  Register MAX is
# stored at -16($savesp), MAX-1 at -32($savesp), and so on down to MIN.
# pop_vrs() with the same arguments undoes this.
sub push_vrs($$)
{
	my ($min, $max) = @_;

	my $count = $max - $min + 1;

	# The `...` span below is left in the text on purpose: it is evaluated
	# as a Perl expression by the substitution applied to $code at the end
	# of this script.
	$code.=<<___;
	mr		$savesp,$sp
	stdu		$sp,-16*`$count+1`($sp)

___
	for my $i ($min .. $max) {
		my $mult = $max - $i + 1;
		$code.=<<___;
	stxv		$i,-16*$mult($savesp)
___
	}

	$code.=<<___;

___
}

# pop_vrs(MIN, MAX) - append code that restores registers MIN..MAX saved by
# push_vrs(MIN, MAX).
#
# The emitted code reloads $savesp from 0($sp) (the value stdu stored there
# in push_vrs), reloads each register from the slot push_vrs used for it,
# and finally moves $savesp back into $sp.
sub pop_vrs($$)
{
	my ($min, $max) = @_;

	$code.=<<___;
	ld		$savesp,0($sp)
___
	for my $i ($min .. $max) {
		my $mult = $max - $i + 1;
		$code.=<<___;
	lxv		$i,-16*$mult($savesp)
___
	}

	$code.=<<___;
	mr		$sp,$savesp

___
}

# load_vrs(POINTER, REG_LIST) - append code that loads nine consecutive
# doublewords into vector registers.
#
#   POINTER  - name of the register holding the source address
#   REG_LIST - array reference with (at least) nine register names
#
# Element i of REG_LIST is loaded with lxsd from offset 8*i of POINTER.
sub load_vrs($$)
{
	my ($pointer, $reg_list) = @_;

	for my $i (0 .. 8) {
		my $offset = $i * 8;
		$code.=<<___;
	lxsd		$reg_list->[$i],$offset($pointer)
___
	}

	$code.=<<___;

___
}

# store_vrs(POINTER, REG_LIST) - append code that stores nine vector
# registers to consecutive 16-byte slots.
#
#   POINTER  - name of the register holding the destination address
#   REG_LIST - array reference with (at least) nine register names
#
# Element i of REG_LIST is stored with stxv at offset 16*i of POINTER.
sub store_vrs($$)
{
	my ($pointer, $reg_list) = @_;

	for my $i (0 .. 8) {
		my $offset = $i * 16;
		$code.=<<___;
	stxv		$reg_list->[$i],$offset($pointer)
___
	}

	$code.=<<___;

___
}

# Assembly file header: machine selection and the start of the text section.
$code.=<<___;
.machine	"any"
.text

___

{
	# mul/square common
	#
	# Both routines below read nine doubleword limbs per input operand
	# (load_vrs) and write nine 16-byte accumulators to $outp (store_vrs).
	#
	#   $t1..$t4     - scratch vectors holding operand pairs
	#   $zero, $one  - GPRs holding 0 and 1, combined with mtvsrdd into the
	#                  shift-count vector used to double limbs with vsld
	#   @out         - the nine accumulators, out[0]..out[8]
	#
	# Instruction notes (per the Power ISA):
	#   xxpermdi T,A,B,0b00 builds T from doubleword 0 of A and doubleword 0
	#   of B; vmsumudm T,A,B,C multiplies the matching doublewords of A and
	#   B, adds both products together and adds C.  So one xxpermdi pair
	#   followed by vmsumudm adds two limb products to an accumulator, and
	#   a vmsumudm on two freshly loaded limbs adds a single product.
	#
	# NOTE(review): products whose limb indices sum to 9 or more are added
	# to out[i+j-9] using a doubled operand.  This presumably reflects the
	# reduction 2^522 = 2 (mod 2^521 - 1) for 58-bit limbs used by the C
	# reference implementation - confirm against ecp_nistp521.c.
	my ($t1, $t2, $t3, $t4) = ("v33", "v34", "v44", "v54");
	my ($zero, $one) = ("r8", "r9");
	my @out = map("v$_",(55..63));

	{
		#
		# p521_felem_mul
		#
		# Output pointer in $outp, the two input pointers in r4 and r5.
		#

		my ($in1p, $in2p) = ("r4", "r5");
		my @in1 = map("v$_",(45..53));
		my @in2 = map("v$_",(35..43));

		startproc("p521_felem_mul");

		# Save registers 52..63 for the duration of the routine; they are
		# restored by the matching pop_vrs() below.
		push_vrs(52, 63);

		$code.=<<___;
	vspltisw	$vzero,0

___

		load_vrs($in1p, \@in1);
		load_vrs($in2p, \@in2);

		# First pass: out[k] accumulates in1[i]*in2[j] for all i+j == k,
		# k = 0..8.  The closing li/li/mtvsrdd sets up the shift count
		# for the doubling of in2 that follows.
		$code.=<<___;
	vmsumudm	$out[0],$in1[0],$in2[0],$vzero

	xxpermdi	$t1,$in1[0],$in1[1],0b00
	xxpermdi	$t2,$in2[1],$in2[0],0b00
	vmsumudm	$out[1],$t1,$t2,$vzero

	xxpermdi	$t2,$in2[2],$in2[1],0b00
	vmsumudm	$out[2],$t1,$t2,$vzero
	vmsumudm	$out[2],$in1[2],$in2[0],$out[2]

	xxpermdi	$t2,$in2[3],$in2[2],0b00
	vmsumudm	$out[3],$t1,$t2,$vzero
	xxpermdi	$t3,$in1[2],$in1[3],0b00
	xxpermdi	$t4,$in2[1],$in2[0],0b00
	vmsumudm	$out[3],$t3,$t4,$out[3]

	xxpermdi	$t2,$in2[4],$in2[3],0b00
	vmsumudm	$out[4],$t1,$t2,$vzero
	xxpermdi	$t4,$in2[2],$in2[1],0b00
	vmsumudm	$out[4],$t3,$t4,$out[4]
	vmsumudm	$out[4],$in1[4],$in2[0],$out[4]

	xxpermdi	$t2,$in2[5],$in2[4],0b00
	vmsumudm	$out[5],$t1,$t2,$vzero
	xxpermdi	$t4,$in2[3],$in2[2],0b00
	vmsumudm	$out[5],$t3,$t4,$out[5]

	xxpermdi	$t2,$in2[6],$in2[5],0b00
	vmsumudm	$out[6],$t1,$t2,$vzero
	xxpermdi	$t4,$in2[4],$in2[3],0b00
	vmsumudm	$out[6],$t3,$t4,$out[6]

	xxpermdi	$t2,$in2[7],$in2[6],0b00
	vmsumudm	$out[7],$t1,$t2,$vzero
	xxpermdi	$t4,$in2[5],$in2[4],0b00
	vmsumudm	$out[7],$t3,$t4,$out[7]

	xxpermdi	$t2,$in2[8],$in2[7],0b00
	vmsumudm	$out[8],$t1,$t2,$vzero
	xxpermdi	$t4,$in2[6],$in2[5],0b00
	vmsumudm	$out[8],$t3,$t4,$out[8]

	xxpermdi	$t1,$in1[4],$in1[5],0b00
	xxpermdi	$t2,$in2[1],$in2[0],0b00
	vmsumudm	$out[5],$t1,$t2,$out[5]

	xxpermdi	$t2,$in2[2],$in2[1],0b00
	vmsumudm	$out[6],$t1,$t2,$out[6]
	vmsumudm	$out[6],$in1[6],$in2[0],$out[6]

	xxpermdi	$t2,$in2[3],$in2[2],0b00
	vmsumudm	$out[7],$t1,$t2,$out[7]
	xxpermdi	$t3,$in1[6],$in1[7],0b00
	xxpermdi	$t4,$in2[1],$in2[0],0b00
	vmsumudm	$out[7],$t3,$t4,$out[7]

	xxpermdi	$t2,$in2[4],$in2[3],0b00
	vmsumudm	$out[8],$t1,$t2,$out[8]
	xxpermdi	$t4,$in2[2],$in2[1],0b00
	vmsumudm	$out[8],$t3,$t4,$out[8]
	vmsumudm	$out[8],$in1[8],$in2[0],$out[8]

	li		$zero,0
	li		$one,1
	mtvsrdd		$t1,$one,$zero
___

		# Double every limb of in2 in place (shift left by the count
		# held in $t1).
		for (my $i = 0; $i <= 8; $i++) {
			$code.=<<___;
	vsld		$in2[$i],$in2[$i],$t1
___
		}

		# Second pass: with in2 doubled, add in1[i]*in2[j] for all
		# i+j >= 9 to out[i+j-9].
		$code.=<<___;

	vmsumudm	$out[7],$in1[8],$in2[8],$out[7]

	xxpermdi	$t2,$in2[8],$in2[7],0b00
	xxpermdi	$t1,$in1[7],$in1[8],0b00
	vmsumudm	$out[6],$t1,$t2,$out[6]

	xxpermdi	$t1,$in1[6],$in1[7],0b00
	vmsumudm	$out[5],$t1,$t2,$out[5]
	vmsumudm	$out[5],$in1[8],$in2[6],$out[5]

	xxpermdi	$t1,$in1[5],$in1[6],0b00
	vmsumudm	$out[4],$t1,$t2,$out[4]
	xxpermdi	$t4,$in2[6],$in2[5],0b00
	xxpermdi	$t3,$in1[7],$in1[8],0b00
	vmsumudm	$out[4],$t3,$t4,$out[4]

	xxpermdi	$t1,$in1[4],$in1[5],0b00
	vmsumudm	$out[3],$t1,$t2,$out[3]
	xxpermdi	$t3,$in1[6],$in1[7],0b00
	vmsumudm	$out[3],$t3,$t4,$out[3]
	vmsumudm	$out[3],$in1[8],$in2[4],$out[3]

	xxpermdi	$t1,$in1[3],$in1[4],0b00
	vmsumudm	$out[2],$t1,$t2,$out[2]
	xxpermdi	$t3,$in1[5],$in1[6],0b00
	vmsumudm	$out[2],$t3,$t4,$out[2]

	xxpermdi	$t1,$in1[2],$in1[3],0b00
	vmsumudm	$out[1],$t1,$t2,$out[1]
	xxpermdi	$t3,$in1[4],$in1[5],0b00
	vmsumudm	$out[1],$t3,$t4,$out[1]

	xxpermdi	$t1,$in1[1],$in1[2],0b00
	vmsumudm	$out[0],$t1,$t2,$out[0]
	xxpermdi	$t3,$in1[3],$in1[4],0b00
	vmsumudm	$out[0],$t3,$t4,$out[0]

	xxpermdi	$t2,$in2[4],$in2[3],0b00
	xxpermdi	$t1,$in1[7],$in1[8],0b00
	vmsumudm	$out[2],$t1,$t2,$out[2]

	xxpermdi	$t1,$in1[6],$in1[7],0b00
	vmsumudm	$out[1],$t1,$t2,$out[1]
	vmsumudm	$out[1],$in1[8],$in2[2],$out[1]

	xxpermdi	$t1,$in1[5],$in1[6],0b00
	vmsumudm	$out[0],$t1,$t2,$out[0]
	xxpermdi	$t4,$in2[2],$in2[1],0b00
	xxpermdi	$t3,$in1[7],$in1[8],0b00
	vmsumudm	$out[0],$t3,$t4,$out[0]

___

		store_vrs($outp, \@out);

		pop_vrs(52, 63);

		endproc("p521_felem_mul");
	}

	{
		#
		# p521_felem_square
		#
		# Output pointer in $outp, input pointer in r4.
		# @in holds the input limbs, @inx2 the doubled limbs.
		#

		my ($inp) = ("r4");
		my @in = map("v$_",(45..53));
		my @inx2 = map("v$_",(35..43));

		startproc("p521_felem_square");

		# Save registers 52..63 for the duration of the routine; they are
		# restored by the matching pop_vrs() below.
		push_vrs(52, 63);

		$code.=<<___;
	vspltisw	$vzero,0

___

		load_vrs($inp, \@in);

		# Build the shift-count vector used for doubling.
		$code.=<<___;
	li		$zero,0
	li		$one,1
	mtvsrdd		$t1,$one,$zero
___

		# inx2[i] = in[i] shifted left by the count in $t1 (i.e. doubled).
		for (my $i = 0; $i <= 8; $i++) {
			$code.=<<___;
	vsld		$inx2[$i],$in[$i],$t1
___
		}

		# First pass: out[k] for k = 0..8 from the products with
		# i+j == k, using in[i]*in[i] for the square terms and
		# in[i]*inx2[j] for each pair of equal cross terms.  The four
		# single vmsumudm lines after out[8] add in[i]*inx2[i] for
		# i = 5..8 to out[2*i-9].  $t1 was overwritten by xxpermdi, so
		# the shift count is rebuilt at the end for the next doubling.
		$code.=<<___;
	vmsumudm	$out[0],$in[0],$in[0],$vzero

	vmsumudm	$out[1],$in[0],$inx2[1],$vzero

	xxpermdi	$t1,$in[0],$in[1],0b00
	xxpermdi	$t2,$inx2[2],$in[1],0b00
	vmsumudm	$out[2],$t1,$t2,$vzero

	xxpermdi	$t2,$inx2[3],$inx2[2],0b00
	vmsumudm	$out[3],$t1,$t2,$vzero

	xxpermdi	$t2,$inx2[4],$inx2[3],0b00
	vmsumudm	$out[4],$t1,$t2,$vzero
	vmsumudm	$out[4],$in[2],$in[2],$out[4]

	xxpermdi	$t2,$inx2[5],$inx2[4],0b00
	vmsumudm	$out[5],$t1,$t2,$vzero
	vmsumudm	$out[5],$in[2],$inx2[3],$out[5]

	xxpermdi	$t2,$inx2[6],$inx2[5],0b00
	vmsumudm	$out[6],$t1,$t2,$vzero
	xxpermdi	$t3,$in[2],$in[3],0b00
	xxpermdi	$t4,$inx2[4],$in[3],0b00
	vmsumudm	$out[6],$t3,$t4,$out[6]

	xxpermdi	$t2,$inx2[7],$inx2[6],0b00
	vmsumudm	$out[7],$t1,$t2,$vzero
	xxpermdi	$t4,$inx2[5],$inx2[4],0b00
	vmsumudm	$out[7],$t3,$t4,$out[7]

	xxpermdi	$t2,$inx2[8],$inx2[7],0b00
	vmsumudm	$out[8],$t1,$t2,$vzero
	xxpermdi	$t4,$inx2[6],$inx2[5],0b00
	vmsumudm	$out[8],$t3,$t4,$out[8]
	vmsumudm	$out[8],$in[4],$in[4],$out[8]

	vmsumudm	$out[1],$in[5],$inx2[5],$out[1]

	vmsumudm	$out[3],$in[6],$inx2[6],$out[3]

	vmsumudm	$out[5],$in[7],$inx2[7],$out[5]

	vmsumudm	$out[7],$in[8],$inx2[8],$out[7]

	mtvsrdd		$t1,$one,$zero
___

		# Double inx2[5..8] once more, so they now hold four times the
		# corresponding input limb.
		for (my $i = 5; $i <= 8; $i++) {
			$code.=<<___;
	vsld		$inx2[$i],$inx2[$i],$t1
___
		}

		# Second pass: add the remaining cross products with
		# i+j >= 9 to out[i+j-9], using the quadrupled limbs.
		$code.=<<___;

	vmsumudm	$out[6],$in[7],$inx2[8],$out[6]

	vmsumudm	$out[5],$in[6],$inx2[8],$out[5]

	xxpermdi	$t2,$inx2[8],$inx2[7],0b00
	xxpermdi	$t1,$in[5],$in[6],0b00
	vmsumudm	$out[4],$t1,$t2,$out[4]

	xxpermdi	$t1,$in[4],$in[5],0b00
	vmsumudm	$out[3],$t1,$t2,$out[3]

	xxpermdi	$t1,$in[3],$in[4],0b00
	vmsumudm	$out[2],$t1,$t2,$out[2]
	vmsumudm	$out[2],$in[5],$inx2[6],$out[2]

	xxpermdi	$t1,$in[2],$in[3],0b00
	vmsumudm	$out[1],$t1,$t2,$out[1]
	vmsumudm	$out[1],$in[4],$inx2[6],$out[1]

	xxpermdi	$t1,$in[1],$in[2],0b00
	vmsumudm	$out[0],$t1,$t2,$out[0]
	xxpermdi	$t2,$inx2[6],$inx2[5],0b00
	xxpermdi	$t1,$in[3],$in[4],0b00
	vmsumudm	$out[0],$t1,$t2,$out[0]

___

		store_vrs($outp, \@out);

		pop_vrs(52, 63);

		endproc("p521_felem_square");
	}
}

# Replace every `...` span in the generated text with the result of
# evaluating its contents as a Perl expression (this is what turns e.g.
# the frame-size expression emitted by push_vrs into a number), then write
# the text to STDOUT, which is the pipe into ppc-xlate.pl.  Closing the
# pipe is checked so that a failure in the translator is not lost.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";