#! /usr/bin/env perl
# Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Driver: generates the x86 "comba" multiplication and squaring routines
# by calling the perlasm helpers loaded from x86asm.pl.

# Directory part of the script path (including the trailing separator),
# or the empty string when the script was invoked without a directory.
# It is used to locate x86asm.pl regardless of the current directory.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";

# The last command-line argument, when present, names the output file.
# Everything the generator prints goes to STDOUT, so STDOUT is redirected
# to that file.  Failing to open it is fatal: otherwise the generated
# code would silently end up on the inherited STDOUT.
$output = pop;
if ($output)
	{
	open(STDOUT,">",$output) or die "can't open $output: $!";
	}

# The first remaining argument is handed to asm_init() unchanged
# (NOTE(review): presumably the assembler flavour - confirm in x86asm.pl).
&asm_init($ARGV[0]);

&bn_mul_comba("bn_mul_comba8",8);
&bn_mul_comba("bn_mul_comba4",4);
&bn_sqr_comba("bn_sqr_comba8",8);
&bn_sqr_comba("bn_sqr_comba4",4);

&asm_finish();

# Buffered write errors only surface when the handle is closed.
close STDOUT or die "error closing STDOUT: $!";

# mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emit one multiply-accumulate step for a[$ai]*b[$bi]: "mul edx" followed
# by add/adc/adc of the result (edx:eax) into the accumulator registers
# $c0 (low), $c1 and $c2 (high).
#
# eax and edx must already hold the two operands; this routine never
# loads the current operands, it only preloads the ones for the NEXT
# step, interleaved with the additions.
#
#   $a, $b       registers holding the base addresses of a[] and b[]
#   $ai, $bi     indices of the current operands (used in the comment only)
#   $c0,$c1,$c2  accumulator registers, least significant first
#   $pos         0: another product follows for the same result word;
#                   preload a[$na] into eax and b[$nb] into edx
#                1: last product of result word $i; store $c0 to r[$i],
#                   then preload a[$na] / b[$nb]
#                2: last product of the whole routine; store $c0 to r[$i]
#                   and preload nothing
#   $i           index of the result word being accumulated
#   $na, $nb     indices of the operands to preload
#
# For $pos > 0 eax is overwritten with &wparam(0), which is used as the
# r[] base address for the store.  With $pos == 2 eax is left holding it.
sub mul_add_c
	{
	# Lexicals rather than local(): the values are not meant to be seen
	# by the emitter subs, and $a/$b are the variables sort() uses.
	my($ap,$ai,$bp,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("mul a[$ai]*b[$bi]");

	&mul("edx");
	&add($c0,"eax");
	 # eax is free from here on
	 &mov("eax",&DWP($na*4,$ap,"",0)) if $pos == 0;	# load next a
	 &mov("eax",&wparam(0)) if $pos > 0;		# load r[]
	&adc($c1,"edx");
	 # edx is free from here on
	 &mov("edx",&DWP($nb*4,$bp,"",0))
		if $pos == 0 || $pos == 1;		# load next b
	&adc($c2,0);
	 # if pos > 1, this is the last step: store but do not preload
	 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0;	# save r[$i]
	&mov("eax",&DWP($na*4,$ap,"",0)) if $pos == 1;	# load next a
	}

# sqr_add_c($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emit one multiply-accumulate step for a[$ai]*a[$bi], counted once:
# a "mul" followed by add/adc/adc of the result (edx:eax) into the
# accumulator registers $c0 (low), $c1 and $c2 (high).
#
# eax must already hold a[$ai]; when $ai != $bi edx must hold a[$bi].
# When the indices are equal the emitted instruction is "mul eax", so
# edx is not needed as an input.
#
#   $r           register holding the base address of r[]
#   $a           register holding the base address of a[]
#   $ai, $bi     indices of the current operands
#   $c0,$c1,$c2  accumulator registers, least significant first
#   $pos         0: another product follows for the same result word;
#                   preload a[$na] into eax
#                1: last product of result word $i; store $c0 to r[$i],
#                   preload a[$na] into eax and, unless $na == $nb,
#                   a[$nb] into edx
#                2: last product of the whole routine; store $c0 to r[$i]
#                   and preload nothing
#   $i           index of the result word being accumulated
#   $na, $nb     indices of the operands to preload
sub sqr_add_c
	{
	# Lexicals rather than local(): the values are not meant to be seen
	# by the emitter subs, and $a is one of the variables sort() uses.
	my($r,$ap,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("sqr a[$ai]*a[$bi]");

	&mul($ai == $bi ? "eax" : "edx");
	&add($c0,"eax");
	 &mov("eax",&DWP($na*4,$ap,"",0)) if $pos == 0;	# load next a
	&adc($c1,"edx");
	 # the second operand is only needed when the next product is
	 # not itself a square
	 &mov("edx",&DWP($nb*4,$ap,"",0)) if $pos == 1 && $na != $nb;
	&adc($c2,0);
	 # if pos > 1, this is the last step: store but do not preload
	 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;	# save r[$i]
	&mov("eax",&DWP($na*4,$ap,"",0)) if $pos == 1;	# load next a
	}

# sqr_add_c2($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emit one multiply-accumulate step for a[$ai]*a[$bi], counted TWICE:
# the product (edx:eax) is doubled in place with add/adc, the carry out
# of the doubling is added to $c2, and the doubled value is then added
# into the accumulator registers $c0 (low), $c1 and $c2 (high).
#
# eax must already hold a[$ai]; when $ai != $bi edx must hold a[$bi]
# (with equal indices the emitted instruction is "mul eax").
#
#   $r           register holding the base address of r[]
#   $a           register holding the base address of a[]
#   $ai, $bi     indices of the current operands
#   $c0,$c1,$c2  accumulator registers, least significant first
#   $pos         0: another product follows for the same result word
#                1: last product of result word $i; also store $c0 to r[$i]
#                2: store $c0 to r[$i] and preload nothing
#                For $pos 0 and 1, a[$na] is preloaded into eax and,
#                unless $na == $nb, a[$nb] into edx.
#   $i           index of the result word being accumulated
#   $na, $nb     indices of the operands to preload
sub sqr_add_c2
	{
	# Lexicals rather than local(): the values are not meant to be seen
	# by the emitter subs, and $a is one of the variables sort() uses.
	my($r,$ap,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("sqr a[$ai]*a[$bi]");

	&mul($ai == $bi ? "eax" : "edx");

	# double the product, catching the bit shifted out of edx
	&add("eax","eax");
	&adc("edx","edx");
	&adc($c2,0);

	# accumulate the doubled product
	 &add($c0,"eax");
	&adc($c1,"edx");
	 # emitted between the two adc instructions; eax is free by now
	 &mov("eax",&DWP($na*4,$ap,"",0))
		if $pos == 0 || $pos == 1;		# load next a
	&adc($c2,0);
	&mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;	# save r[$i]
	 # the second operand is only needed when the next product is
	 # not itself a square
	 &mov("edx",&DWP($nb*4,$ap,"",0)) if $pos <= 1 && $na != $nb;
	}

# bn_mul_comba($name,$num)
#
# Emit the complete x86 function $name, which multiplies two $num-word
# arrays a[] and b[] into the 2*$num-word array r[].  The generated
# function takes its arguments through &wparam(): 0 is used as r,
# 1 as a and 2 as b.
#
# The result is produced one word at a time: result word $i accumulates
# every product a[$ai]*b[$bi] with $ai+$bi == $i.  Three registers form
# the accumulator; after each result word is stored their roles rotate,
# so the old middle word becomes the new low word and the register that
# was just stored is reused (after being zeroed) as the new high word.
#
#   $name  name of the function to generate
#   $num   number of 32-bit words in each input operand
sub bn_mul_comba
	{
	my($name,$num)=@_;
	my($ap,$bp)=("esi","edi");		# a[] and b[] base registers
	my($c0,$c1,$c2)=("ebx","ecx","ebp");	# accumulator, low word first
	my($as,$bs,$be)=(0,0,0);	# first a index, first/last b index
	my $tot=$num+$num-1;		# number of result words from the loop
	my $i;				# read after the loop, hence not loop-scoped

	&function_begin_B($name,"");

	# save the registers used below, fetching the arguments in between
	&push("esi");
	 &mov($ap,&wparam(1));
	&push("edi");
	 &mov($bp,&wparam(2));
	&push("ebp");
	 &push("ebx");

	# mul_add_c expects its operands preloaded in eax and edx
	&xor($c0,$c0);
	 &mov("eax",&DWP(0,$ap,"",0));	# load the first word of a
	&xor($c1,$c1);
	 &mov("edx",&DWP(0,$bp,"",0));	# load the first word of b

	for ($i=0; $i<$tot; $i++)
		{
		# In the first $num-1 result words each one starts one word
		# further into a[] and ends one word further into b[];
		# after that the starting index in b[] advances instead.
		my $rising=($i < ($num-1)) ? 1 : 0;
		my($ai,$bi)=($as,$bs);
		my $end=$be+1;

		&comment("################## Calculate word $i");

		for (my $j=$bs; $j<$end; $j++)
			{
			my($v,$na,$nb);

			# fresh high word at the start of every result word
			&xor($c2,$c2) if ($j == $bs);

			if (($j+1) == $end)
				{
				# last product of this word: store it (1), and
				# on the very last word preload nothing (2);
				# the next operands are the first pair of the
				# following result word
				$v=(($i+1) == $tot) ? 2 : 1;
				$na=$as+$rising;
				$nb=$bs+(1-$rising);
				}
			else
				{
				# next product of the same word
				$v=0;
				$na=$ai-1;
				$nb=$bi+1;
				}

			&mul_add_c($ap,$ai,$bp,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
			if ($v)
				{
				# mul_add_c has already emitted the store
				&comment("saved r[$i]");
				($c0,$c1,$c2)=($c1,$c2,$c0);
				}
			$ai--;
			$bi++;
			}

		if ($rising)
			{ $as++; $be++; }
		else
			{ $bs++; }
		}

	# Store the top result word.  eax is expected to still hold the r[]
	# pointer loaded by the final &mul_add_c call (pos 2), so it is not
	# reloaded here.
	&comment("save r[$i]");
	&mov(&DWP($i*4,"eax","",0),$c0);

	&pop("ebx");
	&pop("ebp");
	&pop("edi");
	&pop("esi");
	&ret();
	&function_end_B($name);
	}

# bn_sqr_comba($name,$num)
#
# Emit the complete x86 function $name, which squares the $num-word
# array a[] into the 2*$num-word array r[].  The generated function
# takes its arguments through &wparam(): 0 is used as r and 1 as a.
#
# The result is produced one word at a time: result word $i accumulates
# the products a[$ai]*a[$bi] with $ai+$bi == $i.  Only the pairs with
# $ai >= $bi are generated: an off-diagonal pair goes through
# &sqr_add_c2 (counted twice) and a diagonal pair through &sqr_add_c
# (counted once).  Three registers form the accumulator; after each
# result word is stored their roles rotate.
#
#   $name  name of the function to generate
#   $num   number of 32-bit words in the input operand
sub bn_sqr_comba
	{
	my($name,$num)=@_;
	my($r,$ap)=("edi","esi");		# r[] and a[] base registers
	my($c0,$c1,$c2)=("ebx","ecx","ebp");	# accumulator, low word first
	my($as,$bs,$be)=(0,0,0);	# first index pair / last second index
	my $tot=$num+$num-1;		# number of result words from the loop
	my $i;				# read after the loop, hence not loop-scoped

	&function_begin_B($name,"");

	&push("esi");
	 &push("edi");
	&push("ebp");
	 &push("ebx");
	&mov($r,&wparam(0));
	 &mov($ap,&wparam(1));
	&xor($c0,$c0);
	 &xor($c1,$c1);
	# the first product is a[0]*a[0], so only eax needs preloading
	&mov("eax",&DWP(0,$ap,"",0));	# load the first word

	for ($i=0; $i<$tot; $i++)
		{
		# In the first $num-1 result words the first index of each
		# word's starting pair advances; after that the second does.
		my $rising=($i < ($num-1)) ? 1 : 0;
		my($ai,$bi)=($as,$bs);
		my $end=$be+1;

		&comment("############### Calculate word $i");
		for (my $j=$bs; $j<$end; $j++)
			{
			my($v,$na,$nb);

			# fresh high word at the start of every result word
			&xor($c2,$c2) if ($j == $bs);

			if (($ai-1) < ($bi+1))
				{
				# the following pair would have $ai < $bi, so
				# this is the last product of the word: store
				# it (1), and on the very last word preload
				# nothing (2); the next operands are the first
				# pair of the following result word
				$v=(($i+1) == $tot) ? 2 : 1;
				$na=$as+$rising;
				$nb=$bs+(1-$rising);
				}
			else
				{
				# next pair of the same word
				$v=0;
				$na=$ai-1;
				$nb=$bi+1;
				}

			if ($ai == $bi)
				{
				&sqr_add_c($r,$ap,$ai,$bi,
					$c0,$c1,$c2,$v,$i,$na,$nb);
				}
			else
				{
				&sqr_add_c2($r,$ap,$ai,$bi,
					$c0,$c1,$c2,$v,$i,$na,$nb);
				}

			if ($v)
				{
				# the helper has already emitted the store
				&comment("saved r[$i]");
				($c0,$c1,$c2)=($c1,$c2,$c0);
				last;
				}
			$ai--;
			$bi++;
			}

		if ($rising)
			{ $as++; $be++; }
		else
			{ $bs++; }
		}

	# store the top result word
	&mov(&DWP($i*4,$r,"",0),$c0);
	&pop("ebx");
	&pop("ebp");
	&pop("edi");
	&pop("esi");
	&ret();
	&function_end_B($name);
	}
