#! /usr/bin/env perl
# Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Generator for the x86 "comba" big-number routines.  It emits four
# functions through the x86asm.pl helper layer:
#
#   bn_mul_comba8, bn_mul_comba4  - r[] = a[] * b[] for 8 / 4 words
#   bn_sqr_comba8, bn_sqr_comba4  - r[] = a[] * a[] for 8 / 4 words
#
# Usage: <script> <flavour-args...> <output-file>
# The last argument is taken as the output file; $ARGV[0] is handed to
# asm_init().

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";

# Redirect STDOUT to the output file named by the last argument.  Fail
# loudly if it cannot be opened instead of silently emitting the code on
# the inherited STDOUT.
if ($output = pop) {
    open STDOUT,">$output" or die "can't open $output: $!";
}

&asm_init($ARGV[0]);

&bn_mul_comba("bn_mul_comba8",8);
&bn_mul_comba("bn_mul_comba4",4);
&bn_sqr_comba("bn_sqr_comba8",8);
&bn_sqr_comba("bn_sqr_comba4",4);

&asm_finish();

close STDOUT or die "error closing STDOUT: $!";

# mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emit one multiply-accumulate step, a[ai]*b[bi] added into the three
# accumulator registers $c0 (low), $c1, $c2 (high).  "eax" and "edx"
# must already hold the two operands.
#
#   $a, $b      registers holding the a[] and b[] pointers
#   $ai, $bi    indices of the words being multiplied (used for the
#               emitted comment only)
#   $c0..$c2    accumulator register names
#   $pos        0: more products follow for this result word; pre-load
#                  a[$na] into eax and b[$nb] into edx
#               1: last product for this result word; store $c0 to
#                  r[$i], then pre-load a[$na] / b[$nb]
#               2: last product of the last result word; store $c0 to
#                  r[$i] and leave the r pointer in eax
#   $i          index of the result word being computed
#   $na, $nb    indices of the next a[] / b[] words to pre-load
sub mul_add_c {
    local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("mul a[$ai]*b[$bi]");

    # "eax" and "edx" will always be pre-loaded.
    # &mov("eax",&DWP($ai*4,$a,"",0)) ;
    # &mov("edx",&DWP($bi*4,$b,"",0));

    &mul("edx");
    &add($c0,"eax");
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;    # load next a
    &mov("eax",&wparam(0)) if $pos > 0;                # load r[]
    ###
    &adc($c1,"edx");
    &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0;    # load next b
    &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1;    # load next b
    ###
    &adc($c2,0);
    # if pos > 1, it means it is the last loop
    &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0;       # save r[];
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;    # load next a
}

# sqr_add_c($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emit one accumulate step for the squaring routine where the product is
# added once (bn_sqr_comba calls this for the diagonal term, $ai == $bi).
# "eax" (and "edx" when $ai != $bi) must already hold the operand(s).
#
#   $r          register holding the r[] pointer
#   $a          register holding the a[] pointer
#   $ai, $bi    indices of the words being multiplied
#   $c0..$c2    accumulator register names (low .. high)
#   $pos        0: pre-load a[$na] into eax
#               1: store $c0 to r[$i], pre-load a[$na] into eax and,
#                  when $na != $nb, a[$nb] into edx
#               2: store $c0 to r[$i] only
#   $i          index of the result word being computed
#   $na, $nb    indices of the next a[] words to pre-load
sub sqr_add_c {
    local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("sqr a[$ai]*a[$bi]");

    # "eax" and "edx" will always be pre-loaded.
    # &mov("eax",&DWP($ai*4,$a,"",0)) ;
    # &mov("edx",&DWP($bi*4,$b,"",0));

    if ($ai == $bi) {
        &mul("eax");
    }
    else {
        &mul("edx");
    }
    &add($c0,"eax");
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;    # load next a
    ###
    &adc($c1,"edx");
    &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
    ###
    &adc($c2,0);
    # if pos > 1, it means it is the last loop
    &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;          # save r[];
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;    # load next a
}

# sqr_add_c2($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emit one accumulate step for the squaring routine where the product is
# doubled before being added (bn_sqr_comba calls this for the
# off-diagonal terms, $ai != $bi).  The doubling is done in edx:eax and
# its carry-out is folded into $c2 before the sum is accumulated.
#
# Parameters are as for sqr_add_c, except for $pos:
#   $pos        0: pre-load a[$na] into eax and, when $na != $nb,
#                  a[$nb] into edx
#               1: as 0, and additionally store $c0 to r[$i]
#               2: store $c0 to r[$i] only
sub sqr_add_c2 {
    local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("sqr a[$ai]*a[$bi]");

    # "eax" and "edx" will always be pre-loaded.
    # &mov("eax",&DWP($ai*4,$a,"",0)) ;
    # &mov("edx",&DWP($bi*4,$a,"",0));

    if ($ai == $bi) {
        &mul("eax");
    }
    else {
        &mul("edx");
    }
    &add("eax","eax");
    ###
    &adc("edx","edx");
    ###
    &adc($c2,0);
    &add($c0,"eax");
    &adc($c1,"edx");
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;    # load next a
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;    # load next a
    &adc($c2,0);
    &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;          # save r[];
    &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
    ###
}

# bn_mul_comba($name,$num)
#
# Emit a function called $name that multiplies two $num-word numbers.
# The generated code reads the result pointer from wparam(0), a[] from
# wparam(1) and b[] from wparam(2), and writes 2*$num result words.
#
# The result is produced one word (column) at a time.  For column $i the
# inner loop walks the index pairs ($ai,$bi), with $ai counting down
# from $as and $bi counting up from $bs, and emits one mul_add_c per
# pair.  After each column the accumulator register names are rotated so
# that the carries become the low words of the next column.
sub bn_mul_comba {
    local($name,$num)=@_;
    local($a,$b,$c0,$c1,$c2);
    local($i,$as,$ae,$bs,$be,$ai,$bi);
    local($tot,$end);
    local($j,$v,$na,$nb);

    &function_begin_B($name,"");

    $c0="ebx";
    $c1="ecx";
    $c2="ebp";
    $a="esi";
    $b="edi";

    $as=0;
    $ae=0;
    $bs=0;
    $be=0;
    $tot=$num+$num-1;       # number of columns handled by the loop

    &push("esi");
    &mov($a,&wparam(1));
    &push("edi");
    &mov($b,&wparam(2));
    &push("ebp");
    &push("ebx");

    &xor($c0,$c0);
    &mov("eax",&DWP(0,$a,"",0));    # load the first word of a
    &xor($c1,$c1);
    &mov("edx",&DWP(0,$b,"",0));    # load the first word of b

    for ($i=0; $i<$tot; $i++) {
        $ai=$as;
        $bi=$bs;
        $end=$be+1;

        &comment("################## Calculate word $i");

        for ($j=$bs; $j<$end; $j++) {
            &xor($c2,$c2) if ($j == $bs);

            # $v is the $pos argument of mul_add_c: 0 while more
            # products follow in this column, 1 on the column's last
            # product, 2 on the last product of the last column.
            if (($j+1) == $end) {
                $v=1;
                $v=2 if (($i+1) == $tot);
            }
            else {
                $v=0;
            }

            # Indices of the operands the next step will need: either
            # the next pair in this column or the first pair of the
            # next column.
            if (($j+1) != $end) {
                $na=($ai-1);
                $nb=($bi+1);
            }
            else {
                $na=$as+($i < ($num-1));
                $nb=$bs+($i >= ($num-1));
            }
#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
            &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
            if ($v) {
                &comment("saved r[$i]");
                # &mov("eax",&wparam(0));
                # &mov(&DWP($i*4,"eax","",0),$c0);
                ($c0,$c1,$c2)=($c1,$c2,$c0);
            }
            $ai--;
            $bi++;
        }
        $as++ if ($i < ($num-1));
        $ae++ if ($i >= ($num-1));

        $bs++ if ($i >= ($num-1));
        $be++ if ($i < ($num-1));
    }

    # Store the final carry word.  eax still holds the r pointer loaded
    # by the last mul_add_c ($pos == 2 does not reload eax).
    &comment("save r[$i]");
    # &mov("eax",&wparam(0));
    &mov(&DWP($i*4,"eax","",0),$c0);

    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}

# bn_sqr_comba($name,$num)
#
# Emit a function called $name that squares a $num-word number.  The
# generated code reads the result pointer from wparam(0) and a[] from
# wparam(1), and writes 2*$num result words.
#
# Columns are walked as in bn_mul_comba, but each column stops as soon
# as the next index pair would cross the diagonal: off-diagonal products
# are emitted once via sqr_add_c2 (which doubles them) and the diagonal
# product via sqr_add_c.
sub bn_sqr_comba {
    local($name,$num)=@_;
    local($r,$a,$c0,$c1,$c2);
    local($i,$as,$ae,$bs,$be,$ai,$bi);
    local($tot,$end);
    local($j,$v,$na,$nb);

    &function_begin_B($name,"");

    $c0="ebx";
    $c1="ecx";
    $c2="ebp";
    $a="esi";
    $r="edi";

    &push("esi");
    &push("edi");
    &push("ebp");
    &push("ebx");
    &mov($r,&wparam(0));
    &mov($a,&wparam(1));
    &xor($c0,$c0);
    &xor($c1,$c1);
    &mov("eax",&DWP(0,$a,"",0));    # load the first word

    $as=0;
    $ae=0;
    $bs=0;
    $be=0;
    $tot=$num+$num-1;       # number of columns handled by the loop

    for ($i=0; $i<$tot; $i++) {
        $ai=$as;
        $bi=$bs;
        $end=$be+1;

        &comment("############### Calculate word $i");
        for ($j=$bs; $j<$end; $j++) {
            &xor($c2,$c2) if ($j == $bs);

            # $v is the $pos argument of the sqr_add_c* helpers: 0 while
            # more products follow in this column, 1 when the next pair
            # would cross the diagonal, 2 for that case in the last
            # column.
            if (($ai-1) < ($bi+1)) {
                $v=1;
                $v=2 if ($i+1) == $tot;
            }
            else {
                $v=0;
            }

            # Indices of the operands the next step will need.
            if (!$v) {
                $na=$ai-1;
                $nb=$bi+1;
            }
            else {
                $na=$as+($i < ($num-1));
                $nb=$bs+($i >= ($num-1));
            }

            if ($ai == $bi) {
                &sqr_add_c($r,$a,$ai,$bi,
                    $c0,$c1,$c2,$v,$i,$na,$nb);
            }
            else {
                &sqr_add_c2($r,$a,$ai,$bi,
                    $c0,$c1,$c2,$v,$i,$na,$nb);
            }
            if ($v) {
                &comment("saved r[$i]");
                #&mov(&DWP($i*4,$r,"",0),$c0);
                ($c0,$c1,$c2)=($c1,$c2,$c0);
                last;
            }
            $ai--;
            $bi++;
        }
        $as++ if ($i < ($num-1));
        $ae++ if ($i >= ($num-1));

        $bs++ if ($i >= ($num-1));
        $be++ if ($i < ($num-1));
    }

    # Store the final carry word.
    &mov(&DWP($i*4,$r,"",0),$c0);
    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}