#! /usr/bin/env perl
# Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Generator for the 32-bit x86 bignum word primitives.  Each bn_* sub below
# emits one assembler function through the perlasm helpers in x86asm.pl.
#
# Command line, as consumed below:
#   - the LAST argument is popped and used as the output file name;
#   - the (then) first argument is handed to asm_init();
#   - any argument containing -DOPENSSL_IA32_SSE2 enables the MMX/SSE2 code
#     paths, which are selected at run time by testing bit 26 of the first
#     dword of OPENSSL_ia32cap_P.
#
# Style notes:
#   - Helper calls keep the leading '&' on purpose: several mnemonics
#     (and, xor, sub, push, pop, ...) collide with Perl builtins/operators.
#   - Register assignments ($r, $a, $c, ...) are package globals holding
#     register NAMES; they are re-bound inside each sub.
#   - In the generated code MOV, LEA, PUSH and the MMX instructions do not
#     modify EFLAGS, so a conditional jump may be separated from the
#     AND/SUB/DEC that set its flags by such instructions.  Do not reorder.

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";

$output = pop and open STDOUT,">$output";

&asm_init($ARGV[0]);

$sse2=0;
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

&external_label("OPENSSL_ia32cap_P") if ($sse2);

&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");
&bn_sub_part_words("bn_sub_part_words");

&asm_finish();

close STDOUT or die "error closing STDOUT: $!";

# bn_mul_add_words(name)
#
# Emits a function taking (r, a, num, w) as word parameters 0..3 that
# computes r[i] += a[i]*w for i in 0..num-1, propagating the carry, and
# returns the final carry word in eax.
#
# With $sse2 set, an MMX/SSE2 path (8x unrolled loop + one-word loop) is
# emitted in front of the integer path and chosen at run time.
#
# NOTE(review): on the SSE2 path num == 0 falls straight into the one-word
# loop, which processes a word before decrementing the counter.  Callers
# presumably guarantee num > 0 -- confirm.
sub bn_mul_add_words
{
    local($name)=@_;

    &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

    $r="eax";
    $a="edx";
    $c="ecx";

    if ($sse2) {
        &picmeup("eax","OPENSSL_ia32cap_P");
        &bt(&DWP(0,"eax"),26);              # capability bit 26 set?
        &jnc(&label("maw_non_sse2"));       # no: use the integer path

        &mov($r,&wparam(0));
        &mov($a,&wparam(1));
        &mov($c,&wparam(2));
        &movd("mm0",&wparam(3));            # mm0 = w
        &pxor("mm1","mm1");                 # mm1 = carry_in
        &jmp(&label("maw_sse2_entry"));

    &set_label("maw_sse2_unrolled",16);
        &movd("mm3",&DWP(0,$r,"",0));       # mm3 = r[0]
        &paddq("mm1","mm3");                # mm1 = carry_in + r[0]
        &movd("mm2",&DWP(0,$a,"",0));       # mm2 = a[0]
        &pmuludq("mm2","mm0");              # mm2 = w*a[0]
        &movd("mm4",&DWP(4,$a,"",0));       # mm4 = a[1]
        &pmuludq("mm4","mm0");              # mm4 = w*a[1]
        &movd("mm6",&DWP(8,$a,"",0));       # mm6 = a[2]
        &pmuludq("mm6","mm0");              # mm6 = w*a[2]
        &movd("mm7",&DWP(12,$a,"",0));      # mm7 = a[3]
        &pmuludq("mm7","mm0");              # mm7 = w*a[3]
        &paddq("mm1","mm2");                # mm1 = carry_in + r[0] + w*a[0]
        &movd("mm3",&DWP(4,$r,"",0));       # mm3 = r[1]
        &paddq("mm3","mm4");                # mm3 = r[1] + w*a[1]
        &movd("mm5",&DWP(8,$r,"",0));       # mm5 = r[2]
        &paddq("mm5","mm6");                # mm5 = r[2] + w*a[2]
        &movd("mm4",&DWP(12,$r,"",0));      # mm4 = r[3]
        &paddq("mm7","mm4");                # mm7 = r[3] + w*a[3]
        &movd(&DWP(0,$r,"",0),"mm1");
        &movd("mm2",&DWP(16,$a,"",0));      # mm2 = a[4]
        &pmuludq("mm2","mm0");              # mm2 = w*a[4]
        &psrlq("mm1",32);                   # mm1 = carry0
        &movd("mm4",&DWP(20,$a,"",0));      # mm4 = a[5]
        &pmuludq("mm4","mm0");              # mm4 = w*a[5]
        &paddq("mm1","mm3");                # mm1 = carry0 + r[1] + w*a[1]
        &movd("mm6",&DWP(24,$a,"",0));      # mm6 = a[6]
        &pmuludq("mm6","mm0");              # mm6 = w*a[6]
        &movd(&DWP(4,$r,"",0),"mm1");
        &psrlq("mm1",32);                   # mm1 = carry1
        &movd("mm3",&DWP(28,$a,"",0));      # mm3 = a[7]
        &add($a,32);
        &pmuludq("mm3","mm0");              # mm3 = w*a[7]
        &paddq("mm1","mm5");                # mm1 = carry1 + r[2] + w*a[2]
        &movd("mm5",&DWP(16,$r,"",0));      # mm5 = r[4]
        &paddq("mm2","mm5");                # mm2 = r[4] + w*a[4]
        &movd(&DWP(8,$r,"",0),"mm1");
        &psrlq("mm1",32);                   # mm1 = carry2
        &paddq("mm1","mm7");                # mm1 = carry2 + r[3] + w*a[3]
        &movd("mm5",&DWP(20,$r,"",0));      # mm5 = r[5]
        &paddq("mm4","mm5");                # mm4 = r[5] + w*a[5]
        &movd(&DWP(12,$r,"",0),"mm1");
        &psrlq("mm1",32);                   # mm1 = carry3
        &paddq("mm1","mm2");                # mm1 = carry3 + r[4] + w*a[4]
        &movd("mm5",&DWP(24,$r,"",0));      # mm5 = r[6]
        &paddq("mm6","mm5");                # mm6 = r[6] + w*a[6]
        &movd(&DWP(16,$r,"",0),"mm1");
        &psrlq("mm1",32);                   # mm1 = carry4
        &paddq("mm1","mm4");                # mm1 = carry4 + r[5] + w*a[5]
        &movd("mm5",&DWP(28,$r,"",0));      # mm5 = r[7]
        &paddq("mm3","mm5");                # mm3 = r[7] + w*a[7]
        &movd(&DWP(20,$r,"",0),"mm1");
        &psrlq("mm1",32);                   # mm1 = carry5
        &paddq("mm1","mm6");                # mm1 = carry5 + r[6] + w*a[6]
        &movd(&DWP(24,$r,"",0),"mm1");
        &psrlq("mm1",32);                   # mm1 = carry6
        &paddq("mm1","mm3");                # mm1 = carry6 + r[7] + w*a[7]
        &movd(&DWP(28,$r,"",0),"mm1");
        &lea($r,&DWP(32,$r));
        &psrlq("mm1",32);                   # mm1 = carry_out

        &sub($c,8);
        &jz(&label("maw_sse2_exit"));
    &set_label("maw_sse2_entry");
        &test($c,0xfffffff8);               # at least 8 words left?
        &jnz(&label("maw_sse2_unrolled"));

    &set_label("maw_sse2_loop",4);
        &movd("mm2",&DWP(0,$a));            # mm2 = a[i]
        &movd("mm3",&DWP(0,$r));            # mm3 = r[i]
        &pmuludq("mm2","mm0");              # a[i] *= w
        &lea($a,&DWP(4,$a));
        &paddq("mm1","mm3");                # carry += r[i]
        &paddq("mm1","mm2");                # carry += a[i]*w
        &movd(&DWP(0,$r),"mm1");            # r[i] = carry_low
        &sub($c,1);                         # flags consumed by jnz below
        &psrlq("mm1",32);                   # carry = carry_high
        &lea($r,&DWP(4,$r));
        &jnz(&label("maw_sse2_loop"));
    &set_label("maw_sse2_exit");
        &movd("eax","mm1");                 # c = carry_out
        &emms();
        &ret();

    &set_label("maw_non_sse2",16);
    }

    # function_begin prologue, emitted by hand because function_begin_B was
    # used above (the SSE2 path returns without saving these registers).
    &push("ebp");
    &push("ebx");
    &push("esi");
    &push("edi");

    &comment("");
    $Low="eax";
    $High="edx";
    $a="ebx";
    $w="ebp";
    $r="edi";
    $c="esi";

    &xor($c,$c);                            # clear carry
    &mov($r,&wparam(0));

    &mov("ecx",&wparam(2));
    &mov($a,&wparam(1));

    &and("ecx",0xfffffff8);                 # num & ~7: words for the 8x loop
    &mov($w,&wparam(3));

    &push("ecx");                           # Up the stack for a tmp variable

    &jz(&label("maw_finish"));              # flags still from the AND above

    &set_label("maw_loop",16);

    for ($i=0; $i<32; $i+=4) {
        &comment("Round $i");

        &mov("eax",&DWP($i,$a));            # *a
        &mul($w);                           # *a * w
        &add("eax",$c);                     # L(t)+= c
        &adc("edx",0);                      # H(t)+=carry
        &add("eax",&DWP($i,$r));            # L(t)+= *r
        &adc("edx",0);                      # H(t)+=carry
        &mov(&DWP($i,$r),"eax");            # *r= L(t);
        &mov($c,"edx");                     # c=  H(t);
    }

    &comment("");
    &sub("ecx",8);                          # flags consumed by jnz below
    &lea($a,&DWP(32,$a));
    &lea($r,&DWP(32,$r));
    &jnz(&label("maw_loop"));

    &set_label("maw_finish",0);
    &mov("ecx",&wparam(2));                 # get num
    &and("ecx",7);                          # 0..7 leftover words
    &jnz(&label("maw_finish2"));            # helps branch prediction
    &jmp(&label("maw_end"));

    &set_label("maw_finish2",1);
    for ($i=0; $i<7; $i++) {
        &comment("Tail Round $i");
        &mov("eax",&DWP($i*4,$a));          # *a
        &mul($w);                           # *a * w
        &add("eax",$c);                     # L(t)+=c
        &adc("edx",0);                      # H(t)+=carry
        &add("eax",&DWP($i*4,$r));          # L(t)+= *r
        &adc("edx",0);                      # H(t)+=carry
        &dec("ecx") if ($i != 7-1);         # sets ZF for the jz below
        &mov(&DWP($i*4,$r),"eax");          # *r= L(t);
        &mov($c,"edx");                     # c=  H(t);
        &jz(&label("maw_end")) if ($i != 7-1);
    }
    &set_label("maw_end",0);
    &mov("eax",$c);                         # return carry

    &pop("ecx");                            # drop the tmp slot pushed above

    &function_end($name);
}

# bn_mul_words(name)
#
# Emits a function taking (r, a, num, w) as word parameters 0..3 that stores
# the low word of a[i]*w + carry into r[i] for i in 0..num-1 and returns the
# final carry word in eax.
#
# NOTE(review): the SSE2 loop has no num == 0 guard (it stores a word before
# testing the counter).  Callers presumably guarantee num > 0 -- confirm.
sub bn_mul_words
{
    local($name)=@_;

    &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

    $r="eax";
    $a="edx";
    $c="ecx";

    if ($sse2) {
        &picmeup("eax","OPENSSL_ia32cap_P");
        &bt(&DWP(0,"eax"),26);              # capability bit 26 set?
        &jnc(&label("mw_non_sse2"));        # no: use the integer path

        &mov($r,&wparam(0));
        &mov($a,&wparam(1));
        &mov($c,&wparam(2));
        &movd("mm0",&wparam(3));            # mm0 = w
        &pxor("mm1","mm1");                 # mm1 = carry = 0

    &set_label("mw_sse2_loop",16);
        &movd("mm2",&DWP(0,$a));            # mm2 = a[i]
        &pmuludq("mm2","mm0");              # a[i] *= w
        &lea($a,&DWP(4,$a));
        &paddq("mm1","mm2");                # carry += a[i]*w
        &movd(&DWP(0,$r),"mm1");            # r[i] = carry_low
        &sub($c,1);                         # flags consumed by jnz below
        &psrlq("mm1",32);                   # carry = carry_high
        &lea($r,&DWP(4,$r));
        &jnz(&label("mw_sse2_loop"));

        &movd("eax","mm1");                 # return carry
        &emms();
        &ret();
    &set_label("mw_non_sse2",16);
    }

    # function_begin prologue, emitted by hand (see bn_mul_add_words).
    &push("ebp");
    &push("ebx");
    &push("esi");
    &push("edi");

    &comment("");
    $Low="eax";
    $High="edx";
    $a="ebx";
    $w="ecx";
    $r="edi";
    $c="esi";
    $num="ebp";

    &xor($c,$c);                            # clear carry
    &mov($r,&wparam(0));
    &mov($a,&wparam(1));
    &mov($num,&wparam(2));
    &mov($w,&wparam(3));

    &and($num,0xfffffff8);                  # num & ~7: words for the 8x loop
    &jz(&label("mw_finish"));

    &set_label("mw_loop",0);
    for ($i=0; $i<32; $i+=4) {
        &comment("Round $i");

        &mov("eax",&DWP($i,$a,"",0));       # *a
        &mul($w);                           # *a * w
        &add("eax",$c);                     # L(t)+=c

        &adc("edx",0);                      # H(t)+=carry
        &mov(&DWP($i,$r,"",0),"eax");       # *r= L(t);

        &mov($c,"edx");                     # c=  H(t);
    }

    &comment("");
    &add($a,32);
    &add($r,32);
    &sub($num,8);
    &jz(&label("mw_finish"));
    &jmp(&label("mw_loop"));

    &set_label("mw_finish",0);
    &mov($num,&wparam(2));                  # get num
    &and($num,7);                           # 0..7 leftover words
    &jnz(&label("mw_finish2"));
    &jmp(&label("mw_end"));

    &set_label("mw_finish2",1);
    for ($i=0; $i<7; $i++) {
        &comment("Tail Round $i");
        &mov("eax",&DWP($i*4,$a,"",0));     # *a
        &mul($w);                           # *a * w
        &add("eax",$c);                     # L(t)+=c
        &adc("edx",0);                      # H(t)+=carry
        &mov(&DWP($i*4,$r,"",0),"eax");     # *r= L(t);
        &mov($c,"edx");                     # c=  H(t);
        &dec($num) if ($i != 7-1);
        &jz(&label("mw_end")) if ($i != 7-1);
    }
    &set_label("mw_end",0);
    &mov("eax",$c);                         # return carry

    &function_end($name);
}

# bn_sqr_words(name)
#
# Emits a function taking (r, a, num) as word parameters 0..2 that writes
# the double-word square of a[i] to r[2*i] (low) and r[2*i+1] (high) for
# i in 0..num-1.  No return value is set up.
#
# NOTE(review): the SSE2 loop has no num == 0 guard.  Callers presumably
# guarantee num > 0 -- confirm.
sub bn_sqr_words
{
    local($name)=@_;

    &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

    $r="eax";
    $a="edx";
    $c="ecx";

    if ($sse2) {
        &picmeup("eax","OPENSSL_ia32cap_P");
        &bt(&DWP(0,"eax"),26);              # capability bit 26 set?
        &jnc(&label("sqr_non_sse2"));       # no: use the integer path

        &mov($r,&wparam(0));
        &mov($a,&wparam(1));
        &mov($c,&wparam(2));

    &set_label("sqr_sse2_loop",16);
        &movd("mm0",&DWP(0,$a));            # mm0 = a[i]
        &pmuludq("mm0","mm0");              # a[i] *= a[i]
        &lea($a,&DWP(4,$a));                # a++
        &movq(&QWP(0,$r),"mm0");            # r[i] = a[i]*a[i]
        &sub($c,1);                         # flags consumed by jnz below
        &lea($r,&DWP(8,$r));                # r += 2
        &jnz(&label("sqr_sse2_loop"));

        &emms();
        &ret();
    &set_label("sqr_non_sse2",16);
    }

    # function_begin prologue, emitted by hand (see bn_mul_add_words).
    &push("ebp");
    &push("ebx");
    &push("esi");
    &push("edi");

    &comment("");
    $r="esi";
    $a="edi";
    $num="ebx";

    &mov($r,&wparam(0));
    &mov($a,&wparam(1));
    &mov($num,&wparam(2));

    &and($num,0xfffffff8);                  # num & ~7: words for the 8x loop
    &jz(&label("sw_finish"));

    &set_label("sw_loop",0);
    for ($i=0; $i<32; $i+=4) {
        &comment("Round $i");
        &mov("eax",&DWP($i,$a,"",0));       # *a
        &mul("eax");                        # *a * *a
        &mov(&DWP($i*2,$r,"",0),"eax");     # low word
        &mov(&DWP($i*2+4,$r,"",0),"edx");   # high word
    }

    &comment("");
    &add($a,32);
    &add($r,64);
    &sub($num,8);
    &jnz(&label("sw_loop"));

    &set_label("sw_finish",0);
    &mov($num,&wparam(2));                  # get num
    &and($num,7);                           # 0..7 leftover words
    &jz(&label("sw_end"));

    for ($i=0; $i<7; $i++) {
        &comment("Tail Round $i");
        &mov("eax",&DWP($i*4,$a,"",0));     # *a
        &mul("eax");                        # *a * *a
        &mov(&DWP($i*8,$r,"",0),"eax");     # low word
        &dec($num) if ($i != 7-1);          # sets ZF for the jz below
        &mov(&DWP($i*8+4,$r,"",0),"edx");   # high word
        &jz(&label("sw_end")) if ($i != 7-1);
    }
    &set_label("sw_end",0);

    &function_end($name);
}

# bn_div_words(name)
#
# Emits a function that loads word parameter 0 into edx, parameter 1 into
# eax and parameter 2 into ecx, then executes DIV ecx (edx:eax / ecx; the
# quotient is left in eax) and returns.  No registers are saved.
sub bn_div_words
{
    local($name)=@_;

    &function_begin_B($name,"");
    &mov("edx",&wparam(0));                 # high word of the dividend
    &mov("eax",&wparam(1));                 # low word of the dividend
    &mov("ecx",&wparam(2));                 # divisor
    &div("ecx");
    &ret();
    &function_end_B($name);
}

# bn_add_words(name)
#
# Emits a function taking (r, a, b, num) as word parameters 0..3 that
# computes r[i] = a[i] + b[i] + carry for i in 0..num-1.  The running carry
# lives in eax, so it is also the return value.
sub bn_add_words
{
    local($name)=@_;

    &function_begin($name,"");

    &comment("");
    $a="esi";
    $b="edi";
    $c="eax";
    $r="ebx";
    $tmp1="ecx";
    $tmp2="edx";
    $num="ebp";

    &mov($r,&wparam(0));                    # get r
    &mov($a,&wparam(1));                    # get a
    &mov($b,&wparam(2));                    # get b
    &mov($num,&wparam(3));                  # get num
    &xor($c,$c);                            # clear carry
    &and($num,0xfffffff8);                  # num & ~7: words for the 8x loop

    &jz(&label("aw_finish"));

    &set_label("aw_loop",0);
    for ($i=0; $i<8; $i++) {
        &comment("Round $i");

        &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
        &mov($tmp2,&DWP($i*4,$b,"",0));     # *b
        &add($tmp1,$c);                     # add incoming carry
        &mov($c,0);                         # MOV keeps CF intact
        &adc($c,$c);                        # c = CF of the add above
        &add($tmp1,$tmp2);
        &adc($c,0);                         # c += CF of the second add
        &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
    }

    &comment("");
    &add($a,32);
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish",0);
    &mov($num,&wparam(3));                  # get num
    &and($num,7);                           # 0..7 leftover words
    &jz(&label("aw_end"));

    for ($i=0; $i<7; $i++) {
        &comment("Tail Round $i");
        &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
        &mov($tmp2,&DWP($i*4,$b,"",0));     # *b
        &add($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &add($tmp1,$tmp2);
        &adc($c,0);
        &dec($num) if ($i != 6);            # sets ZF for the jz below
        &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
        &jz(&label("aw_end")) if ($i != 6);
    }
    &set_label("aw_end",0);

#   &mov("eax",$c);                         # $c is "eax"

    &function_end($name);
}

# bn_sub_words(name)
#
# Emits a function taking (r, a, b, num) as word parameters 0..3 that
# computes r[i] = a[i] - b[i] - borrow for i in 0..num-1.  The running
# borrow lives in eax, so it is also the return value.
#
# The "aw_" label names are reused from bn_add_words; this relies on the
# perlasm label helpers keeping labels distinct per emitted function
# (presumably reset by function_end -- confirm in x86asm.pl).
sub bn_sub_words
{
    local($name)=@_;

    &function_begin($name,"");

    &comment("");
    $a="esi";
    $b="edi";
    $c="eax";
    $r="ebx";
    $tmp1="ecx";
    $tmp2="edx";
    $num="ebp";

    &mov($r,&wparam(0));                    # get r
    &mov($a,&wparam(1));                    # get a
    &mov($b,&wparam(2));                    # get b
    &mov($num,&wparam(3));                  # get num
    &xor($c,$c);                            # clear borrow
    &and($num,0xfffffff8);                  # num & ~7: words for the 8x loop

    &jz(&label("aw_finish"));

    &set_label("aw_loop",0);
    for ($i=0; $i<8; $i++) {
        &comment("Round $i");

        &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
        &mov($tmp2,&DWP($i*4,$b,"",0));     # *b
        &sub($tmp1,$c);                     # subtract incoming borrow
        &mov($c,0);                         # MOV keeps CF intact
        &adc($c,$c);                        # c = CF (borrow) of the sub above
        &sub($tmp1,$tmp2);
        &adc($c,0);                         # c += borrow of the second sub
        &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
    }

    &comment("");
    &add($a,32);
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish",0);
    &mov($num,&wparam(3));                  # get num
    &and($num,7);                           # 0..7 leftover words
    &jz(&label("aw_end"));

    for ($i=0; $i<7; $i++) {
        &comment("Tail Round $i");
        &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
        &mov($tmp2,&DWP($i*4,$b,"",0));     # *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &dec($num) if ($i != 6);            # sets ZF for the jz below
        &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
        &jz(&label("aw_end")) if ($i != 6);
    }
    &set_label("aw_end",0);

#   &mov("eax",$c);                         # $c is "eax"

    &function_end($name);
}

# bn_sub_part_words(name)
#
# Emits a function taking (r, a, b, num, dl) as word parameters 0..4.
#
#   1. The first num words are handled like bn_sub_words
#      (r[i] = a[i] - b[i] - borrow), except that the tail advances a, b
#      and r word by word so they point just past the common part.
#   2. dl == 0: done.
#   3. dl <  0: for the next -dl words, r[i] = 0 - b[i] - borrow.
#   4. dl >  0: for the next dl words, r[i] = a[i] - borrow; as soon as a
#      word produces no borrow the code jumps into a plain copy loop
#      (labels pw_nc* / pw_tail_nc*) and the borrow is reported as 0.
#
# The running borrow lives in eax, so it is also the return value.
sub bn_sub_part_words
{
    local($name)=@_;

    &function_begin($name,"");

    &comment("");
    $a="esi";
    $b="edi";
    $c="eax";
    $r="ebx";
    $tmp1="ecx";
    $tmp2="edx";
    $num="ebp";

    &mov($r,&wparam(0));                    # get r
    &mov($a,&wparam(1));                    # get a
    &mov($b,&wparam(2));                    # get b
    &mov($num,&wparam(3));                  # get num
    &xor($c,$c);                            # clear borrow
    &and($num,0xfffffff8);                  # num & ~7: words for the 8x loop

    &jz(&label("aw_finish"));

    &set_label("aw_loop",0);
    for ($i=0; $i<8; $i++) {
        &comment("Round $i");

        &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
        &mov($tmp2,&DWP($i*4,$b,"",0));     # *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
    }

    &comment("");
    &add($a,32);
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish",0);
    &mov($num,&wparam(3));                  # get num
    &and($num,7);                           # 0..7 leftover words
    &jz(&label("aw_end"));

    for ($i=0; $i<7; $i++) {
        &comment("Tail Round $i");
        &mov($tmp1,&DWP(0,$a,"",0));        # *a
        &mov($tmp2,&DWP(0,$b,"",0));        # *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &mov(&DWP(0,$r,"",0),$tmp1);        # *r
        # Advance the pointers here (instead of using $i*4 offsets) so the
        # dl handling below starts right after the common part.
        &add($a, 4);
        &add($b, 4);
        &add($r, 4);
        &dec($num) if ($i != 6);
        &jz(&label("aw_end")) if ($i != 6);
    }
    &set_label("aw_end",0);

    &cmp(&wparam(4),0);
    &je(&label("pw_end"));

    &mov($num,&wparam(4));                  # get dl
    &cmp($num,0);
    &je(&label("pw_end"));
    &jge(&label("pw_pos"));

    &comment("pw_neg");
    &mov($tmp2,0);
    &sub($tmp2,$num);
    &mov($num,$tmp2);                       # num = -dl
    &and($num,0xfffffff8);                  # (-dl) & ~7: words for the 8x loop
    &jz(&label("pw_neg_finish"));

    &set_label("pw_neg_loop",0);
    for ($i=0; $i<8; $i++) {
        &comment("dl<0 Round $i");

        &mov($tmp1,0);
        &mov($tmp2,&DWP($i*4,$b,"",0));     # *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
    }

    &comment("");
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("pw_neg_loop"));

    &set_label("pw_neg_finish",0);
    &mov($tmp2,&wparam(4));                 # get dl
    &mov($num,0);
    &sub($num,$tmp2);                       # num = -dl
    &and($num,7);                           # 0..7 leftover words
    &jz(&label("pw_end"));

    for ($i=0; $i<7; $i++) {
        &comment("dl<0 Tail Round $i");
        &mov($tmp1,0);
        &mov($tmp2,&DWP($i*4,$b,"",0));     # *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &dec($num) if ($i != 6);            # sets ZF for the jz below
        &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
        &jz(&label("pw_end")) if ($i != 6);
    }

    &jmp(&label("pw_end"));

    &set_label("pw_pos",0);

    &and($num,0xfffffff8);                  # dl & ~7: words for the 8x loop
    &jz(&label("pw_pos_finish"));

    &set_label("pw_pos_loop",0);

    for ($i=0; $i<8; $i++) {
        &comment("dl>0 Round $i");

        &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
        &sub($tmp1,$c);
        &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
        &jnc(&label("pw_nc".$i));           # no borrow: switch to plain copy
    }

    &comment("");
    &add($a,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("pw_pos_loop"));

    &set_label("pw_pos_finish",0);
    &mov($num,&wparam(4));                  # get dl
    &and($num,7);                           # 0..7 leftover words
    &jz(&label("pw_end"));

    for ($i=0; $i<7; $i++) {
        &comment("dl>0 Tail Round $i");
        &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
        &sub($tmp1,$c);
        &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
        &jnc(&label("pw_tail_nc".$i));      # no borrow: switch to plain copy
        &dec($num) if ($i != 6);
        &jz(&label("pw_end")) if ($i != 6);
    }
    &mov($c,1);                             # borrow survived all seven words
    &jmp(&label("pw_end"));

    # Plain copy loop.  The pw_nc<i> labels sit AFTER the copy of word i so
    # the borrow loops above can jump in once word i has been stored.
    &set_label("pw_nc_loop",0);
    for ($i=0; $i<8; $i++) {
        &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
        &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
        &set_label("pw_nc".$i,0);
    }

    &comment("");
    &add($a,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("pw_nc_loop"));

    &mov($num,&wparam(4));                  # get dl
    &and($num,7);                           # 0..7 leftover words
    &jz(&label("pw_nc_end"));

    for ($i=0; $i<7; $i++) {
        &mov($tmp1,&DWP($i*4,$a,"",0));     # *a
        &mov(&DWP($i*4,$r,"",0),$tmp1);     # *r
        &set_label("pw_tail_nc".$i,0);
        &dec($num) if ($i != 6);
        &jz(&label("pw_nc_end")) if ($i != 6);
    }

    &set_label("pw_nc_end",0);
    &mov($c,0);                             # copy path ends with no borrow

    &set_label("pw_end",0);

#   &mov("eax",$c);                         # $c is "eax"

    &function_end($name);
}