.ident	"sparcv8plus.s, Version 1.4"
.ident	"SPARC v9 ISA artwork by Andy Polyakov <appro@openssl.org>"

/*
 * ====================================================================
 * Copyright 1999-2016 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 * ====================================================================
 */

/*
 * This is my modest contribution to OpenSSL project (see
 * http://www.openssl.org/ for more information about it) and is
 * a drop-in UltraSPARC ISA replacement for crypto/bn/bn_asm.c
 * module. For updates see http://fy.chalmers.se/~appro/hpe/.
 *
 * Questions-n-answers.
 *
 * Q. How to compile?
 * A. With SC4.x/SC5.x:
 *
 *      cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o
 *
 *    and with gcc:
 *
 *      gcc -mcpu=ultrasparc -c bn_asm.sparc.v8plus.S -o bn_asm.o
 *
 *    or if above fails (it does if you have gas installed):
 *
 *      gcc -E bn_asm.sparc.v8plus.S | as -xarch=v8plus /dev/fd/0 -o bn_asm.o
 *
 *    Quick-n-dirty way to fuse the module into the library.
 *    Provided that the library is already configured and built
 *    (in 0.9.2 case with no-asm option):
 *
 *      # cd crypto/bn
 *      # cp /some/place/bn_asm.sparc.v8plus.S .
 *      # cc -xarch=v8plus -c bn_asm.sparc.v8plus.S -o bn_asm.o
 *      # make
 *      # cd ../..
 *      # make; make test
 *
 *    Quick-n-dirty way to get rid of it:
 *
 *      # cd crypto/bn
 *      # touch bn_asm.c
 *      # make
 *      # cd ../..
 *      # make; make test
 *
 * Q. V8plus architecture? What kind of beast is that?
 * A. Well, it's rather a programming model than an architecture...
 *    It's actually v9-compliant, i.e. *any* UltraSPARC, CPU under
 *    special conditions, namely when kernel doesn't preserve upper
 *    32 bits of otherwise 64-bit registers during a context switch.
 *
 * Q. Why just UltraSPARC? What about SuperSPARC?
 * A. Original release did target UltraSPARC only. Now SuperSPARC
 *    version is provided along. Both versions share bn_*comba[48]
 *    implementations (see comment later in code for explanation).
 *    But what's so special about this UltraSPARC implementation?
 *    Why didn't I let compiler do the job? Trouble is that most of
 *    available compilers (well, SC5.0 is the only exception) don't
 *    attempt to take advantage of UltraSPARC's 64-bitness under
 *    32-bit kernels even though it's perfectly possible (see next
 *    question).
 *
 * Q. 64-bit registers under 32-bit kernels? Didn't you just say it
 *    doesn't work?
 * A. You can't address *all* registers as 64-bit wide:-( The catch is
 *    that you actually may rely upon %o0-%o5 and %g1-%g4 being fully
 *    preserved if you're in a leaf function, i.e. such never calling
 *    any other functions. All functions in this module are leaf and
 *    10 registers is a handful. And as a matter of fact non-"comba"
 *    routines don't require even that much and I could even afford to
 *    not allocate own stack frame for 'em:-)
 *
 * Q. What about 64-bit kernels?
 * A. What about 'em? Just kidding:-) Pure 64-bit version is currently
 *    under evaluation and development...
 *
 * Q. What about shared libraries?
 * A. What about 'em? Kidding again:-) Code does *not* contain any
 *    code position dependencies and it's safe to include it into
 *    shared library as is.
 *
 * Q. How much faster does it go?
 * A. Do you have a good benchmark? In either case below is what I
 *    experience with crypto/bn/expspeed.c test program:
 *
 *      v8plus module on U10/300MHz against bn_asm.c compiled with:
 *
 *      cc-5.0 -xarch=v8plus -xO5 -xdepend      +7-12%
 *      cc-4.2 -xarch=v8plus -xO5 -xdepend      +25-35%
 *      egcs-1.1.2 -mcpu=ultrasparc -O3         +35-45%
 *
 *      v8 module on SS10/60MHz against bn_asm.c compiled with:
 *
 *      cc-5.0 -xarch=v8 -xO5 -xdepend          +7-10%
 *      cc-4.2 -xarch=v8 -xO5 -xdepend          +10%
 *      egcs-1.1.2 -mv8 -O3                     +35-45%
 *
 *    As you can see it's damn hard to beat the new Sun C compiler
 *    and it's in first place GNU C users who will appreciate this
 *    assembler implementation:-)
 */

/*
 * Revision history.
114e1051a39Sopenharmony_ci * 115e1051a39Sopenharmony_ci * 1.0 - initial release; 116e1051a39Sopenharmony_ci * 1.1 - new loop unrolling model(*); 117e1051a39Sopenharmony_ci * - some more fine tuning; 118e1051a39Sopenharmony_ci * 1.2 - made gas friendly; 119e1051a39Sopenharmony_ci * - updates to documentation concerning v9; 120e1051a39Sopenharmony_ci * - new performance comparison matrix; 121e1051a39Sopenharmony_ci * 1.3 - fixed problem with /usr/ccs/lib/cpp; 122e1051a39Sopenharmony_ci * 1.4 - native V9 bn_*_comba[48] implementation (15% more efficient) 123e1051a39Sopenharmony_ci * resulting in slight overall performance kick; 124e1051a39Sopenharmony_ci * - some retunes; 125e1051a39Sopenharmony_ci * - support for GNU as added; 126e1051a39Sopenharmony_ci * 127e1051a39Sopenharmony_ci * (*) Originally unrolled loop looked like this: 128e1051a39Sopenharmony_ci * for (;;) { 129e1051a39Sopenharmony_ci * op(p+0); if (--n==0) break; 130e1051a39Sopenharmony_ci * op(p+1); if (--n==0) break; 131e1051a39Sopenharmony_ci * op(p+2); if (--n==0) break; 132e1051a39Sopenharmony_ci * op(p+3); if (--n==0) break; 133e1051a39Sopenharmony_ci * p+=4; 134e1051a39Sopenharmony_ci * } 135e1051a39Sopenharmony_ci * I unroll according to following: 136e1051a39Sopenharmony_ci * while (n&~3) { 137e1051a39Sopenharmony_ci * op(p+0); op(p+1); op(p+2); op(p+3); 138e1051a39Sopenharmony_ci * p+=4; n=-4; 139e1051a39Sopenharmony_ci * } 140e1051a39Sopenharmony_ci * if (n) { 141e1051a39Sopenharmony_ci * op(p+0); if (--n==0) return; 142e1051a39Sopenharmony_ci * op(p+2); if (--n==0) return; 143e1051a39Sopenharmony_ci * op(p+3); return; 144e1051a39Sopenharmony_ci * } 145e1051a39Sopenharmony_ci */ 146e1051a39Sopenharmony_ci 147e1051a39Sopenharmony_ci#if defined(__SUNPRO_C) && defined(__sparcv9) 148e1051a39Sopenharmony_ci /* They've said -xarch=v9 at command line */ 149e1051a39Sopenharmony_ci .register %g2,#scratch 150e1051a39Sopenharmony_ci .register %g3,#scratch 151e1051a39Sopenharmony_ci# define FRAME_SIZE -192 
152e1051a39Sopenharmony_ci#elif defined(__GNUC__) && defined(__arch64__) 153e1051a39Sopenharmony_ci /* They've said -m64 at command line */ 154e1051a39Sopenharmony_ci .register %g2,#scratch 155e1051a39Sopenharmony_ci .register %g3,#scratch 156e1051a39Sopenharmony_ci# define FRAME_SIZE -192 157e1051a39Sopenharmony_ci#else 158e1051a39Sopenharmony_ci# define FRAME_SIZE -96 159e1051a39Sopenharmony_ci#endif 160e1051a39Sopenharmony_ci/* 161e1051a39Sopenharmony_ci * GNU assembler can't stand stuw:-( 162e1051a39Sopenharmony_ci */ 163e1051a39Sopenharmony_ci#define stuw st 164e1051a39Sopenharmony_ci 165e1051a39Sopenharmony_ci.section ".text",#alloc,#execinstr 166e1051a39Sopenharmony_ci.file "bn_asm.sparc.v8plus.S" 167e1051a39Sopenharmony_ci 168e1051a39Sopenharmony_ci.align 32 169e1051a39Sopenharmony_ci 170e1051a39Sopenharmony_ci.global bn_mul_add_words 171e1051a39Sopenharmony_ci/* 172e1051a39Sopenharmony_ci * BN_ULONG bn_mul_add_words(rp,ap,num,w) 173e1051a39Sopenharmony_ci * BN_ULONG *rp,*ap; 174e1051a39Sopenharmony_ci * int num; 175e1051a39Sopenharmony_ci * BN_ULONG w; 176e1051a39Sopenharmony_ci */ 177e1051a39Sopenharmony_cibn_mul_add_words: 178e1051a39Sopenharmony_ci sra %o2,%g0,%o2 ! signx %o2 179e1051a39Sopenharmony_ci brgz,a %o2,.L_bn_mul_add_words_proceed 180e1051a39Sopenharmony_ci lduw [%o1],%g2 181e1051a39Sopenharmony_ci retl 182e1051a39Sopenharmony_ci clr %o0 183e1051a39Sopenharmony_ci nop 184e1051a39Sopenharmony_ci nop 185e1051a39Sopenharmony_ci nop 186e1051a39Sopenharmony_ci 187e1051a39Sopenharmony_ci.L_bn_mul_add_words_proceed: 188e1051a39Sopenharmony_ci srl %o3,%g0,%o3 ! clruw %o3 189e1051a39Sopenharmony_ci andcc %o2,-4,%g0 190e1051a39Sopenharmony_ci bz,pn %icc,.L_bn_mul_add_words_tail 191e1051a39Sopenharmony_ci clr %o5 192e1051a39Sopenharmony_ci 193e1051a39Sopenharmony_ci.L_bn_mul_add_words_loop: ! wow! 32 aligned! 
194e1051a39Sopenharmony_ci lduw [%o0],%g1 195e1051a39Sopenharmony_ci lduw [%o1+4],%g3 196e1051a39Sopenharmony_ci mulx %o3,%g2,%g2 197e1051a39Sopenharmony_ci add %g1,%o5,%o4 198e1051a39Sopenharmony_ci nop 199e1051a39Sopenharmony_ci add %o4,%g2,%o4 200e1051a39Sopenharmony_ci stuw %o4,[%o0] 201e1051a39Sopenharmony_ci srlx %o4,32,%o5 202e1051a39Sopenharmony_ci 203e1051a39Sopenharmony_ci lduw [%o0+4],%g1 204e1051a39Sopenharmony_ci lduw [%o1+8],%g2 205e1051a39Sopenharmony_ci mulx %o3,%g3,%g3 206e1051a39Sopenharmony_ci add %g1,%o5,%o4 207e1051a39Sopenharmony_ci dec 4,%o2 208e1051a39Sopenharmony_ci add %o4,%g3,%o4 209e1051a39Sopenharmony_ci stuw %o4,[%o0+4] 210e1051a39Sopenharmony_ci srlx %o4,32,%o5 211e1051a39Sopenharmony_ci 212e1051a39Sopenharmony_ci lduw [%o0+8],%g1 213e1051a39Sopenharmony_ci lduw [%o1+12],%g3 214e1051a39Sopenharmony_ci mulx %o3,%g2,%g2 215e1051a39Sopenharmony_ci add %g1,%o5,%o4 216e1051a39Sopenharmony_ci inc 16,%o1 217e1051a39Sopenharmony_ci add %o4,%g2,%o4 218e1051a39Sopenharmony_ci stuw %o4,[%o0+8] 219e1051a39Sopenharmony_ci srlx %o4,32,%o5 220e1051a39Sopenharmony_ci 221e1051a39Sopenharmony_ci lduw [%o0+12],%g1 222e1051a39Sopenharmony_ci mulx %o3,%g3,%g3 223e1051a39Sopenharmony_ci add %g1,%o5,%o4 224e1051a39Sopenharmony_ci inc 16,%o0 225e1051a39Sopenharmony_ci add %o4,%g3,%o4 226e1051a39Sopenharmony_ci andcc %o2,-4,%g0 227e1051a39Sopenharmony_ci stuw %o4,[%o0-4] 228e1051a39Sopenharmony_ci srlx %o4,32,%o5 229e1051a39Sopenharmony_ci bnz,a,pt %icc,.L_bn_mul_add_words_loop 230e1051a39Sopenharmony_ci lduw [%o1],%g2 231e1051a39Sopenharmony_ci 232e1051a39Sopenharmony_ci brnz,a,pn %o2,.L_bn_mul_add_words_tail 233e1051a39Sopenharmony_ci lduw [%o1],%g2 234e1051a39Sopenharmony_ci.L_bn_mul_add_words_return: 235e1051a39Sopenharmony_ci retl 236e1051a39Sopenharmony_ci mov %o5,%o0 237e1051a39Sopenharmony_ci 238e1051a39Sopenharmony_ci.L_bn_mul_add_words_tail: 239e1051a39Sopenharmony_ci lduw [%o0],%g1 240e1051a39Sopenharmony_ci mulx %o3,%g2,%g2 
241e1051a39Sopenharmony_ci add %g1,%o5,%o4 242e1051a39Sopenharmony_ci dec %o2 243e1051a39Sopenharmony_ci add %o4,%g2,%o4 244e1051a39Sopenharmony_ci srlx %o4,32,%o5 245e1051a39Sopenharmony_ci brz,pt %o2,.L_bn_mul_add_words_return 246e1051a39Sopenharmony_ci stuw %o4,[%o0] 247e1051a39Sopenharmony_ci 248e1051a39Sopenharmony_ci lduw [%o1+4],%g2 249e1051a39Sopenharmony_ci lduw [%o0+4],%g1 250e1051a39Sopenharmony_ci mulx %o3,%g2,%g2 251e1051a39Sopenharmony_ci add %g1,%o5,%o4 252e1051a39Sopenharmony_ci dec %o2 253e1051a39Sopenharmony_ci add %o4,%g2,%o4 254e1051a39Sopenharmony_ci srlx %o4,32,%o5 255e1051a39Sopenharmony_ci brz,pt %o2,.L_bn_mul_add_words_return 256e1051a39Sopenharmony_ci stuw %o4,[%o0+4] 257e1051a39Sopenharmony_ci 258e1051a39Sopenharmony_ci lduw [%o1+8],%g2 259e1051a39Sopenharmony_ci lduw [%o0+8],%g1 260e1051a39Sopenharmony_ci mulx %o3,%g2,%g2 261e1051a39Sopenharmony_ci add %g1,%o5,%o4 262e1051a39Sopenharmony_ci add %o4,%g2,%o4 263e1051a39Sopenharmony_ci stuw %o4,[%o0+8] 264e1051a39Sopenharmony_ci retl 265e1051a39Sopenharmony_ci srlx %o4,32,%o0 266e1051a39Sopenharmony_ci 267e1051a39Sopenharmony_ci.type bn_mul_add_words,#function 268e1051a39Sopenharmony_ci.size bn_mul_add_words,(.-bn_mul_add_words) 269e1051a39Sopenharmony_ci 270e1051a39Sopenharmony_ci.align 32 271e1051a39Sopenharmony_ci 272e1051a39Sopenharmony_ci.global bn_mul_words 273e1051a39Sopenharmony_ci/* 274e1051a39Sopenharmony_ci * BN_ULONG bn_mul_words(rp,ap,num,w) 275e1051a39Sopenharmony_ci * BN_ULONG *rp,*ap; 276e1051a39Sopenharmony_ci * int num; 277e1051a39Sopenharmony_ci * BN_ULONG w; 278e1051a39Sopenharmony_ci */ 279e1051a39Sopenharmony_cibn_mul_words: 280e1051a39Sopenharmony_ci sra %o2,%g0,%o2 ! 
signx %o2 281e1051a39Sopenharmony_ci brgz,a %o2,.L_bn_mul_words_proceed 282e1051a39Sopenharmony_ci lduw [%o1],%g2 283e1051a39Sopenharmony_ci retl 284e1051a39Sopenharmony_ci clr %o0 285e1051a39Sopenharmony_ci nop 286e1051a39Sopenharmony_ci nop 287e1051a39Sopenharmony_ci nop 288e1051a39Sopenharmony_ci 289e1051a39Sopenharmony_ci.L_bn_mul_words_proceed: 290e1051a39Sopenharmony_ci srl %o3,%g0,%o3 ! clruw %o3 291e1051a39Sopenharmony_ci andcc %o2,-4,%g0 292e1051a39Sopenharmony_ci bz,pn %icc,.L_bn_mul_words_tail 293e1051a39Sopenharmony_ci clr %o5 294e1051a39Sopenharmony_ci 295e1051a39Sopenharmony_ci.L_bn_mul_words_loop: ! wow! 32 aligned! 296e1051a39Sopenharmony_ci lduw [%o1+4],%g3 297e1051a39Sopenharmony_ci mulx %o3,%g2,%g2 298e1051a39Sopenharmony_ci add %g2,%o5,%o4 299e1051a39Sopenharmony_ci nop 300e1051a39Sopenharmony_ci stuw %o4,[%o0] 301e1051a39Sopenharmony_ci srlx %o4,32,%o5 302e1051a39Sopenharmony_ci 303e1051a39Sopenharmony_ci lduw [%o1+8],%g2 304e1051a39Sopenharmony_ci mulx %o3,%g3,%g3 305e1051a39Sopenharmony_ci add %g3,%o5,%o4 306e1051a39Sopenharmony_ci dec 4,%o2 307e1051a39Sopenharmony_ci stuw %o4,[%o0+4] 308e1051a39Sopenharmony_ci srlx %o4,32,%o5 309e1051a39Sopenharmony_ci 310e1051a39Sopenharmony_ci lduw [%o1+12],%g3 311e1051a39Sopenharmony_ci mulx %o3,%g2,%g2 312e1051a39Sopenharmony_ci add %g2,%o5,%o4 313e1051a39Sopenharmony_ci inc 16,%o1 314e1051a39Sopenharmony_ci stuw %o4,[%o0+8] 315e1051a39Sopenharmony_ci srlx %o4,32,%o5 316e1051a39Sopenharmony_ci 317e1051a39Sopenharmony_ci mulx %o3,%g3,%g3 318e1051a39Sopenharmony_ci add %g3,%o5,%o4 319e1051a39Sopenharmony_ci inc 16,%o0 320e1051a39Sopenharmony_ci stuw %o4,[%o0-4] 321e1051a39Sopenharmony_ci srlx %o4,32,%o5 322e1051a39Sopenharmony_ci andcc %o2,-4,%g0 323e1051a39Sopenharmony_ci bnz,a,pt %icc,.L_bn_mul_words_loop 324e1051a39Sopenharmony_ci lduw [%o1],%g2 325e1051a39Sopenharmony_ci nop 326e1051a39Sopenharmony_ci nop 327e1051a39Sopenharmony_ci 328e1051a39Sopenharmony_ci brnz,a,pn %o2,.L_bn_mul_words_tail 
329e1051a39Sopenharmony_ci lduw [%o1],%g2 330e1051a39Sopenharmony_ci.L_bn_mul_words_return: 331e1051a39Sopenharmony_ci retl 332e1051a39Sopenharmony_ci mov %o5,%o0 333e1051a39Sopenharmony_ci 334e1051a39Sopenharmony_ci.L_bn_mul_words_tail: 335e1051a39Sopenharmony_ci mulx %o3,%g2,%g2 336e1051a39Sopenharmony_ci add %g2,%o5,%o4 337e1051a39Sopenharmony_ci dec %o2 338e1051a39Sopenharmony_ci srlx %o4,32,%o5 339e1051a39Sopenharmony_ci brz,pt %o2,.L_bn_mul_words_return 340e1051a39Sopenharmony_ci stuw %o4,[%o0] 341e1051a39Sopenharmony_ci 342e1051a39Sopenharmony_ci lduw [%o1+4],%g2 343e1051a39Sopenharmony_ci mulx %o3,%g2,%g2 344e1051a39Sopenharmony_ci add %g2,%o5,%o4 345e1051a39Sopenharmony_ci dec %o2 346e1051a39Sopenharmony_ci srlx %o4,32,%o5 347e1051a39Sopenharmony_ci brz,pt %o2,.L_bn_mul_words_return 348e1051a39Sopenharmony_ci stuw %o4,[%o0+4] 349e1051a39Sopenharmony_ci 350e1051a39Sopenharmony_ci lduw [%o1+8],%g2 351e1051a39Sopenharmony_ci mulx %o3,%g2,%g2 352e1051a39Sopenharmony_ci add %g2,%o5,%o4 353e1051a39Sopenharmony_ci stuw %o4,[%o0+8] 354e1051a39Sopenharmony_ci retl 355e1051a39Sopenharmony_ci srlx %o4,32,%o0 356e1051a39Sopenharmony_ci 357e1051a39Sopenharmony_ci.type bn_mul_words,#function 358e1051a39Sopenharmony_ci.size bn_mul_words,(.-bn_mul_words) 359e1051a39Sopenharmony_ci 360e1051a39Sopenharmony_ci.align 32 361e1051a39Sopenharmony_ci.global bn_sqr_words 362e1051a39Sopenharmony_ci/* 363e1051a39Sopenharmony_ci * void bn_sqr_words(r,a,n) 364e1051a39Sopenharmony_ci * BN_ULONG *r,*a; 365e1051a39Sopenharmony_ci * int n; 366e1051a39Sopenharmony_ci */ 367e1051a39Sopenharmony_cibn_sqr_words: 368e1051a39Sopenharmony_ci sra %o2,%g0,%o2 ! 
signx %o2 369e1051a39Sopenharmony_ci brgz,a %o2,.L_bn_sqr_words_proceed 370e1051a39Sopenharmony_ci lduw [%o1],%g2 371e1051a39Sopenharmony_ci retl 372e1051a39Sopenharmony_ci clr %o0 373e1051a39Sopenharmony_ci nop 374e1051a39Sopenharmony_ci nop 375e1051a39Sopenharmony_ci nop 376e1051a39Sopenharmony_ci 377e1051a39Sopenharmony_ci.L_bn_sqr_words_proceed: 378e1051a39Sopenharmony_ci andcc %o2,-4,%g0 379e1051a39Sopenharmony_ci nop 380e1051a39Sopenharmony_ci bz,pn %icc,.L_bn_sqr_words_tail 381e1051a39Sopenharmony_ci nop 382e1051a39Sopenharmony_ci 383e1051a39Sopenharmony_ci.L_bn_sqr_words_loop: ! wow! 32 aligned! 384e1051a39Sopenharmony_ci lduw [%o1+4],%g3 385e1051a39Sopenharmony_ci mulx %g2,%g2,%o4 386e1051a39Sopenharmony_ci stuw %o4,[%o0] 387e1051a39Sopenharmony_ci srlx %o4,32,%o5 388e1051a39Sopenharmony_ci stuw %o5,[%o0+4] 389e1051a39Sopenharmony_ci nop 390e1051a39Sopenharmony_ci 391e1051a39Sopenharmony_ci lduw [%o1+8],%g2 392e1051a39Sopenharmony_ci mulx %g3,%g3,%o4 393e1051a39Sopenharmony_ci dec 4,%o2 394e1051a39Sopenharmony_ci stuw %o4,[%o0+8] 395e1051a39Sopenharmony_ci srlx %o4,32,%o5 396e1051a39Sopenharmony_ci stuw %o5,[%o0+12] 397e1051a39Sopenharmony_ci 398e1051a39Sopenharmony_ci lduw [%o1+12],%g3 399e1051a39Sopenharmony_ci mulx %g2,%g2,%o4 400e1051a39Sopenharmony_ci srlx %o4,32,%o5 401e1051a39Sopenharmony_ci stuw %o4,[%o0+16] 402e1051a39Sopenharmony_ci inc 16,%o1 403e1051a39Sopenharmony_ci stuw %o5,[%o0+20] 404e1051a39Sopenharmony_ci 405e1051a39Sopenharmony_ci mulx %g3,%g3,%o4 406e1051a39Sopenharmony_ci inc 32,%o0 407e1051a39Sopenharmony_ci stuw %o4,[%o0-8] 408e1051a39Sopenharmony_ci srlx %o4,32,%o5 409e1051a39Sopenharmony_ci andcc %o2,-4,%g2 410e1051a39Sopenharmony_ci stuw %o5,[%o0-4] 411e1051a39Sopenharmony_ci bnz,a,pt %icc,.L_bn_sqr_words_loop 412e1051a39Sopenharmony_ci lduw [%o1],%g2 413e1051a39Sopenharmony_ci nop 414e1051a39Sopenharmony_ci 415e1051a39Sopenharmony_ci brnz,a,pn %o2,.L_bn_sqr_words_tail 416e1051a39Sopenharmony_ci lduw [%o1],%g2 
417e1051a39Sopenharmony_ci.L_bn_sqr_words_return: 418e1051a39Sopenharmony_ci retl 419e1051a39Sopenharmony_ci clr %o0 420e1051a39Sopenharmony_ci 421e1051a39Sopenharmony_ci.L_bn_sqr_words_tail: 422e1051a39Sopenharmony_ci mulx %g2,%g2,%o4 423e1051a39Sopenharmony_ci dec %o2 424e1051a39Sopenharmony_ci stuw %o4,[%o0] 425e1051a39Sopenharmony_ci srlx %o4,32,%o5 426e1051a39Sopenharmony_ci brz,pt %o2,.L_bn_sqr_words_return 427e1051a39Sopenharmony_ci stuw %o5,[%o0+4] 428e1051a39Sopenharmony_ci 429e1051a39Sopenharmony_ci lduw [%o1+4],%g2 430e1051a39Sopenharmony_ci mulx %g2,%g2,%o4 431e1051a39Sopenharmony_ci dec %o2 432e1051a39Sopenharmony_ci stuw %o4,[%o0+8] 433e1051a39Sopenharmony_ci srlx %o4,32,%o5 434e1051a39Sopenharmony_ci brz,pt %o2,.L_bn_sqr_words_return 435e1051a39Sopenharmony_ci stuw %o5,[%o0+12] 436e1051a39Sopenharmony_ci 437e1051a39Sopenharmony_ci lduw [%o1+8],%g2 438e1051a39Sopenharmony_ci mulx %g2,%g2,%o4 439e1051a39Sopenharmony_ci srlx %o4,32,%o5 440e1051a39Sopenharmony_ci stuw %o4,[%o0+16] 441e1051a39Sopenharmony_ci stuw %o5,[%o0+20] 442e1051a39Sopenharmony_ci retl 443e1051a39Sopenharmony_ci clr %o0 444e1051a39Sopenharmony_ci 445e1051a39Sopenharmony_ci.type bn_sqr_words,#function 446e1051a39Sopenharmony_ci.size bn_sqr_words,(.-bn_sqr_words) 447e1051a39Sopenharmony_ci 448e1051a39Sopenharmony_ci.align 32 449e1051a39Sopenharmony_ci.global bn_div_words 450e1051a39Sopenharmony_ci/* 451e1051a39Sopenharmony_ci * BN_ULONG bn_div_words(h,l,d) 452e1051a39Sopenharmony_ci * BN_ULONG h,l,d; 453e1051a39Sopenharmony_ci */ 454e1051a39Sopenharmony_cibn_div_words: 455e1051a39Sopenharmony_ci sllx %o0,32,%o0 456e1051a39Sopenharmony_ci or %o0,%o1,%o0 457e1051a39Sopenharmony_ci udivx %o0,%o2,%o0 458e1051a39Sopenharmony_ci retl 459e1051a39Sopenharmony_ci srl %o0,%g0,%o0 ! 
clruw %o0 460e1051a39Sopenharmony_ci 461e1051a39Sopenharmony_ci.type bn_div_words,#function 462e1051a39Sopenharmony_ci.size bn_div_words,(.-bn_div_words) 463e1051a39Sopenharmony_ci 464e1051a39Sopenharmony_ci.align 32 465e1051a39Sopenharmony_ci 466e1051a39Sopenharmony_ci.global bn_add_words 467e1051a39Sopenharmony_ci/* 468e1051a39Sopenharmony_ci * BN_ULONG bn_add_words(rp,ap,bp,n) 469e1051a39Sopenharmony_ci * BN_ULONG *rp,*ap,*bp; 470e1051a39Sopenharmony_ci * int n; 471e1051a39Sopenharmony_ci */ 472e1051a39Sopenharmony_cibn_add_words: 473e1051a39Sopenharmony_ci sra %o3,%g0,%o3 ! signx %o3 474e1051a39Sopenharmony_ci brgz,a %o3,.L_bn_add_words_proceed 475e1051a39Sopenharmony_ci lduw [%o1],%o4 476e1051a39Sopenharmony_ci retl 477e1051a39Sopenharmony_ci clr %o0 478e1051a39Sopenharmony_ci 479e1051a39Sopenharmony_ci.L_bn_add_words_proceed: 480e1051a39Sopenharmony_ci andcc %o3,-4,%g0 481e1051a39Sopenharmony_ci bz,pn %icc,.L_bn_add_words_tail 482e1051a39Sopenharmony_ci addcc %g0,0,%g0 ! clear carry flag 483e1051a39Sopenharmony_ci 484e1051a39Sopenharmony_ci.L_bn_add_words_loop: ! wow! 32 aligned! 
485e1051a39Sopenharmony_ci dec 4,%o3 486e1051a39Sopenharmony_ci lduw [%o2],%o5 487e1051a39Sopenharmony_ci lduw [%o1+4],%g1 488e1051a39Sopenharmony_ci lduw [%o2+4],%g2 489e1051a39Sopenharmony_ci lduw [%o1+8],%g3 490e1051a39Sopenharmony_ci lduw [%o2+8],%g4 491e1051a39Sopenharmony_ci addccc %o5,%o4,%o5 492e1051a39Sopenharmony_ci stuw %o5,[%o0] 493e1051a39Sopenharmony_ci 494e1051a39Sopenharmony_ci lduw [%o1+12],%o4 495e1051a39Sopenharmony_ci lduw [%o2+12],%o5 496e1051a39Sopenharmony_ci inc 16,%o1 497e1051a39Sopenharmony_ci addccc %g1,%g2,%g1 498e1051a39Sopenharmony_ci stuw %g1,[%o0+4] 499e1051a39Sopenharmony_ci 500e1051a39Sopenharmony_ci inc 16,%o2 501e1051a39Sopenharmony_ci addccc %g3,%g4,%g3 502e1051a39Sopenharmony_ci stuw %g3,[%o0+8] 503e1051a39Sopenharmony_ci 504e1051a39Sopenharmony_ci inc 16,%o0 505e1051a39Sopenharmony_ci addccc %o5,%o4,%o5 506e1051a39Sopenharmony_ci stuw %o5,[%o0-4] 507e1051a39Sopenharmony_ci and %o3,-4,%g1 508e1051a39Sopenharmony_ci brnz,a,pt %g1,.L_bn_add_words_loop 509e1051a39Sopenharmony_ci lduw [%o1],%o4 510e1051a39Sopenharmony_ci 511e1051a39Sopenharmony_ci brnz,a,pn %o3,.L_bn_add_words_tail 512e1051a39Sopenharmony_ci lduw [%o1],%o4 513e1051a39Sopenharmony_ci.L_bn_add_words_return: 514e1051a39Sopenharmony_ci clr %o0 515e1051a39Sopenharmony_ci retl 516e1051a39Sopenharmony_ci movcs %icc,1,%o0 517e1051a39Sopenharmony_ci nop 518e1051a39Sopenharmony_ci 519e1051a39Sopenharmony_ci.L_bn_add_words_tail: 520e1051a39Sopenharmony_ci lduw [%o2],%o5 521e1051a39Sopenharmony_ci dec %o3 522e1051a39Sopenharmony_ci addccc %o5,%o4,%o5 523e1051a39Sopenharmony_ci brz,pt %o3,.L_bn_add_words_return 524e1051a39Sopenharmony_ci stuw %o5,[%o0] 525e1051a39Sopenharmony_ci 526e1051a39Sopenharmony_ci lduw [%o1+4],%o4 527e1051a39Sopenharmony_ci lduw [%o2+4],%o5 528e1051a39Sopenharmony_ci dec %o3 529e1051a39Sopenharmony_ci addccc %o5,%o4,%o5 530e1051a39Sopenharmony_ci brz,pt %o3,.L_bn_add_words_return 531e1051a39Sopenharmony_ci stuw %o5,[%o0+4] 532e1051a39Sopenharmony_ci 
533e1051a39Sopenharmony_ci lduw [%o1+8],%o4 534e1051a39Sopenharmony_ci lduw [%o2+8],%o5 535e1051a39Sopenharmony_ci addccc %o5,%o4,%o5 536e1051a39Sopenharmony_ci stuw %o5,[%o0+8] 537e1051a39Sopenharmony_ci clr %o0 538e1051a39Sopenharmony_ci retl 539e1051a39Sopenharmony_ci movcs %icc,1,%o0 540e1051a39Sopenharmony_ci 541e1051a39Sopenharmony_ci.type bn_add_words,#function 542e1051a39Sopenharmony_ci.size bn_add_words,(.-bn_add_words) 543e1051a39Sopenharmony_ci 544e1051a39Sopenharmony_ci.global bn_sub_words 545e1051a39Sopenharmony_ci/* 546e1051a39Sopenharmony_ci * BN_ULONG bn_sub_words(rp,ap,bp,n) 547e1051a39Sopenharmony_ci * BN_ULONG *rp,*ap,*bp; 548e1051a39Sopenharmony_ci * int n; 549e1051a39Sopenharmony_ci */ 550e1051a39Sopenharmony_cibn_sub_words: 551e1051a39Sopenharmony_ci sra %o3,%g0,%o3 ! signx %o3 552e1051a39Sopenharmony_ci brgz,a %o3,.L_bn_sub_words_proceed 553e1051a39Sopenharmony_ci lduw [%o1],%o4 554e1051a39Sopenharmony_ci retl 555e1051a39Sopenharmony_ci clr %o0 556e1051a39Sopenharmony_ci 557e1051a39Sopenharmony_ci.L_bn_sub_words_proceed: 558e1051a39Sopenharmony_ci andcc %o3,-4,%g0 559e1051a39Sopenharmony_ci bz,pn %icc,.L_bn_sub_words_tail 560e1051a39Sopenharmony_ci addcc %g0,0,%g0 ! clear carry flag 561e1051a39Sopenharmony_ci 562e1051a39Sopenharmony_ci.L_bn_sub_words_loop: ! wow! 32 aligned! 
563e1051a39Sopenharmony_ci dec 4,%o3 564e1051a39Sopenharmony_ci lduw [%o2],%o5 565e1051a39Sopenharmony_ci lduw [%o1+4],%g1 566e1051a39Sopenharmony_ci lduw [%o2+4],%g2 567e1051a39Sopenharmony_ci lduw [%o1+8],%g3 568e1051a39Sopenharmony_ci lduw [%o2+8],%g4 569e1051a39Sopenharmony_ci subccc %o4,%o5,%o5 570e1051a39Sopenharmony_ci stuw %o5,[%o0] 571e1051a39Sopenharmony_ci 572e1051a39Sopenharmony_ci lduw [%o1+12],%o4 573e1051a39Sopenharmony_ci lduw [%o2+12],%o5 574e1051a39Sopenharmony_ci inc 16,%o1 575e1051a39Sopenharmony_ci subccc %g1,%g2,%g2 576e1051a39Sopenharmony_ci stuw %g2,[%o0+4] 577e1051a39Sopenharmony_ci 578e1051a39Sopenharmony_ci inc 16,%o2 579e1051a39Sopenharmony_ci subccc %g3,%g4,%g4 580e1051a39Sopenharmony_ci stuw %g4,[%o0+8] 581e1051a39Sopenharmony_ci 582e1051a39Sopenharmony_ci inc 16,%o0 583e1051a39Sopenharmony_ci subccc %o4,%o5,%o5 584e1051a39Sopenharmony_ci stuw %o5,[%o0-4] 585e1051a39Sopenharmony_ci and %o3,-4,%g1 586e1051a39Sopenharmony_ci brnz,a,pt %g1,.L_bn_sub_words_loop 587e1051a39Sopenharmony_ci lduw [%o1],%o4 588e1051a39Sopenharmony_ci 589e1051a39Sopenharmony_ci brnz,a,pn %o3,.L_bn_sub_words_tail 590e1051a39Sopenharmony_ci lduw [%o1],%o4 591e1051a39Sopenharmony_ci.L_bn_sub_words_return: 592e1051a39Sopenharmony_ci clr %o0 593e1051a39Sopenharmony_ci retl 594e1051a39Sopenharmony_ci movcs %icc,1,%o0 595e1051a39Sopenharmony_ci nop 596e1051a39Sopenharmony_ci 597e1051a39Sopenharmony_ci.L_bn_sub_words_tail: ! wow! 32 aligned! 
598e1051a39Sopenharmony_ci lduw [%o2],%o5 599e1051a39Sopenharmony_ci dec %o3 600e1051a39Sopenharmony_ci subccc %o4,%o5,%o5 601e1051a39Sopenharmony_ci brz,pt %o3,.L_bn_sub_words_return 602e1051a39Sopenharmony_ci stuw %o5,[%o0] 603e1051a39Sopenharmony_ci 604e1051a39Sopenharmony_ci lduw [%o1+4],%o4 605e1051a39Sopenharmony_ci lduw [%o2+4],%o5 606e1051a39Sopenharmony_ci dec %o3 607e1051a39Sopenharmony_ci subccc %o4,%o5,%o5 608e1051a39Sopenharmony_ci brz,pt %o3,.L_bn_sub_words_return 609e1051a39Sopenharmony_ci stuw %o5,[%o0+4] 610e1051a39Sopenharmony_ci 611e1051a39Sopenharmony_ci lduw [%o1+8],%o4 612e1051a39Sopenharmony_ci lduw [%o2+8],%o5 613e1051a39Sopenharmony_ci subccc %o4,%o5,%o5 614e1051a39Sopenharmony_ci stuw %o5,[%o0+8] 615e1051a39Sopenharmony_ci clr %o0 616e1051a39Sopenharmony_ci retl 617e1051a39Sopenharmony_ci movcs %icc,1,%o0 618e1051a39Sopenharmony_ci 619e1051a39Sopenharmony_ci.type bn_sub_words,#function 620e1051a39Sopenharmony_ci.size bn_sub_words,(.-bn_sub_words) 621e1051a39Sopenharmony_ci 622e1051a39Sopenharmony_ci/* 623e1051a39Sopenharmony_ci * Code below depends on the fact that upper parts of the %l0-%l7 624e1051a39Sopenharmony_ci * and %i0-%i7 are zeroed by kernel after context switch. In 625e1051a39Sopenharmony_ci * previous versions this comment stated that "the trouble is that 626e1051a39Sopenharmony_ci * it's not feasible to implement the mumbo-jumbo in less V9 627e1051a39Sopenharmony_ci * instructions:-(" which apparently isn't true thanks to 628e1051a39Sopenharmony_ci * 'bcs,a %xcc,.+8; inc %rd' pair. But the performance improvement 629e1051a39Sopenharmony_ci * results not from the shorter code, but from elimination of 630e1051a39Sopenharmony_ci * multicycle none-pairable 'rd %y,%rd' instructions. 631e1051a39Sopenharmony_ci * 632e1051a39Sopenharmony_ci * Andy. 633e1051a39Sopenharmony_ci */ 634e1051a39Sopenharmony_ci 635e1051a39Sopenharmony_ci/* 636e1051a39Sopenharmony_ci * Here is register usage map for *all* routines below. 
637e1051a39Sopenharmony_ci */ 638e1051a39Sopenharmony_ci#define t_1 %o0 639e1051a39Sopenharmony_ci#define t_2 %o1 640e1051a39Sopenharmony_ci#define c_12 %o2 641e1051a39Sopenharmony_ci#define c_3 %o3 642e1051a39Sopenharmony_ci 643e1051a39Sopenharmony_ci#define ap(I) [%i1+4*I] 644e1051a39Sopenharmony_ci#define bp(I) [%i2+4*I] 645e1051a39Sopenharmony_ci#define rp(I) [%i0+4*I] 646e1051a39Sopenharmony_ci 647e1051a39Sopenharmony_ci#define a_0 %l0 648e1051a39Sopenharmony_ci#define a_1 %l1 649e1051a39Sopenharmony_ci#define a_2 %l2 650e1051a39Sopenharmony_ci#define a_3 %l3 651e1051a39Sopenharmony_ci#define a_4 %l4 652e1051a39Sopenharmony_ci#define a_5 %l5 653e1051a39Sopenharmony_ci#define a_6 %l6 654e1051a39Sopenharmony_ci#define a_7 %l7 655e1051a39Sopenharmony_ci 656e1051a39Sopenharmony_ci#define b_0 %i3 657e1051a39Sopenharmony_ci#define b_1 %i4 658e1051a39Sopenharmony_ci#define b_2 %i5 659e1051a39Sopenharmony_ci#define b_3 %o4 660e1051a39Sopenharmony_ci#define b_4 %o5 661e1051a39Sopenharmony_ci#define b_5 %o7 662e1051a39Sopenharmony_ci#define b_6 %g1 663e1051a39Sopenharmony_ci#define b_7 %g4 664e1051a39Sopenharmony_ci 665e1051a39Sopenharmony_ci.align 32 666e1051a39Sopenharmony_ci.global bn_mul_comba8 667e1051a39Sopenharmony_ci/* 668e1051a39Sopenharmony_ci * void bn_mul_comba8(r,a,b) 669e1051a39Sopenharmony_ci * BN_ULONG *r,*a,*b; 670e1051a39Sopenharmony_ci */ 671e1051a39Sopenharmony_cibn_mul_comba8: 672e1051a39Sopenharmony_ci save %sp,FRAME_SIZE,%sp 673e1051a39Sopenharmony_ci mov 1,t_2 674e1051a39Sopenharmony_ci lduw ap(0),a_0 675e1051a39Sopenharmony_ci sllx t_2,32,t_2 676e1051a39Sopenharmony_ci lduw bp(0),b_0 != 677e1051a39Sopenharmony_ci lduw bp(1),b_1 678e1051a39Sopenharmony_ci mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3); 679e1051a39Sopenharmony_ci srlx t_1,32,c_12 680e1051a39Sopenharmony_ci stuw t_1,rp(0) !=!r[0]=c1; 681e1051a39Sopenharmony_ci 682e1051a39Sopenharmony_ci lduw ap(1),a_1 683e1051a39Sopenharmony_ci mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1); 
684e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 685e1051a39Sopenharmony_ci clr c_3 != 686e1051a39Sopenharmony_ci bcs,a %xcc,.+8 687e1051a39Sopenharmony_ci add c_3,t_2,c_3 688e1051a39Sopenharmony_ci lduw ap(2),a_2 689e1051a39Sopenharmony_ci mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1); 690e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 691e1051a39Sopenharmony_ci bcs,a %xcc,.+8 692e1051a39Sopenharmony_ci add c_3,t_2,c_3 693e1051a39Sopenharmony_ci srlx t_1,32,c_12 != 694e1051a39Sopenharmony_ci stuw t_1,rp(1) !r[1]=c2; 695e1051a39Sopenharmony_ci or c_12,c_3,c_12 696e1051a39Sopenharmony_ci 697e1051a39Sopenharmony_ci mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2); 698e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 != 699e1051a39Sopenharmony_ci clr c_3 700e1051a39Sopenharmony_ci bcs,a %xcc,.+8 701e1051a39Sopenharmony_ci add c_3,t_2,c_3 702e1051a39Sopenharmony_ci lduw bp(2),b_2 != 703e1051a39Sopenharmony_ci mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2); 704e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 705e1051a39Sopenharmony_ci bcs,a %xcc,.+8 706e1051a39Sopenharmony_ci add c_3,t_2,c_3 != 707e1051a39Sopenharmony_ci lduw bp(3),b_3 708e1051a39Sopenharmony_ci mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2); 709e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 710e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 711e1051a39Sopenharmony_ci add c_3,t_2,c_3 712e1051a39Sopenharmony_ci srlx t_1,32,c_12 713e1051a39Sopenharmony_ci stuw t_1,rp(2) !r[2]=c3; 714e1051a39Sopenharmony_ci or c_12,c_3,c_12 != 715e1051a39Sopenharmony_ci 716e1051a39Sopenharmony_ci mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3); 717e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 718e1051a39Sopenharmony_ci clr c_3 719e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 720e1051a39Sopenharmony_ci add c_3,t_2,c_3 721e1051a39Sopenharmony_ci mulx a_1,b_2,t_1 !=!mul_add_c(a[1],b[2],c1,c2,c3); 722e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 723e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 724e1051a39Sopenharmony_ci add c_3,t_2,c_3 
725e1051a39Sopenharmony_ci lduw ap(3),a_3 726e1051a39Sopenharmony_ci mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3); 727e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 != 728e1051a39Sopenharmony_ci bcs,a %xcc,.+8 729e1051a39Sopenharmony_ci add c_3,t_2,c_3 730e1051a39Sopenharmony_ci lduw ap(4),a_4 731e1051a39Sopenharmony_ci mulx a_3,b_0,t_1 !=!mul_add_c(a[3],b[0],c1,c2,c3);!= 732e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 733e1051a39Sopenharmony_ci bcs,a %xcc,.+8 734e1051a39Sopenharmony_ci add c_3,t_2,c_3 735e1051a39Sopenharmony_ci srlx t_1,32,c_12 != 736e1051a39Sopenharmony_ci stuw t_1,rp(3) !r[3]=c1; 737e1051a39Sopenharmony_ci or c_12,c_3,c_12 738e1051a39Sopenharmony_ci 739e1051a39Sopenharmony_ci mulx a_4,b_0,t_1 !mul_add_c(a[4],b[0],c2,c3,c1); 740e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 != 741e1051a39Sopenharmony_ci clr c_3 742e1051a39Sopenharmony_ci bcs,a %xcc,.+8 743e1051a39Sopenharmony_ci add c_3,t_2,c_3 744e1051a39Sopenharmony_ci mulx a_3,b_1,t_1 !=!mul_add_c(a[3],b[1],c2,c3,c1); 745e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 746e1051a39Sopenharmony_ci bcs,a %xcc,.+8 747e1051a39Sopenharmony_ci add c_3,t_2,c_3 748e1051a39Sopenharmony_ci mulx a_2,b_2,t_1 !=!mul_add_c(a[2],b[2],c2,c3,c1); 749e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 750e1051a39Sopenharmony_ci bcs,a %xcc,.+8 751e1051a39Sopenharmony_ci add c_3,t_2,c_3 752e1051a39Sopenharmony_ci lduw bp(4),b_4 != 753e1051a39Sopenharmony_ci mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1); 754e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 755e1051a39Sopenharmony_ci bcs,a %xcc,.+8 756e1051a39Sopenharmony_ci add c_3,t_2,c_3 != 757e1051a39Sopenharmony_ci lduw bp(5),b_5 758e1051a39Sopenharmony_ci mulx a_0,b_4,t_1 !mul_add_c(a[0],b[4],c2,c3,c1); 759e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 760e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 761e1051a39Sopenharmony_ci add c_3,t_2,c_3 762e1051a39Sopenharmony_ci srlx t_1,32,c_12 763e1051a39Sopenharmony_ci stuw t_1,rp(4) !r[4]=c2; 764e1051a39Sopenharmony_ci or c_12,c_3,c_12 != 
765e1051a39Sopenharmony_ci 766e1051a39Sopenharmony_ci mulx a_0,b_5,t_1 !mul_add_c(a[0],b[5],c3,c1,c2); 767e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 768e1051a39Sopenharmony_ci clr c_3 769e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 770e1051a39Sopenharmony_ci add c_3,t_2,c_3 771e1051a39Sopenharmony_ci mulx a_1,b_4,t_1 !mul_add_c(a[1],b[4],c3,c1,c2); 772e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 773e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 774e1051a39Sopenharmony_ci add c_3,t_2,c_3 775e1051a39Sopenharmony_ci mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2); 776e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 777e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 778e1051a39Sopenharmony_ci add c_3,t_2,c_3 779e1051a39Sopenharmony_ci mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2); 780e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 781e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 782e1051a39Sopenharmony_ci add c_3,t_2,c_3 783e1051a39Sopenharmony_ci lduw ap(5),a_5 784e1051a39Sopenharmony_ci mulx a_4,b_1,t_1 !mul_add_c(a[4],b[1],c3,c1,c2); 785e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 != 786e1051a39Sopenharmony_ci bcs,a %xcc,.+8 787e1051a39Sopenharmony_ci add c_3,t_2,c_3 788e1051a39Sopenharmony_ci lduw ap(6),a_6 789e1051a39Sopenharmony_ci mulx a_5,b_0,t_1 !=!mul_add_c(a[5],b[0],c3,c1,c2); 790e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 791e1051a39Sopenharmony_ci bcs,a %xcc,.+8 792e1051a39Sopenharmony_ci add c_3,t_2,c_3 793e1051a39Sopenharmony_ci srlx t_1,32,c_12 != 794e1051a39Sopenharmony_ci stuw t_1,rp(5) !r[5]=c3; 795e1051a39Sopenharmony_ci or c_12,c_3,c_12 796e1051a39Sopenharmony_ci 797e1051a39Sopenharmony_ci mulx a_6,b_0,t_1 !mul_add_c(a[6],b[0],c1,c2,c3); 798e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 != 799e1051a39Sopenharmony_ci clr c_3 800e1051a39Sopenharmony_ci bcs,a %xcc,.+8 801e1051a39Sopenharmony_ci add c_3,t_2,c_3 802e1051a39Sopenharmony_ci mulx a_5,b_1,t_1 !=!mul_add_c(a[5],b[1],c1,c2,c3); 803e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 804e1051a39Sopenharmony_ci bcs,a %xcc,.+8 
805e1051a39Sopenharmony_ci add c_3,t_2,c_3 806e1051a39Sopenharmony_ci mulx a_4,b_2,t_1 !=!mul_add_c(a[4],b[2],c1,c2,c3); 807e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 808e1051a39Sopenharmony_ci bcs,a %xcc,.+8 809e1051a39Sopenharmony_ci add c_3,t_2,c_3 810e1051a39Sopenharmony_ci mulx a_3,b_3,t_1 !=!mul_add_c(a[3],b[3],c1,c2,c3); 811e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 812e1051a39Sopenharmony_ci bcs,a %xcc,.+8 813e1051a39Sopenharmony_ci add c_3,t_2,c_3 814e1051a39Sopenharmony_ci mulx a_2,b_4,t_1 !=!mul_add_c(a[2],b[4],c1,c2,c3); 815e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 816e1051a39Sopenharmony_ci bcs,a %xcc,.+8 817e1051a39Sopenharmony_ci add c_3,t_2,c_3 818e1051a39Sopenharmony_ci lduw bp(6),b_6 != 819e1051a39Sopenharmony_ci mulx a_1,b_5,t_1 !mul_add_c(a[1],b[5],c1,c2,c3); 820e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 821e1051a39Sopenharmony_ci bcs,a %xcc,.+8 822e1051a39Sopenharmony_ci add c_3,t_2,c_3 != 823e1051a39Sopenharmony_ci lduw bp(7),b_7 824e1051a39Sopenharmony_ci mulx a_0,b_6,t_1 !mul_add_c(a[0],b[6],c1,c2,c3); 825e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 826e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 827e1051a39Sopenharmony_ci add c_3,t_2,c_3 828e1051a39Sopenharmony_ci srlx t_1,32,c_12 829e1051a39Sopenharmony_ci stuw t_1,rp(6) !r[6]=c1; 830e1051a39Sopenharmony_ci or c_12,c_3,c_12 != 831e1051a39Sopenharmony_ci 832e1051a39Sopenharmony_ci mulx a_0,b_7,t_1 !mul_add_c(a[0],b[7],c2,c3,c1); 833e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 834e1051a39Sopenharmony_ci clr c_3 835e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 836e1051a39Sopenharmony_ci add c_3,t_2,c_3 837e1051a39Sopenharmony_ci mulx a_1,b_6,t_1 !mul_add_c(a[1],b[6],c2,c3,c1); 838e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 839e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 840e1051a39Sopenharmony_ci add c_3,t_2,c_3 841e1051a39Sopenharmony_ci mulx a_2,b_5,t_1 !mul_add_c(a[2],b[5],c2,c3,c1); 842e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 843e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 
844e1051a39Sopenharmony_ci add c_3,t_2,c_3 845e1051a39Sopenharmony_ci mulx a_3,b_4,t_1 !mul_add_c(a[3],b[4],c2,c3,c1); 846e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 847e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 848e1051a39Sopenharmony_ci add c_3,t_2,c_3 849e1051a39Sopenharmony_ci mulx a_4,b_3,t_1 !mul_add_c(a[4],b[3],c2,c3,c1); 850e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 851e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 852e1051a39Sopenharmony_ci add c_3,t_2,c_3 853e1051a39Sopenharmony_ci mulx a_5,b_2,t_1 !mul_add_c(a[5],b[2],c2,c3,c1); 854e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 855e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 856e1051a39Sopenharmony_ci add c_3,t_2,c_3 857e1051a39Sopenharmony_ci lduw ap(7),a_7 858e1051a39Sopenharmony_ci mulx a_6,b_1,t_1 !=!mul_add_c(a[6],b[1],c2,c3,c1); 859e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 860e1051a39Sopenharmony_ci bcs,a %xcc,.+8 861e1051a39Sopenharmony_ci add c_3,t_2,c_3 862e1051a39Sopenharmony_ci mulx a_7,b_0,t_1 !=!mul_add_c(a[7],b[0],c2,c3,c1); 863e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 864e1051a39Sopenharmony_ci bcs,a %xcc,.+8 865e1051a39Sopenharmony_ci add c_3,t_2,c_3 866e1051a39Sopenharmony_ci srlx t_1,32,c_12 != 867e1051a39Sopenharmony_ci stuw t_1,rp(7) !r[7]=c2; 868e1051a39Sopenharmony_ci or c_12,c_3,c_12 869e1051a39Sopenharmony_ci 870e1051a39Sopenharmony_ci mulx a_7,b_1,t_1 !=!mul_add_c(a[7],b[1],c3,c1,c2); 871e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 872e1051a39Sopenharmony_ci clr c_3 873e1051a39Sopenharmony_ci bcs,a %xcc,.+8 874e1051a39Sopenharmony_ci add c_3,t_2,c_3 != 875e1051a39Sopenharmony_ci mulx a_6,b_2,t_1 !mul_add_c(a[6],b[2],c3,c1,c2); 876e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 877e1051a39Sopenharmony_ci bcs,a %xcc,.+8 878e1051a39Sopenharmony_ci add c_3,t_2,c_3 != 879e1051a39Sopenharmony_ci mulx a_5,b_3,t_1 !mul_add_c(a[5],b[3],c3,c1,c2); 880e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 881e1051a39Sopenharmony_ci bcs,a %xcc,.+8 882e1051a39Sopenharmony_ci add c_3,t_2,c_3 != 
883e1051a39Sopenharmony_ci mulx a_4,b_4,t_1 !mul_add_c(a[4],b[4],c3,c1,c2); 884e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 885e1051a39Sopenharmony_ci bcs,a %xcc,.+8 886e1051a39Sopenharmony_ci add c_3,t_2,c_3 != 887e1051a39Sopenharmony_ci mulx a_3,b_5,t_1 !mul_add_c(a[3],b[5],c3,c1,c2); 888e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 889e1051a39Sopenharmony_ci bcs,a %xcc,.+8 890e1051a39Sopenharmony_ci add c_3,t_2,c_3 != 891e1051a39Sopenharmony_ci mulx a_2,b_6,t_1 !mul_add_c(a[2],b[6],c3,c1,c2); 892e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 893e1051a39Sopenharmony_ci bcs,a %xcc,.+8 894e1051a39Sopenharmony_ci add c_3,t_2,c_3 != 895e1051a39Sopenharmony_ci mulx a_1,b_7,t_1 !mul_add_c(a[1],b[7],c3,c1,c2); 896e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 897e1051a39Sopenharmony_ci bcs,a %xcc,.+8 898e1051a39Sopenharmony_ci add c_3,t_2,c_3 != 899e1051a39Sopenharmony_ci srlx t_1,32,c_12 900e1051a39Sopenharmony_ci stuw t_1,rp(8) !r[8]=c3; 901e1051a39Sopenharmony_ci or c_12,c_3,c_12 902e1051a39Sopenharmony_ci 903e1051a39Sopenharmony_ci mulx a_2,b_7,t_1 !=!mul_add_c(a[2],b[7],c1,c2,c3); 904e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 905e1051a39Sopenharmony_ci clr c_3 906e1051a39Sopenharmony_ci bcs,a %xcc,.+8 907e1051a39Sopenharmony_ci add c_3,t_2,c_3 != 908e1051a39Sopenharmony_ci mulx a_3,b_6,t_1 !mul_add_c(a[3],b[6],c1,c2,c3); 909e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 910e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 911e1051a39Sopenharmony_ci add c_3,t_2,c_3 912e1051a39Sopenharmony_ci mulx a_4,b_5,t_1 !mul_add_c(a[4],b[5],c1,c2,c3); 913e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 914e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 915e1051a39Sopenharmony_ci add c_3,t_2,c_3 916e1051a39Sopenharmony_ci mulx a_5,b_4,t_1 !mul_add_c(a[5],b[4],c1,c2,c3); 917e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 918e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 919e1051a39Sopenharmony_ci add c_3,t_2,c_3 920e1051a39Sopenharmony_ci mulx a_6,b_3,t_1 !mul_add_c(a[6],b[3],c1,c2,c3); 921e1051a39Sopenharmony_ci addcc 
c_12,t_1,c_12 922e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 923e1051a39Sopenharmony_ci add c_3,t_2,c_3 924e1051a39Sopenharmony_ci mulx a_7,b_2,t_1 !mul_add_c(a[7],b[2],c1,c2,c3); 925e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 926e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 927e1051a39Sopenharmony_ci add c_3,t_2,c_3 928e1051a39Sopenharmony_ci srlx t_1,32,c_12 929e1051a39Sopenharmony_ci stuw t_1,rp(9) !r[9]=c1; 930e1051a39Sopenharmony_ci or c_12,c_3,c_12 != 931e1051a39Sopenharmony_ci 932e1051a39Sopenharmony_ci mulx a_7,b_3,t_1 !mul_add_c(a[7],b[3],c2,c3,c1); 933e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 934e1051a39Sopenharmony_ci clr c_3 935e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 936e1051a39Sopenharmony_ci add c_3,t_2,c_3 937e1051a39Sopenharmony_ci mulx a_6,b_4,t_1 !mul_add_c(a[6],b[4],c2,c3,c1); 938e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 939e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 940e1051a39Sopenharmony_ci add c_3,t_2,c_3 941e1051a39Sopenharmony_ci mulx a_5,b_5,t_1 !mul_add_c(a[5],b[5],c2,c3,c1); 942e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 943e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 944e1051a39Sopenharmony_ci add c_3,t_2,c_3 945e1051a39Sopenharmony_ci mulx a_4,b_6,t_1 !mul_add_c(a[4],b[6],c2,c3,c1); 946e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 947e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 948e1051a39Sopenharmony_ci add c_3,t_2,c_3 949e1051a39Sopenharmony_ci mulx a_3,b_7,t_1 !mul_add_c(a[3],b[7],c2,c3,c1); 950e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 951e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 952e1051a39Sopenharmony_ci add c_3,t_2,c_3 953e1051a39Sopenharmony_ci srlx t_1,32,c_12 954e1051a39Sopenharmony_ci stuw t_1,rp(10) !r[10]=c2; 955e1051a39Sopenharmony_ci or c_12,c_3,c_12 != 956e1051a39Sopenharmony_ci 957e1051a39Sopenharmony_ci mulx a_4,b_7,t_1 !mul_add_c(a[4],b[7],c3,c1,c2); 958e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 959e1051a39Sopenharmony_ci clr c_3 960e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 961e1051a39Sopenharmony_ci add c_3,t_2,c_3 
962e1051a39Sopenharmony_ci mulx a_5,b_6,t_1 !mul_add_c(a[5],b[6],c3,c1,c2); 963e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 964e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 965e1051a39Sopenharmony_ci add c_3,t_2,c_3 966e1051a39Sopenharmony_ci mulx a_6,b_5,t_1 !mul_add_c(a[6],b[5],c3,c1,c2); 967e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 968e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 969e1051a39Sopenharmony_ci add c_3,t_2,c_3 970e1051a39Sopenharmony_ci mulx a_7,b_4,t_1 !mul_add_c(a[7],b[4],c3,c1,c2); 971e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 972e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 973e1051a39Sopenharmony_ci add c_3,t_2,c_3 974e1051a39Sopenharmony_ci srlx t_1,32,c_12 975e1051a39Sopenharmony_ci stuw t_1,rp(11) !r[11]=c3; 976e1051a39Sopenharmony_ci or c_12,c_3,c_12 != 977e1051a39Sopenharmony_ci 978e1051a39Sopenharmony_ci mulx a_7,b_5,t_1 !mul_add_c(a[7],b[5],c1,c2,c3); 979e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 980e1051a39Sopenharmony_ci clr c_3 981e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 982e1051a39Sopenharmony_ci add c_3,t_2,c_3 983e1051a39Sopenharmony_ci mulx a_6,b_6,t_1 !mul_add_c(a[6],b[6],c1,c2,c3); 984e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 985e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 986e1051a39Sopenharmony_ci add c_3,t_2,c_3 987e1051a39Sopenharmony_ci mulx a_5,b_7,t_1 !mul_add_c(a[5],b[7],c1,c2,c3); 988e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 989e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 990e1051a39Sopenharmony_ci add c_3,t_2,c_3 991e1051a39Sopenharmony_ci srlx t_1,32,c_12 992e1051a39Sopenharmony_ci stuw t_1,rp(12) !r[12]=c1; 993e1051a39Sopenharmony_ci or c_12,c_3,c_12 != 994e1051a39Sopenharmony_ci 995e1051a39Sopenharmony_ci mulx a_6,b_7,t_1 !mul_add_c(a[6],b[7],c2,c3,c1); 996e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 997e1051a39Sopenharmony_ci clr c_3 998e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 999e1051a39Sopenharmony_ci add c_3,t_2,c_3 1000e1051a39Sopenharmony_ci mulx a_7,b_6,t_1 !mul_add_c(a[7],b[6],c2,c3,c1); 1001e1051a39Sopenharmony_ci addcc 
c_12,t_1,t_1 1002e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 1003e1051a39Sopenharmony_ci add c_3,t_2,c_3 1004e1051a39Sopenharmony_ci srlx t_1,32,c_12 1005e1051a39Sopenharmony_ci st t_1,rp(13) !r[13]=c2; 1006e1051a39Sopenharmony_ci or c_12,c_3,c_12 != 1007e1051a39Sopenharmony_ci 1008e1051a39Sopenharmony_ci mulx a_7,b_7,t_1 !mul_add_c(a[7],b[7],c3,c1,c2); 1009e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1010e1051a39Sopenharmony_ci srlx t_1,32,c_12 != 1011e1051a39Sopenharmony_ci stuw t_1,rp(14) !r[14]=c3; 1012e1051a39Sopenharmony_ci stuw c_12,rp(15) !r[15]=c1; 1013e1051a39Sopenharmony_ci 1014e1051a39Sopenharmony_ci ret 1015e1051a39Sopenharmony_ci restore %g0,%g0,%o0 != 1016e1051a39Sopenharmony_ci 1017e1051a39Sopenharmony_ci.type bn_mul_comba8,#function 1018e1051a39Sopenharmony_ci.size bn_mul_comba8,(.-bn_mul_comba8) 1019e1051a39Sopenharmony_ci 1020e1051a39Sopenharmony_ci.align 32 1021e1051a39Sopenharmony_ci 1022e1051a39Sopenharmony_ci.global bn_mul_comba4 1023e1051a39Sopenharmony_ci/* 1024e1051a39Sopenharmony_ci * void bn_mul_comba4(r,a,b) 1025e1051a39Sopenharmony_ci * BN_ULONG *r,*a,*b; 1026e1051a39Sopenharmony_ci */ 1027e1051a39Sopenharmony_cibn_mul_comba4: 1028e1051a39Sopenharmony_ci save %sp,FRAME_SIZE,%sp 1029e1051a39Sopenharmony_ci lduw ap(0),a_0 1030e1051a39Sopenharmony_ci mov 1,t_2 1031e1051a39Sopenharmony_ci lduw bp(0),b_0 1032e1051a39Sopenharmony_ci sllx t_2,32,t_2 != 1033e1051a39Sopenharmony_ci lduw bp(1),b_1 1034e1051a39Sopenharmony_ci mulx a_0,b_0,t_1 !mul_add_c(a[0],b[0],c1,c2,c3); 1035e1051a39Sopenharmony_ci srlx t_1,32,c_12 1036e1051a39Sopenharmony_ci stuw t_1,rp(0) !=!r[0]=c1; 1037e1051a39Sopenharmony_ci 1038e1051a39Sopenharmony_ci lduw ap(1),a_1 1039e1051a39Sopenharmony_ci mulx a_0,b_1,t_1 !mul_add_c(a[0],b[1],c2,c3,c1); 1040e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1041e1051a39Sopenharmony_ci clr c_3 != 1042e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1043e1051a39Sopenharmony_ci add c_3,t_2,c_3 1044e1051a39Sopenharmony_ci lduw ap(2),a_2 
1045e1051a39Sopenharmony_ci mulx a_1,b_0,t_1 !=!mul_add_c(a[1],b[0],c2,c3,c1); 1046e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1047e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1048e1051a39Sopenharmony_ci add c_3,t_2,c_3 1049e1051a39Sopenharmony_ci srlx t_1,32,c_12 != 1050e1051a39Sopenharmony_ci stuw t_1,rp(1) !r[1]=c2; 1051e1051a39Sopenharmony_ci or c_12,c_3,c_12 1052e1051a39Sopenharmony_ci 1053e1051a39Sopenharmony_ci mulx a_2,b_0,t_1 !mul_add_c(a[2],b[0],c3,c1,c2); 1054e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 != 1055e1051a39Sopenharmony_ci clr c_3 1056e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1057e1051a39Sopenharmony_ci add c_3,t_2,c_3 1058e1051a39Sopenharmony_ci lduw bp(2),b_2 != 1059e1051a39Sopenharmony_ci mulx a_1,b_1,t_1 !mul_add_c(a[1],b[1],c3,c1,c2); 1060e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1061e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1062e1051a39Sopenharmony_ci add c_3,t_2,c_3 != 1063e1051a39Sopenharmony_ci lduw bp(3),b_3 1064e1051a39Sopenharmony_ci mulx a_0,b_2,t_1 !mul_add_c(a[0],b[2],c3,c1,c2); 1065e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1066e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 1067e1051a39Sopenharmony_ci add c_3,t_2,c_3 1068e1051a39Sopenharmony_ci srlx t_1,32,c_12 1069e1051a39Sopenharmony_ci stuw t_1,rp(2) !r[2]=c3; 1070e1051a39Sopenharmony_ci or c_12,c_3,c_12 != 1071e1051a39Sopenharmony_ci 1072e1051a39Sopenharmony_ci mulx a_0,b_3,t_1 !mul_add_c(a[0],b[3],c1,c2,c3); 1073e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1074e1051a39Sopenharmony_ci clr c_3 1075e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 1076e1051a39Sopenharmony_ci add c_3,t_2,c_3 1077e1051a39Sopenharmony_ci mulx a_1,b_2,t_1 !mul_add_c(a[1],b[2],c1,c2,c3); 1078e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1079e1051a39Sopenharmony_ci bcs,a %xcc,.+8 != 1080e1051a39Sopenharmony_ci add c_3,t_2,c_3 1081e1051a39Sopenharmony_ci lduw ap(3),a_3 1082e1051a39Sopenharmony_ci mulx a_2,b_1,t_1 !mul_add_c(a[2],b[1],c1,c2,c3); 1083e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 != 1084e1051a39Sopenharmony_ci 
bcs,a %xcc,.+8 1085e1051a39Sopenharmony_ci add c_3,t_2,c_3 1086e1051a39Sopenharmony_ci mulx a_3,b_0,t_1 !mul_add_c(a[3],b[0],c1,c2,c3);!= 1087e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 != 1088e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1089e1051a39Sopenharmony_ci add c_3,t_2,c_3 1090e1051a39Sopenharmony_ci srlx t_1,32,c_12 1091e1051a39Sopenharmony_ci stuw t_1,rp(3) !=!r[3]=c1; 1092e1051a39Sopenharmony_ci or c_12,c_3,c_12 1093e1051a39Sopenharmony_ci 1094e1051a39Sopenharmony_ci mulx a_3,b_1,t_1 !mul_add_c(a[3],b[1],c2,c3,c1); 1095e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1096e1051a39Sopenharmony_ci clr c_3 != 1097e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1098e1051a39Sopenharmony_ci add c_3,t_2,c_3 1099e1051a39Sopenharmony_ci mulx a_2,b_2,t_1 !mul_add_c(a[2],b[2],c2,c3,c1); 1100e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 != 1101e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1102e1051a39Sopenharmony_ci add c_3,t_2,c_3 1103e1051a39Sopenharmony_ci mulx a_1,b_3,t_1 !mul_add_c(a[1],b[3],c2,c3,c1); 1104e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 != 1105e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1106e1051a39Sopenharmony_ci add c_3,t_2,c_3 1107e1051a39Sopenharmony_ci srlx t_1,32,c_12 1108e1051a39Sopenharmony_ci stuw t_1,rp(4) !=!r[4]=c2; 1109e1051a39Sopenharmony_ci or c_12,c_3,c_12 1110e1051a39Sopenharmony_ci 1111e1051a39Sopenharmony_ci mulx a_2,b_3,t_1 !mul_add_c(a[2],b[3],c3,c1,c2); 1112e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1113e1051a39Sopenharmony_ci clr c_3 != 1114e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1115e1051a39Sopenharmony_ci add c_3,t_2,c_3 1116e1051a39Sopenharmony_ci mulx a_3,b_2,t_1 !mul_add_c(a[3],b[2],c3,c1,c2); 1117e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 != 1118e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1119e1051a39Sopenharmony_ci add c_3,t_2,c_3 1120e1051a39Sopenharmony_ci srlx t_1,32,c_12 1121e1051a39Sopenharmony_ci stuw t_1,rp(5) !=!r[5]=c3; 1122e1051a39Sopenharmony_ci or c_12,c_3,c_12 1123e1051a39Sopenharmony_ci 1124e1051a39Sopenharmony_ci mulx a_3,b_3,t_1 
!mul_add_c(a[3],b[3],c1,c2,c3); 1125e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1126e1051a39Sopenharmony_ci srlx t_1,32,c_12 != 1127e1051a39Sopenharmony_ci stuw t_1,rp(6) !r[6]=c1; 1128e1051a39Sopenharmony_ci stuw c_12,rp(7) !r[7]=c2; 1129e1051a39Sopenharmony_ci 1130e1051a39Sopenharmony_ci ret 1131e1051a39Sopenharmony_ci restore %g0,%g0,%o0 1132e1051a39Sopenharmony_ci 1133e1051a39Sopenharmony_ci.type bn_mul_comba4,#function 1134e1051a39Sopenharmony_ci.size bn_mul_comba4,(.-bn_mul_comba4) 1135e1051a39Sopenharmony_ci 1136e1051a39Sopenharmony_ci.align 32 1137e1051a39Sopenharmony_ci 1138e1051a39Sopenharmony_ci.global bn_sqr_comba8 1139e1051a39Sopenharmony_cibn_sqr_comba8: 1140e1051a39Sopenharmony_ci save %sp,FRAME_SIZE,%sp 1141e1051a39Sopenharmony_ci mov 1,t_2 1142e1051a39Sopenharmony_ci lduw ap(0),a_0 1143e1051a39Sopenharmony_ci sllx t_2,32,t_2 1144e1051a39Sopenharmony_ci lduw ap(1),a_1 1145e1051a39Sopenharmony_ci mulx a_0,a_0,t_1 !sqr_add_c(a,0,c1,c2,c3); 1146e1051a39Sopenharmony_ci srlx t_1,32,c_12 1147e1051a39Sopenharmony_ci stuw t_1,rp(0) !r[0]=c1; 1148e1051a39Sopenharmony_ci 1149e1051a39Sopenharmony_ci lduw ap(2),a_2 1150e1051a39Sopenharmony_ci mulx a_0,a_1,t_1 !=!sqr_add_c2(a,1,0,c2,c3,c1); 1151e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1152e1051a39Sopenharmony_ci clr c_3 1153e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1154e1051a39Sopenharmony_ci add c_3,t_2,c_3 1155e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1156e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1157e1051a39Sopenharmony_ci add c_3,t_2,c_3 1158e1051a39Sopenharmony_ci srlx t_1,32,c_12 1159e1051a39Sopenharmony_ci stuw t_1,rp(1) !r[1]=c2; 1160e1051a39Sopenharmony_ci or c_12,c_3,c_12 1161e1051a39Sopenharmony_ci 1162e1051a39Sopenharmony_ci mulx a_2,a_0,t_1 !sqr_add_c2(a,2,0,c3,c1,c2); 1163e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1164e1051a39Sopenharmony_ci clr c_3 1165e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1166e1051a39Sopenharmony_ci add c_3,t_2,c_3 1167e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 
1168e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1169e1051a39Sopenharmony_ci add c_3,t_2,c_3 1170e1051a39Sopenharmony_ci lduw ap(3),a_3 1171e1051a39Sopenharmony_ci mulx a_1,a_1,t_1 !sqr_add_c(a,1,c3,c1,c2); 1172e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1173e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1174e1051a39Sopenharmony_ci add c_3,t_2,c_3 1175e1051a39Sopenharmony_ci srlx t_1,32,c_12 1176e1051a39Sopenharmony_ci stuw t_1,rp(2) !r[2]=c3; 1177e1051a39Sopenharmony_ci or c_12,c_3,c_12 1178e1051a39Sopenharmony_ci 1179e1051a39Sopenharmony_ci mulx a_0,a_3,t_1 !sqr_add_c2(a,3,0,c1,c2,c3); 1180e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1181e1051a39Sopenharmony_ci clr c_3 1182e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1183e1051a39Sopenharmony_ci add c_3,t_2,c_3 1184e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1185e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1186e1051a39Sopenharmony_ci add c_3,t_2,c_3 1187e1051a39Sopenharmony_ci lduw ap(4),a_4 1188e1051a39Sopenharmony_ci mulx a_1,a_2,t_1 !sqr_add_c2(a,2,1,c1,c2,c3); 1189e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1190e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1191e1051a39Sopenharmony_ci add c_3,t_2,c_3 1192e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1193e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1194e1051a39Sopenharmony_ci add c_3,t_2,c_3 1195e1051a39Sopenharmony_ci srlx t_1,32,c_12 1196e1051a39Sopenharmony_ci st t_1,rp(3) !r[3]=c1; 1197e1051a39Sopenharmony_ci or c_12,c_3,c_12 1198e1051a39Sopenharmony_ci 1199e1051a39Sopenharmony_ci mulx a_4,a_0,t_1 !sqr_add_c2(a,4,0,c2,c3,c1); 1200e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1201e1051a39Sopenharmony_ci clr c_3 1202e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1203e1051a39Sopenharmony_ci add c_3,t_2,c_3 1204e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1205e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1206e1051a39Sopenharmony_ci add c_3,t_2,c_3 1207e1051a39Sopenharmony_ci mulx a_3,a_1,t_1 !sqr_add_c2(a,3,1,c2,c3,c1); 1208e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1209e1051a39Sopenharmony_ci bcs,a %xcc,.+8 
1210e1051a39Sopenharmony_ci add c_3,t_2,c_3 1211e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1212e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1213e1051a39Sopenharmony_ci add c_3,t_2,c_3 1214e1051a39Sopenharmony_ci lduw ap(5),a_5 1215e1051a39Sopenharmony_ci mulx a_2,a_2,t_1 !sqr_add_c(a,2,c2,c3,c1); 1216e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1217e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1218e1051a39Sopenharmony_ci add c_3,t_2,c_3 1219e1051a39Sopenharmony_ci srlx t_1,32,c_12 1220e1051a39Sopenharmony_ci stuw t_1,rp(4) !r[4]=c2; 1221e1051a39Sopenharmony_ci or c_12,c_3,c_12 1222e1051a39Sopenharmony_ci 1223e1051a39Sopenharmony_ci mulx a_0,a_5,t_1 !sqr_add_c2(a,5,0,c3,c1,c2); 1224e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1225e1051a39Sopenharmony_ci clr c_3 1226e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1227e1051a39Sopenharmony_ci add c_3,t_2,c_3 1228e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1229e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1230e1051a39Sopenharmony_ci add c_3,t_2,c_3 1231e1051a39Sopenharmony_ci mulx a_1,a_4,t_1 !sqr_add_c2(a,4,1,c3,c1,c2); 1232e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1233e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1234e1051a39Sopenharmony_ci add c_3,t_2,c_3 1235e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1236e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1237e1051a39Sopenharmony_ci add c_3,t_2,c_3 1238e1051a39Sopenharmony_ci lduw ap(6),a_6 1239e1051a39Sopenharmony_ci mulx a_2,a_3,t_1 !sqr_add_c2(a,3,2,c3,c1,c2); 1240e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1241e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1242e1051a39Sopenharmony_ci add c_3,t_2,c_3 1243e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1244e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1245e1051a39Sopenharmony_ci add c_3,t_2,c_3 1246e1051a39Sopenharmony_ci srlx t_1,32,c_12 1247e1051a39Sopenharmony_ci stuw t_1,rp(5) !r[5]=c3; 1248e1051a39Sopenharmony_ci or c_12,c_3,c_12 1249e1051a39Sopenharmony_ci 1250e1051a39Sopenharmony_ci mulx a_6,a_0,t_1 !sqr_add_c2(a,6,0,c1,c2,c3); 1251e1051a39Sopenharmony_ci addcc 
c_12,t_1,c_12 1252e1051a39Sopenharmony_ci clr c_3 1253e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1254e1051a39Sopenharmony_ci add c_3,t_2,c_3 1255e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1256e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1257e1051a39Sopenharmony_ci add c_3,t_2,c_3 1258e1051a39Sopenharmony_ci mulx a_5,a_1,t_1 !sqr_add_c2(a,5,1,c1,c2,c3); 1259e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1260e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1261e1051a39Sopenharmony_ci add c_3,t_2,c_3 1262e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1263e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1264e1051a39Sopenharmony_ci add c_3,t_2,c_3 1265e1051a39Sopenharmony_ci mulx a_4,a_2,t_1 !sqr_add_c2(a,4,2,c1,c2,c3); 1266e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1267e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1268e1051a39Sopenharmony_ci add c_3,t_2,c_3 1269e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1270e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1271e1051a39Sopenharmony_ci add c_3,t_2,c_3 1272e1051a39Sopenharmony_ci lduw ap(7),a_7 1273e1051a39Sopenharmony_ci mulx a_3,a_3,t_1 !=!sqr_add_c(a,3,c1,c2,c3); 1274e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1275e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1276e1051a39Sopenharmony_ci add c_3,t_2,c_3 1277e1051a39Sopenharmony_ci srlx t_1,32,c_12 1278e1051a39Sopenharmony_ci stuw t_1,rp(6) !r[6]=c1; 1279e1051a39Sopenharmony_ci or c_12,c_3,c_12 1280e1051a39Sopenharmony_ci 1281e1051a39Sopenharmony_ci mulx a_0,a_7,t_1 !sqr_add_c2(a,7,0,c2,c3,c1); 1282e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1283e1051a39Sopenharmony_ci clr c_3 1284e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1285e1051a39Sopenharmony_ci add c_3,t_2,c_3 1286e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1287e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1288e1051a39Sopenharmony_ci add c_3,t_2,c_3 1289e1051a39Sopenharmony_ci mulx a_1,a_6,t_1 !sqr_add_c2(a,6,1,c2,c3,c1); 1290e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1291e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1292e1051a39Sopenharmony_ci add c_3,t_2,c_3 1293e1051a39Sopenharmony_ci 
addcc c_12,t_1,c_12 1294e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1295e1051a39Sopenharmony_ci add c_3,t_2,c_3 1296e1051a39Sopenharmony_ci mulx a_2,a_5,t_1 !sqr_add_c2(a,5,2,c2,c3,c1); 1297e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1298e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1299e1051a39Sopenharmony_ci add c_3,t_2,c_3 1300e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1301e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1302e1051a39Sopenharmony_ci add c_3,t_2,c_3 1303e1051a39Sopenharmony_ci mulx a_3,a_4,t_1 !sqr_add_c2(a,4,3,c2,c3,c1); 1304e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1305e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1306e1051a39Sopenharmony_ci add c_3,t_2,c_3 1307e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1308e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1309e1051a39Sopenharmony_ci add c_3,t_2,c_3 1310e1051a39Sopenharmony_ci srlx t_1,32,c_12 1311e1051a39Sopenharmony_ci stuw t_1,rp(7) !r[7]=c2; 1312e1051a39Sopenharmony_ci or c_12,c_3,c_12 1313e1051a39Sopenharmony_ci 1314e1051a39Sopenharmony_ci mulx a_7,a_1,t_1 !sqr_add_c2(a,7,1,c3,c1,c2); 1315e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1316e1051a39Sopenharmony_ci clr c_3 1317e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1318e1051a39Sopenharmony_ci add c_3,t_2,c_3 1319e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1320e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1321e1051a39Sopenharmony_ci add c_3,t_2,c_3 1322e1051a39Sopenharmony_ci mulx a_6,a_2,t_1 !sqr_add_c2(a,6,2,c3,c1,c2); 1323e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1324e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1325e1051a39Sopenharmony_ci add c_3,t_2,c_3 1326e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1327e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1328e1051a39Sopenharmony_ci add c_3,t_2,c_3 1329e1051a39Sopenharmony_ci mulx a_5,a_3,t_1 !sqr_add_c2(a,5,3,c3,c1,c2); 1330e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1331e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1332e1051a39Sopenharmony_ci add c_3,t_2,c_3 1333e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1334e1051a39Sopenharmony_ci bcs,a %xcc,.+8 
1335e1051a39Sopenharmony_ci add c_3,t_2,c_3 1336e1051a39Sopenharmony_ci mulx a_4,a_4,t_1 !sqr_add_c(a,4,c3,c1,c2); 1337e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1338e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1339e1051a39Sopenharmony_ci add c_3,t_2,c_3 1340e1051a39Sopenharmony_ci srlx t_1,32,c_12 1341e1051a39Sopenharmony_ci stuw t_1,rp(8) !r[8]=c3; 1342e1051a39Sopenharmony_ci or c_12,c_3,c_12 1343e1051a39Sopenharmony_ci 1344e1051a39Sopenharmony_ci mulx a_2,a_7,t_1 !sqr_add_c2(a,7,2,c1,c2,c3); 1345e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1346e1051a39Sopenharmony_ci clr c_3 1347e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1348e1051a39Sopenharmony_ci add c_3,t_2,c_3 1349e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1350e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1351e1051a39Sopenharmony_ci add c_3,t_2,c_3 1352e1051a39Sopenharmony_ci mulx a_3,a_6,t_1 !sqr_add_c2(a,6,3,c1,c2,c3); 1353e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1354e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1355e1051a39Sopenharmony_ci add c_3,t_2,c_3 1356e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1357e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1358e1051a39Sopenharmony_ci add c_3,t_2,c_3 1359e1051a39Sopenharmony_ci mulx a_4,a_5,t_1 !sqr_add_c2(a,5,4,c1,c2,c3); 1360e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1361e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1362e1051a39Sopenharmony_ci add c_3,t_2,c_3 1363e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1364e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1365e1051a39Sopenharmony_ci add c_3,t_2,c_3 1366e1051a39Sopenharmony_ci srlx t_1,32,c_12 1367e1051a39Sopenharmony_ci stuw t_1,rp(9) !r[9]=c1; 1368e1051a39Sopenharmony_ci or c_12,c_3,c_12 1369e1051a39Sopenharmony_ci 1370e1051a39Sopenharmony_ci mulx a_7,a_3,t_1 !sqr_add_c2(a,7,3,c2,c3,c1); 1371e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1372e1051a39Sopenharmony_ci clr c_3 1373e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1374e1051a39Sopenharmony_ci add c_3,t_2,c_3 1375e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1376e1051a39Sopenharmony_ci bcs,a %xcc,.+8 
1377e1051a39Sopenharmony_ci add c_3,t_2,c_3 1378e1051a39Sopenharmony_ci mulx a_6,a_4,t_1 !sqr_add_c2(a,6,4,c2,c3,c1); 1379e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1380e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1381e1051a39Sopenharmony_ci add c_3,t_2,c_3 1382e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1383e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1384e1051a39Sopenharmony_ci add c_3,t_2,c_3 1385e1051a39Sopenharmony_ci mulx a_5,a_5,t_1 !sqr_add_c(a,5,c2,c3,c1); 1386e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1387e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1388e1051a39Sopenharmony_ci add c_3,t_2,c_3 1389e1051a39Sopenharmony_ci srlx t_1,32,c_12 1390e1051a39Sopenharmony_ci stuw t_1,rp(10) !r[10]=c2; 1391e1051a39Sopenharmony_ci or c_12,c_3,c_12 1392e1051a39Sopenharmony_ci 1393e1051a39Sopenharmony_ci mulx a_4,a_7,t_1 !sqr_add_c2(a,7,4,c3,c1,c2); 1394e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1395e1051a39Sopenharmony_ci clr c_3 1396e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1397e1051a39Sopenharmony_ci add c_3,t_2,c_3 1398e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1399e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1400e1051a39Sopenharmony_ci add c_3,t_2,c_3 1401e1051a39Sopenharmony_ci mulx a_5,a_6,t_1 !sqr_add_c2(a,6,5,c3,c1,c2); 1402e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1403e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1404e1051a39Sopenharmony_ci add c_3,t_2,c_3 1405e1051a39Sopenharmony_ci addcc c_12,t_1,t_1 1406e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1407e1051a39Sopenharmony_ci add c_3,t_2,c_3 1408e1051a39Sopenharmony_ci srlx t_1,32,c_12 1409e1051a39Sopenharmony_ci stuw t_1,rp(11) !r[11]=c3; 1410e1051a39Sopenharmony_ci or c_12,c_3,c_12 1411e1051a39Sopenharmony_ci 1412e1051a39Sopenharmony_ci mulx a_7,a_5,t_1 !sqr_add_c2(a,7,5,c1,c2,c3); 1413e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1414e1051a39Sopenharmony_ci clr c_3 1415e1051a39Sopenharmony_ci bcs,a %xcc,.+8 1416e1051a39Sopenharmony_ci add c_3,t_2,c_3 1417e1051a39Sopenharmony_ci addcc c_12,t_1,c_12 1418e1051a39Sopenharmony_ci bcs,a %xcc,.+8 
/*
 * Tail of bn_sqr_comba8 (the routine begins above this point).
 * The first instruction below is the annulled delay slot of the
 * "bcs,a %xcc,.+8" that immediately precedes it.
 */
	add	c_3,t_2,c_3
	mulx	a_6,a_6,t_1		!sqr_add_c(a,6,c1,c2,c3);
	addcc	c_12,t_1,t_1
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	srlx	t_1,32,c_12
	stuw	t_1,rp(12)		!r[12]=c1;
	or	c_12,c_3,c_12

	mulx	a_6,a_7,t_1		!sqr_add_c2(a,7,6,c2,c3,c1);
	addcc	c_12,t_1,c_12
	clr	c_3
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	addcc	c_12,t_1,t_1
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	srlx	t_1,32,c_12
	stuw	t_1,rp(13)		!r[13]=c2;
	or	c_12,c_3,c_12

	mulx	a_7,a_7,t_1		!sqr_add_c(a,7,c3,c1,c2);
	addcc	c_12,t_1,t_1
	srlx	t_1,32,c_12
	stuw	t_1,rp(14)		!r[14]=c3;
	stuw	c_12,rp(15)		!r[15]=c1;

	ret
	restore	%g0,%g0,%o0

.type	bn_sqr_comba8,#function
.size	bn_sqr_comba8,(.-bn_sqr_comba8)

.align	32

.global bn_sqr_comba4
/*
 * void bn_sqr_comba4(r,a)
 * BN_ULONG *r,*a;
 *
 * Squares the four 32-bit words a[0..3] and stores the eight result
 * words to r[0..7], one output word (column) at a time.
 *
 * ap(i), rp(i), a_N, t_N, c_N and FRAME_SIZE are macros defined
 * earlier in this file (not visible from here); judging by the inline
 * comments, ap(i)/rp(i) address a[i]/r[i].
 *
 * Working values, as established by the code below:
 *   t_2   constant 1<<32 (mov 1 followed by sllx 32).
 *   t_1   64-bit product from mulx, then the column sum whose low
 *         32 bits are stored with stuw.
 *   c_12  64-bit running accumulator; after "srlx t_1,32,c_12" it
 *         holds the part of the sum carried into the next column.
 *   c_3   collects carries out of bit 63.  Each carry adds t_2, so
 *         c_3 is a multiple of 1<<32 and can be merged with "or"
 *         into c_12 once c_12 has been shifted below 1<<32.
 *
 * Carry idiom:
 *	addcc	x,y,z
 *	bcs,a	%xcc,.+8
 *	add	c_3,t_2,c_3
 * The branch target is the instruction after the delay slot and the
 * annul bit is set, so the add runs only when the 64-bit addcc
 * produced a carry.
 *
 * sqr_add_c2 columns add the same product twice (two addcc with t_1),
 * which accounts for the symmetric terms a[i]*a[j] and a[j]*a[i].
 */
bn_sqr_comba4:
	save	%sp,FRAME_SIZE,%sp
	mov	1,t_2
	lduw	ap(0),a_0
	sllx	t_2,32,t_2		! t_2 = 1<<32, carry increment
	lduw	ap(1),a_1
	mulx	a_0,a_0,t_1		!sqr_add_c(a,0,c1,c2,c3);
	srlx	t_1,32,c_12
	stuw	t_1,rp(0)		!r[0]=c1;

	/* column 1: 2*a[0]*a[1] */
	lduw	ap(2),a_2
	mulx	a_0,a_1,t_1		!sqr_add_c2(a,1,0,c2,c3,c1);
	addcc	c_12,t_1,c_12
	clr	c_3
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	addcc	c_12,t_1,t_1
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	srlx	t_1,32,c_12
	stuw	t_1,rp(1)		!r[1]=c2;
	or	c_12,c_3,c_12

	/* column 2: 2*a[2]*a[0] + a[1]*a[1] */
	mulx	a_2,a_0,t_1		!sqr_add_c2(a,2,0,c3,c1,c2);
	addcc	c_12,t_1,c_12
	clr	c_3
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	addcc	c_12,t_1,c_12
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	lduw	ap(3),a_3
	mulx	a_1,a_1,t_1		!sqr_add_c(a,1,c3,c1,c2);
	addcc	c_12,t_1,t_1
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	srlx	t_1,32,c_12
	stuw	t_1,rp(2)		!r[2]=c3;
	or	c_12,c_3,c_12

	/* column 3: 2*a[0]*a[3] + 2*a[1]*a[2] */
	mulx	a_0,a_3,t_1		!sqr_add_c2(a,3,0,c1,c2,c3);
	addcc	c_12,t_1,c_12
	clr	c_3
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	addcc	c_12,t_1,c_12
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	mulx	a_1,a_2,t_1		!sqr_add_c2(a,2,1,c1,c2,c3);
	addcc	c_12,t_1,c_12
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	addcc	c_12,t_1,t_1
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	srlx	t_1,32,c_12
	stuw	t_1,rp(3)		!r[3]=c1;
	or	c_12,c_3,c_12

	/* column 4: 2*a[3]*a[1] + a[2]*a[2] */
	mulx	a_3,a_1,t_1		!sqr_add_c2(a,3,1,c2,c3,c1);
	addcc	c_12,t_1,c_12
	clr	c_3
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	addcc	c_12,t_1,c_12
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	mulx	a_2,a_2,t_1		!sqr_add_c(a,2,c2,c3,c1);
	addcc	c_12,t_1,t_1
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	srlx	t_1,32,c_12
	stuw	t_1,rp(4)		!r[4]=c2;
	or	c_12,c_3,c_12

	/* column 5: 2*a[2]*a[3] */
	mulx	a_2,a_3,t_1		!sqr_add_c2(a,3,2,c3,c1,c2);
	addcc	c_12,t_1,c_12
	clr	c_3
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	addcc	c_12,t_1,t_1
	bcs,a	%xcc,.+8
	add	c_3,t_2,c_3
	srlx	t_1,32,c_12
	stuw	t_1,rp(5)		!r[5]=c3;
	or	c_12,c_3,c_12

	/*
	 * column 6: a[3]*a[3].  The carry out of this addcc is not
	 * examined; the upper half of the sum becomes r[7].
	 */
	mulx	a_3,a_3,t_1		!sqr_add_c(a,3,c1,c2,c3);
	addcc	c_12,t_1,t_1
	srlx	t_1,32,c_12
	stuw	t_1,rp(6)		!r[6]=c1;
	stuw	c_12,rp(7)		!r[7]=c2;

	ret
	restore	%g0,%g0,%o0		! delay slot of ret: writes 0 to %o0

.type	bn_sqr_comba4,#function
.size	bn_sqr_comba4,(.-bn_sqr_comba4)

.align	32