#! /usr/bin/env perl
# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA256/512 block procedure for PA-RISC.

# June 2009.
#
# SHA256 performance is >75% better than gcc 3.2 generated code on
# PA-7100LC. Compared to code generated by vendor compiler this
# implementation is almost 70% faster in 64-bit build, but delivers
# virtually same performance in 32-bit build on PA-8600.
#
# SHA512 performance is >2.9x better than gcc 3.2 generated code on
# PA-7100LC, PA-RISC 1.1 processor. Then implementation detects if the
# code is executed on PA-RISC 2.0 processor and switches to 64-bit
# code path delivering adequate performance even in "blended" 32-bit
# build. Though 64-bit code is not any faster than code generated by
# vendor compiler on PA-8600...
#
# Special thanks to polarhome.com for providing HP-UX account.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Send the generated assembly to $output when one was given.  The
# three-argument form keeps the file name from being parsed as an open
# mode, and a failure is fatal instead of silently leaving STDOUT
# pointing at whatever the caller provided.
if ($output) {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}

# ABI-dependent parameters, selected by a "64" in $flavour.
#   $LEVEL         argument of the .LEVEL directive emitted at the top
#   $SIZE_T        bytes reserved per saved register/argument slot
#   $FRAME_MARKER  size of the frame marker part of the stack frame
#   $SAVED_RP      offset (below %sp) at which %r2 is saved in the prologue
#   $PUSH/$PUSHMA  store mnemonics used to save registers
#   $POP/$POPMB    load mnemonics used to restore registers
if ($flavour =~ /64/) {
	$LEVEL		="2.0W";
	$SIZE_T		=8;
	$FRAME_MARKER	=80;
	$SAVED_RP	=16;
	$PUSH		="std";
	$PUSHMA		="std,ma";
	$POP		="ldd";
	$POPMB		="ldd,mb";
} else {
	$LEVEL		="1.0";
	$SIZE_T		=4;
	$FRAME_MARKER	=48;
	$SAVED_RP	=20;
	$PUSH		="stw";
	$PUSHMA		="stwm";
	$POP		="ldw";
	$POPMB		="ldwm";
}

# Algorithm-dependent parameters, selected by a "512" in the output
# file name.
#   $func          name of the exported function
#   $SZ            size of one hash word in bytes
#   @Sigma0/1      rotate amounts of the "big" Sigma functions
#   @sigma0/1      rotate/rotate/shift amounts of the "small" sigma functions
#   $rounds        number of rounds, i.e. number of entries in the K table
#   $LAST10BITS    low 10 bits of the last word of the K table; the round
#                  code compares against it to detect the end of the table
#   $LD/$LDM/$ST   load, load-and-modify and store mnemonics for one word
if ($output =~ /512/) {
	$func="sha512_block_data_order";
	$SZ=8;
	@Sigma0=(28,34,39);
	@Sigma1=(14,18,41);
	@sigma0=(1,  8, 7);
	@sigma1=(19,61, 6);
	$rounds=80;
	$LAST10BITS=0x017;
	$LD="ldd";
	$LDM="ldd,ma";
	$ST="std";
} else {
	$func="sha256_block_data_order";
	$SZ=4;
	@Sigma0=( 2,13,22);
	@Sigma1=( 6,11,25);
	@sigma0=( 7,18, 3);
	@sigma1=(17,19,10);
	$rounds=64;
	$LAST10BITS=0x0f2;
	$LD="ldw";
	$LDM="ldwm";
	$ST="stw";
}

# Stack frame layout.
$FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker
				#                 [+ argument transfer]
$XOFF=16*$SZ+32;		# local variables
$FRAME+=$XOFF;
$XOFF+=$FRAME_MARKER;		# distance between %sp and local variables

# Incoming arguments.  Their registers double as scratch registers, so
# the prologue saves them on the stack before the rounds overwrite them.
$ctx="%r26";	# zapped by $a0
$inp="%r25";	# zapped by $a1
$num="%r24";	# zapped by $t0

# Scratch registers and the pointer into the K table.
$a0 ="%r26";
$a1 ="%r25";
$t0 ="%r24";
$t1 ="%r29";
$Tbl="%r31";

# Working variables a..h; @V is rotated after every round.
@V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28");

# Message schedule registers X[0..15].  The 17th entry ($inp) receives the
# extra word that is loaded when the input is not aligned.
@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
    "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp);

# Conventions used by everything below:
#  - Each piece of assembly is appended to the global $code.
#  - Backtick-quoted expressions and the pseudo-instructions _ror, _shr,
#    _shl and _align are emitted verbatim here; presumably a later pass
#    (not visible in this part of the file) evaluates/translates them
#    -- TODO confirm.

# Append one round of the compression function to $code.
#   $i           round index; selects X[$i%16] as the message word
#   $a..$h       names of the registers holding the working variables
# The round constant is expected in $t1 on entry (it is added to h before
# $t1 is reused for Ch).  For $i<15 the round also emits a $LDM from $Tbl
# into $t1, which fetches the constant for the following round.
sub ROUND_00_15 {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
$code.=<<___;
	_ror	$e,$Sigma1[0],$a0
	and	$f,$e,$t0
	_ror	$e,$Sigma1[1],$a1
	addl	$t1,$h,$h
	andcm	$g,$e,$t1
	xor	$a1,$a0,$a0
	_ror	$a1,`$Sigma1[2]-$Sigma1[1]`,$a1
	or	$t0,$t1,$t1		; Ch(e,f,g)
	addl	@X[$i%16],$h,$h
	xor	$a0,$a1,$a1		; Sigma1(e)
	addl	$t1,$h,$h
	_ror	$a,$Sigma0[0],$a0
	addl	$a1,$h,$h

	_ror	$a,$Sigma0[1],$a1
	and	$a,$b,$t0
	and	$a,$c,$t1
	xor	$a1,$a0,$a0
	_ror	$a1,`$Sigma0[2]-$Sigma0[1]`,$a1
	xor	$t1,$t0,$t0
	and	$b,$c,$t1
	xor	$a0,$a1,$a1		; Sigma0(a)
	addl	$h,$d,$d
	xor	$t1,$t0,$t0		; Maj(a,b,c)
	`"$LDM	$SZ($Tbl),$t1" if ($i<15)`
	addl	$a1,$h,$h
	addl	$t0,$h,$h

___
}

# Append one round with message schedule update to $code.
#   $i           round index as counted by the caller (16..31); it is
#                reduced by 16 so that it indexes @X directly
#   $a..$h       names of the registers holding the working variables
# Emits X[i] += sigma0(X[i+1]) + sigma1(X[i+14]) + X[i+9], loads the next
# round constant into $t1 and then delegates to ROUND_00_15.  In the last
# of the 16 rounds ($i==15) the low 10 bits of the constant just loaded are
# compared with $LAST10BITS and $Tbl is bumped by one to flag that the end
# of the table was reached; the caller tests that flag to leave the loop.
sub ROUND_16_xx {
my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
$i-=16;
$code.=<<___;
	_ror	@X[($i+1)%16],$sigma0[0],$a0
	_ror	@X[($i+1)%16],$sigma0[1],$a1
	addl	@X[($i+9)%16],@X[$i],@X[$i]
	_ror	@X[($i+14)%16],$sigma1[0],$t0
	_ror	@X[($i+14)%16],$sigma1[1],$t1
	xor	$a1,$a0,$a0
	_shr	@X[($i+1)%16],$sigma0[2],$a1
	xor	$t1,$t0,$t0
	_shr	@X[($i+14)%16],$sigma1[2],$t1
	xor	$a1,$a0,$a0		; sigma0(X[(i+1)&0x0f])
	xor	$t1,$t0,$t0		; sigma1(X[(i+14)&0x0f])
	$LDM	$SZ($Tbl),$t1
	addl	$a0,@X[$i],@X[$i]
	addl	$t0,@X[$i],@X[$i]
___
$code.=<<___ if ($i==15);
	extru	$t1,31,10,$a1
	comiclr,<> $LAST10BITS,$a1,%r0
	ldo	1($Tbl),$Tbl		; signal end of $Tbl
___
&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
}

# Start the output: assembler directives and the label of the K table.
# Note that this assigns (rather than appends to) $code.
$code=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.ALIGN	64
L\$table
___
# K table for the 8-byte word variant: 80 constants, two .WORDs each.
$code.=<<___ if ($SZ==8);
	.WORD	0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
	.WORD	0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
	.WORD	0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
	.WORD	0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
	.WORD	0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
	.WORD	0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
	.WORD	0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
	.WORD	0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
	.WORD	0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
	.WORD	0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
	.WORD	0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
	.WORD	0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
	.WORD	0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
	.WORD	0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
	.WORD	0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
	.WORD	0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
	.WORD	0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
	.WORD	0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
	.WORD	0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
	.WORD	0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
	.WORD	0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
	.WORD	0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
	.WORD	0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
	.WORD	0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
	.WORD	0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
	.WORD	0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
	.WORD	0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
	.WORD	0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
	.WORD	0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
	.WORD	0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
	.WORD	0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
	.WORD	0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
	.WORD	0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
	.WORD	0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
	.WORD	0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
	.WORD	0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
	.WORD	0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
	.WORD	0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
	.WORD	0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
	.WORD	0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
___
# K table for the 4-byte word variant: 64 constants, one .WORD each.
$code.=<<___ if ($SZ==4);
	.WORD	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	.WORD	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	.WORD	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	.WORD	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	.WORD	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	.WORD	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	.WORD	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	.WORD	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	.WORD	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	.WORD	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	.WORD	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	.WORD	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	.WORD	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	.WORD	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	.WORD	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	.WORD	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
___
# Function entry: save %r2..%r18, turn $num into an end-of-input pointer,
# stash the three arguments (their registers are reused as scratch) and
# compute the address of the K table relative to the current location.
$code.=<<___;

	.EXPORT	$func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
	.ALIGN	64
$func
	.PROC
	.CALLINFO	FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)
	$PUSH	%r17,`-$FRAME+14*$SIZE_T`(%sp)
	$PUSH	%r18,`-$FRAME+15*$SIZE_T`(%sp)

	_shl	$num,`log(16*$SZ)/log(2)`,$num
	addl	$inp,$num,$num		; $num to point at the end of $inp

	$PUSH	$num,`-$FRAME_MARKER-4*$SIZE_T`(%sp)	; save arguments
	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
	$PUSH	$ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)

	blr	%r0,$Tbl
	ldi	3,$t1
L\$pic
	andcm	$Tbl,$t1,$Tbl		; wipe privilege level
	ldo	L\$table-L\$pic($Tbl),$Tbl
___
# SHA512 in a 32-bit build only: run-time dispatch to the 32-bit-only code
# at L$parisc1 (emitted further below) when executing on a PA-RISC 1.x CPU.
$code.=<<___ if ($SZ==8 && $SIZE_T==4);
	ldi	31,$t1
	mtctl	$t1,%cr11
	extrd,u,*= $t1,%sar,1,$t1	; executes on PA-RISC 1.0
	b	L\$parisc1
	nop
___
# Native-word path: load the hash state into a..h and program %sar with
# the shift amount derived from the misalignment of $inp.
$code.=<<___;
	$LD	`0*$SZ`($ctx),$A	; load context
	$LD	`1*$SZ`($ctx),$B
	$LD	`2*$SZ`($ctx),$C
	$LD	`3*$SZ`($ctx),$D
	$LD	`4*$SZ`($ctx),$E
	$LD	`5*$SZ`($ctx),$F
	$LD	`6*$SZ`($ctx),$G
	$LD	`7*$SZ`($ctx),$H

	extru	$inp,31,`log($SZ)/log(2)`,$t0
	sh3addl	$t0,%r0,$t0
	subi	`8*$SZ`,$t0,$t0
	mtctl	$t0,%cr11		; load %sar with align factor

L\$oop
	ldi	`$SZ-1`,$t0
	$LDM	$SZ($Tbl),$t1
	andcm	$inp,$t0,$t0		; align $inp
___
	for ($i=0;$i<15;$i++) {		# load input block
	$code.="\t$LD	`$SZ*$i`($t0),@X[$i]\n";		}
# Word 15 is loaded after the compare-and-branch below (SOURCE does not
# say so, but this is presumably its delay slot -- TODO confirm); word 16
# is only needed by the realignment that follows.
$code.=<<___;
	cmpb,*=	$inp,$t0,L\$aligned
	$LD	`$SZ*15`($t0),@X[15]
	$LD	`$SZ*16`($t0),@X[16]
___
	for ($i=0;$i<16;$i++) {		# align data
	$code.="\t_align	@X[$i],@X[$i+1],@X[$i]\n";	}
$code.=<<___;
L\$aligned
	nop	; otherwise /usr/ccs/bin/as is confused by below .WORD
___

# First 16 rounds; @V is rotated by one so that every round sees the
# working variables under the names a..h.
for($i=0;$i<16;$i++)	{ &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
L\$rounds
	nop	; otherwise /usr/ccs/bin/as is confused by below .WORD
___
# 16 rounds with schedule update; the emitted code loops back to L$rounds
# until ROUND_16_xx has flagged the end of the K table in $Tbl.
for(;$i<32;$i++)	{ &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); }
# After the last round: restore the arguments, rewind $Tbl (the extra -1
# removes the end-of-table flag), add the previous state to a..h, store it
# back and loop while $inp has not reached $num.
$code.=<<___;
	bb,>=	$Tbl,31,L\$rounds	; end of $Tbl signalled?
	nop

	$POP	`-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx	; restore arguments
	$POP	`-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
	$POP	`-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
	ldo	`-$rounds*$SZ-1`($Tbl),$Tbl		; rewind $Tbl

	$LD	`0*$SZ`($ctx),@X[0]	; load context
	$LD	`1*$SZ`($ctx),@X[1]
	$LD	`2*$SZ`($ctx),@X[2]
	$LD	`3*$SZ`($ctx),@X[3]
	$LD	`4*$SZ`($ctx),@X[4]
	$LD	`5*$SZ`($ctx),@X[5]
	addl	@X[0],$A,$A
	$LD	`6*$SZ`($ctx),@X[6]
	addl	@X[1],$B,$B
	$LD	`7*$SZ`($ctx),@X[7]
	ldo	`16*$SZ`($inp),$inp	; advance $inp

	$ST	$A,`0*$SZ`($ctx)	; save context
	addl	@X[2],$C,$C
	$ST	$B,`1*$SZ`($ctx)
	addl	@X[3],$D,$D
	$ST	$C,`2*$SZ`($ctx)
	addl	@X[4],$E,$E
	$ST	$D,`3*$SZ`($ctx)
	addl	@X[5],$F,$F
	$ST	$E,`4*$SZ`($ctx)
	addl	@X[6],$G,$G
	$ST	$F,`5*$SZ`($ctx)
	addl	@X[7],$H,$H
	$ST	$G,`6*$SZ`($ctx)
	$ST	$H,`7*$SZ`($ctx)

	cmpb,*<>,n $inp,$num,L\$oop
	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)	; save $inp
___
if ($SZ==8 && $SIZE_T==4)	# SHA512 for 32-bit PA-RISC 1.0
{{
# The native-word path above must jump over the 32-bit-only code.
$code.=<<___;
	b	L\$done
	nop

	.ALIGN	64
L\$parisc1
___

# Register assignment for the 32-bit-only path: every 64-bit working
# variable lives in a hi/lo register pair, so @V holds 16 names and is
# rotated by two per round.  The scratch registers are re-assigned too.
@V=(  $Ahi,  $Alo,  $Bhi,  $Blo,  $Chi,  $Clo,  $Dhi,  $Dlo,
      $Ehi,  $Elo,  $Fhi,  $Flo,  $Ghi,  $Glo,  $Hhi,  $Hlo) =
   ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
     "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16");
$a0 ="%r17";
$a1 ="%r18";
$a2 ="%r19";
$a3 ="%r20";
$t0 ="%r21";
$t1 ="%r22";
$t2 ="%r28";
$t3 ="%r29";
$Tbl="%r31";

# Only two schedule words are kept in registers here (current X[i] and
# next X[i+1], hi/lo each); the full schedule lives on the stack at
# -$XOFF(%sp).
@X=("%r23","%r24","%r25","%r26");	# zaps $num,$inp,$ctx

# Append one round of the 32-bit-only SHA512 code to $code.
#   $i                 round index (0..15)
#   $ahi,$alo..$hlo    register pairs holding the working variables
#   $flag              true when called from ROUND_16_xx_pa1, which has
#                      already loaded X[i+1]; otherwise it is loaded here
# 64-bit additions are done as add (low half) followed by addc (high
# half).  The round constant is fetched from $Tbl into the X[i] registers
# after X[i] has been added to h.  When $flag is set, the last round
# ($i==15) also emits the end-of-table test that loops back to
# L$rounds_pa1.  Finally @X is rotated by two so that X[i+1] becomes the
# current word for the next call.
sub ROUND_00_15_pa1 {
my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
       $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_;
my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;

$code.=<<___ if (!$flag);
	ldw	`-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
	ldw	`-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo	; load X[i+1]
___
$code.=<<___;
	shd	$ehi,$elo,$Sigma1[0],$t0
	add	$Xlo,$hlo,$hlo
	shd	$elo,$ehi,$Sigma1[0],$t1
	addc	$Xhi,$hhi,$hhi		; h += X[i]
	shd	$ehi,$elo,$Sigma1[1],$t2
	ldwm	8($Tbl),$Xhi
	shd	$elo,$ehi,$Sigma1[1],$t3
	ldw	-4($Tbl),$Xlo		; load K[i]
	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1
	and	$flo,$elo,$a0
	and	$fhi,$ehi,$a1
	shd	$ehi,$elo,$Sigma1[2],$t2
	andcm	$glo,$elo,$a2
	shd	$elo,$ehi,$Sigma1[2],$t3
	andcm	$ghi,$ehi,$a3
	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1		; Sigma1(e)
	add	$Xlo,$hlo,$hlo
	xor	$a2,$a0,$a0
	addc	$Xhi,$hhi,$hhi		; h += K[i]
	xor	$a3,$a1,$a1		; Ch(e,f,g)

	add	$t0,$hlo,$hlo
	shd	$ahi,$alo,$Sigma0[0],$t0
	addc	$t1,$hhi,$hhi		; h += Sigma1(e)
	shd	$alo,$ahi,$Sigma0[0],$t1
	add	$a0,$hlo,$hlo
	shd	$ahi,$alo,$Sigma0[1],$t2
	addc	$a1,$hhi,$hhi		; h += Ch(e,f,g)
	shd	$alo,$ahi,$Sigma0[1],$t3

	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1
	shd	$ahi,$alo,$Sigma0[2],$t2
	and	$alo,$blo,$a0
	shd	$alo,$ahi,$Sigma0[2],$t3
	and	$ahi,$bhi,$a1
	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1		; Sigma0(a)

	and	$alo,$clo,$a2
	and	$ahi,$chi,$a3
	xor	$a2,$a0,$a0
	add	$hlo,$dlo,$dlo
	xor	$a3,$a1,$a1
	addc	$hhi,$dhi,$dhi		; d += h
	and	$blo,$clo,$a2
	add	$t0,$hlo,$hlo
	and	$bhi,$chi,$a3
	addc	$t1,$hhi,$hhi		; h += Sigma0(a)
	xor	$a2,$a0,$a0
	add	$a0,$hlo,$hlo
	xor	$a3,$a1,$a1		; Maj(a,b,c)
	addc	$a1,$hhi,$hhi		; h += Maj(a,b,c)

___
$code.=<<___ if ($i==15 && $flag);
	extru	$Xlo,31,10,$Xlo
	comiclr,= $LAST10BITS,$Xlo,%r0
	b	L\$rounds_pa1
	nop
___
push(@X,shift(@X)); push(@X,shift(@X));
}

# Append one round with message schedule update of the 32-bit-only SHA512
# code to $code.
#   first argument     round index as counted by the caller (16..31),
#                      reduced by 16 here
#   remaining args     the 16 register names of the working variables;
#                      they are forwarded unchanged to ROUND_00_15_pa1
# Emits X[i] += sigma0(X[i+1]) + sigma1(X[i+14]) + X[i+9] on hi/lo halves,
# stores the new X[i] back to its stack slot and then emits the round
# proper with $flag set.
# NOTE(review): two trailing assembly comments in the emitted text below
# are inaccurate -- both have unbalanced brackets and the second one says
# "sigma0" although the value is built from the @sigma1 amounts applied to
# X[i+14].  They are part of the generated output and are left untouched.
sub ROUND_16_xx_pa1 {
my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
my ($i)=shift;
$i-=16;
$code.=<<___;
	ldw	`-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
	ldw	`-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo	; load X[i+1]
	ldw	`-$XOFF+8*(($i+9)%16)`(%sp),$a1
	ldw	`-$XOFF+8*(($i+9)%16)+4`(%sp),$a0	; load X[i+9]
	ldw	`-$XOFF+8*(($i+14)%16)`(%sp),$a3
	ldw	`-$XOFF+8*(($i+14)%16)+4`(%sp),$a2	; load X[i+14]
	shd	$Xnhi,$Xnlo,$sigma0[0],$t0
	shd	$Xnlo,$Xnhi,$sigma0[0],$t1
	add	$a0,$Xlo,$Xlo
	shd	$Xnhi,$Xnlo,$sigma0[1],$t2
	addc	$a1,$Xhi,$Xhi
	shd	$Xnlo,$Xnhi,$sigma0[1],$t3
	xor	$t2,$t0,$t0
	shd	$Xnhi,$Xnlo,$sigma0[2],$t2
	xor	$t3,$t1,$t1
	extru	$Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
	xor	$t2,$t0,$t0
	shd	$a3,$a2,$sigma1[0],$a0
	xor	$t3,$t1,$t1		; sigma0(X[i+1)&0x0f])
	shd	$a2,$a3,$sigma1[0],$a1
	add	$t0,$Xlo,$Xlo
	shd	$a3,$a2,$sigma1[1],$t2
	addc	$t1,$Xhi,$Xhi
	shd	$a2,$a3,$sigma1[1],$t3
	xor	$t2,$a0,$a0
	shd	$a3,$a2,$sigma1[2],$t2
	xor	$t3,$a1,$a1
	extru	$a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
	xor	$t2,$a0,$a0
	xor	$t3,$a1,$a1		; sigma0(X[i+14)&0x0f])
	add	$a0,$Xlo,$Xlo
	addc	$a1,$Xhi,$Xhi

	stw	$Xhi,`-$XOFF+8*($i%16)`(%sp)
	stw	$Xlo,`-$XOFF+8*($i%16)+4`(%sp)
___
&ROUND_00_15_pa1($i,@_,1);
}
# 32-bit-only path entry: load the hash state as 16 32-bit halves, program
# %sar with the shift amount for unaligned input, then start the block
# loop.  Unaligned input falls through to the vshd-based copy; aligned
# input branches to L$aligned_pa1.
$code.=<<___;
	ldw	`0*4`($ctx),$Ahi		; load context
	ldw	`1*4`($ctx),$Alo
	ldw	`2*4`($ctx),$Bhi
	ldw	`3*4`($ctx),$Blo
	ldw	`4*4`($ctx),$Chi
	ldw	`5*4`($ctx),$Clo
	ldw	`6*4`($ctx),$Dhi
	ldw	`7*4`($ctx),$Dlo
	ldw	`8*4`($ctx),$Ehi
	ldw	`9*4`($ctx),$Elo
	ldw	`10*4`($ctx),$Fhi
	ldw	`11*4`($ctx),$Flo
	ldw	`12*4`($ctx),$Ghi
	ldw	`13*4`($ctx),$Glo
	ldw	`14*4`($ctx),$Hhi
	ldw	`15*4`($ctx),$Hlo

	extru	$inp,31,2,$t0
	sh3addl	$t0,%r0,$t0
	subi	32,$t0,$t0
	mtctl	$t0,%cr11		; load %sar with align factor

L\$oop_pa1
	extru	$inp,31,2,$a3
	comib,=	0,$a3,L\$aligned_pa1
	sub	$inp,$a3,$inp

	ldw	`0*4`($inp),$X[0]
	ldw	`1*4`($inp),$X[1]
	ldw	`2*4`($inp),$t2
	ldw	`3*4`($inp),$t3
	ldw	`4*4`($inp),$a0
	ldw	`5*4`($inp),$a1
	ldw	`6*4`($inp),$a2
	ldw	`7*4`($inp),$a3
	vshd	$X[0],$X[1],$X[0]
	vshd	$X[1],$t2,$X[1]
	stw	$X[0],`-$XOFF+0*4`(%sp)
	ldw	`8*4`($inp),$t0
	vshd	$t2,$t3,$t2
	stw	$X[1],`-$XOFF+1*4`(%sp)
	ldw	`9*4`($inp),$t1
	vshd	$t3,$a0,$t3
___
{
# Unaligned copy of the 128-byte block to the stack.  @t is a rotating
# window of eight scratch registers: each step stores the finished word
# $t[0], reloads that register with input word 8+$i and merges the next
# pair with vshd.  The first loop runs up to and including $i==24, so it
# also loads input word 32, i.e. one word past the block, which the final
# vshd needs.
my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
for ($i=2;$i<=(128/4-8);$i++) {
$code.=<<___;
	stw	$t[0],`-$XOFF+$i*4`(%sp)
	ldw	`(8+$i)*4`($inp),$t[0]
	vshd	$t[1],$t[2],$t[1]
___
push(@t,shift(@t));
}
# Nothing left to load: drain the window.
for (;$i<(128/4-1);$i++) {
$code.=<<___;
	stw	$t[0],`-$XOFF+$i*4`(%sp)
	vshd	$t[1],$t[2],$t[1]
___
push(@t,shift(@t));
}
# The last store is emitted after the branch to the common continuation.
$code.=<<___;
	b	L\$collected_pa1
	stw	$t[0],`-$XOFF+$i*4`(%sp)

___
}
# Aligned copy: same structure as above, without the vshd merging.
$code.=<<___;
L\$aligned_pa1
	ldw	`0*4`($inp),$X[0]
	ldw	`1*4`($inp),$X[1]
	ldw	`2*4`($inp),$t2
	ldw	`3*4`($inp),$t3
	ldw	`4*4`($inp),$a0
	ldw	`5*4`($inp),$a1
	ldw	`6*4`($inp),$a2
	ldw	`7*4`($inp),$a3
	stw	$X[0],`-$XOFF+0*4`(%sp)
	ldw	`8*4`($inp),$t0
	stw	$X[1],`-$XOFF+1*4`(%sp)
	ldw	`9*4`($inp),$t1
___
{
my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);
# Stores words 2..23 while loading words 10..31.
for ($i=2;$i<(128/4-8);$i++) {
$code.=<<___;
	stw	$t[0],`-$XOFF+$i*4`(%sp)
	ldw	`(8+$i)*4`($inp),$t[0]
___
push(@t,shift(@t));
}
# Stores the remaining words 24..31.
for (;$i<128/4;$i++) {
$code.=<<___;
	stw	$t[0],`-$XOFF+$i*4`(%sp)
___
push(@t,shift(@t));
}
$code.="L\$collected_pa1\n";
}

# 16 plain rounds followed by 16 schedule-updating rounds; @V is rotated by
# two register names (one hi/lo pair) per round.  The emitted code repeats
# the second group from L$rounds_pa1 until the end of the K table.
for($i=0;$i<16;$i++)	{ &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }
$code.="L\$rounds_pa1\n";
for(;$i<32;$i++)	{ &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); }

# After the last round: restore the arguments, rewind $Tbl, add the
# previous state to the working variables (add/addc per hi/lo pair), store
# the result and either finish or continue with the next block.
$code.=<<___;
	$POP	`-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx	; restore arguments
	$POP	`-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
	$POP	`-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
	ldo	`-$rounds*$SZ`($Tbl),$Tbl		; rewind $Tbl

	ldw	`0*4`($ctx),$t1		; update context
	ldw	`1*4`($ctx),$t0
	ldw	`2*4`($ctx),$t3
	ldw	`3*4`($ctx),$t2
	ldw	`4*4`($ctx),$a1
	ldw	`5*4`($ctx),$a0
	ldw	`6*4`($ctx),$a3
	add	$t0,$Alo,$Alo
	ldw	`7*4`($ctx),$a2
	addc	$t1,$Ahi,$Ahi
	ldw	`8*4`($ctx),$t1
	add	$t2,$Blo,$Blo
	ldw	`9*4`($ctx),$t0
	addc	$t3,$Bhi,$Bhi
	ldw	`10*4`($ctx),$t3
	add	$a0,$Clo,$Clo
	ldw	`11*4`($ctx),$t2
	addc	$a1,$Chi,$Chi
	ldw	`12*4`($ctx),$a1
	add	$a2,$Dlo,$Dlo
	ldw	`13*4`($ctx),$a0
	addc	$a3,$Dhi,$Dhi
	ldw	`14*4`($ctx),$a3
	add	$t0,$Elo,$Elo
	ldw	`15*4`($ctx),$a2
	addc	$t1,$Ehi,$Ehi
	stw	$Ahi,`0*4`($ctx)
	add	$t2,$Flo,$Flo
	stw	$Alo,`1*4`($ctx)
	addc	$t3,$Fhi,$Fhi
	stw	$Bhi,`2*4`($ctx)
	add	$a0,$Glo,$Glo
	stw	$Blo,`3*4`($ctx)
	addc	$a1,$Ghi,$Ghi
	stw	$Chi,`4*4`($ctx)
	add	$a2,$Hlo,$Hlo
	stw	$Clo,`5*4`($ctx)
	addc	$a3,$Hhi,$Hhi
	stw	$Dhi,`6*4`($ctx)
	ldo	`16*$SZ`($inp),$inp	; advance $inp
	stw	$Dlo,`7*4`($ctx)
	stw	$Ehi,`8*4`($ctx)
	stw	$Elo,`9*4`($ctx)
	stw	$Fhi,`10*4`($ctx)
	stw	$Flo,`11*4`($ctx)
	stw	$Ghi,`12*4`($ctx)
	stw	$Glo,`13*4`($ctx)
	stw	$Hhi,`14*4`($ctx)
	comb,=	$inp,$num,L\$done
	stw	$Hlo,`15*4`($ctx)
	b	L\$oop_pa1
	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)	; save $inp
L\$done
___
}}
# Common epilogue: restore %r2 and %r4..%r18, return through %r2 and pop
# the frame while restoring %r3; then the identification string.
$code.=<<___;
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
	$POP	`-$FRAME+14*$SIZE_T`(%sp),%r17
	$POP	`-$FRAME+15*$SIZE_T`(%sp),%r18
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
	.STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___

# Explicitly encode PA-RISC 2.0 instructions used in this module, so
# that it can be compiled with .LEVEL 1.0. It should be noted that I
# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
# directive...
# Hand-assemblers for the PA-RISC 2.0 instructions emitted by this module.
# Every closure takes the completer string ($mod, e.g. ",mb") and the
# operand string ($args).  When the operands match a supported form, the
# instruction is returned as a raw "\t.WORD\t0x........" directive with the
# original mnemonic text kept as a trailing assembler comment.  Otherwise
# the instruction text is handed back untouched, prefixed with a tab.

my $ldd = sub {
    my ($mod,$args) = @_;
    my $orig = "ldd$mod\t$args";

    # operands: displacement(%rBASE),%rTARGET -- format 3 suffices
    my @field = $args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/;
    return "\t".$orig unless @field;
    my ($disp,$base,$target) = @field;

    my $opcode = 0x14<<26;
    $opcode |= $base<<21;
    $opcode |= $target<<16;
    $opcode |= ($disp&0x1FF8)<<1;
    $opcode |= ($disp>>13)&1;
    # any ",m..." completer sets bit 3; ",mb" additionally sets bit 2
    $opcode |= 1<<3 if ($mod =~ /^,m/);
    $opcode |= 1<<2 if ($mod =~ /^,mb/);

    return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
};

my $std = sub {
    my ($mod,$args) = @_;
    my $orig = "std$mod\t$args";

    # operands: %rSOURCE,displacement(%rBASE) -- format 3 suffices
    my @field = $args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/;
    return "\t".$orig unless @field;
    my ($source,$disp,$base) = @field;

    my $opcode = 0x1c<<26;
    $opcode |= $base<<21;
    $opcode |= $source<<16;
    $opcode |= ($disp&0x1FF8)<<1;
    $opcode |= ($disp>>13)&1;

    return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
};

my $extrd = sub {
    my ($mod,$args) = @_;
    my $orig = "extrd$mod\t$args";

    # Only the ",u" completer is used by this module; it is implicitly
    # encoded, so $mod is not inspected for it.
    if (my ($source,$pos,$width,$target) =
            $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {
        # format 15: immediate position
        my $len = 32-$width;
        my $opcode = 0x36<<26;
        $opcode |= $source<<21;
        $opcode |= $target<<16;
        $opcode |= (($pos&0x20)<<6)|(($pos&0x1f)<<5);   # encode pos
        $opcode |= (($len&0x20)<<7)|($len&0x1f);        # encode len
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    if (my ($source,$width,$target) =
            $args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {
        # format 12: position taken from %sar
        my $len = 32-$width;
        my $opcode = 0x34<<26;
        $opcode |= $source<<21;
        $opcode |= $target<<16;
        $opcode |= (2<<11)|(1<<9);
        $opcode |= (($len&0x20)<<3)|($len&0x1f);        # encode len
        # bit 13 is set when the completer carries a "=" condition
        $opcode |= 1<<13 if ($mod =~ /,\**=/);
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    return "\t".$orig;
};

my $shrpd = sub {
    my ($mod,$args) = @_;
    my $orig = "shrpd$mod\t$args";

    if (my ($first,$second,$shift,$target) =
            $args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {
        # format 14: immediate shift amount
        my $cpos = 63-$shift;
        my $opcode = 0x34<<26;
        $opcode |= $second<<21;
        $opcode |= $first<<16;
        $opcode |= 1<<10;
        $opcode |= $target;
        $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    if (my ($first,$second,$target) =
            $args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {
        # format 11: shift amount taken from %sar
        my $opcode = 0x34<<26;
        $opcode |= $second<<21;
        $opcode |= $first<<16;
        $opcode |= 1<<9;
        $opcode |= $target;
        return sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
    }

    return "\t".$orig;
};
# Translate one instruction.  A variable named after the mnemonic is looked
# up by string eval; if it holds a code reference (one of the hand-assembler
# closures defined earlier in this file) that closure produces the output
# line.  Any other mnemonic is re-emitted verbatim as "\tmnemonic<mod>\targs".
sub assemble {
    my ($mnemonic,$mod,$args)=@_;
    my $handler = eval("\$$mnemonic");

    return $handler->($mod,$args) if (ref($handler) eq 'CODE');
    return "\t$mnemonic$mod\t$args";
}

# Run the compiler driver named by $ENV{CC} on an empty input with -Wa,-v
# and remember whether its output mentions "GNU assembler".
$gnuas = 1
    if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
        =~ /GNU assembler/);

# Post-process the generated code line by line and print the result.
foreach (split("\n",$code)) {
    # replace every `...` span with the value of the Perl expression in it
    s/\`([^\`]*)\`/eval $1/ge;

    # At most one of the rewrites in this block is applied to a line: the
    # first substitution that succeeds ends the block.
    REWRITE: {
        # shd with an immediate above 31 becomes a rotation: the register
        # operands are swapped and the count is reduced by 32
        last REWRITE if
            s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
                $3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32)
                      : sprintf("shd\t%$1,%$2,%d",$3)/e;

        # translate made up instructions: _ror, _shr, _align, _shl
        last REWRITE if
            s/_ror(\s+)(%r[0-9]+),/
                ($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e;

        last REWRITE if
            s/_shr(\s+%r[0-9]+),([0-9]+),/
                $SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
                       : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e;

        last REWRITE if
            s/_align(\s+%r[0-9]+,%r[0-9]+),/
                ($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e;

        s/_shl(\s+%r[0-9]+),([0-9]+),/
            $SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
                       : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;
    }

    if ($SIZE_T==4) {
        # route the instruction through assemble(), then spell the
        # "cmpb,*" form as "comb,"
        s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
        s/cmpb,\*/comb,/;
    }

    if ($SIZE_T==8) {
        if ($gnuas) {
            # directive spellings adjusted when GNU assembler was detected
            s/(\.LEVEL\s+2\.0)W/$1w/;
            s/\.SPACE\s+\$TEXT\$/.text/;
            s/\.SUBSPA.*//;
        }
        s/\bbv\b/bve/;
    }

    print $_,"\n";
}

# a failed close is fatal, so buffered write errors are not silently lost
close(STDOUT) or die "error closing STDOUT: $!";