#! /usr/bin/env perl
# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA2 block procedures for MIPS.

# October 2010.
#
# SHA256 performance improvement on MIPS R5000 CPU is ~27% over gcc-
# generated code in o32 build and ~55% in n32/64 build. SHA512 [which
# for now can only be compiled for MIPS64 ISA] improvement is modest
# ~17%, but it comes for free, because it's same instruction sequence.
# Improvement coefficients are for aligned input.

# September 2012.
#
# Add MIPS[32|64]R2 code (>25% fewer instructions).
######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in an ABI-neutral
# manner. Therefore let's stick to the NUBI register layout.
#
# Each symbolic name below is bound to the textual register operand
# ("$<number>") that gets interpolated into the generated assembly.
#
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. The following coding rules
# facilitate interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp [o32 can be
#   excluded from the rule, because it's specified volatile];
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference, here is the register layout for the N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
# Command line: [flavour] [output-file]
#
# $output is the last argument if it looks like a file (it has an extension);
# $flavour is the first argument if it doesn't look like a file.
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
# supported flavours are o32,n32,64,nubi32,nubi64, default is o32
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : "o32";

# Pick the pointer-arithmetic and register save/restore mnemonics, and the
# size of one saved-register slot, according to the requested ABI flavour.
if ($flavour =~ /64|n32/i) {
    $PTR_LA="dla";
    $PTR_ADD="daddu";	# incidentally works even on n32
    $PTR_SUB="dsubu";	# incidentally works even on n32
    $REG_S="sd";
    $REG_L="ld";
    $PTR_SLL="dsll";	# incidentally works even on n32
    $SZREG=8;
} else {
    $PTR_LA="la";
    $PTR_ADD="addu";
    $PTR_SUB="subu";
    $REG_S="sw";
    $REG_L="lw";
    $PTR_SLL="sll";
    $SZREG=4;
}
# Register operand passed to the .cpload/.cpsetup directives emitted below.
$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
#
# <appro@openssl.org>
#
######################################################################

# Target endianness: when $ENV{CC} is set, feed the token MIPSEB through the
# compiler's preprocessor. If the literal token survives, the target is
# treated as little-endian (presumably a big-endian compiler predefines
# MIPSEB as a macro, so the token gets rewritten -- confirm for new
# toolchains).
$big_endian=(`echo MIPSEB | $ENV{CC} -E -`=~/MIPSEB/)?0:1 if ($ENV{CC});

# Redirect STDOUT only when an output file was actually given; otherwise the
# generated assembly goes to the inherited STDOUT.
if (defined($output)) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# No compiler to ask: fall back to the byte order of the host running this
# script (a big-endian 32-bit 1 unpacks natively as 1 only on big-endian).
if (!defined($big_endian)) { $big_endian=(unpack('L',pack('N',1))==1); }

# The hash variant is chosen from the output file name: anything containing
# "512" generates SHA-512, everything else (including no file) SHA-256.
if (defined($output) && $output =~ /512/) {
    $label="512";
    $SZ=8;
    $LD="ld";		# load from memory
    $ST="sd";		# store to memory
    $SLL="dsll";	# shift left logical
    $SRL="dsrl";	# shift right logical
    $ADDU="daddu";
    $ROTR="drotr";
    @Sigma0=(28,34,39);
    @Sigma1=(14,18,41);
    @sigma0=( 7, 1, 8);	# right shift first
    @sigma1=( 6,19,61);	# right shift first
    $lastK=0x817;	# low 12 bits of the final K512 constant
    $rounds=80;
} else {
    $label="256";
    $SZ=4;
    $LD="lw";		# load from memory
    $ST="sw";		# store to memory
    $SLL="sll";		# shift left logical
    $SRL="srl";		# shift right logical
    $ADDU="addu";
    $ROTR="rotr";
    @Sigma0=( 2,13,22);
    @Sigma1=( 6,11,25);
    @sigma0=( 3, 7,18);	# right shift first
    @sigma1=(10,17,19);	# right shift first
    $lastK=0x8f2;	# low 12 bits of the final K256 constant
    $rounds=64;
}

# Byte offsets, within one $SZ-byte input word, handed to the ${LD}l/${LD}r
# unaligned-load pair.
$MSB = $big_endian ? 0 : ($SZ-1);
$LSB = ($SZ-1)&~$MSB;

# Working variables a..h and the 16-entry message-schedule window, expressed
# as register operands. Both arrays are rotated after every generated round.
@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("\$$_",(1,2,3,7,24,25,30,31));
@X=map("\$$_",(8..23));

$ctx=$a0;
$inp=$a1;
# $Ktbl shares the $len register: the prologue converts $len into an
# end-of-input pointer in @X[15] before $Ktbl is loaded.
$len=$a2;	$Ktbl=$len;

# BODY_00_15($i, $a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends the assembly for one compression round to $code.
#   $i       - round index, used for the K table offset, the ring-buffer
#              slot and the emitted comments;
#   $a..$h   - register operands currently playing the roles a..h.
# For $i<15 the next input word is loaded into @X[1]; on little-endian
# targets the current word @X[0] is byte-swapped for $i<16; for $i>=13 the
# word three rounds ahead is prefetched from the on-stack ring buffer.
# @X[4..7] serve as scratch registers.
sub BODY_00_15 {
    my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
    my ($T1,$tmp0,$tmp1,$tmp2)=@X[4..7];

    $code.=<<___ if ($i<15);
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	${LD}	@X[1],`($i+1)*$SZ`($inp)
#else
	${LD}l	@X[1],`($i+1)*$SZ+$MSB`($inp)
	${LD}r	@X[1],`($i+1)*$SZ+$LSB`($inp)
#endif
___
    $code.=<<___ if (!$big_endian && $i<16 && $SZ==4);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	wsbh	@X[0],@X[0]		# byte swap($i)
	rotr	@X[0],@X[0],16
#else
	srl	$tmp0,@X[0],24		# byte swap($i)
	srl	$tmp1,@X[0],8
	andi	$tmp2,@X[0],0xFF00
	sll	@X[0],@X[0],24
	andi	$tmp1,0xFF00
	sll	$tmp2,$tmp2,8
	or	@X[0],$tmp0
	or	$tmp1,$tmp2
	or	@X[0],$tmp1
#endif
___
    $code.=<<___ if (!$big_endian && $i<16 && $SZ==8);
#if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	@X[0],@X[0]		# byte swap($i)
	dshd	@X[0],@X[0]
#else
	ori	$tmp0,$zero,0xFF
	dsll	$tmp2,$tmp0,32
	or	$tmp0,$tmp2		# 0x000000FF000000FF
	and	$tmp1,@X[0],$tmp0	# byte swap($i)
	dsrl	$tmp2,@X[0],24
	dsll	$tmp1,24
	and	$tmp2,$tmp0
	dsll	$tmp0,8			# 0x0000FF000000FF00
	or	$tmp1,$tmp2
	and	$tmp2,@X[0],$tmp0
	dsrl	@X[0],8
	dsll	$tmp2,8
	and	@X[0],$tmp0
	or	$tmp1,$tmp2
	or	@X[0],$tmp1
	dsrl	$tmp1,@X[0],32
	dsll	@X[0],32
	or	@X[0],$tmp1
#endif
___
    $code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	xor	$tmp2,$f,$g			# $i
	$ROTR	$tmp0,$e,@Sigma1[0]
	$ADDU	$T1,$X[0],$h
	$ROTR	$tmp1,$e,@Sigma1[1]
	and	$tmp2,$e
	$ROTR	$h,$e,@Sigma1[2]
	xor	$tmp0,$tmp1
	$ROTR	$tmp1,$a,@Sigma0[0]
	xor	$tmp2,$g			# Ch(e,f,g)
	xor	$tmp0,$h			# Sigma1(e)

	$ROTR	$h,$a,@Sigma0[1]
	$ADDU	$T1,$tmp2
	$LD	$tmp2,`$i*$SZ`($Ktbl)		# K[$i]
	xor	$h,$tmp1
	$ROTR	$tmp1,$a,@Sigma0[2]
	$ADDU	$T1,$tmp0
	and	$tmp0,$b,$c
	xor	$h,$tmp1			# Sigma0(a)
	xor	$tmp1,$b,$c
#else
	$ADDU	$T1,$X[0],$h			# $i
	$SRL	$h,$e,@Sigma1[0]
	xor	$tmp2,$f,$g
	$SLL	$tmp1,$e,`$SZ*8-@Sigma1[2]`
	and	$tmp2,$e
	$SRL	$tmp0,$e,@Sigma1[1]
	xor	$h,$tmp1
	$SLL	$tmp1,$e,`$SZ*8-@Sigma1[1]`
	xor	$h,$tmp0
	$SRL	$tmp0,$e,@Sigma1[2]
	xor	$h,$tmp1
	$SLL	$tmp1,$e,`$SZ*8-@Sigma1[0]`
	xor	$h,$tmp0
	xor	$tmp2,$g			# Ch(e,f,g)
	xor	$tmp0,$tmp1,$h			# Sigma1(e)

	$SRL	$h,$a,@Sigma0[0]
	$ADDU	$T1,$tmp2
	$LD	$tmp2,`$i*$SZ`($Ktbl)		# K[$i]
	$SLL	$tmp1,$a,`$SZ*8-@Sigma0[2]`
	$ADDU	$T1,$tmp0
	$SRL	$tmp0,$a,@Sigma0[1]
	xor	$h,$tmp1
	$SLL	$tmp1,$a,`$SZ*8-@Sigma0[1]`
	xor	$h,$tmp0
	$SRL	$tmp0,$a,@Sigma0[2]
	xor	$h,$tmp1
	$SLL	$tmp1,$a,`$SZ*8-@Sigma0[0]`
	xor	$h,$tmp0
	and	$tmp0,$b,$c
	xor	$h,$tmp1			# Sigma0(a)
	xor	$tmp1,$b,$c
#endif
	$ST	@X[0],`($i%16)*$SZ`($sp)	# offload to ring buffer
	$ADDU	$h,$tmp0
	and	$tmp1,$a
	$ADDU	$T1,$tmp2			# +=K[$i]
	$ADDU	$h,$tmp1			# +=Maj(a,b,c)
	$ADDU	$d,$T1
	$ADDU	$h,$T1
___
    $code.=<<___ if ($i>=13);
	$LD	@X[3],`(($i+3)%16)*$SZ`($sp)	# prefetch from ring buffer
___
}

# BODY_16_XX($i, $a,$b,$c,$d,$e,$f,$g,$h)
#
# Appends the message-schedule update for round $i (sigma0 of @X[1], sigma1
# of @X[14] and @X[9] are added into @X[0]) and then emits the round itself
# through BODY_00_15 with the same arguments.
sub BODY_16_XX {
    my ($i)=@_;
    my ($tmp0,$tmp1,$tmp2,$tmp3)=@X[4..7];

    $code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	$SRL	$tmp2,@X[1],@sigma0[0]		# Xupdate($i)
	$ROTR	$tmp0,@X[1],@sigma0[1]
	$ADDU	@X[0],@X[9]			# +=X[i+9]
	xor	$tmp2,$tmp0
	$ROTR	$tmp0,@X[1],@sigma0[2]

	$SRL	$tmp3,@X[14],@sigma1[0]
	$ROTR	$tmp1,@X[14],@sigma1[1]
	xor	$tmp2,$tmp0			# sigma0(X[i+1])
	$ROTR	$tmp0,@X[14],@sigma1[2]
	xor	$tmp3,$tmp1
	$ADDU	@X[0],$tmp2
#else
	$SRL	$tmp2,@X[1],@sigma0[0]		# Xupdate($i)
	$ADDU	@X[0],@X[9]			# +=X[i+9]
	$SLL	$tmp1,@X[1],`$SZ*8-@sigma0[2]`
	$SRL	$tmp0,@X[1],@sigma0[1]
	xor	$tmp2,$tmp1
	$SLL	$tmp1,`@sigma0[2]-@sigma0[1]`
	xor	$tmp2,$tmp0
	$SRL	$tmp0,@X[1],@sigma0[2]
	xor	$tmp2,$tmp1

	$SRL	$tmp3,@X[14],@sigma1[0]
	xor	$tmp2,$tmp0			# sigma0(X[i+1])
	$SLL	$tmp1,@X[14],`$SZ*8-@sigma1[2]`
	$ADDU	@X[0],$tmp2
	$SRL	$tmp0,@X[14],@sigma1[1]
	xor	$tmp3,$tmp1
	$SLL	$tmp1,`@sigma1[2]-@sigma1[1]`
	xor	$tmp3,$tmp0
	$SRL	$tmp0,@X[14],@sigma1[2]
	xor	$tmp3,$tmp1
#endif
	xor	$tmp3,$tmp0			# sigma1(X[i+14])
	$ADDU	@X[0],$tmp3
___
    BODY_00_15(@_);
}

# Stack frame: a 16-word ring buffer for the message schedule followed by
# 16 register-sized slots (the first of which holds the end-of-input
# pointer, the topmost ones the saved registers).
$FRAMESIZE=16*$SZ+16*$SZREG;
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";

$code.=<<___;
#include "mips_arch.h"

.text
.set	noat
#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
.option	pic2
#endif

.align	5
.globl	sha${label}_block_data_order
.ent	sha${label}_block_data_order
sha${label}_block_data_order:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,$FRAMESIZE-11*$SZREG($sp)
	$REG_S	$s2,$FRAMESIZE-12*$SZREG($sp)
	$REG_S	$s1,$FRAMESIZE-13*$SZREG($sp)
	$REG_S	$s0,$FRAMESIZE-14*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
___
$code.=<<___;
	$PTR_SLL @X[15],$len,`log(16*$SZ)/log(2)`
___
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Ktbl
	.cpsetup	$pf,$zero,sha${label}_block_data_order
___
$code.=<<___;
	.set	reorder
	$PTR_LA	$Ktbl,K${label}		# PIC-ified 'load address'

	$LD	$A,0*$SZ($ctx)		# load context
	$LD	$B,1*$SZ($ctx)
	$LD	$C,2*$SZ($ctx)
	$LD	$D,3*$SZ($ctx)
	$LD	$E,4*$SZ($ctx)
	$LD	$F,5*$SZ($ctx)
	$LD	$G,6*$SZ($ctx)
	$LD	$H,7*$SZ($ctx)

	$PTR_ADD @X[15],$inp		# pointer to the end of input
	$REG_S	@X[15],16*$SZ($sp)
	b	.Loop

.align	5
.Loop:
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	${LD}	@X[0],($inp)
#else
	${LD}l	@X[0],$MSB($inp)
	${LD}r	@X[0],$LSB($inp)
#endif
___
# Rounds 0..15 consume the input block directly. After each generated round
# the role assignment of a..h and the schedule window are rotated by one.
for my $i (0..15) {
    BODY_00_15($i,@V);
    unshift(@V,pop(@V));
    push(@X,shift(@X));
}
$code.=<<___;
	b	.L16_xx
.align	4
.L16_xx:
___
# Rounds 16..31 form the body of the .L16_xx loop, which is re-executed with
# $Ktbl advanced by 16 entries until the last K constant has been used.
# After these 32 rotations @V and @X are back in their initial order.
for my $i (16..31) {
    BODY_16_XX($i,@V);
    unshift(@V,pop(@V));
    push(@X,shift(@X));
}
# @X[6] is the register the final generated round used for its K word; its
# low 12 bits are compared against $lastK to detect the end of the K table.
$code.=<<___;
	and	@X[6],0xfff
	li	@X[7],$lastK
	.set	noreorder
	bne	@X[6],@X[7],.L16_xx
	$PTR_ADD $Ktbl,16*$SZ		# Ktbl+=16

	$REG_L	@X[15],16*$SZ($sp)	# restore pointer to the end of input
	$LD	@X[0],0*$SZ($ctx)
	$LD	@X[1],1*$SZ($ctx)
	$LD	@X[2],2*$SZ($ctx)
	$PTR_ADD $inp,16*$SZ
	$LD	@X[3],3*$SZ($ctx)
	$ADDU	$A,@X[0]
	$LD	@X[4],4*$SZ($ctx)
	$ADDU	$B,@X[1]
	$LD	@X[5],5*$SZ($ctx)
	$ADDU	$C,@X[2]
	$LD	@X[6],6*$SZ($ctx)
	$ADDU	$D,@X[3]
	$LD	@X[7],7*$SZ($ctx)
	$ADDU	$E,@X[4]
	$ST	$A,0*$SZ($ctx)
	$ADDU	$F,@X[5]
	$ST	$B,1*$SZ($ctx)
	$ADDU	$G,@X[6]
	$ST	$C,2*$SZ($ctx)
	$ADDU	$H,@X[7]
	$ST	$D,3*$SZ($ctx)
	$ST	$E,4*$SZ($ctx)
	$ST	$F,5*$SZ($ctx)
	$ST	$G,6*$SZ($ctx)
	$ST	$H,7*$SZ($ctx)

	bne	$inp,@X[15],.Loop
	$PTR_SUB $Ktbl,`($rounds-16)*$SZ`	# rewind $Ktbl

	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,$FRAMESIZE-11*$SZREG($sp)
	$REG_L	$s2,$FRAMESIZE-12*$SZREG($sp)
	$REG_L	$s1,$FRAMESIZE-13*$SZREG($sp)
	$REG_L	$s0,$FRAMESIZE-14*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
___
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	sha${label}_block_data_order

.rdata
.align	5
K${label}:
___
# Round-constant table for the selected variant.
if ($SZ==4) {
$code.=<<___;
	.word	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
	.word	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
	.word	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
	.word	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
	.word	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
	.word	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
	.word	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
	.word	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
	.word	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
	.word	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
	.word	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
	.word	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
	.word	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
	.word	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
	.word	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
	.word	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
___
} else {
$code.=<<___;
	.dword	0x428a2f98d728ae22, 0x7137449123ef65cd
	.dword	0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
	.dword	0x3956c25bf348b538, 0x59f111f1b605d019
	.dword	0x923f82a4af194f9b, 0xab1c5ed5da6d8118
	.dword	0xd807aa98a3030242, 0x12835b0145706fbe
	.dword	0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
	.dword	0x72be5d74f27b896f, 0x80deb1fe3b1696b1
	.dword	0x9bdc06a725c71235, 0xc19bf174cf692694
	.dword	0xe49b69c19ef14ad2, 0xefbe4786384f25e3
	.dword	0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
	.dword	0x2de92c6f592b0275, 0x4a7484aa6ea6e483
	.dword	0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
	.dword	0x983e5152ee66dfab, 0xa831c66d2db43210
	.dword	0xb00327c898fb213f, 0xbf597fc7beef0ee4
	.dword	0xc6e00bf33da88fc2, 0xd5a79147930aa725
	.dword	0x06ca6351e003826f, 0x142929670a0e6e70
	.dword	0x27b70a8546d22ffc, 0x2e1b21385c26c926
	.dword	0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
	.dword	0x650a73548baf63de, 0x766a0abb3c77b2a8
	.dword	0x81c2c92e47edaee6, 0x92722c851482353b
	.dword	0xa2bfe8a14cf10364, 0xa81a664bbc423001
	.dword	0xc24b8b70d0f89791, 0xc76c51a30654be30
	.dword	0xd192e819d6ef5218, 0xd69906245565a910
	.dword	0xf40e35855771202a, 0x106aa07032bbd1b8
	.dword	0x19a4c116b8d2d0c8, 0x1e376c085141ab53
	.dword	0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
	.dword	0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
	.dword	0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
	.dword	0x748f82ee5defb2fc, 0x78a5636f43172f60
	.dword	0x84c87814a1f0ab72, 0x8cc702081a6439ec
	.dword	0x90befffa23631e28, 0xa4506cebde82bde9
	.dword	0xbef9a3f7b2c67915, 0xc67178f2e372532b
	.dword	0xca273eceea26619c, 0xd186b8c721c0c207
	.dword	0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
	.dword	0x06f067aa72176fba, 0x0a637dc5a2c898a6
	.dword	0x113f9804bef90dae, 0x1b710b35131c471b
	.dword	0x28db77f523047d84, 0x32caab7b40c72493
	.dword	0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
	.dword	0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
	.dword	0x5fcb6fab3ad6faec, 0x6c44198c4a475817
___
}
$code.=<<___;
.asciiz	"SHA${label} for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
.align	5

___

# Every back-quoted span in the accumulated text is a constant arithmetic
# expression; replace each with its evaluated value before printing.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";