#! /usr/bin/env perl
# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
#
# Permission to use under GPL terms is granted.
# ====================================================================

# SHA256 block procedure for ARMv4. May 2007.

# Performance is ~2x better than gcc 3.4 generated code and in "abso-
# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
# byte [on single-issue Xscale PXA250 core].

# July 2010.
#
# Rescheduling for dual-issue pipeline resulted in 22% improvement on
# Cortex A8 core and ~20 cycles per processed byte.

# February 2011.
31e1051a39Sopenharmony_ci# 32e1051a39Sopenharmony_ci# Profiler-assisted and platform-specific optimization resulted in 16% 33e1051a39Sopenharmony_ci# improvement on Cortex A8 core and ~15.4 cycles per processed byte. 34e1051a39Sopenharmony_ci 35e1051a39Sopenharmony_ci# September 2013. 36e1051a39Sopenharmony_ci# 37e1051a39Sopenharmony_ci# Add NEON implementation. On Cortex A8 it was measured to process one 38e1051a39Sopenharmony_ci# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon 39e1051a39Sopenharmony_ci# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only 40e1051a39Sopenharmony_ci# code (meaning that latter performs sub-optimally, nothing was done 41e1051a39Sopenharmony_ci# about it). 42e1051a39Sopenharmony_ci 43e1051a39Sopenharmony_ci# May 2014. 44e1051a39Sopenharmony_ci# 45e1051a39Sopenharmony_ci# Add ARMv8 code path performing at 2.0 cpb on Apple A7. 46e1051a39Sopenharmony_ci 47e1051a39Sopenharmony_ci# $output is the last argument if it looks like a file (it has an extension) 48e1051a39Sopenharmony_ci# $flavour is the first argument if it doesn't look like a file 49e1051a39Sopenharmony_ci$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; 50e1051a39Sopenharmony_ci$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? 
shift : undef; 51e1051a39Sopenharmony_ci 52e1051a39Sopenharmony_ciif ($flavour && $flavour ne "void") { 53e1051a39Sopenharmony_ci $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 54e1051a39Sopenharmony_ci ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or 55e1051a39Sopenharmony_ci ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or 56e1051a39Sopenharmony_ci die "can't locate arm-xlate.pl"; 57e1051a39Sopenharmony_ci 58e1051a39Sopenharmony_ci open STDOUT,"| \"$^X\" $xlate $flavour \"$output\"" 59e1051a39Sopenharmony_ci or die "can't call $xlate: $!"; 60e1051a39Sopenharmony_ci} else { 61e1051a39Sopenharmony_ci $output and open STDOUT,">$output"; 62e1051a39Sopenharmony_ci} 63e1051a39Sopenharmony_ci 64e1051a39Sopenharmony_ci$ctx="r0"; $t0="r0"; 65e1051a39Sopenharmony_ci$inp="r1"; $t4="r1"; 66e1051a39Sopenharmony_ci$len="r2"; $t1="r2"; 67e1051a39Sopenharmony_ci$T1="r3"; $t3="r3"; 68e1051a39Sopenharmony_ci$A="r4"; 69e1051a39Sopenharmony_ci$B="r5"; 70e1051a39Sopenharmony_ci$C="r6"; 71e1051a39Sopenharmony_ci$D="r7"; 72e1051a39Sopenharmony_ci$E="r8"; 73e1051a39Sopenharmony_ci$F="r9"; 74e1051a39Sopenharmony_ci$G="r10"; 75e1051a39Sopenharmony_ci$H="r11"; 76e1051a39Sopenharmony_ci@V=($A,$B,$C,$D,$E,$F,$G,$H); 77e1051a39Sopenharmony_ci$t2="r12"; 78e1051a39Sopenharmony_ci$Ktbl="r14"; 79e1051a39Sopenharmony_ci 80e1051a39Sopenharmony_ci@Sigma0=( 2,13,22); 81e1051a39Sopenharmony_ci@Sigma1=( 6,11,25); 82e1051a39Sopenharmony_ci@sigma0=( 7,18, 3); 83e1051a39Sopenharmony_ci@sigma1=(17,19,10); 84e1051a39Sopenharmony_ci 85e1051a39Sopenharmony_cisub BODY_00_15 { 86e1051a39Sopenharmony_cimy ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; 87e1051a39Sopenharmony_ci 88e1051a39Sopenharmony_ci$code.=<<___ if ($i<16); 89e1051a39Sopenharmony_ci#if __ARM_ARCH__>=7 90e1051a39Sopenharmony_ci @ ldr $t1,[$inp],#4 @ $i 91e1051a39Sopenharmony_ci# if $i==15 92e1051a39Sopenharmony_ci str $inp,[sp,#17*4] @ make room for $t4 93e1051a39Sopenharmony_ci# endif 94e1051a39Sopenharmony_ci eor 
$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` 95e1051a39Sopenharmony_ci add $a,$a,$t2 @ h+=Maj(a,b,c) from the past 96e1051a39Sopenharmony_ci eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) 97e1051a39Sopenharmony_ci# ifndef __ARMEB__ 98e1051a39Sopenharmony_ci rev $t1,$t1 99e1051a39Sopenharmony_ci# endif 100e1051a39Sopenharmony_ci#else 101e1051a39Sopenharmony_ci @ ldrb $t1,[$inp,#3] @ $i 102e1051a39Sopenharmony_ci add $a,$a,$t2 @ h+=Maj(a,b,c) from the past 103e1051a39Sopenharmony_ci ldrb $t2,[$inp,#2] 104e1051a39Sopenharmony_ci ldrb $t0,[$inp,#1] 105e1051a39Sopenharmony_ci orr $t1,$t1,$t2,lsl#8 106e1051a39Sopenharmony_ci ldrb $t2,[$inp],#4 107e1051a39Sopenharmony_ci orr $t1,$t1,$t0,lsl#16 108e1051a39Sopenharmony_ci# if $i==15 109e1051a39Sopenharmony_ci str $inp,[sp,#17*4] @ make room for $t4 110e1051a39Sopenharmony_ci# endif 111e1051a39Sopenharmony_ci eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` 112e1051a39Sopenharmony_ci orr $t1,$t1,$t2,lsl#24 113e1051a39Sopenharmony_ci eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) 114e1051a39Sopenharmony_ci#endif 115e1051a39Sopenharmony_ci___ 116e1051a39Sopenharmony_ci$code.=<<___; 117e1051a39Sopenharmony_ci ldr $t2,[$Ktbl],#4 @ *K256++ 118e1051a39Sopenharmony_ci add $h,$h,$t1 @ h+=X[i] 119e1051a39Sopenharmony_ci str $t1,[sp,#`$i%16`*4] 120e1051a39Sopenharmony_ci eor $t1,$f,$g 121e1051a39Sopenharmony_ci add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e) 122e1051a39Sopenharmony_ci and $t1,$t1,$e 123e1051a39Sopenharmony_ci add $h,$h,$t2 @ h+=K256[i] 124e1051a39Sopenharmony_ci eor $t1,$t1,$g @ Ch(e,f,g) 125e1051a39Sopenharmony_ci eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]` 126e1051a39Sopenharmony_ci add $h,$h,$t1 @ h+=Ch(e,f,g) 127e1051a39Sopenharmony_ci#if $i==31 128e1051a39Sopenharmony_ci and $t2,$t2,#0xff 129e1051a39Sopenharmony_ci cmp $t2,#0xf2 @ done? 
130e1051a39Sopenharmony_ci#endif 131e1051a39Sopenharmony_ci#if $i<15 132e1051a39Sopenharmony_ci# if __ARM_ARCH__>=7 133e1051a39Sopenharmony_ci ldr $t1,[$inp],#4 @ prefetch 134e1051a39Sopenharmony_ci# else 135e1051a39Sopenharmony_ci ldrb $t1,[$inp,#3] 136e1051a39Sopenharmony_ci# endif 137e1051a39Sopenharmony_ci eor $t2,$a,$b @ a^b, b^c in next round 138e1051a39Sopenharmony_ci#else 139e1051a39Sopenharmony_ci ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx 140e1051a39Sopenharmony_ci eor $t2,$a,$b @ a^b, b^c in next round 141e1051a39Sopenharmony_ci ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx 142e1051a39Sopenharmony_ci#endif 143e1051a39Sopenharmony_ci eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a) 144e1051a39Sopenharmony_ci and $t3,$t3,$t2 @ (b^c)&=(a^b) 145e1051a39Sopenharmony_ci add $d,$d,$h @ d+=h 146e1051a39Sopenharmony_ci eor $t3,$t3,$b @ Maj(a,b,c) 147e1051a39Sopenharmony_ci add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a) 148e1051a39Sopenharmony_ci @ add $h,$h,$t3 @ h+=Maj(a,b,c) 149e1051a39Sopenharmony_ci___ 150e1051a39Sopenharmony_ci ($t2,$t3)=($t3,$t2); 151e1051a39Sopenharmony_ci} 152e1051a39Sopenharmony_ci 153e1051a39Sopenharmony_cisub BODY_16_XX { 154e1051a39Sopenharmony_cimy ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; 155e1051a39Sopenharmony_ci 156e1051a39Sopenharmony_ci$code.=<<___; 157e1051a39Sopenharmony_ci @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i 158e1051a39Sopenharmony_ci @ ldr $t4,[sp,#`($i+14)%16`*4] 159e1051a39Sopenharmony_ci mov $t0,$t1,ror#$sigma0[0] 160e1051a39Sopenharmony_ci add $a,$a,$t2 @ h+=Maj(a,b,c) from the past 161e1051a39Sopenharmony_ci mov $t2,$t4,ror#$sigma1[0] 162e1051a39Sopenharmony_ci eor $t0,$t0,$t1,ror#$sigma0[1] 163e1051a39Sopenharmony_ci eor $t2,$t2,$t4,ror#$sigma1[1] 164e1051a39Sopenharmony_ci eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) 165e1051a39Sopenharmony_ci ldr $t1,[sp,#`($i+0)%16`*4] 166e1051a39Sopenharmony_ci eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14]) 167e1051a39Sopenharmony_ci ldr $t4,[sp,#`($i+9)%16`*4] 
168e1051a39Sopenharmony_ci 169e1051a39Sopenharmony_ci add $t2,$t2,$t0 170e1051a39Sopenharmony_ci eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15 171e1051a39Sopenharmony_ci add $t1,$t1,$t2 172e1051a39Sopenharmony_ci eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) 173e1051a39Sopenharmony_ci add $t1,$t1,$t4 @ X[i] 174e1051a39Sopenharmony_ci___ 175e1051a39Sopenharmony_ci &BODY_00_15(@_); 176e1051a39Sopenharmony_ci} 177e1051a39Sopenharmony_ci 178e1051a39Sopenharmony_ci$code=<<___; 179e1051a39Sopenharmony_ci#ifndef __KERNEL__ 180e1051a39Sopenharmony_ci# include "arm_arch.h" 181e1051a39Sopenharmony_ci#else 182e1051a39Sopenharmony_ci# define __ARM_ARCH__ __LINUX_ARM_ARCH__ 183e1051a39Sopenharmony_ci# define __ARM_MAX_ARCH__ 7 184e1051a39Sopenharmony_ci#endif 185e1051a39Sopenharmony_ci 186e1051a39Sopenharmony_ci#if defined(__thumb2__) 187e1051a39Sopenharmony_ci.syntax unified 188e1051a39Sopenharmony_ci.thumb 189e1051a39Sopenharmony_ci#else 190e1051a39Sopenharmony_ci.code 32 191e1051a39Sopenharmony_ci#endif 192e1051a39Sopenharmony_ci 193e1051a39Sopenharmony_ci.text 194e1051a39Sopenharmony_ci 195e1051a39Sopenharmony_ci.type K256,%object 196e1051a39Sopenharmony_ci.align 5 197e1051a39Sopenharmony_ciK256: 198e1051a39Sopenharmony_ci.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 199e1051a39Sopenharmony_ci.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 200e1051a39Sopenharmony_ci.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 201e1051a39Sopenharmony_ci.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 202e1051a39Sopenharmony_ci.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc 203e1051a39Sopenharmony_ci.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da 204e1051a39Sopenharmony_ci.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 205e1051a39Sopenharmony_ci.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 206e1051a39Sopenharmony_ci.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 207e1051a39Sopenharmony_ci.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 
208e1051a39Sopenharmony_ci.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 209e1051a39Sopenharmony_ci.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 210e1051a39Sopenharmony_ci.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 211e1051a39Sopenharmony_ci.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 212e1051a39Sopenharmony_ci.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 213e1051a39Sopenharmony_ci.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 214e1051a39Sopenharmony_ci.size K256,.-K256 215e1051a39Sopenharmony_ci.word 0 @ terminator 216e1051a39Sopenharmony_ci#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) 217e1051a39Sopenharmony_ci.LOPENSSL_armcap: 218e1051a39Sopenharmony_ci# ifdef _WIN32 219e1051a39Sopenharmony_ci.word OPENSSL_armcap_P 220e1051a39Sopenharmony_ci# else 221e1051a39Sopenharmony_ci.word OPENSSL_armcap_P-.Lsha256_block_data_order 222e1051a39Sopenharmony_ci# endif 223e1051a39Sopenharmony_ci#endif 224e1051a39Sopenharmony_ci.align 5 225e1051a39Sopenharmony_ci 226e1051a39Sopenharmony_ci.global sha256_block_data_order 227e1051a39Sopenharmony_ci.type sha256_block_data_order,%function 228e1051a39Sopenharmony_cisha256_block_data_order: 229e1051a39Sopenharmony_ci.Lsha256_block_data_order: 230e1051a39Sopenharmony_ci#if __ARM_ARCH__<7 && !defined(__thumb2__) 231e1051a39Sopenharmony_ci sub r3,pc,#8 @ sha256_block_data_order 232e1051a39Sopenharmony_ci#else 233e1051a39Sopenharmony_ci adr r3,.Lsha256_block_data_order 234e1051a39Sopenharmony_ci#endif 235e1051a39Sopenharmony_ci#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) 236e1051a39Sopenharmony_ci ldr r12,.LOPENSSL_armcap 237e1051a39Sopenharmony_ci# if !defined(_WIN32) 238e1051a39Sopenharmony_ci ldr r12,[r3,r12] @ OPENSSL_armcap_P 239e1051a39Sopenharmony_ci# endif 240e1051a39Sopenharmony_ci# if defined(__APPLE__) || defined(_WIN32) 241e1051a39Sopenharmony_ci ldr r12,[r12] 242e1051a39Sopenharmony_ci# endif 243e1051a39Sopenharmony_ci tst r12,#ARMV8_SHA256 244e1051a39Sopenharmony_ci bne .LARMv8 
245e1051a39Sopenharmony_ci tst r12,#ARMV7_NEON 246e1051a39Sopenharmony_ci bne .LNEON 247e1051a39Sopenharmony_ci#endif 248e1051a39Sopenharmony_ci add $len,$inp,$len,lsl#6 @ len to point at the end of inp 249e1051a39Sopenharmony_ci stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} 250e1051a39Sopenharmony_ci ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} 251e1051a39Sopenharmony_ci sub $Ktbl,r3,#256+32 @ K256 252e1051a39Sopenharmony_ci sub sp,sp,#16*4 @ alloca(X[16]) 253e1051a39Sopenharmony_ci.Loop: 254e1051a39Sopenharmony_ci# if __ARM_ARCH__>=7 255e1051a39Sopenharmony_ci ldr $t1,[$inp],#4 256e1051a39Sopenharmony_ci# else 257e1051a39Sopenharmony_ci ldrb $t1,[$inp,#3] 258e1051a39Sopenharmony_ci# endif 259e1051a39Sopenharmony_ci eor $t3,$B,$C @ magic 260e1051a39Sopenharmony_ci eor $t2,$t2,$t2 261e1051a39Sopenharmony_ci___ 262e1051a39Sopenharmony_cifor($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } 263e1051a39Sopenharmony_ci$code.=".Lrounds_16_xx:\n"; 264e1051a39Sopenharmony_cifor (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } 265e1051a39Sopenharmony_ci$code.=<<___; 266e1051a39Sopenharmony_ci#ifdef __thumb2__ 267e1051a39Sopenharmony_ci ite eq @ Thumb2 thing, sanity check in ARM 268e1051a39Sopenharmony_ci#endif 269e1051a39Sopenharmony_ci ldreq $t3,[sp,#16*4] @ pull ctx 270e1051a39Sopenharmony_ci bne .Lrounds_16_xx 271e1051a39Sopenharmony_ci 272e1051a39Sopenharmony_ci add $A,$A,$t2 @ h+=Maj(a,b,c) from the past 273e1051a39Sopenharmony_ci ldr $t0,[$t3,#0] 274e1051a39Sopenharmony_ci ldr $t1,[$t3,#4] 275e1051a39Sopenharmony_ci ldr $t2,[$t3,#8] 276e1051a39Sopenharmony_ci add $A,$A,$t0 277e1051a39Sopenharmony_ci ldr $t0,[$t3,#12] 278e1051a39Sopenharmony_ci add $B,$B,$t1 279e1051a39Sopenharmony_ci ldr $t1,[$t3,#16] 280e1051a39Sopenharmony_ci add $C,$C,$t2 281e1051a39Sopenharmony_ci ldr $t2,[$t3,#20] 282e1051a39Sopenharmony_ci add $D,$D,$t0 283e1051a39Sopenharmony_ci ldr $t0,[$t3,#24] 284e1051a39Sopenharmony_ci add $E,$E,$t1 285e1051a39Sopenharmony_ci ldr $t1,[$t3,#28] 
286e1051a39Sopenharmony_ci add $F,$F,$t2 287e1051a39Sopenharmony_ci ldr $inp,[sp,#17*4] @ pull inp 288e1051a39Sopenharmony_ci ldr $t2,[sp,#18*4] @ pull inp+len 289e1051a39Sopenharmony_ci add $G,$G,$t0 290e1051a39Sopenharmony_ci add $H,$H,$t1 291e1051a39Sopenharmony_ci stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H} 292e1051a39Sopenharmony_ci cmp $inp,$t2 293e1051a39Sopenharmony_ci sub $Ktbl,$Ktbl,#256 @ rewind Ktbl 294e1051a39Sopenharmony_ci bne .Loop 295e1051a39Sopenharmony_ci 296e1051a39Sopenharmony_ci add sp,sp,#`16+3`*4 @ destroy frame 297e1051a39Sopenharmony_ci#if __ARM_ARCH__>=5 298e1051a39Sopenharmony_ci ldmia sp!,{r4-r11,pc} 299e1051a39Sopenharmony_ci#else 300e1051a39Sopenharmony_ci ldmia sp!,{r4-r11,lr} 301e1051a39Sopenharmony_ci tst lr,#1 302e1051a39Sopenharmony_ci moveq pc,lr @ be binary compatible with V4, yet 303e1051a39Sopenharmony_ci bx lr @ interoperable with Thumb ISA:-) 304e1051a39Sopenharmony_ci#endif 305e1051a39Sopenharmony_ci.size sha256_block_data_order,.-sha256_block_data_order 306e1051a39Sopenharmony_ci___ 307e1051a39Sopenharmony_ci###################################################################### 308e1051a39Sopenharmony_ci# NEON stuff 309e1051a39Sopenharmony_ci# 310e1051a39Sopenharmony_ci{{{ 311e1051a39Sopenharmony_cimy @X=map("q$_",(0..3)); 312e1051a39Sopenharmony_cimy ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25"); 313e1051a39Sopenharmony_cimy $Xfer=$t4; 314e1051a39Sopenharmony_cimy $j=0; 315e1051a39Sopenharmony_ci 316e1051a39Sopenharmony_cisub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } 317e1051a39Sopenharmony_cisub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } 318e1051a39Sopenharmony_ci 319e1051a39Sopenharmony_cisub AUTOLOAD() # thunk [simplified] x86-style perlasm 320e1051a39Sopenharmony_ci{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; 321e1051a39Sopenharmony_ci my $arg = pop; 322e1051a39Sopenharmony_ci $arg = "#$arg" if ($arg*1 eq $arg); 323e1051a39Sopenharmony_ci $code .= 
"\t$opcode\t".join(',',@_,$arg)."\n"; 324e1051a39Sopenharmony_ci} 325e1051a39Sopenharmony_ci 326e1051a39Sopenharmony_cisub Xupdate() 327e1051a39Sopenharmony_ci{ use integer; 328e1051a39Sopenharmony_ci my $body = shift; 329e1051a39Sopenharmony_ci my @insns = (&$body,&$body,&$body,&$body); 330e1051a39Sopenharmony_ci my ($a,$b,$c,$d,$e,$f,$g,$h); 331e1051a39Sopenharmony_ci 332e1051a39Sopenharmony_ci &vext_8 ($T0,@X[0],@X[1],4); # X[1..4] 333e1051a39Sopenharmony_ci eval(shift(@insns)); 334e1051a39Sopenharmony_ci eval(shift(@insns)); 335e1051a39Sopenharmony_ci eval(shift(@insns)); 336e1051a39Sopenharmony_ci &vext_8 ($T1,@X[2],@X[3],4); # X[9..12] 337e1051a39Sopenharmony_ci eval(shift(@insns)); 338e1051a39Sopenharmony_ci eval(shift(@insns)); 339e1051a39Sopenharmony_ci eval(shift(@insns)); 340e1051a39Sopenharmony_ci &vshr_u32 ($T2,$T0,$sigma0[0]); 341e1051a39Sopenharmony_ci eval(shift(@insns)); 342e1051a39Sopenharmony_ci eval(shift(@insns)); 343e1051a39Sopenharmony_ci &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12] 344e1051a39Sopenharmony_ci eval(shift(@insns)); 345e1051a39Sopenharmony_ci eval(shift(@insns)); 346e1051a39Sopenharmony_ci &vshr_u32 ($T1,$T0,$sigma0[2]); 347e1051a39Sopenharmony_ci eval(shift(@insns)); 348e1051a39Sopenharmony_ci eval(shift(@insns)); 349e1051a39Sopenharmony_ci &vsli_32 ($T2,$T0,32-$sigma0[0]); 350e1051a39Sopenharmony_ci eval(shift(@insns)); 351e1051a39Sopenharmony_ci eval(shift(@insns)); 352e1051a39Sopenharmony_ci &vshr_u32 ($T3,$T0,$sigma0[1]); 353e1051a39Sopenharmony_ci eval(shift(@insns)); 354e1051a39Sopenharmony_ci eval(shift(@insns)); 355e1051a39Sopenharmony_ci &veor ($T1,$T1,$T2); 356e1051a39Sopenharmony_ci eval(shift(@insns)); 357e1051a39Sopenharmony_ci eval(shift(@insns)); 358e1051a39Sopenharmony_ci &vsli_32 ($T3,$T0,32-$sigma0[1]); 359e1051a39Sopenharmony_ci eval(shift(@insns)); 360e1051a39Sopenharmony_ci eval(shift(@insns)); 361e1051a39Sopenharmony_ci &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]); 362e1051a39Sopenharmony_ci 
eval(shift(@insns)); 363e1051a39Sopenharmony_ci eval(shift(@insns)); 364e1051a39Sopenharmony_ci &veor ($T1,$T1,$T3); # sigma0(X[1..4]) 365e1051a39Sopenharmony_ci eval(shift(@insns)); 366e1051a39Sopenharmony_ci eval(shift(@insns)); 367e1051a39Sopenharmony_ci &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]); 368e1051a39Sopenharmony_ci eval(shift(@insns)); 369e1051a39Sopenharmony_ci eval(shift(@insns)); 370e1051a39Sopenharmony_ci &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]); 371e1051a39Sopenharmony_ci eval(shift(@insns)); 372e1051a39Sopenharmony_ci eval(shift(@insns)); 373e1051a39Sopenharmony_ci &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4]) 374e1051a39Sopenharmony_ci eval(shift(@insns)); 375e1051a39Sopenharmony_ci eval(shift(@insns)); 376e1051a39Sopenharmony_ci &veor ($T5,$T5,$T4); 377e1051a39Sopenharmony_ci eval(shift(@insns)); 378e1051a39Sopenharmony_ci eval(shift(@insns)); 379e1051a39Sopenharmony_ci &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]); 380e1051a39Sopenharmony_ci eval(shift(@insns)); 381e1051a39Sopenharmony_ci eval(shift(@insns)); 382e1051a39Sopenharmony_ci &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]); 383e1051a39Sopenharmony_ci eval(shift(@insns)); 384e1051a39Sopenharmony_ci eval(shift(@insns)); 385e1051a39Sopenharmony_ci &veor ($T5,$T5,$T4); # sigma1(X[14..15]) 386e1051a39Sopenharmony_ci eval(shift(@insns)); 387e1051a39Sopenharmony_ci eval(shift(@insns)); 388e1051a39Sopenharmony_ci &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15]) 389e1051a39Sopenharmony_ci eval(shift(@insns)); 390e1051a39Sopenharmony_ci eval(shift(@insns)); 391e1051a39Sopenharmony_ci &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]); 392e1051a39Sopenharmony_ci eval(shift(@insns)); 393e1051a39Sopenharmony_ci eval(shift(@insns)); 394e1051a39Sopenharmony_ci &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]); 395e1051a39Sopenharmony_ci eval(shift(@insns)); 396e1051a39Sopenharmony_ci eval(shift(@insns)); 397e1051a39Sopenharmony_ci &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]); 398e1051a39Sopenharmony_ci 
eval(shift(@insns)); 399e1051a39Sopenharmony_ci eval(shift(@insns)); 400e1051a39Sopenharmony_ci &veor ($T5,$T5,$T4); 401e1051a39Sopenharmony_ci eval(shift(@insns)); 402e1051a39Sopenharmony_ci eval(shift(@insns)); 403e1051a39Sopenharmony_ci &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]); 404e1051a39Sopenharmony_ci eval(shift(@insns)); 405e1051a39Sopenharmony_ci eval(shift(@insns)); 406e1051a39Sopenharmony_ci &vld1_32 ("{$T0}","[$Ktbl,:128]!"); 407e1051a39Sopenharmony_ci eval(shift(@insns)); 408e1051a39Sopenharmony_ci eval(shift(@insns)); 409e1051a39Sopenharmony_ci &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]); 410e1051a39Sopenharmony_ci eval(shift(@insns)); 411e1051a39Sopenharmony_ci eval(shift(@insns)); 412e1051a39Sopenharmony_ci &veor ($T5,$T5,$T4); # sigma1(X[16..17]) 413e1051a39Sopenharmony_ci eval(shift(@insns)); 414e1051a39Sopenharmony_ci eval(shift(@insns)); 415e1051a39Sopenharmony_ci &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17]) 416e1051a39Sopenharmony_ci eval(shift(@insns)); 417e1051a39Sopenharmony_ci eval(shift(@insns)); 418e1051a39Sopenharmony_ci &vadd_i32 ($T0,$T0,@X[0]); 419e1051a39Sopenharmony_ci while($#insns>=2) { eval(shift(@insns)); } 420e1051a39Sopenharmony_ci &vst1_32 ("{$T0}","[$Xfer,:128]!"); 421e1051a39Sopenharmony_ci eval(shift(@insns)); 422e1051a39Sopenharmony_ci eval(shift(@insns)); 423e1051a39Sopenharmony_ci 424e1051a39Sopenharmony_ci push(@X,shift(@X)); # "rotate" X[] 425e1051a39Sopenharmony_ci} 426e1051a39Sopenharmony_ci 427e1051a39Sopenharmony_cisub Xpreload() 428e1051a39Sopenharmony_ci{ use integer; 429e1051a39Sopenharmony_ci my $body = shift; 430e1051a39Sopenharmony_ci my @insns = (&$body,&$body,&$body,&$body); 431e1051a39Sopenharmony_ci my ($a,$b,$c,$d,$e,$f,$g,$h); 432e1051a39Sopenharmony_ci 433e1051a39Sopenharmony_ci eval(shift(@insns)); 434e1051a39Sopenharmony_ci eval(shift(@insns)); 435e1051a39Sopenharmony_ci eval(shift(@insns)); 436e1051a39Sopenharmony_ci eval(shift(@insns)); 437e1051a39Sopenharmony_ci &vld1_32 
("{$T0}","[$Ktbl,:128]!"); 438e1051a39Sopenharmony_ci eval(shift(@insns)); 439e1051a39Sopenharmony_ci eval(shift(@insns)); 440e1051a39Sopenharmony_ci eval(shift(@insns)); 441e1051a39Sopenharmony_ci eval(shift(@insns)); 442e1051a39Sopenharmony_ci &vrev32_8 (@X[0],@X[0]); 443e1051a39Sopenharmony_ci eval(shift(@insns)); 444e1051a39Sopenharmony_ci eval(shift(@insns)); 445e1051a39Sopenharmony_ci eval(shift(@insns)); 446e1051a39Sopenharmony_ci eval(shift(@insns)); 447e1051a39Sopenharmony_ci &vadd_i32 ($T0,$T0,@X[0]); 448e1051a39Sopenharmony_ci foreach (@insns) { eval; } # remaining instructions 449e1051a39Sopenharmony_ci &vst1_32 ("{$T0}","[$Xfer,:128]!"); 450e1051a39Sopenharmony_ci 451e1051a39Sopenharmony_ci push(@X,shift(@X)); # "rotate" X[] 452e1051a39Sopenharmony_ci} 453e1051a39Sopenharmony_ci 454e1051a39Sopenharmony_cisub body_00_15 () { 455e1051a39Sopenharmony_ci ( 456e1051a39Sopenharmony_ci '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'. 457e1051a39Sopenharmony_ci '&add ($h,$h,$t1)', # h+=X[i]+K[i] 458e1051a39Sopenharmony_ci '&eor ($t1,$f,$g)', 459e1051a39Sopenharmony_ci '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))', 460e1051a39Sopenharmony_ci '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past 461e1051a39Sopenharmony_ci '&and ($t1,$t1,$e)', 462e1051a39Sopenharmony_ci '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e) 463e1051a39Sopenharmony_ci '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))', 464e1051a39Sopenharmony_ci '&eor ($t1,$t1,$g)', # Ch(e,f,g) 465e1051a39Sopenharmony_ci '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e) 466e1051a39Sopenharmony_ci '&eor ($t2,$a,$b)', # a^b, b^c in next round 467e1051a39Sopenharmony_ci '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) 468e1051a39Sopenharmony_ci '&add ($h,$h,$t1)', # h+=Ch(e,f,g) 469e1051a39Sopenharmony_ci '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'. 470e1051a39Sopenharmony_ci '&ldr ($t1,"[$Ktbl]") if ($j==15);'. 
471e1051a39Sopenharmony_ci '&ldr ($t1,"[sp,#64]") if ($j==31)', 472e1051a39Sopenharmony_ci '&and ($t3,$t3,$t2)', # (b^c)&=(a^b) 473e1051a39Sopenharmony_ci '&add ($d,$d,$h)', # d+=h 474e1051a39Sopenharmony_ci '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a) 475e1051a39Sopenharmony_ci '&eor ($t3,$t3,$b)', # Maj(a,b,c) 476e1051a39Sopenharmony_ci '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' 477e1051a39Sopenharmony_ci ) 478e1051a39Sopenharmony_ci} 479e1051a39Sopenharmony_ci 480e1051a39Sopenharmony_ci$code.=<<___; 481e1051a39Sopenharmony_ci#if __ARM_MAX_ARCH__>=7 482e1051a39Sopenharmony_ci.arch armv7-a 483e1051a39Sopenharmony_ci.fpu neon 484e1051a39Sopenharmony_ci 485e1051a39Sopenharmony_ci.global sha256_block_data_order_neon 486e1051a39Sopenharmony_ci.type sha256_block_data_order_neon,%function 487e1051a39Sopenharmony_ci.align 5 488e1051a39Sopenharmony_ci.skip 16 489e1051a39Sopenharmony_cisha256_block_data_order_neon: 490e1051a39Sopenharmony_ci.LNEON: 491e1051a39Sopenharmony_ci stmdb sp!,{r4-r12,lr} 492e1051a39Sopenharmony_ci 493e1051a39Sopenharmony_ci sub $H,sp,#16*4+16 494e1051a39Sopenharmony_ci adr $Ktbl,K256 495e1051a39Sopenharmony_ci bic $H,$H,#15 @ align for 128-bit stores 496e1051a39Sopenharmony_ci mov $t2,sp 497e1051a39Sopenharmony_ci mov sp,$H @ alloca 498e1051a39Sopenharmony_ci add $len,$inp,$len,lsl#6 @ len to point at the end of inp 499e1051a39Sopenharmony_ci 500e1051a39Sopenharmony_ci vld1.8 {@X[0]},[$inp]! 501e1051a39Sopenharmony_ci vld1.8 {@X[1]},[$inp]! 502e1051a39Sopenharmony_ci vld1.8 {@X[2]},[$inp]! 503e1051a39Sopenharmony_ci vld1.8 {@X[3]},[$inp]! 504e1051a39Sopenharmony_ci vld1.32 {$T0},[$Ktbl,:128]! 505e1051a39Sopenharmony_ci vld1.32 {$T1},[$Ktbl,:128]! 506e1051a39Sopenharmony_ci vld1.32 {$T2},[$Ktbl,:128]! 507e1051a39Sopenharmony_ci vld1.32 {$T3},[$Ktbl,:128]! 
508e1051a39Sopenharmony_ci vrev32.8 @X[0],@X[0] @ yes, even on 509e1051a39Sopenharmony_ci str $ctx,[sp,#64] 510e1051a39Sopenharmony_ci vrev32.8 @X[1],@X[1] @ big-endian 511e1051a39Sopenharmony_ci str $inp,[sp,#68] 512e1051a39Sopenharmony_ci mov $Xfer,sp 513e1051a39Sopenharmony_ci vrev32.8 @X[2],@X[2] 514e1051a39Sopenharmony_ci str $len,[sp,#72] 515e1051a39Sopenharmony_ci vrev32.8 @X[3],@X[3] 516e1051a39Sopenharmony_ci str $t2,[sp,#76] @ save original sp 517e1051a39Sopenharmony_ci vadd.i32 $T0,$T0,@X[0] 518e1051a39Sopenharmony_ci vadd.i32 $T1,$T1,@X[1] 519e1051a39Sopenharmony_ci vst1.32 {$T0},[$Xfer,:128]! 520e1051a39Sopenharmony_ci vadd.i32 $T2,$T2,@X[2] 521e1051a39Sopenharmony_ci vst1.32 {$T1},[$Xfer,:128]! 522e1051a39Sopenharmony_ci vadd.i32 $T3,$T3,@X[3] 523e1051a39Sopenharmony_ci vst1.32 {$T2},[$Xfer,:128]! 524e1051a39Sopenharmony_ci vst1.32 {$T3},[$Xfer,:128]! 525e1051a39Sopenharmony_ci 526e1051a39Sopenharmony_ci ldmia $ctx,{$A-$H} 527e1051a39Sopenharmony_ci sub $Xfer,$Xfer,#64 528e1051a39Sopenharmony_ci ldr $t1,[sp,#0] 529e1051a39Sopenharmony_ci eor $t2,$t2,$t2 530e1051a39Sopenharmony_ci eor $t3,$B,$C 531e1051a39Sopenharmony_ci b .L_00_48 532e1051a39Sopenharmony_ci 533e1051a39Sopenharmony_ci.align 4 534e1051a39Sopenharmony_ci.L_00_48: 535e1051a39Sopenharmony_ci___ 536e1051a39Sopenharmony_ci &Xupdate(\&body_00_15); 537e1051a39Sopenharmony_ci &Xupdate(\&body_00_15); 538e1051a39Sopenharmony_ci &Xupdate(\&body_00_15); 539e1051a39Sopenharmony_ci &Xupdate(\&body_00_15); 540e1051a39Sopenharmony_ci$code.=<<___; 541e1051a39Sopenharmony_ci teq $t1,#0 @ check for K256 terminator 542e1051a39Sopenharmony_ci ldr $t1,[sp,#0] 543e1051a39Sopenharmony_ci sub $Xfer,$Xfer,#64 544e1051a39Sopenharmony_ci bne .L_00_48 545e1051a39Sopenharmony_ci 546e1051a39Sopenharmony_ci ldr $inp,[sp,#68] 547e1051a39Sopenharmony_ci ldr $t0,[sp,#72] 548e1051a39Sopenharmony_ci sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl 549e1051a39Sopenharmony_ci teq $inp,$t0 550e1051a39Sopenharmony_ci it eq 
551e1051a39Sopenharmony_ci subeq $inp,$inp,#64 @ avoid SEGV 552e1051a39Sopenharmony_ci vld1.8 {@X[0]},[$inp]! @ load next input block 553e1051a39Sopenharmony_ci vld1.8 {@X[1]},[$inp]! 554e1051a39Sopenharmony_ci vld1.8 {@X[2]},[$inp]! 555e1051a39Sopenharmony_ci vld1.8 {@X[3]},[$inp]! 556e1051a39Sopenharmony_ci it ne 557e1051a39Sopenharmony_ci strne $inp,[sp,#68] 558e1051a39Sopenharmony_ci mov $Xfer,sp 559e1051a39Sopenharmony_ci___ 560e1051a39Sopenharmony_ci &Xpreload(\&body_00_15); 561e1051a39Sopenharmony_ci &Xpreload(\&body_00_15); 562e1051a39Sopenharmony_ci &Xpreload(\&body_00_15); 563e1051a39Sopenharmony_ci &Xpreload(\&body_00_15); 564e1051a39Sopenharmony_ci$code.=<<___; 565e1051a39Sopenharmony_ci ldr $t0,[$t1,#0] 566e1051a39Sopenharmony_ci add $A,$A,$t2 @ h+=Maj(a,b,c) from the past 567e1051a39Sopenharmony_ci ldr $t2,[$t1,#4] 568e1051a39Sopenharmony_ci ldr $t3,[$t1,#8] 569e1051a39Sopenharmony_ci ldr $t4,[$t1,#12] 570e1051a39Sopenharmony_ci add $A,$A,$t0 @ accumulate 571e1051a39Sopenharmony_ci ldr $t0,[$t1,#16] 572e1051a39Sopenharmony_ci add $B,$B,$t2 573e1051a39Sopenharmony_ci ldr $t2,[$t1,#20] 574e1051a39Sopenharmony_ci add $C,$C,$t3 575e1051a39Sopenharmony_ci ldr $t3,[$t1,#24] 576e1051a39Sopenharmony_ci add $D,$D,$t4 577e1051a39Sopenharmony_ci ldr $t4,[$t1,#28] 578e1051a39Sopenharmony_ci add $E,$E,$t0 579e1051a39Sopenharmony_ci str $A,[$t1],#4 580e1051a39Sopenharmony_ci add $F,$F,$t2 581e1051a39Sopenharmony_ci str $B,[$t1],#4 582e1051a39Sopenharmony_ci add $G,$G,$t3 583e1051a39Sopenharmony_ci str $C,[$t1],#4 584e1051a39Sopenharmony_ci add $H,$H,$t4 585e1051a39Sopenharmony_ci str $D,[$t1],#4 586e1051a39Sopenharmony_ci stmia $t1,{$E-$H} 587e1051a39Sopenharmony_ci 588e1051a39Sopenharmony_ci ittte ne 589e1051a39Sopenharmony_ci movne $Xfer,sp 590e1051a39Sopenharmony_ci ldrne $t1,[sp,#0] 591e1051a39Sopenharmony_ci eorne $t2,$t2,$t2 592e1051a39Sopenharmony_ci ldreq sp,[sp,#76] @ restore original sp 593e1051a39Sopenharmony_ci itt ne 594e1051a39Sopenharmony_ci eorne 
$t3,$B,$C
	bne	.L_00_48

	ldmia	sp!,{r4-r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
___
}}}
######################################################################
# ARMv8 stuff
#
# Generator for sha256_block_data_order_armv8, the code path built on
# the sha256h/sha256h2/sha256su0/sha256su1 instructions.  The mnemonics
# are written symbolically below and converted to INST(...) byte
# sequences when $code is post-processed at the end of this script.
#
{{{
# Register allocation:
#   q0-q2   working state ABCD/EFGH plus a scratch copy of ABCD,
#   q8-q11  the four 16-byte quarters of the current input block,
#   q12-q15 two round-constant buffers and the state saved at loop entry.
my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
my @MSG=map("q$_",(8..11));
my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
# Pointer stepped through the constant table.
# NOTE(review): r3 is presumably loaded by the dispatch code that
# branches to .LARMv8; that code is outside this section - confirm.
my $Ktbl="r3";
# Directive used by INST() to emit raw bytes: "DCB" for flavours
# matching /win/, ".byte" otherwise.
my $_byte = ($flavour =~ /win/ ? "DCB" : ".byte");

$code.=<<___;
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
#  define INST(a,b,c,d)	$_byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	$_byte	a,b,c,d
# endif

.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{$ABCD,$EFGH},[$ctx]
	sub	$Ktbl,$Ktbl,#256+32
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

.align	4
.Loop_v8:
	vld1.8		{@MSG[0]-@MSG[1]},[$inp]!
	vld1.8		{@MSG[2]-@MSG[3]},[$inp]!
	vld1.32		{$W0},[$Ktbl]!
	vrev32.8	@MSG[0],@MSG[0]
	vrev32.8	@MSG[1],@MSG[1]
	vrev32.8	@MSG[2],@MSG[2]
	vrev32.8	@MSG[3],@MSG[3]
	vmov		$ABCD_SAVE,$ABCD	@ offload
	vmov		$EFGH_SAVE,$EFGH
	teq		$inp,$len
___
# Twelve groups that also update the message schedule (sha256su0 /
# sha256su1).  After each group the roles of $W0/$W1 and of the @MSG
# registers are rotated at generation time, so the emitted code is fully
# unrolled with the register names already permuted.
for($i=0;$i<12;$i++) {
$code.=<<___;
	vld1.32		{$W1},[$Ktbl]!
	vadd.i32	$W0,$W0,@MSG[0]
	sha256su0	@MSG[0],@MSG[1]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0
	sha256su1	@MSG[0],@MSG[2],@MSG[3]
___
	($W0,$W1)=($W1,$W0);	push(@MSG,shift(@MSG));
}
# Last four groups: no further schedule updates.  Of the 16 constant
# loads per block, 15 use writeback (15*16 = 256-16 bytes), which is
# exactly what the "rewind" below subtracts from $Ktbl.
$code.=<<___;
	vld1.32		{$W1},[$Ktbl]!
	vadd.i32	$W0,$W0,@MSG[0]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0

	vld1.32		{$W0},[$Ktbl]!
	vadd.i32	$W1,$W1,@MSG[1]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W1
	sha256h2	$EFGH,$abcd,$W1

	vld1.32		{$W1},[$Ktbl]
	vadd.i32	$W0,$W0,@MSG[2]
	sub		$Ktbl,$Ktbl,#256-16	@ rewind
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W0
	sha256h2	$EFGH,$abcd,$W0

	vadd.i32	$W1,$W1,@MSG[3]
	vmov		$abcd,$ABCD
	sha256h		$ABCD,$EFGH,$W1
	sha256h2	$EFGH,$abcd,$W1

	vadd.i32	$ABCD,$ABCD,$ABCD_SAVE
	vadd.i32	$EFGH,$EFGH,$EFGH_SAVE
	it		ne
	bne		.Loop_v8

	vst1.32		{$ABCD,$EFGH},[$ctx]

	ret		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
___
}}}
$code.=<<___;
.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm   OPENSSL_armcap_P,4,4
#endif
___

# Copy this script's leading comment block to the output as assembler
# comments: skip the "#!" line, turn a leading "#" into "@", and stop at
# the first line that is neither a comment nor empty.
# Three-argument open so that the script path is never parsed for a mode.
open SELF,"<",$0;
while(<SELF>) {
	next if (/^#!/);
	last if (!s/^#/@/ and !/^$/);
	print;
}
# The filehandle operand of this close is the first token of the next line.
close
SELF;	# operand of the "close" that ends the previous line

# Hand-assembler for the ARMv8 SHA-256 instructions, for toolchains
# that do not know the mnemonics.  %opcode holds the base encoding of
# each instruction with all register fields zero.
{   my  %opcode = (
	"sha256h"	=> 0xf3000c40,	"sha256h2"	=> 0xf3100c40,
	"sha256su0"	=> 0xf3ba03c0,	"sha256su1"	=> 0xf3200c40	);

    # unsha256($mnemonic,$arg)
    #
    # $mnemonic - one of the keys of %opcode
    # $arg      - operand text, "qD,qM" or "qD,qN,qM"
    #
    # Returns "INST(b0,b1,b2,b3)\t@ <mnemonic> <operands>" with the four
    # encoded bytes listed lowest byte first.  If the mnemonic is unknown
    # or the operands cannot be parsed, the instruction text is returned
    # unchanged, so the assembler sees it instead of the line being
    # silently emptied.
    sub unsha256 {
	my ($mnemonic,$arg)=@_;

	if (exists $opcode{$mnemonic} &&
	    $arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/) {
	    # The middle operand is absent in the two-operand form and
	    # then contributes zero bits.
	    my ($d,$n,$m) = ($1, defined($2) ? $2 : 0, $3);
	    # Low three bits and the fourth bit of each q-register number
	    # go to separate places in the instruction word.
	    my $word = $opcode{$mnemonic}|(($d&7)<<13)|(($d&8)<<19)
					 |(($n&7)<<17)|(($n&8)<<4)
					 |(($m&7)<<1) |(($m&8)<<2);
	    # since ARMv7 instructions are always encoded little-endian.
	    # correct solution is to use .inst directive, but older
	    # assemblers don't implement it:-(
	    return sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
			$word&0xff,($word>>8)&0xff,
			($word>>16)&0xff,($word>>24)&0xff,
			$mnemonic,$arg;
	}

	# Not encodable here: hand the instruction back untouched.
	return "$mnemonic\t$arg";
    }
}

# Post-process the accumulated assembly line by line and print it.
foreach (split($/,$code)) {

	# expand `...` snippets by evaluating them as Perl
	s/\`([^\`]*)\`/eval $1/geo;

	# replace symbolic sha256* instructions with INST(...) byte lists
	s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;

	# "ret" becomes "bx lr"; a literal "bx lr" becomes its raw encoding
	s/\bret\b/bx lr/go		or
	s/\bbx\s+lr\b/.word\t0xe12fff1e/go;	# make it possible to compile with -march=armv4

	print $_,"\n";
}

close STDOUT or die "error closing STDOUT: $!"; # enforce flush