#define __ARM_ARCH__ __LINUX_ARM_ARCH__
@ SPDX-License-Identifier: GPL-2.0

@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
@ has relicensed it under the GPLv2. Therefore this program is free software;
@ you can redistribute it and/or modify it under the terms of the GNU General
@ Public License version 2 as published by the Free Software Foundation.
@
@ The original headers, including the original license headers, are
@ included below for completeness.

@ ====================================================================
@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see https://www.openssl.org/~appro/cryptogams/.
@ ====================================================================

@ sha1_block procedure for ARMv4.
@
@ January 2007.

@ Size/performance trade-off
@ ====================================================================
@ impl		size in bytes	comp cycles[*]	measured performance
@ ====================================================================
@ thumb		304		3212		4420
@ armv4-small	392/+29%	1958/+64%	2250/+96%
@ armv4-compact	740/+89%	1552/+26%	1840/+22%
@ armv4-large	1420/+92%	1307/+19%	1370/+34%[***]
@ full unroll	~5100/+260%	~1260/+4%	~1300/+5%
@ ====================================================================
@ thumb		= same as 'small' but in Thumb instructions[**] and
@		  with recurring code in two private functions;
@ small		= detached Xload/update, loops are folded;
@ compact	= detached Xload/update, 5x unroll;
@ large		= interleaved Xload/update, 5x unroll;
@ full unroll	= interleaved Xload/update, full unroll, estimated[!];
@
@ [*]	Manually counted instructions in "grand" loop body. Measured
@	performance is affected by prologue and epilogue overhead,
@	i-cache availability, branch penalties, etc.
@ [**]	While each Thumb instruction is half the size, they are not as
@	diverse as ARM ones: e.g., there are only two arithmetic
@	instructions with 3 arguments, no [fixed] rotate, addressing
@	modes are limited.
@	As a result it takes more instructions to do
@	the same job in Thumb, therefore the code is never twice as
@	small and always slower.
@ [***]	which is also ~35% better than compiler generated code. Dual-
@	issue Cortex A8 core was measured to process input block in
@	~990 cycles.

@ August 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 13% improvement on
@ Cortex A8 core and in absolute terms ~870 cycles per input block
@ [or 13.6 cycles per byte].

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 10%
@ improvement on Cortex A8 core and 12.2 cycles per byte.

#include <linux/linkage.h>

.text

@ --------------------------------------------------------------------
@ sha1_block_data_order: SHA-1 compression of consecutive 64-byte blocks.
@
@ In:	r0 = pointer to the five 32-bit state words; they are loaded
@	     once on entry and re-read, summed and stored per block
@	r1 = input pointer, post-incremented by 4 per message word
@	r2 = number of 64-byte blocks; converted on entry into the
@	     end pointer r1 + (r2 << 6)
@ Out:	state words stored back through r0
@	NOTE(review): presumably called from C as
@	(state, data, block count) - confirm against the glue code.
@
@ Saved and restored: r4-r12. Return is by popping the saved lr to pc.
@ Clobbered: r1, r2, r3, flags.
@
@ The loop exit test (teq r1,r2) sits at the bottom of .Lloop, so the
@ caller must supply at least one block.
@
@ Stack:	80 words below the saved registers receive the message
@	schedule X[0..79]. They are claimed in four steps (15, 25, 20
@	and 20 words) and released in one step at .L_done. r14 walks
@	downwards through them, one pre-decremented store per round.
@
@ Registers inside the block loop:
@	r3-r7	working variables, A..E = r3..r7 at the top of every
@		group of five rounds; the roles shift by one register
@		per round. Whatever sits in the C, D or E position is
@		always consumed through ror#2, which applies the
@		ROL(B,30) step lazily. The ror#30 at .Lloop
@		pre-compensates the C, D, E words loaded from memory.
@	r8	round constant K for the current phase
@	r9	X[i]
@	r10-r12	temporaries (F computation, schedule operands)
@	r14	pointer to the most recently stored X word
@
@ Rounds 20..39 and 60..79 share one loop body. The carry flag tells
@ the two passes apart: cmn clears it before the first pass, cmp sets
@ it before the second, and teq keeps it intact inside the loop.
@ --------------------------------------------------------------------
.align	2
ENTRY(sha1_block_data_order)
	stmdb	sp!,{r4-r12,lr}
	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
	ldmia	r0,{r3,r4,r5,r6,r7}
.Lloop:
	ldr	r8,.LK_00_19
	mov	r14,sp
	sub	sp,sp,#15*4
	mov	r5,r5,ror#30
	mov	r6,r6,ror#30
	mov	r7,r7,ror#30		@ [6]

	@ Rounds 0..14, five per iteration: fetch X[i] from the input
	@ as a big-endian word and push it onto the schedule area.
.L_00_15:
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	eor	r10,r5,r6			@ F_xx_xx
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9			@ E+=X[i]
	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r6,r8,r6,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r4,r5			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r6,r8,r6,ror#2			@ E+=K_00_19
	eor	r10,r4,r5			@ F_xx_xx
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r3,r10,ror#2
	add	r6,r6,r9			@ E+=X[i]
	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r5,r8,r5,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r3,r4			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r5,r8,r5,ror#2			@ E+=K_00_19
	eor	r10,r3,r4			@ F_xx_xx
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r7,r10,ror#2
	add	r5,r5,r9			@ E+=X[i]
	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r4,r8,r4,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r7,r3			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r4,r8,r4,ror#2			@ E+=K_00_19
	eor	r10,r7,r3			@ F_xx_xx
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r6,r10,ror#2
	add	r4,r4,r9			@ E+=X[i]
	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r3,r8,r3,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r6,r7			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r3,r8,r3,ror#2			@ E+=K_00_19
	eor	r10,r6,r7			@ F_xx_xx
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r5,r10,ror#2
	add	r3,r3,r9			@ E+=X[i]
	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
	cmp	r14,sp
	bne	.L_00_15		@ [((11+4)*5+2)*3]
	sub	sp,sp,#25*4

	@ Round 15: last word taken directly from the input.
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	eor	r10,r5,r6			@ F_xx_xx
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9			@ E+=X[i]
	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10			@ E+=F_00_19(B,C,D)

	@ Rounds 16..19: from here on X[i] is derived from the schedule,
	@ X[i] = ROL(X[i-16]^X[i-14]^X[i-8]^X[i-3],1), read relative to r14.
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r3,r10,ror#2		@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r7,r10,ror#2		@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r6,r10,ror#2		@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r5,r10,ror#2		@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
	add	r3,r3,r10			@ E+=F_00_19(B,C,D)

	ldr	r8,.LK_20_39		@ [+15+16*4]
	cmn	sp,#0			@ [+3], clear carry to denote 20_39

	@ Rounds 20..39 (carry clear) and 60..79 (carry set), five per
	@ iteration, F = B ^ C ^ D. Nothing in this loop may alter carry.
.L_20_39_or_60_79:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r5,r6			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r4,r10,ror#2		@ F_xx_xx
	add	r7,r7,r9			@ E+=X[i]
	add	r7,r7,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r3,r10,ror#2		@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	add	r6,r6,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r7,r10,ror#2		@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	add	r5,r5,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r6,r10,ror#2		@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	add	r4,r4,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r5,r10,ror#2		@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
 ARM(	teq	r14,sp		)	@ preserve carry
 THUMB(	mov	r11,sp		)
 THUMB(	teq	r14,r11		)	@ preserve carry
	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes

	ldr	r8,.LK_40_59
	sub	sp,sp,#20*4		@ [+2]

	@ Rounds 40..59, five per iteration. The majority function is
	@ accumulated as two separate addends: B & (C ^ D) and C & D.
.L_40_59:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r5,r6			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r4,r10,ror#2		@ F_xx_xx
	and	r11,r5,r6			@ F_xx_xx
	add	r7,r7,r9			@ E+=X[i]
	add	r7,r7,r10			@ E+=F_40_59(B,C,D)
	add	r7,r7,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r3,r10,ror#2		@ F_xx_xx
	and	r11,r4,r5			@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	add	r6,r6,r10			@ E+=F_40_59(B,C,D)
	add	r6,r6,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r7,r10,ror#2		@ F_xx_xx
	and	r11,r3,r4			@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	add	r5,r5,r10			@ E+=F_40_59(B,C,D)
	add	r5,r5,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r6,r10,ror#2		@ F_xx_xx
	and	r11,r7,r3			@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	add	r4,r4,r10			@ E+=F_40_59(B,C,D)
	add	r4,r4,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r5,r10,ror#2		@ F_xx_xx
	and	r11,r6,r7			@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
	add	r3,r3,r11,ror#2
	cmp	r14,sp
	bne	.L_40_59		@ [+((12+5)*5+2)*4]

	ldr	r8,.LK_60_79
	sub	sp,sp,#20*4
	cmp	sp,#0			@ set carry to denote 60_79
	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes

	@ Block finished: release the schedule area, fold the working
	@ variables into the stored state (applying the pending ror#2 to
	@ C, D, E) and continue while input remains.
.L_done:
	add	sp,sp,#80*4		@ "deallocate" stack frame
	ldmia	r0,{r8,r9,r10,r11,r12}
	add	r3,r8,r3
	add	r4,r9,r4
	add	r5,r10,r5,ror#2
	add	r6,r11,r6,ror#2
	add	r7,r12,r7,ror#2
	stmia	r0,{r3,r4,r5,r6,r7}
	teq	r1,r2
	bne	.Lloop			@ [+18], total 1307

	ldmia	sp!,{r4-r12,pc}

	@ Round constants, fetched pc-relative by the ldr r8 instructions.
.align	2
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
ENDPROC(sha1_block_data_order)
.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align	2