162306a36Sopenharmony_ci#define __ARM_ARCH__ __LINUX_ARM_ARCH__
262306a36Sopenharmony_ci@ SPDX-License-Identifier: GPL-2.0
362306a36Sopenharmony_ci
462306a36Sopenharmony_ci@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
562306a36Sopenharmony_ci@ has relicensed it under the GPLv2. Therefore this program is free software;
662306a36Sopenharmony_ci@ you can redistribute it and/or modify it under the terms of the GNU General
762306a36Sopenharmony_ci@ Public License version 2 as published by the Free Software Foundation.
862306a36Sopenharmony_ci@
962306a36Sopenharmony_ci@ The original headers, including the original license headers, are
1062306a36Sopenharmony_ci@ included below for completeness.
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci@ ====================================================================
1362306a36Sopenharmony_ci@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
1462306a36Sopenharmony_ci@ project. The module is, however, dual licensed under OpenSSL and
1562306a36Sopenharmony_ci@ CRYPTOGAMS licenses depending on where you obtain it. For further
1662306a36Sopenharmony_ci@ details see https://www.openssl.org/~appro/cryptogams/.
1762306a36Sopenharmony_ci@ ====================================================================
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci@ sha1_block procedure for ARMv4.
2062306a36Sopenharmony_ci@
2162306a36Sopenharmony_ci@ January 2007.
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_ci@ Size/performance trade-off
2462306a36Sopenharmony_ci@ ====================================================================
2562306a36Sopenharmony_ci@ impl		size in bytes	comp cycles[*]	measured performance
2662306a36Sopenharmony_ci@ ====================================================================
2762306a36Sopenharmony_ci@ thumb		304		3212		4420
2862306a36Sopenharmony_ci@ armv4-small	392/+29%	1958/+64%	2250/+96%
2962306a36Sopenharmony_ci@ armv4-compact	740/+89%	1552/+26%	1840/+22%
3062306a36Sopenharmony_ci@ armv4-large	1420/+92%	1307/+19%	1370/+34%[***]
3162306a36Sopenharmony_ci@ full unroll	~5100/+260%	~1260/+4%	~1300/+5%
3262306a36Sopenharmony_ci@ ====================================================================
3362306a36Sopenharmony_ci@ thumb		= same as 'small' but in Thumb instructions[**] and
3462306a36Sopenharmony_ci@		  with recurring code in two private functions;
3562306a36Sopenharmony_ci@ small		= detached Xload/update, loops are folded;
3662306a36Sopenharmony_ci@ compact	= detached Xload/update, 5x unroll;
3762306a36Sopenharmony_ci@ large		= interleaved Xload/update, 5x unroll;
3862306a36Sopenharmony_ci@ full unroll	= interleaved Xload/update, full unroll, estimated[!];
3962306a36Sopenharmony_ci@
4062306a36Sopenharmony_ci@ [*]	Manually counted instructions in "grand" loop body. Measured
4162306a36Sopenharmony_ci@	performance is affected by prologue and epilogue overhead,
4262306a36Sopenharmony_ci@	i-cache availability, branch penalties, etc.
4362306a36Sopenharmony_ci@ [**]	While each Thumb instruction is half the size of an ARM one,
4462306a36Sopenharmony_ci@	Thumb instructions are not as diverse: e.g., there are only two
4562306a36Sopenharmony_ci@	arithmetic instructions with 3 arguments, no [fixed] rotate, and
4662306a36Sopenharmony_ci@	addressing modes are limited. As a result it takes more
4762306a36Sopenharmony_ci@	instructions to do the same job in Thumb, so the code is never
4862306a36Sopenharmony_ci@	half the size and is always slower.
4962306a36Sopenharmony_ci@ [***]	which is also ~35% better than compiler-generated code. A dual-
5062306a36Sopenharmony_ci@	issue Cortex A8 core was measured to process an input block in
5162306a36Sopenharmony_ci@	~990 cycles.
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ci@ August 2010.
5462306a36Sopenharmony_ci@
5562306a36Sopenharmony_ci@ Rescheduling for dual-issue pipeline resulted in 13% improvement on
5662306a36Sopenharmony_ci@ Cortex A8 core and in absolute terms ~870 cycles per input block
5762306a36Sopenharmony_ci@ [or 13.6 cycles per byte].
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci@ February 2011.
6062306a36Sopenharmony_ci@
6162306a36Sopenharmony_ci@ Profiler-assisted and platform-specific optimization resulted in 10%
6262306a36Sopenharmony_ci@ improvement on Cortex A8 core and 12.2 cycles per byte.
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci#include <linux/linkage.h>
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci.text
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_ci.align	2
@ sha1_block_data_order: SHA-1 compression over consecutive 64-byte blocks.
@
@ In:    r0 = address of the five 32-bit chaining words (loaded on entry,
@             re-read, summed and written back after every block)
@        r1 = input pointer, advanced by 4 for every word consumed
@        r2 = number of 64-byte blocks; converted to an end pointer below.
@             The end test sits at the bottom of the block loop, so a
@             count of zero is not handled here.
@        NOTE(review): argument meaning is inferred from register use in
@        this file; confirm against the prototype used by the C caller.
@ Regs:  r3-r7   working variables A..E; the roles move by one register
@                each round and line up with r3..r7 again every 5 rounds
@        r8      round constant K for the current group of rounds
@        r9      X[i]
@        r10-r12 scratch
@        r14     descending write pointer into the on-stack X[] schedule
@        sp      lowered in steps so that r14 == sp marks the end of the
@                current group of rounds
@ Stack: r4-r12 and lr are saved; the 80 words of X[] are stored below
@        them and released again at .L_done.
@ The 30-bit rotation that SHA-1 applies to B is deferred: the registers
@ acting as C, D and E hold values that still need ror#2, which is why
@ every use of them in the round code carries a ror#2 operand.
6962306a36Sopenharmony_ciENTRY(sha1_block_data_order)
7062306a36Sopenharmony_ci	stmdb	sp!,{r4-r12,lr}
@ r2 = r1 + r2*64: address just past the last input block
7162306a36Sopenharmony_ci	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
@ r3..r7 = A..E from the state at [r0]
7262306a36Sopenharmony_ci	ldmia	r0,{r3,r4,r5,r6,r7}
@ ---- per-block loop -------------------------------------------------
@ r14 starts at the current sp and walks downwards as X[] words are
@ stored; sp is dropped by 15 words so the first loop ends after 15 rounds.
@ C, D and E are rotated by ror#30 so that the ror#2 applied at each use
@ yields their true value (deferred-rotation form described above).
7362306a36Sopenharmony_ci.Lloop:
7462306a36Sopenharmony_ci	ldr	r8,.LK_00_19
7562306a36Sopenharmony_ci	mov	r14,sp
7662306a36Sopenharmony_ci	sub	sp,sp,#15*4
7762306a36Sopenharmony_ci	mov	r5,r5,ror#30
7862306a36Sopenharmony_ci	mov	r6,r6,ror#30
7962306a36Sopenharmony_ci	mov	r7,r7,ror#30		@ [6]
@ ---- rounds 0..14: five rounds per pass, three passes ----------------
@ Each round fetches one big-endian input word into r9 and pushes it to
@ X[] with a pre-decrement store. Two fetch variants are selected at
@ build time: below architecture level 7 the word is assembled from four
@ byte loads (r12<<24 | r11<<16 | r10<<8 | r9); otherwise a single word
@ load is used, byte-swapped with rev on little-endian builds.
@ F_00_19(B,C,D) is computed as (B & (C ^ D)) ^ D.
8062306a36Sopenharmony_ci.L_00_15:
@ round with A=r3 B=r4 C=r5 D=r6 E=r7; the result accumulates in r7
8162306a36Sopenharmony_ci#if __ARM_ARCH__<7
8262306a36Sopenharmony_ci	ldrb	r10,[r1,#2]
8362306a36Sopenharmony_ci	ldrb	r9,[r1,#3]
8462306a36Sopenharmony_ci	ldrb	r11,[r1,#1]
8562306a36Sopenharmony_ci	add	r7,r8,r7,ror#2			@ E+=K_00_19
8662306a36Sopenharmony_ci	ldrb	r12,[r1],#4
8762306a36Sopenharmony_ci	orr	r9,r9,r10,lsl#8
8862306a36Sopenharmony_ci	eor	r10,r5,r6			@ F_xx_xx
8962306a36Sopenharmony_ci	orr	r9,r9,r11,lsl#16
9062306a36Sopenharmony_ci	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
9162306a36Sopenharmony_ci	orr	r9,r9,r12,lsl#24
9262306a36Sopenharmony_ci#else
9362306a36Sopenharmony_ci	ldr	r9,[r1],#4			@ handles unaligned
9462306a36Sopenharmony_ci	add	r7,r8,r7,ror#2			@ E+=K_00_19
9562306a36Sopenharmony_ci	eor	r10,r5,r6			@ F_xx_xx
9662306a36Sopenharmony_ci	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
9762306a36Sopenharmony_ci#ifdef __ARMEL__
9862306a36Sopenharmony_ci	rev	r9,r9				@ byte swap
9962306a36Sopenharmony_ci#endif
10062306a36Sopenharmony_ci#endif
10162306a36Sopenharmony_ci	and	r10,r4,r10,ror#2
10262306a36Sopenharmony_ci	add	r7,r7,r9			@ E+=X[i]
10362306a36Sopenharmony_ci	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
10462306a36Sopenharmony_ci	str	r9,[r14,#-4]!
10562306a36Sopenharmony_ci	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
@ roles shift by one register: A=r7 B=r3 C=r4 D=r5 E=r6
10662306a36Sopenharmony_ci#if __ARM_ARCH__<7
10762306a36Sopenharmony_ci	ldrb	r10,[r1,#2]
10862306a36Sopenharmony_ci	ldrb	r9,[r1,#3]
10962306a36Sopenharmony_ci	ldrb	r11,[r1,#1]
11062306a36Sopenharmony_ci	add	r6,r8,r6,ror#2			@ E+=K_00_19
11162306a36Sopenharmony_ci	ldrb	r12,[r1],#4
11262306a36Sopenharmony_ci	orr	r9,r9,r10,lsl#8
11362306a36Sopenharmony_ci	eor	r10,r4,r5			@ F_xx_xx
11462306a36Sopenharmony_ci	orr	r9,r9,r11,lsl#16
11562306a36Sopenharmony_ci	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
11662306a36Sopenharmony_ci	orr	r9,r9,r12,lsl#24
11762306a36Sopenharmony_ci#else
11862306a36Sopenharmony_ci	ldr	r9,[r1],#4			@ handles unaligned
11962306a36Sopenharmony_ci	add	r6,r8,r6,ror#2			@ E+=K_00_19
12062306a36Sopenharmony_ci	eor	r10,r4,r5			@ F_xx_xx
12162306a36Sopenharmony_ci	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
12262306a36Sopenharmony_ci#ifdef __ARMEL__
12362306a36Sopenharmony_ci	rev	r9,r9				@ byte swap
12462306a36Sopenharmony_ci#endif
12562306a36Sopenharmony_ci#endif
12662306a36Sopenharmony_ci	and	r10,r3,r10,ror#2
12762306a36Sopenharmony_ci	add	r6,r6,r9			@ E+=X[i]
12862306a36Sopenharmony_ci	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
12962306a36Sopenharmony_ci	str	r9,[r14,#-4]!
13062306a36Sopenharmony_ci	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
@ A=r6 B=r7 C=r3 D=r4 E=r5
13162306a36Sopenharmony_ci#if __ARM_ARCH__<7
13262306a36Sopenharmony_ci	ldrb	r10,[r1,#2]
13362306a36Sopenharmony_ci	ldrb	r9,[r1,#3]
13462306a36Sopenharmony_ci	ldrb	r11,[r1,#1]
13562306a36Sopenharmony_ci	add	r5,r8,r5,ror#2			@ E+=K_00_19
13662306a36Sopenharmony_ci	ldrb	r12,[r1],#4
13762306a36Sopenharmony_ci	orr	r9,r9,r10,lsl#8
13862306a36Sopenharmony_ci	eor	r10,r3,r4			@ F_xx_xx
13962306a36Sopenharmony_ci	orr	r9,r9,r11,lsl#16
14062306a36Sopenharmony_ci	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
14162306a36Sopenharmony_ci	orr	r9,r9,r12,lsl#24
14262306a36Sopenharmony_ci#else
14362306a36Sopenharmony_ci	ldr	r9,[r1],#4			@ handles unaligned
14462306a36Sopenharmony_ci	add	r5,r8,r5,ror#2			@ E+=K_00_19
14562306a36Sopenharmony_ci	eor	r10,r3,r4			@ F_xx_xx
14662306a36Sopenharmony_ci	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
14762306a36Sopenharmony_ci#ifdef __ARMEL__
14862306a36Sopenharmony_ci	rev	r9,r9				@ byte swap
14962306a36Sopenharmony_ci#endif
15062306a36Sopenharmony_ci#endif
15162306a36Sopenharmony_ci	and	r10,r7,r10,ror#2
15262306a36Sopenharmony_ci	add	r5,r5,r9			@ E+=X[i]
15362306a36Sopenharmony_ci	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
15462306a36Sopenharmony_ci	str	r9,[r14,#-4]!
15562306a36Sopenharmony_ci	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
@ A=r5 B=r6 C=r7 D=r3 E=r4
15662306a36Sopenharmony_ci#if __ARM_ARCH__<7
15762306a36Sopenharmony_ci	ldrb	r10,[r1,#2]
15862306a36Sopenharmony_ci	ldrb	r9,[r1,#3]
15962306a36Sopenharmony_ci	ldrb	r11,[r1,#1]
16062306a36Sopenharmony_ci	add	r4,r8,r4,ror#2			@ E+=K_00_19
16162306a36Sopenharmony_ci	ldrb	r12,[r1],#4
16262306a36Sopenharmony_ci	orr	r9,r9,r10,lsl#8
16362306a36Sopenharmony_ci	eor	r10,r7,r3			@ F_xx_xx
16462306a36Sopenharmony_ci	orr	r9,r9,r11,lsl#16
16562306a36Sopenharmony_ci	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
16662306a36Sopenharmony_ci	orr	r9,r9,r12,lsl#24
16762306a36Sopenharmony_ci#else
16862306a36Sopenharmony_ci	ldr	r9,[r1],#4			@ handles unaligned
16962306a36Sopenharmony_ci	add	r4,r8,r4,ror#2			@ E+=K_00_19
17062306a36Sopenharmony_ci	eor	r10,r7,r3			@ F_xx_xx
17162306a36Sopenharmony_ci	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
17262306a36Sopenharmony_ci#ifdef __ARMEL__
17362306a36Sopenharmony_ci	rev	r9,r9				@ byte swap
17462306a36Sopenharmony_ci#endif
17562306a36Sopenharmony_ci#endif
17662306a36Sopenharmony_ci	and	r10,r6,r10,ror#2
17762306a36Sopenharmony_ci	add	r4,r4,r9			@ E+=X[i]
17862306a36Sopenharmony_ci	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
17962306a36Sopenharmony_ci	str	r9,[r14,#-4]!
18062306a36Sopenharmony_ci	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
@ A=r4 B=r5 C=r6 D=r7 E=r3; after this round the roles are back at r3..r7
18162306a36Sopenharmony_ci#if __ARM_ARCH__<7
18262306a36Sopenharmony_ci	ldrb	r10,[r1,#2]
18362306a36Sopenharmony_ci	ldrb	r9,[r1,#3]
18462306a36Sopenharmony_ci	ldrb	r11,[r1,#1]
18562306a36Sopenharmony_ci	add	r3,r8,r3,ror#2			@ E+=K_00_19
18662306a36Sopenharmony_ci	ldrb	r12,[r1],#4
18762306a36Sopenharmony_ci	orr	r9,r9,r10,lsl#8
18862306a36Sopenharmony_ci	eor	r10,r6,r7			@ F_xx_xx
18962306a36Sopenharmony_ci	orr	r9,r9,r11,lsl#16
19062306a36Sopenharmony_ci	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
19162306a36Sopenharmony_ci	orr	r9,r9,r12,lsl#24
19262306a36Sopenharmony_ci#else
19362306a36Sopenharmony_ci	ldr	r9,[r1],#4			@ handles unaligned
19462306a36Sopenharmony_ci	add	r3,r8,r3,ror#2			@ E+=K_00_19
19562306a36Sopenharmony_ci	eor	r10,r6,r7			@ F_xx_xx
19662306a36Sopenharmony_ci	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
19762306a36Sopenharmony_ci#ifdef __ARMEL__
19862306a36Sopenharmony_ci	rev	r9,r9				@ byte swap
19962306a36Sopenharmony_ci#endif
20062306a36Sopenharmony_ci#endif
20162306a36Sopenharmony_ci	and	r10,r5,r10,ror#2
20262306a36Sopenharmony_ci	add	r3,r3,r9			@ E+=X[i]
20362306a36Sopenharmony_ci	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
20462306a36Sopenharmony_ci	str	r9,[r14,#-4]!
20562306a36Sopenharmony_ci	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
@ loop until the 15 words reserved above have been filled
20662306a36Sopenharmony_ci	cmp	r14,sp
20762306a36Sopenharmony_ci	bne	.L_00_15		@ [((11+4)*5+2)*3]
@ Move the end marker a further 25 words down: 5 words for rounds 15..19
@ (straight-line code below) plus 20 words for rounds 20..39.
20862306a36Sopenharmony_ci	sub	sp,sp,#25*4
@ ---- round 15: the last word read directly from the input -----------
20962306a36Sopenharmony_ci#if __ARM_ARCH__<7
21062306a36Sopenharmony_ci	ldrb	r10,[r1,#2]
21162306a36Sopenharmony_ci	ldrb	r9,[r1,#3]
21262306a36Sopenharmony_ci	ldrb	r11,[r1,#1]
21362306a36Sopenharmony_ci	add	r7,r8,r7,ror#2			@ E+=K_00_19
21462306a36Sopenharmony_ci	ldrb	r12,[r1],#4
21562306a36Sopenharmony_ci	orr	r9,r9,r10,lsl#8
21662306a36Sopenharmony_ci	eor	r10,r5,r6			@ F_xx_xx
21762306a36Sopenharmony_ci	orr	r9,r9,r11,lsl#16
21862306a36Sopenharmony_ci	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
21962306a36Sopenharmony_ci	orr	r9,r9,r12,lsl#24
22062306a36Sopenharmony_ci#else
22162306a36Sopenharmony_ci	ldr	r9,[r1],#4			@ handles unaligned
22262306a36Sopenharmony_ci	add	r7,r8,r7,ror#2			@ E+=K_00_19
22362306a36Sopenharmony_ci	eor	r10,r5,r6			@ F_xx_xx
22462306a36Sopenharmony_ci	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
22562306a36Sopenharmony_ci#ifdef __ARMEL__
22662306a36Sopenharmony_ci	rev	r9,r9				@ byte swap
22762306a36Sopenharmony_ci#endif
22862306a36Sopenharmony_ci#endif
22962306a36Sopenharmony_ci	and	r10,r4,r10,ror#2
23062306a36Sopenharmony_ci	add	r7,r7,r9			@ E+=X[i]
23162306a36Sopenharmony_ci	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
23262306a36Sopenharmony_ci	str	r9,[r14,#-4]!
23362306a36Sopenharmony_ci	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
@ ---- rounds 16..19: F_00_19 and K_00_19 with a computed X[i] ---------
@ From here on X[i] comes from the schedule. r14 points at X[i-1] and
@ the array grows downwards, so offsets 15, 13, 7 and 2 words address
@ X[i-16], X[i-14], X[i-8] and X[i-3]. Their XOR is rotated with ror#31
@ (a left rotate by one) and stored as the new X[i].
23462306a36Sopenharmony_ci	ldr	r9,[r14,#15*4]
23562306a36Sopenharmony_ci	ldr	r10,[r14,#13*4]
23662306a36Sopenharmony_ci	ldr	r11,[r14,#7*4]
23762306a36Sopenharmony_ci	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
23862306a36Sopenharmony_ci	ldr	r12,[r14,#2*4]
23962306a36Sopenharmony_ci	eor	r9,r9,r10
24062306a36Sopenharmony_ci	eor	r11,r11,r12			@ 1 cycle stall
24162306a36Sopenharmony_ci	eor	r10,r4,r5			@ F_xx_xx
24262306a36Sopenharmony_ci	mov	r9,r9,ror#31
24362306a36Sopenharmony_ci	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
24462306a36Sopenharmony_ci	eor	r9,r9,r11,ror#31
24562306a36Sopenharmony_ci	str	r9,[r14,#-4]!
24662306a36Sopenharmony_ci	and r10,r3,r10,ror#2					@ F_xx_xx
24762306a36Sopenharmony_ci						@ F_xx_xx
24862306a36Sopenharmony_ci	add	r6,r6,r9			@ E+=X[i]
24962306a36Sopenharmony_ci	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
25062306a36Sopenharmony_ci	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
25162306a36Sopenharmony_ci	ldr	r9,[r14,#15*4]
25262306a36Sopenharmony_ci	ldr	r10,[r14,#13*4]
25362306a36Sopenharmony_ci	ldr	r11,[r14,#7*4]
25462306a36Sopenharmony_ci	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
25562306a36Sopenharmony_ci	ldr	r12,[r14,#2*4]
25662306a36Sopenharmony_ci	eor	r9,r9,r10
25762306a36Sopenharmony_ci	eor	r11,r11,r12			@ 1 cycle stall
25862306a36Sopenharmony_ci	eor	r10,r3,r4			@ F_xx_xx
25962306a36Sopenharmony_ci	mov	r9,r9,ror#31
26062306a36Sopenharmony_ci	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
26162306a36Sopenharmony_ci	eor	r9,r9,r11,ror#31
26262306a36Sopenharmony_ci	str	r9,[r14,#-4]!
26362306a36Sopenharmony_ci	and r10,r7,r10,ror#2					@ F_xx_xx
26462306a36Sopenharmony_ci						@ F_xx_xx
26562306a36Sopenharmony_ci	add	r5,r5,r9			@ E+=X[i]
26662306a36Sopenharmony_ci	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
26762306a36Sopenharmony_ci	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
26862306a36Sopenharmony_ci	ldr	r9,[r14,#15*4]
26962306a36Sopenharmony_ci	ldr	r10,[r14,#13*4]
27062306a36Sopenharmony_ci	ldr	r11,[r14,#7*4]
27162306a36Sopenharmony_ci	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
27262306a36Sopenharmony_ci	ldr	r12,[r14,#2*4]
27362306a36Sopenharmony_ci	eor	r9,r9,r10
27462306a36Sopenharmony_ci	eor	r11,r11,r12			@ 1 cycle stall
27562306a36Sopenharmony_ci	eor	r10,r7,r3			@ F_xx_xx
27662306a36Sopenharmony_ci	mov	r9,r9,ror#31
27762306a36Sopenharmony_ci	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
27862306a36Sopenharmony_ci	eor	r9,r9,r11,ror#31
27962306a36Sopenharmony_ci	str	r9,[r14,#-4]!
28062306a36Sopenharmony_ci	and r10,r6,r10,ror#2					@ F_xx_xx
28162306a36Sopenharmony_ci						@ F_xx_xx
28262306a36Sopenharmony_ci	add	r4,r4,r9			@ E+=X[i]
28362306a36Sopenharmony_ci	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
28462306a36Sopenharmony_ci	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
28562306a36Sopenharmony_ci	ldr	r9,[r14,#15*4]
28662306a36Sopenharmony_ci	ldr	r10,[r14,#13*4]
28762306a36Sopenharmony_ci	ldr	r11,[r14,#7*4]
28862306a36Sopenharmony_ci	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
28962306a36Sopenharmony_ci	ldr	r12,[r14,#2*4]
29062306a36Sopenharmony_ci	eor	r9,r9,r10
29162306a36Sopenharmony_ci	eor	r11,r11,r12			@ 1 cycle stall
29262306a36Sopenharmony_ci	eor	r10,r6,r7			@ F_xx_xx
29362306a36Sopenharmony_ci	mov	r9,r9,ror#31
29462306a36Sopenharmony_ci	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
29562306a36Sopenharmony_ci	eor	r9,r9,r11,ror#31
29662306a36Sopenharmony_ci	str	r9,[r14,#-4]!
29762306a36Sopenharmony_ci	and r10,r5,r10,ror#2					@ F_xx_xx
29862306a36Sopenharmony_ci						@ F_xx_xx
29962306a36Sopenharmony_ci	add	r3,r3,r9			@ E+=X[i]
30062306a36Sopenharmony_ci	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
30162306a36Sopenharmony_ci	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
30262306a36Sopenharmony_ci
@ ---- rounds 20..39 and 60..79 share one loop body --------------------
@ Both groups use F(B,C,D) = B ^ C ^ D and differ only in the constant
@ held in r8. The carry flag records which group is running: it is
@ cleared here for 20..39 and set before the second entry for 60..79.
30362306a36Sopenharmony_ci	ldr	r8,.LK_20_39		@ [+15+16*4]
30462306a36Sopenharmony_ci	cmn	sp,#0			@ [+3], clear carry to denote 20_39
30562306a36Sopenharmony_ci.L_20_39_or_60_79:
30662306a36Sopenharmony_ci	ldr	r9,[r14,#15*4]
30762306a36Sopenharmony_ci	ldr	r10,[r14,#13*4]
30862306a36Sopenharmony_ci	ldr	r11,[r14,#7*4]
30962306a36Sopenharmony_ci	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
31062306a36Sopenharmony_ci	ldr	r12,[r14,#2*4]
31162306a36Sopenharmony_ci	eor	r9,r9,r10
31262306a36Sopenharmony_ci	eor	r11,r11,r12			@ 1 cycle stall
31362306a36Sopenharmony_ci	eor	r10,r5,r6			@ F_xx_xx
31462306a36Sopenharmony_ci	mov	r9,r9,ror#31
31562306a36Sopenharmony_ci	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
31662306a36Sopenharmony_ci	eor	r9,r9,r11,ror#31
31762306a36Sopenharmony_ci	str	r9,[r14,#-4]!
31862306a36Sopenharmony_ci	eor r10,r4,r10,ror#2					@ F_xx_xx
31962306a36Sopenharmony_ci						@ F_xx_xx
32062306a36Sopenharmony_ci	add	r7,r7,r9			@ E+=X[i]
32162306a36Sopenharmony_ci	add	r7,r7,r10			@ E+=F_20_39(B,C,D)
32262306a36Sopenharmony_ci	ldr	r9,[r14,#15*4]
32362306a36Sopenharmony_ci	ldr	r10,[r14,#13*4]
32462306a36Sopenharmony_ci	ldr	r11,[r14,#7*4]
32562306a36Sopenharmony_ci	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
32662306a36Sopenharmony_ci	ldr	r12,[r14,#2*4]
32762306a36Sopenharmony_ci	eor	r9,r9,r10
32862306a36Sopenharmony_ci	eor	r11,r11,r12			@ 1 cycle stall
32962306a36Sopenharmony_ci	eor	r10,r4,r5			@ F_xx_xx
33062306a36Sopenharmony_ci	mov	r9,r9,ror#31
33162306a36Sopenharmony_ci	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
33262306a36Sopenharmony_ci	eor	r9,r9,r11,ror#31
33362306a36Sopenharmony_ci	str	r9,[r14,#-4]!
33462306a36Sopenharmony_ci	eor r10,r3,r10,ror#2					@ F_xx_xx
33562306a36Sopenharmony_ci						@ F_xx_xx
33662306a36Sopenharmony_ci	add	r6,r6,r9			@ E+=X[i]
33762306a36Sopenharmony_ci	add	r6,r6,r10			@ E+=F_20_39(B,C,D)
33862306a36Sopenharmony_ci	ldr	r9,[r14,#15*4]
33962306a36Sopenharmony_ci	ldr	r10,[r14,#13*4]
34062306a36Sopenharmony_ci	ldr	r11,[r14,#7*4]
34162306a36Sopenharmony_ci	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
34262306a36Sopenharmony_ci	ldr	r12,[r14,#2*4]
34362306a36Sopenharmony_ci	eor	r9,r9,r10
34462306a36Sopenharmony_ci	eor	r11,r11,r12			@ 1 cycle stall
34562306a36Sopenharmony_ci	eor	r10,r3,r4			@ F_xx_xx
34662306a36Sopenharmony_ci	mov	r9,r9,ror#31
34762306a36Sopenharmony_ci	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
34862306a36Sopenharmony_ci	eor	r9,r9,r11,ror#31
34962306a36Sopenharmony_ci	str	r9,[r14,#-4]!
35062306a36Sopenharmony_ci	eor r10,r7,r10,ror#2					@ F_xx_xx
35162306a36Sopenharmony_ci						@ F_xx_xx
35262306a36Sopenharmony_ci	add	r5,r5,r9			@ E+=X[i]
35362306a36Sopenharmony_ci	add	r5,r5,r10			@ E+=F_20_39(B,C,D)
35462306a36Sopenharmony_ci	ldr	r9,[r14,#15*4]
35562306a36Sopenharmony_ci	ldr	r10,[r14,#13*4]
35662306a36Sopenharmony_ci	ldr	r11,[r14,#7*4]
35762306a36Sopenharmony_ci	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
35862306a36Sopenharmony_ci	ldr	r12,[r14,#2*4]
35962306a36Sopenharmony_ci	eor	r9,r9,r10
36062306a36Sopenharmony_ci	eor	r11,r11,r12			@ 1 cycle stall
36162306a36Sopenharmony_ci	eor	r10,r7,r3			@ F_xx_xx
36262306a36Sopenharmony_ci	mov	r9,r9,ror#31
36362306a36Sopenharmony_ci	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
36462306a36Sopenharmony_ci	eor	r9,r9,r11,ror#31
36562306a36Sopenharmony_ci	str	r9,[r14,#-4]!
36662306a36Sopenharmony_ci	eor r10,r6,r10,ror#2					@ F_xx_xx
36762306a36Sopenharmony_ci						@ F_xx_xx
36862306a36Sopenharmony_ci	add	r4,r4,r9			@ E+=X[i]
36962306a36Sopenharmony_ci	add	r4,r4,r10			@ E+=F_20_39(B,C,D)
37062306a36Sopenharmony_ci	ldr	r9,[r14,#15*4]
37162306a36Sopenharmony_ci	ldr	r10,[r14,#13*4]
37262306a36Sopenharmony_ci	ldr	r11,[r14,#7*4]
37362306a36Sopenharmony_ci	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
37462306a36Sopenharmony_ci	ldr	r12,[r14,#2*4]
37562306a36Sopenharmony_ci	eor	r9,r9,r10
37662306a36Sopenharmony_ci	eor	r11,r11,r12			@ 1 cycle stall
37762306a36Sopenharmony_ci	eor	r10,r6,r7			@ F_xx_xx
37862306a36Sopenharmony_ci	mov	r9,r9,ror#31
37962306a36Sopenharmony_ci	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
38062306a36Sopenharmony_ci	eor	r9,r9,r11,ror#31
38162306a36Sopenharmony_ci	str	r9,[r14,#-4]!
38262306a36Sopenharmony_ci	eor r10,r5,r10,ror#2					@ F_xx_xx
38362306a36Sopenharmony_ci						@ F_xx_xx
38462306a36Sopenharmony_ci	add	r3,r3,r9			@ E+=X[i]
38562306a36Sopenharmony_ci	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
@ The end-of-group test uses teq so the phase marker in the carry flag
@ survives; the Thumb variant first copies sp into r11 for the compare.
@ Carry set on exit means rounds 60..79 just finished.
38662306a36Sopenharmony_ci ARM(	teq	r14,sp		)	@ preserve carry
38762306a36Sopenharmony_ci THUMB(	mov	r11,sp		)
38862306a36Sopenharmony_ci THUMB(	teq	r14,r11		)	@ preserve carry
38962306a36Sopenharmony_ci	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
39062306a36Sopenharmony_ci	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes
39162306a36Sopenharmony_ci
@ ---- rounds 40..59 ---------------------------------------------------
@ sp drops another 20 words to mark the end of this group.
@ F_40_59 is accumulated as two separate additions into E:
@ (B & (C ^ D)) from r10 and (C & D) from r11. The two terms never have
@ the same bit set, so adding them gives the same result as OR-ing them.
39262306a36Sopenharmony_ci	ldr	r8,.LK_40_59
39362306a36Sopenharmony_ci	sub	sp,sp,#20*4		@ [+2]
39462306a36Sopenharmony_ci.L_40_59:
39562306a36Sopenharmony_ci	ldr	r9,[r14,#15*4]
39662306a36Sopenharmony_ci	ldr	r10,[r14,#13*4]
39762306a36Sopenharmony_ci	ldr	r11,[r14,#7*4]
39862306a36Sopenharmony_ci	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
39962306a36Sopenharmony_ci	ldr	r12,[r14,#2*4]
40062306a36Sopenharmony_ci	eor	r9,r9,r10
40162306a36Sopenharmony_ci	eor	r11,r11,r12			@ 1 cycle stall
40262306a36Sopenharmony_ci	eor	r10,r5,r6			@ F_xx_xx
40362306a36Sopenharmony_ci	mov	r9,r9,ror#31
40462306a36Sopenharmony_ci	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
40562306a36Sopenharmony_ci	eor	r9,r9,r11,ror#31
40662306a36Sopenharmony_ci	str	r9,[r14,#-4]!
40762306a36Sopenharmony_ci	and r10,r4,r10,ror#2					@ F_xx_xx
40862306a36Sopenharmony_ci	and r11,r5,r6					@ F_xx_xx
40962306a36Sopenharmony_ci	add	r7,r7,r9			@ E+=X[i]
41062306a36Sopenharmony_ci	add	r7,r7,r10			@ E+=F_40_59(B,C,D)
41162306a36Sopenharmony_ci	add	r7,r7,r11,ror#2
41262306a36Sopenharmony_ci	ldr	r9,[r14,#15*4]
41362306a36Sopenharmony_ci	ldr	r10,[r14,#13*4]
41462306a36Sopenharmony_ci	ldr	r11,[r14,#7*4]
41562306a36Sopenharmony_ci	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
41662306a36Sopenharmony_ci	ldr	r12,[r14,#2*4]
41762306a36Sopenharmony_ci	eor	r9,r9,r10
41862306a36Sopenharmony_ci	eor	r11,r11,r12			@ 1 cycle stall
41962306a36Sopenharmony_ci	eor	r10,r4,r5			@ F_xx_xx
42062306a36Sopenharmony_ci	mov	r9,r9,ror#31
42162306a36Sopenharmony_ci	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
42262306a36Sopenharmony_ci	eor	r9,r9,r11,ror#31
42362306a36Sopenharmony_ci	str	r9,[r14,#-4]!
42462306a36Sopenharmony_ci	and r10,r3,r10,ror#2					@ F_xx_xx
42562306a36Sopenharmony_ci	and r11,r4,r5					@ F_xx_xx
42662306a36Sopenharmony_ci	add	r6,r6,r9			@ E+=X[i]
42762306a36Sopenharmony_ci	add	r6,r6,r10			@ E+=F_40_59(B,C,D)
42862306a36Sopenharmony_ci	add	r6,r6,r11,ror#2
42962306a36Sopenharmony_ci	ldr	r9,[r14,#15*4]
43062306a36Sopenharmony_ci	ldr	r10,[r14,#13*4]
43162306a36Sopenharmony_ci	ldr	r11,[r14,#7*4]
43262306a36Sopenharmony_ci	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
43362306a36Sopenharmony_ci	ldr	r12,[r14,#2*4]
43462306a36Sopenharmony_ci	eor	r9,r9,r10
43562306a36Sopenharmony_ci	eor	r11,r11,r12			@ 1 cycle stall
43662306a36Sopenharmony_ci	eor	r10,r3,r4			@ F_xx_xx
43762306a36Sopenharmony_ci	mov	r9,r9,ror#31
43862306a36Sopenharmony_ci	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
43962306a36Sopenharmony_ci	eor	r9,r9,r11,ror#31
44062306a36Sopenharmony_ci	str	r9,[r14,#-4]!
44162306a36Sopenharmony_ci	and r10,r7,r10,ror#2					@ F_xx_xx
44262306a36Sopenharmony_ci	and r11,r3,r4					@ F_xx_xx
44362306a36Sopenharmony_ci	add	r5,r5,r9			@ E+=X[i]
44462306a36Sopenharmony_ci	add	r5,r5,r10			@ E+=F_40_59(B,C,D)
44562306a36Sopenharmony_ci	add	r5,r5,r11,ror#2
44662306a36Sopenharmony_ci	ldr	r9,[r14,#15*4]
44762306a36Sopenharmony_ci	ldr	r10,[r14,#13*4]
44862306a36Sopenharmony_ci	ldr	r11,[r14,#7*4]
44962306a36Sopenharmony_ci	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
45062306a36Sopenharmony_ci	ldr	r12,[r14,#2*4]
45162306a36Sopenharmony_ci	eor	r9,r9,r10
45262306a36Sopenharmony_ci	eor	r11,r11,r12			@ 1 cycle stall
45362306a36Sopenharmony_ci	eor	r10,r7,r3			@ F_xx_xx
45462306a36Sopenharmony_ci	mov	r9,r9,ror#31
45562306a36Sopenharmony_ci	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
45662306a36Sopenharmony_ci	eor	r9,r9,r11,ror#31
45762306a36Sopenharmony_ci	str	r9,[r14,#-4]!
45862306a36Sopenharmony_ci	and r10,r6,r10,ror#2					@ F_xx_xx
45962306a36Sopenharmony_ci	and r11,r7,r3					@ F_xx_xx
46062306a36Sopenharmony_ci	add	r4,r4,r9			@ E+=X[i]
46162306a36Sopenharmony_ci	add	r4,r4,r10			@ E+=F_40_59(B,C,D)
46262306a36Sopenharmony_ci	add	r4,r4,r11,ror#2
46362306a36Sopenharmony_ci	ldr	r9,[r14,#15*4]
46462306a36Sopenharmony_ci	ldr	r10,[r14,#13*4]
46562306a36Sopenharmony_ci	ldr	r11,[r14,#7*4]
46662306a36Sopenharmony_ci	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
46762306a36Sopenharmony_ci	ldr	r12,[r14,#2*4]
46862306a36Sopenharmony_ci	eor	r9,r9,r10
46962306a36Sopenharmony_ci	eor	r11,r11,r12			@ 1 cycle stall
47062306a36Sopenharmony_ci	eor	r10,r6,r7			@ F_xx_xx
47162306a36Sopenharmony_ci	mov	r9,r9,ror#31
47262306a36Sopenharmony_ci	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
47362306a36Sopenharmony_ci	eor	r9,r9,r11,ror#31
47462306a36Sopenharmony_ci	str	r9,[r14,#-4]!
47562306a36Sopenharmony_ci	and r10,r5,r10,ror#2					@ F_xx_xx
47662306a36Sopenharmony_ci	and r11,r6,r7					@ F_xx_xx
47762306a36Sopenharmony_ci	add	r3,r3,r9			@ E+=X[i]
47862306a36Sopenharmony_ci	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
47962306a36Sopenharmony_ci	add	r3,r3,r11,ror#2
48062306a36Sopenharmony_ci	cmp	r14,sp
48162306a36Sopenharmony_ci	bne	.L_40_59		@ [+((12+5)*5+2)*4]
48262306a36Sopenharmony_ci
@ ---- set up rounds 60..79 and re-enter the shared loop ---------------
@ sp drops the final 20 words; carry is set so that the shared loop
@ leaves through .L_done when it finishes this time.
48362306a36Sopenharmony_ci	ldr	r8,.LK_60_79
48462306a36Sopenharmony_ci	sub	sp,sp,#20*4
48562306a36Sopenharmony_ci	cmp	sp,#0			@ set carry to denote 60_79
48662306a36Sopenharmony_ci	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes
@ ---- block finished --------------------------------------------------
@ Release the 80-word X[] area, reload the previous chaining words from
@ [r0] into r8..r12 and add the working variables to them. C, D and E
@ get their pending ror#2 applied as part of the addition. The sums are
@ written back to [r0] and stay in r3..r7 as A..E for the next block.
48762306a36Sopenharmony_ci.L_done:
48862306a36Sopenharmony_ci	add	sp,sp,#80*4		@ "deallocate" stack frame
48962306a36Sopenharmony_ci	ldmia	r0,{r8,r9,r10,r11,r12}
49062306a36Sopenharmony_ci	add	r3,r8,r3
49162306a36Sopenharmony_ci	add	r4,r9,r4
49262306a36Sopenharmony_ci	add	r5,r10,r5,ror#2
49362306a36Sopenharmony_ci	add	r6,r11,r6,ror#2
49462306a36Sopenharmony_ci	add	r7,r12,r7,ror#2
49562306a36Sopenharmony_ci	stmia	r0,{r3,r4,r5,r6,r7}
@ continue while the input pointer has not reached the end pointer
49662306a36Sopenharmony_ci	teq	r1,r2
49762306a36Sopenharmony_ci	bne	.Lloop			@ [+18], total 1307
49862306a36Sopenharmony_ci
@ restore r4-r12 and return by loading the saved lr into pc
49962306a36Sopenharmony_ci	ldmia	sp!,{r4-r12,pc}
@ Literal pool: the four SHA-1 round constants, one per group of 20
@ rounds, fetched with pc-relative loads from the code above.
50062306a36Sopenharmony_ci.align	2
50162306a36Sopenharmony_ci.LK_00_19:	.word	0x5a827999
50262306a36Sopenharmony_ci.LK_20_39:	.word	0x6ed9eba1
50362306a36Sopenharmony_ci.LK_40_59:	.word	0x8f1bbcdc
50462306a36Sopenharmony_ci.LK_60_79:	.word	0xca62c1d6
50562306a36Sopenharmony_ciENDPROC(sha1_block_data_order)
@ Identification string emitted into the object after the function as a
@ NUL-terminated byte sequence; the .align that follows pads the location
@ counter back to a 4-byte boundary.
50662306a36Sopenharmony_ci.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
50762306a36Sopenharmony_ci.align	2
508