1 @ Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
2 @
3 @ Licensed under the Apache License 2.0 (the "License").  You may not use
4 @ this file except in compliance with the License.  You can obtain a copy
5 @ in the file LICENSE in the source distribution or at
6 @ https://www.openssl.org/source/license.html
7 
8 
9 @ ====================================================================
10 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11 @ project. The module is, however, dual licensed under OpenSSL and
12 @ CRYPTOGAMS licenses depending on where you obtain it. For further
13 @ details see http://www.openssl.org/~appro/cryptogams/.
14 @
15 @ Permission to use under GPL terms is granted.
16 @ ====================================================================
17 
18 @ SHA256 block procedure for ARMv4. May 2007.
19 
20 @ Performance is ~2x better than gcc 3.4 generated code and in "abso-
21 @ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
22 @ byte [on single-issue Xscale PXA250 core].
23 
24 @ July 2010.
25 @
26 @ Rescheduling for dual-issue pipeline resulted in 22% improvement on
27 @ Cortex A8 core and ~20 cycles per processed byte.
28 
29 @ February 2011.
30 @
31 @ Profiler-assisted and platform-specific optimization resulted in 16%
32 @ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
33 
34 @ September 2013.
35 @
36 @ Add NEON implementation. On Cortex A8 it was measured to process one
37 @ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
38 @ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
39 @ code (meaning that latter performs sub-optimally, nothing was done
40 @ about it).
41 
42 @ May 2014.
43 @
44 @ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
45 
46 @ $output is the last argument if it looks like a file (it has an extension)
47 @ $flavour is the first argument if it doesn't look like a file
48 #ifndef __KERNEL__
49 # include "arm_arch.h"
50 #else
51 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
52 # define __ARM_MAX_ARCH__ 7
53 #endif
54 
55 #if defined(__thumb2__)
56 .syntax	unified
57 .thumb
58 #else
59 .code	32
60 #endif
61 
62 .text
63 
64 .type	K256,%object
65 .align	5
66 K256:
67 .word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
68 .word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
69 .word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
70 .word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
71 .word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
72 .word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
73 .word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
74 .word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
75 .word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
76 .word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
77 .word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
78 .word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
79 .word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
80 .word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
81 .word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
82 .word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
83 .size	K256,.-K256
84 .word	0				@ terminator
85 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
86 .LOPENSSL_armcap:
87 # ifdef	_WIN32
88 .word	OPENSSL_armcap_P
89 # else
90 .word	OPENSSL_armcap_P-.Lsha256_block_data_order
91 # endif
92 #endif
93 .align	5
94 
95 .globl	sha256_block_data_order
96 .type	sha256_block_data_order,%function
97 sha256_block_data_order:
98 .Lsha256_block_data_order:
99 #if __ARM_ARCH__<7 && !defined(__thumb2__)
100 	sub	r3,pc,#8		@ sha256_block_data_order
101 #else
102 	adr	r3,.Lsha256_block_data_order
103 #endif
104 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
105 	ldr	r12,.LOPENSSL_armcap
106 # if !defined(_WIN32)
107 	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
108 # endif
109 # if defined(__APPLE__) || defined(_WIN32)
110 	ldr	r12,[r12]
111 # endif
112 	tst	r12,#ARMV8_SHA256
113 	bne	.LARMv8
114 	tst	r12,#ARMV7_NEON
115 	bne	.LNEON
116 #endif
117 	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
118 	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
119 	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
120 	sub	r14,r3,#256+32	@ K256
121 	sub	sp,sp,#16*4		@ alloca(X[16])
122 .Loop:
123 # if __ARM_ARCH__>=7
124 	ldr	r2,[r1],#4
125 # else
126 	ldrb	r2,[r1,#3]
127 # endif
128 	eor	r3,r5,r6		@ magic
129 	eor	r12,r12,r12
130 #if __ARM_ARCH__>=7
131 	@ ldr	r2,[r1],#4			@ 0
132 # if 0==15
133 	str	r1,[sp,#17*4]			@ make room for r1
134 # endif
135 	eor	r0,r8,r8,ror#5
136 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
137 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
138 # ifndef __ARMEB__
139 	rev	r2,r2
140 # endif
141 #else
142 	@ ldrb	r2,[r1,#3]			@ 0
143 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
144 	ldrb	r12,[r1,#2]
145 	ldrb	r0,[r1,#1]
146 	orr	r2,r2,r12,lsl#8
147 	ldrb	r12,[r1],#4
148 	orr	r2,r2,r0,lsl#16
149 # if 0==15
150 	str	r1,[sp,#17*4]			@ make room for r1
151 # endif
152 	eor	r0,r8,r8,ror#5
153 	orr	r2,r2,r12,lsl#24
154 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
155 #endif
156 	ldr	r12,[r14],#4			@ *K256++
157 	add	r11,r11,r2			@ h+=X[i]
158 	str	r2,[sp,#0*4]
159 	eor	r2,r9,r10
160 	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
161 	and	r2,r2,r8
162 	add	r11,r11,r12			@ h+=K256[i]
163 	eor	r2,r2,r10			@ Ch(e,f,g)
164 	eor	r0,r4,r4,ror#11
165 	add	r11,r11,r2			@ h+=Ch(e,f,g)
166 #if 0==31
167 	and	r12,r12,#0xff
168 	cmp	r12,#0xf2			@ done?
169 #endif
170 #if 0<15
171 # if __ARM_ARCH__>=7
172 	ldr	r2,[r1],#4			@ prefetch
173 # else
174 	ldrb	r2,[r1,#3]
175 # endif
176 	eor	r12,r4,r5			@ a^b, b^c in next round
177 #else
178 	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
179 	eor	r12,r4,r5			@ a^b, b^c in next round
180 	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
181 #endif
182 	eor	r0,r0,r4,ror#20	@ Sigma0(a)
183 	and	r3,r3,r12			@ (b^c)&=(a^b)
184 	add	r7,r7,r11			@ d+=h
185 	eor	r3,r3,r5			@ Maj(a,b,c)
186 	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
187 	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
188 #if __ARM_ARCH__>=7
189 	@ ldr	r2,[r1],#4			@ 1
190 # if 1==15
191 	str	r1,[sp,#17*4]			@ make room for r1
192 # endif
193 	eor	r0,r7,r7,ror#5
194 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
195 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
196 # ifndef __ARMEB__
197 	rev	r2,r2
198 # endif
199 #else
200 	@ ldrb	r2,[r1,#3]			@ 1
201 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
202 	ldrb	r3,[r1,#2]
203 	ldrb	r0,[r1,#1]
204 	orr	r2,r2,r3,lsl#8
205 	ldrb	r3,[r1],#4
206 	orr	r2,r2,r0,lsl#16
207 # if 1==15
208 	str	r1,[sp,#17*4]			@ make room for r1
209 # endif
210 	eor	r0,r7,r7,ror#5
211 	orr	r2,r2,r3,lsl#24
212 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
213 #endif
214 	ldr	r3,[r14],#4			@ *K256++
215 	add	r10,r10,r2			@ h+=X[i]
216 	str	r2,[sp,#1*4]
217 	eor	r2,r8,r9
218 	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
219 	and	r2,r2,r7
220 	add	r10,r10,r3			@ h+=K256[i]
221 	eor	r2,r2,r9			@ Ch(e,f,g)
222 	eor	r0,r11,r11,ror#11
223 	add	r10,r10,r2			@ h+=Ch(e,f,g)
224 #if 1==31
225 	and	r3,r3,#0xff
226 	cmp	r3,#0xf2			@ done?
227 #endif
228 #if 1<15
229 # if __ARM_ARCH__>=7
230 	ldr	r2,[r1],#4			@ prefetch
231 # else
232 	ldrb	r2,[r1,#3]
233 # endif
234 	eor	r3,r11,r4			@ a^b, b^c in next round
235 #else
236 	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
237 	eor	r3,r11,r4			@ a^b, b^c in next round
238 	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
239 #endif
240 	eor	r0,r0,r11,ror#20	@ Sigma0(a)
241 	and	r12,r12,r3			@ (b^c)&=(a^b)
242 	add	r6,r6,r10			@ d+=h
243 	eor	r12,r12,r4			@ Maj(a,b,c)
244 	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
245 	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
246 #if __ARM_ARCH__>=7
247 	@ ldr	r2,[r1],#4			@ 2
248 # if 2==15
249 	str	r1,[sp,#17*4]			@ make room for r1
250 # endif
251 	eor	r0,r6,r6,ror#5
252 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
253 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
254 # ifndef __ARMEB__
255 	rev	r2,r2
256 # endif
257 #else
258 	@ ldrb	r2,[r1,#3]			@ 2
259 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
260 	ldrb	r12,[r1,#2]
261 	ldrb	r0,[r1,#1]
262 	orr	r2,r2,r12,lsl#8
263 	ldrb	r12,[r1],#4
264 	orr	r2,r2,r0,lsl#16
265 # if 2==15
266 	str	r1,[sp,#17*4]			@ make room for r1
267 # endif
268 	eor	r0,r6,r6,ror#5
269 	orr	r2,r2,r12,lsl#24
270 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
271 #endif
272 	ldr	r12,[r14],#4			@ *K256++
273 	add	r9,r9,r2			@ h+=X[i]
274 	str	r2,[sp,#2*4]
275 	eor	r2,r7,r8
276 	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
277 	and	r2,r2,r6
278 	add	r9,r9,r12			@ h+=K256[i]
279 	eor	r2,r2,r8			@ Ch(e,f,g)
280 	eor	r0,r10,r10,ror#11
281 	add	r9,r9,r2			@ h+=Ch(e,f,g)
282 #if 2==31
283 	and	r12,r12,#0xff
284 	cmp	r12,#0xf2			@ done?
285 #endif
286 #if 2<15
287 # if __ARM_ARCH__>=7
288 	ldr	r2,[r1],#4			@ prefetch
289 # else
290 	ldrb	r2,[r1,#3]
291 # endif
292 	eor	r12,r10,r11			@ a^b, b^c in next round
293 #else
294 	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
295 	eor	r12,r10,r11			@ a^b, b^c in next round
296 	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
297 #endif
298 	eor	r0,r0,r10,ror#20	@ Sigma0(a)
299 	and	r3,r3,r12			@ (b^c)&=(a^b)
300 	add	r5,r5,r9			@ d+=h
301 	eor	r3,r3,r11			@ Maj(a,b,c)
302 	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
303 	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
304 #if __ARM_ARCH__>=7
305 	@ ldr	r2,[r1],#4			@ 3
306 # if 3==15
307 	str	r1,[sp,#17*4]			@ make room for r1
308 # endif
309 	eor	r0,r5,r5,ror#5
310 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
311 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
312 # ifndef __ARMEB__
313 	rev	r2,r2
314 # endif
315 #else
316 	@ ldrb	r2,[r1,#3]			@ 3
317 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
318 	ldrb	r3,[r1,#2]
319 	ldrb	r0,[r1,#1]
320 	orr	r2,r2,r3,lsl#8
321 	ldrb	r3,[r1],#4
322 	orr	r2,r2,r0,lsl#16
323 # if 3==15
324 	str	r1,[sp,#17*4]			@ make room for r1
325 # endif
326 	eor	r0,r5,r5,ror#5
327 	orr	r2,r2,r3,lsl#24
328 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
329 #endif
330 	ldr	r3,[r14],#4			@ *K256++
331 	add	r8,r8,r2			@ h+=X[i]
332 	str	r2,[sp,#3*4]
333 	eor	r2,r6,r7
334 	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
335 	and	r2,r2,r5
336 	add	r8,r8,r3			@ h+=K256[i]
337 	eor	r2,r2,r7			@ Ch(e,f,g)
338 	eor	r0,r9,r9,ror#11
339 	add	r8,r8,r2			@ h+=Ch(e,f,g)
340 #if 3==31
341 	and	r3,r3,#0xff
342 	cmp	r3,#0xf2			@ done?
343 #endif
344 #if 3<15
345 # if __ARM_ARCH__>=7
346 	ldr	r2,[r1],#4			@ prefetch
347 # else
348 	ldrb	r2,[r1,#3]
349 # endif
350 	eor	r3,r9,r10			@ a^b, b^c in next round
351 #else
352 	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
353 	eor	r3,r9,r10			@ a^b, b^c in next round
354 	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
355 #endif
356 	eor	r0,r0,r9,ror#20	@ Sigma0(a)
357 	and	r12,r12,r3			@ (b^c)&=(a^b)
358 	add	r4,r4,r8			@ d+=h
359 	eor	r12,r12,r10			@ Maj(a,b,c)
360 	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
361 	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
362 #if __ARM_ARCH__>=7
363 	@ ldr	r2,[r1],#4			@ 4
364 # if 4==15
365 	str	r1,[sp,#17*4]			@ make room for r1
366 # endif
367 	eor	r0,r4,r4,ror#5
368 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
369 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
370 # ifndef __ARMEB__
371 	rev	r2,r2
372 # endif
373 #else
374 	@ ldrb	r2,[r1,#3]			@ 4
375 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
376 	ldrb	r12,[r1,#2]
377 	ldrb	r0,[r1,#1]
378 	orr	r2,r2,r12,lsl#8
379 	ldrb	r12,[r1],#4
380 	orr	r2,r2,r0,lsl#16
381 # if 4==15
382 	str	r1,[sp,#17*4]			@ make room for r1
383 # endif
384 	eor	r0,r4,r4,ror#5
385 	orr	r2,r2,r12,lsl#24
386 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
387 #endif
388 	ldr	r12,[r14],#4			@ *K256++
389 	add	r7,r7,r2			@ h+=X[i]
390 	str	r2,[sp,#4*4]
391 	eor	r2,r5,r6
392 	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
393 	and	r2,r2,r4
394 	add	r7,r7,r12			@ h+=K256[i]
395 	eor	r2,r2,r6			@ Ch(e,f,g)
396 	eor	r0,r8,r8,ror#11
397 	add	r7,r7,r2			@ h+=Ch(e,f,g)
398 #if 4==31
399 	and	r12,r12,#0xff
400 	cmp	r12,#0xf2			@ done?
401 #endif
402 #if 4<15
403 # if __ARM_ARCH__>=7
404 	ldr	r2,[r1],#4			@ prefetch
405 # else
406 	ldrb	r2,[r1,#3]
407 # endif
408 	eor	r12,r8,r9			@ a^b, b^c in next round
409 #else
410 	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
411 	eor	r12,r8,r9			@ a^b, b^c in next round
412 	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
413 #endif
414 	eor	r0,r0,r8,ror#20	@ Sigma0(a)
415 	and	r3,r3,r12			@ (b^c)&=(a^b)
416 	add	r11,r11,r7			@ d+=h
417 	eor	r3,r3,r9			@ Maj(a,b,c)
418 	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
419 	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
420 #if __ARM_ARCH__>=7
421 	@ ldr	r2,[r1],#4			@ 5
422 # if 5==15
423 	str	r1,[sp,#17*4]			@ make room for r1
424 # endif
425 	eor	r0,r11,r11,ror#5
426 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
427 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
428 # ifndef __ARMEB__
429 	rev	r2,r2
430 # endif
431 #else
432 	@ ldrb	r2,[r1,#3]			@ 5
433 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
434 	ldrb	r3,[r1,#2]
435 	ldrb	r0,[r1,#1]
436 	orr	r2,r2,r3,lsl#8
437 	ldrb	r3,[r1],#4
438 	orr	r2,r2,r0,lsl#16
439 # if 5==15
440 	str	r1,[sp,#17*4]			@ make room for r1
441 # endif
442 	eor	r0,r11,r11,ror#5
443 	orr	r2,r2,r3,lsl#24
444 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
445 #endif
446 	ldr	r3,[r14],#4			@ *K256++
447 	add	r6,r6,r2			@ h+=X[i]
448 	str	r2,[sp,#5*4]
449 	eor	r2,r4,r5
450 	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
451 	and	r2,r2,r11
452 	add	r6,r6,r3			@ h+=K256[i]
453 	eor	r2,r2,r5			@ Ch(e,f,g)
454 	eor	r0,r7,r7,ror#11
455 	add	r6,r6,r2			@ h+=Ch(e,f,g)
456 #if 5==31
457 	and	r3,r3,#0xff
458 	cmp	r3,#0xf2			@ done?
459 #endif
460 #if 5<15
461 # if __ARM_ARCH__>=7
462 	ldr	r2,[r1],#4			@ prefetch
463 # else
464 	ldrb	r2,[r1,#3]
465 # endif
466 	eor	r3,r7,r8			@ a^b, b^c in next round
467 #else
468 	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
469 	eor	r3,r7,r8			@ a^b, b^c in next round
470 	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
471 #endif
472 	eor	r0,r0,r7,ror#20	@ Sigma0(a)
473 	and	r12,r12,r3			@ (b^c)&=(a^b)
474 	add	r10,r10,r6			@ d+=h
475 	eor	r12,r12,r8			@ Maj(a,b,c)
476 	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
477 	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
478 #if __ARM_ARCH__>=7
479 	@ ldr	r2,[r1],#4			@ 6
480 # if 6==15
481 	str	r1,[sp,#17*4]			@ make room for r1
482 # endif
483 	eor	r0,r10,r10,ror#5
484 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
485 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
486 # ifndef __ARMEB__
487 	rev	r2,r2
488 # endif
489 #else
490 	@ ldrb	r2,[r1,#3]			@ 6
491 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
492 	ldrb	r12,[r1,#2]
493 	ldrb	r0,[r1,#1]
494 	orr	r2,r2,r12,lsl#8
495 	ldrb	r12,[r1],#4
496 	orr	r2,r2,r0,lsl#16
497 # if 6==15
498 	str	r1,[sp,#17*4]			@ make room for r1
499 # endif
500 	eor	r0,r10,r10,ror#5
501 	orr	r2,r2,r12,lsl#24
502 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
503 #endif
504 	ldr	r12,[r14],#4			@ *K256++
505 	add	r5,r5,r2			@ h+=X[i]
506 	str	r2,[sp,#6*4]
507 	eor	r2,r11,r4
508 	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
509 	and	r2,r2,r10
510 	add	r5,r5,r12			@ h+=K256[i]
511 	eor	r2,r2,r4			@ Ch(e,f,g)
512 	eor	r0,r6,r6,ror#11
513 	add	r5,r5,r2			@ h+=Ch(e,f,g)
514 #if 6==31
515 	and	r12,r12,#0xff
516 	cmp	r12,#0xf2			@ done?
517 #endif
518 #if 6<15
519 # if __ARM_ARCH__>=7
520 	ldr	r2,[r1],#4			@ prefetch
521 # else
522 	ldrb	r2,[r1,#3]
523 # endif
524 	eor	r12,r6,r7			@ a^b, b^c in next round
525 #else
526 	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
527 	eor	r12,r6,r7			@ a^b, b^c in next round
528 	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
529 #endif
530 	eor	r0,r0,r6,ror#20	@ Sigma0(a)
531 	and	r3,r3,r12			@ (b^c)&=(a^b)
532 	add	r9,r9,r5			@ d+=h
533 	eor	r3,r3,r7			@ Maj(a,b,c)
534 	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
535 	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
536 #if __ARM_ARCH__>=7
537 	@ ldr	r2,[r1],#4			@ 7
538 # if 7==15
539 	str	r1,[sp,#17*4]			@ make room for r1
540 # endif
541 	eor	r0,r9,r9,ror#5
542 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
543 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
544 # ifndef __ARMEB__
545 	rev	r2,r2
546 # endif
547 #else
548 	@ ldrb	r2,[r1,#3]			@ 7
549 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
550 	ldrb	r3,[r1,#2]
551 	ldrb	r0,[r1,#1]
552 	orr	r2,r2,r3,lsl#8
553 	ldrb	r3,[r1],#4
554 	orr	r2,r2,r0,lsl#16
555 # if 7==15
556 	str	r1,[sp,#17*4]			@ make room for r1
557 # endif
558 	eor	r0,r9,r9,ror#5
559 	orr	r2,r2,r3,lsl#24
560 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
561 #endif
562 	ldr	r3,[r14],#4			@ *K256++
563 	add	r4,r4,r2			@ h+=X[i]
564 	str	r2,[sp,#7*4]
565 	eor	r2,r10,r11
566 	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
567 	and	r2,r2,r9
568 	add	r4,r4,r3			@ h+=K256[i]
569 	eor	r2,r2,r11			@ Ch(e,f,g)
570 	eor	r0,r5,r5,ror#11
571 	add	r4,r4,r2			@ h+=Ch(e,f,g)
572 #if 7==31
573 	and	r3,r3,#0xff
574 	cmp	r3,#0xf2			@ done?
575 #endif
576 #if 7<15
577 # if __ARM_ARCH__>=7
578 	ldr	r2,[r1],#4			@ prefetch
579 # else
580 	ldrb	r2,[r1,#3]
581 # endif
582 	eor	r3,r5,r6			@ a^b, b^c in next round
583 #else
584 	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
585 	eor	r3,r5,r6			@ a^b, b^c in next round
586 	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
587 #endif
588 	eor	r0,r0,r5,ror#20	@ Sigma0(a)
589 	and	r12,r12,r3			@ (b^c)&=(a^b)
590 	add	r8,r8,r4			@ d+=h
591 	eor	r12,r12,r6			@ Maj(a,b,c)
592 	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
593 	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
594 #if __ARM_ARCH__>=7
595 	@ ldr	r2,[r1],#4			@ 8
596 # if 8==15
597 	str	r1,[sp,#17*4]			@ make room for r1
598 # endif
599 	eor	r0,r8,r8,ror#5
600 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
601 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
602 # ifndef __ARMEB__
603 	rev	r2,r2
604 # endif
605 #else
606 	@ ldrb	r2,[r1,#3]			@ 8
607 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
608 	ldrb	r12,[r1,#2]
609 	ldrb	r0,[r1,#1]
610 	orr	r2,r2,r12,lsl#8
611 	ldrb	r12,[r1],#4
612 	orr	r2,r2,r0,lsl#16
613 # if 8==15
614 	str	r1,[sp,#17*4]			@ make room for r1
615 # endif
616 	eor	r0,r8,r8,ror#5
617 	orr	r2,r2,r12,lsl#24
618 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
619 #endif
620 	ldr	r12,[r14],#4			@ *K256++
621 	add	r11,r11,r2			@ h+=X[i]
622 	str	r2,[sp,#8*4]
623 	eor	r2,r9,r10
624 	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
625 	and	r2,r2,r8
626 	add	r11,r11,r12			@ h+=K256[i]
627 	eor	r2,r2,r10			@ Ch(e,f,g)
628 	eor	r0,r4,r4,ror#11
629 	add	r11,r11,r2			@ h+=Ch(e,f,g)
630 #if 8==31
631 	and	r12,r12,#0xff
632 	cmp	r12,#0xf2			@ done?
633 #endif
634 #if 8<15
635 # if __ARM_ARCH__>=7
636 	ldr	r2,[r1],#4			@ prefetch
637 # else
638 	ldrb	r2,[r1,#3]
639 # endif
640 	eor	r12,r4,r5			@ a^b, b^c in next round
641 #else
642 	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
643 	eor	r12,r4,r5			@ a^b, b^c in next round
644 	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
645 #endif
646 	eor	r0,r0,r4,ror#20	@ Sigma0(a)
647 	and	r3,r3,r12			@ (b^c)&=(a^b)
648 	add	r7,r7,r11			@ d+=h
649 	eor	r3,r3,r5			@ Maj(a,b,c)
650 	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
651 	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
652 #if __ARM_ARCH__>=7
653 	@ ldr	r2,[r1],#4			@ 9
654 # if 9==15
655 	str	r1,[sp,#17*4]			@ make room for r1
656 # endif
657 	eor	r0,r7,r7,ror#5
658 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
659 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
660 # ifndef __ARMEB__
661 	rev	r2,r2
662 # endif
663 #else
664 	@ ldrb	r2,[r1,#3]			@ 9
665 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
666 	ldrb	r3,[r1,#2]
667 	ldrb	r0,[r1,#1]
668 	orr	r2,r2,r3,lsl#8
669 	ldrb	r3,[r1],#4
670 	orr	r2,r2,r0,lsl#16
671 # if 9==15
672 	str	r1,[sp,#17*4]			@ make room for r1
673 # endif
674 	eor	r0,r7,r7,ror#5
675 	orr	r2,r2,r3,lsl#24
676 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
677 #endif
678 	ldr	r3,[r14],#4			@ *K256++
679 	add	r10,r10,r2			@ h+=X[i]
680 	str	r2,[sp,#9*4]
681 	eor	r2,r8,r9
682 	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
683 	and	r2,r2,r7
684 	add	r10,r10,r3			@ h+=K256[i]
685 	eor	r2,r2,r9			@ Ch(e,f,g)
686 	eor	r0,r11,r11,ror#11
687 	add	r10,r10,r2			@ h+=Ch(e,f,g)
688 #if 9==31
689 	and	r3,r3,#0xff
690 	cmp	r3,#0xf2			@ done?
691 #endif
692 #if 9<15
693 # if __ARM_ARCH__>=7
694 	ldr	r2,[r1],#4			@ prefetch
695 # else
696 	ldrb	r2,[r1,#3]
697 # endif
698 	eor	r3,r11,r4			@ a^b, b^c in next round
699 #else
700 	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
701 	eor	r3,r11,r4			@ a^b, b^c in next round
702 	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
703 #endif
704 	eor	r0,r0,r11,ror#20	@ Sigma0(a)
705 	and	r12,r12,r3			@ (b^c)&=(a^b)
706 	add	r6,r6,r10			@ d+=h
707 	eor	r12,r12,r4			@ Maj(a,b,c)
708 	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
709 	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
710 #if __ARM_ARCH__>=7
711 	@ ldr	r2,[r1],#4			@ 10
712 # if 10==15
713 	str	r1,[sp,#17*4]			@ make room for r1
714 # endif
715 	eor	r0,r6,r6,ror#5
716 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
717 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
718 # ifndef __ARMEB__
719 	rev	r2,r2
720 # endif
721 #else
722 	@ ldrb	r2,[r1,#3]			@ 10
723 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
724 	ldrb	r12,[r1,#2]
725 	ldrb	r0,[r1,#1]
726 	orr	r2,r2,r12,lsl#8
727 	ldrb	r12,[r1],#4
728 	orr	r2,r2,r0,lsl#16
729 # if 10==15
730 	str	r1,[sp,#17*4]			@ make room for r1
731 # endif
732 	eor	r0,r6,r6,ror#5
733 	orr	r2,r2,r12,lsl#24
734 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
735 #endif
736 	ldr	r12,[r14],#4			@ *K256++
737 	add	r9,r9,r2			@ h+=X[i]
738 	str	r2,[sp,#10*4]
739 	eor	r2,r7,r8
740 	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
741 	and	r2,r2,r6
742 	add	r9,r9,r12			@ h+=K256[i]
743 	eor	r2,r2,r8			@ Ch(e,f,g)
744 	eor	r0,r10,r10,ror#11
745 	add	r9,r9,r2			@ h+=Ch(e,f,g)
746 #if 10==31
747 	and	r12,r12,#0xff
748 	cmp	r12,#0xf2			@ done?
749 #endif
750 #if 10<15
751 # if __ARM_ARCH__>=7
752 	ldr	r2,[r1],#4			@ prefetch
753 # else
754 	ldrb	r2,[r1,#3]
755 # endif
756 	eor	r12,r10,r11			@ a^b, b^c in next round
757 #else
758 	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
759 	eor	r12,r10,r11			@ a^b, b^c in next round
760 	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
761 #endif
762 	eor	r0,r0,r10,ror#20	@ Sigma0(a)
763 	and	r3,r3,r12			@ (b^c)&=(a^b)
764 	add	r5,r5,r9			@ d+=h
765 	eor	r3,r3,r11			@ Maj(a,b,c)
766 	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
767 	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
768 #if __ARM_ARCH__>=7
769 	@ ldr	r2,[r1],#4			@ 11
770 # if 11==15
771 	str	r1,[sp,#17*4]			@ make room for r1
772 # endif
773 	eor	r0,r5,r5,ror#5
774 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
775 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
776 # ifndef __ARMEB__
777 	rev	r2,r2
778 # endif
779 #else
780 	@ ldrb	r2,[r1,#3]			@ 11
781 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
782 	ldrb	r3,[r1,#2]
783 	ldrb	r0,[r1,#1]
784 	orr	r2,r2,r3,lsl#8
785 	ldrb	r3,[r1],#4
786 	orr	r2,r2,r0,lsl#16
787 # if 11==15
788 	str	r1,[sp,#17*4]			@ make room for r1
789 # endif
790 	eor	r0,r5,r5,ror#5
791 	orr	r2,r2,r3,lsl#24
792 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
793 #endif
794 	ldr	r3,[r14],#4			@ *K256++
795 	add	r8,r8,r2			@ h+=X[i]
796 	str	r2,[sp,#11*4]
797 	eor	r2,r6,r7
798 	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
799 	and	r2,r2,r5
800 	add	r8,r8,r3			@ h+=K256[i]
801 	eor	r2,r2,r7			@ Ch(e,f,g)
802 	eor	r0,r9,r9,ror#11
803 	add	r8,r8,r2			@ h+=Ch(e,f,g)
804 #if 11==31
805 	and	r3,r3,#0xff
806 	cmp	r3,#0xf2			@ done?
807 #endif
808 #if 11<15
809 # if __ARM_ARCH__>=7
810 	ldr	r2,[r1],#4			@ prefetch
811 # else
812 	ldrb	r2,[r1,#3]
813 # endif
814 	eor	r3,r9,r10			@ a^b, b^c in next round
815 #else
816 	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
817 	eor	r3,r9,r10			@ a^b, b^c in next round
818 	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
819 #endif
820 	eor	r0,r0,r9,ror#20	@ Sigma0(a)
821 	and	r12,r12,r3			@ (b^c)&=(a^b)
822 	add	r4,r4,r8			@ d+=h
823 	eor	r12,r12,r10			@ Maj(a,b,c)
824 	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
825 	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
826 #if __ARM_ARCH__>=7
827 	@ ldr	r2,[r1],#4			@ 12
828 # if 12==15
829 	str	r1,[sp,#17*4]			@ make room for r1
830 # endif
831 	eor	r0,r4,r4,ror#5
832 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
833 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
834 # ifndef __ARMEB__
835 	rev	r2,r2
836 # endif
837 #else
838 	@ ldrb	r2,[r1,#3]			@ 12
839 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
840 	ldrb	r12,[r1,#2]
841 	ldrb	r0,[r1,#1]
842 	orr	r2,r2,r12,lsl#8
843 	ldrb	r12,[r1],#4
844 	orr	r2,r2,r0,lsl#16
845 # if 12==15
846 	str	r1,[sp,#17*4]			@ make room for r1
847 # endif
848 	eor	r0,r4,r4,ror#5
849 	orr	r2,r2,r12,lsl#24
850 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
851 #endif
852 	ldr	r12,[r14],#4			@ *K256++
853 	add	r7,r7,r2			@ h+=X[i]
854 	str	r2,[sp,#12*4]
855 	eor	r2,r5,r6
856 	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
857 	and	r2,r2,r4
858 	add	r7,r7,r12			@ h+=K256[i]
859 	eor	r2,r2,r6			@ Ch(e,f,g)
860 	eor	r0,r8,r8,ror#11
861 	add	r7,r7,r2			@ h+=Ch(e,f,g)
862 #if 12==31
863 	and	r12,r12,#0xff
864 	cmp	r12,#0xf2			@ done?
865 #endif
866 #if 12<15
867 # if __ARM_ARCH__>=7
868 	ldr	r2,[r1],#4			@ prefetch
869 # else
870 	ldrb	r2,[r1,#3]
871 # endif
872 	eor	r12,r8,r9			@ a^b, b^c in next round
873 #else
874 	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
875 	eor	r12,r8,r9			@ a^b, b^c in next round
876 	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
877 #endif
878 	eor	r0,r0,r8,ror#20	@ Sigma0(a)
879 	and	r3,r3,r12			@ (b^c)&=(a^b)
880 	add	r11,r11,r7			@ d+=h
881 	eor	r3,r3,r9			@ Maj(a,b,c)
882 	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
883 	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
884 #if __ARM_ARCH__>=7
885 	@ ldr	r2,[r1],#4			@ 13
886 # if 13==15
887 	str	r1,[sp,#17*4]			@ make room for r1
888 # endif
889 	eor	r0,r11,r11,ror#5
890 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
891 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
892 # ifndef __ARMEB__
893 	rev	r2,r2
894 # endif
895 #else
896 	@ ldrb	r2,[r1,#3]			@ 13
897 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
898 	ldrb	r3,[r1,#2]
899 	ldrb	r0,[r1,#1]
900 	orr	r2,r2,r3,lsl#8
901 	ldrb	r3,[r1],#4
902 	orr	r2,r2,r0,lsl#16
903 # if 13==15
904 	str	r1,[sp,#17*4]			@ make room for r1
905 # endif
906 	eor	r0,r11,r11,ror#5
907 	orr	r2,r2,r3,lsl#24
908 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
909 #endif
910 	ldr	r3,[r14],#4			@ *K256++
911 	add	r6,r6,r2			@ h+=X[i]
912 	str	r2,[sp,#13*4]
913 	eor	r2,r4,r5
914 	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
915 	and	r2,r2,r11
916 	add	r6,r6,r3			@ h+=K256[i]
917 	eor	r2,r2,r5			@ Ch(e,f,g)
918 	eor	r0,r7,r7,ror#11
919 	add	r6,r6,r2			@ h+=Ch(e,f,g)
920 #if 13==31
921 	and	r3,r3,#0xff
922 	cmp	r3,#0xf2			@ done?
923 #endif
924 #if 13<15
925 # if __ARM_ARCH__>=7
926 	ldr	r2,[r1],#4			@ prefetch
927 # else
928 	ldrb	r2,[r1,#3]
929 # endif
930 	eor	r3,r7,r8			@ a^b, b^c in next round
931 #else
932 	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
933 	eor	r3,r7,r8			@ a^b, b^c in next round
934 	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
935 #endif
936 	eor	r0,r0,r7,ror#20	@ Sigma0(a)
937 	and	r12,r12,r3			@ (b^c)&=(a^b)
938 	add	r10,r10,r6			@ d+=h
939 	eor	r12,r12,r8			@ Maj(a,b,c)
940 	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
941 	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
942 #if __ARM_ARCH__>=7
943 	@ ldr	r2,[r1],#4			@ 14
944 # if 14==15
945 	str	r1,[sp,#17*4]			@ make room for r1
946 # endif
947 	eor	r0,r10,r10,ror#5
948 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
949 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
950 # ifndef __ARMEB__
951 	rev	r2,r2
952 # endif
953 #else
954 	@ ldrb	r2,[r1,#3]			@ 14
955 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
956 	ldrb	r12,[r1,#2]
957 	ldrb	r0,[r1,#1]
958 	orr	r2,r2,r12,lsl#8
959 	ldrb	r12,[r1],#4
960 	orr	r2,r2,r0,lsl#16
961 # if 14==15
962 	str	r1,[sp,#17*4]			@ make room for r1
963 # endif
964 	eor	r0,r10,r10,ror#5
965 	orr	r2,r2,r12,lsl#24
966 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
967 #endif
968 	ldr	r12,[r14],#4			@ *K256++
969 	add	r5,r5,r2			@ h+=X[i]
970 	str	r2,[sp,#14*4]
971 	eor	r2,r11,r4
972 	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
973 	and	r2,r2,r10
974 	add	r5,r5,r12			@ h+=K256[i]
975 	eor	r2,r2,r4			@ Ch(e,f,g)
976 	eor	r0,r6,r6,ror#11
977 	add	r5,r5,r2			@ h+=Ch(e,f,g)
978 #if 14==31
979 	and	r12,r12,#0xff
980 	cmp	r12,#0xf2			@ done?
981 #endif
982 #if 14<15
983 # if __ARM_ARCH__>=7
984 	ldr	r2,[r1],#4			@ prefetch
985 # else
986 	ldrb	r2,[r1,#3]
987 # endif
988 	eor	r12,r6,r7			@ a^b, b^c in next round
989 #else
990 	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
991 	eor	r12,r6,r7			@ a^b, b^c in next round
992 	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
993 #endif
994 	eor	r0,r0,r6,ror#20	@ Sigma0(a)
995 	and	r3,r3,r12			@ (b^c)&=(a^b)
996 	add	r9,r9,r5			@ d+=h
997 	eor	r3,r3,r7			@ Maj(a,b,c)
998 	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
999 	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1000 #if __ARM_ARCH__>=7
1001 	@ ldr	r2,[r1],#4			@ 15
1002 # if 15==15
1003 	str	r1,[sp,#17*4]			@ make room for r1
1004 # endif
1005 	eor	r0,r9,r9,ror#5
1006 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1007 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1008 # ifndef __ARMEB__
1009 	rev	r2,r2
1010 # endif
1011 #else
1012 	@ ldrb	r2,[r1,#3]			@ 15
1013 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1014 	ldrb	r3,[r1,#2]
1015 	ldrb	r0,[r1,#1]
1016 	orr	r2,r2,r3,lsl#8
1017 	ldrb	r3,[r1],#4
1018 	orr	r2,r2,r0,lsl#16
1019 # if 15==15
1020 	str	r1,[sp,#17*4]			@ make room for r1
1021 # endif
1022 	eor	r0,r9,r9,ror#5
1023 	orr	r2,r2,r3,lsl#24
1024 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1025 #endif
1026 	ldr	r3,[r14],#4			@ *K256++
1027 	add	r4,r4,r2			@ h+=X[i]
1028 	str	r2,[sp,#15*4]
1029 	eor	r2,r10,r11
1030 	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1031 	and	r2,r2,r9
1032 	add	r4,r4,r3			@ h+=K256[i]
1033 	eor	r2,r2,r11			@ Ch(e,f,g)
1034 	eor	r0,r5,r5,ror#11
1035 	add	r4,r4,r2			@ h+=Ch(e,f,g)
1036 #if 15==31
1037 	and	r3,r3,#0xff
1038 	cmp	r3,#0xf2			@ done?
1039 #endif
1040 #if 15<15
1041 # if __ARM_ARCH__>=7
1042 	ldr	r2,[r1],#4			@ prefetch
1043 # else
1044 	ldrb	r2,[r1,#3]
1045 # endif
1046 	eor	r3,r5,r6			@ a^b, b^c in next round
1047 #else
1048 	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1049 	eor	r3,r5,r6			@ a^b, b^c in next round
1050 	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1051 #endif
1052 	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1053 	and	r12,r12,r3			@ (b^c)&=(a^b)
1054 	add	r8,r8,r4			@ d+=h
1055 	eor	r12,r12,r6			@ Maj(a,b,c)
1056 	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1057 	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1058 .Lrounds_16_xx:
1059 	@ ldr	r2,[sp,#1*4]		@ 16
1060 	@ ldr	r1,[sp,#14*4]
1061 	mov	r0,r2,ror#7
1062 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1063 	mov	r12,r1,ror#17
1064 	eor	r0,r0,r2,ror#18
1065 	eor	r12,r12,r1,ror#19
1066 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1067 	ldr	r2,[sp,#0*4]
1068 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1069 	ldr	r1,[sp,#9*4]
1070 
1071 	add	r12,r12,r0
1072 	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1073 	add	r2,r2,r12
1074 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1075 	add	r2,r2,r1			@ X[i]
1076 	ldr	r12,[r14],#4			@ *K256++
1077 	add	r11,r11,r2			@ h+=X[i]
1078 	str	r2,[sp,#0*4]
1079 	eor	r2,r9,r10
1080 	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1081 	and	r2,r2,r8
1082 	add	r11,r11,r12			@ h+=K256[i]
1083 	eor	r2,r2,r10			@ Ch(e,f,g)
1084 	eor	r0,r4,r4,ror#11
1085 	add	r11,r11,r2			@ h+=Ch(e,f,g)
1086 #if 16==31
1087 	and	r12,r12,#0xff
1088 	cmp	r12,#0xf2			@ done?
1089 #endif
1090 #if 16<15
1091 # if __ARM_ARCH__>=7
1092 	ldr	r2,[r1],#4			@ prefetch
1093 # else
1094 	ldrb	r2,[r1,#3]
1095 # endif
1096 	eor	r12,r4,r5			@ a^b, b^c in next round
1097 #else
1098 	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1099 	eor	r12,r4,r5			@ a^b, b^c in next round
1100 	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1101 #endif
1102 	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1103 	and	r3,r3,r12			@ (b^c)&=(a^b)
1104 	add	r7,r7,r11			@ d+=h
1105 	eor	r3,r3,r5			@ Maj(a,b,c)
1106 	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1107 	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1108 	@ ldr	r2,[sp,#2*4]		@ 17
1109 	@ ldr	r1,[sp,#15*4]
1110 	mov	r0,r2,ror#7
1111 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1112 	mov	r3,r1,ror#17
1113 	eor	r0,r0,r2,ror#18
1114 	eor	r3,r3,r1,ror#19
1115 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1116 	ldr	r2,[sp,#1*4]
1117 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1118 	ldr	r1,[sp,#10*4]
1119 
1120 	add	r3,r3,r0
1121 	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1122 	add	r2,r2,r3
1123 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1124 	add	r2,r2,r1			@ X[i]
1125 	ldr	r3,[r14],#4			@ *K256++
1126 	add	r10,r10,r2			@ h+=X[i]
1127 	str	r2,[sp,#1*4]
1128 	eor	r2,r8,r9
1129 	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1130 	and	r2,r2,r7
1131 	add	r10,r10,r3			@ h+=K256[i]
1132 	eor	r2,r2,r9			@ Ch(e,f,g)
1133 	eor	r0,r11,r11,ror#11
1134 	add	r10,r10,r2			@ h+=Ch(e,f,g)
1135 #if 17==31
1136 	and	r3,r3,#0xff
1137 	cmp	r3,#0xf2			@ done?
1138 #endif
1139 #if 17<15
1140 # if __ARM_ARCH__>=7
1141 	ldr	r2,[r1],#4			@ prefetch
1142 # else
1143 	ldrb	r2,[r1,#3]
1144 # endif
1145 	eor	r3,r11,r4			@ a^b, b^c in next round
1146 #else
1147 	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1148 	eor	r3,r11,r4			@ a^b, b^c in next round
1149 	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1150 #endif
1151 	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1152 	and	r12,r12,r3			@ (b^c)&=(a^b)
1153 	add	r6,r6,r10			@ d+=h
1154 	eor	r12,r12,r4			@ Maj(a,b,c)
1155 	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1156 	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1157 	@ ldr	r2,[sp,#3*4]		@ 18
1158 	@ ldr	r1,[sp,#0*4]
1159 	mov	r0,r2,ror#7
1160 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1161 	mov	r12,r1,ror#17
1162 	eor	r0,r0,r2,ror#18
1163 	eor	r12,r12,r1,ror#19
1164 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1165 	ldr	r2,[sp,#2*4]
1166 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1167 	ldr	r1,[sp,#11*4]
1168 
1169 	add	r12,r12,r0
1170 	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1171 	add	r2,r2,r12
1172 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1173 	add	r2,r2,r1			@ X[i]
1174 	ldr	r12,[r14],#4			@ *K256++
1175 	add	r9,r9,r2			@ h+=X[i]
1176 	str	r2,[sp,#2*4]
1177 	eor	r2,r7,r8
1178 	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1179 	and	r2,r2,r6
1180 	add	r9,r9,r12			@ h+=K256[i]
1181 	eor	r2,r2,r8			@ Ch(e,f,g)
1182 	eor	r0,r10,r10,ror#11
1183 	add	r9,r9,r2			@ h+=Ch(e,f,g)
1184 #if 18==31
1185 	and	r12,r12,#0xff
1186 	cmp	r12,#0xf2			@ done?
1187 #endif
1188 #if 18<15
1189 # if __ARM_ARCH__>=7
1190 	ldr	r2,[r1],#4			@ prefetch
1191 # else
1192 	ldrb	r2,[r1,#3]
1193 # endif
1194 	eor	r12,r10,r11			@ a^b, b^c in next round
1195 #else
1196 	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1197 	eor	r12,r10,r11			@ a^b, b^c in next round
1198 	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1199 #endif
1200 	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1201 	and	r3,r3,r12			@ (b^c)&=(a^b)
1202 	add	r5,r5,r9			@ d+=h
1203 	eor	r3,r3,r11			@ Maj(a,b,c)
1204 	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1205 	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1206 	@ ldr	r2,[sp,#4*4]		@ 19
1207 	@ ldr	r1,[sp,#1*4]
1208 	mov	r0,r2,ror#7
1209 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1210 	mov	r3,r1,ror#17
1211 	eor	r0,r0,r2,ror#18
1212 	eor	r3,r3,r1,ror#19
1213 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1214 	ldr	r2,[sp,#3*4]
1215 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1216 	ldr	r1,[sp,#12*4]
1217 
1218 	add	r3,r3,r0
1219 	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1220 	add	r2,r2,r3
1221 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1222 	add	r2,r2,r1			@ X[i]
1223 	ldr	r3,[r14],#4			@ *K256++
1224 	add	r8,r8,r2			@ h+=X[i]
1225 	str	r2,[sp,#3*4]
1226 	eor	r2,r6,r7
1227 	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1228 	and	r2,r2,r5
1229 	add	r8,r8,r3			@ h+=K256[i]
1230 	eor	r2,r2,r7			@ Ch(e,f,g)
1231 	eor	r0,r9,r9,ror#11
1232 	add	r8,r8,r2			@ h+=Ch(e,f,g)
1233 #if 19==31
1234 	and	r3,r3,#0xff
1235 	cmp	r3,#0xf2			@ done?
1236 #endif
1237 #if 19<15
1238 # if __ARM_ARCH__>=7
1239 	ldr	r2,[r1],#4			@ prefetch
1240 # else
1241 	ldrb	r2,[r1,#3]
1242 # endif
1243 	eor	r3,r9,r10			@ a^b, b^c in next round
1244 #else
1245 	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1246 	eor	r3,r9,r10			@ a^b, b^c in next round
1247 	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1248 #endif
1249 	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1250 	and	r12,r12,r3			@ (b^c)&=(a^b)
1251 	add	r4,r4,r8			@ d+=h
1252 	eor	r12,r12,r10			@ Maj(a,b,c)
1253 	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1254 	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1255 	@ ldr	r2,[sp,#5*4]		@ 20
1256 	@ ldr	r1,[sp,#2*4]
1257 	mov	r0,r2,ror#7
1258 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1259 	mov	r12,r1,ror#17
1260 	eor	r0,r0,r2,ror#18
1261 	eor	r12,r12,r1,ror#19
1262 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1263 	ldr	r2,[sp,#4*4]
1264 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1265 	ldr	r1,[sp,#13*4]
1266 
1267 	add	r12,r12,r0
1268 	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1269 	add	r2,r2,r12
1270 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1271 	add	r2,r2,r1			@ X[i]
1272 	ldr	r12,[r14],#4			@ *K256++
1273 	add	r7,r7,r2			@ h+=X[i]
1274 	str	r2,[sp,#4*4]
1275 	eor	r2,r5,r6
1276 	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1277 	and	r2,r2,r4
1278 	add	r7,r7,r12			@ h+=K256[i]
1279 	eor	r2,r2,r6			@ Ch(e,f,g)
1280 	eor	r0,r8,r8,ror#11
1281 	add	r7,r7,r2			@ h+=Ch(e,f,g)
1282 #if 20==31
1283 	and	r12,r12,#0xff
1284 	cmp	r12,#0xf2			@ done?
1285 #endif
1286 #if 20<15
1287 # if __ARM_ARCH__>=7
1288 	ldr	r2,[r1],#4			@ prefetch
1289 # else
1290 	ldrb	r2,[r1,#3]
1291 # endif
1292 	eor	r12,r8,r9			@ a^b, b^c in next round
1293 #else
1294 	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1295 	eor	r12,r8,r9			@ a^b, b^c in next round
1296 	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1297 #endif
1298 	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1299 	and	r3,r3,r12			@ (b^c)&=(a^b)
1300 	add	r11,r11,r7			@ d+=h
1301 	eor	r3,r3,r9			@ Maj(a,b,c)
1302 	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1303 	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1304 	@ ldr	r2,[sp,#6*4]		@ 21
1305 	@ ldr	r1,[sp,#3*4]
1306 	mov	r0,r2,ror#7
1307 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1308 	mov	r3,r1,ror#17
1309 	eor	r0,r0,r2,ror#18
1310 	eor	r3,r3,r1,ror#19
1311 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1312 	ldr	r2,[sp,#5*4]
1313 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1314 	ldr	r1,[sp,#14*4]
1315 
1316 	add	r3,r3,r0
1317 	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1318 	add	r2,r2,r3
1319 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1320 	add	r2,r2,r1			@ X[i]
1321 	ldr	r3,[r14],#4			@ *K256++
1322 	add	r6,r6,r2			@ h+=X[i]
1323 	str	r2,[sp,#5*4]
1324 	eor	r2,r4,r5
1325 	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1326 	and	r2,r2,r11
1327 	add	r6,r6,r3			@ h+=K256[i]
1328 	eor	r2,r2,r5			@ Ch(e,f,g)
1329 	eor	r0,r7,r7,ror#11
1330 	add	r6,r6,r2			@ h+=Ch(e,f,g)
1331 #if 21==31
1332 	and	r3,r3,#0xff
1333 	cmp	r3,#0xf2			@ done?
1334 #endif
1335 #if 21<15
1336 # if __ARM_ARCH__>=7
1337 	ldr	r2,[r1],#4			@ prefetch
1338 # else
1339 	ldrb	r2,[r1,#3]
1340 # endif
1341 	eor	r3,r7,r8			@ a^b, b^c in next round
1342 #else
1343 	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1344 	eor	r3,r7,r8			@ a^b, b^c in next round
1345 	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1346 #endif
1347 	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1348 	and	r12,r12,r3			@ (b^c)&=(a^b)
1349 	add	r10,r10,r6			@ d+=h
1350 	eor	r12,r12,r8			@ Maj(a,b,c)
1351 	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1352 	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1353 	@ ldr	r2,[sp,#7*4]		@ 22
1354 	@ ldr	r1,[sp,#4*4]
1355 	mov	r0,r2,ror#7
1356 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1357 	mov	r12,r1,ror#17
1358 	eor	r0,r0,r2,ror#18
1359 	eor	r12,r12,r1,ror#19
1360 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1361 	ldr	r2,[sp,#6*4]
1362 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1363 	ldr	r1,[sp,#15*4]
1364 
1365 	add	r12,r12,r0
1366 	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1367 	add	r2,r2,r12
1368 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1369 	add	r2,r2,r1			@ X[i]
1370 	ldr	r12,[r14],#4			@ *K256++
1371 	add	r5,r5,r2			@ h+=X[i]
1372 	str	r2,[sp,#6*4]
1373 	eor	r2,r11,r4
1374 	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1375 	and	r2,r2,r10
1376 	add	r5,r5,r12			@ h+=K256[i]
1377 	eor	r2,r2,r4			@ Ch(e,f,g)
1378 	eor	r0,r6,r6,ror#11
1379 	add	r5,r5,r2			@ h+=Ch(e,f,g)
1380 #if 22==31
1381 	and	r12,r12,#0xff
1382 	cmp	r12,#0xf2			@ done?
1383 #endif
1384 #if 22<15
1385 # if __ARM_ARCH__>=7
1386 	ldr	r2,[r1],#4			@ prefetch
1387 # else
1388 	ldrb	r2,[r1,#3]
1389 # endif
1390 	eor	r12,r6,r7			@ a^b, b^c in next round
1391 #else
1392 	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1393 	eor	r12,r6,r7			@ a^b, b^c in next round
1394 	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1395 #endif
1396 	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1397 	and	r3,r3,r12			@ (b^c)&=(a^b)
1398 	add	r9,r9,r5			@ d+=h
1399 	eor	r3,r3,r7			@ Maj(a,b,c)
1400 	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1401 	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1402 	@ ldr	r2,[sp,#8*4]		@ 23
1403 	@ ldr	r1,[sp,#5*4]
1404 	mov	r0,r2,ror#7
1405 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1406 	mov	r3,r1,ror#17
1407 	eor	r0,r0,r2,ror#18
1408 	eor	r3,r3,r1,ror#19
1409 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1410 	ldr	r2,[sp,#7*4]
1411 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1412 	ldr	r1,[sp,#0*4]
1413 
1414 	add	r3,r3,r0
1415 	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1416 	add	r2,r2,r3
1417 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1418 	add	r2,r2,r1			@ X[i]
1419 	ldr	r3,[r14],#4			@ *K256++
1420 	add	r4,r4,r2			@ h+=X[i]
1421 	str	r2,[sp,#7*4]
1422 	eor	r2,r10,r11
1423 	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1424 	and	r2,r2,r9
1425 	add	r4,r4,r3			@ h+=K256[i]
1426 	eor	r2,r2,r11			@ Ch(e,f,g)
1427 	eor	r0,r5,r5,ror#11
1428 	add	r4,r4,r2			@ h+=Ch(e,f,g)
1429 #if 23==31
1430 	and	r3,r3,#0xff
1431 	cmp	r3,#0xf2			@ done?
1432 #endif
1433 #if 23<15
1434 # if __ARM_ARCH__>=7
1435 	ldr	r2,[r1],#4			@ prefetch
1436 # else
1437 	ldrb	r2,[r1,#3]
1438 # endif
1439 	eor	r3,r5,r6			@ a^b, b^c in next round
1440 #else
1441 	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1442 	eor	r3,r5,r6			@ a^b, b^c in next round
1443 	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1444 #endif
1445 	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1446 	and	r12,r12,r3			@ (b^c)&=(a^b)
1447 	add	r8,r8,r4			@ d+=h
1448 	eor	r12,r12,r6			@ Maj(a,b,c)
1449 	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1450 	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1451 	@ ldr	r2,[sp,#9*4]		@ 24
1452 	@ ldr	r1,[sp,#6*4]
1453 	mov	r0,r2,ror#7
1454 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1455 	mov	r12,r1,ror#17
1456 	eor	r0,r0,r2,ror#18
1457 	eor	r12,r12,r1,ror#19
1458 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1459 	ldr	r2,[sp,#8*4]
1460 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1461 	ldr	r1,[sp,#1*4]
1462 
1463 	add	r12,r12,r0
1464 	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1465 	add	r2,r2,r12
1466 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1467 	add	r2,r2,r1			@ X[i]
1468 	ldr	r12,[r14],#4			@ *K256++
1469 	add	r11,r11,r2			@ h+=X[i]
1470 	str	r2,[sp,#8*4]
1471 	eor	r2,r9,r10
1472 	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1473 	and	r2,r2,r8
1474 	add	r11,r11,r12			@ h+=K256[i]
1475 	eor	r2,r2,r10			@ Ch(e,f,g)
1476 	eor	r0,r4,r4,ror#11
1477 	add	r11,r11,r2			@ h+=Ch(e,f,g)
1478 #if 24==31
1479 	and	r12,r12,#0xff
1480 	cmp	r12,#0xf2			@ done?
1481 #endif
1482 #if 24<15
1483 # if __ARM_ARCH__>=7
1484 	ldr	r2,[r1],#4			@ prefetch
1485 # else
1486 	ldrb	r2,[r1,#3]
1487 # endif
1488 	eor	r12,r4,r5			@ a^b, b^c in next round
1489 #else
1490 	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1491 	eor	r12,r4,r5			@ a^b, b^c in next round
1492 	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1493 #endif
1494 	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1495 	and	r3,r3,r12			@ (b^c)&=(a^b)
1496 	add	r7,r7,r11			@ d+=h
1497 	eor	r3,r3,r5			@ Maj(a,b,c)
1498 	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1499 	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1500 	@ ldr	r2,[sp,#10*4]		@ 25
1501 	@ ldr	r1,[sp,#7*4]
1502 	mov	r0,r2,ror#7
1503 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1504 	mov	r3,r1,ror#17
1505 	eor	r0,r0,r2,ror#18
1506 	eor	r3,r3,r1,ror#19
1507 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1508 	ldr	r2,[sp,#9*4]
1509 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1510 	ldr	r1,[sp,#2*4]
1511 
1512 	add	r3,r3,r0
1513 	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1514 	add	r2,r2,r3
1515 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1516 	add	r2,r2,r1			@ X[i]
1517 	ldr	r3,[r14],#4			@ *K256++
1518 	add	r10,r10,r2			@ h+=X[i]
1519 	str	r2,[sp,#9*4]
1520 	eor	r2,r8,r9
1521 	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1522 	and	r2,r2,r7
1523 	add	r10,r10,r3			@ h+=K256[i]
1524 	eor	r2,r2,r9			@ Ch(e,f,g)
1525 	eor	r0,r11,r11,ror#11
1526 	add	r10,r10,r2			@ h+=Ch(e,f,g)
1527 #if 25==31
1528 	and	r3,r3,#0xff
1529 	cmp	r3,#0xf2			@ done?
1530 #endif
1531 #if 25<15
1532 # if __ARM_ARCH__>=7
1533 	ldr	r2,[r1],#4			@ prefetch
1534 # else
1535 	ldrb	r2,[r1,#3]
1536 # endif
1537 	eor	r3,r11,r4			@ a^b, b^c in next round
1538 #else
1539 	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1540 	eor	r3,r11,r4			@ a^b, b^c in next round
1541 	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1542 #endif
1543 	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1544 	and	r12,r12,r3			@ (b^c)&=(a^b)
1545 	add	r6,r6,r10			@ d+=h
1546 	eor	r12,r12,r4			@ Maj(a,b,c)
1547 	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1548 	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1549 	@ ldr	r2,[sp,#11*4]		@ 26
1550 	@ ldr	r1,[sp,#8*4]
1551 	mov	r0,r2,ror#7
1552 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1553 	mov	r12,r1,ror#17
1554 	eor	r0,r0,r2,ror#18
1555 	eor	r12,r12,r1,ror#19
1556 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1557 	ldr	r2,[sp,#10*4]
1558 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1559 	ldr	r1,[sp,#3*4]
1560 
1561 	add	r12,r12,r0
1562 	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1563 	add	r2,r2,r12
1564 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1565 	add	r2,r2,r1			@ X[i]
1566 	ldr	r12,[r14],#4			@ *K256++
1567 	add	r9,r9,r2			@ h+=X[i]
1568 	str	r2,[sp,#10*4]
1569 	eor	r2,r7,r8
1570 	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1571 	and	r2,r2,r6
1572 	add	r9,r9,r12			@ h+=K256[i]
1573 	eor	r2,r2,r8			@ Ch(e,f,g)
1574 	eor	r0,r10,r10,ror#11
1575 	add	r9,r9,r2			@ h+=Ch(e,f,g)
1576 #if 26==31
1577 	and	r12,r12,#0xff
1578 	cmp	r12,#0xf2			@ done?
1579 #endif
1580 #if 26<15
1581 # if __ARM_ARCH__>=7
1582 	ldr	r2,[r1],#4			@ prefetch
1583 # else
1584 	ldrb	r2,[r1,#3]
1585 # endif
1586 	eor	r12,r10,r11			@ a^b, b^c in next round
1587 #else
1588 	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1589 	eor	r12,r10,r11			@ a^b, b^c in next round
1590 	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1591 #endif
1592 	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1593 	and	r3,r3,r12			@ (b^c)&=(a^b)
1594 	add	r5,r5,r9			@ d+=h
1595 	eor	r3,r3,r11			@ Maj(a,b,c)
1596 	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1597 	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1598 	@ ldr	r2,[sp,#12*4]		@ 27
1599 	@ ldr	r1,[sp,#9*4]
1600 	mov	r0,r2,ror#7
1601 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1602 	mov	r3,r1,ror#17
1603 	eor	r0,r0,r2,ror#18
1604 	eor	r3,r3,r1,ror#19
1605 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1606 	ldr	r2,[sp,#11*4]
1607 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1608 	ldr	r1,[sp,#4*4]
1609 
1610 	add	r3,r3,r0
1611 	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1612 	add	r2,r2,r3
1613 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1614 	add	r2,r2,r1			@ X[i]
1615 	ldr	r3,[r14],#4			@ *K256++
1616 	add	r8,r8,r2			@ h+=X[i]
1617 	str	r2,[sp,#11*4]
1618 	eor	r2,r6,r7
1619 	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1620 	and	r2,r2,r5
1621 	add	r8,r8,r3			@ h+=K256[i]
1622 	eor	r2,r2,r7			@ Ch(e,f,g)
1623 	eor	r0,r9,r9,ror#11
1624 	add	r8,r8,r2			@ h+=Ch(e,f,g)
1625 #if 27==31
1626 	and	r3,r3,#0xff
1627 	cmp	r3,#0xf2			@ done?
1628 #endif
1629 #if 27<15
1630 # if __ARM_ARCH__>=7
1631 	ldr	r2,[r1],#4			@ prefetch
1632 # else
1633 	ldrb	r2,[r1,#3]
1634 # endif
1635 	eor	r3,r9,r10			@ a^b, b^c in next round
1636 #else
1637 	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1638 	eor	r3,r9,r10			@ a^b, b^c in next round
1639 	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1640 #endif
1641 	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1642 	and	r12,r12,r3			@ (b^c)&=(a^b)
1643 	add	r4,r4,r8			@ d+=h
1644 	eor	r12,r12,r10			@ Maj(a,b,c)
1645 	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1646 	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1647 	@ ldr	r2,[sp,#13*4]		@ 28
1648 	@ ldr	r1,[sp,#10*4]
1649 	mov	r0,r2,ror#7
1650 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1651 	mov	r12,r1,ror#17
1652 	eor	r0,r0,r2,ror#18
1653 	eor	r12,r12,r1,ror#19
1654 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1655 	ldr	r2,[sp,#12*4]
1656 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1657 	ldr	r1,[sp,#5*4]
1658 
1659 	add	r12,r12,r0
1660 	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1661 	add	r2,r2,r12
1662 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1663 	add	r2,r2,r1			@ X[i]
1664 	ldr	r12,[r14],#4			@ *K256++
1665 	add	r7,r7,r2			@ h+=X[i]
1666 	str	r2,[sp,#12*4]
1667 	eor	r2,r5,r6
1668 	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1669 	and	r2,r2,r4
1670 	add	r7,r7,r12			@ h+=K256[i]
1671 	eor	r2,r2,r6			@ Ch(e,f,g)
1672 	eor	r0,r8,r8,ror#11
1673 	add	r7,r7,r2			@ h+=Ch(e,f,g)
1674 #if 28==31
1675 	and	r12,r12,#0xff
1676 	cmp	r12,#0xf2			@ done?
1677 #endif
1678 #if 28<15
1679 # if __ARM_ARCH__>=7
1680 	ldr	r2,[r1],#4			@ prefetch
1681 # else
1682 	ldrb	r2,[r1,#3]
1683 # endif
1684 	eor	r12,r8,r9			@ a^b, b^c in next round
1685 #else
1686 	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1687 	eor	r12,r8,r9			@ a^b, b^c in next round
1688 	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1689 #endif
1690 	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1691 	and	r3,r3,r12			@ (b^c)&=(a^b)
1692 	add	r11,r11,r7			@ d+=h
1693 	eor	r3,r3,r9			@ Maj(a,b,c)
1694 	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1695 	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1696 	@ ldr	r2,[sp,#14*4]		@ 29
1697 	@ ldr	r1,[sp,#11*4]
1698 	mov	r0,r2,ror#7
1699 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1700 	mov	r3,r1,ror#17
1701 	eor	r0,r0,r2,ror#18
1702 	eor	r3,r3,r1,ror#19
1703 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1704 	ldr	r2,[sp,#13*4]
1705 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1706 	ldr	r1,[sp,#6*4]
1707 
1708 	add	r3,r3,r0
1709 	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1710 	add	r2,r2,r3
1711 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1712 	add	r2,r2,r1			@ X[i]
1713 	ldr	r3,[r14],#4			@ *K256++
1714 	add	r6,r6,r2			@ h+=X[i]
1715 	str	r2,[sp,#13*4]
1716 	eor	r2,r4,r5
1717 	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1718 	and	r2,r2,r11
1719 	add	r6,r6,r3			@ h+=K256[i]
1720 	eor	r2,r2,r5			@ Ch(e,f,g)
1721 	eor	r0,r7,r7,ror#11
1722 	add	r6,r6,r2			@ h+=Ch(e,f,g)
1723 #if 29==31
1724 	and	r3,r3,#0xff
1725 	cmp	r3,#0xf2			@ done?
1726 #endif
1727 #if 29<15
1728 # if __ARM_ARCH__>=7
1729 	ldr	r2,[r1],#4			@ prefetch
1730 # else
1731 	ldrb	r2,[r1,#3]
1732 # endif
1733 	eor	r3,r7,r8			@ a^b, b^c in next round
1734 #else
1735 	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1736 	eor	r3,r7,r8			@ a^b, b^c in next round
1737 	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1738 #endif
1739 	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1740 	and	r12,r12,r3			@ (b^c)&=(a^b)
1741 	add	r10,r10,r6			@ d+=h
1742 	eor	r12,r12,r8			@ Maj(a,b,c)
1743 	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1744 	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1745 	@ ldr	r2,[sp,#15*4]		@ 30
1746 	@ ldr	r1,[sp,#12*4]
1747 	mov	r0,r2,ror#7
1748 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1749 	mov	r12,r1,ror#17
1750 	eor	r0,r0,r2,ror#18
1751 	eor	r12,r12,r1,ror#19
1752 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1753 	ldr	r2,[sp,#14*4]
1754 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1755 	ldr	r1,[sp,#7*4]
1756 
1757 	add	r12,r12,r0
1758 	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1759 	add	r2,r2,r12
1760 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1761 	add	r2,r2,r1			@ X[i]
1762 	ldr	r12,[r14],#4			@ *K256++
1763 	add	r5,r5,r2			@ h+=X[i]
1764 	str	r2,[sp,#14*4]
1765 	eor	r2,r11,r4
1766 	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1767 	and	r2,r2,r10
1768 	add	r5,r5,r12			@ h+=K256[i]
1769 	eor	r2,r2,r4			@ Ch(e,f,g)
1770 	eor	r0,r6,r6,ror#11
1771 	add	r5,r5,r2			@ h+=Ch(e,f,g)
1772 #if 30==31
1773 	and	r12,r12,#0xff
1774 	cmp	r12,#0xf2			@ done?
1775 #endif
1776 #if 30<15
1777 # if __ARM_ARCH__>=7
1778 	ldr	r2,[r1],#4			@ prefetch
1779 # else
1780 	ldrb	r2,[r1,#3]
1781 # endif
1782 	eor	r12,r6,r7			@ a^b, b^c in next round
1783 #else
1784 	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1785 	eor	r12,r6,r7			@ a^b, b^c in next round
1786 	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1787 #endif
1788 	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1789 	and	r3,r3,r12			@ (b^c)&=(a^b)
1790 	add	r9,r9,r5			@ d+=h
1791 	eor	r3,r3,r7			@ Maj(a,b,c)
1792 	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1793 	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1794 	@ ldr	r2,[sp,#0*4]		@ 31
1795 	@ ldr	r1,[sp,#13*4]
1796 	mov	r0,r2,ror#7
1797 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1798 	mov	r3,r1,ror#17
1799 	eor	r0,r0,r2,ror#18
1800 	eor	r3,r3,r1,ror#19
1801 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1802 	ldr	r2,[sp,#15*4]
1803 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1804 	ldr	r1,[sp,#8*4]
1805 
1806 	add	r3,r3,r0
1807 	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1808 	add	r2,r2,r3
1809 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1810 	add	r2,r2,r1			@ X[i]
1811 	ldr	r3,[r14],#4			@ *K256++
1812 	add	r4,r4,r2			@ h+=X[i]
1813 	str	r2,[sp,#15*4]
1814 	eor	r2,r10,r11
1815 	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1816 	and	r2,r2,r9
1817 	add	r4,r4,r3			@ h+=K256[i]
1818 	eor	r2,r2,r11			@ Ch(e,f,g)
1819 	eor	r0,r5,r5,ror#11
1820 	add	r4,r4,r2			@ h+=Ch(e,f,g)
1821 #if 31==31
1822 	and	r3,r3,#0xff
1823 	cmp	r3,#0xf2			@ done?
1824 #endif
1825 #if 31<15
1826 # if __ARM_ARCH__>=7
1827 	ldr	r2,[r1],#4			@ prefetch
1828 # else
1829 	ldrb	r2,[r1,#3]
1830 # endif
1831 	eor	r3,r5,r6			@ a^b, b^c in next round
1832 #else
1833 	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1834 	eor	r3,r5,r6			@ a^b, b^c in next round
1835 	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1836 #endif
1837 	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1838 	and	r12,r12,r3			@ (b^c)&=(a^b)
1839 	add	r8,r8,r4			@ d+=h
1840 	eor	r12,r12,r6			@ Maj(a,b,c)
1841 	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1842 	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1843 #ifdef	__thumb2__
1844 	ite	eq			@ Thumb2 thing, sanity check in ARM
1845 #endif
1846 	ldreq	r3,[sp,#16*4]		@ pull ctx
1847 	bne	.Lrounds_16_xx
1848 
1849 	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1850 	ldr	r0,[r3,#0]
1851 	ldr	r2,[r3,#4]
1852 	ldr	r12,[r3,#8]
1853 	add	r4,r4,r0
1854 	ldr	r0,[r3,#12]
1855 	add	r5,r5,r2
1856 	ldr	r2,[r3,#16]
1857 	add	r6,r6,r12
1858 	ldr	r12,[r3,#20]
1859 	add	r7,r7,r0
1860 	ldr	r0,[r3,#24]
1861 	add	r8,r8,r2
1862 	ldr	r2,[r3,#28]
1863 	add	r9,r9,r12
1864 	ldr	r1,[sp,#17*4]		@ pull inp
1865 	ldr	r12,[sp,#18*4]		@ pull inp+len
1866 	add	r10,r10,r0
1867 	add	r11,r11,r2
1868 	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1869 	cmp	r1,r12
1870 	sub	r14,r14,#256	@ rewind Ktbl
1871 	bne	.Loop
1872 
1873 	add	sp,sp,#19*4	@ destroy frame
1874 #if __ARM_ARCH__>=5
1875 	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
1876 #else
1877 	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
1878 	tst	lr,#1
1879 	moveq	pc,lr			@ be binary compatible with V4, yet
1880 .word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1881 #endif
1882 .size	sha256_block_data_order,.-sha256_block_data_order
1883 #if __ARM_MAX_ARCH__>=7
1884 .arch	armv7-a
1885 .fpu	neon
1886 
1887 .globl	sha256_block_data_order_neon
1888 .type	sha256_block_data_order_neon,%function
1889 .align	5
1890 .skip	16
1891 sha256_block_data_order_neon:
1892 .LNEON:
1893 	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1894 
1895 	sub	r11,sp,#16*4+16
1896 	adr	r14,K256
1897 	bic	r11,r11,#15		@ align for 128-bit stores
1898 	mov	r12,sp
1899 	mov	sp,r11			@ alloca
1900 	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1901 
1902 	vld1.8	{q0},[r1]!
1903 	vld1.8	{q1},[r1]!
1904 	vld1.8	{q2},[r1]!
1905 	vld1.8	{q3},[r1]!
1906 	vld1.32	{q8},[r14,:128]!
1907 	vld1.32	{q9},[r14,:128]!
1908 	vld1.32	{q10},[r14,:128]!
1909 	vld1.32	{q11},[r14,:128]!
1910 	vrev32.8	q0,q0		@ yes, even on
1911 	str	r0,[sp,#64]
1912 	vrev32.8	q1,q1		@ big-endian
1913 	str	r1,[sp,#68]
1914 	mov	r1,sp
1915 	vrev32.8	q2,q2
1916 	str	r2,[sp,#72]
1917 	vrev32.8	q3,q3
1918 	str	r12,[sp,#76]		@ save original sp
1919 	vadd.i32	q8,q8,q0
1920 	vadd.i32	q9,q9,q1
1921 	vst1.32	{q8},[r1,:128]!
1922 	vadd.i32	q10,q10,q2
1923 	vst1.32	{q9},[r1,:128]!
1924 	vadd.i32	q11,q11,q3
1925 	vst1.32	{q10},[r1,:128]!
1926 	vst1.32	{q11},[r1,:128]!
1927 
1928 	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
1929 	sub	r1,r1,#64
1930 	ldr	r2,[sp,#0]
1931 	eor	r12,r12,r12
1932 	eor	r3,r5,r6
1933 	b	.L_00_48
1934 
1935 .align	4
1936 .L_00_48:
1937 	vext.8	q8,q0,q1,#4
1938 	add	r11,r11,r2
1939 	eor	r2,r9,r10
1940 	eor	r0,r8,r8,ror#5
1941 	vext.8	q9,q2,q3,#4
1942 	add	r4,r4,r12
1943 	and	r2,r2,r8
1944 	eor	r12,r0,r8,ror#19
1945 	vshr.u32	q10,q8,#7
1946 	eor	r0,r4,r4,ror#11
1947 	eor	r2,r2,r10
1948 	vadd.i32	q0,q0,q9
1949 	add	r11,r11,r12,ror#6
1950 	eor	r12,r4,r5
1951 	vshr.u32	q9,q8,#3
1952 	eor	r0,r0,r4,ror#20
1953 	add	r11,r11,r2
1954 	vsli.32	q10,q8,#25
1955 	ldr	r2,[sp,#4]
1956 	and	r3,r3,r12
1957 	vshr.u32	q11,q8,#18
1958 	add	r7,r7,r11
1959 	add	r11,r11,r0,ror#2
1960 	eor	r3,r3,r5
1961 	veor	q9,q9,q10
1962 	add	r10,r10,r2
1963 	vsli.32	q11,q8,#14
1964 	eor	r2,r8,r9
1965 	eor	r0,r7,r7,ror#5
1966 	vshr.u32	d24,d7,#17
1967 	add	r11,r11,r3
1968 	and	r2,r2,r7
1969 	veor	q9,q9,q11
1970 	eor	r3,r0,r7,ror#19
1971 	eor	r0,r11,r11,ror#11
1972 	vsli.32	d24,d7,#15
1973 	eor	r2,r2,r9
1974 	add	r10,r10,r3,ror#6
1975 	vshr.u32	d25,d7,#10
1976 	eor	r3,r11,r4
1977 	eor	r0,r0,r11,ror#20
1978 	vadd.i32	q0,q0,q9
1979 	add	r10,r10,r2
1980 	ldr	r2,[sp,#8]
1981 	veor	d25,d25,d24
1982 	and	r12,r12,r3
1983 	add	r6,r6,r10
1984 	vshr.u32	d24,d7,#19
1985 	add	r10,r10,r0,ror#2
1986 	eor	r12,r12,r4
1987 	vsli.32	d24,d7,#13
1988 	add	r9,r9,r2
1989 	eor	r2,r7,r8
1990 	veor	d25,d25,d24
1991 	eor	r0,r6,r6,ror#5
1992 	add	r10,r10,r12
1993 	vadd.i32	d0,d0,d25
1994 	and	r2,r2,r6
1995 	eor	r12,r0,r6,ror#19
1996 	vshr.u32	d24,d0,#17
1997 	eor	r0,r10,r10,ror#11
1998 	eor	r2,r2,r8
1999 	vsli.32	d24,d0,#15
2000 	add	r9,r9,r12,ror#6
2001 	eor	r12,r10,r11
2002 	vshr.u32	d25,d0,#10
2003 	eor	r0,r0,r10,ror#20
2004 	add	r9,r9,r2
2005 	veor	d25,d25,d24
2006 	ldr	r2,[sp,#12]
2007 	and	r3,r3,r12
2008 	vshr.u32	d24,d0,#19
2009 	add	r5,r5,r9
2010 	add	r9,r9,r0,ror#2
2011 	eor	r3,r3,r11
2012 	vld1.32	{q8},[r14,:128]!
2013 	add	r8,r8,r2
2014 	vsli.32	d24,d0,#13
2015 	eor	r2,r6,r7
2016 	eor	r0,r5,r5,ror#5
2017 	veor	d25,d25,d24
2018 	add	r9,r9,r3
2019 	and	r2,r2,r5
2020 	vadd.i32	d1,d1,d25
2021 	eor	r3,r0,r5,ror#19
2022 	eor	r0,r9,r9,ror#11
2023 	vadd.i32	q8,q8,q0
2024 	eor	r2,r2,r7
2025 	add	r8,r8,r3,ror#6
2026 	eor	r3,r9,r10
2027 	eor	r0,r0,r9,ror#20
2028 	add	r8,r8,r2
2029 	ldr	r2,[sp,#16]
2030 	and	r12,r12,r3
2031 	add	r4,r4,r8
2032 	vst1.32	{q8},[r1,:128]!
2033 	add	r8,r8,r0,ror#2
2034 	eor	r12,r12,r10
2035 	vext.8	q8,q1,q2,#4
2036 	add	r7,r7,r2
2037 	eor	r2,r5,r6
2038 	eor	r0,r4,r4,ror#5
2039 	vext.8	q9,q3,q0,#4
2040 	add	r8,r8,r12
2041 	and	r2,r2,r4
2042 	eor	r12,r0,r4,ror#19
2043 	vshr.u32	q10,q8,#7
2044 	eor	r0,r8,r8,ror#11
2045 	eor	r2,r2,r6
2046 	vadd.i32	q1,q1,q9
2047 	add	r7,r7,r12,ror#6
2048 	eor	r12,r8,r9
2049 	vshr.u32	q9,q8,#3
2050 	eor	r0,r0,r8,ror#20
2051 	add	r7,r7,r2
2052 	vsli.32	q10,q8,#25
2053 	ldr	r2,[sp,#20]
2054 	and	r3,r3,r12
2055 	vshr.u32	q11,q8,#18
2056 	add	r11,r11,r7
2057 	add	r7,r7,r0,ror#2
2058 	eor	r3,r3,r9
2059 	veor	q9,q9,q10
2060 	add	r6,r6,r2
2061 	vsli.32	q11,q8,#14
2062 	eor	r2,r4,r5
2063 	eor	r0,r11,r11,ror#5
2064 	vshr.u32	d24,d1,#17
2065 	add	r7,r7,r3
2066 	and	r2,r2,r11
2067 	veor	q9,q9,q11
2068 	eor	r3,r0,r11,ror#19
2069 	eor	r0,r7,r7,ror#11
2070 	vsli.32	d24,d1,#15
2071 	eor	r2,r2,r5
2072 	add	r6,r6,r3,ror#6
2073 	vshr.u32	d25,d1,#10
2074 	eor	r3,r7,r8
2075 	eor	r0,r0,r7,ror#20
2076 	vadd.i32	q1,q1,q9
2077 	add	r6,r6,r2
2078 	ldr	r2,[sp,#24]
2079 	veor	d25,d25,d24
2080 	and	r12,r12,r3
2081 	add	r10,r10,r6
2082 	vshr.u32	d24,d1,#19
2083 	add	r6,r6,r0,ror#2
2084 	eor	r12,r12,r8
2085 	vsli.32	d24,d1,#13
2086 	add	r5,r5,r2
2087 	eor	r2,r11,r4
2088 	veor	d25,d25,d24
2089 	eor	r0,r10,r10,ror#5
2090 	add	r6,r6,r12
2091 	vadd.i32	d2,d2,d25
2092 	and	r2,r2,r10
2093 	eor	r12,r0,r10,ror#19
2094 	vshr.u32	d24,d2,#17
2095 	eor	r0,r6,r6,ror#11
2096 	eor	r2,r2,r4
2097 	vsli.32	d24,d2,#15
2098 	add	r5,r5,r12,ror#6
2099 	eor	r12,r6,r7
2100 	vshr.u32	d25,d2,#10
2101 	eor	r0,r0,r6,ror#20
2102 	add	r5,r5,r2
2103 	veor	d25,d25,d24
2104 	ldr	r2,[sp,#28]
2105 	and	r3,r3,r12
2106 	vshr.u32	d24,d2,#19
2107 	add	r9,r9,r5
2108 	add	r5,r5,r0,ror#2
2109 	eor	r3,r3,r7
2110 	vld1.32	{q8},[r14,:128]!
2111 	add	r4,r4,r2
2112 	vsli.32	d24,d2,#13
2113 	eor	r2,r10,r11
2114 	eor	r0,r9,r9,ror#5
2115 	veor	d25,d25,d24
2116 	add	r5,r5,r3
2117 	and	r2,r2,r9
2118 	vadd.i32	d3,d3,d25
2119 	eor	r3,r0,r9,ror#19
2120 	eor	r0,r5,r5,ror#11
2121 	vadd.i32	q8,q8,q1
2122 	eor	r2,r2,r11
2123 	add	r4,r4,r3,ror#6
2124 	eor	r3,r5,r6
2125 	eor	r0,r0,r5,ror#20
2126 	add	r4,r4,r2
2127 	ldr	r2,[sp,#32]
2128 	and	r12,r12,r3
2129 	add	r8,r8,r4
2130 	vst1.32	{q8},[r1,:128]!
2131 	add	r4,r4,r0,ror#2
2132 	eor	r12,r12,r6
2133 	vext.8	q8,q2,q3,#4
2134 	add	r11,r11,r2
2135 	eor	r2,r9,r10
2136 	eor	r0,r8,r8,ror#5
2137 	vext.8	q9,q0,q1,#4
2138 	add	r4,r4,r12
2139 	and	r2,r2,r8
2140 	eor	r12,r0,r8,ror#19
2141 	vshr.u32	q10,q8,#7
2142 	eor	r0,r4,r4,ror#11
2143 	eor	r2,r2,r10
2144 	vadd.i32	q2,q2,q9
2145 	add	r11,r11,r12,ror#6
2146 	eor	r12,r4,r5
2147 	vshr.u32	q9,q8,#3
2148 	eor	r0,r0,r4,ror#20
2149 	add	r11,r11,r2
2150 	vsli.32	q10,q8,#25
2151 	ldr	r2,[sp,#36]
2152 	and	r3,r3,r12
2153 	vshr.u32	q11,q8,#18
2154 	add	r7,r7,r11
2155 	add	r11,r11,r0,ror#2
2156 	eor	r3,r3,r5
2157 	veor	q9,q9,q10
2158 	add	r10,r10,r2
2159 	vsli.32	q11,q8,#14
2160 	eor	r2,r8,r9
2161 	eor	r0,r7,r7,ror#5
2162 	vshr.u32	d24,d3,#17
2163 	add	r11,r11,r3
2164 	and	r2,r2,r7
2165 	veor	q9,q9,q11
2166 	eor	r3,r0,r7,ror#19
2167 	eor	r0,r11,r11,ror#11
2168 	vsli.32	d24,d3,#15
2169 	eor	r2,r2,r9
2170 	add	r10,r10,r3,ror#6
2171 	vshr.u32	d25,d3,#10
2172 	eor	r3,r11,r4
2173 	eor	r0,r0,r11,ror#20
2174 	vadd.i32	q2,q2,q9
2175 	add	r10,r10,r2
2176 	ldr	r2,[sp,#40]
2177 	veor	d25,d25,d24
2178 	and	r12,r12,r3
2179 	add	r6,r6,r10
2180 	vshr.u32	d24,d3,#19
2181 	add	r10,r10,r0,ror#2
2182 	eor	r12,r12,r4
2183 	vsli.32	d24,d3,#13
2184 	add	r9,r9,r2
2185 	eor	r2,r7,r8
2186 	veor	d25,d25,d24
2187 	eor	r0,r6,r6,ror#5
2188 	add	r10,r10,r12
2189 	vadd.i32	d4,d4,d25
2190 	and	r2,r2,r6
2191 	eor	r12,r0,r6,ror#19
2192 	vshr.u32	d24,d4,#17
2193 	eor	r0,r10,r10,ror#11
2194 	eor	r2,r2,r8
2195 	vsli.32	d24,d4,#15
2196 	add	r9,r9,r12,ror#6
2197 	eor	r12,r10,r11
2198 	vshr.u32	d25,d4,#10
2199 	eor	r0,r0,r10,ror#20
2200 	add	r9,r9,r2
2201 	veor	d25,d25,d24
2202 	ldr	r2,[sp,#44]
2203 	and	r3,r3,r12
2204 	vshr.u32	d24,d4,#19
2205 	add	r5,r5,r9
2206 	add	r9,r9,r0,ror#2
2207 	eor	r3,r3,r11
2208 	vld1.32	{q8},[r14,:128]!
2209 	add	r8,r8,r2
2210 	vsli.32	d24,d4,#13
2211 	eor	r2,r6,r7
2212 	eor	r0,r5,r5,ror#5
2213 	veor	d25,d25,d24
2214 	add	r9,r9,r3
2215 	and	r2,r2,r5
2216 	vadd.i32	d5,d5,d25
2217 	eor	r3,r0,r5,ror#19
2218 	eor	r0,r9,r9,ror#11
2219 	vadd.i32	q8,q8,q2
2220 	eor	r2,r2,r7
2221 	add	r8,r8,r3,ror#6
2222 	eor	r3,r9,r10
2223 	eor	r0,r0,r9,ror#20
2224 	add	r8,r8,r2
2225 	ldr	r2,[sp,#48]
2226 	and	r12,r12,r3
2227 	add	r4,r4,r8
2228 	vst1.32	{q8},[r1,:128]!
2229 	add	r8,r8,r0,ror#2
2230 	eor	r12,r12,r10
2231 	vext.8	q8,q3,q0,#4
2232 	add	r7,r7,r2
2233 	eor	r2,r5,r6
2234 	eor	r0,r4,r4,ror#5
2235 	vext.8	q9,q1,q2,#4
2236 	add	r8,r8,r12
2237 	and	r2,r2,r4
2238 	eor	r12,r0,r4,ror#19
2239 	vshr.u32	q10,q8,#7
2240 	eor	r0,r8,r8,ror#11
2241 	eor	r2,r2,r6
2242 	vadd.i32	q3,q3,q9
2243 	add	r7,r7,r12,ror#6
2244 	eor	r12,r8,r9
2245 	vshr.u32	q9,q8,#3
2246 	eor	r0,r0,r8,ror#20
2247 	add	r7,r7,r2
2248 	vsli.32	q10,q8,#25
2249 	ldr	r2,[sp,#52]
2250 	and	r3,r3,r12
2251 	vshr.u32	q11,q8,#18
2252 	add	r11,r11,r7
2253 	add	r7,r7,r0,ror#2
2254 	eor	r3,r3,r9
2255 	veor	q9,q9,q10
2256 	add	r6,r6,r2
2257 	vsli.32	q11,q8,#14
2258 	eor	r2,r4,r5
2259 	eor	r0,r11,r11,ror#5
2260 	vshr.u32	d24,d5,#17
2261 	add	r7,r7,r3
2262 	and	r2,r2,r11
2263 	veor	q9,q9,q11
2264 	eor	r3,r0,r11,ror#19
2265 	eor	r0,r7,r7,ror#11
2266 	vsli.32	d24,d5,#15
2267 	eor	r2,r2,r5
2268 	add	r6,r6,r3,ror#6
2269 	vshr.u32	d25,d5,#10
2270 	eor	r3,r7,r8
2271 	eor	r0,r0,r7,ror#20
2272 	vadd.i32	q3,q3,q9
2273 	add	r6,r6,r2
2274 	ldr	r2,[sp,#56]
2275 	veor	d25,d25,d24
2276 	and	r12,r12,r3
2277 	add	r10,r10,r6
2278 	vshr.u32	d24,d5,#19
2279 	add	r6,r6,r0,ror#2
2280 	eor	r12,r12,r8
2281 	vsli.32	d24,d5,#13
2282 	add	r5,r5,r2
2283 	eor	r2,r11,r4
2284 	veor	d25,d25,d24
2285 	eor	r0,r10,r10,ror#5
2286 	add	r6,r6,r12
2287 	vadd.i32	d6,d6,d25
2288 	and	r2,r2,r10
2289 	eor	r12,r0,r10,ror#19
2290 	vshr.u32	d24,d6,#17
2291 	eor	r0,r6,r6,ror#11
2292 	eor	r2,r2,r4
2293 	vsli.32	d24,d6,#15
2294 	add	r5,r5,r12,ror#6
2295 	eor	r12,r6,r7
2296 	vshr.u32	d25,d6,#10
2297 	eor	r0,r0,r6,ror#20
2298 	add	r5,r5,r2
2299 	veor	d25,d25,d24
2300 	ldr	r2,[sp,#60]
2301 	and	r3,r3,r12
2302 	vshr.u32	d24,d6,#19
2303 	add	r9,r9,r5
2304 	add	r5,r5,r0,ror#2
2305 	eor	r3,r3,r7
2306 	vld1.32	{q8},[r14,:128]!
2307 	add	r4,r4,r2
2308 	vsli.32	d24,d6,#13
2309 	eor	r2,r10,r11
2310 	eor	r0,r9,r9,ror#5
2311 	veor	d25,d25,d24
2312 	add	r5,r5,r3
2313 	and	r2,r2,r9
2314 	vadd.i32	d7,d7,d25
2315 	eor	r3,r0,r9,ror#19
2316 	eor	r0,r5,r5,ror#11
2317 	vadd.i32	q8,q8,q3
2318 	eor	r2,r2,r11
2319 	add	r4,r4,r3,ror#6
2320 	eor	r3,r5,r6
2321 	eor	r0,r0,r5,ror#20
2322 	add	r4,r4,r2
2323 	ldr	r2,[r14]
2324 	and	r12,r12,r3
2325 	add	r8,r8,r4
2326 	vst1.32	{q8},[r1,:128]!
2327 	add	r4,r4,r0,ror#2
2328 	eor	r12,r12,r6
2329 	teq	r2,#0				@ check for K256 terminator
2330 	ldr	r2,[sp,#0]
2331 	sub	r1,r1,#64
2332 	bne	.L_00_48
2333 
2334 	ldr	r1,[sp,#68]
2335 	ldr	r0,[sp,#72]
2336 	sub	r14,r14,#256	@ rewind r14
2337 	teq	r1,r0
2338 	it	eq
2339 	subeq	r1,r1,#64		@ avoid SEGV
2340 	vld1.8	{q0},[r1]!		@ load next input block
2341 	vld1.8	{q1},[r1]!
2342 	vld1.8	{q2},[r1]!
2343 	vld1.8	{q3},[r1]!
2344 	it	ne
2345 	strne	r1,[sp,#68]
2346 	mov	r1,sp
2347 	add	r11,r11,r2
2348 	eor	r2,r9,r10
2349 	eor	r0,r8,r8,ror#5
2350 	add	r4,r4,r12
2351 	vld1.32	{q8},[r14,:128]!
2352 	and	r2,r2,r8
2353 	eor	r12,r0,r8,ror#19
2354 	eor	r0,r4,r4,ror#11
2355 	eor	r2,r2,r10
2356 	vrev32.8	q0,q0
2357 	add	r11,r11,r12,ror#6
2358 	eor	r12,r4,r5
2359 	eor	r0,r0,r4,ror#20
2360 	add	r11,r11,r2
2361 	vadd.i32	q8,q8,q0
2362 	ldr	r2,[sp,#4]
2363 	and	r3,r3,r12
2364 	add	r7,r7,r11
2365 	add	r11,r11,r0,ror#2
2366 	eor	r3,r3,r5
2367 	add	r10,r10,r2
2368 	eor	r2,r8,r9
2369 	eor	r0,r7,r7,ror#5
2370 	add	r11,r11,r3
2371 	and	r2,r2,r7
2372 	eor	r3,r0,r7,ror#19
2373 	eor	r0,r11,r11,ror#11
2374 	eor	r2,r2,r9
2375 	add	r10,r10,r3,ror#6
2376 	eor	r3,r11,r4
2377 	eor	r0,r0,r11,ror#20
2378 	add	r10,r10,r2
2379 	ldr	r2,[sp,#8]
2380 	and	r12,r12,r3
2381 	add	r6,r6,r10
2382 	add	r10,r10,r0,ror#2
2383 	eor	r12,r12,r4
2384 	add	r9,r9,r2
2385 	eor	r2,r7,r8
2386 	eor	r0,r6,r6,ror#5
2387 	add	r10,r10,r12
2388 	and	r2,r2,r6
2389 	eor	r12,r0,r6,ror#19
2390 	eor	r0,r10,r10,ror#11
2391 	eor	r2,r2,r8
2392 	add	r9,r9,r12,ror#6
2393 	eor	r12,r10,r11
2394 	eor	r0,r0,r10,ror#20
2395 	add	r9,r9,r2
2396 	ldr	r2,[sp,#12]
2397 	and	r3,r3,r12
2398 	add	r5,r5,r9
2399 	add	r9,r9,r0,ror#2
2400 	eor	r3,r3,r11
2401 	add	r8,r8,r2
2402 	eor	r2,r6,r7
2403 	eor	r0,r5,r5,ror#5
2404 	add	r9,r9,r3
2405 	and	r2,r2,r5
2406 	eor	r3,r0,r5,ror#19
2407 	eor	r0,r9,r9,ror#11
2408 	eor	r2,r2,r7
2409 	add	r8,r8,r3,ror#6
2410 	eor	r3,r9,r10
2411 	eor	r0,r0,r9,ror#20
2412 	add	r8,r8,r2
2413 	ldr	r2,[sp,#16]
2414 	and	r12,r12,r3
2415 	add	r4,r4,r8
2416 	add	r8,r8,r0,ror#2
2417 	eor	r12,r12,r10
2418 	vst1.32	{q8},[r1,:128]!
2419 	add	r7,r7,r2
2420 	eor	r2,r5,r6
2421 	eor	r0,r4,r4,ror#5
2422 	add	r8,r8,r12
2423 	vld1.32	{q8},[r14,:128]!
2424 	and	r2,r2,r4
2425 	eor	r12,r0,r4,ror#19
2426 	eor	r0,r8,r8,ror#11
2427 	eor	r2,r2,r6
2428 	vrev32.8	q1,q1
2429 	add	r7,r7,r12,ror#6
2430 	eor	r12,r8,r9
2431 	eor	r0,r0,r8,ror#20
2432 	add	r7,r7,r2
2433 	vadd.i32	q8,q8,q1
2434 	ldr	r2,[sp,#20]
2435 	and	r3,r3,r12
2436 	add	r11,r11,r7
2437 	add	r7,r7,r0,ror#2
2438 	eor	r3,r3,r9
2439 	add	r6,r6,r2
2440 	eor	r2,r4,r5
2441 	eor	r0,r11,r11,ror#5
2442 	add	r7,r7,r3
2443 	and	r2,r2,r11
2444 	eor	r3,r0,r11,ror#19
2445 	eor	r0,r7,r7,ror#11
2446 	eor	r2,r2,r5
2447 	add	r6,r6,r3,ror#6
2448 	eor	r3,r7,r8
2449 	eor	r0,r0,r7,ror#20
2450 	add	r6,r6,r2
2451 	ldr	r2,[sp,#24]
2452 	and	r12,r12,r3
2453 	add	r10,r10,r6
2454 	add	r6,r6,r0,ror#2
2455 	eor	r12,r12,r8
2456 	add	r5,r5,r2
2457 	eor	r2,r11,r4
2458 	eor	r0,r10,r10,ror#5
2459 	add	r6,r6,r12
2460 	and	r2,r2,r10
2461 	eor	r12,r0,r10,ror#19
2462 	eor	r0,r6,r6,ror#11
2463 	eor	r2,r2,r4
2464 	add	r5,r5,r12,ror#6
2465 	eor	r12,r6,r7
2466 	eor	r0,r0,r6,ror#20
2467 	add	r5,r5,r2
2468 	ldr	r2,[sp,#28]
2469 	and	r3,r3,r12
2470 	add	r9,r9,r5
2471 	add	r5,r5,r0,ror#2
2472 	eor	r3,r3,r7
2473 	add	r4,r4,r2
2474 	eor	r2,r10,r11
2475 	eor	r0,r9,r9,ror#5
2476 	add	r5,r5,r3
2477 	and	r2,r2,r9
2478 	eor	r3,r0,r9,ror#19
2479 	eor	r0,r5,r5,ror#11
2480 	eor	r2,r2,r11
2481 	add	r4,r4,r3,ror#6
2482 	eor	r3,r5,r6
2483 	eor	r0,r0,r5,ror#20
2484 	add	r4,r4,r2
2485 	ldr	r2,[sp,#32]
2486 	and	r12,r12,r3
2487 	add	r8,r8,r4
2488 	add	r4,r4,r0,ror#2
2489 	eor	r12,r12,r6
2490 	vst1.32	{q8},[r1,:128]!
2491 	add	r11,r11,r2
2492 	eor	r2,r9,r10
2493 	eor	r0,r8,r8,ror#5
2494 	add	r4,r4,r12
2495 	vld1.32	{q8},[r14,:128]!
2496 	and	r2,r2,r8
2497 	eor	r12,r0,r8,ror#19
2498 	eor	r0,r4,r4,ror#11
2499 	eor	r2,r2,r10
2500 	vrev32.8	q2,q2
2501 	add	r11,r11,r12,ror#6
2502 	eor	r12,r4,r5
2503 	eor	r0,r0,r4,ror#20
2504 	add	r11,r11,r2
2505 	vadd.i32	q8,q8,q2
2506 	ldr	r2,[sp,#36]
2507 	and	r3,r3,r12
2508 	add	r7,r7,r11
2509 	add	r11,r11,r0,ror#2
2510 	eor	r3,r3,r5
2511 	add	r10,r10,r2
2512 	eor	r2,r8,r9
2513 	eor	r0,r7,r7,ror#5
2514 	add	r11,r11,r3
2515 	and	r2,r2,r7
2516 	eor	r3,r0,r7,ror#19
2517 	eor	r0,r11,r11,ror#11
2518 	eor	r2,r2,r9
2519 	add	r10,r10,r3,ror#6
2520 	eor	r3,r11,r4
2521 	eor	r0,r0,r11,ror#20
2522 	add	r10,r10,r2
2523 	ldr	r2,[sp,#40]
2524 	and	r12,r12,r3
2525 	add	r6,r6,r10
2526 	add	r10,r10,r0,ror#2
2527 	eor	r12,r12,r4
2528 	add	r9,r9,r2
2529 	eor	r2,r7,r8
2530 	eor	r0,r6,r6,ror#5
2531 	add	r10,r10,r12
2532 	and	r2,r2,r6
2533 	eor	r12,r0,r6,ror#19
2534 	eor	r0,r10,r10,ror#11
2535 	eor	r2,r2,r8
2536 	add	r9,r9,r12,ror#6
2537 	eor	r12,r10,r11
2538 	eor	r0,r0,r10,ror#20
2539 	add	r9,r9,r2
2540 	ldr	r2,[sp,#44]
2541 	and	r3,r3,r12
2542 	add	r5,r5,r9
2543 	add	r9,r9,r0,ror#2
2544 	eor	r3,r3,r11
2545 	add	r8,r8,r2
2546 	eor	r2,r6,r7
2547 	eor	r0,r5,r5,ror#5
2548 	add	r9,r9,r3
2549 	and	r2,r2,r5
2550 	eor	r3,r0,r5,ror#19
2551 	eor	r0,r9,r9,ror#11
2552 	eor	r2,r2,r7
2553 	add	r8,r8,r3,ror#6
2554 	eor	r3,r9,r10
2555 	eor	r0,r0,r9,ror#20
2556 	add	r8,r8,r2
2557 	ldr	r2,[sp,#48]
2558 	and	r12,r12,r3
2559 	add	r4,r4,r8
2560 	add	r8,r8,r0,ror#2
2561 	eor	r12,r12,r10
2562 	vst1.32	{q8},[r1,:128]!
2563 	add	r7,r7,r2
2564 	eor	r2,r5,r6
2565 	eor	r0,r4,r4,ror#5
2566 	add	r8,r8,r12
2567 	vld1.32	{q8},[r14,:128]!
2568 	and	r2,r2,r4
2569 	eor	r12,r0,r4,ror#19
2570 	eor	r0,r8,r8,ror#11
2571 	eor	r2,r2,r6
2572 	vrev32.8	q3,q3
2573 	add	r7,r7,r12,ror#6
2574 	eor	r12,r8,r9
2575 	eor	r0,r0,r8,ror#20
2576 	add	r7,r7,r2
2577 	vadd.i32	q8,q8,q3
2578 	ldr	r2,[sp,#52]
2579 	and	r3,r3,r12
2580 	add	r11,r11,r7
2581 	add	r7,r7,r0,ror#2
2582 	eor	r3,r3,r9
2583 	add	r6,r6,r2
2584 	eor	r2,r4,r5
2585 	eor	r0,r11,r11,ror#5
2586 	add	r7,r7,r3
2587 	and	r2,r2,r11
2588 	eor	r3,r0,r11,ror#19
2589 	eor	r0,r7,r7,ror#11
2590 	eor	r2,r2,r5
2591 	add	r6,r6,r3,ror#6
2592 	eor	r3,r7,r8
2593 	eor	r0,r0,r7,ror#20
2594 	add	r6,r6,r2
2595 	ldr	r2,[sp,#56]
2596 	and	r12,r12,r3
2597 	add	r10,r10,r6
2598 	add	r6,r6,r0,ror#2
2599 	eor	r12,r12,r8
2600 	add	r5,r5,r2
2601 	eor	r2,r11,r4
2602 	eor	r0,r10,r10,ror#5
2603 	add	r6,r6,r12
2604 	and	r2,r2,r10
2605 	eor	r12,r0,r10,ror#19
2606 	eor	r0,r6,r6,ror#11
2607 	eor	r2,r2,r4
2608 	add	r5,r5,r12,ror#6
2609 	eor	r12,r6,r7
2610 	eor	r0,r0,r6,ror#20
2611 	add	r5,r5,r2
2612 	ldr	r2,[sp,#60]
2613 	and	r3,r3,r12
2614 	add	r9,r9,r5
2615 	add	r5,r5,r0,ror#2
2616 	eor	r3,r3,r7
2617 	add	r4,r4,r2
2618 	eor	r2,r10,r11
2619 	eor	r0,r9,r9,ror#5
2620 	add	r5,r5,r3
2621 	and	r2,r2,r9
2622 	eor	r3,r0,r9,ror#19
2623 	eor	r0,r5,r5,ror#11
2624 	eor	r2,r2,r11
2625 	add	r4,r4,r3,ror#6
2626 	eor	r3,r5,r6
2627 	eor	r0,r0,r5,ror#20
2628 	add	r4,r4,r2
2629 	ldr	r2,[sp,#64]
2630 	and	r12,r12,r3
2631 	add	r8,r8,r4
2632 	add	r4,r4,r0,ror#2
2633 	eor	r12,r12,r6
2634 	vst1.32	{q8},[r1,:128]!
2635 	ldr	r0,[r2,#0]
2636 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2637 	ldr	r12,[r2,#4]
2638 	ldr	r3,[r2,#8]
2639 	ldr	r1,[r2,#12]
2640 	add	r4,r4,r0			@ accumulate
2641 	ldr	r0,[r2,#16]
2642 	add	r5,r5,r12
2643 	ldr	r12,[r2,#20]
2644 	add	r6,r6,r3
2645 	ldr	r3,[r2,#24]
2646 	add	r7,r7,r1
2647 	ldr	r1,[r2,#28]
2648 	add	r8,r8,r0
2649 	str	r4,[r2],#4
2650 	add	r9,r9,r12
2651 	str	r5,[r2],#4
2652 	add	r10,r10,r3
2653 	str	r6,[r2],#4
2654 	add	r11,r11,r1
2655 	str	r7,[r2],#4
2656 	stmia	r2,{r8,r9,r10,r11}
2657 
2658 	ittte	ne
2659 	movne	r1,sp
2660 	ldrne	r2,[sp,#0]
2661 	eorne	r12,r12,r12
2662 	ldreq	sp,[sp,#76]			@ restore original sp
2663 	itt	ne
2664 	eorne	r3,r5,r6
2665 	bne	.L_00_48
2666 
2667 	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
2668 .size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
2669 #endif
2670 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2671 
2672 # if defined(__thumb2__)
2673 #  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
2674 # else
2675 #  define INST(a,b,c,d)	.byte	a,b,c,d
2676 # endif
2677 
2678 .type	sha256_block_data_order_armv8,%function
2679 .align	5
2680 sha256_block_data_order_armv8:
2681 .LARMv8:
2682 	vld1.32	{q0,q1},[r0]
2683 	sub	r3,r3,#256+32
2684 	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
2685 	b	.Loop_v8
2686 
2687 .align	4
2688 .Loop_v8:
2689 	vld1.8	{q8,q9},[r1]!
2690 	vld1.8	{q10,q11},[r1]!
2691 	vld1.32	{q12},[r3]!
2692 	vrev32.8	q8,q8
2693 	vrev32.8	q9,q9
2694 	vrev32.8	q10,q10
2695 	vrev32.8	q11,q11
2696 	vmov	q14,q0	@ offload
2697 	vmov	q15,q1
2698 	teq	r1,r2
2699 	vld1.32	{q13},[r3]!
2700 	vadd.i32	q12,q12,q8
2701 	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2702 	vmov	q2,q0
2703 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2704 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2705 	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2706 	vld1.32	{q12},[r3]!
2707 	vadd.i32	q13,q13,q9
2708 	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2709 	vmov	q2,q0
2710 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2711 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2712 	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2713 	vld1.32	{q13},[r3]!
2714 	vadd.i32	q12,q12,q10
2715 	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2716 	vmov	q2,q0
2717 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2718 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2719 	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2720 	vld1.32	{q12},[r3]!
2721 	vadd.i32	q13,q13,q11
2722 	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2723 	vmov	q2,q0
2724 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2725 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2726 	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
2727 	vld1.32	{q13},[r3]!
2728 	vadd.i32	q12,q12,q8
2729 	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2730 	vmov	q2,q0
2731 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2732 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2733 	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2734 	vld1.32	{q12},[r3]!
2735 	vadd.i32	q13,q13,q9
2736 	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2737 	vmov	q2,q0
2738 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2739 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2740 	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2741 	vld1.32	{q13},[r3]!
2742 	vadd.i32	q12,q12,q10
2743 	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2744 	vmov	q2,q0
2745 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2746 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2747 	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2748 	vld1.32	{q12},[r3]!
2749 	vadd.i32	q13,q13,q11
2750 	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2751 	vmov	q2,q0
2752 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2753 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2754 	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
2755 	vld1.32	{q13},[r3]!
2756 	vadd.i32	q12,q12,q8
2757 	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2758 	vmov	q2,q0
2759 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2760 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2761 	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2762 	vld1.32	{q12},[r3]!
2763 	vadd.i32	q13,q13,q9
2764 	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2765 	vmov	q2,q0
2766 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2767 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2768 	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2769 	vld1.32	{q13},[r3]!
2770 	vadd.i32	q12,q12,q10
2771 	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2772 	vmov	q2,q0
2773 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2774 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2775 	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2776 	vld1.32	{q12},[r3]!
2777 	vadd.i32	q13,q13,q11
2778 	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2779 	vmov	q2,q0
2780 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2781 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2782 	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
2783 	vld1.32	{q13},[r3]!
2784 	vadd.i32	q12,q12,q8
2785 	vmov	q2,q0
2786 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2787 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2788 
2789 	vld1.32	{q12},[r3]!
2790 	vadd.i32	q13,q13,q9
2791 	vmov	q2,q0
2792 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2793 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2794 
2795 	vld1.32	{q13},[r3]
2796 	vadd.i32	q12,q12,q10
2797 	sub	r3,r3,#256-16	@ rewind
2798 	vmov	q2,q0
2799 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2800 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2801 
2802 	vadd.i32	q13,q13,q11
2803 	vmov	q2,q0
2804 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2805 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2806 
2807 	vadd.i32	q0,q0,q14
2808 	vadd.i32	q1,q1,q15
2809 	it	ne
2810 	bne	.Loop_v8
2811 
2812 	vst1.32	{q0,q1},[r0]
2813 
2814 	bx	lr		@ bx lr
2815 .size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2816 #endif
2817 .byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2818 .align	2
2819 .align	2
2820 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2821 .comm	OPENSSL_armcap_P,4,4
2822 #endif
2823