1 #include "arm_arch.h"
2 
3 #if __ARM_MAX_ARCH__>=7
@ Assemble for ARMv7-A with NEON. The AES instructions used further down are
@ emitted as raw encoding bytes through the INST macro, so the assembler only
@ ever sees .byte data for them.
.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
.fpu	neon
#ifdef	__thumb2__
.syntax	unified
.thumb
@ Thumb-2 build: the third and fourth encoding bytes are emitted first and
@ 0xc is ORed into the fourth byte; the first and second bytes follow.
# define INST(a,b,c,d)	.byte	c,d|0xc,a,b
#else
.code	32
@ ARM build: the four encoding bytes are emitted in the order given.
# define INST(a,b,c,d)	.byte	a,b,c,d
#endif
14 
.text
.align	5
@ Constant table for the key expansion code: three rows of four words.
@ Rows one and two are loaded together into q1 and q2 at the start of the
@ expansion; row three is loaded into q1 by the 128-bit path once its
@ eight-iteration loop has finished.
.Lrcon:
@ Row one: starting round constant (shifted left by one after each use).
.long	0x01,0x01,0x01,0x01
@ Row two: byte index vector used with vtbl on the previous round key.
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
@ Row three: round constant used for the last two 128-bit expansion steps.
.long	0x1b,0x1b,0x1b,0x1b
21 
@ aes_v8_set_encrypt_key: expand a user key into an encryption key schedule.
@ In:   r0 = user key bytes, r1 = key length in bits, r2 = key schedule buffer
@ Out:  r0 = 0 on success,
@            -1 if r0 or r2 is zero,
@            -2 if r1 is not 128, 192 or 256
@ Round keys are written from offset 0 of the schedule and the round count
@ (10, 12 or 14) is written as a word at offset 240.
@ .Lenc_key is a second entry label used by aes_v8_set_decrypt_key. On the
@ success path r12 holds the round count and r2 is left pointing at offset
@ 240 of the schedule; that caller depends on both.
@ Clobbers r1, r2, r3, r12, q0-q3, q8-q10 and the flags.
.globl	aes_v8_set_encrypt_key
.type	aes_v8_set_encrypt_key,%function
.align	5
aes_v8_set_encrypt_key:
.Lenc_key:
	@ Argument checks; r3 carries the prospective return value.
	mov	r3,#-1
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f
	bne	.Lenc_key_abort

	@ r3 now becomes the constant table pointer. The flags set by the compare
	@ against 192 are consumed by the three-way branch below; none of the
	@ instructions in between alter the flags.
	adr	r3,.Lrcon
	cmp	r1,#192

	veor	q0,q0,q0
	vld1.8	{q3},[r0]!
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!

	blt	.Loop128
	beq	.L192
	b	.L256

@ 128-bit keys: eight loop iterations followed by two unrolled steps.
@ q3 = current round key, q1 = round constant, q2 = vtbl index vector,
@ q0 = zero (used as the round key operand of aese).
.align	4
.Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	bne	.Loop128

	@ Switch to the third row of the constant table for the last two steps.
	vld1.32	{q1},[r3]

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	@ Final round key is stored without write-back; r2 is at offset 160 here,
	@ so adding 0x50 moves it to offset 240 where the round count goes.
	vst1.32	{q3},[r2]
	add	r2,r2,#0x50

	mov	r12,#10
	b	.Ldone

@ 192-bit keys: the remaining 8 key bytes are loaded into d16 and the loop
@ runs eight times (r1 is still 8), storing 24 bytes per iteration.
.align	4
.L192:
	vld1.8	{d16},[r0]!
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
#ifdef __ARMEB__
	vst1.32	{q8},[r2]!
	sub	r2,r2,#8
#else
	vst1.32	{d16},[r2]!
#endif
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	@ r2 is at offset 208 after the loop; 0x20 more reaches offset 240.
	mov	r12,#12
	add	r2,r2,#0x20
	b	.Ldone

@ 256-bit keys: the second 16 key bytes are loaded into q8 and the loop
@ runs seven times, leaving through the beq in the middle of the last pass
@ with r2 already at offset 240.
.align	4
.L256:
	vld1.8	{q8},[r0]
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone

	@ Second half of the step: derive the next q8 from the new q3. The word
	@ is splatted instead of going through the vtbl index vector.
	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

@ Common success exit: store the round count at offset 240 of the schedule.
.Ldone:
	str	r12,[r2]
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
196 
@ aes_v8_set_decrypt_key: build a decryption key schedule.
@ The incoming r0, r1 and r2 are passed unchanged to .Lenc_key, so the
@ arguments and error codes are those of aes_v8_set_encrypt_key.
@ On success the round keys are reversed in place: the first and last keys
@ are swapped as they are, and every key in between has aesimc applied while
@ being moved. r0 = 0 is returned.
@ Uses r4 (saved and restored here) plus r0, r2, q0 and q1.
.globl	aes_v8_set_decrypt_key
.type	aes_v8_set_decrypt_key,%function
.align	5
aes_v8_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort

	@ .Lenc_key returned with r2 at offset 240 and r12 = round count.
	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16
	add	r0,r2,r12,lsl#4	@ end of key schedule

	@ Swap the outermost pair of round keys without transforming them.
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

	@ Walk inwards from both ends (r2 upwards, r0 downwards), swapping and
	@ transforming a pair of keys per iteration until the pointers meet.
.Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc

	@ The middle key is transformed in place.
	vld1.32	{q0},[r2]
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
@ aes_v8_encrypt: encrypt a single 16-byte block.
@ In:   r0 = input block, r1 = output block, r2 = key schedule
@ The round count is read from offset 240 of the schedule.
@ Clobbers r2, r3, q0-q2 and the flags; nothing is saved on the stack.
.globl	aes_v8_encrypt
.type	aes_v8_encrypt,%function
.align	5
aes_v8_encrypt:
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

	@ Two rounds per iteration; q0 and q1 alternate as the round key and are
	@ reloaded from the schedule right after they have been used.
.Loop_enc:
	INST(0x00,0x43,0xb0,0xf3)	@ aese q2,q0
	INST(0x84,0x43,0xb0,0xf3)	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
	INST(0x02,0x43,0xb0,0xf3)	@ aese q2,q1
	INST(0x84,0x43,0xb0,0xf3)	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

	@ Last two rounds: the final aese is not followed by aesmc and the last
	@ round key is applied with a plain XOR.
	INST(0x00,0x43,0xb0,0xf3)	@ aese q2,q0
	INST(0x84,0x43,0xb0,0xf3)	@ aesmc q2,q2
	vld1.32	{q0},[r2]
	INST(0x02,0x43,0xb0,0xf3)	@ aese q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_v8_encrypt,.-aes_v8_encrypt
@ aes_v8_decrypt: decrypt a single 16-byte block.
@ In:   r0 = input block, r1 = output block, r2 = key schedule
@ The round count is read from offset 240 of the schedule. The round keys
@ are consumed front to back, so the schedule presumably has to come from
@ aes_v8_set_decrypt_key (TODO confirm against callers).
@ Clobbers r2, r3, q0-q2 and the flags; nothing is saved on the stack.
.globl	aes_v8_decrypt
.type	aes_v8_decrypt,%function
.align	5
aes_v8_decrypt:
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

	@ Two rounds per iteration; q0 and q1 alternate as the round key and are
	@ reloaded from the schedule right after they have been used.
.Loop_dec:
	INST(0x40,0x43,0xb0,0xf3)	@ aesd q2,q0
	INST(0xc4,0x43,0xb0,0xf3)	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
	INST(0x42,0x43,0xb0,0xf3)	@ aesd q2,q1
	INST(0xc4,0x43,0xb0,0xf3)	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

	@ Last two rounds: the final aesd is not followed by aesimc and the last
	@ round key is applied with a plain XOR.
	INST(0x40,0x43,0xb0,0xf3)	@ aesd q2,q0
	INST(0xc4,0x43,0xb0,0xf3)	@ aesimc q2,q2
	vld1.32	{q0},[r2]
	INST(0x42,0x43,0xb0,0xf3)	@ aesd q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_v8_decrypt,.-aes_v8_decrypt
@ aes_v8_ecb_encrypt: ECB-mode encryption or decryption of whole blocks.
@ In:   r0 = input, r1 = output, r2 = length in bytes, r3 = key schedule,
@       first stack argument = direction flag (zero selects decryption)
@ A length below 16 returns immediately. Otherwise the length is rounded
@ down to a multiple of 16 and the data is processed three blocks at a
@ time, with a separate tail for one or two remaining blocks.
@ r4-r8 and d8-d15 are saved on entry and restored on exit.
.globl	aes_v8_ecb_encrypt
.type	aes_v8_ecb_encrypt,%function
.align	5
aes_v8_ecb_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}			@ ABI specification says so
	@ Only r4 is used as an argument; the word loaded into r5 is overwritten
	@ by the round count below before it is ever read.
	ldmia	ip,{r4,r5}			@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	.Lecb_done
	it	eq
	moveq	r8,#0

	@ The flags from this compare are consumed by the beq to .Lecb_dec
	@ further down; nothing in between alters them.
	cmp	r4,#0					@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]				@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4				@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lecb_dec

	@ Encryption. q10 and q11 are reused for data from here on; r6 is set
	@ to r5+2 so that the q8/q9 loop covers the two extra rounds.
	vld1.8	{q1},[r0]!
	subs	r2,r2,#32				@ bias
	add	r6,r5,#2
	vorr	q3,q1,q1
	vorr	q10,q1,q1
	vorr	q1,q0,q0
	blo	.Lecb_enc_tail

	vorr	q1,q3,q3
	vld1.8	{q10},[r0]!
.Loop3x_ecb_enc:
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ecb_enc

	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	@ The flags from this subtraction also decide the bhs at the bottom of
	@ the loop; no instruction in between alters them.
	subs	r2,r2,#0x30
	it	lo
	movlo	r6,r2				@ r6 is zero at this point; it takes the negative remainder when fewer than three blocks are left
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	add	r0,r0,r6			@ r0 is adjusted in such way that
						@ at exit from the loop q1-q10
						@ are loaded with last "words"
	mov	r7,r3
	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x28,0x23,0xb0,0xf3)	@ aese q1,q12
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x28,0x43,0xf0,0xf3)	@ aese q10,q12
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.8	{q2},[r0]!
	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2a,0x23,0xb0,0xf3)	@ aese q1,q13
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x2a,0x43,0xf0,0xf3)	@ aese q10,q13
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2c,0x23,0xb0,0xf3)	@ aese q1,q14
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x2c,0x43,0xf0,0xf3)	@ aese q10,q14
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.8	{q11},[r0]!
	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
	INST(0x2e,0x23,0xb0,0xf3)	@ aese q1,q15
	INST(0x2e,0x43,0xf0,0xf3)	@ aese q10,q15
	vld1.32	{q8},[r7]!		@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q7,q0
	veor	q5,q7,q1
	veor	q10,q10,q7
	vld1.32	{q9},[r7]!		@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_ecb_enc

	@ r2 equal to -0x30 means nothing is left over.
	cmn	r2,#0x30
	beq	.Lecb_done
	nop

	@ One or two blocks left, held in q1 and q10.
.Lecb_enc_tail:
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lecb_enc_tail

	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	INST(0x28,0x23,0xb0,0xf3)	@ aese q1,q12
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x28,0x43,0xf0,0xf3)	@ aese q10,q12
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	@ Equal means exactly one block is left; consumed by the beq below.
	cmn	r2,#0x20
	INST(0x2a,0x23,0xb0,0xf3)	@ aese q1,q13
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x2a,0x43,0xf0,0xf3)	@ aese q10,q13
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	INST(0x2c,0x23,0xb0,0xf3)	@ aese q1,q14
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x2c,0x43,0xf0,0xf3)	@ aese q10,q14
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	INST(0x2e,0x23,0xb0,0xf3)	@ aese q1,q15
	INST(0x2e,0x43,0xf0,0xf3)	@ aese q10,q15
	beq	.Lecb_enc_one
	veor	q5,q7,q1
	veor	q9,q7,q10
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lecb_done

.Lecb_enc_one:
	veor	q5,q7,q10
	vst1.8	{q5},[r1]!
	b	.Lecb_done
.align	5
	@ Decryption: same structure as the encryption path above, using aesd
	@ and aesimc in place of aese and aesmc.
.Lecb_dec:
	vld1.8	{q1},[r0]!
	subs	r2,r2,#32			@ bias
	add	r6,r5,#2
	vorr	q3,q1,q1
	vorr	q10,q1,q1
	vorr	q1,q0,q0
	blo	.Lecb_dec_tail

	vorr	q1,q3,q3
	vld1.8	{q10},[r0]!
.Loop3x_ecb_dec:
	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ecb_dec

	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	@ The flags from this subtraction also decide the bhs at the bottom of
	@ the loop; no instruction in between alters them.
	subs	r2,r2,#0x30
	it	lo
	movlo	r6,r2				@ r6 is zero at this point; it takes the negative remainder when fewer than three blocks are left
	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	add	r0,r0,r6 			@ r0 is adjusted in such way that
						@ at exit from the loop q1-q10
						@ are loaded with last "words"
	mov	r7,r3
	INST(0x68,0x03,0xb0,0xf3)	@ aesd q0,q12
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
	INST(0x6a,0x03,0xb0,0xf3)	@ aesd q0,q13
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
	INST(0x6c,0x03,0xb0,0xf3)	@ aesd q0,q14
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
	INST(0x6e,0x03,0xb0,0xf3)	@ aesd q0,q15
	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
	vld1.32	{q8},[r7]!			@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q7,q0
	veor	q5,q7,q1
	veor	q10,q10,q7
	vld1.32	{q9},[r7]!			@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_ecb_dec

	@ r2 equal to -0x30 means nothing is left over.
	cmn	r2,#0x30
	beq	.Lecb_done
	nop

	@ One or two blocks left, held in q1 and q10.
.Lecb_dec_tail:
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lecb_dec_tail

	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	@ Equal means exactly one block is left; consumed by the beq below.
	cmn	r2,#0x20
	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
	beq	.Lecb_dec_one
	veor	q5,q7,q1
	veor	q9,q7,q10
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lecb_done

.Lecb_dec_one:
	veor	q5,q7,q10
	vst1.8	{q5},[r1]!

	@ Common exit: restore the callee-saved registers and return.
.Lecb_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_v8_ecb_encrypt,.-aes_v8_ecb_encrypt
@ aes_v8_cbc_encrypt: CBC-mode encryption or decryption.
@ In:   r0 = input, r1 = output, r2 = length in bytes, r3 = key schedule,
@       first stack argument  = 16-byte IV, read on entry and overwritten
@                               with the new chaining value on exit,
@       second stack argument = direction flag (zero selects decryption)
@ A length below 16 returns without writing the output or the IV; otherwise
@ the length is rounded down to a multiple of 16.
@ Encryption handles one block per iteration, with a dedicated loop when
@ the round count is 10. Decryption handles three blocks per iteration with
@ a separate tail for one or two blocks.
@ r4-r8 and d8-d15 are saved on entry and restored on exit.
.globl	aes_v8_cbc_encrypt
.type	aes_v8_cbc_encrypt,%function
.align	5
aes_v8_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	.Lcbc_abort
	it	eq
	moveq	r8,#0

	@ The flags from this compare are consumed by the beq to .Lcbc_dec
	@ further down; r5 itself is reused for the round count right away.
	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec

	@ Encryption. r5 holds the round count minus 8, so 2 means ten rounds.
	@ q5 = first round key XOR last round key, used to prepare the next
	@ input block while the current one is still being processed.
	cmp	r5,#2
	veor	q0,q0,q6
	veor	q5,q8,q7
	beq	.Lcbc_enc128

	@ Longer keys: r7, r6, r12, r14 and r3 are set up as pointers to round
	@ keys 1, 4, 5, 6 and 7. lr (r14) was saved in the prologue.
	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc:
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x04,0x03,0xb0,0xf3)	@ aese q0,q2
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	@ r5 equal to 4 means twelve rounds: skip the two extra rounds below.
	cmp	r5,#4
	INST(0x06,0x03,0xb0,0xf3)	@ aese q0,q3
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vld1.32	{q8},[r14]
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	@ The flags from this subtraction drive both the moveq below and the
	@ bhs at the bottom of the loop.
	subs	r2,r2,#16
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	it	eq
	moveq	r8,#0
	INST(0x24,0x03,0xb0,0xf3)	@ aese q0,q10
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x26,0x03,0xb0,0xf3)	@ aese q0,q11
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	veor	q8,q8,q5
	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

	@ Ten-round encryption loop: all round keys stay in registers
	@ (q8, q9, q2, q3, q10-q15, q7).
.align	5
.Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc128:
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	subs	r2,r2,#16
	INST(0x04,0x03,0xb0,0xf3)	@ aese q0,q2
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	it	eq
	moveq	r8,#0
	INST(0x06,0x03,0xb0,0xf3)	@ aese q0,q3
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x24,0x03,0xb0,0xf3)	@ aese q0,q10
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x26,0x03,0xb0,0xf3)	@ aese q0,q11
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	veor	q8,q8,q5
	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done
.align	5
	@ Decryption. q6 holds the chaining value (initially the IV); q2, q3 and
	@ q11 keep copies of the ciphertext blocks for the XOR after decryption.
	@ q10 and q11 are reused for data, and r6 = r5+2 lets the q8/q9 loop
	@ cover the two extra rounds.
.Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10
.Loop3x_cbc_dec:
	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec

	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	@ q4, q5 and q9 (set a few lines down) combine the last round key with
	@ the previous ciphertext block of each lane.
	veor	q4,q6,q7
	@ The flags from this subtraction also decide the bhs at the bottom of
	@ the loop; no instruction in between alters them.
	subs	r2,r2,#0x30
	veor	q5,q2,q7
	it	lo
	movlo	r6,r2			@ r6 is zero at this point; it takes the negative remainder when fewer than three blocks are left
	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	veor	q9,q3,q7
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1-q10
					@ are loaded with last "words"
	vorr	q6,q11,q11
	mov	r7,r3
	INST(0x68,0x03,0xb0,0xf3)	@ aesd q0,q12
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
	INST(0x6a,0x03,0xb0,0xf3)	@ aesd q0,q13
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
	INST(0x6c,0x03,0xb0,0xf3)	@ aesd q0,q14
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
	INST(0x6e,0x03,0xb0,0xf3)	@ aesd q0,q15
	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec

	@ r2 equal to -0x30 means nothing is left over.
	cmn	r2,#0x30
	beq	.Lcbc_done
	nop

	@ One or two blocks left, held in q1 and q10.
.Lcbc_dec_tail:
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail

	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	@ Equal means exactly one block is left; consumed by the beq below.
	cmn	r2,#0x20
	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	veor	q5,q6,q7
	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	veor	q9,q3,q7
	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
	beq	.Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

	@ Write the chaining value back through the IV pointer, then restore
	@ the callee-saved registers. The abort path skips the write-back.
.Lcbc_done:
	vst1.8	{q6},[r4]
.Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
@ aes_v8_ctr32_encrypt_blocks: CTR-mode processing with a 32-bit counter.
@ In:   r0 = input, r1 = output, r2 = number of 16-byte blocks,
@       r3 = key schedule, first stack argument = 16-byte counter block
@ The last word of the counter block is treated as a big-endian 32-bit
@ counter that advances by one per block. The counter block in memory is
@ only read; it is not written back.
@ A block count of zero returns immediately without reading the input or
@ writing the output.
@ Blocks are processed three at a time with a separate tail for one or two
@ blocks. r4-r10 and d8-d15 are saved on entry and restored on exit.
.globl	aes_v8_ctr32_encrypt_blocks
.type	aes_v8_ctr32_encrypt_blocks,%function
.align	5
aes_v8_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	@ Without this guard a zero block count would fall into .Lctr32_tail,
	@ which stores two blocks unless the count is exactly one.
	cmp	r2,#0
	beq	.Lctr32_done
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]
#ifdef __ARMEB__
	vld1.8	{q0},[r4]
#else
	vld1.32	{q0},[r4]
#endif
	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	@ The flags from this compare are consumed by the movlo and the bls
	@ below; nothing in between alters them.
	cmp	r2,#2
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	it	lo
	movlo	r12,#0
#ifndef __ARMEB__
	rev	r8, r8
#endif
	@ r8 holds the counter in native byte order. q6 is the counter block
	@ template; its last word (d13[1]) is replaced for every new block.
	add	r10, r8, #1
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d13[1],r10
	add	r8, r8, #2
	vorr	q1,q6,q6
	bls	.Lctr32_tail
	rev	r12, r8
	vmov.32	d13[1],r12
	sub	r2,r2,#3		@ bias
	vorr	q10,q6,q6
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

	@ From here the three lanes continue in q4, q5 and q9 so that q0, q1
	@ and q10 can be refilled with the next three counter blocks while the
	@ remaining rounds are still running.
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x83,0xb0,0xf3)	@ aesmc q4,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0xa3,0xb0,0xf3)	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	add	r9,r8,#1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	rev	r9,r9
	INST(0x22,0x83,0xb0,0xf3)	@ aese q4,q9
	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
	INST(0x22,0xa3,0xb0,0xf3)	@ aese q5,q9
	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x23,0xf0,0xf3)	@ aesmc q9,q10
	INST(0x28,0x83,0xb0,0xf3)	@ aese q4,q12
	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
	INST(0x28,0xa3,0xb0,0xf3)	@ aese q5,q12
	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
	veor	q2,q2,q7
	add	r10,r8,#2
	INST(0x28,0x23,0xf0,0xf3)	@ aese q9,q12
	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
	INST(0x2a,0x83,0xb0,0xf3)	@ aese q4,q13
	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
	INST(0x2a,0xa3,0xb0,0xf3)	@ aese q5,q13
	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
	veor	q11,q11,q7
	vmov.32	d13[1], r9
	INST(0x2a,0x23,0xf0,0xf3)	@ aese q9,q13
	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
	vorr	q0,q6,q6
	rev	r10,r10
	INST(0x2c,0x83,0xb0,0xf3)	@ aese q4,q14
	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
	vmov.32	d13[1], r10
	rev	r12,r8
	INST(0x2c,0xa3,0xb0,0xf3)	@ aese q5,q14
	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
	vorr	q1,q6,q6
	vmov.32	d13[1], r12
	INST(0x2c,0x23,0xf0,0xf3)	@ aese q9,q14
	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
	vorr	q10,q6,q6
	@ The flags from this subtraction decide the bhs at the bottom of the
	@ loop; no instruction in between alters them.
	subs	r2,r2,#3
	INST(0x2e,0x83,0xb0,0xf3)	@ aese q4,q15
	INST(0x2e,0xa3,0xb0,0xf3)	@ aese q5,q15
	INST(0x2e,0x23,0xf0,0xf3)	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32

	@ Undo the bias: r2 is now the number of blocks left (0, 1 or 2).
	adds	r2,r2,#3
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	it	eq
	moveq	r12,#0

	@ One or two blocks left, with counter blocks in q0 and q1. r12 is the
	@ input stride: zero when a single block remains, so that the second
	@ load re-reads the same block instead of running past the input.
.Lctr32_tail:
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12
	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x28,0x23,0xb0,0xf3)	@ aese q1,q12
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	vld1.8	{q3},[r0]
	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2a,0x23,0xb0,0xf3)	@ aese q1,q13
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	veor	q2,q2,q7
	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2c,0x23,0xb0,0xf3)	@ aese q1,q14
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	veor	q3,q3,q7
	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
	INST(0x2e,0x23,0xb0,0xf3)	@ aese q1,q15

	@ The second output block is stored only when two blocks were left.
	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
1087 #endif
1088