/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a
	.fpu		crypto-neon-fp-armv8	@ NEON plus Crypto Extensions mnemonics
	.align		3

/*
 * enc_round - apply one full AES encryption round to a state register.
 *   state : q register holding the block, updated in place
 *   key   : q register holding the round key
 */
	.macro		enc_round, state, key
	aese.8		\state, \key		@ AddRoundKey + SubBytes + ShiftRows
	aesmc.8		\state, \state		@ MixColumns
	.endm

/*
 * dec_round - apply one full AES decryption round to a state register.
 *   state : q register holding the block, updated in place
 *   key   : q register holding the round key
 */
	.macro		dec_round, state, key
	aesd.8		\state, \key		@ AddRoundKey + InvSubBytes + InvShiftRows
	aesimc.8	\state, \state		@ InvMixColumns
	.endm

/*
 * enc_dround - two consecutive full encryption rounds on the block in q0.
 *   key1, key2 : q registers holding the two round keys, used in that order
 */
	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm

/*
 * dec_dround - two consecutive full decryption rounds on the block in q0.
 *   key1, key2 : q registers holding the two round keys, used in that order
 */
	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm

/*
 * enc_fround - closing rounds of an encryption on the block in q0.
 *   key1 : round key for the last full round
 *   key2 : round key for the final round, which has no MixColumns step
 *   key3 : last round key, XORed into the result
 */
	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2		@ final round: no aesmc follows
	veor		q0, q0, \key3		@ closing AddRoundKey
	.endm

/*
 * dec_fround - closing rounds of a decryption on the block in q0.
 *   key1 : round key for the last full round
 *   key2 : round key for the final round, which has no InvMixColumns step
 *   key3 : last round key, XORed into the result
 */
	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2		@ final round: no aesimc follows
	veor		q0, q0, \key3		@ closing AddRoundKey
	.endm

/*
 * enc_dround_4x - two full encryption rounds on the four blocks in q0 - q3.
 *   key1, key2 : q registers holding the two round keys, used in that order
 * Each round is issued for all four blocks before the next round starts.
 */
	.macro		enc_dround_4x, key1, key2
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q3, \key1
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	enc_round	q3, \key2
	.endm

/*
 * dec_dround_4x - two full decryption rounds on the four blocks in q0 - q3.
 *   key1, key2 : q registers holding the two round keys, used in that order
 * Each round is issued for all four blocks before the next round starts.
 */
	.macro		dec_dround_4x, key1, key2
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q3, \key1
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	dec_round	q3, \key2
	.endm

/*
 * enc_fround_4x - closing rounds of an encryption on the blocks in q0 - q3.
 *   key1 : round key for the last full round
 *   key2 : round key for the final round, which has no MixColumns step
 *   key3 : last round key, XORed into each result
 */
	.macro		enc_fround_4x, key1, key2, key3
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q3, \key1
	aese.8		q0, \key2		@ final round: no aesmc follows
	aese.8		q1, \key2
	aese.8		q2, \key2
	aese.8		q3, \key2
	veor		q0, q0, \key3		@ closing AddRoundKey
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	veor		q3, q3, \key3
	.endm

/*
 * dec_fround_4x - closing rounds of a decryption on the blocks in q0 - q3.
 *   key1 : round key for the last full round
 *   key2 : round key for the final round, which has no InvMixColumns step
 *   key3 : last round key, XORed into each result
 */
	.macro		dec_fround_4x, key1, key2, key3
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q3, \key1
	aesd.8		q0, \key2		@ final round: no aesimc follows
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	aesd.8		q3, \key2
	veor		q0, q0, \key3		@ closing AddRoundKey
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	veor		q3, q3, \key3
	.endm

/*
 * do_block - emit the body of one AES core transform, including its return.
 *   dround : macro performing two full rounds, invoked as (key1, key2)
 *   fround : macro performing the closing rounds, invoked as (key1, key2, key3)
 *
 * On entry ip points at the third round key, q8/q9 hold the first two round
 * keys, q14 holds the last round key and r3 holds the number of rounds.
 * Round keys are fetched two at a time, alternating between q10-q11 and
 * q12-q13, so each pair is loaded before the rounds that consume it.
 * The flags produced by the initial cmp are only tested further down, none
 * of the NEON instructions in between write them.
 * Clobbers q10 - q13, ip and the condition flags. Returns with bx lr.
 */
	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.32		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.32		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.32		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.32		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		0f			@ AES-128: 10 rounds
	vld1.32		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ AES-192: 12 rounds
	vld1.32		{q12-q13}, [ip]		@ remaining case: 14 rounds
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm

	/*
	 * Internal, non-AAPCS compliant functions that implement the core AES
	 * transforms. These preserve all registers except q0 (q0 - q3 for the
	 * _4x versions), the round key scratch registers q10 - q13, ip and
	 * the condition flags.
	 * Arguments:
	 *   q0        : first in/output block
	 *   q1        : second in/output block (_4x version only)
	 *   q2        : third in/output block (_4x version only)
	 *   q3        : fourth in/output block (_4x version only)
	 *   q8        : first round key
	 *   q9        : second round key
	 *   q14       : final round key
	 *   r2        : address of round key array
	 *   r3        : number of rounds
	 */
/*
 * aes_encrypt - encrypt the single block in q0 (internal, non-AAPCS).
 * Expects q8, q9 and q14 to hold the first, second and last round keys,
 * r2 the round key array and r3 the number of rounds.
 * .Laes_encrypt_tweak is a second entry point for a caller that has
 * already pointed ip at the third round key of a different key schedule.
 */
	.align		6
aes_encrypt:
	add		ip, r2, #32		@ 3rd round key
.Laes_encrypt_tweak:
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)

/*
 * aes_decrypt - decrypt the single block in q0 (internal, non-AAPCS).
 * Expects q8, q9 and q14 to hold the first, second and last round keys,
 * r2 the round key array and r3 the number of rounds.
 */
	.align		6
aes_decrypt:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)

/*
 * aes_encrypt_4x - encrypt the four blocks in q0 - q3 (internal, non-AAPCS).
 * Expects q8, q9 and q14 to hold the first, second and last round keys,
 * r2 the round key array and r3 the number of rounds.
 */
	.align		6
aes_encrypt_4x:
	add		ip, r2, #32		@ 3rd round key
	do_block	enc_dround_4x, enc_fround_4x
ENDPROC(aes_encrypt_4x)

/*
 * aes_decrypt_4x - decrypt the four blocks in q0 - q3 (internal, non-AAPCS).
 * Expects q8, q9 and q14 to hold the first, second and last round keys,
 * r2 the round key array and r3 the number of rounds.
 */
	.align		6
aes_decrypt_4x:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround_4x, dec_fround_4x
ENDPROC(aes_decrypt_4x)

/*
 * prepare_key - preload the round keys that stay resident in registers.
 *   rk     : core register holding the address of the round key array
 *   rounds : core register holding the number of rounds
 * Loads q8/q9 with the first two round keys and q14 with the round key at
 * index 'rounds' (16 bytes per key). Clobbers ip.
 */
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4
	vld1.32		{q8-q9}, [\rk]		@ load first 2 round keys
	vld1.32		{q14}, [ip]		@ load last round key
	.endm

/*
 * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks)
 * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks)
 *
 * ce_aes_ecb_encrypt: ECB-encrypt 'blocks' 16-byte blocks from in to out.
 *   r0 = out, r1 = in, r2 = rk, r3 = rounds, first stack word = blocks
 * Blocks are handled four at a time while at least four remain, then one
 * at a time.
 */
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ r4 = blocks (above the 2 saved regs)
	prepare_key	r2, r3
.Lecbencloop4x:
	subs		r4, r4, #4
	bmi		.Lecbenc1x		@ fewer than 4 blocks left
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	bl		aes_encrypt_4x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lecbencloop4x
.Lecbenc1x:
	adds		r4, r4, #4		@ undo the last subtraction
	beq		.Lecbencout		@ no blocks left
.Lecbencloop:
	vld1.8		{q0}, [r1]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbencloop
.Lecbencout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)

/*
 * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks)
 *
 * ECB-decrypt 'blocks' 16-byte blocks from in to out.
 *   r0 = out, r1 = in, r2 = rk, r3 = rounds, first stack word = blocks
 * Blocks are handled four at a time while at least four remain, then one
 * at a time.
 */
ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ r4 = blocks (above the 2 saved regs)
	prepare_key	r2, r3
.Lecbdecloop4x:
	subs		r4, r4, #4
	bmi		.Lecbdec1x		@ fewer than 4 blocks left
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	bl		aes_decrypt_4x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lecbdecloop4x
.Lecbdec1x:
	adds		r4, r4, #4		@ undo the last subtraction
	beq		.Lecbdecout		@ no blocks left
.Lecbdecloop:
	vld1.8		{q0}, [r1]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)

/*
 * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks, u8 iv[])
 * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks, u8 iv[])
 *
 * ce_aes_cbc_encrypt: CBC-encrypt 'blocks' 16-byte blocks from in to out,
 * one block at a time, and write the last ciphertext block back to iv.
 *   r0 = out, r1 = in, r2 = rk, r3 = rounds, stack = blocks, iv
 * NOTE(review): the loop is tested at the bottom, so blocks must be at
 * least 1 - confirm that callers never pass 0.
 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	vld1.8		{q0}, [r5]		@ q0 = chaining value, starts as iv
	prepare_key	r2, r3
.Lcbcencloop:
	vld1.8		{q1}, [r1]!		@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
	vst1.8		{q0}, [r5]		@ last ct block becomes the next iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)

/*
 * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks, u8 iv[])
 *
 * CBC-decrypt 'blocks' 16-byte blocks from in to out and write the last
 * ciphertext block back to iv.
 *   r0 = out, r1 = in, r2 = rk, r3 = rounds, stack = blocks, iv
 * q15 carries the previous ciphertext block (initially the iv) throughout.
 * In the single block loop the previous ciphertext is folded into the last
 * round key (q14), so the closing veor of the core transform also performs
 * the CBC xor. The unmodified last round key is kept in q6 for that.
 */
ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	vld1.8		{q15}, [r5]		@ keep iv in q15
	prepare_key	r2, r3
.Lcbcdecloop4x:
	subs		r4, r4, #4
	bmi		.Lcbcdec1x		@ fewer than 4 blocks left
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2-q3}, [r1]!
	vmov		q4, q0			@ keep copies of the ct blocks
	vmov		q5, q1
	vmov		q6, q2
	vmov		q7, q3
	bl		aes_decrypt_4x
	veor		q0, q0, q15		@ xor each result with the preceding ct
	veor		q1, q1, q4
	veor		q2, q2, q5
	veor		q3, q3, q6
	vmov		q15, q7			@ last ct block chains into the next one
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	b		.Lcbcdecloop4x
.Lcbcdec1x:
	adds		r4, r4, #4		@ undo the last subtraction
	beq		.Lcbcdecout
	vmov		q6, q14			@ preserve last round key
.Lcbcdecloop:
	vld1.8		{q0}, [r1]!		@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key
	vmov		q15, q0
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	vst1.8		{q15}, [r5]		@ return last ct block as next iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)


/*
 * ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
 *			  int rounds, int bytes, u8 const iv[])
 * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
 *			  int rounds, int bytes, u8 const iv[])
 *
 * ce_aes_cbc_cts_encrypt: encrypt the final two blocks of a CBC message
 * with ciphertext stealing. Exactly two core encryptions are performed.
 *   r0 = out, r1 = in, r2 = rk, r3 = rounds, stack = bytes, iv
 * NOTE(review): the table offsets and overlapping accesses below only make
 * sense for 16 < bytes <= 32 - confirm against the caller.
 * The iv buffer is only read.
 */

ENTRY(ce_aes_cbc_cts_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = bytes, r5 = iv

	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table
	sub		r4, r4, #16		@ r4 = size of the final partial block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	vld1.8		{q5}, [ip]		@ permute vector at table + r4
	vld1.8		{q6}, [lr]		@ permute vector at table + 32 - r4

	add		ip, r1, r4
	vld1.8		{q0}, [r1]			@ overlapping loads
	vld1.8		{q3}, [ip]

	vld1.8		{q1}, [r5]			@ get iv
	prepare_key	r2, r3

	veor		q0, q0, q1			@ xor with iv
	bl		aes_encrypt

	vtbl.8		d4, {d0-d1}, d10	@ q2 = leading ct bytes moved to the top
	vtbl.8		d5, {d0-d1}, d11
	vtbl.8		d2, {d6-d7}, d12	@ q1 = tail of the input, zero padded
	vtbl.8		d3, {d6-d7}, d13

	veor		q0, q0, q1
	bl		aes_encrypt

	add		r4, r0, r4
	vst1.8		{q2}, [r4]			@ overlapping stores
	vst1.8		{q0}, [r0]

	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_encrypt)

/*
 * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
 *			  int rounds, int bytes, u8 const iv[])
 *
 * Decrypt the final two blocks of a CBC message that was encrypted with
 * ciphertext stealing. Exactly two core decryptions are performed.
 *   r0 = out, r1 = in, r2 = rk, r3 = rounds, stack = bytes, iv
 * NOTE(review): the table offsets and overlapping accesses below only make
 * sense for 16 < bytes <= 32 - confirm against the caller.
 * The iv buffer is only read.
 */
ENTRY(ce_aes_cbc_cts_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = bytes, r5 = iv

	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table
	sub		r4, r4, #16		@ r4 = size of the final partial block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	vld1.8		{q5}, [ip]		@ permute vector at table + r4
	vld1.8		{q6}, [lr]		@ permute vector at table + 32 - r4

	add		ip, r1, r4
	vld1.8		{q0}, [r1]			@ overlapping loads
	vld1.8		{q1}, [ip]

	vld1.8		{q3}, [r5]			@ get iv
	prepare_key	r2, r3

	bl		aes_decrypt

	vtbl.8		d4, {d0-d1}, d10	@ q2 = leading result bytes moved to the top
	vtbl.8		d5, {d0-d1}, d11
	vtbx.8		d0, {d2-d3}, d12	@ splice the input tail over the start of q0
	vtbx.8		d1, {d2-d3}, d13

	veor		q1, q1, q2
	bl		aes_decrypt
	veor		q0, q0, q3			@ xor with iv

	add		r4, r0, r4
	vst1.8		{q1}, [r4]			@ overlapping stores
	vst1.8		{q0}, [r0]

	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_cts_decrypt)


/*
 * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 *		      int blocks, u8 ctr[])
 *
 * CTR mode: encrypt successive counter values and xor the key stream into
 * the input. The updated counter is written back to ctr.
 *   r0 = out, r1 = in, r2 = rk, r3 = rounds, stack = blocks, ctr
 * q7 holds the counter block and r6 a byte-swapped copy of its last 32-bit
 * word (s31). The 4x path only increments that word, so it is skipped
 * whenever adding 'blocks' to r6 would carry out of 32 bits. The single
 * block path propagates the carry through s30, s29 and s28.
 * When the block count goes negative in the single block loop, the raw key
 * stream block is stored to out and nothing is read from in.
 * NOTE(review): presumably the caller passes a negative block count to
 * obtain the key stream for a trailing partial block - confirm.
 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = ctr
	vld1.8		{q7}, [r5]		@ load ctr
	prepare_key	r2, r3
	vmov		r6, s31			@ keep swabbed ctr in r6
	rev		r6, r6
	cmn		r6, r4			@ 32 bit overflow?
	bcs		.Lctrloop		@ yes: use the carry aware 1x loop
.Lctrloop4x:
	subs		r4, r4, #4
	bmi		.Lctr1x			@ fewer than 4 blocks left

	/*
	 * NOTE: the sequence below has been carefully tweaked to avoid
	 * a silicon erratum that exists in Cortex-A57 (#1742098) and
	 * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs
	 * may produce an incorrect result if they take their input from a
	 * register of which a single 32-bit lane has been updated the last
	 * time it was modified. To work around this, the lanes of registers
	 * q0-q3 below are not manipulated individually, and the different
	 * counter values are prepared by successive manipulations of q7.
	 */
	add		ip, r6, #1
	vmov		q0, q7
	rev		ip, ip
	add		lr, r6, #2
	vmov		s31, ip			@ set lane 3 of q1 via q7
	add		ip, r6, #3
	rev		lr, lr
	vmov		q1, q7
	vmov		s31, lr			@ set lane 3 of q2 via q7
	rev		ip, ip
	vmov		q2, q7
	vmov		s31, ip			@ set lane 3 of q3 via q7
	add		r6, r6, #4
	vmov		q3, q7

	vld1.8		{q4-q5}, [r1]!		@ q4, q5, q6, q15 = 4 input blocks
	vld1.8		{q6}, [r1]!
	vld1.8		{q15}, [r1]!
	bl		aes_encrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q15
	rev		ip, r6
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2-q3}, [r0]!
	vmov		s31, ip			@ q7 = counter for the next block
	b		.Lctrloop4x
.Lctr1x:
	adds		r4, r4, #4		@ undo the last subtraction
	beq		.Lctrout
.Lctrloop:
	vmov		q0, q7
	bl		aes_encrypt

	adds		r6, r6, #1		@ increment BE ctr
	rev		ip, r6
	vmov		s31, ip
	bcs		.Lctrcarry		@ carry from the adds above

.Lctrcarrydone:
	subs		r4, r4, #1
	bmi		.Lctrtailblock		@ blocks < 0 means tail block
	vld1.8		{q3}, [r1]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0]!
	bne		.Lctrloop		@ flags still from the subs above

.Lctrout:
	vst1.8		{q7}, [r5]		@ return next CTR value
	pop		{r4-r6, pc}

.Lctrtailblock:
	vst1.8		{q0}, [r0, :64]		@ return the key stream
	b		.Lctrout

.Lctrcarry:
	.irp		sreg, s30, s29, s28
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		.Lctrcarrydone
	.endr
	b		.Lctrcarrydone
ENDPROC(ce_aes_ctr_encrypt)

	/*
	 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *		      int rounds, int bytes, u8 iv[], u32 const rk2[],
	 *		      int first)
	 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *		      int rounds, int bytes, u8 iv[], u32 const rk2[],
	 *		      int first)
	 */

/*
 * next_tweak - derive the next XTS tweak by doubling a 128-bit value.
 *   out   : q register receiving the new tweak (may be the same as in)
 *   in    : q register holding the current tweak
 *   const : q register holding the mask built by ce_aes_xts_init, which is
 *           1 in the low 64-bit lane and 0x87 in the high 64-bit lane
 *   tmp   : scratch q register, clobbered
 * Each 64-bit lane is doubled separately. The top bit of each lane selects
 * its mask word, and the two selected words are swapped between lanes
 * before being xored in, so the low lane receives 0x87 when the high lane
 * overflowed and the high lane receives the carry out of the low lane.
 */
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63		@ all ones in lanes whose top bit is set
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in		@ shift each lane left by one bit
	vext.8		\tmp, \tmp, \tmp, #8	@ swap the two 64-bit lanes
	veor		\out, \out, \tmp
	.endm

/*
 * ce_aes_xts_init - shared prologue of the XTS entry points (non-AAPCS).
 * Must be called right after 'push {r4-r6, lr}', the stack offsets below
 * depend on exactly four saved registers.
 * On return:
 *   q15 = tweak mask (1 in the low 64-bit lane, 0x87 in the high one)
 *   r4  = bytes, r5 = iv
 *   q0  = iv as loaded when first != 1, otherwise the iv encrypted with
 *         the second key
 *   r6  = first when first != 1, otherwise the address of the second key
 * When first == 1 this tail calls into aes_encrypt, which returns to the
 * caller of ce_aes_xts_init and leaves q8, q9 and q14 holding round keys
 * of the second key.
 */
ce_aes_xts_init:
	vmov.i32	d30, #0x87		@ compose tweak mask vector
	vmovl.u32	q15, d30
	vshr.u64	d30, d31, #7

	ldrd		r4, r5, [sp, #16]	@ load args
	ldr		r6, [sp, #28]		@ r6 = first
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)

/*
 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
 *		      int bytes, u8 iv[], u32 const rk2[], int first)
 *
 * XTS-encrypt 'bytes' bytes from in to out. A trailing partial block is
 * handled with ciphertext stealing. The last tweak used is written to iv.
 *   r0 = out, r1 = in, r2 = rk1, r3 = rounds
 *   stack = bytes, iv, rk2, first (fetched by ce_aes_xts_init)
 * q4 holds the current tweak and q15 the tweak mask. After the shared
 * prologue r6 is zero only when first was 0, in which case the tweak read
 * from iv is advanced once before use.
 */
ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q4, q0

	teq		r6, #0			@ start of a block?
	bne		.Lxtsenc4x

.Lxtsencloop4x:
	next_tweak	q4, q4, q15, q10
.Lxtsenc4x:
	subs		r4, r4, #64
	bmi		.Lxtsenc1x		@ fewer than 64 bytes left
	vld1.8		{q0-q1}, [r1]!		@ get 4 pt blocks
	vld1.8		{q2-q3}, [r1]!
	next_tweak	q5, q4, q15, q10
	veor		q0, q0, q4
	next_tweak	q6, q5, q15, q10
	veor		q1, q1, q5
	next_tweak	q7, q6, q15, q10
	veor		q2, q2, q6
	veor		q3, q3, q7
	bl		aes_encrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q7
	vst1.8		{q0-q1}, [r0]!		@ write 4 ct blocks
	vst1.8		{q2-q3}, [r0]!
	vmov		q4, q7			@ q4 = last tweak used
	teq		r4, #0
	beq		.Lxtsencret
	b		.Lxtsencloop4x
.Lxtsenc1x:
	adds		r4, r4, #64		@ undo the last subtraction
	beq		.Lxtsencout
	subs		r4, r4, #16
	bmi		.LxtsencctsNx		@ only a partial block is left
.Lxtsencloop:
	vld1.8		{q0}, [r1]!
.Lxtsencctsout:
	veor		q0, q0, q4
	bl		aes_encrypt
	veor		q0, q0, q4
	teq		r4, #0
	beq		.Lxtsencout
	subs		r4, r4, #16
	next_tweak	q4, q4, q15, q6		@ does not touch the flags
	bmi		.Lxtsenccts		@ a partial block follows
	vst1.8		{q0}, [r0]!
	b		.Lxtsencloop
.Lxtsencout:
	vst1.8		{q0}, [r0]
.Lxtsencret:
	vst1.8		{q4}, [r5]		@ write the tweak back to iv
	pop		{r4-r6, pc}

.LxtsencctsNx:
	vmov		q0, q3			@ steal from the last block of the 4x batch
	sub		r0, r0, #16		@ ... which was already written to out
.Lxtsenccts:
	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table

	add		r1, r1, r4		@ rewind input pointer
	add		r4, r4, #16		@ # bytes in final block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	add		r4, r0, r4		@ output address of final block

	vld1.8		{q1}, [r1]		@ load final partial block
	vld1.8		{q2}, [ip]		@ permute vector at table + size
	vld1.8		{q3}, [lr]		@ permute vector at table + 32 - size

	vtbl.8		d4, {d0-d1}, d4		@ q2 = leading ct bytes moved to the top
	vtbl.8		d5, {d0-d1}, d5
	vtbx.8		d0, {d2-d3}, d6		@ splice the input tail over the start of q0
	vtbx.8		d1, {d2-d3}, d7

	vst1.8		{q2}, [r4]		@ overlapping stores
	mov		r4, #0			@ makes the loop exit after this block
	b		.Lxtsencctsout
ENDPROC(ce_aes_xts_encrypt)


/*
 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
 *		      int bytes, u8 iv[], u32 const rk2[], int first)
 *
 * XTS-decrypt 'bytes' bytes from in to out. A trailing partial block is
 * handled with ciphertext stealing. The tweak in q4 is written back to iv.
 *   r0 = out, r1 = in, r2 = rk1, r3 = rounds
 *   stack = bytes, iv, rk2, first (fetched by ce_aes_xts_init)
 * q4 holds the current tweak and q15 the tweak mask. After the shared
 * prologue r6 is zero only when first was 0, in which case the tweak read
 * from iv is advanced once before use.
 * When bytes is not a multiple of 16, one full block is held back from the
 * main loops. It is decrypted in .Lxtsdeccts with the tweak after the
 * current one (q5), and the spliced final block then uses the current
 * tweak (q4).
 */
ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q4, q0

	/* subtract 16 bytes if we are doing CTS */
	tst		r4, #0xf
	subne		r4, r4, #0x10

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec4x

.Lxtsdecloop4x:
	next_tweak	q4, q4, q15, q10
.Lxtsdec4x:
	subs		r4, r4, #64
	bmi		.Lxtsdec1x		@ fewer than 64 bytes left
	vld1.8		{q0-q1}, [r1]!		@ get 4 ct blocks
	vld1.8		{q2-q3}, [r1]!
	next_tweak	q5, q4, q15, q10
	veor		q0, q0, q4
	next_tweak	q6, q5, q15, q10
	veor		q1, q1, q5
	next_tweak	q7, q6, q15, q10
	veor		q2, q2, q6
	veor		q3, q3, q7
	bl		aes_decrypt_4x
	veor		q0, q0, q4
	veor		q1, q1, q5
	veor		q2, q2, q6
	veor		q3, q3, q7
	vst1.8		{q0-q1}, [r0]!		@ write 4 pt blocks
	vst1.8		{q2-q3}, [r0]!
	vmov		q4, q7			@ q4 = last tweak used
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop4x
.Lxtsdec1x:
	adds		r4, r4, #64		@ undo the last subtraction
	beq		.Lxtsdecout
	subs		r4, r4, #16
.Lxtsdecloop:
	vld1.8		{q0}, [r1]!		@ does not touch the flags
	bmi		.Lxtsdeccts		@ flags from the preceding subs
.Lxtsdecctsout:
	veor		q0, q0, q4
	bl		aes_decrypt
	veor		q0, q0, q4
	vst1.8		{q0}, [r0]!
	teq		r4, #0
	beq		.Lxtsdecout
	subs		r4, r4, #16
	next_tweak	q4, q4, q15, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q4}, [r5]		@ write the tweak back to iv
	pop		{r4-r6, pc}

.Lxtsdeccts:
	movw		ip, :lower16:.Lcts_permute_table
	movt		ip, :upper16:.Lcts_permute_table

	add		r1, r1, r4		@ rewind input pointer
	add		r4, r4, #16		@ # bytes in final block
	add		lr, ip, #32
	add		ip, ip, r4
	sub		lr, lr, r4
	add		r4, r0, r4		@ output address of final block

	next_tweak	q5, q4, q15, q6

	vld1.8		{q1}, [r1]		@ load final partial block
	vld1.8		{q2}, [ip]		@ permute vector at table + size
	vld1.8		{q3}, [lr]		@ permute vector at table + 32 - size

	veor		q0, q0, q5
	bl		aes_decrypt
	veor		q0, q0, q5

	vtbl.8		d4, {d0-d1}, d4		@ q2 = leading pt bytes moved to the top
	vtbl.8		d5, {d0-d1}, d5
	vtbx.8		d0, {d2-d3}, d6		@ splice the input tail over the start of q0
	vtbx.8		d1, {d2-d3}, d7

	vst1.8		{q2}, [r4]		@ overlapping stores
	mov		r4, #0			@ makes the loop exit after this block
	b		.Lxtsdecctsout
ENDPROC(ce_aes_xts_decrypt)

/*
 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
 *                             AES sbox substitution on each byte in
 *                             'input'
 *
 * The input word is replicated into all four lanes of q1 and used as the
 * round key against an all-zero state, then lane 0 of the result is
 * returned in r0. Clobbers q0 and q1.
 */
ENTRY(ce_aes_sub)
	vdup.32		q1, r0			@ q1 = input in every 32-bit lane
	veor		q0, q0, q0		@ q0 = 0
	aese.8		q0, q1
	vmov		r0, s0
	bx		lr
ENDPROC(ce_aes_sub)

/*
 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
 *                                        operation on round key *src
 *
 * Reads 16 bytes from src (r1), applies aesimc and writes the 16-byte
 * result to dst (r0). Clobbers q0.
 */
ENTRY(ce_aes_invert)
	vld1.32		{q0}, [r1]
	aesimc.8	q0, q0
	vst1.32		{q0}, [r0]
	bx		lr
ENDPROC(ce_aes_invert)

/*
 * Index table for the ciphertext stealing code. The users load a 16-byte
 * vector from a byte offset between 0 and 32 into this 48-byte table and
 * pass it to vtbl/vtbx as the index vector. The 0xff entries are out of
 * range for a 16-byte lookup table, so vtbl produces zero for them and
 * vtbx leaves the destination byte unchanged.
 */
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff