/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Bit sliced AES using NEON instructions
 *
 * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/*
 * The algorithm implemented here is described in detail by the paper
 * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
 * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
 *
 * This implementation is based primarily on the OpenSSL implementation
 * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text

	/*
	 * Register aliases shared by the 8-block core routines
	 * (aesbs_encrypt8/aesbs_decrypt8).  Callers such as __ecb_crypt
	 * load both before branching to a core routine; the core routines
	 * modify them.
	 */
	rounds		.req	x11		// AES round count
	bskey		.req	x12		// cursor into the converted key schedule

/*
 * in_bs_ch - XOR-only (linear) input stage of the bit-sliced S-box.
 *
 * Expanded first by the sbox macro, which passes complete vector operands
 * (e.g. v0.16b).  Going by the name this is presumably the input "basis
 * change" of the S-box construction - TODO confirm against the paper.
 *
 * b0-b7: bit-plane registers, transformed in place.  b0 is only read,
 *	  the other seven are overwritten.  No scratch registers are used.
 *
 * Later eor instructions consume the results of earlier ones, so the
 * instruction order is significant.
 */
	.macro		in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	eor		\b2, \b2, \b1
	eor		\b5, \b5, \b6
	eor		\b3, \b3, \b0
	eor		\b6, \b6, \b2
	eor		\b5, \b5, \b0
	eor		\b6, \b6, \b3
	eor		\b3, \b3, \b7
	eor		\b7, \b7, \b5
	eor		\b3, \b3, \b4
	eor		\b4, \b4, \b5
	eor		\b2, \b2, \b7
	eor		\b3, \b3, \b1
	eor		\b1, \b1, \b5
	.endm

/*
 * out_bs_ch - XOR-only (linear) output stage of the bit-sliced S-box.
 *
 * Expanded last by the sbox macro with complete vector operands
 * (e.g. v0.16b).  Presumably the output "basis change" matching
 * in_bs_ch - TODO confirm against the paper.
 *
 * b0-b7: bit-plane registers, transformed in place; all eight are
 *	  overwritten.  No scratch registers are used.
 *
 * The instruction order is significant (results feed later XORs).
 */
	.macro		out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	eor		\b0, \b0, \b6
	eor		\b1, \b1, \b4
	eor		\b4, \b4, \b6
	eor		\b2, \b2, \b0
	eor		\b6, \b6, \b1
	eor		\b1, \b1, \b5
	eor		\b5, \b5, \b3
	eor		\b3, \b3, \b7
	eor		\b7, \b7, \b5
	eor		\b2, \b2, \b5
	eor		\b4, \b4, \b7
	.endm

/*
 * inv_in_bs_ch - XOR-only (linear) input stage of the inverse S-box.
 *
 * Expanded first by the inv_sbox macro with complete vector operands.
 *
 * Note that the formal parameters are *named* in permuted order
 * (b6, b1, b2, b4, b7, b0, b3, b5): arguments are still bound by position,
 * so the first argument is referred to as b6 in the body, and so on.
 *
 * All eight registers are overwritten in place; no scratch registers are
 * used.  The instruction order is significant.
 */
	.macro		inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
	eor		\b1, \b1, \b7
	eor		\b4, \b4, \b7
	eor		\b7, \b7, \b5
	eor		\b1, \b1, \b3
	eor		\b2, \b2, \b5
	eor		\b3, \b3, \b7
	eor		\b6, \b6, \b1
	eor		\b2, \b2, \b0
	eor		\b5, \b5, \b3
	eor		\b4, \b4, \b6
	eor		\b0, \b0, \b6
	eor		\b1, \b1, \b4
	.endm

/*
 * inv_out_bs_ch - XOR-only (linear) output stage of the inverse S-box.
 *
 * Expanded last by the inv_sbox macro with complete vector operands.
 *
 * The formal parameters are *named* in permuted order
 * (b6, b5, b0, b3, b7, b1, b4, b2) but are bound by position.
 *
 * The register called b0 here (third argument) is only read; the other
 * seven are overwritten in place.  No scratch registers are used.
 * The instruction order is significant.
 */
	.macro		inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
	eor		\b1, \b1, \b5
	eor		\b2, \b2, \b7
	eor		\b3, \b3, \b1
	eor		\b4, \b4, \b5
	eor		\b7, \b7, \b5
	eor		\b3, \b3, \b4
	eor		\b5, \b5, \b0
	eor		\b3, \b3, \b7
	eor		\b6, \b6, \b2
	eor		\b2, \b2, \b1
	eor		\b6, \b6, \b3
	eor		\b3, \b3, \b0
	eor		\b5, \b5, \b6
	.endm

/*
 * mul_gf4 - bit-sliced two-bit multiply (by its name, presumably a
 * multiplication in GF(2^2)).
 *
 * x0, x1: first operand, replaced by the result
 * y0, y1: second operand, not modified
 * t0, t1: scratch
 *
 * Result, expressed in the values held on entry:
 *	x0 = ((x0 ^ x1) & y1) ^ (x1 & y0)
 *	x1 = (x1 & y0) ^ ((y0 ^ y1) & x0)
 */
	.macro		mul_gf4, x0, x1, y0, y1, t0, t1
	eor		\t0, \y0, \y1
	and		\t0, \t0, \x0			// t0 = (y0 ^ y1) & x0
	eor		\x0, \x0, \x1
	and		\t1, \x1, \y0			// t1 = x1 & y0
	and		\x0, \x0, \y1			// x0 = (x0 ^ x1) & y1
	eor		\x1, \t1, \t0
	eor		\x0, \x0, \t1
	.endm

/*
 * mul_gf4_n_gf4 - two independent two-bit multiplies, interleaved.
 *
 * First multiply:  operands (x0, x1) and (y0, y1), scratch t0
 * Second multiply: operands (x2, x3) and (y2, y3), scratch t1
 *
 * The x registers are replaced by the results; the y registers are not
 * modified.  Results, expressed in the values held on entry:
 *	x0 = ((x0 ^ x1) & y1) ^ ((y0 ^ y1) & x0)
 *	x1 = (x1 & y0) ^ ((x0 ^ x1) & y1)
 *	x2 = ((x2 ^ x3) & y3) ^ (x3 & y2)
 *	x3 = (x3 & y2) ^ ((y2 ^ y3) & x2)
 *
 * The second pair is the same function as mul_gf4; the first pair differs
 * from it (presumably the "_n" variant of the reference implementation,
 * i.e. a multiply combined with a fixed scaling - TODO confirm).
 */
	.macro		mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
	eor		\t0, \y0, \y1
	eor		\t1, \y2, \y3
	and		\t0, \t0, \x0
	and		\t1, \t1, \x2
	eor		\x0, \x0, \x1
	eor		\x2, \x2, \x3
	and		\x1, \x1, \y0
	and		\x3, \x3, \y2
	and		\x0, \x0, \y1
	and		\x2, \x2, \y3
	eor		\x1, \x1, \x0
	eor		\x2, \x2, \x3
	eor		\x0, \x0, \t0
	eor		\x3, \x3, \t1
	.endm

/*
 * mul_gf16_2 - multiply two four-register values by one common
 * four-register value (by its name, presumably two GF(2^4) multiplies),
 * built from mul_gf4 and mul_gf4_n_gf4.
 *
 * x0-x3: first operand, replaced by its result
 * x4-x7: second operand, replaced by its result
 * y0-y3: common multiplier.  y0/y1 are temporarily XORed with y2/y3 and
 *	  XORed back before the last mul_gf4, so all four hold their entry
 *	  values again on exit.
 * t0-t3: scratch
 */
	.macro		mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
				    y0, y1, y2, y3, t0, t1, t2, t3
	/* first operand: x0-x3 */
	eor		\t0, \x0, \x2
	eor		\t1, \x1, \x3
	mul_gf4		\x0, \x1, \y0, \y1, \t2, \t3
	eor		\y0, \y0, \y2			// y0/y1 ^= y2/y3 (undone below)
	eor		\y1, \y1, \y3
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
	eor		\x0, \x0, \t0
	eor		\x2, \x2, \t0
	eor		\x1, \x1, \t1
	eor		\x3, \x3, \t1
	/* second operand: x4-x7, same steps in a different order */
	eor		\t0, \x4, \x6
	eor		\t1, \x5, \x7
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
	eor		\y0, \y0, \y2			// restore y0/y1
	eor		\y1, \y1, \y3
	mul_gf4		\x4, \x5, \y0, \y1, \t2, \t3
	eor		\x4, \x4, \t0
	eor		\x6, \x6, \t0
	eor		\x5, \x5, \t1
	eor		\x7, \x7, \t1
	.endm

/*
 * inv_gf256 - non-linear core shared by the sbox and inv_sbox macros
 * (by its name, presumably the bit-sliced inversion in GF(2^8)).
 *
 * x0-x7: value, replaced in place by the result
 * t0-t3, s0-s3: scratch
 *
 * The straight-line part derives four registers from x0-x7 and leaves them
 * in s3, s2, s1 and t1; mul_gf16_2 then multiplies both halves of x
 * (x0-x3 and x4-x7) by that value, using s0, t0, t2, t3 as its scratch.
 *
 * bsl uses its destination register as the selector: each destination bit
 * picks the bit from the first source when set, else from the second.
 *
 * The instruction order is significant; registers are reused aggressively.
 */
	.macro		inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
				   t0, t1, t2, t3, s0, s1, s2, s3
	eor		\t3, \x4, \x6
	eor		\t0, \x5, \x7
	eor		\t1, \x1, \x3
	eor		\s1, \x7, \x6
	eor		\s0, \x0, \x2
	eor		\s3, \t3, \t0
	orr		\t2, \t0, \t1
	and		\s2, \t3, \s0
	orr		\t3, \t3, \s0
	eor		\s0, \s0, \t1
	and		\t0, \t0, \t1
	eor		\t1, \x3, \x2
	and		\s3, \s3, \s0
	and		\s1, \s1, \t1
	eor		\t1, \x4, \x5
	eor		\s0, \x1, \x0
	eor		\t3, \t3, \s1
	eor		\t2, \t2, \s1
	and		\s1, \t1, \s0
	orr		\t1, \t1, \s0
	eor		\t3, \t3, \s3
	eor		\t0, \t0, \s1
	eor		\t2, \t2, \s2
	eor		\t1, \t1, \s3
	eor		\t0, \t0, \s2
	and		\s0, \x7, \x3
	eor		\t1, \t1, \s2
	and		\s1, \x6, \x2
	and		\s2, \x5, \x1
	orr		\s3, \x4, \x0
	eor		\t3, \t3, \s0
	eor		\t1, \t1, \s2
	eor		\s0, \t0, \s3
	eor		\t2, \t2, \s1
	and		\s2, \t3, \t1
	eor		\s1, \t2, \s2
	eor		\s3, \s0, \s2
	bsl		\s1, \t1, \s0
	not		\t0, \s0
	bsl		\s0, \s1, \s3
	bsl		\t0, \s1, \s3
	bsl		\s3, \t3, \t2
	eor		\t3, \t3, \t2
	and		\s2, \s0, \s3
	eor		\t1, \t1, \t0
	eor		\s2, \s2, \t3
	mul_gf16_2	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
	.endm

/*
 * sbox - bit-sliced forward S-box on eight bit-plane registers.
 *
 * b0-b7: state registers given as bare names (e.g. v0); the macro appends
 *	  the .16b arrangement itself.  Modified in place.
 * t0-t3, s0-s3: scratch registers, also bare names
 *
 * Expands to in_bs_ch -> inv_gf256 -> out_bs_ch, permuting the registers
 * between the stages.  The result is NOT left in b0..b7 order: the callers
 * in this file continue with the planes in the order
 *	b0, b1, b4, b6, b3, b7, b2, b5
 * (see the mix_cols and final bitslice invocations in aesbs_encrypt8).
 */
	.macro		sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
			      t0, t1, t2, t3, s0, s1, s2, s3
	in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
	inv_gf256	\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
			\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
	out_bs_ch	\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
			\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
	.endm

/*
 * inv_sbox - bit-sliced inverse S-box on eight bit-plane registers.
 *
 * b0-b7: state registers given as bare names (e.g. v0); the macro appends
 *	  the .16b arrangement itself.  Modified in place.
 * t0-t3, s0-s3: scratch registers, also bare names
 *
 * Expands to inv_in_bs_ch -> inv_gf256 -> inv_out_bs_ch, permuting the
 * registers between the stages.  The result is NOT left in b0..b7 order:
 * the caller in this file continues with the planes in the order
 *	b0, b1, b6, b4, b2, b7, b3, b5
 * (see the add_round_key/inv_mix_cols invocations in aesbs_decrypt8).
 */
	.macro		inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
				  t0, t1, t2, t3, s0, s1, s2, s3
	inv_in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
	inv_gf256	\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
			\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
	inv_out_bs_ch	\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
			\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
	.endm

/*
 * enc_next_rk - load the next bit-sliced round key when walking the key
 * schedule forwards.
 *
 * Reads 128 bytes (eight 16-byte vectors) starting at bskey into q16-q23
 * and leaves bskey advanced by 128.  The first ldp post-increments bskey,
 * which is why the remaining loads use negative offsets.
 */
	.macro		enc_next_rk
	ldp		q16, q17, [bskey], #128
	ldp		q18, q19, [bskey, #-96]
	ldp		q20, q21, [bskey, #-64]
	ldp		q22, q23, [bskey, #-32]
	.endm

/*
 * dec_next_rk - load the next bit-sliced round key when walking the key
 * schedule backwards.
 *
 * Moves bskey back by 128 bytes (pre-indexed writeback on the first ldp)
 * and reads the 128 bytes (eight 16-byte vectors) at the new bskey into
 * q16-q23.
 */
	.macro		dec_next_rk
	ldp		q16, q17, [bskey, #-128]!
	ldp		q18, q19, [bskey, #32]
	ldp		q20, q21, [bskey, #64]
	ldp		q22, q23, [bskey, #96]
	.endm

/*
 * add_round_key - XOR the bit-sliced round key into the state.
 *
 * x0-x7: state registers as bare names (e.g. v0); argument i is XORed in
 *	  place with v(16 + i), i.e. with the vectors that
 *	  enc_next_rk/dec_next_rk load into q16-q23.
 */
	.macro		add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
	eor		\x0\().16b, \x0\().16b, v16.16b
	eor		\x1\().16b, \x1\().16b, v17.16b
	eor		\x2\().16b, \x2\().16b, v18.16b
	eor		\x3\().16b, \x3\().16b, v19.16b
	eor		\x4\().16b, \x4\().16b, v20.16b
	eor		\x5\().16b, \x5\().16b, v21.16b
	eor		\x6\().16b, \x6\().16b, v22.16b
	eor		\x7\().16b, \x7\().16b, v23.16b
	.endm

/*
 * shift_rows - apply one byte permutation to all eight state registers.
 *
 * x0-x7: state registers as bare names, permuted in place
 * mask:  register (bare name) holding the 16 tbl byte indices
 *
 * Each register is used as a one-register table indexed by \mask.  The
 * core routines in this file pass v24, loaded from SR/SRM0 (encryption)
 * or ISR/ISRM0 (decryption).
 */
	.macro		shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
	tbl		\x0\().16b, {\x0\().16b}, \mask\().16b
	tbl		\x1\().16b, {\x1\().16b}, \mask\().16b
	tbl		\x2\().16b, {\x2\().16b}, \mask\().16b
	tbl		\x3\().16b, {\x3\().16b}, \mask\().16b
	tbl		\x4\().16b, {\x4\().16b}, \mask\().16b
	tbl		\x5\().16b, {\x5\().16b}, \mask\().16b
	tbl		\x6\().16b, {\x6\().16b}, \mask\().16b
	tbl		\x7\().16b, {\x7\().16b}, \mask\().16b
	.endm

/*
 * mix_cols - bit-sliced column mixing step (by its name, AES MixColumns).
 *
 * x0-x7: state registers as bare names, modified in place
 * t0-t7: scratch registers as bare names
 * inv:	  optional.  Left blank by the encryption path; inv_mix_cols passes
 *	  1 to select the alternative final stage.
 *
 * "ext Vd, Vn, Vn, #k" with the same source twice rotates the 16-byte
 * vector by k bytes; the macro combines each plane with copies rotated by
 * 12 and by 8 bytes and with neighbouring planes.
 *
 * The final stage does not write results back to the argument they came
 * from.  Arguments 0 and 1 stay in place; for the others the result
 * derived from argument position p ends up in position q:
 *	inv blank:	2->6  3->4  4->2  5->7  6->3  7->5
 *	inv given:	2->4  3->6  4->3  5->7  6->2  7->5
 * In aesbs_encrypt8 this brings the planes back into plain v0..v7 order
 * after the permuted order left by sbox.
 */
	.macro		mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
				  t0, t1, t2, t3, t4, t5, t6, t7, inv
	/* t<i> = x<i> rotated by 12 bytes; x<i> ^= t<i> (interleaved) */
	ext		\t0\().16b, \x0\().16b, \x0\().16b, #12
	ext		\t1\().16b, \x1\().16b, \x1\().16b, #12
	eor		\x0\().16b, \x0\().16b, \t0\().16b
	ext		\t2\().16b, \x2\().16b, \x2\().16b, #12
	eor		\x1\().16b, \x1\().16b, \t1\().16b
	ext		\t3\().16b, \x3\().16b, \x3\().16b, #12
	eor		\x2\().16b, \x2\().16b, \t2\().16b
	ext		\t4\().16b, \x4\().16b, \x4\().16b, #12
	eor		\x3\().16b, \x3\().16b, \t3\().16b
	ext		\t5\().16b, \x5\().16b, \x5\().16b, #12
	eor		\x4\().16b, \x4\().16b, \t4\().16b
	ext		\t6\().16b, \x6\().16b, \x6\().16b, #12
	eor		\x5\().16b, \x5\().16b, \t5\().16b
	ext		\t7\().16b, \x7\().16b, \x7\().16b, #12
	eor		\x6\().16b, \x6\().16b, \t6\().16b
	/* fold in neighbouring planes and the 8-byte rotations */
	eor		\t1\().16b, \t1\().16b, \x0\().16b
	eor		\x7\().16b, \x7\().16b, \t7\().16b
	ext		\x0\().16b, \x0\().16b, \x0\().16b, #8
	eor		\t2\().16b, \t2\().16b, \x1\().16b
	eor		\t0\().16b, \t0\().16b, \x7\().16b
	eor		\t1\().16b, \t1\().16b, \x7\().16b
	ext		\x1\().16b, \x1\().16b, \x1\().16b, #8
	eor		\t5\().16b, \t5\().16b, \x4\().16b
	eor		\x0\().16b, \x0\().16b, \t0\().16b
	eor		\t6\().16b, \t6\().16b, \x5\().16b
	eor		\x1\().16b, \x1\().16b, \t1\().16b
	ext		\t0\().16b, \x4\().16b, \x4\().16b, #8
	eor		\t4\().16b, \t4\().16b, \x3\().16b
	ext		\t1\().16b, \x5\().16b, \x5\().16b, #8
	eor		\t7\().16b, \t7\().16b, \x6\().16b
	ext		\x4\().16b, \x3\().16b, \x3\().16b, #8
	eor		\t3\().16b, \t3\().16b, \x2\().16b
	ext		\x5\().16b, \x7\().16b, \x7\().16b, #8
	eor		\t4\().16b, \t4\().16b, \x7\().16b
	ext		\x3\().16b, \x6\().16b, \x6\().16b, #8
	eor		\t3\().16b, \t3\().16b, \x7\().16b
	ext		\x6\().16b, \x2\().16b, \x2\().16b, #8
	eor		\x7\().16b, \t1\().16b, \t5\().16b
	.ifb		\inv
	/* encryption: final combine and write-back */
	eor		\x2\().16b, \t0\().16b, \t4\().16b
	eor		\x4\().16b, \x4\().16b, \t3\().16b
	eor		\x5\().16b, \x5\().16b, \t7\().16b
	eor		\x3\().16b, \x3\().16b, \t6\().16b
	eor		\x6\().16b, \x6\().16b, \t2\().16b
	.else
	/* used from inv_mix_cols: same terms, different destinations */
	eor		\t3\().16b, \t3\().16b, \x4\().16b
	eor		\x5\().16b, \x5\().16b, \t7\().16b
	eor		\x2\().16b, \x3\().16b, \t6\().16b
	eor		\x3\().16b, \t0\().16b, \t4\().16b
	eor		\x4\().16b, \x6\().16b, \t2\().16b
	mov		\x6\().16b, \t3\().16b
	.endif
	.endm

/*
 * inv_mix_cols - bit-sliced inverse column mixing step.
 *
 * x0-x7: state registers as bare names, modified in place
 * t0-t7: scratch registers as bare names
 *
 * Implemented as a pre-processing pass followed by the ordinary mix_cols
 * with its inv argument set: each t<i> becomes x<i> XOR (x<i> rotated by
 * 8 bytes), selected t values are XORed into the state, and mix_cols then
 * finishes the job (and performs its "inv" write-back permutation).
 */
	.macro		inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
				      t0, t1, t2, t3, t4, t5, t6, t7
	/* t<i> = x<i> ^ (x<i> rotated by 8 bytes) */
	ext		\t0\().16b, \x0\().16b, \x0\().16b, #8
	ext		\t6\().16b, \x6\().16b, \x6\().16b, #8
	ext		\t7\().16b, \x7\().16b, \x7\().16b, #8
	eor		\t0\().16b, \t0\().16b, \x0\().16b
	ext		\t1\().16b, \x1\().16b, \x1\().16b, #8
	eor		\t6\().16b, \t6\().16b, \x6\().16b
	ext		\t2\().16b, \x2\().16b, \x2\().16b, #8
	eor		\t7\().16b, \t7\().16b, \x7\().16b
	ext		\t3\().16b, \x3\().16b, \x3\().16b, #8
	eor		\t1\().16b, \t1\().16b, \x1\().16b
	ext		\t4\().16b, \x4\().16b, \x4\().16b, #8
	eor		\t2\().16b, \t2\().16b, \x2\().16b
	ext		\t5\().16b, \x5\().16b, \x5\().16b, #8
	eor		\t3\().16b, \t3\().16b, \x3\().16b
	eor		\t4\().16b, \t4\().16b, \x4\().16b
	eor		\t5\().16b, \t5\().16b, \x5\().16b
	/* fold the t values into the state */
	eor		\x0\().16b, \x0\().16b, \t6\().16b
	eor		\x1\().16b, \x1\().16b, \t6\().16b
	eor		\x2\().16b, \x2\().16b, \t0\().16b
	eor		\x4\().16b, \x4\().16b, \t2\().16b
	eor		\x3\().16b, \x3\().16b, \t1\().16b
	eor		\x1\().16b, \x1\().16b, \t7\().16b
	eor		\x2\().16b, \x2\().16b, \t7\().16b
	eor		\x4\().16b, \x4\().16b, \t6\().16b
	eor		\x5\().16b, \x5\().16b, \t3\().16b
	eor		\x3\().16b, \x3\().16b, \t6\().16b
	eor		\x6\().16b, \x6\().16b, \t4\().16b
	eor		\x4\().16b, \x4\().16b, \t7\().16b
	eor		\x5\().16b, \x5\().16b, \t7\().16b
	eor		\x7\().16b, \x7\().16b, \t5\().16b
	mix_cols	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
	.endm

/*
 * swapmove_2x - exchange masked bit groups between two register pairs.
 *
 * For each pair (a, b) = (a0, b0) and (a1, b1), with shifts applied per
 * 64-bit lane:
 *	t  = ((b >> n) ^ a) & mask
 *	a ^= t
 *	b ^= t << n
 * i.e. the bits of a selected by mask are swapped with the bits of b
 * selected by (mask << n).
 *
 * a0, b0, a1, b1: registers as bare names, updated in place
 * n:		   shift distance (immediate)
 * mask:	   register (bare name) holding the bit mask, not modified
 * t0, t1:	   scratch registers as bare names
 */
	.macro		swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
	ushr		\t0\().2d, \b0\().2d, #\n
	ushr		\t1\().2d, \b1\().2d, #\n
	eor		\t0\().16b, \t0\().16b, \a0\().16b
	eor		\t1\().16b, \t1\().16b, \a1\().16b
	and		\t0\().16b, \t0\().16b, \mask\().16b
	and		\t1\().16b, \t1\().16b, \mask\().16b
	eor		\a0\().16b, \a0\().16b, \t0\().16b
	shl		\t0\().2d, \t0\().2d, #\n
	eor		\a1\().16b, \a1\().16b, \t1\().16b
	shl		\t1\().2d, \t1\().2d, #\n
	eor		\b0\().16b, \b0\().16b, \t0\().16b
	eor		\b1\().16b, \b1\().16b, \t1\().16b
	.endm

/*
 * bitslice - redistribute bits between eight registers using three
 * rounds of swapmove_2x (distance 1 with mask 0x55, distance 2 with mask
 * 0x33, distance 4 with mask 0x0f).  This is the conversion between the
 * byte-wise and the bit-sliced representation; the core routines use the
 * same macro on entry and on exit.
 *
 * x7..x0: state registers as bare names, modified in place.  Note that the
 *	   formal parameters are named in descending order, so the first
 *	   argument is x7.
 * t0, t1: receive the masks (t0: 0x55, later 0x0f; t1: 0x33)
 * t2, t3: scratch for swapmove_2x
 */
	.macro		bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
	movi		\t0\().16b, #0x55
	movi		\t1\().16b, #0x33
	swapmove_2x	\x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
	swapmove_2x	\x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
	movi		\t0\().16b, #0x0f		// 0x55 no longer needed
	swapmove_2x	\x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
	swapmove_2x	\x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
	swapmove_2x	\x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
	swapmove_2x	\x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
	.endm


/*
 * 16-byte index vectors for tbl.  They live in .text (no section switch
 * precedes them) and are fetched with pc-relative "ldr qN, <label>".
 * .align 6 requests 2^6 = 64 byte alignment.
 *
 * Uses within this file:
 *   M0		aesbs_convert_key: applied to every inner round key
 *   M0SR	aesbs_encrypt8: applied to the input after the first key XOR
 *   SR		aesbs_encrypt8: shift_rows mask inside the round loop
 *   SRM0	aesbs_encrypt8: shift_rows mask for the final iteration
 *   M0ISR, ISR, ISRM0: the same three roles in aesbs_decrypt8
 *
 * Presumably SR/ISR encode (inverse) ShiftRows, M0 the byte reordering
 * used by the bit-sliced layout, and the compound names their
 * compositions - TODO confirm.
 */
	.align		6
M0:	.octa		0x0004080c0105090d02060a0e03070b0f

M0SR:	.octa		0x0004080c05090d010a0e02060f03070b
SR:	.octa		0x0f0e0d0c0a09080b0504070600030201
SRM0:	.octa		0x01060b0c0207080d0304090e00050a0f

M0ISR:	.octa		0x0004080c0d0105090a0e0206070b0f03
ISR:	.octa		0x0f0e0d0c080b0a090504070602010003
ISRM0:	.octa		0x0306090c00070a0d01040b0e0205080f

/*
 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
 *
 * Convert an expanded key schedule into the layout consumed by
 * aesbs_encrypt8/aesbs_decrypt8.
 *
 * x0: out    - receives 16 + (rounds - 1) * 128 + 16 bytes
 * x1: rk     - rounds + 1 round keys of 16 bytes each are read
 * w2: rounds - only the low 32 bits are used, since the upper half of a
 *		register carrying an int argument is unspecified by AAPCS64
 *
 * Layout written to out:
 *   - the first round key, copied unchanged (16 bytes)
 *   - for each of the rounds - 1 inner round keys: the key is permuted
 *     with M0, then bit k (k = 0..7) of every byte is expanded into a
 *     vector of 0xff/0x00 bytes, giving eight vectors (128 bytes).
 *     Planes 0, 1, 5 and 6 - the bits set in 0x63 - are stored inverted.
 *   - the last round key with 0x63 XORed into every byte (16 bytes)
 *
 * Clobbers v0-v17 (v8-v15 are not saved here), x0-x2 and the flags.
 * The loop ends when the counter reaches exactly zero, so rounds must be
 * at least 2.
 */
SYM_FUNC_START(aesbs_convert_key)
	ld1		{v7.4s}, [x1], #16		// load round 0 key
	ld1		{v17.4s}, [x1], #16		// load round 1 key

	movi		v8.16b,  #0x01			// bit masks
	movi		v9.16b,  #0x02
	movi		v10.16b, #0x04
	movi		v11.16b, #0x08
	movi		v12.16b, #0x10
	movi		v13.16b, #0x20
	movi		v14.16b, #0x40
	movi		v15.16b, #0x80
	ldr		q16, M0

	sub		w2, w2, #1			// number of inner round keys
	str		q7, [x0], #16			// save round 0 key

.Lkey_loop:
	tbl		v7.16b, {v17.16b}, v16.16b	// permute current key with M0
	ld1		{v17.4s}, [x1], #16		// load next round key

	cmtst		v0.16b, v7.16b, v8.16b		// plane k: 0xff where bit k is set
	cmtst		v1.16b, v7.16b, v9.16b
	cmtst		v2.16b, v7.16b, v10.16b
	cmtst		v3.16b, v7.16b, v11.16b
	cmtst		v4.16b, v7.16b, v12.16b
	cmtst		v5.16b, v7.16b, v13.16b
	cmtst		v6.16b, v7.16b, v14.16b
	cmtst		v7.16b, v7.16b, v15.16b
	not		v0.16b, v0.16b			// invert planes 0, 1, 5, 6 (0x63)
	not		v1.16b, v1.16b
	not		v5.16b, v5.16b
	not		v6.16b, v6.16b

	subs		w2, w2, #1
	stp		q0, q1, [x0], #128		// post-increment, hence the
	stp		q2, q3, [x0, #-96]		// negative offsets below
	stp		q4, q5, [x0, #-64]
	stp		q6, q7, [x0, #-32]
	b.ne		.Lkey_loop

	movi		v7.16b, #0x63			// compose .L63
	eor		v17.16b, v17.16b, v7.16b
	str		q17, [x0]			// last round key
	ret
SYM_FUNC_END(aesbs_convert_key)

/*
 * aesbs_encrypt8 - encrypt eight blocks held in registers (file-local,
 * register-based calling convention).
 *
 * In:	v0-v7	eight 16-byte input blocks
 *	bskey	start of the key schedule written by aesbs_convert_key
 *	rounds	number of AES rounds
 * Out:	output blocks 0..7 in v0, v1, v4, v6, v3, v7, v2, v5 (in that order)
 *
 * Clobbers v8-v24, bskey, rounds and the flags.
 *
 * Structure: XOR with the first key and permute with M0SR, bitslice, then
 * one sbox per round.  Every round but the last is followed by mix_cols,
 * add_round_key and shift_rows; after the last sbox the state is
 * converted back with bitslice and the last key is XORed in.
 */
	.align		4
SYM_FUNC_START_LOCAL(aesbs_encrypt8)
	ldr		q9, [bskey], #16		// round 0 key
	ldr		q8, M0SR
	ldr		q24, SR

	eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
	eor		v11.16b, v1.16b, v9.16b
	tbl		v0.16b, {v10.16b}, v8.16b
	eor		v12.16b, v2.16b, v9.16b
	tbl		v1.16b, {v11.16b}, v8.16b
	eor		v13.16b, v3.16b, v9.16b
	tbl		v2.16b, {v12.16b}, v8.16b
	eor		v14.16b, v4.16b, v9.16b
	tbl		v3.16b, {v13.16b}, v8.16b
	eor		v15.16b, v5.16b, v9.16b
	tbl		v4.16b, {v14.16b}, v8.16b
	eor		v10.16b, v6.16b, v9.16b
	tbl		v5.16b, {v15.16b}, v8.16b
	eor		v11.16b, v7.16b, v9.16b
	tbl		v6.16b, {v10.16b}, v8.16b
	tbl		v7.16b, {v11.16b}, v8.16b

	bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

	sub		rounds, rounds, #1
	b		.Lenc_sbox			// no shift_rows before the first sbox

.Lenc_loop:
	shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
.Lenc_sbox:
	sbox		v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
								v13, v14, v15
	subs		rounds, rounds, #1
	b.cc		.Lenc_done			// borrow: that was the last sbox

	enc_next_rk

	/* sbox left the planes in the order v0, v1, v4, v6, v3, v7, v2, v5 */
	mix_cols	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
								v13, v14, v15

	add_round_key	v0, v1, v2, v3, v4, v5, v6, v7

	b.ne		.Lenc_loop			// flags still from the subs above
	ldr		q24, SRM0			// counter hit 0: mask for final pass
	b		.Lenc_loop

.Lenc_done:
	ldr		q12, [bskey]			// last round key

	bitslice	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11

	eor		v0.16b, v0.16b, v12.16b
	eor		v1.16b, v1.16b, v12.16b
	eor		v4.16b, v4.16b, v12.16b
	eor		v6.16b, v6.16b, v12.16b
	eor		v3.16b, v3.16b, v12.16b
	eor		v7.16b, v7.16b, v12.16b
	eor		v2.16b, v2.16b, v12.16b
	eor		v5.16b, v5.16b, v12.16b
	ret
SYM_FUNC_END(aesbs_encrypt8)

/*
 * aesbs_decrypt8 - decrypt eight blocks held in registers (file-local,
 * register-based calling convention).
 *
 * In:	v0-v7	eight 16-byte input blocks
 *	bskey	start of the key schedule written by aesbs_convert_key
 *	rounds	number of AES rounds
 * Out:	output blocks 0..7 in v0, v1, v6, v4, v2, v7, v3, v5 (in that order)
 *
 * Clobbers x9, v8-v24, bskey, rounds and the flags.
 *
 * The key schedule is walked backwards: bskey is first moved to the final
 * 16-byte entry (start + rounds * 128 - 112), dec_next_rk then steps back
 * 128 bytes per round, and the 16-byte entry at the very start of the
 * schedule is applied last.
 */
	.align		4
SYM_FUNC_START_LOCAL(aesbs_decrypt8)
	lsl		x9, rounds, #7			// rounds * 128
	add		bskey, bskey, x9

	ldr		q9, [bskey, #-112]!		// round 0 key (last schedule entry)
	ldr		q8, M0ISR
	ldr		q24, ISR

	eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
	eor		v11.16b, v1.16b, v9.16b
	tbl		v0.16b, {v10.16b}, v8.16b
	eor		v12.16b, v2.16b, v9.16b
	tbl		v1.16b, {v11.16b}, v8.16b
	eor		v13.16b, v3.16b, v9.16b
	tbl		v2.16b, {v12.16b}, v8.16b
	eor		v14.16b, v4.16b, v9.16b
	tbl		v3.16b, {v13.16b}, v8.16b
	eor		v15.16b, v5.16b, v9.16b
	tbl		v4.16b, {v14.16b}, v8.16b
	eor		v10.16b, v6.16b, v9.16b
	tbl		v5.16b, {v15.16b}, v8.16b
	eor		v11.16b, v7.16b, v9.16b
	tbl		v6.16b, {v10.16b}, v8.16b
	tbl		v7.16b, {v11.16b}, v8.16b

	bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

	sub		rounds, rounds, #1
	b		.Ldec_sbox			// no shift_rows before the first inv_sbox

.Ldec_loop:
	shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
.Ldec_sbox:
	inv_sbox	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
								v13, v14, v15
	subs		rounds, rounds, #1
	b.cc		.Ldec_done			// borrow: that was the last inv_sbox

	dec_next_rk

	/* inv_sbox left the planes in the order v0, v1, v6, v4, v2, v7, v3, v5 */
	add_round_key	v0, v1, v6, v4, v2, v7, v3, v5

	inv_mix_cols	v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
								v13, v14, v15

	b.ne		.Ldec_loop			// flags still from the subs above
	ldr		q24, ISRM0			// counter hit 0: mask for final pass
	b		.Ldec_loop
.Ldec_done:
	ldr		q12, [bskey, #-16]		// last round key (first schedule entry)

	bitslice	v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11

	eor		v0.16b, v0.16b, v12.16b
	eor		v1.16b, v1.16b, v12.16b
	eor		v6.16b, v6.16b, v12.16b
	eor		v4.16b, v4.16b, v12.16b
	eor		v2.16b, v2.16b, v12.16b
	eor		v7.16b, v7.16b, v12.16b
	eor		v3.16b, v3.16b, v12.16b
	eor		v5.16b, v5.16b, v12.16b
	ret
SYM_FUNC_END(aesbs_decrypt8)

/*
 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		     int blocks)
 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		     int blocks)
 *
 * __ecb_crypt - common body of both entry points.
 *
 * do8:	  8-block core routine to call (aesbs_encrypt8/aesbs_decrypt8)
 * o0-o7: registers (bare names) in which do8 returns blocks 0..7
 *
 * Arguments on entry: x0 = out, x1 = in, x2 = rk, w3 = rounds,
 * w4 = blocks.  Input is consumed in batches of up to eight blocks.  For a
 * final batch of fewer than eight blocks x5 holds 1 << blocks, and the
 * tbnz chains stop loading/storing at that bit; for a full batch x5 is 0.
 * x5 survives the call because neither core routine in this file touches
 * it.
 *
 * A call with blocks == 0 returns without touching memory.
 *
 * frame_push/frame_pop come from <asm/assembler.h>; with an argument of 5
 * they presumably preserve x19-x23 plus the frame record - TODO confirm.
 */
	.macro		__ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
	cbz		w4, 98f				// no blocks: nothing to do

	frame_push	5

	mov		x19, x0				// out
	mov		x20, x1				// in
	mov		x21, x2				// rk
	mov		w22, w3				// rounds (int: low 32 bits only)
	mov		x23, x4				// blocks

99:	mov		x5, #1
	lsl		x5, x5, x23			// 1 << blocks (low 6 bits of x23)
	subs		w23, w23, #8			// blocks left after this batch
	csel		x23, x23, xzr, pl		// clamp to 0 if it went negative
	csel		x5, x5, xzr, mi			// stop bit only for a short batch

	ld1		{v0.16b}, [x20], #16
	tbnz		x5, #1, 0f
	ld1		{v1.16b}, [x20], #16
	tbnz		x5, #2, 0f
	ld1		{v2.16b}, [x20], #16
	tbnz		x5, #3, 0f
	ld1		{v3.16b}, [x20], #16
	tbnz		x5, #4, 0f
	ld1		{v4.16b}, [x20], #16
	tbnz		x5, #5, 0f
	ld1		{v5.16b}, [x20], #16
	tbnz		x5, #6, 0f
	ld1		{v6.16b}, [x20], #16
	tbnz		x5, #7, 0f
	ld1		{v7.16b}, [x20], #16

0:	mov		bskey, x21			// core routines modify both,
	mov		rounds, x22			// so reload them per batch
	bl		\do8

	st1		{\o0\().16b}, [x19], #16
	tbnz		x5, #1, 1f
	st1		{\o1\().16b}, [x19], #16
	tbnz		x5, #2, 1f
	st1		{\o2\().16b}, [x19], #16
	tbnz		x5, #3, 1f
	st1		{\o3\().16b}, [x19], #16
	tbnz		x5, #4, 1f
	st1		{\o4\().16b}, [x19], #16
	tbnz		x5, #5, 1f
	st1		{\o5\().16b}, [x19], #16
	tbnz		x5, #6, 1f
	st1		{\o6\().16b}, [x19], #16
	tbnz		x5, #7, 1f
	st1		{\o7\().16b}, [x19], #16

	cbnz		x23, 99b			// more blocks: next batch

1:	frame_pop
98:	ret
	.endm

6228c2ecf20Sopenharmony_ci	.align		4
6238c2ecf20Sopenharmony_ciSYM_FUNC_START(aesbs_ecb_encrypt)
6248c2ecf20Sopenharmony_ci	__ecb_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
6258c2ecf20Sopenharmony_ciSYM_FUNC_END(aesbs_ecb_encrypt)
6268c2ecf20Sopenharmony_ci
6278c2ecf20Sopenharmony_ci	.align		4
6288c2ecf20Sopenharmony_ciSYM_FUNC_START(aesbs_ecb_decrypt)
6298c2ecf20Sopenharmony_ci	__ecb_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
6308c2ecf20Sopenharmony_ciSYM_FUNC_END(aesbs_ecb_decrypt)
6318c2ecf20Sopenharmony_ci
6328c2ecf20Sopenharmony_ci	/*
6338c2ecf20Sopenharmony_ci	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
6348c2ecf20Sopenharmony_ci	 *		     int blocks, u8 iv[])
6358c2ecf20Sopenharmony_ci	 */
6368c2ecf20Sopenharmony_ci	.align		4
6378c2ecf20Sopenharmony_ciSYM_FUNC_START(aesbs_cbc_decrypt)
6388c2ecf20Sopenharmony_ci	frame_push	6
6398c2ecf20Sopenharmony_ci
6408c2ecf20Sopenharmony_ci	mov		x19, x0
6418c2ecf20Sopenharmony_ci	mov		x20, x1
6428c2ecf20Sopenharmony_ci	mov		x21, x2
6438c2ecf20Sopenharmony_ci	mov		x22, x3
6448c2ecf20Sopenharmony_ci	mov		x23, x4
6458c2ecf20Sopenharmony_ci	mov		x24, x5
6468c2ecf20Sopenharmony_ci
6478c2ecf20Sopenharmony_ci99:	mov		x6, #1
6488c2ecf20Sopenharmony_ci	lsl		x6, x6, x23
6498c2ecf20Sopenharmony_ci	subs		w23, w23, #8
6508c2ecf20Sopenharmony_ci	csel		x23, x23, xzr, pl
6518c2ecf20Sopenharmony_ci	csel		x6, x6, xzr, mi
6528c2ecf20Sopenharmony_ci
6538c2ecf20Sopenharmony_ci	ld1		{v0.16b}, [x20], #16
6548c2ecf20Sopenharmony_ci	mov		v25.16b, v0.16b
6558c2ecf20Sopenharmony_ci	tbnz		x6, #1, 0f
6568c2ecf20Sopenharmony_ci	ld1		{v1.16b}, [x20], #16
6578c2ecf20Sopenharmony_ci	mov		v26.16b, v1.16b
6588c2ecf20Sopenharmony_ci	tbnz		x6, #2, 0f
6598c2ecf20Sopenharmony_ci	ld1		{v2.16b}, [x20], #16
6608c2ecf20Sopenharmony_ci	mov		v27.16b, v2.16b
6618c2ecf20Sopenharmony_ci	tbnz		x6, #3, 0f
6628c2ecf20Sopenharmony_ci	ld1		{v3.16b}, [x20], #16
6638c2ecf20Sopenharmony_ci	mov		v28.16b, v3.16b
6648c2ecf20Sopenharmony_ci	tbnz		x6, #4, 0f
6658c2ecf20Sopenharmony_ci	ld1		{v4.16b}, [x20], #16
6668c2ecf20Sopenharmony_ci	mov		v29.16b, v4.16b
6678c2ecf20Sopenharmony_ci	tbnz		x6, #5, 0f
6688c2ecf20Sopenharmony_ci	ld1		{v5.16b}, [x20], #16
6698c2ecf20Sopenharmony_ci	mov		v30.16b, v5.16b
6708c2ecf20Sopenharmony_ci	tbnz		x6, #6, 0f
6718c2ecf20Sopenharmony_ci	ld1		{v6.16b}, [x20], #16
6728c2ecf20Sopenharmony_ci	mov		v31.16b, v6.16b
6738c2ecf20Sopenharmony_ci	tbnz		x6, #7, 0f
6748c2ecf20Sopenharmony_ci	ld1		{v7.16b}, [x20]
6758c2ecf20Sopenharmony_ci
6768c2ecf20Sopenharmony_ci0:	mov		bskey, x21
6778c2ecf20Sopenharmony_ci	mov		rounds, x22
6788c2ecf20Sopenharmony_ci	bl		aesbs_decrypt8
6798c2ecf20Sopenharmony_ci
6808c2ecf20Sopenharmony_ci	ld1		{v24.16b}, [x24]		// load IV
6818c2ecf20Sopenharmony_ci
6828c2ecf20Sopenharmony_ci	eor		v1.16b, v1.16b, v25.16b
6838c2ecf20Sopenharmony_ci	eor		v6.16b, v6.16b, v26.16b
6848c2ecf20Sopenharmony_ci	eor		v4.16b, v4.16b, v27.16b
6858c2ecf20Sopenharmony_ci	eor		v2.16b, v2.16b, v28.16b
6868c2ecf20Sopenharmony_ci	eor		v7.16b, v7.16b, v29.16b
6878c2ecf20Sopenharmony_ci	eor		v0.16b, v0.16b, v24.16b
6888c2ecf20Sopenharmony_ci	eor		v3.16b, v3.16b, v30.16b
6898c2ecf20Sopenharmony_ci	eor		v5.16b, v5.16b, v31.16b
6908c2ecf20Sopenharmony_ci
6918c2ecf20Sopenharmony_ci	st1		{v0.16b}, [x19], #16
6928c2ecf20Sopenharmony_ci	mov		v24.16b, v25.16b
6938c2ecf20Sopenharmony_ci	tbnz		x6, #1, 1f
6948c2ecf20Sopenharmony_ci	st1		{v1.16b}, [x19], #16
6958c2ecf20Sopenharmony_ci	mov		v24.16b, v26.16b
6968c2ecf20Sopenharmony_ci	tbnz		x6, #2, 1f
6978c2ecf20Sopenharmony_ci	st1		{v6.16b}, [x19], #16
6988c2ecf20Sopenharmony_ci	mov		v24.16b, v27.16b
6998c2ecf20Sopenharmony_ci	tbnz		x6, #3, 1f
7008c2ecf20Sopenharmony_ci	st1		{v4.16b}, [x19], #16
7018c2ecf20Sopenharmony_ci	mov		v24.16b, v28.16b
7028c2ecf20Sopenharmony_ci	tbnz		x6, #4, 1f
7038c2ecf20Sopenharmony_ci	st1		{v2.16b}, [x19], #16
7048c2ecf20Sopenharmony_ci	mov		v24.16b, v29.16b
7058c2ecf20Sopenharmony_ci	tbnz		x6, #5, 1f
7068c2ecf20Sopenharmony_ci	st1		{v7.16b}, [x19], #16
7078c2ecf20Sopenharmony_ci	mov		v24.16b, v30.16b
7088c2ecf20Sopenharmony_ci	tbnz		x6, #6, 1f
7098c2ecf20Sopenharmony_ci	st1		{v3.16b}, [x19], #16
7108c2ecf20Sopenharmony_ci	mov		v24.16b, v31.16b
7118c2ecf20Sopenharmony_ci	tbnz		x6, #7, 1f
7128c2ecf20Sopenharmony_ci	ld1		{v24.16b}, [x20], #16
7138c2ecf20Sopenharmony_ci	st1		{v5.16b}, [x19], #16
7148c2ecf20Sopenharmony_ci1:	st1		{v24.16b}, [x24]		// store IV
7158c2ecf20Sopenharmony_ci
7168c2ecf20Sopenharmony_ci	cbz		x23, 2f
7178c2ecf20Sopenharmony_ci	b		99b
7188c2ecf20Sopenharmony_ci
7198c2ecf20Sopenharmony_ci2:	frame_pop
7208c2ecf20Sopenharmony_ci	ret
7218c2ecf20Sopenharmony_ciSYM_FUNC_END(aesbs_cbc_decrypt)
7228c2ecf20Sopenharmony_ci
7238c2ecf20Sopenharmony_ci	.macro		next_tweak, out, in, const, tmp
7248c2ecf20Sopenharmony_ci	sshr		\tmp\().2d,  \in\().2d,   #63
7258c2ecf20Sopenharmony_ci	and		\tmp\().16b, \tmp\().16b, \const\().16b
7268c2ecf20Sopenharmony_ci	add		\out\().2d,  \in\().2d,   \in\().2d
7278c2ecf20Sopenharmony_ci	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
7288c2ecf20Sopenharmony_ci	eor		\out\().16b, \out\().16b, \tmp\().16b
7298c2ecf20Sopenharmony_ci	.endm
7308c2ecf20Sopenharmony_ci
7318c2ecf20Sopenharmony_ci	/*
7328c2ecf20Sopenharmony_ci	 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
7338c2ecf20Sopenharmony_ci	 *		     int blocks, u8 iv[])
7348c2ecf20Sopenharmony_ci	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
7358c2ecf20Sopenharmony_ci	 *		     int blocks, u8 iv[])
7368c2ecf20Sopenharmony_ci	 */
7378c2ecf20Sopenharmony_ciSYM_FUNC_START_LOCAL(__xts_crypt8)
7388c2ecf20Sopenharmony_ci	mov		x6, #1
7398c2ecf20Sopenharmony_ci	lsl		x6, x6, x23
7408c2ecf20Sopenharmony_ci	subs		w23, w23, #8
7418c2ecf20Sopenharmony_ci	csel		x23, x23, xzr, pl
7428c2ecf20Sopenharmony_ci	csel		x6, x6, xzr, mi
7438c2ecf20Sopenharmony_ci
7448c2ecf20Sopenharmony_ci	ld1		{v0.16b}, [x20], #16
7458c2ecf20Sopenharmony_ci	next_tweak	v26, v25, v30, v31
7468c2ecf20Sopenharmony_ci	eor		v0.16b, v0.16b, v25.16b
7478c2ecf20Sopenharmony_ci	tbnz		x6, #1, 0f
7488c2ecf20Sopenharmony_ci
7498c2ecf20Sopenharmony_ci	ld1		{v1.16b}, [x20], #16
7508c2ecf20Sopenharmony_ci	next_tweak	v27, v26, v30, v31
7518c2ecf20Sopenharmony_ci	eor		v1.16b, v1.16b, v26.16b
7528c2ecf20Sopenharmony_ci	tbnz		x6, #2, 0f
7538c2ecf20Sopenharmony_ci
7548c2ecf20Sopenharmony_ci	ld1		{v2.16b}, [x20], #16
7558c2ecf20Sopenharmony_ci	next_tweak	v28, v27, v30, v31
7568c2ecf20Sopenharmony_ci	eor		v2.16b, v2.16b, v27.16b
7578c2ecf20Sopenharmony_ci	tbnz		x6, #3, 0f
7588c2ecf20Sopenharmony_ci
7598c2ecf20Sopenharmony_ci	ld1		{v3.16b}, [x20], #16
7608c2ecf20Sopenharmony_ci	next_tweak	v29, v28, v30, v31
7618c2ecf20Sopenharmony_ci	eor		v3.16b, v3.16b, v28.16b
7628c2ecf20Sopenharmony_ci	tbnz		x6, #4, 0f
7638c2ecf20Sopenharmony_ci
7648c2ecf20Sopenharmony_ci	ld1		{v4.16b}, [x20], #16
7658c2ecf20Sopenharmony_ci	str		q29, [sp, #.Lframe_local_offset]
7668c2ecf20Sopenharmony_ci	eor		v4.16b, v4.16b, v29.16b
7678c2ecf20Sopenharmony_ci	next_tweak	v29, v29, v30, v31
7688c2ecf20Sopenharmony_ci	tbnz		x6, #5, 0f
7698c2ecf20Sopenharmony_ci
7708c2ecf20Sopenharmony_ci	ld1		{v5.16b}, [x20], #16
7718c2ecf20Sopenharmony_ci	str		q29, [sp, #.Lframe_local_offset + 16]
7728c2ecf20Sopenharmony_ci	eor		v5.16b, v5.16b, v29.16b
7738c2ecf20Sopenharmony_ci	next_tweak	v29, v29, v30, v31
7748c2ecf20Sopenharmony_ci	tbnz		x6, #6, 0f
7758c2ecf20Sopenharmony_ci
7768c2ecf20Sopenharmony_ci	ld1		{v6.16b}, [x20], #16
7778c2ecf20Sopenharmony_ci	str		q29, [sp, #.Lframe_local_offset + 32]
7788c2ecf20Sopenharmony_ci	eor		v6.16b, v6.16b, v29.16b
7798c2ecf20Sopenharmony_ci	next_tweak	v29, v29, v30, v31
7808c2ecf20Sopenharmony_ci	tbnz		x6, #7, 0f
7818c2ecf20Sopenharmony_ci
7828c2ecf20Sopenharmony_ci	ld1		{v7.16b}, [x20], #16
7838c2ecf20Sopenharmony_ci	str		q29, [sp, #.Lframe_local_offset + 48]
7848c2ecf20Sopenharmony_ci	eor		v7.16b, v7.16b, v29.16b
7858c2ecf20Sopenharmony_ci	next_tweak	v29, v29, v30, v31
7868c2ecf20Sopenharmony_ci
7878c2ecf20Sopenharmony_ci0:	mov		bskey, x21
7888c2ecf20Sopenharmony_ci	mov		rounds, x22
7898c2ecf20Sopenharmony_ci	br		x16
7908c2ecf20Sopenharmony_ciSYM_FUNC_END(__xts_crypt8)
7918c2ecf20Sopenharmony_ci
7928c2ecf20Sopenharmony_ci	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
7938c2ecf20Sopenharmony_ci	frame_push	6, 64
7948c2ecf20Sopenharmony_ci
7958c2ecf20Sopenharmony_ci	mov		x19, x0
7968c2ecf20Sopenharmony_ci	mov		x20, x1
7978c2ecf20Sopenharmony_ci	mov		x21, x2
7988c2ecf20Sopenharmony_ci	mov		x22, x3
7998c2ecf20Sopenharmony_ci	mov		x23, x4
8008c2ecf20Sopenharmony_ci	mov		x24, x5
8018c2ecf20Sopenharmony_ci
8028c2ecf20Sopenharmony_ci	movi		v30.2s, #0x1
8038c2ecf20Sopenharmony_ci	movi		v25.2s, #0x87
8048c2ecf20Sopenharmony_ci	uzp1		v30.4s, v30.4s, v25.4s
8058c2ecf20Sopenharmony_ci	ld1		{v25.16b}, [x24]
8068c2ecf20Sopenharmony_ci
8078c2ecf20Sopenharmony_ci99:	adr		x16, \do8
8088c2ecf20Sopenharmony_ci	bl		__xts_crypt8
8098c2ecf20Sopenharmony_ci
8108c2ecf20Sopenharmony_ci	ldp		q16, q17, [sp, #.Lframe_local_offset]
8118c2ecf20Sopenharmony_ci	ldp		q18, q19, [sp, #.Lframe_local_offset + 32]
8128c2ecf20Sopenharmony_ci
8138c2ecf20Sopenharmony_ci	eor		\o0\().16b, \o0\().16b, v25.16b
8148c2ecf20Sopenharmony_ci	eor		\o1\().16b, \o1\().16b, v26.16b
8158c2ecf20Sopenharmony_ci	eor		\o2\().16b, \o2\().16b, v27.16b
8168c2ecf20Sopenharmony_ci	eor		\o3\().16b, \o3\().16b, v28.16b
8178c2ecf20Sopenharmony_ci
8188c2ecf20Sopenharmony_ci	st1		{\o0\().16b}, [x19], #16
8198c2ecf20Sopenharmony_ci	mov		v25.16b, v26.16b
8208c2ecf20Sopenharmony_ci	tbnz		x6, #1, 1f
8218c2ecf20Sopenharmony_ci	st1		{\o1\().16b}, [x19], #16
8228c2ecf20Sopenharmony_ci	mov		v25.16b, v27.16b
8238c2ecf20Sopenharmony_ci	tbnz		x6, #2, 1f
8248c2ecf20Sopenharmony_ci	st1		{\o2\().16b}, [x19], #16
8258c2ecf20Sopenharmony_ci	mov		v25.16b, v28.16b
8268c2ecf20Sopenharmony_ci	tbnz		x6, #3, 1f
8278c2ecf20Sopenharmony_ci	st1		{\o3\().16b}, [x19], #16
8288c2ecf20Sopenharmony_ci	mov		v25.16b, v29.16b
8298c2ecf20Sopenharmony_ci	tbnz		x6, #4, 1f
8308c2ecf20Sopenharmony_ci
8318c2ecf20Sopenharmony_ci	eor		\o4\().16b, \o4\().16b, v16.16b
8328c2ecf20Sopenharmony_ci	eor		\o5\().16b, \o5\().16b, v17.16b
8338c2ecf20Sopenharmony_ci	eor		\o6\().16b, \o6\().16b, v18.16b
8348c2ecf20Sopenharmony_ci	eor		\o7\().16b, \o7\().16b, v19.16b
8358c2ecf20Sopenharmony_ci
8368c2ecf20Sopenharmony_ci	st1		{\o4\().16b}, [x19], #16
8378c2ecf20Sopenharmony_ci	tbnz		x6, #5, 1f
8388c2ecf20Sopenharmony_ci	st1		{\o5\().16b}, [x19], #16
8398c2ecf20Sopenharmony_ci	tbnz		x6, #6, 1f
8408c2ecf20Sopenharmony_ci	st1		{\o6\().16b}, [x19], #16
8418c2ecf20Sopenharmony_ci	tbnz		x6, #7, 1f
8428c2ecf20Sopenharmony_ci	st1		{\o7\().16b}, [x19], #16
8438c2ecf20Sopenharmony_ci
8448c2ecf20Sopenharmony_ci	cbz		x23, 1f
8458c2ecf20Sopenharmony_ci	st1		{v25.16b}, [x24]
8468c2ecf20Sopenharmony_ci
8478c2ecf20Sopenharmony_ci	b		99b
8488c2ecf20Sopenharmony_ci
8498c2ecf20Sopenharmony_ci1:	st1		{v25.16b}, [x24]
8508c2ecf20Sopenharmony_ci	frame_pop
8518c2ecf20Sopenharmony_ci	ret
8528c2ecf20Sopenharmony_ci	.endm
8538c2ecf20Sopenharmony_ci
/*
 * XTS encryption entry point: __xts_crypt around aesbs_encrypt8.
 * Blocks 0..7 are written out from v0, v1, v4, v6, v3, v7, v2, v5.
 */
SYM_FUNC_START(aesbs_xts_encrypt)
	__xts_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
SYM_FUNC_END(aesbs_xts_encrypt)
8578c2ecf20Sopenharmony_ci
/*
 * XTS decryption entry point: __xts_crypt around aesbs_decrypt8.
 * Blocks 0..7 are written out from v0, v1, v6, v4, v2, v7, v3, v5.
 */
SYM_FUNC_START(aesbs_xts_decrypt)
	__xts_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
SYM_FUNC_END(aesbs_xts_decrypt)
8618c2ecf20Sopenharmony_ci
8628c2ecf20Sopenharmony_ci	.macro		next_ctr, v
8638c2ecf20Sopenharmony_ci	mov		\v\().d[1], x8
8648c2ecf20Sopenharmony_ci	adds		x8, x8, #1
8658c2ecf20Sopenharmony_ci	mov		\v\().d[0], x7
8668c2ecf20Sopenharmony_ci	adc		x7, x7, xzr
8678c2ecf20Sopenharmony_ci	rev64		\v\().16b, \v\().16b
8688c2ecf20Sopenharmony_ci	.endm
8698c2ecf20Sopenharmony_ci
8708c2ecf20Sopenharmony_ci	/*
8718c2ecf20Sopenharmony_ci	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
8728c2ecf20Sopenharmony_ci	 *		     int rounds, int blocks, u8 iv[], u8 final[])
8738c2ecf20Sopenharmony_ci	 */
8748c2ecf20Sopenharmony_ciSYM_FUNC_START(aesbs_ctr_encrypt)
8758c2ecf20Sopenharmony_ci	frame_push	8
8768c2ecf20Sopenharmony_ci
8778c2ecf20Sopenharmony_ci	mov		x19, x0
8788c2ecf20Sopenharmony_ci	mov		x20, x1
8798c2ecf20Sopenharmony_ci	mov		x21, x2
8808c2ecf20Sopenharmony_ci	mov		x22, x3
8818c2ecf20Sopenharmony_ci	mov		x23, x4
8828c2ecf20Sopenharmony_ci	mov		x24, x5
8838c2ecf20Sopenharmony_ci	mov		x25, x6
8848c2ecf20Sopenharmony_ci
8858c2ecf20Sopenharmony_ci	cmp		x25, #0
8868c2ecf20Sopenharmony_ci	cset		x26, ne
8878c2ecf20Sopenharmony_ci	add		x23, x23, x26		// do one extra block if final
8888c2ecf20Sopenharmony_ci
8898c2ecf20Sopenharmony_ci	ldp		x7, x8, [x24]
8908c2ecf20Sopenharmony_ci	ld1		{v0.16b}, [x24]
8918c2ecf20Sopenharmony_ciCPU_LE(	rev		x7, x7		)
8928c2ecf20Sopenharmony_ciCPU_LE(	rev		x8, x8		)
8938c2ecf20Sopenharmony_ci	adds		x8, x8, #1
8948c2ecf20Sopenharmony_ci	adc		x7, x7, xzr
8958c2ecf20Sopenharmony_ci
8968c2ecf20Sopenharmony_ci99:	mov		x9, #1
8978c2ecf20Sopenharmony_ci	lsl		x9, x9, x23
8988c2ecf20Sopenharmony_ci	subs		w23, w23, #8
8998c2ecf20Sopenharmony_ci	csel		x23, x23, xzr, pl
9008c2ecf20Sopenharmony_ci	csel		x9, x9, xzr, le
9018c2ecf20Sopenharmony_ci
9028c2ecf20Sopenharmony_ci	tbnz		x9, #1, 0f
9038c2ecf20Sopenharmony_ci	next_ctr	v1
9048c2ecf20Sopenharmony_ci	tbnz		x9, #2, 0f
9058c2ecf20Sopenharmony_ci	next_ctr	v2
9068c2ecf20Sopenharmony_ci	tbnz		x9, #3, 0f
9078c2ecf20Sopenharmony_ci	next_ctr	v3
9088c2ecf20Sopenharmony_ci	tbnz		x9, #4, 0f
9098c2ecf20Sopenharmony_ci	next_ctr	v4
9108c2ecf20Sopenharmony_ci	tbnz		x9, #5, 0f
9118c2ecf20Sopenharmony_ci	next_ctr	v5
9128c2ecf20Sopenharmony_ci	tbnz		x9, #6, 0f
9138c2ecf20Sopenharmony_ci	next_ctr	v6
9148c2ecf20Sopenharmony_ci	tbnz		x9, #7, 0f
9158c2ecf20Sopenharmony_ci	next_ctr	v7
9168c2ecf20Sopenharmony_ci
9178c2ecf20Sopenharmony_ci0:	mov		bskey, x21
9188c2ecf20Sopenharmony_ci	mov		rounds, x22
9198c2ecf20Sopenharmony_ci	bl		aesbs_encrypt8
9208c2ecf20Sopenharmony_ci
9218c2ecf20Sopenharmony_ci	lsr		x9, x9, x26		// disregard the extra block
9228c2ecf20Sopenharmony_ci	tbnz		x9, #0, 0f
9238c2ecf20Sopenharmony_ci
9248c2ecf20Sopenharmony_ci	ld1		{v8.16b}, [x20], #16
9258c2ecf20Sopenharmony_ci	eor		v0.16b, v0.16b, v8.16b
9268c2ecf20Sopenharmony_ci	st1		{v0.16b}, [x19], #16
9278c2ecf20Sopenharmony_ci	tbnz		x9, #1, 1f
9288c2ecf20Sopenharmony_ci
9298c2ecf20Sopenharmony_ci	ld1		{v9.16b}, [x20], #16
9308c2ecf20Sopenharmony_ci	eor		v1.16b, v1.16b, v9.16b
9318c2ecf20Sopenharmony_ci	st1		{v1.16b}, [x19], #16
9328c2ecf20Sopenharmony_ci	tbnz		x9, #2, 2f
9338c2ecf20Sopenharmony_ci
9348c2ecf20Sopenharmony_ci	ld1		{v10.16b}, [x20], #16
9358c2ecf20Sopenharmony_ci	eor		v4.16b, v4.16b, v10.16b
9368c2ecf20Sopenharmony_ci	st1		{v4.16b}, [x19], #16
9378c2ecf20Sopenharmony_ci	tbnz		x9, #3, 3f
9388c2ecf20Sopenharmony_ci
9398c2ecf20Sopenharmony_ci	ld1		{v11.16b}, [x20], #16
9408c2ecf20Sopenharmony_ci	eor		v6.16b, v6.16b, v11.16b
9418c2ecf20Sopenharmony_ci	st1		{v6.16b}, [x19], #16
9428c2ecf20Sopenharmony_ci	tbnz		x9, #4, 4f
9438c2ecf20Sopenharmony_ci
9448c2ecf20Sopenharmony_ci	ld1		{v12.16b}, [x20], #16
9458c2ecf20Sopenharmony_ci	eor		v3.16b, v3.16b, v12.16b
9468c2ecf20Sopenharmony_ci	st1		{v3.16b}, [x19], #16
9478c2ecf20Sopenharmony_ci	tbnz		x9, #5, 5f
9488c2ecf20Sopenharmony_ci
9498c2ecf20Sopenharmony_ci	ld1		{v13.16b}, [x20], #16
9508c2ecf20Sopenharmony_ci	eor		v7.16b, v7.16b, v13.16b
9518c2ecf20Sopenharmony_ci	st1		{v7.16b}, [x19], #16
9528c2ecf20Sopenharmony_ci	tbnz		x9, #6, 6f
9538c2ecf20Sopenharmony_ci
9548c2ecf20Sopenharmony_ci	ld1		{v14.16b}, [x20], #16
9558c2ecf20Sopenharmony_ci	eor		v2.16b, v2.16b, v14.16b
9568c2ecf20Sopenharmony_ci	st1		{v2.16b}, [x19], #16
9578c2ecf20Sopenharmony_ci	tbnz		x9, #7, 7f
9588c2ecf20Sopenharmony_ci
9598c2ecf20Sopenharmony_ci	ld1		{v15.16b}, [x20], #16
9608c2ecf20Sopenharmony_ci	eor		v5.16b, v5.16b, v15.16b
9618c2ecf20Sopenharmony_ci	st1		{v5.16b}, [x19], #16
9628c2ecf20Sopenharmony_ci
9638c2ecf20Sopenharmony_ci8:	next_ctr	v0
9648c2ecf20Sopenharmony_ci	st1		{v0.16b}, [x24]
9658c2ecf20Sopenharmony_ci	cbz		x23, .Lctr_done
9668c2ecf20Sopenharmony_ci
9678c2ecf20Sopenharmony_ci	b		99b
9688c2ecf20Sopenharmony_ci
9698c2ecf20Sopenharmony_ci.Lctr_done:
9708c2ecf20Sopenharmony_ci	frame_pop
9718c2ecf20Sopenharmony_ci	ret
9728c2ecf20Sopenharmony_ci
9738c2ecf20Sopenharmony_ci	/*
9748c2ecf20Sopenharmony_ci	 * If we are handling the tail of the input (x6 != NULL), return the
9758c2ecf20Sopenharmony_ci	 * final keystream block back to the caller.
9768c2ecf20Sopenharmony_ci	 */
9778c2ecf20Sopenharmony_ci0:	cbz		x25, 8b
9788c2ecf20Sopenharmony_ci	st1		{v0.16b}, [x25]
9798c2ecf20Sopenharmony_ci	b		8b
9808c2ecf20Sopenharmony_ci1:	cbz		x25, 8b
9818c2ecf20Sopenharmony_ci	st1		{v1.16b}, [x25]
9828c2ecf20Sopenharmony_ci	b		8b
9838c2ecf20Sopenharmony_ci2:	cbz		x25, 8b
9848c2ecf20Sopenharmony_ci	st1		{v4.16b}, [x25]
9858c2ecf20Sopenharmony_ci	b		8b
9868c2ecf20Sopenharmony_ci3:	cbz		x25, 8b
9878c2ecf20Sopenharmony_ci	st1		{v6.16b}, [x25]
9888c2ecf20Sopenharmony_ci	b		8b
9898c2ecf20Sopenharmony_ci4:	cbz		x25, 8b
9908c2ecf20Sopenharmony_ci	st1		{v3.16b}, [x25]
9918c2ecf20Sopenharmony_ci	b		8b
9928c2ecf20Sopenharmony_ci5:	cbz		x25, 8b
9938c2ecf20Sopenharmony_ci	st1		{v7.16b}, [x25]
9948c2ecf20Sopenharmony_ci	b		8b
9958c2ecf20Sopenharmony_ci6:	cbz		x25, 8b
9968c2ecf20Sopenharmony_ci	st1		{v2.16b}, [x25]
9978c2ecf20Sopenharmony_ci	b		8b
9988c2ecf20Sopenharmony_ci7:	cbz		x25, 8b
9998c2ecf20Sopenharmony_ci	st1		{v5.16b}, [x25]
10008c2ecf20Sopenharmony_ci	b		8b
10018c2ecf20Sopenharmony_ciSYM_FUNC_END(aesbs_ctr_encrypt)
1002