/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Entry points built from this file get a neon_ prefix on their symbol name. */
#define AES_FUNC_START(func)		SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)		SYM_FUNC_END(neon_ ## func)

	/*
	 * Register aliases. Note that xtsmask and cbciv both name v7.
	 * NOTE(review): the aliases are not used in the macros of this file;
	 * presumably they are consumed by the mode code included later -
	 * confirm there.
	 */
	xtsmask		.req	v7
	cbciv		.req	v7
	vctr		.req	v4

	/*
	 * Thin wrapper that expands to xts_load_mask with the same scratch
	 * argument. xts_load_mask is not defined in this file.
	 */
	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

	/* special case for the neon-bs driver calling into this one for CTS */
	.macro		xts_cts_skip_tw, reg, lbl
	tbnz		\reg, #1, \lbl		/* branch to lbl if bit 1 of reg is set */
	.endm

	/*
	 * multiply by polynomial 'x' in GF(2^8)
	 *
	 * out   - destination vector operand
	 * in    - source vector operand
	 * temp  - scratch vector operand, overwritten
	 * const - vector operand holding the reduction constant
	 *
	 * The operands are used verbatim, so the caller supplies the
	 * arrangement suffix (the users in this file pass .16b operands and
	 * a constant of 0x1b in every byte).
	 */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7		/* all-ones where the top bit is set, else 0 */
	shl		\out, \in, #1		/* multiply by x, dropping the top bit */
	and		\temp, \temp, \const	/* keep the constant only where it overflowed */
	eor		\out, \out, \temp	/* fold the reduction back in */
	.endm

	/*
	 * multiply by polynomial 'x^2' in GF(2^8)
	 *
	 * Same operand convention as mul_by_x. The two bits shifted out at
	 * the top are reduced with a polynomial (carry-less) multiply by the
	 * constant instead of a mask.
	 */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6		/* the two bits that overflow, as 0..3 */
	shl		\out, \in, #2		/* multiply by x^2, dropping the top bits */
	pmul		\temp, \temp, \const	/* carry-less product with the constant */
	eor		\out, \out, \temp	/* fold the reduction back in */
	.endm
/*
 * Table and constant setup, plus the single-state SubBytes step.
 *
 * After 'prepare' has run:
 *   v12      - 0x1b in every byte
 *   v13      - the permutation named by the shiftrows argument
 *   v14      - the permutation at .Lror32by8
 *   v16-v31  - 256 bytes loaded from the sbox argument
 */

	/* preload the entire Sbox */
	.macro		prepare, sbox, shiftrows, temp
	movi		v12.16b, #0x1b
	ldr_l		q13, \shiftrows, \temp
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm

	/* do preload for encryption (the first two arguments are unused) */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_sbox, .LForward_ShiftRows, \temp
	.endm

	/* expands to nothing; all three arguments are unused */
	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm

	/* do preload for decryption (the first two arguments are unused) */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
	.endm

	/*
	 * apply SubBytes transformation using the preloaded Sbox
	 *
	 * in - bare vector register name (no arrangement suffix), updated
	 *      in place
	 *
	 * Requires v15 to hold 0x40 in every byte and v16-v31 to hold the
	 * table. Overwrites v9, v10 and v11.
	 *
	 * A four-register tbl/tbx only resolves indexes 0..63. tbl writes 0
	 * for an out-of-range index and tbx leaves the destination byte
	 * untouched, so the index is rebased by 0x40 before each further
	 * quarter of the table and exactly one of the four lookups supplies
	 * each output byte.
	 */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

/*
 * Single-state MixColumns and the round loop built on top of it.
 *
 * These macros rely on v12 (0x1b in every byte), v13 (ShiftRows
 * permutation), v14 (.Lror32by8 permutation) and v16-v31 (Sbox) having
 * been loaded by 'prepare'.
 */

	/*
	 * apply MixColumns transformation
	 *
	 * in  - bare vector register name, updated in place
	 * enc - assemble-time constant; 0 additionally emits the
	 *       pre-multiplication needed for Inverse MixColumns
	 *
	 * Overwrites v8 and v9.
	 */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32		v8.8h, \in\().8h	/* swap the halfwords of each 32-bit word */
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	tbl		\in\().16b, {\in\().16b}, v14.16b	/* byte-permute via .Lror32by8 */
	eor		\in\().16b, \in\().16b, v8.16b
	.endm

	/*
	 * Run the full cipher on one AES state.
	 *
	 * enc    - 1 to encrypt, 0 to decrypt (forwarded to mix_columns)
	 * in     - bare vector register name holding the state, updated
	 *          in place
	 * rounds - register holding the round count (read only)
	 * rk     - register pointing at the first round key (read only)
	 * rkp    - scratch register, used as the running round key pointer
	 * i      - scratch register, used as the loop counter
	 *
	 * Overwrites v8-v11, v15, rkp, i and the condition flags.
	 * v15 does double duty: it carries the current round key into the
	 * eor and is then reloaded with 0x40 for sub_bytes.
	 */
	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16	/* next round key; leaves the flags alone */
	beq		2222f			/* last round skips MixColumns */
	mix_columns	\in, \enc
	b		1111b
2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm

	/* encrypt one state: do_block with enc = 1 */
	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

/*
 * Single-state decrypt wrapper, the four-state interleaved variants, the
 * inclusion of the mode code and the permutation tables.
 */

	/* decrypt one state: do_block with enc = 0 */
	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

	/*
	 * Interleaved versions: functionally equivalent to the
	 * ones above, but applied to AES states in parallel.
	 */

	/*
	 * SubBytes on four states at once.
	 *
	 * in0-in3 - bare vector register names, updated in place
	 *
	 * Requires v15 to hold 0x40 in every byte and v16-v31 to hold the
	 * Sbox. Overwrites v8-v11, which carry the rebased index of in0-in3
	 * respectively: each is reduced by 0x40 before the lookup into the
	 * next quarter of the table.
	 */
	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	/*
	 * Two-state version of mul_by_x. All arguments are bare vector
	 * register names; tmp0 and tmp1 are overwritten.
	 */
	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	/*
	 * Two-state version of mul_by_x2. All arguments are bare vector
	 * register names; tmp0 and tmp1 are overwritten.
	 */
	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr		\tmp0\().16b, \in0\().16b, #6
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	/*
	 * MixColumns on two states at once.
	 *
	 * in0, in1 - bare vector register names, updated in place
	 * enc      - assemble-time constant; 0 additionally emits the
	 *            pre-multiplication needed for Inverse MixColumns
	 *
	 * Reads v12 (0x1b constant) and v14 (.Lror32by8 permutation).
	 * Overwrites v8-v11.
	 */
	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	tbl		\in0\().16b, {\in0\().16b}, v14.16b
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm

	/*
	 * Run the full cipher on four AES states with one shared key
	 * schedule.
	 *
	 * enc     - 1 to encrypt, 0 to decrypt (forwarded to mix_columns_2x)
	 * in0-in3 - bare vector register names holding the states, updated
	 *           in place
	 * rounds  - register holding the round count (read only)
	 * rk      - register pointing at the first round key (read only)
	 * rkp     - scratch register, used as the running round key pointer
	 * i       - scratch register, used as the loop counter
	 *
	 * Overwrites v8-v11, v15, rkp, i and the condition flags.
	 * v15 alternates between the current round key and the 0x40
	 * constant that sub_bytes_4x needs.
	 */
	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16	/* next round key; leaves the flags alone */
	beq		2222f			/* last round skips MixColumns */
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm

	/* encrypt four states: do_block_4x with enc = 1 */
	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

	/* decrypt four states: do_block_4x with enc = 0 */
	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

#include "aes-modes.S"

	/*
	 * 16-byte tbl index vectors; .align 4 places them on a 16-byte
	 * boundary. .octa stores little-endian, so the rightmost hex byte
	 * is the index used for output byte 0.
	 */
	.section	".rodata", "a"
	.align		4
.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

	/* out[0..3] = in[1], in[2], in[3], in[0] within each 32-bit word */
.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201