/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* Every entry point generated through these wrappers gets a neon_ prefix. */
#define AES_FUNC_START(func)		SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)		SYM_FUNC_END(neon_ ## func)

	/*
	 * Register aliases. Note that xtsmask and cbciv name the same
	 * register (v7). The code using these aliases is not in this file;
	 * presumably it lives in the included aes-modes.S - confirm there
	 * before using both aliases in one code path.
	 */
	xtsmask		.req	v7
	cbciv		.req	v7
	vctr		.req	v4

	/*
	 * xts_reload_mask - forward to xts_load_mask with the same scratch
	 * register. xts_load_mask is not defined in this file.
	 *   tmp: scratch register handed through to xts_load_mask
	 */
	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

	/* special case for the neon-bs driver calling into this one for CTS */
	/*
	 *   reg: general purpose register to test
	 *   lbl: branch target taken when bit 1 of \reg is set
	 */
	.macro		xts_cts_skip_tw, reg, lbl
	tbnz		\reg, #1, \lbl
	.endm

	/* multiply by polynomial 'x' in GF(2^8) */
	/*
	 *   out:   destination vector (operand with arrangement, e.g. v9.16b)
	 *   in:    source vector
	 *   temp:  scratch vector, clobbered
	 *   const: reduction constant; callers in this file pass v12.16b,
	 *          which the prepare macro fills with 0x1b
	 */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7			/* 0xff where bit 7 set, else 0 */
	shl		\out, \in, #1			/* multiply by x, bit 7 dropped */
	and		\temp, \temp, \const		/* select the reduction term */
	eor		\out, \out, \temp		/* reduce */
	.endm

	/* multiply by polynomial 'x^2' in GF(2^8) */
	/*
	 * Same operands as mul_by_x. The two bits shifted out of each byte
	 * are carry-less multiplied by \const to form the reduction term.
	 */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6			/* top two bits of each byte */
	shl		\out, \in, #2			/* multiply by x^2 */
	pmul		\temp, \temp, \const		/* reduction term */
	eor		\out, \out, \temp		/* reduce */
	.endm

4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci /* preload the entire Sbox */ 4462306a36Sopenharmony_ci .macro prepare, sbox, shiftrows, temp 4562306a36Sopenharmony_ci movi v12.16b, #0x1b 4662306a36Sopenharmony_ci ldr_l q13, \shiftrows, \temp 4762306a36Sopenharmony_ci ldr_l q14, .Lror32by8, \temp 4862306a36Sopenharmony_ci adr_l \temp, \sbox 4962306a36Sopenharmony_ci ld1 {v16.16b-v19.16b}, [\temp], #64 5062306a36Sopenharmony_ci ld1 {v20.16b-v23.16b}, [\temp], #64 5162306a36Sopenharmony_ci ld1 {v24.16b-v27.16b}, [\temp], #64 5262306a36Sopenharmony_ci ld1 {v28.16b-v31.16b}, [\temp] 5362306a36Sopenharmony_ci .endm 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci /* do preload for encryption */ 5662306a36Sopenharmony_ci .macro enc_prepare, ignore0, ignore1, temp 5762306a36Sopenharmony_ci prepare crypto_aes_sbox, .LForward_ShiftRows, \temp 5862306a36Sopenharmony_ci .endm 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci .macro enc_switch_key, ignore0, ignore1, temp 6162306a36Sopenharmony_ci /* do nothing */ 6262306a36Sopenharmony_ci .endm 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci /* do preload for decryption */ 6562306a36Sopenharmony_ci .macro dec_prepare, ignore0, ignore1, temp 6662306a36Sopenharmony_ci prepare crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp 6762306a36Sopenharmony_ci .endm 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci /* apply SubBytes transformation using the preloaded Sbox */ 7062306a36Sopenharmony_ci .macro sub_bytes, in 7162306a36Sopenharmony_ci sub v9.16b, \in\().16b, v15.16b 7262306a36Sopenharmony_ci tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b 7362306a36Sopenharmony_ci sub v10.16b, v9.16b, v15.16b 7462306a36Sopenharmony_ci tbx \in\().16b, {v20.16b-v23.16b}, v9.16b 7562306a36Sopenharmony_ci sub v11.16b, v10.16b, v15.16b 7662306a36Sopenharmony_ci tbx \in\().16b, {v24.16b-v27.16b}, v10.16b 7762306a36Sopenharmony_ci tbx \in\().16b, {v28.16b-v31.16b}, v11.16b 7862306a36Sopenharmony_ci .endm 7962306a36Sopenharmony_ci 
8062306a36Sopenharmony_ci /* apply MixColumns transformation */ 8162306a36Sopenharmony_ci .macro mix_columns, in, enc 8262306a36Sopenharmony_ci .if \enc == 0 8362306a36Sopenharmony_ci /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ 8462306a36Sopenharmony_ci mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b 8562306a36Sopenharmony_ci eor \in\().16b, \in\().16b, v8.16b 8662306a36Sopenharmony_ci rev32 v8.8h, v8.8h 8762306a36Sopenharmony_ci eor \in\().16b, \in\().16b, v8.16b 8862306a36Sopenharmony_ci .endif 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b 9162306a36Sopenharmony_ci rev32 v8.8h, \in\().8h 9262306a36Sopenharmony_ci eor v8.16b, v8.16b, v9.16b 9362306a36Sopenharmony_ci eor \in\().16b, \in\().16b, v8.16b 9462306a36Sopenharmony_ci tbl \in\().16b, {\in\().16b}, v14.16b 9562306a36Sopenharmony_ci eor \in\().16b, \in\().16b, v8.16b 9662306a36Sopenharmony_ci .endm 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_ci .macro do_block, enc, in, rounds, rk, rkp, i 9962306a36Sopenharmony_ci ld1 {v15.4s}, [\rk] 10062306a36Sopenharmony_ci add \rkp, \rk, #16 10162306a36Sopenharmony_ci mov \i, \rounds 10262306a36Sopenharmony_ci1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ 10362306a36Sopenharmony_ci movi v15.16b, #0x40 10462306a36Sopenharmony_ci tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ 10562306a36Sopenharmony_ci sub_bytes \in 10662306a36Sopenharmony_ci subs \i, \i, #1 10762306a36Sopenharmony_ci ld1 {v15.4s}, [\rkp], #16 10862306a36Sopenharmony_ci beq 2222f 10962306a36Sopenharmony_ci mix_columns \in, \enc 11062306a36Sopenharmony_ci b 1111b 11162306a36Sopenharmony_ci2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ 11262306a36Sopenharmony_ci .endm 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci .macro encrypt_block, in, rounds, rk, rkp, i 11562306a36Sopenharmony_ci do_block 1, \in, \rounds, \rk, \rkp, \i 11662306a36Sopenharmony_ci .endm 11762306a36Sopenharmony_ci 
11862306a36Sopenharmony_ci .macro decrypt_block, in, rounds, rk, rkp, i 11962306a36Sopenharmony_ci do_block 0, \in, \rounds, \rk, \rkp, \i 12062306a36Sopenharmony_ci .endm 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci /* 12362306a36Sopenharmony_ci * Interleaved versions: functionally equivalent to the 12462306a36Sopenharmony_ci * ones above, but applied to AES states in parallel. 12562306a36Sopenharmony_ci */ 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci .macro sub_bytes_4x, in0, in1, in2, in3 12862306a36Sopenharmony_ci sub v8.16b, \in0\().16b, v15.16b 12962306a36Sopenharmony_ci tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b 13062306a36Sopenharmony_ci sub v9.16b, \in1\().16b, v15.16b 13162306a36Sopenharmony_ci tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b 13262306a36Sopenharmony_ci sub v10.16b, \in2\().16b, v15.16b 13362306a36Sopenharmony_ci tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b 13462306a36Sopenharmony_ci sub v11.16b, \in3\().16b, v15.16b 13562306a36Sopenharmony_ci tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b 13662306a36Sopenharmony_ci tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b 13762306a36Sopenharmony_ci tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b 13862306a36Sopenharmony_ci sub v8.16b, v8.16b, v15.16b 13962306a36Sopenharmony_ci tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b 14062306a36Sopenharmony_ci sub v9.16b, v9.16b, v15.16b 14162306a36Sopenharmony_ci tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b 14262306a36Sopenharmony_ci sub v10.16b, v10.16b, v15.16b 14362306a36Sopenharmony_ci tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b 14462306a36Sopenharmony_ci sub v11.16b, v11.16b, v15.16b 14562306a36Sopenharmony_ci tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b 14662306a36Sopenharmony_ci sub v8.16b, v8.16b, v15.16b 14762306a36Sopenharmony_ci tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b 14862306a36Sopenharmony_ci sub v9.16b, v9.16b, v15.16b 14962306a36Sopenharmony_ci tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b 15062306a36Sopenharmony_ci sub v10.16b, 
v10.16b, v15.16b 15162306a36Sopenharmony_ci tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b 15262306a36Sopenharmony_ci sub v11.16b, v11.16b, v15.16b 15362306a36Sopenharmony_ci tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b 15462306a36Sopenharmony_ci tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b 15562306a36Sopenharmony_ci tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b 15662306a36Sopenharmony_ci .endm 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const 15962306a36Sopenharmony_ci sshr \tmp0\().16b, \in0\().16b, #7 16062306a36Sopenharmony_ci shl \out0\().16b, \in0\().16b, #1 16162306a36Sopenharmony_ci sshr \tmp1\().16b, \in1\().16b, #7 16262306a36Sopenharmony_ci and \tmp0\().16b, \tmp0\().16b, \const\().16b 16362306a36Sopenharmony_ci shl \out1\().16b, \in1\().16b, #1 16462306a36Sopenharmony_ci and \tmp1\().16b, \tmp1\().16b, \const\().16b 16562306a36Sopenharmony_ci eor \out0\().16b, \out0\().16b, \tmp0\().16b 16662306a36Sopenharmony_ci eor \out1\().16b, \out1\().16b, \tmp1\().16b 16762306a36Sopenharmony_ci .endm 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci .macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const 17062306a36Sopenharmony_ci ushr \tmp0\().16b, \in0\().16b, #6 17162306a36Sopenharmony_ci shl \out0\().16b, \in0\().16b, #2 17262306a36Sopenharmony_ci ushr \tmp1\().16b, \in1\().16b, #6 17362306a36Sopenharmony_ci pmul \tmp0\().16b, \tmp0\().16b, \const\().16b 17462306a36Sopenharmony_ci shl \out1\().16b, \in1\().16b, #2 17562306a36Sopenharmony_ci pmul \tmp1\().16b, \tmp1\().16b, \const\().16b 17662306a36Sopenharmony_ci eor \out0\().16b, \out0\().16b, \tmp0\().16b 17762306a36Sopenharmony_ci eor \out1\().16b, \out1\().16b, \tmp1\().16b 17862306a36Sopenharmony_ci .endm 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci .macro mix_columns_2x, in0, in1, enc 18162306a36Sopenharmony_ci .if \enc == 0 18262306a36Sopenharmony_ci /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ 
18362306a36Sopenharmony_ci mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12 18462306a36Sopenharmony_ci eor \in0\().16b, \in0\().16b, v8.16b 18562306a36Sopenharmony_ci rev32 v8.8h, v8.8h 18662306a36Sopenharmony_ci eor \in1\().16b, \in1\().16b, v9.16b 18762306a36Sopenharmony_ci rev32 v9.8h, v9.8h 18862306a36Sopenharmony_ci eor \in0\().16b, \in0\().16b, v8.16b 18962306a36Sopenharmony_ci eor \in1\().16b, \in1\().16b, v9.16b 19062306a36Sopenharmony_ci .endif 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12 19362306a36Sopenharmony_ci rev32 v10.8h, \in0\().8h 19462306a36Sopenharmony_ci rev32 v11.8h, \in1\().8h 19562306a36Sopenharmony_ci eor v10.16b, v10.16b, v8.16b 19662306a36Sopenharmony_ci eor v11.16b, v11.16b, v9.16b 19762306a36Sopenharmony_ci eor \in0\().16b, \in0\().16b, v10.16b 19862306a36Sopenharmony_ci eor \in1\().16b, \in1\().16b, v11.16b 19962306a36Sopenharmony_ci tbl \in0\().16b, {\in0\().16b}, v14.16b 20062306a36Sopenharmony_ci tbl \in1\().16b, {\in1\().16b}, v14.16b 20162306a36Sopenharmony_ci eor \in0\().16b, \in0\().16b, v10.16b 20262306a36Sopenharmony_ci eor \in1\().16b, \in1\().16b, v11.16b 20362306a36Sopenharmony_ci .endm 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i 20662306a36Sopenharmony_ci ld1 {v15.4s}, [\rk] 20762306a36Sopenharmony_ci add \rkp, \rk, #16 20862306a36Sopenharmony_ci mov \i, \rounds 20962306a36Sopenharmony_ci1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ 21062306a36Sopenharmony_ci eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ 21162306a36Sopenharmony_ci eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ 21262306a36Sopenharmony_ci eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ 21362306a36Sopenharmony_ci movi v15.16b, #0x40 21462306a36Sopenharmony_ci tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ 21562306a36Sopenharmony_ci tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ 
21662306a36Sopenharmony_ci tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ 21762306a36Sopenharmony_ci tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ 21862306a36Sopenharmony_ci sub_bytes_4x \in0, \in1, \in2, \in3 21962306a36Sopenharmony_ci subs \i, \i, #1 22062306a36Sopenharmony_ci ld1 {v15.4s}, [\rkp], #16 22162306a36Sopenharmony_ci beq 2222f 22262306a36Sopenharmony_ci mix_columns_2x \in0, \in1, \enc 22362306a36Sopenharmony_ci mix_columns_2x \in2, \in3, \enc 22462306a36Sopenharmony_ci b 1111b 22562306a36Sopenharmony_ci2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ 22662306a36Sopenharmony_ci eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ 22762306a36Sopenharmony_ci eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ 22862306a36Sopenharmony_ci eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ 22962306a36Sopenharmony_ci .endm 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i 23262306a36Sopenharmony_ci do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i 23362306a36Sopenharmony_ci .endm 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i 23662306a36Sopenharmony_ci do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i 23762306a36Sopenharmony_ci .endm 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci#include "aes-modes.S" 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci .section ".rodata", "a" 24262306a36Sopenharmony_ci .align 4 24362306a36Sopenharmony_ci.LForward_ShiftRows: 24462306a36Sopenharmony_ci .octa 0x0b06010c07020d08030e09040f0a0500 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci.LReverse_ShiftRows: 24762306a36Sopenharmony_ci .octa 0x0306090c0f0205080b0e0104070a0d00 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci.Lror32by8: 25062306a36Sopenharmony_ci .octa 0x0c0f0e0d080b0a090407060500030201 251