/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */

	.text
	.align		4

/*
 * MAX_STRIDE is the number of blocks handled per iteration by the
 * interleaved loops below; it defaults to 4 unless it was defined before
 * this point. ST4(...) expands to its argument only when MAX_STRIDE == 4
 * and ST5(...) only otherwise, so stride-specific instructions can sit
 * side by side in one instruction stream.
 */
#ifndef MAX_STRIDE
#define MAX_STRIDE	4
#endif

#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif

/*
 * Out-of-line bodies for the 4-way (and, with MAX_STRIDE == 5, 5-way)
 * interleaved block macros, so that the mode routines can reach them with
 * a plain "bl" instead of expanding the macro at every call site.
 *
 * The data blocks are transformed in place in v0-v3 (plus v4 for the 5x
 * variants). w3 and x2 are passed straight through to the macros; in the
 * callers visible in this file they hold the round count and the round key
 * pointer. x8 and w7 are handed to the macros as well - presumably as
 * scratch registers; confirm against the encrypt_block4x/5x definitions in
 * the including file.
 */
SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block4x)

SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block4x)

#if MAX_STRIDE == 5
SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block5x)

SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block5x)
#endif

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 *
	 * Registers on entry: x0 = out, x1 = in, x2 = rk, w3 = rounds,
	 * w4 = blocks.
	 *
	 * Groups of MAX_STRIDE blocks go through the out-of-line interleaved
	 * helpers; whatever is left (fewer than MAX_STRIDE blocks) is handled
	 * one block at a time. Every block is transformed independently of
	 * the others.
	 *
	 * frame_push/frame_pop come from outside this file - presumably they
	 * save and restore the frame record, which is needed because "bl"
	 * overwrites the link register. TODO confirm.
	 */

AES_FUNC_START(aes_ecb_encrypt)
	frame_push	0

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs		w4, w4, #MAX_STRIDE		/* a full group left? */
	bmi		.Lecbenc1x			/* no: finish singly */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_encrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w4, w4, #MAX_STRIDE		/* undo: w4 = leftover */
	beq		.Lecbencout			/* nothing left */
.Lecbencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbencloop
.Lecbencout:
	frame_pop
	ret
AES_FUNC_END(aes_ecb_encrypt)


AES_FUNC_START(aes_ecb_decrypt)
	frame_push	0

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs		w4, w4, #MAX_STRIDE		/* a full group left? */
	bmi		.Lecbdec1x			/* no: finish singly */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
ST4(	bl		aes_decrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_decrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #MAX_STRIDE		/* undo: w4 = leftover */
	beq		.Lecbdecout			/* nothing left */
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	frame_pop
	ret
AES_FUNC_END(aes_ecb_decrypt)


	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
*/

/*
 * CBC encryption.
 *
 * Registers on entry, following the prototypes in the comment that closes
 * just above: x0 = out, x1 = in, x2 = rk (rk1 for ESSIV), w3 = rounds,
 * w4 = blocks, x5 = iv, x6 = rk2 (ESSIV only).
 *
 * aes_essiv_cbc_encrypt first encrypts the IV in place with the rk2 key
 * using a fixed 14 rounds, then joins the shared CBC loop after
 * enc_switch_key (defined elsewhere; presumably it makes rk1 the active
 * key - TODO confirm).
 *
 * v4 carries the chaining value. Four blocks are loaded and stored per
 * iteration of the main loop, but they are still encrypted one after the
 * other because each block is XORed with the previous ciphertext first.
 * On exit the chaining value (the last ciphertext block, when at least one
 * block was processed) is written back to iv.
 */
AES_FUNC_START(aes_essiv_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	v4, w8, x6, x7, w9
	enc_switch_key	w3, x2, x6
	b		.Lcbcencloop4x

AES_FUNC_START(aes_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs		w4, w4, #4
	bmi		.Lcbcenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v3.16b			/* last ct is next iv */
	b		.Lcbcencloop4x
.Lcbcenc1x:
	adds		w4, w4, #4
	beq		.Lcbcencout
.Lcbcencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1		{v4.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v4.16b}, [x5]			/* return iv */
	ret
AES_FUNC_END(aes_cbc_encrypt)
AES_FUNC_END(aes_essiv_cbc_encrypt)

/*
 * CBC decryption; same register interface as the encryption entry points.
 *
 * cbciv is a register alias defined outside this file; it holds the
 * chaining value. The ESSIV entry point encrypts the IV with rk2 (fixed 14
 * rounds) before joining the common path at .Lessivcbcdecstart.
 *
 * In the interleaved loop the leading ciphertext blocks are copied aside
 * before being decrypted in place, because ciphertext block i is the XOR
 * mask for plaintext block i+1. The trailing ciphertext block(s) are
 * re-read from memory by rewinding x1 rather than kept in registers.
 */
AES_FUNC_START(aes_essiv_cbc_decrypt)
	ld1		{cbciv.16b}, [x5]		/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	cbciv, w8, x6, x7, w9
	b		.Lessivcbcdecstart

AES_FUNC_START(aes_cbc_decrypt)
	ld1		{cbciv.16b}, [x5]		/* get iv */
.Lessivcbcdecstart:
	frame_push	0
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lcbcdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
#if MAX_STRIDE == 5
	ld1		{v4.16b}, [x1], #16		/* get 1 ct block */
	mov		v5.16b, v0.16b
	mov		v6.16b, v1.16b
	mov		v7.16b, v2.16b
	bl		aes_decrypt_block5x
	sub		x1, x1, #32
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v5.16b
	ld1		{v5.16b}, [x1], #16		/* reload 1 ct block */
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	eor		v4.16b, v4.16b, v5.16b
#else
	mov		v4.16b, v0.16b
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	bl		aes_decrypt_block4x
	sub		x1, x1, #16
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
#endif
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov		cbciv.16b, v1.16b		/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{cbciv.16b}, [x5]		/* return iv */
	frame_pop
	ret
AES_FUNC_END(aes_cbc_decrypt)
AES_FUNC_END(aes_essiv_cbc_decrypt)


	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 *
	 * Registers on entry: x0 = out, x1 = in, x2 = rk, w3 = rounds,
	 * x4 = bytes, x5 = iv.
	 *
	 * Exactly two 16-byte loads and two 16-byte stores are issued, the
	 * second of each pair at offset (bytes - 16), so these routines
	 * presumably expect 16 < bytes <= 32 - TODO confirm with the callers.
	 *
	 * v3 and v4 are index vectors taken from .Lcts_permute_table at
	 * offsets (bytes - 16) and 32 - (bytes - 16); they drive the tbl/tbx
	 * byte shuffles that move the partial final block into place.
	 */

AES_FUNC_START(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b
	tbl		v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	add		x4, x0, x4
	st1		{v0.16b}, [x4]			/* overlapping stores */
	st1		{v1.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_encrypt)

AES_FUNC_START(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	decrypt_block	v0, w3, x2, x6, w7
	tbl		v2.16b, {v0.16b}, v3.16b
	eor		v2.16b, v2.16b, v1.16b

	tbx		v0.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	add		x4, x0, x4
	st1		{v2.16b}, [x4]			/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_decrypt)

	/*
	 * Index table for tbl/tbx: 16 bytes of 0xff, the identity permutation
	 * 0x0..0xf, then 16 more bytes of 0xff. A 16-byte load at a variable
	 * offset into it yields an index vector that shifts a block by a byte
	 * count determined by that offset. For the out-of-range index 0xff,
	 * tbl produces a zero byte and tbx leaves the destination byte
	 * untouched.
	 */
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous

	/*
	 * This macro generates the code for CTR and XCTR mode.
*/

/*
 * ctr_encrypt xctr
 *
 * xctr: assembly-time flag tested with ".if"; nonzero generates the XCTR
 *       variant, zero the CTR variant. It is also appended to the local
 *       label names, so the two expansions get distinct labels.
 *
 * Run-time register interface (see the .req aliases below): x0 = OUT,
 * x1 = IN, x2 = KEY, w3 = ROUNDS_W, w4 = BYTES_W, x5 = IV and, for XCTR
 * only, w6 = BYTE_CTR_W.
 *
 * vctr is a vector register alias defined outside this file. In CTR mode
 * the updated counter block is stored back through IV on exit; in XCTR
 * mode IV is only read.
 */
.macro ctr_encrypt xctr
	// Arguments
	OUT		.req x0
	IN		.req x1
	KEY		.req x2
	ROUNDS_W	.req w3
	BYTES_W		.req w4
	IV		.req x5
	BYTE_CTR_W	.req w6		// XCTR only
	// Intermediate values
	CTR_W		.req w11	// XCTR only
	CTR		.req x11	// XCTR only
	IV_PART		.req x12
	BLOCKS		.req x13
	BLOCKS_W	.req w13

	frame_push	0

	enc_prepare	ROUNDS_W, KEY, IV_PART
	ld1		{vctr.16b}, [IV]

	/*
	 * Keep 64 bits of the IV in a register.  For CTR mode this lets us
	 * easily increment the IV.  For XCTR mode this lets us efficiently XOR
	 * the 64-bit counter with the IV.
	 */
	.if \xctr
		umov		IV_PART, vctr.d[0]
		lsr		CTR_W, BYTE_CTR_W, #4
	.else
		umov		IV_PART, vctr.d[1]
		rev		IV_PART, IV_PART
	.endif

.LctrloopNx\xctr:
	/*
	 * BLOCKS_W = min(ceil(BYTES_W / 16), MAX_STRIDE). BYTES_W is reduced
	 * by one full stride up front, so it is negative from here on when
	 * fewer than MAX_STRIDE * 16 bytes remained.
	 */
	add		BLOCKS_W, BYTES_W, #15
	sub		BYTES_W, BYTES_W, #MAX_STRIDE << 4
	lsr		BLOCKS_W, BLOCKS_W, #4
	mov		w8, #MAX_STRIDE
	cmp		BLOCKS_W, w8
	csel		BLOCKS_W, BLOCKS_W, w8, lt

	/*
	 * Set up the counter values in v0-v{MAX_STRIDE-1}.
	 *
	 * If we are encrypting less than MAX_STRIDE blocks, the tail block
	 * handling code expects the last keystream block to be in
	 * v{MAX_STRIDE-1}.  For example: if encrypting two blocks with
	 * MAX_STRIDE=5, then v3 and v4 should have the next two counter blocks.
	 */
	.if \xctr
		add		CTR, CTR, BLOCKS
	.else
		adds		IV_PART, IV_PART, BLOCKS
	.endif
	mov		v0.16b, vctr.16b
	mov		v1.16b, vctr.16b
	mov		v2.16b, vctr.16b
	mov		v3.16b, vctr.16b
ST5(	mov		v4.16b, vctr.16b		)
	.if \xctr
		sub		x6, CTR, #MAX_STRIDE - 1
		sub		x7, CTR, #MAX_STRIDE - 2
		sub		x8, CTR, #MAX_STRIDE - 3
		sub		x9, CTR, #MAX_STRIDE - 4
ST5(		sub		x10, CTR, #MAX_STRIDE - 5	)
		eor		x6, x6, IV_PART
		eor		x7, x7, IV_PART
		eor		x8, x8, IV_PART
		eor		x9, x9, IV_PART
ST5(		eor		x10, x10, IV_PART		)
		mov		v0.d[0], x6
		mov		v1.d[0], x7
		mov		v2.d[0], x8
		mov		v3.d[0], x9
ST5(		mov		v4.d[0], x10			)
	.else
		bcs		0f
		.subsection	1
		/*
		 * This subsection handles carries.
		 *
		 * Conditional branching here is allowed with respect to time
		 * invariance since the branches are dependent on the IV instead
		 * of the plaintext or key.  This code is rarely executed in
		 * practice anyway.
		 */

		/* Apply carry to outgoing counter. */
0:		umov		x8, vctr.d[0]
		rev		x8, x8
		add		x8, x8, #1
		rev		x8, x8
		ins		vctr.d[0], x8

		/*
		 * Apply carry to counter blocks if needed.
		 *
		 * Since the carry flag was set, we know 0 <= IV_PART <
		 * MAX_STRIDE.  Using the value of IV_PART we can determine how
		 * many counter blocks need to be updated.
		 *
		 * Each "bti c" + "mov" pair below is 8 bytes, hence the
		 * "lsl #3": the computed branch lands IV_PART pairs before
		 * label 1, so only the last IV_PART counter blocks receive
		 * the incremented upper half.
		 */
		cbz		IV_PART, 2f
		adr		x16, 1f
		sub		x16, x16, IV_PART, lsl #3
		br		x16
		bti		c
		mov		v0.d[0], vctr.d[0]
		bti		c
		mov		v1.d[0], vctr.d[0]
		bti		c
		mov		v2.d[0], vctr.d[0]
		bti		c
		mov		v3.d[0], vctr.d[0]
ST5(		bti		c				)
ST5(		mov		v4.d[0], vctr.d[0]		)
1:		b		2f
		.previous

2:		rev		x7, IV_PART
		ins		vctr.d[1], x7
		sub		x7, IV_PART, #MAX_STRIDE - 1
		sub		x8, IV_PART, #MAX_STRIDE - 2
		sub		x9, IV_PART, #MAX_STRIDE - 3
		rev		x7, x7
		rev		x8, x8
		mov		v1.d[1], x7
		rev		x9, x9
ST5(		sub		x10, IV_PART, #MAX_STRIDE - 4	)
		mov		v2.d[1], x8
ST5(		rev		x10, x10			)
		mov		v3.d[1], x9
ST5(		mov		v4.d[1], x10			)
	.endif

	/*
	 * If there are at least MAX_STRIDE blocks left, XOR the data with
	 * keystream and store.  Otherwise jump to tail handling.
	 */
	tbnz		BYTES_W, #31, .Lctrtail\xctr
	ld1		{v5.16b-v7.16b}, [IN], #48
ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)
	eor		v0.16b, v5.16b, v0.16b
ST4(	ld1		{v5.16b}, [IN], #16		)
	eor		v1.16b, v6.16b, v1.16b
ST5(	ld1		{v5.16b-v6.16b}, [IN], #32	)
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
ST5(	eor		v4.16b, v6.16b, v4.16b		)
	st1		{v0.16b-v3.16b}, [OUT], #64
ST5(	st1		{v4.16b}, [OUT], #16		)
	cbz		BYTES_W, .Lctrout\xctr
	b		.LctrloopNx\xctr

.Lctrout\xctr:
	.if !\xctr
		st1		{vctr.16b}, [IV] /* return next CTR value */
	.endif
	frame_pop
	ret

.Lctrtail\xctr:
	/*
	 * Handle up to MAX_STRIDE * 16 - 1 bytes of plaintext
	 *
	 * This code expects the last keystream block to be in v{MAX_STRIDE-1}.
	 * For example: if encrypting two blocks with MAX_STRIDE=5, then v3 and
	 * v4 should have the next two counter blocks.
	 *
	 * This allows us to store the ciphertext by writing to overlapping
	 * regions of memory.  Any invalid ciphertext blocks get overwritten by
	 * correctly computed blocks.  This approach greatly simplifies the
	 * logic for storing the ciphertext.
	 *
	 * x13 = size of the final block: the low four bits of BYTES_W, or 16
	 * when those bits are zero. x14/x15/x16 each become a post-increment
	 * of 16 or 0, depending on whether the corresponding leading block
	 * is present.
	 */
	mov		x16, #16
	ands		w7, BYTES_W, #0xf
	csel		x13, x7, x16, ne

ST5(	cmp		BYTES_W, #64 - (MAX_STRIDE << 4))
ST5(	csel		x14, x16, xzr, gt		)
	cmp		BYTES_W, #48 - (MAX_STRIDE << 4)
	csel		x15, x16, xzr, gt
	cmp		BYTES_W, #32 - (MAX_STRIDE << 4)
	csel		x16, x16, xzr, gt
	cmp		BYTES_W, #16 - (MAX_STRIDE << 4)

	/* adr_l and add leave the flags of the last cmp intact for the ble */
	adr_l		x9, .Lcts_permute_table
	add		x9, x9, x13
	ble		.Lctrtail1x\xctr

ST5(	ld1		{v5.16b}, [IN], x14		)
	ld1		{v6.16b}, [IN], x15
	ld1		{v7.16b}, [IN], x16

ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)

	ld1		{v8.16b}, [IN], x13
	ld1		{v9.16b}, [IN]
	ld1		{v10.16b}, [x9]

ST4(	eor		v6.16b, v6.16b, v0.16b		)
ST4(	eor		v7.16b, v7.16b, v1.16b		)
ST4(	tbl		v3.16b, {v3.16b}, v10.16b	)
ST4(	eor		v8.16b, v8.16b, v2.16b		)
ST4(	eor		v9.16b, v9.16b, v3.16b		)

ST5(	eor		v5.16b, v5.16b, v0.16b		)
ST5(	eor		v6.16b, v6.16b, v1.16b		)
ST5(	tbl		v4.16b, {v4.16b}, v10.16b	)
ST5(	eor		v7.16b, v7.16b, v2.16b		)
ST5(	eor		v8.16b, v8.16b, v3.16b		)
ST5(	eor		v9.16b, v9.16b, v4.16b		)

ST5(	st1		{v5.16b}, [OUT], x14		)
	st1		{v6.16b}, [OUT], x15
	st1		{v7.16b}, [OUT], x16
	add		x13, x13, OUT
	st1		{v9.16b}, [x13]		// overlapping stores
	st1		{v8.16b}, [OUT]
	b		.Lctrout\xctr

.Lctrtail1x\xctr:
	/*
	 * Handle <= 16 bytes of plaintext
	 *
	 * This code always reads and writes 16 bytes.  To avoid out of bounds
	 * accesses, XCTR and CTR modes must use a temporary buffer when
	 * encrypting/decrypting less than 16 bytes.
	 *
	 * This code is unusual in that it loads the input and stores the output
	 * relative to the end of the buffers rather than relative to the start.
	 * This causes unusual behaviour when encrypting/decrypting less than 16
	 * bytes; the end of the data is expected to be at the end of the
	 * temporary buffer rather than the start of the data being at the start
	 * of the temporary buffer.
	 *
	 * The csel below still sees the flags of the cmp against 16 bytes
	 * made before the branch here: "eq" means exactly 16 bytes remain.
	 */
	sub		x8, x7, #16
	csel		x7, x7, x8, eq
	add		IN, IN, x7
	add		OUT, OUT, x7
	ld1		{v5.16b}, [IN]
	ld1		{v6.16b}, [OUT]
ST5(	mov		v3.16b, v4.16b			)
	encrypt_block	v3, ROUNDS_W, KEY, x8, w7
	ld1		{v10.16b-v11.16b}, [x9]
	tbl		v3.16b, {v3.16b}, v10.16b
	sshr		v11.16b, v11.16b, #7
	eor		v5.16b, v5.16b, v3.16b
	bif		v5.16b, v6.16b, v11.16b
	st1		{v5.16b}, [OUT]
	b		.Lctrout\xctr

	// Arguments
	.unreq OUT
	.unreq IN
	.unreq KEY
	.unreq ROUNDS_W
	.unreq BYTES_W
	.unreq IV
	.unreq BYTE_CTR_W	// XCTR only
	// Intermediate values
	.unreq CTR_W		// XCTR only
	.unreq CTR		// XCTR only
	.unreq IV_PART
	.unreq BLOCKS
	.unreq BLOCKS_W
.endm

	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int bytes, u8 ctr[])
	 *
	 * The input and output buffers must always be at least 16 bytes even if
	 * encrypting/decrypting less than 16 bytes.  Otherwise out of bounds
	 * accesses will occur.  The data to be encrypted/decrypted is expected
	 * to be at the end of this 16-byte temporary buffer rather than the
	 * start.
	 */

AES_FUNC_START(aes_ctr_encrypt)
	ctr_encrypt 0
AES_FUNC_END(aes_ctr_encrypt)

	/*
	 * aes_xctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		    int bytes, u8 const iv[], int byte_ctr)
	 *
	 * The input and output buffers must always be at least 16 bytes even if
	 * encrypting/decrypting less than 16 bytes.  Otherwise out of bounds
	 * accesses will occur.  The data to be encrypted/decrypted is expected
	 * to be at the end of this 16-byte temporary buffer rather than the
	 * start.
*/

AES_FUNC_START(aes_xctr_encrypt)
	ctr_encrypt 1
AES_FUNC_END(aes_xctr_encrypt)


	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 *
	 * Registers on entry: x0 = out, x1 = in, x2 = rk1, w3 = rounds,
	 * w4 = bytes, x5 = rk2, x6 = iv, w7 = first.
	 *
	 * v4 holds the current tweak and is written back through x6 on
	 * return. xtsmask is a register alias defined outside this file.
	 */

	/*
	 * next_tweak out, in, tmp
	 *
	 * Derive the following tweak from "in" and place it in "out", using
	 * "tmp" as scratch: both 64-bit lanes are doubled, and the top bit
	 * that falls out of each lane is fed, via xtsmask and the lane swap
	 * done by "ext", into the other lane (as 1 into the high lane, as
	 * 0x87 into the low lane).
	 */
	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	/*
	 * xts_load_mask tmp
	 *
	 * Build the constant used by next_tweak: afterwards the two 64-bit
	 * lanes of xtsmask hold 0x1 (low lane) and 0x87 (high lane). "tmp"
	 * is clobbered.
	 */
	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm

/*
 * XTS encryption.
 *
 * When "first" (w7) is nonzero, the tweak loaded from iv is encrypted with
 * rk2 before use and the key is then switched to rk1; xts_cts_skip_tw is
 * defined elsewhere and may branch straight to .LxtsencNx instead
 * (presumably to skip the tweak encryption in some ciphertext stealing
 * case - TODO confirm). When "first" is zero only rk1 is set up.
 *
 * 64 bytes are processed per iteration of the main loop, then single
 * 16-byte blocks. A trailing partial block is handled at .Lxtsenccts:
 * the last full output block and the partial input block are shuffled
 * with index vectors from .Lcts_permute_table, the partial output is
 * stored with an overlapping store, and the merged block is encrypted by
 * re-entering the single block path at .Lxtsencctsout.
 *
 * xts_reload_mask is defined elsewhere; presumably it restores xtsmask
 * where the block helper may have clobbered it - TODO confirm.
 */
AES_FUNC_START(aes_xts_encrypt)
	frame_push	0

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	xts_cts_skip_tw	w7, .LxtsencNx
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #64
	bmi		.Lxtsenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsencret
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #64
	beq		.Lxtsencout
	subs		w4, w4, #16
	bmi		.LxtsencctsNx
.Lxtsencloop:
	ld1		{v0.16b}, [x1], #16
.Lxtsencctsout:
	eor		v0.16b, v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	cbz		w4, .Lxtsencout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8
	bmi		.Lxtsenccts
	st1		{v0.16b}, [x0], #16
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v0.16b}, [x0]
.Lxtsencret:
	st1		{v4.16b}, [x6]
	frame_pop
	ret

.LxtsencctsNx:
	mov		v0.16b, v3.16b
	sub		x0, x0, #16
.Lxtsenccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw	/* rewind input pointer */
	add		w4, w4, #16		/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4		/* output address of final block */

	ld1		{v1.16b}, [x1]		/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b
	st1		{v2.16b}, [x4]			/* overlapping stores */
	mov		w4, wzr
	b		.Lxtsencctsout
AES_FUNC_END(aes_xts_encrypt)

AES_FUNC_START(aes_xts_decrypt)
	frame_push	0

	/* subtract 16 bytes if we are doing CTS */
	sub		w8, w4, #0x10
	tst		w4, #0xf
	csel		w4, w4, w8, eq

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
xts_cts_skip_tw w7, .Lxtsdecskiptw 73662306a36Sopenharmony_ci cbz w7, .Lxtsdecnotfirst 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci enc_prepare w3, x5, x8 73962306a36Sopenharmony_ci encrypt_block v4, w3, x5, x8, w7 /* first tweak */ 74062306a36Sopenharmony_ci.Lxtsdecskiptw: 74162306a36Sopenharmony_ci dec_prepare w3, x2, x8 74262306a36Sopenharmony_ci b .LxtsdecNx 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_ci.Lxtsdecnotfirst: 74562306a36Sopenharmony_ci dec_prepare w3, x2, x8 74662306a36Sopenharmony_ci.LxtsdecloopNx: 74762306a36Sopenharmony_ci next_tweak v4, v4, v8 74862306a36Sopenharmony_ci.LxtsdecNx: 74962306a36Sopenharmony_ci subs w4, w4, #64 75062306a36Sopenharmony_ci bmi .Lxtsdec1x 75162306a36Sopenharmony_ci ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ 75262306a36Sopenharmony_ci next_tweak v5, v4, v8 75362306a36Sopenharmony_ci eor v0.16b, v0.16b, v4.16b 75462306a36Sopenharmony_ci next_tweak v6, v5, v8 75562306a36Sopenharmony_ci eor v1.16b, v1.16b, v5.16b 75662306a36Sopenharmony_ci eor v2.16b, v2.16b, v6.16b 75762306a36Sopenharmony_ci next_tweak v7, v6, v8 75862306a36Sopenharmony_ci eor v3.16b, v3.16b, v7.16b 75962306a36Sopenharmony_ci bl aes_decrypt_block4x 76062306a36Sopenharmony_ci eor v3.16b, v3.16b, v7.16b 76162306a36Sopenharmony_ci eor v0.16b, v0.16b, v4.16b 76262306a36Sopenharmony_ci eor v1.16b, v1.16b, v5.16b 76362306a36Sopenharmony_ci eor v2.16b, v2.16b, v6.16b 76462306a36Sopenharmony_ci st1 {v0.16b-v3.16b}, [x0], #64 76562306a36Sopenharmony_ci mov v4.16b, v7.16b 76662306a36Sopenharmony_ci cbz w4, .Lxtsdecout 76762306a36Sopenharmony_ci xts_reload_mask v8 76862306a36Sopenharmony_ci b .LxtsdecloopNx 76962306a36Sopenharmony_ci.Lxtsdec1x: 77062306a36Sopenharmony_ci adds w4, w4, #64 77162306a36Sopenharmony_ci beq .Lxtsdecout 77262306a36Sopenharmony_ci subs w4, w4, #16 77362306a36Sopenharmony_ci.Lxtsdecloop: 77462306a36Sopenharmony_ci ld1 {v0.16b}, [x1], #16 77562306a36Sopenharmony_ci bmi .Lxtsdeccts 
77662306a36Sopenharmony_ci.Lxtsdecctsout: 77762306a36Sopenharmony_ci eor v0.16b, v0.16b, v4.16b 77862306a36Sopenharmony_ci decrypt_block v0, w3, x2, x8, w7 77962306a36Sopenharmony_ci eor v0.16b, v0.16b, v4.16b 78062306a36Sopenharmony_ci st1 {v0.16b}, [x0], #16 78162306a36Sopenharmony_ci cbz w4, .Lxtsdecout 78262306a36Sopenharmony_ci subs w4, w4, #16 78362306a36Sopenharmony_ci next_tweak v4, v4, v8 78462306a36Sopenharmony_ci b .Lxtsdecloop 78562306a36Sopenharmony_ci.Lxtsdecout: 78662306a36Sopenharmony_ci st1 {v4.16b}, [x6] 78762306a36Sopenharmony_ci frame_pop 78862306a36Sopenharmony_ci ret 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci.Lxtsdeccts: 79162306a36Sopenharmony_ci adr_l x8, .Lcts_permute_table 79262306a36Sopenharmony_ci 79362306a36Sopenharmony_ci add x1, x1, w4, sxtw /* rewind input pointer */ 79462306a36Sopenharmony_ci add w4, w4, #16 /* # bytes in final block */ 79562306a36Sopenharmony_ci add x9, x8, #32 79662306a36Sopenharmony_ci add x8, x8, x4 79762306a36Sopenharmony_ci sub x9, x9, x4 79862306a36Sopenharmony_ci add x4, x0, x4 /* output address of final block */ 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_ci next_tweak v5, v4, v8 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci ld1 {v1.16b}, [x1] /* load final block */ 80362306a36Sopenharmony_ci ld1 {v2.16b}, [x8] 80462306a36Sopenharmony_ci ld1 {v3.16b}, [x9] 80562306a36Sopenharmony_ci 80662306a36Sopenharmony_ci eor v0.16b, v0.16b, v5.16b 80762306a36Sopenharmony_ci decrypt_block v0, w3, x2, x8, w7 80862306a36Sopenharmony_ci eor v0.16b, v0.16b, v5.16b 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_ci tbl v2.16b, {v0.16b}, v2.16b 81162306a36Sopenharmony_ci tbx v0.16b, {v1.16b}, v3.16b 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_ci st1 {v2.16b}, [x4] /* overlapping stores */ 81462306a36Sopenharmony_ci mov w4, wzr 81562306a36Sopenharmony_ci b .Lxtsdecctsout 81662306a36Sopenharmony_ciAES_FUNC_END(aes_xts_decrypt) 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci /* 
81962306a36Sopenharmony_ci * aes_mac_update(u8 const in[], u32 const rk[], int rounds, 82062306a36Sopenharmony_ci * int blocks, u8 dg[], int enc_before, int enc_after) 82162306a36Sopenharmony_ci */ 82262306a36Sopenharmony_ciAES_FUNC_START(aes_mac_update) 82362306a36Sopenharmony_ci ld1 {v0.16b}, [x4] /* get dg */ 82462306a36Sopenharmony_ci enc_prepare w2, x1, x7 82562306a36Sopenharmony_ci cbz w5, .Lmacloop4x 82662306a36Sopenharmony_ci 82762306a36Sopenharmony_ci encrypt_block v0, w2, x1, x7, w8 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci.Lmacloop4x: 83062306a36Sopenharmony_ci subs w3, w3, #4 83162306a36Sopenharmony_ci bmi .Lmac1x 83262306a36Sopenharmony_ci ld1 {v1.16b-v4.16b}, [x0], #64 /* get next pt block */ 83362306a36Sopenharmony_ci eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ 83462306a36Sopenharmony_ci encrypt_block v0, w2, x1, x7, w8 83562306a36Sopenharmony_ci eor v0.16b, v0.16b, v2.16b 83662306a36Sopenharmony_ci encrypt_block v0, w2, x1, x7, w8 83762306a36Sopenharmony_ci eor v0.16b, v0.16b, v3.16b 83862306a36Sopenharmony_ci encrypt_block v0, w2, x1, x7, w8 83962306a36Sopenharmony_ci eor v0.16b, v0.16b, v4.16b 84062306a36Sopenharmony_ci cmp w3, wzr 84162306a36Sopenharmony_ci csinv x5, x6, xzr, eq 84262306a36Sopenharmony_ci cbz w5, .Lmacout 84362306a36Sopenharmony_ci encrypt_block v0, w2, x1, x7, w8 84462306a36Sopenharmony_ci st1 {v0.16b}, [x4] /* return dg */ 84562306a36Sopenharmony_ci cond_yield .Lmacout, x7, x8 84662306a36Sopenharmony_ci b .Lmacloop4x 84762306a36Sopenharmony_ci.Lmac1x: 84862306a36Sopenharmony_ci add w3, w3, #4 84962306a36Sopenharmony_ci.Lmacloop: 85062306a36Sopenharmony_ci cbz w3, .Lmacout 85162306a36Sopenharmony_ci ld1 {v1.16b}, [x0], #16 /* get next pt block */ 85262306a36Sopenharmony_ci eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ 85362306a36Sopenharmony_ci 85462306a36Sopenharmony_ci subs w3, w3, #1 85562306a36Sopenharmony_ci csinv x5, x6, xzr, eq 85662306a36Sopenharmony_ci cbz w5, .Lmacout 85762306a36Sopenharmony_ci 
85862306a36Sopenharmony_ci.Lmacenc: 85962306a36Sopenharmony_ci encrypt_block v0, w2, x1, x7, w8 86062306a36Sopenharmony_ci b .Lmacloop 86162306a36Sopenharmony_ci 86262306a36Sopenharmony_ci.Lmacout: 86362306a36Sopenharmony_ci st1 {v0.16b}, [x4] /* return dg */ 86462306a36Sopenharmony_ci mov w0, w3 86562306a36Sopenharmony_ci ret 86662306a36Sopenharmony_ciAES_FUNC_END(aes_mac_update) 867