/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4 Cipher Algorithm for ARMv8 with Crypto Extensions
 * as specified in
 * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
 *
 * Copyright (C) 2022, Alibaba Group.
 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include "sm4-ce-asm.h"

.arch	armv8-a+crypto

/*
 * Define one assembler symbol per usable vector register, named
 * ".Lv<N>.4s" and equal to the register number N, so that the sm4e/sm4ekey
 * macros below can splice register numbers into a raw instruction word.
 * Only v0-v15, v20 and v24-v31 are listed: passing any other register to
 * sm4e/sm4ekey references an undefined symbol.
 */
.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \
		20, 24, 25, 26, 27, 28, 29, 30, 31
	.set .Lv\b\().4s, \b
.endr

/*
 * sm4e vd.4s, vn.4s
 *
 * Emitted as a raw .inst word (presumably so the file still builds with
 * assemblers that lack the SM4 mnemonics): vn goes into bits [9:5], vd
 * into bits [4:0].
 */
.macro sm4e, vd, vn
	.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm

/*
 * sm4ekey vd.4s, vn.4s, vm.4s
 *
 * Raw encoding as above, with vm placed at bits [20:16].
 */
.macro sm4ekey, vd, vn, vm
	.inst 0xce60c800 | (.L\vm << 16) | (.L\vn << 5) | .L\vd
.endm

/* Register macros */

#define RTMP0	v16
#define RTMP1	v17
#define RTMP2	v18
#define RTMP3	v19

/* RIV and RMAC deliberately alias the same register (v20). */
#define RIV	v20
#define RMAC	v20
#define RMASK	v21


/*
 * sm4_ce_expand_key - derive the encryption and decryption round keys.
 *
 * The user key is byte-swapped per 32-bit word (rev32) and XORed with the
 * FK constants.  Eight chained sm4ekey steps, one per CK vector, then
 * produce eight vectors of round keys (v0-v7), which are written to
 * rkey_enc.  rkey_dec receives the same eight vectors in reverse order
 * (v7 first), each passed through a tbl lookup with .Lbswap128_mask
 * (defined outside this view; presumably it reverses the four 32-bit
 * words of a vector -- TODO confirm).
 *
 * Modifies x1, x2, x4 (post-incremented), x5, v0-v7 and v16-v31.
 */
.align 3
SYM_FUNC_START(sm4_ce_expand_key)
	/* input:
	 *   x0: 128-bit key
	 *   x1: rkey_enc
	 *   x2: rkey_dec
	 *   x3: fk array
	 *   x4: ck array
	 */
	ld1		{v0.16b}, [x0];
	rev32		v0.16b, v0.16b;
	ld1		{v1.16b}, [x3];
	/* load ck */
	ld1		{v24.16b-v27.16b}, [x4], #64;
	ld1		{v28.16b-v31.16b}, [x4];

	/* input ^ fk */
	eor		v0.16b, v0.16b, v1.16b;

	/* each step consumes the previous step's output vector */
	sm4ekey		v0.4s, v0.4s, v24.4s;
	sm4ekey		v1.4s, v0.4s, v25.4s;
	sm4ekey		v2.4s, v1.4s, v26.4s;
	sm4ekey		v3.4s, v2.4s, v27.4s;
	sm4ekey		v4.4s, v3.4s, v28.4s;
	sm4ekey		v5.4s, v4.4s, v29.4s;
	sm4ekey		v6.4s, v5.4s, v30.4s;
	sm4ekey		v7.4s, v6.4s, v31.4s;

	/* v24 (ck) is no longer needed; reuse it for the permute mask */
	adr_l		x5, .Lbswap128_mask
	ld1		{v24.16b}, [x5]

	st1		{v0.16b-v3.16b}, [x1], #64;
	st1		{v4.16b-v7.16b}, [x1];

	/* decryption schedule: vectors in reverse order, each permuted */
	tbl		v16.16b, {v7.16b}, v24.16b
	tbl		v17.16b, {v6.16b}, v24.16b
	tbl		v18.16b, {v5.16b}, v24.16b
	tbl		v19.16b, {v4.16b}, v24.16b
	tbl		v20.16b, {v3.16b}, v24.16b
	tbl		v21.16b, {v2.16b}, v24.16b
	tbl		v22.16b, {v1.16b}, v24.16b
	tbl		v23.16b, {v0.16b}, v24.16b

	st1		{v16.16b-v19.16b}, [x2], #64
	st1		{v20.16b-v23.16b}, [x2]

	ret;
SYM_FUNC_END(sm4_ce_expand_key)

/*
 * sm4_ce_crypt_block - process a single 16-byte block.
 *
 * Loads one block from src, runs it through SM4_CRYPT_BLK using the round
 * keys at x0 and stores the result to dst.  There is no direction
 * argument; presumably the caller selects encryption or decryption by
 * passing the matching schedule from sm4_ce_expand_key.
 */
.align 3
SYM_FUNC_START(sm4_ce_crypt_block)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 */
	SM4_PREPARE(x0)

	ld1		{v0.16b}, [x2];
	SM4_CRYPT_BLK(v0);
	st1		{v0.16b}, [x1];

	ret;
SYM_FUNC_END(sm4_ce_crypt_block)

/*
 * sm4_ce_crypt - process nblocks independent 16-byte blocks (ECB style).
 *
 * Blocks are handled eight at a time while at least eight remain, then at
 * most one group of four, then one at a time.  src and dst advance by 16
 * bytes per block.
 *
 * nblocks must be non-zero: the single-block tail decrements the counter
 * before testing it, so a zero count would wrap around.
 */
.align 3
SYM_FUNC_START(sm4_ce_crypt)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   w3: nblocks
	 */
	SM4_PREPARE(x0)

.Lcrypt_loop_blk:
	sub		w3, w3, #8;
	/* negative result (bit 31 set): fewer than 8 blocks were left */
	tbnz		w3, #31, .Lcrypt_tail8;

	ld1		{v0.16b-v3.16b}, [x2], #64;
	ld1		{v4.16b-v7.16b}, [x2], #64;

	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);

	st1		{v0.16b-v3.16b}, [x1], #64;
	st1		{v4.16b-v7.16b}, [x1], #64;

	cbz		w3, .Lcrypt_end;
	b		.Lcrypt_loop_blk;

.Lcrypt_tail8:
	/* undo the speculative subtraction: w3 = blocks remaining (0..7) */
	add		w3, w3, #8;
	cmp		w3, #4;
	blt		.Lcrypt_tail4;

	sub		w3, w3, #4;

	ld1		{v0.16b-v3.16b}, [x2], #64;
	SM4_CRYPT_BLK4(v0, v1, v2, v3);
	st1		{v0.16b-v3.16b}, [x1], #64;

	cbz		w3, .Lcrypt_end;

.Lcrypt_tail4:
	sub		w3, w3, #1;

	ld1		{v0.16b}, [x2], #16;
	SM4_CRYPT_BLK(v0);
	st1		{v0.16b}, [x1], #16;

	cbnz		w3, .Lcrypt_tail4;

.Lcrypt_end:
	ret;
SYM_FUNC_END(sm4_ce_crypt)

/*
 * sm4_ce_cbc_enc - CBC encryption of nblocks 16-byte blocks.
 *
 * Each block is XORed with the previous ciphertext block (the IV for the
 * first one) and then run through the cipher.  The chain is inherently
 * serial, so the 4x path only batches the loads and stores; the four
 * cipher invocations still run one after another.  The last ciphertext
 * block is written back to x3 as the new IV.
 *
 * nblocks must be non-zero (the 1x loop decrements before testing).
 */
.align 3
SYM_FUNC_START(sm4_ce_cbc_enc)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: iv (big endian, 128 bit)
	 *   w4: nblocks
	 */
	SM4_PREPARE(x0)

	ld1		{RIV.16b}, [x3]

.Lcbc_enc_loop_4x:
	cmp		w4, #4
	blt		.Lcbc_enc_loop_1x

	sub		w4, w4, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* each input is chained to the ciphertext produced just before it */
	eor		v0.16b, v0.16b, RIV.16b
	SM4_CRYPT_BLK(v0)
	eor		v1.16b, v1.16b, v0.16b
	SM4_CRYPT_BLK(v1)
	eor		v2.16b, v2.16b, v1.16b
	SM4_CRYPT_BLK(v2)
	eor		v3.16b, v3.16b, v2.16b
	SM4_CRYPT_BLK(v3)

	st1		{v0.16b-v3.16b}, [x1], #64
	mov		RIV.16b, v3.16b

	cbz		w4, .Lcbc_enc_end
	b		.Lcbc_enc_loop_4x

.Lcbc_enc_loop_1x:
	sub		w4, w4, #1

	ld1		{v0.16b}, [x2], #16

	/* RIV doubles as the working block and the next chaining value */
	eor		RIV.16b, RIV.16b, v0.16b
	SM4_CRYPT_BLK(RIV)

	st1		{RIV.16b}, [x1], #16

	cbnz		w4, .Lcbc_enc_loop_1x

.Lcbc_enc_end:
	/* store new IV */
	st1		{RIV.16b}, [x3]

	ret
SYM_FUNC_END(sm4_ce_cbc_enc)

/*
 * sm4_ce_cbc_dec - CBC decryption of nblocks 16-byte blocks.
 *
 * Blocks are processed 8, then 4, then 1 at a time.  The ciphertext stays
 * untouched in v0-v7 because every output needs the preceding ciphertext
 * block; the cipher runs on rev32'd copies in v8-v15 through the *_BE
 * macro variants (from sm4-ce-asm.h; presumably these expect input that
 * is already byte-swapped -- TODO confirm).  The last ciphertext block is
 * written back to x3 as the new IV.
 *
 * nblocks must be non-zero (the 1x loop decrements before testing).
 */
.align 3
SYM_FUNC_START(sm4_ce_cbc_dec)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: iv (big endian, 128 bit)
	 *   w4: nblocks
	 */
	SM4_PREPARE(x0)

	ld1		{RIV.16b}, [x3]

.Lcbc_dec_loop_8x:
	sub		w4, w4, #8
	/* negative result (bit 31 set): fewer than 8 blocks were left */
	tbnz		w4, #31, .Lcbc_dec_4x

	ld1		{v0.16b-v3.16b}, [x2], #64
	ld1		{v4.16b-v7.16b}, [x2], #64

	rev32		v8.16b, v0.16b
	rev32		v9.16b, v1.16b
	rev32		v10.16b, v2.16b
	rev32		v11.16b, v3.16b
	rev32		v12.16b, v4.16b
	rev32		v13.16b, v5.16b
	rev32		v14.16b, v6.16b
	rev32		v15.16b, v7.16b

	SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v12, v13, v14, v15)

	/* XOR with the previous ciphertext block (IV for the first) */
	eor		v8.16b, v8.16b, RIV.16b
	eor		v9.16b, v9.16b, v0.16b
	eor		v10.16b, v10.16b, v1.16b
	eor		v11.16b, v11.16b, v2.16b
	eor		v12.16b, v12.16b, v3.16b
	eor		v13.16b, v13.16b, v4.16b
	eor		v14.16b, v14.16b, v5.16b
	eor		v15.16b, v15.16b, v6.16b

	st1		{v8.16b-v11.16b}, [x1], #64
	st1		{v12.16b-v15.16b}, [x1], #64

	/* last ciphertext block chains into the next group */
	mov		RIV.16b, v7.16b

	cbz		w4, .Lcbc_dec_end
	b		.Lcbc_dec_loop_8x

.Lcbc_dec_4x:
	/* undo the speculative subtraction: w4 = blocks remaining (0..7) */
	add		w4, w4, #8
	cmp		w4, #4
	blt		.Lcbc_dec_loop_1x

	sub		w4, w4, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	rev32		v8.16b, v0.16b
	rev32		v9.16b, v1.16b
	rev32		v10.16b, v2.16b
	rev32		v11.16b, v3.16b

	SM4_CRYPT_BLK4_BE(v8, v9, v10, v11)

	eor		v8.16b, v8.16b, RIV.16b
	eor		v9.16b, v9.16b, v0.16b
	eor		v10.16b, v10.16b, v1.16b
	eor		v11.16b, v11.16b, v2.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	mov		RIV.16b, v3.16b

	cbz		w4, .Lcbc_dec_end

.Lcbc_dec_loop_1x:
	sub		w4, w4, #1

	ld1		{v0.16b}, [x2], #16

	rev32		v8.16b, v0.16b

	SM4_CRYPT_BLK_BE(v8)

	eor		v8.16b, v8.16b, RIV.16b
	st1		{v8.16b}, [x1], #16

	mov		RIV.16b, v0.16b

	cbnz		w4, .Lcbc_dec_loop_1x

.Lcbc_dec_end:
	/* store new IV */
	st1		{RIV.16b}, [x3]

	ret
SYM_FUNC_END(sm4_ce_cbc_dec)

/*
 * sm4_ce_cbc_cts_enc - CBC ciphertext-stealing encryption of the tail.
 *
 * Handles one full block followed by a final block of (nbytes - 16)
 * bytes.  The final block is read and written with 16-byte accesses that
 * overlap the first block, so src and dst must each span nbytes bytes
 * (presumably 16 < nbytes <= 32 -- TODO confirm against the caller).
 * Byte selection and padding use .Lcts_permute_table, which is defined
 * outside this view.  The IV at x3 is read but not written back.
 */
.align 3
SYM_FUNC_START(sm4_ce_cbc_cts_enc)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: iv (big endian, 128 bit)
	 *   w4: nbytes
	 */
	SM4_PREPARE(x0)

	/* x5 = length of the final block */
	sub		w5, w4, #16
	uxtw		x5, w5

	ld1		{RIV.16b}, [x3]

	/* ordinary CBC step for the full block */
	ld1		{v0.16b}, [x2]
	eor		RIV.16b, RIV.16b, v0.16b
	SM4_CRYPT_BLK(RIV)

	/* load permute table */
	adr_l		x6, .Lcts_permute_table
	add		x7, x6, #32
	add		x6, x6, x5
	sub		x7, x7, x5
	ld1		{v3.16b}, [x6]
	ld1		{v4.16b}, [x7]

	/* overlapping loads */
	add		x2, x2, x5
	ld1		{v1.16b}, [x2]

	/* create Cn from En-1 */
	tbl		v0.16b, {RIV.16b}, v3.16b
	/* padding Pn with zeros */
	tbl		v1.16b, {v1.16b}, v4.16b

	eor		v1.16b, v1.16b, RIV.16b
	SM4_CRYPT_BLK(v1)

	/* overlapping stores */
	add		x5, x1, x5
	/* order matters: the store to dst must land last where they overlap */
	st1		{v0.16b}, [x5]
	st1		{v1.16b}, [x1]

	ret
SYM_FUNC_END(sm4_ce_cbc_cts_enc)

/*
 * sm4_ce_cbc_cts_dec - CBC ciphertext-stealing decryption of the tail.
 *
 * Counterpart of sm4_ce_cbc_cts_enc: consumes one full block plus a final
 * block of (nbytes - 16) bytes using overlapping 16-byte loads and
 * stores (presumably 16 < nbytes <= 32 -- TODO confirm against the
 * caller).  The IV at x3 is read but not written back.
 */
.align 3
SYM_FUNC_START(sm4_ce_cbc_cts_dec)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: iv (big endian, 128 bit)
	 *   w4: nbytes
	 */
	SM4_PREPARE(x0)

	/* x5 = length of the final block */
	sub		w5, w4, #16
	uxtw		x5, w5

	ld1		{RIV.16b}, [x3]

	/* load permute table */
	adr_l		x6, .Lcts_permute_table
	add		x7, x6, #32
	add		x6, x6, x5
	sub		x7, x7, x5
	ld1		{v3.16b}, [x6]
	ld1		{v4.16b}, [x7]

	/* overlapping loads */
	ld1		{v0.16b}, [x2], x5
	ld1		{v1.16b}, [x2]

	SM4_CRYPT_BLK(v0)
	/* select the first Ln bytes of Xn to create Pn */
	tbl		v2.16b, {v0.16b}, v3.16b
	eor		v2.16b, v2.16b, v1.16b

	/* overwrite the first Ln bytes with Cn to create En-1 */
	tbx		v0.16b, {v1.16b}, v4.16b
	SM4_CRYPT_BLK(v0)
	eor		v0.16b, v0.16b, RIV.16b

	/* overlapping stores */
	add		x5, x1, x5
	/* order matters: the store to dst must land last where they overlap */
	st1		{v2.16b}, [x5]
	st1		{v0.16b}, [x1]

	ret
SYM_FUNC_END(sm4_ce_cbc_cts_dec)

/*
 * sm4_ce_cfb_enc - CFB encryption of nblocks 16-byte blocks.
 *
 * Each output block is the cipher applied to the previous ciphertext
 * block (the IV for the first one), XORed with the input block.  The
 * chain is serial, so the 4x path only batches the loads and stores.  The
 * last ciphertext block is written back to x3 as the new IV.
 *
 * nblocks must be non-zero (the 1x loop decrements before testing).
 */
.align 3
SYM_FUNC_START(sm4_ce_cfb_enc)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: iv (big endian, 128 bit)
	 *   w4: nblocks
	 */
	SM4_PREPARE(x0)

	ld1		{RIV.16b}, [x3]

.Lcfb_enc_loop_4x:
	cmp		w4, #4
	blt		.Lcfb_enc_loop_1x

	sub		w4, w4, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* v8 is the scratch keystream block; v0-v3 become ciphertext */
	rev32		v8.16b, RIV.16b
	SM4_CRYPT_BLK_BE(v8)
	eor		v0.16b, v0.16b, v8.16b

	rev32		v8.16b, v0.16b
	SM4_CRYPT_BLK_BE(v8)
	eor		v1.16b, v1.16b, v8.16b

	rev32		v8.16b, v1.16b
	SM4_CRYPT_BLK_BE(v8)
	eor		v2.16b, v2.16b, v8.16b

	rev32		v8.16b, v2.16b
	SM4_CRYPT_BLK_BE(v8)
	eor		v3.16b, v3.16b, v8.16b

	st1		{v0.16b-v3.16b}, [x1], #64
	mov		RIV.16b, v3.16b

	cbz		w4, .Lcfb_enc_end
	b		.Lcfb_enc_loop_4x

.Lcfb_enc_loop_1x:
	sub		w4, w4, #1

	ld1		{v0.16b}, [x2], #16

	/* RIV becomes the ciphertext, i.e. the next feedback value */
	SM4_CRYPT_BLK(RIV)
	eor		RIV.16b, RIV.16b, v0.16b

	st1		{RIV.16b}, [x1], #16

	cbnz		w4, .Lcfb_enc_loop_1x

.Lcfb_enc_end:
	/* store new IV */
	st1		{RIV.16b}, [x3]

	ret
SYM_FUNC_END(sm4_ce_cfb_enc)

/*
 * sm4_ce_cfb_dec - CFB decryption of nblocks 16-byte blocks.
 *
 * The keystream for block i is the cipher applied to ciphertext block
 * i-1 (the IV for the first one).  All feedback inputs are already known,
 * so 8 or 4 blocks go through the cipher together.  The last ciphertext
 * block is written back to x3 as the new IV.
 *
 * nblocks must be non-zero (the 1x loop decrements before testing).
 */
.align 3
SYM_FUNC_START(sm4_ce_cfb_dec)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: iv (big endian, 128 bit)
	 *   w4: nblocks
	 */
	SM4_PREPARE(x0)

	ld1		{RIV.16b}, [x3]

.Lcfb_dec_loop_8x:
	sub		w4, w4, #8
	/* negative result (bit 31 set): fewer than 8 blocks were left */
	tbnz		w4, #31, .Lcfb_dec_4x

	ld1		{v0.16b-v3.16b}, [x2], #64
	ld1		{v4.16b-v7.16b}, [x2], #64

	/* cipher inputs: IV followed by ciphertext blocks 0..6 */
	rev32		v8.16b, RIV.16b
	rev32		v9.16b, v0.16b
	rev32		v10.16b, v1.16b
	rev32		v11.16b, v2.16b
	rev32		v12.16b, v3.16b
	rev32		v13.16b, v4.16b
	rev32		v14.16b, v5.16b
	rev32		v15.16b, v6.16b

	SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v12, v13, v14, v15)

	/* save the last ciphertext block before v7 is overwritten below */
	mov		RIV.16b, v7.16b

	eor		v0.16b, v0.16b, v8.16b
	eor		v1.16b, v1.16b, v9.16b
	eor		v2.16b, v2.16b, v10.16b
	eor		v3.16b, v3.16b, v11.16b
	eor		v4.16b, v4.16b, v12.16b
	eor		v5.16b, v5.16b, v13.16b
	eor		v6.16b, v6.16b, v14.16b
	eor		v7.16b, v7.16b, v15.16b

	st1		{v0.16b-v3.16b}, [x1], #64
	st1		{v4.16b-v7.16b}, [x1], #64

	cbz		w4, .Lcfb_dec_end
	b		.Lcfb_dec_loop_8x

.Lcfb_dec_4x:
	/* undo the speculative subtraction: w4 = blocks remaining (0..7) */
	add		w4, w4, #8
	cmp		w4, #4
	blt		.Lcfb_dec_loop_1x

	sub		w4, w4, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	rev32		v8.16b, RIV.16b
	rev32		v9.16b, v0.16b
	rev32		v10.16b, v1.16b
	rev32		v11.16b, v2.16b

	SM4_CRYPT_BLK4_BE(v8, v9, v10, v11)

	/* save the last ciphertext block before v3 is overwritten below */
	mov		RIV.16b, v3.16b

	eor		v0.16b, v0.16b, v8.16b
	eor		v1.16b, v1.16b, v9.16b
	eor		v2.16b, v2.16b, v10.16b
	eor		v3.16b, v3.16b, v11.16b

	st1		{v0.16b-v3.16b}, [x1], #64

	cbz		w4, .Lcfb_dec_end

.Lcfb_dec_loop_1x:
	sub		w4, w4, #1

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK(RIV)

	eor		RIV.16b, RIV.16b, v0.16b
	st1		{RIV.16b}, [x1], #16

	/* the ciphertext just consumed is the next feedback value */
	mov		RIV.16b, v0.16b

	cbnz		w4, .Lcfb_dec_loop_1x

.Lcfb_dec_end:
	/* store new IV */
	st1		{RIV.16b}, [x3]

	ret
SYM_FUNC_END(sm4_ce_cfb_dec)

/*
 * sm4_ce_ctr_enc - CTR mode over nblocks 16-byte blocks.
 *
 * The 128-bit counter is loaded from x3 into the register pair x7/x8 and
 * each half is byte-reversed, so that x8 can be incremented with its
 * carry propagating into x7.  Counter blocks are materialised with
 * inc_le128, run through the cipher and XORed with the input.  The
 * advanced counter is byte-reversed again and written back to x3.
 *
 * nblocks must be non-zero (the 1x loop decrements before testing).
 */
.align 3
SYM_FUNC_START(sm4_ce_ctr_enc)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nblocks
	 */
	SM4_PREPARE(x0)

	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

.Lctr_loop_8x:
	sub		w4, w4, #8
	/* negative result (bit 31 set): fewer than 8 blocks were left */
	tbnz		w4, #31, .Lctr_4x

/*
 * inc_le128(vctr): copy the current counter (x7 -> d[0], x8 -> d[1]) into
 * vctr, byte-reverse each 64-bit half of the vector, and post-increment
 * the x7:x8 pair by one (adds on x8, carry into x7 via adc).  The adds
 * clobbers the condition flags.
 */
#define inc_le128(vctr)					\
		mov		vctr.d[1], x8;		\
		mov		vctr.d[0], x7;		\
		adds		x8, x8, #1;		\
		rev64		vctr.16b, vctr.16b;	\
		adc		x7, x7, xzr;

	/* construct CTRs */
	inc_le128(v0)			/* +0 */
	inc_le128(v1)			/* +1 */
	inc_le128(v2)			/* +2 */
	inc_le128(v3)			/* +3 */
	inc_le128(v4)			/* +4 */
	inc_le128(v5)			/* +5 */
	inc_le128(v6)			/* +6 */
	inc_le128(v7)			/* +7 */

	ld1		{v8.16b-v11.16b}, [x2], #64
	ld1		{v12.16b-v15.16b}, [x2], #64

	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)

	eor		v0.16b, v0.16b, v8.16b
	eor		v1.16b, v1.16b, v9.16b
	eor		v2.16b, v2.16b, v10.16b
	eor		v3.16b, v3.16b, v11.16b
	eor		v4.16b, v4.16b, v12.16b
	eor		v5.16b, v5.16b, v13.16b
	eor		v6.16b, v6.16b, v14.16b
	eor		v7.16b, v7.16b, v15.16b

	st1		{v0.16b-v3.16b}, [x1], #64
	st1		{v4.16b-v7.16b}, [x1], #64

	cbz		w4, .Lctr_end
	b		.Lctr_loop_8x

.Lctr_4x:
	/* undo the speculative subtraction: w4 = blocks remaining (0..7) */
	add		w4, w4, #8
	/* flags must be consumed before inc_le128 runs: its adds clobbers them */
	cmp		w4, #4
	blt		.Lctr_loop_1x

	sub		w4, w4, #4

	/* construct CTRs */
	inc_le128(v0)			/* +0 */
	inc_le128(v1)			/* +1 */
	inc_le128(v2)			/* +2 */
	inc_le128(v3)			/* +3 */

	ld1		{v8.16b-v11.16b}, [x2], #64

	SM4_CRYPT_BLK4(v0, v1, v2, v3)

	eor		v0.16b, v0.16b, v8.16b
	eor		v1.16b, v1.16b, v9.16b
	eor		v2.16b, v2.16b, v10.16b
	eor		v3.16b, v3.16b, v11.16b

	st1		{v0.16b-v3.16b}, [x1], #64

	cbz		w4, .Lctr_end

.Lctr_loop_1x:
	sub		w4, w4, #1

	/* construct CTRs */
	inc_le128(v0)

	ld1		{v8.16b}, [x2], #16

	SM4_CRYPT_BLK(v0)

	eor		v0.16b, v0.16b, v8.16b
	st1		{v0.16b}, [x1], #16

	cbnz		w4, .Lctr_loop_1x

.Lctr_end:
	/* store new CTR */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

	ret
SYM_FUNC_END(sm4_ce_ctr_enc)


/*
 * tweak_next(vt, vin, RTMP): vt = vin advanced by one step.
 *
 * Each 64-bit lane of vin is doubled (add vin, vin).  The bit shifted out
 * of each lane is recovered as an all-ones/all-zeros lane (sshr #63),
 * masked with RMASK, swapped to the other lane (ext #8) and XORed in.
 * RMASK must have been set up before the macro is used; RTMP is
 * clobbered.
 */
#define tweak_next(vt, vin, RTMP)					\
		sshr		RTMP.2d, vin.2d, #63;			\
		and		RTMP.16b, RTMP.16b, RMASK.16b;		\
		add		vt.2d, vin.2d, vin.2d;			\
		ext		RTMP.16b, RTMP.16b, RTMP.16b, #8;	\
		eor		vt.16b, vt.16b, RTMP.16b;

.align 3
SYM_FUNC_START(sm4_ce_xts_enc)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: tweak (big endian, 128 bit)
	 *   w4: nbytes
	 *   x5: round key array for IV
	 */
	ld1		{v8.16b}, [x3]

	/* a NULL x5 skips the first-tweak generation */
	cbz		x5, .Lxts_enc_nofirst

	SM4_PREPARE(x5)

/* Generate first tweak */ 69462306a36Sopenharmony_ci SM4_CRYPT_BLK(v8) 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_ci.Lxts_enc_nofirst: 69762306a36Sopenharmony_ci SM4_PREPARE(x0) 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci ands w5, w4, #15 70062306a36Sopenharmony_ci lsr w4, w4, #4 70162306a36Sopenharmony_ci sub w6, w4, #1 70262306a36Sopenharmony_ci csel w4, w4, w6, eq 70362306a36Sopenharmony_ci uxtw x5, w5 70462306a36Sopenharmony_ci 70562306a36Sopenharmony_ci movi RMASK.2s, #0x1 70662306a36Sopenharmony_ci movi RTMP0.2s, #0x87 70762306a36Sopenharmony_ci uzp1 RMASK.4s, RMASK.4s, RTMP0.4s 70862306a36Sopenharmony_ci 70962306a36Sopenharmony_ci cbz w4, .Lxts_enc_cts 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci.Lxts_enc_loop_8x: 71262306a36Sopenharmony_ci sub w4, w4, #8 71362306a36Sopenharmony_ci tbnz w4, #31, .Lxts_enc_4x 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_ci tweak_next( v9, v8, RTMP0) 71662306a36Sopenharmony_ci tweak_next(v10, v9, RTMP1) 71762306a36Sopenharmony_ci tweak_next(v11, v10, RTMP2) 71862306a36Sopenharmony_ci tweak_next(v12, v11, RTMP3) 71962306a36Sopenharmony_ci tweak_next(v13, v12, RTMP0) 72062306a36Sopenharmony_ci tweak_next(v14, v13, RTMP1) 72162306a36Sopenharmony_ci tweak_next(v15, v14, RTMP2) 72262306a36Sopenharmony_ci 72362306a36Sopenharmony_ci ld1 {v0.16b-v3.16b}, [x2], #64 72462306a36Sopenharmony_ci ld1 {v4.16b-v7.16b}, [x2], #64 72562306a36Sopenharmony_ci eor v0.16b, v0.16b, v8.16b 72662306a36Sopenharmony_ci eor v1.16b, v1.16b, v9.16b 72762306a36Sopenharmony_ci eor v2.16b, v2.16b, v10.16b 72862306a36Sopenharmony_ci eor v3.16b, v3.16b, v11.16b 72962306a36Sopenharmony_ci eor v4.16b, v4.16b, v12.16b 73062306a36Sopenharmony_ci eor v5.16b, v5.16b, v13.16b 73162306a36Sopenharmony_ci eor v6.16b, v6.16b, v14.16b 73262306a36Sopenharmony_ci eor v7.16b, v7.16b, v15.16b 73362306a36Sopenharmony_ci 73462306a36Sopenharmony_ci SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7) 73562306a36Sopenharmony_ci 
73662306a36Sopenharmony_ci eor v0.16b, v0.16b, v8.16b 73762306a36Sopenharmony_ci eor v1.16b, v1.16b, v9.16b 73862306a36Sopenharmony_ci eor v2.16b, v2.16b, v10.16b 73962306a36Sopenharmony_ci eor v3.16b, v3.16b, v11.16b 74062306a36Sopenharmony_ci eor v4.16b, v4.16b, v12.16b 74162306a36Sopenharmony_ci eor v5.16b, v5.16b, v13.16b 74262306a36Sopenharmony_ci eor v6.16b, v6.16b, v14.16b 74362306a36Sopenharmony_ci eor v7.16b, v7.16b, v15.16b 74462306a36Sopenharmony_ci st1 {v0.16b-v3.16b}, [x1], #64 74562306a36Sopenharmony_ci st1 {v4.16b-v7.16b}, [x1], #64 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_ci tweak_next(v8, v15, RTMP3) 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ci cbz w4, .Lxts_enc_cts 75062306a36Sopenharmony_ci b .Lxts_enc_loop_8x 75162306a36Sopenharmony_ci 75262306a36Sopenharmony_ci.Lxts_enc_4x: 75362306a36Sopenharmony_ci add w4, w4, #8 75462306a36Sopenharmony_ci cmp w4, #4 75562306a36Sopenharmony_ci blt .Lxts_enc_loop_1x 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ci sub w4, w4, #4 75862306a36Sopenharmony_ci 75962306a36Sopenharmony_ci tweak_next( v9, v8, RTMP0) 76062306a36Sopenharmony_ci tweak_next(v10, v9, RTMP1) 76162306a36Sopenharmony_ci tweak_next(v11, v10, RTMP2) 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ci ld1 {v0.16b-v3.16b}, [x2], #64 76462306a36Sopenharmony_ci eor v0.16b, v0.16b, v8.16b 76562306a36Sopenharmony_ci eor v1.16b, v1.16b, v9.16b 76662306a36Sopenharmony_ci eor v2.16b, v2.16b, v10.16b 76762306a36Sopenharmony_ci eor v3.16b, v3.16b, v11.16b 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_ci SM4_CRYPT_BLK4(v0, v1, v2, v3) 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ci eor v0.16b, v0.16b, v8.16b 77262306a36Sopenharmony_ci eor v1.16b, v1.16b, v9.16b 77362306a36Sopenharmony_ci eor v2.16b, v2.16b, v10.16b 77462306a36Sopenharmony_ci eor v3.16b, v3.16b, v11.16b 77562306a36Sopenharmony_ci st1 {v0.16b-v3.16b}, [x1], #64 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ci tweak_next(v8, v11, RTMP3) 
77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ci cbz w4, .Lxts_enc_cts 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci.Lxts_enc_loop_1x: 78262306a36Sopenharmony_ci sub w4, w4, #1 78362306a36Sopenharmony_ci 78462306a36Sopenharmony_ci ld1 {v0.16b}, [x2], #16 78562306a36Sopenharmony_ci eor v0.16b, v0.16b, v8.16b 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci SM4_CRYPT_BLK(v0) 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ci eor v0.16b, v0.16b, v8.16b 79062306a36Sopenharmony_ci st1 {v0.16b}, [x1], #16 79162306a36Sopenharmony_ci 79262306a36Sopenharmony_ci tweak_next(v8, v8, RTMP0) 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci cbnz w4, .Lxts_enc_loop_1x 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci.Lxts_enc_cts: 79762306a36Sopenharmony_ci cbz x5, .Lxts_enc_end 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci /* cipher text stealing */ 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_ci tweak_next(v9, v8, RTMP0) 80262306a36Sopenharmony_ci ld1 {v0.16b}, [x2] 80362306a36Sopenharmony_ci eor v0.16b, v0.16b, v8.16b 80462306a36Sopenharmony_ci SM4_CRYPT_BLK(v0) 80562306a36Sopenharmony_ci eor v0.16b, v0.16b, v8.16b 80662306a36Sopenharmony_ci 80762306a36Sopenharmony_ci /* load permute table */ 80862306a36Sopenharmony_ci adr_l x6, .Lcts_permute_table 80962306a36Sopenharmony_ci add x7, x6, #32 81062306a36Sopenharmony_ci add x6, x6, x5 81162306a36Sopenharmony_ci sub x7, x7, x5 81262306a36Sopenharmony_ci ld1 {v3.16b}, [x6] 81362306a36Sopenharmony_ci ld1 {v4.16b}, [x7] 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci /* overlapping loads */ 81662306a36Sopenharmony_ci add x2, x2, x5 81762306a36Sopenharmony_ci ld1 {v1.16b}, [x2] 81862306a36Sopenharmony_ci 81962306a36Sopenharmony_ci /* create Cn from En-1 */ 82062306a36Sopenharmony_ci tbl v2.16b, {v0.16b}, v3.16b 82162306a36Sopenharmony_ci /* padding Pn with En-1 at the end */ 82262306a36Sopenharmony_ci tbx v0.16b, {v1.16b}, v4.16b 82362306a36Sopenharmony_ci 82462306a36Sopenharmony_ci eor 
v0.16b, v0.16b, v9.16b	/* operands of the eor begun on the previous line */
	SM4_CRYPT_BLK(v0)
	eor		v0.16b, v0.16b, v9.16b


	/* overlapping stores */
	add		x5, x1, x5
	st1		{v2.16b}, [x5]
	st1		{v0.16b}, [x1]

	b		.Lxts_enc_ret

.Lxts_enc_end:
	/* store new tweak */
	st1		{v8.16b}, [x3]

.Lxts_enc_ret:
	ret
SYM_FUNC_END(sm4_ce_xts_enc)

.align 3
SYM_FUNC_START(sm4_ce_xts_dec)
	/*
	 * XTS-mode processing of nbytes of data from src to dst, with
	 * ciphertext stealing when nbytes is not a multiple of 16.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: tweak (big endian, 128 bit)
	 *   w4: nbytes
	 *   x5: round key array for IV
	 *       (NULL: the tweak at x3 is used as loaded, without the
	 *        initial SM4_CRYPT_BLK pass)
	 *
	 * output:
	 *   [x1]: processed data
	 *   [x3]: next tweak, written back only when nbytes is a multiple
	 *         of 16 (the ciphertext-stealing path returns without
	 *         storing it)
	 *
	 * register use inside the function:
	 *   w4:     number of whole blocks still to be handled by the loops
	 *   x5:     nbytes % 16 (length of the trailing partial block)
	 *   v8:     current tweak; v9-v15: following tweaks
	 *   RMASK:  constant consumed by tweak_next()
	 *
	 * NOTE(review): if nbytes is in 1..15 the block count below
	 * underflows to 0xffffffff; callers are presumably required to pass
	 * at least one full block -- confirm against the glue code.
	 * NOTE(review): v8-v15 are modified without save/restore although
	 * AAPCS64 treats their low halves as callee-saved; presumably fine
	 * for the callers of this routine -- confirm.
	 */
	ld1		{v8.16b}, [x3]

	cbz		x5, .Lxts_dec_nofirst

	SM4_PREPARE(x5)

	/* Generate first tweak */
	SM4_CRYPT_BLK(v8)

.Lxts_dec_nofirst:
	SM4_PREPARE(x0)

	/*
	 * w5 = nbytes % 16, w4 = nbytes / 16.  When there is a partial
	 * trailing block (w5 != 0), hold one full block back from the
	 * loops so that it can be combined with the tail in the
	 * ciphertext-stealing code.
	 */
	ands		w5, w4, #15
	lsr		w4, w4, #4
	sub		w6, w4, #1
	csel		w4, w4, w6, eq
	uxtw		x5, w5

	/* RMASK = { 0x1, 0x0, 0x87, 0x0 } (32-bit lanes), for tweak_next() */
	movi		RMASK.2s, #0x1
	movi		RTMP0.2s, #0x87
	uzp1		RMASK.4s, RMASK.4s, RTMP0.4s

	cbz		w4, .Lxts_dec_cts

.Lxts_dec_loop_8x:
	sub		w4, w4, #8
	/* went negative: fewer than 8 blocks are left */
	tbnz		w4, #31, .Lxts_dec_4x

	tweak_next( v9,  v8, RTMP0)
	tweak_next(v10,  v9, RTMP1)
	tweak_next(v11, v10, RTMP2)
	tweak_next(v12, v11, RTMP3)
	tweak_next(v13, v12, RTMP0)
	tweak_next(v14, v13, RTMP1)
	tweak_next(v15, v14, RTMP2)

	ld1		{v0.16b-v3.16b}, [x2], #64
	ld1		{v4.16b-v7.16b}, [x2], #64
	eor		v0.16b, v0.16b,  v8.16b
	eor		v1.16b, v1.16b,  v9.16b
	eor		v2.16b, v2.16b, v10.16b
	eor		v3.16b, v3.16b, v11.16b
	eor		v4.16b, v4.16b, v12.16b
	eor		v5.16b, v5.16b, v13.16b
	eor		v6.16b, v6.16b, v14.16b
	eor		v7.16b, v7.16b, v15.16b

	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)

	eor		v0.16b, v0.16b,  v8.16b
	eor		v1.16b, v1.16b,  v9.16b
	eor		v2.16b, v2.16b, v10.16b
	eor		v3.16b, v3.16b, v11.16b
	eor		v4.16b, v4.16b, v12.16b
	eor		v5.16b, v5.16b, v13.16b
	eor		v6.16b, v6.16b, v14.16b
	eor		v7.16b, v7.16b, v15.16b
	st1		{v0.16b-v3.16b}, [x1], #64
	st1		{v4.16b-v7.16b}, [x1], #64

	/* tweak for the block following this group of eight */
	tweak_next(v8, v15, RTMP3)

	cbz		w4, .Lxts_dec_cts
	b		.Lxts_dec_loop_8x

.Lxts_dec_4x:
	/* undo the speculative "sub #8" done at the top of the 8x loop */
	add		w4, w4, #8
	cmp		w4, #4
	blt		.Lxts_dec_loop_1x

	sub		w4, w4, #4

	tweak_next( v9,  v8, RTMP0)
	tweak_next(v10,  v9, RTMP1)
	tweak_next(v11, v10, RTMP2)

	ld1		{v0.16b-v3.16b}, [x2], #64
	eor		v0.16b, v0.16b,  v8.16b
	eor		v1.16b, v1.16b,  v9.16b
	eor		v2.16b, v2.16b, v10.16b
	eor		v3.16b, v3.16b, v11.16b

	SM4_CRYPT_BLK4(v0, v1, v2, v3)

	eor		v0.16b, v0.16b,  v8.16b
	eor		v1.16b, v1.16b,  v9.16b
	eor		v2.16b, v2.16b, v10.16b
	eor		v3.16b, v3.16b, v11.16b
	st1		{v0.16b-v3.16b}, [x1], #64

	tweak_next(v8, v11, RTMP3)

	cbz		w4, .Lxts_dec_cts

.Lxts_dec_loop_1x:
	sub		w4, w4, #1

	ld1		{v0.16b}, [x2], #16
	eor		v0.16b, v0.16b, v8.16b

	SM4_CRYPT_BLK(v0)

	eor		v0.16b, v0.16b, v8.16b
	st1		{v0.16b}, [x1], #16

	tweak_next(v8, v8, RTMP0)

	cbnz		w4, .Lxts_dec_loop_1x

.Lxts_dec_cts:
	/* no partial trailing block: just write back the tweak */
	cbz		x5, .Lxts_dec_end

	/* cipher text stealing */

	/*
	 * The held-back full block is processed with the *following*
	 * tweak (v9) and the recombined final block with the current one
	 * (v8), i.e. the two tweaks are applied in the opposite order to
	 * sm4_ce_xts_enc.
	 */
	tweak_next(v9, v8, RTMP0)
	ld1		{v0.16b}, [x2]
	eor		v0.16b, v0.16b, v9.16b
	SM4_CRYPT_BLK(v0)
	eor		v0.16b, v0.16b, v9.16b

	/*
	 * load permute table
	 *   v3 = table[x5 .. x5 + 15]:      (16 - x5) x 0xff, then 0 .. x5 - 1
	 *   v4 = table[32 - x5 .. 47 - x5]: 16 - x5 .. 15, then (16 - x5) x 0xff
	 */
	adr_l		x6, .Lcts_permute_table
	add		x7, x6, #32
	add		x6, x6, x5
	sub		x7, x7, x5
	ld1		{v3.16b}, [x6]
	ld1		{v4.16b}, [x7]

	/* overlapping loads: the last 16 bytes of src, ending with the tail */
	add		x2, x2, x5
	ld1		{v1.16b}, [x2]

	/*
	 * v2: first x5 bytes of the block just processed, moved to the top
	 * of the vector; it becomes the final partial output block
	 */
	tbl		v2.16b, {v0.16b}, v3.16b
	/*
	 * v0: the x5 tail bytes of src followed by the remaining bytes of
	 * the block just processed
	 */
	tbx		v0.16b, {v1.16b}, v4.16b

	eor		v0.16b, v0.16b, v8.16b
	SM4_CRYPT_BLK(v0)
	eor		v0.16b, v0.16b, v8.16b


	/*
	 * overlapping stores: v2 is written first at dst + x5, then v0 at
	 * dst overwrites the low (16 - x5) filler bytes of that store, so
	 * the order of the two stores matters
	 */
	add		x5, x1, x5
	st1		{v2.16b}, [x5]
	st1		{v0.16b}, [x1]

	b		.Lxts_dec_ret

.Lxts_dec_end:
	/* store new tweak */
	st1		{v8.16b}, [x3]

.Lxts_dec_ret:
	ret
SYM_FUNC_END(sm4_ce_xts_dec)

.align 3
SYM_FUNC_START(sm4_ce_mac_update)
	/*
	 * Fold nblocks 16-byte blocks from src into the running MAC value
	 * held in digest: each block is XORed into the state and the state
	 * is then passed through SM4_CRYPT_BLK.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: digest
	 *   x2: src
	 *   w3: nblocks
	 *   w4: enc_before
	 *       (non-zero: run SM4_CRYPT_BLK on the loaded digest before
	 *        any input is absorbed)
	 *   w5: enc_after
	 *       (zero: the last block is only XORed into the state and is
	 *        left un-encrypted)
	 *
	 * output:
	 *   [x1]: updated digest
	 */
	SM4_PREPARE(x0)

	ld1		{RMAC.16b}, [x1]

	cbz		w4, .Lmac_update

	SM4_CRYPT_BLK(RMAC)

.Lmac_update:
	/* nothing to absorb: store the (possibly encrypted) state */
	cbz		w3, .Lmac_ret

	/* w3 = enc_after ? nblocks : nblocks - 1 (blocks to XOR *and* encrypt) */
	sub		w6, w3, #1
	cmp		w5, wzr
	csel		w3, w3, w6, ne

	cbz		w3, .Lmac_end

.Lmac_loop_4x:
	cmp		w3, #4
	blt		.Lmac_loop_1x

	sub		w3, w3, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* the chain is serial: each block depends on the previous result */
	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v3.16b
	SM4_CRYPT_BLK(RMAC)

	cbz		w3, .Lmac_end
	b		.Lmac_loop_4x

.Lmac_loop_1x:
	sub		w3, w3, #1

	ld1		{v0.16b}, [x2], #16

	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK(RMAC)

	cbnz		w3, .Lmac_loop_1x


.Lmac_end:
	cbnz		w5, .Lmac_ret

	/* enc_after == 0: absorb the final block without encrypting it */
	ld1		{v0.16b}, [x2], #16
	eor		RMAC.16b, RMAC.16b, v0.16b

.Lmac_ret:
	st1		{RMAC.16b}, [x1]
	ret
SYM_FUNC_END(sm4_ce_mac_update)


	.section	".rodata", "a"
	.align 4
	/* tbl index vector that reverses the order of the four 32-bit words */
.Lbswap128_mask:
	.byte		0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b
	.byte		0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03

	/*
	 * 16 x 0xff, the identity indices 0..15, 16 x 0xff.  A 16-byte window
	 * read at an offset into this table gives a tbl/tbx index vector that
	 * shifts a block by a byte count; 0xff is out of range, so tbl
	 * produces zero and tbx leaves the destination byte unchanged.
	 */
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff