/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
 * as specified in rfc8998
 * https://datatracker.ietf.org/doc/html/rfc8998
 *
 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include "sm4-ce-asm.h"

.arch	armv8-a+crypto

/*
 * Define one assembler symbol per vector register operand that may be handed
 * to the sm4e macro below (.Lv0.4s = 0, .Lv1.4s = 1, ...), so that the macro
 * can translate a register operand into its register number.
 */
.irp b, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31
	.set .Lv\b\().4s, \b
.endr

/*
 * Emit sm4e as a raw opcode word: the number of \vn is shifted into bits 5
 * and up, the number of \vd occupies the low bits.
 * NOTE(review): presumably hand-encoded so that assemblers without SM4
 * mnemonic support can still build this file - confirm.
 */
.macro sm4e, vd, vn
	.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm

/* Register macros */

#define RMAC	v16		/* running CBC-MAC state */

/* Helper macros. */

/*
 * inc_le128(vctr): materialise the current 128-bit counter held in x7:x8
 * (x7 = first eight bytes, x8 = last eight bytes, both already byte-swapped
 * by the caller with rev) into vctr, byte-reversing each 64-bit lane with
 * rev64, then post-increment x7:x8 by one with carry from x8 into x7.
 *
 * Clobbers the condition flags (adds/adc).
 */
#define inc_le128(vctr)					\
		mov		vctr.d[1], x8;		\
		mov		vctr.d[0], x7;		\
		adds		x8, x8, #1;		\
		rev64		vctr.16b, vctr.16b;	\
		adc		x7, x7, xzr;


/*
 * sm4_ce_cbcmac_update - absorb whole 16-byte blocks into a CBC-MAC state.
 *
 * For every input block the MAC register is first passed through
 * SM4_CRYPT_BLK and the block is then XORed in, so the value written back to
 * [x1] has the last block absorbed but not yet passed through SM4_CRYPT_BLK.
 *
 * SM4_PREPARE / SM4_CRYPT_BLK come from sm4-ce-asm.h.
 * NOTE(review): presumably they load the round keys from x0 and encrypt the
 * given register in place - confirm against the header, including which
 * registers they clobber.
 *
 * A block count of zero returns immediately and leaves the MAC untouched.
 */
.align 3
SYM_FUNC_START(sm4_ce_cbcmac_update)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: mac
	 *   x2: src
	 *   w3: nblocks
	 */
	/* without this guard the 1x loop would wrap w3 to 0xffffffff */
	cbz		w3, .Lcbcmac_ret

	SM4_PREPARE(x0)

	ld1		{RMAC.16b}, [x1]

.Lcbcmac_loop_4x:
	/* w3 = blocks remaining, > 0 here; unsigned compare on a count */
	cmp		w3, #4
	b.lo		.Lcbcmac_loop_1x

	sub		w3, w3, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v3.16b

	cbz		w3, .Lcbcmac_end
	b		.Lcbcmac_loop_4x

.Lcbcmac_loop_1x:
	/* w3 is in 1..3 here */
	sub		w3, w3, #1

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v0.16b

	cbnz		w3, .Lcbcmac_loop_1x

.Lcbcmac_end:
	st1		{RMAC.16b}, [x1]
.Lcbcmac_ret:
	ret
SYM_FUNC_END(sm4_ce_cbcmac_update)

/*
 * sm4_ce_ccm_final - combine the MAC with the processed ctr0 block.
 *
 * Loads the MAC from [x2] and ctr0 from [x1], runs both through
 * SM4_CRYPT_BLK2, XORs the two results and writes the outcome back to [x2].
 * The memory at [x1] is only read.
 */
.align 3
SYM_FUNC_START(sm4_ce_ccm_final)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: ctr0 (big endian, 128 bit)
	 *   x2: mac
	 */
	SM4_PREPARE(x0)

	ld1		{RMAC.16b}, [x2]
	ld1		{v0.16b}, [x1]

	SM4_CRYPT_BLK2(RMAC, v0)

	/* en-/decrypt the mac with ctr0 */
	eor		RMAC.16b, RMAC.16b, v0.16b
	st1		{RMAC.16b}, [x2]

	ret
SYM_FUNC_END(sm4_ce_ccm_final)

/*
 * sm4_ce_ccm_enc - CTR-encrypt src to dst while folding the plaintext into
 * the CBC-MAC.
 *
 * Whole blocks are handled four at a time, then one at a time. On that path
 * the updated MAC is stored to [x5] and the advanced counter to [x3].
 *
 * A trailing partial block (nbytes not a multiple of 16) is handled byte by
 * byte. That path advances x5, writes the MAC bytes individually and returns
 * WITHOUT writing the counter back to [x3].
 *
 * nbytes == 0 returns immediately and modifies nothing.
 *
 * Scratch registers used here: x6-x9, w0 (after SM4_PREPARE), v0-v3,
 * v8-v11, v16, plus whatever the SM4_* macros from sm4-ce-asm.h use.
 * NOTE(review): v8-v11 are overwritten without being saved; presumably the
 * caller owns the whole SIMD register file while this runs - confirm.
 */
.align 3
SYM_TYPED_FUNC_START(sm4_ce_ccm_enc)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nbytes
	 *   x5: mac
	 */
	/* without this guard the tail loop would wrap w4 to 0xffffffff */
	cbz		w4, .Lccm_enc_ret

	SM4_PREPARE(x0)

	/* x7:x8 = counter, byte-swapped so it can be incremented with adds/adc */
	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	ld1		{RMAC.16b}, [x5]

.Lccm_enc_loop_4x:
	/* w4 = bytes remaining, > 0 here; unsigned compare on a length */
	cmp		w4, #(4 * 16)
	b.lo		.Lccm_enc_loop_1x

	sub		w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* per block: dst = processed CTR ^ src, MAC = processed MAC ^ src */
	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK2(v9, RMAC)
	eor		v9.16b, v9.16b, v1.16b
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor		v10.16b, v10.16b, v2.16b
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor		v11.16b, v11.16b, v3.16b
	eor		RMAC.16b, RMAC.16b, v3.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	cbz		w4, .Lccm_enc_end
	b		.Lccm_enc_loop_4x

.Lccm_enc_loop_1x:
	cmp		w4, #16
	b.lo		.Lccm_enc_tail

	sub		w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v0.16b

	st1		{v8.16b}, [x1], #16

	cbz		w4, .Lccm_enc_end
	b		.Lccm_enc_loop_1x

.Lccm_enc_tail:
	/* w4 is in 1..15 here */

	/* construct CTRs */
	inc_le128(v8)

	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC */
	st1		{RMAC.16b}, [x5]

.Lccm_enc_tail_loop:
	/*
	 * w0 is reused as scratch: the key pointer in x0 is not referenced
	 * again after SM4_PREPARE.
	 */
	ldrb		w0, [x2], #1		/* get 1 byte from input */
	umov		w9, v8.b[0]		/* get top crypted CTR byte */
	umov		w6, RMAC.b[0]		/* get top MAC byte */

	eor		w9, w9, w0		/* w9 = CTR ^ input */
	eor		w6, w6, w0		/* w6 = MAC ^ input */

	strb		w9, [x1], #1		/* store out byte */
	strb		w6, [x5], #1		/* store MAC byte */

	subs		w4, w4, #1
	beq		.Lccm_enc_ret

	/* shift out one byte */
	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext		v8.16b, v8.16b, v8.16b, #1

	b		.Lccm_enc_tail_loop

.Lccm_enc_end:
	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/* store new CTR */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

.Lccm_enc_ret:
	ret
SYM_FUNC_END(sm4_ce_ccm_enc)

/*
 * sm4_ce_ccm_dec - CTR-decrypt src to dst while folding the recovered
 * plaintext into the CBC-MAC.
 *
 * Identical in structure to sm4_ce_ccm_enc; the only difference is that the
 * MAC is XORed with the OUTPUT of the CTR step (dst) instead of the input.
 *
 * Whole blocks are handled four at a time, then one at a time. On that path
 * the updated MAC is stored to [x5] and the advanced counter to [x3].
 *
 * A trailing partial block is handled byte by byte. That path advances x5,
 * writes the MAC bytes individually and returns WITHOUT writing the counter
 * back to [x3].
 *
 * nbytes == 0 returns immediately and modifies nothing.
 *
 * Scratch registers used here: x6-x9, w0 (after SM4_PREPARE), v0-v3,
 * v8-v11, v16, plus whatever the SM4_* macros from sm4-ce-asm.h use.
 * NOTE(review): v8-v11 are overwritten without being saved; presumably the
 * caller owns the whole SIMD register file while this runs - confirm.
 */
.align 3
SYM_TYPED_FUNC_START(sm4_ce_ccm_dec)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit)
	 *   w4: nbytes
	 *   x5: mac
	 */
	/* without this guard the tail loop would wrap w4 to 0xffffffff */
	cbz		w4, .Lccm_dec_ret

	SM4_PREPARE(x0)

	/* x7:x8 = counter, byte-swapped so it can be incremented with adds/adc */
	ldp		x7, x8, [x3]
	rev		x7, x7
	rev		x8, x8

	ld1		{RMAC.16b}, [x5]

.Lccm_dec_loop_4x:
	/* w4 = bytes remaining, > 0 here; unsigned compare on a length */
	cmp		w4, #(4 * 16)
	b.lo		.Lccm_dec_loop_1x

	sub		w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* per block: dst = processed CTR ^ src, MAC = processed MAC ^ dst */
	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v8.16b
	SM4_CRYPT_BLK2(v9, RMAC)
	eor		v9.16b, v9.16b, v1.16b
	eor		RMAC.16b, RMAC.16b, v9.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor		v10.16b, v10.16b, v2.16b
	eor		RMAC.16b, RMAC.16b, v10.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor		v11.16b, v11.16b, v3.16b
	eor		RMAC.16b, RMAC.16b, v11.16b

	st1		{v8.16b-v11.16b}, [x1], #64

	cbz		w4, .Lccm_dec_end
	b		.Lccm_dec_loop_4x

.Lccm_dec_loop_1x:
	cmp		w4, #16
	b.lo		.Lccm_dec_tail

	sub		w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1		{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor		v8.16b, v8.16b, v0.16b
	eor		RMAC.16b, RMAC.16b, v8.16b

	st1		{v8.16b}, [x1], #16

	cbz		w4, .Lccm_dec_end
	b		.Lccm_dec_loop_1x

.Lccm_dec_tail:
	/* w4 is in 1..15 here */

	/* construct CTRs */
	inc_le128(v8)

	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC */
	st1		{RMAC.16b}, [x5]

.Lccm_dec_tail_loop:
	/*
	 * w0 is reused as scratch: the key pointer in x0 is not referenced
	 * again after SM4_PREPARE.
	 */
	ldrb		w0, [x2], #1		/* get 1 byte from input */
	umov		w9, v8.b[0]		/* get top crypted CTR byte */
	umov		w6, RMAC.b[0]		/* get top MAC byte */

	eor		w9, w9, w0		/* w9 = CTR ^ input */
	eor		w6, w6, w9		/* w6 = MAC ^ output */

	strb		w9, [x1], #1		/* store out byte */
	strb		w6, [x5], #1		/* store MAC byte */

	subs		w4, w4, #1
	beq		.Lccm_dec_ret

	/* shift out one byte */
	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext		v8.16b, v8.16b, v8.16b, #1

	b		.Lccm_dec_tail_loop

.Lccm_dec_end:
	/* store new MAC */
	st1		{RMAC.16b}, [x5]

	/* store new CTR */
	rev		x7, x7
	rev		x8, x8
	stp		x7, x8, [x3]

.Lccm_dec_ret:
	ret
SYM_FUNC_END(sm4_ce_ccm_dec)