/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions
 * as specified in rfc8998
 * https://datatracker.ietf.org/doc/html/rfc8998
 *
 * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/assembler.h>
#include "sm4-ce-asm.h"

.arch   armv8-a+crypto

/*
 * The SM4E instruction is emitted as a raw .inst word.  The .irp loop below
 * defines one numeric symbol per usable vector register so that the sm4e
 * macro can splice register numbers into the encoding.  Only v0-v3 and
 * v24-v31 are defined, so sm4e can only be used with those registers.
 */
.irp b, 0, 1, 2, 3, 24, 25, 26, 27, 28, 29, 30, 31
        .set .Lv\b\().4s, \b
.endr

/* sm4e vd.4s, vn.4s: Rd in bits [4:0], Rn in bits [9:5] of the opcode. */
.macro sm4e, vd, vn
        .inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm

/* Register macros */

/* Used for both encryption and decryption */
#define RHASH   v21     /* running GHASH value, kept bit-reversed (rbit) */
#define RRCONST v22     /* reduction constant 0x87 replicated in both lanes */
#define RZERO   v23     /* must be all-zero whenever a helper macro runs */

/* Helper macros. */

/*
 * 128x128 -> 256 bit carry-less multiply built from four 64x64 pmull steps.
 *
 * input: m0, m1
 * output: r0:r1 (low 128-bits in r0, high in r1)
 *
 * T0 first holds m1 with its two 64-bit halves swapped so that the two
 * cross products can be formed; their sum is then split across r0 and r1.
 * Requires RZERO == 0.  Clobbers T0 and T1.  r1 may alias m0 or m1 because
 * it is written only after the last read of both inputs.
 */
#define PMUL_128x128(r0, r1, m0, m1, T0, T1)                    \
        ext             T0.16b, m1.16b, m1.16b, #8;             \
        pmull           r0.1q, m0.1d, m1.1d;                    \
        pmull           T1.1q, m0.1d, T0.1d;                    \
        pmull2          T0.1q, m0.2d, T0.2d;                    \
        pmull2          r1.1q, m0.2d, m1.2d;                    \
        eor             T0.16b, T0.16b, T1.16b;                 \
        ext             T1.16b, RZERO.16b, T0.16b, #8;          \
        ext             T0.16b, T0.16b, RZERO.16b, #8;          \
        eor             r0.16b, r0.16b, T1.16b;                 \
        eor             r1.16b, r1.16b, T0.16b;

/*
 * Four independent PMUL_128x128 operations with their instructions
 * interleaved.  Each (r, r, m, m, T, T) group has the same contract as
 * PMUL_128x128.  The four products are NOT summed here; the caller does it.
 */
#define PMUL_128x128_4x(r0, r1, m0, m1, T0, T1,                 \
                        r2, r3, m2, m3, T2, T3,                 \
                        r4, r5, m4, m5, T4, T5,                 \
                        r6, r7, m6, m7, T6, T7)                 \
        ext             T0.16b, m1.16b, m1.16b, #8;             \
        ext             T2.16b, m3.16b, m3.16b, #8;             \
        ext             T4.16b, m5.16b, m5.16b, #8;             \
        ext             T6.16b, m7.16b, m7.16b, #8;             \
        pmull           r0.1q, m0.1d, m1.1d;                    \
        pmull           r2.1q, m2.1d, m3.1d;                    \
        pmull           r4.1q, m4.1d, m5.1d;                    \
        pmull           r6.1q, m6.1d, m7.1d;                    \
        pmull           T1.1q, m0.1d, T0.1d;                    \
        pmull           T3.1q, m2.1d, T2.1d;                    \
        pmull           T5.1q, m4.1d, T4.1d;                    \
        pmull           T7.1q, m6.1d, T6.1d;                    \
        pmull2          T0.1q, m0.2d, T0.2d;                    \
        pmull2          T2.1q, m2.2d, T2.2d;                    \
        pmull2          T4.1q, m4.2d, T4.2d;                    \
        pmull2          T6.1q, m6.2d, T6.2d;                    \
        pmull2          r1.1q, m0.2d, m1.2d;                    \
        pmull2          r3.1q, m2.2d, m3.2d;                    \
        pmull2          r5.1q, m4.2d, m5.2d;                    \
        pmull2          r7.1q, m6.2d, m7.2d;                    \
        eor             T0.16b, T0.16b, T1.16b;                 \
        eor             T2.16b, T2.16b, T3.16b;                 \
        eor             T4.16b, T4.16b, T5.16b;                 \
        eor             T6.16b, T6.16b, T7.16b;                 \
        ext             T1.16b, RZERO.16b, T0.16b, #8;          \
        ext             T3.16b, RZERO.16b, T2.16b, #8;          \
        ext             T5.16b, RZERO.16b, T4.16b, #8;          \
        ext             T7.16b, RZERO.16b, T6.16b, #8;          \
        ext             T0.16b, T0.16b, RZERO.16b, #8;          \
        ext             T2.16b, T2.16b, RZERO.16b, #8;          \
        ext             T4.16b, T4.16b, RZERO.16b, #8;          \
        ext             T6.16b, T6.16b, RZERO.16b, #8;          \
        eor             r0.16b, r0.16b, T1.16b;                 \
        eor             r2.16b, r2.16b, T3.16b;                 \
        eor             r4.16b, r4.16b, T5.16b;                 \
        eor             r6.16b, r6.16b, T7.16b;                 \
        eor             r1.16b, r1.16b, T0.16b;                 \
        eor             r3.16b, r3.16b, T2.16b;                 \
        eor             r5.16b, r5.16b, T4.16b;                 \
        eor             r7.16b, r7.16b, T6.16b;

/*
 * Fold a 256-bit product back to 128 bits using rconst.
 *
 * input: r0:r1 (low 128-bits in r0, high in r1)
 * output: a
 *
 * The upper half of r1 is folded first, then the (updated) lower half.
 * rconst must hold the constant in both 64-bit lanes, since both pmull
 * and pmull2 read it.  Requires RZERO == 0.  Modifies r0 and r1 and
 * clobbers T0 and T1.
 */
#define REDUCTION(a, r0, r1, rconst, T0, T1)                    \
        pmull2          T0.1q, r1.2d, rconst.2d;                \
        ext             T1.16b, T0.16b, RZERO.16b, #8;          \
        ext             T0.16b, RZERO.16b, T0.16b, #8;          \
        eor             r1.16b, r1.16b, T1.16b;                 \
        eor             r0.16b, r0.16b, T0.16b;                 \
        pmull           T0.1q, r1.1d, rconst.1d;                \
        eor             a.16b, r0.16b, T0.16b;

/*
 * SM4-encrypt one block (b0, in place) interleaved with one PMUL_128x128
 * (r0:r1 = m0 * m1).  The eight sm4e rounds use v24-v31 as round keys
 * (presumably loaded by SM4_PREPARE from sm4-ce-asm.h - confirm there).
 * b0 must be one of the registers known to the sm4e macro.
 * Requires RZERO == 0.  Clobbers T0 and T1.
 */
#define SM4_CRYPT_PMUL_128x128_BLK(b0, r0, r1, m0, m1, T0, T1)  \
        rev32           b0.16b, b0.16b;                         \
                ext             T0.16b, m1.16b, m1.16b, #8;     \
        sm4e            b0.4s, v24.4s;                          \
                pmull           r0.1q, m0.1d, m1.1d;            \
        sm4e            b0.4s, v25.4s;                          \
                pmull           T1.1q, m0.1d, T0.1d;            \
        sm4e            b0.4s, v26.4s;                          \
                pmull2          T0.1q, m0.2d, T0.2d;            \
        sm4e            b0.4s, v27.4s;                          \
                pmull2          r1.1q, m0.2d, m1.2d;            \
        sm4e            b0.4s, v28.4s;                          \
                eor             T0.16b, T0.16b, T1.16b;         \
        sm4e            b0.4s, v29.4s;                          \
                ext             T1.16b, RZERO.16b, T0.16b, #8;  \
        sm4e            b0.4s, v30.4s;                          \
                ext             T0.16b, T0.16b, RZERO.16b, #8;  \
        sm4e            b0.4s, v31.4s;                          \
                eor             r0.16b, r0.16b, T1.16b;         \
        rev64           b0.4s, b0.4s;                           \
                eor             r1.16b, r1.16b, T0.16b;         \
        ext             b0.16b, b0.16b, b0.16b, #8;             \
        rev32           b0.16b, b0.16b;

/*
 * Three-block variant of SM4_CRYPT_PMUL_128x128_BLK.  In addition to the
 * three encryptions and three multiplies, the three products are summed:
 * on exit r0:r1 = m0*m1 ^ m2*m3 ^ m4*m5.  r2..r5 are left holding the
 * individual partial products.
 */
#define SM4_CRYPT_PMUL_128x128_BLK3(b0, b1, b2,                 \
                                    r0, r1, m0, m1, T0, T1,     \
                                    r2, r3, m2, m3, T2, T3,     \
                                    r4, r5, m4, m5, T4, T5)     \
        rev32           b0.16b, b0.16b;                         \
        rev32           b1.16b, b1.16b;                         \
        rev32           b2.16b, b2.16b;                         \
                ext             T0.16b, m1.16b, m1.16b, #8;     \
                ext             T2.16b, m3.16b, m3.16b, #8;     \
                ext             T4.16b, m5.16b, m5.16b, #8;     \
        sm4e            b0.4s, v24.4s;                          \
        sm4e            b1.4s, v24.4s;                          \
        sm4e            b2.4s, v24.4s;                          \
                pmull           r0.1q, m0.1d, m1.1d;            \
                pmull           r2.1q, m2.1d, m3.1d;            \
                pmull           r4.1q, m4.1d, m5.1d;            \
        sm4e            b0.4s, v25.4s;                          \
        sm4e            b1.4s, v25.4s;                          \
        sm4e            b2.4s, v25.4s;                          \
                pmull           T1.1q, m0.1d, T0.1d;            \
                pmull           T3.1q, m2.1d, T2.1d;            \
                pmull           T5.1q, m4.1d, T4.1d;            \
        sm4e            b0.4s, v26.4s;                          \
        sm4e            b1.4s, v26.4s;                          \
        sm4e            b2.4s, v26.4s;                          \
                pmull2          T0.1q, m0.2d, T0.2d;            \
                pmull2          T2.1q, m2.2d, T2.2d;            \
                pmull2          T4.1q, m4.2d, T4.2d;            \
        sm4e            b0.4s, v27.4s;                          \
        sm4e            b1.4s, v27.4s;                          \
        sm4e            b2.4s, v27.4s;                          \
                pmull2          r1.1q, m0.2d, m1.2d;            \
                pmull2          r3.1q, m2.2d, m3.2d;            \
                pmull2          r5.1q, m4.2d, m5.2d;            \
        sm4e            b0.4s, v28.4s;                          \
        sm4e            b1.4s, v28.4s;                          \
        sm4e            b2.4s, v28.4s;                          \
                eor             T0.16b, T0.16b, T1.16b;         \
                eor             T2.16b, T2.16b, T3.16b;         \
                eor             T4.16b, T4.16b, T5.16b;         \
        sm4e            b0.4s, v29.4s;                          \
        sm4e            b1.4s, v29.4s;                          \
        sm4e            b2.4s, v29.4s;                          \
                ext             T1.16b, RZERO.16b, T0.16b, #8;  \
                ext             T3.16b, RZERO.16b, T2.16b, #8;  \
                ext             T5.16b, RZERO.16b, T4.16b, #8;  \
        sm4e            b0.4s, v30.4s;                          \
        sm4e            b1.4s, v30.4s;                          \
        sm4e            b2.4s, v30.4s;                          \
                ext             T0.16b, T0.16b, RZERO.16b, #8;  \
                ext             T2.16b, T2.16b, RZERO.16b, #8;  \
                ext             T4.16b, T4.16b, RZERO.16b, #8;  \
        sm4e            b0.4s, v31.4s;                          \
        sm4e            b1.4s, v31.4s;                          \
        sm4e            b2.4s, v31.4s;                          \
                eor             r0.16b, r0.16b, T1.16b;         \
                eor             r2.16b, r2.16b, T3.16b;         \
                eor             r4.16b, r4.16b, T5.16b;         \
        rev64           b0.4s, b0.4s;                           \
        rev64           b1.4s, b1.4s;                           \
        rev64           b2.4s, b2.4s;                           \
                eor             r1.16b, r1.16b, T0.16b;         \
                eor             r3.16b, r3.16b, T2.16b;         \
                eor             r5.16b, r5.16b, T4.16b;         \
        ext             b0.16b, b0.16b, b0.16b, #8;             \
        ext             b1.16b, b1.16b, b1.16b, #8;             \
        ext             b2.16b, b2.16b, b2.16b, #8;             \
                eor             r0.16b, r0.16b, r2.16b;         \
                eor             r1.16b, r1.16b, r3.16b;         \
        rev32           b0.16b, b0.16b;                         \
        rev32           b1.16b, b1.16b;                         \
        rev32           b2.16b, b2.16b;                         \
                eor             r0.16b, r0.16b, r4.16b;         \
                eor             r1.16b, r1.16b, r5.16b;

/*
 * Materialise the current counter (x8 = upper 64 bits, x9 = lower 64 bits,
 * both already byte-swapped to native order) into vctr as a big-endian
 * block, then advance the counter for the next use.  Only the low 32 bits
 * of x9 are incremented (wrapping within 32 bits); the upper 96 bits are
 * left untouched.  Clobbers x6.
 */
#define inc32_le128(vctr)                                       \
        mov             vctr.d[1], x9;                          \
        add             w6, w9, #1;                             \
        mov             vctr.d[0], x8;                          \
        bfi             x9, x6, #0, #32;                        \
        rev64           vctr.16b, vctr.16b;

/*
 * Final GCM step: absorb the 16-byte lengths block at [x7] into RHASH and
 * XOR the result with the encrypted initial counter block.
 *
 * vctr0 receives CTR0 and must be a register usable with sm4e; vlen is a
 * scratch vector for the lengths block.  On exit RHASH holds the tag in
 * memory bit order (already rbit-ed back).  Clobbers x6, the low 32 bits
 * of x9, RR0, RR1 and RTMP0-RTMP3.
 */
#define GTAG_HASH_LENGTHS(vctr0, vlen)                                  \
        ld1             {vlen.16b}, [x7];                               \
        /* construct CTR0 */                                            \
        /* the lower 32-bits of initial IV is always be32(1) */         \
        mov             x6, #0x1;                                       \
        bfi             x9, x6, #0, #32;                                \
        mov             vctr0.d[0], x8;                                 \
        mov             vctr0.d[1], x9;                                 \
        rbit            vlen.16b, vlen.16b;                             \
        rev64           vctr0.16b, vctr0.16b;                           \
        /* authtag = GCTR(CTR0, GHASH) */                               \
        eor             RHASH.16b, RHASH.16b, vlen.16b;                 \
        SM4_CRYPT_PMUL_128x128_BLK(vctr0, RR0, RR1, RHASH, RH1,         \
                                   RTMP0, RTMP1);                       \
        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3);              \
        rbit            RHASH.16b, RHASH.16b;                           \
        eor             RHASH.16b, RHASH.16b, vctr0.16b;


/* Register macros for encrypt and ghash */

/* can be the same as input v0-v3 */
#define RR1     v0
#define RR3     v1
#define RR5     v2
#define RR7     v3

#define RR0     v4
#define RR2     v5
#define RR4     v6
#define RR6     v7

#define RTMP0   v8
#define RTMP1   v9
#define RTMP2   v10
#define RTMP3   v11
#define RTMP4   v12
#define RTMP5   v13
#define RTMP6   v14
#define RTMP7   v15

#define RH1     v16
#define RH2     v17
#define RH3     v18
#define RH4     v19

/*
 * sm4_ce_pmull_ghash_setup
 *
 * Derive the hash key H by encrypting an all-zero block, compute H^2, H^3
 * and H^4, and store the four powers (bit-reversed, 16 bytes each, in the
 * order H^1..H^4) to the ghash table.
 */
.align 3
SYM_FUNC_START(sm4_ce_pmull_ghash_setup)
        /* input:
         *   x0: round key array, CTX
         *   x1: ghash table
         */
        SM4_PREPARE(x0)

        adr_l           x2, .Lghash_rconst
        ld1r            {RRCONST.2d}, [x2]

        eor             RZERO.16b, RZERO.16b, RZERO.16b

        /* H = E(K, 0^128) */
        rev32           v0.16b, RZERO.16b
        SM4_CRYPT_BLK_BE(v0)

        /* H ^ 1 */
        rbit            RH1.16b, v0.16b

        /* H ^ 2 */
        PMUL_128x128(RR0, RR1, RH1, RH1, RTMP0, RTMP1)
        REDUCTION(RH2, RR0, RR1, RRCONST, RTMP2, RTMP3)

        /* H ^ 3 */
        PMUL_128x128(RR0, RR1, RH2, RH1, RTMP0, RTMP1)
        REDUCTION(RH3, RR0, RR1, RRCONST, RTMP2, RTMP3)

        /* H ^ 4 */
        PMUL_128x128(RR0, RR1, RH2, RH2, RTMP0, RTMP1)
        REDUCTION(RH4, RR0, RR1, RRCONST, RTMP2, RTMP3)

        st1             {RH1.16b-RH4.16b}, [x1]

        ret
SYM_FUNC_END(sm4_ce_pmull_ghash_setup)

/*
 * pmull_ghash_update
 *
 * Absorb nblocks 16-byte blocks from src into the running GHASH value.
 * Blocks are consumed four at a time while at least four remain, then one
 * at a time.  A block count of zero leaves the hash value unchanged.
 */
.align 3
SYM_FUNC_START(pmull_ghash_update)
        /* input:
         *   x0: ghash table
         *   x1: ghash result
         *   x2: src
         *   w3: nblocks
         */
        ld1             {RH1.16b-RH4.16b}, [x0]

        ld1             {RHASH.16b}, [x1]
        rbit            RHASH.16b, RHASH.16b

        adr_l           x4, .Lghash_rconst
        ld1r            {RRCONST.2d}, [x4]

        eor             RZERO.16b, RZERO.16b, RZERO.16b

        /*
         * Without this guard a zero count would reach the 1x loop, wrap
         * w3 to 0xffffffff and read far beyond the source buffer.
         */
        cbz             w3, .Lghash_end

.Lghash_loop_4x:
        cmp             w3, #4
        b.lo            .Lghash_loop_1x         /* block count is unsigned */

        sub             w3, w3, #4

        ld1             {v0.16b-v3.16b}, [x2], #64

        rbit            v0.16b, v0.16b
        rbit            v1.16b, v1.16b
        rbit            v2.16b, v2.16b
        rbit            v3.16b, v3.16b

        /*
         * (in0 ^ HASH) * H^4 => rr0:rr1
         * (in1)        * H^3 => rr2:rr3
         * (in2)        * H^2 => rr4:rr5
         * (in3)        * H^1 => rr6:rr7
         */
        eor             RHASH.16b, RHASH.16b, v0.16b

        PMUL_128x128_4x(RR0, RR1, RHASH, RH4, RTMP0, RTMP1,
                        RR2, RR3, v1, RH3, RTMP2, RTMP3,
                        RR4, RR5, v2, RH2, RTMP4, RTMP5,
                        RR6, RR7, v3, RH1, RTMP6, RTMP7)

        /* sum the four products before a single reduction */
        eor             RR0.16b, RR0.16b, RR2.16b
        eor             RR1.16b, RR1.16b, RR3.16b
        eor             RR0.16b, RR0.16b, RR4.16b
        eor             RR1.16b, RR1.16b, RR5.16b
        eor             RR0.16b, RR0.16b, RR6.16b
        eor             RR1.16b, RR1.16b, RR7.16b

        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)

        cbz             w3, .Lghash_end
        b               .Lghash_loop_4x

.Lghash_loop_1x:
        /* w3 is in the range 1..3 on first entry */
        sub             w3, w3, #1

        ld1             {v0.16b}, [x2], #16
        rbit            v0.16b, v0.16b
        eor             RHASH.16b, RHASH.16b, v0.16b

        PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)

        cbnz            w3, .Lghash_loop_1x

.Lghash_end:
        rbit            RHASH.16b, RHASH.16b
        st1             {RHASH.2d}, [x1]

        ret
SYM_FUNC_END(pmull_ghash_update)

/*
 * sm4_ce_pmull_gcm_enc
 *
 * CTR-encrypt nbytes from src to dst and absorb the produced ciphertext
 * into the GHASH value.  Full blocks are handled four at a time, then one
 * at a time; a final partial block (1..15 bytes) is handled bytewise and
 * zero-padded before hashing.
 *
 * If x7 is NULL the updated counter is written back to [x3] and the
 * running hash to [x5].  If x7 is non-NULL the lengths block is absorbed
 * and the final tag is written to [x5]; the counter is not written back.
 *
 * x0 and x6 are reused as scratch after their arguments are consumed.
 */
.align 3
SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_enc)
        /* input:
         *   x0: round key array, CTX
         *   x1: dst
         *   x2: src
         *   x3: ctr (big endian, 128 bit)
         *   w4: nbytes
         *   x5: ghash result
         *   x6: ghash table
         *   x7: lengths (only for last block)
         */
        SM4_PREPARE(x0)

        /* keep the counter in x8:x9 in native byte order */
        ldp             x8, x9, [x3]
        rev             x8, x8
        rev             x9, x9

        ld1             {RH1.16b-RH4.16b}, [x6]

        ld1             {RHASH.16b}, [x5]
        rbit            RHASH.16b, RHASH.16b

        adr_l           x6, .Lghash_rconst
        ld1r            {RRCONST.2d}, [x6]

        eor             RZERO.16b, RZERO.16b, RZERO.16b

        cbz             w4, .Lgcm_enc_hash_len

.Lgcm_enc_loop_4x:
        cmp             w4, #(4 * 16)
        b.lo            .Lgcm_enc_loop_1x       /* nbytes is unsigned */

        sub             w4, w4, #(4 * 16)

        /* construct CTRs */
        inc32_le128(v0)                 /* +0 */
        inc32_le128(v1)                 /* +1 */
        inc32_le128(v2)                 /* +2 */
        inc32_le128(v3)                 /* +3 */

        ld1             {RTMP0.16b-RTMP3.16b}, [x2], #64

        SM4_CRYPT_BLK4(v0, v1, v2, v3)

        eor             v0.16b, v0.16b, RTMP0.16b
        eor             v1.16b, v1.16b, RTMP1.16b
        eor             v2.16b, v2.16b, RTMP2.16b
        eor             v3.16b, v3.16b, RTMP3.16b
        st1             {v0.16b-v3.16b}, [x1], #64

        /* ghash update */

        rbit            v0.16b, v0.16b
        rbit            v1.16b, v1.16b
        rbit            v2.16b, v2.16b
        rbit            v3.16b, v3.16b

        /*
         * (in0 ^ HASH) * H^4 => rr0:rr1
         * (in1)        * H^3 => rr2:rr3
         * (in2)        * H^2 => rr4:rr5
         * (in3)        * H^1 => rr6:rr7
         */
        eor             RHASH.16b, RHASH.16b, v0.16b

        PMUL_128x128_4x(RR0, RR1, RHASH, RH4, RTMP0, RTMP1,
                        RR2, RR3, v1, RH3, RTMP2, RTMP3,
                        RR4, RR5, v2, RH2, RTMP4, RTMP5,
                        RR6, RR7, v3, RH1, RTMP6, RTMP7)

        /* sum the four products before a single reduction */
        eor             RR0.16b, RR0.16b, RR2.16b
        eor             RR1.16b, RR1.16b, RR3.16b
        eor             RR0.16b, RR0.16b, RR4.16b
        eor             RR1.16b, RR1.16b, RR5.16b
        eor             RR0.16b, RR0.16b, RR6.16b
        eor             RR1.16b, RR1.16b, RR7.16b

        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)

        cbz             w4, .Lgcm_enc_hash_len
        b               .Lgcm_enc_loop_4x

.Lgcm_enc_loop_1x:
        cmp             w4, #16
        b.lo            .Lgcm_enc_tail          /* nbytes is unsigned */

        sub             w4, w4, #16

        /* construct CTRs */
        inc32_le128(v0)

        ld1             {RTMP0.16b}, [x2], #16

        SM4_CRYPT_BLK(v0)

        eor             v0.16b, v0.16b, RTMP0.16b
        st1             {v0.16b}, [x1], #16

        /* ghash update */
        rbit            v0.16b, v0.16b
        eor             RHASH.16b, RHASH.16b, v0.16b
        PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)

        cbz             w4, .Lgcm_enc_hash_len
        b               .Lgcm_enc_loop_1x

.Lgcm_enc_tail:
        /* here 0 < w4 < 16 */

        /* construct CTRs */
        inc32_le128(v0)
        SM4_CRYPT_BLK(v0)

        /*
         * load permute table: the 16 bytes at (table + 32 - w4) select
         * the top w4 bytes of the source and zero everything else
         */
        adr_l           x0, .Lcts_permute_table
        add             x0, x0, #32
        sub             x0, x0, w4, uxtw
        ld1             {v3.16b}, [x0]

.Lgcm_enc_tail_loop:
        /* do encrypt */
        ldrb            w0, [x2], #1    /* get 1 byte from input */
        umov            w6, v0.b[0]     /* get top crypted byte */
        eor             w6, w6, w0      /* w6 = CTR ^ input */
        strb            w6, [x1], #1    /* store out byte */

        /* shift right out one byte */
        ext             v0.16b, v0.16b, v0.16b, #1
        /* the last ciphertext is placed in high bytes */
        ins             v0.b[15], w6

        subs            w4, w4, #1
        bne             .Lgcm_enc_tail_loop

        /* padding last block with zeros */
        tbl             v0.16b, {v0.16b}, v3.16b

        /* ghash update */
        rbit            v0.16b, v0.16b
        eor             RHASH.16b, RHASH.16b, v0.16b
        PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)

.Lgcm_enc_hash_len:
        cbz             x7, .Lgcm_enc_end

        GTAG_HASH_LENGTHS(v1, v3)

        b               .Lgcm_enc_ret

.Lgcm_enc_end:
        /* store new CTR */
        rev             x8, x8
        rev             x9, x9
        stp             x8, x9, [x3]

        rbit            RHASH.16b, RHASH.16b

.Lgcm_enc_ret:
        /* store new MAC */
        st1             {RHASH.2d}, [x5]

        ret
SYM_FUNC_END(sm4_ce_pmull_gcm_enc)

#undef  RR1
#undef  RR3
#undef  RR5
#undef  RR7
#undef  RR0
#undef  RR2
#undef  RR4
#undef  RR6
#undef  RTMP0
#undef  RTMP1
#undef  RTMP2
#undef  RTMP3
#undef  RTMP4
#undef  RTMP5
#undef  RTMP6
#undef  RTMP7
#undef  RH1
#undef  RH2
#undef  RH3
#undef  RH4


/* Register macros for decrypt */

/* v0-v2 for building CTRs, v3-v5 for saving inputs */

/*
 * RR1/RR3/RR5 double as the registers holding the bit-reversed ciphertext
 * (v6-v8) that is fed into the multiply; each is overwritten only by the
 * last pmull2 that reads it.
 */
#define RR1     v6
#define RR3     v7
#define RR5     v8

#define RR0     v9
#define RR2     v10
#define RR4     v11

#define RTMP0   v12
#define RTMP1   v13
#define RTMP2   v14
#define RTMP3   v15
#define RTMP4   v16
#define RTMP5   v17

#define RH1     v18
#define RH2     v19
#define RH3     v20

/*
 * sm4_ce_pmull_gcm_dec
 *
 * CTR-decrypt nbytes from src to dst and absorb the ciphertext (the input)
 * into the GHASH value.  Full blocks are handled three at a time with the
 * SM4 rounds and the GHASH multiplies interleaved, then one at a time; a
 * final partial block (1..15 bytes) is handled bytewise and zero-padded
 * before hashing.
 *
 * If x7 is NULL the updated counter is written back to [x3] and the
 * running hash to [x5].  If x7 is non-NULL the lengths block is absorbed
 * and the final tag is written to [x5]; the counter is not written back.
 *
 * Only H^1..H^3 are loaded from the ghash table.  x0 and x6 are reused as
 * scratch after their arguments are consumed.
 */
.align 3
SYM_TYPED_FUNC_START(sm4_ce_pmull_gcm_dec)
        /* input:
         *   x0: round key array, CTX
         *   x1: dst
         *   x2: src
         *   x3: ctr (big endian, 128 bit)
         *   w4: nbytes
         *   x5: ghash result
         *   x6: ghash table
         *   x7: lengths (only for last block)
         */
        SM4_PREPARE(x0)

        /* keep the counter in x8:x9 in native byte order */
        ldp             x8, x9, [x3]
        rev             x8, x8
        rev             x9, x9

        ld1             {RH1.16b-RH3.16b}, [x6]

        ld1             {RHASH.16b}, [x5]
        rbit            RHASH.16b, RHASH.16b

        adr_l           x6, .Lghash_rconst
        ld1r            {RRCONST.2d}, [x6]

        eor             RZERO.16b, RZERO.16b, RZERO.16b

        cbz             w4, .Lgcm_dec_hash_len

.Lgcm_dec_loop_3x:
        cmp             w4, #(3 * 16)
        b.lo            .Lgcm_dec_loop_1x       /* nbytes is unsigned */

        sub             w4, w4, #(3 * 16)

        ld1             {v3.16b-v5.16b}, [x2], #(3 * 16)

        /* construct CTRs */
        inc32_le128(v0)                 /* +0 */
        rbit            v6.16b, v3.16b
        inc32_le128(v1)                 /* +1 */
        rbit            v7.16b, v4.16b
        inc32_le128(v2)                 /* +2 */
        rbit            v8.16b, v5.16b

        eor             RHASH.16b, RHASH.16b, v6.16b

        /* decrypt & ghash update */
        SM4_CRYPT_PMUL_128x128_BLK3(v0, v1, v2,
                                    RR0, RR1, RHASH, RH3, RTMP0, RTMP1,
                                    RR2, RR3, v7, RH2, RTMP2, RTMP3,
                                    RR4, RR5, v8, RH1, RTMP4, RTMP5)

        eor             v0.16b, v0.16b, v3.16b
        eor             v1.16b, v1.16b, v4.16b
        eor             v2.16b, v2.16b, v5.16b

        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)

        st1             {v0.16b-v2.16b}, [x1], #(3 * 16)

        cbz             w4, .Lgcm_dec_hash_len
        b               .Lgcm_dec_loop_3x

.Lgcm_dec_loop_1x:
        cmp             w4, #16
        b.lo            .Lgcm_dec_tail          /* nbytes is unsigned */

        sub             w4, w4, #16

        ld1             {v3.16b}, [x2], #16

        /* construct CTRs */
        inc32_le128(v0)
        rbit            v6.16b, v3.16b

        eor             RHASH.16b, RHASH.16b, v6.16b

        SM4_CRYPT_PMUL_128x128_BLK(v0, RR0, RR1, RHASH, RH1, RTMP0, RTMP1)

        eor             v0.16b, v0.16b, v3.16b

        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)

        st1             {v0.16b}, [x1], #16

        cbz             w4, .Lgcm_dec_hash_len
        b               .Lgcm_dec_loop_1x

.Lgcm_dec_tail:
        /* here 0 < w4 < 16 */

        /* construct CTRs */
        inc32_le128(v0)
        SM4_CRYPT_BLK(v0)

        /*
         * load permute table: the 16 bytes at (table + 32 - w4) select
         * the top w4 bytes of the source and zero everything else
         */
        adr_l           x0, .Lcts_permute_table
        add             x0, x0, #32
        sub             x0, x0, w4, uxtw
        ld1             {v3.16b}, [x0]

.Lgcm_dec_tail_loop:
        /* do decrypt */
        ldrb            w0, [x2], #1    /* get 1 byte from input */
        umov            w6, v0.b[0]     /* get top crypted byte */
        eor             w6, w6, w0      /* w6 = CTR ^ input */
        strb            w6, [x1], #1    /* store out byte */

        /* shift right out one byte */
        ext             v0.16b, v0.16b, v0.16b, #1
        /*
         * the last ciphertext is placed in high bytes; when decrypting
         * the ciphertext is the input byte (w0), not the output (w6)
         */
        ins             v0.b[15], w0

        subs            w4, w4, #1
        bne             .Lgcm_dec_tail_loop

        /* padding last block with zeros */
        tbl             v0.16b, {v0.16b}, v3.16b

        /* ghash update */
        rbit            v0.16b, v0.16b
        eor             RHASH.16b, RHASH.16b, v0.16b
        PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
        REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)

.Lgcm_dec_hash_len:
        cbz             x7, .Lgcm_dec_end

        GTAG_HASH_LENGTHS(v1, v3)

        b               .Lgcm_dec_ret

.Lgcm_dec_end:
        /* store new CTR */
        rev             x8, x8
        rev             x9, x9
        stp             x8, x9, [x3]

        rbit            RHASH.16b, RHASH.16b

.Lgcm_dec_ret:
        /* store new MAC */
        st1             {RHASH.2d}, [x5]

        ret
SYM_FUNC_END(sm4_ce_pmull_gcm_dec)

        .section        ".rodata", "a"
        .align 4
/*
 * 16 identity indices surrounded by 16 bytes of 0xff on each side.  An
 * index of 0xff is out of range for a one-register tbl and yields zero,
 * so a 16-byte window into this table acts as a shift-and-zero-fill mask.
 */
.Lcts_permute_table:
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte            0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
        .byte            0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
        .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff

/* reduction constant: x^7 + x^2 + x + 1 */
.Lghash_rconst:
        .quad           0x87