/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
 *
 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/assembler.h>

	/*
	 * Symbolic names for the NEON registers used by the GHASH code.
	 * Note that XH and IN1 are both aliases of v7.
	 */
	SHASH		.req	v0
	SHASH2		.req	v1
	T1		.req	v2
	T2		.req	v3
	MASK		.req	v4
	XM		.req	v5
	XL		.req	v6
	XH		.req	v7
	IN1		.req	v7

	/*
	 * Names used by the 8-bit polynomial multiply (p8) code path:
	 * masks, temporaries, permutation vectors and the precomputed
	 * byte-rotated copies of SHASH (sh1-sh4) and SHASH2 (ss1-ss4).
	 */
	k00_16		.req	v8
	k32_48		.req	v9

	t3		.req	v10
	t4		.req	v11
	t5		.req	v12
	t6		.req	v13
	t7		.req	v14
	t8		.req	v15
	t9		.req	v16

	perm1		.req	v17
	perm2		.req	v18
	perm3		.req	v19

	sh1		.req	v20
	sh2		.req	v21
	sh3		.req	v22
	sh4		.req	v23

	ss1		.req	v24
	ss2		.req	v25
	ss3		.req	v26
	ss4		.req	v27

	/*
	 * Names used by the 4-way (p64) code path. These alias v8-v19,
	 * i.e., the same registers as k00_16 ... perm3 above.
	 */
	XL2		.req	v8
	XM2		.req	v9
	XH2		.req	v10
	XL3		.req	v11
	XM3		.req	v12
	XH3		.req	v13
	TT3		.req	v14
	TT4		.req	v15
	HH		.req	v16
	HH3		.req	v17
	HH4		.req	v18
	HH34		.req	v19

	.text
	.arch		armv8-a+crypto

	/* rd = low 64 bits of rn times low 64 bits of rm (64x64->128) */
	.macro		__pmull_p64, rd, rn, rm
	pmull		\rd\().1q, \rn\().1d, \rm\().1d
	.endm

	/* rd = high 64 bits of rn times high 64 bits of rm (64x64->128) */
	.macro		__pmull2_p64, rd, rn, rm
	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
	.endm

	/*
	 * 64x64->128 multiply of the low halves of \ad and \bd, built from
	 * 8-bit polynomial multiplies. \bd must be SHASH or SHASH2: it
	 * selects the __pmull_p8_<bd> helper that supplies the matching
	 * precomputed rotations. Clobbers t3, t5 and t7 here, plus whatever
	 * the helper clobbers.
	 */
	.macro		__pmull_p8, rq, ad, bd
	ext		t3.8b, \ad\().8b, \ad\().8b, #1		// A1
	ext		t5.8b, \ad\().8b, \ad\().8b, #2		// A2
	ext		t7.8b, \ad\().8b, \ad\().8b, #3		// A3

	__pmull_p8_\bd	\rq, \ad
	.endm

	/*
	 * As __pmull_p8, but for the high halves: the rotated copies of \ad
	 * are produced with tbl and the perm1-perm3 vectors instead of ext.
	 */
	.macro		__pmull2_p8, rq, ad, bd
	tbl		t3.16b, {\ad\().16b}, perm1.16b		// A1
	tbl		t5.16b, {\ad\().16b}, perm2.16b		// A2
	tbl		t7.16b, {\ad\().16b}, perm3.16b		// A3

	__pmull2_p8_\bd	\rq, \ad
	.endm

	/* low-half multiply by SHASH, using its rotations sh1-sh4 */
	.macro		__pmull_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
	.endm

	/* low-half multiply by SHASH2, using its rotations ss1-ss4 */
	.macro		__pmull_p8_SHASH2, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
	.endm
/*
 * High-half multiply by SHASH: same tail as the low-half variants, but
 * instantiated with 16b lanes and the pmull2 form (\t == 2).
 */
	.macro		__pmull2_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
	.endm

	/*
	 * Common tail of the 8-bit polynomial multiply.
	 *
	 *   rq		destination register
	 *   ad		operand A, including its arrangement suffix
	 *   bd		operand B, including its arrangement suffix
	 *   nb		arrangement of the inputs (8b or 16b)
	 *   t		empty for pmull, 2 for pmull2
	 *   b1-b4	copies of B rotated by 1, 2, 3 and 4 bytes
	 *
	 * On entry t3, t5 and t7 hold A rotated by 1, 2 and 3 bytes.
	 * Clobbers t3-t9; uses the k00_16 and k32_48 masks.
	 */
	.macro		__pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
	pmull\t		t3.8h, t3.\nb, \bd			// F = A1*B
	pmull\t		t4.8h, \ad, \b1\().\nb			// E = A*B1
	pmull\t		t5.8h, t5.\nb, \bd			// H = A2*B
	pmull\t		t6.8h, \ad, \b2\().\nb			// G = A*B2
	pmull\t		t7.8h, t7.\nb, \bd			// J = A3*B
	pmull\t		t8.8h, \ad, \b3\().\nb			// I = A*B3
	pmull\t		t9.8h, \ad, \b4\().\nb			// K = A*B4
	pmull\t		\rq\().8h, \ad, \bd			// D = A*B

	eor		t3.16b, t3.16b, t4.16b			// L = E + F
	eor		t5.16b, t5.16b, t6.16b			// M = G + H
	eor		t7.16b, t7.16b, t8.16b			// N = I + J

	uzp1		t4.2d, t3.2d, t5.2d
	uzp2		t3.2d, t3.2d, t5.2d
	uzp1		t6.2d, t7.2d, t9.2d
	uzp2		t7.2d, t7.2d, t9.2d

	// t3 = (L) (P0 + P1) << 8
	// t5 = (M) (P2 + P3) << 16
	eor		t4.16b, t4.16b, t3.16b
	and		t3.16b, t3.16b, k32_48.16b

	// t7 = (N) (P4 + P5) << 24
	// t9 = (K) (P6 + P7) << 32
	eor		t6.16b, t6.16b, t7.16b
	and		t7.16b, t7.16b, k00_16.16b

	eor		t4.16b, t4.16b, t3.16b
	eor		t6.16b, t6.16b, t7.16b

	zip2		t5.2d, t4.2d, t3.2d
	zip1		t3.2d, t4.2d, t3.2d
	zip2		t9.2d, t6.2d, t7.2d
	zip1		t7.2d, t6.2d, t7.2d

	// byte-wise rotations that realise the << 8/16/24/32 noted above
	ext		t3.16b, t3.16b, t3.16b, #15
	ext		t5.16b, t5.16b, t5.16b, #14
	ext		t7.16b, t7.16b, t7.16b, #13
	ext		t9.16b, t9.16b, t9.16b, #12

	// fold the partial products into D
	eor		t3.16b, t3.16b, t5.16b
	eor		t7.16b, t7.16b, t9.16b
	eor		\rq\().16b, \rq\().16b, t3.16b
	eor		\rq\().16b, \rq\().16b, t7.16b
	.endm

	/*
	 * Setup for the p64 code path. Expects SHASH to be loaded and x3 to
	 * point at the key; loads HH, HH3 and HH4 from [x3 + 16] and derives
	 * SHASH2, HH34 and the MASK constant. Clobbers x8 and T1.
	 */
	.macro		__pmull_pre_p64
	add		x8, x3, #16
	ld1		{HH.2d-HH4.2d}, [x8]

	// SHASH2 = { SHASH.lo ^ SHASH.hi, HH.lo ^ HH.hi }
	trn1		SHASH2.2d, SHASH.2d, HH.2d
	trn2		T1.2d, SHASH.2d, HH.2d
	eor		SHASH2.16b, SHASH2.16b, T1.16b

	// HH34 = { HH3.lo ^ HH3.hi, HH4.lo ^ HH4.hi }
	trn1		HH34.2d, HH3.2d, HH4.2d
	trn2		T1.2d, HH3.2d, HH4.2d
	eor		HH34.16b, HH34.16b, T1.16b

	// each 64-bit lane of MASK becomes 0xc200000000000000
	movi		MASK.16b, #0xe1
	shl		MASK.2d, MASK.2d, #57
	.endm

	/*
	 * Setup for the p8 code path. Expects SHASH to be loaded; derives
	 * SHASH2, the k00_16/k32_48 masks, the perm1-perm3 vectors and the
	 * rotated key copies sh1-sh4 and ss1-ss4. Clobbers x5 and T1.
	 */
	.macro		__pmull_pre_p8
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

	// k00_16 := 0x0000000000000000_000000000000ffff
	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
	movi		k32_48.2d, #0xffffffff
	mov		k32_48.h[2], k32_48.h[0]
	ushr		k00_16.2d, k32_48.2d, #32

	// prepare the permutation vectors
	mov_q		x5, 0x080f0e0d0c0b0a09
	movi		T1.8b, #8
	dup		perm1.2d, x5
	eor		perm1.16b, perm1.16b, T1.16b
	ushr		perm2.2d, perm1.2d, #8
	ushr		perm3.2d, perm1.2d, #16
	ushr		T1.2d, perm1.2d, #24
	sli		perm2.2d, perm1.2d, #56
	sli		perm3.2d, perm1.2d, #48
	sli		T1.2d, perm1.2d, #40

	// precompute loop invariants
	tbl		sh1.16b, {SHASH.16b}, perm1.16b
	tbl		sh2.16b, {SHASH.16b}, perm2.16b
	tbl		sh3.16b, {SHASH.16b}, perm3.16b
	tbl		sh4.16b, {SHASH.16b}, T1.16b
	ext		ss1.8b, SHASH2.8b, SHASH2.8b, #1
	ext		ss2.8b, SHASH2.8b, SHASH2.8b, #2
	ext		ss3.8b, SHASH2.8b, SHASH2.8b, #3
	ext		ss4.8b, SHASH2.8b, SHASH2.8b, #4
	.endm

//
// PMULL (64x64->128) based reduction for CPUs that can do
// it in a single instruction.
//
	// Expects the unreduced product in XL/XM/XH, T1 as set up by the
	// caller, and MASK initialised. Clobbers T2.
	.macro		__pmull_reduce_p64
	pmull		T2.1q, XL.1d, MASK.1d
	eor		XM.16b, XM.16b, T1.16b

	mov		XH.d[0], XM.d[1]
	mov		XM.d[1], XL.d[0]

	eor		XL.16b, XM.16b, T2.16b
	ext		T2.16b, XL.16b, XL.16b, #8
	pmull		XL.1q, XL.1d, MASK.1d
	.endm

	//
	// Alternative reduction for CPUs that lack support for the
	// 64x64->128 PMULL instruction
	//
	.macro		__pmull_reduce_p8
	eor		XM.16b, XM.16b, T1.16b

	mov		XL.d[1], XM.d[0]
	mov		XH.d[0], XM.d[1]

	shl		T1.2d, XL.2d, #57
	shl		T2.2d, XL.2d, #62
	eor		T2.16b, T2.16b, T1.16b
	shl		T1.2d, XL.2d, #63
	eor		T2.16b, T2.16b, T1.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		T2.16b, T2.16b, T1.16b

	mov		XL.d[1], T2.d[0]
	mov		XH.d[0], T2.d[1]

	ushr		T2.2d, XL.2d, #1
	eor		XH.16b, XH.16b, XL.16b
	eor		XL.16b, XL.16b, T2.16b
	ushr		T2.2d, T2.2d, #6
	ushr		XL.2d, XL.2d, #1
	.endm

	/*
	 * Body of the GHASH update routines; \pn is p64 or p8 and selects
	 * the multiply, setup and reduction macros.
	 *
	 * Registers as used below:
	 *   w0	number of blocks left to process from [x2]
	 *   x1	digest: loaded into XL on entry, stored back on exit
	 *   x2	source pointer, post-incremented
	 *   x3	key: SHASH at [x3]; the p64 setup also reads [x3 + 16]
	 *   x4	optional head block, processed first; NULL to skip
	 */
	.macro		__pmull_ghash, pn
	ld1		{SHASH.2d}, [x3]
	ld1		{XL.2d}, [x1]

	__pmull_pre_\pn

	/* do the head block first, if supplied */
	cbz		x4, 0f
	ld1		{T1.2d}, [x4]
	mov		x4, xzr
	b		3f

0:	.ifc		\pn, p64
	tbnz		w0, #0, 2f		// skip until #blocks is a
	tbnz		w0, #1, 2f		// round multiple of 4

	/* 4-way aggregated loop: consume 64 bytes per iteration */
1:	ld1		{XM3.16b-TT4.16b}, [x2], #64

	sub		w0, w0, #4

	rev64		T1.16b, XM3.16b
	rev64		T2.16b, XH3.16b
	rev64		TT4.16b, TT4.16b
	rev64		TT3.16b, TT3.16b

	ext		IN1.16b, TT4.16b, TT4.16b, #8
	ext		XL3.16b, TT3.16b, TT3.16b, #8

	eor		TT4.16b, TT4.16b, IN1.16b
	pmull2		XH2.1q, SHASH.2d, IN1.2d	// a1 * b1
	pmull		XL2.1q, SHASH.1d, IN1.1d	// a0 * b0
	pmull		XM2.1q, SHASH2.1d, TT4.1d	// (a1 + a0)(b1 + b0)

	eor		TT3.16b, TT3.16b, XL3.16b
	pmull2		XH3.1q, HH.2d, XL3.2d		// a1 * b1
	pmull		XL3.1q, HH.1d, XL3.1d		// a0 * b0
	pmull2		XM3.1q, SHASH2.2d, TT3.2d	// (a1 + a0)(b1 + b0)

	ext		IN1.16b, T2.16b, T2.16b, #8
	eor		XL2.16b, XL2.16b, XL3.16b
	eor		XH2.16b, XH2.16b, XH3.16b
	eor		XM2.16b, XM2.16b, XM3.16b

	eor		T2.16b, T2.16b, IN1.16b
	pmull2		XH3.1q, HH3.2d, IN1.2d		// a1 * b1
	pmull		XL3.1q, HH3.1d, IN1.1d		// a0 * b0
	pmull		XM3.1q, HH34.1d, T2.1d		// (a1 + a0)(b1 + b0)

	eor		XL2.16b, XL2.16b, XL3.16b
	eor		XH2.16b, XH2.16b, XH3.16b
	eor		XM2.16b, XM2.16b, XM3.16b

	ext		IN1.16b, T1.16b, T1.16b, #8
	ext		TT3.16b, XL.16b, XL.16b, #8
	eor		XL.16b, XL.16b, IN1.16b
	eor		T1.16b, T1.16b, TT3.16b

	pmull2		XH.1q, HH4.2d, XL.2d		// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	pmull		XL.1q, HH4.1d, XL.1d		// a0 * b0
	pmull2		XM.1q, HH34.2d, T1.2d		// (a1 + a0)(b1 + b0)

	eor		XL.16b, XL.16b, XL2.16b
	eor		XH.16b, XH.16b, XH2.16b
	eor		XM.16b, XM.16b, XM2.16b

	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_p64

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbz		w0, 5f
	b		1b
	.endif

	/* single block path */
2:	ld1		{T1.2d}, [x2], #16
	sub		w0, w0, #1

3:	/* multiply XL by SHASH in GF(2^128) */
CPU_LE(	rev64		T1.16b, T1.16b	)

	ext		T2.16b, XL.16b, XL.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8
	eor		T1.16b, T1.16b, T2.16b
	eor		XL.16b, XL.16b, IN1.16b

	__pmull2_\pn	XH, XL, SHASH			// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	__pmull_\pn 	XL, XL, SHASH			// a0 * b0
	__pmull_\pn	XM, T1, SHASH2			// (a1 + a0)(b1 + b0)

4:	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_\pn

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbnz		w0, 0b

5:	st1		{XL.2d}, [x1]
	ret
	.endm

	/*
	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
	 *			   struct ghash_key const *k, const char *head)
	 */
SYM_TYPED_FUNC_START(pmull_ghash_update_p64)
	__pmull_ghash	p64
SYM_FUNC_END(pmull_ghash_update_p64)

SYM_TYPED_FUNC_START(pmull_ghash_update_p8)
	__pmull_ghash	p8
SYM_FUNC_END(pmull_ghash_update_p8)

	/*
	 * Register names for the GCM code below. KS0-KS3 hold the four
	 * key stream blocks, INP0-INP3 the four data blocks and K0-K9,
	 * KK, KL, KM the AES round keys. K6/K7 are aliases of v4/v5,
	 * i.e., the same registers as MASK/XM.
	 */
	KS0		.req	v8
	KS1		.req	v9
	KS2		.req	v10
	KS3		.req	v11

	INP0		.req	v21
	INP1		.req	v22
	INP2		.req	v23
	INP3		.req	v24

	K0		.req	v25
	K1		.req	v26
	K2		.req	v27
	K3		.req	v28
	K4		.req	v12
	K5		.req	v13
	K6		.req	v4
	K7		.req	v5
	K8		.req	v14
	K9		.req	v15
	KK		.req	v29
	KL		.req	v30
	KM		.req	v31

	/*
	 * Load the round keys that stay resident: K0-K5 from the start of
	 * the key schedule at \rk, and KK/KL/KM from its last three 16-byte
	 * entries (\rk + 16 * \rounds - 32). Clobbers \tmp.
	 */
	.macro		load_round_keys, rounds, rk, tmp
	add		\tmp, \rk, #64
	ld1		{K0.4s-K3.4s}, [\rk]
	ld1		{K4.4s-K5.4s}, [\tmp]
	add		\tmp, \rk, \rounds, lsl #4
	sub		\tmp, \tmp, #32
	ld1		{KK.4s-KM.4s}, [\tmp]
	.endm

	/* one full AES round (aese + aesmc) on \state */
	.macro		enc_round, state, key
	aese		\state\().16b, \key\().16b
	aesmc		\state\().16b, \state\().16b
	.endm

	/* one full AES round on four states with the same round key */
	.macro		enc_qround, s0, s1, s2, s3, key
	enc_round	\s0, \key
	enc_round	\s1, \key
	enc_round	\s2, \key
	enc_round	\s3, \key
	.endm

	/*
	 * Encrypt the single block in \state. Expects load_round_keys to
	 * have been run; reloads K6-K9 from \rk as needed. Clobbers \tmp.
	 *
	 * The branch structure presumably assumes \rounds is 10, 12 or 14:
	 * bit 2 is clear only for 10, and bit 1 then tells 12 from 14.
	 */
	.macro		enc_block, state, rounds, rk, tmp
	add		\tmp, \rk, #96
	ld1		{K6.4s-K7.4s}, [\tmp], #32
	.irp		key, K0, K1, K2, K3, K4, K5
	enc_round	\state, \key
	.endr

	tbnz		\rounds, #2, .Lnot128_\@
.Lout256_\@:
	enc_round	\state, K6
	enc_round	\state, K7

.Lout192_\@:
	enc_round	\state, KK
	aese		\state\().16b, KL.16b
	eor		\state\().16b, \state\().16b, KM.16b

	.subsection	1
.Lnot128_\@:
	ld1		{K8.4s-K9.4s}, [\tmp], #32
	enc_round	\state, K6
	enc_round	\state, K7
	ld1		{K6.4s-K7.4s}, [\tmp]
	enc_round	\state, K8
	enc_round	\state, K9
	tbz		\rounds, #1, .Lout192_\@
	b		.Lout256_\@
	.previous
	.endm

	/*
	 * Body of pmull_gcm_encrypt (\enc == 1) and pmull_gcm_decrypt
	 * (\enc == 0).
	 *
	 * Registers as used below:
	 *   x0	number of bytes left to process (0: tag handling only)
	 *   x1	destination pointer
	 *   x2	source pointer
	 *   x3	GHASH key: SHASH, then HH, HH3, HH4
	 *   x4	digest, loaded into XL
	 *   x5	counter block; the 32-bit counter lives at [x5 + 12]
	 *   x6	AES round key pointer
	 *   x7	AES round count
	 *   w8	running counter value (CPU endianness)
	 *   w9	number of blocks handled in the current iteration
	 *
	 * Further inputs are read from [sp + .Lframe_local_offset] and, when
	 * decrypting, [sp + 40]; presumably these are the stack-passed
	 * arguments once frame_push has run - confirm against frame_push.
	 */
	.align		6
	.macro		pmull_gcm_do_crypt, enc
	frame_push	1

	load_round_keys	x7, x6, x8

	ld1		{SHASH.2d}, [x3], #16
	ld1		{HH.2d-HH4.2d}, [x3]

	trn1		SHASH2.2d, SHASH.2d, HH.2d
	trn2		T1.2d, SHASH.2d, HH.2d
	eor		SHASH2.16b, SHASH2.16b, T1.16b

	trn1		HH34.2d, HH3.2d, HH4.2d
	trn2		T1.2d, HH3.2d, HH4.2d
	eor		HH34.16b, HH34.16b, T1.16b

	ld1		{XL.2d}, [x4]

	cbz		x0, 3f				// tag only?

	ldr		w8, [x5, #12]			// load lower counter
CPU_LE(	rev		w8, w8		)

0:	mov		w9, #4				// max blocks per round
	add		x10, x0, #0xf
	lsr		x10, x10, #4			// remaining blocks

	subs		x0, x0, #64
	csel		w9, w10, w9, mi
	add		w8, w8, w9

	bmi		1f
	ld1		{INP0.16b-INP3.16b}, [x2], #64
	.subsection	1
	/*
	 * Populate the four input registers right to left with up to 63 bytes
	 * of data, using overlapping loads to avoid branches.
	 *
	 *                INP0     INP1     INP2     INP3
	 *  1 byte     |        |        |        |x       |
	 * 16 bytes    |        |        |        |xxxxxxxx|
	 * 17 bytes    |        |        |xxxxxxxx|x       |
	 * 47 bytes    |        |xxxxxxxx|xxxxxxxx|xxxxxxx |
	 * etc etc
	 *
	 * Note that this code may read up to 15 bytes before the start of
	 * the input. It is up to the calling code to ensure this is safe if
	 * this happens in the first iteration of the loop (i.e., when the
	 * input size is < 16 bytes)
	 */
1:	mov		x15, #16
	ands		x19, x0, #0xf
	csel		x19, x19, x15, ne
	adr_l		x17, .Lpermute_table + 16

	sub		x11, x15, x19
	add		x12, x17, x11
	sub		x17, x17, x11
	ld1		{T1.16b}, [x12]
	sub		x10, x1, x11
	sub		x11, x2, x11

	cmp		x0, #-16
	csel		x14, x15, xzr, gt
	cmp		x0, #-32
	csel		x15, x15, xzr, gt
	cmp		x0, #-48
	csel		x16, x19, xzr, gt
	csel		x1, x1, x10, gt
	csel		x2, x2, x11, gt

	ld1		{INP0.16b}, [x2], x14
	ld1		{INP1.16b}, [x2], x15
	ld1		{INP2.16b}, [x2], x16
	ld1		{INP3.16b}, [x2]
	tbl		INP3.16b, {INP3.16b}, T1.16b
	b		2f
	.previous

2:	.if		\enc == 0
	bl		pmull_gcm_ghash_4x
	.endif

	bl		pmull_gcm_enc_4x

	tbnz		x0, #63, 6f
	st1		{INP0.16b-INP3.16b}, [x1], #64
	.if		\enc == 1
	bl		pmull_gcm_ghash_4x
	.endif
	// The flags are still those of the 'subs' above: neither helper in
	// this file executes a flag-setting instruction.
	bne		0b

3:	ldr		x10, [sp, #.Lframe_local_offset]
	cbz		x10, 5f				// output tag?

	ld1		{INP3.16b}, [x10]		// load lengths[]
	mov		w9, #1
	bl		pmull_gcm_ghash_4x

	mov		w11, #(0x1 << 24)		// BE '1U'
	ld1		{KS0.16b}, [x5]
	mov		KS0.s[3], w11

	enc_block	KS0, x7, x6, x12

	ext		XL.16b, XL.16b, XL.16b, #8
	rev64		XL.16b, XL.16b
	eor		XL.16b, XL.16b, KS0.16b

	.if		\enc == 1
	st1		{XL.16b}, [x10]			// store tag
	.else
	ldp		x11, x12, [sp, #40]		// load tag pointer and authsize
	adr_l		x17, .Lpermute_table
	ld1		{KS0.16b}, [x11]		// load supplied tag
	add		x17, x17, x12
	ld1		{KS1.16b}, [x17]		// load permute vector

	cmeq		XL.16b, XL.16b, KS0.16b		// compare tags
	mvn		XL.16b, XL.16b			// -1 for fail, 0 for pass
	tbl		XL.16b, {XL.16b}, KS1.16b	// keep authsize bytes only
	sminv		b0, XL.16b			// signed minimum across XL
	smov		w0, v0.b[0]			// return b0
	.endif

4:	frame_pop
	ret

	/* no tag requested: write back the counter and the digest */
5:
CPU_LE(	rev		w8, w8		)
	str		w8, [x5, #12]			// store lower counter
	st1		{XL.2d}, [x4]
	b		4b

	/* final, partial (< 64 byte) iteration: store only the bytes produced */
6:	ld1		{T1.16b-T2.16b}, [x17], #32	// permute vectors
	sub		x17, x17, x19, lsl #1

	cmp		w9, #1
	beq		7f
	.subsection	1
7:	ld1		{INP2.16b}, [x1]
	tbx		INP2.16b, {INP3.16b}, T1.16b
	mov		INP3.16b, INP2.16b
	b		8f
	.previous

	st1		{INP0.16b}, [x1], x14
	st1		{INP1.16b}, [x1], x15
	st1		{INP2.16b}, [x1], x16
	tbl		INP3.16b, {INP3.16b}, T1.16b
	tbx		INP3.16b, {INP2.16b}, T2.16b
8:	st1		{INP3.16b}, [x1]

	.if		\enc == 1
	ld1		{T1.16b}, [x17]
	tbl		INP3.16b, {INP3.16b}, T1.16b	// clear non-data bits
	bl		pmull_gcm_ghash_4x
	.endif
	b		3b
	.endm

	/*
	 * void pmull_gcm_encrypt(int blocks, u8 dst[], const u8 src[],
	 *			  struct ghash_key const *k, u64 dg[], u8 ctr[],
	 *			  int rounds, u8 tag)
	 *
	 * NOTE(review): the body treats x0 as a byte count, x6 as the round
	 * key pointer and x7 as the round count, and reads more values from
	 * the stack, so the prototype above looks out of date - confirm
	 * against the C declaration.
	 */
SYM_FUNC_START(pmull_gcm_encrypt)
	pmull_gcm_do_crypt	1
SYM_FUNC_END(pmull_gcm_encrypt)

	/*
	 * void pmull_gcm_decrypt(int blocks, u8 dst[], const u8 src[],
	 *			  struct ghash_key const *k, u64 dg[], u8 ctr[],
	 *			  int rounds, u8 tag)
	 *
	 * NOTE(review): as for pmull_gcm_encrypt, the prototype above does
	 * not match the registers the body uses - confirm against the C
	 * declaration. The decrypt variant returns the tag comparison
	 * result in w0 (0 for pass) when a tag is supplied.
	 */
SYM_FUNC_START(pmull_gcm_decrypt)
	pmull_gcm_do_crypt	0
SYM_FUNC_END(pmull_gcm_decrypt)

	/*
	 * Fold up to four blocks from INP0-INP3 into the digest in XL.
	 * w9 holds the block count; with fewer than four blocks only the
	 * rightmost registers are used (one block: INP3; two: INP2-INP3;
	 * three: INP1-INP3). Executes no flag-setting instructions.
	 */
SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x)
	movi		MASK.16b, #0xe1
	shl		MASK.2d, MASK.2d, #57

	rev64		T1.16b, INP0.16b
	rev64		T2.16b, INP1.16b
	rev64		TT3.16b, INP2.16b
	rev64		TT4.16b, INP3.16b

	ext		XL.16b, XL.16b, XL.16b, #8

	tbz		w9, #2, 0f			// <4 blocks?
	.subsection	1
0:	movi		XH2.16b, #0
	movi		XM2.16b, #0
	movi		XL2.16b, #0

	tbz		w9, #0, 1f			// 2 blocks?
	tbz		w9, #1, 2f			// 1 block?

	eor		T2.16b, T2.16b, XL.16b
	ext		T1.16b, T2.16b, T2.16b, #8
	b		.Lgh3

1:	eor		TT3.16b, TT3.16b, XL.16b
	ext		T2.16b, TT3.16b, TT3.16b, #8
	b		.Lgh2

2:	eor		TT4.16b, TT4.16b, XL.16b
	ext		IN1.16b, TT4.16b, TT4.16b, #8
	b		.Lgh1
	.previous

	eor		T1.16b, T1.16b, XL.16b
	ext		IN1.16b, T1.16b, T1.16b, #8

	pmull2		XH2.1q, HH4.2d, IN1.2d		// a1 * b1
	eor		T1.16b, T1.16b, IN1.16b
	pmull		XL2.1q, HH4.1d, IN1.1d		// a0 * b0
	pmull2		XM2.1q, HH34.2d, T1.2d		// (a1 + a0)(b1 + b0)

	ext		T1.16b, T2.16b, T2.16b, #8
.Lgh3:	eor		T2.16b, T2.16b, T1.16b
	pmull2		XH.1q, HH3.2d, T1.2d		// a1 * b1
	pmull		XL.1q, HH3.1d, T1.1d		// a0 * b0
	pmull		XM.1q, HH34.1d, T2.1d		// (a1 + a0)(b1 + b0)

	eor		XH2.16b, XH2.16b, XH.16b
	eor		XL2.16b, XL2.16b, XL.16b
	eor		XM2.16b, XM2.16b, XM.16b

	ext		T2.16b, TT3.16b, TT3.16b, #8
.Lgh2:	eor		TT3.16b, TT3.16b, T2.16b
	pmull2		XH.1q, HH.2d, T2.2d		// a1 * b1
	pmull		XL.1q, HH.1d, T2.1d		// a0 * b0
	pmull2		XM.1q, SHASH2.2d, TT3.2d	// (a1 + a0)(b1 + b0)

	eor		XH2.16b, XH2.16b, XH.16b
	eor		XL2.16b, XL2.16b, XL.16b
	eor		XM2.16b, XM2.16b, XM.16b

	ext		IN1.16b, TT4.16b, TT4.16b, #8
.Lgh1:	eor		TT4.16b, TT4.16b, IN1.16b
	pmull		XL.1q, SHASH.1d, IN1.1d		// a0 * b0
	pmull2		XH.1q, SHASH.2d, IN1.2d		// a1 * b1
	pmull		XM.1q, SHASH2.1d, TT4.1d	// (a1 + a0)(b1 + b0)

	eor		XH.16b, XH.16b, XH2.16b
	eor		XL.16b, XL.16b, XL2.16b
	eor		XM.16b, XM.16b, XM2.16b

	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_p64

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	ret
SYM_FUNC_END(pmull_gcm_ghash_4x)

	/*
	 * Generate four key stream blocks for counter values w8 - 4 ...
	 * w8 - 1 and XOR them into INP0-INP3. x5 points at the counter
	 * block, x6 at the round keys and x7 holds the round count.
	 * Clobbers x10-x13, KS0-KS3 and K6-K9. Executes no flag-setting
	 * instructions.
	 */
SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
	ld1		{KS0.16b}, [x5]			// load upper counter
	sub		w10, w8, #4
	sub		w11, w8, #3
	sub		w12, w8, #2
	sub		w13, w8, #1
	rev		w10, w10
	rev		w11, w11
	rev		w12, w12
	rev		w13, w13
	mov		KS1.16b, KS0.16b
	mov		KS2.16b, KS0.16b
	mov		KS3.16b, KS0.16b
	ins		KS0.s[3], w10			// set lower counter
	ins		KS1.s[3], w11
	ins		KS2.s[3], w12
	ins		KS3.s[3], w13

	add		x10, x6, #96			// round key pointer
	ld1		{K6.4s-K7.4s}, [x10], #32
	.irp		key, K0, K1, K2, K3, K4, K5
	enc_qround	KS0, KS1, KS2, KS3, \key
	.endr

	tbnz		x7, #2, .Lnot128
	.subsection	1
.Lnot128:
	ld1		{K8.4s-K9.4s}, [x10], #32
	.irp		key, K6, K7
	enc_qround	KS0, KS1, KS2, KS3, \key
	.endr
	ld1		{K6.4s-K7.4s}, [x10]
	.irp		key, K8, K9
	enc_qround	KS0, KS1, KS2, KS3, \key
	.endr
	tbz		x7, #1, .Lout192
	b		.Lout256
	.previous

.Lout256:
	.irp		key, K6, K7
	enc_qround	KS0, KS1, KS2, KS3, \key
	.endr

.Lout192:
	enc_qround	KS0, KS1, KS2, KS3, KK

	aese		KS0.16b, KL.16b
	aese		KS1.16b, KL.16b
	aese		KS2.16b, KL.16b
	aese		KS3.16b, KL.16b

	eor		KS0.16b, KS0.16b, KM.16b
	eor		KS1.16b, KS1.16b, KM.16b
	eor		KS2.16b, KS2.16b, KM.16b
	eor		KS3.16b, KS3.16b, KM.16b

	eor		INP0.16b, INP0.16b, KS0.16b
	eor		INP1.16b, INP1.16b, KS1.16b
	eor		INP2.16b, INP2.16b, KS2.16b
	eor		INP3.16b, INP3.16b, KS3.16b

	ret
SYM_FUNC_END(pmull_gcm_enc_4x)

	/*
	 * Permute vectors for tbl/tbx: 16 bytes of 0xff followed by the
	 * identity permutation 0x0-0xf, twice over. Loading 16 bytes at a
	 * byte offset into the table yields a shifted permutation whose
	 * out-of-range (0xff) indices make tbl produce zero bytes.
	 */
	.section	".rodata", "a"
	.align		6
.Lpermute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.previous