/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
 *
 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/*
	 * NEON register aliases.
	 *
	 * Several registers deliberately carry two names: XH and IN1 both
	 * map to v7, and the temporaries used only by the 8-bit multiply
	 * fallback (k00_16, k32_48, t3-t9, perm1-perm3) occupy v8-v19, the
	 * same registers that the 4-way p64 and GCM code addresses as
	 * XL2 ... HH34.
	 */
	SHASH		.req	v0
	SHASH2		.req	v1
	T1		.req	v2
	T2		.req	v3
	MASK		.req	v4
	XM		.req	v5
	XL		.req	v6
	XH		.req	v7
	IN1		.req	v7

	k00_16		.req	v8
	k32_48		.req	v9

	t3		.req	v10
	t4		.req	v11
	t5		.req	v12
	t6		.req	v13
	t7		.req	v14
	t8		.req	v15
	t9		.req	v16

	perm1		.req	v17
	perm2		.req	v18
	perm3		.req	v19

	sh1		.req	v20
	sh2		.req	v21
	sh3		.req	v22
	sh4		.req	v23

	ss1		.req	v24
	ss2		.req	v25
	ss3		.req	v26
	ss4		.req	v27

	/*
	 * XM3, XH3, TT3, TT4 (v12-v15) and HH, HH3, HH4 (v16-v18) must stay
	 * consecutive: they are loaded with multi-register ld1 forms such as
	 * {XM3.16b-TT4.16b} and {HH.2d-HH4.2d}.
	 */
	XL2		.req	v8
	XM2		.req	v9
	XH2		.req	v10
	XL3		.req	v11
	XM3		.req	v12
	XH3		.req	v13
	TT3		.req	v14
	TT4		.req	v15
	HH		.req	v16
	HH3		.req	v17
	HH4		.req	v18
	HH34		.req	v19

	.text
	.arch		armv8-a+crypto

	/* rd = rn.d[0] * rm.d[0], 64x64->128 bit polynomial multiply */
	.macro		__pmull_p64, rd, rn, rm
	pmull		\rd\().1q, \rn\().1d, \rm\().1d
	.endm

	/* rd = rn.d[1] * rm.d[1], 64x64->128 bit polynomial multiply */
	.macro		__pmull2_p64, rd, rn, rm
	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
	.endm

	/*
	 * 8-bit fallback for __pmull_p64: rotate the low 8 bytes of 'ad' by
	 * 1, 2 and 3 byte positions into t3/t5/t7, then dispatch to
	 * __pmull_p8_SHASH or __pmull_p8_SHASH2 depending on 'bd'.
	 */
	.macro		__pmull_p8, rq, ad, bd
	ext		t3.8b, \ad\().8b, \ad\().8b, #1		// A1
	ext		t5.8b, \ad\().8b, \ad\().8b, #2		// A2
	ext		t7.8b, \ad\().8b, \ad\().8b, #3		// A3

	__pmull_p8_\bd	\rq, \ad
	.endm

	/*
	 * 8-bit fallback for __pmull2_p64: same as __pmull_p8, but the
	 * rotations are done with tbl and the perm1-perm3 vectors prepared
	 * by __pmull_pre_p8, so that the upper 8 bytes are rotated as well.
	 * Only 'bd' == SHASH is supported (no __pmull2_p8_SHASH2 exists).
	 */
	.macro		__pmull2_p8, rq, ad, bd
	tbl		t3.16b, {\ad\().16b}, perm1.16b		// A1
	tbl		t5.16b, {\ad\().16b}, perm2.16b		// A2
	tbl		t7.16b, {\ad\().16b}, perm3.16b		// A3

	__pmull2_p8_\bd	\rq, \ad
	.endm

	/* low halves, multiplier SHASH with its rotated copies sh1-sh4 */
	.macro		__pmull_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
	.endm

	/* low halves, multiplier SHASH2 with its rotated copies ss1-ss4 */
	.macro		__pmull_p8_SHASH2, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
	.endm

	/* high halves (pmull2), multiplier SHASH with sh1-sh4 */
	.macro		__pmull2_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
	.endm

	/*
	 * Core of the 8-bit multiply fallback: assembles the product of
	 * 'ad' and 'bd' from eight pmull/pmull2 (8x8->16 bit) instructions.
	 *
	 *   rq     - destination register name
	 *   ad, bd - operands, already suffixed with .8b or .16b
	 *   nb     - arrangement suffix applied to t3/t5/t7 and b1-b4
	 *   t      - empty to emit pmull, 2 to emit pmull2
	 *   b1-b4  - copies of 'bd' rotated by 1..4 byte positions
	 *
	 * Expects t3/t5/t7 to hold 'ad' rotated by 1/2/3 bytes and the masks
	 * k00_16/k32_48 to be set up (see __pmull_pre_p8).  Clobbers t3-t9.
	 */
	.macro		__pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
	pmull\t		t3.8h, t3.\nb, \bd			// F = A1*B
	pmull\t		t4.8h, \ad, \b1\().\nb			// E = A*B1
	pmull\t		t5.8h, t5.\nb, \bd			// H = A2*B
	pmull\t		t6.8h, \ad, \b2\().\nb			// G = A*B2
	pmull\t		t7.8h, t7.\nb, \bd			// J = A3*B
	pmull\t		t8.8h, \ad, \b3\().\nb			// I = A*B3
	pmull\t		t9.8h, \ad, \b4\().\nb			// K = A*B4
	pmull\t		\rq\().8h, \ad, \bd			// D = A*B

	eor		t3.16b, t3.16b, t4.16b			// L = E + F
	eor		t5.16b, t5.16b, t6.16b			// M = G + H
	eor		t7.16b, t7.16b, t8.16b			// N = I + J

	uzp1		t4.2d, t3.2d, t5.2d
	uzp2		t3.2d, t3.2d, t5.2d
	uzp1		t6.2d, t7.2d, t9.2d
	uzp2		t7.2d, t7.2d, t9.2d

	// t3 = (L) (P0 + P1) << 8
	// t5 = (M) (P2 + P3) << 16
	eor		t4.16b, t4.16b, t3.16b
	and		t3.16b, t3.16b, k32_48.16b

	// t7 = (N) (P4 + P5) << 24
	// t9 = (K) (P6 + P7) << 32
	eor		t6.16b, t6.16b, t7.16b
	and		t7.16b, t7.16b, k00_16.16b

	eor		t4.16b, t4.16b, t3.16b
	eor		t6.16b, t6.16b, t7.16b

	zip2		t5.2d, t4.2d, t3.2d
	zip1		t3.2d, t4.2d, t3.2d
	zip2		t9.2d, t6.2d, t7.2d
	zip1		t7.2d, t6.2d, t7.2d

	ext		t3.16b, t3.16b, t3.16b, #15
	ext		t5.16b, t5.16b, t5.16b, #14
	ext		t7.16b, t7.16b, t7.16b, #13
	ext		t9.16b, t9.16b, t9.16b, #12

	eor		t3.16b, t3.16b, t5.16b
	eor		t7.16b, t7.16b, t9.16b
	eor		\rq\().16b, \rq\().16b, t3.16b
	eor		\rq\().16b, \rq\().16b, t7.16b
	.endm

	/*
	 * Loop-invariant setup for the p64 code path.  Expects SHASH to be
	 * loaded from [x3] already; loads HH, HH3 and HH4 from [x3, #16],
	 * folds the two 64-bit halves of SHASH/HH into SHASH2 and those of
	 * HH3/HH4 into HH34 (the multipliers of the "(a1 + a0)(b1 + b0)"
	 * products), and builds the reduction constant in MASK.
	 * Clobbers x8 and T1.
	 */
	.macro		__pmull_pre_p64
	add		x8, x3, #16
	ld1		{HH.2d-HH4.2d}, [x8]

	trn1		SHASH2.2d, SHASH.2d, HH.2d
	trn2		T1.2d, SHASH.2d, HH.2d
	eor		SHASH2.16b, SHASH2.16b, T1.16b

	trn1		HH34.2d, HH3.2d, HH4.2d
	trn2		T1.2d, HH3.2d, HH4.2d
	eor		HH34.16b, HH34.16b, T1.16b

	movi		MASK.16b, #0xe1
	shl		MASK.2d, MASK.2d, #57
	.endm

	/*
	 * Loop-invariant setup for the p8 code path.  Expects SHASH to be
	 * loaded already.  Produces SHASH2 (the two halves of SHASH XORed
	 * together), the masks k00_16/k32_48, the byte-rotation vectors
	 * perm1-perm3 and the rotated key copies sh1-sh4 / ss1-ss4 consumed
	 * by __pmull_p8_tail.  Clobbers x5 and T1.
	 */
	.macro		__pmull_pre_p8
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

	// k00_16 := 0x0000000000000000_000000000000ffff
	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
	movi		k32_48.2d, #0xffffffff
	mov		k32_48.h[2], k32_48.h[0]
	ushr		k00_16.2d, k32_48.2d, #32

	// prepare the permutation vectors
	// perm1 rotates the bytes of each 64-bit half by one position;
	// perm2, perm3 and T1 are derived from it for 2, 3 and 4 positions
	mov_q		x5, 0x080f0e0d0c0b0a09
	movi		T1.8b, #8
	dup		perm1.2d, x5
	eor		perm1.16b, perm1.16b, T1.16b
	ushr		perm2.2d, perm1.2d, #8
	ushr		perm3.2d, perm1.2d, #16
	ushr		T1.2d, perm1.2d, #24
	sli		perm2.2d, perm1.2d, #56
	sli		perm3.2d, perm1.2d, #48
	sli		T1.2d, perm1.2d, #40

	// precompute loop invariants
	tbl		sh1.16b, {SHASH.16b}, perm1.16b
	tbl		sh2.16b, {SHASH.16b}, perm2.16b
	tbl		sh3.16b, {SHASH.16b}, perm3.16b
	tbl		sh4.16b, {SHASH.16b}, T1.16b
	ext		ss1.8b, SHASH2.8b, SHASH2.8b, #1
	ext		ss2.8b, SHASH2.8b, SHASH2.8b, #2
	ext		ss3.8b, SHASH2.8b, SHASH2.8b, #3
	ext		ss4.8b, SHASH2.8b, SHASH2.8b, #4
	.endm

//
// PMULL (64x64->128) based reduction for CPUs that can do
// it in a single instruction.
//
// Expects the partial products in XL/XM/XH, T1 = ext(XL, XH, #8) and
// MASK as set up by __pmull_pre_p64.  Leaves intermediate values in
// XL, XH and T2; every user completes the reduction with
// "eor T2, T2, XH" followed by "eor XL, XL, T2".
//
	.macro		__pmull_reduce_p64
	pmull		T2.1q, XL.1d, MASK.1d
	eor		XM.16b, XM.16b, T1.16b

	mov		XH.d[0], XM.d[1]
	mov		XM.d[1], XL.d[0]

	eor		XL.16b, XM.16b, T2.16b
	ext		T2.16b, XL.16b, XL.16b, #8
	pmull		XL.1q, XL.1d, MASK.1d
	.endm

	//
	// Alternative reduction for CPUs that lack support for the
	// 64x64->128 PMULL instruction
	//
	// Same register contract as __pmull_reduce_p64, except that MASK
	// is not used: the folding is done with shifts and XORs only.
	//
	.macro		__pmull_reduce_p8
	eor		XM.16b, XM.16b, T1.16b

	mov		XL.d[1], XM.d[0]
	mov		XH.d[0], XM.d[1]

	shl		T1.2d, XL.2d, #57
	shl		T2.2d, XL.2d, #62
	eor		T2.16b, T2.16b, T1.16b
	shl		T1.2d, XL.2d, #63
	eor		T2.16b, T2.16b, T1.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		T2.16b, T2.16b, T1.16b

	mov		XL.d[1], T2.d[0]
	mov		XH.d[0], T2.d[1]

	ushr		T2.2d, XL.2d, #1
	eor		XH.16b, XH.16b, XL.16b
	eor		XL.16b, XL.16b, T2.16b
	ushr		T2.2d, T2.2d, #6
	ushr		XL.2d, XL.2d, #1
	.endm

	/*
	 * Body shared by pmull_ghash_update_p64 and pmull_ghash_update_p8;
	 * 'pn' (p64 or p8) selects the multiply, setup and reduce macros.
	 *
	 * Register usage, following the prototype further down:
	 *   w0 - number of blocks still to be read from src
	 *   x1 - dg[]: loaded into XL on entry, XL stored back on exit
	 *   x2 - src, post-incremented as blocks are consumed
	 *   x3 - key: SHASH is loaded from [x3]; the p64 variant also reads
	 *        HH, HH3 and HH4 from [x3, #16] (see __pmull_pre_p64)
	 *   x4 - optional head block, hashed first when not NULL
	 *
	 * The setup macros additionally clobber x8 (p64) or x5 (p8).
	 *
	 * Local labels: 0 = loop top, 1 = four blocks per iteration (p64
	 * only), 2 = load one block, 3 = hash the block held in T1,
	 * 5 = store the digest and return.
	 */
	.macro		__pmull_ghash, pn
	ld1		{SHASH.2d}, [x3]
	ld1		{XL.2d}, [x1]

	__pmull_pre_\pn

	/* do the head block first, if supplied */
	cbz		x4, 0f
	ld1		{T1.2d}, [x4]
	mov		x4, xzr
	b		3f

0:	.ifc		\pn, p64
	tbnz		w0, #0, 2f		// skip until #blocks is a
	tbnz		w0, #1, 2f		// round multiple of 4

1:	ld1		{XM3.16b-TT4.16b}, [x2], #64

	sub		w0, w0, #4

	// T1, T2, TT3, TT4 = blocks 0..3 of this group, in input order
	rev64		T1.16b, XM3.16b
	rev64		T2.16b, XH3.16b
	rev64		TT4.16b, TT4.16b
	rev64		TT3.16b, TT3.16b

	ext		IN1.16b, TT4.16b, TT4.16b, #8
	ext		XL3.16b, TT3.16b, TT3.16b, #8

	// last block of the group times SHASH
	eor		TT4.16b, TT4.16b, IN1.16b
	pmull2		XH2.1q, SHASH.2d, IN1.2d	// a1 * b1
	pmull		XL2.1q, SHASH.1d, IN1.1d	// a0 * b0
	pmull		XM2.1q, SHASH2.1d, TT4.1d	// (a1 + a0)(b1 + b0)

	// third block times HH
	eor		TT3.16b, TT3.16b, XL3.16b
	pmull2		XH3.1q, HH.2d, XL3.2d		// a1 * b1
	pmull		XL3.1q, HH.1d, XL3.1d		// a0 * b0
	pmull2		XM3.1q, SHASH2.2d, TT3.2d	// (a1 + a0)(b1 + b0)

	ext		IN1.16b, T2.16b, T2.16b, #8
	eor		XL2.16b, XL2.16b, XL3.16b
	eor		XH2.16b, XH2.16b, XH3.16b
	eor		XM2.16b, XM2.16b, XM3.16b

	// second block times HH3
	eor		T2.16b, T2.16b, IN1.16b
	pmull2		XH3.1q, HH3.2d, IN1.2d		// a1 * b1
	pmull		XL3.1q, HH3.1d, IN1.1d		// a0 * b0
	pmull		XM3.1q, HH34.1d, T2.1d		// (a1 + a0)(b1 + b0)

	eor		XL2.16b, XL2.16b, XL3.16b
	eor		XH2.16b, XH2.16b, XH3.16b
	eor		XM2.16b, XM2.16b, XM3.16b

	// first block, combined with the running digest, times HH4
	ext		IN1.16b, T1.16b, T1.16b, #8
	ext		TT3.16b, XL.16b, XL.16b, #8
	eor		XL.16b, XL.16b, IN1.16b
	eor		T1.16b, T1.16b, TT3.16b

	pmull2		XH.1q, HH4.2d, XL.2d		// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	pmull		XL.1q, HH4.1d, XL.1d		// a0 * b0
	pmull2		XM.1q, HH34.2d, T1.2d		// (a1 + a0)(b1 + b0)

	eor		XL.16b, XL.16b, XL2.16b
	eor		XH.16b, XH.16b, XH2.16b
	eor		XM.16b, XM.16b, XM2.16b

	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_p64

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbz		w0, 5f
	b		1b
	.endif

2:	ld1		{T1.2d}, [x2], #16
	sub		w0, w0, #1

3:	/* multiply XL by SHASH in GF(2^128) */
CPU_LE(	rev64		T1.16b, T1.16b	)

	ext		T2.16b, XL.16b, XL.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8
	eor		T1.16b, T1.16b, T2.16b
	eor		XL.16b, XL.16b, IN1.16b

	__pmull2_\pn	XH, XL, SHASH			// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	__pmull_\pn 	XL, XL, SHASH			// a0 * b0
	__pmull_\pn	XM, T1, SHASH2			// (a1 + a0)(b1 + b0)

4:	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_\pn

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbnz		w0, 0b

5:	st1		{XL.2d}, [x1]
	ret
	.endm

	/*
	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
	 *			   struct ghash_key const *k, const char *head)
	 */
SYM_FUNC_START(pmull_ghash_update_p64)
	__pmull_ghash	p64
SYM_FUNC_END(pmull_ghash_update_p64)

SYM_FUNC_START(pmull_ghash_update_p8)
	__pmull_ghash	p8
SYM_FUNC_END(pmull_ghash_update_p8)

/*
 * Register aliases for the AES-GCM code: KS0-KS3 hold the four counter
 * blocks and INP0-INP3 the four data blocks processed per iteration.
 * These reuse registers named differently by the code above.
 */
KS0		.req	v8
KS1		.req	v9
KS2		.req	v10
KS3		.req	v11

INP0		.req	v21
INP1		.req	v22
INP2		.req	v23
INP3		.req	v24

/*
 * AES round key registers.  K0-K5 and KK/KL/KM are loaded once by
 * load_round_keys; K6-K9 are (re)loaded by enc_block and
 * pmull_gcm_enc_4x on every use.  K6/K7 share v4/v5 with MASK/XM and
 * K4/K5, K8/K9 share v12-v15 with XM3/XH3/TT3/TT4.
 */
K0		.req	v25
K1		.req	v26
K2		.req	v27
K3		.req	v28
K4		.req	v12
K5		.req	v13
K6		.req	v4
K7		.req	v5
K8		.req	v14
K9		.req	v15
KK		.req	v29
KL		.req	v30
KM		.req	v31

	/*
	 * Load the round keys that stay resident for a whole call:
	 * K0-K3 from [rk], K4-K5 from [rk, #64], and KK/KL/KM from
	 * rk + 16 * rounds - 32, i.e. the last three 16-byte round keys.
	 *
	 *   rounds - register holding the number of AES rounds
	 *   rk     - register holding the round key array address
	 *   tmp    - scratch register, clobbered
	 */
	.macro		load_round_keys, rounds, rk, tmp
	add		\tmp, \rk, #64
	ld1		{K0.4s-K3.4s}, [\rk]
	ld1		{K4.4s-K5.4s}, [\tmp]
	add		\tmp, \rk, \rounds, lsl #4
	sub		\tmp, \tmp, #32
	ld1		{KK.4s-KM.4s}, [\tmp]
	.endm

	/* one full AES round (aese + aesmc) on a single block */
	.macro		enc_round, state, key
	aese		\state\().16b, \key\().16b
	aesmc		\state\().16b, \state\().16b
	.endm

	/* the same round key applied to four blocks */
	.macro		enc_qround, s0, s1, s2, s3, key
	enc_round	\s0, \key
	enc_round	\s1, \key
	enc_round	\s2, \key
	enc_round	\s3, \key
	.endm

	/*
	 * Encrypt the single block in 'state' in place.
	 *
	 *   state  - NEON register (alias) holding the block
	 *   rounds - register holding the number of rounds; bits 2 and 1
	 *            select between the 128, 192 and 256 bit key paths
	 *   rk     - register holding the round key array address
	 *   tmp    - scratch register, clobbered
	 *
	 * Requires load_round_keys to have been run; clobbers K6-K9.  The
	 * path for the longer key sizes is emitted out of line in
	 * subsection 1 so that the shortest path is straight-line code.
	 */
	.macro		enc_block, state, rounds, rk, tmp
	add		\tmp, \rk, #96
	ld1		{K6.4s-K7.4s}, [\tmp], #32
	.irp		key, K0, K1, K2, K3, K4, K5
	enc_round	\state, \key
	.endr

	tbnz		\rounds, #2, .Lnot128_\@
.Lout256_\@:
	enc_round	\state, K6
	enc_round	\state, K7

.Lout192_\@:
	enc_round	\state, KK
	aese		\state\().16b, KL.16b
	eor		\state\().16b, \state\().16b, KM.16b

	.subsection	1
.Lnot128_\@:
	ld1		{K8.4s-K9.4s}, [\tmp], #32
	enc_round	\state, K6
	enc_round	\state, K7
	ld1		{K6.4s-K7.4s}, [\tmp]
	enc_round	\state, K8
	enc_round	\state, K9
	tbz		\rounds, #1, .Lout192_\@
	b		.Lout256_\@
	.previous
	.endm

	/*
	 * Body of pmull_gcm_encrypt (enc == 1) and pmull_gcm_decrypt
	 * (enc == 0).  GHASH is always computed over the ciphertext: before
	 * the keystream is applied when decrypting, after it when
	 * encrypting.
	 *
	 * Register usage:
	 *   x0  - bytes remaining; goes negative in the final partial round
	 *   x1  - dst, x2 - src (both advanced as data is processed)
	 *   x3  - GHASH key: SHASH at [x3], HH/HH3/HH4 in the next 48 bytes
	 *   x4  - dg[]: running digest, kept in XL
	 *   x5  - counter block; its last 32-bit word is the block counter
	 *   x6  - AES round keys, x7 - number of AES rounds
	 *   w8  - block counter (byte-reversed on little-endian)
	 *   w9  - number of blocks handled in the current round (1..4)
	 *   x14-x16 - post-increments used by the partial-round loads/stores
	 *   x17 - pointer into .Lpermute_table
	 *   x19 - bytes in the final block (1..16); callee-saved, so it is
	 *         spilled at [sp, #24]
	 *
	 * The tag pointer is read from [sp, #32], the slot just above the
	 * 32-byte frame created here, i.e. the caller's first stack-passed
	 * argument.  When it is not NULL the buffer supplies lengths[] and
	 * receives the tag; when it is NULL the counter and digest are
	 * written back instead.
	 */
	.align		6
	.macro		pmull_gcm_do_crypt, enc
	stp		x29, x30, [sp, #-32]!
	mov		x29, sp
	str		x19, [sp, #24]

	load_round_keys	x7, x6, x8

	ld1		{SHASH.2d}, [x3], #16
	ld1		{HH.2d-HH4.2d}, [x3]

	trn1		SHASH2.2d, SHASH.2d, HH.2d
	trn2		T1.2d, SHASH.2d, HH.2d
	eor		SHASH2.16b, SHASH2.16b, T1.16b

	trn1		HH34.2d, HH3.2d, HH4.2d
	trn2		T1.2d, HH3.2d, HH4.2d
	eor		HH34.16b, HH34.16b, T1.16b

	ld1		{XL.2d}, [x4]

	cbz		x0, 3f				// tag only?

	ldr		w8, [x5, #12]			// load lower counter
CPU_LE(	rev		w8, w8		)

0:	mov		w9, #4				// max blocks per round
	add		x10, x0, #0xf
	lsr		x10, x10, #4			// remaining blocks

	subs		x0, x0, #64
	csel		w9, w10, w9, mi
	add		w8, w8, w9

	bmi		1f
	ld1		{INP0.16b-INP3.16b}, [x2], #64
	.subsection	1
	/*
	 * Populate the four input registers right to left with up to 63 bytes
	 * of data, using overlapping loads to avoid branches.
	 *
	 *                INP0     INP1     INP2     INP3
	 *  1 byte     |        |        |        |x       |
	 * 16 bytes    |        |        |        |xxxxxxxx|
	 * 17 bytes    |        |        |xxxxxxxx|x       |
	 * 47 bytes    |        |xxxxxxxx|xxxxxxxx|xxxxxxx |
	 * etc etc
	 *
	 * Note that this code may read up to 15 bytes before the start of
	 * the input. It is up to the calling code to ensure this is safe if
	 * this happens in the first iteration of the loop (i.e., when the
	 * input size is < 16 bytes)
	 */
1:	mov		x15, #16
	ands		x19, x0, #0xf
	csel		x19, x19, x15, ne
	adr_l		x17, .Lpermute_table + 16

	sub		x11, x15, x19
	add		x12, x17, x11
	sub		x17, x17, x11
	ld1		{T1.16b}, [x12]
	sub		x10, x1, x11
	sub		x11, x2, x11

	cmp		x0, #-16
	csel		x14, x15, xzr, gt
	cmp		x0, #-32
	csel		x15, x15, xzr, gt
	cmp		x0, #-48
	csel		x16, x19, xzr, gt
	csel		x1, x1, x10, gt
	csel		x2, x2, x11, gt

	ld1		{INP0.16b}, [x2], x14
	ld1		{INP1.16b}, [x2], x15
	ld1		{INP2.16b}, [x2], x16
	ld1		{INP3.16b}, [x2]
	tbl		INP3.16b, {INP3.16b}, T1.16b
	b		2f
	.previous

2:	.if		\enc == 0
	bl		pmull_gcm_ghash_4x
	.endif

	bl		pmull_gcm_enc_4x

	tbnz		x0, #63, 6f
	st1		{INP0.16b-INP3.16b}, [x1], #64
	.if		\enc == 1
	bl		pmull_gcm_ghash_4x
	.endif
	// flags are still those of the subs above: neither helper routine
	// contains a flag-setting instruction
	bne		0b

3:	ldp		x19, x10, [sp, #24]
	cbz		x10, 5f				// output tag?

	ld1		{INP3.16b}, [x10]		// load lengths[]
	mov		w9, #1
	bl		pmull_gcm_ghash_4x

	mov		w11, #(0x1 << 24)		// BE '1U'
	ld1		{KS0.16b}, [x5]
	mov		KS0.s[3], w11

	enc_block	KS0, x7, x6, x12

	ext		XL.16b, XL.16b, XL.16b, #8
	rev64		XL.16b, XL.16b
	eor		XL.16b, XL.16b, KS0.16b
	st1		{XL.16b}, [x10]			// store tag

4:	ldp		x29, x30, [sp], #32
	ret

5:
CPU_LE(	rev		w8, w8		)
	str		w8, [x5, #12]			// store lower counter
	st1		{XL.2d}, [x4]
	b		4b

	// final partial round: write back only the bytes that exist
6:	ld1		{T1.16b-T2.16b}, [x17], #32	// permute vectors
	sub		x17, x17, x19, lsl #1

	cmp		w9, #1
	beq		7f
	.subsection	1
	// a single (possibly short) block: merge the output with the 16
	// bytes currently at [x1] before storing
7:	ld1		{INP2.16b}, [x1]
	tbx		INP2.16b, {INP3.16b}, T1.16b
	mov		INP3.16b, INP2.16b
	b		8f
	.previous

	st1		{INP0.16b}, [x1], x14
	st1		{INP1.16b}, [x1], x15
	st1		{INP2.16b}, [x1], x16
	tbl		INP3.16b, {INP3.16b}, T1.16b
	tbx		INP3.16b, {INP2.16b}, T2.16b
8:	st1		{INP3.16b}, [x1]

	.if		\enc == 1
	ld1		{T1.16b}, [x17]
	tbl		INP3.16b, {INP3.16b}, T1.16b	// clear non-data bits
	bl		pmull_gcm_ghash_4x
	.endif
	b		3b
	.endm

	/*
	 * void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
	 *			  struct ghash_key const *k, u64 dg[], u8 ctr[],
	 *			  u32 const rk[], int rounds, u8 tag[])
	 *
	 * NOTE(review): parameter list reconstructed from the register usage
	 * in pmull_gcm_do_crypt (x0 is consumed 64 bytes at a time, x6/x7 are
	 * passed to load_round_keys as rk/rounds, and the ninth argument is
	 * fetched from the stack and dereferenced) - confirm the exact C
	 * types against the declaration in the calling code.
	 */
SYM_FUNC_START(pmull_gcm_encrypt)
	pmull_gcm_do_crypt	1
SYM_FUNC_END(pmull_gcm_encrypt)

	/*
	 * void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
	 *			  struct ghash_key const *k, u64 dg[], u8 ctr[],
	 *			  u32 const rk[], int rounds, u8 tag[])
	 *
	 * NOTE(review): parameter list reconstructed from register usage, as
	 * for pmull_gcm_encrypt - confirm against the C declaration.
	 */
SYM_FUNC_START(pmull_gcm_decrypt)
	pmull_gcm_do_crypt	0
SYM_FUNC_END(pmull_gcm_decrypt)

	/*
	 * Fold up to four blocks into the GHASH digest.
	 *
	 * In:   w9        - number of blocks (1..4); they occupy the last
	 *                   w9 registers of INP0-INP3, so INP3 is always
	 *                   the final block
	 *       XL        - running digest
	 *       SHASH, SHASH2, HH, HH3, HH4, HH34 - key material as set up
	 *                   by pmull_gcm_do_crypt
	 * Out:  XL        - updated digest
	 * Clobbers MASK, T1, T2, TT3, TT4, IN1/XH, XM, XL2, XM2, XH2.
	 * INP0-INP3, the general purpose registers and the condition flags
	 * are left untouched.
	 */
SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x)
	movi		MASK.16b, #0xe1
	shl		MASK.2d, MASK.2d, #57

	rev64		T1.16b, INP0.16b
	rev64		T2.16b, INP1.16b
	rev64		TT3.16b, INP2.16b
	rev64		TT4.16b, INP3.16b

	ext		XL.16b, XL.16b, XL.16b, #8

	tbz		w9, #2, 0f			// <4 blocks?
	.subsection	1
	// fewer than four blocks: start from zeroed accumulators, add the
	// digest to the first block present and join the chain at the
	// matching .LghN entry point
0:	movi		XH2.16b, #0
	movi		XM2.16b, #0
	movi		XL2.16b, #0

	tbz		w9, #0, 1f			// 2 blocks?
	tbz		w9, #1, 2f			// 1 block?

	eor		T2.16b, T2.16b, XL.16b
	ext		T1.16b, T2.16b, T2.16b, #8
	b		.Lgh3

1:	eor		TT3.16b, TT3.16b, XL.16b
	ext		T2.16b, TT3.16b, TT3.16b, #8
	b		.Lgh2

2:	eor		TT4.16b, TT4.16b, XL.16b
	ext		IN1.16b, TT4.16b, TT4.16b, #8
	b		.Lgh1
	.previous

	eor		T1.16b, T1.16b, XL.16b
	ext		IN1.16b, T1.16b, T1.16b, #8

	pmull2		XH2.1q, HH4.2d, IN1.2d		// a1 * b1
	eor		T1.16b, T1.16b, IN1.16b
	pmull		XL2.1q, HH4.1d, IN1.1d		// a0 * b0
	pmull2		XM2.1q, HH34.2d, T1.2d		// (a1 + a0)(b1 + b0)

	ext		T1.16b, T2.16b, T2.16b, #8
.Lgh3:	eor		T2.16b, T2.16b, T1.16b
	pmull2		XH.1q, HH3.2d, T1.2d		// a1 * b1
	pmull		XL.1q, HH3.1d, T1.1d		// a0 * b0
	pmull		XM.1q, HH34.1d, T2.1d		// (a1 + a0)(b1 + b0)

	eor		XH2.16b, XH2.16b, XH.16b
	eor		XL2.16b, XL2.16b, XL.16b
	eor		XM2.16b, XM2.16b, XM.16b

	ext		T2.16b, TT3.16b, TT3.16b, #8
.Lgh2:	eor		TT3.16b, TT3.16b, T2.16b
	pmull2		XH.1q, HH.2d, T2.2d		// a1 * b1
	pmull		XL.1q, HH.1d, T2.1d		// a0 * b0
	pmull2		XM.1q, SHASH2.2d, TT3.2d	// (a1 + a0)(b1 + b0)

	eor		XH2.16b, XH2.16b, XH.16b
	eor		XL2.16b, XL2.16b, XL.16b
	eor		XM2.16b, XM2.16b, XM.16b

	ext		IN1.16b, TT4.16b, TT4.16b, #8
.Lgh1:	eor		TT4.16b, TT4.16b, IN1.16b
	pmull		XL.1q, SHASH.1d, IN1.1d		// a0 * b0
	pmull2		XH.1q, SHASH.2d, IN1.2d		// a1 * b1
	pmull		XM.1q, SHASH2.1d, TT4.1d	// (a1 + a0)(b1 + b0)

	eor		XH.16b, XH.16b, XH2.16b
	eor		XL.16b, XL.16b, XL2.16b
	eor		XM.16b, XM.16b, XM2.16b

	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_p64

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	ret
SYM_FUNC_END(pmull_gcm_ghash_4x)

/*
 * pmull_gcm_enc_4x - build four counter blocks, run them through the AES
 * rounds and XOR the result into INP0..INP3 in place.
 *
 * Register usage, as far as this routine itself shows:
 *   x5   address of the 16 bytes used as the common upper part of all
 *        four counter blocks
 *   w8   counter value; the four blocks get w8 - 4 .. w8 - 1, byte-reversed
 *        into 32-bit lane 3
 *        NOTE(review): this looks like the caller has already advanced w8
 *        by four - confirm against the caller
 *   x6   base of the round key array; keys from byte offset 96 onwards are
 *        loaded here
 *        NOTE(review): K0-K5 and KK/KL/KM are used but never loaded in this
 *        routine - presumably preloaded by the caller, verify
 *   x7   key size selector: bit 2 clear takes the shortest path, bit 2 set
 *        with bit 1 clear exits via .Lout192, both set exits via .Lout256
 *        NOTE(review): consistent with a round count of 10/12/14 - confirm
 *
 * Writes: w10-w13, x10, KS0-KS3, K6-K7 (and K8-K9 on the .Lnot128 path),
 *         INP0-INP3.
 */
SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
	ld1		{KS0.16b}, [x5]			// upper counter bytes

	// per-block lower counter words: w8 - 4, w8 - 3, w8 - 2, w8 - 1
	sub		w10, w8, #4
	sub		w11, w8, #3
	sub		w12, w8, #2
	sub		w13, w8, #1
	.irp		ctr, w10, w11, w12, w13
	rev		\ctr, \ctr			// byte-swap each counter word
	.endr

	// replicate the upper counter into the other three blocks ...
	.irp		ks, KS1, KS2, KS3
	mov		\ks\().16b, KS0.16b
	.endr

	// ... then drop each block's own lower counter into lane 3
	ins		KS0.s[3], w10
	ins		KS1.s[3], w11
	ins		KS2.s[3], w12
	ins		KS3.s[3], w13

	add		x10, x6, #96			// x10 walks the round keys from offset 96
	ld1		{K6.4s-K7.4s}, [x10], #32
	.irp		key, K0, K1, K2, K3, K4, K5
	enc_qround	KS0, KS1, KS2, KS3, \key
	.endr

	tbnz		x7, #2, .Lnot128		// bit 2 set: take the longer key schedule

	/*
	 * The longer-key path lives in subsection 1 so that the path taken
	 * when bit 2 of x7 is clear falls straight through to .Lout256.
	 */
	.subsection	1
.Lnot128:
	ld1		{K8.4s-K9.4s}, [x10], #32	// next two round keys
	.irp		key, K6, K7
	enc_qround	KS0, KS1, KS2, KS3, \key
	.endr
	ld1		{K6.4s-K7.4s}, [x10]		// reload K6/K7 for use at .Lout256
	.irp		key, K8, K9
	enc_qround	KS0, KS1, KS2, KS3, \key
	.endr
	tbz		x7, #1, .Lout192		// bit 1 clear: skip the K6/K7 rounds
	b		.Lout256
	.previous

.Lout256:
	.irp		key, K6, K7
	enc_qround	KS0, KS1, KS2, KS3, \key
	.endr

.Lout192:
	enc_qround	KS0, KS1, KS2, KS3, KK

	// last round: aese with KL, then XOR in KM
	.irp		ks, KS0, KS1, KS2, KS3
	aese		\ks\().16b, KL.16b
	.endr

	.irp		ks, KS0, KS1, KS2, KS3
	eor		\ks\().16b, \ks\().16b, KM.16b
	.endr

	// fold the finished blocks into the four input registers
	.irp		blk, 0, 1, 2, 3
	eor		INP\blk\().16b, INP\blk\().16b, KS\blk\().16b
	.endr

	ret
SYM_FUNC_END(pmull_gcm_enc_4x)

	.section	".rodata", "a"
	.align		6
	/*
	 * 64-byte table: two copies of sixteen 0xff bytes followed by the
	 * byte values 0x0..0xf. Not referenced in this part of the file.
	 * NOTE(review): presumably indexed at a variable offset as a permute
	 * vector by code elsewhere - confirm against its user.
	 */
.Lpermute_table:
	.rept		2
	.fill		16, 1, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.endr
	.previous