// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * sm3-neon-core.S - SM3 secure hash using NEON instructions
 *
 * Linux/arm64 port of the libgcrypt SM3 implementation for AArch64
 *
 * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 * Copyright (c) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/assembler.h>

/*
 * Context structure
 *
 * Byte offsets of the eight 32-bit chaining words inside the state
 * structure pointed to by RSTATE.
 */

#define state_h0 0
#define state_h1 4
#define state_h2 8
#define state_h3 12
#define state_h4 16
#define state_h5 20
#define state_h6 24
#define state_h7 28

/*
 * Stack structure
 *
 * The scratch area is 192 bytes, i.e. three 64-byte slots.  The address
 * macros further down (IW_W_ADDR / XW_W_ADDR) index into it relative to
 * sp.
 */

#define STACK_W_SIZE        (32 * 2 * 3)

#define STACK_W             (0)
#define STACK_SIZE          (STACK_W + STACK_W_SIZE)

/* Register macros */

#define RSTATE x0	/* arg 0: state pointer */
#define RDATA  x1	/* arg 1: input pointer, post-incremented by loads */
/*
 * arg 2: number of blocks.  The C prototype declares it as 'int', and
 * AAPCS64 leaves bits [63:32] of the argument register unspecified for a
 * 32-bit argument, so the counter must be decremented and tested through
 * the 32-bit view of the register.
 */
#define RNBLKS w2
#define RKPTR  x28	/* pointer to the round constant table */
#define RFRAME x29	/* copy of sp taken before the scratch area is carved out */

/* Working variables of the compression function (first half, A..D). */
#define ra w3
#define rb w4
#define rc w5
#define rd w6
/* Working variables of the compression function (second half, E..H). */
#define re w7
#define rf w8
#define rg w9
#define rh w10

/* Scalar temporaries used inside the round macro. */
#define t0 w11
#define t1 w12
#define t2 w13
#define t3 w14
#define t4 w15
#define t5 w16
#define t6 w17

/* Round constants for an even/odd pair of rounds, loaded by KL(). */
#define k_even w19
#define k_odd w20

/* Scratch addresses into the stack area. */
#define addr0 x21
#define addr1 x22

/* Temporaries holding the previous chaining value during the final update. */
#define s0 w23
#define s1 w24
#define s2 w25
#define s3 w26

/* Message word vectors (the scheduler keeps three words per register). */
#define W0 v0
#define W1 v1
#define W2 v2
#define W3 v3
#define W4 v4
#define W5 v5

/* Vector temporaries; v8-v15 are skipped, so only caller-saved SIMD regs are used. */
#define XTMP0 v6
#define XTMP1 v7
#define XTMP2 v16
#define XTMP3 v17
#define XTMP4 v18
#define XTMP5 v19
#define XTMP6 v20

/* Helper macros. */

/* No-op placeholder: used wherever a K_LOAD or IOP slot must expand to nothing. */
#define _(...) /*_*/

/* Zero a whole vector register. */
#define clear_vec(x) \
	movi	x.8h, #0;

/* 32-bit rotate left by n, expressed as a rotate right by (32 - n). */
#define rolw(o, a, n) \
	ror	o, a, #(32 - n);

/*
 * Round function macros.
 *
 * Each boolean function is split into three steps (_1, _2, _3) so that the
 * round macro can interleave them with other work.  'o' receives the result,
 * 't' is a scratch register.
 *
 *   variant 1:  FF = GG = x ^ y ^ z
 *   variant 2:  FF = (x & y) ^ ((x ^ y) & z)
 *               GG = (z & ~x) ^ (y & x)
 */

#define GG1_1(x, y, z, o, t) \
	eor	o, x, y;
#define GG1_2(x, y, z, o, t) \
	eor	o, o, z;
#define GG1_3(x, y, z, o, t)

#define FF1_1(x, y, z, o, t) GG1_1(x, y, z, o, t)
#define FF1_2(x, y, z, o, t)
#define FF1_3(x, y, z, o, t) GG1_2(x, y, z, o, t)

#define GG2_1(x, y, z, o, t) \
	bic	o, z, x;
#define GG2_2(x, y, z, o, t) \
	and	t, y, x;
#define GG2_3(x, y, z, o, t) \
	eor	o, o, t;

#define FF2_1(x, y, z, o, t) \
	eor	o, x, y;
#define FF2_2(x, y, z, o, t) \
	and	t, x, y; \
	and	o, o, z;
#define FF2_3(x, y, z, o, t) \
	eor	o, o, t;

/*
 * One round of the compression function.
 *
 *   i          boolean function variant (1 or 2), pasted into FF##i / GG##i
 *   a..h       registers currently holding the eight working variables
 *   k          register holding this round's constant (k_even or k_odd);
 *              it is consumed as scratch
 *   K_LOAD     KL to load the next constant pair, or _ for nothing
 *   round      round number, used for the constant and stack addressing
 *   widx       word index inside the current stack slot
 *   wtype      IW or XW: selects the stack address macros
 *   IOP        macro invoked as IOP(1..8, iop_param); lets the caller
 *              interleave eight unrelated steps (message loading or
 *              scheduling) with the round, or _ for none
 *
 * On exit d holds FF(a,b,c) + d + (rol(a,12) ^ k') + w1w2,
 * h holds P0(GG(e,f,g) + h + k' + w1), b is rotated left by 9 and
 * f is rotated left by 19, where k' = rol(rol(a,12) + e + k, 7).
 * The variables are not moved between registers; callers rotate the
 * argument list from round to round instead.
 */
#define R(i, a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
	K_LOAD(round); \
	ldr	t5, [sp, #(wtype##_W1_ADDR(round, widx))]; \
	rolw(t0, a, 12);                              /* rol(a, 12) => t0 */ \
	IOP(1, iop_param); \
	FF##i##_1(a, b, c, t1, t2); \
	ldr	t6, [sp, #(wtype##_W1W2_ADDR(round, widx))]; \
	add	k, k, e; \
	IOP(2, iop_param); \
	GG##i##_1(e, f, g, t3, t4); \
	FF##i##_2(a, b, c, t1, t2); \
	IOP(3, iop_param); \
	add	k, k, t0; \
	add	h, h, t5; \
	add	d, d, t6;                     /* w1w2 + d => d */ \
	IOP(4, iop_param); \
	rolw(k, k, 7);                        /* rol (t0 + e + t), 7) => k */ \
	GG##i##_2(e, f, g, t3, t4); \
	add	h, h, k;                      /* h + w1 + k => h */ \
	IOP(5, iop_param); \
	FF##i##_3(a, b, c, t1, t2); \
	eor	t0, t0, k;                    /* k ^ t0 => t0 */ \
	GG##i##_3(e, f, g, t3, t4); \
	add	d, d, t1;                     /* FF(a,b,c) + d => d */ \
	IOP(6, iop_param); \
	add	t3, t3, h;                    /* GG(e,f,g) + h => t3 */ \
	rolw(b, b, 9);                        /* rol(b, 9) => b */ \
	eor	h, t3, t3, ror #(32-9); \
	IOP(7, iop_param); \
	add	d, d, t0;                     /* t0 + d => d */ \
	rolw(f, f, 19);                       /* rol(f, 19) => f */ \
	IOP(8, iop_param); \
	eor	h, h, t3, ror #(32-17);       /* P0(t3) => h */

/* Round using boolean function variant 1. */
#define R1(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
	R(1, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param)

/* Round using boolean function variant 2. */
#define R2(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
	R(2, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param)

/*
 * Load the constants for 'round' and 'round + 1' into k_even / k_odd.
 * Issued on even rounds only; odd rounds pass _ as K_LOAD.
 */
#define KL(round) \
	ldp	k_even, k_odd, [RKPTR, #(4*(round))];

/*
 * Input expansion macros.
 *
 * Stack layout, relative to sp: 64-byte slots, each holding
 *   +0  .. +15   expanded words            (XW, "W1")
 *   +16 .. +31   expanded xor words        (XW, "W1W2")
 *   +32 .. +47   byte-swapped input words  (IW, "W1")
 *   +48 .. +63   input xor words           (IW, "W1W2")
 */

/* Byte-swapped input address: one slot per group of four rounds. */
#define IW_W_ADDR(round, widx, offs) \
	(STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))

/* Expanded input address: alternates between slot 0 and 1 every three rounds. */
#define XW_W_ADDR(round, widx, offs) \
	(STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))

/* Rounds 1-12, byte-swapped input block addresses. */
#define IW_W1_ADDR(round, widx)   IW_W_ADDR(round, widx, 32)
#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 48)

/* Remaining rounds (wtype XW), expanded input block addresses. */
#define XW_W1_ADDR(round, widx)   XW_W_ADDR(round, widx, 0)
#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 16)

/*
 * Input block loading.
 * Interleaving within round function needed for in-order CPUs.
 *
 * The 24 steps (three groups of eight) read one 64-byte block from RDATA,
 * byte-swap each 32-bit word, store the words and the xor of adjacent
 * vectors to the IW slots, and seed W0..W5 for the scheduler.
 */
#define LOAD_W_VEC_1_1() \
	add	addr0, sp, #IW_W1_ADDR(0, 0);
#define LOAD_W_VEC_1_2() \
	add	addr1, sp, #IW_W1_ADDR(4, 0);
#define LOAD_W_VEC_1_3() \
	ld1	{W0.16b}, [RDATA], #16;
#define LOAD_W_VEC_1_4() \
	ld1	{W1.16b}, [RDATA], #16;
#define LOAD_W_VEC_1_5() \
	ld1	{W2.16b}, [RDATA], #16;
#define LOAD_W_VEC_1_6() \
	ld1	{W3.16b}, [RDATA], #16;
#define LOAD_W_VEC_1_7() \
	rev32	XTMP0.16b, W0.16b;
#define LOAD_W_VEC_1_8() \
	rev32	XTMP1.16b, W1.16b;
#define LOAD_W_VEC_2_1() \
	rev32	XTMP2.16b, W2.16b;
#define LOAD_W_VEC_2_2() \
	rev32	XTMP3.16b, W3.16b;
#define LOAD_W_VEC_2_3() \
	eor	XTMP4.16b, XTMP1.16b, XTMP0.16b;
#define LOAD_W_VEC_2_4() \
	eor	XTMP5.16b, XTMP2.16b, XTMP1.16b;
#define LOAD_W_VEC_2_5() \
	st1	{XTMP0.16b}, [addr0], #16;
#define LOAD_W_VEC_2_6() \
	st1	{XTMP4.16b}, [addr0]; \
	add	addr0, sp, #IW_W1_ADDR(8, 0);
#define LOAD_W_VEC_2_7() \
	eor	XTMP6.16b, XTMP3.16b, XTMP2.16b;
#define LOAD_W_VEC_2_8() \
	ext	W0.16b, XTMP0.16b, XTMP0.16b, #8;  /* W0: xx, w0, xx, xx */
#define LOAD_W_VEC_3_1() \
	mov	W2.16b, XTMP1.16b;                 /* W2: xx, w6, w5, w4 */
#define LOAD_W_VEC_3_2() \
	st1	{XTMP1.16b}, [addr1], #16;
#define LOAD_W_VEC_3_3() \
	st1	{XTMP5.16b}, [addr1]; \
	ext	W1.16b, XTMP0.16b, XTMP0.16b, #4;  /* W1: xx, w3, w2, w1 */
#define LOAD_W_VEC_3_4() \
	ext	W3.16b, XTMP1.16b, XTMP2.16b, #12; /* W3: xx, w9, w8, w7 */
#define LOAD_W_VEC_3_5() \
	ext	W4.16b, XTMP2.16b, XTMP3.16b, #8;  /* W4: xx, w12, w11, w10 */
#define LOAD_W_VEC_3_6() \
	st1	{XTMP2.16b}, [addr0], #16;
#define LOAD_W_VEC_3_7() \
	st1	{XTMP6.16b}, [addr0];
#define LOAD_W_VEC_3_8() \
	ext	W5.16b, XTMP3.16b, XTMP3.16b, #4;  /* W5: xx, w15, w14, w13 */

/*
 * IOP-compatible dispatchers: select step 'iop_num' (1..8) of the given
 * loading group.  The second argument is accepted and ignored so these can
 * be passed to the round macro in its IOP slot.
 */
#define LOAD_W_VEC_1(iop_num, ...) \
	LOAD_W_VEC_1_##iop_num()
#define LOAD_W_VEC_2(iop_num, ...) \
	LOAD_W_VEC_2_##iop_num()
#define LOAD_W_VEC_3(iop_num, ...) \
	LOAD_W_VEC_3_##iop_num()

/*
 * Message scheduling. Note: 3 words per vector register.
 * Interleaving within round function needed for in-order CPUs.
 *
 * The 24 steps (three groups of eight) compute three new message words
 * into 'w0', then store them together with their xor against the words
 * three registers back to the XW slot selected by 'round'.
 * Arguments w0..w5 are the six message vectors in rotating order; the
 * rotate-by-n operations are built from a shl/sri pair.
 */
#define SCHED_W_1_1(round, w0, w1, w2, w3, w4, w5) \
	/* Load (w[i - 16]) => XTMP0 */ \
	/* Load (w[i - 13]) => XTMP5 */ \
	ext	XTMP0.16b, w0.16b, w0.16b, #12;    /* XTMP0: w0, xx, xx, xx */
#define SCHED_W_1_2(round, w0, w1, w2, w3, w4, w5) \
	ext	XTMP5.16b, w1.16b, w1.16b, #12;
#define SCHED_W_1_3(round, w0, w1, w2, w3, w4, w5) \
	ext	XTMP0.16b, XTMP0.16b, w1.16b, #12; /* XTMP0: xx, w2, w1, w0 */
#define SCHED_W_1_4(round, w0, w1, w2, w3, w4, w5) \
	ext	XTMP5.16b, XTMP5.16b, w2.16b, #12;
#define SCHED_W_1_5(round, w0, w1, w2, w3, w4, w5) \
	/* w[i - 9] == w3 */ \
	/* W3 ^ XTMP0 => XTMP0 */ \
	eor	XTMP0.16b, XTMP0.16b, w3.16b;
#define SCHED_W_1_6(round, w0, w1, w2, w3, w4, w5) \
	/* w[i - 3] == w5 */ \
	/* rol(w5, 15) ^ XTMP0 => XTMP0 */ \
	/* rol(XTMP5, 7) => XTMP1 */ \
	add	addr0, sp, #XW_W1_ADDR((round), 0); \
	shl	XTMP2.4s, w5.4s, #15;
#define SCHED_W_1_7(round, w0, w1, w2, w3, w4, w5) \
	shl	XTMP1.4s, XTMP5.4s, #7;
#define SCHED_W_1_8(round, w0, w1, w2, w3, w4, w5) \
	sri	XTMP2.4s, w5.4s, #(32-15);
#define SCHED_W_2_1(round, w0, w1, w2, w3, w4, w5) \
	sri	XTMP1.4s, XTMP5.4s, #(32-7);
#define SCHED_W_2_2(round, w0, w1, w2, w3, w4, w5) \
	eor	XTMP0.16b, XTMP0.16b, XTMP2.16b;
#define SCHED_W_2_3(round, w0, w1, w2, w3, w4, w5) \
	/* w[i - 6] == W4 */ \
	/* W4 ^ XTMP1 => XTMP1 */ \
	eor	XTMP1.16b, XTMP1.16b, w4.16b;
#define SCHED_W_2_4(round, w0, w1, w2, w3, w4, w5) \
	/* P1(XTMP0) ^ XTMP1 => W0 */ \
	shl	XTMP3.4s, XTMP0.4s, #15;
#define SCHED_W_2_5(round, w0, w1, w2, w3, w4, w5) \
	shl	XTMP4.4s, XTMP0.4s, #23;
#define SCHED_W_2_6(round, w0, w1, w2, w3, w4, w5) \
	eor	w0.16b, XTMP1.16b, XTMP0.16b;
#define SCHED_W_2_7(round, w0, w1, w2, w3, w4, w5) \
	sri	XTMP3.4s, XTMP0.4s, #(32-15);
#define SCHED_W_2_8(round, w0, w1, w2, w3, w4, w5) \
	sri	XTMP4.4s, XTMP0.4s, #(32-23);
#define SCHED_W_3_1(round, w0, w1, w2, w3, w4, w5) \
	eor	w0.16b, w0.16b, XTMP3.16b;
#define SCHED_W_3_2(round, w0, w1, w2, w3, w4, w5) \
	/* Load (w[i - 3]) => XTMP2 */ \
	ext	XTMP2.16b, w4.16b, w4.16b, #12;
#define SCHED_W_3_3(round, w0, w1, w2, w3, w4, w5) \
	eor	w0.16b, w0.16b, XTMP4.16b;
#define SCHED_W_3_4(round, w0, w1, w2, w3, w4, w5) \
	ext	XTMP2.16b, XTMP2.16b, w5.16b, #12;
#define SCHED_W_3_5(round, w0, w1, w2, w3, w4, w5) \
	/* W1 ^ W2 => XTMP3 */ \
	eor	XTMP3.16b, XTMP2.16b, w0.16b;
#define SCHED_W_3_6(round, w0, w1, w2, w3, w4, w5)
#define SCHED_W_3_7(round, w0, w1, w2, w3, w4, w5) \
	st1	{XTMP2.16b-XTMP3.16b}, [addr0];
#define SCHED_W_3_8(round, w0, w1, w2, w3, w4, w5)

/*
 * IOP-compatible dispatchers, one set per rotation of the six message
 * vectors.  SCHED_W_<order>_<group>(iop_num, round) expands to step
 * 'iop_num' of scheduling group <group> with the vectors in <order>.
 */
#define SCHED_W_W0W1W2W3W4W5_1(iop_num, round) \
	SCHED_W_1_##iop_num(round, W0, W1, W2, W3, W4, W5)
#define SCHED_W_W0W1W2W3W4W5_2(iop_num, round) \
	SCHED_W_2_##iop_num(round, W0, W1, W2, W3, W4, W5)
#define SCHED_W_W0W1W2W3W4W5_3(iop_num, round) \
	SCHED_W_3_##iop_num(round, W0, W1, W2, W3, W4, W5)

#define SCHED_W_W1W2W3W4W5W0_1(iop_num, round) \
	SCHED_W_1_##iop_num(round, W1, W2, W3, W4, W5, W0)
#define SCHED_W_W1W2W3W4W5W0_2(iop_num, round) \
	SCHED_W_2_##iop_num(round, W1, W2, W3, W4, W5, W0)
#define SCHED_W_W1W2W3W4W5W0_3(iop_num, round) \
	SCHED_W_3_##iop_num(round, W1, W2, W3, W4, W5, W0)

#define SCHED_W_W2W3W4W5W0W1_1(iop_num, round) \
	SCHED_W_1_##iop_num(round, W2, W3, W4, W5, W0, W1)
#define SCHED_W_W2W3W4W5W0W1_2(iop_num, round) \
	SCHED_W_2_##iop_num(round, W2, W3, W4, W5, W0, W1)
#define SCHED_W_W2W3W4W5W0W1_3(iop_num, round) \
	SCHED_W_3_##iop_num(round, W2, W3, W4, W5, W0, W1)

#define SCHED_W_W3W4W5W0W1W2_1(iop_num, round) \
	SCHED_W_1_##iop_num(round, W3, W4, W5, W0, W1, W2)
#define SCHED_W_W3W4W5W0W1W2_2(iop_num, round) \
	SCHED_W_2_##iop_num(round, W3, W4, W5, W0, W1, W2)
#define SCHED_W_W3W4W5W0W1W2_3(iop_num, round) \
	SCHED_W_3_##iop_num(round, W3, W4, W5, W0, W1, W2)
#define SCHED_W_W4W5W0W1W2W3_1(iop_num, round) \
	SCHED_W_1_##iop_num(round, W4, W5, W0, W1, W2, W3)
#define SCHED_W_W4W5W0W1W2W3_2(iop_num, round) \
	SCHED_W_2_##iop_num(round, W4, W5, W0, W1, W2, W3)
#define SCHED_W_W4W5W0W1W2W3_3(iop_num, round) \
	SCHED_W_3_##iop_num(round, W4, W5, W0, W1, W2, W3)

#define SCHED_W_W5W0W1W2W3W4_1(iop_num, round) \
	SCHED_W_1_##iop_num(round, W5, W0, W1, W2, W3, W4)
#define SCHED_W_W5W0W1W2W3W4_2(iop_num, round) \
	SCHED_W_2_##iop_num(round, W5, W0, W1, W2, W3, W4)
#define SCHED_W_W5W0W1W2W3W4_3(iop_num, round) \
	SCHED_W_3_##iop_num(round, W5, W0, W1, W2, W3, W4)


	/*
	 * Transform blocks*64 bytes (blocks*16 32-bit words) at 'src'.
	 *
	 * void sm3_neon_transform(struct sm3_state *sst, u8 const *src,
	 *                         int blocks)
	 *
	 * In:    RSTATE = sst, RDATA = src, RNBLKS = blocks
	 * Out:   the eight chaining words at [RSTATE, #0..#31] are updated
	 *        after every block.
	 * Saved: x19-x26, x28, x29 are pushed on entry and restored on exit.
	 * Stack: STACK_SIZE bytes of scratch below the saved registers,
	 *        aligned down to 64 bytes; the area is overwritten with
	 *        zeros before returning, and all vector registers used
	 *        are cleared as well.
	 *
	 * 'blocks' must be at least 1: the first block is loaded
	 * unconditionally and the counter is only decremented and tested
	 * after round 60 of each block.
	 */
	.text
.align 3
SYM_TYPED_FUNC_START(sm3_neon_transform)
	/* Load the current chaining value into the working variables. */
	ldp		ra, rb, [RSTATE, #0]
	ldp		rc, rd, [RSTATE, #8]
	ldp		re, rf, [RSTATE, #16]
	ldp		rg, rh, [RSTATE, #24]

	stp		x28, x29, [sp, #-16]!
	stp		x19, x20, [sp, #-16]!
	stp		x21, x22, [sp, #-16]!
	stp		x23, x24, [sp, #-16]!
	stp		x25, x26, [sp, #-16]!
	mov		RFRAME, sp		/* remembered for the epilogue */

	/* Carve out the scratch area and align it down to 64 bytes. */
	sub		addr0, sp, #STACK_SIZE
	adr_l		RKPTR, .LKtable
	and		sp, addr0, #(~63)

	/* Preload first block. */
	LOAD_W_VEC_1(1, 0)
	LOAD_W_VEC_1(2, 0)
	LOAD_W_VEC_1(3, 0)
	LOAD_W_VEC_1(4, 0)
	LOAD_W_VEC_1(5, 0)
	LOAD_W_VEC_1(6, 0)
	LOAD_W_VEC_1(7, 0)
	LOAD_W_VEC_1(8, 0)
	LOAD_W_VEC_2(1, 0)
	LOAD_W_VEC_2(2, 0)
	LOAD_W_VEC_2(3, 0)
	LOAD_W_VEC_2(4, 0)
	LOAD_W_VEC_2(5, 0)
	LOAD_W_VEC_2(6, 0)
	LOAD_W_VEC_2(7, 0)
	LOAD_W_VEC_2(8, 0)
	LOAD_W_VEC_3(1, 0)
	LOAD_W_VEC_3(2, 0)
	LOAD_W_VEC_3(3, 0)
	LOAD_W_VEC_3(4, 0)
	LOAD_W_VEC_3(5, 0)
	LOAD_W_VEC_3(6, 0)
	LOAD_W_VEC_3(7, 0)
	LOAD_W_VEC_3(8, 0)

	/*
	 * One iteration per 64-byte block.  The working variables stay in
	 * fixed registers; the argument order passed to R1/R2 rotates with
	 * a period of four rounds instead.
	 */
.balign 16
.Loop:
	/* Transform 0-3 */
	R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 0, 0, IW, _, 0)
	R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  1, 1, IW, _, 0)
	R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 2, 2, IW, _, 0)
	R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  3, 3, IW, _, 0)

	/* Transform 4-7 + Precalc 12-14 */
	R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 4, 0, IW, _, 0)
	R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  5, 1, IW, _, 0)
	R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 6, 2, IW, SCHED_W_W0W1W2W3W4W5_1, 12)
	R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  7, 3, IW, SCHED_W_W0W1W2W3W4W5_2, 12)

	/* Transform 8-11 + Precalc 12-17 */
	R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 8, 0, IW, SCHED_W_W0W1W2W3W4W5_3, 12)
	R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  9, 1, IW, SCHED_W_W1W2W3W4W5W0_1, 15)
	R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 10, 2, IW, SCHED_W_W1W2W3W4W5W0_2, 15)
	R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  11, 3, IW, SCHED_W_W1W2W3W4W5W0_3, 15)

	/* Transform 12-14 + Precalc 18-20 */
	R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 12, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 18)
	R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  13, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 18)
	R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 14, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 18)

	/* Transform 15-17 + Precalc 21-23 */
	R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  15, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 21)
	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 16, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 21)
	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  17, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 21)

	/* Transform 18-20 + Precalc 24-26 */
	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 18, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 24)
	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  19, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 24)
	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 20, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 24)

	/* Transform 21-23 + Precalc 27-29 */
	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  21, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 27)
	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 22, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 27)
	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  23, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 27)

	/* Transform 24-26 + Precalc 30-32 */
	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 24, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 30)
	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  25, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 30)
	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 26, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 30)

	/* Transform 27-29 + Precalc 33-35 */
	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  27, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 33)
	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 28, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 33)
	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  29, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 33)

	/* Transform 30-32 + Precalc 36-38 */
	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 30, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 36)
	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  31, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 36)
	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 32, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 36)

	/* Transform 33-35 + Precalc 39-41 */
	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  33, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 39)
	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 34, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 39)
	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  35, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 39)

	/* Transform 36-38 + Precalc 42-44 */
	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 36, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 42)
	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  37, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 42)
	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 38, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 42)

	/* Transform 39-41 + Precalc 45-47 */
	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  39, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 45)
	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 40, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 45)
	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  41, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 45)

	/* Transform 42-44 + Precalc 48-50 */
	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 42, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 48)
	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  43, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 48)
	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 44, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 48)

	/* Transform 45-47 + Precalc 51-53 */
	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  45, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 51)
	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 46, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 51)
	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  47, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 51)

	/* Transform 48-50 + Precalc 54-56 */
	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 48, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 54)
	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  49, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 54)
	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 50, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 54)

	/* Transform 51-53 + Precalc 57-59 */
	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  51, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 57)
	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 52, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 57)
	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  53, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 57)

	/* Transform 54-56 + Precalc 60-62 */
	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 54, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 60)
	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  55, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 60)
	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 56, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 60)

	/* Transform 57-59 + Precalc 63 */
	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  57, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 63)
	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 58, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 63)
	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  59, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 63)

	/* Transform 60 */
	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 60, 0, XW, _, _)
	/* Last block?  Then finish without reading past the input. */
	subs		RNBLKS, RNBLKS, #1
	b.eq		.Lend

	/* Transform 61-63 + Preload next block */
	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  61, 1, XW, LOAD_W_VEC_1, _)
	ldp		s0, s1, [RSTATE, #0]
	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, LOAD_W_VEC_2, _)
	ldp		s2, s3, [RSTATE, #8]
	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  63, 0, XW, LOAD_W_VEC_3, _)

	/*
	 * Update the chaining variables: new state = working variables xor
	 * previous state.  k_even/k_odd are free here and double as
	 * temporaries for the last two state words.
	 */
	eor		ra, ra, s0
	eor		rb, rb, s1
	ldp		s0, s1, [RSTATE, #16]
	eor		rc, rc, s2
	ldp		k_even, k_odd, [RSTATE, #24]
	eor		rd, rd, s3
	eor		re, re, s0
	stp		ra, rb, [RSTATE, #0]
	eor		rf, rf, s1
	stp		rc, rd, [RSTATE, #8]
	eor		rg, rg, k_even
	stp		re, rf, [RSTATE, #16]
	eor		rh, rh, k_odd
	stp		rg, rh, [RSTATE, #24]
	b		.Loop

.Lend:
	/* Transform 61-63 */
	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  61, 1, XW, _, _)
	ldp		s0, s1, [RSTATE, #0]
	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, _, _)
	ldp		s2, s3, [RSTATE, #8]
	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  63, 0, XW, _, _)

	/*
	 * Update the chaining variables, interleaved with wiping every
	 * vector register that held message material.
	 */
	eor		ra, ra, s0
	clear_vec(W0)
	eor		rb, rb, s1
	clear_vec(W1)
	ldp		s0, s1, [RSTATE, #16]
	clear_vec(W2)
	eor		rc, rc, s2
	clear_vec(W3)
	ldp		k_even, k_odd, [RSTATE, #24]
	clear_vec(W4)
	eor		rd, rd, s3
	clear_vec(W5)
	eor		re, re, s0
	clear_vec(XTMP0)
	stp		ra, rb, [RSTATE, #0]
	clear_vec(XTMP1)
	eor		rf, rf, s1
	clear_vec(XTMP2)
	stp		rc, rd, [RSTATE, #8]
	clear_vec(XTMP3)
	eor		rg, rg, k_even
	clear_vec(XTMP4)
	stp		re, rf, [RSTATE, #16]
	clear_vec(XTMP5)
	eor		rh, rh, k_odd
	clear_vec(XTMP6)
	stp		rg, rh, [RSTATE, #24]

	/* Clear message expansion area (3 x 64 bytes; W0-W3 are zero here). */
	add		addr0, sp, #STACK_W
	st1		{W0.16b-W3.16b}, [addr0], #64
	st1		{W0.16b-W3.16b}, [addr0], #64
	st1		{W0.16b-W3.16b}, [addr0]

	/* Drop the scratch area and restore the saved registers. */
	mov		sp, RFRAME

	ldp		x25, x26, [sp], #16
	ldp		x23, x24, [sp], #16
	ldp		x21, x22, [sp], #16
	ldp		x19, x20, [sp], #16
	ldp		x28, x29, [sp], #16

	ret
SYM_FUNC_END(sm3_neon_transform)


	.section	".rodata", "a"

	/*
	 * Round constants, one 32-bit word per round (64 entries).  KL()
	 * fetches them in pairs at byte offset 4 * round.
	 */
	.align 4
.LKtable:
	.long 0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb
	.long 0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc
	.long 0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce
	.long 0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6
	.long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c
	.long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce
	.long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec
	.long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
	.long 0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53
	.long 0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d
	.long 0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4
	.long 0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43
	.long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c
	.long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce
	.long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec
	.long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5