/* SPDX-License-Identifier: GPL-2.0 */
/*
 * NH - ε-almost-universal hash function, ARM64 NEON accelerated version
 *
 * Copyright 2018 Google LLC
 *
 * Author: Eric Biggers <ebiggers@google.com>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>

	/* Integer arguments of nh_neon(), in AAPCS64 argument order. */
	KEY		.req	x0	// advances by 16 bytes per key load
	MESSAGE		.req	x1	// advances by 16 bytes per stride
	MESSAGE_LEN	.req	x2	// used as a running byte counter
	HASH		.req	x3	// output buffer, written once at the end

	/* One 2x64-bit accumulator per NH pass. */
	PASS0_SUMS	.req	v0
	PASS1_SUMS	.req	v1
	PASS2_SUMS	.req	v2
	PASS3_SUMS	.req	v3

	/* Sliding window of four consecutive 16-byte key strides. */
	K0		.req	v4
	K1		.req	v5
	K2		.req	v6
	K3		.req	v7

	/*
	 * Scratch registers.  These are deliberately placed in v16-v23: the
	 * low halves of v8-v15 are callee-saved under AAPCS64, whereas
	 * v16-v31 are caller-saved, so this routine never has to spill.
	 */
	T0		.req	v16
	T1		.req	v17
	T2		.req	v18
	T3		.req	v19
	T4		.req	v20
	T5		.req	v21
	T6		.req	v22
	T7		.req	v23

/*
 * _nh_stride k0, k1, k2, k3
 *
 * Process one 16-byte message stride for all four passes.
 *
 * On entry \k0, \k1 and \k2 hold the three most recently loaded key strides;
 * the macro loads the next key stride into \k3.  For pass i, with
 * s = message + key_i taken as four 32-bit lanes, it accumulates
 * s[0]*s[2] into lane 0 and s[1]*s[3] into lane 1 of PASSi_SUMS
 * (32x32 => 64-bit unsigned multiplies).
 *
 * Side effects: MESSAGE += 16, KEY += 16, \k3 overwritten, T0-T7 clobbered.
 */
.macro _nh_stride	k0, k1, k2, k3

	// Load next message stride
	ld1		{T3.16b}, [MESSAGE], #16

	// Load next key stride
	ld1		{\k3\().4s}, [KEY], #16

	// Add message words to key words (T3 is overwritten last, since it
	// is also the source of the message words)
	add		T0.4s, T3.4s, \k0\().4s
	add		T1.4s, T3.4s, \k1\().4s
	add		T2.4s, T3.4s, \k2\().4s
	add		T3.4s, T3.4s, \k3\().4s

	// Copy the upper 64 bits of each sum into the low half of a second
	// register so that lanes {0,1} can be multiplied by lanes {2,3}
	mov		T4.d[0], T0.d[1]
	mov		T5.d[0], T1.d[1]
	mov		T6.d[0], T2.d[1]
	mov		T7.d[0], T3.d[1]

	// Multiply 32x32 => 64 and accumulate
	umlal		PASS0_SUMS.2d, T0.2s, T4.2s
	umlal		PASS1_SUMS.2d, T1.2s, T5.2s
	umlal		PASS2_SUMS.2d, T2.2s, T6.2s
	umlal		PASS3_SUMS.2d, T3.2s, T7.2s
.endm

/*
 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 *		__le64 hash[NH_NUM_PASSES])
 *
 * It's guaranteed that message_len % 16 == 0.
 *
 * In:    x0 = key, x1 = message, x2 = message_len, x3 = hash
 * Out:   four 64-bit sums (32 bytes) stored at 'hash', pass 0 first
 * Reads: message_len bytes of message; 48 + message_len bytes of key
 *        (three key strides up front, then one more per message stride)
 * Clobb: x0-x2, v0-v7, v16-v23, NZCV flags
 */
SYM_TYPED_FUNC_START(nh_neon)

	// Prime the key window with the first three strides and zero the
	// per-pass accumulators
	ld1		{K0.4s,K1.4s}, [KEY], #32
	movi		PASS0_SUMS.2d, #0
	movi		PASS1_SUMS.2d, #0
	ld1		{K2.4s}, [KEY], #16
	movi		PASS2_SUMS.2d, #0
	movi		PASS3_SUMS.2d, #0

	// Main loop: four strides (64 bytes) per iteration.  The key window
	// rotates by one register per stride, so after four strides the
	// register roles are back where they started.
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	blt		.Lloop4_done
.Lloop4:
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	bge		.Lloop4

.Lloop4_done:
	// MESSAGE_LEN is now (bytes remaining - 64), i.e. negative.  Its low
	// six bits equal the number of bytes remaining: 0, 16, 32 or 48.
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	beq		.Ldone
	_nh_stride	K0, K1, K2, K3

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K2, K3, K0, K1

.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'.
	// addp folds the two 64-bit lanes of each accumulator, giving
	// T0 = {pass0, pass1} and T1 = {pass2, pass3}.
	addp		T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
	addp		T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
	st1		{T0.16b,T1.16b}, [HASH]
	ret
SYM_FUNC_END(nh_neon)