/* SPDX-License-Identifier: GPL-2.0 */
/*
 * NH - ε-almost-universal hash function, NEON accelerated version
 *
 * Copyright 2018 Google LLC
 *
 * Author: Eric Biggers <ebiggers@google.com>
 */

#include <linux/linkage.h>

	.text
	.fpu		neon

	/* Function arguments, in the order given by the nh_neon() prototype. */
	KEY		.req	r0
	MESSAGE		.req	r1
	MESSAGE_LEN	.req	r2
	HASH		.req	r3

	/*
	 * One 128-bit accumulator per pass.  Each holds two 64-bit partial
	 * sums (_A = low half, _B = high half) that are only added together
	 * at the very end.
	 */
	PASS0_SUMS	.req	q0
	PASS0_SUM_A	.req	d0
	PASS0_SUM_B	.req	d1
	PASS1_SUMS	.req	q1
	PASS1_SUM_A	.req	d2
	PASS1_SUM_B	.req	d3
	PASS2_SUMS	.req	q2
	PASS2_SUM_A	.req	d4
	PASS2_SUM_B	.req	d5
	PASS3_SUMS	.req	q3
	PASS3_SUM_A	.req	d6
	PASS3_SUM_B	.req	d7

	/*
	 * Sliding window of four 16-byte key strides.
	 *
	 * NOTE(review): q4-q7 (d8-d15) are callee-saved under the AAPCS VFP
	 * procedure call standard and are not preserved here.  Presumably the
	 * caller brackets this routine so that NEON state is saved/restored
	 * for it -- confirm against the call site.
	 */
	K0		.req	q4
	K1		.req	q5
	K2		.req	q6
	K3		.req	q7

	/* Temporaries; _L/_H name the low/high 64-bit halves of each. */
	T0		.req	q8
	T0_L		.req	d16
	T0_H		.req	d17
	T1		.req	q9
	T1_L		.req	d18
	T1_H		.req	d19
	T2		.req	q10
	T2_L		.req	d20
	T2_H		.req	d21
	T3		.req	q11
	T3_L		.req	d22
	T3_H		.req	d23

/*
 * _nh_stride k0, k1, k2, k3
 *
 * Process one 16-byte message stride for all four passes.
 *
 * On entry \k0, \k1 and \k2 must already hold key strides; \k3 is overwritten
 * with the next 16 bytes of key.  Both MESSAGE and KEY are post-incremented by
 * 16.  For pass i, the four 32-bit message words are added to the four 32-bit
 * words of \k<i>, then the two low words of that sum are multiplied by the two
 * high words (32x32 => 64) and accumulated into PASS<i>_SUMS.
 *
 * Clobbers T0-T3.
 */
.macro _nh_stride	k0, k1, k2, k3

	// Load next message stride
	vld1.8		{T3}, [MESSAGE]!

	// Load next key stride
	vld1.32		{\k3}, [KEY]!

	// Add message words to key words
	// (T3 is both the message source and the last destination, so the
	// add that overwrites it must come last.)
	vadd.u32	T0, T3, \k0
	vadd.u32	T1, T3, \k1
	vadd.u32	T2, T3, \k2
	vadd.u32	T3, T3, \k3

	// Multiply 32x32 => 64 and accumulate
	vmlal.u32	PASS0_SUMS, T0_L, T0_H
	vmlal.u32	PASS1_SUMS, T1_L, T1_H
	vmlal.u32	PASS2_SUMS, T2_L, T2_H
	vmlal.u32	PASS3_SUMS, T3_L, T3_H
.endm

/*
 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 *		__le64 hash[NH_NUM_PASSES])
 *
 * It's guaranteed that message_len % 16 == 0.
 *
 * Reads 48 bytes of key up front plus 16 more bytes of key per 16-byte message
 * stride, and writes four 64-bit sums (32 bytes) to 'hash'.
 */
ENTRY(nh_neon)

	// Preload the first three key strides and zero the accumulators.
	vld1.32		{K0,K1}, [KEY]!
	  vmov.u64	PASS0_SUMS, #0
	  vmov.u64	PASS1_SUMS, #0
	vld1.32		{K2}, [KEY]!
	  vmov.u64	PASS2_SUMS, #0
	  vmov.u64	PASS3_SUMS, #0

	// Main loop: 4 strides (64 bytes) per iteration.  MESSAGE_LEN is kept
	// biased by -64 so that the sign flag doubles as the loop condition.
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	blt		.Lloop4_done
.Lloop4:
	// The key registers rotate by one on each stride, so the freshly
	// loaded stride always replaces the oldest one; after four strides
	// the register assignment is back where it started.
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	bge		.Lloop4

.Lloop4_done:
	// MESSAGE_LEN is now negative; masking with 63 recovers the number of
	// bytes left over (0, 16, 32 or 48 given the multiple-of-16 guarantee).
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	beq		.Ldone
	_nh_stride	K0, K1, K2, K3

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K2, K3, K0, K1

.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'
	vadd.u64	T0_L, PASS0_SUM_A, PASS0_SUM_B
	vadd.u64	T0_H, PASS1_SUM_A, PASS1_SUM_B
	vadd.u64	T1_L, PASS2_SUM_A, PASS2_SUM_B
	vadd.u64	T1_H, PASS3_SUM_A, PASS3_SUM_B
	vst1.8		{T0-T1}, [HASH]
	bx		lr
ENDPROC(nh_neon)