/* SPDX-License-Identifier: GPL-2.0 */
/*
 * NH - ε-almost-universal hash function, ARM64 NEON accelerated version
 *
 * Copyright 2018 Google LLC
 *
 * Author: Eric Biggers <ebiggers@google.com>
 */

#include <linux/linkage.h>

	// Arguments of nh_neon(), in prototype order
	KEY		.req	x0
	MESSAGE		.req	x1
	MESSAGE_LEN	.req	x2
	HASH		.req	x3

	// Two 64-bit accumulator lanes per pass
	PASS0_SUMS	.req	v0
	PASS1_SUMS	.req	v1
	PASS2_SUMS	.req	v2
	PASS3_SUMS	.req	v3

	// Window of four consecutive 16-byte key strides
	K0		.req	v4
	K1		.req	v5
	K2		.req	v6
	K3		.req	v7

	// Scratch registers.  These are deliberately taken from v16-v23, which
	// are caller-saved under AAPCS64.  v8-v15 must not be used here: their
	// low 64 bits are callee-saved and this code never saves or restores
	// any vector register.  T0 and T1 must remain consecutive because the
	// final st1 stores them as a two-register list.
	T0		.req	v16
	T1		.req	v17
	T2		.req	v18
	T3		.req	v19
	T4		.req	v20
	T5		.req	v21
	T6		.req	v22
	T7		.req	v23

/*
 * _nh_stride k0, k1, k2, k3
 *
 * Process one 16-byte message stride for all four passes.
 *
 * \k0, \k1 and \k2 must already hold the key strides used by passes 0, 1
 * and 2; \k3 is overwritten with the next 16 bytes read from KEY and is
 * used by pass 3.  MESSAGE and KEY are both post-incremented by 16.
 *
 * Clobbers T0-T7 and updates PASS0_SUMS-PASS3_SUMS.  Does not modify the
 * condition flags.
 */
.macro _nh_stride	k0, k1, k2, k3

	// Load next message stride
	ld1		{T3.16b}, [MESSAGE], #16

	// Load next key stride
	ld1		{\k3\().4s}, [KEY], #16

	// Add message words to key words.  T3 holds the message, so it has
	// to be the last destination written.
	add		T0.4s, T3.4s, \k0\().4s
	add		T1.4s, T3.4s, \k1\().4s
	add		T2.4s, T3.4s, \k2\().4s
	add		T3.4s, T3.4s, \k3\().4s

	// Multiply 32x32 => 64 and accumulate.  The upper two words of each
	// sum vector are copied into the low half of a second register so
	// that umlal computes word0 * word2 and word1 * word3.
	mov		T4.d[0], T0.d[1]
	mov		T5.d[0], T1.d[1]
	mov		T6.d[0], T2.d[1]
	mov		T7.d[0], T3.d[1]
	umlal		PASS0_SUMS.2d, T0.2s, T4.2s
	umlal		PASS1_SUMS.2d, T1.2s, T5.2s
	umlal		PASS2_SUMS.2d, T2.2s, T6.2s
	umlal		PASS3_SUMS.2d, T3.2s, T7.2s
.endm

/*
 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 *		u8 hash[NH_HASH_BYTES])
 *
 * It's guaranteed that message_len % 16 == 0.
 *
 * In:       x0 = key, x1 = message, x2 = message_len, x3 = hash
 * Out:      32 bytes written to 'hash' (four 64-bit sums, pass 0 first)
 * Clobbers: x0-x2, v0-v7, v16-v23, condition flags
 *
 * The key is read 16 bytes per message stride, plus 48 bytes up front.
 */
SYM_FUNC_START(nh_neon)

	// Preload the first three key strides and zero the accumulators
	ld1		{K0.4s,K1.4s}, [KEY], #32
	movi		PASS0_SUMS.2d, #0
	movi		PASS1_SUMS.2d, #0
	ld1		{K2.4s}, [KEY], #16
	movi		PASS2_SUMS.2d, #0
	movi		PASS3_SUMS.2d, #0

	// Main loop: 64 bytes (four strides) per iteration.  The key window
	// rotates by one register per stride, so after four strides the
	// register roles are back where they started.
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	blt		.Lloop4_done
.Lloop4:
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	bge		.Lloop4

.Lloop4_done:
	// MESSAGE_LEN has gone negative here; masking with 63 recovers the
	// number of bytes still to process (0, 16, 32 or 48).
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	beq		.Ldone
	_nh_stride	K0, K1, K2, K3

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	beq		.Ldone
	_nh_stride	K2, K3, K0, K1

.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'
	addp		T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
	addp		T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
	st1		{T0.16b,T1.16b}, [HASH]
	ret
SYM_FUNC_END(nh_neon)