1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * NH - ε-almost-universal hash function, ARM64 NEON accelerated version
4  *
5  * Copyright 2018 Google LLC
6  *
7  * Author: Eric Biggers <ebiggers@google.com>
8  */
9 
10 #include <linux/linkage.h>
11 #include <linux/cfi_types.h>
12 
13 	KEY		.req	x0
14 	MESSAGE		.req	x1
15 	MESSAGE_LEN	.req	x2
16 	HASH		.req	x3
17 
18 	PASS0_SUMS	.req	v0
19 	PASS1_SUMS	.req	v1
20 	PASS2_SUMS	.req	v2
21 	PASS3_SUMS	.req	v3
22 	K0		.req	v4
23 	K1		.req	v5
24 	K2		.req	v6
25 	K3		.req	v7
26 	T0		.req	v8
27 	T1		.req	v9
28 	T2		.req	v10
29 	T3		.req	v11
30 	T4		.req	v12
31 	T5		.req	v13
32 	T6		.req	v14
33 	T7		.req	v15
34 
/*
 * _nh_stride - fold one 16-byte message stride into all four accumulators
 *
 * \kp0, \kp1 and \kp2 name key-stride registers that are already loaded; they
 * feed passes 0, 1 and 2.  \kp3 names the register that receives the next
 * 16 bytes of key (loaded here) and feeds pass 3.
 *
 * Post-increments MESSAGE and KEY by 16 bytes each and overwrites T0-T7.
 */
.macro _nh_stride	kp0, kp1, kp2, kp3

	// Fetch the next 16 message bytes into T3
	ld1		{T3.16b}, [MESSAGE], #16

	// Fetch the next 16 key bytes into the pass-3 key register
	ld1		{\kp3\().4s}, [KEY], #16

	// Per pass: lane-wise 32-bit sum of message words and key words.
	// T3 holds the message, so it is overwritten last.
	add		T0.4s, T3.4s, \kp0\().4s
	add		T1.4s, T3.4s, \kp1\().4s
	add		T2.4s, T3.4s, \kp2\().4s
	add		T3.4s, T3.4s, \kp3\().4s

	// Bring the upper two 32-bit lanes of each sum down into the low half
	// of a second register, lining them up with the lower two lanes
	ins		T4.d[0], T0.d[1]
	ins		T5.d[0], T1.d[1]
	ins		T6.d[0], T2.d[1]
	ins		T7.d[0], T3.d[1]

	// Widening multiply-accumulate (32x32 => 64):
	// sums.d[i] += sum.s[i] * sum.s[i + 2], for i = 0, 1
	umlal		PASS0_SUMS.2d, T0.2s, T4.2s
	umlal		PASS1_SUMS.2d, T1.2s, T5.2s
	umlal		PASS2_SUMS.2d, T2.2s, T6.2s
	umlal		PASS3_SUMS.2d, T3.2s, T7.2s
.endm
59 
/*
 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 *		__le64 hash[NH_NUM_PASSES])
 *
 * Runs four NH passes over 'message' in parallel.  Each 16-byte message
 * stride is combined with four overlapping 16-byte key strides, the key
 * window advancing by one stride per message stride.  The two 64-bit lanes of
 * each pass accumulator are added together at the end and the four resulting
 * 64-bit sums (32 bytes in total) are stored to 'hash'.
 *
 * In:	x0 = key, x1 = message, x2 = message_len, x3 = hash
 * Clobbers: x0-x2, flags, PASS0_SUMS-PASS3_SUMS, K0-K3, T0-T7
 *
 * It's guaranteed that message_len % 16 == 0.
 */
SYM_TYPED_FUNC_START(nh_neon)

	// Preload the first three key strides and zero the accumulators
	// (the movi's are interleaved with the loads)
	ld1		{K0.4s,K1.4s}, [KEY], #32
	  movi		PASS0_SUMS.2d, #0
	  movi		PASS1_SUMS.2d, #0
	ld1		{K2.4s}, [KEY], #16
	  movi		PASS2_SUMS.2d, #0
	  movi		PASS3_SUMS.2d, #0

	// message_len is a size_t, so the loop conditions test the borrow of
	// the subtraction (lo/hs) rather than the sign of its result (lt/ge).
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	b.lo		.Lloop4_done
.Lloop4:
	// Four strides (64 bytes) per iteration; rotating the register names
	// brings the key window back to K0..K2 at the end of each iteration.
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	b.hs		.Lloop4

.Lloop4_done:
	// The counter has wrapped below zero here, but only multiples of 64
	// were subtracted, so its low six bits are still message_len % 64.
	ands		MESSAGE_LEN, MESSAGE_LEN, #63
	b.eq		.Ldone
	_nh_stride	K0, K1, K2, K3

	// Up to two more 16-byte strides may remain
	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	b.eq		.Ldone
	_nh_stride	K1, K2, K3, K0

	subs		MESSAGE_LEN, MESSAGE_LEN, #16
	b.eq		.Ldone
	_nh_stride	K2, K3, K0, K1

.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'
	addp		T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
	addp		T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
	st1		{T0.16b,T1.16b}, [HASH]
	ret
SYM_FUNC_END(nh_neon)
105