162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * NH - ε-almost-universal hash function, ARM64 NEON accelerated version
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright 2018 Google LLC
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * Author: Eric Biggers <ebiggers@google.com>
862306a36Sopenharmony_ci */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/linkage.h>
1162306a36Sopenharmony_ci#include <linux/cfi_types.h>
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci	KEY		.req	x0
1462306a36Sopenharmony_ci	MESSAGE		.req	x1
1562306a36Sopenharmony_ci	MESSAGE_LEN	.req	x2
1662306a36Sopenharmony_ci	HASH		.req	x3
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci	PASS0_SUMS	.req	v0
1962306a36Sopenharmony_ci	PASS1_SUMS	.req	v1
2062306a36Sopenharmony_ci	PASS2_SUMS	.req	v2
2162306a36Sopenharmony_ci	PASS3_SUMS	.req	v3
2262306a36Sopenharmony_ci	K0		.req	v4
2362306a36Sopenharmony_ci	K1		.req	v5
2462306a36Sopenharmony_ci	K2		.req	v6
2562306a36Sopenharmony_ci	K3		.req	v7
2662306a36Sopenharmony_ci	T0		.req	v8
2762306a36Sopenharmony_ci	T1		.req	v9
2862306a36Sopenharmony_ci	T2		.req	v10
2962306a36Sopenharmony_ci	T3		.req	v11
3062306a36Sopenharmony_ci	T4		.req	v12
3162306a36Sopenharmony_ci	T5		.req	v13
3262306a36Sopenharmony_ci	T6		.req	v14
3362306a36Sopenharmony_ci	T7		.req	v15
3462306a36Sopenharmony_ci
/*
 * _nh_stride k0, k1, k2, k3
 *
 * Process one 16-byte message stride for all four passes.
 *
 * \k0, \k1, \k2 name registers that already hold key strides; \k3 names the
 * register that receives the next 16 bytes of key material.  Pass i uses \k<i>.
 *
 * Advances KEY and MESSAGE by 16 bytes each, overwrites \k3 and T0-T7, and
 * accumulates into PASS0_SUMS..PASS3_SUMS.
 */
.macro _nh_stride	k0, k1, k2, k3

	// Pull in 16 more key bytes and 16 more message bytes.  The two loads
	// use different pointers and different destination registers.
	ld1		{\k3\().4s}, [KEY], #16
	ld1		{T3.16b}, [MESSAGE], #16

	// Per pass: key words + message words, as four 32-bit lanes.  T3 holds
	// the message, so it is overwritten only by the last addition.
	add		T0.4s, \k0\().4s, T3.4s
	add		T1.4s, \k1\().4s, T3.4s
	add		T2.4s, \k2\().4s, T3.4s
	add		T3.4s, \k3\().4s, T3.4s

	// Copy lanes 2-3 of each sum into the low half of a partner register,
	// then do the widening 32x32 => 64 multiply-accumulate of lanes 0-1
	// against them: sums.d[i] += sum.s[i] * sum.s[i + 2] for i = 0, 1.
	ins		T4.d[0], T0.d[1]
	ins		T5.d[0], T1.d[1]
	ins		T6.d[0], T2.d[1]
	ins		T7.d[0], T3.d[1]
	umlal		PASS0_SUMS.2d, T0.2s, T4.2s
	umlal		PASS1_SUMS.2d, T1.2s, T5.2s
	umlal		PASS2_SUMS.2d, T2.2s, T6.2s
	umlal		PASS3_SUMS.2d, T3.2s, T7.2s
.endm
5962306a36Sopenharmony_ci
/*
 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 *		__le64 hash[NH_NUM_PASSES])
 *
 * The caller guarantees that message_len is a multiple of 16.
 *
 * Consumes the message in 16-byte strides, four strides (64 bytes) per main
 * loop iteration, followed by up to three leftover strides.  Four 64-bit
 * sums (32 bytes in total) are then written to 'hash'.
 *
 * Overwrites KEY, MESSAGE, MESSAGE_LEN, the condition flags, and every vector
 * register named by PASS*_SUMS, K0-K3 and T0-T7.
 */
SYM_TYPED_FUNC_START(nh_neon)

	// Start all four pass accumulators at zero.
	movi		PASS0_SUMS.2d, #0
	movi		PASS1_SUMS.2d, #0
	movi		PASS2_SUMS.2d, #0
	movi		PASS3_SUMS.2d, #0

	// Preload the first three key strides (48 bytes); each _nh_stride
	// loads the fourth one it needs into its last argument.
	ld1		{K0.4s,K1.4s,K2.4s}, [KEY], #48

	// Bias the length by one 64-byte group so that the sign of the result
	// tells whether a full group is still available.
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	b.lt		.Ltail

.Lgroup_of_4:
	// Four strides, rotating the key window by one register each time so
	// that it is back in K0..K2 order when the group is complete.
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs		MESSAGE_LEN, MESSAGE_LEN, #64
	b.ge		.Lgroup_of_4

.Ltail:
	// MESSAGE_LEN is negative here.  Its low six bits give the number of
	// leftover bytes, which is 0, 16, 32 or 48 given the length guarantee.
	and		MESSAGE_LEN, MESSAGE_LEN, #63
	cbz		MESSAGE_LEN, .Lstore_hash
	_nh_stride	K0, K1, K2, K3

	sub		MESSAGE_LEN, MESSAGE_LEN, #16
	cbz		MESSAGE_LEN, .Lstore_hash
	_nh_stride	K1, K2, K3, K0

	sub		MESSAGE_LEN, MESSAGE_LEN, #16
	cbz		MESSAGE_LEN, .Lstore_hash
	_nh_stride	K2, K3, K0, K1

.Lstore_hash:
	// Fold each accumulator's two 64-bit halves into a single sum:
	// T0 = { pass0, pass1 }, T1 = { pass2, pass3 }.  Both registers are
	// then written to 'hash' with one store, using the byte arrangement.
	addp		T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
	addp		T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
	st1		{T0.16b,T1.16b}, [HASH]
	ret
SYM_FUNC_END(nh_neon)
105