/* SPDX-License-Identifier: GPL-2.0 */
/*
 * NH - ε-almost-universal hash function, x86_64 SSE2 accelerated
 *
 * Copyright 2018 Google LLC
 *
 * Author: Eric Biggers <ebiggers@google.com>
 */

#include <linux/linkage.h>
#include <linux/cfi_types.h>

/*
 * Register aliases.  This file uses AT&T syntax: "op src, dst".
 */

/*
 * Accumulators, one per NH pass.  Each is updated with paddq, i.e. it holds
 * two independent 64-bit partial sums that are only combined at the end of
 * nh_sse2().
 */
#define		PASS0_SUMS	%xmm0
#define		PASS1_SUMS	%xmm1
#define		PASS2_SUMS	%xmm2
#define		PASS3_SUMS	%xmm3

/*
 * 16-byte key strides currently held in registers.  _nh_stride loads a new
 * stride into one of them and overwrites another with an intermediate value,
 * so callers rotate the roles of K0..K3 from one invocation to the next.
 */
#define		K0		%xmm4
#define		K1		%xmm5
#define		K2		%xmm6
#define		K3		%xmm7

/* Scratch registers */
#define		T0		%xmm8
#define		T1		%xmm9
#define		T2		%xmm10
#define		T3		%xmm11
#define		T4		%xmm12
#define		T5		%xmm13
#define		T6		%xmm14
#define		T7		%xmm15

/*
 * Arguments of nh_sse2(), in prototype order (the first four integer argument
 * registers of the System V AMD64 calling convention).  KEY, MESSAGE and
 * MESSAGE_LEN are modified while the message is consumed.
 */
#define		KEY		%rdi
#define		MESSAGE		%rsi
#define		MESSAGE_LEN	%rdx
#define		HASH		%rcx

/*
 * _nh_stride k0, k1, k2, k3, offset
 *
 * Process one 16-byte message stride for all four passes.
 *
 * In:   \k0, \k1, \k2   key strides for passes 0, 1 and 2
 *       \offset(MESSAGE) 16 bytes of message (unaligned load)
 *       \offset(KEY)     16 bytes of key, loaded into \k3 for pass 3
 *
 * For each pass the four message dwords are added to the four key dwords
 * (32-bit wrapping adds).  Calling the resulting dwords s0..s3, the 64-bit
 * products s0 * s2 and s1 * s3 are added to the low and high quadword of the
 * pass's accumulator respectively.
 *
 * Out:  PASS0_SUMS..PASS3_SUMS updated
 *       \k3 holds the newly loaded key stride
 * Clobbers: \k0 (left holding the pass 0 products), T1-T7.
 * Preserves: \k1, \k2, KEY, MESSAGE.
 */
.macro _nh_stride	k0, k1, k2, k3, offset

	// Load next message stride
	movdqu		\offset(MESSAGE), T1

	// Load next key stride
	movdqu		\offset(KEY), \k3

	// Add message words to key words
	movdqa		T1, T2
	movdqa		T1, T3
	paddd		T1, \k0    // reuse k0 to avoid a move
	paddd		\k1, T1
	paddd		\k2, T2
	paddd		\k3, T3

	// Multiply 32x32 => 64 and accumulate
	//
	// pmuludq only multiplies the even dwords (0 and 2) of its operands.
	// pshufd $0x10 puts source dwords 0 and 1 into those positions and
	// pshufd $0x32 puts source dwords 2 and 3 there, so each pmuludq
	// yields (s0 * s2, s1 * s3) as two 64-bit products.
	pshufd		$0x10, \k0, T4
	pshufd		$0x32, \k0, \k0
	pshufd		$0x10, T1, T5
	pshufd		$0x32, T1, T1
	pshufd		$0x10, T2, T6
	pshufd		$0x32, T2, T2
	pshufd		$0x10, T3, T7
	pshufd		$0x32, T3, T3
	pmuludq		T4, \k0
	pmuludq		T5, T1
	pmuludq		T6, T2
	pmuludq		T7, T3
	paddq		\k0, PASS0_SUMS
	paddq		T1, PASS1_SUMS
	paddq		T2, PASS2_SUMS
	paddq		T3, PASS3_SUMS
.endm

/*
 * void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
 *		__le64 hash[NH_NUM_PASSES])
 *
 * It's guaranteed that message_len % 16 == 0.
 *
 * Runs four passes over the message in parallel and stores four 64-bit sums
 * (32 bytes) to 'hash'.  The i-th 16-byte message stride is combined with key
 * strides i, i+1, i+2 and i+3 for passes 0, 1, 2 and 3, so message_len + 0x30
 * bytes of key are read.
 *
 * In:     KEY (%rdi), MESSAGE (%rsi), MESSAGE_LEN (%rdx), HASH (%rcx)
 * Out:    0x00..0x1f(HASH)
 * Clobb:  %rdi, %rsi, %rdx, %xmm0-%xmm15, flags
 * Stack:  not used
 */
SYM_TYPED_FUNC_START(nh_sse2)

	// Preload the first three key strides.  KEY is then advanced past
	// them so that \offset(KEY) inside _nh_stride addresses the key
	// stride needed for pass 3 of the message stride at \offset(MESSAGE).
	movdqu		0x00(KEY), K0
	movdqu		0x10(KEY), K1
	movdqu		0x20(KEY), K2
	add		$0x30, KEY
	pxor		PASS0_SUMS, PASS0_SUMS
	pxor		PASS1_SUMS, PASS1_SUMS
	pxor		PASS2_SUMS, PASS2_SUMS
	pxor		PASS3_SUMS, PASS3_SUMS

	// Main loop: 64 bytes (four strides) per iteration.  MESSAGE_LEN is
	// kept biased by -0x40 so the sign of the subtraction result tells
	// whether another full iteration is available.
	sub		$0x40, MESSAGE_LEN
	jl		.Lloop4_done
.Lloop4:
	// The key registers rotate by one position per stride: the register
	// clobbered as \k0 by one stride is reloaded as \k3 by the next.
	// After four strides K0, K1, K2 again hold the strides for passes
	// 0, 1, 2 of the following message stride.
	_nh_stride	K0, K1, K2, K3, 0x00
	_nh_stride	K1, K2, K3, K0, 0x10
	_nh_stride	K2, K3, K0, K1, 0x20
	_nh_stride	K3, K0, K1, K2, 0x30
	add		$0x40, KEY
	add		$0x40, MESSAGE
	sub		$0x40, MESSAGE_LEN
	jge		.Lloop4

.Lloop4_done:
	// Subtracting multiples of 0x40 left the low six bits untouched, so
	// this recovers the number of bytes remaining: 0x00, 0x10, 0x20 or
	// 0x30 given that message_len is a multiple of 16.
	and		$0x3f, MESSAGE_LEN
	jz		.Ldone
	_nh_stride	K0, K1, K2, K3, 0x00

	sub		$0x10, MESSAGE_LEN
	jz		.Ldone
	_nh_stride	K1, K2, K3, K0, 0x10

	sub		$0x10, MESSAGE_LEN
	jz		.Ldone
	_nh_stride	K2, K3, K0, K1, 0x20

.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'
	//
	// SUM_A / SUM_B denote the low / high quadword of an accumulator.
	movdqa		PASS0_SUMS, T0
	movdqa		PASS2_SUMS, T1
	punpcklqdq	PASS1_SUMS, T0		// => (PASS0_SUM_A PASS1_SUM_A)
	punpcklqdq	PASS3_SUMS, T1		// => (PASS2_SUM_A PASS3_SUM_A)
	punpckhqdq	PASS1_SUMS, PASS0_SUMS	// => (PASS0_SUM_B PASS1_SUM_B)
	punpckhqdq	PASS3_SUMS, PASS2_SUMS	// => (PASS2_SUM_B PASS3_SUM_B)
	paddq		PASS0_SUMS, T0
	paddq		PASS2_SUMS, T1
	movdqu		T0, 0x00(HASH)
	movdqu		T1, 0x10(HASH)
	RET
SYM_FUNC_END(nh_sse2)