/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  arch/arm/include/asm/xor.h
 *
 *  Copyright (C) 2001 Russell King
 */
762306a36Sopenharmony_ci#include <linux/hardirq.h>
862306a36Sopenharmony_ci#include <asm-generic/xor.h>
962306a36Sopenharmony_ci#include <asm/hwcap.h>
1062306a36Sopenharmony_ci#include <asm/neon.h>
1162306a36Sopenharmony_ci
/*
 * __XOR(a1, a2) - XOR a2 into a1 (a1 must be an lvalue).
 *
 * Both arguments and the whole expansion are parenthesized so the macro
 * behaves like a single expression wherever it is expanded.
 */
#define __XOR(a1, a2) ((a1) ^= (a2))
1362306a36Sopenharmony_ci
/*
 * GET_BLOCK_2(dst) / GET_BLOCK_4(dst) - load the next 2 (or 4) words found
 * at dst into the accumulators a1..a2 (or a1..a4).
 *
 * The "ldmia %0, {...}" form carries no "!" writeback suffix, so dst is
 * left pointing at the start of the block.  dst is still listed as an
 * output tied to the "0" input so that one register serves as both.
 *
 * The accumulators are not macro parameters: the expansion site has to
 * declare a1..a2 (or a1..a4) itself.
 *
 * The instruction reads the words behind dst, which no operand describes,
 * so a "memory" clobber is given to tell the compiler that memory is
 * accessed and the load must not be moved across other memory accesses.
 */
#define GET_BLOCK_2(dst) \
	__asm__("ldmia	%0, {%1, %2}" \
		: "=r" (dst), "=r" (a1), "=r" (a2) \
		: "0" (dst) \
		: "memory")

#define GET_BLOCK_4(dst) \
	__asm__("ldmia	%0, {%1, %2, %3, %4}" \
		: "=r" (dst), "=r" (a1), "=r" (a2), "=r" (a3), "=r" (a4) \
		: "0" (dst) \
		: "memory")
2362306a36Sopenharmony_ci
/*
 * XOR_BLOCK_2(src) / XOR_BLOCK_4(src) - load the next 2 (or 4) words from
 * src into the scratch variables b1..b2 (or b1..b4), then XOR each of them
 * into the matching accumulator a1..a2 (or a1..a4).
 *
 * "ldmia %0!, {...}" uses the "!" writeback form, so src is advanced past
 * the words just read; that is why src is an output tied to the "0" input.
 *
 * a* and b* are not macro parameters: the expansion site has to declare
 * them itself.
 *
 * Each macro expands to exactly one statement (do { } while (0)) and must
 * be followed by a ';' at the expansion site.  The "memory" clobber tells
 * the compiler that the asm reads memory not described by any operand.
 */
#define XOR_BLOCK_2(src) \
	do { \
		__asm__("ldmia	%0!, {%1, %2}" \
			: "=r" (src), "=r" (b1), "=r" (b2) \
			: "0" (src) \
			: "memory"); \
		__XOR(a1, b1); __XOR(a2, b2); \
	} while (0)

#define XOR_BLOCK_4(src) \
	do { \
		__asm__("ldmia	%0!, {%1, %2, %3, %4}" \
			: "=r" (src), "=r" (b1), "=r" (b2), "=r" (b3), "=r" (b4) \
			: "0" (src) \
			: "memory"); \
		__XOR(a1, b1); __XOR(a2, b2); __XOR(a3, b3); __XOR(a4, b4); \
	} while (0)
3562306a36Sopenharmony_ci
/*
 * PUT_BLOCK_2(dst) / PUT_BLOCK_4(dst) - store the accumulators a1..a2
 * (or a1..a4) to dst and advance dst past the words written.
 *
 * "stmia %0!, {...}" uses the "!" writeback form, hence dst is an output
 * tied to the "0" input.  Operand 1 is that tied input, so the data
 * registers start at %2.
 *
 * The accumulators are not macro parameters: the expansion site has to
 * declare them itself.
 *
 * The instruction writes memory that no operand describes, so a "memory"
 * clobber is given in addition to __volatile__ to keep the compiler from
 * caching or reordering accesses to that memory around the store.
 */
#define PUT_BLOCK_2(dst) \
	__asm__ __volatile__("stmia	%0!, {%2, %3}" \
		: "=r" (dst) \
		: "0" (dst), "r" (a1), "r" (a2) \
		: "memory")

#define PUT_BLOCK_4(dst) \
	__asm__ __volatile__("stmia	%0!, {%2, %3, %4, %5}" \
		: "=r" (dst) \
		: "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4) \
		: "memory")
4562306a36Sopenharmony_ci
/*
 * xor_arm4regs_2 - XOR the buffer at p2 into the buffer at p1.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination and first operand; rewritten in place
 * @p2:    second operand; only read
 *
 * The data is handled in blocks of four words: four words of p1 are
 * loaded, four words of p2 are XORed in, and the result is stored back to
 * p1.  Bytes beyond the last complete block are not processed.
 *
 * The block count is only tested after a block has been processed, so
 * @bytes must cover at least one block; a count of zero would wrap around
 * rather than end the loop.
 */
static void
xor_arm4regs_2(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2)
{
	/*
	 * a1..a4 (accumulators) and b1..b4 (scratch) are referenced by name
	 * from the GET/XOR/PUT block macros; each is pinned to one register.
	 * NOTE(review): presumably chosen so the LDM/STM register lists are
	 * in ascending register order - confirm against the ARM ARM.
	 */
	register unsigned int a1 __asm__("r4");
	register unsigned int a2 __asm__("r5");
	register unsigned int a3 __asm__("r6");
	register unsigned int a4 __asm__("r10");
	register unsigned int b1 __asm__("r8");
	register unsigned int b2 __asm__("r9");
	register unsigned int b3 __asm__("ip");
	register unsigned int b4 __asm__("lr");
	unsigned int blocks = bytes / sizeof(*p1) / 4;

	for (;;) {
		GET_BLOCK_4(p1);
		XOR_BLOCK_4(p2);
		PUT_BLOCK_4(p1);

		if (--blocks == 0)
			break;
	}
}
6662306a36Sopenharmony_ci
/*
 * xor_arm4regs_3 - XOR the buffers at p2 and p3 into the buffer at p1.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination and first operand; rewritten in place
 * @p2:    second operand; only read
 * @p3:    third operand; only read
 *
 * The data is handled in blocks of four words: four words of p1 are
 * loaded, the matching words of p2 and then p3 are XORed in, and the
 * result is stored back to p1.  Bytes beyond the last complete block are
 * not processed.
 *
 * The block count is only tested after a block has been processed, so
 * @bytes must cover at least one block; a count of zero would wrap around
 * rather than end the loop.
 */
static void
xor_arm4regs_3(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3)
{
	/*
	 * a1..a4 (accumulators) and b1..b4 (scratch) are referenced by name
	 * from the GET/XOR/PUT block macros; each is pinned to one register.
	 */
	register unsigned int a1 __asm__("r4");
	register unsigned int a2 __asm__("r5");
	register unsigned int a3 __asm__("r6");
	register unsigned int a4 __asm__("r10");
	register unsigned int b1 __asm__("r8");
	register unsigned int b2 __asm__("r9");
	register unsigned int b3 __asm__("ip");
	register unsigned int b4 __asm__("lr");
	unsigned int blocks = bytes / sizeof(*p1) / 4;

	for (;;) {
		GET_BLOCK_4(p1);
		XOR_BLOCK_4(p2);
		XOR_BLOCK_4(p3);
		PUT_BLOCK_4(p1);

		if (--blocks == 0)
			break;
	}
}
8962306a36Sopenharmony_ci
/*
 * xor_arm4regs_4 - XOR the buffers at p2, p3 and p4 into the buffer at p1.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination and first operand; rewritten in place
 * @p2:    second operand; only read
 * @p3:    third operand; only read
 * @p4:    fourth operand; only read
 *
 * Unlike the two- and three-buffer variants this one works in blocks of
 * two words and uses only two accumulators and two scratch variables.
 * Bytes beyond the last complete block are not processed.
 *
 * The block count is only tested after a block has been processed, so
 * @bytes must cover at least one block; a count of zero would wrap around
 * rather than end the loop.
 */
static void
xor_arm4regs_4(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3,
	       const unsigned long * __restrict p4)
{
	/*
	 * a1..a2 (accumulators) and b1..b2 (scratch) are referenced by name
	 * from the GET/XOR/PUT block macros; each is pinned to one register.
	 */
	register unsigned int a1 __asm__("r8");
	register unsigned int a2 __asm__("r9");
	register unsigned int b1 __asm__("ip");
	register unsigned int b2 __asm__("lr");
	unsigned int blocks = bytes / sizeof(*p1) / 2;

	for (;;) {
		GET_BLOCK_2(p1);
		XOR_BLOCK_2(p2);
		XOR_BLOCK_2(p3);
		XOR_BLOCK_2(p4);
		PUT_BLOCK_2(p1);

		if (--blocks == 0)
			break;
	}
}
11062306a36Sopenharmony_ci
/*
 * xor_arm4regs_5 - XOR the buffers at p2..p5 into the buffer at p1.
 * @bytes: length of each buffer, in bytes
 * @p1:    destination and first operand; rewritten in place
 * @p2:    second operand; only read
 * @p3:    third operand; only read
 * @p4:    fourth operand; only read
 * @p5:    fifth operand; only read
 *
 * Works in blocks of two words with two accumulators and two scratch
 * variables: two words of p1 are loaded, the matching words of p2, p3, p4
 * and p5 are XORed in, and the result is stored back to p1.  Bytes beyond
 * the last complete block are not processed.
 *
 * The block count is only tested after a block has been processed, so
 * @bytes must cover at least one block; a count of zero would wrap around
 * rather than end the loop.
 */
static void
xor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1,
	       const unsigned long * __restrict p2,
	       const unsigned long * __restrict p3,
	       const unsigned long * __restrict p4,
	       const unsigned long * __restrict p5)
{
	/*
	 * a1..a2 (accumulators) and b1..b2 (scratch) are referenced by name
	 * from the GET/XOR/PUT block macros; each is pinned to one register.
	 */
	register unsigned int a1 __asm__("r8");
	register unsigned int a2 __asm__("r9");
	register unsigned int b1 __asm__("ip");
	register unsigned int b2 __asm__("lr");
	unsigned int blocks = bytes / sizeof(*p1) / 2;

	for (;;) {
		GET_BLOCK_2(p1);
		XOR_BLOCK_2(p2);
		XOR_BLOCK_2(p3);
		XOR_BLOCK_2(p4);
		XOR_BLOCK_2(p5);
		PUT_BLOCK_2(p1);

		if (--blocks == 0)
			break;
	}
}
13362306a36Sopenharmony_ci
/*
 * Template named "arm4regs": binds the 2-, 3-, 4- and 5-buffer slots to the
 * xor_arm4regs_N() routines of this file.  It is handed to xor_speed() by
 * XOR_TRY_TEMPLATES.
 */
static struct xor_block_template xor_block_arm4regs = {
	.name	= "arm4regs",
	.do_2	= xor_arm4regs_2,
	.do_3	= xor_arm4regs_3,
	.do_4	= xor_arm4regs_4,
	.do_5	= xor_arm4regs_5,
};
14162306a36Sopenharmony_ci
/*
 * Drop any earlier definition of XOR_TRY_TEMPLATES and install the ARM
 * list: each template is passed to xor_speed() in turn (xor_speed() is
 * defined outside this file - presumably it benchmarks the candidate;
 * confirm against its definition).  xor_block_8regs and xor_block_32regs
 * are not defined in this file.
 *
 * NEON_TEMPLATES is defined further down.  That is fine because a macro
 * body is only expanded where XOR_TRY_TEMPLATES is used, not where it is
 * defined.  It expands either to a guarded xor_speed() call or to
 * nothing, depending on CONFIG_KERNEL_MODE_NEON.
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES			\
	do {					\
		xor_speed(&xor_block_arm4regs);	\
		xor_speed(&xor_block_8regs);	\
		xor_speed(&xor_block_32regs);	\
		NEON_TEMPLATES;			\
	} while (0)
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci#ifdef CONFIG_KERNEL_MODE_NEON
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ciextern struct xor_block_template const xor_block_neon_inner;
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_cistatic void
15662306a36Sopenharmony_cixor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
15762306a36Sopenharmony_ci	   const unsigned long * __restrict p2)
15862306a36Sopenharmony_ci{
15962306a36Sopenharmony_ci	if (in_interrupt()) {
16062306a36Sopenharmony_ci		xor_arm4regs_2(bytes, p1, p2);
16162306a36Sopenharmony_ci	} else {
16262306a36Sopenharmony_ci		kernel_neon_begin();
16362306a36Sopenharmony_ci		xor_block_neon_inner.do_2(bytes, p1, p2);
16462306a36Sopenharmony_ci		kernel_neon_end();
16562306a36Sopenharmony_ci	}
16662306a36Sopenharmony_ci}
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_cistatic void
16962306a36Sopenharmony_cixor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
17062306a36Sopenharmony_ci	   const unsigned long * __restrict p2,
17162306a36Sopenharmony_ci	   const unsigned long * __restrict p3)
17262306a36Sopenharmony_ci{
17362306a36Sopenharmony_ci	if (in_interrupt()) {
17462306a36Sopenharmony_ci		xor_arm4regs_3(bytes, p1, p2, p3);
17562306a36Sopenharmony_ci	} else {
17662306a36Sopenharmony_ci		kernel_neon_begin();
17762306a36Sopenharmony_ci		xor_block_neon_inner.do_3(bytes, p1, p2, p3);
17862306a36Sopenharmony_ci		kernel_neon_end();
17962306a36Sopenharmony_ci	}
18062306a36Sopenharmony_ci}
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_cistatic void
18362306a36Sopenharmony_cixor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
18462306a36Sopenharmony_ci	   const unsigned long * __restrict p2,
18562306a36Sopenharmony_ci	   const unsigned long * __restrict p3,
18662306a36Sopenharmony_ci	   const unsigned long * __restrict p4)
18762306a36Sopenharmony_ci{
18862306a36Sopenharmony_ci	if (in_interrupt()) {
18962306a36Sopenharmony_ci		xor_arm4regs_4(bytes, p1, p2, p3, p4);
19062306a36Sopenharmony_ci	} else {
19162306a36Sopenharmony_ci		kernel_neon_begin();
19262306a36Sopenharmony_ci		xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4);
19362306a36Sopenharmony_ci		kernel_neon_end();
19462306a36Sopenharmony_ci	}
19562306a36Sopenharmony_ci}
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_cistatic void
19862306a36Sopenharmony_cixor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
19962306a36Sopenharmony_ci	   const unsigned long * __restrict p2,
20062306a36Sopenharmony_ci	   const unsigned long * __restrict p3,
20162306a36Sopenharmony_ci	   const unsigned long * __restrict p4,
20262306a36Sopenharmony_ci	   const unsigned long * __restrict p5)
20362306a36Sopenharmony_ci{
20462306a36Sopenharmony_ci	if (in_interrupt()) {
20562306a36Sopenharmony_ci		xor_arm4regs_5(bytes, p1, p2, p3, p4, p5);
20662306a36Sopenharmony_ci	} else {
20762306a36Sopenharmony_ci		kernel_neon_begin();
20862306a36Sopenharmony_ci		xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5);
20962306a36Sopenharmony_ci		kernel_neon_end();
21062306a36Sopenharmony_ci	}
21162306a36Sopenharmony_ci}
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_cistatic struct xor_block_template xor_block_neon = {
21462306a36Sopenharmony_ci	.name	= "neon",
21562306a36Sopenharmony_ci	.do_2	= xor_neon_2,
21662306a36Sopenharmony_ci	.do_3	= xor_neon_3,
21762306a36Sopenharmony_ci	.do_4	= xor_neon_4,
21862306a36Sopenharmony_ci	.do_5	= xor_neon_5
21962306a36Sopenharmony_ci};
22062306a36Sopenharmony_ci
/*
 * NEON build: hand the "neon" template to xor_speed(), but only when
 * cpu_has_neon() reports true at run time.  Wrapped in do { } while (0) so
 * the expansion is a single statement.
 */
#define NEON_TEMPLATES	\
	do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0)
22362306a36Sopenharmony_ci#else
22462306a36Sopenharmony_ci#define NEON_TEMPLATES
22562306a36Sopenharmony_ci#endif
226