// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 *
 * Template for XOR operations, instantiated in xor_simd.c.
 *
 * Expected preprocessor definitions:
 *
 * - LINE_WIDTH
 * - XOR_FUNC_NAME(nr)
 * - LD_INOUT_LINE(buf)
 * - LD_AND_XOR_LINE(buf)
 * - ST_LINE(buf)
 */
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_civoid XOR_FUNC_NAME(2)(unsigned long bytes,
1762306a36Sopenharmony_ci		      unsigned long * __restrict v1,
1862306a36Sopenharmony_ci		      const unsigned long * __restrict v2)
1962306a36Sopenharmony_ci{
2062306a36Sopenharmony_ci	unsigned long lines = bytes / LINE_WIDTH;
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci	do {
2362306a36Sopenharmony_ci		__asm__ __volatile__ (
2462306a36Sopenharmony_ci			LD_INOUT_LINE(v1)
2562306a36Sopenharmony_ci			LD_AND_XOR_LINE(v2)
2662306a36Sopenharmony_ci			ST_LINE(v1)
2762306a36Sopenharmony_ci		: : [v1] "r"(v1), [v2] "r"(v2) : "memory"
2862306a36Sopenharmony_ci		);
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_ci		v1 += LINE_WIDTH / sizeof(unsigned long);
3162306a36Sopenharmony_ci		v2 += LINE_WIDTH / sizeof(unsigned long);
3262306a36Sopenharmony_ci	} while (--lines > 0);
3362306a36Sopenharmony_ci}
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_civoid XOR_FUNC_NAME(3)(unsigned long bytes,
3662306a36Sopenharmony_ci		      unsigned long * __restrict v1,
3762306a36Sopenharmony_ci		      const unsigned long * __restrict v2,
3862306a36Sopenharmony_ci		      const unsigned long * __restrict v3)
3962306a36Sopenharmony_ci{
4062306a36Sopenharmony_ci	unsigned long lines = bytes / LINE_WIDTH;
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci	do {
4362306a36Sopenharmony_ci		__asm__ __volatile__ (
4462306a36Sopenharmony_ci			LD_INOUT_LINE(v1)
4562306a36Sopenharmony_ci			LD_AND_XOR_LINE(v2)
4662306a36Sopenharmony_ci			LD_AND_XOR_LINE(v3)
4762306a36Sopenharmony_ci			ST_LINE(v1)
4862306a36Sopenharmony_ci		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
4962306a36Sopenharmony_ci		);
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci		v1 += LINE_WIDTH / sizeof(unsigned long);
5262306a36Sopenharmony_ci		v2 += LINE_WIDTH / sizeof(unsigned long);
5362306a36Sopenharmony_ci		v3 += LINE_WIDTH / sizeof(unsigned long);
5462306a36Sopenharmony_ci	} while (--lines > 0);
5562306a36Sopenharmony_ci}
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_civoid XOR_FUNC_NAME(4)(unsigned long bytes,
5862306a36Sopenharmony_ci		      unsigned long * __restrict v1,
5962306a36Sopenharmony_ci		      const unsigned long * __restrict v2,
6062306a36Sopenharmony_ci		      const unsigned long * __restrict v3,
6162306a36Sopenharmony_ci		      const unsigned long * __restrict v4)
6262306a36Sopenharmony_ci{
6362306a36Sopenharmony_ci	unsigned long lines = bytes / LINE_WIDTH;
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci	do {
6662306a36Sopenharmony_ci		__asm__ __volatile__ (
6762306a36Sopenharmony_ci			LD_INOUT_LINE(v1)
6862306a36Sopenharmony_ci			LD_AND_XOR_LINE(v2)
6962306a36Sopenharmony_ci			LD_AND_XOR_LINE(v3)
7062306a36Sopenharmony_ci			LD_AND_XOR_LINE(v4)
7162306a36Sopenharmony_ci			ST_LINE(v1)
7262306a36Sopenharmony_ci		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
7362306a36Sopenharmony_ci		: "memory"
7462306a36Sopenharmony_ci		);
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci		v1 += LINE_WIDTH / sizeof(unsigned long);
7762306a36Sopenharmony_ci		v2 += LINE_WIDTH / sizeof(unsigned long);
7862306a36Sopenharmony_ci		v3 += LINE_WIDTH / sizeof(unsigned long);
7962306a36Sopenharmony_ci		v4 += LINE_WIDTH / sizeof(unsigned long);
8062306a36Sopenharmony_ci	} while (--lines > 0);
8162306a36Sopenharmony_ci}
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_civoid XOR_FUNC_NAME(5)(unsigned long bytes,
8462306a36Sopenharmony_ci		      unsigned long * __restrict v1,
8562306a36Sopenharmony_ci		      const unsigned long * __restrict v2,
8662306a36Sopenharmony_ci		      const unsigned long * __restrict v3,
8762306a36Sopenharmony_ci		      const unsigned long * __restrict v4,
8862306a36Sopenharmony_ci		      const unsigned long * __restrict v5)
8962306a36Sopenharmony_ci{
9062306a36Sopenharmony_ci	unsigned long lines = bytes / LINE_WIDTH;
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_ci	do {
9362306a36Sopenharmony_ci		__asm__ __volatile__ (
9462306a36Sopenharmony_ci			LD_INOUT_LINE(v1)
9562306a36Sopenharmony_ci			LD_AND_XOR_LINE(v2)
9662306a36Sopenharmony_ci			LD_AND_XOR_LINE(v3)
9762306a36Sopenharmony_ci			LD_AND_XOR_LINE(v4)
9862306a36Sopenharmony_ci			LD_AND_XOR_LINE(v5)
9962306a36Sopenharmony_ci			ST_LINE(v1)
10062306a36Sopenharmony_ci		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
10162306a36Sopenharmony_ci		    [v5] "r"(v5) : "memory"
10262306a36Sopenharmony_ci		);
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci		v1 += LINE_WIDTH / sizeof(unsigned long);
10562306a36Sopenharmony_ci		v2 += LINE_WIDTH / sizeof(unsigned long);
10662306a36Sopenharmony_ci		v3 += LINE_WIDTH / sizeof(unsigned long);
10762306a36Sopenharmony_ci		v4 += LINE_WIDTH / sizeof(unsigned long);
10862306a36Sopenharmony_ci		v5 += LINE_WIDTH / sizeof(unsigned long);
10962306a36Sopenharmony_ci	} while (--lines > 0);
11062306a36Sopenharmony_ci}
111