1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
4 *
5 * Template for XOR operations, instantiated in xor_simd.c.
6 *
7 * Expected preprocessor definitions:
8 *
9 * - LINE_WIDTH
10 * - XOR_FUNC_NAME(nr)
11 * - LD_INOUT_LINE(buf)
12 * - LD_AND_XOR_LINE(buf)
13 * - ST_LINE(buf)
14 */
15
16void XOR_FUNC_NAME(2)(unsigned long bytes,
17		      unsigned long *v1, unsigned long *v2)
18{
19	unsigned long lines = bytes / LINE_WIDTH;
20
21	do {
22		__asm__ __volatile__ (
23			LD_INOUT_LINE(v1)
24			LD_AND_XOR_LINE(v2)
25			ST_LINE(v1)
26		: : [v1] "r"(v1), [v2] "r"(v2) : "memory"
27		);
28
29		v1 += LINE_WIDTH / sizeof(unsigned long);
30		v2 += LINE_WIDTH / sizeof(unsigned long);
31	} while (--lines > 0);
32}
33
34void XOR_FUNC_NAME(3)(unsigned long bytes,
35		      unsigned long *v1, unsigned long *v2,
36		      unsigned long *v3)
37{
38	unsigned long lines = bytes / LINE_WIDTH;
39
40	do {
41		__asm__ __volatile__ (
42			LD_INOUT_LINE(v1)
43			LD_AND_XOR_LINE(v2)
44			LD_AND_XOR_LINE(v3)
45			ST_LINE(v1)
46		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
47		);
48
49		v1 += LINE_WIDTH / sizeof(unsigned long);
50		v2 += LINE_WIDTH / sizeof(unsigned long);
51		v3 += LINE_WIDTH / sizeof(unsigned long);
52	} while (--lines > 0);
53}
54
55void XOR_FUNC_NAME(4)(unsigned long bytes,
56		      unsigned long *v1, unsigned long *v2,
57		      unsigned long *v3, unsigned long *v4)
58{
59	unsigned long lines = bytes / LINE_WIDTH;
60
61	do {
62		__asm__ __volatile__ (
63			LD_INOUT_LINE(v1)
64			LD_AND_XOR_LINE(v2)
65			LD_AND_XOR_LINE(v3)
66			LD_AND_XOR_LINE(v4)
67			ST_LINE(v1)
68		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
69		: "memory"
70		);
71
72		v1 += LINE_WIDTH / sizeof(unsigned long);
73		v2 += LINE_WIDTH / sizeof(unsigned long);
74		v3 += LINE_WIDTH / sizeof(unsigned long);
75		v4 += LINE_WIDTH / sizeof(unsigned long);
76	} while (--lines > 0);
77}
78
79void XOR_FUNC_NAME(5)(unsigned long bytes,
80		      unsigned long *v1, unsigned long *v2,
81		      unsigned long *v3, unsigned long *v4,
82		      unsigned long *v5)
83{
84	unsigned long lines = bytes / LINE_WIDTH;
85
86	do {
87		__asm__ __volatile__ (
88			LD_INOUT_LINE(v1)
89			LD_AND_XOR_LINE(v2)
90			LD_AND_XOR_LINE(v3)
91			LD_AND_XOR_LINE(v4)
92			LD_AND_XOR_LINE(v5)
93			ST_LINE(v1)
94		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
95		    [v5] "r"(v5) : "memory"
96		);
97
98		v1 += LINE_WIDTH / sizeof(unsigned long);
99		v2 += LINE_WIDTH / sizeof(unsigned long);
100		v3 += LINE_WIDTH / sizeof(unsigned long);
101		v4 += LINE_WIDTH / sizeof(unsigned long);
102		v5 += LINE_WIDTH / sizeof(unsigned long);
103	} while (--lines > 0);
104}
105