// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 *
 * Template for XOR operations, instantiated in xor_simd.c.
 *
 * Expected preprocessor definitions:
 *
 * - LINE_WIDTH
 * - XOR_FUNC_NAME(nr)
 * - LD_INOUT_LINE(buf)
 * - LD_AND_XOR_LINE(buf)
 * - ST_LINE(buf)
 */
158c2ecf20Sopenharmony_ci
168c2ecf20Sopenharmony_civoid XOR_FUNC_NAME(2)(unsigned long bytes,
178c2ecf20Sopenharmony_ci		      unsigned long *v1, unsigned long *v2)
188c2ecf20Sopenharmony_ci{
198c2ecf20Sopenharmony_ci	unsigned long lines = bytes / LINE_WIDTH;
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_ci	do {
228c2ecf20Sopenharmony_ci		__asm__ __volatile__ (
238c2ecf20Sopenharmony_ci			LD_INOUT_LINE(v1)
248c2ecf20Sopenharmony_ci			LD_AND_XOR_LINE(v2)
258c2ecf20Sopenharmony_ci			ST_LINE(v1)
268c2ecf20Sopenharmony_ci		: : [v1] "r"(v1), [v2] "r"(v2) : "memory"
278c2ecf20Sopenharmony_ci		);
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_ci		v1 += LINE_WIDTH / sizeof(unsigned long);
308c2ecf20Sopenharmony_ci		v2 += LINE_WIDTH / sizeof(unsigned long);
318c2ecf20Sopenharmony_ci	} while (--lines > 0);
328c2ecf20Sopenharmony_ci}
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_civoid XOR_FUNC_NAME(3)(unsigned long bytes,
358c2ecf20Sopenharmony_ci		      unsigned long *v1, unsigned long *v2,
368c2ecf20Sopenharmony_ci		      unsigned long *v3)
378c2ecf20Sopenharmony_ci{
388c2ecf20Sopenharmony_ci	unsigned long lines = bytes / LINE_WIDTH;
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci	do {
418c2ecf20Sopenharmony_ci		__asm__ __volatile__ (
428c2ecf20Sopenharmony_ci			LD_INOUT_LINE(v1)
438c2ecf20Sopenharmony_ci			LD_AND_XOR_LINE(v2)
448c2ecf20Sopenharmony_ci			LD_AND_XOR_LINE(v3)
458c2ecf20Sopenharmony_ci			ST_LINE(v1)
468c2ecf20Sopenharmony_ci		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
478c2ecf20Sopenharmony_ci		);
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_ci		v1 += LINE_WIDTH / sizeof(unsigned long);
508c2ecf20Sopenharmony_ci		v2 += LINE_WIDTH / sizeof(unsigned long);
518c2ecf20Sopenharmony_ci		v3 += LINE_WIDTH / sizeof(unsigned long);
528c2ecf20Sopenharmony_ci	} while (--lines > 0);
538c2ecf20Sopenharmony_ci}
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_civoid XOR_FUNC_NAME(4)(unsigned long bytes,
568c2ecf20Sopenharmony_ci		      unsigned long *v1, unsigned long *v2,
578c2ecf20Sopenharmony_ci		      unsigned long *v3, unsigned long *v4)
588c2ecf20Sopenharmony_ci{
598c2ecf20Sopenharmony_ci	unsigned long lines = bytes / LINE_WIDTH;
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci	do {
628c2ecf20Sopenharmony_ci		__asm__ __volatile__ (
638c2ecf20Sopenharmony_ci			LD_INOUT_LINE(v1)
648c2ecf20Sopenharmony_ci			LD_AND_XOR_LINE(v2)
658c2ecf20Sopenharmony_ci			LD_AND_XOR_LINE(v3)
668c2ecf20Sopenharmony_ci			LD_AND_XOR_LINE(v4)
678c2ecf20Sopenharmony_ci			ST_LINE(v1)
688c2ecf20Sopenharmony_ci		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
698c2ecf20Sopenharmony_ci		: "memory"
708c2ecf20Sopenharmony_ci		);
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_ci		v1 += LINE_WIDTH / sizeof(unsigned long);
738c2ecf20Sopenharmony_ci		v2 += LINE_WIDTH / sizeof(unsigned long);
748c2ecf20Sopenharmony_ci		v3 += LINE_WIDTH / sizeof(unsigned long);
758c2ecf20Sopenharmony_ci		v4 += LINE_WIDTH / sizeof(unsigned long);
768c2ecf20Sopenharmony_ci	} while (--lines > 0);
778c2ecf20Sopenharmony_ci}
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_civoid XOR_FUNC_NAME(5)(unsigned long bytes,
808c2ecf20Sopenharmony_ci		      unsigned long *v1, unsigned long *v2,
818c2ecf20Sopenharmony_ci		      unsigned long *v3, unsigned long *v4,
828c2ecf20Sopenharmony_ci		      unsigned long *v5)
838c2ecf20Sopenharmony_ci{
848c2ecf20Sopenharmony_ci	unsigned long lines = bytes / LINE_WIDTH;
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_ci	do {
878c2ecf20Sopenharmony_ci		__asm__ __volatile__ (
888c2ecf20Sopenharmony_ci			LD_INOUT_LINE(v1)
898c2ecf20Sopenharmony_ci			LD_AND_XOR_LINE(v2)
908c2ecf20Sopenharmony_ci			LD_AND_XOR_LINE(v3)
918c2ecf20Sopenharmony_ci			LD_AND_XOR_LINE(v4)
928c2ecf20Sopenharmony_ci			LD_AND_XOR_LINE(v5)
938c2ecf20Sopenharmony_ci			ST_LINE(v1)
948c2ecf20Sopenharmony_ci		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
958c2ecf20Sopenharmony_ci		    [v5] "r"(v5) : "memory"
968c2ecf20Sopenharmony_ci		);
978c2ecf20Sopenharmony_ci
988c2ecf20Sopenharmony_ci		v1 += LINE_WIDTH / sizeof(unsigned long);
998c2ecf20Sopenharmony_ci		v2 += LINE_WIDTH / sizeof(unsigned long);
1008c2ecf20Sopenharmony_ci		v3 += LINE_WIDTH / sizeof(unsigned long);
1018c2ecf20Sopenharmony_ci		v4 += LINE_WIDTH / sizeof(unsigned long);
1028c2ecf20Sopenharmony_ci		v5 += LINE_WIDTH / sizeof(unsigned long);
1038c2ecf20Sopenharmony_ci	} while (--lines > 0);
1048c2ecf20Sopenharmony_ci}
105