// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * LoongArch SIMD XOR operations
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include "xor_simd.h"
962306a36Sopenharmony_ci
/*
 * Number of bytes handled per loop iteration: one cache line (64 bytes).
 * This assumes that future popular LoongArch cores will have performance
 * characteristics similar to the current models.
 *
 * The per-line load/xor/store macros defined in this file are written for
 * exactly this width (offsets 0..48 in 16-byte steps for LSX, 0..32 in
 * 32-byte steps for LASX), so they must be revisited if this value changes.
 */
#define LINE_WIDTH 64
1662306a36Sopenharmony_ci
1762306a36Sopenharmony_ci#ifdef CONFIG_CPU_HAS_LSX
1862306a36Sopenharmony_ci
/*
 * Assembly string fragments for the LSX flavour ($vr registers, 16-byte
 * steps between accesses).
 *
 * Every macro expands to adjacent string literals, one instruction per
 * fragment, each terminated by "\n\t" so that invocations can be placed
 * back to back and concatenate into one template string. The "%[base]"
 * syntax refers to a named asm operand; the operand itself is expected to
 * be supplied by the user of these macros (presumably xor_template.c --
 * not visible from this file).
 *
 * LD(reg, base, offset):  vld $vr<reg>, %[<base>], <offset>
 * ST(reg, base, offset):  vst $vr<reg>, %[<base>], <offset>
 * XOR(dj, k):             $vr<dj> = $vr<dj> ^ $vr<k>
 *
 * NOTE: the arguments are stringified, so <reg>, <offset>, <dj> and <k>
 * must be plain literals, not expressions or other macros.
 */
#define LD(reg, base, offset)	\
	"vld $vr" #reg ", %[" #base "], " #offset "\n\t"
#define ST(reg, base, offset)	\
	"vst $vr" #reg ", %[" #base "], " #offset "\n\t"
#define XOR(dj, k)	"vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"

/* Load the 64-byte line at <base> into $vr0..$vr3. */
#define LD_INOUT_LINE(base)	\
	LD(0, base, 0)		\
	LD(1, base, 16)		\
	LD(2, base, 32)		\
	LD(3, base, 48)

/*
 * Load the 64-byte line at <base> into the scratch registers $vr4..$vr7,
 * then XOR it into $vr0..$vr3. All four loads are issued before the first
 * XOR; keep that order, it is part of the emitted instruction stream.
 */
#define LD_AND_XOR_LINE(base)	\
	LD(4, base, 0)		\
	LD(5, base, 16)		\
	LD(6, base, 32)		\
	LD(7, base, 48)		\
	XOR(0, 4)		\
	XOR(1, 5)		\
	XOR(2, 6)		\
	XOR(3, 7)

/* Store $vr0..$vr3 to the 64-byte line at <base>. */
#define ST_LINE(base)		\
	ST(0, base, 0)		\
	ST(1, base, 16)		\
	ST(2, base, 32)		\
	ST(3, base, 48)

/*
 * Token-pastes <nr> onto the "__xor_lsx_" prefix. Presumably consumed by
 * xor_template.c to name the routines it generates -- confirm there.
 */
#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
4862306a36Sopenharmony_ci#include "xor_template.c"
4962306a36Sopenharmony_ci
/*
 * Drop the LSX-specific definitions again. The LASX section further down
 * in this file redefines every one of these names, so they must not stay
 * defined past this point.
 */
#undef LD
#undef ST
#undef XOR
#undef LD_INOUT_LINE
#undef LD_AND_XOR_LINE
#undef ST_LINE
#undef XOR_FUNC_NAME
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci#endif /* CONFIG_CPU_HAS_LSX */
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci#ifdef CONFIG_CPU_HAS_LASX
6162306a36Sopenharmony_ci
/*
 * Assembly string fragments for the LASX flavour ($xr registers, 32-byte
 * steps between accesses).
 *
 * Every macro expands to adjacent string literals, one instruction per
 * fragment, each terminated by "\n\t" so that invocations can be placed
 * back to back and concatenate into one template string. The "%[base]"
 * syntax refers to a named asm operand; the operand itself is expected to
 * be supplied by the user of these macros (presumably xor_template.c --
 * not visible from this file).
 *
 * LD(reg, base, offset):  xvld $xr<reg>, %[<base>], <offset>
 * ST(reg, base, offset):  xvst $xr<reg>, %[<base>], <offset>
 * XOR(dj, k):             $xr<dj> = $xr<dj> ^ $xr<k>
 *
 * NOTE: the arguments are stringified, so <reg>, <offset>, <dj> and <k>
 * must be plain literals, not expressions or other macros.
 */
#define LD(reg, base, offset)	\
	"xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
#define ST(reg, base, offset)	\
	"xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
#define XOR(dj, k)	"xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"

/* Load the 64-byte line at <base> into $xr0 and $xr1. */
#define LD_INOUT_LINE(base)	\
	LD(0, base, 0)		\
	LD(1, base, 32)

/*
 * Load the 64-byte line at <base> into the scratch registers $xr2 and
 * $xr3, then XOR it into $xr0 and $xr1. Both loads are issued before the
 * first XOR; keep that order, it is part of the emitted instruction stream.
 */
#define LD_AND_XOR_LINE(base)	\
	LD(2, base, 0)		\
	LD(3, base, 32)		\
	XOR(0, 2)		\
	XOR(1, 3)

/* Store $xr0 and $xr1 to the 64-byte line at <base>. */
#define ST_LINE(base)		\
	ST(0, base, 0)		\
	ST(1, base, 32)

/*
 * Token-pastes <nr> onto the "__xor_lasx_" prefix. Presumably consumed by
 * xor_template.c to name the routines it generates -- confirm there.
 */
#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
8362306a36Sopenharmony_ci#include "xor_template.c"
8462306a36Sopenharmony_ci
/*
 * Drop the LASX-specific definitions again so that none of these short,
 * generic names (LD, ST, XOR, ...) stays defined after this section.
 */
#undef LD
#undef ST
#undef XOR
#undef LD_INOUT_LINE
#undef LD_AND_XOR_LINE
#undef ST_LINE
#undef XOR_FUNC_NAME
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci#endif /* CONFIG_CPU_HAS_LASX */
94