162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * LoongArch SIMD XOR operations 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include "xor_simd.h" 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci/* 1162306a36Sopenharmony_ci * Process one cache line (64 bytes) per loop. This is assuming all future 1262306a36Sopenharmony_ci * popular LoongArch cores are similar performance-characteristics-wise to the 1362306a36Sopenharmony_ci * current models. 1462306a36Sopenharmony_ci */ 1562306a36Sopenharmony_ci#define LINE_WIDTH 64 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci#ifdef CONFIG_CPU_HAS_LSX 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci#define LD(reg, base, offset) \ 2062306a36Sopenharmony_ci "vld $vr" #reg ", %[" #base "], " #offset "\n\t" 2162306a36Sopenharmony_ci#define ST(reg, base, offset) \ 2262306a36Sopenharmony_ci "vst $vr" #reg ", %[" #base "], " #offset "\n\t" 2362306a36Sopenharmony_ci#define XOR(dj, k) "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t" 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci#define LD_INOUT_LINE(base) \ 2662306a36Sopenharmony_ci LD(0, base, 0) \ 2762306a36Sopenharmony_ci LD(1, base, 16) \ 2862306a36Sopenharmony_ci LD(2, base, 32) \ 2962306a36Sopenharmony_ci LD(3, base, 48) 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci#define LD_AND_XOR_LINE(base) \ 3262306a36Sopenharmony_ci LD(4, base, 0) \ 3362306a36Sopenharmony_ci LD(5, base, 16) \ 3462306a36Sopenharmony_ci LD(6, base, 32) \ 3562306a36Sopenharmony_ci LD(7, base, 48) \ 3662306a36Sopenharmony_ci XOR(0, 4) \ 3762306a36Sopenharmony_ci XOR(1, 5) \ 3862306a36Sopenharmony_ci XOR(2, 6) \ 3962306a36Sopenharmony_ci XOR(3, 7) 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci#define ST_LINE(base) \ 4262306a36Sopenharmony_ci ST(0, base, 0) \ 4362306a36Sopenharmony_ci ST(1, base, 16) \ 4462306a36Sopenharmony_ci ST(2, base, 32) \ 4562306a36Sopenharmony_ci ST(3, base, 48) 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_ci#define XOR_FUNC_NAME(nr) __xor_lsx_##nr 4862306a36Sopenharmony_ci#include "xor_template.c" 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci#undef LD 5162306a36Sopenharmony_ci#undef ST 5262306a36Sopenharmony_ci#undef XOR 5362306a36Sopenharmony_ci#undef LD_INOUT_LINE 5462306a36Sopenharmony_ci#undef LD_AND_XOR_LINE 5562306a36Sopenharmony_ci#undef ST_LINE 5662306a36Sopenharmony_ci#undef XOR_FUNC_NAME 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci#endif /* CONFIG_CPU_HAS_LSX */ 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci#ifdef CONFIG_CPU_HAS_LASX 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci#define LD(reg, base, offset) \ 6362306a36Sopenharmony_ci "xvld $xr" #reg ", %[" #base "], " #offset "\n\t" 6462306a36Sopenharmony_ci#define ST(reg, base, offset) \ 6562306a36Sopenharmony_ci "xvst $xr" #reg ", %[" #base "], " #offset "\n\t" 6662306a36Sopenharmony_ci#define XOR(dj, k) "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t" 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci#define LD_INOUT_LINE(base) \ 6962306a36Sopenharmony_ci LD(0, base, 0) \ 7062306a36Sopenharmony_ci LD(1, base, 32) 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci#define LD_AND_XOR_LINE(base) \ 7362306a36Sopenharmony_ci LD(2, base, 0) \ 7462306a36Sopenharmony_ci LD(3, base, 32) \ 7562306a36Sopenharmony_ci XOR(0, 2) \ 7662306a36Sopenharmony_ci XOR(1, 3) 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci#define ST_LINE(base) \ 7962306a36Sopenharmony_ci ST(0, base, 0) \ 8062306a36Sopenharmony_ci ST(1, base, 32) 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci#define XOR_FUNC_NAME(nr) __xor_lasx_##nr 8362306a36Sopenharmony_ci#include "xor_template.c" 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci#undef LD 8662306a36Sopenharmony_ci#undef ST 8762306a36Sopenharmony_ci#undef XOR 8862306a36Sopenharmony_ci#undef LD_INOUT_LINE 8962306a36Sopenharmony_ci#undef LD_AND_XOR_LINE 9062306a36Sopenharmony_ci#undef ST_LINE 9162306a36Sopenharmony_ci#undef XOR_FUNC_NAME 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci#endif /* CONFIG_CPU_HAS_LASX */ 94