18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * LoongArch SIMD XOR operations 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> 68c2ecf20Sopenharmony_ci */ 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci#include "xor_simd.h" 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_ci/* 118c2ecf20Sopenharmony_ci * Process one cache line (64 bytes) per loop. This is assuming all future 128c2ecf20Sopenharmony_ci * popular LoongArch cores are similar performance-characteristics-wise to the 138c2ecf20Sopenharmony_ci * current models. 148c2ecf20Sopenharmony_ci */ 158c2ecf20Sopenharmony_ci#define LINE_WIDTH 64 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ci#ifdef CONFIG_CPU_HAS_LSX 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci#define LD(reg, base, offset) \ 208c2ecf20Sopenharmony_ci "vld $vr" #reg ", %[" #base "], " #offset "\n\t" 218c2ecf20Sopenharmony_ci#define ST(reg, base, offset) \ 228c2ecf20Sopenharmony_ci "vst $vr" #reg ", %[" #base "], " #offset "\n\t" 238c2ecf20Sopenharmony_ci#define XOR(dj, k) "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t" 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci#define LD_INOUT_LINE(base) \ 268c2ecf20Sopenharmony_ci LD(0, base, 0) \ 278c2ecf20Sopenharmony_ci LD(1, base, 16) \ 288c2ecf20Sopenharmony_ci LD(2, base, 32) \ 298c2ecf20Sopenharmony_ci LD(3, base, 48) 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ci#define LD_AND_XOR_LINE(base) \ 328c2ecf20Sopenharmony_ci LD(4, base, 0) \ 338c2ecf20Sopenharmony_ci LD(5, base, 16) \ 348c2ecf20Sopenharmony_ci LD(6, base, 32) \ 358c2ecf20Sopenharmony_ci LD(7, base, 48) \ 368c2ecf20Sopenharmony_ci XOR(0, 4) \ 378c2ecf20Sopenharmony_ci XOR(1, 5) \ 388c2ecf20Sopenharmony_ci XOR(2, 6) \ 398c2ecf20Sopenharmony_ci XOR(3, 7) 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ci#define ST_LINE(base) \ 428c2ecf20Sopenharmony_ci ST(0, base, 0) \ 438c2ecf20Sopenharmony_ci ST(1, base, 16) \ 448c2ecf20Sopenharmony_ci ST(2, base, 32) \ 458c2ecf20Sopenharmony_ci ST(3, base, 48) 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci#define XOR_FUNC_NAME(nr) __xor_lsx_##nr 488c2ecf20Sopenharmony_ci#include "xor_template.c" 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci#undef LD 518c2ecf20Sopenharmony_ci#undef ST 528c2ecf20Sopenharmony_ci#undef XOR 538c2ecf20Sopenharmony_ci#undef LD_INOUT_LINE 548c2ecf20Sopenharmony_ci#undef LD_AND_XOR_LINE 558c2ecf20Sopenharmony_ci#undef ST_LINE 568c2ecf20Sopenharmony_ci#undef XOR_FUNC_NAME 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_ci#endif /* CONFIG_CPU_HAS_LSX */ 598c2ecf20Sopenharmony_ci 608c2ecf20Sopenharmony_ci#ifdef CONFIG_CPU_HAS_LASX 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci#define LD(reg, base, offset) \ 638c2ecf20Sopenharmony_ci "xvld $xr" #reg ", %[" #base "], " #offset "\n\t" 648c2ecf20Sopenharmony_ci#define ST(reg, base, offset) \ 658c2ecf20Sopenharmony_ci "xvst $xr" #reg ", %[" #base "], " #offset "\n\t" 668c2ecf20Sopenharmony_ci#define XOR(dj, k) "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t" 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci#define LD_INOUT_LINE(base) \ 698c2ecf20Sopenharmony_ci LD(0, base, 0) \ 708c2ecf20Sopenharmony_ci LD(1, base, 32) 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci#define LD_AND_XOR_LINE(base) \ 738c2ecf20Sopenharmony_ci LD(2, base, 0) \ 748c2ecf20Sopenharmony_ci LD(3, base, 32) \ 758c2ecf20Sopenharmony_ci XOR(0, 2) \ 768c2ecf20Sopenharmony_ci XOR(1, 3) 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci#define ST_LINE(base) \ 798c2ecf20Sopenharmony_ci ST(0, base, 0) \ 808c2ecf20Sopenharmony_ci ST(1, base, 32) 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci#define XOR_FUNC_NAME(nr) __xor_lasx_##nr 838c2ecf20Sopenharmony_ci#include "xor_template.c" 848c2ecf20Sopenharmony_ci 858c2ecf20Sopenharmony_ci#undef LD 868c2ecf20Sopenharmony_ci#undef ST 878c2ecf20Sopenharmony_ci#undef XOR 888c2ecf20Sopenharmony_ci#undef LD_INOUT_LINE 898c2ecf20Sopenharmony_ci#undef LD_AND_XOR_LINE 908c2ecf20Sopenharmony_ci#undef ST_LINE 918c2ecf20Sopenharmony_ci#undef XOR_FUNC_NAME 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci#endif /* CONFIG_CPU_HAS_LASX */ 94