18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Template for XOR operations, instantiated in xor_simd.c. 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Expected preprocessor definitions: 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * - LINE_WIDTH 108c2ecf20Sopenharmony_ci * - XOR_FUNC_NAME(nr) 118c2ecf20Sopenharmony_ci * - LD_INOUT_LINE(buf) 128c2ecf20Sopenharmony_ci * - LD_AND_XOR_LINE(buf) 138c2ecf20Sopenharmony_ci * - ST_LINE(buf) 148c2ecf20Sopenharmony_ci */ 158c2ecf20Sopenharmony_ci 168c2ecf20Sopenharmony_civoid XOR_FUNC_NAME(2)(unsigned long bytes, 178c2ecf20Sopenharmony_ci unsigned long *v1, unsigned long *v2) 188c2ecf20Sopenharmony_ci{ 198c2ecf20Sopenharmony_ci unsigned long lines = bytes / LINE_WIDTH; 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_ci do { 228c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 238c2ecf20Sopenharmony_ci LD_INOUT_LINE(v1) 248c2ecf20Sopenharmony_ci LD_AND_XOR_LINE(v2) 258c2ecf20Sopenharmony_ci ST_LINE(v1) 268c2ecf20Sopenharmony_ci : : [v1] "r"(v1), [v2] "r"(v2) : "memory" 278c2ecf20Sopenharmony_ci ); 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_ci v1 += LINE_WIDTH / sizeof(unsigned long); 308c2ecf20Sopenharmony_ci v2 += LINE_WIDTH / sizeof(unsigned long); 318c2ecf20Sopenharmony_ci } while (--lines > 0); 328c2ecf20Sopenharmony_ci} 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_civoid XOR_FUNC_NAME(3)(unsigned long bytes, 358c2ecf20Sopenharmony_ci unsigned long *v1, unsigned long *v2, 368c2ecf20Sopenharmony_ci unsigned long *v3) 378c2ecf20Sopenharmony_ci{ 388c2ecf20Sopenharmony_ci unsigned long lines = bytes / LINE_WIDTH; 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_ci do { 418c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 428c2ecf20Sopenharmony_ci LD_INOUT_LINE(v1) 438c2ecf20Sopenharmony_ci LD_AND_XOR_LINE(v2) 448c2ecf20Sopenharmony_ci LD_AND_XOR_LINE(v3) 458c2ecf20Sopenharmony_ci ST_LINE(v1) 468c2ecf20Sopenharmony_ci : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory" 478c2ecf20Sopenharmony_ci ); 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_ci v1 += LINE_WIDTH / sizeof(unsigned long); 508c2ecf20Sopenharmony_ci v2 += LINE_WIDTH / sizeof(unsigned long); 518c2ecf20Sopenharmony_ci v3 += LINE_WIDTH / sizeof(unsigned long); 528c2ecf20Sopenharmony_ci } while (--lines > 0); 538c2ecf20Sopenharmony_ci} 548c2ecf20Sopenharmony_ci 558c2ecf20Sopenharmony_civoid XOR_FUNC_NAME(4)(unsigned long bytes, 568c2ecf20Sopenharmony_ci unsigned long *v1, unsigned long *v2, 578c2ecf20Sopenharmony_ci unsigned long *v3, unsigned long *v4) 588c2ecf20Sopenharmony_ci{ 598c2ecf20Sopenharmony_ci unsigned long lines = bytes / LINE_WIDTH; 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci do { 628c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 638c2ecf20Sopenharmony_ci LD_INOUT_LINE(v1) 648c2ecf20Sopenharmony_ci LD_AND_XOR_LINE(v2) 658c2ecf20Sopenharmony_ci LD_AND_XOR_LINE(v3) 668c2ecf20Sopenharmony_ci LD_AND_XOR_LINE(v4) 678c2ecf20Sopenharmony_ci ST_LINE(v1) 688c2ecf20Sopenharmony_ci : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4) 698c2ecf20Sopenharmony_ci : "memory" 708c2ecf20Sopenharmony_ci ); 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci v1 += LINE_WIDTH / sizeof(unsigned long); 738c2ecf20Sopenharmony_ci v2 += LINE_WIDTH / sizeof(unsigned long); 748c2ecf20Sopenharmony_ci v3 += LINE_WIDTH / sizeof(unsigned long); 758c2ecf20Sopenharmony_ci v4 += LINE_WIDTH / sizeof(unsigned long); 768c2ecf20Sopenharmony_ci } while (--lines > 0); 778c2ecf20Sopenharmony_ci} 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_civoid XOR_FUNC_NAME(5)(unsigned long bytes, 808c2ecf20Sopenharmony_ci unsigned long *v1, unsigned long *v2, 818c2ecf20Sopenharmony_ci unsigned long *v3, unsigned long *v4, 828c2ecf20Sopenharmony_ci unsigned long *v5) 838c2ecf20Sopenharmony_ci{ 848c2ecf20Sopenharmony_ci unsigned long lines = bytes / LINE_WIDTH; 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci do { 878c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 888c2ecf20Sopenharmony_ci LD_INOUT_LINE(v1) 898c2ecf20Sopenharmony_ci LD_AND_XOR_LINE(v2) 908c2ecf20Sopenharmony_ci LD_AND_XOR_LINE(v3) 918c2ecf20Sopenharmony_ci LD_AND_XOR_LINE(v4) 928c2ecf20Sopenharmony_ci LD_AND_XOR_LINE(v5) 938c2ecf20Sopenharmony_ci ST_LINE(v1) 948c2ecf20Sopenharmony_ci : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4), 958c2ecf20Sopenharmony_ci [v5] "r"(v5) : "memory" 968c2ecf20Sopenharmony_ci ); 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ci v1 += LINE_WIDTH / sizeof(unsigned long); 998c2ecf20Sopenharmony_ci v2 += LINE_WIDTH / sizeof(unsigned long); 1008c2ecf20Sopenharmony_ci v3 += LINE_WIDTH / sizeof(unsigned long); 1018c2ecf20Sopenharmony_ci v4 += LINE_WIDTH / sizeof(unsigned long); 1028c2ecf20Sopenharmony_ci v5 += LINE_WIDTH / sizeof(unsigned long); 1038c2ecf20Sopenharmony_ci } while (--lines > 0); 1048c2ecf20Sopenharmony_ci} 105