162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Template for XOR operations, instantiated in xor_simd.c. 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Expected preprocessor definitions: 862306a36Sopenharmony_ci * 962306a36Sopenharmony_ci * - LINE_WIDTH 1062306a36Sopenharmony_ci * - XOR_FUNC_NAME(nr) 1162306a36Sopenharmony_ci * - LD_INOUT_LINE(buf) 1262306a36Sopenharmony_ci * - LD_AND_XOR_LINE(buf) 1362306a36Sopenharmony_ci * - ST_LINE(buf) 1462306a36Sopenharmony_ci */ 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_civoid XOR_FUNC_NAME(2)(unsigned long bytes, 1762306a36Sopenharmony_ci unsigned long * __restrict v1, 1862306a36Sopenharmony_ci const unsigned long * __restrict v2) 1962306a36Sopenharmony_ci{ 2062306a36Sopenharmony_ci unsigned long lines = bytes / LINE_WIDTH; 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_ci do { 2362306a36Sopenharmony_ci __asm__ __volatile__ ( 2462306a36Sopenharmony_ci LD_INOUT_LINE(v1) 2562306a36Sopenharmony_ci LD_AND_XOR_LINE(v2) 2662306a36Sopenharmony_ci ST_LINE(v1) 2762306a36Sopenharmony_ci : : [v1] "r"(v1), [v2] "r"(v2) : "memory" 2862306a36Sopenharmony_ci ); 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci v1 += LINE_WIDTH / sizeof(unsigned long); 3162306a36Sopenharmony_ci v2 += LINE_WIDTH / sizeof(unsigned long); 3262306a36Sopenharmony_ci } while (--lines > 0); 3362306a36Sopenharmony_ci} 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_civoid XOR_FUNC_NAME(3)(unsigned long bytes, 3662306a36Sopenharmony_ci unsigned long * __restrict v1, 3762306a36Sopenharmony_ci const unsigned long * __restrict v2, 3862306a36Sopenharmony_ci const unsigned long * __restrict v3) 3962306a36Sopenharmony_ci{ 4062306a36Sopenharmony_ci unsigned long lines = bytes / LINE_WIDTH; 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci do { 4362306a36Sopenharmony_ci __asm__ __volatile__ ( 4462306a36Sopenharmony_ci LD_INOUT_LINE(v1) 4562306a36Sopenharmony_ci LD_AND_XOR_LINE(v2) 4662306a36Sopenharmony_ci LD_AND_XOR_LINE(v3) 4762306a36Sopenharmony_ci ST_LINE(v1) 4862306a36Sopenharmony_ci : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory" 4962306a36Sopenharmony_ci ); 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci v1 += LINE_WIDTH / sizeof(unsigned long); 5262306a36Sopenharmony_ci v2 += LINE_WIDTH / sizeof(unsigned long); 5362306a36Sopenharmony_ci v3 += LINE_WIDTH / sizeof(unsigned long); 5462306a36Sopenharmony_ci } while (--lines > 0); 5562306a36Sopenharmony_ci} 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_civoid XOR_FUNC_NAME(4)(unsigned long bytes, 5862306a36Sopenharmony_ci unsigned long * __restrict v1, 5962306a36Sopenharmony_ci const unsigned long * __restrict v2, 6062306a36Sopenharmony_ci const unsigned long * __restrict v3, 6162306a36Sopenharmony_ci const unsigned long * __restrict v4) 6262306a36Sopenharmony_ci{ 6362306a36Sopenharmony_ci unsigned long lines = bytes / LINE_WIDTH; 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci do { 6662306a36Sopenharmony_ci __asm__ __volatile__ ( 6762306a36Sopenharmony_ci LD_INOUT_LINE(v1) 6862306a36Sopenharmony_ci LD_AND_XOR_LINE(v2) 6962306a36Sopenharmony_ci LD_AND_XOR_LINE(v3) 7062306a36Sopenharmony_ci LD_AND_XOR_LINE(v4) 7162306a36Sopenharmony_ci ST_LINE(v1) 7262306a36Sopenharmony_ci : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4) 7362306a36Sopenharmony_ci : "memory" 7462306a36Sopenharmony_ci ); 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci v1 += LINE_WIDTH / sizeof(unsigned long); 7762306a36Sopenharmony_ci v2 += LINE_WIDTH / sizeof(unsigned long); 7862306a36Sopenharmony_ci v3 += LINE_WIDTH / sizeof(unsigned long); 7962306a36Sopenharmony_ci v4 += LINE_WIDTH / sizeof(unsigned long); 8062306a36Sopenharmony_ci } while (--lines > 0); 8162306a36Sopenharmony_ci} 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_civoid XOR_FUNC_NAME(5)(unsigned long bytes, 8462306a36Sopenharmony_ci unsigned long * __restrict v1, 8562306a36Sopenharmony_ci const unsigned long * __restrict v2, 8662306a36Sopenharmony_ci const unsigned long * __restrict v3, 8762306a36Sopenharmony_ci const unsigned long * __restrict v4, 8862306a36Sopenharmony_ci const unsigned long * __restrict v5) 8962306a36Sopenharmony_ci{ 9062306a36Sopenharmony_ci unsigned long lines = bytes / LINE_WIDTH; 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci do { 9362306a36Sopenharmony_ci __asm__ __volatile__ ( 9462306a36Sopenharmony_ci LD_INOUT_LINE(v1) 9562306a36Sopenharmony_ci LD_AND_XOR_LINE(v2) 9662306a36Sopenharmony_ci LD_AND_XOR_LINE(v3) 9762306a36Sopenharmony_ci LD_AND_XOR_LINE(v4) 9862306a36Sopenharmony_ci LD_AND_XOR_LINE(v5) 9962306a36Sopenharmony_ci ST_LINE(v1) 10062306a36Sopenharmony_ci : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4), 10162306a36Sopenharmony_ci [v5] "r"(v5) : "memory" 10262306a36Sopenharmony_ci ); 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci v1 += LINE_WIDTH / sizeof(unsigned long); 10562306a36Sopenharmony_ci v2 += LINE_WIDTH / sizeof(unsigned long); 10662306a36Sopenharmony_ci v3 += LINE_WIDTH / sizeof(unsigned long); 10762306a36Sopenharmony_ci v4 += LINE_WIDTH / sizeof(unsigned long); 10862306a36Sopenharmony_ci v5 += LINE_WIDTH / sizeof(unsigned long); 10962306a36Sopenharmony_ci } while (--lines > 0); 11062306a36Sopenharmony_ci} 111