1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> 4 * 5 * Template for XOR operations, instantiated in xor_simd.c. 6 * 7 * Expected preprocessor definitions: 8 * 9 * - LINE_WIDTH 10 * - XOR_FUNC_NAME(nr) 11 * - LD_INOUT_LINE(buf) 12 * - LD_AND_XOR_LINE(buf) 13 * - ST_LINE(buf) 14 */ 15 16 void XOR_FUNC_NAME(2)(unsigned long bytes, 17 unsigned long *v1, unsigned long *v2) 18 { 19 unsigned long lines = bytes / LINE_WIDTH; 20 21 do { 22 __asm__ __volatile__ ( 23 LD_INOUT_LINE(v1) 24 LD_AND_XOR_LINE(v2) 25 ST_LINE(v1) 26 : : [v1] "r"(v1), [v2] "r"(v2) : "memory" 27 ); 28 29 v1 += LINE_WIDTH / sizeof(unsigned long); 30 v2 += LINE_WIDTH / sizeof(unsigned long); 31 } while (--lines > 0); 32 } 33 34 void XOR_FUNC_NAME(3)(unsigned long bytes, 35 unsigned long *v1, unsigned long *v2, 36 unsigned long *v3) 37 { 38 unsigned long lines = bytes / LINE_WIDTH; 39 40 do { 41 __asm__ __volatile__ ( 42 LD_INOUT_LINE(v1) 43 LD_AND_XOR_LINE(v2) 44 LD_AND_XOR_LINE(v3) 45 ST_LINE(v1) 46 : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory" 47 ); 48 49 v1 += LINE_WIDTH / sizeof(unsigned long); 50 v2 += LINE_WIDTH / sizeof(unsigned long); 51 v3 += LINE_WIDTH / sizeof(unsigned long); 52 } while (--lines > 0); 53 } 54 55 void XOR_FUNC_NAME(4)(unsigned long bytes, 56 unsigned long *v1, unsigned long *v2, 57 unsigned long *v3, unsigned long *v4) 58 { 59 unsigned long lines = bytes / LINE_WIDTH; 60 61 do { 62 __asm__ __volatile__ ( 63 LD_INOUT_LINE(v1) 64 LD_AND_XOR_LINE(v2) 65 LD_AND_XOR_LINE(v3) 66 LD_AND_XOR_LINE(v4) 67 ST_LINE(v1) 68 : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4) 69 : "memory" 70 ); 71 72 v1 += LINE_WIDTH / sizeof(unsigned long); 73 v2 += LINE_WIDTH / sizeof(unsigned long); 74 v3 += LINE_WIDTH / sizeof(unsigned long); 75 v4 += LINE_WIDTH / sizeof(unsigned long); 76 } while (--lines > 0); 77 } 78 79 void XOR_FUNC_NAME(5)(unsigned long bytes, 80 unsigned long *v1, unsigned long *v2, 81 unsigned long *v3, unsigned long *v4, 82 unsigned long *v5) 83 { 84 unsigned long lines = bytes / LINE_WIDTH; 85 86 do { 87 __asm__ __volatile__ ( 88 LD_INOUT_LINE(v1) 89 LD_AND_XOR_LINE(v2) 90 LD_AND_XOR_LINE(v3) 91 LD_AND_XOR_LINE(v4) 92 LD_AND_XOR_LINE(v5) 93 ST_LINE(v1) 94 : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4), 95 [v5] "r"(v5) : "memory" 96 ); 97 98 v1 += LINE_WIDTH / sizeof(unsigned long); 99 v2 += LINE_WIDTH / sizeof(unsigned long); 100 v3 += LINE_WIDTH / sizeof(unsigned long); 101 v4 += LINE_WIDTH / sizeof(unsigned long); 102 v5 += LINE_WIDTH / sizeof(unsigned long); 103 } while (--lines > 0); 104 } 105