18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX) 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright 2023 WANG Xuerui <git@xen0n.name> 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Based on the generic RAID-6 code (int.uc): 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * Copyright 2002-2004 H. Peter Anvin 108c2ecf20Sopenharmony_ci */ 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci#include <linux/raid/pq.h> 138c2ecf20Sopenharmony_ci#include "loongarch.h" 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_ci/* 168c2ecf20Sopenharmony_ci * The vector algorithms are currently priority 0, which means the generic 178c2ecf20Sopenharmony_ci * scalar algorithms are not being disabled if vector support is present. 188c2ecf20Sopenharmony_ci * This is like the similar LoongArch RAID5 XOR code, with the main reason 198c2ecf20Sopenharmony_ci * repeated here: it cannot be ruled out at this point of time, that some 208c2ecf20Sopenharmony_ci * future (maybe reduced) models could run the vector algorithms slower than 218c2ecf20Sopenharmony_ci * the scalar ones, maybe for errata or micro-op reasons. It may be 228c2ecf20Sopenharmony_ci * appropriate to revisit this after one or two more uarch generations. 238c2ecf20Sopenharmony_ci */ 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci#ifdef CONFIG_CPU_HAS_LSX 268c2ecf20Sopenharmony_ci#define NSIZE 16 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_cistatic int raid6_has_lsx(void) 298c2ecf20Sopenharmony_ci{ 308c2ecf20Sopenharmony_ci return cpu_has_lsx; 318c2ecf20Sopenharmony_ci} 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_cistatic void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs) 348c2ecf20Sopenharmony_ci{ 358c2ecf20Sopenharmony_ci u8 **dptr = (u8 **)ptrs; 368c2ecf20Sopenharmony_ci u8 *p, *q; 378c2ecf20Sopenharmony_ci int d, z, z0; 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_ci z0 = disks - 3; /* Highest data disk */ 408c2ecf20Sopenharmony_ci p = dptr[z0+1]; /* XOR parity */ 418c2ecf20Sopenharmony_ci q = dptr[z0+2]; /* RS syndrome */ 428c2ecf20Sopenharmony_ci 438c2ecf20Sopenharmony_ci kernel_fpu_begin(); 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci /* 468c2ecf20Sopenharmony_ci * $vr0, $vr1, $vr2, $vr3: wp 478c2ecf20Sopenharmony_ci * $vr4, $vr5, $vr6, $vr7: wq 488c2ecf20Sopenharmony_ci * $vr8, $vr9, $vr10, $vr11: wd 498c2ecf20Sopenharmony_ci * $vr12, $vr13, $vr14, $vr15: w2 508c2ecf20Sopenharmony_ci * $vr16, $vr17, $vr18, $vr19: w1 518c2ecf20Sopenharmony_ci */ 528c2ecf20Sopenharmony_ci for (d = 0; d < bytes; d += NSIZE*4) { 538c2ecf20Sopenharmony_ci /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 548c2ecf20Sopenharmony_ci asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); 558c2ecf20Sopenharmony_ci asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); 568c2ecf20Sopenharmony_ci asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE])); 578c2ecf20Sopenharmony_ci asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE])); 588c2ecf20Sopenharmony_ci asm volatile("vori.b $vr4, $vr0, 0"); 598c2ecf20Sopenharmony_ci asm volatile("vori.b $vr5, $vr1, 0"); 608c2ecf20Sopenharmony_ci asm volatile("vori.b $vr6, $vr2, 0"); 618c2ecf20Sopenharmony_ci asm volatile("vori.b $vr7, $vr3, 0"); 628c2ecf20Sopenharmony_ci for (z = z0-1; z >= 0; z--) { 638c2ecf20Sopenharmony_ci /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ 648c2ecf20Sopenharmony_ci asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE])); 658c2ecf20Sopenharmony_ci asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE])); 668c2ecf20Sopenharmony_ci asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE])); 678c2ecf20Sopenharmony_ci asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE])); 688c2ecf20Sopenharmony_ci /* wp$$ ^= wd$$; */ 698c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr0, $vr0, $vr8"); 708c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr1, $vr1, $vr9"); 718c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr2, $vr2, $vr10"); 728c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr3, $vr3, $vr11"); 738c2ecf20Sopenharmony_ci /* w2$$ = MASK(wq$$); */ 748c2ecf20Sopenharmony_ci asm volatile("vslti.b $vr12, $vr4, 0"); 758c2ecf20Sopenharmony_ci asm volatile("vslti.b $vr13, $vr5, 0"); 768c2ecf20Sopenharmony_ci asm volatile("vslti.b $vr14, $vr6, 0"); 778c2ecf20Sopenharmony_ci asm volatile("vslti.b $vr15, $vr7, 0"); 788c2ecf20Sopenharmony_ci /* w1$$ = SHLBYTE(wq$$); */ 798c2ecf20Sopenharmony_ci asm volatile("vslli.b $vr16, $vr4, 1"); 808c2ecf20Sopenharmony_ci asm volatile("vslli.b $vr17, $vr5, 1"); 818c2ecf20Sopenharmony_ci asm volatile("vslli.b $vr18, $vr6, 1"); 828c2ecf20Sopenharmony_ci asm volatile("vslli.b $vr19, $vr7, 1"); 838c2ecf20Sopenharmony_ci /* w2$$ &= NBYTES(0x1d); */ 848c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr12, $vr12, 0x1d"); 858c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr13, $vr13, 0x1d"); 868c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr14, $vr14, 0x1d"); 878c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr15, $vr15, 0x1d"); 888c2ecf20Sopenharmony_ci /* w1$$ ^= w2$$; */ 898c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr16, $vr16, $vr12"); 908c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr17, $vr17, $vr13"); 918c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr18, $vr18, $vr14"); 928c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr19, $vr19, $vr15"); 938c2ecf20Sopenharmony_ci /* wq$$ = w1$$ ^ wd$$; */ 948c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr4, $vr16, $vr8"); 958c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr5, $vr17, $vr9"); 968c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr6, $vr18, $vr10"); 978c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr7, $vr19, $vr11"); 988c2ecf20Sopenharmony_ci } 998c2ecf20Sopenharmony_ci /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */ 1008c2ecf20Sopenharmony_ci asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0])); 1018c2ecf20Sopenharmony_ci asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1])); 1028c2ecf20Sopenharmony_ci asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2])); 1038c2ecf20Sopenharmony_ci asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3])); 1048c2ecf20Sopenharmony_ci /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ 1058c2ecf20Sopenharmony_ci asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0])); 1068c2ecf20Sopenharmony_ci asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1])); 1078c2ecf20Sopenharmony_ci asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2])); 1088c2ecf20Sopenharmony_ci asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3])); 1098c2ecf20Sopenharmony_ci } 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci kernel_fpu_end(); 1128c2ecf20Sopenharmony_ci} 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_cistatic void raid6_lsx_xor_syndrome(int disks, int start, int stop, 1158c2ecf20Sopenharmony_ci size_t bytes, void **ptrs) 1168c2ecf20Sopenharmony_ci{ 1178c2ecf20Sopenharmony_ci u8 **dptr = (u8 **)ptrs; 1188c2ecf20Sopenharmony_ci u8 *p, *q; 1198c2ecf20Sopenharmony_ci int d, z, z0; 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_ci z0 = stop; /* P/Q right side optimization */ 1228c2ecf20Sopenharmony_ci p = dptr[disks-2]; /* XOR parity */ 1238c2ecf20Sopenharmony_ci q = dptr[disks-1]; /* RS syndrome */ 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci kernel_fpu_begin(); 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci /* 1288c2ecf20Sopenharmony_ci * $vr0, $vr1, $vr2, $vr3: wp 1298c2ecf20Sopenharmony_ci * $vr4, $vr5, $vr6, $vr7: wq 1308c2ecf20Sopenharmony_ci * $vr8, $vr9, $vr10, $vr11: wd 1318c2ecf20Sopenharmony_ci * $vr12, $vr13, $vr14, $vr15: w2 1328c2ecf20Sopenharmony_ci * $vr16, $vr17, $vr18, $vr19: w1 1338c2ecf20Sopenharmony_ci */ 1348c2ecf20Sopenharmony_ci for (d = 0; d < bytes; d += NSIZE*4) { 1358c2ecf20Sopenharmony_ci /* P/Q data pages */ 1368c2ecf20Sopenharmony_ci /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 1378c2ecf20Sopenharmony_ci asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); 1388c2ecf20Sopenharmony_ci asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); 1398c2ecf20Sopenharmony_ci asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE])); 1408c2ecf20Sopenharmony_ci asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE])); 1418c2ecf20Sopenharmony_ci asm volatile("vori.b $vr4, $vr0, 0"); 1428c2ecf20Sopenharmony_ci asm volatile("vori.b $vr5, $vr1, 0"); 1438c2ecf20Sopenharmony_ci asm volatile("vori.b $vr6, $vr2, 0"); 1448c2ecf20Sopenharmony_ci asm volatile("vori.b $vr7, $vr3, 0"); 1458c2ecf20Sopenharmony_ci for (z = z0-1; z >= start; z--) { 1468c2ecf20Sopenharmony_ci /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ 1478c2ecf20Sopenharmony_ci asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE])); 1488c2ecf20Sopenharmony_ci asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE])); 1498c2ecf20Sopenharmony_ci asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE])); 1508c2ecf20Sopenharmony_ci asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE])); 1518c2ecf20Sopenharmony_ci /* wp$$ ^= wd$$; */ 1528c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr0, $vr0, $vr8"); 1538c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr1, $vr1, $vr9"); 1548c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr2, $vr2, $vr10"); 1558c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr3, $vr3, $vr11"); 1568c2ecf20Sopenharmony_ci /* w2$$ = MASK(wq$$); */ 1578c2ecf20Sopenharmony_ci asm volatile("vslti.b $vr12, $vr4, 0"); 1588c2ecf20Sopenharmony_ci asm volatile("vslti.b $vr13, $vr5, 0"); 1598c2ecf20Sopenharmony_ci asm volatile("vslti.b $vr14, $vr6, 0"); 1608c2ecf20Sopenharmony_ci asm volatile("vslti.b $vr15, $vr7, 0"); 1618c2ecf20Sopenharmony_ci /* w1$$ = SHLBYTE(wq$$); */ 1628c2ecf20Sopenharmony_ci asm volatile("vslli.b $vr16, $vr4, 1"); 1638c2ecf20Sopenharmony_ci asm volatile("vslli.b $vr17, $vr5, 1"); 1648c2ecf20Sopenharmony_ci asm volatile("vslli.b $vr18, $vr6, 1"); 1658c2ecf20Sopenharmony_ci asm volatile("vslli.b $vr19, $vr7, 1"); 1668c2ecf20Sopenharmony_ci /* w2$$ &= NBYTES(0x1d); */ 1678c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr12, $vr12, 0x1d"); 1688c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr13, $vr13, 0x1d"); 1698c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr14, $vr14, 0x1d"); 1708c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr15, $vr15, 0x1d"); 1718c2ecf20Sopenharmony_ci /* w1$$ ^= w2$$; */ 1728c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr16, $vr16, $vr12"); 1738c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr17, $vr17, $vr13"); 1748c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr18, $vr18, $vr14"); 1758c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr19, $vr19, $vr15"); 1768c2ecf20Sopenharmony_ci /* wq$$ = w1$$ ^ wd$$; */ 1778c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr4, $vr16, $vr8"); 1788c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr5, $vr17, $vr9"); 1798c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr6, $vr18, $vr10"); 1808c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr7, $vr19, $vr11"); 1818c2ecf20Sopenharmony_ci } 1828c2ecf20Sopenharmony_ci 1838c2ecf20Sopenharmony_ci /* P/Q left side optimization */ 1848c2ecf20Sopenharmony_ci for (z = start-1; z >= 0; z--) { 1858c2ecf20Sopenharmony_ci /* w2$$ = MASK(wq$$); */ 1868c2ecf20Sopenharmony_ci asm volatile("vslti.b $vr12, $vr4, 0"); 1878c2ecf20Sopenharmony_ci asm volatile("vslti.b $vr13, $vr5, 0"); 1888c2ecf20Sopenharmony_ci asm volatile("vslti.b $vr14, $vr6, 0"); 1898c2ecf20Sopenharmony_ci asm volatile("vslti.b $vr15, $vr7, 0"); 1908c2ecf20Sopenharmony_ci /* w1$$ = SHLBYTE(wq$$); */ 1918c2ecf20Sopenharmony_ci asm volatile("vslli.b $vr16, $vr4, 1"); 1928c2ecf20Sopenharmony_ci asm volatile("vslli.b $vr17, $vr5, 1"); 1938c2ecf20Sopenharmony_ci asm volatile("vslli.b $vr18, $vr6, 1"); 1948c2ecf20Sopenharmony_ci asm volatile("vslli.b $vr19, $vr7, 1"); 1958c2ecf20Sopenharmony_ci /* w2$$ &= NBYTES(0x1d); */ 1968c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr12, $vr12, 0x1d"); 1978c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr13, $vr13, 0x1d"); 1988c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr14, $vr14, 0x1d"); 1998c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr15, $vr15, 0x1d"); 2008c2ecf20Sopenharmony_ci /* wq$$ = w1$$ ^ w2$$; */ 2018c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr4, $vr16, $vr12"); 2028c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr5, $vr17, $vr13"); 2038c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr6, $vr18, $vr14"); 2048c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr7, $vr19, $vr15"); 2058c2ecf20Sopenharmony_ci } 2068c2ecf20Sopenharmony_ci /* 2078c2ecf20Sopenharmony_ci * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; 2088c2ecf20Sopenharmony_ci * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; 2098c2ecf20Sopenharmony_ci */ 2108c2ecf20Sopenharmony_ci asm volatile( 2118c2ecf20Sopenharmony_ci "vld $vr20, %0\n\t" 2128c2ecf20Sopenharmony_ci "vld $vr21, %1\n\t" 2138c2ecf20Sopenharmony_ci "vld $vr22, %2\n\t" 2148c2ecf20Sopenharmony_ci "vld $vr23, %3\n\t" 2158c2ecf20Sopenharmony_ci "vld $vr24, %4\n\t" 2168c2ecf20Sopenharmony_ci "vld $vr25, %5\n\t" 2178c2ecf20Sopenharmony_ci "vld $vr26, %6\n\t" 2188c2ecf20Sopenharmony_ci "vld $vr27, %7\n\t" 2198c2ecf20Sopenharmony_ci "vxor.v $vr20, $vr20, $vr0\n\t" 2208c2ecf20Sopenharmony_ci "vxor.v $vr21, $vr21, $vr1\n\t" 2218c2ecf20Sopenharmony_ci "vxor.v $vr22, $vr22, $vr2\n\t" 2228c2ecf20Sopenharmony_ci "vxor.v $vr23, $vr23, $vr3\n\t" 2238c2ecf20Sopenharmony_ci "vxor.v $vr24, $vr24, $vr4\n\t" 2248c2ecf20Sopenharmony_ci "vxor.v $vr25, $vr25, $vr5\n\t" 2258c2ecf20Sopenharmony_ci "vxor.v $vr26, $vr26, $vr6\n\t" 2268c2ecf20Sopenharmony_ci "vxor.v $vr27, $vr27, $vr7\n\t" 2278c2ecf20Sopenharmony_ci "vst $vr20, %0\n\t" 2288c2ecf20Sopenharmony_ci "vst $vr21, %1\n\t" 2298c2ecf20Sopenharmony_ci "vst $vr22, %2\n\t" 2308c2ecf20Sopenharmony_ci "vst $vr23, %3\n\t" 2318c2ecf20Sopenharmony_ci "vst $vr24, %4\n\t" 2328c2ecf20Sopenharmony_ci "vst $vr25, %5\n\t" 2338c2ecf20Sopenharmony_ci "vst $vr26, %6\n\t" 2348c2ecf20Sopenharmony_ci "vst $vr27, %7\n\t" 2358c2ecf20Sopenharmony_ci : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]), 2368c2ecf20Sopenharmony_ci "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]), 2378c2ecf20Sopenharmony_ci "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]), 2388c2ecf20Sopenharmony_ci "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3]) 2398c2ecf20Sopenharmony_ci ); 2408c2ecf20Sopenharmony_ci } 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci kernel_fpu_end(); 2438c2ecf20Sopenharmony_ci} 2448c2ecf20Sopenharmony_ci 2458c2ecf20Sopenharmony_ciconst struct raid6_calls raid6_lsx = { 2468c2ecf20Sopenharmony_ci raid6_lsx_gen_syndrome, 2478c2ecf20Sopenharmony_ci raid6_lsx_xor_syndrome, 2488c2ecf20Sopenharmony_ci raid6_has_lsx, 2498c2ecf20Sopenharmony_ci "lsx", 2508c2ecf20Sopenharmony_ci}; 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_ci#undef NSIZE 2538c2ecf20Sopenharmony_ci#endif /* CONFIG_CPU_HAS_LSX */ 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_ci#ifdef CONFIG_CPU_HAS_LASX 2568c2ecf20Sopenharmony_ci#define NSIZE 32 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_cistatic int raid6_has_lasx(void) 2598c2ecf20Sopenharmony_ci{ 2608c2ecf20Sopenharmony_ci return cpu_has_lasx; 2618c2ecf20Sopenharmony_ci} 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_cistatic void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs) 2648c2ecf20Sopenharmony_ci{ 2658c2ecf20Sopenharmony_ci u8 **dptr = (u8 **)ptrs; 2668c2ecf20Sopenharmony_ci u8 *p, *q; 2678c2ecf20Sopenharmony_ci int d, z, z0; 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ci z0 = disks - 3; /* Highest data disk */ 2708c2ecf20Sopenharmony_ci p = dptr[z0+1]; /* XOR parity */ 2718c2ecf20Sopenharmony_ci q = dptr[z0+2]; /* RS syndrome */ 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_ci kernel_fpu_begin(); 2748c2ecf20Sopenharmony_ci 2758c2ecf20Sopenharmony_ci /* 2768c2ecf20Sopenharmony_ci * $xr0, $xr1: wp 2778c2ecf20Sopenharmony_ci * $xr2, $xr3: wq 2788c2ecf20Sopenharmony_ci * $xr4, $xr5: wd 2798c2ecf20Sopenharmony_ci * $xr6, $xr7: w2 2808c2ecf20Sopenharmony_ci * $xr8, $xr9: w1 2818c2ecf20Sopenharmony_ci */ 2828c2ecf20Sopenharmony_ci for (d = 0; d < bytes; d += NSIZE*2) { 2838c2ecf20Sopenharmony_ci /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 2848c2ecf20Sopenharmony_ci asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); 2858c2ecf20Sopenharmony_ci asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); 2868c2ecf20Sopenharmony_ci asm volatile("xvori.b $xr2, $xr0, 0"); 2878c2ecf20Sopenharmony_ci asm volatile("xvori.b $xr3, $xr1, 0"); 2888c2ecf20Sopenharmony_ci for (z = z0-1; z >= 0; z--) { 2898c2ecf20Sopenharmony_ci /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ 2908c2ecf20Sopenharmony_ci asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE])); 2918c2ecf20Sopenharmony_ci asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE])); 2928c2ecf20Sopenharmony_ci /* wp$$ ^= wd$$; */ 2938c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr0, $xr0, $xr4"); 2948c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr1, $xr1, $xr5"); 2958c2ecf20Sopenharmony_ci /* w2$$ = MASK(wq$$); */ 2968c2ecf20Sopenharmony_ci asm volatile("xvslti.b $xr6, $xr2, 0"); 2978c2ecf20Sopenharmony_ci asm volatile("xvslti.b $xr7, $xr3, 0"); 2988c2ecf20Sopenharmony_ci /* w1$$ = SHLBYTE(wq$$); */ 2998c2ecf20Sopenharmony_ci asm volatile("xvslli.b $xr8, $xr2, 1"); 3008c2ecf20Sopenharmony_ci asm volatile("xvslli.b $xr9, $xr3, 1"); 3018c2ecf20Sopenharmony_ci /* w2$$ &= NBYTES(0x1d); */ 3028c2ecf20Sopenharmony_ci asm volatile("xvandi.b $xr6, $xr6, 0x1d"); 3038c2ecf20Sopenharmony_ci asm volatile("xvandi.b $xr7, $xr7, 0x1d"); 3048c2ecf20Sopenharmony_ci /* w1$$ ^= w2$$; */ 3058c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr8, $xr8, $xr6"); 3068c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr9, $xr9, $xr7"); 3078c2ecf20Sopenharmony_ci /* wq$$ = w1$$ ^ wd$$; */ 3088c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr2, $xr8, $xr4"); 3098c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr3, $xr9, $xr5"); 3108c2ecf20Sopenharmony_ci } 3118c2ecf20Sopenharmony_ci /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */ 3128c2ecf20Sopenharmony_ci asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0])); 3138c2ecf20Sopenharmony_ci asm volatile("xvst $xr1, %0" : "=m"(p[d+NSIZE*1])); 3148c2ecf20Sopenharmony_ci /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ 3158c2ecf20Sopenharmony_ci asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0])); 3168c2ecf20Sopenharmony_ci asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1])); 3178c2ecf20Sopenharmony_ci } 3188c2ecf20Sopenharmony_ci 3198c2ecf20Sopenharmony_ci kernel_fpu_end(); 3208c2ecf20Sopenharmony_ci} 3218c2ecf20Sopenharmony_ci 3228c2ecf20Sopenharmony_cistatic void raid6_lasx_xor_syndrome(int disks, int start, int stop, 3238c2ecf20Sopenharmony_ci size_t bytes, void **ptrs) 3248c2ecf20Sopenharmony_ci{ 3258c2ecf20Sopenharmony_ci u8 **dptr = (u8 **)ptrs; 3268c2ecf20Sopenharmony_ci u8 *p, *q; 3278c2ecf20Sopenharmony_ci int d, z, z0; 3288c2ecf20Sopenharmony_ci 3298c2ecf20Sopenharmony_ci z0 = stop; /* P/Q right side optimization */ 3308c2ecf20Sopenharmony_ci p = dptr[disks-2]; /* XOR parity */ 3318c2ecf20Sopenharmony_ci q = dptr[disks-1]; /* RS syndrome */ 3328c2ecf20Sopenharmony_ci 3338c2ecf20Sopenharmony_ci kernel_fpu_begin(); 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_ci /* 3368c2ecf20Sopenharmony_ci * $xr0, $xr1: wp 3378c2ecf20Sopenharmony_ci * $xr2, $xr3: wq 3388c2ecf20Sopenharmony_ci * $xr4, $xr5: wd 3398c2ecf20Sopenharmony_ci * $xr6, $xr7: w2 3408c2ecf20Sopenharmony_ci * $xr8, $xr9: w1 3418c2ecf20Sopenharmony_ci */ 3428c2ecf20Sopenharmony_ci for (d = 0; d < bytes; d += NSIZE*2) { 3438c2ecf20Sopenharmony_ci /* P/Q data pages */ 3448c2ecf20Sopenharmony_ci /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 3458c2ecf20Sopenharmony_ci asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); 3468c2ecf20Sopenharmony_ci asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); 3478c2ecf20Sopenharmony_ci asm volatile("xvori.b $xr2, $xr0, 0"); 3488c2ecf20Sopenharmony_ci asm volatile("xvori.b $xr3, $xr1, 0"); 3498c2ecf20Sopenharmony_ci for (z = z0-1; z >= start; z--) { 3508c2ecf20Sopenharmony_ci /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ 3518c2ecf20Sopenharmony_ci asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE])); 3528c2ecf20Sopenharmony_ci asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE])); 3538c2ecf20Sopenharmony_ci /* wp$$ ^= wd$$; */ 3548c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr0, $xr0, $xr4"); 3558c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr1, $xr1, $xr5"); 3568c2ecf20Sopenharmony_ci /* w2$$ = MASK(wq$$); */ 3578c2ecf20Sopenharmony_ci asm volatile("xvslti.b $xr6, $xr2, 0"); 3588c2ecf20Sopenharmony_ci asm volatile("xvslti.b $xr7, $xr3, 0"); 3598c2ecf20Sopenharmony_ci /* w1$$ = SHLBYTE(wq$$); */ 3608c2ecf20Sopenharmony_ci asm volatile("xvslli.b $xr8, $xr2, 1"); 3618c2ecf20Sopenharmony_ci asm volatile("xvslli.b $xr9, $xr3, 1"); 3628c2ecf20Sopenharmony_ci /* w2$$ &= NBYTES(0x1d); */ 3638c2ecf20Sopenharmony_ci asm volatile("xvandi.b $xr6, $xr6, 0x1d"); 3648c2ecf20Sopenharmony_ci asm volatile("xvandi.b $xr7, $xr7, 0x1d"); 3658c2ecf20Sopenharmony_ci /* w1$$ ^= w2$$; */ 3668c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr8, $xr8, $xr6"); 3678c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr9, $xr9, $xr7"); 3688c2ecf20Sopenharmony_ci /* wq$$ = w1$$ ^ wd$$; */ 3698c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr2, $xr8, $xr4"); 3708c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr3, $xr9, $xr5"); 3718c2ecf20Sopenharmony_ci } 3728c2ecf20Sopenharmony_ci 3738c2ecf20Sopenharmony_ci /* P/Q left side optimization */ 3748c2ecf20Sopenharmony_ci for (z = start-1; z >= 0; z--) { 3758c2ecf20Sopenharmony_ci /* w2$$ = MASK(wq$$); */ 3768c2ecf20Sopenharmony_ci asm volatile("xvslti.b $xr6, $xr2, 0"); 3778c2ecf20Sopenharmony_ci asm volatile("xvslti.b $xr7, $xr3, 0"); 3788c2ecf20Sopenharmony_ci /* w1$$ = SHLBYTE(wq$$); */ 3798c2ecf20Sopenharmony_ci asm volatile("xvslli.b $xr8, $xr2, 1"); 3808c2ecf20Sopenharmony_ci asm volatile("xvslli.b $xr9, $xr3, 1"); 3818c2ecf20Sopenharmony_ci /* w2$$ &= NBYTES(0x1d); */ 3828c2ecf20Sopenharmony_ci asm volatile("xvandi.b $xr6, $xr6, 0x1d"); 3838c2ecf20Sopenharmony_ci asm volatile("xvandi.b $xr7, $xr7, 0x1d"); 3848c2ecf20Sopenharmony_ci /* wq$$ = w1$$ ^ w2$$; */ 3858c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr2, $xr8, $xr6"); 3868c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr3, $xr9, $xr7"); 3878c2ecf20Sopenharmony_ci } 3888c2ecf20Sopenharmony_ci /* 3898c2ecf20Sopenharmony_ci * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; 3908c2ecf20Sopenharmony_ci * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; 3918c2ecf20Sopenharmony_ci */ 3928c2ecf20Sopenharmony_ci asm volatile( 3938c2ecf20Sopenharmony_ci "xvld $xr10, %0\n\t" 3948c2ecf20Sopenharmony_ci "xvld $xr11, %1\n\t" 3958c2ecf20Sopenharmony_ci "xvld $xr12, %2\n\t" 3968c2ecf20Sopenharmony_ci "xvld $xr13, %3\n\t" 3978c2ecf20Sopenharmony_ci "xvxor.v $xr10, $xr10, $xr0\n\t" 3988c2ecf20Sopenharmony_ci "xvxor.v $xr11, $xr11, $xr1\n\t" 3998c2ecf20Sopenharmony_ci "xvxor.v $xr12, $xr12, $xr2\n\t" 4008c2ecf20Sopenharmony_ci "xvxor.v $xr13, $xr13, $xr3\n\t" 4018c2ecf20Sopenharmony_ci "xvst $xr10, %0\n\t" 4028c2ecf20Sopenharmony_ci "xvst $xr11, %1\n\t" 4038c2ecf20Sopenharmony_ci "xvst $xr12, %2\n\t" 4048c2ecf20Sopenharmony_ci "xvst $xr13, %3\n\t" 4058c2ecf20Sopenharmony_ci : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]), 4068c2ecf20Sopenharmony_ci "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]) 4078c2ecf20Sopenharmony_ci ); 4088c2ecf20Sopenharmony_ci } 4098c2ecf20Sopenharmony_ci 4108c2ecf20Sopenharmony_ci kernel_fpu_end(); 4118c2ecf20Sopenharmony_ci} 4128c2ecf20Sopenharmony_ci 4138c2ecf20Sopenharmony_ciconst struct raid6_calls raid6_lasx = { 4148c2ecf20Sopenharmony_ci raid6_lasx_gen_syndrome, 4158c2ecf20Sopenharmony_ci raid6_lasx_xor_syndrome, 4168c2ecf20Sopenharmony_ci raid6_has_lasx, 4178c2ecf20Sopenharmony_ci "lasx", 4188c2ecf20Sopenharmony_ci}; 4198c2ecf20Sopenharmony_ci#undef NSIZE 4208c2ecf20Sopenharmony_ci#endif /* CONFIG_CPU_HAS_LASX */ 421