18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX) 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Originally based on recov_avx2.c and recov_ssse3.c: 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * Copyright (C) 2012 Intel Corporation 108c2ecf20Sopenharmony_ci * Author: Jim Kukunas <james.t.kukunas@linux.intel.com> 118c2ecf20Sopenharmony_ci */ 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ci#include <linux/raid/pq.h> 148c2ecf20Sopenharmony_ci#include "loongarch.h" 158c2ecf20Sopenharmony_ci 168c2ecf20Sopenharmony_ci/* 178c2ecf20Sopenharmony_ci * Unlike with the syndrome calculation algorithms, there's no boot-time 188c2ecf20Sopenharmony_ci * selection of recovery algorithms by benchmarking, so we have to specify 198c2ecf20Sopenharmony_ci * the priorities and hope the future cores will all have decent vector 208c2ecf20Sopenharmony_ci * support (i.e. no LASX slower than LSX, or even scalar code). 218c2ecf20Sopenharmony_ci */ 228c2ecf20Sopenharmony_ci 238c2ecf20Sopenharmony_ci#ifdef CONFIG_CPU_HAS_LSX 248c2ecf20Sopenharmony_cistatic int raid6_has_lsx(void) 258c2ecf20Sopenharmony_ci{ 268c2ecf20Sopenharmony_ci return cpu_has_lsx; 278c2ecf20Sopenharmony_ci} 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_cistatic void raid6_2data_recov_lsx(int disks, size_t bytes, int faila, 308c2ecf20Sopenharmony_ci int failb, void **ptrs) 318c2ecf20Sopenharmony_ci{ 328c2ecf20Sopenharmony_ci u8 *p, *q, *dp, *dq; 338c2ecf20Sopenharmony_ci const u8 *pbmul; /* P multiplier table for B data */ 348c2ecf20Sopenharmony_ci const u8 *qmul; /* Q multiplier table (for both) */ 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_ci p = (u8 *)ptrs[disks - 2]; 378c2ecf20Sopenharmony_ci q = (u8 *)ptrs[disks - 1]; 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_ci /* 408c2ecf20Sopenharmony_ci * Compute syndrome with zero for the missing data pages 418c2ecf20Sopenharmony_ci * Use the dead data pages as temporary storage for 428c2ecf20Sopenharmony_ci * delta p and delta q 438c2ecf20Sopenharmony_ci */ 448c2ecf20Sopenharmony_ci dp = (u8 *)ptrs[faila]; 458c2ecf20Sopenharmony_ci ptrs[faila] = (void *)raid6_empty_zero_page; 468c2ecf20Sopenharmony_ci ptrs[disks - 2] = dp; 478c2ecf20Sopenharmony_ci dq = (u8 *)ptrs[failb]; 488c2ecf20Sopenharmony_ci ptrs[failb] = (void *)raid6_empty_zero_page; 498c2ecf20Sopenharmony_ci ptrs[disks - 1] = dq; 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_ci raid6_call.gen_syndrome(disks, bytes, ptrs); 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci /* Restore pointer table */ 548c2ecf20Sopenharmony_ci ptrs[faila] = dp; 558c2ecf20Sopenharmony_ci ptrs[failb] = dq; 568c2ecf20Sopenharmony_ci ptrs[disks - 2] = p; 578c2ecf20Sopenharmony_ci ptrs[disks - 1] = q; 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci /* Now, pick the proper data tables */ 608c2ecf20Sopenharmony_ci pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; 618c2ecf20Sopenharmony_ci qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_ci kernel_fpu_begin(); 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_ci /* 668c2ecf20Sopenharmony_ci * vr20, vr21: qmul 678c2ecf20Sopenharmony_ci * vr22, vr23: pbmul 688c2ecf20Sopenharmony_ci */ 698c2ecf20Sopenharmony_ci asm volatile("vld $vr20, %0" : : "m" (qmul[0])); 708c2ecf20Sopenharmony_ci asm volatile("vld $vr21, %0" : : "m" (qmul[16])); 718c2ecf20Sopenharmony_ci asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); 728c2ecf20Sopenharmony_ci asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_ci while (bytes) { 758c2ecf20Sopenharmony_ci /* vr4 - vr7: Q */ 768c2ecf20Sopenharmony_ci asm volatile("vld $vr4, %0" : : "m" (q[0])); 778c2ecf20Sopenharmony_ci asm volatile("vld $vr5, %0" : : "m" (q[16])); 788c2ecf20Sopenharmony_ci asm volatile("vld $vr6, %0" : : "m" (q[32])); 798c2ecf20Sopenharmony_ci asm volatile("vld $vr7, %0" : : "m" (q[48])); 808c2ecf20Sopenharmony_ci /* vr4 - vr7: Q + Qxy */ 818c2ecf20Sopenharmony_ci asm volatile("vld $vr8, %0" : : "m" (dq[0])); 828c2ecf20Sopenharmony_ci asm volatile("vld $vr9, %0" : : "m" (dq[16])); 838c2ecf20Sopenharmony_ci asm volatile("vld $vr10, %0" : : "m" (dq[32])); 848c2ecf20Sopenharmony_ci asm volatile("vld $vr11, %0" : : "m" (dq[48])); 858c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr4, $vr4, $vr8"); 868c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr5, $vr5, $vr9"); 878c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr6, $vr6, $vr10"); 888c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr7, $vr7, $vr11"); 898c2ecf20Sopenharmony_ci /* vr0 - vr3: P */ 908c2ecf20Sopenharmony_ci asm volatile("vld $vr0, %0" : : "m" (p[0])); 918c2ecf20Sopenharmony_ci asm volatile("vld $vr1, %0" : : "m" (p[16])); 928c2ecf20Sopenharmony_ci asm volatile("vld $vr2, %0" : : "m" (p[32])); 938c2ecf20Sopenharmony_ci asm volatile("vld $vr3, %0" : : "m" (p[48])); 948c2ecf20Sopenharmony_ci /* vr0 - vr3: P + Pxy */ 958c2ecf20Sopenharmony_ci asm volatile("vld $vr8, %0" : : "m" (dp[0])); 968c2ecf20Sopenharmony_ci asm volatile("vld $vr9, %0" : : "m" (dp[16])); 978c2ecf20Sopenharmony_ci asm volatile("vld $vr10, %0" : : "m" (dp[32])); 988c2ecf20Sopenharmony_ci asm volatile("vld $vr11, %0" : : "m" (dp[48])); 998c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr0, $vr0, $vr8"); 1008c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr1, $vr1, $vr9"); 1018c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr2, $vr2, $vr10"); 1028c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr3, $vr3, $vr11"); 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_ci /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */ 1058c2ecf20Sopenharmony_ci asm volatile("vsrli.b $vr8, $vr4, 4"); 1068c2ecf20Sopenharmony_ci asm volatile("vsrli.b $vr9, $vr5, 4"); 1078c2ecf20Sopenharmony_ci asm volatile("vsrli.b $vr10, $vr6, 4"); 1088c2ecf20Sopenharmony_ci asm volatile("vsrli.b $vr11, $vr7, 4"); 1098c2ecf20Sopenharmony_ci /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */ 1108c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr4, $vr4, 0x0f"); 1118c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr5, $vr5, 0x0f"); 1128c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr6, $vr6, 0x0f"); 1138c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr7, $vr7, 0x0f"); 1148c2ecf20Sopenharmony_ci /* lookup from qmul[0] */ 1158c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4"); 1168c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5"); 1178c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6"); 1188c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7"); 1198c2ecf20Sopenharmony_ci /* lookup from qmul[16] */ 1208c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8"); 1218c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9"); 1228c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10"); 1238c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11"); 1248c2ecf20Sopenharmony_ci /* vr16 - vr19: B(Q + Qxy) */ 1258c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr16, $vr8, $vr4"); 1268c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr17, $vr9, $vr5"); 1278c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr18, $vr10, $vr6"); 1288c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr19, $vr11, $vr7"); 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */ 1318c2ecf20Sopenharmony_ci asm volatile("vsrli.b $vr4, $vr0, 4"); 1328c2ecf20Sopenharmony_ci asm volatile("vsrli.b $vr5, $vr1, 4"); 1338c2ecf20Sopenharmony_ci asm volatile("vsrli.b $vr6, $vr2, 4"); 1348c2ecf20Sopenharmony_ci asm volatile("vsrli.b $vr7, $vr3, 4"); 1358c2ecf20Sopenharmony_ci /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */ 1368c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr12, $vr0, 0x0f"); 1378c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr13, $vr1, 0x0f"); 1388c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr14, $vr2, 0x0f"); 1398c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr15, $vr3, 0x0f"); 1408c2ecf20Sopenharmony_ci /* lookup from pbmul[0] */ 1418c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12"); 1428c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13"); 1438c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14"); 1448c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15"); 1458c2ecf20Sopenharmony_ci /* lookup from pbmul[16] */ 1468c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4"); 1478c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5"); 1488c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6"); 1498c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7"); 1508c2ecf20Sopenharmony_ci /* vr4 - vr7: A(P + Pxy) */ 1518c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr4, $vr4, $vr12"); 1528c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr5, $vr5, $vr13"); 1538c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr6, $vr6, $vr14"); 1548c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr7, $vr7, $vr15"); 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */ 1578c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr4, $vr4, $vr16"); 1588c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr5, $vr5, $vr17"); 1598c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr6, $vr6, $vr18"); 1608c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr7, $vr7, $vr19"); 1618c2ecf20Sopenharmony_ci asm volatile("vst $vr4, %0" : "=m" (dq[0])); 1628c2ecf20Sopenharmony_ci asm volatile("vst $vr5, %0" : "=m" (dq[16])); 1638c2ecf20Sopenharmony_ci asm volatile("vst $vr6, %0" : "=m" (dq[32])); 1648c2ecf20Sopenharmony_ci asm volatile("vst $vr7, %0" : "=m" (dq[48])); 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci /* vr0 - vr3: P + Pxy + Dx = Dy */ 1678c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr0, $vr0, $vr4"); 1688c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr1, $vr1, $vr5"); 1698c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr2, $vr2, $vr6"); 1708c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr3, $vr3, $vr7"); 1718c2ecf20Sopenharmony_ci asm volatile("vst $vr0, %0" : "=m" (dp[0])); 1728c2ecf20Sopenharmony_ci asm volatile("vst $vr1, %0" : "=m" (dp[16])); 1738c2ecf20Sopenharmony_ci asm volatile("vst $vr2, %0" : "=m" (dp[32])); 1748c2ecf20Sopenharmony_ci asm volatile("vst $vr3, %0" : "=m" (dp[48])); 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci bytes -= 64; 1778c2ecf20Sopenharmony_ci p += 64; 1788c2ecf20Sopenharmony_ci q += 64; 1798c2ecf20Sopenharmony_ci dp += 64; 1808c2ecf20Sopenharmony_ci dq += 64; 1818c2ecf20Sopenharmony_ci } 1828c2ecf20Sopenharmony_ci 1838c2ecf20Sopenharmony_ci kernel_fpu_end(); 1848c2ecf20Sopenharmony_ci} 1858c2ecf20Sopenharmony_ci 1868c2ecf20Sopenharmony_cistatic void raid6_datap_recov_lsx(int disks, size_t bytes, int faila, 1878c2ecf20Sopenharmony_ci void **ptrs) 1888c2ecf20Sopenharmony_ci{ 1898c2ecf20Sopenharmony_ci u8 *p, *q, *dq; 1908c2ecf20Sopenharmony_ci const u8 *qmul; /* Q multiplier table */ 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci p = (u8 *)ptrs[disks - 2]; 1938c2ecf20Sopenharmony_ci q = (u8 *)ptrs[disks - 1]; 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_ci /* 1968c2ecf20Sopenharmony_ci * Compute syndrome with zero for the missing data page 1978c2ecf20Sopenharmony_ci * Use the dead data page as temporary storage for delta q 1988c2ecf20Sopenharmony_ci */ 1998c2ecf20Sopenharmony_ci dq = (u8 *)ptrs[faila]; 2008c2ecf20Sopenharmony_ci ptrs[faila] = (void *)raid6_empty_zero_page; 2018c2ecf20Sopenharmony_ci ptrs[disks - 1] = dq; 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci raid6_call.gen_syndrome(disks, bytes, ptrs); 2048c2ecf20Sopenharmony_ci 2058c2ecf20Sopenharmony_ci /* Restore pointer table */ 2068c2ecf20Sopenharmony_ci ptrs[faila] = dq; 2078c2ecf20Sopenharmony_ci ptrs[disks - 1] = q; 2088c2ecf20Sopenharmony_ci 2098c2ecf20Sopenharmony_ci /* Now, pick the proper data tables */ 2108c2ecf20Sopenharmony_ci qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_ci kernel_fpu_begin(); 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci /* vr22, vr23: qmul */ 2158c2ecf20Sopenharmony_ci asm volatile("vld $vr22, %0" : : "m" (qmul[0])); 2168c2ecf20Sopenharmony_ci asm volatile("vld $vr23, %0" : : "m" (qmul[16])); 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci while (bytes) { 2198c2ecf20Sopenharmony_ci /* vr0 - vr3: P + Dx */ 2208c2ecf20Sopenharmony_ci asm volatile("vld $vr0, %0" : : "m" (p[0])); 2218c2ecf20Sopenharmony_ci asm volatile("vld $vr1, %0" : : "m" (p[16])); 2228c2ecf20Sopenharmony_ci asm volatile("vld $vr2, %0" : : "m" (p[32])); 2238c2ecf20Sopenharmony_ci asm volatile("vld $vr3, %0" : : "m" (p[48])); 2248c2ecf20Sopenharmony_ci /* vr4 - vr7: Qx */ 2258c2ecf20Sopenharmony_ci asm volatile("vld $vr4, %0" : : "m" (dq[0])); 2268c2ecf20Sopenharmony_ci asm volatile("vld $vr5, %0" : : "m" (dq[16])); 2278c2ecf20Sopenharmony_ci asm volatile("vld $vr6, %0" : : "m" (dq[32])); 2288c2ecf20Sopenharmony_ci asm volatile("vld $vr7, %0" : : "m" (dq[48])); 2298c2ecf20Sopenharmony_ci /* vr4 - vr7: Q + Qx */ 2308c2ecf20Sopenharmony_ci asm volatile("vld $vr8, %0" : : "m" (q[0])); 2318c2ecf20Sopenharmony_ci asm volatile("vld $vr9, %0" : : "m" (q[16])); 2328c2ecf20Sopenharmony_ci asm volatile("vld $vr10, %0" : : "m" (q[32])); 2338c2ecf20Sopenharmony_ci asm volatile("vld $vr11, %0" : : "m" (q[48])); 2348c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr4, $vr4, $vr8"); 2358c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr5, $vr5, $vr9"); 2368c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr6, $vr6, $vr10"); 2378c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr7, $vr7, $vr11"); 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */ 2408c2ecf20Sopenharmony_ci asm volatile("vsrli.b $vr8, $vr4, 4"); 2418c2ecf20Sopenharmony_ci asm volatile("vsrli.b $vr9, $vr5, 4"); 2428c2ecf20Sopenharmony_ci asm volatile("vsrli.b $vr10, $vr6, 4"); 2438c2ecf20Sopenharmony_ci asm volatile("vsrli.b $vr11, $vr7, 4"); 2448c2ecf20Sopenharmony_ci /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */ 2458c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr4, $vr4, 0x0f"); 2468c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr5, $vr5, 0x0f"); 2478c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr6, $vr6, 0x0f"); 2488c2ecf20Sopenharmony_ci asm volatile("vandi.b $vr7, $vr7, 0x0f"); 2498c2ecf20Sopenharmony_ci /* lookup from qmul[0] */ 2508c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4"); 2518c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5"); 2528c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6"); 2538c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7"); 2548c2ecf20Sopenharmony_ci /* lookup from qmul[16] */ 2558c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8"); 2568c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9"); 2578c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10"); 2588c2ecf20Sopenharmony_ci asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11"); 2598c2ecf20Sopenharmony_ci /* vr4 - vr7: qmul(Q + Qx) = Dx */ 2608c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr4, $vr4, $vr8"); 2618c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr5, $vr5, $vr9"); 2628c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr6, $vr6, $vr10"); 2638c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr7, $vr7, $vr11"); 2648c2ecf20Sopenharmony_ci asm volatile("vst $vr4, %0" : "=m" (dq[0])); 2658c2ecf20Sopenharmony_ci asm volatile("vst $vr5, %0" : "=m" (dq[16])); 2668c2ecf20Sopenharmony_ci asm volatile("vst $vr6, %0" : "=m" (dq[32])); 2678c2ecf20Sopenharmony_ci asm volatile("vst $vr7, %0" : "=m" (dq[48])); 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ci /* vr0 - vr3: P + Dx + Dx = P */ 2708c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr0, $vr0, $vr4"); 2718c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr1, $vr1, $vr5"); 2728c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr2, $vr2, $vr6"); 2738c2ecf20Sopenharmony_ci asm volatile("vxor.v $vr3, $vr3, $vr7"); 2748c2ecf20Sopenharmony_ci asm volatile("vst $vr0, %0" : "=m" (p[0])); 2758c2ecf20Sopenharmony_ci asm volatile("vst $vr1, %0" : "=m" (p[16])); 2768c2ecf20Sopenharmony_ci asm volatile("vst $vr2, %0" : "=m" (p[32])); 2778c2ecf20Sopenharmony_ci asm volatile("vst $vr3, %0" : "=m" (p[48])); 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ci bytes -= 64; 2808c2ecf20Sopenharmony_ci p += 64; 2818c2ecf20Sopenharmony_ci q += 64; 2828c2ecf20Sopenharmony_ci dq += 64; 2838c2ecf20Sopenharmony_ci } 2848c2ecf20Sopenharmony_ci 2858c2ecf20Sopenharmony_ci kernel_fpu_end(); 2868c2ecf20Sopenharmony_ci} 2878c2ecf20Sopenharmony_ci 2888c2ecf20Sopenharmony_ciconst struct raid6_recov_calls raid6_recov_lsx = { 2898c2ecf20Sopenharmony_ci .data2 = raid6_2data_recov_lsx, 2908c2ecf20Sopenharmony_ci .datap = raid6_datap_recov_lsx, 2918c2ecf20Sopenharmony_ci .valid = raid6_has_lsx, 2928c2ecf20Sopenharmony_ci .name = "lsx", 2938c2ecf20Sopenharmony_ci .priority = 1, 2948c2ecf20Sopenharmony_ci}; 2958c2ecf20Sopenharmony_ci#endif /* CONFIG_CPU_HAS_LSX */ 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci#ifdef CONFIG_CPU_HAS_LASX 2988c2ecf20Sopenharmony_cistatic int raid6_has_lasx(void) 2998c2ecf20Sopenharmony_ci{ 3008c2ecf20Sopenharmony_ci return cpu_has_lasx; 3018c2ecf20Sopenharmony_ci} 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_cistatic void raid6_2data_recov_lasx(int disks, size_t bytes, int faila, 3048c2ecf20Sopenharmony_ci int failb, void **ptrs) 3058c2ecf20Sopenharmony_ci{ 3068c2ecf20Sopenharmony_ci u8 *p, *q, *dp, *dq; 3078c2ecf20Sopenharmony_ci const u8 *pbmul; /* P multiplier table for B data */ 3088c2ecf20Sopenharmony_ci const u8 *qmul; /* Q multiplier table (for both) */ 3098c2ecf20Sopenharmony_ci 3108c2ecf20Sopenharmony_ci p = (u8 *)ptrs[disks - 2]; 3118c2ecf20Sopenharmony_ci q = (u8 *)ptrs[disks - 1]; 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_ci /* 3148c2ecf20Sopenharmony_ci * Compute syndrome with zero for the missing data pages 3158c2ecf20Sopenharmony_ci * Use the dead data pages as temporary storage for 3168c2ecf20Sopenharmony_ci * delta p and delta q 3178c2ecf20Sopenharmony_ci */ 3188c2ecf20Sopenharmony_ci dp = (u8 *)ptrs[faila]; 3198c2ecf20Sopenharmony_ci ptrs[faila] = (void *)raid6_empty_zero_page; 3208c2ecf20Sopenharmony_ci ptrs[disks - 2] = dp; 3218c2ecf20Sopenharmony_ci dq = (u8 *)ptrs[failb]; 3228c2ecf20Sopenharmony_ci ptrs[failb] = (void *)raid6_empty_zero_page; 3238c2ecf20Sopenharmony_ci ptrs[disks - 1] = dq; 3248c2ecf20Sopenharmony_ci 3258c2ecf20Sopenharmony_ci raid6_call.gen_syndrome(disks, bytes, ptrs); 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ci /* Restore pointer table */ 3288c2ecf20Sopenharmony_ci ptrs[faila] = dp; 3298c2ecf20Sopenharmony_ci ptrs[failb] = dq; 3308c2ecf20Sopenharmony_ci ptrs[disks - 2] = p; 3318c2ecf20Sopenharmony_ci ptrs[disks - 1] = q; 3328c2ecf20Sopenharmony_ci 3338c2ecf20Sopenharmony_ci /* Now, pick the proper data tables */ 3348c2ecf20Sopenharmony_ci pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; 3358c2ecf20Sopenharmony_ci qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; 3368c2ecf20Sopenharmony_ci 3378c2ecf20Sopenharmony_ci kernel_fpu_begin(); 3388c2ecf20Sopenharmony_ci 3398c2ecf20Sopenharmony_ci /* 3408c2ecf20Sopenharmony_ci * xr20, xr21: qmul 3418c2ecf20Sopenharmony_ci * xr22, xr23: pbmul 3428c2ecf20Sopenharmony_ci */ 3438c2ecf20Sopenharmony_ci asm volatile("vld $vr20, %0" : : "m" (qmul[0])); 3448c2ecf20Sopenharmony_ci asm volatile("vld $vr21, %0" : : "m" (qmul[16])); 3458c2ecf20Sopenharmony_ci asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); 3468c2ecf20Sopenharmony_ci asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); 3478c2ecf20Sopenharmony_ci asm volatile("xvreplve0.q $xr20, $xr20"); 3488c2ecf20Sopenharmony_ci asm volatile("xvreplve0.q $xr21, $xr21"); 3498c2ecf20Sopenharmony_ci asm volatile("xvreplve0.q $xr22, $xr22"); 3508c2ecf20Sopenharmony_ci asm volatile("xvreplve0.q $xr23, $xr23"); 3518c2ecf20Sopenharmony_ci 3528c2ecf20Sopenharmony_ci while (bytes) { 3538c2ecf20Sopenharmony_ci /* xr0, xr1: Q */ 3548c2ecf20Sopenharmony_ci asm volatile("xvld $xr0, %0" : : "m" (q[0])); 3558c2ecf20Sopenharmony_ci asm volatile("xvld $xr1, %0" : : "m" (q[32])); 3568c2ecf20Sopenharmony_ci /* xr0, xr1: Q + Qxy */ 3578c2ecf20Sopenharmony_ci asm volatile("xvld $xr4, %0" : : "m" (dq[0])); 3588c2ecf20Sopenharmony_ci asm volatile("xvld $xr5, %0" : : "m" (dq[32])); 3598c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr0, $xr0, $xr4"); 3608c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr1, $xr1, $xr5"); 3618c2ecf20Sopenharmony_ci /* xr2, xr3: P */ 3628c2ecf20Sopenharmony_ci asm volatile("xvld $xr2, %0" : : "m" (p[0])); 3638c2ecf20Sopenharmony_ci asm volatile("xvld $xr3, %0" : : "m" (p[32])); 3648c2ecf20Sopenharmony_ci /* xr2, xr3: P + Pxy */ 3658c2ecf20Sopenharmony_ci asm volatile("xvld $xr4, %0" : : "m" (dp[0])); 3668c2ecf20Sopenharmony_ci asm volatile("xvld $xr5, %0" : : "m" (dp[32])); 3678c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr2, $xr2, $xr4"); 3688c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr3, $xr3, $xr5"); 3698c2ecf20Sopenharmony_ci 3708c2ecf20Sopenharmony_ci /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */ 3718c2ecf20Sopenharmony_ci asm volatile("xvsrli.b $xr4, $xr0, 4"); 3728c2ecf20Sopenharmony_ci asm volatile("xvsrli.b $xr5, $xr1, 4"); 3738c2ecf20Sopenharmony_ci /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */ 3748c2ecf20Sopenharmony_ci asm volatile("xvandi.b $xr0, $xr0, 0x0f"); 3758c2ecf20Sopenharmony_ci asm volatile("xvandi.b $xr1, $xr1, 0x0f"); 3768c2ecf20Sopenharmony_ci /* lookup from qmul[0] */ 3778c2ecf20Sopenharmony_ci asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0"); 3788c2ecf20Sopenharmony_ci asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1"); 3798c2ecf20Sopenharmony_ci /* lookup from qmul[16] */ 3808c2ecf20Sopenharmony_ci asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4"); 3818c2ecf20Sopenharmony_ci asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5"); 3828c2ecf20Sopenharmony_ci /* xr6, xr7: B(Q + Qxy) */ 3838c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr6, $xr4, $xr0"); 3848c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr7, $xr5, $xr1"); 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ci /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */ 3878c2ecf20Sopenharmony_ci asm volatile("xvsrli.b $xr4, $xr2, 4"); 3888c2ecf20Sopenharmony_ci asm volatile("xvsrli.b $xr5, $xr3, 4"); 3898c2ecf20Sopenharmony_ci /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */ 3908c2ecf20Sopenharmony_ci asm volatile("xvandi.b $xr0, $xr2, 0x0f"); 3918c2ecf20Sopenharmony_ci asm volatile("xvandi.b $xr1, $xr3, 0x0f"); 3928c2ecf20Sopenharmony_ci /* lookup from pbmul[0] */ 3938c2ecf20Sopenharmony_ci asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0"); 3948c2ecf20Sopenharmony_ci asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1"); 3958c2ecf20Sopenharmony_ci /* lookup from pbmul[16] */ 3968c2ecf20Sopenharmony_ci asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4"); 3978c2ecf20Sopenharmony_ci asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5"); 3988c2ecf20Sopenharmony_ci /* xr0, xr1: A(P + Pxy) */ 3998c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr0, $xr0, $xr4"); 4008c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr1, $xr1, $xr5"); 4018c2ecf20Sopenharmony_ci 4028c2ecf20Sopenharmony_ci /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */ 4038c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr0, $xr0, $xr6"); 4048c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr1, $xr1, $xr7"); 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci /* xr2, xr3: P + Pxy + Dx = Dy */ 4078c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr2, $xr2, $xr0"); 4088c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr3, $xr3, $xr1"); 4098c2ecf20Sopenharmony_ci 4108c2ecf20Sopenharmony_ci asm volatile("xvst $xr0, %0" : "=m" (dq[0])); 4118c2ecf20Sopenharmony_ci asm volatile("xvst $xr1, %0" : "=m" (dq[32])); 4128c2ecf20Sopenharmony_ci asm volatile("xvst $xr2, %0" : "=m" (dp[0])); 4138c2ecf20Sopenharmony_ci asm volatile("xvst $xr3, %0" : "=m" (dp[32])); 4148c2ecf20Sopenharmony_ci 4158c2ecf20Sopenharmony_ci bytes -= 64; 4168c2ecf20Sopenharmony_ci p += 64; 4178c2ecf20Sopenharmony_ci q += 64; 4188c2ecf20Sopenharmony_ci dp += 64; 4198c2ecf20Sopenharmony_ci dq += 64; 4208c2ecf20Sopenharmony_ci } 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ci kernel_fpu_end(); 4238c2ecf20Sopenharmony_ci} 4248c2ecf20Sopenharmony_ci 4258c2ecf20Sopenharmony_cistatic void raid6_datap_recov_lasx(int disks, size_t bytes, int faila, 4268c2ecf20Sopenharmony_ci void **ptrs) 4278c2ecf20Sopenharmony_ci{ 4288c2ecf20Sopenharmony_ci u8 *p, *q, *dq; 4298c2ecf20Sopenharmony_ci const u8 *qmul; /* Q multiplier table */ 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci p = (u8 *)ptrs[disks - 2]; 4328c2ecf20Sopenharmony_ci q = (u8 *)ptrs[disks - 1]; 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci /* 4358c2ecf20Sopenharmony_ci * Compute syndrome with zero for the missing data page 4368c2ecf20Sopenharmony_ci * Use the dead data page as temporary storage for delta q 4378c2ecf20Sopenharmony_ci */ 4388c2ecf20Sopenharmony_ci dq = (u8 *)ptrs[faila]; 4398c2ecf20Sopenharmony_ci ptrs[faila] = (void *)raid6_empty_zero_page; 4408c2ecf20Sopenharmony_ci ptrs[disks - 1] = dq; 4418c2ecf20Sopenharmony_ci 4428c2ecf20Sopenharmony_ci raid6_call.gen_syndrome(disks, bytes, ptrs); 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_ci /* Restore pointer table */ 4458c2ecf20Sopenharmony_ci ptrs[faila] = dq; 4468c2ecf20Sopenharmony_ci ptrs[disks - 1] = q; 4478c2ecf20Sopenharmony_ci 4488c2ecf20Sopenharmony_ci /* Now, pick the proper data tables */ 4498c2ecf20Sopenharmony_ci qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ci kernel_fpu_begin(); 4528c2ecf20Sopenharmony_ci 4538c2ecf20Sopenharmony_ci /* xr22, xr23: qmul */ 4548c2ecf20Sopenharmony_ci asm volatile("vld $vr22, %0" : : "m" (qmul[0])); 4558c2ecf20Sopenharmony_ci asm volatile("xvreplve0.q $xr22, $xr22"); 4568c2ecf20Sopenharmony_ci asm volatile("vld $vr23, %0" : : "m" (qmul[16])); 4578c2ecf20Sopenharmony_ci asm volatile("xvreplve0.q $xr23, $xr23"); 4588c2ecf20Sopenharmony_ci 4598c2ecf20Sopenharmony_ci while (bytes) { 4608c2ecf20Sopenharmony_ci /* xr0, xr1: P + Dx */ 4618c2ecf20Sopenharmony_ci asm volatile("xvld $xr0, %0" : : "m" (p[0])); 4628c2ecf20Sopenharmony_ci asm volatile("xvld $xr1, %0" : : "m" (p[32])); 4638c2ecf20Sopenharmony_ci /* xr2, xr3: Qx */ 4648c2ecf20Sopenharmony_ci asm volatile("xvld $xr2, %0" : : "m" (dq[0])); 4658c2ecf20Sopenharmony_ci asm volatile("xvld $xr3, %0" : : "m" (dq[32])); 4668c2ecf20Sopenharmony_ci /* xr2, xr3: Q + Qx */ 4678c2ecf20Sopenharmony_ci asm volatile("xvld $xr4, %0" : : "m" (q[0])); 4688c2ecf20Sopenharmony_ci asm volatile("xvld $xr5, %0" : : "m" (q[32])); 4698c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr2, $xr2, $xr4"); 4708c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr3, $xr3, $xr5"); 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_ci /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */ 4738c2ecf20Sopenharmony_ci asm volatile("xvsrli.b $xr4, $xr2, 4"); 4748c2ecf20Sopenharmony_ci asm volatile("xvsrli.b $xr5, $xr3, 4"); 4758c2ecf20Sopenharmony_ci /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */ 4768c2ecf20Sopenharmony_ci asm volatile("xvandi.b $xr2, $xr2, 0x0f"); 4778c2ecf20Sopenharmony_ci asm volatile("xvandi.b $xr3, $xr3, 0x0f"); 4788c2ecf20Sopenharmony_ci /* lookup from qmul[0] */ 4798c2ecf20Sopenharmony_ci asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2"); 4808c2ecf20Sopenharmony_ci asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3"); 4818c2ecf20Sopenharmony_ci /* lookup from qmul[16] */ 4828c2ecf20Sopenharmony_ci asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4"); 4838c2ecf20Sopenharmony_ci asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5"); 4848c2ecf20Sopenharmony_ci /* xr2, xr3: qmul(Q + Qx) = Dx */ 4858c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr2, $xr2, $xr4"); 4868c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr3, $xr3, $xr5"); 4878c2ecf20Sopenharmony_ci 4888c2ecf20Sopenharmony_ci /* xr0, xr1: P + Dx + Dx = P */ 4898c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr0, $xr0, $xr2"); 4908c2ecf20Sopenharmony_ci asm volatile("xvxor.v $xr1, $xr1, $xr3"); 4918c2ecf20Sopenharmony_ci 4928c2ecf20Sopenharmony_ci asm volatile("xvst $xr2, %0" : "=m" (dq[0])); 4938c2ecf20Sopenharmony_ci asm volatile("xvst $xr3, %0" : "=m" (dq[32])); 4948c2ecf20Sopenharmony_ci asm volatile("xvst $xr0, %0" : "=m" (p[0])); 4958c2ecf20Sopenharmony_ci asm volatile("xvst $xr1, %0" : "=m" (p[32])); 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci bytes -= 64; 4988c2ecf20Sopenharmony_ci p += 64; 4998c2ecf20Sopenharmony_ci q += 64; 5008c2ecf20Sopenharmony_ci dq += 64; 5018c2ecf20Sopenharmony_ci } 5028c2ecf20Sopenharmony_ci 5038c2ecf20Sopenharmony_ci kernel_fpu_end(); 5048c2ecf20Sopenharmony_ci} 5058c2ecf20Sopenharmony_ci 5068c2ecf20Sopenharmony_ciconst struct raid6_recov_calls raid6_recov_lasx = { 5078c2ecf20Sopenharmony_ci .data2 = raid6_2data_recov_lasx, 5088c2ecf20Sopenharmony_ci .datap = raid6_datap_recov_lasx, 5098c2ecf20Sopenharmony_ci .valid = raid6_has_lasx, 5108c2ecf20Sopenharmony_ci .name = "lasx", 5118c2ecf20Sopenharmony_ci .priority = 2, 5128c2ecf20Sopenharmony_ci}; 5138c2ecf20Sopenharmony_ci#endif /* CONFIG_CPU_HAS_LASX */ 514