18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) 2012 Intel Corporation 48c2ecf20Sopenharmony_ci */ 58c2ecf20Sopenharmony_ci 68c2ecf20Sopenharmony_ci#include <linux/raid/pq.h> 78c2ecf20Sopenharmony_ci#include "x86.h" 88c2ecf20Sopenharmony_ci 98c2ecf20Sopenharmony_cistatic int raid6_has_ssse3(void) 108c2ecf20Sopenharmony_ci{ 118c2ecf20Sopenharmony_ci return boot_cpu_has(X86_FEATURE_XMM) && 128c2ecf20Sopenharmony_ci boot_cpu_has(X86_FEATURE_XMM2) && 138c2ecf20Sopenharmony_ci boot_cpu_has(X86_FEATURE_SSSE3); 148c2ecf20Sopenharmony_ci} 158c2ecf20Sopenharmony_ci 168c2ecf20Sopenharmony_cistatic void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, 178c2ecf20Sopenharmony_ci int failb, void **ptrs) 188c2ecf20Sopenharmony_ci{ 198c2ecf20Sopenharmony_ci u8 *p, *q, *dp, *dq; 208c2ecf20Sopenharmony_ci const u8 *pbmul; /* P multiplier table for B data */ 218c2ecf20Sopenharmony_ci const u8 *qmul; /* Q multiplier table (for both) */ 228c2ecf20Sopenharmony_ci static const u8 __aligned(16) x0f[16] = { 238c2ecf20Sopenharmony_ci 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 248c2ecf20Sopenharmony_ci 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f}; 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_ci p = (u8 *)ptrs[disks-2]; 278c2ecf20Sopenharmony_ci q = (u8 *)ptrs[disks-1]; 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_ci /* Compute syndrome with zero for the missing data pages 308c2ecf20Sopenharmony_ci Use the dead data pages as temporary storage for 318c2ecf20Sopenharmony_ci delta p and delta q */ 328c2ecf20Sopenharmony_ci dp = (u8 *)ptrs[faila]; 338c2ecf20Sopenharmony_ci ptrs[faila] = (void *)raid6_empty_zero_page; 348c2ecf20Sopenharmony_ci ptrs[disks-2] = dp; 358c2ecf20Sopenharmony_ci dq = (u8 *)ptrs[failb]; 368c2ecf20Sopenharmony_ci ptrs[failb] = (void *)raid6_empty_zero_page; 378c2ecf20Sopenharmony_ci ptrs[disks-1] = dq; 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_ci raid6_call.gen_syndrome(disks, bytes, ptrs); 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ci /* Restore pointer table */ 428c2ecf20Sopenharmony_ci ptrs[faila] = dp; 438c2ecf20Sopenharmony_ci ptrs[failb] = dq; 448c2ecf20Sopenharmony_ci ptrs[disks-2] = p; 458c2ecf20Sopenharmony_ci ptrs[disks-1] = q; 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci /* Now, pick the proper data tables */ 488c2ecf20Sopenharmony_ci pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; 498c2ecf20Sopenharmony_ci qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ 508c2ecf20Sopenharmony_ci raid6_gfexp[failb]]]; 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci kernel_fpu_begin(); 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm7" : : "m" (x0f[0])); 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 578c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm6" : : "m" (qmul[0])); 588c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm14" : : "m" (pbmul[0])); 598c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm15" : : "m" (pbmul[16])); 608c2ecf20Sopenharmony_ci#endif 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci /* Now do it... */ 638c2ecf20Sopenharmony_ci while (bytes) { 648c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 658c2ecf20Sopenharmony_ci /* xmm6, xmm14, xmm15 */ 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm1" : : "m" (q[0])); 688c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm9" : : "m" (q[16])); 698c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm0" : : "m" (p[0])); 708c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm8" : : "m" (p[16])); 718c2ecf20Sopenharmony_ci asm volatile("pxor %0,%%xmm1" : : "m" (dq[0])); 728c2ecf20Sopenharmony_ci asm volatile("pxor %0,%%xmm9" : : "m" (dq[16])); 738c2ecf20Sopenharmony_ci asm volatile("pxor %0,%%xmm0" : : "m" (dp[0])); 748c2ecf20Sopenharmony_ci asm volatile("pxor %0,%%xmm8" : : "m" (dp[16])); 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_ci /* xmm0/8 = px */ 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm6,%xmm4"); 798c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16])); 808c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm6,%xmm12"); 818c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm5,%xmm13"); 828c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm1,%xmm3"); 838c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm9,%xmm11"); 848c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm0,%xmm2"); /* xmm2/10 = px */ 858c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm8,%xmm10"); 868c2ecf20Sopenharmony_ci asm volatile("psraw $4,%xmm1"); 878c2ecf20Sopenharmony_ci asm volatile("psraw $4,%xmm9"); 888c2ecf20Sopenharmony_ci asm volatile("pand %xmm7,%xmm3"); 898c2ecf20Sopenharmony_ci asm volatile("pand %xmm7,%xmm11"); 908c2ecf20Sopenharmony_ci asm volatile("pand %xmm7,%xmm1"); 918c2ecf20Sopenharmony_ci asm volatile("pand %xmm7,%xmm9"); 928c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm3,%xmm4"); 938c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm11,%xmm12"); 948c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm1,%xmm5"); 958c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm9,%xmm13"); 968c2ecf20Sopenharmony_ci asm volatile("pxor %xmm4,%xmm5"); 978c2ecf20Sopenharmony_ci asm volatile("pxor %xmm12,%xmm13"); 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci /* xmm5/13 = qx */ 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm14,%xmm4"); 1028c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm15,%xmm1"); 1038c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm14,%xmm12"); 1048c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm15,%xmm9"); 1058c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm2,%xmm3"); 1068c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm10,%xmm11"); 1078c2ecf20Sopenharmony_ci asm volatile("psraw $4,%xmm2"); 1088c2ecf20Sopenharmony_ci asm volatile("psraw $4,%xmm10"); 1098c2ecf20Sopenharmony_ci asm volatile("pand %xmm7,%xmm3"); 1108c2ecf20Sopenharmony_ci asm volatile("pand %xmm7,%xmm11"); 1118c2ecf20Sopenharmony_ci asm volatile("pand %xmm7,%xmm2"); 1128c2ecf20Sopenharmony_ci asm volatile("pand %xmm7,%xmm10"); 1138c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm3,%xmm4"); 1148c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm11,%xmm12"); 1158c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm2,%xmm1"); 1168c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm10,%xmm9"); 1178c2ecf20Sopenharmony_ci asm volatile("pxor %xmm4,%xmm1"); 1188c2ecf20Sopenharmony_ci asm volatile("pxor %xmm12,%xmm9"); 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci /* xmm1/9 = pbmul[px] */ 1218c2ecf20Sopenharmony_ci asm volatile("pxor %xmm5,%xmm1"); 1228c2ecf20Sopenharmony_ci asm volatile("pxor %xmm13,%xmm9"); 1238c2ecf20Sopenharmony_ci /* xmm1/9 = db = DQ */ 1248c2ecf20Sopenharmony_ci asm volatile("movdqa %%xmm1,%0" : "=m" (dq[0])); 1258c2ecf20Sopenharmony_ci asm volatile("movdqa %%xmm9,%0" : "=m" (dq[16])); 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci asm volatile("pxor %xmm1,%xmm0"); 1288c2ecf20Sopenharmony_ci asm volatile("pxor %xmm9,%xmm8"); 1298c2ecf20Sopenharmony_ci asm volatile("movdqa %%xmm0,%0" : "=m" (dp[0])); 1308c2ecf20Sopenharmony_ci asm volatile("movdqa %%xmm8,%0" : "=m" (dp[16])); 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci bytes -= 32; 1338c2ecf20Sopenharmony_ci p += 32; 1348c2ecf20Sopenharmony_ci q += 32; 1358c2ecf20Sopenharmony_ci dp += 32; 1368c2ecf20Sopenharmony_ci dq += 32; 1378c2ecf20Sopenharmony_ci#else 1388c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm1" : : "m" (*q)); 1398c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm0" : : "m" (*p)); 1408c2ecf20Sopenharmony_ci asm volatile("pxor %0,%%xmm1" : : "m" (*dq)); 1418c2ecf20Sopenharmony_ci asm volatile("pxor %0,%%xmm0" : : "m" (*dp)); 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_ci /* 1 = dq ^ q 1448c2ecf20Sopenharmony_ci * 0 = dp ^ p 1458c2ecf20Sopenharmony_ci */ 1468c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm4" : : "m" (qmul[0])); 1478c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16])); 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm1,%xmm3"); 1508c2ecf20Sopenharmony_ci asm volatile("psraw $4,%xmm1"); 1518c2ecf20Sopenharmony_ci asm volatile("pand %xmm7,%xmm3"); 1528c2ecf20Sopenharmony_ci asm volatile("pand %xmm7,%xmm1"); 1538c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm3,%xmm4"); 1548c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm1,%xmm5"); 1558c2ecf20Sopenharmony_ci asm volatile("pxor %xmm4,%xmm5"); 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm0,%xmm2"); /* xmm2 = px */ 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci /* xmm5 = qx */ 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm4" : : "m" (pbmul[0])); 1628c2ecf20Sopenharmony_ci asm volatile("movdqa %0,%%xmm1" : : "m" (pbmul[16])); 1638c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm2,%xmm3"); 1648c2ecf20Sopenharmony_ci asm volatile("psraw $4,%xmm2"); 1658c2ecf20Sopenharmony_ci asm volatile("pand %xmm7,%xmm3"); 1668c2ecf20Sopenharmony_ci asm volatile("pand %xmm7,%xmm2"); 1678c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm3,%xmm4"); 1688c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm2,%xmm1"); 1698c2ecf20Sopenharmony_ci asm volatile("pxor %xmm4,%xmm1"); 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci /* xmm1 = pbmul[px] */ 1728c2ecf20Sopenharmony_ci asm volatile("pxor %xmm5,%xmm1"); 1738c2ecf20Sopenharmony_ci /* xmm1 = db = DQ */ 1748c2ecf20Sopenharmony_ci asm volatile("movdqa %%xmm1,%0" : "=m" (*dq)); 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci asm volatile("pxor %xmm1,%xmm0"); 1778c2ecf20Sopenharmony_ci asm volatile("movdqa %%xmm0,%0" : "=m" (*dp)); 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci bytes -= 16; 1808c2ecf20Sopenharmony_ci p += 16; 1818c2ecf20Sopenharmony_ci q += 16; 1828c2ecf20Sopenharmony_ci dp += 16; 1838c2ecf20Sopenharmony_ci dq += 16; 1848c2ecf20Sopenharmony_ci#endif 1858c2ecf20Sopenharmony_ci } 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci kernel_fpu_end(); 1888c2ecf20Sopenharmony_ci} 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_cistatic void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, 1928c2ecf20Sopenharmony_ci void **ptrs) 1938c2ecf20Sopenharmony_ci{ 1948c2ecf20Sopenharmony_ci u8 *p, *q, *dq; 1958c2ecf20Sopenharmony_ci const u8 *qmul; /* Q multiplier table */ 1968c2ecf20Sopenharmony_ci static const u8 __aligned(16) x0f[16] = { 1978c2ecf20Sopenharmony_ci 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 1988c2ecf20Sopenharmony_ci 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f}; 1998c2ecf20Sopenharmony_ci 2008c2ecf20Sopenharmony_ci p = (u8 *)ptrs[disks-2]; 2018c2ecf20Sopenharmony_ci q = (u8 *)ptrs[disks-1]; 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci /* Compute syndrome with zero for the missing data page 2048c2ecf20Sopenharmony_ci Use the dead data page as temporary storage for delta q */ 2058c2ecf20Sopenharmony_ci dq = (u8 *)ptrs[faila]; 2068c2ecf20Sopenharmony_ci ptrs[faila] = (void *)raid6_empty_zero_page; 2078c2ecf20Sopenharmony_ci ptrs[disks-1] = dq; 2088c2ecf20Sopenharmony_ci 2098c2ecf20Sopenharmony_ci raid6_call.gen_syndrome(disks, bytes, ptrs); 2108c2ecf20Sopenharmony_ci 2118c2ecf20Sopenharmony_ci /* Restore pointer table */ 2128c2ecf20Sopenharmony_ci ptrs[faila] = dq; 2138c2ecf20Sopenharmony_ci ptrs[disks-1] = q; 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci /* Now, pick the proper data tables */ 2168c2ecf20Sopenharmony_ci qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci kernel_fpu_begin(); 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci asm volatile("movdqa %0, %%xmm7" : : "m" (x0f[0])); 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_ci while (bytes) { 2238c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 2248c2ecf20Sopenharmony_ci asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0])); 2258c2ecf20Sopenharmony_ci asm volatile("movdqa %0, %%xmm4" : : "m" (dq[16])); 2268c2ecf20Sopenharmony_ci asm volatile("pxor %0, %%xmm3" : : "m" (q[0])); 2278c2ecf20Sopenharmony_ci asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0])); 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_ci /* xmm3 = q[0] ^ dq[0] */ 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci asm volatile("pxor %0, %%xmm4" : : "m" (q[16])); 2328c2ecf20Sopenharmony_ci asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16])); 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_ci /* xmm4 = q[16] ^ dq[16] */ 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm3, %xmm6"); 2378c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm4, %xmm8"); 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci /* xmm4 = xmm8 = q[16] ^ dq[16] */ 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_ci asm volatile("psraw $4, %xmm3"); 2428c2ecf20Sopenharmony_ci asm volatile("pand %xmm7, %xmm6"); 2438c2ecf20Sopenharmony_ci asm volatile("pand %xmm7, %xmm3"); 2448c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm6, %xmm0"); 2458c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm3, %xmm1"); 2468c2ecf20Sopenharmony_ci asm volatile("movdqa %0, %%xmm10" : : "m" (qmul[0])); 2478c2ecf20Sopenharmony_ci asm volatile("pxor %xmm0, %xmm1"); 2488c2ecf20Sopenharmony_ci asm volatile("movdqa %0, %%xmm11" : : "m" (qmul[16])); 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci /* xmm1 = qmul[q[0] ^ dq[0]] */ 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_ci asm volatile("psraw $4, %xmm4"); 2538c2ecf20Sopenharmony_ci asm volatile("pand %xmm7, %xmm8"); 2548c2ecf20Sopenharmony_ci asm volatile("pand %xmm7, %xmm4"); 2558c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm8, %xmm10"); 2568c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm4, %xmm11"); 2578c2ecf20Sopenharmony_ci asm volatile("movdqa %0, %%xmm2" : : "m" (p[0])); 2588c2ecf20Sopenharmony_ci asm volatile("pxor %xmm10, %xmm11"); 2598c2ecf20Sopenharmony_ci asm volatile("movdqa %0, %%xmm12" : : "m" (p[16])); 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_ci /* xmm11 = qmul[q[16] ^ dq[16]] */ 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_ci asm volatile("pxor %xmm1, %xmm2"); 2648c2ecf20Sopenharmony_ci 2658c2ecf20Sopenharmony_ci /* xmm2 = p[0] ^ qmul[q[0] ^ dq[0]] */ 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_ci asm volatile("pxor %xmm11, %xmm12"); 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ci /* xmm12 = p[16] ^ qmul[q[16] ^ dq[16]] */ 2708c2ecf20Sopenharmony_ci 2718c2ecf20Sopenharmony_ci asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0])); 2728c2ecf20Sopenharmony_ci asm volatile("movdqa %%xmm11, %0" : "=m" (dq[16])); 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_ci asm volatile("movdqa %%xmm2, %0" : "=m" (p[0])); 2758c2ecf20Sopenharmony_ci asm volatile("movdqa %%xmm12, %0" : "=m" (p[16])); 2768c2ecf20Sopenharmony_ci 2778c2ecf20Sopenharmony_ci bytes -= 32; 2788c2ecf20Sopenharmony_ci p += 32; 2798c2ecf20Sopenharmony_ci q += 32; 2808c2ecf20Sopenharmony_ci dq += 32; 2818c2ecf20Sopenharmony_ci 2828c2ecf20Sopenharmony_ci#else 2838c2ecf20Sopenharmony_ci asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0])); 2848c2ecf20Sopenharmony_ci asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0])); 2858c2ecf20Sopenharmony_ci asm volatile("pxor %0, %%xmm3" : : "m" (q[0])); 2868c2ecf20Sopenharmony_ci asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16])); 2878c2ecf20Sopenharmony_ci 2888c2ecf20Sopenharmony_ci /* xmm3 = *q ^ *dq */ 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_ci asm volatile("movdqa %xmm3, %xmm6"); 2918c2ecf20Sopenharmony_ci asm volatile("movdqa %0, %%xmm2" : : "m" (p[0])); 2928c2ecf20Sopenharmony_ci asm volatile("psraw $4, %xmm3"); 2938c2ecf20Sopenharmony_ci asm volatile("pand %xmm7, %xmm6"); 2948c2ecf20Sopenharmony_ci asm volatile("pand %xmm7, %xmm3"); 2958c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm6, %xmm0"); 2968c2ecf20Sopenharmony_ci asm volatile("pshufb %xmm3, %xmm1"); 2978c2ecf20Sopenharmony_ci asm volatile("pxor %xmm0, %xmm1"); 2988c2ecf20Sopenharmony_ci 2998c2ecf20Sopenharmony_ci /* xmm1 = qmul[*q ^ *dq */ 3008c2ecf20Sopenharmony_ci 3018c2ecf20Sopenharmony_ci asm volatile("pxor %xmm1, %xmm2"); 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci /* xmm2 = *p ^ qmul[*q ^ *dq] */ 3048c2ecf20Sopenharmony_ci 3058c2ecf20Sopenharmony_ci asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0])); 3068c2ecf20Sopenharmony_ci asm volatile("movdqa %%xmm2, %0" : "=m" (p[0])); 3078c2ecf20Sopenharmony_ci 3088c2ecf20Sopenharmony_ci bytes -= 16; 3098c2ecf20Sopenharmony_ci p += 16; 3108c2ecf20Sopenharmony_ci q += 16; 3118c2ecf20Sopenharmony_ci dq += 16; 3128c2ecf20Sopenharmony_ci#endif 3138c2ecf20Sopenharmony_ci } 3148c2ecf20Sopenharmony_ci 3158c2ecf20Sopenharmony_ci kernel_fpu_end(); 3168c2ecf20Sopenharmony_ci} 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_ciconst struct raid6_recov_calls raid6_recov_ssse3 = { 3198c2ecf20Sopenharmony_ci .data2 = raid6_2data_recov_ssse3, 3208c2ecf20Sopenharmony_ci .datap = raid6_datap_recov_ssse3, 3218c2ecf20Sopenharmony_ci .valid = raid6_has_ssse3, 3228c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 3238c2ecf20Sopenharmony_ci .name = "ssse3x2", 3248c2ecf20Sopenharmony_ci#else 3258c2ecf20Sopenharmony_ci .name = "ssse3x1", 3268c2ecf20Sopenharmony_ci#endif 3278c2ecf20Sopenharmony_ci .priority = 1, 3288c2ecf20Sopenharmony_ci}; 329