// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 */

/*
 * AVX-512 implementation of the RAID-6 recovery callbacks.
 *
 * Both recovery routines multiply 64-byte vectors by a constant using
 * two 16-entry nibble lookup tables taken from raid6_vgfmul[]: entry
 * [0..15] is indexed by the low nibble of each byte, entry [16..31] by
 * the high nibble, and the two vpshufb results are XORed together.
 *
 * On CONFIG_X86_64 the loops are unrolled to handle 128 bytes per
 * iteration (registers zmm8-zmm15 are used for the second half);
 * otherwise 64 bytes are handled per iteration.
 */

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"

/*
 * raid6_has_avx512 - report whether this implementation may be used.
 *
 * Returns non-zero only when the boot CPU advertises every feature bit
 * tested below (AVX, AVX2, AVX512F, AVX512BW, AVX512VL and AVX512DQ).
 */
static int raid6_has_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512BW) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		boot_cpu_has(X86_FEATURE_AVX512DQ);
}

/*
 * raid6_2data_recov_avx512 - rebuild two failed data blocks from P and Q.
 * @disks: number of entries in @ptrs; P is ptrs[disks-2], Q is ptrs[disks-1]
 * @bytes: number of bytes to process in every buffer
 * @faila: index in @ptrs of the first failed data block
 * @failb: index in @ptrs of the second failed data block
 * @ptrs:  buffer pointer table; modified temporarily and restored before
 *         the vector loop starts
 *
 * The recovered data is written into the buffers at ptrs[faila] and
 * ptrs[failb]; the P and Q buffers are only read by the vector loop.
 *
 * Requirements visible from the code:
 *  - @bytes must be a multiple of the per-iteration stride (128 on
 *    CONFIG_X86_64, 64 otherwise): the loop runs until the size_t counter
 *    reaches exactly zero, so any other value wraps around.
 *  - every buffer is accessed with vmovdqa64 (the aligned move), so the
 *    P, Q and failed-data buffers must be 64-byte aligned.
 *  - (failb - faila) is used as an index into raid6_gfexi[], so
 *    presumably faila < failb is expected -- confirm against callers.
 *
 * NOTE(review): the stores to dp/dq are expressed through "m" *input*
 * operands and there is no "memory" clobber, so the compiler is not told
 * that these asm statements write memory. The constraints are kept
 * exactly as they were; confirm before relying on compiler-visible
 * accesses to these buffers around the loop.
 */
static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
				     int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;	/* nibble mask, broadcast into zmm7 below */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */

	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dp;
	ptrs[failb]   = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

	kernel_fpu_begin();

	/* zmm7 = x0f[64]: the 0x0f nibble mask in every byte lane */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm9\n\t"
			     "vmovdqa64 %2, %%zmm0\n\t"
			     "vmovdqa64 %3, %%zmm8\n\t"
			     "vpxorq %4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %5, %%zmm9, %%zmm9\n\t"
			     "vpxorq %6, %%zmm0, %%zmm0\n\t"
			     "vpxorq %7, %%zmm8, %%zmm8"
			     :
			     : "m" (q[0]), "m" (q[64]), "m" (p[0]),
			       "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
			       "m" (dp[0]), "m" (dp[64]));

		/*
		 * 1 = dq[0]  ^ q[0]
		 * 9 = dq[64] ^ q[64]
		 * 0 = dp[0]  ^ p[0]
		 * 8 = dp[64] ^ p[64]
		 */

		/* 4 = low-nibble half of qmul, 5 = high-nibble half */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * Split each byte into nibbles (the word-wide shift leaks
		 * bits across bytes, which the 0x0f mask removes), look
		 * both nibbles up and combine the two partial products.
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpsraw $4, %%zmm9, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/*
		 * 5 = qx[0]
		 * 15 = qx[64]
		 */

		/*
		 * Same nibble lookup with the pbmul table applied to px
		 * (zmm0/zmm8). The masked low nibbles go to zmm3/zmm14 so
		 * that zmm0/zmm8 keep px intact for the final XOR.
		 */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpsraw $4, %%zmm8, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm12, %%zmm13, %%zmm13"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/*
		 * 1  = pbmul[px[0]]
		 * 13 = pbmul[px[64]]
		 */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm15, %%zmm13, %%zmm13"
			     :
			     : );

		/*
		 * 1  = db = DQ
		 * 13 = db[64] = DQ[64]
		 *
		 * Store the recovered B data into dq, then fold it into
		 * px (zmm0/zmm8) to obtain the recovered A data.
		 */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm13,%1\n\t"
			     "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vpxorq %%zmm13, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]));

		/* 0 / 8 = px ^ db: store into dp */
		asm volatile("vmovdqa64 %%zmm0, %0\n\t"
			     "vmovdqa64 %%zmm8, %1"
			     :
			     : "m" (dp[0]), "m" (dp[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dp += 128;
		dq += 128;
#else
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm0\n\t"
			     "vpxorq %2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %3, %%zmm0, %%zmm0"
			     :
			     : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));

		/* 1 = dq ^ q;  0 = dp ^ p */

		/* 4 = low-nibble half of qmul, 5 = high-nibble half */
		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * After this block:
		 * 1 = (dq ^ q) & 0x0f        (low nibbles)
		 * 3 = ((dq ^ q) >> 4) & 0x0f (high nibbles)
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/* 5 = qx */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/* zmm0 (px) is left untouched; its nibbles go to 3 and 2 */
		asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = pbmul[px] */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     /* 1 = db = DQ */
			     "vmovdqa64 %%zmm1, %0\n\t"
			     :
			     : "m" (dq[0]));

		/* 0 = px ^ db: store into dp */
		asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vmovdqa64 %%zmm0, %0"
			     :
			     : "m" (dp[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}

/*
 * raid6_datap_recov_avx512 - rebuild one failed data block and P from Q.
 * @disks: number of entries in @ptrs; P is ptrs[disks-2], Q is ptrs[disks-1]
 * @bytes: number of bytes to process in every buffer
 * @faila: index in @ptrs of the failed data block
 * @ptrs:  buffer pointer table; modified temporarily and restored before
 *         the vector loop starts
 *
 * The recovered data is written into the buffer at ptrs[faila] and the
 * P buffer is updated in place (p ^= recovered data); the Q buffer is
 * only read by the vector loop.
 *
 * As in raid6_2data_recov_avx512(): @bytes must be a multiple of the
 * per-iteration stride (128 on CONFIG_X86_64, 64 otherwise) because the
 * loop only stops when the counter reaches exactly zero, and all
 * buffers are accessed with the aligned vmovdqa64 move.
 *
 * NOTE(review): the stores to dq/p are expressed through "m" *input*
 * operands with no "memory" clobber; constraints are kept unchanged.
 */
static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
				     void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;	/* nibble mask, broadcast into zmm7 below */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */

	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila]   = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	/* zmm7 = x0f[64]: the 0x0f nibble mask in every byte lane */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vmovdqa64 %1, %%zmm8\n\t"
			     "vpxorq %2, %%zmm3, %%zmm3\n\t"
			     "vpxorq %3, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
			       "m" (q[64]));

		/*
		 * 3 = q[0] ^ dq[0]
		 * 8 = q[64] ^ dq[64]
		 */

		/*
		 * 0 and 13 = low-nibble half of qmul,
		 * 1 and 14 = high-nibble half (one copy per 64-byte half,
		 * because vpshufb below overwrites its table register).
		 */
		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vmovapd %%zmm0, %%zmm13\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vmovapd %%zmm1, %%zmm14"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpsraw $4, %%zmm8, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm13, %%zmm14, %%zmm14"
			     :
			     : );

		/*
		 * 1  = qmul[q[0]  ^ dq[0]]
		 * 14 = qmul[q[64] ^ dq[64]]
		 */
		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vmovdqa64 %1, %%zmm12\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
			     "vpxorq %%zmm14, %%zmm12, %%zmm12"
			     :
			     : "m" (p[0]), "m" (p[64]));

		/*
		 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
		 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
		 */

		/* Store recovered data into dq and the updated P into p */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm14, %1\n\t"
			     "vmovdqa64 %%zmm2, %2\n\t"
			     "vmovdqa64 %%zmm12,%3"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
			       "m" (p[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dq += 128;
#else
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vpxorq %1, %%zmm3, %%zmm3"
			     :
			     : "m" (dq[0]), "m" (q[0]));

		/* 3 = q ^ dq */

		/* 0 = low-nibble half of qmul, 1 = high-nibble half */
		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = qmul[q ^ dq] */

		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2"
			     :
			     : "m" (p[0]));

		/* 2 = p ^ qmul[q ^ dq] */

		/* Store recovered data into dq and the updated P into p */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm2, %1"
			     :
			     : "m" (dq[0]), "m" (p[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}

/*
 * Callback table for this implementation. The name reflects the unroll
 * factor of the loops above: two 64-byte vectors per iteration on
 * CONFIG_X86_64, one otherwise.
 */
const struct raid6_recov_calls raid6_recov_avx512 = {
	.data2 = raid6_2data_recov_avx512,
	.datap = raid6_datap_recov_avx512,
	.valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
	.name = "avx512x2",
#else
	.name = "avx512x1",
#endif
	.priority = 3,
};

#else
#warning "your version of binutils lacks AVX512 support"
#endif