18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2016 Intel Corporation
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Author: Gayatri Kammela <gayatri.kammela@intel.com>
68c2ecf20Sopenharmony_ci * Author: Megha Dey <megha.dey@linux.intel.com>
78c2ecf20Sopenharmony_ci */
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci#ifdef CONFIG_AS_AVX512
108c2ecf20Sopenharmony_ci
118c2ecf20Sopenharmony_ci#include <linux/raid/pq.h>
128c2ecf20Sopenharmony_ci#include "x86.h"
138c2ecf20Sopenharmony_ci
148c2ecf20Sopenharmony_cistatic int raid6_has_avx512(void)
158c2ecf20Sopenharmony_ci{
168c2ecf20Sopenharmony_ci	return boot_cpu_has(X86_FEATURE_AVX2) &&
178c2ecf20Sopenharmony_ci		boot_cpu_has(X86_FEATURE_AVX) &&
188c2ecf20Sopenharmony_ci		boot_cpu_has(X86_FEATURE_AVX512F) &&
198c2ecf20Sopenharmony_ci		boot_cpu_has(X86_FEATURE_AVX512BW) &&
208c2ecf20Sopenharmony_ci		boot_cpu_has(X86_FEATURE_AVX512VL) &&
218c2ecf20Sopenharmony_ci		boot_cpu_has(X86_FEATURE_AVX512DQ);
228c2ecf20Sopenharmony_ci}
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_cistatic void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
258c2ecf20Sopenharmony_ci				     int failb, void **ptrs)
268c2ecf20Sopenharmony_ci{
278c2ecf20Sopenharmony_ci	u8 *p, *q, *dp, *dq;
288c2ecf20Sopenharmony_ci	const u8 *pbmul;	/* P multiplier table for B data */
298c2ecf20Sopenharmony_ci	const u8 *qmul;		/* Q multiplier table (for both) */
308c2ecf20Sopenharmony_ci	const u8 x0f = 0x0f;
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci	p = (u8 *)ptrs[disks-2];
338c2ecf20Sopenharmony_ci	q = (u8 *)ptrs[disks-1];
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_ci	/*
368c2ecf20Sopenharmony_ci	 * Compute syndrome with zero for the missing data pages
378c2ecf20Sopenharmony_ci	 * Use the dead data pages as temporary storage for
388c2ecf20Sopenharmony_ci	 * delta p and delta q
398c2ecf20Sopenharmony_ci	 */
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_ci	dp = (u8 *)ptrs[faila];
428c2ecf20Sopenharmony_ci	ptrs[faila] = (void *)raid6_empty_zero_page;
438c2ecf20Sopenharmony_ci	ptrs[disks-2] = dp;
448c2ecf20Sopenharmony_ci	dq = (u8 *)ptrs[failb];
458c2ecf20Sopenharmony_ci	ptrs[failb] = (void *)raid6_empty_zero_page;
468c2ecf20Sopenharmony_ci	ptrs[disks-1] = dq;
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci	raid6_call.gen_syndrome(disks, bytes, ptrs);
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci	/* Restore pointer table */
518c2ecf20Sopenharmony_ci	ptrs[faila]   = dp;
528c2ecf20Sopenharmony_ci	ptrs[failb]   = dq;
538c2ecf20Sopenharmony_ci	ptrs[disks-2] = p;
548c2ecf20Sopenharmony_ci	ptrs[disks-1] = q;
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_ci	/* Now, pick the proper data tables */
578c2ecf20Sopenharmony_ci	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
588c2ecf20Sopenharmony_ci	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
598c2ecf20Sopenharmony_ci		raid6_gfexp[failb]]];
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci	kernel_fpu_begin();
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_ci	/* zmm0 = x0f[16] */
648c2ecf20Sopenharmony_ci	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	while (bytes) {
678c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
688c2ecf20Sopenharmony_ci		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
698c2ecf20Sopenharmony_ci			     "vmovdqa64 %1, %%zmm9\n\t"
708c2ecf20Sopenharmony_ci			     "vmovdqa64 %2, %%zmm0\n\t"
718c2ecf20Sopenharmony_ci			     "vmovdqa64 %3, %%zmm8\n\t"
728c2ecf20Sopenharmony_ci			     "vpxorq %4, %%zmm1, %%zmm1\n\t"
738c2ecf20Sopenharmony_ci			     "vpxorq %5, %%zmm9, %%zmm9\n\t"
748c2ecf20Sopenharmony_ci			     "vpxorq %6, %%zmm0, %%zmm0\n\t"
758c2ecf20Sopenharmony_ci			     "vpxorq %7, %%zmm8, %%zmm8"
768c2ecf20Sopenharmony_ci			     :
778c2ecf20Sopenharmony_ci			     : "m" (q[0]), "m" (q[64]), "m" (p[0]),
788c2ecf20Sopenharmony_ci			       "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
798c2ecf20Sopenharmony_ci			       "m" (dp[0]), "m" (dp[64]));
808c2ecf20Sopenharmony_ci
818c2ecf20Sopenharmony_ci		/*
828c2ecf20Sopenharmony_ci		 * 1 = dq[0]  ^ q[0]
838c2ecf20Sopenharmony_ci		 * 9 = dq[64] ^ q[64]
848c2ecf20Sopenharmony_ci		 * 0 = dp[0]  ^ p[0]
858c2ecf20Sopenharmony_ci		 * 8 = dp[64] ^ p[64]
868c2ecf20Sopenharmony_ci		 */
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
898c2ecf20Sopenharmony_ci			     "vbroadcasti64x2 %1, %%zmm5"
908c2ecf20Sopenharmony_ci			     :
918c2ecf20Sopenharmony_ci			     : "m" (qmul[0]), "m" (qmul[16]));
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
948c2ecf20Sopenharmony_ci			     "vpsraw $4, %%zmm9, %%zmm12\n\t"
958c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
968c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
978c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
988c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
998c2ecf20Sopenharmony_ci			     "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
1008c2ecf20Sopenharmony_ci			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
1018c2ecf20Sopenharmony_ci			     "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
1028c2ecf20Sopenharmony_ci			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
1038c2ecf20Sopenharmony_ci			     "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
1048c2ecf20Sopenharmony_ci			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
1058c2ecf20Sopenharmony_ci			     :
1068c2ecf20Sopenharmony_ci			     : );
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_ci		/*
1098c2ecf20Sopenharmony_ci		 * 5 = qx[0]
1108c2ecf20Sopenharmony_ci		 * 15 = qx[64]
1118c2ecf20Sopenharmony_ci		 */
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
1148c2ecf20Sopenharmony_ci			     "vbroadcasti64x2 %1, %%zmm1\n\t"
1158c2ecf20Sopenharmony_ci			     "vpsraw $4, %%zmm0, %%zmm2\n\t"
1168c2ecf20Sopenharmony_ci			     "vpsraw $4, %%zmm8, %%zmm6\n\t"
1178c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
1188c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
1198c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
1208c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
1218c2ecf20Sopenharmony_ci			     "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
1228c2ecf20Sopenharmony_ci			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
1238c2ecf20Sopenharmony_ci			     "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
1248c2ecf20Sopenharmony_ci			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
1258c2ecf20Sopenharmony_ci			     "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
1268c2ecf20Sopenharmony_ci			     "vpxorq %%zmm12, %%zmm13, %%zmm13"
1278c2ecf20Sopenharmony_ci			     :
1288c2ecf20Sopenharmony_ci			     : "m" (pbmul[0]), "m" (pbmul[16]));
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci		/*
1318c2ecf20Sopenharmony_ci		 * 1  = pbmul[px[0]]
1328c2ecf20Sopenharmony_ci		 * 13 = pbmul[px[64]]
1338c2ecf20Sopenharmony_ci		 */
1348c2ecf20Sopenharmony_ci		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
1358c2ecf20Sopenharmony_ci			     "vpxorq %%zmm15, %%zmm13, %%zmm13"
1368c2ecf20Sopenharmony_ci			     :
1378c2ecf20Sopenharmony_ci			     : );
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci		/*
1408c2ecf20Sopenharmony_ci		 * 1 = db = DQ
1418c2ecf20Sopenharmony_ci		 * 13 = db[64] = DQ[64]
1428c2ecf20Sopenharmony_ci		 */
1438c2ecf20Sopenharmony_ci		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
1448c2ecf20Sopenharmony_ci			     "vmovdqa64 %%zmm13,%1\n\t"
1458c2ecf20Sopenharmony_ci			     "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
1468c2ecf20Sopenharmony_ci			     "vpxorq %%zmm13, %%zmm8, %%zmm8"
1478c2ecf20Sopenharmony_ci			     :
1488c2ecf20Sopenharmony_ci			     : "m" (dq[0]), "m" (dq[64]));
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci		asm volatile("vmovdqa64 %%zmm0, %0\n\t"
1518c2ecf20Sopenharmony_ci			     "vmovdqa64 %%zmm8, %1"
1528c2ecf20Sopenharmony_ci			     :
1538c2ecf20Sopenharmony_ci			     : "m" (dp[0]), "m" (dp[64]));
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci		bytes -= 128;
1568c2ecf20Sopenharmony_ci		p += 128;
1578c2ecf20Sopenharmony_ci		q += 128;
1588c2ecf20Sopenharmony_ci		dp += 128;
1598c2ecf20Sopenharmony_ci		dq += 128;
1608c2ecf20Sopenharmony_ci#else
1618c2ecf20Sopenharmony_ci		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
1628c2ecf20Sopenharmony_ci			     "vmovdqa64 %1, %%zmm0\n\t"
1638c2ecf20Sopenharmony_ci			     "vpxorq %2, %%zmm1, %%zmm1\n\t"
1648c2ecf20Sopenharmony_ci			     "vpxorq %3, %%zmm0, %%zmm0"
1658c2ecf20Sopenharmony_ci			     :
1668c2ecf20Sopenharmony_ci			     : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci		/* 1 = dq ^ q;  0 = dp ^ p */
1698c2ecf20Sopenharmony_ci
1708c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
1718c2ecf20Sopenharmony_ci			     "vbroadcasti64x2 %1, %%zmm5"
1728c2ecf20Sopenharmony_ci			     :
1738c2ecf20Sopenharmony_ci			     : "m" (qmul[0]), "m" (qmul[16]));
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci		/*
1768c2ecf20Sopenharmony_ci		 * 1 = dq ^ q
1778c2ecf20Sopenharmony_ci		 * 3 = dq ^ p >> 4
1788c2ecf20Sopenharmony_ci		 */
1798c2ecf20Sopenharmony_ci		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
1808c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
1818c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
1828c2ecf20Sopenharmony_ci			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
1838c2ecf20Sopenharmony_ci			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
1848c2ecf20Sopenharmony_ci			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
1858c2ecf20Sopenharmony_ci			     :
1868c2ecf20Sopenharmony_ci			     : );
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci		/* 5 = qx */
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
1918c2ecf20Sopenharmony_ci			     "vbroadcasti64x2 %1, %%zmm1"
1928c2ecf20Sopenharmony_ci			     :
1938c2ecf20Sopenharmony_ci			     : "m" (pbmul[0]), "m" (pbmul[16]));
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_ci		asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
1968c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
1978c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
1988c2ecf20Sopenharmony_ci			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
1998c2ecf20Sopenharmony_ci			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
2008c2ecf20Sopenharmony_ci			     "vpxorq %%zmm4, %%zmm1, %%zmm1"
2018c2ecf20Sopenharmony_ci			     :
2028c2ecf20Sopenharmony_ci			     : );
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci		/* 1 = pbmul[px] */
2058c2ecf20Sopenharmony_ci		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
2068c2ecf20Sopenharmony_ci			     /* 1 = db = DQ */
2078c2ecf20Sopenharmony_ci			     "vmovdqa64 %%zmm1, %0\n\t"
2088c2ecf20Sopenharmony_ci			     :
2098c2ecf20Sopenharmony_ci			     : "m" (dq[0]));
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_ci		asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
2128c2ecf20Sopenharmony_ci			     "vmovdqa64 %%zmm0, %0"
2138c2ecf20Sopenharmony_ci			     :
2148c2ecf20Sopenharmony_ci			     : "m" (dp[0]));
2158c2ecf20Sopenharmony_ci
2168c2ecf20Sopenharmony_ci		bytes -= 64;
2178c2ecf20Sopenharmony_ci		p += 64;
2188c2ecf20Sopenharmony_ci		q += 64;
2198c2ecf20Sopenharmony_ci		dp += 64;
2208c2ecf20Sopenharmony_ci		dq += 64;
2218c2ecf20Sopenharmony_ci#endif
2228c2ecf20Sopenharmony_ci	}
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_ci	kernel_fpu_end();
2258c2ecf20Sopenharmony_ci}
2268c2ecf20Sopenharmony_ci
2278c2ecf20Sopenharmony_cistatic void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
2288c2ecf20Sopenharmony_ci				     void **ptrs)
2298c2ecf20Sopenharmony_ci{
2308c2ecf20Sopenharmony_ci	u8 *p, *q, *dq;
2318c2ecf20Sopenharmony_ci	const u8 *qmul;		/* Q multiplier table */
2328c2ecf20Sopenharmony_ci	const u8 x0f = 0x0f;
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_ci	p = (u8 *)ptrs[disks-2];
2358c2ecf20Sopenharmony_ci	q = (u8 *)ptrs[disks-1];
2368c2ecf20Sopenharmony_ci
2378c2ecf20Sopenharmony_ci	/*
2388c2ecf20Sopenharmony_ci	 * Compute syndrome with zero for the missing data page
2398c2ecf20Sopenharmony_ci	 * Use the dead data page as temporary storage for delta q
2408c2ecf20Sopenharmony_ci	 */
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	dq = (u8 *)ptrs[faila];
2438c2ecf20Sopenharmony_ci	ptrs[faila] = (void *)raid6_empty_zero_page;
2448c2ecf20Sopenharmony_ci	ptrs[disks-1] = dq;
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_ci	raid6_call.gen_syndrome(disks, bytes, ptrs);
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_ci	/* Restore pointer table */
2498c2ecf20Sopenharmony_ci	ptrs[faila]   = dq;
2508c2ecf20Sopenharmony_ci	ptrs[disks-1] = q;
2518c2ecf20Sopenharmony_ci
2528c2ecf20Sopenharmony_ci	/* Now, pick the proper data tables */
2538c2ecf20Sopenharmony_ci	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
2548c2ecf20Sopenharmony_ci
2558c2ecf20Sopenharmony_ci	kernel_fpu_begin();
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ci	while (bytes) {
2608c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
2618c2ecf20Sopenharmony_ci		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
2628c2ecf20Sopenharmony_ci			     "vmovdqa64 %1, %%zmm8\n\t"
2638c2ecf20Sopenharmony_ci			     "vpxorq %2, %%zmm3, %%zmm3\n\t"
2648c2ecf20Sopenharmony_ci			     "vpxorq %3, %%zmm8, %%zmm8"
2658c2ecf20Sopenharmony_ci			     :
2668c2ecf20Sopenharmony_ci			     : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
2678c2ecf20Sopenharmony_ci			       "m" (q[64]));
2688c2ecf20Sopenharmony_ci
2698c2ecf20Sopenharmony_ci		/*
2708c2ecf20Sopenharmony_ci		 * 3 = q[0] ^ dq[0]
2718c2ecf20Sopenharmony_ci		 * 8 = q[64] ^ dq[64]
2728c2ecf20Sopenharmony_ci		 */
2738c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
2748c2ecf20Sopenharmony_ci			     "vmovapd %%zmm0, %%zmm13\n\t"
2758c2ecf20Sopenharmony_ci			     "vbroadcasti64x2 %1, %%zmm1\n\t"
2768c2ecf20Sopenharmony_ci			     "vmovapd %%zmm1, %%zmm14"
2778c2ecf20Sopenharmony_ci			     :
2788c2ecf20Sopenharmony_ci			     : "m" (qmul[0]), "m" (qmul[16]));
2798c2ecf20Sopenharmony_ci
2808c2ecf20Sopenharmony_ci		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
2818c2ecf20Sopenharmony_ci			     "vpsraw $4, %%zmm8, %%zmm12\n\t"
2828c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
2838c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
2848c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
2858c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
2868c2ecf20Sopenharmony_ci			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
2878c2ecf20Sopenharmony_ci			     "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
2888c2ecf20Sopenharmony_ci			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
2898c2ecf20Sopenharmony_ci			     "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
2908c2ecf20Sopenharmony_ci			     "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
2918c2ecf20Sopenharmony_ci			     "vpxorq %%zmm13, %%zmm14, %%zmm14"
2928c2ecf20Sopenharmony_ci			     :
2938c2ecf20Sopenharmony_ci			     : );
2948c2ecf20Sopenharmony_ci
2958c2ecf20Sopenharmony_ci		/*
2968c2ecf20Sopenharmony_ci		 * 1  = qmul[q[0]  ^ dq[0]]
2978c2ecf20Sopenharmony_ci		 * 14 = qmul[q[64] ^ dq[64]]
2988c2ecf20Sopenharmony_ci		 */
2998c2ecf20Sopenharmony_ci		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
3008c2ecf20Sopenharmony_ci			     "vmovdqa64 %1, %%zmm12\n\t"
3018c2ecf20Sopenharmony_ci			     "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
3028c2ecf20Sopenharmony_ci			     "vpxorq %%zmm14, %%zmm12, %%zmm12"
3038c2ecf20Sopenharmony_ci			     :
3048c2ecf20Sopenharmony_ci			     : "m" (p[0]), "m" (p[64]));
3058c2ecf20Sopenharmony_ci
3068c2ecf20Sopenharmony_ci		/*
3078c2ecf20Sopenharmony_ci		 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
3088c2ecf20Sopenharmony_ci		 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
3098c2ecf20Sopenharmony_ci		 */
3108c2ecf20Sopenharmony_ci
3118c2ecf20Sopenharmony_ci		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
3128c2ecf20Sopenharmony_ci			     "vmovdqa64 %%zmm14, %1\n\t"
3138c2ecf20Sopenharmony_ci			     "vmovdqa64 %%zmm2, %2\n\t"
3148c2ecf20Sopenharmony_ci			     "vmovdqa64 %%zmm12,%3"
3158c2ecf20Sopenharmony_ci			     :
3168c2ecf20Sopenharmony_ci			     : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
3178c2ecf20Sopenharmony_ci			       "m" (p[64]));
3188c2ecf20Sopenharmony_ci
3198c2ecf20Sopenharmony_ci		bytes -= 128;
3208c2ecf20Sopenharmony_ci		p += 128;
3218c2ecf20Sopenharmony_ci		q += 128;
3228c2ecf20Sopenharmony_ci		dq += 128;
3238c2ecf20Sopenharmony_ci#else
3248c2ecf20Sopenharmony_ci		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
3258c2ecf20Sopenharmony_ci			     "vpxorq %1, %%zmm3, %%zmm3"
3268c2ecf20Sopenharmony_ci			     :
3278c2ecf20Sopenharmony_ci			     : "m" (dq[0]), "m" (q[0]));
3288c2ecf20Sopenharmony_ci
3298c2ecf20Sopenharmony_ci		/* 3 = q ^ dq */
3308c2ecf20Sopenharmony_ci
3318c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
3328c2ecf20Sopenharmony_ci			     "vbroadcasti64x2 %1, %%zmm1"
3338c2ecf20Sopenharmony_ci			     :
3348c2ecf20Sopenharmony_ci			     : "m" (qmul[0]), "m" (qmul[16]));
3358c2ecf20Sopenharmony_ci
3368c2ecf20Sopenharmony_ci		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
3378c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
3388c2ecf20Sopenharmony_ci			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
3398c2ecf20Sopenharmony_ci			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
3408c2ecf20Sopenharmony_ci			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
3418c2ecf20Sopenharmony_ci			     "vpxorq %%zmm0, %%zmm1, %%zmm1"
3428c2ecf20Sopenharmony_ci			     :
3438c2ecf20Sopenharmony_ci			     : );
3448c2ecf20Sopenharmony_ci
3458c2ecf20Sopenharmony_ci		/* 1 = qmul[q ^ dq] */
3468c2ecf20Sopenharmony_ci
3478c2ecf20Sopenharmony_ci		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
3488c2ecf20Sopenharmony_ci			     "vpxorq %%zmm1, %%zmm2, %%zmm2"
3498c2ecf20Sopenharmony_ci			     :
3508c2ecf20Sopenharmony_ci			     : "m" (p[0]));
3518c2ecf20Sopenharmony_ci
3528c2ecf20Sopenharmony_ci		/* 2 = p ^ qmul[q ^ dq] */
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
3558c2ecf20Sopenharmony_ci			     "vmovdqa64 %%zmm2, %1"
3568c2ecf20Sopenharmony_ci			     :
3578c2ecf20Sopenharmony_ci			     : "m" (dq[0]), "m" (p[0]));
3588c2ecf20Sopenharmony_ci
3598c2ecf20Sopenharmony_ci		bytes -= 64;
3608c2ecf20Sopenharmony_ci		p += 64;
3618c2ecf20Sopenharmony_ci		q += 64;
3628c2ecf20Sopenharmony_ci		dq += 64;
3638c2ecf20Sopenharmony_ci#endif
3648c2ecf20Sopenharmony_ci	}
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ci	kernel_fpu_end();
3678c2ecf20Sopenharmony_ci}
3688c2ecf20Sopenharmony_ci
3698c2ecf20Sopenharmony_ciconst struct raid6_recov_calls raid6_recov_avx512 = {
3708c2ecf20Sopenharmony_ci	.data2 = raid6_2data_recov_avx512,
3718c2ecf20Sopenharmony_ci	.datap = raid6_datap_recov_avx512,
3728c2ecf20Sopenharmony_ci	.valid = raid6_has_avx512,
3738c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
3748c2ecf20Sopenharmony_ci	.name = "avx512x2",
3758c2ecf20Sopenharmony_ci#else
3768c2ecf20Sopenharmony_ci	.name = "avx512x1",
3778c2ecf20Sopenharmony_ci#endif
3788c2ecf20Sopenharmony_ci	.priority = 3,
3798c2ecf20Sopenharmony_ci};
3808c2ecf20Sopenharmony_ci
3818c2ecf20Sopenharmony_ci#else
3828c2ecf20Sopenharmony_ci#warning "your version of binutils lacks AVX512 support"
3838c2ecf20Sopenharmony_ci#endif
384