// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 */
68c2ecf20Sopenharmony_ci
78c2ecf20Sopenharmony_ci#include <linux/raid/pq.h>
88c2ecf20Sopenharmony_ci#include "x86.h"
98c2ecf20Sopenharmony_ci
108c2ecf20Sopenharmony_cistatic int raid6_has_avx2(void)
118c2ecf20Sopenharmony_ci{
128c2ecf20Sopenharmony_ci	return boot_cpu_has(X86_FEATURE_AVX2) &&
138c2ecf20Sopenharmony_ci		boot_cpu_has(X86_FEATURE_AVX);
148c2ecf20Sopenharmony_ci}
158c2ecf20Sopenharmony_ci
168c2ecf20Sopenharmony_cistatic void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
178c2ecf20Sopenharmony_ci		int failb, void **ptrs)
188c2ecf20Sopenharmony_ci{
198c2ecf20Sopenharmony_ci	u8 *p, *q, *dp, *dq;
208c2ecf20Sopenharmony_ci	const u8 *pbmul;	/* P multiplier table for B data */
218c2ecf20Sopenharmony_ci	const u8 *qmul;		/* Q multiplier table (for both) */
228c2ecf20Sopenharmony_ci	const u8 x0f = 0x0f;
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_ci	p = (u8 *)ptrs[disks-2];
258c2ecf20Sopenharmony_ci	q = (u8 *)ptrs[disks-1];
268c2ecf20Sopenharmony_ci
278c2ecf20Sopenharmony_ci	/* Compute syndrome with zero for the missing data pages
288c2ecf20Sopenharmony_ci	   Use the dead data pages as temporary storage for
298c2ecf20Sopenharmony_ci	   delta p and delta q */
308c2ecf20Sopenharmony_ci	dp = (u8 *)ptrs[faila];
318c2ecf20Sopenharmony_ci	ptrs[faila] = (void *)raid6_empty_zero_page;
328c2ecf20Sopenharmony_ci	ptrs[disks-2] = dp;
338c2ecf20Sopenharmony_ci	dq = (u8 *)ptrs[failb];
348c2ecf20Sopenharmony_ci	ptrs[failb] = (void *)raid6_empty_zero_page;
358c2ecf20Sopenharmony_ci	ptrs[disks-1] = dq;
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci	raid6_call.gen_syndrome(disks, bytes, ptrs);
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_ci	/* Restore pointer table */
408c2ecf20Sopenharmony_ci	ptrs[faila]   = dp;
418c2ecf20Sopenharmony_ci	ptrs[failb]   = dq;
428c2ecf20Sopenharmony_ci	ptrs[disks-2] = p;
438c2ecf20Sopenharmony_ci	ptrs[disks-1] = q;
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci	/* Now, pick the proper data tables */
468c2ecf20Sopenharmony_ci	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
478c2ecf20Sopenharmony_ci	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
488c2ecf20Sopenharmony_ci		raid6_gfexp[failb]]];
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci	kernel_fpu_begin();
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci	/* ymm0 = x0f[16] */
538c2ecf20Sopenharmony_ci	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_ci	while (bytes) {
568c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
578c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0]));
588c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32]));
598c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0]));
608c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32]));
618c2ecf20Sopenharmony_ci		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0]));
628c2ecf20Sopenharmony_ci		asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32]));
638c2ecf20Sopenharmony_ci		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0]));
648c2ecf20Sopenharmony_ci		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32]));
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci		/*
678c2ecf20Sopenharmony_ci		 * 1 = dq[0]  ^ q[0]
688c2ecf20Sopenharmony_ci		 * 9 = dq[32] ^ q[32]
698c2ecf20Sopenharmony_ci		 * 0 = dp[0]  ^ p[0]
708c2ecf20Sopenharmony_ci		 * 8 = dp[32] ^ p[32]
718c2ecf20Sopenharmony_ci		 */
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
748c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci		asm volatile("vpsraw $4, %ymm1, %ymm3");
778c2ecf20Sopenharmony_ci		asm volatile("vpsraw $4, %ymm9, %ymm12");
788c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm1, %ymm1");
798c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm9, %ymm9");
808c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm3, %ymm3");
818c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm12, %ymm12");
828c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm9, %ymm4, %ymm14");
838c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
848c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm12, %ymm5, %ymm15");
858c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
868c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm14, %ymm15, %ymm15");
878c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm4, %ymm5, %ymm5");
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_ci		/*
908c2ecf20Sopenharmony_ci		 * 5 = qx[0]
918c2ecf20Sopenharmony_ci		 * 15 = qx[32]
928c2ecf20Sopenharmony_ci		 */
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
958c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
968c2ecf20Sopenharmony_ci		asm volatile("vpsraw $4, %ymm0, %ymm2");
978c2ecf20Sopenharmony_ci		asm volatile("vpsraw $4, %ymm8, %ymm6");
988c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm0, %ymm3");
998c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm8, %ymm14");
1008c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm2, %ymm2");
1018c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm6, %ymm6");
1028c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm14, %ymm4, %ymm12");
1038c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
1048c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm6, %ymm1, %ymm13");
1058c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
1068c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm4, %ymm1, %ymm1");
1078c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm12, %ymm13, %ymm13");
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci		/*
1108c2ecf20Sopenharmony_ci		 * 1  = pbmul[px[0]]
1118c2ecf20Sopenharmony_ci		 * 13 = pbmul[px[32]]
1128c2ecf20Sopenharmony_ci		 */
1138c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
1148c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm15, %ymm13, %ymm13");
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_ci		/*
1178c2ecf20Sopenharmony_ci		 * 1 = db = DQ
1188c2ecf20Sopenharmony_ci		 * 13 = db[32] = DQ[32]
1198c2ecf20Sopenharmony_ci		 */
1208c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
1218c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32]));
1228c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
1238c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm13, %ymm8, %ymm8");
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
1268c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32]));
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_ci		bytes -= 64;
1298c2ecf20Sopenharmony_ci		p += 64;
1308c2ecf20Sopenharmony_ci		q += 64;
1318c2ecf20Sopenharmony_ci		dp += 64;
1328c2ecf20Sopenharmony_ci		dq += 64;
1338c2ecf20Sopenharmony_ci#else
1348c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q));
1358c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p));
1368c2ecf20Sopenharmony_ci		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq));
1378c2ecf20Sopenharmony_ci		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp));
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci		/* 1 = dq ^ q;  0 = dp ^ p */
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
1428c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_ci		/*
1458c2ecf20Sopenharmony_ci		 * 1 = dq ^ q
1468c2ecf20Sopenharmony_ci		 * 3 = dq ^ p >> 4
1478c2ecf20Sopenharmony_ci		 */
1488c2ecf20Sopenharmony_ci		asm volatile("vpsraw $4, %ymm1, %ymm3");
1498c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm1, %ymm1");
1508c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm3, %ymm3");
1518c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
1528c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
1538c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm4, %ymm5, %ymm5");
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci		/* 5 = qx */
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
1588c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci		asm volatile("vpsraw $4, %ymm0, %ymm2");
1618c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm0, %ymm3");
1628c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm2, %ymm2");
1638c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
1648c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
1658c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm4, %ymm1, %ymm1");
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_ci		/* 1 = pbmul[px] */
1688c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
1698c2ecf20Sopenharmony_ci		/* 1 = db = DQ */
1708c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
1738c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci		bytes -= 32;
1768c2ecf20Sopenharmony_ci		p += 32;
1778c2ecf20Sopenharmony_ci		q += 32;
1788c2ecf20Sopenharmony_ci		dp += 32;
1798c2ecf20Sopenharmony_ci		dq += 32;
1808c2ecf20Sopenharmony_ci#endif
1818c2ecf20Sopenharmony_ci	}
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci	kernel_fpu_end();
1848c2ecf20Sopenharmony_ci}
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_cistatic void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
1878c2ecf20Sopenharmony_ci		void **ptrs)
1888c2ecf20Sopenharmony_ci{
1898c2ecf20Sopenharmony_ci	u8 *p, *q, *dq;
1908c2ecf20Sopenharmony_ci	const u8 *qmul;		/* Q multiplier table */
1918c2ecf20Sopenharmony_ci	const u8 x0f = 0x0f;
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_ci	p = (u8 *)ptrs[disks-2];
1948c2ecf20Sopenharmony_ci	q = (u8 *)ptrs[disks-1];
1958c2ecf20Sopenharmony_ci
1968c2ecf20Sopenharmony_ci	/* Compute syndrome with zero for the missing data page
1978c2ecf20Sopenharmony_ci	   Use the dead data page as temporary storage for delta q */
1988c2ecf20Sopenharmony_ci	dq = (u8 *)ptrs[faila];
1998c2ecf20Sopenharmony_ci	ptrs[faila] = (void *)raid6_empty_zero_page;
2008c2ecf20Sopenharmony_ci	ptrs[disks-1] = dq;
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_ci	raid6_call.gen_syndrome(disks, bytes, ptrs);
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci	/* Restore pointer table */
2058c2ecf20Sopenharmony_ci	ptrs[faila]   = dq;
2068c2ecf20Sopenharmony_ci	ptrs[disks-1] = q;
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_ci	/* Now, pick the proper data tables */
2098c2ecf20Sopenharmony_ci	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_ci	kernel_fpu_begin();
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
2148c2ecf20Sopenharmony_ci
2158c2ecf20Sopenharmony_ci	while (bytes) {
2168c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
2178c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
2188c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32]));
2198c2ecf20Sopenharmony_ci		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
2208c2ecf20Sopenharmony_ci		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32]));
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_ci		/*
2238c2ecf20Sopenharmony_ci		 * 3 = q[0] ^ dq[0]
2248c2ecf20Sopenharmony_ci		 * 8 = q[32] ^ dq[32]
2258c2ecf20Sopenharmony_ci		 */
2268c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
2278c2ecf20Sopenharmony_ci		asm volatile("vmovapd %ymm0, %ymm13");
2288c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
2298c2ecf20Sopenharmony_ci		asm volatile("vmovapd %ymm1, %ymm14");
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci		asm volatile("vpsraw $4, %ymm3, %ymm6");
2328c2ecf20Sopenharmony_ci		asm volatile("vpsraw $4, %ymm8, %ymm12");
2338c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm3, %ymm3");
2348c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm8, %ymm8");
2358c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm6, %ymm6");
2368c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm12, %ymm12");
2378c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
2388c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm8, %ymm13, %ymm13");
2398c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
2408c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm12, %ymm14, %ymm14");
2418c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm0, %ymm1, %ymm1");
2428c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm13, %ymm14, %ymm14");
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci		/*
2458c2ecf20Sopenharmony_ci		 * 1  = qmul[q[0]  ^ dq[0]]
2468c2ecf20Sopenharmony_ci		 * 14 = qmul[q[32] ^ dq[32]]
2478c2ecf20Sopenharmony_ci		 */
2488c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
2498c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32]));
2508c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm1, %ymm2, %ymm2");
2518c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm14, %ymm12, %ymm12");
2528c2ecf20Sopenharmony_ci
2538c2ecf20Sopenharmony_ci		/*
2548c2ecf20Sopenharmony_ci		 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
2558c2ecf20Sopenharmony_ci		 * 12 = p[32] ^ qmul[q[32] ^ dq[32]]
2568c2ecf20Sopenharmony_ci		 */
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
2598c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32]));
2608c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
2618c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32]));
2628c2ecf20Sopenharmony_ci
2638c2ecf20Sopenharmony_ci		bytes -= 64;
2648c2ecf20Sopenharmony_ci		p += 64;
2658c2ecf20Sopenharmony_ci		q += 64;
2668c2ecf20Sopenharmony_ci		dq += 64;
2678c2ecf20Sopenharmony_ci#else
2688c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
2698c2ecf20Sopenharmony_ci		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci		/* 3 = q ^ dq */
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
2748c2ecf20Sopenharmony_ci		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
2758c2ecf20Sopenharmony_ci
2768c2ecf20Sopenharmony_ci		asm volatile("vpsraw $4, %ymm3, %ymm6");
2778c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm3, %ymm3");
2788c2ecf20Sopenharmony_ci		asm volatile("vpand %ymm7, %ymm6, %ymm6");
2798c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
2808c2ecf20Sopenharmony_ci		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
2818c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm0, %ymm1, %ymm1");
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci		/* 1 = qmul[q ^ dq] */
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
2868c2ecf20Sopenharmony_ci		asm volatile("vpxor %ymm1, %ymm2, %ymm2");
2878c2ecf20Sopenharmony_ci
2888c2ecf20Sopenharmony_ci		/* 2 = p ^ qmul[q ^ dq] */
2898c2ecf20Sopenharmony_ci
2908c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
2918c2ecf20Sopenharmony_ci		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_ci		bytes -= 32;
2948c2ecf20Sopenharmony_ci		p += 32;
2958c2ecf20Sopenharmony_ci		q += 32;
2968c2ecf20Sopenharmony_ci		dq += 32;
2978c2ecf20Sopenharmony_ci#endif
2988c2ecf20Sopenharmony_ci	}
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_ci	kernel_fpu_end();
3018c2ecf20Sopenharmony_ci}
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ciconst struct raid6_recov_calls raid6_recov_avx2 = {
3048c2ecf20Sopenharmony_ci	.data2 = raid6_2data_recov_avx2,
3058c2ecf20Sopenharmony_ci	.datap = raid6_datap_recov_avx2,
3068c2ecf20Sopenharmony_ci	.valid = raid6_has_avx2,
3078c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64
3088c2ecf20Sopenharmony_ci	.name = "avx2x2",
3098c2ecf20Sopenharmony_ci#else
3108c2ecf20Sopenharmony_ci	.name = "avx2x1",
3118c2ecf20Sopenharmony_ci#endif
3128c2ecf20Sopenharmony_ci	.priority = 2,
3138c2ecf20Sopenharmony_ci};
314