// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX)
 *
 * Copyright 2023 WANG Xuerui <git@xen0n.name>
 *
 * Based on the generic RAID-6 code (int.uc):
 *
 * Copyright 2002-2004 H. Peter Anvin
 */
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#include <linux/raid/pq.h>
138c2ecf20Sopenharmony_ci#include "loongarch.h"
148c2ecf20Sopenharmony_ci
/*
 * The vector algorithms are currently priority 0, which means the generic
 * scalar algorithms are not disabled when vector support is present.
 * This mirrors the similar LoongArch RAID5 XOR code, and the main reason is
 * repeated here: it cannot be ruled out at this point in time that some
 * future (maybe reduced) models could run the vector algorithms slower than
 * the scalar ones, maybe for errata or micro-op reasons. It may be
 * appropriate to revisit this after one or two more uarch generations.
 */
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci#ifdef CONFIG_CPU_HAS_LSX
268c2ecf20Sopenharmony_ci#define NSIZE 16
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_cistatic int raid6_has_lsx(void)
298c2ecf20Sopenharmony_ci{
308c2ecf20Sopenharmony_ci	return cpu_has_lsx;
318c2ecf20Sopenharmony_ci}
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_cistatic void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs)
348c2ecf20Sopenharmony_ci{
358c2ecf20Sopenharmony_ci	u8 **dptr = (u8 **)ptrs;
368c2ecf20Sopenharmony_ci	u8 *p, *q;
378c2ecf20Sopenharmony_ci	int d, z, z0;
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_ci	z0 = disks - 3;		/* Highest data disk */
408c2ecf20Sopenharmony_ci	p = dptr[z0+1];		/* XOR parity */
418c2ecf20Sopenharmony_ci	q = dptr[z0+2];		/* RS syndrome */
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_ci	kernel_fpu_begin();
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci	/*
468c2ecf20Sopenharmony_ci	 * $vr0, $vr1, $vr2, $vr3: wp
478c2ecf20Sopenharmony_ci	 * $vr4, $vr5, $vr6, $vr7: wq
488c2ecf20Sopenharmony_ci	 * $vr8, $vr9, $vr10, $vr11: wd
498c2ecf20Sopenharmony_ci	 * $vr12, $vr13, $vr14, $vr15: w2
508c2ecf20Sopenharmony_ci	 * $vr16, $vr17, $vr18, $vr19: w1
518c2ecf20Sopenharmony_ci	 */
528c2ecf20Sopenharmony_ci	for (d = 0; d < bytes; d += NSIZE*4) {
538c2ecf20Sopenharmony_ci		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
548c2ecf20Sopenharmony_ci		asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
558c2ecf20Sopenharmony_ci		asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
568c2ecf20Sopenharmony_ci		asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
578c2ecf20Sopenharmony_ci		asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
588c2ecf20Sopenharmony_ci		asm volatile("vori.b $vr4, $vr0, 0");
598c2ecf20Sopenharmony_ci		asm volatile("vori.b $vr5, $vr1, 0");
608c2ecf20Sopenharmony_ci		asm volatile("vori.b $vr6, $vr2, 0");
618c2ecf20Sopenharmony_ci		asm volatile("vori.b $vr7, $vr3, 0");
628c2ecf20Sopenharmony_ci		for (z = z0-1; z >= 0; z--) {
638c2ecf20Sopenharmony_ci			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
648c2ecf20Sopenharmony_ci			asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
658c2ecf20Sopenharmony_ci			asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
668c2ecf20Sopenharmony_ci			asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
678c2ecf20Sopenharmony_ci			asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
688c2ecf20Sopenharmony_ci			/* wp$$ ^= wd$$; */
698c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr0, $vr0, $vr8");
708c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr1, $vr1, $vr9");
718c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr2, $vr2, $vr10");
728c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr3, $vr3, $vr11");
738c2ecf20Sopenharmony_ci			/* w2$$ = MASK(wq$$); */
748c2ecf20Sopenharmony_ci			asm volatile("vslti.b $vr12, $vr4, 0");
758c2ecf20Sopenharmony_ci			asm volatile("vslti.b $vr13, $vr5, 0");
768c2ecf20Sopenharmony_ci			asm volatile("vslti.b $vr14, $vr6, 0");
778c2ecf20Sopenharmony_ci			asm volatile("vslti.b $vr15, $vr7, 0");
788c2ecf20Sopenharmony_ci			/* w1$$ = SHLBYTE(wq$$); */
798c2ecf20Sopenharmony_ci			asm volatile("vslli.b $vr16, $vr4, 1");
808c2ecf20Sopenharmony_ci			asm volatile("vslli.b $vr17, $vr5, 1");
818c2ecf20Sopenharmony_ci			asm volatile("vslli.b $vr18, $vr6, 1");
828c2ecf20Sopenharmony_ci			asm volatile("vslli.b $vr19, $vr7, 1");
838c2ecf20Sopenharmony_ci			/* w2$$ &= NBYTES(0x1d); */
848c2ecf20Sopenharmony_ci			asm volatile("vandi.b $vr12, $vr12, 0x1d");
858c2ecf20Sopenharmony_ci			asm volatile("vandi.b $vr13, $vr13, 0x1d");
868c2ecf20Sopenharmony_ci			asm volatile("vandi.b $vr14, $vr14, 0x1d");
878c2ecf20Sopenharmony_ci			asm volatile("vandi.b $vr15, $vr15, 0x1d");
888c2ecf20Sopenharmony_ci			/* w1$$ ^= w2$$; */
898c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr16, $vr16, $vr12");
908c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr17, $vr17, $vr13");
918c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr18, $vr18, $vr14");
928c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr19, $vr19, $vr15");
938c2ecf20Sopenharmony_ci			/* wq$$ = w1$$ ^ wd$$; */
948c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr4, $vr16, $vr8");
958c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr5, $vr17, $vr9");
968c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr6, $vr18, $vr10");
978c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr7, $vr19, $vr11");
988c2ecf20Sopenharmony_ci		}
998c2ecf20Sopenharmony_ci		/* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
1008c2ecf20Sopenharmony_ci		asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0]));
1018c2ecf20Sopenharmony_ci		asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1]));
1028c2ecf20Sopenharmony_ci		asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2]));
1038c2ecf20Sopenharmony_ci		asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3]));
1048c2ecf20Sopenharmony_ci		/* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
1058c2ecf20Sopenharmony_ci		asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0]));
1068c2ecf20Sopenharmony_ci		asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1]));
1078c2ecf20Sopenharmony_ci		asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2]));
1088c2ecf20Sopenharmony_ci		asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3]));
1098c2ecf20Sopenharmony_ci	}
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci	kernel_fpu_end();
1128c2ecf20Sopenharmony_ci}
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_cistatic void raid6_lsx_xor_syndrome(int disks, int start, int stop,
1158c2ecf20Sopenharmony_ci				   size_t bytes, void **ptrs)
1168c2ecf20Sopenharmony_ci{
1178c2ecf20Sopenharmony_ci	u8 **dptr = (u8 **)ptrs;
1188c2ecf20Sopenharmony_ci	u8 *p, *q;
1198c2ecf20Sopenharmony_ci	int d, z, z0;
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci	z0 = stop;		/* P/Q right side optimization */
1228c2ecf20Sopenharmony_ci	p = dptr[disks-2];	/* XOR parity */
1238c2ecf20Sopenharmony_ci	q = dptr[disks-1];	/* RS syndrome */
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci	kernel_fpu_begin();
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci	/*
1288c2ecf20Sopenharmony_ci	 * $vr0, $vr1, $vr2, $vr3: wp
1298c2ecf20Sopenharmony_ci	 * $vr4, $vr5, $vr6, $vr7: wq
1308c2ecf20Sopenharmony_ci	 * $vr8, $vr9, $vr10, $vr11: wd
1318c2ecf20Sopenharmony_ci	 * $vr12, $vr13, $vr14, $vr15: w2
1328c2ecf20Sopenharmony_ci	 * $vr16, $vr17, $vr18, $vr19: w1
1338c2ecf20Sopenharmony_ci	 */
1348c2ecf20Sopenharmony_ci	for (d = 0; d < bytes; d += NSIZE*4) {
1358c2ecf20Sopenharmony_ci		/* P/Q data pages */
1368c2ecf20Sopenharmony_ci		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
1378c2ecf20Sopenharmony_ci		asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
1388c2ecf20Sopenharmony_ci		asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
1398c2ecf20Sopenharmony_ci		asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
1408c2ecf20Sopenharmony_ci		asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
1418c2ecf20Sopenharmony_ci		asm volatile("vori.b $vr4, $vr0, 0");
1428c2ecf20Sopenharmony_ci		asm volatile("vori.b $vr5, $vr1, 0");
1438c2ecf20Sopenharmony_ci		asm volatile("vori.b $vr6, $vr2, 0");
1448c2ecf20Sopenharmony_ci		asm volatile("vori.b $vr7, $vr3, 0");
1458c2ecf20Sopenharmony_ci		for (z = z0-1; z >= start; z--) {
1468c2ecf20Sopenharmony_ci			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
1478c2ecf20Sopenharmony_ci			asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
1488c2ecf20Sopenharmony_ci			asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
1498c2ecf20Sopenharmony_ci			asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
1508c2ecf20Sopenharmony_ci			asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
1518c2ecf20Sopenharmony_ci			/* wp$$ ^= wd$$; */
1528c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr0, $vr0, $vr8");
1538c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr1, $vr1, $vr9");
1548c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr2, $vr2, $vr10");
1558c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr3, $vr3, $vr11");
1568c2ecf20Sopenharmony_ci			/* w2$$ = MASK(wq$$); */
1578c2ecf20Sopenharmony_ci			asm volatile("vslti.b $vr12, $vr4, 0");
1588c2ecf20Sopenharmony_ci			asm volatile("vslti.b $vr13, $vr5, 0");
1598c2ecf20Sopenharmony_ci			asm volatile("vslti.b $vr14, $vr6, 0");
1608c2ecf20Sopenharmony_ci			asm volatile("vslti.b $vr15, $vr7, 0");
1618c2ecf20Sopenharmony_ci			/* w1$$ = SHLBYTE(wq$$); */
1628c2ecf20Sopenharmony_ci			asm volatile("vslli.b $vr16, $vr4, 1");
1638c2ecf20Sopenharmony_ci			asm volatile("vslli.b $vr17, $vr5, 1");
1648c2ecf20Sopenharmony_ci			asm volatile("vslli.b $vr18, $vr6, 1");
1658c2ecf20Sopenharmony_ci			asm volatile("vslli.b $vr19, $vr7, 1");
1668c2ecf20Sopenharmony_ci			/* w2$$ &= NBYTES(0x1d); */
1678c2ecf20Sopenharmony_ci			asm volatile("vandi.b $vr12, $vr12, 0x1d");
1688c2ecf20Sopenharmony_ci			asm volatile("vandi.b $vr13, $vr13, 0x1d");
1698c2ecf20Sopenharmony_ci			asm volatile("vandi.b $vr14, $vr14, 0x1d");
1708c2ecf20Sopenharmony_ci			asm volatile("vandi.b $vr15, $vr15, 0x1d");
1718c2ecf20Sopenharmony_ci			/* w1$$ ^= w2$$; */
1728c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr16, $vr16, $vr12");
1738c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr17, $vr17, $vr13");
1748c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr18, $vr18, $vr14");
1758c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr19, $vr19, $vr15");
1768c2ecf20Sopenharmony_ci			/* wq$$ = w1$$ ^ wd$$; */
1778c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr4, $vr16, $vr8");
1788c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr5, $vr17, $vr9");
1798c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr6, $vr18, $vr10");
1808c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr7, $vr19, $vr11");
1818c2ecf20Sopenharmony_ci		}
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci		/* P/Q left side optimization */
1848c2ecf20Sopenharmony_ci		for (z = start-1; z >= 0; z--) {
1858c2ecf20Sopenharmony_ci			/* w2$$ = MASK(wq$$); */
1868c2ecf20Sopenharmony_ci			asm volatile("vslti.b $vr12, $vr4, 0");
1878c2ecf20Sopenharmony_ci			asm volatile("vslti.b $vr13, $vr5, 0");
1888c2ecf20Sopenharmony_ci			asm volatile("vslti.b $vr14, $vr6, 0");
1898c2ecf20Sopenharmony_ci			asm volatile("vslti.b $vr15, $vr7, 0");
1908c2ecf20Sopenharmony_ci			/* w1$$ = SHLBYTE(wq$$); */
1918c2ecf20Sopenharmony_ci			asm volatile("vslli.b $vr16, $vr4, 1");
1928c2ecf20Sopenharmony_ci			asm volatile("vslli.b $vr17, $vr5, 1");
1938c2ecf20Sopenharmony_ci			asm volatile("vslli.b $vr18, $vr6, 1");
1948c2ecf20Sopenharmony_ci			asm volatile("vslli.b $vr19, $vr7, 1");
1958c2ecf20Sopenharmony_ci			/* w2$$ &= NBYTES(0x1d); */
1968c2ecf20Sopenharmony_ci			asm volatile("vandi.b $vr12, $vr12, 0x1d");
1978c2ecf20Sopenharmony_ci			asm volatile("vandi.b $vr13, $vr13, 0x1d");
1988c2ecf20Sopenharmony_ci			asm volatile("vandi.b $vr14, $vr14, 0x1d");
1998c2ecf20Sopenharmony_ci			asm volatile("vandi.b $vr15, $vr15, 0x1d");
2008c2ecf20Sopenharmony_ci			/* wq$$ = w1$$ ^ w2$$; */
2018c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr4, $vr16, $vr12");
2028c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr5, $vr17, $vr13");
2038c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr6, $vr18, $vr14");
2048c2ecf20Sopenharmony_ci			asm volatile("vxor.v $vr7, $vr19, $vr15");
2058c2ecf20Sopenharmony_ci		}
2068c2ecf20Sopenharmony_ci		/*
2078c2ecf20Sopenharmony_ci		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
2088c2ecf20Sopenharmony_ci		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
2098c2ecf20Sopenharmony_ci		 */
2108c2ecf20Sopenharmony_ci		asm volatile(
2118c2ecf20Sopenharmony_ci			"vld $vr20, %0\n\t"
2128c2ecf20Sopenharmony_ci			"vld $vr21, %1\n\t"
2138c2ecf20Sopenharmony_ci			"vld $vr22, %2\n\t"
2148c2ecf20Sopenharmony_ci			"vld $vr23, %3\n\t"
2158c2ecf20Sopenharmony_ci			"vld $vr24, %4\n\t"
2168c2ecf20Sopenharmony_ci			"vld $vr25, %5\n\t"
2178c2ecf20Sopenharmony_ci			"vld $vr26, %6\n\t"
2188c2ecf20Sopenharmony_ci			"vld $vr27, %7\n\t"
2198c2ecf20Sopenharmony_ci			"vxor.v $vr20, $vr20, $vr0\n\t"
2208c2ecf20Sopenharmony_ci			"vxor.v $vr21, $vr21, $vr1\n\t"
2218c2ecf20Sopenharmony_ci			"vxor.v $vr22, $vr22, $vr2\n\t"
2228c2ecf20Sopenharmony_ci			"vxor.v $vr23, $vr23, $vr3\n\t"
2238c2ecf20Sopenharmony_ci			"vxor.v $vr24, $vr24, $vr4\n\t"
2248c2ecf20Sopenharmony_ci			"vxor.v $vr25, $vr25, $vr5\n\t"
2258c2ecf20Sopenharmony_ci			"vxor.v $vr26, $vr26, $vr6\n\t"
2268c2ecf20Sopenharmony_ci			"vxor.v $vr27, $vr27, $vr7\n\t"
2278c2ecf20Sopenharmony_ci			"vst $vr20, %0\n\t"
2288c2ecf20Sopenharmony_ci			"vst $vr21, %1\n\t"
2298c2ecf20Sopenharmony_ci			"vst $vr22, %2\n\t"
2308c2ecf20Sopenharmony_ci			"vst $vr23, %3\n\t"
2318c2ecf20Sopenharmony_ci			"vst $vr24, %4\n\t"
2328c2ecf20Sopenharmony_ci			"vst $vr25, %5\n\t"
2338c2ecf20Sopenharmony_ci			"vst $vr26, %6\n\t"
2348c2ecf20Sopenharmony_ci			"vst $vr27, %7\n\t"
2358c2ecf20Sopenharmony_ci			: "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
2368c2ecf20Sopenharmony_ci			  "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]),
2378c2ecf20Sopenharmony_ci			  "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]),
2388c2ecf20Sopenharmony_ci			  "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3])
2398c2ecf20Sopenharmony_ci		);
2408c2ecf20Sopenharmony_ci	}
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	kernel_fpu_end();
2438c2ecf20Sopenharmony_ci}
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_ciconst struct raid6_calls raid6_lsx = {
2468c2ecf20Sopenharmony_ci	raid6_lsx_gen_syndrome,
2478c2ecf20Sopenharmony_ci	raid6_lsx_xor_syndrome,
2488c2ecf20Sopenharmony_ci	raid6_has_lsx,
2498c2ecf20Sopenharmony_ci	"lsx",
2508c2ecf20Sopenharmony_ci};
2518c2ecf20Sopenharmony_ci
2528c2ecf20Sopenharmony_ci#undef NSIZE
2538c2ecf20Sopenharmony_ci#endif /* CONFIG_CPU_HAS_LSX */
2548c2ecf20Sopenharmony_ci
2558c2ecf20Sopenharmony_ci#ifdef CONFIG_CPU_HAS_LASX
2568c2ecf20Sopenharmony_ci#define NSIZE 32
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_cistatic int raid6_has_lasx(void)
2598c2ecf20Sopenharmony_ci{
2608c2ecf20Sopenharmony_ci	return cpu_has_lasx;
2618c2ecf20Sopenharmony_ci}
2628c2ecf20Sopenharmony_ci
2638c2ecf20Sopenharmony_cistatic void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs)
2648c2ecf20Sopenharmony_ci{
2658c2ecf20Sopenharmony_ci	u8 **dptr = (u8 **)ptrs;
2668c2ecf20Sopenharmony_ci	u8 *p, *q;
2678c2ecf20Sopenharmony_ci	int d, z, z0;
2688c2ecf20Sopenharmony_ci
2698c2ecf20Sopenharmony_ci	z0 = disks - 3;		/* Highest data disk */
2708c2ecf20Sopenharmony_ci	p = dptr[z0+1];		/* XOR parity */
2718c2ecf20Sopenharmony_ci	q = dptr[z0+2];		/* RS syndrome */
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_ci	kernel_fpu_begin();
2748c2ecf20Sopenharmony_ci
2758c2ecf20Sopenharmony_ci	/*
2768c2ecf20Sopenharmony_ci	 * $xr0, $xr1: wp
2778c2ecf20Sopenharmony_ci	 * $xr2, $xr3: wq
2788c2ecf20Sopenharmony_ci	 * $xr4, $xr5: wd
2798c2ecf20Sopenharmony_ci	 * $xr6, $xr7: w2
2808c2ecf20Sopenharmony_ci	 * $xr8, $xr9: w1
2818c2ecf20Sopenharmony_ci	 */
2828c2ecf20Sopenharmony_ci	for (d = 0; d < bytes; d += NSIZE*2) {
2838c2ecf20Sopenharmony_ci		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
2848c2ecf20Sopenharmony_ci		asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
2858c2ecf20Sopenharmony_ci		asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
2868c2ecf20Sopenharmony_ci		asm volatile("xvori.b $xr2, $xr0, 0");
2878c2ecf20Sopenharmony_ci		asm volatile("xvori.b $xr3, $xr1, 0");
2888c2ecf20Sopenharmony_ci		for (z = z0-1; z >= 0; z--) {
2898c2ecf20Sopenharmony_ci			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
2908c2ecf20Sopenharmony_ci			asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
2918c2ecf20Sopenharmony_ci			asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
2928c2ecf20Sopenharmony_ci			/* wp$$ ^= wd$$; */
2938c2ecf20Sopenharmony_ci			asm volatile("xvxor.v $xr0, $xr0, $xr4");
2948c2ecf20Sopenharmony_ci			asm volatile("xvxor.v $xr1, $xr1, $xr5");
2958c2ecf20Sopenharmony_ci			/* w2$$ = MASK(wq$$); */
2968c2ecf20Sopenharmony_ci			asm volatile("xvslti.b $xr6, $xr2, 0");
2978c2ecf20Sopenharmony_ci			asm volatile("xvslti.b $xr7, $xr3, 0");
2988c2ecf20Sopenharmony_ci			/* w1$$ = SHLBYTE(wq$$); */
2998c2ecf20Sopenharmony_ci			asm volatile("xvslli.b $xr8, $xr2, 1");
3008c2ecf20Sopenharmony_ci			asm volatile("xvslli.b $xr9, $xr3, 1");
3018c2ecf20Sopenharmony_ci			/* w2$$ &= NBYTES(0x1d); */
3028c2ecf20Sopenharmony_ci			asm volatile("xvandi.b $xr6, $xr6, 0x1d");
3038c2ecf20Sopenharmony_ci			asm volatile("xvandi.b $xr7, $xr7, 0x1d");
3048c2ecf20Sopenharmony_ci			/* w1$$ ^= w2$$; */
3058c2ecf20Sopenharmony_ci			asm volatile("xvxor.v $xr8, $xr8, $xr6");
3068c2ecf20Sopenharmony_ci			asm volatile("xvxor.v $xr9, $xr9, $xr7");
3078c2ecf20Sopenharmony_ci			/* wq$$ = w1$$ ^ wd$$; */
3088c2ecf20Sopenharmony_ci			asm volatile("xvxor.v $xr2, $xr8, $xr4");
3098c2ecf20Sopenharmony_ci			asm volatile("xvxor.v $xr3, $xr9, $xr5");
3108c2ecf20Sopenharmony_ci		}
3118c2ecf20Sopenharmony_ci		/* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
3128c2ecf20Sopenharmony_ci		asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0]));
3138c2ecf20Sopenharmony_ci		asm volatile("xvst $xr1, %0" : "=m"(p[d+NSIZE*1]));
3148c2ecf20Sopenharmony_ci		/* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
3158c2ecf20Sopenharmony_ci		asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0]));
3168c2ecf20Sopenharmony_ci		asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1]));
3178c2ecf20Sopenharmony_ci	}
3188c2ecf20Sopenharmony_ci
3198c2ecf20Sopenharmony_ci	kernel_fpu_end();
3208c2ecf20Sopenharmony_ci}
3218c2ecf20Sopenharmony_ci
3228c2ecf20Sopenharmony_cistatic void raid6_lasx_xor_syndrome(int disks, int start, int stop,
3238c2ecf20Sopenharmony_ci				    size_t bytes, void **ptrs)
3248c2ecf20Sopenharmony_ci{
3258c2ecf20Sopenharmony_ci	u8 **dptr = (u8 **)ptrs;
3268c2ecf20Sopenharmony_ci	u8 *p, *q;
3278c2ecf20Sopenharmony_ci	int d, z, z0;
3288c2ecf20Sopenharmony_ci
3298c2ecf20Sopenharmony_ci	z0 = stop;		/* P/Q right side optimization */
3308c2ecf20Sopenharmony_ci	p = dptr[disks-2];	/* XOR parity */
3318c2ecf20Sopenharmony_ci	q = dptr[disks-1];	/* RS syndrome */
3328c2ecf20Sopenharmony_ci
3338c2ecf20Sopenharmony_ci	kernel_fpu_begin();
3348c2ecf20Sopenharmony_ci
3358c2ecf20Sopenharmony_ci	/*
3368c2ecf20Sopenharmony_ci	 * $xr0, $xr1: wp
3378c2ecf20Sopenharmony_ci	 * $xr2, $xr3: wq
3388c2ecf20Sopenharmony_ci	 * $xr4, $xr5: wd
3398c2ecf20Sopenharmony_ci	 * $xr6, $xr7: w2
3408c2ecf20Sopenharmony_ci	 * $xr8, $xr9: w1
3418c2ecf20Sopenharmony_ci	 */
3428c2ecf20Sopenharmony_ci	for (d = 0; d < bytes; d += NSIZE*2) {
3438c2ecf20Sopenharmony_ci		/* P/Q data pages */
3448c2ecf20Sopenharmony_ci		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
3458c2ecf20Sopenharmony_ci		asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
3468c2ecf20Sopenharmony_ci		asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
3478c2ecf20Sopenharmony_ci		asm volatile("xvori.b $xr2, $xr0, 0");
3488c2ecf20Sopenharmony_ci		asm volatile("xvori.b $xr3, $xr1, 0");
3498c2ecf20Sopenharmony_ci		for (z = z0-1; z >= start; z--) {
3508c2ecf20Sopenharmony_ci			/* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
3518c2ecf20Sopenharmony_ci			asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
3528c2ecf20Sopenharmony_ci			asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
3538c2ecf20Sopenharmony_ci			/* wp$$ ^= wd$$; */
3548c2ecf20Sopenharmony_ci			asm volatile("xvxor.v $xr0, $xr0, $xr4");
3558c2ecf20Sopenharmony_ci			asm volatile("xvxor.v $xr1, $xr1, $xr5");
3568c2ecf20Sopenharmony_ci			/* w2$$ = MASK(wq$$); */
3578c2ecf20Sopenharmony_ci			asm volatile("xvslti.b $xr6, $xr2, 0");
3588c2ecf20Sopenharmony_ci			asm volatile("xvslti.b $xr7, $xr3, 0");
3598c2ecf20Sopenharmony_ci			/* w1$$ = SHLBYTE(wq$$); */
3608c2ecf20Sopenharmony_ci			asm volatile("xvslli.b $xr8, $xr2, 1");
3618c2ecf20Sopenharmony_ci			asm volatile("xvslli.b $xr9, $xr3, 1");
3628c2ecf20Sopenharmony_ci			/* w2$$ &= NBYTES(0x1d); */
3638c2ecf20Sopenharmony_ci			asm volatile("xvandi.b $xr6, $xr6, 0x1d");
3648c2ecf20Sopenharmony_ci			asm volatile("xvandi.b $xr7, $xr7, 0x1d");
3658c2ecf20Sopenharmony_ci			/* w1$$ ^= w2$$; */
3668c2ecf20Sopenharmony_ci			asm volatile("xvxor.v $xr8, $xr8, $xr6");
3678c2ecf20Sopenharmony_ci			asm volatile("xvxor.v $xr9, $xr9, $xr7");
3688c2ecf20Sopenharmony_ci			/* wq$$ = w1$$ ^ wd$$; */
3698c2ecf20Sopenharmony_ci			asm volatile("xvxor.v $xr2, $xr8, $xr4");
3708c2ecf20Sopenharmony_ci			asm volatile("xvxor.v $xr3, $xr9, $xr5");
3718c2ecf20Sopenharmony_ci		}
3728c2ecf20Sopenharmony_ci
3738c2ecf20Sopenharmony_ci		/* P/Q left side optimization */
3748c2ecf20Sopenharmony_ci		for (z = start-1; z >= 0; z--) {
3758c2ecf20Sopenharmony_ci			/* w2$$ = MASK(wq$$); */
3768c2ecf20Sopenharmony_ci			asm volatile("xvslti.b $xr6, $xr2, 0");
3778c2ecf20Sopenharmony_ci			asm volatile("xvslti.b $xr7, $xr3, 0");
3788c2ecf20Sopenharmony_ci			/* w1$$ = SHLBYTE(wq$$); */
3798c2ecf20Sopenharmony_ci			asm volatile("xvslli.b $xr8, $xr2, 1");
3808c2ecf20Sopenharmony_ci			asm volatile("xvslli.b $xr9, $xr3, 1");
3818c2ecf20Sopenharmony_ci			/* w2$$ &= NBYTES(0x1d); */
3828c2ecf20Sopenharmony_ci			asm volatile("xvandi.b $xr6, $xr6, 0x1d");
3838c2ecf20Sopenharmony_ci			asm volatile("xvandi.b $xr7, $xr7, 0x1d");
3848c2ecf20Sopenharmony_ci			/* wq$$ = w1$$ ^ w2$$; */
3858c2ecf20Sopenharmony_ci			asm volatile("xvxor.v $xr2, $xr8, $xr6");
3868c2ecf20Sopenharmony_ci			asm volatile("xvxor.v $xr3, $xr9, $xr7");
3878c2ecf20Sopenharmony_ci		}
3888c2ecf20Sopenharmony_ci		/*
3898c2ecf20Sopenharmony_ci		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
3908c2ecf20Sopenharmony_ci		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
3918c2ecf20Sopenharmony_ci		 */
3928c2ecf20Sopenharmony_ci		asm volatile(
3938c2ecf20Sopenharmony_ci			"xvld $xr10, %0\n\t"
3948c2ecf20Sopenharmony_ci			"xvld $xr11, %1\n\t"
3958c2ecf20Sopenharmony_ci			"xvld $xr12, %2\n\t"
3968c2ecf20Sopenharmony_ci			"xvld $xr13, %3\n\t"
3978c2ecf20Sopenharmony_ci			"xvxor.v $xr10, $xr10, $xr0\n\t"
3988c2ecf20Sopenharmony_ci			"xvxor.v $xr11, $xr11, $xr1\n\t"
3998c2ecf20Sopenharmony_ci			"xvxor.v $xr12, $xr12, $xr2\n\t"
4008c2ecf20Sopenharmony_ci			"xvxor.v $xr13, $xr13, $xr3\n\t"
4018c2ecf20Sopenharmony_ci			"xvst $xr10, %0\n\t"
4028c2ecf20Sopenharmony_ci			"xvst $xr11, %1\n\t"
4038c2ecf20Sopenharmony_ci			"xvst $xr12, %2\n\t"
4048c2ecf20Sopenharmony_ci			"xvst $xr13, %3\n\t"
4058c2ecf20Sopenharmony_ci			: "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
4068c2ecf20Sopenharmony_ci			  "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1])
4078c2ecf20Sopenharmony_ci		);
4088c2ecf20Sopenharmony_ci	}
4098c2ecf20Sopenharmony_ci
4108c2ecf20Sopenharmony_ci	kernel_fpu_end();
4118c2ecf20Sopenharmony_ci}
4128c2ecf20Sopenharmony_ci
4138c2ecf20Sopenharmony_ciconst struct raid6_calls raid6_lasx = {
4148c2ecf20Sopenharmony_ci	raid6_lasx_gen_syndrome,
4158c2ecf20Sopenharmony_ci	raid6_lasx_xor_syndrome,
4168c2ecf20Sopenharmony_ci	raid6_has_lasx,
4178c2ecf20Sopenharmony_ci	"lasx",
4188c2ecf20Sopenharmony_ci};
4198c2ecf20Sopenharmony_ci#undef NSIZE
4208c2ecf20Sopenharmony_ci#endif /* CONFIG_CPU_HAS_LASX */
421