18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* -*- linux-c -*- -------------------------------------------------------- 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * Copyright (C) 2016 Intel Corporation 58c2ecf20Sopenharmony_ci * 68c2ecf20Sopenharmony_ci * Author: Gayatri Kammela <gayatri.kammela@intel.com> 78c2ecf20Sopenharmony_ci * Author: Megha Dey <megha.dey@linux.intel.com> 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved 108c2ecf20Sopenharmony_ci * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved 118c2ecf20Sopenharmony_ci * 128c2ecf20Sopenharmony_ci * ----------------------------------------------------------------------- 138c2ecf20Sopenharmony_ci */ 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_ci/* 168c2ecf20Sopenharmony_ci * AVX512 implementation of RAID-6 syndrome functions 178c2ecf20Sopenharmony_ci * 188c2ecf20Sopenharmony_ci */ 198c2ecf20Sopenharmony_ci 208c2ecf20Sopenharmony_ci#ifdef CONFIG_AS_AVX512 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_ci#include <linux/raid/pq.h> 238c2ecf20Sopenharmony_ci#include "x86.h" 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_cistatic const struct raid6_avx512_constants { 268c2ecf20Sopenharmony_ci u64 x1d[8]; 278c2ecf20Sopenharmony_ci} raid6_avx512_constants __aligned(512/8) = { 288c2ecf20Sopenharmony_ci { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, 298c2ecf20Sopenharmony_ci 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, 308c2ecf20Sopenharmony_ci 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, 318c2ecf20Sopenharmony_ci 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,}, 328c2ecf20Sopenharmony_ci}; 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_cistatic int raid6_have_avx512(void) 358c2ecf20Sopenharmony_ci{ 368c2ecf20Sopenharmony_ci return boot_cpu_has(X86_FEATURE_AVX2) && 378c2ecf20Sopenharmony_ci boot_cpu_has(X86_FEATURE_AVX) && 388c2ecf20Sopenharmony_ci boot_cpu_has(X86_FEATURE_AVX512F) && 398c2ecf20Sopenharmony_ci boot_cpu_has(X86_FEATURE_AVX512BW) && 408c2ecf20Sopenharmony_ci boot_cpu_has(X86_FEATURE_AVX512VL) && 418c2ecf20Sopenharmony_ci boot_cpu_has(X86_FEATURE_AVX512DQ); 428c2ecf20Sopenharmony_ci} 438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_cistatic void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs) 458c2ecf20Sopenharmony_ci{ 468c2ecf20Sopenharmony_ci u8 **dptr = (u8 **)ptrs; 478c2ecf20Sopenharmony_ci u8 *p, *q; 488c2ecf20Sopenharmony_ci int d, z, z0; 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci z0 = disks - 3; /* Highest data disk */ 518c2ecf20Sopenharmony_ci p = dptr[z0+1]; /* XOR parity */ 528c2ecf20Sopenharmony_ci q = dptr[z0+2]; /* RS syndrome */ 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci kernel_fpu_begin(); 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci asm volatile("vmovdqa64 %0,%%zmm0\n\t" 578c2ecf20Sopenharmony_ci "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */ 588c2ecf20Sopenharmony_ci : 598c2ecf20Sopenharmony_ci : "m" (raid6_avx512_constants.x1d[0])); 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci for (d = 0; d < bytes; d += 64) { 628c2ecf20Sopenharmony_ci asm volatile("prefetchnta %0\n\t" 638c2ecf20Sopenharmony_ci "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */ 648c2ecf20Sopenharmony_ci "prefetchnta %1\n\t" 658c2ecf20Sopenharmony_ci "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */ 668c2ecf20Sopenharmony_ci "vmovdqa64 %1,%%zmm6" 678c2ecf20Sopenharmony_ci : 688c2ecf20Sopenharmony_ci : "m" (dptr[z0][d]), "m" (dptr[z0-1][d])); 698c2ecf20Sopenharmony_ci for (z = z0-2; z >= 0; z--) { 708c2ecf20Sopenharmony_ci asm volatile("prefetchnta %0\n\t" 718c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" 728c2ecf20Sopenharmony_ci "vpmovm2b %%k1,%%zmm5\n\t" 738c2ecf20Sopenharmony_ci "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 748c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 758c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 768c2ecf20Sopenharmony_ci "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t" 778c2ecf20Sopenharmony_ci "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t" 788c2ecf20Sopenharmony_ci "vmovdqa64 %0,%%zmm6" 798c2ecf20Sopenharmony_ci : 808c2ecf20Sopenharmony_ci : "m" (dptr[z][d])); 818c2ecf20Sopenharmony_ci } 828c2ecf20Sopenharmony_ci asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" 838c2ecf20Sopenharmony_ci "vpmovm2b %%k1,%%zmm5\n\t" 848c2ecf20Sopenharmony_ci "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 858c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 868c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 878c2ecf20Sopenharmony_ci "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t" 888c2ecf20Sopenharmony_ci "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t" 898c2ecf20Sopenharmony_ci "vmovntdq %%zmm2,%0\n\t" 908c2ecf20Sopenharmony_ci "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" 918c2ecf20Sopenharmony_ci "vmovntdq %%zmm4,%1\n\t" 928c2ecf20Sopenharmony_ci "vpxorq %%zmm4,%%zmm4,%%zmm4" 938c2ecf20Sopenharmony_ci : 948c2ecf20Sopenharmony_ci : "m" (p[d]), "m" (q[d])); 958c2ecf20Sopenharmony_ci } 968c2ecf20Sopenharmony_ci 978c2ecf20Sopenharmony_ci asm volatile("sfence" : : : "memory"); 988c2ecf20Sopenharmony_ci kernel_fpu_end(); 998c2ecf20Sopenharmony_ci} 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_cistatic void raid6_avx5121_xor_syndrome(int disks, int start, int stop, 1028c2ecf20Sopenharmony_ci size_t bytes, void **ptrs) 1038c2ecf20Sopenharmony_ci{ 1048c2ecf20Sopenharmony_ci u8 **dptr = (u8 **)ptrs; 1058c2ecf20Sopenharmony_ci u8 *p, *q; 1068c2ecf20Sopenharmony_ci int d, z, z0; 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci z0 = stop; /* P/Q right side optimization */ 1098c2ecf20Sopenharmony_ci p = dptr[disks-2]; /* XOR parity */ 1108c2ecf20Sopenharmony_ci q = dptr[disks-1]; /* RS syndrome */ 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci kernel_fpu_begin(); 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci asm volatile("vmovdqa64 %0,%%zmm0" 1158c2ecf20Sopenharmony_ci : : "m" (raid6_avx512_constants.x1d[0])); 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ci for (d = 0 ; d < bytes ; d += 64) { 1188c2ecf20Sopenharmony_ci asm volatile("vmovdqa64 %0,%%zmm4\n\t" 1198c2ecf20Sopenharmony_ci "vmovdqa64 %1,%%zmm2\n\t" 1208c2ecf20Sopenharmony_ci "vpxorq %%zmm4,%%zmm2,%%zmm2" 1218c2ecf20Sopenharmony_ci : 1228c2ecf20Sopenharmony_ci : "m" (dptr[z0][d]), "m" (p[d])); 1238c2ecf20Sopenharmony_ci /* P/Q data pages */ 1248c2ecf20Sopenharmony_ci for (z = z0-1 ; z >= start ; z--) { 1258c2ecf20Sopenharmony_ci asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" 1268c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" 1278c2ecf20Sopenharmony_ci "vpmovm2b %%k1,%%zmm5\n\t" 1288c2ecf20Sopenharmony_ci "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 1298c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 1308c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 1318c2ecf20Sopenharmony_ci "vmovdqa64 %0,%%zmm5\n\t" 1328c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" 1338c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4" 1348c2ecf20Sopenharmony_ci : 1358c2ecf20Sopenharmony_ci : "m" (dptr[z][d])); 1368c2ecf20Sopenharmony_ci } 1378c2ecf20Sopenharmony_ci /* P/Q left side optimization */ 1388c2ecf20Sopenharmony_ci for (z = start-1 ; z >= 0 ; z--) { 1398c2ecf20Sopenharmony_ci asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" 1408c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" 1418c2ecf20Sopenharmony_ci "vpmovm2b %%k1,%%zmm5\n\t" 1428c2ecf20Sopenharmony_ci "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 1438c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 1448c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4" 1458c2ecf20Sopenharmony_ci : 1468c2ecf20Sopenharmony_ci : ); 1478c2ecf20Sopenharmony_ci } 1488c2ecf20Sopenharmony_ci asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t" 1498c2ecf20Sopenharmony_ci /* Don't use movntdq for r/w memory area < cache line */ 1508c2ecf20Sopenharmony_ci "vmovdqa64 %%zmm4,%0\n\t" 1518c2ecf20Sopenharmony_ci "vmovdqa64 %%zmm2,%1" 1528c2ecf20Sopenharmony_ci : 1538c2ecf20Sopenharmony_ci : "m" (q[d]), "m" (p[d])); 1548c2ecf20Sopenharmony_ci } 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci asm volatile("sfence" : : : "memory"); 1578c2ecf20Sopenharmony_ci kernel_fpu_end(); 1588c2ecf20Sopenharmony_ci} 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ciconst struct raid6_calls raid6_avx512x1 = { 1618c2ecf20Sopenharmony_ci raid6_avx5121_gen_syndrome, 1628c2ecf20Sopenharmony_ci raid6_avx5121_xor_syndrome, 1638c2ecf20Sopenharmony_ci raid6_have_avx512, 1648c2ecf20Sopenharmony_ci "avx512x1", 1658c2ecf20Sopenharmony_ci 1 /* Has cache hints */ 1668c2ecf20Sopenharmony_ci}; 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci/* 1698c2ecf20Sopenharmony_ci * Unrolled-by-2 AVX512 implementation 1708c2ecf20Sopenharmony_ci */ 1718c2ecf20Sopenharmony_cistatic void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs) 1728c2ecf20Sopenharmony_ci{ 1738c2ecf20Sopenharmony_ci u8 **dptr = (u8 **)ptrs; 1748c2ecf20Sopenharmony_ci u8 *p, *q; 1758c2ecf20Sopenharmony_ci int d, z, z0; 1768c2ecf20Sopenharmony_ci 1778c2ecf20Sopenharmony_ci z0 = disks - 3; /* Highest data disk */ 1788c2ecf20Sopenharmony_ci p = dptr[z0+1]; /* XOR parity */ 1798c2ecf20Sopenharmony_ci q = dptr[z0+2]; /* RS syndrome */ 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_ci kernel_fpu_begin(); 1828c2ecf20Sopenharmony_ci 1838c2ecf20Sopenharmony_ci asm volatile("vmovdqa64 %0,%%zmm0\n\t" 1848c2ecf20Sopenharmony_ci "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */ 1858c2ecf20Sopenharmony_ci : 1868c2ecf20Sopenharmony_ci : "m" (raid6_avx512_constants.x1d[0])); 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci /* We uniformly assume a single prefetch covers at least 64 bytes */ 1898c2ecf20Sopenharmony_ci for (d = 0; d < bytes; d += 128) { 1908c2ecf20Sopenharmony_ci asm volatile("prefetchnta %0\n\t" 1918c2ecf20Sopenharmony_ci "prefetchnta %1\n\t" 1928c2ecf20Sopenharmony_ci "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */ 1938c2ecf20Sopenharmony_ci "vmovdqa64 %1,%%zmm3\n\t" /* P[1] */ 1948c2ecf20Sopenharmony_ci "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */ 1958c2ecf20Sopenharmony_ci "vmovdqa64 %%zmm3,%%zmm6" /* Q[1] */ 1968c2ecf20Sopenharmony_ci : 1978c2ecf20Sopenharmony_ci : "m" (dptr[z0][d]), "m" (dptr[z0][d+64])); 1988c2ecf20Sopenharmony_ci for (z = z0-1; z >= 0; z--) { 1998c2ecf20Sopenharmony_ci asm volatile("prefetchnta %0\n\t" 2008c2ecf20Sopenharmony_ci "prefetchnta %1\n\t" 2018c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" 2028c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t" 2038c2ecf20Sopenharmony_ci "vpmovm2b %%k1,%%zmm5\n\t" 2048c2ecf20Sopenharmony_ci "vpmovm2b %%k2,%%zmm7\n\t" 2058c2ecf20Sopenharmony_ci "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 2068c2ecf20Sopenharmony_ci "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" 2078c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 2088c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" 2098c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 2108c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" 2118c2ecf20Sopenharmony_ci "vmovdqa64 %0,%%zmm5\n\t" 2128c2ecf20Sopenharmony_ci "vmovdqa64 %1,%%zmm7\n\t" 2138c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" 2148c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" 2158c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 2168c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm6,%%zmm6" 2178c2ecf20Sopenharmony_ci : 2188c2ecf20Sopenharmony_ci : "m" (dptr[z][d]), "m" (dptr[z][d+64])); 2198c2ecf20Sopenharmony_ci } 2208c2ecf20Sopenharmony_ci asm volatile("vmovntdq %%zmm2,%0\n\t" 2218c2ecf20Sopenharmony_ci "vmovntdq %%zmm3,%1\n\t" 2228c2ecf20Sopenharmony_ci "vmovntdq %%zmm4,%2\n\t" 2238c2ecf20Sopenharmony_ci "vmovntdq %%zmm6,%3" 2248c2ecf20Sopenharmony_ci : 2258c2ecf20Sopenharmony_ci : "m" (p[d]), "m" (p[d+64]), "m" (q[d]), 2268c2ecf20Sopenharmony_ci "m" (q[d+64])); 2278c2ecf20Sopenharmony_ci } 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_ci asm volatile("sfence" : : : "memory"); 2308c2ecf20Sopenharmony_ci kernel_fpu_end(); 2318c2ecf20Sopenharmony_ci} 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_cistatic void raid6_avx5122_xor_syndrome(int disks, int start, int stop, 2348c2ecf20Sopenharmony_ci size_t bytes, void **ptrs) 2358c2ecf20Sopenharmony_ci{ 2368c2ecf20Sopenharmony_ci u8 **dptr = (u8 **)ptrs; 2378c2ecf20Sopenharmony_ci u8 *p, *q; 2388c2ecf20Sopenharmony_ci int d, z, z0; 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_ci z0 = stop; /* P/Q right side optimization */ 2418c2ecf20Sopenharmony_ci p = dptr[disks-2]; /* XOR parity */ 2428c2ecf20Sopenharmony_ci q = dptr[disks-1]; /* RS syndrome */ 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci kernel_fpu_begin(); 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci asm volatile("vmovdqa64 %0,%%zmm0" 2478c2ecf20Sopenharmony_ci : : "m" (raid6_avx512_constants.x1d[0])); 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_ci for (d = 0 ; d < bytes ; d += 128) { 2508c2ecf20Sopenharmony_ci asm volatile("vmovdqa64 %0,%%zmm4\n\t" 2518c2ecf20Sopenharmony_ci "vmovdqa64 %1,%%zmm6\n\t" 2528c2ecf20Sopenharmony_ci "vmovdqa64 %2,%%zmm2\n\t" 2538c2ecf20Sopenharmony_ci "vmovdqa64 %3,%%zmm3\n\t" 2548c2ecf20Sopenharmony_ci "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t" 2558c2ecf20Sopenharmony_ci "vpxorq %%zmm6,%%zmm3,%%zmm3" 2568c2ecf20Sopenharmony_ci : 2578c2ecf20Sopenharmony_ci : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]), 2588c2ecf20Sopenharmony_ci "m" (p[d]), "m" (p[d+64])); 2598c2ecf20Sopenharmony_ci /* P/Q data pages */ 2608c2ecf20Sopenharmony_ci for (z = z0-1 ; z >= start ; z--) { 2618c2ecf20Sopenharmony_ci asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" 2628c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" 2638c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" 2648c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" 2658c2ecf20Sopenharmony_ci "vpmovm2b %%k1,%%zmm5\n\t" 2668c2ecf20Sopenharmony_ci "vpmovm2b %%k2,%%zmm7\n\t" 2678c2ecf20Sopenharmony_ci "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 2688c2ecf20Sopenharmony_ci "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" 2698c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 2708c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" 2718c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 2728c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" 2738c2ecf20Sopenharmony_ci "vmovdqa64 %0,%%zmm5\n\t" 2748c2ecf20Sopenharmony_ci "vmovdqa64 %1,%%zmm7\n\t" 2758c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" 2768c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" 2778c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 2788c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm6,%%zmm6" 2798c2ecf20Sopenharmony_ci : 2808c2ecf20Sopenharmony_ci : "m" (dptr[z][d]), "m" (dptr[z][d+64])); 2818c2ecf20Sopenharmony_ci } 2828c2ecf20Sopenharmony_ci /* P/Q left side optimization */ 2838c2ecf20Sopenharmony_ci for (z = start-1 ; z >= 0 ; z--) { 2848c2ecf20Sopenharmony_ci asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" 2858c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" 2868c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" 2878c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" 2888c2ecf20Sopenharmony_ci "vpmovm2b %%k1,%%zmm5\n\t" 2898c2ecf20Sopenharmony_ci "vpmovm2b %%k2,%%zmm7\n\t" 2908c2ecf20Sopenharmony_ci "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 2918c2ecf20Sopenharmony_ci "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" 2928c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 2938c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" 2948c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 2958c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm6,%%zmm6" 2968c2ecf20Sopenharmony_ci : 2978c2ecf20Sopenharmony_ci : ); 2988c2ecf20Sopenharmony_ci } 2998c2ecf20Sopenharmony_ci asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t" 3008c2ecf20Sopenharmony_ci "vpxorq %1,%%zmm6,%%zmm6\n\t" 3018c2ecf20Sopenharmony_ci /* Don't use movntdq for r/w 3028c2ecf20Sopenharmony_ci * memory area < cache line 3038c2ecf20Sopenharmony_ci */ 3048c2ecf20Sopenharmony_ci "vmovdqa64 %%zmm4,%0\n\t" 3058c2ecf20Sopenharmony_ci "vmovdqa64 %%zmm6,%1\n\t" 3068c2ecf20Sopenharmony_ci "vmovdqa64 %%zmm2,%2\n\t" 3078c2ecf20Sopenharmony_ci "vmovdqa64 %%zmm3,%3" 3088c2ecf20Sopenharmony_ci : 3098c2ecf20Sopenharmony_ci : "m" (q[d]), "m" (q[d+64]), "m" (p[d]), 3108c2ecf20Sopenharmony_ci "m" (p[d+64])); 3118c2ecf20Sopenharmony_ci } 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_ci asm volatile("sfence" : : : "memory"); 3148c2ecf20Sopenharmony_ci kernel_fpu_end(); 3158c2ecf20Sopenharmony_ci} 3168c2ecf20Sopenharmony_ci 3178c2ecf20Sopenharmony_ciconst struct raid6_calls raid6_avx512x2 = { 3188c2ecf20Sopenharmony_ci raid6_avx5122_gen_syndrome, 3198c2ecf20Sopenharmony_ci raid6_avx5122_xor_syndrome, 3208c2ecf20Sopenharmony_ci raid6_have_avx512, 3218c2ecf20Sopenharmony_ci "avx512x2", 3228c2ecf20Sopenharmony_ci 1 /* Has cache hints */ 3238c2ecf20Sopenharmony_ci}; 3248c2ecf20Sopenharmony_ci 3258c2ecf20Sopenharmony_ci#ifdef CONFIG_X86_64 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ci/* 3288c2ecf20Sopenharmony_ci * Unrolled-by-4 AVX2 implementation 3298c2ecf20Sopenharmony_ci */ 3308c2ecf20Sopenharmony_cistatic void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs) 3318c2ecf20Sopenharmony_ci{ 3328c2ecf20Sopenharmony_ci u8 **dptr = (u8 **)ptrs; 3338c2ecf20Sopenharmony_ci u8 *p, *q; 3348c2ecf20Sopenharmony_ci int d, z, z0; 3358c2ecf20Sopenharmony_ci 3368c2ecf20Sopenharmony_ci z0 = disks - 3; /* Highest data disk */ 3378c2ecf20Sopenharmony_ci p = dptr[z0+1]; /* XOR parity */ 3388c2ecf20Sopenharmony_ci q = dptr[z0+2]; /* RS syndrome */ 3398c2ecf20Sopenharmony_ci 3408c2ecf20Sopenharmony_ci kernel_fpu_begin(); 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci asm volatile("vmovdqa64 %0,%%zmm0\n\t" 3438c2ecf20Sopenharmony_ci "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t" /* Zero temp */ 3448c2ecf20Sopenharmony_ci "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" /* P[0] */ 3458c2ecf20Sopenharmony_ci "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" /* P[1] */ 3468c2ecf20Sopenharmony_ci "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" /* Q[0] */ 3478c2ecf20Sopenharmony_ci "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" /* Q[1] */ 3488c2ecf20Sopenharmony_ci "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" /* P[2] */ 3498c2ecf20Sopenharmony_ci "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" /* P[3] */ 3508c2ecf20Sopenharmony_ci "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" /* Q[2] */ 3518c2ecf20Sopenharmony_ci "vpxorq %%zmm14,%%zmm14,%%zmm14" /* Q[3] */ 3528c2ecf20Sopenharmony_ci : 3538c2ecf20Sopenharmony_ci : "m" (raid6_avx512_constants.x1d[0])); 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci for (d = 0; d < bytes; d += 256) { 3568c2ecf20Sopenharmony_ci for (z = z0; z >= 0; z--) { 3578c2ecf20Sopenharmony_ci asm volatile("prefetchnta %0\n\t" 3588c2ecf20Sopenharmony_ci "prefetchnta %1\n\t" 3598c2ecf20Sopenharmony_ci "prefetchnta %2\n\t" 3608c2ecf20Sopenharmony_ci "prefetchnta %3\n\t" 3618c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" 3628c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t" 3638c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t" 3648c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t" 3658c2ecf20Sopenharmony_ci "vpmovm2b %%k1,%%zmm5\n\t" 3668c2ecf20Sopenharmony_ci "vpmovm2b %%k2,%%zmm7\n\t" 3678c2ecf20Sopenharmony_ci "vpmovm2b %%k3,%%zmm13\n\t" 3688c2ecf20Sopenharmony_ci "vpmovm2b %%k4,%%zmm15\n\t" 3698c2ecf20Sopenharmony_ci "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 3708c2ecf20Sopenharmony_ci "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" 3718c2ecf20Sopenharmony_ci "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" 3728c2ecf20Sopenharmony_ci "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t" 3738c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 3748c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" 3758c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" 3768c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" 3778c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 3788c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" 3798c2ecf20Sopenharmony_ci "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" 3808c2ecf20Sopenharmony_ci "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t" 3818c2ecf20Sopenharmony_ci "vmovdqa64 %0,%%zmm5\n\t" 3828c2ecf20Sopenharmony_ci "vmovdqa64 %1,%%zmm7\n\t" 3838c2ecf20Sopenharmony_ci "vmovdqa64 %2,%%zmm13\n\t" 3848c2ecf20Sopenharmony_ci "vmovdqa64 %3,%%zmm15\n\t" 3858c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" 3868c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" 3878c2ecf20Sopenharmony_ci "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t" 3888c2ecf20Sopenharmony_ci "vpxorq %%zmm15,%%zmm11,%%zmm11\n" 3898c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 3908c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" 3918c2ecf20Sopenharmony_ci "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" 3928c2ecf20Sopenharmony_ci "vpxorq %%zmm15,%%zmm14,%%zmm14" 3938c2ecf20Sopenharmony_ci : 3948c2ecf20Sopenharmony_ci : "m" (dptr[z][d]), "m" (dptr[z][d+64]), 3958c2ecf20Sopenharmony_ci "m" (dptr[z][d+128]), "m" (dptr[z][d+192])); 3968c2ecf20Sopenharmony_ci } 3978c2ecf20Sopenharmony_ci asm volatile("vmovntdq %%zmm2,%0\n\t" 3988c2ecf20Sopenharmony_ci "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" 3998c2ecf20Sopenharmony_ci "vmovntdq %%zmm3,%1\n\t" 4008c2ecf20Sopenharmony_ci "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" 4018c2ecf20Sopenharmony_ci "vmovntdq %%zmm10,%2\n\t" 4028c2ecf20Sopenharmony_ci "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" 4038c2ecf20Sopenharmony_ci "vmovntdq %%zmm11,%3\n\t" 4048c2ecf20Sopenharmony_ci "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" 4058c2ecf20Sopenharmony_ci "vmovntdq %%zmm4,%4\n\t" 4068c2ecf20Sopenharmony_ci "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" 4078c2ecf20Sopenharmony_ci "vmovntdq %%zmm6,%5\n\t" 4088c2ecf20Sopenharmony_ci "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" 4098c2ecf20Sopenharmony_ci "vmovntdq %%zmm12,%6\n\t" 4108c2ecf20Sopenharmony_ci "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" 4118c2ecf20Sopenharmony_ci "vmovntdq %%zmm14,%7\n\t" 4128c2ecf20Sopenharmony_ci "vpxorq %%zmm14,%%zmm14,%%zmm14" 4138c2ecf20Sopenharmony_ci : 4148c2ecf20Sopenharmony_ci : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), 4158c2ecf20Sopenharmony_ci "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]), 4168c2ecf20Sopenharmony_ci "m" (q[d+128]), "m" (q[d+192])); 4178c2ecf20Sopenharmony_ci } 4188c2ecf20Sopenharmony_ci 4198c2ecf20Sopenharmony_ci asm volatile("sfence" : : : "memory"); 4208c2ecf20Sopenharmony_ci kernel_fpu_end(); 4218c2ecf20Sopenharmony_ci} 4228c2ecf20Sopenharmony_ci 4238c2ecf20Sopenharmony_cistatic void raid6_avx5124_xor_syndrome(int disks, int start, int stop, 4248c2ecf20Sopenharmony_ci size_t bytes, void **ptrs) 4258c2ecf20Sopenharmony_ci{ 4268c2ecf20Sopenharmony_ci u8 **dptr = (u8 **)ptrs; 4278c2ecf20Sopenharmony_ci u8 *p, *q; 4288c2ecf20Sopenharmony_ci int d, z, z0; 4298c2ecf20Sopenharmony_ci 4308c2ecf20Sopenharmony_ci z0 = stop; /* P/Q right side optimization */ 4318c2ecf20Sopenharmony_ci p = dptr[disks-2]; /* XOR parity */ 4328c2ecf20Sopenharmony_ci q = dptr[disks-1]; /* RS syndrome */ 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci kernel_fpu_begin(); 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci asm volatile("vmovdqa64 %0,%%zmm0" 4378c2ecf20Sopenharmony_ci :: "m" (raid6_avx512_constants.x1d[0])); 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_ci for (d = 0 ; d < bytes ; d += 256) { 4408c2ecf20Sopenharmony_ci asm volatile("vmovdqa64 %0,%%zmm4\n\t" 4418c2ecf20Sopenharmony_ci "vmovdqa64 %1,%%zmm6\n\t" 4428c2ecf20Sopenharmony_ci "vmovdqa64 %2,%%zmm12\n\t" 4438c2ecf20Sopenharmony_ci "vmovdqa64 %3,%%zmm14\n\t" 4448c2ecf20Sopenharmony_ci "vmovdqa64 %4,%%zmm2\n\t" 4458c2ecf20Sopenharmony_ci "vmovdqa64 %5,%%zmm3\n\t" 4468c2ecf20Sopenharmony_ci "vmovdqa64 %6,%%zmm10\n\t" 4478c2ecf20Sopenharmony_ci "vmovdqa64 %7,%%zmm11\n\t" 4488c2ecf20Sopenharmony_ci "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t" 4498c2ecf20Sopenharmony_ci "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t" 4508c2ecf20Sopenharmony_ci "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t" 4518c2ecf20Sopenharmony_ci "vpxorq %%zmm14,%%zmm11,%%zmm11" 4528c2ecf20Sopenharmony_ci : 4538c2ecf20Sopenharmony_ci : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]), 4548c2ecf20Sopenharmony_ci "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]), 4558c2ecf20Sopenharmony_ci "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), 4568c2ecf20Sopenharmony_ci "m" (p[d+192])); 4578c2ecf20Sopenharmony_ci /* P/Q data pages */ 4588c2ecf20Sopenharmony_ci for (z = z0-1 ; z >= start ; z--) { 4598c2ecf20Sopenharmony_ci asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" 4608c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" 4618c2ecf20Sopenharmony_ci "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t" 4628c2ecf20Sopenharmony_ci "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t" 4638c2ecf20Sopenharmony_ci "prefetchnta %0\n\t" 4648c2ecf20Sopenharmony_ci "prefetchnta %2\n\t" 4658c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" 4668c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" 4678c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t" 4688c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t" 4698c2ecf20Sopenharmony_ci "vpmovm2b %%k1,%%zmm5\n\t" 4708c2ecf20Sopenharmony_ci "vpmovm2b %%k2,%%zmm7\n\t" 4718c2ecf20Sopenharmony_ci "vpmovm2b %%k3,%%zmm13\n\t" 4728c2ecf20Sopenharmony_ci "vpmovm2b %%k4,%%zmm15\n\t" 4738c2ecf20Sopenharmony_ci "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 4748c2ecf20Sopenharmony_ci "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" 4758c2ecf20Sopenharmony_ci "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" 4768c2ecf20Sopenharmony_ci "vpaddb %%Zmm14,%%zmm14,%%zmm14\n\t" 4778c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 4788c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" 4798c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" 4808c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" 4818c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 4828c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" 4838c2ecf20Sopenharmony_ci "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" 4848c2ecf20Sopenharmony_ci "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t" 4858c2ecf20Sopenharmony_ci "vmovdqa64 %0,%%zmm5\n\t" 4868c2ecf20Sopenharmony_ci "vmovdqa64 %1,%%zmm7\n\t" 4878c2ecf20Sopenharmony_ci "vmovdqa64 %2,%%zmm13\n\t" 4888c2ecf20Sopenharmony_ci "vmovdqa64 %3,%%zmm15\n\t" 4898c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" 4908c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" 4918c2ecf20Sopenharmony_ci "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t" 4928c2ecf20Sopenharmony_ci "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t" 4938c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 4948c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" 4958c2ecf20Sopenharmony_ci "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" 4968c2ecf20Sopenharmony_ci "vpxorq %%zmm15,%%zmm14,%%zmm14" 4978c2ecf20Sopenharmony_ci : 4988c2ecf20Sopenharmony_ci : "m" (dptr[z][d]), "m" (dptr[z][d+64]), 4998c2ecf20Sopenharmony_ci "m" (dptr[z][d+128]), 5008c2ecf20Sopenharmony_ci "m" (dptr[z][d+192])); 5018c2ecf20Sopenharmony_ci } 5028c2ecf20Sopenharmony_ci asm volatile("prefetchnta %0\n\t" 5038c2ecf20Sopenharmony_ci "prefetchnta %1\n\t" 5048c2ecf20Sopenharmony_ci : 5058c2ecf20Sopenharmony_ci : "m" (q[d]), "m" (q[d+128])); 5068c2ecf20Sopenharmony_ci /* P/Q left side optimization */ 5078c2ecf20Sopenharmony_ci for (z = start-1 ; z >= 0 ; z--) { 5088c2ecf20Sopenharmony_ci asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" 5098c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" 5108c2ecf20Sopenharmony_ci "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t" 5118c2ecf20Sopenharmony_ci "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t" 5128c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" 5138c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" 5148c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t" 5158c2ecf20Sopenharmony_ci "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t" 5168c2ecf20Sopenharmony_ci "vpmovm2b %%k1,%%zmm5\n\t" 5178c2ecf20Sopenharmony_ci "vpmovm2b %%k2,%%zmm7\n\t" 5188c2ecf20Sopenharmony_ci "vpmovm2b %%k3,%%zmm13\n\t" 5198c2ecf20Sopenharmony_ci "vpmovm2b %%k4,%%zmm15\n\t" 5208c2ecf20Sopenharmony_ci "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 5218c2ecf20Sopenharmony_ci "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" 5228c2ecf20Sopenharmony_ci "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" 5238c2ecf20Sopenharmony_ci "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t" 5248c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 5258c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" 5268c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" 5278c2ecf20Sopenharmony_ci "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" 5288c2ecf20Sopenharmony_ci "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 5298c2ecf20Sopenharmony_ci "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" 5308c2ecf20Sopenharmony_ci "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" 5318c2ecf20Sopenharmony_ci "vpxorq %%zmm15,%%zmm14,%%zmm14" 5328c2ecf20Sopenharmony_ci : 5338c2ecf20Sopenharmony_ci : ); 5348c2ecf20Sopenharmony_ci } 5358c2ecf20Sopenharmony_ci asm volatile("vmovntdq %%zmm2,%0\n\t" 5368c2ecf20Sopenharmony_ci "vmovntdq %%zmm3,%1\n\t" 5378c2ecf20Sopenharmony_ci "vmovntdq %%zmm10,%2\n\t" 5388c2ecf20Sopenharmony_ci "vmovntdq %%zmm11,%3\n\t" 5398c2ecf20Sopenharmony_ci "vpxorq %4,%%zmm4,%%zmm4\n\t" 5408c2ecf20Sopenharmony_ci "vpxorq %5,%%zmm6,%%zmm6\n\t" 5418c2ecf20Sopenharmony_ci "vpxorq %6,%%zmm12,%%zmm12\n\t" 5428c2ecf20Sopenharmony_ci "vpxorq %7,%%zmm14,%%zmm14\n\t" 5438c2ecf20Sopenharmony_ci "vmovntdq %%zmm4,%4\n\t" 5448c2ecf20Sopenharmony_ci "vmovntdq %%zmm6,%5\n\t" 5458c2ecf20Sopenharmony_ci "vmovntdq %%zmm12,%6\n\t" 5468c2ecf20Sopenharmony_ci "vmovntdq %%zmm14,%7" 5478c2ecf20Sopenharmony_ci : 5488c2ecf20Sopenharmony_ci : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), 5498c2ecf20Sopenharmony_ci "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]), 5508c2ecf20Sopenharmony_ci "m" (q[d+128]), "m" (q[d+192])); 5518c2ecf20Sopenharmony_ci } 5528c2ecf20Sopenharmony_ci asm volatile("sfence" : : : "memory"); 5538c2ecf20Sopenharmony_ci kernel_fpu_end(); 5548c2ecf20Sopenharmony_ci} 5558c2ecf20Sopenharmony_ciconst struct raid6_calls raid6_avx512x4 = { 5568c2ecf20Sopenharmony_ci raid6_avx5124_gen_syndrome, 5578c2ecf20Sopenharmony_ci raid6_avx5124_xor_syndrome, 5588c2ecf20Sopenharmony_ci raid6_have_avx512, 5598c2ecf20Sopenharmony_ci "avx512x4", 5608c2ecf20Sopenharmony_ci 1 /* Has cache hints */ 5618c2ecf20Sopenharmony_ci}; 5628c2ecf20Sopenharmony_ci#endif 5638c2ecf20Sopenharmony_ci 5648c2ecf20Sopenharmony_ci#endif /* CONFIG_AS_AVX512 */ 565