18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * raid6_vx$#.c 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * $#-way unrolled RAID6 gen/xor functions for s390 68c2ecf20Sopenharmony_ci * based on the vector facility 78c2ecf20Sopenharmony_ci * 88c2ecf20Sopenharmony_ci * Copyright IBM Corp. 2016 98c2ecf20Sopenharmony_ci * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * This file is postprocessed using unroll.awk. 128c2ecf20Sopenharmony_ci */ 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci#include <linux/raid/pq.h> 158c2ecf20Sopenharmony_ci#include <asm/fpu/api.h> 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ciasm(".include \"asm/vx-insn.h\"\n"); 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci#define NSIZE 16 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_cistatic inline void LOAD_CONST(void) 228c2ecf20Sopenharmony_ci{ 238c2ecf20Sopenharmony_ci asm volatile("VREPIB %v24,7"); 248c2ecf20Sopenharmony_ci asm volatile("VREPIB %v25,0x1d"); 258c2ecf20Sopenharmony_ci} 268c2ecf20Sopenharmony_ci 278c2ecf20Sopenharmony_ci/* 288c2ecf20Sopenharmony_ci * The SHLBYTE() operation shifts each of the 16 bytes in 298c2ecf20Sopenharmony_ci * vector register y left by 1 bit and stores the result in 308c2ecf20Sopenharmony_ci * vector register x. 318c2ecf20Sopenharmony_ci */ 328c2ecf20Sopenharmony_cistatic inline void SHLBYTE(int x, int y) 338c2ecf20Sopenharmony_ci{ 348c2ecf20Sopenharmony_ci asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y)); 358c2ecf20Sopenharmony_ci} 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci/* 388c2ecf20Sopenharmony_ci * For each of the 16 bytes in the vector register y the MASK() 398c2ecf20Sopenharmony_ci * operation returns 0xFF if the high bit of the byte is 1, 408c2ecf20Sopenharmony_ci * or 0x00 if the high bit is 0. The result is stored in vector 418c2ecf20Sopenharmony_ci * register x. 428c2ecf20Sopenharmony_ci */ 438c2ecf20Sopenharmony_cistatic inline void MASK(int x, int y) 448c2ecf20Sopenharmony_ci{ 458c2ecf20Sopenharmony_ci asm volatile ("VESRAVB %0,%1,24" : : "i" (x), "i" (y)); 468c2ecf20Sopenharmony_ci} 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_cistatic inline void AND(int x, int y, int z) 498c2ecf20Sopenharmony_ci{ 508c2ecf20Sopenharmony_ci asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); 518c2ecf20Sopenharmony_ci} 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_cistatic inline void XOR(int x, int y, int z) 548c2ecf20Sopenharmony_ci{ 558c2ecf20Sopenharmony_ci asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); 568c2ecf20Sopenharmony_ci} 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_cistatic inline void LOAD_DATA(int x, u8 *ptr) 598c2ecf20Sopenharmony_ci{ 608c2ecf20Sopenharmony_ci typedef struct { u8 _[16 * $#]; } addrtype; 618c2ecf20Sopenharmony_ci register addrtype *__ptr asm("1") = (addrtype *) ptr; 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_ci asm volatile ("VLM %2,%3,0,%1" 648c2ecf20Sopenharmony_ci : : "m" (*__ptr), "a" (__ptr), "i" (x), 658c2ecf20Sopenharmony_ci "i" (x + $# - 1)); 668c2ecf20Sopenharmony_ci} 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_cistatic inline void STORE_DATA(int x, u8 *ptr) 698c2ecf20Sopenharmony_ci{ 708c2ecf20Sopenharmony_ci typedef struct { u8 _[16 * $#]; } addrtype; 718c2ecf20Sopenharmony_ci register addrtype *__ptr asm("1") = (addrtype *) ptr; 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci asm volatile ("VSTM %2,%3,0,1" 748c2ecf20Sopenharmony_ci : "=m" (*__ptr) : "a" (__ptr), "i" (x), 758c2ecf20Sopenharmony_ci "i" (x + $# - 1)); 768c2ecf20Sopenharmony_ci} 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_cistatic inline void COPY_VEC(int x, int y) 798c2ecf20Sopenharmony_ci{ 808c2ecf20Sopenharmony_ci asm volatile ("VLR %0,%1" : : "i" (x), "i" (y)); 818c2ecf20Sopenharmony_ci} 828c2ecf20Sopenharmony_ci 838c2ecf20Sopenharmony_cistatic void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) 848c2ecf20Sopenharmony_ci{ 858c2ecf20Sopenharmony_ci struct kernel_fpu vxstate; 868c2ecf20Sopenharmony_ci u8 **dptr, *p, *q; 878c2ecf20Sopenharmony_ci int d, z, z0; 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_ci kernel_fpu_begin(&vxstate, KERNEL_VXR); 908c2ecf20Sopenharmony_ci LOAD_CONST(); 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci dptr = (u8 **) ptrs; 938c2ecf20Sopenharmony_ci z0 = disks - 3; /* Highest data disk */ 948c2ecf20Sopenharmony_ci p = dptr[z0 + 1]; /* XOR parity */ 958c2ecf20Sopenharmony_ci q = dptr[z0 + 2]; /* RS syndrome */ 968c2ecf20Sopenharmony_ci 978c2ecf20Sopenharmony_ci for (d = 0; d < bytes; d += $#*NSIZE) { 988c2ecf20Sopenharmony_ci LOAD_DATA(0,&dptr[z0][d]); 998c2ecf20Sopenharmony_ci COPY_VEC(8+$$,0+$$); 1008c2ecf20Sopenharmony_ci for (z = z0 - 1; z >= 0; z--) { 1018c2ecf20Sopenharmony_ci MASK(16+$$,8+$$); 1028c2ecf20Sopenharmony_ci AND(16+$$,16+$$,25); 1038c2ecf20Sopenharmony_ci SHLBYTE(8+$$,8+$$); 1048c2ecf20Sopenharmony_ci XOR(8+$$,8+$$,16+$$); 1058c2ecf20Sopenharmony_ci LOAD_DATA(16,&dptr[z][d]); 1068c2ecf20Sopenharmony_ci XOR(0+$$,0+$$,16+$$); 1078c2ecf20Sopenharmony_ci XOR(8+$$,8+$$,16+$$); 1088c2ecf20Sopenharmony_ci } 1098c2ecf20Sopenharmony_ci STORE_DATA(0,&p[d]); 1108c2ecf20Sopenharmony_ci STORE_DATA(8,&q[d]); 1118c2ecf20Sopenharmony_ci } 1128c2ecf20Sopenharmony_ci kernel_fpu_end(&vxstate, KERNEL_VXR); 1138c2ecf20Sopenharmony_ci} 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_cistatic void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, 1168c2ecf20Sopenharmony_ci size_t bytes, void **ptrs) 1178c2ecf20Sopenharmony_ci{ 1188c2ecf20Sopenharmony_ci struct kernel_fpu vxstate; 1198c2ecf20Sopenharmony_ci u8 **dptr, *p, *q; 1208c2ecf20Sopenharmony_ci int d, z, z0; 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci dptr = (u8 **) ptrs; 1238c2ecf20Sopenharmony_ci z0 = stop; /* P/Q right side optimization */ 1248c2ecf20Sopenharmony_ci p = dptr[disks - 2]; /* XOR parity */ 1258c2ecf20Sopenharmony_ci q = dptr[disks - 1]; /* RS syndrome */ 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci kernel_fpu_begin(&vxstate, KERNEL_VXR); 1288c2ecf20Sopenharmony_ci LOAD_CONST(); 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci for (d = 0; d < bytes; d += $#*NSIZE) { 1318c2ecf20Sopenharmony_ci /* P/Q data pages */ 1328c2ecf20Sopenharmony_ci LOAD_DATA(0,&dptr[z0][d]); 1338c2ecf20Sopenharmony_ci COPY_VEC(8+$$,0+$$); 1348c2ecf20Sopenharmony_ci for (z = z0 - 1; z >= start; z--) { 1358c2ecf20Sopenharmony_ci MASK(16+$$,8+$$); 1368c2ecf20Sopenharmony_ci AND(16+$$,16+$$,25); 1378c2ecf20Sopenharmony_ci SHLBYTE(8+$$,8+$$); 1388c2ecf20Sopenharmony_ci XOR(8+$$,8+$$,16+$$); 1398c2ecf20Sopenharmony_ci LOAD_DATA(16,&dptr[z][d]); 1408c2ecf20Sopenharmony_ci XOR(0+$$,0+$$,16+$$); 1418c2ecf20Sopenharmony_ci XOR(8+$$,8+$$,16+$$); 1428c2ecf20Sopenharmony_ci } 1438c2ecf20Sopenharmony_ci /* P/Q left side optimization */ 1448c2ecf20Sopenharmony_ci for (z = start - 1; z >= 0; z--) { 1458c2ecf20Sopenharmony_ci MASK(16+$$,8+$$); 1468c2ecf20Sopenharmony_ci AND(16+$$,16+$$,25); 1478c2ecf20Sopenharmony_ci SHLBYTE(8+$$,8+$$); 1488c2ecf20Sopenharmony_ci XOR(8+$$,8+$$,16+$$); 1498c2ecf20Sopenharmony_ci } 1508c2ecf20Sopenharmony_ci LOAD_DATA(16,&p[d]); 1518c2ecf20Sopenharmony_ci XOR(16+$$,16+$$,0+$$); 1528c2ecf20Sopenharmony_ci STORE_DATA(16,&p[d]); 1538c2ecf20Sopenharmony_ci LOAD_DATA(16,&q[d]); 1548c2ecf20Sopenharmony_ci XOR(16+$$,16+$$,8+$$); 1558c2ecf20Sopenharmony_ci STORE_DATA(16,&q[d]); 1568c2ecf20Sopenharmony_ci } 1578c2ecf20Sopenharmony_ci kernel_fpu_end(&vxstate, KERNEL_VXR); 1588c2ecf20Sopenharmony_ci} 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_cistatic int raid6_s390vx$#_valid(void) 1618c2ecf20Sopenharmony_ci{ 1628c2ecf20Sopenharmony_ci return MACHINE_HAS_VX; 1638c2ecf20Sopenharmony_ci} 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_ciconst struct raid6_calls raid6_s390vx$# = { 1668c2ecf20Sopenharmony_ci raid6_s390vx$#_gen_syndrome, 1678c2ecf20Sopenharmony_ci raid6_s390vx$#_xor_syndrome, 1688c2ecf20Sopenharmony_ci raid6_s390vx$#_valid, 1698c2ecf20Sopenharmony_ci "vx128x$#", 1708c2ecf20Sopenharmony_ci 1 1718c2ecf20Sopenharmony_ci}; 172