/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * arch/ia64/lib/xor.S
 *
 * Optimized RAID-5 checksumming functions for IA-64.
 */

#include <asm/asmmacro.h>
#include <asm/export.h>

/*
 * xor_ia64_2(in0, in1, in2)
 *
 * XORs the 8-byte words at [in2] into the 8-byte words at [in1], in place.
 *
 *   in0  byte count.  The loop processes (in0 >> 3) words of 8 bytes each.
 *        There is no guard for a count below 8: (in0 >> 3) - 1 is loaded
 *        into ar.lc unchecked.
 *   in1  first source, and also the destination
 *   in2  second source
 *
 * NOTE(review): presumably declared in C as
 *   void xor_ia64_2(unsigned long bytes, unsigned long *p1, unsigned long *p2);
 * confirm against the caller's prototype.
 *
 * The loop is software-pipelined with br.ctop and rotating registers.
 * It has 6 + 2 = 8 stages, selected by the rotating predicates p[0]..p[7]:
 *
 *   p[0]  issue the loads (s1[0], s2[0])
 *   p[6]  six rotations later, XOR the loaded words (now s1[6], s2[6])
 *   p[7]  one rotation later, store the result (d[0] has become d[1])
 *
 * The arguments arrive in r32 and up, which is also where the rotating
 * registers named by .rotr live, so every value needed inside the loop is
 * copied to a static register (r8, r16, r17) or to ar.lc before the loop.
 *
 * Uses r8, r16, r17 as working registers; r29, r30, r31 hold the saved
 * pr, ar.lc and ar.pfs.  ar.lc and pr are restored before returning.
 */
GLOBAL_ENTRY(xor_ia64_2)
	.prologue
	.fframe 0				/* unwind info: no memory stack frame */
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 3, 0, 13, 16	/* 3 inputs; 16-register frame, all rotating */
	.save ar.lc, r30
	mov r30 = ar.lc				/* saved, restored after the loop */
	.save pr, r29
	mov r29 = pr				/* saved, restored after the loop */
	;;
	.body
	mov r8 = in1				/* r8  = store pointer (destination is in1) */
	mov ar.ec = 6 + 2			/* epilogue count = number of pipeline stages */
	shr in0 = in0, 3			/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0			/* loop count register takes iterations - 1 */
	mov r16 = in1				/* r16 = load pointer, source 1 */
	mov r17 = in2				/* r17 = load pointer, source 2 */
	;;
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* only p16 (= p[0]) set: pipeline starts empty */
	;;
	.rotr s1[6+1], s2[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8		/* stage 0: load word from source 1, advance */
(p[0])	ld8.nta s2[0] = [r17], 8		/* stage 0: load word from source 2, advance */
(p[6])	xor d[0] = s1[6], s2[6]			/* stage 6: combine the words loaded 6 rotations ago */
(p[6+1])st8.nta [r8] = d[1], 8			/* stage 7: store previous rotation's d[0], advance */
	nop.f 0
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore caller's loop count */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_2)
EXPORT_SYMBOL(xor_ia64_2)
/*
 * xor_ia64_3, xor_ia64_4, xor_ia64_5
 *
 * Each routine XORs several source buffers into the buffer at in1, in
 * place, one 8-byte word per loop iteration.
 *
 *   in0        byte count.  The loop processes (in0 >> 3) words.  There is
 *              no guard for a count below 8: (in0 >> 3) - 1 is loaded into
 *              ar.lc unchecked.
 *   in1        first source, and also the destination
 *   in2..inN   remaining sources (N = 3, 4, 5 respectively)
 *
 * NOTE(review): presumably declared in C as
 *   void xor_ia64_N(unsigned long bytes, unsigned long *p1, ...,
 *                   unsigned long *pN);
 * confirm against the caller's prototypes.
 *
 * All three loops are software-pipelined with br.ctop and rotating
 * registers, using 6 + 2 = 8 stages selected by the rotating predicates
 * p[0]..p[7]:
 *
 *   p[0]  issue the loads (sK[0])
 *   p[6]  six rotations later, XOR the loaded words (now sK[6]) into d[0]
 *   p[7]  one rotation later, store the result (d[0] has become d[1])
 *
 * The arguments arrive in r32 and up, which is also where the rotating
 * registers named by .rotr live, so every value needed inside the loop is
 * copied to a static register (r8, r16..r20) or to ar.lc before the loop.
 *
 * r29, r30, r31 hold the saved pr, ar.lc and ar.pfs.  ar.lc and pr are
 * restored before returning.
 */

/* in1[i] ^= in2[i] ^ in3[i] */
GLOBAL_ENTRY(xor_ia64_3)
	.prologue
	.fframe 0				/* unwind info: no memory stack frame */
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 4, 0, 20, 24	/* 4 inputs; 24-register frame, all rotating */
	.save ar.lc, r30
	mov r30 = ar.lc				/* saved, restored after the loop */
	.save pr, r29
	mov r29 = pr				/* saved, restored after the loop */
	;;
	.body
	mov r8 = in1				/* r8  = store pointer (destination is in1) */
	mov ar.ec = 6 + 2			/* epilogue count = number of pipeline stages */
	shr in0 = in0, 3			/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0			/* loop count register takes iterations - 1 */
	mov r16 = in1				/* r16 = load pointer, source 1 */
	mov r17 = in2				/* r17 = load pointer, source 2 */
	;;
	mov r18 = in3				/* r18 = load pointer, source 3 */
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* only p16 (= p[0]) set: pipeline starts empty */
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]			/* d[0] = s1 ^ s2 */
	;;					/* stop: next group reads the d[0] just written */
(p[0])	ld8.nta s3[0] = [r18], 8
(p[6+1])st8.nta [r8] = d[1], 8			/* store previous rotation's d[0], advance */
(p[6])	xor d[0] = d[0], s3[6]			/* d[0] ^= s3 */
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore caller's loop count */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_3)
EXPORT_SYMBOL(xor_ia64_3)

/* in1[i] ^= in2[i] ^ in3[i] ^ in4[i]; r20 is a static temporary */
GLOBAL_ENTRY(xor_ia64_4)
	.prologue
	.fframe 0				/* unwind info: no memory stack frame */
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 5, 0, 27, 32	/* 5 inputs; 32-register frame, all rotating */
	.save ar.lc, r30
	mov r30 = ar.lc				/* saved, restored after the loop */
	.save pr, r29
	mov r29 = pr				/* saved, restored after the loop */
	;;
	.body
	mov r8 = in1				/* r8  = store pointer (destination is in1) */
	mov ar.ec = 6 + 2			/* epilogue count = number of pipeline stages */
	shr in0 = in0, 3			/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0			/* loop count register takes iterations - 1 */
	mov r16 = in1				/* r16 = load pointer, source 1 */
	mov r17 = in2				/* r17 = load pointer, source 2 */
	;;
	mov r18 = in3				/* r18 = load pointer, source 3 */
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* only p16 (= p[0]) set: pipeline starts empty */
	mov r19 = in4				/* r19 = load pointer, source 4 */
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]			/* d[0] = s1 ^ s2 */
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r20 = s3[6], s4[6]			/* r20  = s3 ^ s4 */
	;;					/* stop: next group reads d[0] and r20 */
(p[6+1])st8.nta [r8] = d[1], 8			/* store previous rotation's d[0], advance */
(p[6])	xor d[0] = d[0], r20			/* d[0] = (s1 ^ s2) ^ (s3 ^ s4) */
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore caller's loop count */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_4)
EXPORT_SYMBOL(xor_ia64_4)

/* in1[i] ^= in2[i] ^ in3[i] ^ in4[i] ^ in5[i]; r21 is a static temporary */
GLOBAL_ENTRY(xor_ia64_5)
	.prologue
	.fframe 0				/* unwind info: no memory stack frame */
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 6, 0, 34, 40	/* 6 inputs; 40-register frame, all rotating */
	.save ar.lc, r30
	mov r30 = ar.lc				/* saved, restored after the loop */
	.save pr, r29
	mov r29 = pr				/* saved, restored after the loop */
	;;
	.body
	mov r8 = in1				/* r8  = store pointer (destination is in1) */
	mov ar.ec = 6 + 2			/* epilogue count = number of pipeline stages */
	shr in0 = in0, 3			/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0			/* loop count register takes iterations - 1 */
	mov r16 = in1				/* r16 = load pointer, source 1 */
	mov r17 = in2				/* r17 = load pointer, source 2 */
	;;
	mov r18 = in3				/* r18 = load pointer, source 3 */
	mov ar.lc = in0
	mov pr.rot = 1 << 16			/* only p16 (= p[0]) set: pipeline starts empty */
	mov r19 = in4				/* r19 = load pointer, source 4 */
	mov r20 = in5				/* r20 = load pointer, source 5 */
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]			/* d[0] = s1 ^ s2 */
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r21 = s3[6], s4[6]			/* r21  = s3 ^ s4 */
	;;					/* stop: next group reads d[0] and r21 */
(p[0])	ld8.nta s5[0] = [r20], 8
(p[6+1])st8.nta [r8] = d[1], 8			/* store previous rotation's d[0], advance */
(p[6])	xor d[0] = d[0], r21			/* d[0] = (s1 ^ s2) ^ (s3 ^ s4) */
	;;					/* stop: next group reads the d[0] just written */
(p[6])	xor d[0] = d[0], s5[6]			/* d[0] ^= s5 */
	nop.f 0
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30				/* restore caller's loop count */
	mov pr = r29, -1			/* restore all predicates */
	br.ret.sptk.few rp
END(xor_ia64_5)
EXPORT_SYMBOL(xor_ia64_5)