18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-or-later */
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * arch/ia64/lib/xor.S
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Optimized RAID-5 checksumming functions for IA-64.
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include <asm/asmmacro.h>
98c2ecf20Sopenharmony_ci#include <asm/export.h>
108c2ecf20Sopenharmony_ci
/*
 * xor_ia64_2: XOR the 8-byte words read through in2 into the 8-byte
 * words at in1, i.e. in1[i] = in1[i] ^ in2[i].
 *
 *   in0 = length. It is shifted right by 3 to obtain the number of
 *         8-byte words, so it is presumably a byte count - confirm
 *         against the C prototype.
 *   in1 = first source and also the destination (r8 and r16 both
 *         start at in1)
 *   in2 = second source
 *
 * The loop is software pipelined with rotating registers: the loads
 * issue in stage 0, the xor in stage 6 and the store in stage 7, and
 * br.ctop rotates the register and predicate names on every pass.
 * ar.lc and pr are saved in r30 and r29 on entry and restored before
 * the return.
 *
 * NOTE(review): there is no guard for in0 < 8. In that case
 * (in0 >> 3) - 1 is -1 and that value is written to ar.lc. Callers
 * are presumably expected to pass a non-zero multiple of 8 - confirm.
 */
118c2ecf20Sopenharmony_ciGLOBAL_ENTRY(xor_ia64_2)
128c2ecf20Sopenharmony_ci	.prologue
	/* Unwind info: no memory stack frame is used. */
138c2ecf20Sopenharmony_ci	.fframe 0
148c2ecf20Sopenharmony_ci	.save ar.pfs, r31
	/*
	 * Frame of 3 inputs + 0 locals + 13 outputs = 16 registers, all 16
	 * of them rotating. The previous ar.pfs is kept in r31.
	 */
158c2ecf20Sopenharmony_ci	alloc r31 = ar.pfs, 3, 0, 13, 16
168c2ecf20Sopenharmony_ci	.save ar.lc, r30
178c2ecf20Sopenharmony_ci	mov r30 = ar.lc
188c2ecf20Sopenharmony_ci	.save pr, r29
198c2ecf20Sopenharmony_ci	mov r29 = pr
208c2ecf20Sopenharmony_ci	;;
218c2ecf20Sopenharmony_ci	.body
	/* r8 = store pointer, starting at the destination in1. */
228c2ecf20Sopenharmony_ci	mov r8 = in1
	/* Epilogue count of 8 lets the 8 pipeline stages (0..7) drain. */
238c2ecf20Sopenharmony_ci	mov ar.ec = 6 + 2
	/* in0 = number of 8-byte words. */
248c2ecf20Sopenharmony_ci	shr in0 = in0, 3
258c2ecf20Sopenharmony_ci	;;
	/* ar.lc takes the word count minus one. */
268c2ecf20Sopenharmony_ci	adds in0 = -1, in0
	/*
	 * Load pointers live in static registers r16 and r17. The rotating
	 * region starts at r32 and so overlaps the input registers, which
	 * therefore cannot be used once the loop starts rotating.
	 */
278c2ecf20Sopenharmony_ci	mov r16 = in1
288c2ecf20Sopenharmony_ci	mov r17 = in2
298c2ecf20Sopenharmony_ci	;;
308c2ecf20Sopenharmony_ci	mov ar.lc = in0
	/* Only p16 (stage 0) is set, the other rotating predicates are cleared. */
318c2ecf20Sopenharmony_ci	mov pr.rot = 1 << 16
328c2ecf20Sopenharmony_ci	;;
	/*
	 * Rotating names: s1 and s2 carry loaded words through 7 rotations,
	 * d carries the xor result for one rotation (7 + 7 + 2 = 16
	 * registers). p[0..7] are the stage predicates.
	 */
338c2ecf20Sopenharmony_ci	.rotr s1[6+1], s2[6+1], d[2]
348c2ecf20Sopenharmony_ci	.rotp p[6+2]
358c2ecf20Sopenharmony_ci0:
	/* Stage 0: load one word from each source, post-incrementing by 8. */
368c2ecf20Sopenharmony_ci(p[0])	ld8.nta s1[0] = [r16], 8
378c2ecf20Sopenharmony_ci(p[0])	ld8.nta s2[0] = [r17], 8
	/* Stage 6: combine the words loaded six passes earlier. */
388c2ecf20Sopenharmony_ci(p[6])	xor d[0] = s1[6], s2[6]
	/* Stage 7: d[1] is the d[0] written on the previous pass. */
398c2ecf20Sopenharmony_ci(p[6+1])st8.nta [r8] = d[1], 8
	/* Presumably fills the F slot of the bundle that holds the branch. */
408c2ecf20Sopenharmony_ci	nop.f 0
418c2ecf20Sopenharmony_ci	br.ctop.dptk.few 0b
428c2ecf20Sopenharmony_ci	;;
	/* Restore the loop counter and all predicate registers. */
438c2ecf20Sopenharmony_ci	mov ar.lc = r30
448c2ecf20Sopenharmony_ci	mov pr = r29, -1
458c2ecf20Sopenharmony_ci	br.ret.sptk.few rp
468c2ecf20Sopenharmony_ciEND(xor_ia64_2)
478c2ecf20Sopenharmony_ciEXPORT_SYMBOL(xor_ia64_2)
488c2ecf20Sopenharmony_ci
/*
 * xor_ia64_3: XOR the 8-byte words read through in2 and in3 into the
 * 8-byte words at in1, i.e. in1[i] = in1[i] ^ in2[i] ^ in3[i].
 *
 *   in0 = length. It is shifted right by 3 to obtain the number of
 *         8-byte words, so it is presumably a byte count - confirm
 *         against the C prototype.
 *   in1 = first source and also the destination (r8 and r16 both
 *         start at in1)
 *   in2, in3 = further sources
 *
 * The loop is software pipelined with rotating registers: the loads
 * issue in stage 0, the xors in stage 6 and the store in stage 7.
 * Each pass consists of two instruction groups. ar.lc and pr are
 * saved in r30 and r29 on entry and restored before the return.
 *
 * NOTE(review): there is no guard for in0 < 8. In that case
 * (in0 >> 3) - 1 is -1 and that value is written to ar.lc. Callers
 * are presumably expected to pass a non-zero multiple of 8 - confirm.
 */
498c2ecf20Sopenharmony_ciGLOBAL_ENTRY(xor_ia64_3)
508c2ecf20Sopenharmony_ci	.prologue
	/* Unwind info: no memory stack frame is used. */
518c2ecf20Sopenharmony_ci	.fframe 0
528c2ecf20Sopenharmony_ci	.save ar.pfs, r31
	/*
	 * Frame of 4 inputs + 0 locals + 20 outputs = 24 registers, all 24
	 * of them rotating. The previous ar.pfs is kept in r31.
	 */
538c2ecf20Sopenharmony_ci	alloc r31 = ar.pfs, 4, 0, 20, 24
548c2ecf20Sopenharmony_ci	.save ar.lc, r30
558c2ecf20Sopenharmony_ci	mov r30 = ar.lc
568c2ecf20Sopenharmony_ci	.save pr, r29
578c2ecf20Sopenharmony_ci	mov r29 = pr
588c2ecf20Sopenharmony_ci	;;
598c2ecf20Sopenharmony_ci	.body
	/* r8 = store pointer, starting at the destination in1. */
608c2ecf20Sopenharmony_ci	mov r8 = in1
	/* Epilogue count of 8 lets the 8 pipeline stages (0..7) drain. */
618c2ecf20Sopenharmony_ci	mov ar.ec = 6 + 2
	/* in0 = number of 8-byte words. */
628c2ecf20Sopenharmony_ci	shr in0 = in0, 3
638c2ecf20Sopenharmony_ci	;;
	/* ar.lc takes the word count minus one. */
648c2ecf20Sopenharmony_ci	adds in0 = -1, in0
	/*
	 * Load pointers live in static registers r16..r18. The rotating
	 * region starts at r32 and so overlaps the input registers, which
	 * therefore cannot be used once the loop starts rotating.
	 */
658c2ecf20Sopenharmony_ci	mov r16 = in1
668c2ecf20Sopenharmony_ci	mov r17 = in2
678c2ecf20Sopenharmony_ci	;;
688c2ecf20Sopenharmony_ci	mov r18 = in3
698c2ecf20Sopenharmony_ci	mov ar.lc = in0
	/* Only p16 (stage 0) is set, the other rotating predicates are cleared. */
708c2ecf20Sopenharmony_ci	mov pr.rot = 1 << 16
718c2ecf20Sopenharmony_ci	;;
	/*
	 * Rotating names: s1..s3 carry loaded words through 7 rotations,
	 * d carries the xor result for one rotation (3 * 7 + 2 = 23 of the
	 * 24 rotating registers). p[0..7] are the stage predicates.
	 */
728c2ecf20Sopenharmony_ci	.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
738c2ecf20Sopenharmony_ci	.rotp p[6+2]
748c2ecf20Sopenharmony_ci0:
	/* Group 1: load from the first two sources and start the xor. */
758c2ecf20Sopenharmony_ci(p[0])	ld8.nta s1[0] = [r16], 8
768c2ecf20Sopenharmony_ci(p[0])	ld8.nta s2[0] = [r17], 8
778c2ecf20Sopenharmony_ci(p[6])	xor d[0] = s1[6], s2[6]
	/* The stop separates the write of d[0] above from its read below. */
788c2ecf20Sopenharmony_ci	;;
	/*
	 * Group 2: load from the third source, store the result finished on
	 * the previous pass (d[1]) and fold the third word into d[0].
	 */
798c2ecf20Sopenharmony_ci(p[0])	ld8.nta s3[0] = [r18], 8
808c2ecf20Sopenharmony_ci(p[6+1])st8.nta [r8] = d[1], 8
818c2ecf20Sopenharmony_ci(p[6])	xor d[0] = d[0], s3[6]
828c2ecf20Sopenharmony_ci	br.ctop.dptk.few 0b
838c2ecf20Sopenharmony_ci	;;
	/* Restore the loop counter and all predicate registers. */
848c2ecf20Sopenharmony_ci	mov ar.lc = r30
858c2ecf20Sopenharmony_ci	mov pr = r29, -1
868c2ecf20Sopenharmony_ci	br.ret.sptk.few rp
878c2ecf20Sopenharmony_ciEND(xor_ia64_3)
888c2ecf20Sopenharmony_ciEXPORT_SYMBOL(xor_ia64_3)
898c2ecf20Sopenharmony_ci
/*
 * xor_ia64_4: XOR the 8-byte words read through in2, in3 and in4 into
 * the 8-byte words at in1, i.e.
 * in1[i] = in1[i] ^ in2[i] ^ in3[i] ^ in4[i].
 *
 *   in0 = length. It is shifted right by 3 to obtain the number of
 *         8-byte words, so it is presumably a byte count - confirm
 *         against the C prototype.
 *   in1 = first source and also the destination (r8 and r16 both
 *         start at in1)
 *   in2, in3, in4 = further sources
 *
 * The loop is software pipelined with rotating registers: the loads
 * issue in stage 0, the xors in stage 6 and the store in stage 7.
 * Each pass consists of two instruction groups, with static register
 * r20 holding the partial result in3[i] ^ in4[i] between them.
 * ar.lc and pr are saved in r30 and r29 on entry and restored before
 * the return.
 *
 * NOTE(review): there is no guard for in0 < 8. In that case
 * (in0 >> 3) - 1 is -1 and that value is written to ar.lc. Callers
 * are presumably expected to pass a non-zero multiple of 8 - confirm.
 */
908c2ecf20Sopenharmony_ciGLOBAL_ENTRY(xor_ia64_4)
918c2ecf20Sopenharmony_ci	.prologue
	/* Unwind info: no memory stack frame is used. */
928c2ecf20Sopenharmony_ci	.fframe 0
938c2ecf20Sopenharmony_ci	.save ar.pfs, r31
	/*
	 * Frame of 5 inputs + 0 locals + 27 outputs = 32 registers, all 32
	 * of them rotating. The previous ar.pfs is kept in r31.
	 */
948c2ecf20Sopenharmony_ci	alloc r31 = ar.pfs, 5, 0, 27, 32
958c2ecf20Sopenharmony_ci	.save ar.lc, r30
968c2ecf20Sopenharmony_ci	mov r30 = ar.lc
978c2ecf20Sopenharmony_ci	.save pr, r29
988c2ecf20Sopenharmony_ci	mov r29 = pr
998c2ecf20Sopenharmony_ci	;;
1008c2ecf20Sopenharmony_ci	.body
	/* r8 = store pointer, starting at the destination in1. */
1018c2ecf20Sopenharmony_ci	mov r8 = in1
	/* Epilogue count of 8 lets the 8 pipeline stages (0..7) drain. */
1028c2ecf20Sopenharmony_ci	mov ar.ec = 6 + 2
	/* in0 = number of 8-byte words. */
1038c2ecf20Sopenharmony_ci	shr in0 = in0, 3
1048c2ecf20Sopenharmony_ci	;;
	/* ar.lc takes the word count minus one. */
1058c2ecf20Sopenharmony_ci	adds in0 = -1, in0
	/*
	 * Load pointers live in static registers r16..r19. The rotating
	 * region starts at r32 and so overlaps the input registers, which
	 * therefore cannot be used once the loop starts rotating.
	 */
1068c2ecf20Sopenharmony_ci	mov r16 = in1
1078c2ecf20Sopenharmony_ci	mov r17 = in2
1088c2ecf20Sopenharmony_ci	;;
1098c2ecf20Sopenharmony_ci	mov r18 = in3
1108c2ecf20Sopenharmony_ci	mov ar.lc = in0
	/* Only p16 (stage 0) is set, the other rotating predicates are cleared. */
1118c2ecf20Sopenharmony_ci	mov pr.rot = 1 << 16
1128c2ecf20Sopenharmony_ci	mov r19 = in4
1138c2ecf20Sopenharmony_ci	;;
	/*
	 * Rotating names: s1..s4 carry loaded words through 7 rotations,
	 * d carries the xor result for one rotation (4 * 7 + 2 = 30 of the
	 * 32 rotating registers). p[0..7] are the stage predicates.
	 */
1148c2ecf20Sopenharmony_ci	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
1158c2ecf20Sopenharmony_ci	.rotp p[6+2]
1168c2ecf20Sopenharmony_ci0:
	/*
	 * Group 1: load one word from each of the four sources and form the
	 * two pairwise xors, one into d[0] and one into static scratch r20.
	 */
1178c2ecf20Sopenharmony_ci(p[0])	ld8.nta s1[0] = [r16], 8
1188c2ecf20Sopenharmony_ci(p[0])	ld8.nta s2[0] = [r17], 8
1198c2ecf20Sopenharmony_ci(p[6])	xor d[0] = s1[6], s2[6]
1208c2ecf20Sopenharmony_ci(p[0])	ld8.nta s3[0] = [r18], 8
1218c2ecf20Sopenharmony_ci(p[0])	ld8.nta s4[0] = [r19], 8
1228c2ecf20Sopenharmony_ci(p[6])	xor r20 = s3[6], s4[6]
	/* The stop separates the writes of d[0] and r20 from their reads. */
1238c2ecf20Sopenharmony_ci	;;
	/*
	 * Group 2: store the result finished on the previous pass (d[1])
	 * and merge the two partial xors into d[0].
	 */
1248c2ecf20Sopenharmony_ci(p[6+1])st8.nta [r8] = d[1], 8
1258c2ecf20Sopenharmony_ci(p[6])	xor d[0] = d[0], r20
1268c2ecf20Sopenharmony_ci	br.ctop.dptk.few 0b
1278c2ecf20Sopenharmony_ci	;;
	/* Restore the loop counter and all predicate registers. */
1288c2ecf20Sopenharmony_ci	mov ar.lc = r30
1298c2ecf20Sopenharmony_ci	mov pr = r29, -1
1308c2ecf20Sopenharmony_ci	br.ret.sptk.few rp
1318c2ecf20Sopenharmony_ciEND(xor_ia64_4)
1328c2ecf20Sopenharmony_ciEXPORT_SYMBOL(xor_ia64_4)
1338c2ecf20Sopenharmony_ci
/*
 * xor_ia64_5: XOR the 8-byte words read through in2, in3, in4 and in5
 * into the 8-byte words at in1, i.e.
 * in1[i] = in1[i] ^ in2[i] ^ in3[i] ^ in4[i] ^ in5[i].
 *
 *   in0 = length. It is shifted right by 3 to obtain the number of
 *         8-byte words, so it is presumably a byte count - confirm
 *         against the C prototype.
 *   in1 = first source and also the destination (r8 and r16 both
 *         start at in1)
 *   in2, in3, in4, in5 = further sources
 *
 * The loop is software pipelined with rotating registers: the loads
 * issue in stage 0, the xors in stage 6 and the store in stage 7.
 * Each pass consists of three instruction groups, with static
 * register r21 holding the partial result in3[i] ^ in4[i].
 * ar.lc and pr are saved in r30 and r29 on entry and restored before
 * the return.
 *
 * NOTE(review): there is no guard for in0 < 8. In that case
 * (in0 >> 3) - 1 is -1 and that value is written to ar.lc. Callers
 * are presumably expected to pass a non-zero multiple of 8 - confirm.
 */
1348c2ecf20Sopenharmony_ciGLOBAL_ENTRY(xor_ia64_5)
1358c2ecf20Sopenharmony_ci	.prologue
	/* Unwind info: no memory stack frame is used. */
1368c2ecf20Sopenharmony_ci	.fframe 0
1378c2ecf20Sopenharmony_ci	.save ar.pfs, r31
	/*
	 * Frame of 6 inputs + 0 locals + 34 outputs = 40 registers, all 40
	 * of them rotating. The previous ar.pfs is kept in r31.
	 */
1388c2ecf20Sopenharmony_ci	alloc r31 = ar.pfs, 6, 0, 34, 40
1398c2ecf20Sopenharmony_ci	.save ar.lc, r30
1408c2ecf20Sopenharmony_ci	mov r30 = ar.lc
1418c2ecf20Sopenharmony_ci	.save pr, r29
1428c2ecf20Sopenharmony_ci	mov r29 = pr
1438c2ecf20Sopenharmony_ci	;;
1448c2ecf20Sopenharmony_ci	.body
	/* r8 = store pointer, starting at the destination in1. */
1458c2ecf20Sopenharmony_ci	mov r8 = in1
	/* Epilogue count of 8 lets the 8 pipeline stages (0..7) drain. */
1468c2ecf20Sopenharmony_ci	mov ar.ec = 6 + 2
	/* in0 = number of 8-byte words. */
1478c2ecf20Sopenharmony_ci	shr in0 = in0, 3
1488c2ecf20Sopenharmony_ci	;;
	/* ar.lc takes the word count minus one. */
1498c2ecf20Sopenharmony_ci	adds in0 = -1, in0
	/*
	 * Load pointers live in static registers r16..r20. The rotating
	 * region starts at r32 and so overlaps the input registers, which
	 * therefore cannot be used once the loop starts rotating.
	 */
1508c2ecf20Sopenharmony_ci	mov r16 = in1
1518c2ecf20Sopenharmony_ci	mov r17 = in2
1528c2ecf20Sopenharmony_ci	;;
1538c2ecf20Sopenharmony_ci	mov r18 = in3
1548c2ecf20Sopenharmony_ci	mov ar.lc = in0
	/* Only p16 (stage 0) is set, the other rotating predicates are cleared. */
1558c2ecf20Sopenharmony_ci	mov pr.rot = 1 << 16
1568c2ecf20Sopenharmony_ci	mov r19 = in4
1578c2ecf20Sopenharmony_ci	mov r20 = in5
1588c2ecf20Sopenharmony_ci	;;
	/*
	 * Rotating names: s1..s5 carry loaded words through 7 rotations,
	 * d carries the xor result for one rotation (5 * 7 + 2 = 37 of the
	 * 40 rotating registers). p[0..7] are the stage predicates.
	 */
1598c2ecf20Sopenharmony_ci	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
1608c2ecf20Sopenharmony_ci	.rotp p[6+2]
1618c2ecf20Sopenharmony_ci0:
	/*
	 * Group 1: load one word from each of the first four sources and
	 * form two pairwise xors, one into d[0] and one into static
	 * scratch r21.
	 */
1628c2ecf20Sopenharmony_ci(p[0])	ld8.nta s1[0] = [r16], 8
1638c2ecf20Sopenharmony_ci(p[0])	ld8.nta s2[0] = [r17], 8
1648c2ecf20Sopenharmony_ci(p[6])	xor d[0] = s1[6], s2[6]
1658c2ecf20Sopenharmony_ci(p[0])	ld8.nta s3[0] = [r18], 8
1668c2ecf20Sopenharmony_ci(p[0])	ld8.nta s4[0] = [r19], 8
1678c2ecf20Sopenharmony_ci(p[6])	xor r21 = s3[6], s4[6]
	/* The stop separates the writes of d[0] and r21 from their reads. */
1688c2ecf20Sopenharmony_ci	;;
	/*
	 * Group 2: load from the fifth source, store the result finished on
	 * the previous pass (d[1]) and merge the two partial xors into d[0].
	 */
1698c2ecf20Sopenharmony_ci(p[0])	ld8.nta s5[0] = [r20], 8
1708c2ecf20Sopenharmony_ci(p[6+1])st8.nta [r8] = d[1], 8
1718c2ecf20Sopenharmony_ci(p[6])	xor d[0] = d[0], r21
	/* Another stop, since d[0] is written above and read again below. */
1728c2ecf20Sopenharmony_ci	;;
	/* Group 3: fold the fifth word into d[0]. */
1738c2ecf20Sopenharmony_ci(p[6])	  xor d[0] = d[0], s5[6]
	/* Presumably fills the F slot of the bundle that holds the branch. */
1748c2ecf20Sopenharmony_ci	nop.f 0
1758c2ecf20Sopenharmony_ci	br.ctop.dptk.few 0b
1768c2ecf20Sopenharmony_ci	;;
	/* Restore the loop counter and all predicate registers. */
1778c2ecf20Sopenharmony_ci	mov ar.lc = r30
1788c2ecf20Sopenharmony_ci	mov pr = r29, -1
1798c2ecf20Sopenharmony_ci	br.ret.sptk.few rp
1808c2ecf20Sopenharmony_ciEND(xor_ia64_5)
1818c2ecf20Sopenharmony_ciEXPORT_SYMBOL(xor_ia64_5)
182