/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * arch/ia64/lib/xor.S
 *
 * Optimized RAID-5 checksumming functions for IA-64.
 */

#include <asm/asmmacro.h>
#include <asm/export.h>

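/*
 * Each routine below is a software-pipelined (modulo-scheduled) loop built
 * on the IA-64 rotating register and rotating predicate machinery:
 *
 *  - in0 (the byte count) is converted to a count of 8-byte words and,
 *    minus one, loaded into ar.lc, so the loop starts ar.lc + 1 iterations;
 *  - ar.ec = 6 + 2 lets br.ctop drain the pipeline: the xor in stage p[6]
 *    consumes data loaded six iterations earlier in stage p[0], and the
 *    store in stage p[6+1] writes the result out one stage later;
 *  - mov pr.rot = 1 << 16 primes the rotating predicates so that only the
 *    first stage is enabled on entry to the loop;
 *  - each br.ctop rotates the register groups declared with .rotr.
 *
 * The destination is the first source, so xor_ia64_2() is roughly the
 * following C loop (a sketch; the actual prototypes are declared in
 * arch/ia64/include/asm/xor.h):
 *
 *	void xor_ia64_2(unsigned long bytes, unsigned long *p1,
 *			const unsigned long *p2)
 *	{
 *		unsigned long i;
 *
 *		for (i = 0; i < bytes / 8; i++)
 *			p1[i] ^= p2[i];
 *	}
 *
 * The byte count is assumed to be a non-zero multiple of 8.  The routines
 * are hooked into the generic xor_blocks() template selection (see
 * arch/ia64/include/asm/xor.h) and exported for the RAID modules.
 */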
GLOBAL_ENTRY(xor_ia64_2)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 3, 0, 13, 16
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1
	mov ar.ec = 6 + 2
	shr in0 = in0, 3
	;;
	adds in0 = -1, in0
	mov r16 = in1
	mov r17 = in2
	;;
	mov ar.lc = in0
	mov pr.rot = 1 << 16
	;;
	.rotr s1[6+1], s2[6+1], d[2]
	.rotp p[6+2]
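	// One iteration per 8-byte word: stage p[0] issues both loads,
	// stage p[6] xors the values fetched six iterations earlier, and
	// stage p[6+1] stores the previous result to the destination.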
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
(p[6+1])st8.nta [r8] = d[1], 8
	nop.f 0
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30
	mov pr = r29, -1
	br.ret.sptk.few rp
END(xor_ia64_2)
EXPORT_SYMBOL(xor_ia64_2)

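/*
 * xor_ia64_3(bytes, p1, p2, p3): p1[i] ^= p2[i] ^ p3[i].  Same pipelined
 * structure as xor_ia64_2, with the third source stream (r18) folded into
 * the result by a second xor.
 */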
GLOBAL_ENTRY(xor_ia64_3)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 4, 0, 20, 24
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1
	mov ar.ec = 6 + 2
	shr in0 = in0, 3
	;;
	adds in0 = -1, in0
	mov r16 = in1
	mov r17 = in2
	;;
	mov r18 = in3
	mov ar.lc = in0
	mov pr.rot = 1 << 16
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
	;;
(p[0])	ld8.nta s3[0] = [r18], 8
(p[6+1])st8.nta [r8] = d[1], 8
(p[6])	xor d[0] = d[0], s3[6]
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30
	mov pr = r29, -1
	br.ret.sptk.few rp
END(xor_ia64_3)
EXPORT_SYMBOL(xor_ia64_3)

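/*
 * xor_ia64_4(bytes, p1, p2, p3, p4): p1[i] ^= p2[i] ^ p3[i] ^ p4[i].
 * The two source pairs are xor'd independently (into d[0] and the static
 * scratch register r20) and then combined, keeping the dependency chain
 * short.
 */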
GLOBAL_ENTRY(xor_ia64_4)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 5, 0, 27, 32
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1
	mov ar.ec = 6 + 2
	shr in0 = in0, 3
	;;
	adds in0 = -1, in0
	mov r16 = in1
	mov r17 = in2
	;;
	mov r18 = in3
	mov ar.lc = in0
	mov pr.rot = 1 << 16
	mov r19 = in4
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r20 = s3[6], s4[6]
	;;
(p[6+1])st8.nta [r8] = d[1], 8
(p[6])	xor d[0] = d[0], r20
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30
	mov pr = r29, -1
	br.ret.sptk.few rp
END(xor_ia64_4)
EXPORT_SYMBOL(xor_ia64_4)

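/*
 * xor_ia64_5(bytes, p1, p2, p3, p4, p5):
 * p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i].  As in xor_ia64_4, one source
 * pair is combined into scratch r21 and merged into d[0]; the fifth
 * stream (s5) is xor'd in last.
 */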
GLOBAL_ENTRY(xor_ia64_5)
	.prologue
	.fframe 0
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 6, 0, 34, 40
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1
	mov ar.ec = 6 + 2
	shr in0 = in0, 3
	;;
	adds in0 = -1, in0
	mov r16 = in1
	mov r17 = in2
	;;
	mov r18 = in3
	mov ar.lc = in0
	mov pr.rot = 1 << 16
	mov r19 = in4
	mov r20 = in5
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r21 = s3[6], s4[6]
	;;
(p[0])	ld8.nta s5[0] = [r20], 8
(p[6+1])st8.nta [r8] = d[1], 8
(p[6])	xor d[0] = d[0], r21
	;;
(p[6])	xor d[0] = d[0], s5[6]
	nop.f 0
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30
	mov pr = r29, -1
	br.ret.sptk.few rp
END(xor_ia64_5)
EXPORT_SYMBOL(xor_ia64_5)