xref: /kernel/linux/linux-6.6/arch/alpha/include/asm/xor.h (revision 62306a36)
1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 * arch/alpha/include/asm/xor.h
4 *
5 * Optimized RAID-5 checksumming functions for alpha EV5 and EV6
6 */
7
/*
 * Plain (non-prefetching) XOR routines; their bodies are the assembly
 * further down in this header.
 *
 * Every routine takes a byte count followed by two to five buffers.  p1 is
 * the only buffer that is written; p2..p5 are const and only read.  All
 * buffers are declared __restrict.
 */
void xor_alpha_2(unsigned long bytes,
		 unsigned long * __restrict p1,
		 const unsigned long * __restrict p2);

void xor_alpha_3(unsigned long bytes,
		 unsigned long * __restrict p1,
		 const unsigned long * __restrict p2,
		 const unsigned long * __restrict p3);

void xor_alpha_4(unsigned long bytes,
		 unsigned long * __restrict p1,
		 const unsigned long * __restrict p2,
		 const unsigned long * __restrict p3,
		 const unsigned long * __restrict p4);

void xor_alpha_5(unsigned long bytes,
		 unsigned long * __restrict p1,
		 const unsigned long * __restrict p2,
		 const unsigned long * __restrict p3,
		 const unsigned long * __restrict p4,
		 const unsigned long * __restrict p5);
26
/*
 * Prefetching XOR routines; their bodies are the assembly further down in
 * this header.  The signatures mirror the xor_alpha_N() family: a byte
 * count, the writable buffer p1, then one to four const source buffers.
 */
void xor_alpha_prefetch_2(unsigned long bytes,
			  unsigned long * __restrict p1,
			  const unsigned long * __restrict p2);

void xor_alpha_prefetch_3(unsigned long bytes,
			  unsigned long * __restrict p1,
			  const unsigned long * __restrict p2,
			  const unsigned long * __restrict p3);

void xor_alpha_prefetch_4(unsigned long bytes,
			  unsigned long * __restrict p1,
			  const unsigned long * __restrict p2,
			  const unsigned long * __restrict p3,
			  const unsigned long * __restrict p4);

void xor_alpha_prefetch_5(unsigned long bytes,
			  unsigned long * __restrict p1,
			  const unsigned long * __restrict p2,
			  const unsigned long * __restrict p3,
			  const unsigned long * __restrict p4,
			  const unsigned long * __restrict p5);
45
/*
 * Hand-scheduled Alpha assembly for all eight XOR routines declared above,
 * emitted into .text by one file-scope asm statement:
 *
 *   xor_alpha_2 .. xor_alpha_5                    plain versions
 *   xor_alpha_prefetch_2 .. xor_alpha_prefetch_5  same data flow plus
 *                                                 look-ahead loads
 *
 * Register roles as used by the code (these line up with the C prototypes
 * if integer arguments arrive in $16..$21 -- presumably the Alpha calling
 * convention; confirm against the ABI):
 *   $16       bytes; shifted right by 6 on entry to give the number of
 *             64-byte iterations, then counted down by "subq"/"bgt"
 *   $17       p1: read and written (every "stq" goes through $17)
 *   $18..$21  p2..p5: read only (only as many as the routine has sources)
 *
 * Each loop iteration loads eight quadwords (offsets 0..56) from every
 * buffer, XORs the corresponding quadwords together, stores the eight
 * results back through $17 and advances every pointer by 64.
 *
 * Things to keep in mind when touching this:
 *   - The loop test sits at the bottom, so the body executes once even when
 *     bytes >> 6 is zero, and a trailing bytes % 64 is never processed.
 *     NOTE(review): callers are presumably expected to pass a non-zero
 *     multiple of 64 -- verify.
 *   - Scratch registers are $0-$7, $27 and whichever of $19-$25 the routine
 *     does not use as a pointer, plus $28 in the 5-source routines.
 *   - The prefetch variants issue "ldq $31, ..." at offsets 0, 64, 128 and
 *     192 before the loop and at offset 256 from each pointer inside it.
 *     The loaded value is never used; the in-loop ones address data 256
 *     bytes ahead of the current position, i.e. possibly beyond the end of
 *     the buffers during the final iterations.  NOTE(review): this relies
 *     on loads to $31 acting as non-faulting prefetch hints -- confirm
 *     against the Alpha architecture documentation.
 *   - The "# N cycles from $X load" remarks are scheduling notes tied to
 *     the current instruction order; reordering instructions invalidates
 *     them.
 */
46asm("								\n\
47	.text							\n\
48	.align 3						\n\
49	.ent xor_alpha_2					\n\
50xor_alpha_2:							\n\
51	.prologue 0						\n\
52	srl $16, 6, $16						\n\
53	.align 4						\n\
542:								\n\
55	ldq $0,0($17)						\n\
56	ldq $1,0($18)						\n\
57	ldq $2,8($17)						\n\
58	ldq $3,8($18)						\n\
59								\n\
60	ldq $4,16($17)						\n\
61	ldq $5,16($18)						\n\
62	ldq $6,24($17)						\n\
63	ldq $7,24($18)						\n\
64								\n\
65	ldq $19,32($17)						\n\
66	ldq $20,32($18)						\n\
67	ldq $21,40($17)						\n\
68	ldq $22,40($18)						\n\
69								\n\
70	ldq $23,48($17)						\n\
71	ldq $24,48($18)						\n\
72	ldq $25,56($17)						\n\
73	xor $0,$1,$0		# 7 cycles from $1 load		\n\
74								\n\
75	ldq $27,56($18)						\n\
76	xor $2,$3,$2						\n\
77	stq $0,0($17)						\n\
78	xor $4,$5,$4						\n\
79								\n\
80	stq $2,8($17)						\n\
81	xor $6,$7,$6						\n\
82	stq $4,16($17)						\n\
83	xor $19,$20,$19						\n\
84								\n\
85	stq $6,24($17)						\n\
86	xor $21,$22,$21						\n\
87	stq $19,32($17)						\n\
88	xor $23,$24,$23						\n\
89								\n\
90	stq $21,40($17)						\n\
91	xor $25,$27,$25						\n\
92	stq $23,48($17)						\n\
93	subq $16,1,$16						\n\
94								\n\
95	stq $25,56($17)						\n\
96	addq $17,64,$17						\n\
97	addq $18,64,$18						\n\
98	bgt $16,2b						\n\
99								\n\
100	ret							\n\
101	.end xor_alpha_2					\n\
102								\n\
103	.align 3						\n\
104	.ent xor_alpha_3					\n\
105xor_alpha_3:							\n\
106	.prologue 0						\n\
107	srl $16, 6, $16						\n\
108	.align 4						\n\
1093:								\n\
110	ldq $0,0($17)						\n\
111	ldq $1,0($18)						\n\
112	ldq $2,0($19)						\n\
113	ldq $3,8($17)						\n\
114								\n\
115	ldq $4,8($18)						\n\
116	ldq $6,16($17)						\n\
117	ldq $7,16($18)						\n\
118	ldq $21,24($17)						\n\
119								\n\
120	ldq $22,24($18)						\n\
121	ldq $24,32($17)						\n\
122	ldq $25,32($18)						\n\
123	ldq $5,8($19)						\n\
124								\n\
125	ldq $20,16($19)						\n\
126	ldq $23,24($19)						\n\
127	ldq $27,32($19)						\n\
128	nop							\n\
129								\n\
130	xor $0,$1,$1		# 8 cycles from $0 load		\n\
131	xor $3,$4,$4		# 6 cycles from $4 load		\n\
132	xor $6,$7,$7		# 6 cycles from $7 load		\n\
133	xor $21,$22,$22		# 5 cycles from $22 load	\n\
134								\n\
135	xor $1,$2,$2		# 9 cycles from $2 load		\n\
136	xor $24,$25,$25		# 5 cycles from $25 load	\n\
137	stq $2,0($17)						\n\
138	xor $4,$5,$5		# 6 cycles from $5 load		\n\
139								\n\
140	stq $5,8($17)						\n\
141	xor $7,$20,$20		# 7 cycles from $20 load	\n\
142	stq $20,16($17)						\n\
143	xor $22,$23,$23		# 7 cycles from $23 load	\n\
144								\n\
145	stq $23,24($17)						\n\
146	xor $25,$27,$27		# 7 cycles from $27 load	\n\
147	stq $27,32($17)						\n\
148	nop							\n\
149								\n\
150	ldq $0,40($17)						\n\
151	ldq $1,40($18)						\n\
152	ldq $3,48($17)						\n\
153	ldq $4,48($18)						\n\
154								\n\
155	ldq $6,56($17)						\n\
156	ldq $7,56($18)						\n\
157	ldq $2,40($19)						\n\
158	ldq $5,48($19)						\n\
159								\n\
160	ldq $20,56($19)						\n\
161	xor $0,$1,$1		# 4 cycles from $1 load		\n\
162	xor $3,$4,$4		# 5 cycles from $4 load		\n\
163	xor $6,$7,$7		# 5 cycles from $7 load		\n\
164								\n\
165	xor $1,$2,$2		# 4 cycles from $2 load		\n\
166	xor $4,$5,$5		# 5 cycles from $5 load		\n\
167	stq $2,40($17)						\n\
168	xor $7,$20,$20		# 4 cycles from $20 load	\n\
169								\n\
170	stq $5,48($17)						\n\
171	subq $16,1,$16						\n\
172	stq $20,56($17)						\n\
173	addq $19,64,$19						\n\
174								\n\
175	addq $18,64,$18						\n\
176	addq $17,64,$17						\n\
177	bgt $16,3b						\n\
178	ret							\n\
179	.end xor_alpha_3					\n\
180								\n\
181	.align 3						\n\
182	.ent xor_alpha_4					\n\
183xor_alpha_4:							\n\
184	.prologue 0						\n\
185	srl $16, 6, $16						\n\
186	.align 4						\n\
1874:								\n\
188	ldq $0,0($17)						\n\
189	ldq $1,0($18)						\n\
190	ldq $2,0($19)						\n\
191	ldq $3,0($20)						\n\
192								\n\
193	ldq $4,8($17)						\n\
194	ldq $5,8($18)						\n\
195	ldq $6,8($19)						\n\
196	ldq $7,8($20)						\n\
197								\n\
198	ldq $21,16($17)						\n\
199	ldq $22,16($18)						\n\
200	ldq $23,16($19)						\n\
201	ldq $24,16($20)						\n\
202								\n\
203	ldq $25,24($17)						\n\
204	xor $0,$1,$1		# 6 cycles from $1 load		\n\
205	ldq $27,24($18)						\n\
206	xor $2,$3,$3		# 6 cycles from $3 load		\n\
207								\n\
208	ldq $0,24($19)						\n\
209	xor $1,$3,$3						\n\
210	ldq $1,24($20)						\n\
211	xor $4,$5,$5		# 7 cycles from $5 load		\n\
212								\n\
213	stq $3,0($17)						\n\
214	xor $6,$7,$7						\n\
215	xor $21,$22,$22		# 7 cycles from $22 load	\n\
216	xor $5,$7,$7						\n\
217								\n\
218	stq $7,8($17)						\n\
219	xor $23,$24,$24		# 7 cycles from $24 load	\n\
220	ldq $2,32($17)						\n\
221	xor $22,$24,$24						\n\
222								\n\
223	ldq $3,32($18)						\n\
224	ldq $4,32($19)						\n\
225	ldq $5,32($20)						\n\
226	xor $25,$27,$27		# 8 cycles from $27 load	\n\
227								\n\
228	ldq $6,40($17)						\n\
229	ldq $7,40($18)						\n\
230	ldq $21,40($19)						\n\
231	ldq $22,40($20)						\n\
232								\n\
233	stq $24,16($17)						\n\
234	xor $0,$1,$1		# 9 cycles from $1 load		\n\
235	xor $2,$3,$3		# 5 cycles from $3 load		\n\
236	xor $27,$1,$1						\n\
237								\n\
238	stq $1,24($17)						\n\
239	xor $4,$5,$5		# 5 cycles from $5 load		\n\
240	ldq $23,48($17)						\n\
241	ldq $24,48($18)						\n\
242								\n\
243	ldq $25,48($19)						\n\
244	xor $3,$5,$5						\n\
245	ldq $27,48($20)						\n\
246	ldq $0,56($17)						\n\
247								\n\
248	ldq $1,56($18)						\n\
249	ldq $2,56($19)						\n\
250	xor $6,$7,$7		# 8 cycles from $6 load		\n\
251	ldq $3,56($20)						\n\
252								\n\
253	stq $5,32($17)						\n\
254	xor $21,$22,$22		# 8 cycles from $22 load	\n\
255	xor $7,$22,$22						\n\
256	xor $23,$24,$24		# 5 cycles from $24 load	\n\
257								\n\
258	stq $22,40($17)						\n\
259	xor $25,$27,$27		# 5 cycles from $27 load	\n\
260	xor $24,$27,$27						\n\
261	xor $0,$1,$1		# 5 cycles from $1 load		\n\
262								\n\
263	stq $27,48($17)						\n\
264	xor $2,$3,$3		# 4 cycles from $3 load		\n\
265	xor $1,$3,$3						\n\
266	subq $16,1,$16						\n\
267								\n\
268	stq $3,56($17)						\n\
269	addq $20,64,$20						\n\
270	addq $19,64,$19						\n\
271	addq $18,64,$18						\n\
272								\n\
273	addq $17,64,$17						\n\
274	bgt $16,4b						\n\
275	ret							\n\
276	.end xor_alpha_4					\n\
277								\n\
278	.align 3						\n\
279	.ent xor_alpha_5					\n\
280xor_alpha_5:							\n\
281	.prologue 0						\n\
282	srl $16, 6, $16						\n\
283	.align 4						\n\
2845:								\n\
285	ldq $0,0($17)						\n\
286	ldq $1,0($18)						\n\
287	ldq $2,0($19)						\n\
288	ldq $3,0($20)						\n\
289								\n\
290	ldq $4,0($21)						\n\
291	ldq $5,8($17)						\n\
292	ldq $6,8($18)						\n\
293	ldq $7,8($19)						\n\
294								\n\
295	ldq $22,8($20)						\n\
296	ldq $23,8($21)						\n\
297	ldq $24,16($17)						\n\
298	ldq $25,16($18)						\n\
299								\n\
300	ldq $27,16($19)						\n\
301	xor $0,$1,$1		# 6 cycles from $1 load		\n\
302	ldq $28,16($20)						\n\
303	xor $2,$3,$3		# 6 cycles from $3 load		\n\
304								\n\
305	ldq $0,16($21)						\n\
306	xor $1,$3,$3						\n\
307	ldq $1,24($17)						\n\
308	xor $3,$4,$4		# 7 cycles from $4 load		\n\
309								\n\
310	stq $4,0($17)						\n\
311	xor $5,$6,$6		# 7 cycles from $6 load		\n\
312	xor $7,$22,$22		# 7 cycles from $22 load	\n\
313	xor $6,$23,$23		# 7 cycles from $23 load	\n\
314								\n\
315	ldq $2,24($18)						\n\
316	xor $22,$23,$23						\n\
317	ldq $3,24($19)						\n\
318	xor $24,$25,$25		# 8 cycles from $25 load	\n\
319								\n\
320	stq $23,8($17)						\n\
321	xor $25,$27,$27		# 8 cycles from $27 load	\n\
322	ldq $4,24($20)						\n\
323	xor $28,$0,$0		# 7 cycles from $0 load		\n\
324								\n\
325	ldq $5,24($21)						\n\
326	xor $27,$0,$0						\n\
327	ldq $6,32($17)						\n\
328	ldq $7,32($18)						\n\
329								\n\
330	stq $0,16($17)						\n\
331	xor $1,$2,$2		# 6 cycles from $2 load		\n\
332	ldq $22,32($19)						\n\
333	xor $3,$4,$4		# 4 cycles from $4 load		\n\
334								\n\
335	ldq $23,32($20)						\n\
336	xor $2,$4,$4						\n\
337	ldq $24,32($21)						\n\
338	ldq $25,40($17)						\n\
339								\n\
340	ldq $27,40($18)						\n\
341	ldq $28,40($19)						\n\
342	ldq $0,40($20)						\n\
343	xor $4,$5,$5		# 7 cycles from $5 load		\n\
344								\n\
345	stq $5,24($17)						\n\
346	xor $6,$7,$7		# 7 cycles from $7 load		\n\
347	ldq $1,40($21)						\n\
348	ldq $2,48($17)						\n\
349								\n\
350	ldq $3,48($18)						\n\
351	xor $7,$22,$22		# 7 cycles from $22 load	\n\
352	ldq $4,48($19)						\n\
353	xor $23,$24,$24		# 6 cycles from $24 load	\n\
354								\n\
355	ldq $5,48($20)						\n\
356	xor $22,$24,$24						\n\
357	ldq $6,48($21)						\n\
358	xor $25,$27,$27		# 7 cycles from $27 load	\n\
359								\n\
360	stq $24,32($17)						\n\
361	xor $27,$28,$28		# 8 cycles from $28 load	\n\
362	ldq $7,56($17)						\n\
363	xor $0,$1,$1		# 6 cycles from $1 load		\n\
364								\n\
365	ldq $22,56($18)						\n\
366	ldq $23,56($19)						\n\
367	ldq $24,56($20)						\n\
368	ldq $25,56($21)						\n\
369								\n\
370	xor $28,$1,$1						\n\
371	xor $2,$3,$3		# 9 cycles from $3 load		\n\
372	xor $3,$4,$4		# 9 cycles from $4 load		\n\
373	xor $5,$6,$6		# 8 cycles from $6 load		\n\
374								\n\
375	stq $1,40($17)						\n\
376	xor $4,$6,$6						\n\
377	xor $7,$22,$22		# 7 cycles from $22 load	\n\
378	xor $23,$24,$24		# 6 cycles from $24 load	\n\
379								\n\
380	stq $6,48($17)						\n\
381	xor $22,$24,$24						\n\
382	subq $16,1,$16						\n\
383	xor $24,$25,$25		# 8 cycles from $25 load	\n\
384								\n\
385	stq $25,56($17)						\n\
386	addq $21,64,$21						\n\
387	addq $20,64,$20						\n\
388	addq $19,64,$19						\n\
389								\n\
390	addq $18,64,$18						\n\
391	addq $17,64,$17						\n\
392	bgt $16,5b						\n\
393	ret							\n\
394	.end xor_alpha_5					\n\
395								\n\
396	.align 3						\n\
397	.ent xor_alpha_prefetch_2				\n\
398xor_alpha_prefetch_2:						\n\
399	.prologue 0						\n\
400	srl $16, 6, $16						\n\
401								\n\
402	ldq $31, 0($17)						\n\
403	ldq $31, 0($18)						\n\
404								\n\
405	ldq $31, 64($17)					\n\
406	ldq $31, 64($18)					\n\
407								\n\
408	ldq $31, 128($17)					\n\
409	ldq $31, 128($18)					\n\
410								\n\
411	ldq $31, 192($17)					\n\
412	ldq $31, 192($18)					\n\
413	.align 4						\n\
4142:								\n\
415	ldq $0,0($17)						\n\
416	ldq $1,0($18)						\n\
417	ldq $2,8($17)						\n\
418	ldq $3,8($18)						\n\
419								\n\
420	ldq $4,16($17)						\n\
421	ldq $5,16($18)						\n\
422	ldq $6,24($17)						\n\
423	ldq $7,24($18)						\n\
424								\n\
425	ldq $19,32($17)						\n\
426	ldq $20,32($18)						\n\
427	ldq $21,40($17)						\n\
428	ldq $22,40($18)						\n\
429								\n\
430	ldq $23,48($17)						\n\
431	ldq $24,48($18)						\n\
432	ldq $25,56($17)						\n\
433	ldq $27,56($18)						\n\
434								\n\
435	ldq $31,256($17)					\n\
436	xor $0,$1,$0		# 8 cycles from $1 load		\n\
437	ldq $31,256($18)					\n\
438	xor $2,$3,$2						\n\
439								\n\
440	stq $0,0($17)						\n\
441	xor $4,$5,$4						\n\
442	stq $2,8($17)						\n\
443	xor $6,$7,$6						\n\
444								\n\
445	stq $4,16($17)						\n\
446	xor $19,$20,$19						\n\
447	stq $6,24($17)						\n\
448	xor $21,$22,$21						\n\
449								\n\
450	stq $19,32($17)						\n\
451	xor $23,$24,$23						\n\
452	stq $21,40($17)						\n\
453	xor $25,$27,$25						\n\
454								\n\
455	stq $23,48($17)						\n\
456	subq $16,1,$16						\n\
457	stq $25,56($17)						\n\
458	addq $17,64,$17						\n\
459								\n\
460	addq $18,64,$18						\n\
461	bgt $16,2b						\n\
462	ret							\n\
463	.end xor_alpha_prefetch_2				\n\
464								\n\
465	.align 3						\n\
466	.ent xor_alpha_prefetch_3				\n\
467xor_alpha_prefetch_3:						\n\
468	.prologue 0						\n\
469	srl $16, 6, $16						\n\
470								\n\
471	ldq $31, 0($17)						\n\
472	ldq $31, 0($18)						\n\
473	ldq $31, 0($19)						\n\
474								\n\
475	ldq $31, 64($17)					\n\
476	ldq $31, 64($18)					\n\
477	ldq $31, 64($19)					\n\
478								\n\
479	ldq $31, 128($17)					\n\
480	ldq $31, 128($18)					\n\
481	ldq $31, 128($19)					\n\
482								\n\
483	ldq $31, 192($17)					\n\
484	ldq $31, 192($18)					\n\
485	ldq $31, 192($19)					\n\
486	.align 4						\n\
4873:								\n\
488	ldq $0,0($17)						\n\
489	ldq $1,0($18)						\n\
490	ldq $2,0($19)						\n\
491	ldq $3,8($17)						\n\
492								\n\
493	ldq $4,8($18)						\n\
494	ldq $6,16($17)						\n\
495	ldq $7,16($18)						\n\
496	ldq $21,24($17)						\n\
497								\n\
498	ldq $22,24($18)						\n\
499	ldq $24,32($17)						\n\
500	ldq $25,32($18)						\n\
501	ldq $5,8($19)						\n\
502								\n\
503	ldq $20,16($19)						\n\
504	ldq $23,24($19)						\n\
505	ldq $27,32($19)						\n\
506	nop							\n\
507								\n\
508	xor $0,$1,$1		# 8 cycles from $0 load		\n\
509	xor $3,$4,$4		# 7 cycles from $4 load		\n\
510	xor $6,$7,$7		# 6 cycles from $7 load		\n\
511	xor $21,$22,$22		# 5 cycles from $22 load	\n\
512								\n\
513	xor $1,$2,$2		# 9 cycles from $2 load		\n\
514	xor $24,$25,$25		# 5 cycles from $25 load	\n\
515	stq $2,0($17)						\n\
516	xor $4,$5,$5		# 6 cycles from $5 load		\n\
517								\n\
518	stq $5,8($17)						\n\
519	xor $7,$20,$20		# 7 cycles from $20 load	\n\
520	stq $20,16($17)						\n\
521	xor $22,$23,$23		# 7 cycles from $23 load	\n\
522								\n\
523	stq $23,24($17)						\n\
524	xor $25,$27,$27		# 7 cycles from $27 load	\n\
525	stq $27,32($17)						\n\
526	nop							\n\
527								\n\
528	ldq $0,40($17)						\n\
529	ldq $1,40($18)						\n\
530	ldq $3,48($17)						\n\
531	ldq $4,48($18)						\n\
532								\n\
533	ldq $6,56($17)						\n\
534	ldq $7,56($18)						\n\
535	ldq $2,40($19)						\n\
536	ldq $5,48($19)						\n\
537								\n\
538	ldq $20,56($19)						\n\
539	ldq $31,256($17)					\n\
540	ldq $31,256($18)					\n\
541	ldq $31,256($19)					\n\
542								\n\
543	xor $0,$1,$1		# 6 cycles from $1 load		\n\
544	xor $3,$4,$4		# 5 cycles from $4 load		\n\
545	xor $6,$7,$7		# 5 cycles from $7 load		\n\
546	xor $1,$2,$2		# 4 cycles from $2 load		\n\
547								\n\
548	xor $4,$5,$5		# 5 cycles from $5 load		\n\
549	xor $7,$20,$20		# 4 cycles from $20 load	\n\
550	stq $2,40($17)						\n\
551	subq $16,1,$16						\n\
552								\n\
553	stq $5,48($17)						\n\
554	addq $19,64,$19						\n\
555	stq $20,56($17)						\n\
556	addq $18,64,$18						\n\
557								\n\
558	addq $17,64,$17						\n\
559	bgt $16,3b						\n\
560	ret							\n\
561	.end xor_alpha_prefetch_3				\n\
562								\n\
563	.align 3						\n\
564	.ent xor_alpha_prefetch_4				\n\
565xor_alpha_prefetch_4:						\n\
566	.prologue 0						\n\
567	srl $16, 6, $16						\n\
568								\n\
569	ldq $31, 0($17)						\n\
570	ldq $31, 0($18)						\n\
571	ldq $31, 0($19)						\n\
572	ldq $31, 0($20)						\n\
573								\n\
574	ldq $31, 64($17)					\n\
575	ldq $31, 64($18)					\n\
576	ldq $31, 64($19)					\n\
577	ldq $31, 64($20)					\n\
578								\n\
579	ldq $31, 128($17)					\n\
580	ldq $31, 128($18)					\n\
581	ldq $31, 128($19)					\n\
582	ldq $31, 128($20)					\n\
583								\n\
584	ldq $31, 192($17)					\n\
585	ldq $31, 192($18)					\n\
586	ldq $31, 192($19)					\n\
587	ldq $31, 192($20)					\n\
588	.align 4						\n\
5894:								\n\
590	ldq $0,0($17)						\n\
591	ldq $1,0($18)						\n\
592	ldq $2,0($19)						\n\
593	ldq $3,0($20)						\n\
594								\n\
595	ldq $4,8($17)						\n\
596	ldq $5,8($18)						\n\
597	ldq $6,8($19)						\n\
598	ldq $7,8($20)						\n\
599								\n\
600	ldq $21,16($17)						\n\
601	ldq $22,16($18)						\n\
602	ldq $23,16($19)						\n\
603	ldq $24,16($20)						\n\
604								\n\
605	ldq $25,24($17)						\n\
606	xor $0,$1,$1		# 6 cycles from $1 load		\n\
607	ldq $27,24($18)						\n\
608	xor $2,$3,$3		# 6 cycles from $3 load		\n\
609								\n\
610	ldq $0,24($19)						\n\
611	xor $1,$3,$3						\n\
612	ldq $1,24($20)						\n\
613	xor $4,$5,$5		# 7 cycles from $5 load		\n\
614								\n\
615	stq $3,0($17)						\n\
616	xor $6,$7,$7						\n\
617	xor $21,$22,$22		# 7 cycles from $22 load	\n\
618	xor $5,$7,$7						\n\
619								\n\
620	stq $7,8($17)						\n\
621	xor $23,$24,$24		# 7 cycles from $24 load	\n\
622	ldq $2,32($17)						\n\
623	xor $22,$24,$24						\n\
624								\n\
625	ldq $3,32($18)						\n\
626	ldq $4,32($19)						\n\
627	ldq $5,32($20)						\n\
628	xor $25,$27,$27		# 8 cycles from $27 load	\n\
629								\n\
630	ldq $6,40($17)						\n\
631	ldq $7,40($18)						\n\
632	ldq $21,40($19)						\n\
633	ldq $22,40($20)						\n\
634								\n\
635	stq $24,16($17)						\n\
636	xor $0,$1,$1		# 9 cycles from $1 load		\n\
637	xor $2,$3,$3		# 5 cycles from $3 load		\n\
638	xor $27,$1,$1						\n\
639								\n\
640	stq $1,24($17)						\n\
641	xor $4,$5,$5		# 5 cycles from $5 load		\n\
642	ldq $23,48($17)						\n\
643	xor $3,$5,$5						\n\
644								\n\
645	ldq $24,48($18)						\n\
646	ldq $25,48($19)						\n\
647	ldq $27,48($20)						\n\
648	ldq $0,56($17)						\n\
649								\n\
650	ldq $1,56($18)						\n\
651	ldq $2,56($19)						\n\
652	ldq $3,56($20)						\n\
653	xor $6,$7,$7		# 8 cycles from $6 load		\n\
654								\n\
655	ldq $31,256($17)					\n\
656	xor $21,$22,$22		# 8 cycles from $22 load	\n\
657	ldq $31,256($18)					\n\
658	xor $7,$22,$22						\n\
659								\n\
660	ldq $31,256($19)					\n\
661	xor $23,$24,$24		# 6 cycles from $24 load	\n\
662	ldq $31,256($20)					\n\
663	xor $25,$27,$27		# 6 cycles from $27 load	\n\
664								\n\
665	stq $5,32($17)						\n\
666	xor $24,$27,$27						\n\
667	xor $0,$1,$1		# 7 cycles from $1 load		\n\
668	xor $2,$3,$3		# 6 cycles from $3 load		\n\
669								\n\
670	stq $22,40($17)						\n\
671	xor $1,$3,$3						\n\
672	stq $27,48($17)						\n\
673	subq $16,1,$16						\n\
674								\n\
675	stq $3,56($17)						\n\
676	addq $20,64,$20						\n\
677	addq $19,64,$19						\n\
678	addq $18,64,$18						\n\
679								\n\
680	addq $17,64,$17						\n\
681	bgt $16,4b						\n\
682	ret							\n\
683	.end xor_alpha_prefetch_4				\n\
684								\n\
685	.align 3						\n\
686	.ent xor_alpha_prefetch_5				\n\
687xor_alpha_prefetch_5:						\n\
688	.prologue 0						\n\
689	srl $16, 6, $16						\n\
690								\n\
691	ldq $31, 0($17)						\n\
692	ldq $31, 0($18)						\n\
693	ldq $31, 0($19)						\n\
694	ldq $31, 0($20)						\n\
695	ldq $31, 0($21)						\n\
696								\n\
697	ldq $31, 64($17)					\n\
698	ldq $31, 64($18)					\n\
699	ldq $31, 64($19)					\n\
700	ldq $31, 64($20)					\n\
701	ldq $31, 64($21)					\n\
702								\n\
703	ldq $31, 128($17)					\n\
704	ldq $31, 128($18)					\n\
705	ldq $31, 128($19)					\n\
706	ldq $31, 128($20)					\n\
707	ldq $31, 128($21)					\n\
708								\n\
709	ldq $31, 192($17)					\n\
710	ldq $31, 192($18)					\n\
711	ldq $31, 192($19)					\n\
712	ldq $31, 192($20)					\n\
713	ldq $31, 192($21)					\n\
714	.align 4						\n\
7155:								\n\
716	ldq $0,0($17)						\n\
717	ldq $1,0($18)						\n\
718	ldq $2,0($19)						\n\
719	ldq $3,0($20)						\n\
720								\n\
721	ldq $4,0($21)						\n\
722	ldq $5,8($17)						\n\
723	ldq $6,8($18)						\n\
724	ldq $7,8($19)						\n\
725								\n\
726	ldq $22,8($20)						\n\
727	ldq $23,8($21)						\n\
728	ldq $24,16($17)						\n\
729	ldq $25,16($18)						\n\
730								\n\
731	ldq $27,16($19)						\n\
732	xor $0,$1,$1		# 6 cycles from $1 load		\n\
733	ldq $28,16($20)						\n\
734	xor $2,$3,$3		# 6 cycles from $3 load		\n\
735								\n\
736	ldq $0,16($21)						\n\
737	xor $1,$3,$3						\n\
738	ldq $1,24($17)						\n\
739	xor $3,$4,$4		# 7 cycles from $4 load		\n\
740								\n\
741	stq $4,0($17)						\n\
742	xor $5,$6,$6		# 7 cycles from $6 load		\n\
743	xor $7,$22,$22		# 7 cycles from $22 load	\n\
744	xor $6,$23,$23		# 7 cycles from $23 load	\n\
745								\n\
746	ldq $2,24($18)						\n\
747	xor $22,$23,$23						\n\
748	ldq $3,24($19)						\n\
749	xor $24,$25,$25		# 8 cycles from $25 load	\n\
750								\n\
751	stq $23,8($17)						\n\
752	xor $25,$27,$27		# 8 cycles from $27 load	\n\
753	ldq $4,24($20)						\n\
754	xor $28,$0,$0		# 7 cycles from $0 load		\n\
755								\n\
756	ldq $5,24($21)						\n\
757	xor $27,$0,$0						\n\
758	ldq $6,32($17)						\n\
759	ldq $7,32($18)						\n\
760								\n\
761	stq $0,16($17)						\n\
762	xor $1,$2,$2		# 6 cycles from $2 load		\n\
763	ldq $22,32($19)						\n\
764	xor $3,$4,$4		# 4 cycles from $4 load		\n\
765								\n\
766	ldq $23,32($20)						\n\
767	xor $2,$4,$4						\n\
768	ldq $24,32($21)						\n\
769	ldq $25,40($17)						\n\
770								\n\
771	ldq $27,40($18)						\n\
772	ldq $28,40($19)						\n\
773	ldq $0,40($20)						\n\
774	xor $4,$5,$5		# 7 cycles from $5 load		\n\
775								\n\
776	stq $5,24($17)						\n\
777	xor $6,$7,$7		# 7 cycles from $7 load		\n\
778	ldq $1,40($21)						\n\
779	ldq $2,48($17)						\n\
780								\n\
781	ldq $3,48($18)						\n\
782	xor $7,$22,$22		# 7 cycles from $22 load	\n\
783	ldq $4,48($19)						\n\
784	xor $23,$24,$24		# 6 cycles from $24 load	\n\
785								\n\
786	ldq $5,48($20)						\n\
787	xor $22,$24,$24						\n\
788	ldq $6,48($21)						\n\
789	xor $25,$27,$27		# 7 cycles from $27 load	\n\
790								\n\
791	stq $24,32($17)						\n\
792	xor $27,$28,$28		# 8 cycles from $28 load	\n\
793	ldq $7,56($17)						\n\
794	xor $0,$1,$1		# 6 cycles from $1 load		\n\
795								\n\
796	ldq $22,56($18)						\n\
797	ldq $23,56($19)						\n\
798	ldq $24,56($20)						\n\
799	ldq $25,56($21)						\n\
800								\n\
801	ldq $31,256($17)					\n\
802	xor $28,$1,$1						\n\
803	ldq $31,256($18)					\n\
804	xor $2,$3,$3		# 9 cycles from $3 load		\n\
805								\n\
806	ldq $31,256($19)					\n\
807	xor $3,$4,$4		# 9 cycles from $4 load		\n\
808	ldq $31,256($20)					\n\
809	xor $5,$6,$6		# 8 cycles from $6 load		\n\
810								\n\
811	stq $1,40($17)						\n\
812	xor $4,$6,$6						\n\
813	xor $7,$22,$22		# 7 cycles from $22 load	\n\
814	xor $23,$24,$24		# 6 cycles from $24 load	\n\
815								\n\
816	stq $6,48($17)						\n\
817	xor $22,$24,$24						\n\
818	ldq $31,256($21)					\n\
819	xor $24,$25,$25		# 8 cycles from $25 load	\n\
820								\n\
821	stq $25,56($17)						\n\
822	subq $16,1,$16						\n\
823	addq $21,64,$21						\n\
824	addq $20,64,$20						\n\
825								\n\
826	addq $19,64,$19						\n\
827	addq $18,64,$18						\n\
828	addq $17,64,$17						\n\
829	bgt $16,5b						\n\
830								\n\
831	ret							\n\
832	.end xor_alpha_prefetch_5				\n\
833");
834
835static struct xor_block_template xor_block_alpha = {
836	.name	= "alpha",
837	.do_2	= xor_alpha_2,
838	.do_3	= xor_alpha_3,
839	.do_4	= xor_alpha_4,
840	.do_5	= xor_alpha_5,
841};
842
843static struct xor_block_template xor_block_alpha_prefetch = {
844	.name	= "alpha prefetch",
845	.do_2	= xor_alpha_prefetch_2,
846	.do_3	= xor_alpha_prefetch_3,
847	.do_4	= xor_alpha_prefetch_4,
848	.do_5	= xor_alpha_prefetch_5,
849};
850
851/* For grins, also test the generic routines.  */
852#include <asm-generic/xor.h>
853
/*
 * Replace any earlier XOR_TRY_TEMPLATES definition.  The new one hands the
 * two generic templates and then both Alpha templates to xor_speed(), in
 * that order.
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES					\
	do {							\
		xor_speed(&xor_block_8regs);			\
		xor_speed(&xor_block_32regs);			\
		xor_speed(&xor_block_alpha);			\
		xor_speed(&xor_block_alpha_prefetch);		\
	} while (0)
862
/*
 * Choose the XOR template to use.
 *
 * When implver() reports IMPLVER_EV6 the prefetching template is forced,
 * as it is significantly faster in the cold cache case.  On every other
 * implementation FASTEST is evaluated and returned unchanged.
 *
 * FASTEST is parenthesized so that any expression can be passed without
 * its operators re-associating with the conditional.
 */
#define XOR_SELECT_TEMPLATE(FASTEST) \
	(implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : (FASTEST))
867