1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Optimized version of the ip_fast_csum() function
4  * Used for calculating IP header checksum
5  *
6  * Return: 16bit checksum, complemented
7  *
8  * Inputs:
9  *      in0: address of buffer to checksum (char *)
10  *      in1: length of the buffer in 32-bit words (int)
11  *
12  * Copyright (C) 2002, 2006 Intel Corp.
13  * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
14  */
15 
16 #include <linux/export.h>
17 #include <asm/asmmacro.h>
18 
19 /*
20  * Since we know that most likely this function is called with buf aligned
21  * on a 4-byte boundary and 20 bytes in length, we can execute rather quickly
22  * versus calling the generic version of do_csum, which has lots of overhead
23  * in handling various alignments and sizes.  However, due to the lack of
24  * constraints put on the function input arguments, cases with alignment not
25  * on 4-byte or size not equal to 20 bytes will be handled by the generic do_csum function.
26  */
27 
/*
 * Symbolic register names used by the routines in this file:
 * in0..in4 stand for r32..r36 (the incoming arguments) and ret0 for r8
 * (the register holding the value each routine returns).
 *
 * Note that the .generic path of ip_fast_csum names r34 and r35 directly
 * as save slots for b0 and ar.pfs; those are the same registers that in2
 * and in3 expand to here.
 */
28 #define in0	r32
29 #define in1	r33
30 #define in2	r34
31 #define in3	r35
32 #define in4	r36
33 #define ret0	r8
34 
/*
 * ip_fast_csum: checksum in1 32-bit words starting at address in0 and
 * return the complemented sum in ret0.
 *
 * Fast path: taken when in1 == 5 (20 bytes) and in0 is 4-byte aligned.
 * The five words are loaded through two pointers (in0 and r15 = in0 + 4,
 * each stepping by 8), added together, folded down to 16 bits with three
 * shift/add rounds and finally complemented within 16 bits.
 *
 * Slow path (.generic): every other case is handed to do_csum() with the
 * length converted from words to bytes; its result is complemented.
 *
 * Predicates: p6 = "use the generic path", p7 = "use the fast path".
 */
35 GLOBAL_ENTRY(ip_fast_csum)
36 	.prologue
37 	.body
38 	cmp.ne	p6,p7=5,in1	// size other than 20 byte?
39 	and	r14=3,in0	// is it aligned on 4-byte?
40 	add	r15=4,in0	// second source pointer
41 	;;
42 	cmp.ne.or.andcm p6,p7=r14,r0	// misaligned (r14 != 0): also set p6, clear p7
43 	;;
44 (p7)	ld4	r20=[in0],8	// word 0; in0 += 8 (fast path only)
45 (p7)	ld4	r21=[r15],8	// word 1; r15 += 8 (fast path only)
46 (p6)	br.spnt	.generic	// not the aligned 20-byte case
47 	;;
48 	ld4	r22=[in0],8	// word 2
49 	ld4	r23=[r15],8	// word 3
50 	;;
51 	ld4	r24=[in0]	// word 4
52 	add	r20=r20,r21	// word0 + word1
53 	add	r22=r22,r23	// word2 + word3
54 	;;
55 	add	r20=r20,r22
56 	;;
57 	add	r20=r20,r24	// r20 = 64-bit sum of all five words
58 	;;
59 	shr.u	ret0=r20,16	// now need to add the carry
60 	zxt2	r20=r20		// keep the low 16 bits
61 	;;
62 	add	r20=ret0,r20	// high part + low 16 bits
63 	;;
64 	shr.u	ret0=r20,16	// add carry again
65 	zxt2	r20=r20
66 	;;
67 	add	r20=ret0,r20
68 	;;
69 	shr.u	ret0=r20,16	// third and last fold round
70 	zxt2	r20=r20
71 	;;
72 	add	r20=ret0,r20
73 	mov	r9=0xffff	// mask for the 16-bit complement below
74 	;;
75 	andcm	ret0=r9,r20	// ret0 = 0xffff & ~r20
76 	.restore sp		// reset frame state
77 	br.ret.sptk.many b0
78 	;;
79 
	// Generic path.  in0 still holds the original buffer address here:
	// the post-incrementing loads above executed only under p7.
80 .generic:
81 	.prologue
82 	.save ar.pfs, r35
83 	alloc	r35=ar.pfs,2,2,2,0	// 2 inputs, 2 locals, 2 outputs; old ar.pfs kept in r35
84 	.save rp, r34
85 	mov	r34=b0		// keep our return address across the call
86 	.body
87 	dep.z	out1=in1,2,30	// out1 = (low 30 bits of in1) << 2: words -> bytes
88 	mov	out0=in0	// out0 = buffer address
89 	;;
90 	br.call.sptk.many b0=do_csum
91 	;;
	// NOTE(review): all 64 bits are complemented here, whereas the fast
	// path masks to 16 bits; presumably callers only look at the low
	// 16 bits of the result -- confirm against the C prototype.
92 	andcm	ret0=-1,ret0	// ret0 = ~(value returned by do_csum)
93 	mov	ar.pfs=r35	// restore the saved ar.pfs
94 	mov	b0=r34		// restore the saved return address
95 	br.ret.sptk.many b0
96 END(ip_fast_csum)
97 EXPORT_SYMBOL(ip_fast_csum)
98 
/*
 * csum_ipv6_magic: add up two 16-byte buffers and three scalar arguments
 * and return the complemented, 16-bit-folded sum in r8.
 *
 * Inputs:
 *	in0: address of the first 16-byte buffer (read with four ld4)
 *	in1: address of the second 16-byte buffer (read with four ld4)
 *	in2: 32-bit value; summed in byte-reversed form
 *	in3: low 16 bits are used; summed in byte-reversed form
 *	in4: 32-bit value; summed as-is
 *
 * NOTE(review): judging by the name these are presumably the IPv6 source
 * address, destination address, length, protocol and initial checksum of
 * the pseudo-header -- confirm against the C prototype.  If the C type of
 * the in3 argument is narrower than 16 bits, bits 8..15 must be clean on
 * entry because 16 bits of in3 are deposited below -- TODO confirm.
 *
 * Folding: the byte-reversed in2 is added shifted left by 16, so the
 * 64-bit sum can reach 2^48.  After the 64->32 fold and three 16-bit folds
 * the value can therefore still be 0x10000 (e.g. first buffer word
 * 0xffffffff, second buffer word 0x00010000, in2 = 0xffffffff), which the
 * final 16-bit complement would turn into 0xffff instead of 0xfffe.  A
 * fourth 16-bit fold guarantees the sum fits in 16 bits.
 */
99 GLOBAL_ENTRY(csum_ipv6_magic)
100 	ld4	r20=[in0],4	// first buffer, word 0
101 	ld4	r21=[in1],4	// second buffer, word 0
102 	zxt4	in2=in2		// only the low 32 bits of in2 are meaningful
103 	;;
104 	ld4	r22=[in0],4	// first buffer, word 1
105 	ld4	r23=[in1],4	// second buffer, word 1
106 	dep	r15=in3,in2,32,16	// r15 = in2 with in3[15:0] placed at bits 32..47
107 	;;
108 	ld4	r24=[in0],4	// first buffer, word 2
109 	ld4	r25=[in1],4	// second buffer, word 2
110 	mux1	r15=r15,@rev	// reverse all 8 bytes of r15
111 	add	r16=r20,r21
112 	add	r17=r22,r23
113 	zxt4	in4=in4		// only the low 32 bits of in4 are meaningful
114 	;;
115 	ld4	r26=[in0],4	// first buffer, word 3
116 	ld4	r27=[in1],4	// second buffer, word 3
117 	shr.u	r15=r15,16	// bits 16..47: byte-swapped in2; bits 0..15: byte-swapped in3
118 	add	r18=r24,r25
119 	add	r8=r16,r17
120 	;;
121 	add	r19=r26,r27
122 	add	r8=r8,r18
123 	;;
124 	add	r8=r8,r19	// r8 = sum of all eight buffer words
125 	add	r15=r15,in4
126 	;;
127 	add	r8=r8,r15	// r8 = full 64-bit sum
128 	;;
129 	shr.u	r10=r8,32	// now fold sum into short
130 	zxt4	r11=r8
131 	;;
132 	add	r8=r10,r11	// 64 -> at most 33 bits
133 	;;
134 	shr.u	r10=r8,16	// yeah, keep it rolling
135 	zxt2	r11=r8
136 	;;
137 	add	r8=r10,r11
138 	;;
139 	shr.u	r10=r8,16	// third 16-bit fold
140 	zxt2	r11=r8
141 	;;
142 	add	r8=r10,r11	// at most 0x10000 at this point
	;;
	shr.u	r10=r8,16	// fourth fold: absorb a possible final carry
	zxt2	r11=r8
	;;
	add	r8=r10,r11	// now guaranteed <= 0xffff
143 	mov	r9=0xffff	// mask for the 16-bit complement below
144 	;;
145 	andcm	r8=r9,r8	// r8 = 0xffff & ~r8
146 	br.ret.sptk.many b0
147 END(csum_ipv6_magic)
148 EXPORT_SYMBOL(csum_ipv6_magic)
149