/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/export.h>
#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

	.text

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * __csum_partial(buff, len, sum)
 */
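/*
 * Rough C equivalent (a sketch only; the assembly below additionally
 * word-aligns the buffer, unrolls four words at a time and folds in a
 * trailing halfword/byte):
 *
 *	while (len >= 4) {
 *		u32 tmp = sum + *(const u32 *)buff;
 *		sum = tmp + (tmp < sum);	// 32-bit add with end-around carry
 *		buff += 4;
 *		len -= 4;
 *	}
 */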
_GLOBAL(__csum_partial)
	subi	r3,r3,4
	srawi.	r6,r4,2		/* Divide len by 4 and also clear carry */
	beq	3f		/* if we're doing < 4 bytes */
	andi.	r0,r3,2		/* Align buffer to longword boundary */
	beq+	1f
	lhz	r0,4(r3)	/* do 2 bytes to get aligned */
	subi	r4,r4,2
	addi	r3,r3,2
	srwi.	r6,r4,2		/* # words to do */
	adde	r5,r5,r0
	beq	3f
1:	andi.	r6,r6,3		/* Prepare to handle words 4 by 4 */
	beq	21f
	mtctr	r6
2:	lwzu	r0,4(r3)
	adde	r5,r5,r0
	bdnz	2b
21:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
	beq	3f
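	/* 16 bytes per iteration: the loads run one word ahead of the adds,
	   and the word loaded by the final lwzu is summed after the loop. */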
	lwz	r0,4(r3)
	mtctr	r6
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdz	23f
22:	lwz	r0,4(r3)
	adde	r5,r5,r8
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdnz	22b
23:	adde	r5,r5,r8
3:	andi.	r0,r4,2
	beq+	4f
	lhz	r0,4(r3)
	addi	r3,r3,2
	adde	r5,r5,r0
4:	andi.	r0,r4,1
	beq+	5f
	lbz	r0,4(r3)
	slwi	r0,r0,8		/* Upper byte of word */
	adde	r5,r5,r0
5:	addze	r3,r5		/* add in final carry */
	blr
EXPORT_SYMBOL(__csum_partial)

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in 0xffffffff, while copying the block to dst.
 * If an access exception occurs it returns zero.
 *
 * csum_partial_copy_generic(src, dst, len)
 */
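/*
 * Register usage below: r3 = src, r4 = dst, r5 = len on entry; r4 and r6
 * are rebased to src-4 and dst-4 for the update-form loads/stores, and r12
 * accumulates the running checksum (seeded with 0xffffffff).  Every load
 * and store that can fault carries an exception table entry pointing at
 * "fault", which makes the function return 0.
 */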
#define CSUM_COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
	adde	r12,r12,r7;	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
	adde	r12,r12,r8;	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
	adde	r12,r12,r9;	\
8 ## n ## 7:			\
	stwu	r10,16(r6);	\
	adde	r12,r12,r10
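/*
 * CSUM_COPY_16_BYTES_WITHEX(n) copies one 16-byte chunk (four words) from
 * r4 to r6 with update addressing while summing it into r12.  Each faultable
 * load and store gets a local label 8n0..8n7 so that the matching EXCODE
 * macro below can register it in the exception table.
 */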

#define CSUM_COPY_16_BYTES_EXCODE(n)		\
	EX_TABLE(8 ## n ## 0b, fault);	\
	EX_TABLE(8 ## n ## 1b, fault);	\
	EX_TABLE(8 ## n ## 2b, fault);	\
	EX_TABLE(8 ## n ## 3b, fault);	\
	EX_TABLE(8 ## n ## 4b, fault);	\
	EX_TABLE(8 ## n ## 5b, fault);	\
	EX_TABLE(8 ## n ## 6b, fault);	\
	EX_TABLE(8 ## n ## 7b, fault);
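/*
 * CSUM_COPY_16_BYTES_EXCODE(n) emits the exception table entries for the
 * eight labels created by CSUM_COPY_16_BYTES_WITHEX(n), all resolving to
 * "fault".
 */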

	.text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

_GLOBAL(csum_partial_copy_generic)
	li	r12,-1
	addic	r0,r0,0			/* clear carry */
	addi	r6,r4,-4
	neg	r0,r4
	addi	r4,r3,-4
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	crset	4*cr7+eq
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	rlwinm	r7,r6,3,0x8
	rlwnm	r12,r12,r7,0,31	/* odd destination address: rotate one byte */
	cmplwi	cr7,r7,0	/* is destination address even? */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
	li	r3,0
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	slwi	r3,r3,8
	rlwimi	r3,r9,0,24,31
71:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	70b
	adde	r12,r12,r3
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	adde	r12,r12,r9
73:	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
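	/* dcbz establishes the destination line in the cache as zeroes, so the
	   store side of the loop does not have to fetch it from memory first. */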
/* the main body of the cacheline loop */
	CSUM_COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_WITHEX(2)
	CSUM_COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_WITHEX(4)
	CSUM_COPY_16_BYTES_WITHEX(5)
	CSUM_COPY_16_BYTES_WITHEX(6)
	CSUM_COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	adde	r12,r12,r0
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,2
	beq+	65f
40:	lhz	r0,4(r4)
	addi	r4,r4,2
41:	sth	r0,4(r6)
	adde	r12,r12,r0
	addi	r6,r6,2
65:	andi.	r0,r5,1
	beq+	66f
50:	lbz	r0,4(r4)
51:	stb	r0,4(r6)
	slwi	r0,r0,8
	adde	r12,r12,r0
66:	addze	r3,r12
	beqlr+	cr7
	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
	blr

fault:
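	/* Any faulting load or store above lands here via the exception
	   table: report the access error by returning 0. */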
	li	r3,0
	blr

	EX_TABLE(70b, fault);
	EX_TABLE(71b, fault);
	EX_TABLE(72b, fault);
	EX_TABLE(73b, fault);
	EX_TABLE(54b, fault);

/*
 * Exception table entries for the labelled loads and stores in the
 * cacheline loop: a fault in either the read part or the write part
 * branches to fault above.
 */
	CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_EXCODE(2)
	CSUM_COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_EXCODE(4)
	CSUM_COPY_16_BYTES_EXCODE(5)
	CSUM_COPY_16_BYTES_EXCODE(6)
	CSUM_COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

	EX_TABLE(30b, fault);
	EX_TABLE(31b, fault);
	EX_TABLE(40b, fault);
	EX_TABLE(41b, fault);
	EX_TABLE(50b, fault);
	EX_TABLE(51b, fault);

EXPORT_SYMBOL(csum_partial_copy_generic)

/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *			   const struct in6_addr *daddr,
 *			   __u32 len, __u8 proto, __wsum sum)
 */
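/*
 * The eight 32-bit words of saddr and daddr, the (len + proto) word and the
 * incoming sum (r7) are accumulated with carry, then the 32-bit result is
 * folded to 16 bits and complemented.
 */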

_GLOBAL(csum_ipv6_magic)
	lwz	r8, 0(r3)
	lwz	r9, 4(r3)
	addc	r0, r7, r8
	lwz	r10, 8(r3)
	adde	r0, r0, r9
	lwz	r11, 12(r3)
	adde	r0, r0, r10
	lwz	r8, 0(r4)
	adde	r0, r0, r11
	lwz	r9, 4(r4)
	adde	r0, r0, r8
	lwz	r10, 8(r4)
	adde	r0, r0, r9
	lwz	r11, 12(r4)
	adde	r0, r0, r10
	add	r5, r5, r6	/* assumption: len + proto doesn't carry */
	adde	r0, r0, r11
	adde	r0, r0, r5
	addze	r0, r0
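	/* Fold the 32-bit sum to 16 bits: add the two halfwords (rotate by 16
	   and add), complement, and return the upper 16 bits of the result. */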
	rotlwi	r3, r0, 16
	add	r3, r0, r3
	not	r3, r3
	rlwinm	r3, r3, 16, 16, 31
	blr
EXPORT_SYMBOL(csum_ipv6_magic)