/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *                   converted to pure assembler
 */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */
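/*
 * For reference, a rough C model of what the routines below compute (an
 * illustrative sketch only, not the kernel implementation: it ignores the
 * alignment fast paths and the odd-pointer rotation fix-up, and it is not
 * bit-identical to the assembler's 32-bit accumulation, only congruent
 * mod 0xffff; callers fold the result down to 16 bits later):
 *
 *	unsigned int csum_partial_model(const unsigned char *buff, int len,
 *					unsigned int sum)
 *	{
 *		while (len >= 2) {
 *			unsigned int w = buff[0] | (buff[1] << 8);
 *			sum += w;
 *			sum += (sum < w);	// end-around carry
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len) {			// trailing odd byte
 *			sum += buff[0];
 *			sum += (sum < (unsigned int)buff[0]);
 *		}
 *		return sum;
 *	}
 */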

.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	  /*
	   * Experiments with Ethernet and SLIP connections show that buff
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */
SYM_FUNC_START(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if 2-byte aligned.

	# buf is odd
	dec %ecx
	jl 8f
	movzbl (%esi), %ebx
	adcl %ebx, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
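	/*
	 * From here on: %ecx>>5 counts 32-byte chunks for the unrolled
	 * loop at 1:, the remaining (len & 0x1c)>>2 dwords are mopped up
	 * at 3:, and the final 1-3 bytes are handled after 4:.
	 */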
2:
	movl %ecx, %edx
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi	# clear CF, ready for the adcl chain
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f
	shll $16,%ecx
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	testb $1, 12(%esp)
	jz 8f
	roll $8, %eax
8:
	popl %ebx
	popl %esi
	RET
SYM_FUNC_END(csum_partial)

#else

/* Version for PentiumII/PPro */

SYM_FUNC_START(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf

	testl $3, %esi
	jnz 25f
10:
	movl %ecx, %edx
	movl %ecx, %ebx
	andl $0x7c, %ebx
	shrl $7, %ecx
	addl %ebx,%esi
	shrl $2, %ebx
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx
	testl %esi, %esi	# clear CF before the first adcl
	JMP_NOSPEC ebx
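	/*
	 * The JMP_NOSPEC above is a computed entry into the unrolled
	 * block at 40:/45: below: %ebx holds minus the number of
	 * remainder dwords ((len & 0x7c)/4, negated), and each
	 * "adcl off(%esi), %eax" assembles to 3 bytes, so 45f + 3*%ebx
	 * lands exactly that many instructions before the 45: label.
	 */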

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b
25:
	testl $1, %esi
	jz 30f
	# buf is odd
	dec %ecx
	jl 90f
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 10b

30:	subl $2, %ecx
	ja 20b
	je 32f
	addl $2, %ecx
	jz 80f
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
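	/*
	 * Worked example for the mask trick above: with 2 trailing bytes,
	 * %ecx = 2, notl gives ...fffd, shll $3 leaves 0xe8 in %cl, and
	 * shrl uses only the low 5 bits of %cl (8), so the mask becomes
	 * 0xffffff >> 8 = 0xffff, keeping exactly the two valid bytes of
	 * the final dword.  Reading the whole dword at -128(%esi) cannot
	 * fault: it is 4-byte aligned, so it never crosses a page.
	 */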
80:
	testb $1, 12(%esp)
	jz 90f
	roll $8, %eax
90:
	popl %ebx
	popl %esi
	RET
SYM_FUNC_END(csum_partial)

#endif
EXPORT_SYMBOL(csum_partial)

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				  int len)
 */
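/*
 * Roughly, a C model of this routine (a hedged sketch, not the real
 * implementation: the real code accesses user memory under exception
 * protection and returns 0 if any access faults):
 *
 *	unsigned int csum_partial_copy_model(const char *src, char *dst,
 *					     int len)
 *	{
 *		memcpy(dst, src, len);
 *		return csum_partial((const unsigned char *)dst, len, ~0U);
 *	}
 */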

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 */

#define EXC(y...)						\
	9999: y;						\
	_ASM_EXTABLE_TYPE(9999b, 7f, EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX)
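
/*
 * Each EXC()-wrapped access gets an exception-table entry: if the access
 * faults, execution resumes at the local label 7 forward from the point
 * of expansion (the epilogue in both versions below), and
 * EX_FLAG_CLEAR_AX zeroes %eax so the function returns 0 to report the
 * fault.
 */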

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP		12

SYM_FUNC_START(csum_partial_copy_generic)
	subl  $4,%esp
	pushl %edi
	pushl %esi
	pushl %ebx
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	movl $-1, %eax			# sum
	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
EXC(1:	movw (%esi), %bx	)
	addl $2, %esi
EXC(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi		# what's wrong with clc?
EXC(1:	movl (%esi), %ebx	)
EXC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, (%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 4(%edi)	)

EXC(	movl 8(%esi), %ebx	)
EXC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 12(%edi)	)

EXC(	movl 16(%esi), %ebx	)
EXC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 20(%edi)	)

EXC(	movl 24(%esi), %ebx	)
EXC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx			# This clears CF
EXC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
EXC(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
EXC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
EXC(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%ecx
EXC(5:	movb (%esi), %cl	)
EXC(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:

	popl %ebx
	popl %esi
	popl %edi
	popl %ecx			# equivalent to addl $4,%esp
	RET
SYM_FUNC_END(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

#define ROUND1(x) \
	EXC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	EXC(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	EXC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	EXC(movl %ebx, x(%edi)	)	;
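
/*
 * ROUND1 opens the carry chain with addl (the preceding "addl $64,%esi"
 * pointer advance leaves a meaningless CF), while ROUND continues it
 * with adcl.  Each expansion is 8 bytes of code for 4 bytes of data,
 * which the "lea 3f(%ebx,%ebx), %ebx" computed entry below relies on.
 */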

#define ARGBASE 12

SYM_FUNC_START(csum_partial_copy_generic)
	pushl %ebx
	pushl %edi
	pushl %esi
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl $-1, %eax			#sum
#	movl %ecx, %edx
	movl %ecx, %ebx
	movl %esi, %edx
	shrl $6, %ecx
	andl $0x3c, %ebx
	negl %ebx
	subl %ebx, %esi
	subl %ebx, %edi
	lea  -1(%esi),%edx
	andl $-32,%edx
	lea 3f(%ebx,%ebx), %ebx
	testl %esi, %esi	# clear CF before the first adcl
	JMP_NOSPEC ebx
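	/*
	 * Computed entry into the unrolled copy loop, as in csum_partial
	 * above: %ebx holds -(len & 0x3c), and each ROUND is 8 bytes of
	 * code per 4 bytes of data, hence 3f + 2*%ebx.  %edx was set up
	 * as a 32-byte-aligned cursor into the source; the two movb
	 * touches at 1: below appear to serve as a software prefetch of
	 * the next source cache lines.
	 */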
1:	addl $64,%esi
	addl $64,%edi
	EXC(movb -32(%edx),%bl)	; EXC(movb (%edx),%bl)
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx
	jz 7f
	cmpl $2, %edx
	jb 5f
EXC(	movw (%esi), %dx	)
	leal 2(%esi), %esi
EXC(	movw %dx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%edx
5:
EXC(	movb (%esi), %dl	)
EXC(	movb %dl, (%edi)	)
6:	addl %edx, %eax
	adcl $0, %eax
7:

	popl %esi
	popl %edi
	popl %ebx
	RET
SYM_FUNC_END(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif
EXPORT_SYMBOL(csum_partial_copy_generic)