/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *                   converted to pure assembler
 */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */
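
/*
 * A rough C model of what csum_partial() computes -- a sketch for
 * reference, not the kernel's implementation.  It assumes x86-style
 * little-endian, unaligned-tolerant loads; the raw 32-bit value may
 * differ from what the asm returns (the asm plays byte-rotation games
 * for odd buffers), but both fold to the same 16-bit checksum:
 *
 *	static unsigned int csum_partial_ref(const unsigned char *buff,
 *					     int len, unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len >= 4) {			// 32-bit words
 *			acc += *(const unsigned int *)buff;
 *			buff += 4; len -= 4;
 *		}
 *		if (len >= 2) {				// 16-bit tail
 *			acc += *(const unsigned short *)buff;
 *			buff += 2; len -= 2;
 *		}
 *		if (len > 0)				// final byte
 *			acc += *buff;
 *		while (acc >> 32)			// fold carries back in
 *			acc = (acc & 0xffffffffull) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */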

.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	  /*
	   * Experiments with Ethernet and SLIP connections show that buff
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */
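	  /*
	   * In rough C, that 2-to-4-byte fixup is (a sketch, with the
	   * carry of the 16-bit add folded into sum afterwards):
	   *
	   *	if (((unsigned long)buff & 2) && len >= 2) {
	   *		sum += *(const unsigned short *)buff;
	   *		buff += 2;
	   *		len -= 2;
	   *	}
	   */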
SYM_FUNC_START(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if alignment is boundary of 2 bytes.

	# buf is odd: sum the first byte and rotate the sum left by 8.
	# An odd start byte-swaps the one's-complement sum; the final
	# roll $8 after label 7 swaps it back.  CF is clear here (the
	# testl above cleared it), so the adcl below is safe.
	dec %ecx
	jl 8f
	movzbl (%esi), %ebx
	adcl %ebx, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi	# clear CF (testl always clears carry)
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f
	shll $16,%ecx
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	testb $1, 12(%esp)	# was the original buff odd?
	jz 8f
	roll $8, %eax		# yes: undo the odd-start byte swap
8:
	popl %ebx
	popl %esi
	RET
SYM_FUNC_END(csum_partial)

#else

/* Version for PentiumII/PPro */

SYM_FUNC_START(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf

	testl $3, %esi
	jnz 25f
10:
	movl %ecx, %edx		# save len for the tail handling
	movl %ecx, %ebx
	andl $0x7c, %ebx	# ebx = leftover bytes (mod 128), whole dwords
	shrl $7, %ecx		# ecx = number of full 128-byte blocks
	addl %ebx,%esi		# the unrolled loop offsets backwards from esi
	shrl $2, %ebx		# ebx = leftover dwords
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx	# 45f - 3*dwords; each adcl below is 3 bytes
	testl %esi, %esi	# clear CF
	JMP_NOSPEC ebx
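	# ^ Duff's-device-style dispatch: ebx holds minus the number of
	# leftover dwords, and every "adcl -x(%esi), %eax" between 40:
	# and 45: assembles to 3 bytes, so 45f+3*ebx enters the unrolled
	# block with exactly enough adds to consume len mod 128 before
	# the full 128-byte iterations begin.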

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b
25:
	testl $1, %esi
	jz 30f
	# buf is odd: sum the first byte and byte-swap with roll, as in
	# the non-PPro version above; the roll after 80: undoes it
	dec %ecx
	jl 90f
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 10b

30:	subl $2, %ecx
	ja 20b
	je 32f
	addl $2, %ecx
	jz 80f
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
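	# In C, that masked load is roughly (a sketch: "last_word" stands
	# for the 4-aligned dword at -128(%esi), n for the 1-3 tail bytes;
	# the shift of ~n relies on shrl using only the low 5 bits of %cl):
	#
	#	unsigned int n = len & 3;
	#	unsigned int mask = 0xffffffu >> ((~n << 3) & 31);
	#	sum += last_word & mask;	/* plus end-around carry */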
80:
	testb $1, 12(%esp)	# was the original buff odd?
	jz 90f
	roll $8, %eax		# yes: undo the odd-start byte swap
90:
	popl %ebx
	popl %esi
	RET
SYM_FUNC_END(csum_partial)

#endif
EXPORT_SYMBOL(csum_partial)

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				  int len)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 */
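
/*
 * A rough C model of the copy-and-checksum contract -- a sketch, not
 * the implementation.  The sum is seeded with ~0, which keeps a
 * successful result from ever being 0, so the exception fixup below
 * can return 0 to mean "faulted":
 *
 *	static unsigned int csum_copy_ref(const unsigned char *src,
 *					  unsigned char *dst, int len)
 *	{
 *		unsigned long long acc = 0xffffffffu;	// ~0 seed
 *		int i = 0;
 *
 *		for (; i + 4 <= len; i += 4) {		// copy and sum
 *			unsigned int w = *(const unsigned int *)(src + i);
 *			*(unsigned int *)(dst + i) = w;
 *			acc += w;
 *		}
 *		if (i + 2 <= len) {			// 16-bit tail
 *			unsigned short w = *(const unsigned short *)(src + i);
 *			*(unsigned short *)(dst + i) = w;
 *			acc += w;
 *			i += 2;
 *		}
 *		if (i < len) {				// final byte
 *			dst[i] = src[i];
 *			acc += src[i];
 *		}
 *		while (acc >> 32)			// fold carries
 *			acc = (acc & 0xffffffffull) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */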

#define EXC(y...)			\
	9999: y;			\
	_ASM_EXTABLE_UA(9999b, 6001f)
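
/*
 * Every EXC()-wrapped access gets an exception-table entry: if the
 * instruction at 9999 faults on a user address, execution resumes at
 * the local 6001: fixup, which zeroes %eax (the "faulted" return).
 */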

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP		12

SYM_FUNC_START(csum_partial_copy_generic)
	subl  $4,%esp
	pushl %edi
	pushl %esi
	pushl %ebx
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	movl $-1, %eax			# sum
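	# the ~0 seed means a successful checksum never comes out as 0,
	# so the 6001: fixup can return 0 to report a fault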
	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
EXC(1:	movw (%esi), %bx	)
	addl $2, %esi
EXC(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi		# what's wrong with clc? (testl clears CF too)
EXC(1:	movl (%esi), %ebx	)
EXC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, (%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 4(%edi)	)

EXC(	movl 8(%esi), %ebx	)
EXC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 12(%edi)	)

EXC(	movl 16(%esi), %ebx	)
EXC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 20(%edi)	)

EXC(	movl 24(%esi), %ebx	)
EXC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx			# This clears CF
EXC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
EXC(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
EXC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
EXC(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%ecx
EXC(5:	movb (%esi), %cl	)
EXC(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:

# Exception handler:
.section .fixup, "ax"

6001:
	xorl %eax, %eax
	jmp 7b

.previous

	popl %ebx
	popl %esi
	popl %edi
	popl %ecx			# equivalent to addl $4,%esp
	RET
SYM_FUNC_END(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

#define ROUND1(x) \
	EXC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	EXC(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	EXC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	EXC(movl %ebx, x(%edi)	)	;

#define ARGBASE 12

SYM_FUNC_START(csum_partial_copy_generic)
	pushl %ebx
	pushl %edi
	pushl %esi
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl $-1, %eax			#sum: ~0 seed, so 0 can mean "fault"
#	movl %ecx, %edx
	movl %ecx, %ebx
	movl %esi, %edx
	shrl $6, %ecx			# ecx = number of full 64-byte blocks
	andl $0x3c, %ebx		# ebx = leftover bytes, whole dwords
	negl %ebx
	subl %ebx, %esi			# bias src and dst forward so the
	subl %ebx, %edi			# negative ROUND offsets line up
	lea  -1(%esi),%edx
	andl $-32,%edx			# edx: 32-byte-aligned cache cursor
	lea 3f(%ebx,%ebx), %ebx		# 3f - 2*bytes: each ROUND is 8 bytes
					# of code per 4 bytes of data
	testl %esi, %esi		# clear CF
	JMP_NOSPEC ebx
1:	addl $64,%esi
	addl $64,%edi
	# touch both 32-byte lines of this block so the fills start early
	EXC(movb -32(%edx),%bl)	; EXC(movb (%edx),%bl)
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx
	jz 7f
	cmpl $2, %edx
	jb 5f
EXC(	movw (%esi), %dx         )
	leal 2(%esi), %esi
EXC(	movw %dx, (%edi)         )
	leal 2(%edi), %edi
	je 6f
	shll $16,%edx
5:
EXC(	movb (%esi), %dl         )
EXC(	movb %dl, (%edi)         )
6:	addl %edx, %eax
	adcl $0, %eax
7:
.section .fixup, "ax"
6001:	xorl %eax, %eax
	jmp  7b
.previous

	popl %esi
	popl %edi
	popl %ebx
	RET
SYM_FUNC_END(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif
EXPORT_SYMBOL(csum_partial_copy_generic)