1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * INET		An implementation of the TCP/IP protocol suite for the LINUX
4  *		operating system.  INET is implemented using the  BSD Socket
5  *		interface as the means of communication with the user level.
6  *
7  *		IP/TCP/UDP checksumming routines
8  *
9  * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
10  *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
11  *		Tom May, <ftom@netcom.com>
12  *              Pentium Pro/II routines:
13  *              Alexander Kjeldaas <astor@guardian.no>
14  *              Finn Arne Gangstad <finnag@guardian.no>
15  *		Lots of code moved from tcp.c and ip.c; see those files
16  *		for more names.
17  *
18  * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
19  *			     handling.
20  *		Andi Kleen,  add zeroing on error
21  *                   converted to pure assembler
22  */
23 
24 #include <asm/errno.h>
25 #include <asm/asm.h>
26 #include <asm/export.h>
27 
28 /*
29  * computes a partial checksum, e.g. for TCP/UDP fragments
30  */
31 
32 /*
33 unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
34  */
35 
36 .text
37 .align 4
38 .globl csum_partial
39 
40 #ifndef CONFIG_X86_USE_PPRO_CHECKSUM
41 
42 	  /*
43 	   * Experiments with Ethernet and SLIP connections show that buff
44 	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
45 	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
46 	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
47 	   * alignment for the unrolled loop.
48 	   */
49 csum_partial:
50 	pushl %esi
51 	pushl %ebx
52 	movl 20(%esp),%eax	# Function arg: unsigned int sum
53 	movl 16(%esp),%ecx	# Function arg: int len
54 	movl 12(%esp),%esi	# Function arg: unsigned char *buff
55 	testl $2, %esi		# Check alignment.
56 	jz 2f			# Jump if alignment is ok.
57 	subl $2, %ecx		# Alignment uses up two bytes.
58 	jae 1f			# Jump if we had at least two bytes.
59 	addl $2, %ecx		# ecx was < 2.  Deal with it.
60 	jmp 4f
61 1:	movw (%esi), %bx
62 	addl $2, %esi
63 	addw %bx, %ax
64 	adcl $0, %eax
65 2:
66 	movl %ecx, %edx
67 	shrl $5, %ecx
68 	jz 2f
69 	testl %esi, %esi
70 1:	movl (%esi), %ebx
71 	adcl %ebx, %eax
72 	movl 4(%esi), %ebx
73 	adcl %ebx, %eax
74 	movl 8(%esi), %ebx
75 	adcl %ebx, %eax
76 	movl 12(%esi), %ebx
77 	adcl %ebx, %eax
78 	movl 16(%esi), %ebx
79 	adcl %ebx, %eax
80 	movl 20(%esi), %ebx
81 	adcl %ebx, %eax
82 	movl 24(%esi), %ebx
83 	adcl %ebx, %eax
84 	movl 28(%esi), %ebx
85 	adcl %ebx, %eax
86 	lea 32(%esi), %esi
87 	dec %ecx
88 	jne 1b
89 	adcl $0, %eax
90 2:	movl %edx, %ecx
91 	andl $0x1c, %edx
92 	je 4f
93 	shrl $2, %edx		# This clears CF
94 3:	adcl (%esi), %eax
95 	lea 4(%esi), %esi
96 	dec %edx
97 	jne 3b
98 	adcl $0, %eax
99 4:	andl $3, %ecx
100 	jz 7f
101 	cmpl $2, %ecx
102 	jb 5f
103 	movw (%esi),%cx
104 	leal 2(%esi),%esi
105 	je 6f
106 	shll $16,%ecx
107 5:	movb (%esi),%cl
108 6:	addl %ecx,%eax
109 	adcl $0, %eax
110 7:
111 	popl %ebx
112 	popl %esi
113 	RET
114 
115 #else
116 
/* Version for PentiumII/PPro */

	/*
	 * Same contract as the generic version: 32-bit unfolded
	 * one's-complement sum of buff[0..len) added into `sum`,
	 * result in %eax, %esi/%ebx preserved (cdecl).
	 *
	 * Strategy: process 128 bytes per loop iteration with a chain
	 * of 32 `adcl disp8(%esi),%eax` instructions.  The partial
	 * (len mod 128, whole dwords) head is handled by computing a
	 * jump *into the middle* of that chain (Duff's-device style),
	 * which only works because each adcl in the chain encodes to
	 * exactly 3 bytes -- see the lea at 10: below.
	 */
csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf

	testl $2, %esi		# 2-byte aligned? (assumes at least 2-byte
	jnz 30f			# alignment; bit 0 is never tested)
10:
	movl %ecx, %edx		# edx = len, kept for the 1-3 byte tail
	movl %ecx, %ebx
	andl $0x7c, %ebx	# ebx = bytes in the partial chunk (dwords only)
	shrl $7, %ecx		# ecx = number of full 128-byte chunks
	addl %ebx,%esi		# pre-advance esi past the partial chunk;
				# the chain reads at negative offsets
	shrl $2, %ebx		# ebx = partial chunk length in dwords
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx # entry = 45f - 3*dwords: skip backwards
				# over `dwords` 3-byte adcl instructions
	testl %esi, %esi	# clears CF before entering the adcl chain
	jmp *%ebx		# computed jump into the chain below

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax	# consume one word to reach 4-byte alignment
	lea 2(%esi), %esi
	adcl $0, %eax		# fold the 16-bit carry
	jmp 10b			# ecx already holds len-2 (from 30: below)

30:	subl $2, %ecx
	ja 20b			# len > 2: align, then take the main path
	je 32f			# len == 2: just one word
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

	/*
	 * 40: is entered with esi pointing 128 bytes PAST the start of
	 * the chunk to sum, and CF holding the running carry; hence the
	 * negative displacements.  Every instruction here must stay a
	 * 3-byte `adcl disp8(%esi),%eax` (except the addl at 40:, which
	 * is only reached from the loop branch, where CF is carried by
	 * the adcl $0 at 45:) or the computed jump above breaks.
	 */
40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi	# pre-advance past the next chunk
	adcl $0, %eax		# fold the carry out of the chain
	dec %ecx		# one more full 128-byte chunk?
	jge 40b			# (first pass through 45: was the partial)
	movl %edx, %ecx		# recover original len
50:	andl $3, %ecx		# last 0-3 bytes
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx		# cl = 8 * (bytes to discard)
	shrl %cl,%ebx		# ebx = mask keeping only the tail bytes
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
				# (esi ended up 128 past the dword-aligned
				# end of data, so -128 is the tail dword)
	addl %ebx,%eax
	adcl $0,%eax		# fold the final carry
80:
	popl %ebx
	popl %esi
	RET
212 
213 #endif
214 	EXPORT_SYMBOL(csum_partial)
215