18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-or-later */
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * INET		An implementation of the TCP/IP protocol suite for the LINUX
48c2ecf20Sopenharmony_ci *		operating system.  INET is implemented using the  BSD Socket
58c2ecf20Sopenharmony_ci *		interface as the means of communication with the user level.
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci *		IP/TCP/UDP checksumming routines
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
108c2ecf20Sopenharmony_ci *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
118c2ecf20Sopenharmony_ci *		Tom May, <ftom@netcom.com>
128c2ecf20Sopenharmony_ci *              Pentium Pro/II routines:
138c2ecf20Sopenharmony_ci *              Alexander Kjeldaas <astor@guardian.no>
148c2ecf20Sopenharmony_ci *              Finn Arne Gangstad <finnag@guardian.no>
158c2ecf20Sopenharmony_ci *		Lots of code moved from tcp.c and ip.c; see those files
168c2ecf20Sopenharmony_ci *		for more names.
178c2ecf20Sopenharmony_ci *
188c2ecf20Sopenharmony_ci * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
198c2ecf20Sopenharmony_ci *			     handling.
208c2ecf20Sopenharmony_ci *		Andi Kleen,  add zeroing on error
218c2ecf20Sopenharmony_ci *                   converted to pure assembler
228c2ecf20Sopenharmony_ci */
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_ci#include <asm/errno.h>
258c2ecf20Sopenharmony_ci#include <asm/asm.h>
268c2ecf20Sopenharmony_ci#include <asm/export.h>
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_ci/*
298c2ecf20Sopenharmony_ci * computes a partial checksum, e.g. for TCP/UDP fragments
308c2ecf20Sopenharmony_ci */
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci/*
338c2ecf20Sopenharmony_ciunsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
348c2ecf20Sopenharmony_ci */
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_ci.text
378c2ecf20Sopenharmony_ci.align 4
388c2ecf20Sopenharmony_ci.globl csum_partial
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_ci	  /*
438c2ecf20Sopenharmony_ci	   * Experiments with Ethernet and SLIP connections show that buff
448c2ecf20Sopenharmony_ci	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
458c2ecf20Sopenharmony_ci	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
468c2ecf20Sopenharmony_ci	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
478c2ecf20Sopenharmony_ci	   * alignment for the unrolled loop.
488c2ecf20Sopenharmony_ci	   */
/*
 * csum_partial - generic i386 variant, assembled when
 * CONFIG_X86_USE_PPRO_CHECKSUM is not defined.
 *
 * Arguments are read from the stack; after the two register saves
 * below they sit at 12(%esp) = buff, 16(%esp) = len, 20(%esp) = sum.
 * The running 32-bit sum, with every carry wrapped back in, is left
 * in %eax.
 *
 * Register roles:
 *   %eax  running sum
 *   %esi  read cursor into buff
 *   %ecx  block counter, later the staging register for the tail bytes
 *   %edx  copy of the remaining byte count, then the dword counter
 *   %ebx  scratch for loaded data
 * %esi and %ebx are saved on entry and restored before returning.
 */
csum_partial:
	pushl	%esi
	pushl	%ebx
	movl	12(%esp), %esi		# buff
	movl	16(%esp), %ecx		# len
	movl	20(%esp), %eax		# sum

	/* Address bit 1 clear: no two-byte alignment fix-up is needed. */
	testl	$2, %esi
	je	.Lcsum_dword_aligned
	subl	$2, %ecx		# the fix-up consumes two bytes
	jnc	.Lcsum_align_word	# there were at least two
	addl	$2, %ecx		# fewer than two: undo, go to the tail
	jmp	.Lcsum_tail

.Lcsum_align_word:
	movw	(%esi), %bx
	leal	2(%esi), %esi		# step past the word just loaded
	addw	%bx, %ax
	adcl	$0, %eax		# wrap the carry out of the 16-bit add

.Lcsum_dword_aligned:
	movl	%ecx, %edx		# keep the byte count for later stages
	shrl	$5, %ecx		# number of whole 32-byte blocks
	je	.Lcsum_dwords
	testl	%esi, %esi		# clears CF before the adc chain

	/* Eight dwords per pass; CF is carried from one adcl to the next. */
.Lcsum_block_loop:
	movl	(%esi), %ebx
	adcl	%ebx, %eax
	.irp	disp, 4, 8, 12, 16, 20, 24, 28
	movl	\disp(%esi), %ebx
	adcl	%ebx, %eax
	.endr
	leal	32(%esi), %esi		# lea and dec both leave CF intact
	decl	%ecx
	jnz	.Lcsum_block_loop
	adcl	$0, %eax		# wrap the last carry of the chain

.Lcsum_dwords:
	movl	%edx, %ecx		# ecx = byte count again, for the tail
	andl	$0x1c, %edx		# bytes remaining in whole dwords
	jz	.Lcsum_tail
	shrl	$2, %edx		# dword count; shifts out a zero, so CF = 0
.Lcsum_dword_loop:
	adcl	(%esi), %eax
	leal	4(%esi), %esi
	decl	%edx
	jnz	.Lcsum_dword_loop
	adcl	$0, %eax

	/* Zero to three trailing bytes are assembled in ecx and added once. */
.Lcsum_tail:
	andl	$3, %ecx
	jz	.Lcsum_done
	cmpl	$2, %ecx
	jc	.Lcsum_last_byte	# exactly one byte left
	movw	(%esi), %cx		# upper half of ecx is zero here
	leal	2(%esi), %esi		# flags from the cmpl survive
	je	.Lcsum_add_tail		# exactly two bytes left
	shll	$16, %ecx		# three bytes: move the word to the top half
.Lcsum_last_byte:
	movb	(%esi), %cl
.Lcsum_add_tail:
	addl	%ecx, %eax
	adcl	$0, %eax
.Lcsum_done:
	popl	%ebx
	popl	%esi
	RET
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci#else
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_ci/* Version for PentiumII/PPro */
1188c2ecf20Sopenharmony_ci
/*
 * csum_partial - Pentium Pro/II variant, assembled when
 * CONFIG_X86_USE_PPRO_CHECKSUM is defined.
 *
 * Arguments are read from the stack; after the two register saves
 * below they sit at 12(%esp) = buf, 16(%esp) = len, 20(%esp) = sum.
 * The running 32-bit sum, with every carry wrapped back in, is left
 * in %eax.  %esi and %ebx are saved on entry and restored on exit.
 *
 * Layout of the main path:
 *   - the dwords that do not fill a whole 128-byte block are summed
 *     first by jumping into the middle of the unrolled adcl run;
 *   - each further pass through label 40 sums one full 128-byte block;
 *   - the final 1-3 bytes are masked out of one dword load.
 *
 * The computed jump relies on every adcl in the unrolled run being
 * exactly three bytes long (opcode, modrm, 8-bit displacement), so
 * do not change the addressing form of those instructions.
 */
csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf

	testl $2, %esi		# address bit 1 set: fix up alignment first
	jnz 30f
10:
	movl %ecx, %edx		# edx = byte count, kept for the tail
	movl %ecx, %ebx
	andl $0x7c, %ebx	# ebx = bytes in the partial block of dwords
	shrl $7, %ecx		# ecx = number of full 128-byte blocks
	addl %ebx,%esi		# esi = end of the partial block
	shrl $2, %ebx		# ebx = dword count of the partial block
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx	# entry = 45f - 3 bytes per dword
	testl %esi, %esi	# clears CF before entering the adc chain
	jmp *%ebx

	/* Handle 2-byte-aligned regions: sum one word, then go on. */
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax		# wrap the carry out of the 16-bit add
	jmp 10b

	/*
	 * buf has address bit 1 set.  ecx becomes len - 2; the compare is
	 * unsigned, so len 0 and len 1 both fall through the ja and je.
	 */
30:	subl $2, %ecx
	ja 20b			# len > 2: align, then take the main path
	je 32f			# len == 2
	addl $2, %ecx		# len < 2: restore the real length
	jz 80f			# len == 0: nothing to read, sum is unchanged
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

	/* One full 128-byte block; esi points just past the block. */
40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi	# advance one block; lea leaves CF intact
	adcl $0, %eax		# wrap the last carry of the chain
	dec %ecx
	jge 40b			# more full blocks remain
	movl %edx, %ecx		# recover the byte count for the tail
50:	andl $3, %ecx
	jz 80f

	/*
	 * Handle the last 1-3 bytes without jumping.  esi is 128 past the
	 * first unsummed byte, hence the -128 displacement below.  The
	 * load fetches a whole dword and the mask discards the bytes
	 * beyond len.
	 */
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx		# ebx = 0xff, 0xffff or 0xffffff
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
	popl %ebx
	popl %esi
	RET
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci#endif
2148c2ecf20Sopenharmony_ci	EXPORT_SYMBOL(csum_partial)
215