/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *                   converted to pure assembler
 */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */

.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	  /*
	   * Experiments with Ethernet and SLIP connections show that buff
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */

/*
 * csum_partial(buff, len, sum) - variant built when
 * CONFIG_X86_USE_PPRO_CHECKSUM is not defined.
 *
 * Arguments are read from the stack; after the two pushes below they sit
 * at 12(%esp) = buff, 16(%esp) = len, 20(%esp) = sum.  The 32-bit running
 * sum is returned in %eax.
 *
 * Register use:
 *   %eax  running sum; carries are folded back in with adcl
 *   %ecx  bytes still to process / loop counter
 *   %edx  copy of the byte count kept for the dword and byte tails
 *   %esi  read pointer into buff
 *   %ebx  scratch for loaded data
 * %esi and %ebx are saved on entry and restored on exit.
 *
 * Flow: bring %esi to 4-byte alignment (one byte if buff is odd, then one
 * 16-bit word if still needed), sum 32-byte chunks, then the remaining
 * dwords, then the last 1-3 bytes.  When buff was odd the sum is rotated
 * left by 8 after the leading byte and rotated by 8 again at exit.
 */
SYM_FUNC_START(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if alignment is boundary of 2 bytes.

	/* buf is odd: consume one leading byte */
	dec %ecx
	jl 8f
	movzbl (%esi), %ebx
	/*
	 * Add the byte and fold the carry back in before rotating.  The
	 * rotate overwrites CF, so a plain adcl here would silently drop
	 * a carry out of bit 31.  This matches the PPro variant.
	 */
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	/* edx keeps the byte count; ecx becomes the number of 32-byte chunks */
	movl %ecx, %edx
	shrl $5, %ecx
	jz 2f
	/* testl clears CF so the first adcl of the chain adds no stale carry */
	testl %esi, %esi
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	/* lea and dec leave CF alone, so the carry survives into the next pass */
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx
	/* edx = bytes left in whole dwords (bits 2-4 of the count) */
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
	/*
	 * Last 1-3 bytes.  The flags from cmpl are still live at the je
	 * below because movw and leal do not modify them.  For 3 bytes the
	 * word is shifted into the upper half of ecx and the final byte is
	 * loaded into cl.
	 */
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f
	shll $16,%ecx
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	/* re-test the original buff argument: rotate again if it was odd */
	testb $1, 12(%esp)
	jz 8f
	roll $8, %eax
8:
	popl %ebx
	popl %esi
	RET
SYM_FUNC_END(csum_partial)

#else

/* Version for PentiumII/PPro */

/*
 * csum_partial(buff, len, sum) - variant built when
 * CONFIG_X86_USE_PPRO_CHECKSUM is defined.
 *
 * Arguments are read from the stack; after the two pushes below they sit
 * at 12(%esp) = buff, 16(%esp) = len, 20(%esp) = sum.  The 32-bit running
 * sum is returned in %eax.  %esi and %ebx are saved and restored.
 *
 * Flow:
 *   25/30/20  bring %esi to 4-byte alignment: one byte if buff is odd
 *             (sum rotated left by 8 afterwards), then one 16-bit word.
 *   10        split len into whole 128-byte blocks (%ecx = len >> 7) and
 *             a partial block of (len & 0x7c) bytes, then jump into the
 *             adcl run so the partial block is summed first.
 *   40-45     32 dword adds per 128-byte block.
 *   50        last 1-3 bytes via one masked dword load.
 *   80        rotate by 8 again when the original buff was odd.
 */
SYM_FUNC_START(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf

	testl $3, %esi
	jnz 25f
10:
	movl %ecx, %edx
	movl %ecx, %ebx
	andl $0x7c, %ebx
	shrl $7, %ecx
	addl %ebx,%esi
	shrl $2, %ebx
	negl %ebx
	/*
	 * Jump target = label 45 minus 3 bytes per dword of the partial
	 * block.  NOTE(review): this only works while every adcl in the
	 * 40..45 run assembles to exactly 3 bytes - do not change their
	 * operands without rechecking.
	 */
	lea 45f(%ebx,%ebx,2), %ebx
	/* testl clears CF so the first adcl reached adds no stale carry */
	testl %esi, %esi
	JMP_NOSPEC ebx

	/* Handle 2-byte-aligned regions */
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b
25:
	testl $1, %esi
	jz 30f
	/* buf is odd */
	dec %ecx
	jl 90f
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 10b

	/*
	 * esi is 2-byte aligned here.  More than two bytes left: take one
	 * word at 20 and continue at 10.  Exactly two: 32.  Otherwise zero
	 * or one byte remains.
	 */
30:	subl $2, %ecx
	ja 20b
	je 32f
	addl $2, %ecx
	jz 80f
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	/*
	 * lea leaves CF alone; adcl $0 folds the pending carry in, which is
	 * why the block at 40 can start with a plain addl.  The loop runs
	 * until ecx goes negative, i.e. once per whole 128-byte block after
	 * the partial one.
	 */
	lea 128(%esi), %esi
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	/*
	 * Handle the last 1-3 bytes without jumping.  esi was advanced by
	 * 128 at label 45, so -128(%esi) addresses the first unsummed byte.
	 * A full dword is loaded and the unwanted high bytes are masked off.
	 */
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
	/* re-test the original buff argument: rotate again if it was odd */
	testb $1, 12(%esp)
	jz 90f
	roll $8, %eax
90:
	popl %ebx
	popl %esi
	RET
SYM_FUNC_END(csum_partial)

#endif
EXPORT_SYMBOL(csum_partial)

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				  int len)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 */

/*
 * EXC(insn...): emit the given instruction(s) under local label 9999 and
 * register an exception-table entry for that address.  The fixup target is
 * the next local label 7 after the use site, so every function using EXC
 * must place a 7: label at its fault exit.  The entry type is
 * EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX.
 * NOTE(review): presumably the CLEAR_AX flag zeroes %eax so a faulting copy
 * returns 0 - confirm against the extable fixup type definitions.
 */
#define EXC(y...)						\
	9999: y;						\
	_ASM_EXTABLE_TYPE(9999b, 7f, EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX)

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

/*
 * Stack layout after the prologue below (4-byte local + three pushes):
 *   ARGBASE  bytes between %esp and the return address; the arguments
 *            follow at ARGBASE+4 (src), ARGBASE+8 (dst), ARGBASE+12 (len)
 *   FP       offset of the 4-byte local that holds the byte count
 */
#define ARGBASE 16
#define FP		12

/*
 * csum_partial_copy_generic(src, dst, len) - variant built when
 * CONFIG_X86_USE_PPRO_CHECKSUM is not defined.
 *
 * Copies len bytes from src to dst while accumulating a 32-bit sum that
 * starts at -1 and is returned in %eax.  Every memory access that touches
 * src or dst is wrapped in EXC(), whose fixup target is label 7, i.e. the
 * normal epilogue.
 *
 * Register use:
 *   %eax  running sum
 *   %ecx  bytes still to process / loop counter
 *   %esi  source pointer        %edi  destination pointer
 *   %ebx, %edx  scratch for the data being moved
 * %edi, %esi and %ebx are saved on entry and restored on exit.
 */
SYM_FUNC_START(csum_partial_copy_generic)
	subl  $4,%esp
	pushl %edi
	pushl %esi
	pushl %ebx
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	movl $-1, %eax			# sum
	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
EXC(1:	movw (%esi), %bx	)
	addl $2, %esi
EXC(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	/* stash the byte count for the tails; ecx = number of 32-byte chunks */
	movl %ecx, FP(%esp)
	shrl $5, %ecx
	jz 2f
	/* testl clears CF so the first adcl of the chain adds no stale carry */
	testl %esi, %esi
EXC(1:	movl (%esi), %ebx	)
EXC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, (%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 4(%edi)	)

EXC(	movl 8(%esi), %ebx	)
EXC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 12(%edi)	)

EXC(	movl 16(%esi), %ebx 	)
EXC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 20(%edi)	)

EXC(	movl 24(%esi), %ebx	)
EXC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 28(%edi)	)

	/* lea and dec leave CF alone, so the carry survives into the next pass */
	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	/* edx = bytes left in whole dwords (bits 2-4 of the count) */
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx			# This clears CF
EXC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
EXC(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
	/*
	 * Last 1-3 bytes.  The flags from cmpl are still live at the je
	 * below because the mov and lea instructions in between do not
	 * modify them.  For 3 bytes the word is shifted into the upper half
	 * of ecx and the final byte is loaded into cl.
	 */
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
EXC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
EXC(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%ecx
EXC(5:	movb (%esi), %cl	)
EXC(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
	/* common exit, also the EXC fixup target */
7:

	popl %ebx
	popl %esi
	popl %edi
	popl %ecx			# equivalent to addl $4,%esp
	RET
SYM_FUNC_END(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

/*
 * ROUND1(x) / ROUND(x): copy the dword at offset x from (%esi) to (%edi)
 * and add it to %eax.  ROUND1 uses addl (no carry in), ROUND uses adcl.
 * Both memory accesses go through EXC().
 */
#define ROUND1(x) \
	EXC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	EXC(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	EXC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	EXC(movl %ebx, x(%edi)	)	;

/* three pushes sit between %esp and the return address */
#define ARGBASE 12

/*
 * csum_partial_copy_generic(src, dst, len) - variant built when
 * CONFIG_X86_USE_PPRO_CHECKSUM is defined.
 *
 * Copies len bytes from src to dst while accumulating a 32-bit sum that
 * starts at -1 and is returned in %eax.  Arguments are read from the stack
 * at ARGBASE+4 (src), ARGBASE+8 (dst), ARGBASE+12 (len).  Faults on any
 * EXC()-wrapped access resume at label 7, the normal epilogue.
 *
 * Flow: len is split into whole 64-byte blocks (%ecx = len >> 6) and a
 * partial block of (len & 0x3c) bytes.  %esi and %edi are advanced past the
 * partial block and a computed jump enters the ROUND sequence far enough
 * before label 3 to process exactly that partial block; the loop then
 * handles the whole blocks.  The last 1-3 bytes are handled at label 4.
 *
 * %ebx, %edi and %esi are saved on entry and restored on exit.
 */
SYM_FUNC_START(csum_partial_copy_generic)
	pushl %ebx
	pushl %edi
	pushl %esi
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl $-1, %eax			#sum
	movl %ecx, %ebx
	shrl $6, %ecx
	andl $0x3c, %ebx
	negl %ebx
	subl %ebx, %esi
	subl %ebx, %edi
	/*
	 * edx = (esi - 1) rounded down to a 32-byte boundary; it is used
	 * only by the two byte loads at label 1.
	 * NOTE(review): those loads look like a way to touch source lines
	 * ahead of the copy - confirm intent before changing them.
	 */
	lea  -1(%esi),%edx
	andl $-32,%edx
	/*
	 * ebx holds minus the partial byte count, so the target is label 3
	 * minus 8 code bytes per dword.  NOTE(review): this only works
	 * while each ROUND expands to exactly 8 bytes of code.
	 */
	lea 3f(%ebx,%ebx), %ebx
	/* testl clears CF so the first adcl reached adds no stale carry */
	testl %esi, %esi
	JMP_NOSPEC ebx
1:	addl $64,%esi
	addl $64,%edi
	EXC(movb -32(%edx),%bl)	; EXC(movb (%edx),%bl)
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
	/* fold the pending carry before addl below overwrites CF */
3:	adcl $0,%eax
	addl $64, %edx
	dec %ecx
	jge 1b
	/*
	 * Last 1-3 bytes.  The flags from cmpl are still live at the je
	 * below because the mov and lea instructions in between do not
	 * modify them.  For 3 bytes the word is shifted into the upper half
	 * of edx and the final byte is loaded into dl.
	 */
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx
	jz 7f
	cmpl $2, %edx
	jb 5f
EXC(	movw (%esi), %dx         )
	leal 2(%esi), %esi
EXC(	movw %dx, (%edi)         )
	leal 2(%edi), %edi
	je 6f
	shll $16,%edx
5:
EXC(	movb (%esi), %dl         )
EXC(	movb %dl, (%edi)         )
6:	addl %edx, %eax
	adcl $0, %eax
	/* common exit, also the EXC fixup target */
7:

	popl %esi
	popl %edi
	popl %ebx
	RET
SYM_FUNC_END(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif
EXPORT_SYMBOL(csum_partial_copy_generic)