/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */

.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

/*
 * csum_partial (generic version): add the bytes of a buffer into a 32-bit
 * sum using end-around carry.
 *
 * Stack arguments (offsets are valid after the two pushes below):
 *	12(%esp)	buff	start of the data
 *	16(%esp)	len	number of bytes
 *	20(%esp)	sum	value the accumulation starts from
 * Result: the accumulated sum in %eax.
 *
 * Register use: %esi = read pointer, %ecx = bytes / blocks still to do,
 * %edx = saved byte count, %ebx = scratch.  %esi and %ebx are saved and
 * restored; %ecx and %edx are overwritten.
 *
 * Flow: bring %esi to a 4-byte boundary (one byte and/or one 16-bit word),
 * sum 32-byte blocks, then the remaining dwords, then the last 1-3 bytes.
 * When buff is odd the sum is rotated by 8 bits after the first byte and
 * rotated by 8 bits again at exit (label 7).
 */
SYM_FUNC_START(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if alignment is boundary of 2 bytes.

	# buf is odd
	dec %ecx
	jl 8f			# len was 0: return sum unchanged
	movzbl (%esi), %ebx
	/*
	 * Add the leading byte with end-around carry.  The carry has to be
	 * folded back in here: the roll below overwrites CF, so a carry out
	 * of a plain add would be dropped.  This matches the PPro variant.
	 */
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx		# keep the byte count for the tail handling
	shrl $5, %ecx		# ecx = number of 32-byte blocks
	jz 2f
	testl %esi, %esi	# clears CF before the adcl chain
	/*
	 * 32 bytes per iteration.  CF carries from one adcl to the next and
	 * across iterations: lea and dec leave CF untouched.
	 */
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax		# fold the final carry of the block loop
2:	movl %edx, %ecx
	andl $0x1c, %edx	# bytes left in whole dwords (0..28)
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx		# 0..3 trailing bytes
	jz 7f
	cmpl $2, %ecx
	jb 5f			# exactly one byte left
	movw (%esi),%cx
	leal 2(%esi),%esi	# lea keeps the flags of the cmpl above
	je 6f			# exactly two bytes left
	shll $16,%ecx		# three bytes: word to the top, byte goes in cl
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	testb $1, 12(%esp)	# was the original buff address odd?
	jz 8f
	roll $8, %eax		# second 8-bit rotate for the odd-start case
8:
	popl %ebx
	popl %esi
	RET
SYM_FUNC_END(csum_partial)

#else

/* Version for PentiumII/PPro */

/*
 * csum_partial (PentiumII/PPro version): same arguments, result and saved
 * registers as described by the prototype comment above:
 *	12(%esp) buff, 16(%esp) len, 20(%esp) sum (after the two pushes),
 *	result in %eax; %esi and %ebx are saved and restored, %ecx and %edx
 *	are overwritten.
 *
 * The main loop (labels 40/45) is a straight run of 32 add/adc instructions
 * covering 128 bytes.  The first pass enters the run part-way through via a
 * computed jump so that the (len & 0x7c) leftover bytes are summed first;
 * every later pass executes the whole run.
 */
SYM_FUNC_START(csum_partial)
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg:	const unsigned char *buf

	testl $3, %esi
	jnz 25f
10:
	movl %ecx, %edx		# keep the byte count for the tail handling
	movl %ecx, %ebx
	andl $0x7c, %ebx	# bytes handled by the partial first pass
	shrl $7, %ecx		# number of full 128-byte passes
	addl %ebx,%esi		# table entries use negative offsets from esi
	shrl $2, %ebx
	negl %ebx		# ebx = -(dwords in the partial pass)
	/*
	 * Entry point = 45f - 3 * dwords.  This relies on every add/adc in
	 * the table below being 3 bytes long (opcode, modrm, 8-bit
	 * displacement), so nothing may be inserted between 40 and 45.
	 */
	lea 45f(%ebx,%ebx,2), %ebx
	testl %esi, %esi	# clears CF before entering the adcl chain
	JMP_NOSPEC ebx

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b
25:
	testl $1, %esi
	jz 30f
	# buf is odd
	dec %ecx
	jl 90f			# len was 0: return sum unchanged
	movzbl (%esi), %ebx
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax		# rotated by 8 again at label 80
	inc %esi
	testl $2, %esi
	jz 10b

30:	subl $2, %ecx
	ja 20b			# more than two bytes left
	je 32f			# exactly two bytes left
	addl $2, %ecx
	jz 80f			# nothing left
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi
	adcl $0, %eax		# fold the carry of this pass
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx		# 0..3 trailing bytes
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx		# ebx = mask covering only the valid tail bytes
	/*
	 * esi is 128 past the first unprocessed byte here, so -128(%esi)
	 * is the dword holding the tail.  The whole dword is read and the
	 * bytes beyond len are masked off.
	 */
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
80:
	testb $1, 12(%esp)	# was the original buf address odd?
	jz 90f
	roll $8, %eax		# second 8-bit rotate for the odd-start case
90:
	popl %ebx
	popl %esi
	RET
SYM_FUNC_END(csum_partial)

#endif
EXPORT_SYMBOL(csum_partial)

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				  int len)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 */

/*
 * EXC(insn): emit insn under the local label 9999 and register an
 * exception-table entry of type EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX that
 * points from that instruction to the nearest following label 7, which is
 * the common exit of csum_partial_copy_generic.
 * NOTE(review): EX_FLAG_CLEAR_AX presumably makes the fault handler zero
 * %eax, so a faulting copy returns 0 ("add zeroing on error" in the
 * changelog above) - confirm against the extable implementation.
 */
#define EXC(y...)						\
	9999: y;						\
	_ASM_EXTABLE_TYPE(9999b, 7f, EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX)

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

/*
 * Stack layout after the prologue below:
 *	 0(%esp) saved ebx, 4(%esp) saved esi, 8(%esp) saved edi,
 *	FP(%esp)        4-byte scratch slot holding the byte count
 *	ARGBASE(%esp)   return address
 *	ARGBASE+4  src, ARGBASE+8  dst, ARGBASE+12  len
 */
#define ARGBASE 16
#define FP		12

/*
 * csum_partial_copy_generic (generic version): copy len bytes from src to
 * dst while accumulating their sum with end-around carry.  The sum starts
 * at -1 (0xffffffff) and is returned in %eax.
 *
 * Register use: %esi = src pointer, %edi = dst pointer, %ecx = bytes /
 * blocks still to do, %ebx and %edx = data scratch.  %ebx, %esi and %edi
 * are saved and restored; %ecx and %edx are overwritten.
 *
 * Every load from src and store to dst is wrapped in EXC(), so a fault on
 * any of them continues at label 7 (see the EXC comment above).
 *
 * Only bit 1 of dst is examined for alignment; src alignment is not
 * checked.
 */
SYM_FUNC_START(csum_partial_copy_generic)
	subl  $4,%esp		# reserve the FP scratch slot
	pushl %edi
	pushl %esi
	pushl %ebx
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	movl $-1, %eax			# sum
	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
EXC(1:	movw (%esi), %bx	)
	addl $2, %esi
EXC(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)		# keep the byte count for the tail
	shrl $5, %ecx			# ecx = number of 32-byte blocks
	jz 2f
	testl %esi, %esi		# what's wrong with clc?
	/*
	 * 32 bytes per iteration, two dwords in flight (ebx/edx).  mov, lea
	 * and dec leave CF untouched, so the adcl chain carries across the
	 * whole loop.
	 */
EXC(1:	movl (%esi), %ebx	)
EXC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, (%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 4(%edi)	)

EXC(	movl 8(%esi), %ebx	)
EXC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 12(%edi)	)

EXC(	movl 16(%esi), %ebx 	)
EXC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 20(%edi)	)

EXC(	movl 24(%esi), %ebx	)
EXC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
EXC(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
EXC(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax			# fold the final carry of the block loop
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx		# bytes left in whole dwords (0..28)
	je 4f
	shrl $2, %edx			# This clears CF
EXC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
EXC(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx			# 0..3 trailing bytes
	jz 7f
	cmpl $2, %ecx
	jb 5f				# exactly one byte left
EXC(	movw (%esi), %cx	)
	leal 2(%esi), %esi		# mov/lea keep the flags of the cmpl above
EXC(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f				# exactly two bytes left
	shll $16,%ecx			# three bytes: word to the top, byte in cl
EXC(5:	movb (%esi), %cl	)
EXC(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:

	popl %ebx
	popl %esi
	popl %edi
	popl %ecx			# equivalent to addl $4,%esp
	RET
SYM_FUNC_END(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

/*
 * ROUND1(x) / ROUND(x): copy the dword at offset x from (%esi) to (%edi)
 * and add it to %eax.  ROUND1 starts a carry chain (addl), ROUND continues
 * one (adcl).  Both expand to the same number of code bytes, which the
 * computed jump below depends on.
 */
#define ROUND1(x) \
	EXC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	EXC(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	EXC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	EXC(movl %ebx, x(%edi)	)	;

/* Offset of the return address after the three pushes below. */
#define ARGBASE 12

/*
 * csum_partial_copy_generic (PentiumII/PPro version): copy len bytes from
 * src to dst while accumulating their sum with end-around carry.  The sum
 * starts at -1 (0xffffffff) and is returned in %eax.
 *
 * Stack arguments: ARGBASE+4 src, ARGBASE+8 dst, ARGBASE+12 len.
 * Register use: %esi = src pointer, %edi = dst pointer, %ecx = 64-byte
 * passes left, %ebx = data scratch, %edx = rounded source address used by
 * the byte touches at label 1, later the tail data.  %ebx, %edi and %esi
 * are saved and restored; %ecx and %edx are overwritten.
 *
 * The main loop (labels 1/3) is a run of 16 ROUNDs covering 64 bytes.  The
 * first pass enters the run part-way through via a computed jump so that
 * the (len & 0x3c) leftover bytes are handled first.
 */
SYM_FUNC_START(csum_partial_copy_generic)
	pushl %ebx
	pushl %edi
	pushl %esi
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl $-1, %eax			#sum
	movl %ecx, %ebx
	shrl $6, %ecx			# number of full 64-byte passes
	andl $0x3c, %ebx		# bytes handled by the partial first pass
	negl %ebx
	subl %ebx, %esi			# advance past the partial pass: the
	subl %ebx, %edi			# ROUNDs use negative offsets
	lea  -1(%esi),%edx
	andl $-32,%edx			# edx = (esi - 1) rounded down to 32
	/*
	 * Entry point = 3f - 2 * bytes, i.e. 8 code bytes per dword.  This
	 * relies on each ROUND occupying exactly 8 bytes in .text.
	 */
	lea 3f(%ebx,%ebx), %ebx
	testl %esi, %esi		# clears CF before entering the adcl chain
	JMP_NOSPEC ebx
1:	addl $64,%esi
	addl $64,%edi
	/*
	 * NOTE(review): the two byte loads below discard their result (bl
	 * is overwritten by the next ROUND); they look like pre-touches of
	 * the source ahead of the copy - confirm.
	 */
	EXC(movb -32(%edx),%bl)	; EXC(movb (%edx),%bl)
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax			# fold the carry of this pass
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx			# 0..3 trailing bytes
	jz 7f
	cmpl $2, %edx
	jb 5f				# exactly one byte left
EXC(	movw (%esi), %dx         )
	leal 2(%esi), %esi		# mov/lea keep the flags of the cmpl above
EXC(	movw %dx, (%edi)         )
	leal 2(%edi), %edi
	je 6f				# exactly two bytes left
	shll $16,%edx			# three bytes: word to the top, byte in dl
5:
EXC(	movb (%esi), %dl         )
EXC(	movb %dl, (%edi)         )
6:	addl %edx, %eax
	adcl $0, %eax
7:

	popl %esi
	popl %edi
	popl %ebx
	RET
SYM_FUNC_END(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif
EXPORT_SYMBOL(csum_partial_copy_generic)