/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              IP/TCP/UDP checksumming routines
 *
 * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *              Lots of code moved from tcp.c and ip.c; see those files
 *              for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *                           handling.
 *              Andi Kleen,  add zeroing on error
 *                           converted to pure assembler
 */

#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char * buff, int len,
 *                           unsigned int sum)
 *
 * Syntax: GNU as, AT&T operand order, 32-bit x86, run through cpp.
 *
 * In:     all three arguments are read from the stack.  After the two
 *         register saves below they sit at 12(%esp) = buff,
 *         16(%esp) = len and 20(%esp) = sum.
 * Out:    %eax = sum with the len bytes at buff added in.  Every addition
 *         feeds its carry back into the accumulator (adcl), so no carry
 *         is dropped.
 * Clobb:  %ecx, %edx and the flags.  %esi and %ebx are pushed on entry
 *         and popped before returning.
 *
 * Only bit 1 of the buff address is tested; both variants realign a
 * 2-byte-aligned buffer to 4 bytes before entering their unrolled loop.
 *
 * Two implementations follow, selected by CONFIG_X86_USE_PPRO_CHECKSUM.
 * Both rely on lea, mov, inc and dec leaving CF untouched so that one
 * carry chain can run across several instructions.
 *
 * RET and EXPORT_SYMBOL are macros that are not defined in this file.
 */

.text
.align 4
.globl csum_partial

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

          /*
           * Experiments with Ethernet and SLIP connections show that buff
           * is aligned on either a 2-byte or 4-byte boundary.  We get at
           * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
           * Fortunately, it is easy to convert 2-byte alignment to 4-byte
           * alignment for the unrolled loop.
           */
csum_partial:
        pushl %esi
        pushl %ebx
        movl 20(%esp),%eax      # Function arg: unsigned int sum
        movl 16(%esp),%ecx      # Function arg: int len
        movl 12(%esp),%esi      # Function arg: unsigned char *buff
        testl $2, %esi          # Check alignment.
        jz 2f                   # Jump if alignment is ok.
        subl $2, %ecx           # Alignment uses up two bytes.
        jae 1f                  # Jump if we had at least two bytes.
        addl $2, %ecx           # ecx was < 2.  Deal with it.
        jmp 4f                  # only the 0..3 byte tail is left
1:      movw (%esi), %bx
        addl $2, %esi
        addw %bx, %ax
        adcl $0, %eax
2:
        movl %ecx, %edx         # edx keeps the byte count for the tails
        shrl $5, %ecx           # ecx = number of whole 32-byte chunks
        jz 2f
        testl %esi, %esi        # clears CF before the adcl chain
        /* Main loop: 32 bytes per pass, CF carried from pass to pass. */
1:      movl (%esi), %ebx
        adcl %ebx, %eax
        movl 4(%esi), %ebx
        adcl %ebx, %eax
        movl 8(%esi), %ebx
        adcl %ebx, %eax
        movl 12(%esi), %ebx
        adcl %ebx, %eax
        movl 16(%esi), %ebx
        adcl %ebx, %eax
        movl 20(%esi), %ebx
        adcl %ebx, %eax
        movl 24(%esi), %ebx
        adcl %ebx, %eax
        movl 28(%esi), %ebx
        adcl %ebx, %eax
        lea 32(%esi), %esi
        dec %ecx
        jne 1b
        adcl $0, %eax           # fold in the last carry of the chain
2:      movl %edx, %ecx
        andl $0x1c, %edx        # bytes in the remaining whole dwords
        je 4f
        shrl $2, %edx           # This clears CF
3:      adcl (%esi), %eax
        lea 4(%esi), %esi
        dec %edx
        jne 3b
        adcl $0, %eax
4:      andl $3, %ecx           # 0..3 trailing bytes
        jz 7f
        cmpl $2, %ecx
        jb 5f                   # exactly one byte left
        movw (%esi),%cx         # movw and leal keep the cmpl flags
        leal 2(%esi),%esi
        je 6f                   # exactly two bytes left
        shll $16,%ecx           # three bytes: word to the upper half
5:      movb (%esi),%cl
6:      addl %ecx,%eax
        adcl $0, %eax
7:
        popl %ebx
        popl %esi
        RET

#else

/* Version for PentiumII/PPro */

csum_partial:
        pushl %esi
        pushl %ebx
        movl 20(%esp),%eax      # Function arg: unsigned int sum
        movl 16(%esp),%ecx      # Function arg: int len
        movl 12(%esp),%esi      # Function arg: const unsigned char *buf

        testl $2, %esi
        jnz 30f
10:
        movl %ecx, %edx         # edx keeps the byte count for the tail
        movl %ecx, %ebx
        andl $0x7c, %ebx        # bytes in the partial (under 128) group
        shrl $7, %ecx           # ecx = number of whole 128-byte groups
        addl %ebx,%esi          # point esi past the partial group
        shrl $2, %ebx           # ebx = dwords in the partial group
        negl %ebx
        /*
         * Computed entry into the ladder below: each ladder instruction
         * is assumed to encode in exactly 3 bytes, so the entry point is
         * label 45 minus 3 bytes per dword.  Do not insert or resize
         * anything between labels 40 and 45.
         */
        lea 45f(%ebx,%ebx,2), %ebx
        testl %esi, %esi        # clears CF before the adcl ladder
        jmp *%ebx

        /* Handle 2-byte-aligned regions */
20:     addw (%esi), %ax
        lea 2(%esi), %esi
        adcl $0, %eax
        jmp 10b

30:     subl $2, %ecx
        ja 20b                  # more than two bytes: realign, go on
        je 32f                  # exactly two bytes
        addl $2, %ecx           # len was < 2, restore it
        jz 80f                  # len is 0: read nothing, sum unchanged
        movzbl (%esi),%ebx      # csumming 1 byte, 2-aligned
        addl %ebx, %eax
        adcl $0, %eax
        jmp 80f
32:
        addw (%esi), %ax        # csumming 2 bytes, 2-aligned
        adcl $0, %eax
        jmp 80f

40:
        addl -128(%esi), %eax
        adcl -124(%esi), %eax
        adcl -120(%esi), %eax
        adcl -116(%esi), %eax
        adcl -112(%esi), %eax
        adcl -108(%esi), %eax
        adcl -104(%esi), %eax
        adcl -100(%esi), %eax
        adcl -96(%esi), %eax
        adcl -92(%esi), %eax
        adcl -88(%esi), %eax
        adcl -84(%esi), %eax
        adcl -80(%esi), %eax
        adcl -76(%esi), %eax
        adcl -72(%esi), %eax
        adcl -68(%esi), %eax
        adcl -64(%esi), %eax
        adcl -60(%esi), %eax
        adcl -56(%esi), %eax
        adcl -52(%esi), %eax
        adcl -48(%esi), %eax
        adcl -44(%esi), %eax
        adcl -40(%esi), %eax
        adcl -36(%esi), %eax
        adcl -32(%esi), %eax
        adcl -28(%esi), %eax
        adcl -24(%esi), %eax
        adcl -20(%esi), %eax
        adcl -16(%esi), %eax
        adcl -12(%esi), %eax
        adcl -8(%esi), %eax
        adcl -4(%esi), %eax
45:
        lea 128(%esi), %esi
        adcl $0, %eax           # fold in the carry left by the ladder
        dec %ecx
        jge 40b
        movl %edx, %ecx
50:     andl $3, %ecx
        jz 80f

        /* Handle the last 1-3 bytes without jumping */
        notl %ecx               # 1->2, 2->1, 3->0, higher bits are masked
        movl $0xffffff,%ebx     # by the shll and shrl instructions
        shll $3,%ecx
        shrl %cl,%ebx           # ebx = 0xff, 0xffff or 0xffffff
        andl -128(%esi),%ebx    # esi is 4-aligned so should be ok
        addl %ebx,%eax
        adcl $0,%eax
80:
        popl %ebx
        popl %esi
        RET

#endif
        EXPORT_SYMBOL(csum_partial)