/* SPDX-License-Identifier: GPL-2.0+
 *
 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * Computes a partial checksum, e.g. for TCP/UDP fragments.
 */

/*
 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
 *
 * In:     r4 = buf, r5 = len, r6 = sum
 * Out:    r0 = sum + 32-bit ones-complement sum of buf[0..len)
 * Uses:   r0-r3 scratch, r7 = copy of the original buf (alignment check),
 *         T bit carries the end-around carry between addc instructions.
 *
 * Instructions indented by one extra space sit in a branch delay slot and
 * execute whether or not the branch is taken.
 */

.text
ENTRY(csum_partial)
	  /*
	   * Experiments with Ethernet and SLIP connections show that buf
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte
	   * aligned (remark presumably inherited from the i386 original).
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */
	mov	r4, r0
	tst	#3, r0		! Check alignment.
	bt/s	2f		! Jump if alignment is ok.
	 mov	r4, r7		! Keep a copy to check for alignment
	!
	tst	#1, r0		! Check alignment.
	bt	21f		! Jump if alignment is boundary of 2bytes.

	! buf is odd
	tst	r5, r5
	add	#-1, r5
	bt	10f		! len == 0: nothing was summed, so return sum
				! as is and skip the realign rotate at 9:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0		! first byte is the high byte of its 16-bit
				! word on big-endian (same rule as tail byte)
#endif
	addc	r0, r6		! t=0 from previous tst
	mov	#0, r0
	addc	r0, r6		! fold the carry now; tst below overwrites T
	! Rotate sum left by 8 so that the aligned words added below fall on
	! the opposite byte lanes; the second rotate at 9: completes a rotate
	! by 16, which leaves the folded 16-bit checksum unchanged.
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
	mov	r4, r0
	tst	#2, r0
	bt	2f
21:
	! buf is 2 byte aligned (len could be 0)
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6		! end-around carry
2:
	! buf is 4 byte aligned (len could be 0)
	mov	r5, r1
	mov	#-5, r0
	shld	r0, r1		! r1 = len >> 5 = number of 32-byte chunks
	tst	r1, r1
	bt/s	4f		! if zero, go to 4f
	 clrt
	.align	2
3:
	! Unrolled loop: 8 longwords per pass, carry chained through T.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! save carry; dt overwrites T
	dt	r1
	bf/s	3b
	 cmp/eq	#1, r0		! restore carry into T
	! here, we know r1==0
	addc	r1, r6			! add carry to r6
4:
	mov	r5, r0
	and	#0x1c, r0	! bytes left in whole longwords (0..28)
	tst	r0, r0
	bt	6f
	! 4 bytes or more remaining
	mov	r0, r1
	shlr2	r1		! r1 = longword count
	mov	#0, r2
5:
	addc	r2, r6		! adds the word loaded on the previous pass
	mov.l	@r4+, r2
	movt	r0		! save carry; dt overwrites T
	dt	r1
	bf/s	5b
	 cmp/eq	#1, r0		! restore carry into T
	addc	r2, r6
	addc	r1, r6		! r1==0 here, so it means add carry-bit
6:
	! 3 bytes or less remaining
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if zero, go to 9f
	mov	#2, r1
	cmp/hs  r1, r5
	bf	7f		! exactly one byte left
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f		! exactly two bytes left
	 clrt
	shll16	r0		! three bytes: word goes in the upper half,
	addc	r0, r6		! the last byte is added below
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6
9:
	! Check if the buffer was misaligned, if so realign sum
	mov	r7, r0
	tst	#1, r0
	bt	10f
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
10:
	rts
	 mov	r6, r0

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len)
 */

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial with
 * initial sum being ~0U.
 *
 * Every memory access is wrapped in EXC(), which records an __ex_table
 * entry pointing at the fixup label 6001; that fixup returns 0 in r0.
 */

#define EXC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	; 	\
	.previous

!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
!
! r7:	running sum (starts at ~0U), returned in r0
! r2:	copy of the remaining length, used to derive the tail counts
! r0, r1: scratch
!
ENTRY(csum_partial_copy_generic)
	mov	#-1,r7
	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	 clrt
	add	#2,r6		! r6 was < 2.	Deal with it.
	bra	4f
	 mov	r6,r2

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2
	shlr	r6		! r6 = number of byte pairs
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
EXC(	mov.b	@r4+,r1 	)
EXC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
EXC(	mov.b	r1,@r5		)
EXC(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

	! Assemble the two bytes into one 16-bit word in memory order.
#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0		! save carry; dt overwrites T
	dt	r6
	bf/s	5b
	 cmp/eq	#1,r0		! restore carry into T
	mov	#0,r0
	addc	r0, r7

	mov	r2, r0
	tst	#1, r0
	bt	7f		! even length: done
	bra	5f		! odd length: copy and sum the last byte
	 clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
EXC(	mov.w	@r4+,r0		)
EXC(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
2:
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r6		! r6 = len >> 5 = number of 32-byte chunks
	tst	r6,r6
	bt/s	2f
	 clrt
	.align	2
1:
EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@r5		)
EXC(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(8,r5)	)
EXC(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0 	)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(16,r5)	)
EXC(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(24,r5)	)
EXC(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0		! save carry; dt overwrites T
	dt	r6
	bf/s	1b
	 cmp/eq	#1,r0		! restore carry into T
	mov	#0,r0
	addc	r0,r7

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6		! bytes left in whole longwords (0..28)
	cmp/pl	r6
	bf/s	4f
	 clrt
	shlr2	r6		! r6 = longword count
3:
EXC(	mov.l	@r4+,r0	)
	addc	r0,r7
EXC(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0		! save carry; dt overwrites T
	dt	r6
	bf/s	3b
	 cmp/eq	#1,r0		! restore carry into T
	mov	#0,r0
	addc	r0,r7
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6		! 0..3 trailing bytes
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f		! exactly one byte left
EXC(	mov.w	@r4+,r0	)
EXC(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f		! exactly two bytes left
	 clrt
	shll16	r0		! three bytes: word goes in the upper half,
	addc	r0,r7		! the last byte is added below
5:
EXC(	mov.b	@r4+,r0	)
EXC(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
7:
	! Normal exit: return the accumulated sum.
	rts
	 mov	r7,r0

	! Exception handler: reached through __ex_table when one of the
	! EXC()-wrapped accesses faults; returns 0.
	.section .fixup, "ax"

6001:
	rts
	 mov	#0,r0
	.previous