/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Xtensa version:  Copyright (C) 2001 Tensilica, Inc. by Kevin Chea
 *                  Optimized by Joe Taylor
 */

#include <linux/errno.h>
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *                           unsigned int sum);
 *    a2 = buf
 *    a3 = len
 *    a4 = sum
 *
 * Returns the updated sum in a2.
 *
 * This function is optimized for 2- or 4-byte aligned buffers.  Odd
 * addresses are handled by a separate byte-assembling path further
 * down, which is much slower.
 *
 * NOTE(review): when XCHAL_HAVE_LOOPS is 0 the loops terminate with
 * "blt", a signed comparison of addresses.  That presumes a buffer never
 * straddles the 0x7fffffff/0x80000000 boundary -- confirm for the
 * configurations that care.
 */

/*
 * ONES_ADD converts twos-complement math to ones-complement.
 *
 * It adds val into sum.  If the 32-bit result is (unsigned) smaller than
 * val the addition wrapped, so the carry is added back in (end-around
 * carry).  Local label 99 is consumed by the macro.
 */
#define ONES_ADD(sum, val)	  \
	add	sum, sum, val	; \
	bgeu	sum, val, 99f	; \
	addi	sum, sum, 1	; \
99:				;

.text
ENTRY(csum_partial)

	/*
	 * Experiments with Ethernet and SLIP connections show that buf
	 * is aligned on either a 2-byte or 4-byte boundary.
	 */
	abi_entry_default
	extui	a5, a2, 0, 2	/* a5 = low two address bits */
	bnez	a5, 8f		/* branch if not 4-byte aligned */
	/* Fall-through on common case, 4-byte alignment */
1:
	srli	a5, a3, 5	/* 32-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 2f
#else
	beqz	a5, 2f
	slli	a5, a5, 5
	add	a5, a5, a2	/* a5 = end of last 32-byte chunk */
.Loop1:
#endif
	l32i	a6, a2, 0
	l32i	a7, a2, 4
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 8
	l32i	a7, a2, 12
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 16
	l32i	a7, a2, 20
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 24
	l32i	a7, a2, 28
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	addi	a2, a2, 4*8
#if !XCHAL_HAVE_LOOPS
	blt	a2, a5, .Loop1
#endif
2:
	extui	a5, a3, 2, 3	/* remaining 4-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 3f
#else
	beqz	a5, 3f
	slli	a5, a5, 2
	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
.Loop2:
#endif
	l32i	a6, a2, 0
	ONES_ADD(a4, a6)
	addi	a2, a2, 4
#if !XCHAL_HAVE_LOOPS
	blt	a2, a5, .Loop2
#endif
3:
	_bbci.l	a3, 1, 5f	/* remaining 2-byte chunk */
	l16ui	a6, a2, 0
	ONES_ADD(a4, a6)
	addi	a2, a2, 2
5:
	_bbci.l	a3, 0, 7f	/* remaining 1-byte chunk */
6:	l8ui	a6, a2, 0
#ifdef __XTENSA_EB__
	slli	a6, a6, 8	/* load byte into bits 8..15 */
#endif
	ONES_ADD(a4, a6)
7:
	mov	a2, a4		/* return the accumulated sum */
	abi_ret_default

	/* uncommon case, buf is not 4-byte aligned (2-byte or odd) */
8:
	beqz	a3, 7b		/* branch if len == 0 */
	beqi	a3, 1, 6b	/* branch if len == 1 */

	extui	a5, a2, 0, 1
	bnez	a5, 8f		/* branch if 1-byte aligned */

	l16ui	a6, a2, 0	/* common case, len >= 2 */
	ONES_ADD(a4, a6)
	addi	a2, a2, 2	/* adjust buf */
	addi	a3, a3, -2	/* adjust len */
	j	1b		/* now buf is 4-byte aligned */

	/* case: odd-byte aligned, len > 1
	 * This case is dog slow, so don't give us an odd address.
	 * (I don't think this ever happens, but just in case.)
	 *
	 * Each 32-bit word is assembled from a byte, a halfword and a
	 * byte.  The bit positions in the comments below are the
	 * big-endian ones; on little-endian the two single bytes swap
	 * places (see the #ifdef).
	 */
8:
	srli	a5, a3, 2	/* 4-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 2f
#else
	beqz	a5, 2f
	slli	a5, a5, 2
	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
.Loop3:
#endif
	l8ui	a6, a2, 0	/* bits 24..31 */
	l16ui	a7, a2, 1	/* bits  8..23 */
	l8ui	a8, a2, 3	/* bits  0..7 */
#ifdef __XTENSA_EB__
	slli	a6, a6, 24
#else
	slli	a8, a8, 24
#endif
	slli	a7, a7, 8
	or	a7, a7, a6
	or	a7, a7, a8
	ONES_ADD(a4, a7)
	addi	a2, a2, 4
#if !XCHAL_HAVE_LOOPS
	blt	a2, a5, .Loop3
#endif
2:
	_bbci.l	a3, 1, 3f	/* remaining 2-byte chunk, still odd addr */
	l8ui	a6, a2, 0
	l8ui	a7, a2, 1
#ifdef __XTENSA_EB__
	slli	a6, a6, 8
#else
	slli	a7, a7, 8
#endif
	or	a7, a7, a6
	ONES_ADD(a4, a7)
	addi	a2, a2, 2
3:
	j	5b		/* branch to handle the remaining byte */

ENDPROC(csum_partial)
EXPORT_SYMBOL(csum_partial)

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial
 */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len)
	a2  = src
	a3  = dst
	a4  = len
	a5  = sum
	a8  = temp
	a9  = temp
	a10 = temp

    The sum starts out as -1 (all ones) and the final sum is returned in
    a2.  Every load and store is wrapped in EX(10f); label 10 is the
    fixup code at the end of this file, which returns 0 instead.

    This function is optimized for 4-byte aligned addresses.  Other
    alignments work, but not nearly as efficiently.
*/

ENTRY(csum_partial_copy_generic)

	abi_entry_default
	movi	a5, -1		/* initial sum */
	or	a10, a2, a3	/* combined alignment bits of src and dst */

	/* We optimize the following alignment tests for the 4-byte
	aligned case.  Two bbsi.l instructions might seem more optimal
	(commented out below).  However, both labels 5: and 3: are out
	of the imm8 range, so the assembler relaxes them into
	equivalent bbci.l, j combinations, which is actually
	slower. */

	extui	a9, a10, 0, 2
	beqz	a9, 1f		/* branch if both are 4-byte aligned */
	bbsi.l	a10, 0, 5f	/* branch if one address is odd */
	j	3f		/* one address is 2-byte aligned */

/*	_bbsi.l	a10, 0, 5f */	/* branch if odd address */
/*	_bbsi.l	a10, 1, 3f */	/* branch if 2-byte-aligned address */

1:
	/* src and dst are both 4-byte aligned */
	srli	a10, a4, 5	/* 32-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 2f
#else
	beqz	a10, 2f
	slli	a10, a10, 5
	add	a10, a10, a2	/* a10 = end of last 32-byte src chunk */
.Loop5:
#endif
EX(10f)	l32i	a9, a2, 0
EX(10f)	l32i	a8, a2, 4
EX(10f)	s32i	a9, a3, 0
EX(10f)	s32i	a8, a3, 4
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 8
EX(10f)	l32i	a8, a2, 12
EX(10f)	s32i	a9, a3, 8
EX(10f)	s32i	a8, a3, 12
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 16
EX(10f)	l32i	a8, a2, 20
EX(10f)	s32i	a9, a3, 16
EX(10f)	s32i	a8, a3, 20
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 24
EX(10f)	l32i	a8, a2, 28
EX(10f)	s32i	a9, a3, 24
EX(10f)	s32i	a8, a3, 28
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
	addi	a2, a2, 32
	addi	a3, a3, 32
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop5
#endif
2:
	extui	a10, a4, 2, 3	/* remaining 4-byte chunks */
	extui	a4, a4, 0, 2	/* reset len for general-case, 2-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 3f
#else
	beqz	a10, 3f
	slli	a10, a10, 2
	add	a10, a10, a2	/* a10 = end of last 4-byte src chunk */
.Loop6:
#endif
EX(10f)	l32i	a9, a2, 0
EX(10f)	s32i	a9, a3, 0
	ONES_ADD(a5, a9)
	addi	a2, a2, 4
	addi	a3, a3, 4
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop6
#endif
3:
	/*
	Control comes to here in two cases: (1) It may fall through
	to here from the 4-byte alignment case to process, at most,
	one 2-byte chunk.  (2) It branches to here from above if
	either src or dst is 2-byte aligned, and we process all bytes
	here, except for perhaps a trailing odd byte.  It's
	inefficient, so align your addresses to 4-byte boundaries.

	a2 = src
	a3 = dst
	a4 = len
	a5 = sum
	*/
	srli	a10, a4, 1	/* 2-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 4f
#else
	beqz	a10, 4f
	slli	a10, a10, 1
	add	a10, a10, a2	/* a10 = end of last 2-byte src chunk */
.Loop7:
#endif
EX(10f)	l16ui	a9, a2, 0
EX(10f)	s16i	a9, a3, 0
	ONES_ADD(a5, a9)
	addi	a2, a2, 2
	addi	a3, a3, 2
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop7
#endif
4:
	/* This section processes a possible trailing odd byte. */
	_bbci.l	a4, 0, 8f	/* 1-byte chunk */
EX(10f)	l8ui	a9, a2, 0
EX(10f)	s8i	a9, a3, 0
#ifdef __XTENSA_EB__
	slli	a9, a9, 8	/* shift byte to bits 8..15 */
#endif
	ONES_ADD(a5, a9)
8:
	mov	a2, a5		/* return the accumulated sum */
	abi_ret_default

5:
	/* Control branches to here when either src or dst is odd.  We
	process all bytes using 8-bit accesses.  Grossly inefficient,
	so don't feed us an odd address. */

	srli	a10, a4, 1	/* handle in pairs for 16-bit csum */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 6f
#else
	beqz	a10, 6f
	slli	a10, a10, 1
	add	a10, a10, a2	/* a10 = end of last odd-aligned, 2-byte src chunk */
.Loop8:
#endif
EX(10f)	l8ui	a9, a2, 0
EX(10f)	l8ui	a8, a2, 1
EX(10f)	s8i	a9, a3, 0
EX(10f)	s8i	a8, a3, 1
#ifdef __XTENSA_EB__
	slli	a9, a9, 8	/* combine into a single 16-bit value */
#else				/* for checksum computation */
	slli	a8, a8, 8
#endif
	or	a9, a9, a8
	ONES_ADD(a5, a9)
	addi	a2, a2, 2
	addi	a3, a3, 2
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop8
#endif
6:
	j	4b		/* process the possible trailing odd byte */

ENDPROC(csum_partial_copy_generic)
EXPORT_SYMBOL(csum_partial_copy_generic)


/*
 * Exception handler: the target named by every EX(10f) above.  It makes
 * csum_partial_copy_generic return 0.  (EX itself is defined outside this
 * file, presumably in <asm/asmmacro.h>.)
 */
.section .fixup, "ax"
10:
	movi	a2, 0
	abi_ret_default

.previous