/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Xtensa version:  Copyright (C) 2001 Tensilica, Inc. by Kevin Chea
 *                  Optimized by Joe Taylor
 */

#include <linux/errno.h>
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *                           unsigned int sum);
 *    a2 = buf
 *    a3 = len
 *    a4 = sum
 *
 * This function is optimized for 2- or 4-byte alignment.  Odd addresses
 * are handled too, but by a much slower byte-wise path.
 */

/*
 * ONES_ADD(sum, val): sum = sum + val using ones-complement arithmetic.
 *
 * A plain twos-complement add drops the carry out of bit 31.  If the
 * result is (unsigned) smaller than val, the add wrapped, so the lost
 * carry is added back in as +1 (end-around carry).
 *
 * sum and val must be different registers: with sum == val the compare
 * can never detect the wrap.  Local label 99 is used internally.
 */
#define ONES_ADD(sum, val)		\
	add	sum, sum, val;		\
	bgeu	sum, val, 99f;		\
	addi	sum, sum, 1;		\
99:	;

.text
ENTRY(csum_partial)

	/*
	 * Add the len bytes at buf into the running 32-bit ones-complement
	 * sum and return the new sum (it is not folded to 16 bits here).
	 *
	 *   in:   a2 = buf, a3 = len, a4 = sum
	 *   out:  a2 = updated sum
	 *   temp: a5, a6, a7, a8
	 *
	 * Experiments with Ethernet and SLIP connections show that buf
	 * is aligned on either a 2-byte or 4-byte boundary.
	 */
	abi_entry_default
	extui	a5, a2, 0, 2
	bnez	a5, 8f		/* branch if not 4-byte aligned */
	/* Fall-through on common case, 4-byte alignment */
1:
	srli	a5, a3, 5	/* 32-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 2f
#else
	beqz	a5, 2f
	slli	a5, a5, 5
	add	a5, a5, a2	/* a5 = end of last 32-byte chunk */
.Loop1:
#endif
	l32i	a6, a2, 0
	l32i	a7, a2, 4
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 8
	l32i	a7, a2, 12
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 16
	l32i	a7, a2, 20
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 24
	l32i	a7, a2, 28
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	addi	a2, a2, 4*8
#if !XCHAL_HAVE_LOOPS
	/*
	 * a5 is a whole number of chunks past the start, so a2 reaches it
	 * exactly.  Test for inequality rather than with a signed compare:
	 * addresses are unsigned, and a signed compare ends the loop early
	 * when the buffer straddles 0x80000000.
	 */
	bne	a2, a5, .Loop1
#endif
2:
	extui	a5, a3, 2, 3	/* remaining 4-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 3f
#else
	beqz	a5, 3f
	slli	a5, a5, 2
	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
.Loop2:
#endif
	l32i	a6, a2, 0
	ONES_ADD(a4, a6)
	addi	a2, a2, 4
#if !XCHAL_HAVE_LOOPS
	bne	a2, a5, .Loop2	/* a2 hits a5 exactly; see .Loop1 */
#endif
3:
	_bbci.l	a3, 1, 5f	/* remaining 2-byte chunk */
	l16ui	a6, a2, 0
	ONES_ADD(a4, a6)
	addi	a2, a2, 2
5:
	_bbci.l	a3, 0, 7f	/* remaining 1-byte chunk */
6:	l8ui	a6, a2, 0
#ifdef __XTENSA_EB__
	slli	a6, a6, 8	/* load byte into bits 8..15 */
#endif
	ONES_ADD(a4, a6)
7:
	mov	a2, a4		/* return the accumulated sum */
	abi_ret_default

	/* uncommon case, buf is not 4-byte aligned */
8:
	beqz	a3, 7b		/* branch if len == 0 */
	beqi	a3, 1, 6b	/* branch if len == 1 */

	extui	a5, a2, 0, 1
	bnez	a5, 8f		/* branch if 1-byte aligned */

	l16ui	a6, a2, 0	/* common case, len >= 2 */
	ONES_ADD(a4, a6)
	addi	a2, a2, 2	/* adjust buf */
	addi	a3, a3, -2	/* adjust len */
	j	1b		/* now buf is 4-byte aligned */

	/* case: odd-byte aligned, len > 1
	 * This case is dog slow, so don't give us an odd address.
	 * (I don't think this ever happens, but just in case.)
	 *
	 * Each 32-bit word is assembled from a byte load, an aligned
	 * 16-bit load at buf + 1 and another byte load, so that no
	 * access is misaligned.
	 */
8:
	srli	a5, a3, 2	/* 4-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 2f
#else
	beqz	a5, 2f
	slli	a5, a5, 2
	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
.Loop3:
#endif
	l8ui	a6, a2, 0	/* first byte:   bits 24..31 (BE),  0.. 7 (LE) */
	l16ui	a7, a2, 1	/* middle bytes: bits  8..23 */
	l8ui	a8, a2, 3	/* last byte:    bits  0.. 7 (BE), 24..31 (LE) */
#ifdef	__XTENSA_EB__
	slli	a6, a6, 24
#else
	slli	a8, a8, 24
#endif
	slli	a7, a7, 8
	or	a7, a7, a6
	or	a7, a7, a8
	ONES_ADD(a4, a7)
	addi	a2, a2, 4
#if !XCHAL_HAVE_LOOPS
	bne	a2, a5, .Loop3	/* a2 hits a5 exactly; see .Loop1 */
#endif
2:
	_bbci.l	a3, 1, 3f	/* remaining 2-byte chunk, still odd addr */
	l8ui	a6, a2, 0
	l8ui	a7, a2, 1
#ifdef	__XTENSA_EB__
	slli	a6, a6, 8
#else
	slli	a7, a7, 8
#endif
	or	a7, a7, a6
	ONES_ADD(a4, a7)
	addi	a2, a2, 2
3:
	j	5b		/* branch to handle the remaining byte */

ENDPROC(csum_partial)

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial
 */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len)
	a2  = src
	a3  = dst
	a4  = len
	a5  = sum (not an argument; the function starts it at -1)
	a8  = temp
	a9  = temp
	a10 = temp

    This function is optimized for 4-byte aligned addresses.  Other
    alignments work, but not nearly as efficiently.
 */

ENTRY(csum_partial_copy_generic)

	/*
	 * Copy len bytes from src to dst and return the 32-bit
	 * ones-complement sum of the copied data.
	 *
	 *   in:   a2 = src, a3 = dst, a4 = len
	 *   out:  a2 = sum, or 0 when an access tagged EX(10f) faults
	 *         (label 10 in .fixup loads 0 and returns)
	 *   temp: a5 (running sum), a8, a9, a10
	 *
	 * The sum is seeded with -1 rather than 0, presumably so that a
	 * successful copy never produces the 0 used to report a fault.
	 * EX() is defined outside this file; it is assumed to record an
	 * exception-table entry for the instruction that follows it.
	 */
	abi_entry_default
	movi	a5, -1
	or	a10, a2, a3	/* low bits set if either address is unaligned */

	/* We optimize the following alignment tests for the 4-byte
	aligned case.  Two bbsi.l instructions might seem more optimal
	(commented out below).  However, both labels 5: and 3: are out
	of the imm8 range, so the assembler relaxes them into
	equivalent bbci.l, j combinations, which is actually
	slower. */

	extui	a9, a10, 0, 2
	beqz	a9, 1f		/* branch if both are 4-byte aligned */
	bbsi.l	a10, 0, 5f	/* branch if one address is odd */
	j	3f		/* one address is 2-byte aligned */

/*	_bbsi.l	a10, 0, 5f */	/* branch if odd address */
/*	_bbsi.l	a10, 1, 3f */	/* branch if 2-byte-aligned address */

1:
	/* src and dst are both 4-byte aligned */
	srli	a10, a4, 5	/* 32-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 2f
#else
	beqz	a10, 2f
	slli	a10, a10, 5
	add	a10, a10, a2	/* a10 = end of last 32-byte src chunk */
.Loop5:
#endif
EX(10f)	l32i	a9, a2, 0
EX(10f)	l32i	a8, a2, 4
EX(10f)	s32i	a9, a3, 0
EX(10f)	s32i	a8, a3, 4
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 8
EX(10f)	l32i	a8, a2, 12
EX(10f)	s32i	a9, a3, 8
EX(10f)	s32i	a8, a3, 12
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 16
EX(10f)	l32i	a8, a2, 20
EX(10f)	s32i	a9, a3, 16
EX(10f)	s32i	a8, a3, 20
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 24
EX(10f)	l32i	a8, a2, 28
EX(10f)	s32i	a9, a3, 24
EX(10f)	s32i	a8, a3, 28
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
	addi	a2, a2, 32
	addi	a3, a3, 32
#if !XCHAL_HAVE_LOOPS
	/*
	 * a10 is a whole number of chunks past the start, so a2 reaches
	 * it exactly.  Test for inequality rather than with a signed
	 * compare: addresses are unsigned, and a signed compare ends the
	 * loop early when the buffer straddles 0x80000000.
	 */
	bne	a2, a10, .Loop5
#endif
2:
	extui	a10, a4, 2, 3	/* remaining 4-byte chunks */
	extui	a4, a4, 0, 2	/* reset len for general-case, 2-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 3f
#else
	beqz	a10, 3f
	slli	a10, a10, 2
	add	a10, a10, a2	/* a10 = end of last 4-byte src chunk */
.Loop6:
#endif
EX(10f)	l32i	a9, a2, 0
EX(10f)	s32i	a9, a3, 0
	ONES_ADD(a5, a9)
	addi	a2, a2, 4
	addi	a3, a3, 4
#if !XCHAL_HAVE_LOOPS
	bne	a2, a10, .Loop6	/* a2 hits a10 exactly; see .Loop5 */
#endif
3:
	/*
	Control comes to here in two cases: (1) It may fall through
	to here from the 4-byte alignment case to process, at most,
	one 2-byte chunk.  (2) It branches to here from above if
	either src or dst is 2-byte aligned, and we process all bytes
	here, except for perhaps a trailing odd byte.  It's
	inefficient, so align your addresses to 4-byte boundaries.

	a2 = src
	a3 = dst
	a4 = len
	a5 = sum
	*/
	srli	a10, a4, 1	/* 2-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 4f
#else
	beqz	a10, 4f
	slli	a10, a10, 1
	add	a10, a10, a2	/* a10 = end of last 2-byte src chunk */
.Loop7:
#endif
EX(10f)	l16ui	a9, a2, 0
EX(10f)	s16i	a9, a3, 0
	ONES_ADD(a5, a9)
	addi	a2, a2, 2
	addi	a3, a3, 2
#if !XCHAL_HAVE_LOOPS
	bne	a2, a10, .Loop7	/* a2 hits a10 exactly; see .Loop5 */
#endif
4:
	/* This section processes a possible trailing odd byte. */
	_bbci.l	a4, 0, 8f	/* 1-byte chunk */
EX(10f)	l8ui	a9, a2, 0
EX(10f)	s8i	a9, a3, 0
#ifdef __XTENSA_EB__
	slli	a9, a9, 8	/* shift byte to bits 8..15 */
#endif
	ONES_ADD(a5, a9)
8:
	mov	a2, a5		/* return the accumulated sum */
	abi_ret_default

5:
	/* Control branch to here when either src or dst is odd.  We
	process all bytes using 8-bit accesses.  Grossly inefficient,
	so don't feed us an odd address. */

	srli	a10, a4, 1	/* handle in pairs for 16-bit csum */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 6f
#else
	beqz	a10, 6f
	slli	a10, a10, 1
	add	a10, a10, a2	/* a10 = end of last odd-aligned, 2-byte src chunk */
.Loop8:
#endif
EX(10f)	l8ui	a9, a2, 0
EX(10f)	l8ui	a8, a2, 1
EX(10f)	s8i	a9, a3, 0
EX(10f)	s8i	a8, a3, 1
#ifdef __XTENSA_EB__
	slli	a9, a9, 8	/* combine into a single 16-bit value */
#else				/* for checksum computation */
	slli	a8, a8, 8
#endif
	or	a9, a9, a8
	ONES_ADD(a5, a9)
	addi	a2, a2, 2
	addi	a3, a3, 2
#if !XCHAL_HAVE_LOOPS
	bne	a2, a10, .Loop8	/* a2 hits a10 exactly; see .Loop5 */
#endif
6:
	j	4b		/* process the possible trailing odd byte */

ENDPROC(csum_partial_copy_generic)


/*
 * Exception handler.
 *
 * Label 10 is the target named by every EX(10f) annotation in
 * csum_partial_copy_generic.  It discards the partial checksum and
 * returns 0 to the caller.
 */
.section .fixup, "ax"
10:
	movi	a2, 0		/* return value: 0 */
	abi_ret_default

.previous