18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-or-later */
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * This file contains assembly-language implementations
48c2ecf20Sopenharmony_ci * of IP-style 1's complement checksum routines.
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
98c2ecf20Sopenharmony_ci */
108c2ecf20Sopenharmony_ci
118c2ecf20Sopenharmony_ci#include <linux/sys.h>
128c2ecf20Sopenharmony_ci#include <asm/processor.h>
138c2ecf20Sopenharmony_ci#include <asm/errno.h>
148c2ecf20Sopenharmony_ci#include <asm/ppc_asm.h>
158c2ecf20Sopenharmony_ci#include <asm/export.h>
168c2ecf20Sopenharmony_ci
178c2ecf20Sopenharmony_ci/*
188c2ecf20Sopenharmony_ci * Computes the checksum of a memory block at buff, length len,
198c2ecf20Sopenharmony_ci * and adds in "sum" (32-bit).
208c2ecf20Sopenharmony_ci *
218c2ecf20Sopenharmony_ci * __csum_partial(r3=buff, r4=len, r5=sum)
 *
 * Returns (r3): the 64-bit running sum folded down to 32 bits. The value
 * is neither complemented nor folded to 16 bits here.
 *
 * r0 holds the running sum and XER[CA] holds the pending carry between
 * successive adde instructions. Loads, pointer/length updates and loop
 * branches are interleaved with the adde's, so the code relies on none
 * of those instructions modifying CA.
 *
 * Registers written besides r3: r0, r4, r6, r7, r9-r12, ctr, cr0 and
 * XER[CA]. r14-r16 are also used, but only on the unrolled 64-byte loop
 * path, where they are saved to and restored from a temporary stack
 * frame.
228c2ecf20Sopenharmony_ci */
238c2ecf20Sopenharmony_ci_GLOBAL(__csum_partial)
	/*
	 * Seed the running sum: adding an immediate 0 copies "sum" into r0
	 * and leaves CA = 0, because adding zero cannot carry out.
	 */
248c2ecf20Sopenharmony_ci	addic	r0,r5,0			/* clear carry */
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_ci	srdi.	r6,r4,3			/* less than 8 bytes? */
278c2ecf20Sopenharmony_ci	beq	.Lcsum_tail_word
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_ci	/*
308c2ecf20Sopenharmony_ci	 * If only halfword aligned, align to a double word. Since odd
318c2ecf20Sopenharmony_ci	 * aligned addresses should be rare and they would require more
328c2ecf20Sopenharmony_ci	 * work to calculate the correct checksum, we ignore that case
338c2ecf20Sopenharmony_ci	 * and take the potential slowdown of unaligned loads.
348c2ecf20Sopenharmony_ci	 */
358c2ecf20Sopenharmony_ci	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
368c2ecf20Sopenharmony_ci	beq	.Lcsum_aligned
378c2ecf20Sopenharmony_ci
	/*
	 * ctr = 4 - r6: for an even buff this is the number of halfwords up
	 * to the next 8-byte boundary (at most 3, i.e. 6 bytes, which is
	 * always available because len >= 8 on this path).
	 */
388c2ecf20Sopenharmony_ci	li	r7,4
398c2ecf20Sopenharmony_ci	sub	r6,r7,r6
408c2ecf20Sopenharmony_ci	mtctr	r6
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_ci1:
438c2ecf20Sopenharmony_ci	lhz	r6,0(r3)		/* align to doubleword */
448c2ecf20Sopenharmony_ci	subi	r4,r4,2
458c2ecf20Sopenharmony_ci	addi	r3,r3,2
468c2ecf20Sopenharmony_ci	adde	r0,r0,r6
478c2ecf20Sopenharmony_ci	bdnz	1b
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_ci.Lcsum_aligned:
508c2ecf20Sopenharmony_ci	/*
518c2ecf20Sopenharmony_ci	 * We unroll the loop such that each iteration is 64 bytes with an
528c2ecf20Sopenharmony_ci	 * entry and exit limb of 64 bytes, meaning a minimum size of
538c2ecf20Sopenharmony_ci	 * 128 bytes.
548c2ecf20Sopenharmony_ci	 */
558c2ecf20Sopenharmony_ci	srdi.	r6,r4,7
568c2ecf20Sopenharmony_ci	beq	.Lcsum_tail_doublewords		/* len < 128 */
578c2ecf20Sopenharmony_ci
	/*
	 * ctr = (len / 64) - 1: the loop at 2: handles every 64-byte chunk
	 * except the last one, which is summed by the straight-line copy of
	 * the loop body that follows the loop.
	 */
588c2ecf20Sopenharmony_ci	srdi	r6,r4,6
598c2ecf20Sopenharmony_ci	subi	r6,r6,1
608c2ecf20Sopenharmony_ci	mtctr	r6
618c2ecf20Sopenharmony_ci
	/* r14-r16 serve as extra load targets below; save them in a new frame */
628c2ecf20Sopenharmony_ci	stdu	r1,-STACKFRAMESIZE(r1)
638c2ecf20Sopenharmony_ci	std	r14,STK_REG(R14)(r1)
648c2ecf20Sopenharmony_ci	std	r15,STK_REG(R15)(r1)
658c2ecf20Sopenharmony_ci	std	r16,STK_REG(R16)(r1)
668c2ecf20Sopenharmony_ci
	/*
	 * Preload the first 32 bytes of the first chunk. Each pass of the
	 * loop likewise loads the first 32 bytes of the following chunk
	 * before it branches back.
	 */
678c2ecf20Sopenharmony_ci	ld	r6,0(r3)
688c2ecf20Sopenharmony_ci	ld	r9,8(r3)
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci	ld	r10,16(r3)
718c2ecf20Sopenharmony_ci	ld	r11,24(r3)
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci	/*
748c2ecf20Sopenharmony_ci	 * On POWER6 and POWER7 back to back adde instructions take 2 cycles
758c2ecf20Sopenharmony_ci	 * because of the XER dependency. This means the fastest this loop can
768c2ecf20Sopenharmony_ci	 * go is 16 cycles per iteration. The scheduling of the loop below has
778c2ecf20Sopenharmony_ci	 * been shown to hit this on both POWER6 and POWER7.
788c2ecf20Sopenharmony_ci	 */
798c2ecf20Sopenharmony_ci	.align 5
808c2ecf20Sopenharmony_ci2:
818c2ecf20Sopenharmony_ci	adde	r0,r0,r6
828c2ecf20Sopenharmony_ci	ld	r12,32(r3)
838c2ecf20Sopenharmony_ci	ld	r14,40(r3)
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci	adde	r0,r0,r9
868c2ecf20Sopenharmony_ci	ld	r15,48(r3)
878c2ecf20Sopenharmony_ci	ld	r16,56(r3)
888c2ecf20Sopenharmony_ci	addi	r3,r3,64
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci	adde	r0,r0,r10
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci	adde	r0,r0,r11
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	adde	r0,r0,r12
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_ci	adde	r0,r0,r14
978c2ecf20Sopenharmony_ci
988c2ecf20Sopenharmony_ci	adde	r0,r0,r15
	/* r3 already points at the next chunk: start loading it */
998c2ecf20Sopenharmony_ci	ld	r6,0(r3)
1008c2ecf20Sopenharmony_ci	ld	r9,8(r3)
1018c2ecf20Sopenharmony_ci
1028c2ecf20Sopenharmony_ci	adde	r0,r0,r16
1038c2ecf20Sopenharmony_ci	ld	r10,16(r3)
1048c2ecf20Sopenharmony_ci	ld	r11,24(r3)
1058c2ecf20Sopenharmony_ci	bdnz	2b
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci
	/*
	 * Exit limb: sum the final 64-byte chunk. Its first 32 bytes are
	 * already in r6, r9, r10 and r11; nothing beyond it is loaded.
	 */
1088c2ecf20Sopenharmony_ci	adde	r0,r0,r6
1098c2ecf20Sopenharmony_ci	ld	r12,32(r3)
1108c2ecf20Sopenharmony_ci	ld	r14,40(r3)
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci	adde	r0,r0,r9
1138c2ecf20Sopenharmony_ci	ld	r15,48(r3)
1148c2ecf20Sopenharmony_ci	ld	r16,56(r3)
1158c2ecf20Sopenharmony_ci	addi	r3,r3,64
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_ci	adde	r0,r0,r10
1188c2ecf20Sopenharmony_ci	adde	r0,r0,r11
1198c2ecf20Sopenharmony_ci	adde	r0,r0,r12
1208c2ecf20Sopenharmony_ci	adde	r0,r0,r14
1218c2ecf20Sopenharmony_ci	adde	r0,r0,r15
1228c2ecf20Sopenharmony_ci	adde	r0,r0,r16
1238c2ecf20Sopenharmony_ci
	/* Restore r14-r16 and pop the temporary frame */
1248c2ecf20Sopenharmony_ci	ld	r14,STK_REG(R14)(r1)
1258c2ecf20Sopenharmony_ci	ld	r15,STK_REG(R15)(r1)
1268c2ecf20Sopenharmony_ci	ld	r16,STK_REG(R16)(r1)
1278c2ecf20Sopenharmony_ci	addi	r1,r1,STACKFRAMESIZE
1288c2ecf20Sopenharmony_ci
	/* r4 was not updated in the loop; keep only what the 64-byte chunks left over */
1298c2ecf20Sopenharmony_ci	andi.	r4,r4,63
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_ci.Lcsum_tail_doublewords:		/* Up to 127 bytes to go */
1328c2ecf20Sopenharmony_ci	srdi.	r6,r4,3
1338c2ecf20Sopenharmony_ci	beq	.Lcsum_tail_word
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci	mtctr	r6
1368c2ecf20Sopenharmony_ci3:
1378c2ecf20Sopenharmony_ci	ld	r6,0(r3)
1388c2ecf20Sopenharmony_ci	addi	r3,r3,8
1398c2ecf20Sopenharmony_ci	adde	r0,r0,r6
1408c2ecf20Sopenharmony_ci	bdnz	3b
1418c2ecf20Sopenharmony_ci
	/* r4 = bytes left after the whole doublewords */
1428c2ecf20Sopenharmony_ci	andi.	r4,r4,7
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_ci.Lcsum_tail_word:			/* Up to 7 bytes to go */
1458c2ecf20Sopenharmony_ci	srdi.	r6,r4,2
1468c2ecf20Sopenharmony_ci	beq	.Lcsum_tail_halfword
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_ci	lwz	r6,0(r3)
1498c2ecf20Sopenharmony_ci	addi	r3,r3,4
1508c2ecf20Sopenharmony_ci	adde	r0,r0,r6
1518c2ecf20Sopenharmony_ci	subi	r4,r4,4
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci.Lcsum_tail_halfword:			/* Up to 3 bytes to go */
1548c2ecf20Sopenharmony_ci	srdi.	r6,r4,1
1558c2ecf20Sopenharmony_ci	beq	.Lcsum_tail_byte
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci	lhz	r6,0(r3)
1588c2ecf20Sopenharmony_ci	addi	r3,r3,2
1598c2ecf20Sopenharmony_ci	adde	r0,r0,r6
1608c2ecf20Sopenharmony_ci	subi	r4,r4,2
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci.Lcsum_tail_byte:			/* Up to 1 byte to go */
1638c2ecf20Sopenharmony_ci	andi.	r6,r4,1
1648c2ecf20Sopenharmony_ci	beq	.Lcsum_finish
1658c2ecf20Sopenharmony_ci
	/*
	 * The last byte is summed as a halfword whose second byte is zero:
	 * read big-endian that is (byte << 8), read little-endian it is the
	 * byte value itself.
	 */
1668c2ecf20Sopenharmony_ci	lbz	r6,0(r3)
1678c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN__
1688c2ecf20Sopenharmony_ci	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
1698c2ecf20Sopenharmony_ci	adde	r0,r0,r9
1708c2ecf20Sopenharmony_ci#else
1718c2ecf20Sopenharmony_ci	adde	r0,r0,r6
1728c2ecf20Sopenharmony_ci#endif
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci.Lcsum_finish:
	/*
	 * After absorbing the last carry, add the sum to itself rotated by
	 * 32 bits. The upper word of that result is (high + low) plus the
	 * carry out of the lower-word addition, i.e. the folded 32-bit sum;
	 * shift it down into r3. r4 (len) is no longer needed and is reused
	 * as scratch.
	 */
1758c2ecf20Sopenharmony_ci	addze	r0,r0			/* add in final carry */
1768c2ecf20Sopenharmony_ci	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
1778c2ecf20Sopenharmony_ci	add	r3,r4,r0
1788c2ecf20Sopenharmony_ci	srdi	r3,r3,32
1798c2ecf20Sopenharmony_ci	blr
1808c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__csum_partial)
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci	.macro srcnr
	/*
	 * Written in front of an instruction (in this file: loads through
	 * the source pointer r3 made while no r14-r16 save frame is live).
	 * Places local label 100 on that instruction and hands it to
	 * EX_TABLE together with .Lerror_nr, the handler that returns 0
	 * without touching the stack.
	 * NOTE(review): EX_TABLE is defined outside this file; presumably it
	 * records an exception-table fixup for the labelled instruction --
	 * confirm against its definition.
	 */
1848c2ecf20Sopenharmony_ci100:
1858c2ecf20Sopenharmony_ci	EX_TABLE(100b,.Lerror_nr)
1868c2ecf20Sopenharmony_ci	.endm
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci	.macro source
	/*
	 * Written in front of an instruction (in this file: loads through
	 * the source pointer r3 made while the r14-r16 save frame is live).
	 * Places local label 150 on that instruction and hands it to
	 * EX_TABLE together with .Lerror, the handler that restores r14-r16
	 * and pops the frame before returning 0.
	 * NOTE(review): EX_TABLE is defined outside this file; presumably it
	 * records an exception-table fixup for the labelled instruction --
	 * confirm against its definition.
	 */
1898c2ecf20Sopenharmony_ci150:
1908c2ecf20Sopenharmony_ci	EX_TABLE(150b,.Lerror)
1918c2ecf20Sopenharmony_ci	.endm
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_ci	.macro dstnr
	/*
	 * Written in front of an instruction (in this file: stores through
	 * the destination pointer r4 made while no r14-r16 save frame is
	 * live). Places local label 200 on that instruction and hands it to
	 * EX_TABLE together with .Lerror_nr, the handler that returns 0
	 * without touching the stack.
	 * NOTE(review): EX_TABLE is defined outside this file; presumably it
	 * records an exception-table fixup for the labelled instruction --
	 * confirm against its definition.
	 */
1948c2ecf20Sopenharmony_ci200:
1958c2ecf20Sopenharmony_ci	EX_TABLE(200b,.Lerror_nr)
1968c2ecf20Sopenharmony_ci	.endm
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci	.macro dest
	/*
	 * Written in front of an instruction (in this file: stores through
	 * the destination pointer r4 made while the r14-r16 save frame is
	 * live). Places local label 250 on that instruction and hands it to
	 * EX_TABLE together with .Lerror, the handler that restores r14-r16
	 * and pops the frame before returning 0.
	 * NOTE(review): EX_TABLE is defined outside this file; presumably it
	 * records an exception-table fixup for the labelled instruction --
	 * confirm against its definition.
	 */
1998c2ecf20Sopenharmony_ci250:
2008c2ecf20Sopenharmony_ci	EX_TABLE(250b,.Lerror)
2018c2ecf20Sopenharmony_ci	.endm
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ci/*
2048c2ecf20Sopenharmony_ci * Computes the checksum of a memory block at src, length len,
2058c2ecf20Sopenharmony_ci * and adds in 0xffffffff (32-bit), while copying the block to dst.
2068c2ecf20Sopenharmony_ci * If an access exception occurs, it returns 0.
2078c2ecf20Sopenharmony_ci *
2088c2ecf20Sopenharmony_ci * csum_partial_copy_generic(r3=src, r4=dst, r5=len)
 *
 * Returns (r3): the 64-bit running sum folded down to 32 bits, or 0 when
 * control reaches .Lerror / .Lerror_nr. Stores already performed before
 * that point are not undone.
 *
 * Every access through src (r3) or dst (r4) is prefixed with one of the
 * srcnr/source/dstnr/dest macros: source/dest between the stdu that
 * creates the r14-r16 save frame and the addi that pops it (fixup label
 * .Lerror, which pops the frame first), srcnr/dstnr everywhere else
 * (fixup label .Lerror_nr).
 *
 * r0 holds the running sum and XER[CA] the pending carry between adde
 * instructions; the code relies on the interleaved loads, stores,
 * pointer updates and loop branches leaving CA untouched.
 *
 * Registers written besides r3: r0, r4, r5, r6, r9-r12, ctr, cr0 and
 * XER[CA]; r14-r16 are used only while saved in the temporary frame.
2098c2ecf20Sopenharmony_ci */
2108c2ecf20Sopenharmony_ci_GLOBAL(csum_partial_copy_generic)
	/*
	 * Seed the running sum with -1 (all ones); the add of immediate 0
	 * copies it into r0 and leaves CA = 0.
	 */
2118c2ecf20Sopenharmony_ci	li	r6,-1
2128c2ecf20Sopenharmony_ci	addic	r0,r6,0			/* clear carry */
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_ci	srdi.	r6,r5,3			/* less than 8 bytes? */
2158c2ecf20Sopenharmony_ci	beq	.Lcopy_tail_word
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_ci	/*
2188c2ecf20Sopenharmony_ci	 * If only halfword aligned, align to a double word. Since odd
2198c2ecf20Sopenharmony_ci	 * aligned addresses should be rare and they would require more
2208c2ecf20Sopenharmony_ci	 * work to calculate the correct checksum, we ignore that case
2218c2ecf20Sopenharmony_ci	 * and take the potential slowdown of unaligned loads.
2228c2ecf20Sopenharmony_ci	 *
2238c2ecf20Sopenharmony_ci	 * If the source and destination are relatively unaligned we only
2248c2ecf20Sopenharmony_ci	 * align the source. This keeps things simple.
2258c2ecf20Sopenharmony_ci	 */
2268c2ecf20Sopenharmony_ci	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
2278c2ecf20Sopenharmony_ci	beq	.Lcopy_aligned
2288c2ecf20Sopenharmony_ci
	/*
	 * ctr = 4 - r6: for an even src this is the number of halfwords up
	 * to the next 8-byte boundary (at most 3, i.e. 6 bytes, which is
	 * always available because len >= 8 on this path).
	 */
2298c2ecf20Sopenharmony_ci	li	r9,4
2308c2ecf20Sopenharmony_ci	sub	r6,r9,r6
2318c2ecf20Sopenharmony_ci	mtctr	r6
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_ci1:
2348c2ecf20Sopenharmony_cisrcnr;	lhz	r6,0(r3)		/* align to doubleword */
2358c2ecf20Sopenharmony_ci	subi	r5,r5,2
2368c2ecf20Sopenharmony_ci	addi	r3,r3,2
2378c2ecf20Sopenharmony_ci	adde	r0,r0,r6
2388c2ecf20Sopenharmony_cidstnr;	sth	r6,0(r4)
2398c2ecf20Sopenharmony_ci	addi	r4,r4,2
2408c2ecf20Sopenharmony_ci	bdnz	1b
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci.Lcopy_aligned:
2438c2ecf20Sopenharmony_ci	/*
2448c2ecf20Sopenharmony_ci	 * We unroll the loop such that each iteration is 64 bytes with an
2458c2ecf20Sopenharmony_ci	 * entry and exit limb of 64 bytes, meaning a minimum size of
2468c2ecf20Sopenharmony_ci	 * 128 bytes.
2478c2ecf20Sopenharmony_ci	 */
2488c2ecf20Sopenharmony_ci	srdi.	r6,r5,7
2498c2ecf20Sopenharmony_ci	beq	.Lcopy_tail_doublewords		/* len < 128 */
2508c2ecf20Sopenharmony_ci
	/*
	 * ctr = (len / 64) - 1: the loop at 2: handles every 64-byte chunk
	 * except the last one, which is handled by the straight-line copy
	 * of the loop body that follows the loop.
	 */
2518c2ecf20Sopenharmony_ci	srdi	r6,r5,6
2528c2ecf20Sopenharmony_ci	subi	r6,r6,1
2538c2ecf20Sopenharmony_ci	mtctr	r6
2548c2ecf20Sopenharmony_ci
	/*
	 * r14-r16 serve as extra load targets below; save them in a new
	 * frame. From here until the frame is popped, accesses use the
	 * source/dest macros so that the fixup path pops it as well.
	 */
2558c2ecf20Sopenharmony_ci	stdu	r1,-STACKFRAMESIZE(r1)
2568c2ecf20Sopenharmony_ci	std	r14,STK_REG(R14)(r1)
2578c2ecf20Sopenharmony_ci	std	r15,STK_REG(R15)(r1)
2588c2ecf20Sopenharmony_ci	std	r16,STK_REG(R16)(r1)
2598c2ecf20Sopenharmony_ci
	/* Preload the first 32 bytes of the first chunk */
2608c2ecf20Sopenharmony_cisource;	ld	r6,0(r3)
2618c2ecf20Sopenharmony_cisource;	ld	r9,8(r3)
2628c2ecf20Sopenharmony_ci
2638c2ecf20Sopenharmony_cisource;	ld	r10,16(r3)
2648c2ecf20Sopenharmony_cisource;	ld	r11,24(r3)
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_ci	/*
2678c2ecf20Sopenharmony_ci	 * On POWER6 and POWER7 back to back adde instructions take 2 cycles
2688c2ecf20Sopenharmony_ci	 * because of the XER dependency. This means the fastest this loop can
2698c2ecf20Sopenharmony_ci	 * go is 16 cycles per iteration. The scheduling of the loop below has
2708c2ecf20Sopenharmony_ci	 * been shown to hit this on both POWER6 and POWER7.
2718c2ecf20Sopenharmony_ci	 */
2728c2ecf20Sopenharmony_ci	.align 5
2738c2ecf20Sopenharmony_ci2:
2748c2ecf20Sopenharmony_ci	adde	r0,r0,r6
2758c2ecf20Sopenharmony_cisource;	ld	r12,32(r3)
2768c2ecf20Sopenharmony_cisource;	ld	r14,40(r3)
2778c2ecf20Sopenharmony_ci
2788c2ecf20Sopenharmony_ci	adde	r0,r0,r9
2798c2ecf20Sopenharmony_cisource;	ld	r15,48(r3)
2808c2ecf20Sopenharmony_cisource;	ld	r16,56(r3)
2818c2ecf20Sopenharmony_ci	addi	r3,r3,64
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci	adde	r0,r0,r10
2848c2ecf20Sopenharmony_cidest;	std	r6,0(r4)
2858c2ecf20Sopenharmony_cidest;	std	r9,8(r4)
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_ci	adde	r0,r0,r11
2888c2ecf20Sopenharmony_cidest;	std	r10,16(r4)
2898c2ecf20Sopenharmony_cidest;	std	r11,24(r4)
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_ci	adde	r0,r0,r12
2928c2ecf20Sopenharmony_cidest;	std	r12,32(r4)
2938c2ecf20Sopenharmony_cidest;	std	r14,40(r4)
2948c2ecf20Sopenharmony_ci
2958c2ecf20Sopenharmony_ci	adde	r0,r0,r14
2968c2ecf20Sopenharmony_cidest;	std	r15,48(r4)
2978c2ecf20Sopenharmony_cidest;	std	r16,56(r4)
2988c2ecf20Sopenharmony_ci	addi	r4,r4,64
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_ci	adde	r0,r0,r15
	/*
	 * r6/r9/r10/r11 have been stored already, so they can be reloaded
	 * with the first 32 bytes of the next chunk (r3 points at it).
	 */
3018c2ecf20Sopenharmony_cisource;	ld	r6,0(r3)
3028c2ecf20Sopenharmony_cisource;	ld	r9,8(r3)
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci	adde	r0,r0,r16
3058c2ecf20Sopenharmony_cisource;	ld	r10,16(r3)
3068c2ecf20Sopenharmony_cisource;	ld	r11,24(r3)
3078c2ecf20Sopenharmony_ci	bdnz	2b
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci
	/*
	 * Exit limb: sum and store the final 64-byte chunk. Its first 32
	 * bytes are already in r6, r9, r10 and r11; nothing beyond it is
	 * loaded.
	 */
3108c2ecf20Sopenharmony_ci	adde	r0,r0,r6
3118c2ecf20Sopenharmony_cisource;	ld	r12,32(r3)
3128c2ecf20Sopenharmony_cisource;	ld	r14,40(r3)
3138c2ecf20Sopenharmony_ci
3148c2ecf20Sopenharmony_ci	adde	r0,r0,r9
3158c2ecf20Sopenharmony_cisource;	ld	r15,48(r3)
3168c2ecf20Sopenharmony_cisource;	ld	r16,56(r3)
3178c2ecf20Sopenharmony_ci	addi	r3,r3,64
3188c2ecf20Sopenharmony_ci
3198c2ecf20Sopenharmony_ci	adde	r0,r0,r10
3208c2ecf20Sopenharmony_cidest;	std	r6,0(r4)
3218c2ecf20Sopenharmony_cidest;	std	r9,8(r4)
3228c2ecf20Sopenharmony_ci
3238c2ecf20Sopenharmony_ci	adde	r0,r0,r11
3248c2ecf20Sopenharmony_cidest;	std	r10,16(r4)
3258c2ecf20Sopenharmony_cidest;	std	r11,24(r4)
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ci	adde	r0,r0,r12
3288c2ecf20Sopenharmony_cidest;	std	r12,32(r4)
3298c2ecf20Sopenharmony_cidest;	std	r14,40(r4)
3308c2ecf20Sopenharmony_ci
3318c2ecf20Sopenharmony_ci	adde	r0,r0,r14
3328c2ecf20Sopenharmony_cidest;	std	r15,48(r4)
3338c2ecf20Sopenharmony_cidest;	std	r16,56(r4)
3348c2ecf20Sopenharmony_ci	addi	r4,r4,64
3358c2ecf20Sopenharmony_ci
3368c2ecf20Sopenharmony_ci	adde	r0,r0,r15
3378c2ecf20Sopenharmony_ci	adde	r0,r0,r16
3388c2ecf20Sopenharmony_ci
	/* Restore r14-r16 and pop the temporary frame */
3398c2ecf20Sopenharmony_ci	ld	r14,STK_REG(R14)(r1)
3408c2ecf20Sopenharmony_ci	ld	r15,STK_REG(R15)(r1)
3418c2ecf20Sopenharmony_ci	ld	r16,STK_REG(R16)(r1)
3428c2ecf20Sopenharmony_ci	addi	r1,r1,STACKFRAMESIZE
3438c2ecf20Sopenharmony_ci
	/* r5 was not updated in the loop; keep only what the 64-byte chunks left over */
3448c2ecf20Sopenharmony_ci	andi.	r5,r5,63
3458c2ecf20Sopenharmony_ci
3468c2ecf20Sopenharmony_ci.Lcopy_tail_doublewords:		/* Up to 127 bytes to go */
3478c2ecf20Sopenharmony_ci	srdi.	r6,r5,3
3488c2ecf20Sopenharmony_ci	beq	.Lcopy_tail_word
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci	mtctr	r6
3518c2ecf20Sopenharmony_ci3:
3528c2ecf20Sopenharmony_cisrcnr;	ld	r6,0(r3)
3538c2ecf20Sopenharmony_ci	addi	r3,r3,8
3548c2ecf20Sopenharmony_ci	adde	r0,r0,r6
3558c2ecf20Sopenharmony_cidstnr;	std	r6,0(r4)
3568c2ecf20Sopenharmony_ci	addi	r4,r4,8
3578c2ecf20Sopenharmony_ci	bdnz	3b
3588c2ecf20Sopenharmony_ci
	/* r5 = bytes left after the whole doublewords */
3598c2ecf20Sopenharmony_ci	andi.	r5,r5,7
3608c2ecf20Sopenharmony_ci
3618c2ecf20Sopenharmony_ci.Lcopy_tail_word:			/* Up to 7 bytes to go */
3628c2ecf20Sopenharmony_ci	srdi.	r6,r5,2
3638c2ecf20Sopenharmony_ci	beq	.Lcopy_tail_halfword
3648c2ecf20Sopenharmony_ci
3658c2ecf20Sopenharmony_cisrcnr;	lwz	r6,0(r3)
3668c2ecf20Sopenharmony_ci	addi	r3,r3,4
3678c2ecf20Sopenharmony_ci	adde	r0,r0,r6
3688c2ecf20Sopenharmony_cidstnr;	stw	r6,0(r4)
3698c2ecf20Sopenharmony_ci	addi	r4,r4,4
3708c2ecf20Sopenharmony_ci	subi	r5,r5,4
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_ci.Lcopy_tail_halfword:			/* Up to 3 bytes to go */
3738c2ecf20Sopenharmony_ci	srdi.	r6,r5,1
3748c2ecf20Sopenharmony_ci	beq	.Lcopy_tail_byte
3758c2ecf20Sopenharmony_ci
3768c2ecf20Sopenharmony_cisrcnr;	lhz	r6,0(r3)
3778c2ecf20Sopenharmony_ci	addi	r3,r3,2
3788c2ecf20Sopenharmony_ci	adde	r0,r0,r6
3798c2ecf20Sopenharmony_cidstnr;	sth	r6,0(r4)
3808c2ecf20Sopenharmony_ci	addi	r4,r4,2
3818c2ecf20Sopenharmony_ci	subi	r5,r5,2
3828c2ecf20Sopenharmony_ci
3838c2ecf20Sopenharmony_ci.Lcopy_tail_byte:			/* Up to 1 byte to go */
3848c2ecf20Sopenharmony_ci	andi.	r6,r5,1
3858c2ecf20Sopenharmony_ci	beq	.Lcopy_finish
3868c2ecf20Sopenharmony_ci
	/*
	 * The last byte is summed as a halfword whose second byte is zero:
	 * (byte << 8) on big-endian, the byte value itself on little-endian.
	 * The shifted copy goes into r9 so that r6 still holds the raw byte
	 * for the stb below.
	 */
3878c2ecf20Sopenharmony_cisrcnr;	lbz	r6,0(r3)
3888c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN__
3898c2ecf20Sopenharmony_ci	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
3908c2ecf20Sopenharmony_ci	adde	r0,r0,r9
3918c2ecf20Sopenharmony_ci#else
3928c2ecf20Sopenharmony_ci	adde	r0,r0,r6
3938c2ecf20Sopenharmony_ci#endif
3948c2ecf20Sopenharmony_cidstnr;	stb	r6,0(r4)
3958c2ecf20Sopenharmony_ci
3968c2ecf20Sopenharmony_ci.Lcopy_finish:
	/*
	 * After absorbing the last carry, add the sum to itself rotated by
	 * 32 bits. The upper word of that result is (high + low) plus the
	 * carry out of the lower-word addition, i.e. the folded 32-bit sum;
	 * shift it down into r3. r4 (dst) is no longer needed and is reused
	 * as scratch.
	 */
3978c2ecf20Sopenharmony_ci	addze	r0,r0			/* add in final carry */
3988c2ecf20Sopenharmony_ci	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
3998c2ecf20Sopenharmony_ci	add	r3,r4,r0
4008c2ecf20Sopenharmony_ci	srdi	r3,r3,32
4018c2ecf20Sopenharmony_ci	blr
4028c2ecf20Sopenharmony_ci
	/*
	 * Fixup target named by the source/dest macros: the r14-r16 save
	 * frame is live, so restore the registers and pop it, then fall
	 * through to the common "return 0" below.
	 */
4038c2ecf20Sopenharmony_ci.Lerror:
4048c2ecf20Sopenharmony_ci	ld	r14,STK_REG(R14)(r1)
4058c2ecf20Sopenharmony_ci	ld	r15,STK_REG(R15)(r1)
4068c2ecf20Sopenharmony_ci	ld	r16,STK_REG(R16)(r1)
4078c2ecf20Sopenharmony_ci	addi	r1,r1,STACKFRAMESIZE
	/* Fixup target named by the srcnr/dstnr macros: no frame to undo */
4088c2ecf20Sopenharmony_ci.Lerror_nr:
4098c2ecf20Sopenharmony_ci	li	r3,0
4108c2ecf20Sopenharmony_ci	blr
4118c2ecf20Sopenharmony_ci
4128c2ecf20Sopenharmony_ciEXPORT_SYMBOL(csum_partial_copy_generic)
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_ci/*
4158c2ecf20Sopenharmony_ci * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
4168c2ecf20Sopenharmony_ci *			   const struct in6_addr *daddr,
4178c2ecf20Sopenharmony_ci *			   __u32 len, __u8 proto, __wsum sum)
 *
 * As used by the code below, the arguments arrive in prototype order:
 * r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum.
 *
 * Sums the two doublewords at saddr, the two doublewords at daddr and
 * (len + proto + sum) with end-around carry, folds the 64-bit total to
 * 16 bits, complements it and returns it in the low 16 bits of r3
 * (upper bits zero).
 *
 * Registers written: r0, r3, r5, r8-r11 and XER[CA]. No record-form
 * instructions are used and no stack frame is created.
4188c2ecf20Sopenharmony_ci */
4198c2ecf20Sopenharmony_ci
4208c2ecf20Sopenharmony_ci_GLOBAL(csum_ipv6_magic)
	/*
	 * addc starts the carry chain and the adde's continue it; the plain
	 * add and rotldi instructions interleaved with them operate on r5
	 * only, and the code relies on them leaving CA untouched.
	 */
4218c2ecf20Sopenharmony_ci	ld	r8, 0(r3)
4228c2ecf20Sopenharmony_ci	ld	r9, 8(r3)
4238c2ecf20Sopenharmony_ci	add	r5, r5, r6
4248c2ecf20Sopenharmony_ci	addc	r0, r8, r9
4258c2ecf20Sopenharmony_ci	ld	r10, 0(r4)
4268c2ecf20Sopenharmony_ci	ld	r11, 8(r4)
4278c2ecf20Sopenharmony_ci#ifdef CONFIG_CPU_LITTLE_ENDIAN
	/*
	 * NOTE(review): presumably moves len + proto into the byte lanes
	 * that match the address words as loaded on little-endian; it is
	 * applied before "sum" is added in -- confirm.
	 */
4288c2ecf20Sopenharmony_ci	rotldi	r5, r5, 8
4298c2ecf20Sopenharmony_ci#endif
4308c2ecf20Sopenharmony_ci	adde	r0, r0, r10
4318c2ecf20Sopenharmony_ci	add	r5, r5, r7
4328c2ecf20Sopenharmony_ci	adde	r0, r0, r11
4338c2ecf20Sopenharmony_ci	adde	r0, r0, r5
4348c2ecf20Sopenharmony_ci	addze	r0, r0
	/*
	 * 64 -> 32: the upper word of (r0 + rot32(r0)) is high + low plus
	 * the carry out of the lower-word addition; srdi moves it to r0.
	 */
4358c2ecf20Sopenharmony_ci	rotldi  r3, r0, 32		/* fold two 32 bit halves together */
4368c2ecf20Sopenharmony_ci	add	r3, r0, r3
4378c2ecf20Sopenharmony_ci	srdi	r0, r3, 32
	/*
	 * 32 -> 16: same trick within the low word, leaving the folded sum
	 * in the upper halfword of that word. Complement it, then rotate it
	 * down and mask so that only the low 16 bits of r3 remain.
	 */
4388c2ecf20Sopenharmony_ci	rotlwi	r3, r0, 16		/* fold two 16 bit halves together */
4398c2ecf20Sopenharmony_ci	add	r3, r0, r3
4408c2ecf20Sopenharmony_ci	not	r3, r3
4418c2ecf20Sopenharmony_ci	rlwinm	r3, r3, 16, 16, 31
4428c2ecf20Sopenharmony_ci	blr
4438c2ecf20Sopenharmony_ciEXPORT_SYMBOL(csum_ipv6_magic)
444