/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 */
762306a36Sopenharmony_ci#include <asm/assembler.h>
862306a36Sopenharmony_ci
/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum)
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Note that the entry code in this file loads -1 into r3 before the
 * register is first used as the checksum accumulator.
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */

/*
 * Register aliases used by every code path in this file:
 *   src - source pointer
 *   dst - destination pointer
 *   len - number of bytes still to be copied
 *   sum - checksum accumulator
 */
src	.req	r0
dst	.req	r1
len	.req	r2
sum	.req	r3

/*
 * Zero-length exit, reached from .Lless8 when len == 0.  The checksum
 * register is handed back as it is: no carry is folded in and no
 * rotation is applied on this path.
 *
 * load_regs is a macro that is not defined in this file; it is
 * presumably the counterpart of save_regs that restores the stacked
 * registers and returns to the caller (confirm against the includer).
 */
.Lzero:		mov	r0, sum
		load_regs

/*
 * Align an unaligned destination pointer.  We know that
 * we have >= 8 bytes here, so we don't need to check
 * the length.  Note that the source pointer hasn't been
 * aligned yet.
 *
 * This fragment is called with 'blne' from the entry code and returns
 * through lr.  It copies one byte if dst is odd, then two more bytes
 * if dst is still not 32-bit aligned.  len is reduced with a plain
 * 'sub' so that the carry flag is left alone; each 'adcs' consumes the
 * carry of the previous one and the last carry is passed back to the
 * caller in C.
 *
 * Scratch registers: ip, r8.
 *
 * load1b, load2b, put_byte_0, put_byte_1 and reteq/ret are not defined
 * in this file; they are presumably supplied by the including file and
 * <asm/assembler.h> (confirm there for their exact expansion).
 */
.Ldst_unaligned:
		tst	dst, #1			@ is dst on an odd address?
		beq	.Ldst_16bit		@ no, only the byte pair is needed

		load1b	ip
		sub	len, len, #1		@ 'sub', not 'subs': keep C intact
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	dst, #2
		reteq	lr			@ dst is now 32bit aligned

.Ldst_16bit:	load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
		ret	lr			@ dst is now 32bit aligned

/*
 * Handle 0 to 7 bytes, with any alignment of source and
 * destination pointers.  Note that when we get here, C = 0:
 * the entry code arrives with 'blo' right after 'cmp len, #8',
 * and 'blo' is only taken while the carry flag is clear.
 *
 * Everything is copied bytewise:
 *   - nothing to do for len == 0 (leave through .Lzero);
 *   - one byte if dst is odd;
 *   - byte pairs while bits 1-2 of len are non-zero;
 *   - one final byte if bit 0 of len is set.
 * The result is completed at .Ldone.
 *
 * Scratch registers: ip, r8.  load1b/load2b and put_byte_0/put_byte_1
 * are macros defined outside this file.
 */
.Lless8:	teq	len, #0			@ check for zero count
		beq	.Lzero

		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		beq	.Lless8_aligned

		/* Align dst */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	len, #6			@ any byte pairs left?
		beq	.Lless8_byteonly

1:		load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
.Lless8_aligned:
		tst	len, #6			@ loop while len has bit 1 or 2 set
		bne	1b
.Lless8_byteonly:
		tst	len, #1			@ odd byte at the end?
		beq	.Ldone
		load1b	r8
		adcs	sum, sum, r8, put_byte_0	@ update checksum
		strb	r8, [dst], #1
		b	.Ldone

/*
 * Function entry and the path for a 32-bit aligned source.
 *
 * In:  r0 = src, r1 = dst, r2 = len
 * Out: r0 = checksum (produced at .Ldone or .Lzero)
 *
 * Flow:
 *   - save_regs, then seed the accumulator with -1;
 *   - fewer than 8 bytes: everything is done by .Lless8;
 *   - otherwise clear C, align dst via .Ldst_unaligned if necessary,
 *     and branch to .Lsrc_not_aligned if src is not word aligned;
 *   - aligned case: 16-byte blocks, then an optional 8 and 4 byte
 *     step, then 0-3 trailing bytes taken from one last word.
 *
 * The carry flag is live across the whole sequence, which is why the
 * loop counter is updated with 'sub' + 'teq' rather than 'subs'.
 *
 * Scratch registers: r4-r7, ip (r8 in the called fragments).
 * FN_ENTRY, save_regs, load1l/load2l/load4l, get_byte_N, put_byte_N
 * and lspush are macros defined outside this file; save_regs is
 * presumably what preserves the scratch registers (confirm against
 * the including file).
 */
FN_ENTRY
		save_regs
		mov	sum, #-1		@ accumulator starts as all ones

		cmp	len, #8			@ Ensure that we have at least
		blo	.Lless8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.Ldst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

		tst	src, #3			@ Test source alignment
		bne	.Lsrc_not_aligned

		/* Routine for src & dst aligned */

		bics	ip, len, #15		@ ip = bytes covered by 16-byte blocks
		beq	2f

1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16		@ 'sub' + 'teq' keep C for the next adcs
		teq	ip, #0
		bne	1b

2:		ands	ip, len, #12		@ 4..12 bytes left in whole words?
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	4f

3:		load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

4:		ands	len, len, #3		@ 0..3 trailing bytes
		beq	.Ldone
		load1l	r4			@ the tail is taken from this word
		tst	len, #2
		mov	r5, r4, get_byte_0	@ 'mov' leaves the flags of 'tst' alone
		beq	.Lexit
		adcs	sum, sum, r4, lspush #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2

		/*
		 * Shared tail: r5 holds the candidate last byte in its low
		 * eight bits.  It is stored and summed only if len is odd.
		 */
.Lexit:		tst	len, #1
		strbne	r5, [dst], #1
		andne	r5, r5, #255
		adcsne	sum, sum, r5, put_byte_0

/*
 * Common exit.  The outstanding carry is folded into the result.
 *
 * If the dst pointer was not 16-bit aligned, we
 * need to rotate the checksum here to get around
 * the inefficient byte manipulations in the
 * architecture independent code.
 *
 * The original dst is read back from the stack at [sp, #0]; this
 * relies on save_regs (defined outside this file) having stored it
 * there.  'sum' is reused as a scratch register for that value, as
 * the checksum has already been moved to r0.
 */
.Ldone:		adc	r0, sum, #0
		ldr	sum, [sp, #0]		@ dst
		tst	sum, #1
		movne	r0, r0, ror #8
		load_regs

/*
 * Source not 32-bit aligned (dst already is).
 *
 * The carry from the dst alignment is folded into sum first, because
 * the 'cmp' below overwrites C.  src is then rounded down to a word
 * boundary and the first word is loaded; the low two bits of the
 * original src select the handler:
 *   1 -> falls through here,  2 -> .Lsrc2_aligned,  3 -> .Lsrc3_aligned.
 *
 * Handler for offset 1: r4 always holds the source bytes that have
 * been loaded but not yet written (shifted with lspull #8); every
 * output word is r4 combined with the next source word shifted with
 * lspush #24.  The structure mirrors the aligned path: 16-byte blocks,
 * optional 8 and 4 byte steps, then 0-3 trailing bytes which are all
 * taken from r4.
 *
 * Scratch registers: r4-r8, ip.  load1l/load2l/load4l, lspull, lspush
 * and get_byte_N are macros defined outside this file (lspull/lspush
 * presumably select the shift direction by endianness - confirm in
 * <asm/assembler.h>).
 */
.Lsrc_not_aligned:
		adc	sum, sum, #0		@ include C from dst alignment
		and	ip, src, #3
		bic	src, src, #3
		load1l	r5
		cmp	ip, #2
		beq	.Lsrc2_aligned
		bhi	.Lsrc3_aligned
		mov	r4, r5, lspull #8		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #24
		mov	r5, r5, lspull #8
		orr	r5, r5, r6, lspush #24
		mov	r6, r6, lspull #8
		orr	r6, r6, r7, lspush #24
		mov	r7, r7, lspull #8
		orr	r7, r7, r8, lspush #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #8		@ pending bytes for the next round
		sub	ip, ip, #16		@ 'sub' + 'teq' keep C for the next adcs
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #24
		mov	r5, r5, lspull #8
		orr	r5, r5, r6, lspush #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #8
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lspush #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #8
4:		ands	len, len, #3		@ 0..3 trailing bytes, all in r4
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4, lspush #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2
		b	.Lexit

/*
 * Handler for a source whose low two address bits were 2 (selected by
 * the 'cmp ip, #2' / 'beq' in .Lsrc_not_aligned; r5 holds the first
 * aligned source word on entry).
 *
 * Same scheme as the offset-1 handler with 16-bit shifts: r4 keeps the
 * not yet written part of the previous word (lspull #16) and each
 * output word is completed with the next word shifted by lspush #16.
 * 'adds sum, sum, #0' clears C, which the preceding 'cmp' left set.
 *
 * Tail: r4 supplies two pending bytes; when three trailing bytes are
 * required the third one is fetched with load1b before joining .Lexit.
 *
 * Scratch registers: r4-r8, ip.  The load*, lspull, lspush and
 * get_byte_N macros are defined outside this file.
 */
.Lsrc2_aligned:	mov	r4, r5, lspull #16
		adds	sum, sum, #0		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #16
		mov	r5, r5, lspull #16
		orr	r5, r5, r6, lspush #16
		mov	r6, r6, lspull #16
		orr	r6, r6, r7, lspush #16
		mov	r7, r7, lspull #16
		orr	r7, r7, r8, lspush #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #16		@ pending bytes for the next round
		sub	ip, ip, #16		@ 'sub' + 'teq' keep C for the next adcs
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #16
		mov	r5, r5, lspull #16
		orr	r5, r5, r6, lspush #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #16
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lspush #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #16
4:		ands	len, len, #3		@ 0..3 trailing bytes
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		tst	len, #1
		beq	.Ldone
		load1b	r5			@ third byte is not in r4, fetch it
		b	.Lexit

/*
 * Handler for a source whose low two address bits were 3 (selected by
 * the 'cmp ip, #2' / 'bhi' in .Lsrc_not_aligned; r5 holds the first
 * aligned source word on entry).
 *
 * Same scheme as the other unaligned handlers with the shift amounts
 * swapped: r4 keeps the not yet written part of the previous word
 * (lspull #24) and each output word is completed with the next word
 * shifted by lspush #8.  'adds sum, sum, #0' clears C, which the
 * preceding 'cmp' left set.
 *
 * Tail: r4 supplies a single pending byte; when two or three trailing
 * bytes are required a further word is loaded and its first bytes are
 * used, the last one being handed to .Lexit in r5.
 *
 * Scratch registers: r4-r8, ip.  The load*, lspull, lspush and
 * get_byte_N macros, as well as FN_EXIT, are defined outside this file.
 */
.Lsrc3_aligned:	mov	r4, r5, lspull #24
		adds	sum, sum, #0		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lspush #8
		mov	r5, r5, lspull #24
		orr	r5, r5, r6, lspush #8
		mov	r6, r6, lspull #24
		orr	r6, r6, r7, lspush #8
		mov	r7, r7, lspull #24
		orr	r7, r7, r8, lspush #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lspull #24		@ pending byte for the next round
		sub	ip, ip, #16		@ 'sub' + 'teq' keep C for the next adcs
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lspush #8
		mov	r5, r5, lspull #24
		orr	r5, r5, r6, lspush #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lspull #24
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lspush #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lspull #24
4:		ands	len, len, #3		@ 0..3 trailing bytes
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		strb	r5, [dst], #1
		adcs	sum, sum, r4
		load1l	r4			@ remaining tail bytes come from here
		mov	r5, r4, get_byte_0
		strb	r5, [dst], #1
		adcs	sum, sum, r4, lspush #24
		mov	r5, r4, get_byte_1
		b	.Lexit
FN_EXIT