162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci *  linux/arch/arm/lib/copy_template.s
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci *  Code template for optimized memory copy functions
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci *  Author:	Nicolas Pitre
862306a36Sopenharmony_ci *  Created:	Sep 28, 2005
962306a36Sopenharmony_ci *  Copyright:	MontaVista Software, Inc.
1062306a36Sopenharmony_ci */
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci/*
1362306a36Sopenharmony_ci * Theory of operation
1462306a36Sopenharmony_ci * -------------------
1562306a36Sopenharmony_ci *
1662306a36Sopenharmony_ci * This file provides the core code for a forward memory copy used in
1762306a36Sopenharmony_ci * the implementation of memcpy(), copy_to_user() and copy_from_user().
1862306a36Sopenharmony_ci *
1962306a36Sopenharmony_ci * The including file must define the following accessor macros
2062306a36Sopenharmony_ci * according to the need of the given function:
2162306a36Sopenharmony_ci *
2262306a36Sopenharmony_ci * ldr1w ptr reg abort
2362306a36Sopenharmony_ci *
2462306a36Sopenharmony_ci *	This loads one word from 'ptr', stores it in 'reg' and increments
2562306a36Sopenharmony_ci *	'ptr' to the next word. The 'abort' argument is used for fixup tables.
2662306a36Sopenharmony_ci *
2762306a36Sopenharmony_ci * ldr4w ptr reg1 reg2 reg3 reg4 abort
2862306a36Sopenharmony_ci * ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
2962306a36Sopenharmony_ci *
3062306a36Sopenharmony_ci *	This loads four or eight words starting from 'ptr', stores them
3162306a36Sopenharmony_ci *	in provided registers and increments 'ptr' past those words.
3262306a36Sopenharmony_ci *	The 'abort' argument is used for fixup tables.
3362306a36Sopenharmony_ci *
3462306a36Sopenharmony_ci * ldr1b ptr reg cond abort
3562306a36Sopenharmony_ci *
3662306a36Sopenharmony_ci *	Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
3762306a36Sopenharmony_ci *	It also must apply the condition code if provided, otherwise the
3862306a36Sopenharmony_ci *	"al" condition is assumed by default.
3962306a36Sopenharmony_ci *
4062306a36Sopenharmony_ci * str1w ptr reg abort
4162306a36Sopenharmony_ci * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
4262306a36Sopenharmony_ci * str1b ptr reg cond abort
4362306a36Sopenharmony_ci *
4462306a36Sopenharmony_ci *	Same as their ldr* counterparts, but data is stored to 'ptr' location
4562306a36Sopenharmony_ci *	rather than being loaded.
4662306a36Sopenharmony_ci *
4762306a36Sopenharmony_ci * enter reg1 reg2
4862306a36Sopenharmony_ci *
4962306a36Sopenharmony_ci *	Preserve the provided registers on the stack plus any additional
5062306a36Sopenharmony_ci *	data as needed by the implementation including this code. Called
5162306a36Sopenharmony_ci *	upon code entry.
5262306a36Sopenharmony_ci *
5362306a36Sopenharmony_ci * usave reg1 reg2
5462306a36Sopenharmony_ci *
5562306a36Sopenharmony_ci *	Unwind annotation macro corresponding to the 'enter' macro.
5662306a36Sopenharmony_ci *	It tells the unwinder which registers, plus any additional data,
5762306a36Sopenharmony_ci *	were saved on the stack by a prior 'enter' macro.
5862306a36Sopenharmony_ci *
5962306a36Sopenharmony_ci * exit reg1 reg2
6062306a36Sopenharmony_ci *
6162306a36Sopenharmony_ci *	Restore registers with the values previously saved with the
6262306a36Sopenharmony_ci *	'enter' macro. Called upon code termination.
6362306a36Sopenharmony_ci *
6462306a36Sopenharmony_ci * LDR1W_SHIFT
6562306a36Sopenharmony_ci * STR1W_SHIFT
6662306a36Sopenharmony_ci *
6762306a36Sopenharmony_ci *	Correction to be applied to the "ip" register when branching into
6862306a36Sopenharmony_ci *	the ldr1w or str1w instructions (some of these macros may expand to
6962306a36Sopenharmony_ci *	more than one 32bit instruction in Thumb-2)
7062306a36Sopenharmony_ci */
7162306a36Sopenharmony_ci
/*
 * Forward copy body shared by the including functions.
 *
 * Register use as visible below: r1 is the pointer passed to the ldr*
 * accessors (data is read through it), r0 is the pointer passed to the
 * str* accessors (data is written through it) and r2 is the byte count,
 * kept biased by the constants subtracted along the way.  r3, r4, ip and
 * lr carry data; r5, r6, r8 and r9 are pushed before they are used.
 *
 * The abort=20f and abort=21f targets are labels emitted by
 * copy_abort_preamble.  PLD() and CALGN() wrap optional instructions;
 * both macros are defined outside this file.
 */
7262306a36Sopenharmony_ci	UNWIND(	.fnstart			)
7362306a36Sopenharmony_ci		enter	r4, UNWIND(fpreg,) lr
7462306a36Sopenharmony_ci	UNWIND(	.setfp	fpreg, sp		)
7562306a36Sopenharmony_ci	UNWIND(	mov	fpreg, sp		)
7662306a36Sopenharmony_ci
		/* Fewer than 4 bytes in total: only the byte tail at 8 runs. */
7762306a36Sopenharmony_ci		subs	r2, r2, #4
7862306a36Sopenharmony_ci		blt	8f
		/* r0 not word aligned: copy 1-3 leading bytes at 9 first. */
7962306a36Sopenharmony_ci		ands	ip, r0, #3
8062306a36Sopenharmony_ci	PLD(	pld	[r1, #0]		)
8162306a36Sopenharmony_ci		bne	9f
		/* r0 aligned but r1 is not: shifted copy, dispatched from 10. */
8262306a36Sopenharmony_ci		ands	ip, r1, #3
8362306a36Sopenharmony_ci		bne	10f
8462306a36Sopenharmony_ci
		/*
		 * Both pointers word aligned.  r2 becomes remaining - 32; with
		 * fewer than 32 bytes left go to 5.  The four registers pushed
		 * here are popped at 7, or at abort label 20.
		 */
8562306a36Sopenharmony_ci1:		subs	r2, r2, #(28)
8662306a36Sopenharmony_ci		stmfd	sp!, {r5, r6, r8, r9}
8762306a36Sopenharmony_ci		blt	5f
8862306a36Sopenharmony_ci
		/*
		 * Optional CALGN step: ip = r0 & 31, r3 = 32 - ip.  Unless the
		 * carry test branches to 2, r3 is subtracted from r2 and
		 * control jumps to 6 + ip, i.e. into the ldr1w table, so that
		 * 32 - ip bytes are copied before the 8-word loop.  The bcs
		 * after the str1w table then returns to 2.
		 * NOTE(review): the offset added to r4 is not scaled by
		 * LDR1W_SHIFT, so this appears to assume 4-byte ldr1w
		 * expansions -- confirm where CALGN is enabled.
		 */
8962306a36Sopenharmony_ci	CALGN(	ands	ip, r0, #31		)
9062306a36Sopenharmony_ci	CALGN(	rsb	r3, ip, #32		)
9162306a36Sopenharmony_ci	CALGN(	sbcsne	r4, r3, r2		)  @ C is always set here
9262306a36Sopenharmony_ci	CALGN(	bcs	2f			)
9362306a36Sopenharmony_ci	CALGN(	adr	r4, 6f			)
9462306a36Sopenharmony_ci	CALGN(	subs	r2, r2, r3		)  @ C gets set
9562306a36Sopenharmony_ci	CALGN(	add	pc, r4, ip		)
9662306a36Sopenharmony_ci
		/*
		 * Optional preload set-up: r2 is biased by a further 96; if
		 * that makes it negative, enter the loop at 4 and skip the
		 * pld at 3.
		 */
9762306a36Sopenharmony_ci	PLD(	pld	[r1, #0]		)
9862306a36Sopenharmony_ci2:	PLD(	subs	r2, r2, #96		)
9962306a36Sopenharmony_ci	PLD(	pld	[r1, #28]		)
10062306a36Sopenharmony_ci	PLD(	blt	4f			)
10162306a36Sopenharmony_ci	PLD(	pld	[r1, #60]		)
10262306a36Sopenharmony_ci	PLD(	pld	[r1, #92]		)
10362306a36Sopenharmony_ci
		/*
		 * Main loop: 8 words (32 bytes) per iteration.  The bge tests
		 * the flags of the subs, so str8w is presumably expected not
		 * to change them.  With PLD, the cmn/bge pair keeps looping
		 * from 4 (no further preload) while r2 + 96 >= 0.
		 */
10462306a36Sopenharmony_ci3:	PLD(	pld	[r1, #124]		)
10562306a36Sopenharmony_ci4:		ldr8w	r1, r3, r4, r5, r6, r8, r9, ip, lr, abort=20f
10662306a36Sopenharmony_ci		subs	r2, r2, #32
10762306a36Sopenharmony_ci		str8w	r0, r3, r4, r5, r6, r8, r9, ip, lr, abort=20f
10862306a36Sopenharmony_ci		bge	3b
10962306a36Sopenharmony_ci	PLD(	cmn	r2, #96			)
11062306a36Sopenharmony_ci	PLD(	bge	4b			)
11162306a36Sopenharmony_ci
		/*
		 * Remaining whole words.  Every bias applied to r2 above is a
		 * multiple of 32, so r2 & 28 is the number of bytes (0-28) in
		 * whole words still to copy.  Zero: branch to 7.  Otherwise
		 * jump forward by (32 - ip) << LDR1W_SHIFT relative to pc, which
		 * skips the ldr1w entries that are not needed.  The flags
		 * tested by addne are those of the ands; rsb and lsl do not
		 * set them.
		 * The jump arithmetic depends on the exact size of the code
		 * between addne and the table (the b, the nop padding and each
		 * ldr1w expansion): do not add or remove instructions here.
		 */
11262306a36Sopenharmony_ci5:		ands	ip, r2, #28
11362306a36Sopenharmony_ci		rsb	ip, ip, #32
11462306a36Sopenharmony_ci#if LDR1W_SHIFT > 0
11562306a36Sopenharmony_ci		lsl	ip, ip, #LDR1W_SHIFT
11662306a36Sopenharmony_ci#endif
11762306a36Sopenharmony_ci		addne	pc, pc, ip		@ C is always clear here
11862306a36Sopenharmony_ci		b	7f
11962306a36Sopenharmony_ci6:
12062306a36Sopenharmony_ci		.rept	(1 << LDR1W_SHIFT)
12162306a36Sopenharmony_ci		W(nop)
12262306a36Sopenharmony_ci		.endr
12362306a36Sopenharmony_ci		ldr1w	r1, r3, abort=20f
12462306a36Sopenharmony_ci		ldr1w	r1, r4, abort=20f
12562306a36Sopenharmony_ci		ldr1w	r1, r5, abort=20f
12662306a36Sopenharmony_ci		ldr1w	r1, r6, abort=20f
12762306a36Sopenharmony_ci		ldr1w	r1, r8, abort=20f
12862306a36Sopenharmony_ci		ldr1w	r1, r9, abort=20f
12962306a36Sopenharmony_ci		ldr1w	r1, lr, abort=20f
13062306a36Sopenharmony_ci
		/*
		 * Same computed jump for the stores: ip is rescaled from
		 * ldr1w units to str1w units, then used to enter the str1w
		 * table at the entry matching the first register loaded.
		 */
13162306a36Sopenharmony_ci#if LDR1W_SHIFT < STR1W_SHIFT
13262306a36Sopenharmony_ci		lsl	ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
13362306a36Sopenharmony_ci#elif LDR1W_SHIFT > STR1W_SHIFT
13462306a36Sopenharmony_ci		lsr	ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
13562306a36Sopenharmony_ci#endif
13662306a36Sopenharmony_ci		add	pc, pc, ip
13762306a36Sopenharmony_ci		nop
13862306a36Sopenharmony_ci		.rept	(1 << STR1W_SHIFT)
13962306a36Sopenharmony_ci		W(nop)
14062306a36Sopenharmony_ci		.endr
14162306a36Sopenharmony_ci		str1w	r0, r3, abort=20f
14262306a36Sopenharmony_ci		str1w	r0, r4, abort=20f
14362306a36Sopenharmony_ci		str1w	r0, r5, abort=20f
14462306a36Sopenharmony_ci		str1w	r0, r6, abort=20f
14562306a36Sopenharmony_ci		str1w	r0, r8, abort=20f
14662306a36Sopenharmony_ci		str1w	r0, r9, abort=20f
14762306a36Sopenharmony_ci		str1w	r0, lr, abort=20f
14862306a36Sopenharmony_ci
		/*
		 * C is set when the tables were entered from the CALGN step
		 * (back to the 8-word loop) and clear when entered from 5
		 * (fall through to 7), per the flag comments above.
		 */
14962306a36Sopenharmony_ci	CALGN(	bcs	2b			)
15062306a36Sopenharmony_ci
		/* Undo the push done at 1. */
15162306a36Sopenharmony_ci7:		ldmfd	sp!, {r5, r6, r8, r9}
15262306a36Sopenharmony_ci
		/*
		 * Byte tail (0-3 bytes).  The shift leaves bit 0 of r2 as the
		 * non-zero result (ne) and bit 1 in C (cs): ne copies one
		 * byte, cs copies two.
		 */
15362306a36Sopenharmony_ci8:		movs	r2, r2, lsl #31
15462306a36Sopenharmony_ci		ldr1b	r1, r3, ne, abort=21f
15562306a36Sopenharmony_ci		ldr1b	r1, r4, cs, abort=21f
15662306a36Sopenharmony_ci		ldr1b	r1, ip, cs, abort=21f
15762306a36Sopenharmony_ci		str1b	r0, r3, ne, abort=21f
15862306a36Sopenharmony_ci		str1b	r0, r4, cs, abort=21f
15962306a36Sopenharmony_ci		str1b	r0, ip, cs, abort=21f
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci		exit	r4, UNWIND(fpreg,) pc
16262306a36Sopenharmony_ci
		/*
		 * Destination alignment.  ip = 4 - (r0 & 3) is the number of
		 * bytes (1-3) needed to word-align r0: gt copies the first of
		 * three, ge the next, and the last one is unconditional.  r2
		 * is reduced by ip; if it goes negative only the byte tail is
		 * left.  Otherwise re-test the source: aligned goes back to 1,
		 * unaligned falls through to 10 with ip = r1 & 3.
		 */
16362306a36Sopenharmony_ci9:		rsb	ip, ip, #4
16462306a36Sopenharmony_ci		cmp	ip, #2
16562306a36Sopenharmony_ci		ldr1b	r1, r3, gt, abort=21f
16662306a36Sopenharmony_ci		ldr1b	r1, r4, ge, abort=21f
16762306a36Sopenharmony_ci		ldr1b	r1, lr, abort=21f
16862306a36Sopenharmony_ci		str1b	r0, r3, gt, abort=21f
16962306a36Sopenharmony_ci		str1b	r0, r4, ge, abort=21f
17062306a36Sopenharmony_ci		subs	r2, r2, ip
17162306a36Sopenharmony_ci		str1b	r0, lr, abort=21f
17262306a36Sopenharmony_ci		blt	8b
17362306a36Sopenharmony_ci		ands	ip, r1, #3
17462306a36Sopenharmony_ci		beq	1b
17562306a36Sopenharmony_ci
		/*
		 * Source not word aligned (ip = r1 & 3, 1-3).  Round r1 down
		 * to a word boundary, load that word into lr and select the
		 * forward_copy_shift expansion for the offset: ip == 2 goes to
		 * 17, ip == 3 goes to 18, ip == 1 falls through to the first
		 * expansion.
		 */
17662306a36Sopenharmony_ci10:		bic	r1, r1, #3
17762306a36Sopenharmony_ci		cmp	ip, #2
17862306a36Sopenharmony_ci		ldr1w	r1, lr, abort=21f
17962306a36Sopenharmony_ci		beq	17f
18062306a36Sopenharmony_ci		bgt	18f
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci
/*
 * forward_copy_shift pull push
 *
 * Copy loop used when the source is not word aligned.  On entry r1 has
 * been rounded down to a word boundary and lr holds the word last loaded
 * through it.  Each word stored through r0 is built as
 *	(previous word lspull #pull) | (next word lspush #push)
 * lspull and lspush are not defined in this file (presumably
 * endian-dependent shift mnemonics -- confirm in the including headers).
 * The expansions in this file use pull/push pairs that add up to 32.
 *
 * Labels 11-16 are numeric local labels, so every expansion gets its own
 * copies.  8b is the byte tail of the main body; 19f and 21f are emitted
 * by copy_abort_preamble.
 */
18362306a36Sopenharmony_ci		.macro	forward_copy_shift pull push
18462306a36Sopenharmony_ci
		/* r2 becomes remaining - 32; below 32 bytes skip the 8-word loop. */
18562306a36Sopenharmony_ci		subs	r2, r2, #28
18662306a36Sopenharmony_ci		blt	14f
18762306a36Sopenharmony_ci
		/*
		 * Optional CALGN step: ip = 32 - (r0 & 31).  When the carry
		 * test comes out clear, ip is subtracted from r2 and the
		 * word-at-a-time loop at 15 copies ip bytes first; the
		 * cmp/bge after that loop then comes back to 11.
		 */
18862306a36Sopenharmony_ci	CALGN(	ands	ip, r0, #31		)
18962306a36Sopenharmony_ci	CALGN(	rsb	ip, ip, #32		)
19062306a36Sopenharmony_ci	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
19162306a36Sopenharmony_ci	CALGN(	subcc	r2, r2, ip		)
19262306a36Sopenharmony_ci	CALGN(	bcc	15f			)
19362306a36Sopenharmony_ci
		/*
		 * Five extra work registers; popped after the loop below, or
		 * at abort label 19.
		 */
19462306a36Sopenharmony_ci11:		stmfd	sp!, {r5, r6, r8 - r10}
19562306a36Sopenharmony_ci
		/*
		 * Optional preload set-up: r2 is biased by a further 96; if
		 * that makes it negative, enter the loop at 13 and skip the
		 * pld at 12.
		 */
19662306a36Sopenharmony_ci	PLD(	pld	[r1, #0]		)
19762306a36Sopenharmony_ci	PLD(	subs	r2, r2, #96		)
19862306a36Sopenharmony_ci	PLD(	pld	[r1, #28]		)
19962306a36Sopenharmony_ci	PLD(	blt	13f			)
20062306a36Sopenharmony_ci	PLD(	pld	[r1, #60]		)
20162306a36Sopenharmony_ci	PLD(	pld	[r1, #92]		)
20262306a36Sopenharmony_ci
		/*
		 * Main loop: load 8 new words (two ldr4w), merge each with its
		 * predecessor starting from the carried-over lr, and store 8
		 * words.  The last word loaded stays in lr for the next
		 * iteration.  The bge tests the flags of the subs, so the
		 * accessor macros in between are presumably expected not to
		 * change them.
		 */
20362306a36Sopenharmony_ci12:	PLD(	pld	[r1, #124]		)
20462306a36Sopenharmony_ci13:		ldr4w	r1, r4, r5, r6, r8, abort=19f
20562306a36Sopenharmony_ci		mov	r3, lr, lspull #\pull
20662306a36Sopenharmony_ci		subs	r2, r2, #32
20762306a36Sopenharmony_ci		ldr4w	r1, r9, r10, ip, lr, abort=19f
20862306a36Sopenharmony_ci		orr	r3, r3, r4, lspush #\push
20962306a36Sopenharmony_ci		mov	r4, r4, lspull #\pull
21062306a36Sopenharmony_ci		orr	r4, r4, r5, lspush #\push
21162306a36Sopenharmony_ci		mov	r5, r5, lspull #\pull
21262306a36Sopenharmony_ci		orr	r5, r5, r6, lspush #\push
21362306a36Sopenharmony_ci		mov	r6, r6, lspull #\pull
21462306a36Sopenharmony_ci		orr	r6, r6, r8, lspush #\push
21562306a36Sopenharmony_ci		mov	r8, r8, lspull #\pull
21662306a36Sopenharmony_ci		orr	r8, r8, r9, lspush #\push
21762306a36Sopenharmony_ci		mov	r9, r9, lspull #\pull
21862306a36Sopenharmony_ci		orr	r9, r9, r10, lspush #\push
21962306a36Sopenharmony_ci		mov	r10, r10, lspull #\pull
22062306a36Sopenharmony_ci		orr	r10, r10, ip, lspush #\push
22162306a36Sopenharmony_ci		mov	ip, ip, lspull #\pull
22262306a36Sopenharmony_ci		orr	ip, ip, lr, lspush #\push
22362306a36Sopenharmony_ci		str8w	r0, r3, r4, r5, r6, r8, r9, r10, ip, abort=19f
22462306a36Sopenharmony_ci		bge	12b
22562306a36Sopenharmony_ci	PLD(	cmn	r2, #96			)
22662306a36Sopenharmony_ci	PLD(	bge	13b			)
22762306a36Sopenharmony_ci
		/* Undo the push done at 11. */
22862306a36Sopenharmony_ci		ldmfd	sp!, {r5, r6, r8 - r10}
22962306a36Sopenharmony_ci
		/* ip = bytes (0-28) in whole words still to copy; none: go to 16. */
23062306a36Sopenharmony_ci14:		ands	ip, r2, #28
23162306a36Sopenharmony_ci		beq	16f
23262306a36Sopenharmony_ci
		/*
		 * One word per iteration: merge the carried-over lr with the
		 * next word loaded, store the result, and count ip down by 4
		 * until it reaches zero.  Nothing extra is on the stack here,
		 * hence abort=21f.
		 */
23362306a36Sopenharmony_ci15:		mov	r3, lr, lspull #\pull
23462306a36Sopenharmony_ci		ldr1w	r1, lr, abort=21f
23562306a36Sopenharmony_ci		subs	ip, ip, #4
23662306a36Sopenharmony_ci		orr	r3, r3, lr, lspush #\push
23762306a36Sopenharmony_ci		str1w	r0, r3, abort=21f
23862306a36Sopenharmony_ci		bgt	15b
23962306a36Sopenharmony_ci	CALGN(	cmp	r2, #0			)
24062306a36Sopenharmony_ci	CALGN(	bge	11b			)
24162306a36Sopenharmony_ci
		/*
		 * r1 points one word past lr, but push / 8 bytes of lr have
		 * not been stored yet.  Step r1 back by that amount so the
		 * byte tail at 8 resumes at the first byte not yet copied.
		 */
24262306a36Sopenharmony_ci16:		sub	r1, r1, #(\push / 8)
24362306a36Sopenharmony_ci		b	8b
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci		.endm
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci
/*
 * One expansion of forward_copy_shift per source offset within a word,
 * selected by the cmp ip, #2 / beq 17f / bgt 18f sequence at label 10:
 * the fall-through case (ip == 1) uses pull=8 push=24, label 17
 * (ip == 2) uses pull=16 push=16 and label 18 (ip == 3) uses pull=24
 * push=8.  Every expansion ends with an unconditional branch to label 8,
 * so none of them falls into the next.
 */
24862306a36Sopenharmony_ci		forward_copy_shift	pull=8	push=24
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci17:		forward_copy_shift	pull=16	push=16
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci18:		forward_copy_shift	pull=24	push=8
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	UNWIND(	.fnend				)
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci/*
25762306a36Sopenharmony_ci * Abort preamble and completion macros.
25862306a36Sopenharmony_ci * If a fixup handler is required then those macros must surround it.
25962306a36Sopenharmony_ci * It is assumed that the fixup code will handle the private part of
26062306a36Sopenharmony_ci * the exit macro.
26162306a36Sopenharmony_ci */
26262306a36Sopenharmony_ci
/*
 * copy_abort_preamble: emits the three labels used as abort= targets by
 * the copy code and brings the stack to a common state.
 *
 *   19: reached with {r5, r6, r8 - r10} still pushed (the 8-word loop of
 *       forward_copy_shift); pops them and branches to 21.
 *   20: reached with {r5, r6, r8, r9} still pushed (the word-aligned
 *       path); pops them and falls into 21.
 *   21: reached with none of those extra registers on the stack; whatever
 *       the includer places after this macro runs from here.
 */
26362306a36Sopenharmony_ci	.macro	copy_abort_preamble
26462306a36Sopenharmony_ci19:	ldmfd	sp!, {r5, r6, r8 - r10}
26562306a36Sopenharmony_ci	b	21f
26662306a36Sopenharmony_ci20:	ldmfd	sp!, {r5, r6, r8, r9}
26762306a36Sopenharmony_ci21:
26862306a36Sopenharmony_ci	.endm
26962306a36Sopenharmony_ci
/*
 * copy_abort_end: closes a fixup handler by popping r4, the optional
 * unwind frame register and pc, which leaves the function.  The register
 * list mirrors the one given to the enter and exit macros in the copy
 * code (with pc in place of lr); this presumably assumes enter pushed
 * exactly those registers -- confirm against the includer's definition of
 * enter.
 */
27062306a36Sopenharmony_ci	.macro	copy_abort_end
27162306a36Sopenharmony_ci	ldmfd	sp!, {r4, UNWIND(fpreg,) pc}
27262306a36Sopenharmony_ci	.endm
27362306a36Sopenharmony_ci
274