162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
262306a36Sopenharmony_ci
362306a36Sopenharmony_ci#include <linux/linkage.h>
462306a36Sopenharmony_ci#include <asm/export.h>
562306a36Sopenharmony_ci
662306a36Sopenharmony_ciSYM_FUNC_START(memmove)
762306a36Sopenharmony_ci/*
862306a36Sopenharmony_ci * void *memmove(void *dest_in, const void *src_in, size_t n)
962306a36Sopenharmony_ci * -mregparm=3 passes these in registers:
1062306a36Sopenharmony_ci * dest_in: %eax
1162306a36Sopenharmony_ci * src_in: %edx
1262306a36Sopenharmony_ci * n: %ecx
1362306a36Sopenharmony_ci * See also: arch/x86/entry/calling.h for description of the calling convention.
1462306a36Sopenharmony_ci * Overlap-safe copy of n bytes; the original dest_in is returned in %eax.
1562306a36Sopenharmony_ci * n can remain in %ecx, but for `rep movsl`, we'll need dest in %edi and src
1662306a36Sopenharmony_ci * in %esi.
1762306a36Sopenharmony_ci */
1862306a36Sopenharmony_ci.set dest_in, %eax
1962306a36Sopenharmony_ci.set dest, %edi
2062306a36Sopenharmony_ci.set src_in, %edx
2162306a36Sopenharmony_ci.set src, %esi
2262306a36Sopenharmony_ci.set n, %ecx
2362306a36Sopenharmony_ci.set tmp0, %edx	// scratch; same register as src_in, free once src_in is copied to src
2462306a36Sopenharmony_ci.set tmp0w, %dx	// 16-bit view of tmp0
2562306a36Sopenharmony_ci.set tmp1, %ebx	// scratch
2662306a36Sopenharmony_ci.set tmp1w, %bx	// 16-bit view of tmp1
2762306a36Sopenharmony_ci.set tmp2, %eax	// scratch; same register as dest_in, whose value is kept on the stack
2862306a36Sopenharmony_ci.set tmp3b, %cl	// 8-bit scratch; low byte of n, used only in the 1-byte case
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_ci/*
3162306a36Sopenharmony_ci * Save all callee-saved registers, because this function is going to clobber
3262306a36Sopenharmony_ci * all of them:
3362306a36Sopenharmony_ci */
3462306a36Sopenharmony_ci	pushl	%ebp
3562306a36Sopenharmony_ci	movl	%esp, %ebp	// set standard frame pointer
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ci	pushl	%ebx
3862306a36Sopenharmony_ci	pushl	%edi
3962306a36Sopenharmony_ci	pushl	%esi
4062306a36Sopenharmony_ci	pushl	%eax		// save 'dest_in' parameter [eax] as the return value
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci	movl src_in, src	// src (%esi) = src_in; %edx is now free for tmp0
4362306a36Sopenharmony_ci	movl dest_in, dest	// dest (%edi) = dest_in; %eax is now free for tmp2
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci	/* Fewer than 16 bytes: skip the loops and go straight to the tail copy. */
4662306a36Sopenharmony_ci	cmpl	$0x10, n
4762306a36Sopenharmony_ci	jb	.Lmove_16B
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci	/* Copy backwards when src is below dest (unsigned compare), else forwards. */
5062306a36Sopenharmony_ci	cmpl	dest, src
5162306a36Sopenharmony_ci	jb	.Lbackwards_header
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ci	/*
5462306a36Sopenharmony_ci	 * movs instructions have a high startup latency,
5562306a36Sopenharmony_ci	 * so sizes below 680 bytes are copied through general registers.
5662306a36Sopenharmony_ci	 */
5762306a36Sopenharmony_ci	cmpl	$680, n
5862306a36Sopenharmony_ci	jb	.Ltoo_small_forwards
5962306a36Sopenharmony_ci	/* movs is only worthwhile when src and dest share their low 8 address bits. */
6062306a36Sopenharmony_ci	movl	src, tmp0
6162306a36Sopenharmony_ci	xorl	dest, tmp0
6262306a36Sopenharmony_ci	andl	$0xff, tmp0
6362306a36Sopenharmony_ci	jz	.Lforward_movs
6462306a36Sopenharmony_ci.Ltoo_small_forwards:
6562306a36Sopenharmony_ci	subl	$0x10, n		// bias: from here n = (bytes left) - 16
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci	/* Copy 16 bytes forward per iteration, through tmp0/tmp1. */
6862306a36Sopenharmony_ci.Lmove_16B_forwards_loop:
6962306a36Sopenharmony_ci	subl	$0x10, n		// borrow (CF) => fewer than 16 bytes remain after this chunk
7062306a36Sopenharmony_ci	movl	0*4(src), tmp0
7162306a36Sopenharmony_ci	movl	1*4(src), tmp1
7262306a36Sopenharmony_ci	movl	tmp0, 0*4(dest)
7362306a36Sopenharmony_ci	movl	tmp1, 1*4(dest)
7462306a36Sopenharmony_ci	movl	2*4(src), tmp0
7562306a36Sopenharmony_ci	movl	3*4(src), tmp1
7662306a36Sopenharmony_ci	movl	tmp0, 2*4(dest)
7762306a36Sopenharmony_ci	movl	tmp1, 3*4(dest)
7862306a36Sopenharmony_ci	leal	0x10(src), src
7962306a36Sopenharmony_ci	leal	0x10(dest), dest
8062306a36Sopenharmony_ci	jae	.Lmove_16B_forwards_loop	// tests CF from the subl above; movl/leal leave flags alone
8162306a36Sopenharmony_ci	addl	$0x10, n		// undo the bias: n = remaining bytes (0..15)
8262306a36Sopenharmony_ci	jmp	.Lmove_16B
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	/* Forward copy with rep movsl; the last dword is handled separately. */
8562306a36Sopenharmony_ci.p2align 4
8662306a36Sopenharmony_ci.Lforward_movs:
8762306a36Sopenharmony_ci	movl	-4(src, n), tmp0	// load the last 4 source bytes before copying
8862306a36Sopenharmony_ci	leal	-4(dest, n), tmp1	// tmp1 = address of the last 4 destination bytes
8962306a36Sopenharmony_ci	shrl	$2, n			// n = number of whole dwords
9062306a36Sopenharmony_ci	rep	movsl			// NOTE(review): relies on DF being clear on entry -- confirm
9162306a36Sopenharmony_ci	movl	tmp0, (tmp1)		// store the saved tail; covers the n % 4 bytes movsl skipped
9262306a36Sopenharmony_ci	jmp	.Ldone
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	/* Backward copy with rep movsl; the first dword is handled separately. */
9562306a36Sopenharmony_ci.p2align 4
9662306a36Sopenharmony_ci.Lbackwards_movs:
9762306a36Sopenharmony_ci	movl	(src), tmp0		// load the first 4 source bytes before copying
9862306a36Sopenharmony_ci	movl	dest, tmp1		// tmp1 = start of the destination
9962306a36Sopenharmony_ci	leal	-4(src, n), src		// src = last dword of the source
10062306a36Sopenharmony_ci	leal	-4(dest, n), dest	// dest = last dword of the destination
10162306a36Sopenharmony_ci	shrl	$2, n			// n = number of whole dwords
10262306a36Sopenharmony_ci	std				// set DF so movsl walks towards lower addresses
10362306a36Sopenharmony_ci	rep	movsl
10462306a36Sopenharmony_ci	movl	tmp0,(tmp1)		// store the saved head; covers the n % 4 bytes movsl skipped
10562306a36Sopenharmony_ci	cld				// clear DF again before returning
10662306a36Sopenharmony_ci	jmp	.Ldone
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci	/* Backward copy: pick movs or the register loop (same tests as forwards). */
10962306a36Sopenharmony_ci.p2align 4
11062306a36Sopenharmony_ci.Lbackwards_header:
11162306a36Sopenharmony_ci	cmpl	$680, n
11262306a36Sopenharmony_ci	jb	.Ltoo_small_backwards
11362306a36Sopenharmony_ci	movl	src, tmp0
11462306a36Sopenharmony_ci	xorl	dest, tmp0
11562306a36Sopenharmony_ci	andl	$0xff, tmp0
11662306a36Sopenharmony_ci	jz	.Lbackwards_movs
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci	/* Point src and dest one past the end of their buffers. */
11962306a36Sopenharmony_ci.Ltoo_small_backwards:
12062306a36Sopenharmony_ci	addl	n, src
12162306a36Sopenharmony_ci	addl	n, dest
12262306a36Sopenharmony_ci	subl	$0x10, n		// bias: from here n = (bytes left) - 16
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci	/* Copy 16 bytes backward per iteration, through tmp0/tmp1. */
12562306a36Sopenharmony_ci.Lmove_16B_backwards_loop:
12662306a36Sopenharmony_ci	subl	$0x10, n		// borrow (CF) => fewer than 16 bytes remain after this chunk
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci	movl	-1*4(src), tmp0
12962306a36Sopenharmony_ci	movl	-2*4(src), tmp1
13062306a36Sopenharmony_ci	movl	tmp0, -1*4(dest)
13162306a36Sopenharmony_ci	movl	tmp1, -2*4(dest)
13262306a36Sopenharmony_ci	movl	-3*4(src), tmp0
13362306a36Sopenharmony_ci	movl	-4*4(src), tmp1
13462306a36Sopenharmony_ci	movl	tmp0, -3*4(dest)
13562306a36Sopenharmony_ci	movl	tmp1, -4*4(dest)
13662306a36Sopenharmony_ci	leal	-0x10(src), src
13762306a36Sopenharmony_ci	leal	-0x10(dest), dest
13862306a36Sopenharmony_ci	jae	.Lmove_16B_backwards_loop	// tests CF from the subl above; movl/leal leave flags alone
13962306a36Sopenharmony_ci	/* Undo the bias and move src/dest back to the start of the uncopied head. */
14062306a36Sopenharmony_ci	addl	$0x10, n		// n = remaining bytes (0..15)
14162306a36Sopenharmony_ci	subl	n, src
14262306a36Sopenharmony_ci	subl	n, dest
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	/* Tail: 8..15 bytes as first two + last two dwords; all loads precede the stores. */
14562306a36Sopenharmony_ci.p2align 4
14662306a36Sopenharmony_ci.Lmove_16B:
14762306a36Sopenharmony_ci	cmpl	$8, n
14862306a36Sopenharmony_ci	jb	.Lmove_8B
14962306a36Sopenharmony_ci	movl	0*4(src), tmp0
15062306a36Sopenharmony_ci	movl	1*4(src), tmp1
15162306a36Sopenharmony_ci	movl	-2*4(src, n), tmp2
15262306a36Sopenharmony_ci	movl	-1*4(src, n), src	// src is reused as a fourth scratch register from here on
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	movl	tmp0, 0*4(dest)
15562306a36Sopenharmony_ci	movl	tmp1, 1*4(dest)
15662306a36Sopenharmony_ci	movl	tmp2, -2*4(dest, n)
15762306a36Sopenharmony_ci	movl	src, -1*4(dest, n)
15862306a36Sopenharmony_ci	jmp	.Ldone
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	/* Tail: 4..7 bytes as the first and last dword (both loaded before storing). */
16162306a36Sopenharmony_ci.p2align 4
16262306a36Sopenharmony_ci.Lmove_8B:
16362306a36Sopenharmony_ci	cmpl	$4, n
16462306a36Sopenharmony_ci	jb	.Lmove_4B
16562306a36Sopenharmony_ci	movl	0*4(src), tmp0
16662306a36Sopenharmony_ci	movl	-1*4(src, n), tmp1
16762306a36Sopenharmony_ci	movl	tmp0, 0*4(dest)
16862306a36Sopenharmony_ci	movl	tmp1, -1*4(dest, n)
16962306a36Sopenharmony_ci	jmp	.Ldone
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci	/* Tail: 2..3 bytes as the first and last 16-bit word (both loaded before storing). */
17262306a36Sopenharmony_ci.p2align 4
17362306a36Sopenharmony_ci.Lmove_4B:
17462306a36Sopenharmony_ci	cmpl	$2, n
17562306a36Sopenharmony_ci	jb	.Lmove_1B
17662306a36Sopenharmony_ci	movw	0*2(src), tmp0w
17762306a36Sopenharmony_ci	movw	-1*2(src, n), tmp1w
17862306a36Sopenharmony_ci	movw	tmp0w, 0*2(dest)
17962306a36Sopenharmony_ci	movw	tmp1w, -1*2(dest, n)
18062306a36Sopenharmony_ci	jmp	.Ldone
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	/* Tail: a single byte, or nothing at all when n is 0. */
18362306a36Sopenharmony_ci.p2align 4
18462306a36Sopenharmony_ci.Lmove_1B:
18562306a36Sopenharmony_ci	cmpl	$1, n
18662306a36Sopenharmony_ci	jb	.Ldone
18762306a36Sopenharmony_ci	movb	(src), tmp3b		// overwrites the low byte of n, which is not read again
18862306a36Sopenharmony_ci	movb	tmp3b, (dest)
18962306a36Sopenharmony_ci.p2align 4
19062306a36Sopenharmony_ci.Ldone:
19162306a36Sopenharmony_ci	popl	dest_in	// restore 'dest_in' [eax] as the return value
19262306a36Sopenharmony_ci	/* Restore all callee-saved registers: */
19362306a36Sopenharmony_ci	popl	%esi
19462306a36Sopenharmony_ci	popl	%edi
19562306a36Sopenharmony_ci	popl	%ebx
19662306a36Sopenharmony_ci	popl	%ebp
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	RET
19962306a36Sopenharmony_ciSYM_FUNC_END(memmove)
20062306a36Sopenharmony_ciEXPORT_SYMBOL(memmove)
201