162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */
262306a36Sopenharmony_ci#include <linux/linkage.h>
362306a36Sopenharmony_ci#include <asm/asm.h>
462306a36Sopenharmony_ci#include <asm/export.h>
562306a36Sopenharmony_ci
/*
 * Most CPUs support the enhanced REP MOVSB/STOSB instructions. Using them
 * is recommended whenever possible, and we use them by default.
 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
 * Otherwise, use the original implementation.
 */
1262306a36Sopenharmony_ci
/*
 * clear_page_rep - zero one 4096-byte page with REP STOSQ.
 *
 * Input:
 *   %rdi - page
 *
 * %rax is zeroed here; REP STOSQ consumes %rcx and updates %rdi.
 * Flags are modified by the xor.
 */
SYM_FUNC_START(clear_page_rep)
	xorl	%eax, %eax		/* 32-bit xor clears all of %rax: the fill value */
	movl	$4096/8, %ecx		/* number of 8-byte stores needed for the page */
	rep stosq
	RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
2462306a36Sopenharmony_ci
/*
 * clear_page_orig - zero one 4096-byte page with plain quadword stores.
 *
 * Input:
 *   %rdi - page
 *
 * Each pass of the loop writes 64 bytes as eight 8-byte stores and then
 * advances %rdi by 64; %ecx counts the passes that remain.
 * Modifies %rax (zeroed), %ecx, %rdi and the flags.
 */
SYM_FUNC_START(clear_page_orig)
	movl	$4096/64, %ecx		/* passes of 64 bytes each */
	xorl	%eax, %eax		/* 32-bit xor clears all of %rax: the fill value */
	.p2align 4
.Lnext_64_bytes:
	movq	%rax, (%rdi)
	movq	%rax, 8(%rdi)
	movq	%rax, 16(%rdi)
	movq	%rax, 24(%rdi)
	movq	%rax, 32(%rdi)
	movq	%rax, 40(%rdi)
	movq	%rax, 48(%rdi)
	movq	%rax, 56(%rdi)
	leaq	64(%rdi), %rdi		/* step to the next 64-byte chunk */
	decl	%ecx			/* sets ZF once the last pass is done */
	jnz	.Lnext_64_bytes
	/* NOTE(review): the reason for this nop is not evident from this file; kept as-is. */
	nop
	RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
4662306a36Sopenharmony_ci
/*
 * clear_page_erms - zero one 4096-byte page with REP STOSB.
 *
 * Input:
 *   %rdi - page
 *
 * %rax is zeroed here; REP STOSB consumes %rcx and updates %rdi.
 * Flags are modified by the xor.
 */
SYM_FUNC_START(clear_page_erms)
	xorl	%eax, %eax		/* %al = 0 is the byte that gets stored */
	movl	$4096, %ecx		/* one byte store per byte of the page */
	rep stosb
	RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)
5462306a36Sopenharmony_ci
/*
 * rep_stos_alternative - default routine for clearing user-space memory.
 *
 * Input:
 *   %rdi - destination
 *   %rcx - count, in bytes
 *   %rax - must be zero
 *
 * Output:
 *   %rcx - number of bytes left uncleared, or 0 if successful
 *
 * The count is consumed in three tiers: 64-byte unrolled blocks while at
 * least 64 bytes remain, single quadwords while at least 8 remain, and
 * finally one byte at a time.
 */
SYM_FUNC_START(rep_stos_alternative)
	cmpq	$64, %rcx
	jae	.Lclear_64_bytes

	/* Fewer than 64 bytes remain, so %ecx holds the whole count. */
	cmpl	$8, %ecx
	jae	.Lclear_qword

	testl	%ecx, %ecx
	je	.Ldone

	/* Byte-at-a-time loop; also the landing point after a faulting wide store. */
.Lclear_bytes:
0:	movb	%al, (%rdi)
	incq	%rdi
	decq	%rcx
	jnz	.Lclear_bytes
.Ldone:
	RET

	/* A fault on the byte store returns with %rcx = bytes not yet cleared. */
	_ASM_EXTABLE_UA( 0b, .Ldone)

	/* One quadword per pass; only entered with 8 <= count < 64. */
.Lclear_qword:
1:	movq	%rax, (%rdi)
	addq	$8, %rdi
	subl	$8, %ecx
	je	.Ldone
	cmpl	$8, %ecx
	jae	.Lclear_qword
	jmp	.Lclear_bytes

	/* Eight quadwords (64 bytes) per pass. */
	.p2align 4
.Lclear_64_bytes:
10:	movq	%rax, (%rdi)
11:	movq	%rax, 8(%rdi)
12:	movq	%rax, 16(%rdi)
13:	movq	%rax, 24(%rdi)
14:	movq	%rax, 32(%rdi)
15:	movq	%rax, 40(%rdi)
16:	movq	%rax, 48(%rdi)
17:	movq	%rax, 56(%rdi)
	addq	$64, %rdi
	subq	$64, %rcx
	cmpq	$64, %rcx
	jae	.Lclear_64_bytes
	cmpl	$8, %ecx
	jae	.Lclear_qword
	testl	%ecx, %ecx
	jne	.Lclear_bytes
	RET

	/*
	 * Fault handling for the quadword stores:
	 *
	 * When one of them faults, %rcx is known to be non-zero, so the
	 * fixup simply resumes in the byte loop, which then establishes
	 * the exact number of uncleared bytes.
	 *
	 * After a fault in the unrolled block, %rdi and %rcx still describe
	 * the start of that block, so the byte loop may clear some bytes a
	 * second time. That is harmless.
	 *
	 * The value in %rdi could be used to avoid taking a second fault
	 * when computing the exact count, but that is not worth the effort.
	 *
	 * Likewise, %rdi could be aligned before entering the unrolled
	 * block, but unaligned stores are neither common nor expensive
	 * enough to justify it.
	 */
	_ASM_EXTABLE_UA( 1b, .Lclear_bytes)
	_ASM_EXTABLE_UA(10b, .Lclear_bytes)
	_ASM_EXTABLE_UA(11b, .Lclear_bytes)
	_ASM_EXTABLE_UA(12b, .Lclear_bytes)
	_ASM_EXTABLE_UA(13b, .Lclear_bytes)
	_ASM_EXTABLE_UA(14b, .Lclear_bytes)
	_ASM_EXTABLE_UA(15b, .Lclear_bytes)
	_ASM_EXTABLE_UA(16b, .Lclear_bytes)
	_ASM_EXTABLE_UA(17b, .Lclear_bytes)
SYM_FUNC_END(rep_stos_alternative)
EXPORT_SYMBOL(rep_stos_alternative)
142