/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/export.h>

/*
 * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
 * recommended to use this when possible and we do use it by default.
 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
 * Otherwise, use original.
 */
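
/*
 * Illustrative sketch, not part of this file: callers such as the
 * clear_page() wrapper are expected to pick one of the three variants
 * below at patch time via the alternatives mechanism, roughly:
 *
 *	alternative_call_2(clear_page_orig,
 *			   clear_page_rep,  X86_FEATURE_REP_GOOD,
 *			   clear_page_erms, X86_FEATURE_ERMS,
 *			   ...);
 *
 * The exact call site, constraints and clobbers live in the C header
 * that wraps these functions, not here.
 */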

/*
 * Zero a page.
 * %rdi	- page
 */
SYM_FUNC_START(clear_page_rep)
	movl $4096/8,%ecx
	xorl %eax,%eax
	rep stosq
	RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)

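/*
 * Zero a page with an open-coded, 64-bytes-per-iteration store loop.
 * Used when REP STOSQ is not known to be fast on this CPU.
 * %rdi	- page
 */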
SYM_FUNC_START(clear_page_orig)
	xorl   %eax,%eax
	movl   $4096/64,%ecx
	.p2align 4
.Lloop:
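	/*
	 * %ecx is decremented at the top of the loop; the leaq pointer
	 * update below does not touch the flags, so the jnz at the
	 * bottom still tests the result of this decl.
	 */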
	decl	%ecx
#define PUT(x) movq %rax,x*8(%rdi)
	movq %rax,(%rdi)
	PUT(1)
	PUT(2)
	PUT(3)
	PUT(4)
	PUT(5)
	PUT(6)
	PUT(7)
	leaq	64(%rdi),%rdi
	jnz	.Lloop
	nop
	RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)

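/*
 * Zero a page with a byte-granular REP STOSB. Preferred on CPUs that
 * advertise Enhanced REP MOVSB/STOSB (ERMS).
 * %rdi	- page
 */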
SYM_FUNC_START(clear_page_erms)
	movl $4096,%ecx
	xorl %eax,%eax
	rep stosb
	RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)

/*
 * Default clear user-space.
 * Input:
 * rdi destination
 * rcx count
 * rax is zero
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 */
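
/*
 * Illustrative sketch, not part of this file: this is expected to be
 * the out-of-line fallback behind an inline "rep stosb" in the
 * clear_user() path, patched in when the CPU lacks Fast Short REP
 * STOSB, roughly:
 *
 *	asm volatile(
 *		ALTERNATIVE("rep stosb",
 *			    "call rep_stos_alternative",
 *			    ALT_NOT(X86_FEATURE_FSRS))
 *		: "+c" (size), "+D" (addr)
 *		: "a" (0));
 *
 * which is why %rcx, %rdi and %rax arrive set up as described above.
 */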
SYM_FUNC_START(rep_stos_alternative)
	cmpq $64,%rcx
	jae .Lunrolled

	cmp $8,%ecx
	jae .Lword

	testl %ecx,%ecx
	je .Lexit

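	/*
	 * Byte-at-a-time tail: clears the remaining 1..7 bytes, and is
	 * also the fault-recovery path for the word stores below, so a
	 * re-fault here leaves the exact uncleared count in %rcx.
	 */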
.Lclear_user_tail:
0:	movb %al,(%rdi)
	inc %rdi
	dec %rcx
	jnz .Lclear_user_tail
.Lexit:
	RET

	_ASM_EXTABLE_UA( 0b, .Lexit)

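	/*
	 * 8-byte stores for counts of 8..63 bytes, and for whatever is
	 * left over after the unrolled loop.
	 */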
.Lword:
1:	movq %rax,(%rdi)
	addq $8,%rdi
	sub $8,%ecx
	je .Lexit
	cmp $8,%ecx
	jae .Lword
	jmp .Lclear_user_tail

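	/*
	 * Main unrolled loop: eight 8-byte stores per iteration while
	 * at least 64 bytes remain.
	 */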
	.p2align 4
.Lunrolled:
10:	movq %rax,(%rdi)
11:	movq %rax,8(%rdi)
12:	movq %rax,16(%rdi)
13:	movq %rax,24(%rdi)
14:	movq %rax,32(%rdi)
15:	movq %rax,40(%rdi)
16:	movq %rax,48(%rdi)
17:	movq %rax,56(%rdi)
	addq $64,%rdi
	subq $64,%rcx
	cmpq $64,%rcx
	jae .Lunrolled
	cmpl $8,%ecx
	jae .Lword
	testl %ecx,%ecx
	jne .Lclear_user_tail
	RET

	/*
	 * If we take an exception on any of the
	 * word stores, we know that %rcx isn't zero,
	 * so we can just go to the tail clearing to
	 * get the exact count.
	 *
	 * The unrolled case might end up clearing
	 * some bytes twice. Don't care.
	 *
	 * We could use the value in %rdi to avoid
	 * a second fault on the exact count case,
	 * but do we really care? No.
	 *
	 * Finally, we could try to align %rdi at the
	 * top of the unrolling. But unaligned stores
	 * just aren't that common or expensive.
	 */
	_ASM_EXTABLE_UA( 1b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(10b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(11b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(12b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(13b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(14b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(15b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(16b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(17b, .Lclear_user_tail)
SYM_FUNC_END(rep_stos_alternative)
EXPORT_SYMBOL(rep_stos_alternative)