/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>

.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(100b, 103b)
	_ASM_EXTABLE_CPY(101b, 103b)
	.endm
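
/*
 * The macro above does, roughly, the following (a hedged C sketch for
 * illustration only; the helper name is made up and this is not part of
 * the build).  It byte-copies until the destination is 8-byte aligned
 * and deducts those bytes from the remaining count; a fault in the byte
 * loop is handled by the 103: fixup, which adds the not-yet-copied bytes
 * back before jumping to .Lcopy_user_handle_tail.
 *
 *	static inline void align_destination(unsigned char **dst,
 *					     const unsigned char **src,
 *					     unsigned long *count)
 *	{
 *		unsigned long bytes = (unsigned long)*dst & 7;
 *
 *		if (!bytes)
 *			return;			// already aligned
 *		bytes = 8 - bytes;		// bytes needed to reach alignment
 *		*count -= bytes;
 *		while (bytes--)
 *			*(*dst)++ = *(*src)++;
 *	}
 */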

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	RET

	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE_CPY(1b, 30b)
	_ASM_EXTABLE_CPY(2b, 30b)
	_ASM_EXTABLE_CPY(3b, 30b)
	_ASM_EXTABLE_CPY(4b, 30b)
	_ASM_EXTABLE_CPY(5b, 30b)
	_ASM_EXTABLE_CPY(6b, 30b)
	_ASM_EXTABLE_CPY(7b, 30b)
	_ASM_EXTABLE_CPY(8b, 30b)
	_ASM_EXTABLE_CPY(9b, 30b)
	_ASM_EXTABLE_CPY(10b, 30b)
	_ASM_EXTABLE_CPY(11b, 30b)
	_ASM_EXTABLE_CPY(12b, 30b)
	_ASM_EXTABLE_CPY(13b, 30b)
	_ASM_EXTABLE_CPY(14b, 30b)
	_ASM_EXTABLE_CPY(15b, 30b)
	_ASM_EXTABLE_CPY(16b, 30b)
	_ASM_EXTABLE_CPY(18b, 40b)
	_ASM_EXTABLE_CPY(19b, 40b)
	_ASM_EXTABLE_CPY(21b, 50b)
	_ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
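
/*
 * For reference, the fixup labels above recompute how many bytes were
 * still outstanding when the fault hit, before falling through to
 * .Lcopy_user_handle_tail.  A hedged C rendering (illustration only,
 * with made-up variable names; %ecx holds the remaining loop count and
 * %edx the remaining tail at each point):
 *
 *	// 30: fault in the 64-byte unrolled loop (labels 1-16)
 *	remaining = (loops_left << 6) + tail;
 *	// 40: fault in the 8-byte loop (labels 18-19)
 *	remaining = tail + 8 * qwords_left;
 *	// 50: fault in the byte loop (labels 21-22)
 *	remaining = bytes_left;
 */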

/*
 * Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. Keep those errata in mind if you
 * feel the need to lift this limit.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	RET

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, 11b)
	_ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
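
/*
 * A hedged C-level sketch of the string variant above (illustration
 * only; the helper name is made up and this is not part of the build):
 * after ALIGN_DESTINATION, "rep movsq" moves count/8 quadwords and
 * "rep movsb" mops up the count%8 tail.  On a fault, %rcx holds the
 * iterations that were left, so 11:/12: rebuild the byte count as
 * tail + 8 * qwords_left (or just bytes_left for the byte phase).
 *
 *	unsigned long copy_string_sketch(char *dst, const char *src,
 *					 unsigned int count)
 *	{
 *		unsigned int qwords = count >> 3;
 *		unsigned int tail   = count & 7;
 *		unsigned int i;
 *
 *		for (i = 0; i < qwords; i++)		// "rep movsq"
 *			__builtin_memcpy(dst + 8 * i, src + 8 * i, 8);
 *		for (i = 0; i < tail; i++)		// "rep movsb"
 *			dst[8 * qwords + i] = src[8 * qwords + i];
 *		return 0;	// the asm returns uncopied bytes on a fault
 *	}
 */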

/*
 * Some CPUs support the enhanced REP MOVSB/STOSB (ERMS) instructions.
 * It is recommended to use them for copies when the feature is enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	RET

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
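
/*
 * The three exported variants above are normally not called directly;
 * the C side picks one based on CPU features (REP_GOOD, ERMS) via
 * alternatives patching.  A hedged, simplified if/else stand-in for
 * that selection (illustration only; the dispatcher name is made up
 * and the real code uses ALTERNATIVE-style runtime patching instead):
 *
 *	unsigned long copy_user_dispatch(void *to, const void *from,
 *					 unsigned int len)
 *	{
 *		if (boot_cpu_has(X86_FEATURE_ERMS))
 *			return copy_user_enhanced_fast_string(to, from, len);
 *		if (boot_cpu_has(X86_FEATURE_REP_GOOD))
 *			return copy_user_generic_string(to, from, len);
 *		return copy_user_generic_unrolled(to, from, len);
 *	}
 */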

/*
 * Try to copy the last bytes and clear the rest if needed.
 * Since a protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 * Don't try to copy the tail if a machine check happened.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
	movl %edx,%ecx
	cmp $X86_TRAP_MC,%eax		/* check if X86_TRAP_MC */
	je 3f
1:	rep movsb
2:	mov %ecx,%eax
	ASM_CLAC
	RET

	/*
	 * Return zero to pretend that this copy succeeded. This
	 * is counter-intuitive, but needed to prevent the code
	 * in lib/iov_iter.c from retrying and running back into
	 * the poison cache line again. The machine check handler
	 * will ensure that a SIGBUS is sent to the task.
	 */
3:	xorl %eax,%eax
	ASM_CLAC
	RET

	_ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
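
/*
 * A hedged C sketch of the tail handler above (illustration only; the
 * helper name is made up).  The byte loop is the "rep movsb" covered by
 * the exception table: in C it simply finishes, while in the asm a
 * second fault stops it early and the leftover count is returned.
 *
 *	unsigned long handle_tail_sketch(char *dst, const char *src,
 *					 unsigned int remaining, int trapnr)
 *	{
 *		unsigned int i;
 *
 *		if (trapnr == X86_TRAP_MC)	// poisoned line: don't touch it again,
 *			return 0;		// the MCE handler sends SIGBUS
 *		for (i = 0; i < remaining; i++)	// "rep movsb"
 *			dst[i] = src[i];
 *		return remaining - i;		// bytes left uncopied
 *	}
 */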

/*
 * __copy_user_nocache - Uncached memory copy with exception handling.
 * This will force the destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
SYM_FUNC_START(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	RET

	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
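
/*
 * A very loose C sketch of the flow in __copy_user_nocache (illustration
 * only; the helper name is made up, __builtin_memcpy stands in for the
 * movnti stores, and the fault fixups are omitted):
 *
 *	void nocache_copy_sketch(char *dst, const char *src, unsigned long len)
 *	{
 *		// the asm first 8-byte aligns dst with a cached byte copy
 *		// (ALIGN_DESTINATION) when len >= 8; that step is omitted here
 *		while (len >= 8) {			// 64-byte + 8-byte movnti loops
 *			__builtin_memcpy(dst, src, 8);
 *			dst += 8; src += 8; len -= 8;
 *		}
 *		if (len >= 4 && !((unsigned long)dst & 3)) {
 *			__builtin_memcpy(dst, src, 4);	// the single 4-byte movnti
 *			dst += 4; src += 4; len -= 4;
 *		}
 *		while (len--)				// cached byte tail
 *			*dst++ = *src++;
 *		// the asm ends with sfence so the non-temporal stores
 *		// are ordered before the function returns
 *	}
 */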