18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
48c2ecf20Sopenharmony_ci * Copyright 2002 Andi Kleen, SuSE Labs.
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * Functions to copy from and to user space.
78c2ecf20Sopenharmony_ci */
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci#include <linux/linkage.h>
108c2ecf20Sopenharmony_ci#include <asm/current.h>
118c2ecf20Sopenharmony_ci#include <asm/asm-offsets.h>
128c2ecf20Sopenharmony_ci#include <asm/thread_info.h>
138c2ecf20Sopenharmony_ci#include <asm/cpufeatures.h>
148c2ecf20Sopenharmony_ci#include <asm/alternative.h>
158c2ecf20Sopenharmony_ci#include <asm/asm.h>
168c2ecf20Sopenharmony_ci#include <asm/smap.h>
178c2ecf20Sopenharmony_ci#include <asm/export.h>
188c2ecf20Sopenharmony_ci#include <asm/trapnr.h>
198c2ecf20Sopenharmony_ci
/*
 * ALIGN_DESTINATION - byte-copy until the destination is 8-byte aligned.
 *
 * In:  %rdi = dest, %rsi = src, %edx = remaining byte count (>= 8)
 * Out: %rdi/%rsi advanced past the alignment bytes, %edx reduced by them
 * Clobbers: %al, %ecx
 *
 * On a user-access fault, %ecx still holds the bytes this macro has not
 * copied yet; the fixup adds them back to %edx and falls into the common
 * tail handler (which also zero-fills the rest for copy_from_user).
 */
.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx			/* ecx = (dest & 7) - 8 */
	negl %ecx			/* ecx = 8 - (dest & 7) = bytes to align */
	subl %ecx,%edx			/* take them off the main count */
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(100b, 103b)
	_ASM_EXTABLE_CPY(101b, 103b)
	.endm
438c2ecf20Sopenharmony_ci
448c2ecf20Sopenharmony_ci/*
458c2ecf20Sopenharmony_ci * copy_user_generic_unrolled - memory copy with exception handling.
468c2ecf20Sopenharmony_ci * This version is for CPUs like P4 that don't have efficient micro
478c2ecf20Sopenharmony_ci * code for rep movsq
488c2ecf20Sopenharmony_ci *
498c2ecf20Sopenharmony_ci * Input:
508c2ecf20Sopenharmony_ci * rdi destination
518c2ecf20Sopenharmony_ci * rsi source
528c2ecf20Sopenharmony_ci * rdx count
538c2ecf20Sopenharmony_ci *
548c2ecf20Sopenharmony_ci * Output:
558c2ecf20Sopenharmony_ci * eax uncopied bytes or 0 if successful.
568c2ecf20Sopenharmony_ci */
SYM_FUNC_START(copy_user_generic_unrolled)
	ASM_STAC			/* open the user-space access window (SMAP) */
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx			/* edx = tail bytes (count % 64) */
	shrl $6,%ecx			/* ecx = number of 64-byte chunks */
	jz .L_copy_short_string
	/* main loop: 64 bytes per iteration, 4 loads then 4 stores, twice */
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
	/* qword loop; also the <64-byte entry for the fast-string variant */
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx			/* edx = tail bytes (count % 8) */
	shrl $3,%ecx			/* ecx = qword count */
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx			/* any single bytes left? */
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax			/* success: return 0 uncopied bytes */
	ASM_CLAC
	RET

	.section .fixup,"ax"
	/*
	 * Fault fixups: recompute the bytes still to copy into %edx from
	 * the loop counter %ecx, then let the common tail handler retry
	 * byte-by-byte (or zero-fill the rest).
	 */
30:	shll $6,%ecx			/* 64-byte chunks -> bytes */
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx		/* qwords -> bytes, plus tail */
	jmp 60f
50:	movl %ecx,%edx			/* byte loop: ecx is what remains */
60:	jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE_CPY(1b, 30b)
	_ASM_EXTABLE_CPY(2b, 30b)
	_ASM_EXTABLE_CPY(3b, 30b)
	_ASM_EXTABLE_CPY(4b, 30b)
	_ASM_EXTABLE_CPY(5b, 30b)
	_ASM_EXTABLE_CPY(6b, 30b)
	_ASM_EXTABLE_CPY(7b, 30b)
	_ASM_EXTABLE_CPY(8b, 30b)
	_ASM_EXTABLE_CPY(9b, 30b)
	_ASM_EXTABLE_CPY(10b, 30b)
	_ASM_EXTABLE_CPY(11b, 30b)
	_ASM_EXTABLE_CPY(12b, 30b)
	_ASM_EXTABLE_CPY(13b, 30b)
	_ASM_EXTABLE_CPY(14b, 30b)
	_ASM_EXTABLE_CPY(15b, 30b)
	_ASM_EXTABLE_CPY(16b, 30b)
	_ASM_EXTABLE_CPY(18b, 40b)
	_ASM_EXTABLE_CPY(19b, 40b)
	_ASM_EXTABLE_CPY(21b, 50b)
	_ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci/* Some CPUs run faster using the string copy instructions.
1448c2ecf20Sopenharmony_ci * This is also a lot simpler. Use them when possible.
1458c2ecf20Sopenharmony_ci *
1468c2ecf20Sopenharmony_ci * Only 4GB of copy is supported. This shouldn't be a problem
1478c2ecf20Sopenharmony_ci * because the kernel normally only writes from/to page sized chunks
1488c2ecf20Sopenharmony_ci * even if user space passed a longer buffer.
1498c2ecf20Sopenharmony_ci * And more would be dangerous because both Intel and AMD have
1508c2ecf20Sopenharmony_ci * errata with rep movsq > 4GB. If someone feels the need to fix
1518c2ecf20Sopenharmony_ci * this please consider this.
1528c2ecf20Sopenharmony_ci *
1538c2ecf20Sopenharmony_ci * Input:
1548c2ecf20Sopenharmony_ci * rdi destination
1558c2ecf20Sopenharmony_ci * rsi source
1568c2ecf20Sopenharmony_ci * rdx count
1578c2ecf20Sopenharmony_ci *
1588c2ecf20Sopenharmony_ci * Output:
1598c2ecf20Sopenharmony_ci * eax uncopied bytes or 0 if successful.
1608c2ecf20Sopenharmony_ci */
SYM_FUNC_START(copy_user_generic_string)
	ASM_STAC			/* open the user-space access window (SMAP) */
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx			/* ecx = qword count for rep movsq */
	andl $7,%edx			/* edx = remaining tail bytes */
1:	rep
	movsq
2:	movl %edx,%ecx			/* copy the tail byte-wise */
3:	rep
	movsb
	xorl %eax,%eax			/* success: return 0 uncopied bytes */
	ASM_CLAC
	RET

	.section .fixup,"ax"
	/* movsq fault: bytes left = 8 * remaining qwords (rcx) + tail (rdx) */
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, 11b)
	_ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_ci/*
1908c2ecf20Sopenharmony_ci * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
1918c2ecf20Sopenharmony_ci * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
1928c2ecf20Sopenharmony_ci *
1938c2ecf20Sopenharmony_ci * Input:
1948c2ecf20Sopenharmony_ci * rdi destination
1958c2ecf20Sopenharmony_ci * rsi source
1968c2ecf20Sopenharmony_ci * rdx count
1978c2ecf20Sopenharmony_ci *
1988c2ecf20Sopenharmony_ci * Output:
1998c2ecf20Sopenharmony_ci * eax uncopied bytes or 0 if successful.
2008c2ecf20Sopenharmony_ci */
SYM_FUNC_START(copy_user_enhanced_fast_string)
	ASM_STAC			/* open the user-space access window (SMAP) */
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb				/* ERMSB: one rep movsb does the whole copy */
	xorl %eax,%eax			/* success: return 0 uncopied bytes */
	ASM_CLAC
	RET

	.section .fixup,"ax"
	/* movsb fault: rcx already holds the bytes not yet copied */
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci/*
2228c2ecf20Sopenharmony_ci * Try to copy last bytes and clear the rest if needed.
2238c2ecf20Sopenharmony_ci * Since protection fault in copy_from/to_user is not a normal situation,
2248c2ecf20Sopenharmony_ci * it is not necessary to optimize tail handling.
2258c2ecf20Sopenharmony_ci * Don't try to copy the tail if machine check happened
2268c2ecf20Sopenharmony_ci *
2278c2ecf20Sopenharmony_ci * Input:
2288c2ecf20Sopenharmony_ci * rdi destination
2298c2ecf20Sopenharmony_ci * rsi source
2308c2ecf20Sopenharmony_ci * rdx count
2318c2ecf20Sopenharmony_ci *
2328c2ecf20Sopenharmony_ci * Output:
2338c2ecf20Sopenharmony_ci * eax uncopied bytes or 0 if successful.
2348c2ecf20Sopenharmony_ci */
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
	movl %edx,%ecx			/* ecx = bytes still to attempt */
	cmp $X86_TRAP_MC,%eax		/* check if X86_TRAP_MC */
	je 3f				/* machine check: do not touch the data again */
1:	rep movsb			/* best-effort byte copy; may fault again */
2:	mov %ecx,%eax			/* return the uncopied byte count */
	ASM_CLAC
	RET

	/*
	 * Return zero to pretend that this copy succeeded. This
	 * is counter-intuitive, but needed to prevent the code
	 * in lib/iov_iter.c from retrying and running back into
	 * the poison cache line again. The machine check handler
	 * will ensure that a SIGBUS is sent to the task.
	 */
3:	xorl %eax,%eax
	ASM_CLAC
	RET

	_ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci/*
2598c2ecf20Sopenharmony_ci * copy_user_nocache - Uncached memory copy with exception handling
2608c2ecf20Sopenharmony_ci * This will force destination out of cache for more performance.
2618c2ecf20Sopenharmony_ci *
2628c2ecf20Sopenharmony_ci * Note: Cached memory copy is used when destination or size is not
2638c2ecf20Sopenharmony_ci * naturally aligned. That is:
2648c2ecf20Sopenharmony_ci *  - Require 8-byte alignment when size is 8 bytes or larger.
2658c2ecf20Sopenharmony_ci *  - Require 4-byte alignment when size is 4 bytes.
2668c2ecf20Sopenharmony_ci */
SYM_FUNC_START(__copy_user_nocache)
	ASM_STAC			/* open the user-space access window (SMAP) */

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx			/* edx = tail bytes (count % 64) */
	shrl $6,%ecx			/* ecx = 64-byte chunk count */
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)		/* movnti: non-temporal store, bypasses cache */
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx			/* edx = tail bytes (count % 8) */
	shrl $3,%ecx			/* ecx = qword count */
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no byte left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax			/* success: return 0 uncopied bytes */
	ASM_CLAC
	sfence				/* make the non-temporal stores globally visible */
	RET

	.section .fixup,"ax"
	/*
	 * Fault fixups: recompute the remaining byte count into %rdx from
	 * the loop counter %rcx, fence the completed non-temporal stores,
	 * then fall into the common tail handler.
	 */
.L_fixup_4x8b_copy:
	shll $6,%ecx			/* 64-byte chunks -> bytes */
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx		/* qwords -> bytes, plus tail */
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx		/* dwords -> bytes, plus tail */
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx			/* byte loop: ecx is what remains */
.L_fixup_handle_tail:
	sfence
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
408