/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Normally compiler builtins are used, but sometimes the compiler calls
 * out-of-line code. Based on asm-i386/string.h.
 *
 * This assembly file was rewritten from the memmove_64.c file.
 *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
 */
#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <linux/export.h>

#undef memmove

.section .noinstr.text, "ax"

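/*
 * Placed in .noinstr.text, presumably so that instrumentation-free
 * (noinstr) code paths can call memmove() without leaving their section.
 */
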
/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest
 */
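/*
 * C prototype, per the SysV AMD64 calling convention (arguments in rdi,
 * rsi, rdx; return value in rax):
 *
 *	void *memmove(void *dest, const void *src, size_t count);
 */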
SYM_FUNC_START(__memmove)

	mov %rdi, %rax

	/* Decide forward/backward copy mode */
	cmp %rdi, %rsi
	jge .Lmemmove_begin_forward
	mov %rsi, %r8
	add %rdx, %r8
	cmp %rdi, %r8
	jg 2f
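
	/*
	 * Illustrative C sketch of the dispatch above (not part of the
	 * build):
	 *
	 *	if (src >= dest)
	 *		goto forward;	// reads stay ahead of writes
	 *	if (src + count > dest)
	 *		goto backward;	// dest begins inside the source
	 *	goto forward;		// regions are disjoint
	 *
	 * The compares are signed (jge/jg), which is fine as long as both
	 * pointers sit on the same side of the sign boundary, as kernel
	 * addresses do.
	 */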

#define CHECK_LEN	cmp $0x20, %rdx; jb 1f
#define MEMMOVE_BYTES	movq %rdx, %rcx; rep movsb; RET
.Lmemmove_begin_forward:
	ALTERNATIVE_2 __stringify(CHECK_LEN), \
		      __stringify(CHECK_LEN; MEMMOVE_BYTES), X86_FEATURE_ERMS, \
		      __stringify(MEMMOVE_BYTES), X86_FEATURE_FSRM
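
	/*
	 * Patched by alternatives at boot:
	 *  - default: sizes below 32 jump to the common tail at 1:,
	 *    larger ones fall through to the copy loops below
	 *  - ERMS: as above, but sizes >= 32 use rep movsb
	 *  - FSRM: rep movsb is fast even for short copies, so use it
	 *    for every size
	 * A forward rep movsb is safe on this path: either src >= dest,
	 * so reads stay ahead of writes, or the regions do not overlap
	 * at all.
	 */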

	/*
	 * movsq has a high startup latency, so handle sizes below 680
	 * bytes through general-purpose registers instead.
	 */
	cmp $680, %rdx
	jb 3f
	/*
	 * movsq is only a win when src and dest are mutually aligned.
	 */

	cmpb %dil, %sil
	je 4f
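
	/*
	 * cmpb %dil, %sil compares the low address bytes: equality means
	 * src and dest are misaligned by the same amount (their low three
	 * bits match), the case where rep movsq performs well. The
	 * 680-byte cutoff looks empirically tuned; below it the unrolled
	 * register loop wins despite movsq's smaller code.
	 */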
3:
	sub $0x20, %rdx
	/*
	 * We gobble 32 bytes forward in each loop.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae 5b
	addq $0x20, %rdx
	jmp 1f
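	/*
	 * Loop accounting: rdx was biased down by 32 at 3:, so the sub at
	 * 5: sets CF exactly when the 32-byte block about to be copied is
	 * the last full one; the mov/lea instructions leave flags alone,
	 * so jae still tests that sub. The final add undoes the bias,
	 * leaving the true 0..31 byte residue for the common tail at 1:.
	 */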
	/*
	 * Handle data forward by movsq.
	 */
	.p2align 4
4:
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11
	lea -8(%rdi, %rdx), %r10
	shrq $3, %rcx
	rep movsq
	movq %r11, (%r10)
	jmp 13f
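	/*
	 * count need not be a multiple of 8: rep movsq copies count/8
	 * qwords from the head, and the source's last qword, loaded into
	 * r11 up front, is stored at the destination tail afterwards.
	 * Loading it before rep movsq matters when src and dest overlap
	 * by fewer than 8 bytes, as the string copy would otherwise
	 * clobber those source bytes first.
	 */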
.Lmemmove_end_forward:

	/*
	 * Handle data backward by movsq.
	 */
	.p2align 4
7:
	movq %rdx, %rcx
	movq (%rsi), %r11
	movq %rdi, %r10
	leaq -8(%rsi, %rdx), %rsi
	leaq -8(%rdi, %rdx), %rdi
	shrq $3, %rcx
	std
	rep movsq
	cld
	movq %r11, (%r10)
	jmp 13f
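	/*
	 * std makes rep movsq walk downward from the last qword of each
	 * buffer; cld restores the direction flag the rest of the kernel
	 * relies on. The source's first qword is saved in r11 beforehand
	 * and stored last (r10 still holds the original dest), covering
	 * the 0..7 head bytes a qword-granular copy would miss.
	 */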

	/*
	 * Start to prepare for backward copy.
	 */
	.p2align 4
2:
	cmp $0x20, %rdx
	jb 1f
	cmp $680, %rdx
	jb 6f
	cmp %dil, %sil
	je 7b
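	/*
	 * Mirror of the forward checks: under 32 bytes goes straight to
	 * the common tail at 1:, large (>= 680 bytes) mutually aligned
	 * copies take the backward movsq at 7:, everything else falls
	 * through to the unrolled backward loop.
	 */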
6:
	/*
	 * Point rsi/rdi past the end of each buffer: the unrolled loop
	 * below copies backward from the tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * We gobble 32 bytes backward in each loop.
	 */
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae 8b
	/*
	 * Undo the bias and step rsi/rdi back to the head of the
	 * remaining 0..31 bytes.
	 */
	addq $0x20, %rdx
	subq %rdx, %rsi
	subq %rdx, %rdi
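	/*
	 * Common tail for both directions: fewer than 32 bytes remain.
	 * Every case below loads both the head and the tail of the
	 * remaining region into registers before storing anything, so an
	 * overlap between loads and stores cannot corrupt data and no
	 * direction choice is needed.
	 */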
1:
	cmpq $16, %rdx
	jb 9f
	/*
	 * Copy 16 to 31 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Copy 8 to 15 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/*
	 * Copy 4 to 7 bytes.
	 */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/*
	 * Copy 2 to 3 bytes.
	 */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	cmp $1, %rdx
	jb 13f
	/*
	 * Copy the final byte.
	 */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	RET
SYM_FUNC_END(__memmove)
EXPORT_SYMBOL(__memmove)

SYM_FUNC_ALIAS_MEMFUNC(memmove, __memmove)
EXPORT_SYMBOL(memmove)