/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/trapnr.h>

.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(100b, 103b)
	_ASM_EXTABLE_CPY(101b, 103b)
	.endm

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f				/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	RET

	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp .Lcopy_user_handle_tail	/* ecx is zerorest also */
	.previous

	_ASM_EXTABLE_CPY(1b, 30b)
	_ASM_EXTABLE_CPY(2b, 30b)
	_ASM_EXTABLE_CPY(3b, 30b)
	_ASM_EXTABLE_CPY(4b, 30b)
	_ASM_EXTABLE_CPY(5b, 30b)
	_ASM_EXTABLE_CPY(6b, 30b)
	_ASM_EXTABLE_CPY(7b, 30b)
	_ASM_EXTABLE_CPY(8b, 30b)
	_ASM_EXTABLE_CPY(9b, 30b)
	_ASM_EXTABLE_CPY(10b, 30b)
	_ASM_EXTABLE_CPY(11b, 30b)
	_ASM_EXTABLE_CPY(12b, 30b)
	_ASM_EXTABLE_CPY(13b, 30b)
	_ASM_EXTABLE_CPY(14b, 30b)
	_ASM_EXTABLE_CPY(15b, 30b)
	_ASM_EXTABLE_CPY(16b, 30b)
	_ASM_EXTABLE_CPY(18b, 40b)
	_ASM_EXTABLE_CPY(19b, 40b)
	_ASM_EXTABLE_CPY(21b, 50b)
	_ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
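
/*
 * Illustrative sketch (not assembled, not part of the original file):
 * all three copy_user_* variants in this file share one contract --
 * destination in %rdi, source in %rsi, count in %rdx, and the number
 * of uncopied bytes (0 on full success) returned in %eax. Seen from C,
 * a caller is therefore expected to look roughly like the hypothetical
 * example below; "example_copy", "dst", "src" and "len" are made-up
 * names, and real kernel callers normally select one of the variants
 * through the alternatives mechanism rather than calling it directly:
 *
 *	static int example_copy(void *dst, const void *src, unsigned int len)
 *	{
 *		unsigned long not_copied;
 *
 *		not_copied = copy_user_generic_unrolled(dst, src, len);
 *		if (not_copied)		// only len - not_copied bytes arrived
 *			return -EFAULT;
 *		return 0;
 *	}
 */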

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page-sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please take these errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f				/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	RET

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, 11b)
	_ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB when it is available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string		/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	RET

	.section .fixup,"ax"
12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * Try to copy the last bytes and clear the rest if needed.
 * Since a protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 * Don't try to copy the tail if a machine check happened.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
	movl %edx,%ecx
	cmp $X86_TRAP_MC,%eax		/* check if X86_TRAP_MC */
	je 3f
1:	rep movsb
2:	mov %ecx,%eax
	ASM_CLAC
	RET

	/*
	 * Return zero to pretend that this copy succeeded. This
	 * is counter-intuitive, but needed to prevent the code
	 * in lib/iov_iter.c from retrying and running back into
	 * the poison cache line again. The machine check handler
	 * will ensure that a SIGBUS is sent to the task.
	 */
3:	xorl %eax,%eax
	ASM_CLAC
	RET

	_ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
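
/*
 * Illustrative sketch (not assembled): every fixup above funnels into
 * .Lcopy_user_handle_tail with %rdi/%rsi pointing at or before the
 * first byte that was not copied, the remaining byte count in %edx and
 * the trap number in %eax. Its behaviour corresponds roughly to the
 * hypothetical C below; "handle_tail", "remaining", "trapnr" and
 * "byte_copy_faults" are made-up names standing in for the
 * rep movsb + exception-table machinery:
 *
 *	unsigned int handle_tail(char *dst, const char *src,
 *				 unsigned int remaining, int trapnr)
 *	{
 *		if (trapnr == X86_TRAP_MC)
 *			return 0;	// pretend success; MCE sends SIGBUS
 *		while (remaining && !byte_copy_faults(dst, src)) {
 *			dst++;
 *			src++;
 *			remaining--;
 *		}
 *		return remaining;	// bytes that were never copied
 *	}
 */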

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * This will force the destination out of the cache for better performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
SYM_FUNC_START(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	RET

	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
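
/*
 * Illustrative sketch (not assembled): __copy_user_nocache follows the
 * same register contract as the cached variants above, but its movnti
 * stores bypass the cache, so both the success path and the fixup path
 * execute sfence before control returns; a caller may therefore rely
 * on the non-temporal stores being ordered once the call is done.
 * A hypothetical C-level caller (the real in-kernel prototype may
 * differ) would look roughly like this; "example_nocache_copy", "dst",
 * "src" and "len" are made-up names:
 *
 *	static int example_nocache_copy(void *dst, const void *src,
 *					unsigned int len)
 *	{
 *		unsigned long left = __copy_user_nocache(dst, src, len);
 *
 *		// by now the sfence has ordered the movnti stores
 *		return left ? -EFAULT : 0;	// short copy handled as above
 *	}
 */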