/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2023 Linus Torvalds <torvalds@linux-foundation.org>
 */

#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/export.h>

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 *
 * This copies from user space into kernel space, but the kernel
 * space accesses can take a machine check exception, so they too
 * need exception handling.
 *
 * Note: only 32-bit and 64-bit stores have non-temporal versions,
 * and we only use aligned versions. Any unaligned parts at the
 * start or end of the copy will be done using normal cached stores.
 *
 * Input:
 * rdi destination
 * rsi source
 * edx count
 *
 * Output:
 * rax uncopied bytes or 0 if successful.
 */
SYM_FUNC_START(__copy_user_nocache)
	/* If destination is not 8-byte aligned, we'll have to align it */
	testb $7,%dil
	jne .Lalign

.Lis_aligned:
	cmp $64,%edx
	jb .Lquadwords

	.p2align 4,0x90
.Lunrolled:
10:	movq (%rsi),%r8
11:	movq 8(%rsi),%r9
12:	movq 16(%rsi),%r10
13:	movq 24(%rsi),%r11
20:	movnti %r8,(%rdi)
21:	movnti %r9,8(%rdi)
22:	movnti %r10,16(%rdi)
23:	movnti %r11,24(%rdi)
30:	movq 32(%rsi),%r8
31:	movq 40(%rsi),%r9
32:	movq 48(%rsi),%r10
33:	movq 56(%rsi),%r11
40:	movnti %r8,32(%rdi)
41:	movnti %r9,40(%rdi)
42:	movnti %r10,48(%rdi)
43:	movnti %r11,56(%rdi)

	addq $64,%rsi
	addq $64,%rdi
	sub $64,%edx
	cmp $64,%edx
	jae .Lunrolled

/*
 * First set of user mode loads have been done
 * without any stores, so if they fail, we can
 * just try the non-unrolled loop.
 */
_ASM_EXTABLE_UA(10b, .Lquadwords)
_ASM_EXTABLE_UA(11b, .Lquadwords)
_ASM_EXTABLE_UA(12b, .Lquadwords)
_ASM_EXTABLE_UA(13b, .Lquadwords)

/*
 * The second set of user mode loads have been
 * done with 32 bytes stored to the destination,
 * so we need to take that into account before
 * falling back to the non-unrolled loop.
 */
_ASM_EXTABLE_UA(30b, .Lfixup32)
_ASM_EXTABLE_UA(31b, .Lfixup32)
_ASM_EXTABLE_UA(32b, .Lfixup32)
_ASM_EXTABLE_UA(33b, .Lfixup32)

/*
 * An exception on a write means that we're
 * done, but we need to update the count
 * depending on where in the unrolled loop
 * we were.
 */
_ASM_EXTABLE_UA(20b, .Ldone0)
_ASM_EXTABLE_UA(21b, .Ldone8)
_ASM_EXTABLE_UA(22b, .Ldone16)
_ASM_EXTABLE_UA(23b, .Ldone24)
_ASM_EXTABLE_UA(40b, .Ldone32)
_ASM_EXTABLE_UA(41b, .Ldone40)
_ASM_EXTABLE_UA(42b, .Ldone48)
_ASM_EXTABLE_UA(43b, .Ldone56)

/* Non-unrolled loop: one aligned 8-byte non-temporal store at a time */
.Lquadwords:
	cmp $8,%edx
	jb .Llong
50:	movq (%rsi),%rax
51:	movnti %rax,(%rdi)
	addq $8,%rsi
	addq $8,%rdi
	sub $8,%edx
	jmp .Lquadwords

/*
 * If we fail on the last full quadword, we will
 * not try to do any byte-wise cached accesses.
 * We will try to do one more 4-byte uncached
 * one, though.
 */
_ASM_EXTABLE_UA(50b, .Llast4)
_ASM_EXTABLE_UA(51b, .Ldone0)

/* Tail: 4-byte part can still go uncached, 2- and 1-byte parts are cached */
.Llong:
	test $4,%dl
	je .Lword
60:	movl (%rsi),%eax
61:	movnti %eax,(%rdi)
	addq $4,%rsi
	addq $4,%rdi
	sub $4,%edx
.Lword:
	/* All non-temporal stores done - fence them before the cached tail */
	sfence
	test $2,%dl
	je .Lbyte
70:	movw (%rsi),%ax
71:	movw %ax,(%rdi)
	addq $2,%rsi
	addq $2,%rdi
	sub $2,%edx
.Lbyte:
	test $1,%dl
	je .Ldone
80:	movb (%rsi),%al
81:	movb %al,(%rdi)
	dec %edx
.Ldone:
	mov %edx,%eax
	RET

/*
 * If we fail on the last four bytes, we won't
 * bother with any fixups. It's dead, Jim. Note
 * that there's no need for 'sfence' for any
 * of this, since the exception will have been
 * serializing.
 */
_ASM_EXTABLE_UA(60b, .Ldone)
_ASM_EXTABLE_UA(61b, .Ldone)
_ASM_EXTABLE_UA(70b, .Ldone)
_ASM_EXTABLE_UA(71b, .Ldone)
_ASM_EXTABLE_UA(80b, .Ldone)
_ASM_EXTABLE_UA(81b, .Ldone)

/*
 * This is the "head needs aligning" case when
 * the destination isn't 8-byte aligned. The
 * 4-byte case can be done uncached, but any
 * smaller alignment is done with regular stores.
 */
.Lalign:
	test $1,%dil
	je .Lalign_word
	test %edx,%edx
	je .Ldone
90:	movb (%rsi),%al
91:	movb %al,(%rdi)
	inc %rsi
	inc %rdi
	dec %edx
.Lalign_word:
	test $2,%dil
	je .Lalign_long
	cmp $2,%edx
	jb .Lbyte
92:	movw (%rsi),%ax
93:	movw %ax,(%rdi)
	addq $2,%rsi
	addq $2,%rdi
	sub $2,%edx
.Lalign_long:
	test $4,%dil
	je .Lis_aligned
	cmp $4,%edx
	jb .Lword
94:	movl (%rsi),%eax
95:	movnti %eax,(%rdi)
	addq $4,%rsi
	addq $4,%rdi
	sub $4,%edx
	jmp .Lis_aligned

/*
 * If we fail on the initial alignment accesses,
 * we're all done. Again, no point in trying to
 * do byte-by-byte probing if the 4-byte load
 * fails - we're not doing any uncached accesses
 * any more.
 */
_ASM_EXTABLE_UA(90b, .Ldone)
_ASM_EXTABLE_UA(91b, .Ldone)
_ASM_EXTABLE_UA(92b, .Ldone)
_ASM_EXTABLE_UA(93b, .Ldone)
_ASM_EXTABLE_UA(94b, .Ldone)
_ASM_EXTABLE_UA(95b, .Ldone)

/*
 * Exception table fixups for faults in the middle of the unrolled
 * loop's stores: fall through the ladder, subtracting 8 for each
 * quadword that had already been stored before the fault.
 */
.Ldone56: sub $8,%edx
.Ldone48: sub $8,%edx
.Ldone40: sub $8,%edx
.Ldone32: sub $8,%edx
.Ldone24: sub $8,%edx
.Ldone16: sub $8,%edx
.Ldone8: sub $8,%edx
.Ldone0:
	mov %edx,%eax
	RET

/* Second half of the unrolled loads faulted: first 32 bytes were stored */
.Lfixup32:
	addq $32,%rsi
	addq $32,%rdi
	sub $32,%edx
	jmp .Lquadwords

/* Last full quadword load faulted: try one final 4-byte uncached copy */
.Llast4:
52:	movl (%rsi),%eax
53:	movnti %eax,(%rdi)
	sfence
	sub $4,%edx
	mov %edx,%eax
	RET
_ASM_EXTABLE_UA(52b, .Ldone0)
_ASM_EXTABLE_UA(53b, .Ldone0)

SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)