/* SPDX-License-Identifier: GPL-2.0 */
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>

/*
 * copy_page - copy one 4096-byte page.
 *
 * Syntax: AT&T (GAS), x86-64.
 *
 * In:       %rdi = destination, %rsi = source
 * Clobbers: %rcx, %rsi, %rdi on the string-copy path; the register-copy
 *           fallback (copy_page_regs) additionally uses %rax, %rdx,
 *           %r8-%r11 and the flags.
 *
 * Some CPUs run faster using the string copy instructions (sane microcode).
 * It is also a lot simpler, so use it when possible. But do not use the
 * string copy unless the CPU indicates X86_FEATURE_REP_GOOD: without that
 * feature the "jmp copy_page_regs" below is left in place and the copy is
 * done with plain register moves instead. With the feature the jmp is
 * replaced by nothing and execution falls through to "rep movsq".
 *
 * Could vary the prefetch distance based on SMP/UP.
 */
	ALIGN
SYM_FUNC_START(copy_page)
	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
	movl	$4096/8, %ecx		/* number of 8-byte words in the page */
	rep	movsq
	RET
SYM_FUNC_END(copy_page)
EXPORT_SYMBOL(copy_page)

/*
 * copy_page_regs - register-move fallback for copy_page.
 *
 * In:       %rdi = destination, %rsi = source (same as copy_page; it is
 *           entered by a jmp from copy_page, so its RET returns to
 *           copy_page's caller)
 * Clobbers: %rax, %rcx, %rdx, %r8-%r11, %rsi, %rdi, flags
 * Saved:    %rbx and %r12 are spilled to the stack and restored on exit.
 *
 * The page is copied 64 bytes per iteration. The first (4096/64)-5
 * iterations prefetch the source 5*64 bytes ahead; the last 5 iterations
 * run without prefetch, so no prefetch address lies beyond the end of the
 * source page.
 *
 * In both loops the counter is decremented at the top and tested by the
 * jnz at the bottom. This works because none of the instructions in
 * between (mov, prefetcht0, lea) modify the flags; keep it that way when
 * editing the loop bodies.
 */
SYM_FUNC_START_LOCAL(copy_page_regs)
	subq	$2*8, %rsp
	movq	%rbx, (%rsp)
	movq	%r12, 1*8(%rsp)

	movl	$(4096/64)-5, %ecx	/* iterations that prefetch ahead */
	.p2align 4
.Loop64:
	/*
	 * 32-bit decrement, matching .Loop2: %ecx was loaded with movl, so
	 * the upper half of %rcx is already zero and ZF is the same as for
	 * a 64-bit dec.
	 */
	decl	%ecx

	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	prefetcht0 5*64(%rsi)

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8,  0x8*3(%rdi)
	movq	%r9,  0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	/* lea, not add: the pointer bump must leave ZF from decl intact */
	leaq	64(%rsi), %rsi
	leaq	64(%rdi), %rdi

	jnz	.Loop64

	movl	$5, %ecx		/* remaining 64-byte chunks, no prefetch */
	.p2align 4
.Loop2:
	decl	%ecx

	movq	0x8*0(%rsi), %rax
	movq	0x8*1(%rsi), %rbx
	movq	0x8*2(%rsi), %rdx
	movq	0x8*3(%rsi), %r8
	movq	0x8*4(%rsi), %r9
	movq	0x8*5(%rsi), %r10
	movq	0x8*6(%rsi), %r11
	movq	0x8*7(%rsi), %r12

	movq	%rax, 0x8*0(%rdi)
	movq	%rbx, 0x8*1(%rdi)
	movq	%rdx, 0x8*2(%rdi)
	movq	%r8,  0x8*3(%rdi)
	movq	%r9,  0x8*4(%rdi)
	movq	%r10, 0x8*5(%rdi)
	movq	%r11, 0x8*6(%rdi)
	movq	%r12, 0x8*7(%rdi)

	/* lea, not add: the pointer bump must leave ZF from decl intact */
	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi
	jnz	.Loop2

	movq	(%rsp), %rbx
	movq	1*8(%rsp), %r12
	addq	$2*8, %rsp
	RET
SYM_FUNC_END(copy_page_regs)