/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Normally compiler builtins are used, but sometimes the compiler calls out
 * of line code. Based on asm-i386/string.h.
 *
 * This assembly file is re-written from memmove_64.c file.
 *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
 */
#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>

#undef memmove

.section .noinstr.text, "ax"

/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * Syntax: GAS / AT&T operand order (source first, destination last).
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest
 *
 * Registers written besides rax: rcx, rdx, rsi, rdi, r8, r9, r10, r11
 * and the flags. No stack memory is used by the instructions below.
 *
 * Overview of the numeric local labels:
 *  1:      tail copy for a remaining count below 32 bytes
 *  2:      entry of the backward (high to low address) copy
 *  3:, 5:  forward copy, 32 bytes per iteration through r8-r11
 *  4:      forward copy with "rep movsq"
 *  6:, 8:  backward copy, 32 bytes per iteration through r8-r11
 *  7:      backward copy with "std; rep movsq; cld"
 *  9:-12:  tail copy cases for 8-15, 4-7, 2-3 and 1 byte(s)
 *  13:     common return
 */
SYM_FUNC_START(__memmove)

	/* The return value is the original dest pointer. */
	mov %rdi, %rax

	/*
	 * Decide forward/backward copy mode.
	 *
	 * Copy forward when src >= dest, or when src + count <= dest.
	 * Otherwise dest starts inside the source range and the copy has to
	 * run backward so that source bytes are read before being overwritten.
	 *
	 * NOTE(review): jge/jg are signed comparisons on pointer values. This
	 * looks equivalent to an unsigned ordering as long as both buffers lie
	 * in the same half of the address space - confirm before changing.
	 */
	cmp %rdi, %rsi
	jge .Lmemmove_begin_forward
	mov %rsi, %r8
	add %rdx, %r8
	cmp %rdi, %r8
	jg 2f

	/*
	 * CHECK_LEN:     branch to the tail copy (1:) when count < 32.
	 * MEMMOVE_BYTES: copy count bytes with "rep movsb" and return.
	 */
#define CHECK_LEN	cmp $0x20, %rdx; jb 1f
#define MEMMOVE_BYTES	movq %rdx, %rcx; rep movsb; RET
.Lmemmove_begin_forward:
	/*
	 * Three variants are handed to ALTERNATIVE_2:
	 *  - CHECK_LEN only,
	 *  - CHECK_LEN followed by MEMMOVE_BYTES, tagged X86_FEATURE_ERMS,
	 *  - MEMMOVE_BYTES only, tagged X86_FEATURE_FSRM.
	 *
	 * NOTE(review): ALTERNATIVE_2 comes from <asm/alternative.h> and is
	 * not visible here; presumably the first sequence is the default and
	 * each following (sequence, feature) pair replaces it on CPUs that
	 * have the feature - confirm.
	 */
	ALTERNATIVE_2 __stringify(CHECK_LEN), \
		      __stringify(CHECK_LEN; MEMMOVE_BYTES), X86_FEATURE_ERMS, \
		      __stringify(MEMMOVE_BYTES), X86_FEATURE_FSRM

	/*
	 * The movsq instruction has a high startup latency, so sizes
	 * below 680 bytes are copied through general purpose registers.
	 */
	cmp  $680, %rdx
	jb	3f
	/*
	 * The movsq instruction is only good for the aligned case: it is
	 * used only when the low bytes of src and dest are equal.
	 */

	cmpb %dil, %sil
	je 4f
3:
	/*
	 * Pre-bias the count by 32 so that the "sub" at the top of the loop
	 * borrows exactly when fewer than 32 bytes remain after the current
	 * iteration.
	 */
	sub $0x20, %rdx
	/*
	 * We gobble 32 bytes forward in each loop.
	 *
	 * All four quadwords are loaded before any of them is stored. The
	 * movq and leaq instructions leave the flags alone, so the "jae" at
	 * the bottom still tests the carry produced by the "sub" at the top.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae 5b
	/* Undo the bias: rdx is now the number of bytes left (0..31). */
	addq $0x20, %rdx
	jmp 1f
	/*
	 * Handle data forward by movsq.
	 *
	 * The last 8 source bytes are loaded into r11 before the copy and
	 * stored to the last 8 destination bytes (address kept in r10)
	 * afterwards, which covers the count % 8 bytes that the
	 * count / 8 quadword moves do not reach.
	 */
	.p2align 4
4:
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11
	lea -8(%rdi, %rdx), %r10
	shrq $3, %rcx
	rep movsq
	movq %r11, (%r10)
	jmp 13f
.Lmemmove_end_forward:

	/*
	 * Handle data backward by movsq.
	 *
	 * rsi/rdi are moved to the last quadword of each buffer and the
	 * direction flag is set only around the "rep movsq". The first 8
	 * source bytes are loaded into r11 before the copy and stored to the
	 * start of dest (kept in r10) afterwards, which covers the count % 8
	 * bytes at the head that the quadword moves do not reach.
	 */
	.p2align 4
7:
	movq %rdx, %rcx
	movq (%rsi), %r11
	movq %rdi, %r10
	leaq -8(%rsi, %rdx), %rsi
	leaq -8(%rdi, %rdx), %rdi
	shrq $3, %rcx
	std
	rep movsq
	cld
	movq %r11, (%r10)
	jmp 13f

	/*
	 * Start to prepare for backward copy.
	 *
	 * Same size and alignment policy as the forward path: below 32 bytes
	 * go to the tail copy, below 680 bytes (or differing low pointer
	 * bytes) use the register loop, everything else uses movsq.
	 */
	.p2align 4
2:
	cmp $0x20, %rdx
	jb 1f
	cmp $680, %rdx
	jb 6f
	cmp %dil, %sil
	je 7b
6:
	/*
	 * Calculate copy position to tail.
	 * rsi/rdi point one past the end of each buffer; the count is
	 * pre-biased by 32 exactly as in the forward loop.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * We gobble 32 bytes backward in each loop.
	 * As in the forward loop, "jae" tests the carry of the "subq" at the
	 * top because movq and leaq do not modify the flags.
	 */
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae 8b
	/*
	 * Calculate copy position to head.
	 * rdx becomes the number of bytes left (0..31) and rsi/rdi are moved
	 * back to the start of that remaining region.
	 */
	addq $0x20, %rdx
	subq %rdx, %rsi
	subq %rdx, %rdi
	/*
	 * Tail copy: rdx < 32 bytes starting at rsi/rdi.
	 *
	 * Each case loads a chunk from the head and a chunk from the end of
	 * the region, which may overlap each other, and performs every load
	 * before the first store.
	 */
1:
	cmpq $16, %rdx
	jb 9f
	/*
	 * Move data from 16 bytes to 31 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Move data from 8 bytes to 15 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/*
	 * Move data from 4 bytes to 7 bytes.
	 */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/*
	 * Move data from 2 bytes to 3 bytes.
	 */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	/* A count of zero skips straight to the return. */
	cmp $1, %rdx
	jb 13f
	/*
	 * Move data for 1 byte.
	 */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	RET
SYM_FUNC_END(__memmove)
EXPORT_SYMBOL(__memmove)

SYM_FUNC_ALIAS_MEMFUNC(memmove, __memmove)
EXPORT_SYMBOL(memmove)