/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/linkage.h>
#include <asm/export.h>

SYM_FUNC_START(memmove)
/*
 * void *memmove(void *dest_in, const void *src_in, size_t n)
 *
 * Copy n bytes from src_in to dest_in; the two regions may overlap.
 * Returns dest_in (in %eax).
 *
 * -mregparm=3 passes the arguments in registers:
 *	dest_in:	%eax
 *	src_in:		%edx
 *	n:		%ecx
 * See also: arch/x86/entry/calling.h for description of the calling convention.
 *
 * n can remain in %ecx, but for `rep movsl`, we'll need dest in %edi and src
 * in %esi.
 *
 * Strategy:
 *   - n < 16:   copy with a handful of (possibly overlapping) head/tail moves.
 *   - n >= 16:  copy forwards when src >= dest, backwards when src < dest, so
 *               that an overlapping source is never overwritten before it is
 *               read.  Each direction uses `rep movsl` for n >= 680 when src
 *               and dest agree in their low 8 address bits, and a 16-byte
 *               register loop otherwise; the loop leaves a remainder of fewer
 *               than 16 bytes to the small-copy code.
 */
.set dest_in,	%eax
.set dest,	%edi
.set src_in,	%edx
.set src,	%esi
.set n,		%ecx
/*
 * Scratch registers.  Note the aliasing:
 *   tmp0  shares %edx with src_in - src_in is copied to src before tmp0 is used.
 *   tmp2  shares %eax with dest_in - dest_in is saved on the stack first.
 *   tmp3b is the low byte of n - only used once n is no longer needed.
 */
.set tmp0,	%edx
.set tmp0w,	%dx
.set tmp1,	%ebx
.set tmp1w,	%bx
.set tmp2,	%eax
.set tmp3b,	%cl

/*
 * Save all callee-saved registers, because this function is going to clobber
 * all of them:
 */
	pushl	%ebp
	movl	%esp, %ebp	// set standard frame pointer

	pushl	%ebx
	pushl	%edi
	pushl	%esi
	pushl	%eax		// save 'dest_in' parameter [eax] as the return value

	movl	src_in, src
	movl	dest_in, dest

	/* Fewer than 16 bytes: skip the loops and go straight to the tail code. */
	cmpl	$0x10, n
	jb	.Lmove_16B

	/* Decide forward/backward copy mode: copy backwards when src < dest. */
	cmpl	dest, src
	jb	.Lbackwards_header

	/*
	 * The movs instruction has a high startup latency, so sizes below
	 * 680 bytes are copied through general-purpose registers instead.
	 */
	cmpl	$680, n
	jb	.Ltoo_small_forwards
	/*
	 * movs is only worthwhile when src and dest are mutually aligned:
	 * take it only if the low 8 bits of (src ^ dest) are all zero.
	 */
	movl	src, tmp0
	xorl	dest, tmp0
	andl	$0xff, tmp0
	jz	.Lforward_movs
.Ltoo_small_forwards:
	/* Pre-bias n by -16 so the loop's own subl can drive the exit test. */
	subl	$0x10, n

	/* We gobble 16 bytes forward in each loop. */
.Lmove_16B_forwards_loop:
	/*
	 * The carry flag produced by this subl is what the jae at the bottom
	 * tests: the movl and leal instructions in between leave the flags
	 * untouched.
	 */
	subl	$0x10, n
	movl	0*4(src), tmp0
	movl	1*4(src), tmp1
	movl	tmp0, 0*4(dest)
	movl	tmp1, 1*4(dest)
	movl	2*4(src), tmp0
	movl	3*4(src), tmp1
	movl	tmp0, 2*4(dest)
	movl	tmp1, 3*4(dest)
	leal	0x10(src), src
	leal	0x10(dest), dest
	jae	.Lmove_16B_forwards_loop
	/* Undo the last subtraction: n is now the remainder (0..15 bytes). */
	addl	$0x10, n
	jmp	.Lmove_16B

	/* Handle data forward by movs. */
.p2align 4
.Lforward_movs:
	/*
	 * rep movsl only moves n / 4 whole dwords.  Grab the last dword of
	 * the source before the copy (the copy may overwrite it when the
	 * regions overlap) and store it afterwards to cover the trailing
	 * n % 4 bytes.
	 */
	movl	-4(src, n), tmp0
	leal	-4(dest, n), tmp1
	shrl	$2, n
	rep	movsl
	movl	tmp0, (tmp1)
	jmp	.Ldone

	/* Handle data backward by movs. */
.p2align 4
.Lbackwards_movs:
	/*
	 * Mirror image of the forward case: remember the first source dword
	 * and the head of dest, point src/dest at the last dword, and copy
	 * downwards with the direction flag set.  The saved dword is stored
	 * last to cover the leading n % 4 bytes.
	 */
	movl	(src), tmp0
	movl	dest, tmp1
	leal	-4(src, n), src
	leal	-4(dest, n), dest
	shrl	$2, n
	std
	rep	movsl
	movl	tmp0,(tmp1)
	cld			// clear the direction flag again before leaving
	jmp	.Ldone

	/* Start to prepare for backward copy. */
.p2align 4
.Lbackwards_header:
	/* Same size and mutual-alignment test as on the forward path. */
	cmpl	$680, n
	jb	.Ltoo_small_backwards
	movl	src, tmp0
	xorl	dest, tmp0
	andl	$0xff, tmp0
	jz	.Lbackwards_movs

	/* Calculate copy position to tail. */
.Ltoo_small_backwards:
	addl	n, src
	addl	n, dest
	/* Pre-bias n by -16 so the loop's own subl can drive the exit test. */
	subl	$0x10, n

	/* We gobble 16 bytes backward in each loop. */
.Lmove_16B_backwards_loop:
	/* As in the forward loop, jae below tests the carry from this subl. */
	subl	$0x10, n

	movl	-1*4(src), tmp0
	movl	-2*4(src), tmp1
	movl	tmp0, -1*4(dest)
	movl	tmp1, -2*4(dest)
	movl	-3*4(src), tmp0
	movl	-4*4(src), tmp1
	movl	tmp0, -3*4(dest)
	movl	tmp1, -4*4(dest)
	leal	-0x10(src), src
	leal	-0x10(dest), dest
	jae	.Lmove_16B_backwards_loop
	/*
	 * Calculate copy position to head: n becomes the remainder (0..15
	 * bytes) and src/dest are moved back to the start of that remainder.
	 * Execution then falls through into .Lmove_16B.
	 */
	addl	$0x10, n
	subl	n, src
	subl	n, dest

	/*
	 * Move data from 8 bytes to 15 bytes: copy the first 8 and the last 8
	 * bytes, which may overlap each other.  All four loads are issued
	 * before any store, so overlapping src/dest regions are handled.
	 */
.p2align 4
.Lmove_16B:
	cmpl	$8, n
	jb	.Lmove_8B
	movl	0*4(src), tmp0
	movl	1*4(src), tmp1
	movl	-2*4(src, n), tmp2
	movl	-1*4(src, n), src	// last load: src itself doubles as a temporary

	movl	tmp0, 0*4(dest)
	movl	tmp1, 1*4(dest)
	movl	tmp2, -2*4(dest, n)
	movl	src, -1*4(dest, n)
	jmp	.Ldone

	/*
	 * Move data from 4 bytes to 7 bytes: first dword and last dword,
	 * both loaded before either is stored.
	 */
.p2align 4
.Lmove_8B:
	cmpl	$4, n
	jb	.Lmove_4B
	movl	0*4(src), tmp0
	movl	-1*4(src, n), tmp1
	movl	tmp0, 0*4(dest)
	movl	tmp1, -1*4(dest, n)
	jmp	.Ldone

	/*
	 * Move data from 2 bytes to 3 bytes: first word and last word,
	 * both loaded before either is stored.
	 */
.p2align 4
.Lmove_4B:
	cmpl	$2, n
	jb	.Lmove_1B
	movw	0*2(src), tmp0w
	movw	-1*2(src, n), tmp1w
	movw	tmp0w, 0*2(dest)
	movw	tmp1w, -1*2(dest, n)
	jmp	.Ldone

	/* Move data for 1 byte; n == 0 copies nothing. */
.p2align 4
.Lmove_1B:
	cmpl	$1, n
	jb	.Ldone
	movb	(src), tmp3b	// overwrites the low byte of n, which is no longer needed
	movb	tmp3b, (dest)
.p2align 4
.Ldone:
	popl	dest_in		// restore 'dest_in' [eax] as the return value
	/* Restore all callee-saved registers: */
	popl	%esi
	popl	%edi
	popl	%ebx
	popl	%ebp

	RET
SYM_FUNC_END(memmove)
EXPORT_SYMBOL(memmove)