18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci#include <linux/string.h> 38c2ecf20Sopenharmony_ci#include <linux/export.h> 48c2ecf20Sopenharmony_ci 58c2ecf20Sopenharmony_ci#undef memcpy 68c2ecf20Sopenharmony_ci#undef memset 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci__visible void *memcpy(void *to, const void *from, size_t n) 98c2ecf20Sopenharmony_ci{ 108c2ecf20Sopenharmony_ci#if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE) 118c2ecf20Sopenharmony_ci return __memcpy3d(to, from, n); 128c2ecf20Sopenharmony_ci#else 138c2ecf20Sopenharmony_ci return __memcpy(to, from, n); 148c2ecf20Sopenharmony_ci#endif 158c2ecf20Sopenharmony_ci} 168c2ecf20Sopenharmony_ciEXPORT_SYMBOL(memcpy); 178c2ecf20Sopenharmony_ci 188c2ecf20Sopenharmony_ci__visible void *memset(void *s, int c, size_t count) 198c2ecf20Sopenharmony_ci{ 208c2ecf20Sopenharmony_ci return __memset(s, c, count); 218c2ecf20Sopenharmony_ci} 228c2ecf20Sopenharmony_ciEXPORT_SYMBOL(memset); 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci__visible void *memmove(void *dest, const void *src, size_t n) 258c2ecf20Sopenharmony_ci{ 268c2ecf20Sopenharmony_ci int d0,d1,d2,d3,d4,d5; 278c2ecf20Sopenharmony_ci char *ret = dest; 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_ci __asm__ __volatile__( 308c2ecf20Sopenharmony_ci /* Handle more 16 bytes in loop */ 318c2ecf20Sopenharmony_ci "cmp $0x10, %0\n\t" 328c2ecf20Sopenharmony_ci "jb 1f\n\t" 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_ci /* Decide forward/backward copy mode */ 358c2ecf20Sopenharmony_ci "cmp %2, %1\n\t" 368c2ecf20Sopenharmony_ci "jb 2f\n\t" 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_ci /* 398c2ecf20Sopenharmony_ci * movs instruction have many startup latency 408c2ecf20Sopenharmony_ci * so we handle small size by general register. 418c2ecf20Sopenharmony_ci */ 428c2ecf20Sopenharmony_ci "cmp $680, %0\n\t" 438c2ecf20Sopenharmony_ci "jb 3f\n\t" 448c2ecf20Sopenharmony_ci /* 458c2ecf20Sopenharmony_ci * movs instruction is only good for aligned case. 468c2ecf20Sopenharmony_ci */ 478c2ecf20Sopenharmony_ci "mov %1, %3\n\t" 488c2ecf20Sopenharmony_ci "xor %2, %3\n\t" 498c2ecf20Sopenharmony_ci "and $0xff, %3\n\t" 508c2ecf20Sopenharmony_ci "jz 4f\n\t" 518c2ecf20Sopenharmony_ci "3:\n\t" 528c2ecf20Sopenharmony_ci "sub $0x10, %0\n\t" 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci /* 558c2ecf20Sopenharmony_ci * We gobble 16 bytes forward in each loop. 568c2ecf20Sopenharmony_ci */ 578c2ecf20Sopenharmony_ci "3:\n\t" 588c2ecf20Sopenharmony_ci "sub $0x10, %0\n\t" 598c2ecf20Sopenharmony_ci "mov 0*4(%1), %3\n\t" 608c2ecf20Sopenharmony_ci "mov 1*4(%1), %4\n\t" 618c2ecf20Sopenharmony_ci "mov %3, 0*4(%2)\n\t" 628c2ecf20Sopenharmony_ci "mov %4, 1*4(%2)\n\t" 638c2ecf20Sopenharmony_ci "mov 2*4(%1), %3\n\t" 648c2ecf20Sopenharmony_ci "mov 3*4(%1), %4\n\t" 658c2ecf20Sopenharmony_ci "mov %3, 2*4(%2)\n\t" 668c2ecf20Sopenharmony_ci "mov %4, 3*4(%2)\n\t" 678c2ecf20Sopenharmony_ci "lea 0x10(%1), %1\n\t" 688c2ecf20Sopenharmony_ci "lea 0x10(%2), %2\n\t" 698c2ecf20Sopenharmony_ci "jae 3b\n\t" 708c2ecf20Sopenharmony_ci "add $0x10, %0\n\t" 718c2ecf20Sopenharmony_ci "jmp 1f\n\t" 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci /* 748c2ecf20Sopenharmony_ci * Handle data forward by movs. 758c2ecf20Sopenharmony_ci */ 768c2ecf20Sopenharmony_ci ".p2align 4\n\t" 778c2ecf20Sopenharmony_ci "4:\n\t" 788c2ecf20Sopenharmony_ci "mov -4(%1, %0), %3\n\t" 798c2ecf20Sopenharmony_ci "lea -4(%2, %0), %4\n\t" 808c2ecf20Sopenharmony_ci "shr $2, %0\n\t" 818c2ecf20Sopenharmony_ci "rep movsl\n\t" 828c2ecf20Sopenharmony_ci "mov %3, (%4)\n\t" 838c2ecf20Sopenharmony_ci "jmp 11f\n\t" 848c2ecf20Sopenharmony_ci /* 858c2ecf20Sopenharmony_ci * Handle data backward by movs. 868c2ecf20Sopenharmony_ci */ 878c2ecf20Sopenharmony_ci ".p2align 4\n\t" 888c2ecf20Sopenharmony_ci "6:\n\t" 898c2ecf20Sopenharmony_ci "mov (%1), %3\n\t" 908c2ecf20Sopenharmony_ci "mov %2, %4\n\t" 918c2ecf20Sopenharmony_ci "lea -4(%1, %0), %1\n\t" 928c2ecf20Sopenharmony_ci "lea -4(%2, %0), %2\n\t" 938c2ecf20Sopenharmony_ci "shr $2, %0\n\t" 948c2ecf20Sopenharmony_ci "std\n\t" 958c2ecf20Sopenharmony_ci "rep movsl\n\t" 968c2ecf20Sopenharmony_ci "mov %3,(%4)\n\t" 978c2ecf20Sopenharmony_ci "cld\n\t" 988c2ecf20Sopenharmony_ci "jmp 11f\n\t" 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci /* 1018c2ecf20Sopenharmony_ci * Start to prepare for backward copy. 1028c2ecf20Sopenharmony_ci */ 1038c2ecf20Sopenharmony_ci ".p2align 4\n\t" 1048c2ecf20Sopenharmony_ci "2:\n\t" 1058c2ecf20Sopenharmony_ci "cmp $680, %0\n\t" 1068c2ecf20Sopenharmony_ci "jb 5f\n\t" 1078c2ecf20Sopenharmony_ci "mov %1, %3\n\t" 1088c2ecf20Sopenharmony_ci "xor %2, %3\n\t" 1098c2ecf20Sopenharmony_ci "and $0xff, %3\n\t" 1108c2ecf20Sopenharmony_ci "jz 6b\n\t" 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci /* 1138c2ecf20Sopenharmony_ci * Calculate copy position to tail. 1148c2ecf20Sopenharmony_ci */ 1158c2ecf20Sopenharmony_ci "5:\n\t" 1168c2ecf20Sopenharmony_ci "add %0, %1\n\t" 1178c2ecf20Sopenharmony_ci "add %0, %2\n\t" 1188c2ecf20Sopenharmony_ci "sub $0x10, %0\n\t" 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci /* 1218c2ecf20Sopenharmony_ci * We gobble 16 bytes backward in each loop. 1228c2ecf20Sopenharmony_ci */ 1238c2ecf20Sopenharmony_ci "7:\n\t" 1248c2ecf20Sopenharmony_ci "sub $0x10, %0\n\t" 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ci "mov -1*4(%1), %3\n\t" 1278c2ecf20Sopenharmony_ci "mov -2*4(%1), %4\n\t" 1288c2ecf20Sopenharmony_ci "mov %3, -1*4(%2)\n\t" 1298c2ecf20Sopenharmony_ci "mov %4, -2*4(%2)\n\t" 1308c2ecf20Sopenharmony_ci "mov -3*4(%1), %3\n\t" 1318c2ecf20Sopenharmony_ci "mov -4*4(%1), %4\n\t" 1328c2ecf20Sopenharmony_ci "mov %3, -3*4(%2)\n\t" 1338c2ecf20Sopenharmony_ci "mov %4, -4*4(%2)\n\t" 1348c2ecf20Sopenharmony_ci "lea -0x10(%1), %1\n\t" 1358c2ecf20Sopenharmony_ci "lea -0x10(%2), %2\n\t" 1368c2ecf20Sopenharmony_ci "jae 7b\n\t" 1378c2ecf20Sopenharmony_ci /* 1388c2ecf20Sopenharmony_ci * Calculate copy position to head. 1398c2ecf20Sopenharmony_ci */ 1408c2ecf20Sopenharmony_ci "add $0x10, %0\n\t" 1418c2ecf20Sopenharmony_ci "sub %0, %1\n\t" 1428c2ecf20Sopenharmony_ci "sub %0, %2\n\t" 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_ci /* 1458c2ecf20Sopenharmony_ci * Move data from 8 bytes to 15 bytes. 1468c2ecf20Sopenharmony_ci */ 1478c2ecf20Sopenharmony_ci ".p2align 4\n\t" 1488c2ecf20Sopenharmony_ci "1:\n\t" 1498c2ecf20Sopenharmony_ci "cmp $8, %0\n\t" 1508c2ecf20Sopenharmony_ci "jb 8f\n\t" 1518c2ecf20Sopenharmony_ci "mov 0*4(%1), %3\n\t" 1528c2ecf20Sopenharmony_ci "mov 1*4(%1), %4\n\t" 1538c2ecf20Sopenharmony_ci "mov -2*4(%1, %0), %5\n\t" 1548c2ecf20Sopenharmony_ci "mov -1*4(%1, %0), %1\n\t" 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci "mov %3, 0*4(%2)\n\t" 1578c2ecf20Sopenharmony_ci "mov %4, 1*4(%2)\n\t" 1588c2ecf20Sopenharmony_ci "mov %5, -2*4(%2, %0)\n\t" 1598c2ecf20Sopenharmony_ci "mov %1, -1*4(%2, %0)\n\t" 1608c2ecf20Sopenharmony_ci "jmp 11f\n\t" 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci /* 1638c2ecf20Sopenharmony_ci * Move data from 4 bytes to 7 bytes. 1648c2ecf20Sopenharmony_ci */ 1658c2ecf20Sopenharmony_ci ".p2align 4\n\t" 1668c2ecf20Sopenharmony_ci "8:\n\t" 1678c2ecf20Sopenharmony_ci "cmp $4, %0\n\t" 1688c2ecf20Sopenharmony_ci "jb 9f\n\t" 1698c2ecf20Sopenharmony_ci "mov 0*4(%1), %3\n\t" 1708c2ecf20Sopenharmony_ci "mov -1*4(%1, %0), %4\n\t" 1718c2ecf20Sopenharmony_ci "mov %3, 0*4(%2)\n\t" 1728c2ecf20Sopenharmony_ci "mov %4, -1*4(%2, %0)\n\t" 1738c2ecf20Sopenharmony_ci "jmp 11f\n\t" 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci /* 1768c2ecf20Sopenharmony_ci * Move data from 2 bytes to 3 bytes. 1778c2ecf20Sopenharmony_ci */ 1788c2ecf20Sopenharmony_ci ".p2align 4\n\t" 1798c2ecf20Sopenharmony_ci "9:\n\t" 1808c2ecf20Sopenharmony_ci "cmp $2, %0\n\t" 1818c2ecf20Sopenharmony_ci "jb 10f\n\t" 1828c2ecf20Sopenharmony_ci "movw 0*2(%1), %%dx\n\t" 1838c2ecf20Sopenharmony_ci "movw -1*2(%1, %0), %%bx\n\t" 1848c2ecf20Sopenharmony_ci "movw %%dx, 0*2(%2)\n\t" 1858c2ecf20Sopenharmony_ci "movw %%bx, -1*2(%2, %0)\n\t" 1868c2ecf20Sopenharmony_ci "jmp 11f\n\t" 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci /* 1898c2ecf20Sopenharmony_ci * Move data for 1 byte. 1908c2ecf20Sopenharmony_ci */ 1918c2ecf20Sopenharmony_ci ".p2align 4\n\t" 1928c2ecf20Sopenharmony_ci "10:\n\t" 1938c2ecf20Sopenharmony_ci "cmp $1, %0\n\t" 1948c2ecf20Sopenharmony_ci "jb 11f\n\t" 1958c2ecf20Sopenharmony_ci "movb (%1), %%cl\n\t" 1968c2ecf20Sopenharmony_ci "movb %%cl, (%2)\n\t" 1978c2ecf20Sopenharmony_ci ".p2align 4\n\t" 1988c2ecf20Sopenharmony_ci "11:" 1998c2ecf20Sopenharmony_ci : "=&c" (d0), "=&S" (d1), "=&D" (d2), 2008c2ecf20Sopenharmony_ci "=r" (d3),"=r" (d4), "=r"(d5) 2018c2ecf20Sopenharmony_ci :"0" (n), 2028c2ecf20Sopenharmony_ci "1" (src), 2038c2ecf20Sopenharmony_ci "2" (dest) 2048c2ecf20Sopenharmony_ci :"memory"); 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci return ret; 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_ci} 2098c2ecf20Sopenharmony_ciEXPORT_SYMBOL(memmove); 210