18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci#include <linux/string.h>
38c2ecf20Sopenharmony_ci#include <linux/export.h>
48c2ecf20Sopenharmony_ci
58c2ecf20Sopenharmony_ci#undef memcpy
68c2ecf20Sopenharmony_ci#undef memset
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci__visible void *memcpy(void *to, const void *from, size_t n)
98c2ecf20Sopenharmony_ci{
108c2ecf20Sopenharmony_ci#if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE)
118c2ecf20Sopenharmony_ci	return __memcpy3d(to, from, n);
128c2ecf20Sopenharmony_ci#else
138c2ecf20Sopenharmony_ci	return __memcpy(to, from, n);
148c2ecf20Sopenharmony_ci#endif
158c2ecf20Sopenharmony_ci}
168c2ecf20Sopenharmony_ciEXPORT_SYMBOL(memcpy);
178c2ecf20Sopenharmony_ci
188c2ecf20Sopenharmony_ci__visible void *memset(void *s, int c, size_t count)
198c2ecf20Sopenharmony_ci{
208c2ecf20Sopenharmony_ci	return __memset(s, c, count);
218c2ecf20Sopenharmony_ci}
228c2ecf20Sopenharmony_ciEXPORT_SYMBOL(memset);
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_ci__visible void *memmove(void *dest, const void *src, size_t n)
258c2ecf20Sopenharmony_ci{
268c2ecf20Sopenharmony_ci	int d0,d1,d2,d3,d4,d5;
278c2ecf20Sopenharmony_ci	char *ret = dest;
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_ci	__asm__ __volatile__(
308c2ecf20Sopenharmony_ci		/* Handle more 16 bytes in loop */
318c2ecf20Sopenharmony_ci		"cmp $0x10, %0\n\t"
328c2ecf20Sopenharmony_ci		"jb	1f\n\t"
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_ci		/* Decide forward/backward copy mode */
358c2ecf20Sopenharmony_ci		"cmp %2, %1\n\t"
368c2ecf20Sopenharmony_ci		"jb	2f\n\t"
378c2ecf20Sopenharmony_ci
388c2ecf20Sopenharmony_ci		/*
398c2ecf20Sopenharmony_ci		 * movs instruction have many startup latency
408c2ecf20Sopenharmony_ci		 * so we handle small size by general register.
418c2ecf20Sopenharmony_ci		 */
428c2ecf20Sopenharmony_ci		"cmp  $680, %0\n\t"
438c2ecf20Sopenharmony_ci		"jb 3f\n\t"
448c2ecf20Sopenharmony_ci		/*
458c2ecf20Sopenharmony_ci		 * movs instruction is only good for aligned case.
468c2ecf20Sopenharmony_ci		 */
478c2ecf20Sopenharmony_ci		"mov %1, %3\n\t"
488c2ecf20Sopenharmony_ci		"xor %2, %3\n\t"
498c2ecf20Sopenharmony_ci		"and $0xff, %3\n\t"
508c2ecf20Sopenharmony_ci		"jz 4f\n\t"
518c2ecf20Sopenharmony_ci		"3:\n\t"
528c2ecf20Sopenharmony_ci		"sub $0x10, %0\n\t"
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ci		/*
558c2ecf20Sopenharmony_ci		 * We gobble 16 bytes forward in each loop.
568c2ecf20Sopenharmony_ci		 */
578c2ecf20Sopenharmony_ci		"3:\n\t"
588c2ecf20Sopenharmony_ci		"sub $0x10, %0\n\t"
598c2ecf20Sopenharmony_ci		"mov 0*4(%1), %3\n\t"
608c2ecf20Sopenharmony_ci		"mov 1*4(%1), %4\n\t"
618c2ecf20Sopenharmony_ci		"mov  %3, 0*4(%2)\n\t"
628c2ecf20Sopenharmony_ci		"mov  %4, 1*4(%2)\n\t"
638c2ecf20Sopenharmony_ci		"mov 2*4(%1), %3\n\t"
648c2ecf20Sopenharmony_ci		"mov 3*4(%1), %4\n\t"
658c2ecf20Sopenharmony_ci		"mov  %3, 2*4(%2)\n\t"
668c2ecf20Sopenharmony_ci		"mov  %4, 3*4(%2)\n\t"
678c2ecf20Sopenharmony_ci		"lea  0x10(%1), %1\n\t"
688c2ecf20Sopenharmony_ci		"lea  0x10(%2), %2\n\t"
698c2ecf20Sopenharmony_ci		"jae 3b\n\t"
708c2ecf20Sopenharmony_ci		"add $0x10, %0\n\t"
718c2ecf20Sopenharmony_ci		"jmp 1f\n\t"
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci		/*
748c2ecf20Sopenharmony_ci		 * Handle data forward by movs.
758c2ecf20Sopenharmony_ci		 */
768c2ecf20Sopenharmony_ci		".p2align 4\n\t"
778c2ecf20Sopenharmony_ci		"4:\n\t"
788c2ecf20Sopenharmony_ci		"mov -4(%1, %0), %3\n\t"
798c2ecf20Sopenharmony_ci		"lea -4(%2, %0), %4\n\t"
808c2ecf20Sopenharmony_ci		"shr $2, %0\n\t"
818c2ecf20Sopenharmony_ci		"rep movsl\n\t"
828c2ecf20Sopenharmony_ci		"mov %3, (%4)\n\t"
838c2ecf20Sopenharmony_ci		"jmp 11f\n\t"
848c2ecf20Sopenharmony_ci		/*
858c2ecf20Sopenharmony_ci		 * Handle data backward by movs.
868c2ecf20Sopenharmony_ci		 */
878c2ecf20Sopenharmony_ci		".p2align 4\n\t"
888c2ecf20Sopenharmony_ci		"6:\n\t"
898c2ecf20Sopenharmony_ci		"mov (%1), %3\n\t"
908c2ecf20Sopenharmony_ci		"mov %2, %4\n\t"
918c2ecf20Sopenharmony_ci		"lea -4(%1, %0), %1\n\t"
928c2ecf20Sopenharmony_ci		"lea -4(%2, %0), %2\n\t"
938c2ecf20Sopenharmony_ci		"shr $2, %0\n\t"
948c2ecf20Sopenharmony_ci		"std\n\t"
958c2ecf20Sopenharmony_ci		"rep movsl\n\t"
968c2ecf20Sopenharmony_ci		"mov %3,(%4)\n\t"
978c2ecf20Sopenharmony_ci		"cld\n\t"
988c2ecf20Sopenharmony_ci		"jmp 11f\n\t"
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci		/*
1018c2ecf20Sopenharmony_ci		 * Start to prepare for backward copy.
1028c2ecf20Sopenharmony_ci		 */
1038c2ecf20Sopenharmony_ci		".p2align 4\n\t"
1048c2ecf20Sopenharmony_ci		"2:\n\t"
1058c2ecf20Sopenharmony_ci		"cmp  $680, %0\n\t"
1068c2ecf20Sopenharmony_ci		"jb 5f\n\t"
1078c2ecf20Sopenharmony_ci		"mov %1, %3\n\t"
1088c2ecf20Sopenharmony_ci		"xor %2, %3\n\t"
1098c2ecf20Sopenharmony_ci		"and $0xff, %3\n\t"
1108c2ecf20Sopenharmony_ci		"jz 6b\n\t"
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci		/*
1138c2ecf20Sopenharmony_ci		 * Calculate copy position to tail.
1148c2ecf20Sopenharmony_ci		 */
1158c2ecf20Sopenharmony_ci		"5:\n\t"
1168c2ecf20Sopenharmony_ci		"add %0, %1\n\t"
1178c2ecf20Sopenharmony_ci		"add %0, %2\n\t"
1188c2ecf20Sopenharmony_ci		"sub $0x10, %0\n\t"
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci		/*
1218c2ecf20Sopenharmony_ci		 * We gobble 16 bytes backward in each loop.
1228c2ecf20Sopenharmony_ci		 */
1238c2ecf20Sopenharmony_ci		"7:\n\t"
1248c2ecf20Sopenharmony_ci		"sub $0x10, %0\n\t"
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci		"mov -1*4(%1), %3\n\t"
1278c2ecf20Sopenharmony_ci		"mov -2*4(%1), %4\n\t"
1288c2ecf20Sopenharmony_ci		"mov  %3, -1*4(%2)\n\t"
1298c2ecf20Sopenharmony_ci		"mov  %4, -2*4(%2)\n\t"
1308c2ecf20Sopenharmony_ci		"mov -3*4(%1), %3\n\t"
1318c2ecf20Sopenharmony_ci		"mov -4*4(%1), %4\n\t"
1328c2ecf20Sopenharmony_ci		"mov  %3, -3*4(%2)\n\t"
1338c2ecf20Sopenharmony_ci		"mov  %4, -4*4(%2)\n\t"
1348c2ecf20Sopenharmony_ci		"lea  -0x10(%1), %1\n\t"
1358c2ecf20Sopenharmony_ci		"lea  -0x10(%2), %2\n\t"
1368c2ecf20Sopenharmony_ci		"jae 7b\n\t"
1378c2ecf20Sopenharmony_ci		/*
1388c2ecf20Sopenharmony_ci		 * Calculate copy position to head.
1398c2ecf20Sopenharmony_ci		 */
1408c2ecf20Sopenharmony_ci		"add $0x10, %0\n\t"
1418c2ecf20Sopenharmony_ci		"sub %0, %1\n\t"
1428c2ecf20Sopenharmony_ci		"sub %0, %2\n\t"
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_ci		/*
1458c2ecf20Sopenharmony_ci		 * Move data from 8 bytes to 15 bytes.
1468c2ecf20Sopenharmony_ci		 */
1478c2ecf20Sopenharmony_ci		".p2align 4\n\t"
1488c2ecf20Sopenharmony_ci		"1:\n\t"
1498c2ecf20Sopenharmony_ci		"cmp $8, %0\n\t"
1508c2ecf20Sopenharmony_ci		"jb 8f\n\t"
1518c2ecf20Sopenharmony_ci		"mov 0*4(%1), %3\n\t"
1528c2ecf20Sopenharmony_ci		"mov 1*4(%1), %4\n\t"
1538c2ecf20Sopenharmony_ci		"mov -2*4(%1, %0), %5\n\t"
1548c2ecf20Sopenharmony_ci		"mov -1*4(%1, %0), %1\n\t"
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ci		"mov  %3, 0*4(%2)\n\t"
1578c2ecf20Sopenharmony_ci		"mov  %4, 1*4(%2)\n\t"
1588c2ecf20Sopenharmony_ci		"mov  %5, -2*4(%2, %0)\n\t"
1598c2ecf20Sopenharmony_ci		"mov  %1, -1*4(%2, %0)\n\t"
1608c2ecf20Sopenharmony_ci		"jmp 11f\n\t"
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci		/*
1638c2ecf20Sopenharmony_ci		 * Move data from 4 bytes to 7 bytes.
1648c2ecf20Sopenharmony_ci		 */
1658c2ecf20Sopenharmony_ci		".p2align 4\n\t"
1668c2ecf20Sopenharmony_ci		"8:\n\t"
1678c2ecf20Sopenharmony_ci		"cmp $4, %0\n\t"
1688c2ecf20Sopenharmony_ci		"jb 9f\n\t"
1698c2ecf20Sopenharmony_ci		"mov 0*4(%1), %3\n\t"
1708c2ecf20Sopenharmony_ci		"mov -1*4(%1, %0), %4\n\t"
1718c2ecf20Sopenharmony_ci		"mov  %3, 0*4(%2)\n\t"
1728c2ecf20Sopenharmony_ci		"mov  %4, -1*4(%2, %0)\n\t"
1738c2ecf20Sopenharmony_ci		"jmp 11f\n\t"
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci		/*
1768c2ecf20Sopenharmony_ci		 * Move data from 2 bytes to 3 bytes.
1778c2ecf20Sopenharmony_ci		 */
1788c2ecf20Sopenharmony_ci		".p2align 4\n\t"
1798c2ecf20Sopenharmony_ci		"9:\n\t"
1808c2ecf20Sopenharmony_ci		"cmp $2, %0\n\t"
1818c2ecf20Sopenharmony_ci		"jb 10f\n\t"
1828c2ecf20Sopenharmony_ci		"movw 0*2(%1), %%dx\n\t"
1838c2ecf20Sopenharmony_ci		"movw -1*2(%1, %0), %%bx\n\t"
1848c2ecf20Sopenharmony_ci		"movw %%dx, 0*2(%2)\n\t"
1858c2ecf20Sopenharmony_ci		"movw %%bx, -1*2(%2, %0)\n\t"
1868c2ecf20Sopenharmony_ci		"jmp 11f\n\t"
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci		/*
1898c2ecf20Sopenharmony_ci		 * Move data for 1 byte.
1908c2ecf20Sopenharmony_ci		 */
1918c2ecf20Sopenharmony_ci		".p2align 4\n\t"
1928c2ecf20Sopenharmony_ci		"10:\n\t"
1938c2ecf20Sopenharmony_ci		"cmp $1, %0\n\t"
1948c2ecf20Sopenharmony_ci		"jb 11f\n\t"
1958c2ecf20Sopenharmony_ci		"movb (%1), %%cl\n\t"
1968c2ecf20Sopenharmony_ci		"movb %%cl, (%2)\n\t"
1978c2ecf20Sopenharmony_ci		".p2align 4\n\t"
1988c2ecf20Sopenharmony_ci		"11:"
1998c2ecf20Sopenharmony_ci		: "=&c" (d0), "=&S" (d1), "=&D" (d2),
2008c2ecf20Sopenharmony_ci		  "=r" (d3),"=r" (d4), "=r"(d5)
2018c2ecf20Sopenharmony_ci		:"0" (n),
2028c2ecf20Sopenharmony_ci		 "1" (src),
2038c2ecf20Sopenharmony_ci		 "2" (dest)
2048c2ecf20Sopenharmony_ci		:"memory");
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci	return ret;
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_ci}
2098c2ecf20Sopenharmony_ciEXPORT_SYMBOL(memmove);
210