// SPDX-License-Identifier: GPL-2.0
/*
 *	MMX 3DNow! library helper functions
 *
 *	To do:
 *	We can use MMX just for prefetch in IRQs. This may be a win.
 *		(reported so on K6-III)
 *	We should use a better code neutral filler for the short jump
 *		leal ebx, [ebx] is apparently best for K6-2, but Cyrix ??
 *	We also want to clobber the filler register so we don't get any
 *		register forwarding stalls on the filler.
 *
 *	Add *user handling. Checksums are not a win with MMX on any CPU
 *	tested so far for any MMX solution figured.
 *
 *	22/09/2000 - Arjan van de Ven
 *		Improved for non-engineering-sample Athlons
 *
 */
208c2ecf20Sopenharmony_ci#include <linux/hardirq.h>
218c2ecf20Sopenharmony_ci#include <linux/string.h>
228c2ecf20Sopenharmony_ci#include <linux/export.h>
238c2ecf20Sopenharmony_ci#include <linux/sched.h>
248c2ecf20Sopenharmony_ci#include <linux/types.h>
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_ci#include <asm/fpu/api.h>
278c2ecf20Sopenharmony_ci#include <asm/asm.h>
288c2ecf20Sopenharmony_ci
/*
 * Use KFPU_387.  MMX instructions are not affected by MXCSR,
 * but both AMD and Intel documentation state that even integer MMX
 * operations will result in #MF if an exception is pending in FCW.
 *
 * EMMS is not needed afterwards because, after calling kernel_fpu_end(),
 * any subsequent user of the 387 stack will reinitialize it using
 * KFPU_387.
 */
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_civoid *_mmx_memcpy(void *to, const void *from, size_t len)
408c2ecf20Sopenharmony_ci{
418c2ecf20Sopenharmony_ci	void *p;
428c2ecf20Sopenharmony_ci	int i;
438c2ecf20Sopenharmony_ci
448c2ecf20Sopenharmony_ci	if (unlikely(in_interrupt()))
458c2ecf20Sopenharmony_ci		return __memcpy(to, from, len);
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci	p = to;
488c2ecf20Sopenharmony_ci	i = len >> 6; /* len/64 */
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci	kernel_fpu_begin_mask(KFPU_387);
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci	__asm__ __volatile__ (
538c2ecf20Sopenharmony_ci		"1: prefetch (%0)\n"		/* This set is 28 bytes */
548c2ecf20Sopenharmony_ci		"   prefetch 64(%0)\n"
558c2ecf20Sopenharmony_ci		"   prefetch 128(%0)\n"
568c2ecf20Sopenharmony_ci		"   prefetch 192(%0)\n"
578c2ecf20Sopenharmony_ci		"   prefetch 256(%0)\n"
588c2ecf20Sopenharmony_ci		"2:  \n"
598c2ecf20Sopenharmony_ci		".section .fixup, \"ax\"\n"
608c2ecf20Sopenharmony_ci		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
618c2ecf20Sopenharmony_ci		"   jmp 2b\n"
628c2ecf20Sopenharmony_ci		".previous\n"
638c2ecf20Sopenharmony_ci			_ASM_EXTABLE(1b, 3b)
648c2ecf20Sopenharmony_ci			: : "r" (from));
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	for ( ; i > 5; i--) {
678c2ecf20Sopenharmony_ci		__asm__ __volatile__ (
688c2ecf20Sopenharmony_ci		"1:  prefetch 320(%0)\n"
698c2ecf20Sopenharmony_ci		"2:  movq (%0), %%mm0\n"
708c2ecf20Sopenharmony_ci		"  movq 8(%0), %%mm1\n"
718c2ecf20Sopenharmony_ci		"  movq 16(%0), %%mm2\n"
728c2ecf20Sopenharmony_ci		"  movq 24(%0), %%mm3\n"
738c2ecf20Sopenharmony_ci		"  movq %%mm0, (%1)\n"
748c2ecf20Sopenharmony_ci		"  movq %%mm1, 8(%1)\n"
758c2ecf20Sopenharmony_ci		"  movq %%mm2, 16(%1)\n"
768c2ecf20Sopenharmony_ci		"  movq %%mm3, 24(%1)\n"
778c2ecf20Sopenharmony_ci		"  movq 32(%0), %%mm0\n"
788c2ecf20Sopenharmony_ci		"  movq 40(%0), %%mm1\n"
798c2ecf20Sopenharmony_ci		"  movq 48(%0), %%mm2\n"
808c2ecf20Sopenharmony_ci		"  movq 56(%0), %%mm3\n"
818c2ecf20Sopenharmony_ci		"  movq %%mm0, 32(%1)\n"
828c2ecf20Sopenharmony_ci		"  movq %%mm1, 40(%1)\n"
838c2ecf20Sopenharmony_ci		"  movq %%mm2, 48(%1)\n"
848c2ecf20Sopenharmony_ci		"  movq %%mm3, 56(%1)\n"
858c2ecf20Sopenharmony_ci		".section .fixup, \"ax\"\n"
868c2ecf20Sopenharmony_ci		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
878c2ecf20Sopenharmony_ci		"   jmp 2b\n"
888c2ecf20Sopenharmony_ci		".previous\n"
898c2ecf20Sopenharmony_ci			_ASM_EXTABLE(1b, 3b)
908c2ecf20Sopenharmony_ci			: : "r" (from), "r" (to) : "memory");
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci		from += 64;
938c2ecf20Sopenharmony_ci		to += 64;
948c2ecf20Sopenharmony_ci	}
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_ci	for ( ; i > 0; i--) {
978c2ecf20Sopenharmony_ci		__asm__ __volatile__ (
988c2ecf20Sopenharmony_ci		"  movq (%0), %%mm0\n"
998c2ecf20Sopenharmony_ci		"  movq 8(%0), %%mm1\n"
1008c2ecf20Sopenharmony_ci		"  movq 16(%0), %%mm2\n"
1018c2ecf20Sopenharmony_ci		"  movq 24(%0), %%mm3\n"
1028c2ecf20Sopenharmony_ci		"  movq %%mm0, (%1)\n"
1038c2ecf20Sopenharmony_ci		"  movq %%mm1, 8(%1)\n"
1048c2ecf20Sopenharmony_ci		"  movq %%mm2, 16(%1)\n"
1058c2ecf20Sopenharmony_ci		"  movq %%mm3, 24(%1)\n"
1068c2ecf20Sopenharmony_ci		"  movq 32(%0), %%mm0\n"
1078c2ecf20Sopenharmony_ci		"  movq 40(%0), %%mm1\n"
1088c2ecf20Sopenharmony_ci		"  movq 48(%0), %%mm2\n"
1098c2ecf20Sopenharmony_ci		"  movq 56(%0), %%mm3\n"
1108c2ecf20Sopenharmony_ci		"  movq %%mm0, 32(%1)\n"
1118c2ecf20Sopenharmony_ci		"  movq %%mm1, 40(%1)\n"
1128c2ecf20Sopenharmony_ci		"  movq %%mm2, 48(%1)\n"
1138c2ecf20Sopenharmony_ci		"  movq %%mm3, 56(%1)\n"
1148c2ecf20Sopenharmony_ci			: : "r" (from), "r" (to) : "memory");
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_ci		from += 64;
1178c2ecf20Sopenharmony_ci		to += 64;
1188c2ecf20Sopenharmony_ci	}
1198c2ecf20Sopenharmony_ci	/*
1208c2ecf20Sopenharmony_ci	 * Now do the tail of the block:
1218c2ecf20Sopenharmony_ci	 */
1228c2ecf20Sopenharmony_ci	__memcpy(to, from, len & 63);
1238c2ecf20Sopenharmony_ci	kernel_fpu_end();
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci	return p;
1268c2ecf20Sopenharmony_ci}
1278c2ecf20Sopenharmony_ciEXPORT_SYMBOL(_mmx_memcpy);
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci#ifdef CONFIG_MK7
1308c2ecf20Sopenharmony_ci
/*
 *	The K7 has streaming cache bypass load/store. The Cyrix III, K6 and
 *	other MMX using processors do not.
 */
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_cistatic void fast_clear_page(void *page)
1378c2ecf20Sopenharmony_ci{
1388c2ecf20Sopenharmony_ci	int i;
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci	kernel_fpu_begin_mask(KFPU_387);
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_ci	__asm__ __volatile__ (
1438c2ecf20Sopenharmony_ci		"  pxor %%mm0, %%mm0\n" : :
1448c2ecf20Sopenharmony_ci	);
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci	for (i = 0; i < 4096/64; i++) {
1478c2ecf20Sopenharmony_ci		__asm__ __volatile__ (
1488c2ecf20Sopenharmony_ci		"  movntq %%mm0, (%0)\n"
1498c2ecf20Sopenharmony_ci		"  movntq %%mm0, 8(%0)\n"
1508c2ecf20Sopenharmony_ci		"  movntq %%mm0, 16(%0)\n"
1518c2ecf20Sopenharmony_ci		"  movntq %%mm0, 24(%0)\n"
1528c2ecf20Sopenharmony_ci		"  movntq %%mm0, 32(%0)\n"
1538c2ecf20Sopenharmony_ci		"  movntq %%mm0, 40(%0)\n"
1548c2ecf20Sopenharmony_ci		"  movntq %%mm0, 48(%0)\n"
1558c2ecf20Sopenharmony_ci		"  movntq %%mm0, 56(%0)\n"
1568c2ecf20Sopenharmony_ci		: : "r" (page) : "memory");
1578c2ecf20Sopenharmony_ci		page += 64;
1588c2ecf20Sopenharmony_ci	}
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci	/*
1618c2ecf20Sopenharmony_ci	 * Since movntq is weakly-ordered, a "sfence" is needed to become
1628c2ecf20Sopenharmony_ci	 * ordered again:
1638c2ecf20Sopenharmony_ci	 */
1648c2ecf20Sopenharmony_ci	__asm__ __volatile__("sfence\n"::);
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_ci	kernel_fpu_end();
1678c2ecf20Sopenharmony_ci}
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_cistatic void fast_copy_page(void *to, void *from)
1708c2ecf20Sopenharmony_ci{
1718c2ecf20Sopenharmony_ci	int i;
1728c2ecf20Sopenharmony_ci
1738c2ecf20Sopenharmony_ci	kernel_fpu_begin_mask(KFPU_387);
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	/*
1768c2ecf20Sopenharmony_ci	 * maybe the prefetch stuff can go before the expensive fnsave...
1778c2ecf20Sopenharmony_ci	 * but that is for later. -AV
1788c2ecf20Sopenharmony_ci	 */
1798c2ecf20Sopenharmony_ci	__asm__ __volatile__(
1808c2ecf20Sopenharmony_ci		"1: prefetch (%0)\n"
1818c2ecf20Sopenharmony_ci		"   prefetch 64(%0)\n"
1828c2ecf20Sopenharmony_ci		"   prefetch 128(%0)\n"
1838c2ecf20Sopenharmony_ci		"   prefetch 192(%0)\n"
1848c2ecf20Sopenharmony_ci		"   prefetch 256(%0)\n"
1858c2ecf20Sopenharmony_ci		"2:  \n"
1868c2ecf20Sopenharmony_ci		".section .fixup, \"ax\"\n"
1878c2ecf20Sopenharmony_ci		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
1888c2ecf20Sopenharmony_ci		"   jmp 2b\n"
1898c2ecf20Sopenharmony_ci		".previous\n"
1908c2ecf20Sopenharmony_ci			_ASM_EXTABLE(1b, 3b) : : "r" (from));
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_ci	for (i = 0; i < (4096-320)/64; i++) {
1938c2ecf20Sopenharmony_ci		__asm__ __volatile__ (
1948c2ecf20Sopenharmony_ci		"1: prefetch 320(%0)\n"
1958c2ecf20Sopenharmony_ci		"2: movq (%0), %%mm0\n"
1968c2ecf20Sopenharmony_ci		"   movntq %%mm0, (%1)\n"
1978c2ecf20Sopenharmony_ci		"   movq 8(%0), %%mm1\n"
1988c2ecf20Sopenharmony_ci		"   movntq %%mm1, 8(%1)\n"
1998c2ecf20Sopenharmony_ci		"   movq 16(%0), %%mm2\n"
2008c2ecf20Sopenharmony_ci		"   movntq %%mm2, 16(%1)\n"
2018c2ecf20Sopenharmony_ci		"   movq 24(%0), %%mm3\n"
2028c2ecf20Sopenharmony_ci		"   movntq %%mm3, 24(%1)\n"
2038c2ecf20Sopenharmony_ci		"   movq 32(%0), %%mm4\n"
2048c2ecf20Sopenharmony_ci		"   movntq %%mm4, 32(%1)\n"
2058c2ecf20Sopenharmony_ci		"   movq 40(%0), %%mm5\n"
2068c2ecf20Sopenharmony_ci		"   movntq %%mm5, 40(%1)\n"
2078c2ecf20Sopenharmony_ci		"   movq 48(%0), %%mm6\n"
2088c2ecf20Sopenharmony_ci		"   movntq %%mm6, 48(%1)\n"
2098c2ecf20Sopenharmony_ci		"   movq 56(%0), %%mm7\n"
2108c2ecf20Sopenharmony_ci		"   movntq %%mm7, 56(%1)\n"
2118c2ecf20Sopenharmony_ci		".section .fixup, \"ax\"\n"
2128c2ecf20Sopenharmony_ci		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
2138c2ecf20Sopenharmony_ci		"   jmp 2b\n"
2148c2ecf20Sopenharmony_ci		".previous\n"
2158c2ecf20Sopenharmony_ci		_ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory");
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_ci		from += 64;
2188c2ecf20Sopenharmony_ci		to += 64;
2198c2ecf20Sopenharmony_ci	}
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci	for (i = (4096-320)/64; i < 4096/64; i++) {
2228c2ecf20Sopenharmony_ci		__asm__ __volatile__ (
2238c2ecf20Sopenharmony_ci		"2: movq (%0), %%mm0\n"
2248c2ecf20Sopenharmony_ci		"   movntq %%mm0, (%1)\n"
2258c2ecf20Sopenharmony_ci		"   movq 8(%0), %%mm1\n"
2268c2ecf20Sopenharmony_ci		"   movntq %%mm1, 8(%1)\n"
2278c2ecf20Sopenharmony_ci		"   movq 16(%0), %%mm2\n"
2288c2ecf20Sopenharmony_ci		"   movntq %%mm2, 16(%1)\n"
2298c2ecf20Sopenharmony_ci		"   movq 24(%0), %%mm3\n"
2308c2ecf20Sopenharmony_ci		"   movntq %%mm3, 24(%1)\n"
2318c2ecf20Sopenharmony_ci		"   movq 32(%0), %%mm4\n"
2328c2ecf20Sopenharmony_ci		"   movntq %%mm4, 32(%1)\n"
2338c2ecf20Sopenharmony_ci		"   movq 40(%0), %%mm5\n"
2348c2ecf20Sopenharmony_ci		"   movntq %%mm5, 40(%1)\n"
2358c2ecf20Sopenharmony_ci		"   movq 48(%0), %%mm6\n"
2368c2ecf20Sopenharmony_ci		"   movntq %%mm6, 48(%1)\n"
2378c2ecf20Sopenharmony_ci		"   movq 56(%0), %%mm7\n"
2388c2ecf20Sopenharmony_ci		"   movntq %%mm7, 56(%1)\n"
2398c2ecf20Sopenharmony_ci			: : "r" (from), "r" (to) : "memory");
2408c2ecf20Sopenharmony_ci		from += 64;
2418c2ecf20Sopenharmony_ci		to += 64;
2428c2ecf20Sopenharmony_ci	}
2438c2ecf20Sopenharmony_ci	/*
2448c2ecf20Sopenharmony_ci	 * Since movntq is weakly-ordered, a "sfence" is needed to become
2458c2ecf20Sopenharmony_ci	 * ordered again:
2468c2ecf20Sopenharmony_ci	 */
2478c2ecf20Sopenharmony_ci	__asm__ __volatile__("sfence \n"::);
2488c2ecf20Sopenharmony_ci	kernel_fpu_end();
2498c2ecf20Sopenharmony_ci}
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_ci#else /* CONFIG_MK7 */
2528c2ecf20Sopenharmony_ci
/*
 *	Generic MMX implementation without K7 specific streaming
 */
2568c2ecf20Sopenharmony_cistatic void fast_clear_page(void *page)
2578c2ecf20Sopenharmony_ci{
2588c2ecf20Sopenharmony_ci	int i;
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_ci	kernel_fpu_begin_mask(KFPU_387);
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci	__asm__ __volatile__ (
2638c2ecf20Sopenharmony_ci		"  pxor %%mm0, %%mm0\n" : :
2648c2ecf20Sopenharmony_ci	);
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_ci	for (i = 0; i < 4096/128; i++) {
2678c2ecf20Sopenharmony_ci		__asm__ __volatile__ (
2688c2ecf20Sopenharmony_ci		"  movq %%mm0, (%0)\n"
2698c2ecf20Sopenharmony_ci		"  movq %%mm0, 8(%0)\n"
2708c2ecf20Sopenharmony_ci		"  movq %%mm0, 16(%0)\n"
2718c2ecf20Sopenharmony_ci		"  movq %%mm0, 24(%0)\n"
2728c2ecf20Sopenharmony_ci		"  movq %%mm0, 32(%0)\n"
2738c2ecf20Sopenharmony_ci		"  movq %%mm0, 40(%0)\n"
2748c2ecf20Sopenharmony_ci		"  movq %%mm0, 48(%0)\n"
2758c2ecf20Sopenharmony_ci		"  movq %%mm0, 56(%0)\n"
2768c2ecf20Sopenharmony_ci		"  movq %%mm0, 64(%0)\n"
2778c2ecf20Sopenharmony_ci		"  movq %%mm0, 72(%0)\n"
2788c2ecf20Sopenharmony_ci		"  movq %%mm0, 80(%0)\n"
2798c2ecf20Sopenharmony_ci		"  movq %%mm0, 88(%0)\n"
2808c2ecf20Sopenharmony_ci		"  movq %%mm0, 96(%0)\n"
2818c2ecf20Sopenharmony_ci		"  movq %%mm0, 104(%0)\n"
2828c2ecf20Sopenharmony_ci		"  movq %%mm0, 112(%0)\n"
2838c2ecf20Sopenharmony_ci		"  movq %%mm0, 120(%0)\n"
2848c2ecf20Sopenharmony_ci			: : "r" (page) : "memory");
2858c2ecf20Sopenharmony_ci		page += 128;
2868c2ecf20Sopenharmony_ci	}
2878c2ecf20Sopenharmony_ci
2888c2ecf20Sopenharmony_ci	kernel_fpu_end();
2898c2ecf20Sopenharmony_ci}
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_cistatic void fast_copy_page(void *to, void *from)
2928c2ecf20Sopenharmony_ci{
2938c2ecf20Sopenharmony_ci	int i;
2948c2ecf20Sopenharmony_ci
2958c2ecf20Sopenharmony_ci	kernel_fpu_begin_mask(KFPU_387);
2968c2ecf20Sopenharmony_ci
2978c2ecf20Sopenharmony_ci	__asm__ __volatile__ (
2988c2ecf20Sopenharmony_ci		"1: prefetch (%0)\n"
2998c2ecf20Sopenharmony_ci		"   prefetch 64(%0)\n"
3008c2ecf20Sopenharmony_ci		"   prefetch 128(%0)\n"
3018c2ecf20Sopenharmony_ci		"   prefetch 192(%0)\n"
3028c2ecf20Sopenharmony_ci		"   prefetch 256(%0)\n"
3038c2ecf20Sopenharmony_ci		"2:  \n"
3048c2ecf20Sopenharmony_ci		".section .fixup, \"ax\"\n"
3058c2ecf20Sopenharmony_ci		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
3068c2ecf20Sopenharmony_ci		"   jmp 2b\n"
3078c2ecf20Sopenharmony_ci		".previous\n"
3088c2ecf20Sopenharmony_ci			_ASM_EXTABLE(1b, 3b) : : "r" (from));
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_ci	for (i = 0; i < 4096/64; i++) {
3118c2ecf20Sopenharmony_ci		__asm__ __volatile__ (
3128c2ecf20Sopenharmony_ci		"1: prefetch 320(%0)\n"
3138c2ecf20Sopenharmony_ci		"2: movq (%0), %%mm0\n"
3148c2ecf20Sopenharmony_ci		"   movq 8(%0), %%mm1\n"
3158c2ecf20Sopenharmony_ci		"   movq 16(%0), %%mm2\n"
3168c2ecf20Sopenharmony_ci		"   movq 24(%0), %%mm3\n"
3178c2ecf20Sopenharmony_ci		"   movq %%mm0, (%1)\n"
3188c2ecf20Sopenharmony_ci		"   movq %%mm1, 8(%1)\n"
3198c2ecf20Sopenharmony_ci		"   movq %%mm2, 16(%1)\n"
3208c2ecf20Sopenharmony_ci		"   movq %%mm3, 24(%1)\n"
3218c2ecf20Sopenharmony_ci		"   movq 32(%0), %%mm0\n"
3228c2ecf20Sopenharmony_ci		"   movq 40(%0), %%mm1\n"
3238c2ecf20Sopenharmony_ci		"   movq 48(%0), %%mm2\n"
3248c2ecf20Sopenharmony_ci		"   movq 56(%0), %%mm3\n"
3258c2ecf20Sopenharmony_ci		"   movq %%mm0, 32(%1)\n"
3268c2ecf20Sopenharmony_ci		"   movq %%mm1, 40(%1)\n"
3278c2ecf20Sopenharmony_ci		"   movq %%mm2, 48(%1)\n"
3288c2ecf20Sopenharmony_ci		"   movq %%mm3, 56(%1)\n"
3298c2ecf20Sopenharmony_ci		".section .fixup, \"ax\"\n"
3308c2ecf20Sopenharmony_ci		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
3318c2ecf20Sopenharmony_ci		"   jmp 2b\n"
3328c2ecf20Sopenharmony_ci		".previous\n"
3338c2ecf20Sopenharmony_ci			_ASM_EXTABLE(1b, 3b)
3348c2ecf20Sopenharmony_ci			: : "r" (from), "r" (to) : "memory");
3358c2ecf20Sopenharmony_ci
3368c2ecf20Sopenharmony_ci		from += 64;
3378c2ecf20Sopenharmony_ci		to += 64;
3388c2ecf20Sopenharmony_ci	}
3398c2ecf20Sopenharmony_ci	kernel_fpu_end();
3408c2ecf20Sopenharmony_ci}
3418c2ecf20Sopenharmony_ci
3428c2ecf20Sopenharmony_ci#endif /* !CONFIG_MK7 */
3438c2ecf20Sopenharmony_ci
/*
 * Favour MMX for page clear and copy.  The slow_*() string-instruction
 * variants are the fallbacks used when called in interrupt context.
 */
/*
 * Zero a page without touching MMX state: "rep stosl" stores 1024
 * 32-bit zeros (4096 bytes) starting at @page.
 */
static void slow_zero_page(void *page)
{
	/* Dummy outputs: the count and destination registers are modified. */
	int count_out, dest_out;

	__asm__ __volatile__(
		"cld\n\t"
		"rep ; stosl"
		: "=&c" (count_out), "=&D" (dest_out)
		: "a" (0), "1" (page), "0" (1024)
		: "memory");
}
3598c2ecf20Sopenharmony_ci
/*
 * mmx_clear_page - zero one page at @page.
 *
 * Uses the string-instruction fallback in interrupt context and the MMX
 * implementation everywhere else.
 */
void mmx_clear_page(void *page)
{
	if (unlikely(in_interrupt())) {
		slow_zero_page(page);
		return;
	}

	fast_clear_page(page);
}
EXPORT_SYMBOL(mmx_clear_page);
3688c2ecf20Sopenharmony_ci
/*
 * Copy a page without touching MMX state: "rep movsl" moves 1024
 * 32-bit words (4096 bytes) from @from to @to.
 */
static void slow_copy_page(void *to, void *from)
{
	/* Dummy outputs: count, destination and source registers change. */
	int count_out, dest_out, src_out;

	__asm__ __volatile__(
		"cld\n\t"
		"rep ; movsl"
		: "=&c" (count_out), "=&D" (dest_out), "=&S" (src_out)
		: "0" (1024), "1" ((long) to), "2" ((long) from)
		: "memory");
}
3808c2ecf20Sopenharmony_ci
/*
 * mmx_copy_page - copy one page from @from to @to.
 *
 * Uses the string-instruction fallback in interrupt context and the MMX
 * implementation everywhere else.
 */
void mmx_copy_page(void *to, void *from)
{
	if (unlikely(in_interrupt())) {
		slow_copy_page(to, from);
		return;
	}

	fast_copy_page(to, from);
}
EXPORT_SYMBOL(mmx_copy_page);
389