18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * MMX 3DNow! library helper functions 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * To do: 68c2ecf20Sopenharmony_ci * We can use MMX just for prefetch in IRQ's. This may be a win. 78c2ecf20Sopenharmony_ci * (reported so on K6-III) 88c2ecf20Sopenharmony_ci * We should use a better code neutral filler for the short jump 98c2ecf20Sopenharmony_ci * leal ebx. [ebx] is apparently best for K6-2, but Cyrix ?? 108c2ecf20Sopenharmony_ci * We also want to clobber the filler register so we don't get any 118c2ecf20Sopenharmony_ci * register forwarding stalls on the filler. 128c2ecf20Sopenharmony_ci * 138c2ecf20Sopenharmony_ci * Add *user handling. Checksums are not a win with MMX on any CPU 148c2ecf20Sopenharmony_ci * tested so far for any MMX solution figured. 158c2ecf20Sopenharmony_ci * 168c2ecf20Sopenharmony_ci * 22/09/2000 - Arjan van de Ven 178c2ecf20Sopenharmony_ci * Improved for non-egineering-sample Athlons 188c2ecf20Sopenharmony_ci * 198c2ecf20Sopenharmony_ci */ 208c2ecf20Sopenharmony_ci#include <linux/hardirq.h> 218c2ecf20Sopenharmony_ci#include <linux/string.h> 228c2ecf20Sopenharmony_ci#include <linux/export.h> 238c2ecf20Sopenharmony_ci#include <linux/sched.h> 248c2ecf20Sopenharmony_ci#include <linux/types.h> 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_ci#include <asm/fpu/api.h> 278c2ecf20Sopenharmony_ci#include <asm/asm.h> 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_ci/* 308c2ecf20Sopenharmony_ci * Use KFPU_387. MMX instructions are not affected by MXCSR, 318c2ecf20Sopenharmony_ci * but both AMD and Intel documentation states that even integer MMX 328c2ecf20Sopenharmony_ci * operations will result in #MF if an exception is pending in FCW. 338c2ecf20Sopenharmony_ci * 348c2ecf20Sopenharmony_ci * EMMS is not needed afterwards because, after calling kernel_fpu_end(), 358c2ecf20Sopenharmony_ci * any subsequent user of the 387 stack will reinitialize it using 368c2ecf20Sopenharmony_ci * KFPU_387. 378c2ecf20Sopenharmony_ci */ 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_civoid *_mmx_memcpy(void *to, const void *from, size_t len) 408c2ecf20Sopenharmony_ci{ 418c2ecf20Sopenharmony_ci void *p; 428c2ecf20Sopenharmony_ci int i; 438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_ci if (unlikely(in_interrupt())) 458c2ecf20Sopenharmony_ci return __memcpy(to, from, len); 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci p = to; 488c2ecf20Sopenharmony_ci i = len >> 6; /* len/64 */ 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci kernel_fpu_begin_mask(KFPU_387); 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 538c2ecf20Sopenharmony_ci "1: prefetch (%0)\n" /* This set is 28 bytes */ 548c2ecf20Sopenharmony_ci " prefetch 64(%0)\n" 558c2ecf20Sopenharmony_ci " prefetch 128(%0)\n" 568c2ecf20Sopenharmony_ci " prefetch 192(%0)\n" 578c2ecf20Sopenharmony_ci " prefetch 256(%0)\n" 588c2ecf20Sopenharmony_ci "2: \n" 598c2ecf20Sopenharmony_ci ".section .fixup, \"ax\"\n" 608c2ecf20Sopenharmony_ci "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ 618c2ecf20Sopenharmony_ci " jmp 2b\n" 628c2ecf20Sopenharmony_ci ".previous\n" 638c2ecf20Sopenharmony_ci _ASM_EXTABLE(1b, 3b) 648c2ecf20Sopenharmony_ci : : "r" (from)); 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci for ( ; i > 5; i--) { 678c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 688c2ecf20Sopenharmony_ci "1: prefetch 320(%0)\n" 698c2ecf20Sopenharmony_ci "2: movq (%0), %%mm0\n" 708c2ecf20Sopenharmony_ci " movq 8(%0), %%mm1\n" 718c2ecf20Sopenharmony_ci " movq 16(%0), %%mm2\n" 728c2ecf20Sopenharmony_ci " movq 24(%0), %%mm3\n" 738c2ecf20Sopenharmony_ci " movq %%mm0, (%1)\n" 748c2ecf20Sopenharmony_ci " movq %%mm1, 8(%1)\n" 758c2ecf20Sopenharmony_ci " movq %%mm2, 16(%1)\n" 768c2ecf20Sopenharmony_ci " movq %%mm3, 24(%1)\n" 778c2ecf20Sopenharmony_ci " movq 32(%0), %%mm0\n" 788c2ecf20Sopenharmony_ci " movq 40(%0), %%mm1\n" 798c2ecf20Sopenharmony_ci " movq 48(%0), %%mm2\n" 808c2ecf20Sopenharmony_ci " movq 56(%0), %%mm3\n" 818c2ecf20Sopenharmony_ci " movq %%mm0, 32(%1)\n" 828c2ecf20Sopenharmony_ci " movq %%mm1, 40(%1)\n" 838c2ecf20Sopenharmony_ci " movq %%mm2, 48(%1)\n" 848c2ecf20Sopenharmony_ci " movq %%mm3, 56(%1)\n" 858c2ecf20Sopenharmony_ci ".section .fixup, \"ax\"\n" 868c2ecf20Sopenharmony_ci "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ 878c2ecf20Sopenharmony_ci " jmp 2b\n" 888c2ecf20Sopenharmony_ci ".previous\n" 898c2ecf20Sopenharmony_ci _ASM_EXTABLE(1b, 3b) 908c2ecf20Sopenharmony_ci : : "r" (from), "r" (to) : "memory"); 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci from += 64; 938c2ecf20Sopenharmony_ci to += 64; 948c2ecf20Sopenharmony_ci } 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci for ( ; i > 0; i--) { 978c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 988c2ecf20Sopenharmony_ci " movq (%0), %%mm0\n" 998c2ecf20Sopenharmony_ci " movq 8(%0), %%mm1\n" 1008c2ecf20Sopenharmony_ci " movq 16(%0), %%mm2\n" 1018c2ecf20Sopenharmony_ci " movq 24(%0), %%mm3\n" 1028c2ecf20Sopenharmony_ci " movq %%mm0, (%1)\n" 1038c2ecf20Sopenharmony_ci " movq %%mm1, 8(%1)\n" 1048c2ecf20Sopenharmony_ci " movq %%mm2, 16(%1)\n" 1058c2ecf20Sopenharmony_ci " movq %%mm3, 24(%1)\n" 1068c2ecf20Sopenharmony_ci " movq 32(%0), %%mm0\n" 1078c2ecf20Sopenharmony_ci " movq 40(%0), %%mm1\n" 1088c2ecf20Sopenharmony_ci " movq 48(%0), %%mm2\n" 1098c2ecf20Sopenharmony_ci " movq 56(%0), %%mm3\n" 1108c2ecf20Sopenharmony_ci " movq %%mm0, 32(%1)\n" 1118c2ecf20Sopenharmony_ci " movq %%mm1, 40(%1)\n" 1128c2ecf20Sopenharmony_ci " movq %%mm2, 48(%1)\n" 1138c2ecf20Sopenharmony_ci " movq %%mm3, 56(%1)\n" 1148c2ecf20Sopenharmony_ci : : "r" (from), "r" (to) : "memory"); 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_ci from += 64; 1178c2ecf20Sopenharmony_ci to += 64; 1188c2ecf20Sopenharmony_ci } 1198c2ecf20Sopenharmony_ci /* 1208c2ecf20Sopenharmony_ci * Now do the tail of the block: 1218c2ecf20Sopenharmony_ci */ 1228c2ecf20Sopenharmony_ci __memcpy(to, from, len & 63); 1238c2ecf20Sopenharmony_ci kernel_fpu_end(); 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci return p; 1268c2ecf20Sopenharmony_ci} 1278c2ecf20Sopenharmony_ciEXPORT_SYMBOL(_mmx_memcpy); 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_ci#ifdef CONFIG_MK7 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci/* 1328c2ecf20Sopenharmony_ci * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and 1338c2ecf20Sopenharmony_ci * other MMX using processors do not. 1348c2ecf20Sopenharmony_ci */ 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_cistatic void fast_clear_page(void *page) 1378c2ecf20Sopenharmony_ci{ 1388c2ecf20Sopenharmony_ci int i; 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci kernel_fpu_begin_mask(KFPU_387); 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 1438c2ecf20Sopenharmony_ci " pxor %%mm0, %%mm0\n" : : 1448c2ecf20Sopenharmony_ci ); 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_ci for (i = 0; i < 4096/64; i++) { 1478c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 1488c2ecf20Sopenharmony_ci " movntq %%mm0, (%0)\n" 1498c2ecf20Sopenharmony_ci " movntq %%mm0, 8(%0)\n" 1508c2ecf20Sopenharmony_ci " movntq %%mm0, 16(%0)\n" 1518c2ecf20Sopenharmony_ci " movntq %%mm0, 24(%0)\n" 1528c2ecf20Sopenharmony_ci " movntq %%mm0, 32(%0)\n" 1538c2ecf20Sopenharmony_ci " movntq %%mm0, 40(%0)\n" 1548c2ecf20Sopenharmony_ci " movntq %%mm0, 48(%0)\n" 1558c2ecf20Sopenharmony_ci " movntq %%mm0, 56(%0)\n" 1568c2ecf20Sopenharmony_ci : : "r" (page) : "memory"); 1578c2ecf20Sopenharmony_ci page += 64; 1588c2ecf20Sopenharmony_ci } 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci /* 1618c2ecf20Sopenharmony_ci * Since movntq is weakly-ordered, a "sfence" is needed to become 1628c2ecf20Sopenharmony_ci * ordered again: 1638c2ecf20Sopenharmony_ci */ 1648c2ecf20Sopenharmony_ci __asm__ __volatile__("sfence\n"::); 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci kernel_fpu_end(); 1678c2ecf20Sopenharmony_ci} 1688c2ecf20Sopenharmony_ci 1698c2ecf20Sopenharmony_cistatic void fast_copy_page(void *to, void *from) 1708c2ecf20Sopenharmony_ci{ 1718c2ecf20Sopenharmony_ci int i; 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci kernel_fpu_begin_mask(KFPU_387); 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci /* 1768c2ecf20Sopenharmony_ci * maybe the prefetch stuff can go before the expensive fnsave... 1778c2ecf20Sopenharmony_ci * but that is for later. -AV 1788c2ecf20Sopenharmony_ci */ 1798c2ecf20Sopenharmony_ci __asm__ __volatile__( 1808c2ecf20Sopenharmony_ci "1: prefetch (%0)\n" 1818c2ecf20Sopenharmony_ci " prefetch 64(%0)\n" 1828c2ecf20Sopenharmony_ci " prefetch 128(%0)\n" 1838c2ecf20Sopenharmony_ci " prefetch 192(%0)\n" 1848c2ecf20Sopenharmony_ci " prefetch 256(%0)\n" 1858c2ecf20Sopenharmony_ci "2: \n" 1868c2ecf20Sopenharmony_ci ".section .fixup, \"ax\"\n" 1878c2ecf20Sopenharmony_ci "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ 1888c2ecf20Sopenharmony_ci " jmp 2b\n" 1898c2ecf20Sopenharmony_ci ".previous\n" 1908c2ecf20Sopenharmony_ci _ASM_EXTABLE(1b, 3b) : : "r" (from)); 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci for (i = 0; i < (4096-320)/64; i++) { 1938c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 1948c2ecf20Sopenharmony_ci "1: prefetch 320(%0)\n" 1958c2ecf20Sopenharmony_ci "2: movq (%0), %%mm0\n" 1968c2ecf20Sopenharmony_ci " movntq %%mm0, (%1)\n" 1978c2ecf20Sopenharmony_ci " movq 8(%0), %%mm1\n" 1988c2ecf20Sopenharmony_ci " movntq %%mm1, 8(%1)\n" 1998c2ecf20Sopenharmony_ci " movq 16(%0), %%mm2\n" 2008c2ecf20Sopenharmony_ci " movntq %%mm2, 16(%1)\n" 2018c2ecf20Sopenharmony_ci " movq 24(%0), %%mm3\n" 2028c2ecf20Sopenharmony_ci " movntq %%mm3, 24(%1)\n" 2038c2ecf20Sopenharmony_ci " movq 32(%0), %%mm4\n" 2048c2ecf20Sopenharmony_ci " movntq %%mm4, 32(%1)\n" 2058c2ecf20Sopenharmony_ci " movq 40(%0), %%mm5\n" 2068c2ecf20Sopenharmony_ci " movntq %%mm5, 40(%1)\n" 2078c2ecf20Sopenharmony_ci " movq 48(%0), %%mm6\n" 2088c2ecf20Sopenharmony_ci " movntq %%mm6, 48(%1)\n" 2098c2ecf20Sopenharmony_ci " movq 56(%0), %%mm7\n" 2108c2ecf20Sopenharmony_ci " movntq %%mm7, 56(%1)\n" 2118c2ecf20Sopenharmony_ci ".section .fixup, \"ax\"\n" 2128c2ecf20Sopenharmony_ci "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ 2138c2ecf20Sopenharmony_ci " jmp 2b\n" 2148c2ecf20Sopenharmony_ci ".previous\n" 2158c2ecf20Sopenharmony_ci _ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory"); 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ci from += 64; 2188c2ecf20Sopenharmony_ci to += 64; 2198c2ecf20Sopenharmony_ci } 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci for (i = (4096-320)/64; i < 4096/64; i++) { 2228c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 2238c2ecf20Sopenharmony_ci "2: movq (%0), %%mm0\n" 2248c2ecf20Sopenharmony_ci " movntq %%mm0, (%1)\n" 2258c2ecf20Sopenharmony_ci " movq 8(%0), %%mm1\n" 2268c2ecf20Sopenharmony_ci " movntq %%mm1, 8(%1)\n" 2278c2ecf20Sopenharmony_ci " movq 16(%0), %%mm2\n" 2288c2ecf20Sopenharmony_ci " movntq %%mm2, 16(%1)\n" 2298c2ecf20Sopenharmony_ci " movq 24(%0), %%mm3\n" 2308c2ecf20Sopenharmony_ci " movntq %%mm3, 24(%1)\n" 2318c2ecf20Sopenharmony_ci " movq 32(%0), %%mm4\n" 2328c2ecf20Sopenharmony_ci " movntq %%mm4, 32(%1)\n" 2338c2ecf20Sopenharmony_ci " movq 40(%0), %%mm5\n" 2348c2ecf20Sopenharmony_ci " movntq %%mm5, 40(%1)\n" 2358c2ecf20Sopenharmony_ci " movq 48(%0), %%mm6\n" 2368c2ecf20Sopenharmony_ci " movntq %%mm6, 48(%1)\n" 2378c2ecf20Sopenharmony_ci " movq 56(%0), %%mm7\n" 2388c2ecf20Sopenharmony_ci " movntq %%mm7, 56(%1)\n" 2398c2ecf20Sopenharmony_ci : : "r" (from), "r" (to) : "memory"); 2408c2ecf20Sopenharmony_ci from += 64; 2418c2ecf20Sopenharmony_ci to += 64; 2428c2ecf20Sopenharmony_ci } 2438c2ecf20Sopenharmony_ci /* 2448c2ecf20Sopenharmony_ci * Since movntq is weakly-ordered, a "sfence" is needed to become 2458c2ecf20Sopenharmony_ci * ordered again: 2468c2ecf20Sopenharmony_ci */ 2478c2ecf20Sopenharmony_ci __asm__ __volatile__("sfence \n"::); 2488c2ecf20Sopenharmony_ci kernel_fpu_end(); 2498c2ecf20Sopenharmony_ci} 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_ci#else /* CONFIG_MK7 */ 2528c2ecf20Sopenharmony_ci 2538c2ecf20Sopenharmony_ci/* 2548c2ecf20Sopenharmony_ci * Generic MMX implementation without K7 specific streaming 2558c2ecf20Sopenharmony_ci */ 2568c2ecf20Sopenharmony_cistatic void fast_clear_page(void *page) 2578c2ecf20Sopenharmony_ci{ 2588c2ecf20Sopenharmony_ci int i; 2598c2ecf20Sopenharmony_ci 2608c2ecf20Sopenharmony_ci kernel_fpu_begin_mask(KFPU_387); 2618c2ecf20Sopenharmony_ci 2628c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 2638c2ecf20Sopenharmony_ci " pxor %%mm0, %%mm0\n" : : 2648c2ecf20Sopenharmony_ci ); 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci for (i = 0; i < 4096/128; i++) { 2678c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 2688c2ecf20Sopenharmony_ci " movq %%mm0, (%0)\n" 2698c2ecf20Sopenharmony_ci " movq %%mm0, 8(%0)\n" 2708c2ecf20Sopenharmony_ci " movq %%mm0, 16(%0)\n" 2718c2ecf20Sopenharmony_ci " movq %%mm0, 24(%0)\n" 2728c2ecf20Sopenharmony_ci " movq %%mm0, 32(%0)\n" 2738c2ecf20Sopenharmony_ci " movq %%mm0, 40(%0)\n" 2748c2ecf20Sopenharmony_ci " movq %%mm0, 48(%0)\n" 2758c2ecf20Sopenharmony_ci " movq %%mm0, 56(%0)\n" 2768c2ecf20Sopenharmony_ci " movq %%mm0, 64(%0)\n" 2778c2ecf20Sopenharmony_ci " movq %%mm0, 72(%0)\n" 2788c2ecf20Sopenharmony_ci " movq %%mm0, 80(%0)\n" 2798c2ecf20Sopenharmony_ci " movq %%mm0, 88(%0)\n" 2808c2ecf20Sopenharmony_ci " movq %%mm0, 96(%0)\n" 2818c2ecf20Sopenharmony_ci " movq %%mm0, 104(%0)\n" 2828c2ecf20Sopenharmony_ci " movq %%mm0, 112(%0)\n" 2838c2ecf20Sopenharmony_ci " movq %%mm0, 120(%0)\n" 2848c2ecf20Sopenharmony_ci : : "r" (page) : "memory"); 2858c2ecf20Sopenharmony_ci page += 128; 2868c2ecf20Sopenharmony_ci } 2878c2ecf20Sopenharmony_ci 2888c2ecf20Sopenharmony_ci kernel_fpu_end(); 2898c2ecf20Sopenharmony_ci} 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_cistatic void fast_copy_page(void *to, void *from) 2928c2ecf20Sopenharmony_ci{ 2938c2ecf20Sopenharmony_ci int i; 2948c2ecf20Sopenharmony_ci 2958c2ecf20Sopenharmony_ci kernel_fpu_begin_mask(KFPU_387); 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 2988c2ecf20Sopenharmony_ci "1: prefetch (%0)\n" 2998c2ecf20Sopenharmony_ci " prefetch 64(%0)\n" 3008c2ecf20Sopenharmony_ci " prefetch 128(%0)\n" 3018c2ecf20Sopenharmony_ci " prefetch 192(%0)\n" 3028c2ecf20Sopenharmony_ci " prefetch 256(%0)\n" 3038c2ecf20Sopenharmony_ci "2: \n" 3048c2ecf20Sopenharmony_ci ".section .fixup, \"ax\"\n" 3058c2ecf20Sopenharmony_ci "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ 3068c2ecf20Sopenharmony_ci " jmp 2b\n" 3078c2ecf20Sopenharmony_ci ".previous\n" 3088c2ecf20Sopenharmony_ci _ASM_EXTABLE(1b, 3b) : : "r" (from)); 3098c2ecf20Sopenharmony_ci 3108c2ecf20Sopenharmony_ci for (i = 0; i < 4096/64; i++) { 3118c2ecf20Sopenharmony_ci __asm__ __volatile__ ( 3128c2ecf20Sopenharmony_ci "1: prefetch 320(%0)\n" 3138c2ecf20Sopenharmony_ci "2: movq (%0), %%mm0\n" 3148c2ecf20Sopenharmony_ci " movq 8(%0), %%mm1\n" 3158c2ecf20Sopenharmony_ci " movq 16(%0), %%mm2\n" 3168c2ecf20Sopenharmony_ci " movq 24(%0), %%mm3\n" 3178c2ecf20Sopenharmony_ci " movq %%mm0, (%1)\n" 3188c2ecf20Sopenharmony_ci " movq %%mm1, 8(%1)\n" 3198c2ecf20Sopenharmony_ci " movq %%mm2, 16(%1)\n" 3208c2ecf20Sopenharmony_ci " movq %%mm3, 24(%1)\n" 3218c2ecf20Sopenharmony_ci " movq 32(%0), %%mm0\n" 3228c2ecf20Sopenharmony_ci " movq 40(%0), %%mm1\n" 3238c2ecf20Sopenharmony_ci " movq 48(%0), %%mm2\n" 3248c2ecf20Sopenharmony_ci " movq 56(%0), %%mm3\n" 3258c2ecf20Sopenharmony_ci " movq %%mm0, 32(%1)\n" 3268c2ecf20Sopenharmony_ci " movq %%mm1, 40(%1)\n" 3278c2ecf20Sopenharmony_ci " movq %%mm2, 48(%1)\n" 3288c2ecf20Sopenharmony_ci " movq %%mm3, 56(%1)\n" 3298c2ecf20Sopenharmony_ci ".section .fixup, \"ax\"\n" 3308c2ecf20Sopenharmony_ci "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ 3318c2ecf20Sopenharmony_ci " jmp 2b\n" 3328c2ecf20Sopenharmony_ci ".previous\n" 3338c2ecf20Sopenharmony_ci _ASM_EXTABLE(1b, 3b) 3348c2ecf20Sopenharmony_ci : : "r" (from), "r" (to) : "memory"); 3358c2ecf20Sopenharmony_ci 3368c2ecf20Sopenharmony_ci from += 64; 3378c2ecf20Sopenharmony_ci to += 64; 3388c2ecf20Sopenharmony_ci } 3398c2ecf20Sopenharmony_ci kernel_fpu_end(); 3408c2ecf20Sopenharmony_ci} 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci#endif /* !CONFIG_MK7 */ 3438c2ecf20Sopenharmony_ci 3448c2ecf20Sopenharmony_ci/* 3458c2ecf20Sopenharmony_ci * Favour MMX for page clear and copy: 3468c2ecf20Sopenharmony_ci */ 3478c2ecf20Sopenharmony_cistatic void slow_zero_page(void *page) 3488c2ecf20Sopenharmony_ci{ 3498c2ecf20Sopenharmony_ci int d0, d1; 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_ci __asm__ __volatile__( 3528c2ecf20Sopenharmony_ci "cld\n\t" 3538c2ecf20Sopenharmony_ci "rep ; stosl" 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci : "=&c" (d0), "=&D" (d1) 3568c2ecf20Sopenharmony_ci :"a" (0), "1" (page), "0" (1024) 3578c2ecf20Sopenharmony_ci :"memory"); 3588c2ecf20Sopenharmony_ci} 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_civoid mmx_clear_page(void *page) 3618c2ecf20Sopenharmony_ci{ 3628c2ecf20Sopenharmony_ci if (unlikely(in_interrupt())) 3638c2ecf20Sopenharmony_ci slow_zero_page(page); 3648c2ecf20Sopenharmony_ci else 3658c2ecf20Sopenharmony_ci fast_clear_page(page); 3668c2ecf20Sopenharmony_ci} 3678c2ecf20Sopenharmony_ciEXPORT_SYMBOL(mmx_clear_page); 3688c2ecf20Sopenharmony_ci 3698c2ecf20Sopenharmony_cistatic void slow_copy_page(void *to, void *from) 3708c2ecf20Sopenharmony_ci{ 3718c2ecf20Sopenharmony_ci int d0, d1, d2; 3728c2ecf20Sopenharmony_ci 3738c2ecf20Sopenharmony_ci __asm__ __volatile__( 3748c2ecf20Sopenharmony_ci "cld\n\t" 3758c2ecf20Sopenharmony_ci "rep ; movsl" 3768c2ecf20Sopenharmony_ci : "=&c" (d0), "=&D" (d1), "=&S" (d2) 3778c2ecf20Sopenharmony_ci : "0" (1024), "1" ((long) to), "2" ((long) from) 3788c2ecf20Sopenharmony_ci : "memory"); 3798c2ecf20Sopenharmony_ci} 3808c2ecf20Sopenharmony_ci 3818c2ecf20Sopenharmony_civoid mmx_copy_page(void *to, void *from) 3828c2ecf20Sopenharmony_ci{ 3838c2ecf20Sopenharmony_ci if (unlikely(in_interrupt())) 3848c2ecf20Sopenharmony_ci slow_copy_page(to, from); 3858c2ecf20Sopenharmony_ci else 3868c2ecf20Sopenharmony_ci fast_copy_page(to, from); 3878c2ecf20Sopenharmony_ci} 3888c2ecf20Sopenharmony_ciEXPORT_SYMBOL(mmx_copy_page); 389