18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright © 2016 Intel Corporation 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 58c2ecf20Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 68c2ecf20Sopenharmony_ci * to deal in the Software without restriction, including without limitation 78c2ecf20Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 88c2ecf20Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 98c2ecf20Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * The above copyright notice and this permission notice (including the next 128c2ecf20Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 138c2ecf20Sopenharmony_ci * Software. 148c2ecf20Sopenharmony_ci * 158c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 168c2ecf20Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 178c2ecf20Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 188c2ecf20Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 198c2ecf20Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 208c2ecf20Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 218c2ecf20Sopenharmony_ci * IN THE SOFTWARE. 228c2ecf20Sopenharmony_ci * 238c2ecf20Sopenharmony_ci */ 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci#include <linux/kernel.h> 268c2ecf20Sopenharmony_ci#include <asm/fpu/api.h> 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci#include "i915_memcpy.h" 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) 318c2ecf20Sopenharmony_ci#define CI_BUG_ON(expr) BUG_ON(expr) 328c2ecf20Sopenharmony_ci#else 338c2ecf20Sopenharmony_ci#define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) 348c2ecf20Sopenharmony_ci#endif 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_cistatic DEFINE_STATIC_KEY_FALSE(has_movntdqa); 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_cistatic void __memcpy_ntdqa(void *dst, const void *src, unsigned long len) 398c2ecf20Sopenharmony_ci{ 408c2ecf20Sopenharmony_ci kernel_fpu_begin(); 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci while (len >= 4) { 438c2ecf20Sopenharmony_ci asm("movntdqa (%0), %%xmm0\n" 448c2ecf20Sopenharmony_ci "movntdqa 16(%0), %%xmm1\n" 458c2ecf20Sopenharmony_ci "movntdqa 32(%0), %%xmm2\n" 468c2ecf20Sopenharmony_ci "movntdqa 48(%0), %%xmm3\n" 478c2ecf20Sopenharmony_ci "movaps %%xmm0, (%1)\n" 488c2ecf20Sopenharmony_ci "movaps %%xmm1, 16(%1)\n" 498c2ecf20Sopenharmony_ci "movaps %%xmm2, 32(%1)\n" 508c2ecf20Sopenharmony_ci "movaps %%xmm3, 48(%1)\n" 518c2ecf20Sopenharmony_ci :: "r" (src), "r" (dst) : "memory"); 528c2ecf20Sopenharmony_ci src += 64; 538c2ecf20Sopenharmony_ci dst += 64; 548c2ecf20Sopenharmony_ci len -= 4; 558c2ecf20Sopenharmony_ci } 568c2ecf20Sopenharmony_ci while (len--) { 578c2ecf20Sopenharmony_ci asm("movntdqa (%0), %%xmm0\n" 588c2ecf20Sopenharmony_ci "movaps %%xmm0, (%1)\n" 598c2ecf20Sopenharmony_ci :: "r" (src), "r" (dst) : "memory"); 608c2ecf20Sopenharmony_ci src += 16; 618c2ecf20Sopenharmony_ci dst += 16; 628c2ecf20Sopenharmony_ci } 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci kernel_fpu_end(); 658c2ecf20Sopenharmony_ci} 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_cistatic void __memcpy_ntdqu(void *dst, const void *src, unsigned long len) 688c2ecf20Sopenharmony_ci{ 698c2ecf20Sopenharmony_ci kernel_fpu_begin(); 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci while (len >= 4) { 728c2ecf20Sopenharmony_ci asm("movntdqa (%0), %%xmm0\n" 738c2ecf20Sopenharmony_ci "movntdqa 16(%0), %%xmm1\n" 748c2ecf20Sopenharmony_ci "movntdqa 32(%0), %%xmm2\n" 758c2ecf20Sopenharmony_ci "movntdqa 48(%0), %%xmm3\n" 768c2ecf20Sopenharmony_ci "movups %%xmm0, (%1)\n" 778c2ecf20Sopenharmony_ci "movups %%xmm1, 16(%1)\n" 788c2ecf20Sopenharmony_ci "movups %%xmm2, 32(%1)\n" 798c2ecf20Sopenharmony_ci "movups %%xmm3, 48(%1)\n" 808c2ecf20Sopenharmony_ci :: "r" (src), "r" (dst) : "memory"); 818c2ecf20Sopenharmony_ci src += 64; 828c2ecf20Sopenharmony_ci dst += 64; 838c2ecf20Sopenharmony_ci len -= 4; 848c2ecf20Sopenharmony_ci } 858c2ecf20Sopenharmony_ci while (len--) { 868c2ecf20Sopenharmony_ci asm("movntdqa (%0), %%xmm0\n" 878c2ecf20Sopenharmony_ci "movups %%xmm0, (%1)\n" 888c2ecf20Sopenharmony_ci :: "r" (src), "r" (dst) : "memory"); 898c2ecf20Sopenharmony_ci src += 16; 908c2ecf20Sopenharmony_ci dst += 16; 918c2ecf20Sopenharmony_ci } 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci kernel_fpu_end(); 948c2ecf20Sopenharmony_ci} 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci/** 978c2ecf20Sopenharmony_ci * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC 988c2ecf20Sopenharmony_ci * @dst: destination pointer 998c2ecf20Sopenharmony_ci * @src: source pointer 1008c2ecf20Sopenharmony_ci * @len: how many bytes to copy 1018c2ecf20Sopenharmony_ci * 1028c2ecf20Sopenharmony_ci * i915_memcpy_from_wc copies @len bytes from @src to @dst using 1038c2ecf20Sopenharmony_ci * non-temporal instructions where available. Note that all arguments 1048c2ecf20Sopenharmony_ci * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple 1058c2ecf20Sopenharmony_ci * of 16. 1068c2ecf20Sopenharmony_ci * 1078c2ecf20Sopenharmony_ci * To test whether accelerated reads from WC are supported, use 1088c2ecf20Sopenharmony_ci * i915_memcpy_from_wc(NULL, NULL, 0); 1098c2ecf20Sopenharmony_ci * 1108c2ecf20Sopenharmony_ci * Returns true if the copy was successful, false if the preconditions 1118c2ecf20Sopenharmony_ci * are not met. 1128c2ecf20Sopenharmony_ci */ 1138c2ecf20Sopenharmony_cibool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len) 1148c2ecf20Sopenharmony_ci{ 1158c2ecf20Sopenharmony_ci if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15)) 1168c2ecf20Sopenharmony_ci return false; 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci if (static_branch_likely(&has_movntdqa)) { 1198c2ecf20Sopenharmony_ci if (likely(len)) 1208c2ecf20Sopenharmony_ci __memcpy_ntdqa(dst, src, len >> 4); 1218c2ecf20Sopenharmony_ci return true; 1228c2ecf20Sopenharmony_ci } 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci return false; 1258c2ecf20Sopenharmony_ci} 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci/** 1288c2ecf20Sopenharmony_ci * i915_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC 1298c2ecf20Sopenharmony_ci * @dst: destination pointer 1308c2ecf20Sopenharmony_ci * @src: source pointer 1318c2ecf20Sopenharmony_ci * @len: how many bytes to copy 1328c2ecf20Sopenharmony_ci * 1338c2ecf20Sopenharmony_ci * Like i915_memcpy_from_wc(), the unaligned variant copies @len bytes from 1348c2ecf20Sopenharmony_ci * @src to @dst using * non-temporal instructions where available, but 1358c2ecf20Sopenharmony_ci * accepts that its arguments may not be aligned, but are valid for the 1368c2ecf20Sopenharmony_ci * potential 16-byte read past the end. 1378c2ecf20Sopenharmony_ci */ 1388c2ecf20Sopenharmony_civoid i915_unaligned_memcpy_from_wc(void *dst, void *src, unsigned long len) 1398c2ecf20Sopenharmony_ci{ 1408c2ecf20Sopenharmony_ci unsigned long addr; 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci CI_BUG_ON(!i915_has_memcpy_from_wc()); 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_ci addr = (unsigned long)src; 1458c2ecf20Sopenharmony_ci if (!IS_ALIGNED(addr, 16)) { 1468c2ecf20Sopenharmony_ci unsigned long x = min(ALIGN(addr, 16) - addr, len); 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_ci memcpy(dst, src, x); 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_ci len -= x; 1518c2ecf20Sopenharmony_ci dst += x; 1528c2ecf20Sopenharmony_ci src += x; 1538c2ecf20Sopenharmony_ci } 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ci if (likely(len)) 1568c2ecf20Sopenharmony_ci __memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16)); 1578c2ecf20Sopenharmony_ci} 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_civoid i915_memcpy_init_early(struct drm_i915_private *dev_priv) 1608c2ecf20Sopenharmony_ci{ 1618c2ecf20Sopenharmony_ci /* 1628c2ecf20Sopenharmony_ci * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions 1638c2ecf20Sopenharmony_ci * emulation. So don't enable movntdqa in hypervisor guest. 1648c2ecf20Sopenharmony_ci */ 1658c2ecf20Sopenharmony_ci if (static_cpu_has(X86_FEATURE_XMM4_1) && 1668c2ecf20Sopenharmony_ci !boot_cpu_has(X86_FEATURE_HYPERVISOR)) 1678c2ecf20Sopenharmony_ci static_branch_enable(&has_movntdqa); 1688c2ecf20Sopenharmony_ci} 169