162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * Copyright © 2016 Intel Corporation 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 562306a36Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 662306a36Sopenharmony_ci * to deal in the Software without restriction, including without limitation 762306a36Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 862306a36Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 962306a36Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 1062306a36Sopenharmony_ci * 1162306a36Sopenharmony_ci * The above copyright notice and this permission notice (including the next 1262306a36Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 1362306a36Sopenharmony_ci * Software. 1462306a36Sopenharmony_ci * 1562306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1662306a36Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1762306a36Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1862306a36Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1962306a36Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2062306a36Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2162306a36Sopenharmony_ci * IN THE SOFTWARE. 2262306a36Sopenharmony_ci * 2362306a36Sopenharmony_ci */ 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci#include <linux/kernel.h> 2662306a36Sopenharmony_ci#include <asm/fpu/api.h> 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci#include "i915_memcpy.h" 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_DRM_I915_DEBUG) 3162306a36Sopenharmony_ci#define CI_BUG_ON(expr) BUG_ON(expr) 3262306a36Sopenharmony_ci#else 3362306a36Sopenharmony_ci#define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) 3462306a36Sopenharmony_ci#endif 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_cistatic DEFINE_STATIC_KEY_FALSE(has_movntdqa); 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_cistatic void __memcpy_ntdqa(void *dst, const void *src, unsigned long len) 3962306a36Sopenharmony_ci{ 4062306a36Sopenharmony_ci kernel_fpu_begin(); 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci while (len >= 4) { 4362306a36Sopenharmony_ci asm("movntdqa (%0), %%xmm0\n" 4462306a36Sopenharmony_ci "movntdqa 16(%0), %%xmm1\n" 4562306a36Sopenharmony_ci "movntdqa 32(%0), %%xmm2\n" 4662306a36Sopenharmony_ci "movntdqa 48(%0), %%xmm3\n" 4762306a36Sopenharmony_ci "movaps %%xmm0, (%1)\n" 4862306a36Sopenharmony_ci "movaps %%xmm1, 16(%1)\n" 4962306a36Sopenharmony_ci "movaps %%xmm2, 32(%1)\n" 5062306a36Sopenharmony_ci "movaps %%xmm3, 48(%1)\n" 5162306a36Sopenharmony_ci :: "r" (src), "r" (dst) : "memory"); 5262306a36Sopenharmony_ci src += 64; 5362306a36Sopenharmony_ci dst += 64; 5462306a36Sopenharmony_ci len -= 4; 5562306a36Sopenharmony_ci } 5662306a36Sopenharmony_ci while (len--) { 5762306a36Sopenharmony_ci asm("movntdqa (%0), %%xmm0\n" 5862306a36Sopenharmony_ci "movaps %%xmm0, (%1)\n" 5962306a36Sopenharmony_ci :: "r" (src), "r" (dst) : "memory"); 6062306a36Sopenharmony_ci src += 16; 6162306a36Sopenharmony_ci dst += 16; 6262306a36Sopenharmony_ci } 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci kernel_fpu_end(); 6562306a36Sopenharmony_ci} 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_cistatic void __memcpy_ntdqu(void *dst, const void *src, unsigned long len) 6862306a36Sopenharmony_ci{ 6962306a36Sopenharmony_ci kernel_fpu_begin(); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci while (len >= 4) { 7262306a36Sopenharmony_ci asm("movntdqa (%0), %%xmm0\n" 7362306a36Sopenharmony_ci "movntdqa 16(%0), %%xmm1\n" 7462306a36Sopenharmony_ci "movntdqa 32(%0), %%xmm2\n" 7562306a36Sopenharmony_ci "movntdqa 48(%0), %%xmm3\n" 7662306a36Sopenharmony_ci "movups %%xmm0, (%1)\n" 7762306a36Sopenharmony_ci "movups %%xmm1, 16(%1)\n" 7862306a36Sopenharmony_ci "movups %%xmm2, 32(%1)\n" 7962306a36Sopenharmony_ci "movups %%xmm3, 48(%1)\n" 8062306a36Sopenharmony_ci :: "r" (src), "r" (dst) : "memory"); 8162306a36Sopenharmony_ci src += 64; 8262306a36Sopenharmony_ci dst += 64; 8362306a36Sopenharmony_ci len -= 4; 8462306a36Sopenharmony_ci } 8562306a36Sopenharmony_ci while (len--) { 8662306a36Sopenharmony_ci asm("movntdqa (%0), %%xmm0\n" 8762306a36Sopenharmony_ci "movups %%xmm0, (%1)\n" 8862306a36Sopenharmony_ci :: "r" (src), "r" (dst) : "memory"); 8962306a36Sopenharmony_ci src += 16; 9062306a36Sopenharmony_ci dst += 16; 9162306a36Sopenharmony_ci } 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci kernel_fpu_end(); 9462306a36Sopenharmony_ci} 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci/** 9762306a36Sopenharmony_ci * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC 9862306a36Sopenharmony_ci * @dst: destination pointer 9962306a36Sopenharmony_ci * @src: source pointer 10062306a36Sopenharmony_ci * @len: how many bytes to copy 10162306a36Sopenharmony_ci * 10262306a36Sopenharmony_ci * i915_memcpy_from_wc copies @len bytes from @src to @dst using 10362306a36Sopenharmony_ci * non-temporal instructions where available. Note that all arguments 10462306a36Sopenharmony_ci * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple 10562306a36Sopenharmony_ci * of 16. 10662306a36Sopenharmony_ci * 10762306a36Sopenharmony_ci * To test whether accelerated reads from WC are supported, use 10862306a36Sopenharmony_ci * i915_memcpy_from_wc(NULL, NULL, 0); 10962306a36Sopenharmony_ci * 11062306a36Sopenharmony_ci * Returns true if the copy was successful, false if the preconditions 11162306a36Sopenharmony_ci * are not met. 11262306a36Sopenharmony_ci */ 11362306a36Sopenharmony_cibool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len) 11462306a36Sopenharmony_ci{ 11562306a36Sopenharmony_ci if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15)) 11662306a36Sopenharmony_ci return false; 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci if (static_branch_likely(&has_movntdqa)) { 11962306a36Sopenharmony_ci if (likely(len)) 12062306a36Sopenharmony_ci __memcpy_ntdqa(dst, src, len >> 4); 12162306a36Sopenharmony_ci return true; 12262306a36Sopenharmony_ci } 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci return false; 12562306a36Sopenharmony_ci} 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci/** 12862306a36Sopenharmony_ci * i915_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC 12962306a36Sopenharmony_ci * @dst: destination pointer 13062306a36Sopenharmony_ci * @src: source pointer 13162306a36Sopenharmony_ci * @len: how many bytes to copy 13262306a36Sopenharmony_ci * 13362306a36Sopenharmony_ci * Like i915_memcpy_from_wc(), the unaligned variant copies @len bytes from 13462306a36Sopenharmony_ci * @src to @dst using * non-temporal instructions where available, but 13562306a36Sopenharmony_ci * accepts that its arguments may not be aligned, but are valid for the 13662306a36Sopenharmony_ci * potential 16-byte read past the end. 13762306a36Sopenharmony_ci */ 13862306a36Sopenharmony_civoid i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len) 13962306a36Sopenharmony_ci{ 14062306a36Sopenharmony_ci unsigned long addr; 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci CI_BUG_ON(!i915_has_memcpy_from_wc()); 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci addr = (unsigned long)src; 14562306a36Sopenharmony_ci if (!IS_ALIGNED(addr, 16)) { 14662306a36Sopenharmony_ci unsigned long x = min(ALIGN(addr, 16) - addr, len); 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci memcpy(dst, src, x); 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci len -= x; 15162306a36Sopenharmony_ci dst += x; 15262306a36Sopenharmony_ci src += x; 15362306a36Sopenharmony_ci } 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci if (likely(len)) 15662306a36Sopenharmony_ci __memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16)); 15762306a36Sopenharmony_ci} 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_civoid i915_memcpy_init_early(struct drm_i915_private *dev_priv) 16062306a36Sopenharmony_ci{ 16162306a36Sopenharmony_ci /* 16262306a36Sopenharmony_ci * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions 16362306a36Sopenharmony_ci * emulation. So don't enable movntdqa in hypervisor guest. 16462306a36Sopenharmony_ci */ 16562306a36Sopenharmony_ci if (static_cpu_has(X86_FEATURE_XMM4_1) && 16662306a36Sopenharmony_ci !boot_cpu_has(X86_FEATURE_HYPERVISOR)) 16762306a36Sopenharmony_ci static_branch_enable(&has_movntdqa); 16862306a36Sopenharmony_ci} 169