18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 58c2ecf20Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 68c2ecf20Sopenharmony_ci * to deal in the Software without restriction, including without limitation 78c2ecf20Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 88c2ecf20Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 98c2ecf20Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * The above copyright notice and this permission notice (including the next 128c2ecf20Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 138c2ecf20Sopenharmony_ci * Software. 148c2ecf20Sopenharmony_ci * 158c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 168c2ecf20Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 178c2ecf20Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 188c2ecf20Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 198c2ecf20Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 208c2ecf20Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 218c2ecf20Sopenharmony_ci * SOFTWARE. 
 *
 * Authors:
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *    Chanbin Du <changbin.du@intel.com>
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Zhenyu Wang <zhenyuw@linux.intel.com>
 *
 */

#include <linux/kthread.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_context.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_gem_gtt.h"
#include "gvt.h"

/* Byte offset of field @x within the execlist ring context image. */
#define RING_CTX_OFF(x) \
	offsetof(struct execlist_ring_context, x)

/*
 * Write the four PDP root pointers into a ring context image.
 * The pdps[] slots are filled from the source array in reverse
 * order (pdps[0] gets pdp[7], etc.) to match the layout the
 * context image expects.
 */
static void set_context_pdp_root_pointer(
		struct execlist_ring_context *ring_context,
		u32 pdp[8])
{
	int i;

	for (i = 0; i < 8; i++)
		ring_context->pdps[i].val = pdp[7 - i];
}

/*
 * Copy the shadow PPGTT root pointers of @workload into the shadow
 * context's LRC register state, so the submitted context walks the
 * shadow page tables rather than the guest's own.
 */
static void update_shadow_pdps(struct intel_vgpu_workload *workload)
{
	struct execlist_ring_context *shadow_ring_context;
	struct intel_context *ctx = workload->req->context;

	if (WARN_ON(!workload->shadow_mm))
		return;

	/* The shadow mm must already be pinned at this point. */
	if (WARN_ON(!atomic_read(&workload->shadow_mm->pincount)))
		return;

	shadow_ring_context = (struct execlist_ring_context *)ctx->lrc_reg_state;
	set_context_pdp_root_pointer(shadow_ring_context,
			(void *)workload->shadow_mm->ppgtt_mm.shadow_pdps);
}

/*
 * when populating shadow ctx from guest, we should not overrride oa related
 * registers, so that they will not be overlapped by guest oa configs. Thus
 * made it possible to capture oa data from host for both host and guests.
 */
static void sr_oa_regs(struct intel_vgpu_workload *workload,
		u32 *reg_state, bool save)
{
	struct drm_i915_private *dev_priv = workload->vgpu->gvt->gt->i915;
	u32 ctx_oactxctrl = dev_priv->perf.ctx_oactxctrl_offset;
	u32 ctx_flexeu0 = dev_priv->perf.ctx_flexeu0_offset;
	int i = 0;
	u32 flex_mmio[] = {
		i915_mmio_reg_offset(EU_PERF_CNTL0),
		i915_mmio_reg_offset(EU_PERF_CNTL1),
		i915_mmio_reg_offset(EU_PERF_CNTL2),
		i915_mmio_reg_offset(EU_PERF_CNTL3),
		i915_mmio_reg_offset(EU_PERF_CNTL4),
		i915_mmio_reg_offset(EU_PERF_CNTL5),
		i915_mmio_reg_offset(EU_PERF_CNTL6),
	};

	/* OA state only lives in the render engine's context image. */
	if (workload->engine->id != RCS0)
		return;

	if (save) {
		/* Stash current OA register values from the reg_state. */
		workload->oactxctrl = reg_state[ctx_oactxctrl + 1];

		for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
			u32 state_offset = ctx_flexeu0 + i * 2;

			workload->flex_mmio[i] = reg_state[state_offset + 1];
		}
	} else {
		/* Restore: rewrite both the MMIO offset and saved value. */
		reg_state[ctx_oactxctrl] =
			i915_mmio_reg_offset(GEN8_OACTXCONTROL);
		reg_state[ctx_oactxctrl + 1] = workload->oactxctrl;

		for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
			u32 state_offset = ctx_flexeu0 + i * 2;
			u32 mmio = flex_mmio[i];

			reg_state[state_offset] = mmio;
			reg_state[state_offset + 1] = workload->flex_mmio[i];
		}
	}
}

/*
 * Populate the shadow (host) context image from the guest's ring
 * context.  Selected registers are copied individually (with
 * ctx_ctrl force-masked), the rest of the first context page is
 * copied wholesale, and — unless the context is restore-inhibited
 * or identical to the last one submitted on this ring — the
 * remaining guest context pages are read in runs of consecutive
 * GPAs to minimize hypervisor read calls.
 *
 * Returns 0 on success, -EFAULT if a guest context page cannot be
 * translated to a valid GPA.
 */
static int populate_shadow_context(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_context *ctx = workload->req->context;
	struct execlist_ring_context *shadow_ring_context;
	void *dst;
	void *context_base;
	unsigned long context_gpa, context_page_num;
	unsigned long gpa_base; /* first gpa of consecutive GPAs */
	unsigned long gpa_size; /* size of consecutive GPAs */
	struct intel_vgpu_submission *s = &vgpu->submission;
	int i;
	bool skip = false;
	int ring_id = workload->engine->id;

	GEM_BUG_ON(!intel_context_is_pinned(ctx));

	/* lrc_reg_state points at page LRC_STATE_PN; back up to page 0. */
	context_base = (void *) ctx->lrc_reg_state -
				(LRC_STATE_PN << I915_GTT_PAGE_SHIFT);

	shadow_ring_context = (void *) ctx->lrc_reg_state;

	/* Save host OA registers before the guest copy clobbers them. */
	sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
#define COPY_REG(name) \
	intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
		+ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
#define COPY_REG_MASKED(name) {\
		intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
					      + RING_CTX_OFF(name.val),\
					      &shadow_ring_context->name.val, 4);\
		shadow_ring_context->name.val |= 0xffff << 16;\
	}

	COPY_REG_MASKED(ctx_ctrl);
	COPY_REG(ctx_timestamp);

	if (workload->engine->id == RCS0) {
		COPY_REG(bb_per_ctx_ptr);
		COPY_REG(rcs_indirect_ctx);
		COPY_REG(rcs_indirect_ctx_offset);
	}
#undef COPY_REG
#undef COPY_REG_MASKED

	/* Bulk-copy the remainder of the first guest context page. */
	intel_gvt_hypervisor_read_gpa(vgpu,
			workload->ring_context_gpa +
			sizeof(*shadow_ring_context),
			(void *)shadow_ring_context +
			sizeof(*shadow_ring_context),
			I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));

	/* Restore host OA registers over whatever the guest had. */
	sr_oa_regs(workload, (u32 *)shadow_ring_context, false);

	gvt_dbg_sched("ring %s workload lrca %x, ctx_id %x, ctx gpa %llx",
			workload->engine->name, workload->ctx_desc.lrca,
			workload->ctx_desc.context_id,
			workload->ring_context_gpa);

	/* only need to ensure this context is not pinned/unpinned during the
	 * period from last submission to this this submission.
	 * Upon reaching this function, the currently submitted context is not
	 * supposed to get unpinned. If a misbehaving guest driver ever does
	 * this, it would corrupt itself.
	 */
	if (s->last_ctx[ring_id].valid &&
			(s->last_ctx[ring_id].lrca ==
					workload->ctx_desc.lrca) &&
			(s->last_ctx[ring_id].ring_context_gpa ==
					workload->ring_context_gpa))
		skip = true;

	s->last_ctx[ring_id].lrca = workload->ctx_desc.lrca;
	s->last_ctx[ring_id].ring_context_gpa = workload->ring_context_gpa;

	/* Restore-inhibited or unchanged context: no page copy needed. */
	if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val) || skip)
		return 0;

	/* Invalidate the cache until the full copy below succeeds. */
	s->last_ctx[ring_id].valid = false;
	context_page_num = workload->engine->context_size;
	context_page_num = context_page_num >> PAGE_SHIFT;

	if (IS_BROADWELL(gvt->gt->i915) && workload->engine->id == RCS0)
		context_page_num = 19;

	/* find consecutive GPAs from gma until the first inconsecutive GPA.
	 * read from the continuous GPAs into dst virtual address
	 */
	gpa_size = 0;
	for (i = 2; i < context_page_num; i++) {
		context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
				(u32)((workload->ctx_desc.lrca + i) <<
				I915_GTT_PAGE_SHIFT));
		if (context_gpa == INTEL_GVT_INVALID_ADDR) {
			gvt_vgpu_err("Invalid guest context descriptor\n");
			return -EFAULT;
		}

		if (gpa_size == 0) {
			/* Start a new run of consecutive pages. */
			gpa_base = context_gpa;
			dst = context_base + (i << I915_GTT_PAGE_SHIFT);
		} else if (context_gpa != gpa_base + gpa_size)
			goto read;

		gpa_size += I915_GTT_PAGE_SIZE;

		/* Last page: flush the accumulated run. */
		if (i == context_page_num - 1)
			goto read;

		continue;

read:
		intel_gvt_hypervisor_read_gpa(vgpu, gpa_base, dst, gpa_size);
		gpa_base = context_gpa;
		gpa_size = I915_GTT_PAGE_SIZE;
		dst = context_base + (i << I915_GTT_PAGE_SHIFT);
	}
	s->last_ctx[ring_id].valid = true;
	return 0;
}

/*
 * A request originates from GVT (rather than the host i915) iff its
 * context was marked for forced single submission.
 */
static inline bool is_gvt_request(struct i915_request *rq)
{
	return intel_context_force_single_submission(rq->context);
}

/*
 * Snapshot INSTDONE/ACTHD/ACTHD_UDW from the hardware into the
 * vGPU's virtual registers so the guest can read post-execution
 * engine state.
 */
static void save_ring_hw_state(struct intel_vgpu *vgpu,
			       const struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	i915_reg_t reg;

	reg = RING_INSTDONE(engine->mmio_base);
	vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) =
		intel_uncore_read(uncore, reg);

	reg = RING_ACTHD(engine->mmio_base);
	vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) =
		intel_uncore_read(uncore, reg);

	reg = RING_ACTHD_UDW(engine->mmio_base);
	vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) =
		intel_uncore_read(uncore, reg);
}

/*
 * Context schedule-status notifier.  Tracks engine ownership
 * (host vs. a particular vGPU) and performs the render MMIO switch
 * when ownership changes; also saves ring HW state and flips
 * shadow_ctx_active when a GVT workload is scheduled in/out.
 * Ownership state is protected by scheduler->mmio_context_lock.
 */
static int shadow_context_status_change(struct notifier_block *nb,
		unsigned long action, void *data)
{
	struct i915_request *rq = data;
	struct intel_gvt *gvt = container_of(nb, struct intel_gvt,
				shadow_ctx_notifier_block[rq->engine->id]);
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	enum intel_engine_id ring_id = rq->engine->id;
	struct intel_vgpu_workload *workload;
	unsigned long flags;

	if (!is_gvt_request(rq)) {
		/* A host request scheduling in takes the engine back. */
		spin_lock_irqsave(&scheduler->mmio_context_lock, flags);
		if (action == INTEL_CONTEXT_SCHEDULE_IN &&
		    scheduler->engine_owner[ring_id]) {
			/* Switch ring from vGPU to host. */
			intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
					      NULL, rq->engine);
			scheduler->engine_owner[ring_id] = NULL;
		}
		spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags);

		return NOTIFY_OK;
	}

	workload = scheduler->current_workload[ring_id];
	if (unlikely(!workload))
		return NOTIFY_OK;

	switch (action) {
	case INTEL_CONTEXT_SCHEDULE_IN:
		spin_lock_irqsave(&scheduler->mmio_context_lock, flags);
		if (workload->vgpu != scheduler->engine_owner[ring_id]) {
			/* Switch ring from host to vGPU or vGPU to vGPU. */
			intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
					      workload->vgpu, rq->engine);
			scheduler->engine_owner[ring_id] = workload->vgpu;
		} else
			gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n",
				      ring_id, workload->vgpu->id);
		spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags);
		atomic_set(&workload->shadow_ctx_active, 1);
		break;
	case INTEL_CONTEXT_SCHEDULE_OUT:
		save_ring_hw_state(workload->vgpu, rq->engine);
		atomic_set(&workload->shadow_ctx_active, 0);
		break;
	case INTEL_CONTEXT_SCHEDULE_PREEMPTED:
		save_ring_hw_state(workload->vgpu, rq->engine);
		break;
	default:
		WARN_ON(1);
		return NOTIFY_OK;
	}
	/* Wake anyone waiting on shadow_ctx_active transitions. */
	wake_up(&workload->shadow_ctx_status_wq);
	return NOTIFY_OK;
}

/*
 * Propagate the guest's addressing mode (bits in the context
 * descriptor) into the shadow context's LRC descriptor.
 */
static void
shadow_context_descriptor_update(struct intel_context *ce,
		struct intel_vgpu_workload *workload)
{
	u64 desc = ce->lrc.desc;

	/*
	 * Update bits 0-11 of the context descriptor which includes flags
	 * like GEN8_CTX_* cached in desc_template
	 */
	desc &= ~(0x3ull << GEN8_CTX_ADDRESSING_MODE_SHIFT);
	desc |= (u64)workload->ctx_desc.addressing_mode <<
		GEN8_CTX_ADDRESSING_MODE_SHIFT;

	ce->lrc.desc = desc;
}

/*
 * Emit the scanned guest ring-buffer contents into the shadow
 * request's real ring.  On success, workload->shadow_ring_buffer_va
 * is repointed at the ring memory that was actually emitted.
 *
 * Returns 0 on success or a negative errno from breadcrumb emission
 * or ring allocation.
 */
static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct i915_request *req = workload->req;
	void *shadow_ring_buffer_va;
	u32 *cs;
	int err;

	if (IS_GEN(req->engine->i915, 9) && is_inhibit_context(req->context))
		intel_vgpu_restore_inhibit_context(vgpu, req);

	/*
	 * To track whether a request has started on HW, we can emit a
	 * breadcrumb at the beginning of the request and check its
	 * timeline's HWSP to see if the breadcrumb has advanced past the
	 * start of this request. Actually, the request must have the
	 * init_breadcrumb if its timeline set has_init_bread_crumb, or the
	 * scheduler might get a wrong state of it during reset. Since the
	 * requests from gvt always set the has_init_breadcrumb flag, here
	 * need to do the emit_init_breadcrumb for all the requests.
	 */
	if (req->engine->emit_init_breadcrumb) {
		err = req->engine->emit_init_breadcrumb(req);
		if (err) {
			gvt_vgpu_err("fail to emit init breadcrumb\n");
			return err;
		}
	}

	/* allocate shadow ring buffer */
	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
	if (IS_ERR(cs)) {
		gvt_vgpu_err("fail to alloc size =%ld shadow ring buffer\n",
			workload->rb_len);
		return PTR_ERR(cs);
	}

	shadow_ring_buffer_va = workload->shadow_ring_buffer_va;

	/* get shadow ring buffer va */
	workload->shadow_ring_buffer_va = cs;

	memcpy(cs, shadow_ring_buffer_va,
			workload->rb_len);

	cs += workload->rb_len / sizeof(u32);
	intel_ring_advance(workload->req, cs);

	return 0;
}

/*
 * Drop the shadow indirect-context object, if any: unpin its map,
 * release the reference, and clear the cached pointers.
 */
static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
	if (!wa_ctx->indirect_ctx.obj)
		return;

	i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
	i915_gem_object_put(wa_ctx->indirect_ctx.obj);

	wa_ctx->indirect_ctx.obj = NULL;
	wa_ctx->indirect_ctx.shadow_va = NULL;
}

/*
 * Patch the DMA address of a page directory's backing page in place.
 * As the original comment notes, poking sg->dma_address directly is
 * a hack, but it lets the shadow PPGTT roots be injected without
 * rebuilding the i915 page tables.
 */
static void set_dma_address(struct i915_page_directory *pd, dma_addr_t addr)
{
	struct scatterlist *sg = pd->pt.base->mm.pages->sgl;

	/* This is not a good idea */
	sg->dma_address = addr;
}

/*
 * Point the shadow context's PPGTT at the workload's shadow page
 * tables: one root for a 4-level table, or up to GVT_RING_CTX_NR_PDPS
 * directory entries for a legacy 3-level layout.
 */
static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
					  struct intel_context *ce)
{
	struct intel_vgpu_mm *mm = workload->shadow_mm;
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
	int i = 0;

	if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
		set_dma_address(ppgtt->pd, mm->ppgtt_mm.shadow_pdps[0]);
	} else {
		for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
			struct i915_page_directory * const pd =
				i915_pd_entry(ppgtt->pd, i);
			/* skip now as current i915 ppgtt alloc won't allocate
			   top level pdp for non 4-level table, won't impact
			   shadow ppgtt. */
			if (!pd)
				break;

			set_dma_address(pd, mm->ppgtt_mm.shadow_pdps[i]);
		}
	}
}

/*
 * Ensure @workload has an i915 request on its engine's shadow
 * context, creating (and holding a reference to) one if needed.
 * Idempotent: returns 0 immediately when a request already exists.
 */
static int
intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct i915_request *rq;

	if (workload->req)
		return 0;

	rq = i915_request_create(s->shadow[workload->engine->id]);
	if (IS_ERR(rq)) {
		gvt_vgpu_err("fail to allocate gem request\n");
		return PTR_ERR(rq);
	}

	workload->req = i915_request_get(rq);
	return 0;
}

/**
 * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and
 * shadow it as well, include ringbuffer,wa_ctx and ctx.
 * @workload: an abstract entity for each execlist submission.
 *
 * This function is called before the workload submitting to i915, to make
 * sure the content of the workload is valid.
 */
int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	int ret;

	lockdep_assert_held(&vgpu->vgpu_lock);

	/* Already scanned and shadowed: nothing to do. */
	if (workload->shadow)
		return 0;

	/* Descriptor update is only needed once per engine. */
	if (!test_and_set_bit(workload->engine->id, s->shadow_ctx_desc_updated))
		shadow_context_descriptor_update(s->shadow[workload->engine->id],
						 workload);

	ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
	if (ret)
		return ret;

	/* Indirect (wa) context only exists on the render engine. */
	if (workload->engine->id == RCS0 &&
	    workload->wa_ctx.indirect_ctx.size) {
		ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
		if (ret)
			goto err_shadow;
	}

	workload->shadow = true;
	return 0;

err_shadow:
	release_shadow_wa_ctx(&workload->wa_ctx);
	return ret;
}

static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload);

/*
 * Pin every shadow batch buffer of @workload into the GGTT (non-ppgtt
 * buffers only), patch the MI_BATCH_BUFFER_START command with the
 * pinned offset, and mark the vma active against the request.  On any
 * failure all shadow batch buffers are released.
 *
 * Returns 0 on success or a negative errno.
 */
static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
	struct intel_gvt *gvt = workload->vgpu->gvt;
	const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd;
	struct intel_vgpu_shadow_bb *bb;
	int ret;

	list_for_each_entry(bb, &workload->shadow_bb, list) {
		/* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va
		 * is only updated into ring_scan_buffer, not real ring address
		 * allocated in later copy_workload_to_ring_buffer. pls be noted
		 * shadow_ring_buffer_va is now pointed to real ring buffer va
		 * in copy_workload_to_ring_buffer.
		 */

		if (bb->bb_offset)
			bb->bb_start_cmd_va = workload->shadow_ring_buffer_va
				+ bb->bb_offset;

		/*
		 * For non-priv bb, scan&shadow is only for
		 * debugging purpose, so the content of shadow bb
		 * is the same as original bb. Therefore,
		 * here, rather than switch to shadow bb's gma
		 * address, we directly use original batch buffer's
		 * gma address, and send original bb to hardware
		 * directly
		 */
		if (!bb->ppgtt) {
			bb->vma = i915_gem_object_ggtt_pin(bb->obj,
							   NULL, 0, 0, 0);
			if (IS_ERR(bb->vma)) {
				ret = PTR_ERR(bb->vma);
				goto err;
			}

			/* relocate shadow batch buffer */
			bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
			if (gmadr_bytes == 8)
				bb->bb_start_cmd_va[2] = 0;

			ret = i915_vma_move_to_active(bb->vma,
						      workload->req,
						      0);
			if (ret)
				goto err;
		}

		/* No one is going to touch shadow bb from now on. */
		i915_gem_object_flush_map(bb->obj);
	}
	return 0;
err:
	release_shadow_batch_buffer(workload);
	return ret;
}

/*
 * Rewrite the per-ctx and indirect-ctx pointers in the shadow ring
 * context so they reference the shadowed (GGTT-pinned) copies
 * instead of the guest's addresses.
 */
static void update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
	struct intel_vgpu_workload *workload =
		container_of(wa_ctx, struct intel_vgpu_workload, wa_ctx);
	struct i915_request *rq = workload->req;
	struct execlist_ring_context *shadow_ring_context =
		(struct execlist_ring_context *)rq->context->lrc_reg_state;

	shadow_ring_context->bb_per_ctx_ptr.val =
		(shadow_ring_context->bb_per_ctx_ptr.val &
		(~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma;
	shadow_ring_context->rcs_indirect_ctx.val =
		(shadow_ring_context->rcs_indirect_ctx.val &
		(~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma;
}

/*
 * Pin the shadow indirect-context object into the GGTT (cacheline
 * aligned), record the resulting offsets, clear the per-ctx trailer
 * cacheline, and patch them into the shadow ring context.
 *
 * Returns 0 on success (including when there is no indirect context)
 * or a negative errno from the GGTT pin.
 */
static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
	struct i915_vma *vma;
	unsigned char *per_ctx_va =
		(unsigned char *)wa_ctx->indirect_ctx.shadow_va +
		wa_ctx->indirect_ctx.size;

	if (wa_ctx->indirect_ctx.size == 0)
		return 0;

	vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL,
				       0, CACHELINE_BYTES, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/* FIXME: we are not tracking our pinned VMA leaving it
	 * up to the core to fix up the stray pin_count upon
	 * free.
	 */

	wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma);

	wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1);
	memset(per_ctx_va, 0, CACHELINE_BYTES);

	update_wa_ctx_2_shadow_ctx(wa_ctx);
	return 0;
}

/*
 * Mirror the workload's ring-start address into the vGPU's virtual
 * RING_START register for the workload's engine.
 */
static void update_vreg_in_ctx(struct intel_vgpu_workload *workload)
{
	vgpu_vreg_t(workload->vgpu, RING_START(workload->engine->mmio_base)) =
		workload->rb_start;
}

/*
 * Tear down all shadow batch buffers of @workload: unpin maps and
 * vmas where present, drop object references, and free the list
 * entries.
 */
static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu_shadow_bb *bb, *pos;

	if (list_empty(&workload->shadow_bb))
		return;

	/* NOTE(review): this first-entry lookup is immediately overwritten
	 * by the loop below and appears redundant — confirm before removal.
	 */
	bb = list_first_entry(&workload->shadow_bb,
			struct intel_vgpu_shadow_bb, list);

	list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
		if (bb->obj) {
			if (bb->va && !IS_ERR(bb->va))
				i915_gem_object_unpin_map(bb->obj);

			if (bb->vma && !IS_ERR(bb->vma))
				i915_vma_unpin(bb->vma);

			i915_gem_object_put(bb->obj);
		}
		list_del(&bb->list);
		kfree(bb);
	}
}

/*
 * Pin the workload's shadow mm (and any LRI shadow mms), validating
 * that the shadow PPGTT is actually shadowed.  On LRI pin failure the
 * already-pinned LRI entries are unwound in reverse order.
 * (Function continues beyond this chunk of the file.)
 */
static int
intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_mm *m;
	int ret = 0;

	ret = intel_vgpu_pin_mm(workload->shadow_mm);
	if (ret) {
		gvt_vgpu_err("fail to vgpu pin mm\n");
		return ret;
	}

	if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT ||
	    !workload->shadow_mm->ppgtt_mm.shadowed) {
		intel_vgpu_unpin_mm(workload->shadow_mm);
		gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
		return -EINVAL;
	}

	if (!list_empty(&workload->lri_shadow_mm)) {
		list_for_each_entry(m, &workload->lri_shadow_mm,
				    ppgtt_mm.link) {
			ret = intel_vgpu_pin_mm(m);
			if (ret) {
				list_for_each_entry_from_reverse(m,
								 &workload->lri_shadow_mm,
								 ppgtt_mm.link)
					intel_vgpu_unpin_mm(m);
				gvt_vgpu_err("LRI shadow ppgtt fail to pin\n");
				break;
6668c2ecf20Sopenharmony_ci } 6678c2ecf20Sopenharmony_ci } 6688c2ecf20Sopenharmony_ci } 6698c2ecf20Sopenharmony_ci 6708c2ecf20Sopenharmony_ci if (ret) 6718c2ecf20Sopenharmony_ci intel_vgpu_unpin_mm(workload->shadow_mm); 6728c2ecf20Sopenharmony_ci 6738c2ecf20Sopenharmony_ci return ret; 6748c2ecf20Sopenharmony_ci} 6758c2ecf20Sopenharmony_ci 6768c2ecf20Sopenharmony_cistatic void 6778c2ecf20Sopenharmony_ciintel_vgpu_shadow_mm_unpin(struct intel_vgpu_workload *workload) 6788c2ecf20Sopenharmony_ci{ 6798c2ecf20Sopenharmony_ci struct intel_vgpu_mm *m; 6808c2ecf20Sopenharmony_ci 6818c2ecf20Sopenharmony_ci if (!list_empty(&workload->lri_shadow_mm)) { 6828c2ecf20Sopenharmony_ci list_for_each_entry(m, &workload->lri_shadow_mm, 6838c2ecf20Sopenharmony_ci ppgtt_mm.link) 6848c2ecf20Sopenharmony_ci intel_vgpu_unpin_mm(m); 6858c2ecf20Sopenharmony_ci } 6868c2ecf20Sopenharmony_ci intel_vgpu_unpin_mm(workload->shadow_mm); 6878c2ecf20Sopenharmony_ci} 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_cistatic int prepare_workload(struct intel_vgpu_workload *workload) 6908c2ecf20Sopenharmony_ci{ 6918c2ecf20Sopenharmony_ci struct intel_vgpu *vgpu = workload->vgpu; 6928c2ecf20Sopenharmony_ci struct intel_vgpu_submission *s = &vgpu->submission; 6938c2ecf20Sopenharmony_ci int ret = 0; 6948c2ecf20Sopenharmony_ci 6958c2ecf20Sopenharmony_ci ret = intel_vgpu_shadow_mm_pin(workload); 6968c2ecf20Sopenharmony_ci if (ret) { 6978c2ecf20Sopenharmony_ci gvt_vgpu_err("fail to pin shadow mm\n"); 6988c2ecf20Sopenharmony_ci return ret; 6998c2ecf20Sopenharmony_ci } 7008c2ecf20Sopenharmony_ci 7018c2ecf20Sopenharmony_ci update_shadow_pdps(workload); 7028c2ecf20Sopenharmony_ci 7038c2ecf20Sopenharmony_ci set_context_ppgtt_from_shadow(workload, s->shadow[workload->engine->id]); 7048c2ecf20Sopenharmony_ci 7058c2ecf20Sopenharmony_ci ret = intel_vgpu_sync_oos_pages(workload->vgpu); 7068c2ecf20Sopenharmony_ci if (ret) { 7078c2ecf20Sopenharmony_ci gvt_vgpu_err("fail to vgpu sync oos pages\n"); 
7088c2ecf20Sopenharmony_ci goto err_unpin_mm; 7098c2ecf20Sopenharmony_ci } 7108c2ecf20Sopenharmony_ci 7118c2ecf20Sopenharmony_ci ret = intel_vgpu_flush_post_shadow(workload->vgpu); 7128c2ecf20Sopenharmony_ci if (ret) { 7138c2ecf20Sopenharmony_ci gvt_vgpu_err("fail to flush post shadow\n"); 7148c2ecf20Sopenharmony_ci goto err_unpin_mm; 7158c2ecf20Sopenharmony_ci } 7168c2ecf20Sopenharmony_ci 7178c2ecf20Sopenharmony_ci ret = copy_workload_to_ring_buffer(workload); 7188c2ecf20Sopenharmony_ci if (ret) { 7198c2ecf20Sopenharmony_ci gvt_vgpu_err("fail to generate request\n"); 7208c2ecf20Sopenharmony_ci goto err_unpin_mm; 7218c2ecf20Sopenharmony_ci } 7228c2ecf20Sopenharmony_ci 7238c2ecf20Sopenharmony_ci ret = prepare_shadow_batch_buffer(workload); 7248c2ecf20Sopenharmony_ci if (ret) { 7258c2ecf20Sopenharmony_ci gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n"); 7268c2ecf20Sopenharmony_ci goto err_unpin_mm; 7278c2ecf20Sopenharmony_ci } 7288c2ecf20Sopenharmony_ci 7298c2ecf20Sopenharmony_ci ret = prepare_shadow_wa_ctx(&workload->wa_ctx); 7308c2ecf20Sopenharmony_ci if (ret) { 7318c2ecf20Sopenharmony_ci gvt_vgpu_err("fail to prepare_shadow_wa_ctx\n"); 7328c2ecf20Sopenharmony_ci goto err_shadow_batch; 7338c2ecf20Sopenharmony_ci } 7348c2ecf20Sopenharmony_ci 7358c2ecf20Sopenharmony_ci if (workload->prepare) { 7368c2ecf20Sopenharmony_ci ret = workload->prepare(workload); 7378c2ecf20Sopenharmony_ci if (ret) 7388c2ecf20Sopenharmony_ci goto err_shadow_wa_ctx; 7398c2ecf20Sopenharmony_ci } 7408c2ecf20Sopenharmony_ci 7418c2ecf20Sopenharmony_ci return 0; 7428c2ecf20Sopenharmony_cierr_shadow_wa_ctx: 7438c2ecf20Sopenharmony_ci release_shadow_wa_ctx(&workload->wa_ctx); 7448c2ecf20Sopenharmony_cierr_shadow_batch: 7458c2ecf20Sopenharmony_ci release_shadow_batch_buffer(workload); 7468c2ecf20Sopenharmony_cierr_unpin_mm: 7478c2ecf20Sopenharmony_ci intel_vgpu_shadow_mm_unpin(workload); 7488c2ecf20Sopenharmony_ci return ret; 7498c2ecf20Sopenharmony_ci} 7508c2ecf20Sopenharmony_ci 
7518c2ecf20Sopenharmony_cistatic int dispatch_workload(struct intel_vgpu_workload *workload) 7528c2ecf20Sopenharmony_ci{ 7538c2ecf20Sopenharmony_ci struct intel_vgpu *vgpu = workload->vgpu; 7548c2ecf20Sopenharmony_ci struct i915_request *rq; 7558c2ecf20Sopenharmony_ci int ret; 7568c2ecf20Sopenharmony_ci 7578c2ecf20Sopenharmony_ci gvt_dbg_sched("ring id %s prepare to dispatch workload %p\n", 7588c2ecf20Sopenharmony_ci workload->engine->name, workload); 7598c2ecf20Sopenharmony_ci 7608c2ecf20Sopenharmony_ci mutex_lock(&vgpu->vgpu_lock); 7618c2ecf20Sopenharmony_ci 7628c2ecf20Sopenharmony_ci ret = intel_gvt_workload_req_alloc(workload); 7638c2ecf20Sopenharmony_ci if (ret) 7648c2ecf20Sopenharmony_ci goto err_req; 7658c2ecf20Sopenharmony_ci 7668c2ecf20Sopenharmony_ci ret = intel_gvt_scan_and_shadow_workload(workload); 7678c2ecf20Sopenharmony_ci if (ret) 7688c2ecf20Sopenharmony_ci goto out; 7698c2ecf20Sopenharmony_ci 7708c2ecf20Sopenharmony_ci ret = populate_shadow_context(workload); 7718c2ecf20Sopenharmony_ci if (ret) { 7728c2ecf20Sopenharmony_ci release_shadow_wa_ctx(&workload->wa_ctx); 7738c2ecf20Sopenharmony_ci goto out; 7748c2ecf20Sopenharmony_ci } 7758c2ecf20Sopenharmony_ci 7768c2ecf20Sopenharmony_ci ret = prepare_workload(workload); 7778c2ecf20Sopenharmony_ciout: 7788c2ecf20Sopenharmony_ci if (ret) { 7798c2ecf20Sopenharmony_ci /* We might still need to add request with 7808c2ecf20Sopenharmony_ci * clean ctx to retire it properly.. 
7818c2ecf20Sopenharmony_ci */ 7828c2ecf20Sopenharmony_ci rq = fetch_and_zero(&workload->req); 7838c2ecf20Sopenharmony_ci i915_request_put(rq); 7848c2ecf20Sopenharmony_ci } 7858c2ecf20Sopenharmony_ci 7868c2ecf20Sopenharmony_ci if (!IS_ERR_OR_NULL(workload->req)) { 7878c2ecf20Sopenharmony_ci gvt_dbg_sched("ring id %s submit workload to i915 %p\n", 7888c2ecf20Sopenharmony_ci workload->engine->name, workload->req); 7898c2ecf20Sopenharmony_ci i915_request_add(workload->req); 7908c2ecf20Sopenharmony_ci workload->dispatched = true; 7918c2ecf20Sopenharmony_ci } 7928c2ecf20Sopenharmony_cierr_req: 7938c2ecf20Sopenharmony_ci if (ret) 7948c2ecf20Sopenharmony_ci workload->status = ret; 7958c2ecf20Sopenharmony_ci mutex_unlock(&vgpu->vgpu_lock); 7968c2ecf20Sopenharmony_ci return ret; 7978c2ecf20Sopenharmony_ci} 7988c2ecf20Sopenharmony_ci 7998c2ecf20Sopenharmony_cistatic struct intel_vgpu_workload * 8008c2ecf20Sopenharmony_cipick_next_workload(struct intel_gvt *gvt, struct intel_engine_cs *engine) 8018c2ecf20Sopenharmony_ci{ 8028c2ecf20Sopenharmony_ci struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 8038c2ecf20Sopenharmony_ci struct intel_vgpu_workload *workload = NULL; 8048c2ecf20Sopenharmony_ci 8058c2ecf20Sopenharmony_ci mutex_lock(&gvt->sched_lock); 8068c2ecf20Sopenharmony_ci 8078c2ecf20Sopenharmony_ci /* 8088c2ecf20Sopenharmony_ci * no current vgpu / will be scheduled out / no workload 8098c2ecf20Sopenharmony_ci * bail out 8108c2ecf20Sopenharmony_ci */ 8118c2ecf20Sopenharmony_ci if (!scheduler->current_vgpu) { 8128c2ecf20Sopenharmony_ci gvt_dbg_sched("ring %s stop - no current vgpu\n", engine->name); 8138c2ecf20Sopenharmony_ci goto out; 8148c2ecf20Sopenharmony_ci } 8158c2ecf20Sopenharmony_ci 8168c2ecf20Sopenharmony_ci if (scheduler->need_reschedule) { 8178c2ecf20Sopenharmony_ci gvt_dbg_sched("ring %s stop - will reschedule\n", engine->name); 8188c2ecf20Sopenharmony_ci goto out; 8198c2ecf20Sopenharmony_ci } 8208c2ecf20Sopenharmony_ci 8218c2ecf20Sopenharmony_ci 
if (!scheduler->current_vgpu->active || 8228c2ecf20Sopenharmony_ci list_empty(workload_q_head(scheduler->current_vgpu, engine))) 8238c2ecf20Sopenharmony_ci goto out; 8248c2ecf20Sopenharmony_ci 8258c2ecf20Sopenharmony_ci /* 8268c2ecf20Sopenharmony_ci * still have current workload, maybe the workload disptacher 8278c2ecf20Sopenharmony_ci * fail to submit it for some reason, resubmit it. 8288c2ecf20Sopenharmony_ci */ 8298c2ecf20Sopenharmony_ci if (scheduler->current_workload[engine->id]) { 8308c2ecf20Sopenharmony_ci workload = scheduler->current_workload[engine->id]; 8318c2ecf20Sopenharmony_ci gvt_dbg_sched("ring %s still have current workload %p\n", 8328c2ecf20Sopenharmony_ci engine->name, workload); 8338c2ecf20Sopenharmony_ci goto out; 8348c2ecf20Sopenharmony_ci } 8358c2ecf20Sopenharmony_ci 8368c2ecf20Sopenharmony_ci /* 8378c2ecf20Sopenharmony_ci * pick a workload as current workload 8388c2ecf20Sopenharmony_ci * once current workload is set, schedule policy routines 8398c2ecf20Sopenharmony_ci * will wait the current workload is finished when trying to 8408c2ecf20Sopenharmony_ci * schedule out a vgpu. 
8418c2ecf20Sopenharmony_ci */ 8428c2ecf20Sopenharmony_ci scheduler->current_workload[engine->id] = 8438c2ecf20Sopenharmony_ci list_first_entry(workload_q_head(scheduler->current_vgpu, 8448c2ecf20Sopenharmony_ci engine), 8458c2ecf20Sopenharmony_ci struct intel_vgpu_workload, list); 8468c2ecf20Sopenharmony_ci 8478c2ecf20Sopenharmony_ci workload = scheduler->current_workload[engine->id]; 8488c2ecf20Sopenharmony_ci 8498c2ecf20Sopenharmony_ci gvt_dbg_sched("ring %s pick new workload %p\n", engine->name, workload); 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_ci atomic_inc(&workload->vgpu->submission.running_workload_num); 8528c2ecf20Sopenharmony_ciout: 8538c2ecf20Sopenharmony_ci mutex_unlock(&gvt->sched_lock); 8548c2ecf20Sopenharmony_ci return workload; 8558c2ecf20Sopenharmony_ci} 8568c2ecf20Sopenharmony_ci 8578c2ecf20Sopenharmony_cistatic void update_guest_pdps(struct intel_vgpu *vgpu, 8588c2ecf20Sopenharmony_ci u64 ring_context_gpa, u32 pdp[8]) 8598c2ecf20Sopenharmony_ci{ 8608c2ecf20Sopenharmony_ci u64 gpa; 8618c2ecf20Sopenharmony_ci int i; 8628c2ecf20Sopenharmony_ci 8638c2ecf20Sopenharmony_ci gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val); 8648c2ecf20Sopenharmony_ci 8658c2ecf20Sopenharmony_ci for (i = 0; i < 8; i++) 8668c2ecf20Sopenharmony_ci intel_gvt_hypervisor_write_gpa(vgpu, 8678c2ecf20Sopenharmony_ci gpa + i * 8, &pdp[7 - i], 4); 8688c2ecf20Sopenharmony_ci} 8698c2ecf20Sopenharmony_ci 8708c2ecf20Sopenharmony_cistatic __maybe_unused bool 8718c2ecf20Sopenharmony_cicheck_shadow_context_ppgtt(struct execlist_ring_context *c, struct intel_vgpu_mm *m) 8728c2ecf20Sopenharmony_ci{ 8738c2ecf20Sopenharmony_ci if (m->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { 8748c2ecf20Sopenharmony_ci u64 shadow_pdp = c->pdps[7].val | (u64) c->pdps[6].val << 32; 8758c2ecf20Sopenharmony_ci 8768c2ecf20Sopenharmony_ci if (shadow_pdp != m->ppgtt_mm.shadow_pdps[0]) { 8778c2ecf20Sopenharmony_ci gvt_dbg_mm("4-level context ppgtt not match LRI command\n"); 
8788c2ecf20Sopenharmony_ci return false; 8798c2ecf20Sopenharmony_ci } 8808c2ecf20Sopenharmony_ci return true; 8818c2ecf20Sopenharmony_ci } else { 8828c2ecf20Sopenharmony_ci /* see comment in LRI handler in cmd_parser.c */ 8838c2ecf20Sopenharmony_ci gvt_dbg_mm("invalid shadow mm type\n"); 8848c2ecf20Sopenharmony_ci return false; 8858c2ecf20Sopenharmony_ci } 8868c2ecf20Sopenharmony_ci} 8878c2ecf20Sopenharmony_ci 8888c2ecf20Sopenharmony_cistatic void update_guest_context(struct intel_vgpu_workload *workload) 8898c2ecf20Sopenharmony_ci{ 8908c2ecf20Sopenharmony_ci struct i915_request *rq = workload->req; 8918c2ecf20Sopenharmony_ci struct intel_vgpu *vgpu = workload->vgpu; 8928c2ecf20Sopenharmony_ci struct execlist_ring_context *shadow_ring_context; 8938c2ecf20Sopenharmony_ci struct intel_context *ctx = workload->req->context; 8948c2ecf20Sopenharmony_ci void *context_base; 8958c2ecf20Sopenharmony_ci void *src; 8968c2ecf20Sopenharmony_ci unsigned long context_gpa, context_page_num; 8978c2ecf20Sopenharmony_ci unsigned long gpa_base; /* first gpa of consecutive GPAs */ 8988c2ecf20Sopenharmony_ci unsigned long gpa_size; /* size of consecutive GPAs*/ 8998c2ecf20Sopenharmony_ci int i; 9008c2ecf20Sopenharmony_ci u32 ring_base; 9018c2ecf20Sopenharmony_ci u32 head, tail; 9028c2ecf20Sopenharmony_ci u16 wrap_count; 9038c2ecf20Sopenharmony_ci 9048c2ecf20Sopenharmony_ci gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id, 9058c2ecf20Sopenharmony_ci workload->ctx_desc.lrca); 9068c2ecf20Sopenharmony_ci 9078c2ecf20Sopenharmony_ci GEM_BUG_ON(!intel_context_is_pinned(ctx)); 9088c2ecf20Sopenharmony_ci 9098c2ecf20Sopenharmony_ci head = workload->rb_head; 9108c2ecf20Sopenharmony_ci tail = workload->rb_tail; 9118c2ecf20Sopenharmony_ci wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF; 9128c2ecf20Sopenharmony_ci 9138c2ecf20Sopenharmony_ci if (tail < head) { 9148c2ecf20Sopenharmony_ci if (wrap_count == RB_HEAD_WRAP_CNT_MAX) 9158c2ecf20Sopenharmony_ci wrap_count = 0; 
9168c2ecf20Sopenharmony_ci else 9178c2ecf20Sopenharmony_ci wrap_count += 1; 9188c2ecf20Sopenharmony_ci } 9198c2ecf20Sopenharmony_ci 9208c2ecf20Sopenharmony_ci head = (wrap_count << RB_HEAD_WRAP_CNT_OFF) | tail; 9218c2ecf20Sopenharmony_ci 9228c2ecf20Sopenharmony_ci ring_base = rq->engine->mmio_base; 9238c2ecf20Sopenharmony_ci vgpu_vreg_t(vgpu, RING_TAIL(ring_base)) = tail; 9248c2ecf20Sopenharmony_ci vgpu_vreg_t(vgpu, RING_HEAD(ring_base)) = head; 9258c2ecf20Sopenharmony_ci 9268c2ecf20Sopenharmony_ci context_page_num = rq->engine->context_size; 9278c2ecf20Sopenharmony_ci context_page_num = context_page_num >> PAGE_SHIFT; 9288c2ecf20Sopenharmony_ci 9298c2ecf20Sopenharmony_ci if (IS_BROADWELL(rq->engine->i915) && rq->engine->id == RCS0) 9308c2ecf20Sopenharmony_ci context_page_num = 19; 9318c2ecf20Sopenharmony_ci 9328c2ecf20Sopenharmony_ci context_base = (void *) ctx->lrc_reg_state - 9338c2ecf20Sopenharmony_ci (LRC_STATE_PN << I915_GTT_PAGE_SHIFT); 9348c2ecf20Sopenharmony_ci 9358c2ecf20Sopenharmony_ci /* find consecutive GPAs from gma until the first inconsecutive GPA. 
9368c2ecf20Sopenharmony_ci * write to the consecutive GPAs from src virtual address 9378c2ecf20Sopenharmony_ci */ 9388c2ecf20Sopenharmony_ci gpa_size = 0; 9398c2ecf20Sopenharmony_ci for (i = 2; i < context_page_num; i++) { 9408c2ecf20Sopenharmony_ci context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, 9418c2ecf20Sopenharmony_ci (u32)((workload->ctx_desc.lrca + i) << 9428c2ecf20Sopenharmony_ci I915_GTT_PAGE_SHIFT)); 9438c2ecf20Sopenharmony_ci if (context_gpa == INTEL_GVT_INVALID_ADDR) { 9448c2ecf20Sopenharmony_ci gvt_vgpu_err("invalid guest context descriptor\n"); 9458c2ecf20Sopenharmony_ci return; 9468c2ecf20Sopenharmony_ci } 9478c2ecf20Sopenharmony_ci 9488c2ecf20Sopenharmony_ci if (gpa_size == 0) { 9498c2ecf20Sopenharmony_ci gpa_base = context_gpa; 9508c2ecf20Sopenharmony_ci src = context_base + (i << I915_GTT_PAGE_SHIFT); 9518c2ecf20Sopenharmony_ci } else if (context_gpa != gpa_base + gpa_size) 9528c2ecf20Sopenharmony_ci goto write; 9538c2ecf20Sopenharmony_ci 9548c2ecf20Sopenharmony_ci gpa_size += I915_GTT_PAGE_SIZE; 9558c2ecf20Sopenharmony_ci 9568c2ecf20Sopenharmony_ci if (i == context_page_num - 1) 9578c2ecf20Sopenharmony_ci goto write; 9588c2ecf20Sopenharmony_ci 9598c2ecf20Sopenharmony_ci continue; 9608c2ecf20Sopenharmony_ci 9618c2ecf20Sopenharmony_ciwrite: 9628c2ecf20Sopenharmony_ci intel_gvt_hypervisor_write_gpa(vgpu, gpa_base, src, gpa_size); 9638c2ecf20Sopenharmony_ci gpa_base = context_gpa; 9648c2ecf20Sopenharmony_ci gpa_size = I915_GTT_PAGE_SIZE; 9658c2ecf20Sopenharmony_ci src = context_base + (i << I915_GTT_PAGE_SHIFT); 9668c2ecf20Sopenharmony_ci } 9678c2ecf20Sopenharmony_ci 9688c2ecf20Sopenharmony_ci intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + 9698c2ecf20Sopenharmony_ci RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4); 9708c2ecf20Sopenharmony_ci 9718c2ecf20Sopenharmony_ci shadow_ring_context = (void *) ctx->lrc_reg_state; 9728c2ecf20Sopenharmony_ci 9738c2ecf20Sopenharmony_ci if (!list_empty(&workload->lri_shadow_mm)) { 
9748c2ecf20Sopenharmony_ci struct intel_vgpu_mm *m = list_last_entry(&workload->lri_shadow_mm, 9758c2ecf20Sopenharmony_ci struct intel_vgpu_mm, 9768c2ecf20Sopenharmony_ci ppgtt_mm.link); 9778c2ecf20Sopenharmony_ci GEM_BUG_ON(!check_shadow_context_ppgtt(shadow_ring_context, m)); 9788c2ecf20Sopenharmony_ci update_guest_pdps(vgpu, workload->ring_context_gpa, 9798c2ecf20Sopenharmony_ci (void *)m->ppgtt_mm.guest_pdps); 9808c2ecf20Sopenharmony_ci } 9818c2ecf20Sopenharmony_ci 9828c2ecf20Sopenharmony_ci#define COPY_REG(name) \ 9838c2ecf20Sopenharmony_ci intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \ 9848c2ecf20Sopenharmony_ci RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4) 9858c2ecf20Sopenharmony_ci 9868c2ecf20Sopenharmony_ci COPY_REG(ctx_ctrl); 9878c2ecf20Sopenharmony_ci COPY_REG(ctx_timestamp); 9888c2ecf20Sopenharmony_ci 9898c2ecf20Sopenharmony_ci#undef COPY_REG 9908c2ecf20Sopenharmony_ci 9918c2ecf20Sopenharmony_ci intel_gvt_hypervisor_write_gpa(vgpu, 9928c2ecf20Sopenharmony_ci workload->ring_context_gpa + 9938c2ecf20Sopenharmony_ci sizeof(*shadow_ring_context), 9948c2ecf20Sopenharmony_ci (void *)shadow_ring_context + 9958c2ecf20Sopenharmony_ci sizeof(*shadow_ring_context), 9968c2ecf20Sopenharmony_ci I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); 9978c2ecf20Sopenharmony_ci} 9988c2ecf20Sopenharmony_ci 9998c2ecf20Sopenharmony_civoid intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, 10008c2ecf20Sopenharmony_ci intel_engine_mask_t engine_mask) 10018c2ecf20Sopenharmony_ci{ 10028c2ecf20Sopenharmony_ci struct intel_vgpu_submission *s = &vgpu->submission; 10038c2ecf20Sopenharmony_ci struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; 10048c2ecf20Sopenharmony_ci struct intel_engine_cs *engine; 10058c2ecf20Sopenharmony_ci struct intel_vgpu_workload *pos, *n; 10068c2ecf20Sopenharmony_ci intel_engine_mask_t tmp; 10078c2ecf20Sopenharmony_ci 10088c2ecf20Sopenharmony_ci /* free the unsubmited workloads in the queues. 
*/ 10098c2ecf20Sopenharmony_ci for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) { 10108c2ecf20Sopenharmony_ci list_for_each_entry_safe(pos, n, 10118c2ecf20Sopenharmony_ci &s->workload_q_head[engine->id], list) { 10128c2ecf20Sopenharmony_ci list_del_init(&pos->list); 10138c2ecf20Sopenharmony_ci intel_vgpu_destroy_workload(pos); 10148c2ecf20Sopenharmony_ci } 10158c2ecf20Sopenharmony_ci clear_bit(engine->id, s->shadow_ctx_desc_updated); 10168c2ecf20Sopenharmony_ci } 10178c2ecf20Sopenharmony_ci} 10188c2ecf20Sopenharmony_ci 10198c2ecf20Sopenharmony_cistatic void complete_current_workload(struct intel_gvt *gvt, int ring_id) 10208c2ecf20Sopenharmony_ci{ 10218c2ecf20Sopenharmony_ci struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 10228c2ecf20Sopenharmony_ci struct intel_vgpu_workload *workload = 10238c2ecf20Sopenharmony_ci scheduler->current_workload[ring_id]; 10248c2ecf20Sopenharmony_ci struct intel_vgpu *vgpu = workload->vgpu; 10258c2ecf20Sopenharmony_ci struct intel_vgpu_submission *s = &vgpu->submission; 10268c2ecf20Sopenharmony_ci struct i915_request *rq = workload->req; 10278c2ecf20Sopenharmony_ci int event; 10288c2ecf20Sopenharmony_ci 10298c2ecf20Sopenharmony_ci mutex_lock(&vgpu->vgpu_lock); 10308c2ecf20Sopenharmony_ci mutex_lock(&gvt->sched_lock); 10318c2ecf20Sopenharmony_ci 10328c2ecf20Sopenharmony_ci /* For the workload w/ request, needs to wait for the context 10338c2ecf20Sopenharmony_ci * switch to make sure request is completed. 10348c2ecf20Sopenharmony_ci * For the workload w/o request, directly complete the workload. 10358c2ecf20Sopenharmony_ci */ 10368c2ecf20Sopenharmony_ci if (rq) { 10378c2ecf20Sopenharmony_ci wait_event(workload->shadow_ctx_status_wq, 10388c2ecf20Sopenharmony_ci !atomic_read(&workload->shadow_ctx_active)); 10398c2ecf20Sopenharmony_ci 10408c2ecf20Sopenharmony_ci /* If this request caused GPU hang, req->fence.error will 10418c2ecf20Sopenharmony_ci * be set to -EIO. 
Use -EIO to set workload status so 10428c2ecf20Sopenharmony_ci * that when this request caused GPU hang, didn't trigger 10438c2ecf20Sopenharmony_ci * context switch interrupt to guest. 10448c2ecf20Sopenharmony_ci */ 10458c2ecf20Sopenharmony_ci if (likely(workload->status == -EINPROGRESS)) { 10468c2ecf20Sopenharmony_ci if (workload->req->fence.error == -EIO) 10478c2ecf20Sopenharmony_ci workload->status = -EIO; 10488c2ecf20Sopenharmony_ci else 10498c2ecf20Sopenharmony_ci workload->status = 0; 10508c2ecf20Sopenharmony_ci } 10518c2ecf20Sopenharmony_ci 10528c2ecf20Sopenharmony_ci if (!workload->status && 10538c2ecf20Sopenharmony_ci !(vgpu->resetting_eng & BIT(ring_id))) { 10548c2ecf20Sopenharmony_ci update_guest_context(workload); 10558c2ecf20Sopenharmony_ci 10568c2ecf20Sopenharmony_ci for_each_set_bit(event, workload->pending_events, 10578c2ecf20Sopenharmony_ci INTEL_GVT_EVENT_MAX) 10588c2ecf20Sopenharmony_ci intel_vgpu_trigger_virtual_event(vgpu, event); 10598c2ecf20Sopenharmony_ci } 10608c2ecf20Sopenharmony_ci 10618c2ecf20Sopenharmony_ci i915_request_put(fetch_and_zero(&workload->req)); 10628c2ecf20Sopenharmony_ci } 10638c2ecf20Sopenharmony_ci 10648c2ecf20Sopenharmony_ci gvt_dbg_sched("ring id %d complete workload %p status %d\n", 10658c2ecf20Sopenharmony_ci ring_id, workload, workload->status); 10668c2ecf20Sopenharmony_ci 10678c2ecf20Sopenharmony_ci scheduler->current_workload[ring_id] = NULL; 10688c2ecf20Sopenharmony_ci 10698c2ecf20Sopenharmony_ci list_del_init(&workload->list); 10708c2ecf20Sopenharmony_ci 10718c2ecf20Sopenharmony_ci if (workload->status || vgpu->resetting_eng & BIT(ring_id)) { 10728c2ecf20Sopenharmony_ci /* if workload->status is not successful means HW GPU 10738c2ecf20Sopenharmony_ci * has occurred GPU hang or something wrong with i915/GVT, 10748c2ecf20Sopenharmony_ci * and GVT won't inject context switch interrupt to guest. 10758c2ecf20Sopenharmony_ci * So this error is a vGPU hang actually to the guest. 
10768c2ecf20Sopenharmony_ci * According to this we should emunlate a vGPU hang. If 10778c2ecf20Sopenharmony_ci * there are pending workloads which are already submitted 10788c2ecf20Sopenharmony_ci * from guest, we should clean them up like HW GPU does. 10798c2ecf20Sopenharmony_ci * 10808c2ecf20Sopenharmony_ci * if it is in middle of engine resetting, the pending 10818c2ecf20Sopenharmony_ci * workloads won't be submitted to HW GPU and will be 10828c2ecf20Sopenharmony_ci * cleaned up during the resetting process later, so doing 10838c2ecf20Sopenharmony_ci * the workload clean up here doesn't have any impact. 10848c2ecf20Sopenharmony_ci **/ 10858c2ecf20Sopenharmony_ci intel_vgpu_clean_workloads(vgpu, BIT(ring_id)); 10868c2ecf20Sopenharmony_ci } 10878c2ecf20Sopenharmony_ci 10888c2ecf20Sopenharmony_ci workload->complete(workload); 10898c2ecf20Sopenharmony_ci 10908c2ecf20Sopenharmony_ci intel_vgpu_shadow_mm_unpin(workload); 10918c2ecf20Sopenharmony_ci intel_vgpu_destroy_workload(workload); 10928c2ecf20Sopenharmony_ci 10938c2ecf20Sopenharmony_ci atomic_dec(&s->running_workload_num); 10948c2ecf20Sopenharmony_ci wake_up(&scheduler->workload_complete_wq); 10958c2ecf20Sopenharmony_ci 10968c2ecf20Sopenharmony_ci if (gvt->scheduler.need_reschedule) 10978c2ecf20Sopenharmony_ci intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED); 10988c2ecf20Sopenharmony_ci 10998c2ecf20Sopenharmony_ci mutex_unlock(&gvt->sched_lock); 11008c2ecf20Sopenharmony_ci mutex_unlock(&vgpu->vgpu_lock); 11018c2ecf20Sopenharmony_ci} 11028c2ecf20Sopenharmony_ci 11038c2ecf20Sopenharmony_cistatic int workload_thread(void *arg) 11048c2ecf20Sopenharmony_ci{ 11058c2ecf20Sopenharmony_ci struct intel_engine_cs *engine = arg; 11068c2ecf20Sopenharmony_ci const bool need_force_wake = INTEL_GEN(engine->i915) >= 9; 11078c2ecf20Sopenharmony_ci struct intel_gvt *gvt = engine->i915->gvt; 11088c2ecf20Sopenharmony_ci struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 11098c2ecf20Sopenharmony_ci struct 
intel_vgpu_workload *workload = NULL; 11108c2ecf20Sopenharmony_ci struct intel_vgpu *vgpu = NULL; 11118c2ecf20Sopenharmony_ci int ret; 11128c2ecf20Sopenharmony_ci DEFINE_WAIT_FUNC(wait, woken_wake_function); 11138c2ecf20Sopenharmony_ci 11148c2ecf20Sopenharmony_ci gvt_dbg_core("workload thread for ring %s started\n", engine->name); 11158c2ecf20Sopenharmony_ci 11168c2ecf20Sopenharmony_ci while (!kthread_should_stop()) { 11178c2ecf20Sopenharmony_ci intel_wakeref_t wakeref; 11188c2ecf20Sopenharmony_ci 11198c2ecf20Sopenharmony_ci add_wait_queue(&scheduler->waitq[engine->id], &wait); 11208c2ecf20Sopenharmony_ci do { 11218c2ecf20Sopenharmony_ci workload = pick_next_workload(gvt, engine); 11228c2ecf20Sopenharmony_ci if (workload) 11238c2ecf20Sopenharmony_ci break; 11248c2ecf20Sopenharmony_ci wait_woken(&wait, TASK_INTERRUPTIBLE, 11258c2ecf20Sopenharmony_ci MAX_SCHEDULE_TIMEOUT); 11268c2ecf20Sopenharmony_ci } while (!kthread_should_stop()); 11278c2ecf20Sopenharmony_ci remove_wait_queue(&scheduler->waitq[engine->id], &wait); 11288c2ecf20Sopenharmony_ci 11298c2ecf20Sopenharmony_ci if (!workload) 11308c2ecf20Sopenharmony_ci break; 11318c2ecf20Sopenharmony_ci 11328c2ecf20Sopenharmony_ci gvt_dbg_sched("ring %s next workload %p vgpu %d\n", 11338c2ecf20Sopenharmony_ci engine->name, workload, 11348c2ecf20Sopenharmony_ci workload->vgpu->id); 11358c2ecf20Sopenharmony_ci 11368c2ecf20Sopenharmony_ci wakeref = intel_runtime_pm_get(engine->uncore->rpm); 11378c2ecf20Sopenharmony_ci 11388c2ecf20Sopenharmony_ci gvt_dbg_sched("ring %s will dispatch workload %p\n", 11398c2ecf20Sopenharmony_ci engine->name, workload); 11408c2ecf20Sopenharmony_ci 11418c2ecf20Sopenharmony_ci if (need_force_wake) 11428c2ecf20Sopenharmony_ci intel_uncore_forcewake_get(engine->uncore, 11438c2ecf20Sopenharmony_ci FORCEWAKE_ALL); 11448c2ecf20Sopenharmony_ci /* 11458c2ecf20Sopenharmony_ci * Update the vReg of the vGPU which submitted this 11468c2ecf20Sopenharmony_ci * workload. 
The vGPU may use these registers for checking 11478c2ecf20Sopenharmony_ci * the context state. The value comes from GPU commands 11488c2ecf20Sopenharmony_ci * in this workload. 11498c2ecf20Sopenharmony_ci */ 11508c2ecf20Sopenharmony_ci update_vreg_in_ctx(workload); 11518c2ecf20Sopenharmony_ci 11528c2ecf20Sopenharmony_ci ret = dispatch_workload(workload); 11538c2ecf20Sopenharmony_ci 11548c2ecf20Sopenharmony_ci if (ret) { 11558c2ecf20Sopenharmony_ci vgpu = workload->vgpu; 11568c2ecf20Sopenharmony_ci gvt_vgpu_err("fail to dispatch workload, skip\n"); 11578c2ecf20Sopenharmony_ci goto complete; 11588c2ecf20Sopenharmony_ci } 11598c2ecf20Sopenharmony_ci 11608c2ecf20Sopenharmony_ci gvt_dbg_sched("ring %s wait workload %p\n", 11618c2ecf20Sopenharmony_ci engine->name, workload); 11628c2ecf20Sopenharmony_ci i915_request_wait(workload->req, 0, MAX_SCHEDULE_TIMEOUT); 11638c2ecf20Sopenharmony_ci 11648c2ecf20Sopenharmony_cicomplete: 11658c2ecf20Sopenharmony_ci gvt_dbg_sched("will complete workload %p, status: %d\n", 11668c2ecf20Sopenharmony_ci workload, workload->status); 11678c2ecf20Sopenharmony_ci 11688c2ecf20Sopenharmony_ci complete_current_workload(gvt, engine->id); 11698c2ecf20Sopenharmony_ci 11708c2ecf20Sopenharmony_ci if (need_force_wake) 11718c2ecf20Sopenharmony_ci intel_uncore_forcewake_put(engine->uncore, 11728c2ecf20Sopenharmony_ci FORCEWAKE_ALL); 11738c2ecf20Sopenharmony_ci 11748c2ecf20Sopenharmony_ci intel_runtime_pm_put(engine->uncore->rpm, wakeref); 11758c2ecf20Sopenharmony_ci if (ret && (vgpu_is_vm_unhealthy(ret))) 11768c2ecf20Sopenharmony_ci enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); 11778c2ecf20Sopenharmony_ci } 11788c2ecf20Sopenharmony_ci return 0; 11798c2ecf20Sopenharmony_ci} 11808c2ecf20Sopenharmony_ci 11818c2ecf20Sopenharmony_civoid intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu) 11828c2ecf20Sopenharmony_ci{ 11838c2ecf20Sopenharmony_ci struct intel_vgpu_submission *s = &vgpu->submission; 11848c2ecf20Sopenharmony_ci struct intel_gvt *gvt = 
vgpu->gvt; 11858c2ecf20Sopenharmony_ci struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 11868c2ecf20Sopenharmony_ci 11878c2ecf20Sopenharmony_ci if (atomic_read(&s->running_workload_num)) { 11888c2ecf20Sopenharmony_ci gvt_dbg_sched("wait vgpu idle\n"); 11898c2ecf20Sopenharmony_ci 11908c2ecf20Sopenharmony_ci wait_event(scheduler->workload_complete_wq, 11918c2ecf20Sopenharmony_ci !atomic_read(&s->running_workload_num)); 11928c2ecf20Sopenharmony_ci } 11938c2ecf20Sopenharmony_ci} 11948c2ecf20Sopenharmony_ci 11958c2ecf20Sopenharmony_civoid intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt) 11968c2ecf20Sopenharmony_ci{ 11978c2ecf20Sopenharmony_ci struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 11988c2ecf20Sopenharmony_ci struct intel_engine_cs *engine; 11998c2ecf20Sopenharmony_ci enum intel_engine_id i; 12008c2ecf20Sopenharmony_ci 12018c2ecf20Sopenharmony_ci gvt_dbg_core("clean workload scheduler\n"); 12028c2ecf20Sopenharmony_ci 12038c2ecf20Sopenharmony_ci for_each_engine(engine, gvt->gt, i) { 12048c2ecf20Sopenharmony_ci atomic_notifier_chain_unregister( 12058c2ecf20Sopenharmony_ci &engine->context_status_notifier, 12068c2ecf20Sopenharmony_ci &gvt->shadow_ctx_notifier_block[i]); 12078c2ecf20Sopenharmony_ci kthread_stop(scheduler->thread[i]); 12088c2ecf20Sopenharmony_ci } 12098c2ecf20Sopenharmony_ci} 12108c2ecf20Sopenharmony_ci 12118c2ecf20Sopenharmony_ciint intel_gvt_init_workload_scheduler(struct intel_gvt *gvt) 12128c2ecf20Sopenharmony_ci{ 12138c2ecf20Sopenharmony_ci struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 12148c2ecf20Sopenharmony_ci struct intel_engine_cs *engine; 12158c2ecf20Sopenharmony_ci enum intel_engine_id i; 12168c2ecf20Sopenharmony_ci int ret; 12178c2ecf20Sopenharmony_ci 12188c2ecf20Sopenharmony_ci gvt_dbg_core("init workload scheduler\n"); 12198c2ecf20Sopenharmony_ci 12208c2ecf20Sopenharmony_ci init_waitqueue_head(&scheduler->workload_complete_wq); 12218c2ecf20Sopenharmony_ci 
12228c2ecf20Sopenharmony_ci for_each_engine(engine, gvt->gt, i) { 12238c2ecf20Sopenharmony_ci init_waitqueue_head(&scheduler->waitq[i]); 12248c2ecf20Sopenharmony_ci 12258c2ecf20Sopenharmony_ci scheduler->thread[i] = kthread_run(workload_thread, engine, 12268c2ecf20Sopenharmony_ci "gvt:%s", engine->name); 12278c2ecf20Sopenharmony_ci if (IS_ERR(scheduler->thread[i])) { 12288c2ecf20Sopenharmony_ci gvt_err("fail to create workload thread\n"); 12298c2ecf20Sopenharmony_ci ret = PTR_ERR(scheduler->thread[i]); 12308c2ecf20Sopenharmony_ci goto err; 12318c2ecf20Sopenharmony_ci } 12328c2ecf20Sopenharmony_ci 12338c2ecf20Sopenharmony_ci gvt->shadow_ctx_notifier_block[i].notifier_call = 12348c2ecf20Sopenharmony_ci shadow_context_status_change; 12358c2ecf20Sopenharmony_ci atomic_notifier_chain_register(&engine->context_status_notifier, 12368c2ecf20Sopenharmony_ci &gvt->shadow_ctx_notifier_block[i]); 12378c2ecf20Sopenharmony_ci } 12388c2ecf20Sopenharmony_ci 12398c2ecf20Sopenharmony_ci return 0; 12408c2ecf20Sopenharmony_ci 12418c2ecf20Sopenharmony_cierr: 12428c2ecf20Sopenharmony_ci intel_gvt_clean_workload_scheduler(gvt); 12438c2ecf20Sopenharmony_ci return ret; 12448c2ecf20Sopenharmony_ci} 12458c2ecf20Sopenharmony_ci 12468c2ecf20Sopenharmony_cistatic void 12478c2ecf20Sopenharmony_cii915_context_ppgtt_root_restore(struct intel_vgpu_submission *s, 12488c2ecf20Sopenharmony_ci struct i915_ppgtt *ppgtt) 12498c2ecf20Sopenharmony_ci{ 12508c2ecf20Sopenharmony_ci int i; 12518c2ecf20Sopenharmony_ci 12528c2ecf20Sopenharmony_ci if (i915_vm_is_4lvl(&ppgtt->vm)) { 12538c2ecf20Sopenharmony_ci set_dma_address(ppgtt->pd, s->i915_context_pml4); 12548c2ecf20Sopenharmony_ci } else { 12558c2ecf20Sopenharmony_ci for (i = 0; i < GEN8_3LVL_PDPES; i++) { 12568c2ecf20Sopenharmony_ci struct i915_page_directory * const pd = 12578c2ecf20Sopenharmony_ci i915_pd_entry(ppgtt->pd, i); 12588c2ecf20Sopenharmony_ci 12598c2ecf20Sopenharmony_ci set_dma_address(pd, s->i915_context_pdps[i]); 12608c2ecf20Sopenharmony_ci } 
12618c2ecf20Sopenharmony_ci } 12628c2ecf20Sopenharmony_ci} 12638c2ecf20Sopenharmony_ci 12648c2ecf20Sopenharmony_ci/** 12658c2ecf20Sopenharmony_ci * intel_vgpu_clean_submission - free submission-related resource for vGPU 12668c2ecf20Sopenharmony_ci * @vgpu: a vGPU 12678c2ecf20Sopenharmony_ci * 12688c2ecf20Sopenharmony_ci * This function is called when a vGPU is being destroyed. 12698c2ecf20Sopenharmony_ci * 12708c2ecf20Sopenharmony_ci */ 12718c2ecf20Sopenharmony_civoid intel_vgpu_clean_submission(struct intel_vgpu *vgpu) 12728c2ecf20Sopenharmony_ci{ 12738c2ecf20Sopenharmony_ci struct intel_vgpu_submission *s = &vgpu->submission; 12748c2ecf20Sopenharmony_ci struct intel_engine_cs *engine; 12758c2ecf20Sopenharmony_ci enum intel_engine_id id; 12768c2ecf20Sopenharmony_ci 12778c2ecf20Sopenharmony_ci intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0); 12788c2ecf20Sopenharmony_ci 12798c2ecf20Sopenharmony_ci i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->vm)); 12808c2ecf20Sopenharmony_ci for_each_engine(engine, vgpu->gvt->gt, id) 12818c2ecf20Sopenharmony_ci intel_context_put(s->shadow[id]); 12828c2ecf20Sopenharmony_ci 12838c2ecf20Sopenharmony_ci kmem_cache_destroy(s->workloads); 12848c2ecf20Sopenharmony_ci} 12858c2ecf20Sopenharmony_ci 12868c2ecf20Sopenharmony_ci 12878c2ecf20Sopenharmony_ci/** 12888c2ecf20Sopenharmony_ci * intel_vgpu_reset_submission - reset submission-related resource for vGPU 12898c2ecf20Sopenharmony_ci * @vgpu: a vGPU 12908c2ecf20Sopenharmony_ci * @engine_mask: engines expected to be reset 12918c2ecf20Sopenharmony_ci * 12928c2ecf20Sopenharmony_ci * This function is called when a vGPU is being destroyed. 
12938c2ecf20Sopenharmony_ci * 12948c2ecf20Sopenharmony_ci */ 12958c2ecf20Sopenharmony_civoid intel_vgpu_reset_submission(struct intel_vgpu *vgpu, 12968c2ecf20Sopenharmony_ci intel_engine_mask_t engine_mask) 12978c2ecf20Sopenharmony_ci{ 12988c2ecf20Sopenharmony_ci struct intel_vgpu_submission *s = &vgpu->submission; 12998c2ecf20Sopenharmony_ci 13008c2ecf20Sopenharmony_ci if (!s->active) 13018c2ecf20Sopenharmony_ci return; 13028c2ecf20Sopenharmony_ci 13038c2ecf20Sopenharmony_ci intel_vgpu_clean_workloads(vgpu, engine_mask); 13048c2ecf20Sopenharmony_ci s->ops->reset(vgpu, engine_mask); 13058c2ecf20Sopenharmony_ci} 13068c2ecf20Sopenharmony_ci 13078c2ecf20Sopenharmony_cistatic void 13088c2ecf20Sopenharmony_cii915_context_ppgtt_root_save(struct intel_vgpu_submission *s, 13098c2ecf20Sopenharmony_ci struct i915_ppgtt *ppgtt) 13108c2ecf20Sopenharmony_ci{ 13118c2ecf20Sopenharmony_ci int i; 13128c2ecf20Sopenharmony_ci 13138c2ecf20Sopenharmony_ci if (i915_vm_is_4lvl(&ppgtt->vm)) { 13148c2ecf20Sopenharmony_ci s->i915_context_pml4 = px_dma(ppgtt->pd); 13158c2ecf20Sopenharmony_ci } else { 13168c2ecf20Sopenharmony_ci for (i = 0; i < GEN8_3LVL_PDPES; i++) { 13178c2ecf20Sopenharmony_ci struct i915_page_directory * const pd = 13188c2ecf20Sopenharmony_ci i915_pd_entry(ppgtt->pd, i); 13198c2ecf20Sopenharmony_ci 13208c2ecf20Sopenharmony_ci s->i915_context_pdps[i] = px_dma(pd); 13218c2ecf20Sopenharmony_ci } 13228c2ecf20Sopenharmony_ci } 13238c2ecf20Sopenharmony_ci} 13248c2ecf20Sopenharmony_ci 13258c2ecf20Sopenharmony_ci/** 13268c2ecf20Sopenharmony_ci * intel_vgpu_setup_submission - setup submission-related resource for vGPU 13278c2ecf20Sopenharmony_ci * @vgpu: a vGPU 13288c2ecf20Sopenharmony_ci * 13298c2ecf20Sopenharmony_ci * This function is called when a vGPU is being created. 13308c2ecf20Sopenharmony_ci * 13318c2ecf20Sopenharmony_ci * Returns: 13328c2ecf20Sopenharmony_ci * Zero on success, negative error code if failed. 
13338c2ecf20Sopenharmony_ci * 13348c2ecf20Sopenharmony_ci */ 13358c2ecf20Sopenharmony_ciint intel_vgpu_setup_submission(struct intel_vgpu *vgpu) 13368c2ecf20Sopenharmony_ci{ 13378c2ecf20Sopenharmony_ci struct drm_i915_private *i915 = vgpu->gvt->gt->i915; 13388c2ecf20Sopenharmony_ci struct intel_vgpu_submission *s = &vgpu->submission; 13398c2ecf20Sopenharmony_ci struct intel_engine_cs *engine; 13408c2ecf20Sopenharmony_ci struct i915_ppgtt *ppgtt; 13418c2ecf20Sopenharmony_ci enum intel_engine_id i; 13428c2ecf20Sopenharmony_ci int ret; 13438c2ecf20Sopenharmony_ci 13448c2ecf20Sopenharmony_ci ppgtt = i915_ppgtt_create(&i915->gt); 13458c2ecf20Sopenharmony_ci if (IS_ERR(ppgtt)) 13468c2ecf20Sopenharmony_ci return PTR_ERR(ppgtt); 13478c2ecf20Sopenharmony_ci 13488c2ecf20Sopenharmony_ci i915_context_ppgtt_root_save(s, ppgtt); 13498c2ecf20Sopenharmony_ci 13508c2ecf20Sopenharmony_ci for_each_engine(engine, vgpu->gvt->gt, i) { 13518c2ecf20Sopenharmony_ci struct intel_context *ce; 13528c2ecf20Sopenharmony_ci 13538c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&s->workload_q_head[i]); 13548c2ecf20Sopenharmony_ci s->shadow[i] = ERR_PTR(-EINVAL); 13558c2ecf20Sopenharmony_ci 13568c2ecf20Sopenharmony_ci ce = intel_context_create(engine); 13578c2ecf20Sopenharmony_ci if (IS_ERR(ce)) { 13588c2ecf20Sopenharmony_ci ret = PTR_ERR(ce); 13598c2ecf20Sopenharmony_ci goto out_shadow_ctx; 13608c2ecf20Sopenharmony_ci } 13618c2ecf20Sopenharmony_ci 13628c2ecf20Sopenharmony_ci i915_vm_put(ce->vm); 13638c2ecf20Sopenharmony_ci ce->vm = i915_vm_get(&ppgtt->vm); 13648c2ecf20Sopenharmony_ci intel_context_set_single_submission(ce); 13658c2ecf20Sopenharmony_ci 13668c2ecf20Sopenharmony_ci /* Max ring buffer size */ 13678c2ecf20Sopenharmony_ci if (!intel_uc_wants_guc_submission(&engine->gt->uc)) { 13688c2ecf20Sopenharmony_ci const unsigned int ring_size = 512 * SZ_4K; 13698c2ecf20Sopenharmony_ci 13708c2ecf20Sopenharmony_ci ce->ring = __intel_context_ring_size(ring_size); 13718c2ecf20Sopenharmony_ci } 
13728c2ecf20Sopenharmony_ci 13738c2ecf20Sopenharmony_ci s->shadow[i] = ce; 13748c2ecf20Sopenharmony_ci } 13758c2ecf20Sopenharmony_ci 13768c2ecf20Sopenharmony_ci bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); 13778c2ecf20Sopenharmony_ci 13788c2ecf20Sopenharmony_ci s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload", 13798c2ecf20Sopenharmony_ci sizeof(struct intel_vgpu_workload), 0, 13808c2ecf20Sopenharmony_ci SLAB_HWCACHE_ALIGN, 13818c2ecf20Sopenharmony_ci offsetof(struct intel_vgpu_workload, rb_tail), 13828c2ecf20Sopenharmony_ci sizeof_field(struct intel_vgpu_workload, rb_tail), 13838c2ecf20Sopenharmony_ci NULL); 13848c2ecf20Sopenharmony_ci 13858c2ecf20Sopenharmony_ci if (!s->workloads) { 13868c2ecf20Sopenharmony_ci ret = -ENOMEM; 13878c2ecf20Sopenharmony_ci goto out_shadow_ctx; 13888c2ecf20Sopenharmony_ci } 13898c2ecf20Sopenharmony_ci 13908c2ecf20Sopenharmony_ci atomic_set(&s->running_workload_num, 0); 13918c2ecf20Sopenharmony_ci bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES); 13928c2ecf20Sopenharmony_ci 13938c2ecf20Sopenharmony_ci memset(s->last_ctx, 0, sizeof(s->last_ctx)); 13948c2ecf20Sopenharmony_ci 13958c2ecf20Sopenharmony_ci i915_vm_put(&ppgtt->vm); 13968c2ecf20Sopenharmony_ci return 0; 13978c2ecf20Sopenharmony_ci 13988c2ecf20Sopenharmony_ciout_shadow_ctx: 13998c2ecf20Sopenharmony_ci i915_context_ppgtt_root_restore(s, ppgtt); 14008c2ecf20Sopenharmony_ci for_each_engine(engine, vgpu->gvt->gt, i) { 14018c2ecf20Sopenharmony_ci if (IS_ERR(s->shadow[i])) 14028c2ecf20Sopenharmony_ci break; 14038c2ecf20Sopenharmony_ci 14048c2ecf20Sopenharmony_ci intel_context_put(s->shadow[i]); 14058c2ecf20Sopenharmony_ci } 14068c2ecf20Sopenharmony_ci i915_vm_put(&ppgtt->vm); 14078c2ecf20Sopenharmony_ci return ret; 14088c2ecf20Sopenharmony_ci} 14098c2ecf20Sopenharmony_ci 14108c2ecf20Sopenharmony_ci/** 14118c2ecf20Sopenharmony_ci * intel_vgpu_select_submission_ops - select virtual submission interface 14128c2ecf20Sopenharmony_ci * @vgpu: a vGPU 
14138c2ecf20Sopenharmony_ci * @engine_mask: either ALL_ENGINES or target engine mask 14148c2ecf20Sopenharmony_ci * @interface: expected vGPU virtual submission interface 14158c2ecf20Sopenharmony_ci * 14168c2ecf20Sopenharmony_ci * This function is called when guest configures submission interface. 14178c2ecf20Sopenharmony_ci * 14188c2ecf20Sopenharmony_ci * Returns: 14198c2ecf20Sopenharmony_ci * Zero on success, negative error code if failed. 14208c2ecf20Sopenharmony_ci * 14218c2ecf20Sopenharmony_ci */ 14228c2ecf20Sopenharmony_ciint intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu, 14238c2ecf20Sopenharmony_ci intel_engine_mask_t engine_mask, 14248c2ecf20Sopenharmony_ci unsigned int interface) 14258c2ecf20Sopenharmony_ci{ 14268c2ecf20Sopenharmony_ci struct drm_i915_private *i915 = vgpu->gvt->gt->i915; 14278c2ecf20Sopenharmony_ci struct intel_vgpu_submission *s = &vgpu->submission; 14288c2ecf20Sopenharmony_ci const struct intel_vgpu_submission_ops *ops[] = { 14298c2ecf20Sopenharmony_ci [INTEL_VGPU_EXECLIST_SUBMISSION] = 14308c2ecf20Sopenharmony_ci &intel_vgpu_execlist_submission_ops, 14318c2ecf20Sopenharmony_ci }; 14328c2ecf20Sopenharmony_ci int ret; 14338c2ecf20Sopenharmony_ci 14348c2ecf20Sopenharmony_ci if (drm_WARN_ON(&i915->drm, interface >= ARRAY_SIZE(ops))) 14358c2ecf20Sopenharmony_ci return -EINVAL; 14368c2ecf20Sopenharmony_ci 14378c2ecf20Sopenharmony_ci if (drm_WARN_ON(&i915->drm, 14388c2ecf20Sopenharmony_ci interface == 0 && engine_mask != ALL_ENGINES)) 14398c2ecf20Sopenharmony_ci return -EINVAL; 14408c2ecf20Sopenharmony_ci 14418c2ecf20Sopenharmony_ci if (s->active) 14428c2ecf20Sopenharmony_ci s->ops->clean(vgpu, engine_mask); 14438c2ecf20Sopenharmony_ci 14448c2ecf20Sopenharmony_ci if (interface == 0) { 14458c2ecf20Sopenharmony_ci s->ops = NULL; 14468c2ecf20Sopenharmony_ci s->virtual_submission_interface = 0; 14478c2ecf20Sopenharmony_ci s->active = false; 14488c2ecf20Sopenharmony_ci gvt_dbg_core("vgpu%d: remove submission ops\n", vgpu->id); 
14498c2ecf20Sopenharmony_ci return 0; 14508c2ecf20Sopenharmony_ci } 14518c2ecf20Sopenharmony_ci 14528c2ecf20Sopenharmony_ci ret = ops[interface]->init(vgpu, engine_mask); 14538c2ecf20Sopenharmony_ci if (ret) 14548c2ecf20Sopenharmony_ci return ret; 14558c2ecf20Sopenharmony_ci 14568c2ecf20Sopenharmony_ci s->ops = ops[interface]; 14578c2ecf20Sopenharmony_ci s->virtual_submission_interface = interface; 14588c2ecf20Sopenharmony_ci s->active = true; 14598c2ecf20Sopenharmony_ci 14608c2ecf20Sopenharmony_ci gvt_dbg_core("vgpu%d: activate ops [ %s ]\n", 14618c2ecf20Sopenharmony_ci vgpu->id, s->ops->name); 14628c2ecf20Sopenharmony_ci 14638c2ecf20Sopenharmony_ci return 0; 14648c2ecf20Sopenharmony_ci} 14658c2ecf20Sopenharmony_ci 14668c2ecf20Sopenharmony_ci/** 14678c2ecf20Sopenharmony_ci * intel_vgpu_destroy_workload - destroy a vGPU workload 14688c2ecf20Sopenharmony_ci * @workload: workload to destroy 14698c2ecf20Sopenharmony_ci * 14708c2ecf20Sopenharmony_ci * This function is called when destroy a vGPU workload. 
14718c2ecf20Sopenharmony_ci * 14728c2ecf20Sopenharmony_ci */ 14738c2ecf20Sopenharmony_civoid intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) 14748c2ecf20Sopenharmony_ci{ 14758c2ecf20Sopenharmony_ci struct intel_vgpu_submission *s = &workload->vgpu->submission; 14768c2ecf20Sopenharmony_ci 14778c2ecf20Sopenharmony_ci intel_context_unpin(s->shadow[workload->engine->id]); 14788c2ecf20Sopenharmony_ci release_shadow_batch_buffer(workload); 14798c2ecf20Sopenharmony_ci release_shadow_wa_ctx(&workload->wa_ctx); 14808c2ecf20Sopenharmony_ci 14818c2ecf20Sopenharmony_ci if (!list_empty(&workload->lri_shadow_mm)) { 14828c2ecf20Sopenharmony_ci struct intel_vgpu_mm *m, *mm; 14838c2ecf20Sopenharmony_ci list_for_each_entry_safe(m, mm, &workload->lri_shadow_mm, 14848c2ecf20Sopenharmony_ci ppgtt_mm.link) { 14858c2ecf20Sopenharmony_ci list_del(&m->ppgtt_mm.link); 14868c2ecf20Sopenharmony_ci intel_vgpu_mm_put(m); 14878c2ecf20Sopenharmony_ci } 14888c2ecf20Sopenharmony_ci } 14898c2ecf20Sopenharmony_ci 14908c2ecf20Sopenharmony_ci GEM_BUG_ON(!list_empty(&workload->lri_shadow_mm)); 14918c2ecf20Sopenharmony_ci if (workload->shadow_mm) 14928c2ecf20Sopenharmony_ci intel_vgpu_mm_put(workload->shadow_mm); 14938c2ecf20Sopenharmony_ci 14948c2ecf20Sopenharmony_ci kmem_cache_free(s->workloads, workload); 14958c2ecf20Sopenharmony_ci} 14968c2ecf20Sopenharmony_ci 14978c2ecf20Sopenharmony_cistatic struct intel_vgpu_workload * 14988c2ecf20Sopenharmony_cialloc_workload(struct intel_vgpu *vgpu) 14998c2ecf20Sopenharmony_ci{ 15008c2ecf20Sopenharmony_ci struct intel_vgpu_submission *s = &vgpu->submission; 15018c2ecf20Sopenharmony_ci struct intel_vgpu_workload *workload; 15028c2ecf20Sopenharmony_ci 15038c2ecf20Sopenharmony_ci workload = kmem_cache_zalloc(s->workloads, GFP_KERNEL); 15048c2ecf20Sopenharmony_ci if (!workload) 15058c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 15068c2ecf20Sopenharmony_ci 15078c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&workload->list); 15088c2ecf20Sopenharmony_ci 
INIT_LIST_HEAD(&workload->shadow_bb); 15098c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&workload->lri_shadow_mm); 15108c2ecf20Sopenharmony_ci 15118c2ecf20Sopenharmony_ci init_waitqueue_head(&workload->shadow_ctx_status_wq); 15128c2ecf20Sopenharmony_ci atomic_set(&workload->shadow_ctx_active, 0); 15138c2ecf20Sopenharmony_ci 15148c2ecf20Sopenharmony_ci workload->status = -EINPROGRESS; 15158c2ecf20Sopenharmony_ci workload->vgpu = vgpu; 15168c2ecf20Sopenharmony_ci 15178c2ecf20Sopenharmony_ci return workload; 15188c2ecf20Sopenharmony_ci} 15198c2ecf20Sopenharmony_ci 15208c2ecf20Sopenharmony_ci#define RING_CTX_OFF(x) \ 15218c2ecf20Sopenharmony_ci offsetof(struct execlist_ring_context, x) 15228c2ecf20Sopenharmony_ci 15238c2ecf20Sopenharmony_cistatic void read_guest_pdps(struct intel_vgpu *vgpu, 15248c2ecf20Sopenharmony_ci u64 ring_context_gpa, u32 pdp[8]) 15258c2ecf20Sopenharmony_ci{ 15268c2ecf20Sopenharmony_ci u64 gpa; 15278c2ecf20Sopenharmony_ci int i; 15288c2ecf20Sopenharmony_ci 15298c2ecf20Sopenharmony_ci gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val); 15308c2ecf20Sopenharmony_ci 15318c2ecf20Sopenharmony_ci for (i = 0; i < 8; i++) 15328c2ecf20Sopenharmony_ci intel_gvt_hypervisor_read_gpa(vgpu, 15338c2ecf20Sopenharmony_ci gpa + i * 8, &pdp[7 - i], 4); 15348c2ecf20Sopenharmony_ci} 15358c2ecf20Sopenharmony_ci 15368c2ecf20Sopenharmony_cistatic int prepare_mm(struct intel_vgpu_workload *workload) 15378c2ecf20Sopenharmony_ci{ 15388c2ecf20Sopenharmony_ci struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; 15398c2ecf20Sopenharmony_ci struct intel_vgpu_mm *mm; 15408c2ecf20Sopenharmony_ci struct intel_vgpu *vgpu = workload->vgpu; 15418c2ecf20Sopenharmony_ci enum intel_gvt_gtt_type root_entry_type; 15428c2ecf20Sopenharmony_ci u64 pdps[GVT_RING_CTX_NR_PDPS]; 15438c2ecf20Sopenharmony_ci 15448c2ecf20Sopenharmony_ci switch (desc->addressing_mode) { 15458c2ecf20Sopenharmony_ci case 1: /* legacy 32-bit */ 15468c2ecf20Sopenharmony_ci root_entry_type = 
GTT_TYPE_PPGTT_ROOT_L3_ENTRY; 15478c2ecf20Sopenharmony_ci break; 15488c2ecf20Sopenharmony_ci case 3: /* legacy 64-bit */ 15498c2ecf20Sopenharmony_ci root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY; 15508c2ecf20Sopenharmony_ci break; 15518c2ecf20Sopenharmony_ci default: 15528c2ecf20Sopenharmony_ci gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n"); 15538c2ecf20Sopenharmony_ci return -EINVAL; 15548c2ecf20Sopenharmony_ci } 15558c2ecf20Sopenharmony_ci 15568c2ecf20Sopenharmony_ci read_guest_pdps(workload->vgpu, workload->ring_context_gpa, (void *)pdps); 15578c2ecf20Sopenharmony_ci 15588c2ecf20Sopenharmony_ci mm = intel_vgpu_get_ppgtt_mm(workload->vgpu, root_entry_type, pdps); 15598c2ecf20Sopenharmony_ci if (IS_ERR(mm)) 15608c2ecf20Sopenharmony_ci return PTR_ERR(mm); 15618c2ecf20Sopenharmony_ci 15628c2ecf20Sopenharmony_ci workload->shadow_mm = mm; 15638c2ecf20Sopenharmony_ci return 0; 15648c2ecf20Sopenharmony_ci} 15658c2ecf20Sopenharmony_ci 15668c2ecf20Sopenharmony_ci#define same_context(a, b) (((a)->context_id == (b)->context_id) && \ 15678c2ecf20Sopenharmony_ci ((a)->lrca == (b)->lrca)) 15688c2ecf20Sopenharmony_ci 15698c2ecf20Sopenharmony_ci/** 15708c2ecf20Sopenharmony_ci * intel_vgpu_create_workload - create a vGPU workload 15718c2ecf20Sopenharmony_ci * @vgpu: a vGPU 15728c2ecf20Sopenharmony_ci * @engine: the engine 15738c2ecf20Sopenharmony_ci * @desc: a guest context descriptor 15748c2ecf20Sopenharmony_ci * 15758c2ecf20Sopenharmony_ci * This function is called when creating a vGPU workload. 15768c2ecf20Sopenharmony_ci * 15778c2ecf20Sopenharmony_ci * Returns: 15788c2ecf20Sopenharmony_ci * struct intel_vgpu_workload * on success, negative error code in 15798c2ecf20Sopenharmony_ci * pointer if failed. 
15808c2ecf20Sopenharmony_ci * 15818c2ecf20Sopenharmony_ci */ 15828c2ecf20Sopenharmony_cistruct intel_vgpu_workload * 15838c2ecf20Sopenharmony_ciintel_vgpu_create_workload(struct intel_vgpu *vgpu, 15848c2ecf20Sopenharmony_ci const struct intel_engine_cs *engine, 15858c2ecf20Sopenharmony_ci struct execlist_ctx_descriptor_format *desc) 15868c2ecf20Sopenharmony_ci{ 15878c2ecf20Sopenharmony_ci struct intel_vgpu_submission *s = &vgpu->submission; 15888c2ecf20Sopenharmony_ci struct list_head *q = workload_q_head(vgpu, engine); 15898c2ecf20Sopenharmony_ci struct intel_vgpu_workload *last_workload = NULL; 15908c2ecf20Sopenharmony_ci struct intel_vgpu_workload *workload = NULL; 15918c2ecf20Sopenharmony_ci u64 ring_context_gpa; 15928c2ecf20Sopenharmony_ci u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx; 15938c2ecf20Sopenharmony_ci u32 guest_head; 15948c2ecf20Sopenharmony_ci int ret; 15958c2ecf20Sopenharmony_ci 15968c2ecf20Sopenharmony_ci ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, 15978c2ecf20Sopenharmony_ci (u32)((desc->lrca + 1) << I915_GTT_PAGE_SHIFT)); 15988c2ecf20Sopenharmony_ci if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) { 15998c2ecf20Sopenharmony_ci gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca); 16008c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 16018c2ecf20Sopenharmony_ci } 16028c2ecf20Sopenharmony_ci 16038c2ecf20Sopenharmony_ci intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + 16048c2ecf20Sopenharmony_ci RING_CTX_OFF(ring_header.val), &head, 4); 16058c2ecf20Sopenharmony_ci 16068c2ecf20Sopenharmony_ci intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + 16078c2ecf20Sopenharmony_ci RING_CTX_OFF(ring_tail.val), &tail, 4); 16088c2ecf20Sopenharmony_ci 16098c2ecf20Sopenharmony_ci guest_head = head; 16108c2ecf20Sopenharmony_ci 16118c2ecf20Sopenharmony_ci head &= RB_HEAD_OFF_MASK; 16128c2ecf20Sopenharmony_ci tail &= RB_TAIL_OFF_MASK; 16138c2ecf20Sopenharmony_ci 16148c2ecf20Sopenharmony_ci 
list_for_each_entry_reverse(last_workload, q, list) { 16158c2ecf20Sopenharmony_ci 16168c2ecf20Sopenharmony_ci if (same_context(&last_workload->ctx_desc, desc)) { 16178c2ecf20Sopenharmony_ci gvt_dbg_el("ring %s cur workload == last\n", 16188c2ecf20Sopenharmony_ci engine->name); 16198c2ecf20Sopenharmony_ci gvt_dbg_el("ctx head %x real head %lx\n", head, 16208c2ecf20Sopenharmony_ci last_workload->rb_tail); 16218c2ecf20Sopenharmony_ci /* 16228c2ecf20Sopenharmony_ci * cannot use guest context head pointer here, 16238c2ecf20Sopenharmony_ci * as it might not be updated at this time 16248c2ecf20Sopenharmony_ci */ 16258c2ecf20Sopenharmony_ci head = last_workload->rb_tail; 16268c2ecf20Sopenharmony_ci break; 16278c2ecf20Sopenharmony_ci } 16288c2ecf20Sopenharmony_ci } 16298c2ecf20Sopenharmony_ci 16308c2ecf20Sopenharmony_ci gvt_dbg_el("ring %s begin a new workload\n", engine->name); 16318c2ecf20Sopenharmony_ci 16328c2ecf20Sopenharmony_ci /* record some ring buffer register values for scan and shadow */ 16338c2ecf20Sopenharmony_ci intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + 16348c2ecf20Sopenharmony_ci RING_CTX_OFF(rb_start.val), &start, 4); 16358c2ecf20Sopenharmony_ci intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + 16368c2ecf20Sopenharmony_ci RING_CTX_OFF(rb_ctrl.val), &ctl, 4); 16378c2ecf20Sopenharmony_ci intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + 16388c2ecf20Sopenharmony_ci RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4); 16398c2ecf20Sopenharmony_ci 16408c2ecf20Sopenharmony_ci if (!intel_gvt_ggtt_validate_range(vgpu, start, 16418c2ecf20Sopenharmony_ci _RING_CTL_BUF_SIZE(ctl))) { 16428c2ecf20Sopenharmony_ci gvt_vgpu_err("context contain invalid rb at: 0x%x\n", start); 16438c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 16448c2ecf20Sopenharmony_ci } 16458c2ecf20Sopenharmony_ci 16468c2ecf20Sopenharmony_ci workload = alloc_workload(vgpu); 16478c2ecf20Sopenharmony_ci if (IS_ERR(workload)) 16488c2ecf20Sopenharmony_ci return workload; 
16498c2ecf20Sopenharmony_ci 16508c2ecf20Sopenharmony_ci workload->engine = engine; 16518c2ecf20Sopenharmony_ci workload->ctx_desc = *desc; 16528c2ecf20Sopenharmony_ci workload->ring_context_gpa = ring_context_gpa; 16538c2ecf20Sopenharmony_ci workload->rb_head = head; 16548c2ecf20Sopenharmony_ci workload->guest_rb_head = guest_head; 16558c2ecf20Sopenharmony_ci workload->rb_tail = tail; 16568c2ecf20Sopenharmony_ci workload->rb_start = start; 16578c2ecf20Sopenharmony_ci workload->rb_ctl = ctl; 16588c2ecf20Sopenharmony_ci 16598c2ecf20Sopenharmony_ci if (engine->id == RCS0) { 16608c2ecf20Sopenharmony_ci intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + 16618c2ecf20Sopenharmony_ci RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4); 16628c2ecf20Sopenharmony_ci intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + 16638c2ecf20Sopenharmony_ci RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4); 16648c2ecf20Sopenharmony_ci 16658c2ecf20Sopenharmony_ci workload->wa_ctx.indirect_ctx.guest_gma = 16668c2ecf20Sopenharmony_ci indirect_ctx & INDIRECT_CTX_ADDR_MASK; 16678c2ecf20Sopenharmony_ci workload->wa_ctx.indirect_ctx.size = 16688c2ecf20Sopenharmony_ci (indirect_ctx & INDIRECT_CTX_SIZE_MASK) * 16698c2ecf20Sopenharmony_ci CACHELINE_BYTES; 16708c2ecf20Sopenharmony_ci 16718c2ecf20Sopenharmony_ci if (workload->wa_ctx.indirect_ctx.size != 0) { 16728c2ecf20Sopenharmony_ci if (!intel_gvt_ggtt_validate_range(vgpu, 16738c2ecf20Sopenharmony_ci workload->wa_ctx.indirect_ctx.guest_gma, 16748c2ecf20Sopenharmony_ci workload->wa_ctx.indirect_ctx.size)) { 16758c2ecf20Sopenharmony_ci gvt_vgpu_err("invalid wa_ctx at: 0x%lx\n", 16768c2ecf20Sopenharmony_ci workload->wa_ctx.indirect_ctx.guest_gma); 16778c2ecf20Sopenharmony_ci kmem_cache_free(s->workloads, workload); 16788c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 16798c2ecf20Sopenharmony_ci } 16808c2ecf20Sopenharmony_ci } 16818c2ecf20Sopenharmony_ci 16828c2ecf20Sopenharmony_ci workload->wa_ctx.per_ctx.guest_gma = 16838c2ecf20Sopenharmony_ci 
per_ctx & PER_CTX_ADDR_MASK; 16848c2ecf20Sopenharmony_ci workload->wa_ctx.per_ctx.valid = per_ctx & 1; 16858c2ecf20Sopenharmony_ci if (workload->wa_ctx.per_ctx.valid) { 16868c2ecf20Sopenharmony_ci if (!intel_gvt_ggtt_validate_range(vgpu, 16878c2ecf20Sopenharmony_ci workload->wa_ctx.per_ctx.guest_gma, 16888c2ecf20Sopenharmony_ci CACHELINE_BYTES)) { 16898c2ecf20Sopenharmony_ci gvt_vgpu_err("invalid per_ctx at: 0x%lx\n", 16908c2ecf20Sopenharmony_ci workload->wa_ctx.per_ctx.guest_gma); 16918c2ecf20Sopenharmony_ci kmem_cache_free(s->workloads, workload); 16928c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 16938c2ecf20Sopenharmony_ci } 16948c2ecf20Sopenharmony_ci } 16958c2ecf20Sopenharmony_ci } 16968c2ecf20Sopenharmony_ci 16978c2ecf20Sopenharmony_ci gvt_dbg_el("workload %p ring %s head %x tail %x start %x ctl %x\n", 16988c2ecf20Sopenharmony_ci workload, engine->name, head, tail, start, ctl); 16998c2ecf20Sopenharmony_ci 17008c2ecf20Sopenharmony_ci ret = prepare_mm(workload); 17018c2ecf20Sopenharmony_ci if (ret) { 17028c2ecf20Sopenharmony_ci kmem_cache_free(s->workloads, workload); 17038c2ecf20Sopenharmony_ci return ERR_PTR(ret); 17048c2ecf20Sopenharmony_ci } 17058c2ecf20Sopenharmony_ci 17068c2ecf20Sopenharmony_ci /* Only scan and shadow the first workload in the queue 17078c2ecf20Sopenharmony_ci * as there is only one pre-allocated buf-obj for shadow. 
17088c2ecf20Sopenharmony_ci */ 17098c2ecf20Sopenharmony_ci if (list_empty(q)) { 17108c2ecf20Sopenharmony_ci intel_wakeref_t wakeref; 17118c2ecf20Sopenharmony_ci 17128c2ecf20Sopenharmony_ci with_intel_runtime_pm(engine->gt->uncore->rpm, wakeref) 17138c2ecf20Sopenharmony_ci ret = intel_gvt_scan_and_shadow_workload(workload); 17148c2ecf20Sopenharmony_ci } 17158c2ecf20Sopenharmony_ci 17168c2ecf20Sopenharmony_ci if (ret) { 17178c2ecf20Sopenharmony_ci if (vgpu_is_vm_unhealthy(ret)) 17188c2ecf20Sopenharmony_ci enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); 17198c2ecf20Sopenharmony_ci intel_vgpu_destroy_workload(workload); 17208c2ecf20Sopenharmony_ci return ERR_PTR(ret); 17218c2ecf20Sopenharmony_ci } 17228c2ecf20Sopenharmony_ci 17238c2ecf20Sopenharmony_ci ret = intel_context_pin(s->shadow[engine->id]); 17248c2ecf20Sopenharmony_ci if (ret) { 17258c2ecf20Sopenharmony_ci intel_vgpu_destroy_workload(workload); 17268c2ecf20Sopenharmony_ci return ERR_PTR(ret); 17278c2ecf20Sopenharmony_ci } 17288c2ecf20Sopenharmony_ci 17298c2ecf20Sopenharmony_ci return workload; 17308c2ecf20Sopenharmony_ci} 17318c2ecf20Sopenharmony_ci 17328c2ecf20Sopenharmony_ci/** 17338c2ecf20Sopenharmony_ci * intel_vgpu_queue_workload - Qeue a vGPU workload 17348c2ecf20Sopenharmony_ci * @workload: the workload to queue in 17358c2ecf20Sopenharmony_ci */ 17368c2ecf20Sopenharmony_civoid intel_vgpu_queue_workload(struct intel_vgpu_workload *workload) 17378c2ecf20Sopenharmony_ci{ 17388c2ecf20Sopenharmony_ci list_add_tail(&workload->list, 17398c2ecf20Sopenharmony_ci workload_q_head(workload->vgpu, workload->engine)); 17408c2ecf20Sopenharmony_ci intel_gvt_kick_schedule(workload->vgpu->gvt); 17418c2ecf20Sopenharmony_ci wake_up(&workload->vgpu->gvt->scheduler.waitq[workload->engine->id]); 17428c2ecf20Sopenharmony_ci} 1743