18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright © 2014 Intel Corporation 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 58c2ecf20Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 68c2ecf20Sopenharmony_ci * to deal in the Software without restriction, including without limitation 78c2ecf20Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 88c2ecf20Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 98c2ecf20Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * The above copyright notice and this permission notice (including the next 128c2ecf20Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 138c2ecf20Sopenharmony_ci * Software. 148c2ecf20Sopenharmony_ci * 158c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 168c2ecf20Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 178c2ecf20Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 188c2ecf20Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 198c2ecf20Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 208c2ecf20Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 218c2ecf20Sopenharmony_ci * IN THE SOFTWARE. 228c2ecf20Sopenharmony_ci * 238c2ecf20Sopenharmony_ci * Authors: 248c2ecf20Sopenharmony_ci * Ben Widawsky <ben@bwidawsk.net> 258c2ecf20Sopenharmony_ci * Michel Thierry <michel.thierry@intel.com> 268c2ecf20Sopenharmony_ci * Thomas Daniel <thomas.daniel@intel.com> 278c2ecf20Sopenharmony_ci * Oscar Mateo <oscar.mateo@intel.com> 288c2ecf20Sopenharmony_ci * 298c2ecf20Sopenharmony_ci */ 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ci/** 328c2ecf20Sopenharmony_ci * DOC: Logical Rings, Logical Ring Contexts and Execlists 338c2ecf20Sopenharmony_ci * 348c2ecf20Sopenharmony_ci * Motivation: 358c2ecf20Sopenharmony_ci * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". 368c2ecf20Sopenharmony_ci * These expanded contexts enable a number of new abilities, especially 378c2ecf20Sopenharmony_ci * "Execlists" (also implemented in this file). 388c2ecf20Sopenharmony_ci * 398c2ecf20Sopenharmony_ci * One of the main differences with the legacy HW contexts is that logical 408c2ecf20Sopenharmony_ci * ring contexts incorporate many more things to the context's state, like 418c2ecf20Sopenharmony_ci * PDPs or ringbuffer control registers: 428c2ecf20Sopenharmony_ci * 438c2ecf20Sopenharmony_ci * The reason why PDPs are included in the context is straightforward: as 448c2ecf20Sopenharmony_ci * PPGTTs (per-process GTTs) are actually per-context, having the PDPs 458c2ecf20Sopenharmony_ci * contained there mean you don't need to do a ppgtt->switch_mm yourself, 468c2ecf20Sopenharmony_ci * instead, the GPU will do it for you on the context switch. 478c2ecf20Sopenharmony_ci * 488c2ecf20Sopenharmony_ci * But, what about the ringbuffer control registers (head, tail, etc..)? 498c2ecf20Sopenharmony_ci * shouldn't we just need a set of those per engine command streamer? This is 508c2ecf20Sopenharmony_ci * where the name "Logical Rings" starts to make sense: by virtualizing the 518c2ecf20Sopenharmony_ci * rings, the engine cs shifts to a new "ring buffer" with every context 528c2ecf20Sopenharmony_ci * switch. When you want to submit a workload to the GPU you: A) choose your 538c2ecf20Sopenharmony_ci * context, B) find its appropriate virtualized ring, C) write commands to it 548c2ecf20Sopenharmony_ci * and then, finally, D) tell the GPU to switch to that context. 558c2ecf20Sopenharmony_ci * 568c2ecf20Sopenharmony_ci * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch 578c2ecf20Sopenharmony_ci * to a contexts is via a context execution list, ergo "Execlists". 588c2ecf20Sopenharmony_ci * 598c2ecf20Sopenharmony_ci * LRC implementation: 608c2ecf20Sopenharmony_ci * Regarding the creation of contexts, we have: 618c2ecf20Sopenharmony_ci * 628c2ecf20Sopenharmony_ci * - One global default context. 638c2ecf20Sopenharmony_ci * - One local default context for each opened fd. 648c2ecf20Sopenharmony_ci * - One local extra context for each context create ioctl call. 658c2ecf20Sopenharmony_ci * 668c2ecf20Sopenharmony_ci * Now that ringbuffers belong per-context (and not per-engine, like before) 678c2ecf20Sopenharmony_ci * and that contexts are uniquely tied to a given engine (and not reusable, 688c2ecf20Sopenharmony_ci * like before) we need: 698c2ecf20Sopenharmony_ci * 708c2ecf20Sopenharmony_ci * - One ringbuffer per-engine inside each context. 718c2ecf20Sopenharmony_ci * - One backing object per-engine inside each context. 728c2ecf20Sopenharmony_ci * 738c2ecf20Sopenharmony_ci * The global default context starts its life with these new objects fully 748c2ecf20Sopenharmony_ci * allocated and populated. The local default context for each opened fd is 758c2ecf20Sopenharmony_ci * more complex, because we don't know at creation time which engine is going 768c2ecf20Sopenharmony_ci * to use them. To handle this, we have implemented a deferred creation of LR 778c2ecf20Sopenharmony_ci * contexts: 788c2ecf20Sopenharmony_ci * 798c2ecf20Sopenharmony_ci * The local context starts its life as a hollow or blank holder, that only 808c2ecf20Sopenharmony_ci * gets populated for a given engine once we receive an execbuffer. If later 818c2ecf20Sopenharmony_ci * on we receive another execbuffer ioctl for the same context but a different 828c2ecf20Sopenharmony_ci * engine, we allocate/populate a new ringbuffer and context backing object and 838c2ecf20Sopenharmony_ci * so on. 848c2ecf20Sopenharmony_ci * 858c2ecf20Sopenharmony_ci * Finally, regarding local contexts created using the ioctl call: as they are 868c2ecf20Sopenharmony_ci * only allowed with the render ring, we can allocate & populate them right 878c2ecf20Sopenharmony_ci * away (no need to defer anything, at least for now). 888c2ecf20Sopenharmony_ci * 898c2ecf20Sopenharmony_ci * Execlists implementation: 908c2ecf20Sopenharmony_ci * Execlists are the new method by which, on gen8+ hardware, workloads are 918c2ecf20Sopenharmony_ci * submitted for execution (as opposed to the legacy, ringbuffer-based, method). 928c2ecf20Sopenharmony_ci * This method works as follows: 938c2ecf20Sopenharmony_ci * 948c2ecf20Sopenharmony_ci * When a request is committed, its commands (the BB start and any leading or 958c2ecf20Sopenharmony_ci * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer 968c2ecf20Sopenharmony_ci * for the appropriate context. The tail pointer in the hardware context is not 978c2ecf20Sopenharmony_ci * updated at this time, but instead, kept by the driver in the ringbuffer 988c2ecf20Sopenharmony_ci * structure. A structure representing this request is added to a request queue 998c2ecf20Sopenharmony_ci * for the appropriate engine: this structure contains a copy of the context's 1008c2ecf20Sopenharmony_ci * tail after the request was written to the ring buffer and a pointer to the 1018c2ecf20Sopenharmony_ci * context itself. 1028c2ecf20Sopenharmony_ci * 1038c2ecf20Sopenharmony_ci * If the engine's request queue was empty before the request was added, the 1048c2ecf20Sopenharmony_ci * queue is processed immediately. Otherwise the queue will be processed during 1058c2ecf20Sopenharmony_ci * a context switch interrupt. In any case, elements on the queue will get sent 1068c2ecf20Sopenharmony_ci * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a 1078c2ecf20Sopenharmony_ci * globally unique 20-bits submission ID. 1088c2ecf20Sopenharmony_ci * 1098c2ecf20Sopenharmony_ci * When execution of a request completes, the GPU updates the context status 1108c2ecf20Sopenharmony_ci * buffer with a context complete event and generates a context switch interrupt. 1118c2ecf20Sopenharmony_ci * During the interrupt handling, the driver examines the events in the buffer: 1128c2ecf20Sopenharmony_ci * for each context complete event, if the announced ID matches that on the head 1138c2ecf20Sopenharmony_ci * of the request queue, then that request is retired and removed from the queue. 1148c2ecf20Sopenharmony_ci * 1158c2ecf20Sopenharmony_ci * After processing, if any requests were retired and the queue is not empty 1168c2ecf20Sopenharmony_ci * then a new execution list can be submitted. The two requests at the front of 1178c2ecf20Sopenharmony_ci * the queue are next to be submitted but since a context may not occur twice in 1188c2ecf20Sopenharmony_ci * an execution list, if subsequent requests have the same ID as the first then 1198c2ecf20Sopenharmony_ci * the two requests must be combined. This is done simply by discarding requests 1208c2ecf20Sopenharmony_ci * at the head of the queue until either only one requests is left (in which case 1218c2ecf20Sopenharmony_ci * we use a NULL second context) or the first two requests have unique IDs. 1228c2ecf20Sopenharmony_ci * 1238c2ecf20Sopenharmony_ci * By always executing the first two requests in the queue the driver ensures 1248c2ecf20Sopenharmony_ci * that the GPU is kept as busy as possible. In the case where a single context 1258c2ecf20Sopenharmony_ci * completes but a second context is still executing, the request for this second 1268c2ecf20Sopenharmony_ci * context will be at the head of the queue when we remove the first one. This 1278c2ecf20Sopenharmony_ci * request will then be resubmitted along with a new request for a different context, 1288c2ecf20Sopenharmony_ci * which will cause the hardware to continue executing the second request and queue 1298c2ecf20Sopenharmony_ci * the new request (the GPU detects the condition of a context getting preempted 1308c2ecf20Sopenharmony_ci * with the same context and optimizes the context switch flow by not doing 1318c2ecf20Sopenharmony_ci * preemption, but just sampling the new tail pointer). 1328c2ecf20Sopenharmony_ci * 1338c2ecf20Sopenharmony_ci */ 1348c2ecf20Sopenharmony_ci#include <linux/interrupt.h> 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci#include "i915_drv.h" 1378c2ecf20Sopenharmony_ci#include "i915_perf.h" 1388c2ecf20Sopenharmony_ci#include "i915_trace.h" 1398c2ecf20Sopenharmony_ci#include "i915_vgpu.h" 1408c2ecf20Sopenharmony_ci#include "intel_breadcrumbs.h" 1418c2ecf20Sopenharmony_ci#include "intel_context.h" 1428c2ecf20Sopenharmony_ci#include "intel_engine_pm.h" 1438c2ecf20Sopenharmony_ci#include "intel_gt.h" 1448c2ecf20Sopenharmony_ci#include "intel_gt_pm.h" 1458c2ecf20Sopenharmony_ci#include "intel_gt_requests.h" 1468c2ecf20Sopenharmony_ci#include "intel_lrc_reg.h" 1478c2ecf20Sopenharmony_ci#include "intel_mocs.h" 1488c2ecf20Sopenharmony_ci#include "intel_reset.h" 1498c2ecf20Sopenharmony_ci#include "intel_ring.h" 1508c2ecf20Sopenharmony_ci#include "intel_workarounds.h" 1518c2ecf20Sopenharmony_ci#include "shmem_utils.h" 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci#define RING_EXECLIST_QFULL (1 << 0x2) 1548c2ecf20Sopenharmony_ci#define RING_EXECLIST1_VALID (1 << 0x3) 1558c2ecf20Sopenharmony_ci#define RING_EXECLIST0_VALID (1 << 0x4) 1568c2ecf20Sopenharmony_ci#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE) 1578c2ecf20Sopenharmony_ci#define RING_EXECLIST1_ACTIVE (1 << 0x11) 1588c2ecf20Sopenharmony_ci#define RING_EXECLIST0_ACTIVE (1 << 0x12) 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0) 1618c2ecf20Sopenharmony_ci#define GEN8_CTX_STATUS_PREEMPTED (1 << 1) 1628c2ecf20Sopenharmony_ci#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2) 1638c2ecf20Sopenharmony_ci#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3) 1648c2ecf20Sopenharmony_ci#define GEN8_CTX_STATUS_COMPLETE (1 << 4) 1658c2ecf20Sopenharmony_ci#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_ci#define GEN8_CTX_STATUS_COMPLETED_MASK \ 1688c2ecf20Sopenharmony_ci (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_ci#define CTX_DESC_FORCE_RESTORE BIT_ULL(2) 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */ 1738c2ecf20Sopenharmony_ci#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */ 1748c2ecf20Sopenharmony_ci#define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15) 1758c2ecf20Sopenharmony_ci#define GEN12_IDLE_CTX_ID 0x7FF 1768c2ecf20Sopenharmony_ci#define GEN12_CSB_CTX_VALID(csb_dw) \ 1778c2ecf20Sopenharmony_ci (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID) 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci/* Typical size of the average request (2 pipecontrols and a MI_BB) */ 1808c2ecf20Sopenharmony_ci#define EXECLISTS_REQUEST_SIZE 64 /* bytes */ 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_cistruct virtual_engine { 1838c2ecf20Sopenharmony_ci struct intel_engine_cs base; 1848c2ecf20Sopenharmony_ci struct intel_context context; 1858c2ecf20Sopenharmony_ci struct rcu_work rcu; 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci /* 1888c2ecf20Sopenharmony_ci * We allow only a single request through the virtual engine at a time 1898c2ecf20Sopenharmony_ci * (each request in the timeline waits for the completion fence of 1908c2ecf20Sopenharmony_ci * the previous before being submitted). By restricting ourselves to 1918c2ecf20Sopenharmony_ci * only submitting a single request, each request is placed on to a 1928c2ecf20Sopenharmony_ci * physical to maximise load spreading (by virtue of the late greedy 1938c2ecf20Sopenharmony_ci * scheduling -- each real engine takes the next available request 1948c2ecf20Sopenharmony_ci * upon idling). 1958c2ecf20Sopenharmony_ci */ 1968c2ecf20Sopenharmony_ci struct i915_request *request; 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci /* 1998c2ecf20Sopenharmony_ci * We keep a rbtree of available virtual engines inside each physical 2008c2ecf20Sopenharmony_ci * engine, sorted by priority. Here we preallocate the nodes we need 2018c2ecf20Sopenharmony_ci * for the virtual engine, indexed by physical_engine->id. 2028c2ecf20Sopenharmony_ci */ 2038c2ecf20Sopenharmony_ci struct ve_node { 2048c2ecf20Sopenharmony_ci struct rb_node rb; 2058c2ecf20Sopenharmony_ci int prio; 2068c2ecf20Sopenharmony_ci } nodes[I915_NUM_ENGINES]; 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_ci /* 2098c2ecf20Sopenharmony_ci * Keep track of bonded pairs -- restrictions upon on our selection 2108c2ecf20Sopenharmony_ci * of physical engines any particular request may be submitted to. 2118c2ecf20Sopenharmony_ci * If we receive a submit-fence from a master engine, we will only 2128c2ecf20Sopenharmony_ci * use one of sibling_mask physical engines. 2138c2ecf20Sopenharmony_ci */ 2148c2ecf20Sopenharmony_ci struct ve_bond { 2158c2ecf20Sopenharmony_ci const struct intel_engine_cs *master; 2168c2ecf20Sopenharmony_ci intel_engine_mask_t sibling_mask; 2178c2ecf20Sopenharmony_ci } *bonds; 2188c2ecf20Sopenharmony_ci unsigned int num_bonds; 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci /* And finally, which physical engines this virtual engine maps onto. */ 2218c2ecf20Sopenharmony_ci unsigned int num_siblings; 2228c2ecf20Sopenharmony_ci struct intel_engine_cs *siblings[]; 2238c2ecf20Sopenharmony_ci}; 2248c2ecf20Sopenharmony_ci 2258c2ecf20Sopenharmony_cistatic struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) 2268c2ecf20Sopenharmony_ci{ 2278c2ecf20Sopenharmony_ci GEM_BUG_ON(!intel_engine_is_virtual(engine)); 2288c2ecf20Sopenharmony_ci return container_of(engine, struct virtual_engine, base); 2298c2ecf20Sopenharmony_ci} 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_cistatic int __execlists_context_alloc(struct intel_context *ce, 2328c2ecf20Sopenharmony_ci struct intel_engine_cs *engine); 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_cistatic void execlists_init_reg_state(u32 *reg_state, 2358c2ecf20Sopenharmony_ci const struct intel_context *ce, 2368c2ecf20Sopenharmony_ci const struct intel_engine_cs *engine, 2378c2ecf20Sopenharmony_ci const struct intel_ring *ring, 2388c2ecf20Sopenharmony_ci bool close); 2398c2ecf20Sopenharmony_cistatic void 2408c2ecf20Sopenharmony_ci__execlists_update_reg_state(const struct intel_context *ce, 2418c2ecf20Sopenharmony_ci const struct intel_engine_cs *engine, 2428c2ecf20Sopenharmony_ci u32 head); 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_cistatic int lrc_ring_mi_mode(const struct intel_engine_cs *engine) 2458c2ecf20Sopenharmony_ci{ 2468c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) >= 12) 2478c2ecf20Sopenharmony_ci return 0x60; 2488c2ecf20Sopenharmony_ci else if (INTEL_GEN(engine->i915) >= 9) 2498c2ecf20Sopenharmony_ci return 0x54; 2508c2ecf20Sopenharmony_ci else if (engine->class == RENDER_CLASS) 2518c2ecf20Sopenharmony_ci return 0x58; 2528c2ecf20Sopenharmony_ci else 2538c2ecf20Sopenharmony_ci return -1; 2548c2ecf20Sopenharmony_ci} 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_cistatic int lrc_ring_gpr0(const struct intel_engine_cs *engine) 2578c2ecf20Sopenharmony_ci{ 2588c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) >= 12) 2598c2ecf20Sopenharmony_ci return 0x74; 2608c2ecf20Sopenharmony_ci else if (INTEL_GEN(engine->i915) >= 9) 2618c2ecf20Sopenharmony_ci return 0x68; 2628c2ecf20Sopenharmony_ci else if (engine->class == RENDER_CLASS) 2638c2ecf20Sopenharmony_ci return 0xd8; 2648c2ecf20Sopenharmony_ci else 2658c2ecf20Sopenharmony_ci return -1; 2668c2ecf20Sopenharmony_ci} 2678c2ecf20Sopenharmony_ci 2688c2ecf20Sopenharmony_cistatic int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine) 2698c2ecf20Sopenharmony_ci{ 2708c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) >= 12) 2718c2ecf20Sopenharmony_ci return 0x12; 2728c2ecf20Sopenharmony_ci else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS) 2738c2ecf20Sopenharmony_ci return 0x18; 2748c2ecf20Sopenharmony_ci else 2758c2ecf20Sopenharmony_ci return -1; 2768c2ecf20Sopenharmony_ci} 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_cistatic int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine) 2798c2ecf20Sopenharmony_ci{ 2808c2ecf20Sopenharmony_ci int x; 2818c2ecf20Sopenharmony_ci 2828c2ecf20Sopenharmony_ci x = lrc_ring_wa_bb_per_ctx(engine); 2838c2ecf20Sopenharmony_ci if (x < 0) 2848c2ecf20Sopenharmony_ci return x; 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci return x + 2; 2878c2ecf20Sopenharmony_ci} 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_cistatic int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) 2908c2ecf20Sopenharmony_ci{ 2918c2ecf20Sopenharmony_ci int x; 2928c2ecf20Sopenharmony_ci 2938c2ecf20Sopenharmony_ci x = lrc_ring_indirect_ptr(engine); 2948c2ecf20Sopenharmony_ci if (x < 0) 2958c2ecf20Sopenharmony_ci return x; 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci return x + 2; 2988c2ecf20Sopenharmony_ci} 2998c2ecf20Sopenharmony_ci 3008c2ecf20Sopenharmony_cistatic int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) 3018c2ecf20Sopenharmony_ci{ 3028c2ecf20Sopenharmony_ci if (engine->class != RENDER_CLASS) 3038c2ecf20Sopenharmony_ci return -1; 3048c2ecf20Sopenharmony_ci 3058c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) >= 12) 3068c2ecf20Sopenharmony_ci return 0xb6; 3078c2ecf20Sopenharmony_ci else if (INTEL_GEN(engine->i915) >= 11) 3088c2ecf20Sopenharmony_ci return 0xaa; 3098c2ecf20Sopenharmony_ci else 3108c2ecf20Sopenharmony_ci return -1; 3118c2ecf20Sopenharmony_ci} 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_cistatic u32 3148c2ecf20Sopenharmony_cilrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) 3158c2ecf20Sopenharmony_ci{ 3168c2ecf20Sopenharmony_ci switch (INTEL_GEN(engine->i915)) { 3178c2ecf20Sopenharmony_ci default: 3188c2ecf20Sopenharmony_ci MISSING_CASE(INTEL_GEN(engine->i915)); 3198c2ecf20Sopenharmony_ci fallthrough; 3208c2ecf20Sopenharmony_ci case 12: 3218c2ecf20Sopenharmony_ci return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; 3228c2ecf20Sopenharmony_ci case 11: 3238c2ecf20Sopenharmony_ci return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; 3248c2ecf20Sopenharmony_ci case 10: 3258c2ecf20Sopenharmony_ci return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; 3268c2ecf20Sopenharmony_ci case 9: 3278c2ecf20Sopenharmony_ci return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; 3288c2ecf20Sopenharmony_ci case 8: 3298c2ecf20Sopenharmony_ci return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; 3308c2ecf20Sopenharmony_ci } 3318c2ecf20Sopenharmony_ci} 3328c2ecf20Sopenharmony_ci 3338c2ecf20Sopenharmony_cistatic void 3348c2ecf20Sopenharmony_cilrc_ring_setup_indirect_ctx(u32 *regs, 3358c2ecf20Sopenharmony_ci const struct intel_engine_cs *engine, 3368c2ecf20Sopenharmony_ci u32 ctx_bb_ggtt_addr, 3378c2ecf20Sopenharmony_ci u32 size) 3388c2ecf20Sopenharmony_ci{ 3398c2ecf20Sopenharmony_ci GEM_BUG_ON(!size); 3408c2ecf20Sopenharmony_ci GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES)); 3418c2ecf20Sopenharmony_ci GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1); 3428c2ecf20Sopenharmony_ci regs[lrc_ring_indirect_ptr(engine) + 1] = 3438c2ecf20Sopenharmony_ci ctx_bb_ggtt_addr | (size / CACHELINE_BYTES); 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ci GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1); 3468c2ecf20Sopenharmony_ci regs[lrc_ring_indirect_offset(engine) + 1] = 3478c2ecf20Sopenharmony_ci lrc_ring_indirect_offset_default(engine) << 6; 3488c2ecf20Sopenharmony_ci} 3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_cistatic u32 intel_context_get_runtime(const struct intel_context *ce) 3518c2ecf20Sopenharmony_ci{ 3528c2ecf20Sopenharmony_ci /* 3538c2ecf20Sopenharmony_ci * We can use either ppHWSP[16] which is recorded before the context 3548c2ecf20Sopenharmony_ci * switch (and so excludes the cost of context switches) or use the 3558c2ecf20Sopenharmony_ci * value from the context image itself, which is saved/restored earlier 3568c2ecf20Sopenharmony_ci * and so includes the cost of the save. 3578c2ecf20Sopenharmony_ci */ 3588c2ecf20Sopenharmony_ci return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]); 3598c2ecf20Sopenharmony_ci} 3608c2ecf20Sopenharmony_ci 3618c2ecf20Sopenharmony_cistatic void mark_eio(struct i915_request *rq) 3628c2ecf20Sopenharmony_ci{ 3638c2ecf20Sopenharmony_ci if (i915_request_completed(rq)) 3648c2ecf20Sopenharmony_ci return; 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_ci GEM_BUG_ON(i915_request_signaled(rq)); 3678c2ecf20Sopenharmony_ci 3688c2ecf20Sopenharmony_ci i915_request_set_error_once(rq, -EIO); 3698c2ecf20Sopenharmony_ci i915_request_mark_complete(rq); 3708c2ecf20Sopenharmony_ci} 3718c2ecf20Sopenharmony_ci 3728c2ecf20Sopenharmony_cistatic struct i915_request * 3738c2ecf20Sopenharmony_ciactive_request(const struct intel_timeline * const tl, struct i915_request *rq) 3748c2ecf20Sopenharmony_ci{ 3758c2ecf20Sopenharmony_ci struct i915_request *active = rq; 3768c2ecf20Sopenharmony_ci 3778c2ecf20Sopenharmony_ci rcu_read_lock(); 3788c2ecf20Sopenharmony_ci list_for_each_entry_continue_reverse(rq, &tl->requests, link) { 3798c2ecf20Sopenharmony_ci if (i915_request_completed(rq)) 3808c2ecf20Sopenharmony_ci break; 3818c2ecf20Sopenharmony_ci 3828c2ecf20Sopenharmony_ci active = rq; 3838c2ecf20Sopenharmony_ci } 3848c2ecf20Sopenharmony_ci rcu_read_unlock(); 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ci return active; 3878c2ecf20Sopenharmony_ci} 3888c2ecf20Sopenharmony_ci 3898c2ecf20Sopenharmony_cistatic inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) 3908c2ecf20Sopenharmony_ci{ 3918c2ecf20Sopenharmony_ci return (i915_ggtt_offset(engine->status_page.vma) + 3928c2ecf20Sopenharmony_ci I915_GEM_HWS_PREEMPT_ADDR); 3938c2ecf20Sopenharmony_ci} 3948c2ecf20Sopenharmony_ci 3958c2ecf20Sopenharmony_cistatic inline void 3968c2ecf20Sopenharmony_ciring_set_paused(const struct intel_engine_cs *engine, int state) 3978c2ecf20Sopenharmony_ci{ 3988c2ecf20Sopenharmony_ci /* 3998c2ecf20Sopenharmony_ci * We inspect HWS_PREEMPT with a semaphore inside 4008c2ecf20Sopenharmony_ci * engine->emit_fini_breadcrumb. If the dword is true, 4018c2ecf20Sopenharmony_ci * the ring is paused as the semaphore will busywait 4028c2ecf20Sopenharmony_ci * until the dword is false. 4038c2ecf20Sopenharmony_ci */ 4048c2ecf20Sopenharmony_ci engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state; 4058c2ecf20Sopenharmony_ci if (state) 4068c2ecf20Sopenharmony_ci wmb(); 4078c2ecf20Sopenharmony_ci} 4088c2ecf20Sopenharmony_ci 4098c2ecf20Sopenharmony_cistatic inline struct i915_priolist *to_priolist(struct rb_node *rb) 4108c2ecf20Sopenharmony_ci{ 4118c2ecf20Sopenharmony_ci return rb_entry(rb, struct i915_priolist, node); 4128c2ecf20Sopenharmony_ci} 4138c2ecf20Sopenharmony_ci 4148c2ecf20Sopenharmony_cistatic inline int rq_prio(const struct i915_request *rq) 4158c2ecf20Sopenharmony_ci{ 4168c2ecf20Sopenharmony_ci return READ_ONCE(rq->sched.attr.priority); 4178c2ecf20Sopenharmony_ci} 4188c2ecf20Sopenharmony_ci 4198c2ecf20Sopenharmony_cistatic int effective_prio(const struct i915_request *rq) 4208c2ecf20Sopenharmony_ci{ 4218c2ecf20Sopenharmony_ci int prio = rq_prio(rq); 4228c2ecf20Sopenharmony_ci 4238c2ecf20Sopenharmony_ci /* 4248c2ecf20Sopenharmony_ci * If this request is special and must not be interrupted at any 4258c2ecf20Sopenharmony_ci * cost, so be it. Note we are only checking the most recent request 4268c2ecf20Sopenharmony_ci * in the context and so may be masking an earlier vip request. It 4278c2ecf20Sopenharmony_ci * is hoped that under the conditions where nopreempt is used, this 4288c2ecf20Sopenharmony_ci * will not matter (i.e. all requests to that context will be 4298c2ecf20Sopenharmony_ci * nopreempt for as long as desired). 4308c2ecf20Sopenharmony_ci */ 4318c2ecf20Sopenharmony_ci if (i915_request_has_nopreempt(rq)) 4328c2ecf20Sopenharmony_ci prio = I915_PRIORITY_UNPREEMPTABLE; 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci return prio; 4358c2ecf20Sopenharmony_ci} 4368c2ecf20Sopenharmony_ci 4378c2ecf20Sopenharmony_cistatic int queue_prio(const struct intel_engine_execlists *execlists) 4388c2ecf20Sopenharmony_ci{ 4398c2ecf20Sopenharmony_ci struct i915_priolist *p; 4408c2ecf20Sopenharmony_ci struct rb_node *rb; 4418c2ecf20Sopenharmony_ci 4428c2ecf20Sopenharmony_ci rb = rb_first_cached(&execlists->queue); 4438c2ecf20Sopenharmony_ci if (!rb) 4448c2ecf20Sopenharmony_ci return INT_MIN; 4458c2ecf20Sopenharmony_ci 4468c2ecf20Sopenharmony_ci /* 4478c2ecf20Sopenharmony_ci * As the priolist[] are inverted, with the highest priority in [0], 4488c2ecf20Sopenharmony_ci * we have to flip the index value to become priority. 4498c2ecf20Sopenharmony_ci */ 4508c2ecf20Sopenharmony_ci p = to_priolist(rb); 4518c2ecf20Sopenharmony_ci if (!I915_USER_PRIORITY_SHIFT) 4528c2ecf20Sopenharmony_ci return p->priority; 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); 4558c2ecf20Sopenharmony_ci} 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_cistatic inline bool need_preempt(const struct intel_engine_cs *engine, 4588c2ecf20Sopenharmony_ci const struct i915_request *rq, 4598c2ecf20Sopenharmony_ci struct rb_node *rb) 4608c2ecf20Sopenharmony_ci{ 4618c2ecf20Sopenharmony_ci int last_prio; 4628c2ecf20Sopenharmony_ci 4638c2ecf20Sopenharmony_ci if (!intel_engine_has_semaphores(engine)) 4648c2ecf20Sopenharmony_ci return false; 4658c2ecf20Sopenharmony_ci 4668c2ecf20Sopenharmony_ci /* 4678c2ecf20Sopenharmony_ci * Check if the current priority hint merits a preemption attempt. 4688c2ecf20Sopenharmony_ci * 4698c2ecf20Sopenharmony_ci * We record the highest value priority we saw during rescheduling 4708c2ecf20Sopenharmony_ci * prior to this dequeue, therefore we know that if it is strictly 4718c2ecf20Sopenharmony_ci * less than the current tail of ESLP[0], we do not need to force 4728c2ecf20Sopenharmony_ci * a preempt-to-idle cycle. 4738c2ecf20Sopenharmony_ci * 4748c2ecf20Sopenharmony_ci * However, the priority hint is a mere hint that we may need to 4758c2ecf20Sopenharmony_ci * preempt. If that hint is stale or we may be trying to preempt 4768c2ecf20Sopenharmony_ci * ourselves, ignore the request. 4778c2ecf20Sopenharmony_ci * 4788c2ecf20Sopenharmony_ci * More naturally we would write 4798c2ecf20Sopenharmony_ci * prio >= max(0, last); 4808c2ecf20Sopenharmony_ci * except that we wish to prevent triggering preemption at the same 4818c2ecf20Sopenharmony_ci * priority level: the task that is running should remain running 4828c2ecf20Sopenharmony_ci * to preserve FIFO ordering of dependencies. 4838c2ecf20Sopenharmony_ci */ 4848c2ecf20Sopenharmony_ci last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); 4858c2ecf20Sopenharmony_ci if (engine->execlists.queue_priority_hint <= last_prio) 4868c2ecf20Sopenharmony_ci return false; 4878c2ecf20Sopenharmony_ci 4888c2ecf20Sopenharmony_ci /* 4898c2ecf20Sopenharmony_ci * Check against the first request in ELSP[1], it will, thanks to the 4908c2ecf20Sopenharmony_ci * power of PI, be the highest priority of that context. 4918c2ecf20Sopenharmony_ci */ 4928c2ecf20Sopenharmony_ci if (!list_is_last(&rq->sched.link, &engine->active.requests) && 4938c2ecf20Sopenharmony_ci rq_prio(list_next_entry(rq, sched.link)) > last_prio) 4948c2ecf20Sopenharmony_ci return true; 4958c2ecf20Sopenharmony_ci 4968c2ecf20Sopenharmony_ci if (rb) { 4978c2ecf20Sopenharmony_ci struct virtual_engine *ve = 4988c2ecf20Sopenharmony_ci rb_entry(rb, typeof(*ve), nodes[engine->id].rb); 4998c2ecf20Sopenharmony_ci bool preempt = false; 5008c2ecf20Sopenharmony_ci 5018c2ecf20Sopenharmony_ci if (engine == ve->siblings[0]) { /* only preempt one sibling */ 5028c2ecf20Sopenharmony_ci struct i915_request *next; 5038c2ecf20Sopenharmony_ci 5048c2ecf20Sopenharmony_ci rcu_read_lock(); 5058c2ecf20Sopenharmony_ci next = READ_ONCE(ve->request); 5068c2ecf20Sopenharmony_ci if (next) 5078c2ecf20Sopenharmony_ci preempt = rq_prio(next) > last_prio; 5088c2ecf20Sopenharmony_ci rcu_read_unlock(); 5098c2ecf20Sopenharmony_ci } 5108c2ecf20Sopenharmony_ci 5118c2ecf20Sopenharmony_ci if (preempt) 5128c2ecf20Sopenharmony_ci return preempt; 5138c2ecf20Sopenharmony_ci } 5148c2ecf20Sopenharmony_ci 5158c2ecf20Sopenharmony_ci /* 5168c2ecf20Sopenharmony_ci * If the inflight context did not trigger the preemption, then maybe 5178c2ecf20Sopenharmony_ci * it was the set of queued requests? Pick the highest priority in 5188c2ecf20Sopenharmony_ci * the queue (the first active priolist) and see if it deserves to be 5198c2ecf20Sopenharmony_ci * running instead of ELSP[0]. 5208c2ecf20Sopenharmony_ci * 5218c2ecf20Sopenharmony_ci * The highest priority request in the queue can not be either 5228c2ecf20Sopenharmony_ci * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same 5238c2ecf20Sopenharmony_ci * context, it's priority would not exceed ELSP[0] aka last_prio. 5248c2ecf20Sopenharmony_ci */ 5258c2ecf20Sopenharmony_ci return queue_prio(&engine->execlists) > last_prio; 5268c2ecf20Sopenharmony_ci} 5278c2ecf20Sopenharmony_ci 5288c2ecf20Sopenharmony_ci__maybe_unused static inline bool 5298c2ecf20Sopenharmony_ciassert_priority_queue(const struct i915_request *prev, 5308c2ecf20Sopenharmony_ci const struct i915_request *next) 5318c2ecf20Sopenharmony_ci{ 5328c2ecf20Sopenharmony_ci /* 5338c2ecf20Sopenharmony_ci * Without preemption, the prev may refer to the still active element 5348c2ecf20Sopenharmony_ci * which we refuse to let go. 5358c2ecf20Sopenharmony_ci * 5368c2ecf20Sopenharmony_ci * Even with preemption, there are times when we think it is better not 5378c2ecf20Sopenharmony_ci * to preempt and leave an ostensibly lower priority request in flight. 5388c2ecf20Sopenharmony_ci */ 5398c2ecf20Sopenharmony_ci if (i915_request_is_active(prev)) 5408c2ecf20Sopenharmony_ci return true; 5418c2ecf20Sopenharmony_ci 5428c2ecf20Sopenharmony_ci return rq_prio(prev) >= rq_prio(next); 5438c2ecf20Sopenharmony_ci} 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_ci/* 5468c2ecf20Sopenharmony_ci * The context descriptor encodes various attributes of a context, 5478c2ecf20Sopenharmony_ci * including its GTT address and some flags. Because it's fairly 5488c2ecf20Sopenharmony_ci * expensive to calculate, we'll just do it once and cache the result, 5498c2ecf20Sopenharmony_ci * which remains valid until the context is unpinned. 5508c2ecf20Sopenharmony_ci * 5518c2ecf20Sopenharmony_ci * This is what a descriptor looks like, from LSB to MSB:: 5528c2ecf20Sopenharmony_ci * 5538c2ecf20Sopenharmony_ci * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) 5548c2ecf20Sopenharmony_ci * bits 12-31: LRCA, GTT address of (the HWSP of) this context 5558c2ecf20Sopenharmony_ci * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC) 5568c2ecf20Sopenharmony_ci * bits 53-54: mbz, reserved for use by hardware 5578c2ecf20Sopenharmony_ci * bits 55-63: group ID, currently unused and set to 0 5588c2ecf20Sopenharmony_ci * 5598c2ecf20Sopenharmony_ci * Starting from Gen11, the upper dword of the descriptor has a new format: 5608c2ecf20Sopenharmony_ci * 5618c2ecf20Sopenharmony_ci * bits 32-36: reserved 5628c2ecf20Sopenharmony_ci * bits 37-47: SW context ID 5638c2ecf20Sopenharmony_ci * bits 48:53: engine instance 5648c2ecf20Sopenharmony_ci * bit 54: mbz, reserved for use by hardware 5658c2ecf20Sopenharmony_ci * bits 55-60: SW counter 5668c2ecf20Sopenharmony_ci * bits 61-63: engine class 5678c2ecf20Sopenharmony_ci * 5688c2ecf20Sopenharmony_ci * engine info, SW context ID and SW counter need to form a unique number 5698c2ecf20Sopenharmony_ci * (Context ID) per lrc. 5708c2ecf20Sopenharmony_ci */ 5718c2ecf20Sopenharmony_cistatic u32 5728c2ecf20Sopenharmony_cilrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) 5738c2ecf20Sopenharmony_ci{ 5748c2ecf20Sopenharmony_ci u32 desc; 5758c2ecf20Sopenharmony_ci 5768c2ecf20Sopenharmony_ci desc = INTEL_LEGACY_32B_CONTEXT; 5778c2ecf20Sopenharmony_ci if (i915_vm_is_4lvl(ce->vm)) 5788c2ecf20Sopenharmony_ci desc = INTEL_LEGACY_64B_CONTEXT; 5798c2ecf20Sopenharmony_ci desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT; 5808c2ecf20Sopenharmony_ci 5818c2ecf20Sopenharmony_ci desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; 5828c2ecf20Sopenharmony_ci if (IS_GEN(engine->i915, 8)) 5838c2ecf20Sopenharmony_ci desc |= GEN8_CTX_L3LLC_COHERENT; 5848c2ecf20Sopenharmony_ci 5858c2ecf20Sopenharmony_ci return i915_ggtt_offset(ce->state) | desc; 5868c2ecf20Sopenharmony_ci} 5878c2ecf20Sopenharmony_ci 5888c2ecf20Sopenharmony_cistatic inline unsigned int dword_in_page(void *addr) 5898c2ecf20Sopenharmony_ci{ 5908c2ecf20Sopenharmony_ci return offset_in_page(addr) / sizeof(u32); 5918c2ecf20Sopenharmony_ci} 5928c2ecf20Sopenharmony_ci 5938c2ecf20Sopenharmony_cistatic void set_offsets(u32 *regs, 5948c2ecf20Sopenharmony_ci const u8 *data, 5958c2ecf20Sopenharmony_ci const struct intel_engine_cs *engine, 5968c2ecf20Sopenharmony_ci bool clear) 5978c2ecf20Sopenharmony_ci#define NOP(x) (BIT(7) | (x)) 5988c2ecf20Sopenharmony_ci#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6))) 5998c2ecf20Sopenharmony_ci#define POSTED BIT(0) 6008c2ecf20Sopenharmony_ci#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) 6018c2ecf20Sopenharmony_ci#define REG16(x) \ 6028c2ecf20Sopenharmony_ci (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ 6038c2ecf20Sopenharmony_ci (((x) >> 2) & 0x7f) 6048c2ecf20Sopenharmony_ci#define END(total_state_size) 0, (total_state_size) 6058c2ecf20Sopenharmony_ci{ 6068c2ecf20Sopenharmony_ci const u32 base = engine->mmio_base; 6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci while (*data) { 6098c2ecf20Sopenharmony_ci u8 count, flags; 6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_ci if (*data & BIT(7)) { /* skip */ 6128c2ecf20Sopenharmony_ci count = *data++ & ~BIT(7); 6138c2ecf20Sopenharmony_ci if (clear) 6148c2ecf20Sopenharmony_ci memset32(regs, MI_NOOP, count); 6158c2ecf20Sopenharmony_ci regs += count; 6168c2ecf20Sopenharmony_ci continue; 6178c2ecf20Sopenharmony_ci } 6188c2ecf20Sopenharmony_ci 6198c2ecf20Sopenharmony_ci count = *data & 0x3f; 6208c2ecf20Sopenharmony_ci flags = *data >> 6; 6218c2ecf20Sopenharmony_ci data++; 6228c2ecf20Sopenharmony_ci 6238c2ecf20Sopenharmony_ci *regs = MI_LOAD_REGISTER_IMM(count); 6248c2ecf20Sopenharmony_ci if (flags & POSTED) 6258c2ecf20Sopenharmony_ci *regs |= MI_LRI_FORCE_POSTED; 6268c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) >= 11) 6278c2ecf20Sopenharmony_ci *regs |= MI_LRI_LRM_CS_MMIO; 6288c2ecf20Sopenharmony_ci regs++; 6298c2ecf20Sopenharmony_ci 6308c2ecf20Sopenharmony_ci GEM_BUG_ON(!count); 6318c2ecf20Sopenharmony_ci do { 6328c2ecf20Sopenharmony_ci u32 offset = 0; 6338c2ecf20Sopenharmony_ci u8 v; 6348c2ecf20Sopenharmony_ci 6358c2ecf20Sopenharmony_ci do { 6368c2ecf20Sopenharmony_ci v = *data++; 6378c2ecf20Sopenharmony_ci offset <<= 7; 6388c2ecf20Sopenharmony_ci offset |= v & ~BIT(7); 6398c2ecf20Sopenharmony_ci } while (v & BIT(7)); 6408c2ecf20Sopenharmony_ci 6418c2ecf20Sopenharmony_ci regs[0] = base + (offset << 2); 6428c2ecf20Sopenharmony_ci if (clear) 6438c2ecf20Sopenharmony_ci regs[1] = 0; 6448c2ecf20Sopenharmony_ci regs += 2; 6458c2ecf20Sopenharmony_ci } while (--count); 6468c2ecf20Sopenharmony_ci } 6478c2ecf20Sopenharmony_ci 6488c2ecf20Sopenharmony_ci if (clear) { 6498c2ecf20Sopenharmony_ci u8 count = *++data; 6508c2ecf20Sopenharmony_ci 6518c2ecf20Sopenharmony_ci /* Clear past the tail for HW access */ 6528c2ecf20Sopenharmony_ci GEM_BUG_ON(dword_in_page(regs) > count); 6538c2ecf20Sopenharmony_ci memset32(regs, MI_NOOP, count - dword_in_page(regs)); 6548c2ecf20Sopenharmony_ci 6558c2ecf20Sopenharmony_ci /* Close the batch; used mainly by live_lrc_layout() */ 6568c2ecf20Sopenharmony_ci *regs = MI_BATCH_BUFFER_END; 6578c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) >= 10) 6588c2ecf20Sopenharmony_ci *regs |= BIT(0); 6598c2ecf20Sopenharmony_ci } 6608c2ecf20Sopenharmony_ci} 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_cistatic const u8 gen8_xcs_offsets[] = { 6638c2ecf20Sopenharmony_ci NOP(1), 6648c2ecf20Sopenharmony_ci LRI(11, 0), 6658c2ecf20Sopenharmony_ci REG16(0x244), 6668c2ecf20Sopenharmony_ci REG(0x034), 6678c2ecf20Sopenharmony_ci REG(0x030), 6688c2ecf20Sopenharmony_ci REG(0x038), 6698c2ecf20Sopenharmony_ci REG(0x03c), 6708c2ecf20Sopenharmony_ci REG(0x168), 6718c2ecf20Sopenharmony_ci REG(0x140), 6728c2ecf20Sopenharmony_ci REG(0x110), 6738c2ecf20Sopenharmony_ci REG(0x11c), 6748c2ecf20Sopenharmony_ci REG(0x114), 6758c2ecf20Sopenharmony_ci REG(0x118), 6768c2ecf20Sopenharmony_ci 6778c2ecf20Sopenharmony_ci NOP(9), 6788c2ecf20Sopenharmony_ci LRI(9, 0), 6798c2ecf20Sopenharmony_ci REG16(0x3a8), 6808c2ecf20Sopenharmony_ci REG16(0x28c), 6818c2ecf20Sopenharmony_ci REG16(0x288), 6828c2ecf20Sopenharmony_ci REG16(0x284), 6838c2ecf20Sopenharmony_ci REG16(0x280), 6848c2ecf20Sopenharmony_ci REG16(0x27c), 6858c2ecf20Sopenharmony_ci REG16(0x278), 6868c2ecf20Sopenharmony_ci REG16(0x274), 6878c2ecf20Sopenharmony_ci REG16(0x270), 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_ci NOP(13), 6908c2ecf20Sopenharmony_ci LRI(2, 0), 6918c2ecf20Sopenharmony_ci REG16(0x200), 6928c2ecf20Sopenharmony_ci REG(0x028), 6938c2ecf20Sopenharmony_ci 6948c2ecf20Sopenharmony_ci END(80) 6958c2ecf20Sopenharmony_ci}; 6968c2ecf20Sopenharmony_ci 6978c2ecf20Sopenharmony_cistatic const u8 gen9_xcs_offsets[] = { 6988c2ecf20Sopenharmony_ci NOP(1), 6998c2ecf20Sopenharmony_ci LRI(14, POSTED), 7008c2ecf20Sopenharmony_ci REG16(0x244), 7018c2ecf20Sopenharmony_ci REG(0x034), 7028c2ecf20Sopenharmony_ci REG(0x030), 7038c2ecf20Sopenharmony_ci REG(0x038), 7048c2ecf20Sopenharmony_ci REG(0x03c), 7058c2ecf20Sopenharmony_ci REG(0x168), 7068c2ecf20Sopenharmony_ci REG(0x140), 7078c2ecf20Sopenharmony_ci REG(0x110), 7088c2ecf20Sopenharmony_ci REG(0x11c), 7098c2ecf20Sopenharmony_ci REG(0x114), 7108c2ecf20Sopenharmony_ci REG(0x118), 7118c2ecf20Sopenharmony_ci REG(0x1c0), 7128c2ecf20Sopenharmony_ci REG(0x1c4), 7138c2ecf20Sopenharmony_ci REG(0x1c8), 7148c2ecf20Sopenharmony_ci 7158c2ecf20Sopenharmony_ci NOP(3), 7168c2ecf20Sopenharmony_ci LRI(9, POSTED), 7178c2ecf20Sopenharmony_ci REG16(0x3a8), 7188c2ecf20Sopenharmony_ci REG16(0x28c), 7198c2ecf20Sopenharmony_ci REG16(0x288), 7208c2ecf20Sopenharmony_ci REG16(0x284), 7218c2ecf20Sopenharmony_ci REG16(0x280), 7228c2ecf20Sopenharmony_ci REG16(0x27c), 7238c2ecf20Sopenharmony_ci REG16(0x278), 7248c2ecf20Sopenharmony_ci REG16(0x274), 7258c2ecf20Sopenharmony_ci REG16(0x270), 7268c2ecf20Sopenharmony_ci 7278c2ecf20Sopenharmony_ci NOP(13), 7288c2ecf20Sopenharmony_ci LRI(1, POSTED), 7298c2ecf20Sopenharmony_ci REG16(0x200), 7308c2ecf20Sopenharmony_ci 7318c2ecf20Sopenharmony_ci NOP(13), 7328c2ecf20Sopenharmony_ci LRI(44, POSTED), 7338c2ecf20Sopenharmony_ci REG(0x028), 7348c2ecf20Sopenharmony_ci REG(0x09c), 7358c2ecf20Sopenharmony_ci REG(0x0c0), 7368c2ecf20Sopenharmony_ci REG(0x178), 7378c2ecf20Sopenharmony_ci REG(0x17c), 7388c2ecf20Sopenharmony_ci REG16(0x358), 7398c2ecf20Sopenharmony_ci REG(0x170), 7408c2ecf20Sopenharmony_ci REG(0x150), 7418c2ecf20Sopenharmony_ci REG(0x154), 7428c2ecf20Sopenharmony_ci REG(0x158), 7438c2ecf20Sopenharmony_ci REG16(0x41c), 7448c2ecf20Sopenharmony_ci REG16(0x600), 7458c2ecf20Sopenharmony_ci REG16(0x604), 7468c2ecf20Sopenharmony_ci REG16(0x608), 7478c2ecf20Sopenharmony_ci REG16(0x60c), 7488c2ecf20Sopenharmony_ci REG16(0x610), 7498c2ecf20Sopenharmony_ci REG16(0x614), 7508c2ecf20Sopenharmony_ci REG16(0x618), 7518c2ecf20Sopenharmony_ci REG16(0x61c), 7528c2ecf20Sopenharmony_ci REG16(0x620), 7538c2ecf20Sopenharmony_ci REG16(0x624), 7548c2ecf20Sopenharmony_ci REG16(0x628), 7558c2ecf20Sopenharmony_ci REG16(0x62c), 7568c2ecf20Sopenharmony_ci REG16(0x630), 7578c2ecf20Sopenharmony_ci REG16(0x634), 7588c2ecf20Sopenharmony_ci REG16(0x638), 7598c2ecf20Sopenharmony_ci REG16(0x63c), 7608c2ecf20Sopenharmony_ci REG16(0x640), 7618c2ecf20Sopenharmony_ci REG16(0x644), 7628c2ecf20Sopenharmony_ci REG16(0x648), 7638c2ecf20Sopenharmony_ci REG16(0x64c), 7648c2ecf20Sopenharmony_ci REG16(0x650), 7658c2ecf20Sopenharmony_ci REG16(0x654), 7668c2ecf20Sopenharmony_ci REG16(0x658), 7678c2ecf20Sopenharmony_ci REG16(0x65c), 7688c2ecf20Sopenharmony_ci REG16(0x660), 7698c2ecf20Sopenharmony_ci REG16(0x664), 7708c2ecf20Sopenharmony_ci REG16(0x668), 7718c2ecf20Sopenharmony_ci REG16(0x66c), 7728c2ecf20Sopenharmony_ci REG16(0x670), 7738c2ecf20Sopenharmony_ci REG16(0x674), 7748c2ecf20Sopenharmony_ci REG16(0x678), 7758c2ecf20Sopenharmony_ci REG16(0x67c), 7768c2ecf20Sopenharmony_ci REG(0x068), 7778c2ecf20Sopenharmony_ci 7788c2ecf20Sopenharmony_ci END(176) 7798c2ecf20Sopenharmony_ci}; 7808c2ecf20Sopenharmony_ci 7818c2ecf20Sopenharmony_cistatic const u8 gen12_xcs_offsets[] = { 7828c2ecf20Sopenharmony_ci NOP(1), 7838c2ecf20Sopenharmony_ci LRI(13, POSTED), 7848c2ecf20Sopenharmony_ci REG16(0x244), 7858c2ecf20Sopenharmony_ci REG(0x034), 7868c2ecf20Sopenharmony_ci REG(0x030), 7878c2ecf20Sopenharmony_ci REG(0x038), 7888c2ecf20Sopenharmony_ci REG(0x03c), 7898c2ecf20Sopenharmony_ci REG(0x168), 7908c2ecf20Sopenharmony_ci REG(0x140), 7918c2ecf20Sopenharmony_ci REG(0x110), 7928c2ecf20Sopenharmony_ci REG(0x1c0), 7938c2ecf20Sopenharmony_ci REG(0x1c4), 7948c2ecf20Sopenharmony_ci REG(0x1c8), 7958c2ecf20Sopenharmony_ci REG(0x180), 7968c2ecf20Sopenharmony_ci REG16(0x2b4), 7978c2ecf20Sopenharmony_ci 7988c2ecf20Sopenharmony_ci NOP(5), 7998c2ecf20Sopenharmony_ci LRI(9, POSTED), 8008c2ecf20Sopenharmony_ci REG16(0x3a8), 8018c2ecf20Sopenharmony_ci REG16(0x28c), 8028c2ecf20Sopenharmony_ci REG16(0x288), 8038c2ecf20Sopenharmony_ci REG16(0x284), 8048c2ecf20Sopenharmony_ci REG16(0x280), 8058c2ecf20Sopenharmony_ci REG16(0x27c), 8068c2ecf20Sopenharmony_ci REG16(0x278), 8078c2ecf20Sopenharmony_ci REG16(0x274), 8088c2ecf20Sopenharmony_ci REG16(0x270), 8098c2ecf20Sopenharmony_ci 8108c2ecf20Sopenharmony_ci END(80) 8118c2ecf20Sopenharmony_ci}; 8128c2ecf20Sopenharmony_ci 8138c2ecf20Sopenharmony_cistatic const u8 gen8_rcs_offsets[] = { 8148c2ecf20Sopenharmony_ci NOP(1), 8158c2ecf20Sopenharmony_ci LRI(14, POSTED), 8168c2ecf20Sopenharmony_ci REG16(0x244), 8178c2ecf20Sopenharmony_ci REG(0x034), 8188c2ecf20Sopenharmony_ci REG(0x030), 8198c2ecf20Sopenharmony_ci REG(0x038), 8208c2ecf20Sopenharmony_ci REG(0x03c), 8218c2ecf20Sopenharmony_ci REG(0x168), 8228c2ecf20Sopenharmony_ci REG(0x140), 8238c2ecf20Sopenharmony_ci REG(0x110), 8248c2ecf20Sopenharmony_ci REG(0x11c), 8258c2ecf20Sopenharmony_ci REG(0x114), 8268c2ecf20Sopenharmony_ci REG(0x118), 8278c2ecf20Sopenharmony_ci REG(0x1c0), 8288c2ecf20Sopenharmony_ci REG(0x1c4), 8298c2ecf20Sopenharmony_ci REG(0x1c8), 8308c2ecf20Sopenharmony_ci 8318c2ecf20Sopenharmony_ci NOP(3), 8328c2ecf20Sopenharmony_ci LRI(9, POSTED), 8338c2ecf20Sopenharmony_ci REG16(0x3a8), 8348c2ecf20Sopenharmony_ci REG16(0x28c), 8358c2ecf20Sopenharmony_ci REG16(0x288), 8368c2ecf20Sopenharmony_ci REG16(0x284), 8378c2ecf20Sopenharmony_ci REG16(0x280), 8388c2ecf20Sopenharmony_ci REG16(0x27c), 8398c2ecf20Sopenharmony_ci REG16(0x278), 8408c2ecf20Sopenharmony_ci REG16(0x274), 8418c2ecf20Sopenharmony_ci REG16(0x270), 8428c2ecf20Sopenharmony_ci 8438c2ecf20Sopenharmony_ci NOP(13), 8448c2ecf20Sopenharmony_ci LRI(1, 0), 8458c2ecf20Sopenharmony_ci REG(0x0c8), 8468c2ecf20Sopenharmony_ci 8478c2ecf20Sopenharmony_ci END(80) 8488c2ecf20Sopenharmony_ci}; 8498c2ecf20Sopenharmony_ci 8508c2ecf20Sopenharmony_cistatic const u8 gen9_rcs_offsets[] = { 8518c2ecf20Sopenharmony_ci NOP(1), 8528c2ecf20Sopenharmony_ci LRI(14, POSTED), 8538c2ecf20Sopenharmony_ci REG16(0x244), 8548c2ecf20Sopenharmony_ci REG(0x34), 8558c2ecf20Sopenharmony_ci REG(0x30), 8568c2ecf20Sopenharmony_ci REG(0x38), 8578c2ecf20Sopenharmony_ci REG(0x3c), 8588c2ecf20Sopenharmony_ci REG(0x168), 8598c2ecf20Sopenharmony_ci REG(0x140), 8608c2ecf20Sopenharmony_ci REG(0x110), 8618c2ecf20Sopenharmony_ci REG(0x11c), 8628c2ecf20Sopenharmony_ci REG(0x114), 8638c2ecf20Sopenharmony_ci REG(0x118), 8648c2ecf20Sopenharmony_ci REG(0x1c0), 8658c2ecf20Sopenharmony_ci REG(0x1c4), 8668c2ecf20Sopenharmony_ci REG(0x1c8), 8678c2ecf20Sopenharmony_ci 8688c2ecf20Sopenharmony_ci NOP(3), 8698c2ecf20Sopenharmony_ci LRI(9, POSTED), 8708c2ecf20Sopenharmony_ci REG16(0x3a8), 8718c2ecf20Sopenharmony_ci REG16(0x28c), 8728c2ecf20Sopenharmony_ci REG16(0x288), 8738c2ecf20Sopenharmony_ci REG16(0x284), 8748c2ecf20Sopenharmony_ci REG16(0x280), 8758c2ecf20Sopenharmony_ci REG16(0x27c), 8768c2ecf20Sopenharmony_ci REG16(0x278), 8778c2ecf20Sopenharmony_ci REG16(0x274), 8788c2ecf20Sopenharmony_ci REG16(0x270), 8798c2ecf20Sopenharmony_ci 8808c2ecf20Sopenharmony_ci NOP(13), 8818c2ecf20Sopenharmony_ci LRI(1, 0), 8828c2ecf20Sopenharmony_ci REG(0xc8), 8838c2ecf20Sopenharmony_ci 8848c2ecf20Sopenharmony_ci NOP(13), 8858c2ecf20Sopenharmony_ci LRI(44, POSTED), 8868c2ecf20Sopenharmony_ci REG(0x28), 8878c2ecf20Sopenharmony_ci REG(0x9c), 8888c2ecf20Sopenharmony_ci REG(0xc0), 8898c2ecf20Sopenharmony_ci REG(0x178), 8908c2ecf20Sopenharmony_ci REG(0x17c), 8918c2ecf20Sopenharmony_ci REG16(0x358), 8928c2ecf20Sopenharmony_ci REG(0x170), 8938c2ecf20Sopenharmony_ci REG(0x150), 8948c2ecf20Sopenharmony_ci REG(0x154), 8958c2ecf20Sopenharmony_ci REG(0x158), 8968c2ecf20Sopenharmony_ci REG16(0x41c), 8978c2ecf20Sopenharmony_ci REG16(0x600), 8988c2ecf20Sopenharmony_ci REG16(0x604), 8998c2ecf20Sopenharmony_ci REG16(0x608), 9008c2ecf20Sopenharmony_ci REG16(0x60c), 9018c2ecf20Sopenharmony_ci REG16(0x610), 9028c2ecf20Sopenharmony_ci REG16(0x614), 9038c2ecf20Sopenharmony_ci REG16(0x618), 9048c2ecf20Sopenharmony_ci REG16(0x61c), 9058c2ecf20Sopenharmony_ci REG16(0x620), 9068c2ecf20Sopenharmony_ci REG16(0x624), 9078c2ecf20Sopenharmony_ci REG16(0x628), 9088c2ecf20Sopenharmony_ci REG16(0x62c), 9098c2ecf20Sopenharmony_ci REG16(0x630), 9108c2ecf20Sopenharmony_ci REG16(0x634), 9118c2ecf20Sopenharmony_ci REG16(0x638), 9128c2ecf20Sopenharmony_ci REG16(0x63c), 9138c2ecf20Sopenharmony_ci REG16(0x640), 9148c2ecf20Sopenharmony_ci REG16(0x644), 9158c2ecf20Sopenharmony_ci REG16(0x648), 9168c2ecf20Sopenharmony_ci REG16(0x64c), 9178c2ecf20Sopenharmony_ci REG16(0x650), 9188c2ecf20Sopenharmony_ci REG16(0x654), 9198c2ecf20Sopenharmony_ci REG16(0x658), 9208c2ecf20Sopenharmony_ci REG16(0x65c), 9218c2ecf20Sopenharmony_ci REG16(0x660), 9228c2ecf20Sopenharmony_ci REG16(0x664), 9238c2ecf20Sopenharmony_ci REG16(0x668), 9248c2ecf20Sopenharmony_ci REG16(0x66c), 9258c2ecf20Sopenharmony_ci REG16(0x670), 9268c2ecf20Sopenharmony_ci REG16(0x674), 9278c2ecf20Sopenharmony_ci REG16(0x678), 9288c2ecf20Sopenharmony_ci REG16(0x67c), 9298c2ecf20Sopenharmony_ci REG(0x68), 9308c2ecf20Sopenharmony_ci 9318c2ecf20Sopenharmony_ci END(176) 9328c2ecf20Sopenharmony_ci}; 9338c2ecf20Sopenharmony_ci 9348c2ecf20Sopenharmony_cistatic const u8 gen11_rcs_offsets[] = { 9358c2ecf20Sopenharmony_ci NOP(1), 9368c2ecf20Sopenharmony_ci LRI(15, POSTED), 9378c2ecf20Sopenharmony_ci REG16(0x244), 9388c2ecf20Sopenharmony_ci REG(0x034), 9398c2ecf20Sopenharmony_ci REG(0x030), 9408c2ecf20Sopenharmony_ci REG(0x038), 9418c2ecf20Sopenharmony_ci REG(0x03c), 9428c2ecf20Sopenharmony_ci REG(0x168), 9438c2ecf20Sopenharmony_ci REG(0x140), 9448c2ecf20Sopenharmony_ci REG(0x110), 9458c2ecf20Sopenharmony_ci REG(0x11c), 9468c2ecf20Sopenharmony_ci REG(0x114), 9478c2ecf20Sopenharmony_ci REG(0x118), 9488c2ecf20Sopenharmony_ci REG(0x1c0), 9498c2ecf20Sopenharmony_ci REG(0x1c4), 9508c2ecf20Sopenharmony_ci REG(0x1c8), 9518c2ecf20Sopenharmony_ci REG(0x180), 9528c2ecf20Sopenharmony_ci 9538c2ecf20Sopenharmony_ci NOP(1), 9548c2ecf20Sopenharmony_ci LRI(9, POSTED), 9558c2ecf20Sopenharmony_ci REG16(0x3a8), 9568c2ecf20Sopenharmony_ci REG16(0x28c), 9578c2ecf20Sopenharmony_ci REG16(0x288), 9588c2ecf20Sopenharmony_ci REG16(0x284), 9598c2ecf20Sopenharmony_ci REG16(0x280), 9608c2ecf20Sopenharmony_ci REG16(0x27c), 9618c2ecf20Sopenharmony_ci REG16(0x278), 9628c2ecf20Sopenharmony_ci REG16(0x274), 9638c2ecf20Sopenharmony_ci REG16(0x270), 9648c2ecf20Sopenharmony_ci 9658c2ecf20Sopenharmony_ci LRI(1, POSTED), 9668c2ecf20Sopenharmony_ci REG(0x1b0), 9678c2ecf20Sopenharmony_ci 9688c2ecf20Sopenharmony_ci NOP(10), 9698c2ecf20Sopenharmony_ci LRI(1, 0), 9708c2ecf20Sopenharmony_ci REG(0x0c8), 9718c2ecf20Sopenharmony_ci 9728c2ecf20Sopenharmony_ci END(80) 9738c2ecf20Sopenharmony_ci}; 9748c2ecf20Sopenharmony_ci 9758c2ecf20Sopenharmony_cistatic const u8 gen12_rcs_offsets[] = { 9768c2ecf20Sopenharmony_ci NOP(1), 9778c2ecf20Sopenharmony_ci LRI(13, POSTED), 9788c2ecf20Sopenharmony_ci REG16(0x244), 9798c2ecf20Sopenharmony_ci REG(0x034), 9808c2ecf20Sopenharmony_ci REG(0x030), 9818c2ecf20Sopenharmony_ci REG(0x038), 9828c2ecf20Sopenharmony_ci REG(0x03c), 9838c2ecf20Sopenharmony_ci REG(0x168), 9848c2ecf20Sopenharmony_ci REG(0x140), 9858c2ecf20Sopenharmony_ci REG(0x110), 9868c2ecf20Sopenharmony_ci REG(0x1c0), 9878c2ecf20Sopenharmony_ci REG(0x1c4), 9888c2ecf20Sopenharmony_ci REG(0x1c8), 9898c2ecf20Sopenharmony_ci REG(0x180), 9908c2ecf20Sopenharmony_ci REG16(0x2b4), 9918c2ecf20Sopenharmony_ci 9928c2ecf20Sopenharmony_ci NOP(5), 9938c2ecf20Sopenharmony_ci LRI(9, POSTED), 9948c2ecf20Sopenharmony_ci REG16(0x3a8), 9958c2ecf20Sopenharmony_ci REG16(0x28c), 9968c2ecf20Sopenharmony_ci REG16(0x288), 9978c2ecf20Sopenharmony_ci REG16(0x284), 9988c2ecf20Sopenharmony_ci REG16(0x280), 9998c2ecf20Sopenharmony_ci REG16(0x27c), 10008c2ecf20Sopenharmony_ci REG16(0x278), 10018c2ecf20Sopenharmony_ci REG16(0x274), 10028c2ecf20Sopenharmony_ci REG16(0x270), 10038c2ecf20Sopenharmony_ci 10048c2ecf20Sopenharmony_ci LRI(3, POSTED), 10058c2ecf20Sopenharmony_ci REG(0x1b0), 10068c2ecf20Sopenharmony_ci REG16(0x5a8), 10078c2ecf20Sopenharmony_ci REG16(0x5ac), 10088c2ecf20Sopenharmony_ci 10098c2ecf20Sopenharmony_ci NOP(6), 10108c2ecf20Sopenharmony_ci LRI(1, 0), 10118c2ecf20Sopenharmony_ci REG(0x0c8), 10128c2ecf20Sopenharmony_ci NOP(3 + 9 + 1), 10138c2ecf20Sopenharmony_ci 10148c2ecf20Sopenharmony_ci LRI(51, POSTED), 10158c2ecf20Sopenharmony_ci REG16(0x588), 10168c2ecf20Sopenharmony_ci REG16(0x588), 10178c2ecf20Sopenharmony_ci REG16(0x588), 10188c2ecf20Sopenharmony_ci REG16(0x588), 10198c2ecf20Sopenharmony_ci REG16(0x588), 10208c2ecf20Sopenharmony_ci REG16(0x588), 10218c2ecf20Sopenharmony_ci REG(0x028), 10228c2ecf20Sopenharmony_ci REG(0x09c), 10238c2ecf20Sopenharmony_ci REG(0x0c0), 10248c2ecf20Sopenharmony_ci REG(0x178), 10258c2ecf20Sopenharmony_ci REG(0x17c), 10268c2ecf20Sopenharmony_ci REG16(0x358), 10278c2ecf20Sopenharmony_ci REG(0x170), 10288c2ecf20Sopenharmony_ci REG(0x150), 10298c2ecf20Sopenharmony_ci REG(0x154), 10308c2ecf20Sopenharmony_ci REG(0x158), 10318c2ecf20Sopenharmony_ci REG16(0x41c), 10328c2ecf20Sopenharmony_ci REG16(0x600), 10338c2ecf20Sopenharmony_ci REG16(0x604), 10348c2ecf20Sopenharmony_ci REG16(0x608), 10358c2ecf20Sopenharmony_ci REG16(0x60c), 10368c2ecf20Sopenharmony_ci REG16(0x610), 10378c2ecf20Sopenharmony_ci REG16(0x614), 10388c2ecf20Sopenharmony_ci REG16(0x618), 10398c2ecf20Sopenharmony_ci REG16(0x61c), 10408c2ecf20Sopenharmony_ci REG16(0x620), 10418c2ecf20Sopenharmony_ci REG16(0x624), 10428c2ecf20Sopenharmony_ci REG16(0x628), 10438c2ecf20Sopenharmony_ci REG16(0x62c), 10448c2ecf20Sopenharmony_ci REG16(0x630), 10458c2ecf20Sopenharmony_ci REG16(0x634), 10468c2ecf20Sopenharmony_ci REG16(0x638), 10478c2ecf20Sopenharmony_ci REG16(0x63c), 10488c2ecf20Sopenharmony_ci REG16(0x640), 10498c2ecf20Sopenharmony_ci REG16(0x644), 10508c2ecf20Sopenharmony_ci REG16(0x648), 10518c2ecf20Sopenharmony_ci REG16(0x64c), 10528c2ecf20Sopenharmony_ci REG16(0x650), 10538c2ecf20Sopenharmony_ci REG16(0x654), 10548c2ecf20Sopenharmony_ci REG16(0x658), 10558c2ecf20Sopenharmony_ci REG16(0x65c), 10568c2ecf20Sopenharmony_ci REG16(0x660), 10578c2ecf20Sopenharmony_ci REG16(0x664), 10588c2ecf20Sopenharmony_ci REG16(0x668), 10598c2ecf20Sopenharmony_ci REG16(0x66c), 10608c2ecf20Sopenharmony_ci REG16(0x670), 10618c2ecf20Sopenharmony_ci REG16(0x674), 10628c2ecf20Sopenharmony_ci REG16(0x678), 10638c2ecf20Sopenharmony_ci REG16(0x67c), 10648c2ecf20Sopenharmony_ci REG(0x068), 10658c2ecf20Sopenharmony_ci REG(0x084), 10668c2ecf20Sopenharmony_ci NOP(1), 10678c2ecf20Sopenharmony_ci 10688c2ecf20Sopenharmony_ci END(192) 10698c2ecf20Sopenharmony_ci}; 10708c2ecf20Sopenharmony_ci 10718c2ecf20Sopenharmony_ci#undef END 10728c2ecf20Sopenharmony_ci#undef REG16 10738c2ecf20Sopenharmony_ci#undef REG 10748c2ecf20Sopenharmony_ci#undef LRI 10758c2ecf20Sopenharmony_ci#undef NOP 10768c2ecf20Sopenharmony_ci 10778c2ecf20Sopenharmony_cistatic const u8 *reg_offsets(const struct intel_engine_cs *engine) 10788c2ecf20Sopenharmony_ci{ 10798c2ecf20Sopenharmony_ci /* 10808c2ecf20Sopenharmony_ci * The gen12+ lists only have the registers we program in the basic 10818c2ecf20Sopenharmony_ci * default state. We rely on the context image using relative 10828c2ecf20Sopenharmony_ci * addressing to automatic fixup the register state between the 10838c2ecf20Sopenharmony_ci * physical engines for virtual engine. 10848c2ecf20Sopenharmony_ci */ 10858c2ecf20Sopenharmony_ci GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 && 10868c2ecf20Sopenharmony_ci !intel_engine_has_relative_mmio(engine)); 10878c2ecf20Sopenharmony_ci 10888c2ecf20Sopenharmony_ci if (engine->class == RENDER_CLASS) { 10898c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) >= 12) 10908c2ecf20Sopenharmony_ci return gen12_rcs_offsets; 10918c2ecf20Sopenharmony_ci else if (INTEL_GEN(engine->i915) >= 11) 10928c2ecf20Sopenharmony_ci return gen11_rcs_offsets; 10938c2ecf20Sopenharmony_ci else if (INTEL_GEN(engine->i915) >= 9) 10948c2ecf20Sopenharmony_ci return gen9_rcs_offsets; 10958c2ecf20Sopenharmony_ci else 10968c2ecf20Sopenharmony_ci return gen8_rcs_offsets; 10978c2ecf20Sopenharmony_ci } else { 10988c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) >= 12) 10998c2ecf20Sopenharmony_ci return gen12_xcs_offsets; 11008c2ecf20Sopenharmony_ci else if (INTEL_GEN(engine->i915) >= 9) 11018c2ecf20Sopenharmony_ci return gen9_xcs_offsets; 11028c2ecf20Sopenharmony_ci else 11038c2ecf20Sopenharmony_ci return gen8_xcs_offsets; 11048c2ecf20Sopenharmony_ci } 11058c2ecf20Sopenharmony_ci} 11068c2ecf20Sopenharmony_ci 11078c2ecf20Sopenharmony_cistatic struct i915_request * 11088c2ecf20Sopenharmony_ci__unwind_incomplete_requests(struct intel_engine_cs *engine) 11098c2ecf20Sopenharmony_ci{ 11108c2ecf20Sopenharmony_ci struct i915_request *rq, *rn, *active = NULL; 11118c2ecf20Sopenharmony_ci struct list_head *pl; 11128c2ecf20Sopenharmony_ci int prio = I915_PRIORITY_INVALID; 11138c2ecf20Sopenharmony_ci 11148c2ecf20Sopenharmony_ci lockdep_assert_held(&engine->active.lock); 11158c2ecf20Sopenharmony_ci 11168c2ecf20Sopenharmony_ci list_for_each_entry_safe_reverse(rq, rn, 11178c2ecf20Sopenharmony_ci &engine->active.requests, 11188c2ecf20Sopenharmony_ci sched.link) { 11198c2ecf20Sopenharmony_ci if (i915_request_completed(rq)) 11208c2ecf20Sopenharmony_ci continue; /* XXX */ 11218c2ecf20Sopenharmony_ci 11228c2ecf20Sopenharmony_ci __i915_request_unsubmit(rq); 11238c2ecf20Sopenharmony_ci 11248c2ecf20Sopenharmony_ci /* 11258c2ecf20Sopenharmony_ci * Push the request back into the queue for later resubmission. 11268c2ecf20Sopenharmony_ci * If this request is not native to this physical engine (i.e. 11278c2ecf20Sopenharmony_ci * it came from a virtual source), push it back onto the virtual 11288c2ecf20Sopenharmony_ci * engine so that it can be moved across onto another physical 11298c2ecf20Sopenharmony_ci * engine as load dictates. 11308c2ecf20Sopenharmony_ci */ 11318c2ecf20Sopenharmony_ci if (likely(rq->execution_mask == engine->mask)) { 11328c2ecf20Sopenharmony_ci GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 11338c2ecf20Sopenharmony_ci if (rq_prio(rq) != prio) { 11348c2ecf20Sopenharmony_ci prio = rq_prio(rq); 11358c2ecf20Sopenharmony_ci pl = i915_sched_lookup_priolist(engine, prio); 11368c2ecf20Sopenharmony_ci } 11378c2ecf20Sopenharmony_ci GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); 11388c2ecf20Sopenharmony_ci 11398c2ecf20Sopenharmony_ci list_move(&rq->sched.link, pl); 11408c2ecf20Sopenharmony_ci set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 11418c2ecf20Sopenharmony_ci 11428c2ecf20Sopenharmony_ci /* Check in case we rollback so far we wrap [size/2] */ 11438c2ecf20Sopenharmony_ci if (intel_ring_direction(rq->ring, 11448c2ecf20Sopenharmony_ci rq->tail, 11458c2ecf20Sopenharmony_ci rq->ring->tail + 8) > 0) 11468c2ecf20Sopenharmony_ci rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE; 11478c2ecf20Sopenharmony_ci 11488c2ecf20Sopenharmony_ci active = rq; 11498c2ecf20Sopenharmony_ci } else { 11508c2ecf20Sopenharmony_ci struct intel_engine_cs *owner = rq->context->engine; 11518c2ecf20Sopenharmony_ci 11528c2ecf20Sopenharmony_ci WRITE_ONCE(rq->engine, owner); 11538c2ecf20Sopenharmony_ci owner->submit_request(rq); 11548c2ecf20Sopenharmony_ci active = NULL; 11558c2ecf20Sopenharmony_ci } 11568c2ecf20Sopenharmony_ci } 11578c2ecf20Sopenharmony_ci 11588c2ecf20Sopenharmony_ci return active; 11598c2ecf20Sopenharmony_ci} 11608c2ecf20Sopenharmony_ci 11618c2ecf20Sopenharmony_cistruct i915_request * 11628c2ecf20Sopenharmony_ciexeclists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) 11638c2ecf20Sopenharmony_ci{ 11648c2ecf20Sopenharmony_ci struct intel_engine_cs *engine = 11658c2ecf20Sopenharmony_ci container_of(execlists, typeof(*engine), execlists); 11668c2ecf20Sopenharmony_ci 11678c2ecf20Sopenharmony_ci return __unwind_incomplete_requests(engine); 11688c2ecf20Sopenharmony_ci} 11698c2ecf20Sopenharmony_ci 11708c2ecf20Sopenharmony_cistatic inline void 11718c2ecf20Sopenharmony_ciexeclists_context_status_change(struct i915_request *rq, unsigned long status) 11728c2ecf20Sopenharmony_ci{ 11738c2ecf20Sopenharmony_ci /* 11748c2ecf20Sopenharmony_ci * Only used when GVT-g is enabled now. When GVT-g is disabled, 11758c2ecf20Sopenharmony_ci * The compiler should eliminate this function as dead-code. 11768c2ecf20Sopenharmony_ci */ 11778c2ecf20Sopenharmony_ci if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) 11788c2ecf20Sopenharmony_ci return; 11798c2ecf20Sopenharmony_ci 11808c2ecf20Sopenharmony_ci atomic_notifier_call_chain(&rq->engine->context_status_notifier, 11818c2ecf20Sopenharmony_ci status, rq); 11828c2ecf20Sopenharmony_ci} 11838c2ecf20Sopenharmony_ci 11848c2ecf20Sopenharmony_cistatic void intel_engine_context_in(struct intel_engine_cs *engine) 11858c2ecf20Sopenharmony_ci{ 11868c2ecf20Sopenharmony_ci unsigned long flags; 11878c2ecf20Sopenharmony_ci 11888c2ecf20Sopenharmony_ci if (atomic_add_unless(&engine->stats.active, 1, 0)) 11898c2ecf20Sopenharmony_ci return; 11908c2ecf20Sopenharmony_ci 11918c2ecf20Sopenharmony_ci write_seqlock_irqsave(&engine->stats.lock, flags); 11928c2ecf20Sopenharmony_ci if (!atomic_add_unless(&engine->stats.active, 1, 0)) { 11938c2ecf20Sopenharmony_ci engine->stats.start = ktime_get(); 11948c2ecf20Sopenharmony_ci atomic_inc(&engine->stats.active); 11958c2ecf20Sopenharmony_ci } 11968c2ecf20Sopenharmony_ci write_sequnlock_irqrestore(&engine->stats.lock, flags); 11978c2ecf20Sopenharmony_ci} 11988c2ecf20Sopenharmony_ci 11998c2ecf20Sopenharmony_cistatic void intel_engine_context_out(struct intel_engine_cs *engine) 12008c2ecf20Sopenharmony_ci{ 12018c2ecf20Sopenharmony_ci unsigned long flags; 12028c2ecf20Sopenharmony_ci 12038c2ecf20Sopenharmony_ci GEM_BUG_ON(!atomic_read(&engine->stats.active)); 12048c2ecf20Sopenharmony_ci 12058c2ecf20Sopenharmony_ci if (atomic_add_unless(&engine->stats.active, -1, 1)) 12068c2ecf20Sopenharmony_ci return; 12078c2ecf20Sopenharmony_ci 12088c2ecf20Sopenharmony_ci write_seqlock_irqsave(&engine->stats.lock, flags); 12098c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&engine->stats.active)) { 12108c2ecf20Sopenharmony_ci engine->stats.total = 12118c2ecf20Sopenharmony_ci ktime_add(engine->stats.total, 12128c2ecf20Sopenharmony_ci ktime_sub(ktime_get(), engine->stats.start)); 12138c2ecf20Sopenharmony_ci } 12148c2ecf20Sopenharmony_ci write_sequnlock_irqrestore(&engine->stats.lock, flags); 12158c2ecf20Sopenharmony_ci} 12168c2ecf20Sopenharmony_ci 12178c2ecf20Sopenharmony_cistatic void 12188c2ecf20Sopenharmony_ciexeclists_check_context(const struct intel_context *ce, 12198c2ecf20Sopenharmony_ci const struct intel_engine_cs *engine) 12208c2ecf20Sopenharmony_ci{ 12218c2ecf20Sopenharmony_ci const struct intel_ring *ring = ce->ring; 12228c2ecf20Sopenharmony_ci u32 *regs = ce->lrc_reg_state; 12238c2ecf20Sopenharmony_ci bool valid = true; 12248c2ecf20Sopenharmony_ci int x; 12258c2ecf20Sopenharmony_ci 12268c2ecf20Sopenharmony_ci if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) { 12278c2ecf20Sopenharmony_ci pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n", 12288c2ecf20Sopenharmony_ci engine->name, 12298c2ecf20Sopenharmony_ci regs[CTX_RING_START], 12308c2ecf20Sopenharmony_ci i915_ggtt_offset(ring->vma)); 12318c2ecf20Sopenharmony_ci regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); 12328c2ecf20Sopenharmony_ci valid = false; 12338c2ecf20Sopenharmony_ci } 12348c2ecf20Sopenharmony_ci 12358c2ecf20Sopenharmony_ci if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) != 12368c2ecf20Sopenharmony_ci (RING_CTL_SIZE(ring->size) | RING_VALID)) { 12378c2ecf20Sopenharmony_ci pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n", 12388c2ecf20Sopenharmony_ci engine->name, 12398c2ecf20Sopenharmony_ci regs[CTX_RING_CTL], 12408c2ecf20Sopenharmony_ci (u32)(RING_CTL_SIZE(ring->size) | RING_VALID)); 12418c2ecf20Sopenharmony_ci regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; 12428c2ecf20Sopenharmony_ci valid = false; 12438c2ecf20Sopenharmony_ci } 12448c2ecf20Sopenharmony_ci 12458c2ecf20Sopenharmony_ci x = lrc_ring_mi_mode(engine); 12468c2ecf20Sopenharmony_ci if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) { 12478c2ecf20Sopenharmony_ci pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n", 12488c2ecf20Sopenharmony_ci engine->name, regs[x + 1]); 12498c2ecf20Sopenharmony_ci regs[x + 1] &= ~STOP_RING; 12508c2ecf20Sopenharmony_ci regs[x + 1] |= STOP_RING << 16; 12518c2ecf20Sopenharmony_ci valid = false; 12528c2ecf20Sopenharmony_ci } 12538c2ecf20Sopenharmony_ci 12548c2ecf20Sopenharmony_ci WARN_ONCE(!valid, "Invalid lrc state found before submission\n"); 12558c2ecf20Sopenharmony_ci} 12568c2ecf20Sopenharmony_ci 12578c2ecf20Sopenharmony_cistatic void restore_default_state(struct intel_context *ce, 12588c2ecf20Sopenharmony_ci struct intel_engine_cs *engine) 12598c2ecf20Sopenharmony_ci{ 12608c2ecf20Sopenharmony_ci u32 *regs; 12618c2ecf20Sopenharmony_ci 12628c2ecf20Sopenharmony_ci regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE); 12638c2ecf20Sopenharmony_ci execlists_init_reg_state(regs, ce, engine, ce->ring, true); 12648c2ecf20Sopenharmony_ci 12658c2ecf20Sopenharmony_ci ce->runtime.last = intel_context_get_runtime(ce); 12668c2ecf20Sopenharmony_ci} 12678c2ecf20Sopenharmony_ci 12688c2ecf20Sopenharmony_cistatic void reset_active(struct i915_request *rq, 12698c2ecf20Sopenharmony_ci struct intel_engine_cs *engine) 12708c2ecf20Sopenharmony_ci{ 12718c2ecf20Sopenharmony_ci struct intel_context * const ce = rq->context; 12728c2ecf20Sopenharmony_ci u32 head; 12738c2ecf20Sopenharmony_ci 12748c2ecf20Sopenharmony_ci /* 12758c2ecf20Sopenharmony_ci * The executing context has been cancelled. We want to prevent 12768c2ecf20Sopenharmony_ci * further execution along this context and propagate the error on 12778c2ecf20Sopenharmony_ci * to anything depending on its results. 12788c2ecf20Sopenharmony_ci * 12798c2ecf20Sopenharmony_ci * In __i915_request_submit(), we apply the -EIO and remove the 12808c2ecf20Sopenharmony_ci * requests' payloads for any banned requests. But first, we must 12818c2ecf20Sopenharmony_ci * rewind the context back to the start of the incomplete request so 12828c2ecf20Sopenharmony_ci * that we do not jump back into the middle of the batch. 12838c2ecf20Sopenharmony_ci * 12848c2ecf20Sopenharmony_ci * We preserve the breadcrumbs and semaphores of the incomplete 12858c2ecf20Sopenharmony_ci * requests so that inter-timeline dependencies (i.e other timelines) 12868c2ecf20Sopenharmony_ci * remain correctly ordered. And we defer to __i915_request_submit() 12878c2ecf20Sopenharmony_ci * so that all asynchronous waits are correctly handled. 12888c2ecf20Sopenharmony_ci */ 12898c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n", 12908c2ecf20Sopenharmony_ci rq->fence.context, rq->fence.seqno); 12918c2ecf20Sopenharmony_ci 12928c2ecf20Sopenharmony_ci /* On resubmission of the active request, payload will be scrubbed */ 12938c2ecf20Sopenharmony_ci if (i915_request_completed(rq)) 12948c2ecf20Sopenharmony_ci head = rq->tail; 12958c2ecf20Sopenharmony_ci else 12968c2ecf20Sopenharmony_ci head = active_request(ce->timeline, rq)->head; 12978c2ecf20Sopenharmony_ci head = intel_ring_wrap(ce->ring, head); 12988c2ecf20Sopenharmony_ci 12998c2ecf20Sopenharmony_ci /* Scrub the context image to prevent replaying the previous batch */ 13008c2ecf20Sopenharmony_ci restore_default_state(ce, engine); 13018c2ecf20Sopenharmony_ci __execlists_update_reg_state(ce, engine, head); 13028c2ecf20Sopenharmony_ci 13038c2ecf20Sopenharmony_ci /* We've switched away, so this should be a no-op, but intent matters */ 13048c2ecf20Sopenharmony_ci ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; 13058c2ecf20Sopenharmony_ci} 13068c2ecf20Sopenharmony_ci 13078c2ecf20Sopenharmony_cistatic void st_update_runtime_underflow(struct intel_context *ce, s32 dt) 13088c2ecf20Sopenharmony_ci{ 13098c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 13108c2ecf20Sopenharmony_ci ce->runtime.num_underflow += dt < 0; 13118c2ecf20Sopenharmony_ci ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt); 13128c2ecf20Sopenharmony_ci#endif 13138c2ecf20Sopenharmony_ci} 13148c2ecf20Sopenharmony_ci 13158c2ecf20Sopenharmony_cistatic void intel_context_update_runtime(struct intel_context *ce) 13168c2ecf20Sopenharmony_ci{ 13178c2ecf20Sopenharmony_ci u32 old; 13188c2ecf20Sopenharmony_ci s32 dt; 13198c2ecf20Sopenharmony_ci 13208c2ecf20Sopenharmony_ci if (intel_context_is_barrier(ce)) 13218c2ecf20Sopenharmony_ci return; 13228c2ecf20Sopenharmony_ci 13238c2ecf20Sopenharmony_ci old = ce->runtime.last; 13248c2ecf20Sopenharmony_ci ce->runtime.last = intel_context_get_runtime(ce); 13258c2ecf20Sopenharmony_ci dt = ce->runtime.last - old; 13268c2ecf20Sopenharmony_ci 13278c2ecf20Sopenharmony_ci if (unlikely(dt <= 0)) { 13288c2ecf20Sopenharmony_ci CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n", 13298c2ecf20Sopenharmony_ci old, ce->runtime.last, dt); 13308c2ecf20Sopenharmony_ci st_update_runtime_underflow(ce, dt); 13318c2ecf20Sopenharmony_ci return; 13328c2ecf20Sopenharmony_ci } 13338c2ecf20Sopenharmony_ci 13348c2ecf20Sopenharmony_ci ewma_runtime_add(&ce->runtime.avg, dt); 13358c2ecf20Sopenharmony_ci ce->runtime.total += dt; 13368c2ecf20Sopenharmony_ci} 13378c2ecf20Sopenharmony_ci 13388c2ecf20Sopenharmony_cistatic inline struct intel_engine_cs * 13398c2ecf20Sopenharmony_ci__execlists_schedule_in(struct i915_request *rq) 13408c2ecf20Sopenharmony_ci{ 13418c2ecf20Sopenharmony_ci struct intel_engine_cs * const engine = rq->engine; 13428c2ecf20Sopenharmony_ci struct intel_context * const ce = rq->context; 13438c2ecf20Sopenharmony_ci 13448c2ecf20Sopenharmony_ci intel_context_get(ce); 13458c2ecf20Sopenharmony_ci 13468c2ecf20Sopenharmony_ci if (unlikely(intel_context_is_banned(ce))) 13478c2ecf20Sopenharmony_ci reset_active(rq, engine); 13488c2ecf20Sopenharmony_ci 13498c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 13508c2ecf20Sopenharmony_ci execlists_check_context(ce, engine); 13518c2ecf20Sopenharmony_ci 13528c2ecf20Sopenharmony_ci if (ce->tag) { 13538c2ecf20Sopenharmony_ci /* Use a fixed tag for OA and friends */ 13548c2ecf20Sopenharmony_ci GEM_BUG_ON(ce->tag <= BITS_PER_LONG); 13558c2ecf20Sopenharmony_ci ce->lrc.ccid = ce->tag; 13568c2ecf20Sopenharmony_ci } else { 13578c2ecf20Sopenharmony_ci /* We don't need a strict matching tag, just different values */ 13588c2ecf20Sopenharmony_ci unsigned int tag = ffs(READ_ONCE(engine->context_tag)); 13598c2ecf20Sopenharmony_ci 13608c2ecf20Sopenharmony_ci GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); 13618c2ecf20Sopenharmony_ci clear_bit(tag - 1, &engine->context_tag); 13628c2ecf20Sopenharmony_ci ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32); 13638c2ecf20Sopenharmony_ci 13648c2ecf20Sopenharmony_ci BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); 13658c2ecf20Sopenharmony_ci } 13668c2ecf20Sopenharmony_ci 13678c2ecf20Sopenharmony_ci ce->lrc.ccid |= engine->execlists.ccid; 13688c2ecf20Sopenharmony_ci 13698c2ecf20Sopenharmony_ci __intel_gt_pm_get(engine->gt); 13708c2ecf20Sopenharmony_ci if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active)) 13718c2ecf20Sopenharmony_ci intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); 13728c2ecf20Sopenharmony_ci execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); 13738c2ecf20Sopenharmony_ci intel_engine_context_in(engine); 13748c2ecf20Sopenharmony_ci 13758c2ecf20Sopenharmony_ci return engine; 13768c2ecf20Sopenharmony_ci} 13778c2ecf20Sopenharmony_ci 13788c2ecf20Sopenharmony_cistatic inline struct i915_request * 13798c2ecf20Sopenharmony_ciexeclists_schedule_in(struct i915_request *rq, int idx) 13808c2ecf20Sopenharmony_ci{ 13818c2ecf20Sopenharmony_ci struct intel_context * const ce = rq->context; 13828c2ecf20Sopenharmony_ci struct intel_engine_cs *old; 13838c2ecf20Sopenharmony_ci 13848c2ecf20Sopenharmony_ci GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); 13858c2ecf20Sopenharmony_ci trace_i915_request_in(rq, idx); 13868c2ecf20Sopenharmony_ci 13878c2ecf20Sopenharmony_ci old = READ_ONCE(ce->inflight); 13888c2ecf20Sopenharmony_ci do { 13898c2ecf20Sopenharmony_ci if (!old) { 13908c2ecf20Sopenharmony_ci WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq)); 13918c2ecf20Sopenharmony_ci break; 13928c2ecf20Sopenharmony_ci } 13938c2ecf20Sopenharmony_ci } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old))); 13948c2ecf20Sopenharmony_ci 13958c2ecf20Sopenharmony_ci GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); 13968c2ecf20Sopenharmony_ci return i915_request_get(rq); 13978c2ecf20Sopenharmony_ci} 13988c2ecf20Sopenharmony_ci 13998c2ecf20Sopenharmony_cistatic void kick_siblings(struct i915_request *rq, struct intel_context *ce) 14008c2ecf20Sopenharmony_ci{ 14018c2ecf20Sopenharmony_ci struct virtual_engine *ve = container_of(ce, typeof(*ve), context); 14028c2ecf20Sopenharmony_ci struct i915_request *next = READ_ONCE(ve->request); 14038c2ecf20Sopenharmony_ci 14048c2ecf20Sopenharmony_ci if (next == rq || (next && next->execution_mask & ~rq->execution_mask)) 14058c2ecf20Sopenharmony_ci tasklet_hi_schedule(&ve->base.execlists.tasklet); 14068c2ecf20Sopenharmony_ci} 14078c2ecf20Sopenharmony_ci 14088c2ecf20Sopenharmony_cistatic inline void 14098c2ecf20Sopenharmony_ci__execlists_schedule_out(struct i915_request *rq, 14108c2ecf20Sopenharmony_ci struct intel_engine_cs * const engine, 14118c2ecf20Sopenharmony_ci unsigned int ccid) 14128c2ecf20Sopenharmony_ci{ 14138c2ecf20Sopenharmony_ci struct intel_context * const ce = rq->context; 14148c2ecf20Sopenharmony_ci 14158c2ecf20Sopenharmony_ci /* 14168c2ecf20Sopenharmony_ci * NB process_csb() is not under the engine->active.lock and hence 14178c2ecf20Sopenharmony_ci * schedule_out can race with schedule_in meaning that we should 14188c2ecf20Sopenharmony_ci * refrain from doing non-trivial work here. 14198c2ecf20Sopenharmony_ci */ 14208c2ecf20Sopenharmony_ci 14218c2ecf20Sopenharmony_ci /* 14228c2ecf20Sopenharmony_ci * If we have just completed this context, the engine may now be 14238c2ecf20Sopenharmony_ci * idle and we want to re-enter powersaving. 14248c2ecf20Sopenharmony_ci */ 14258c2ecf20Sopenharmony_ci if (list_is_last_rcu(&rq->link, &ce->timeline->requests) && 14268c2ecf20Sopenharmony_ci i915_request_completed(rq)) 14278c2ecf20Sopenharmony_ci intel_engine_add_retire(engine, ce->timeline); 14288c2ecf20Sopenharmony_ci 14298c2ecf20Sopenharmony_ci ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; 14308c2ecf20Sopenharmony_ci ccid &= GEN12_MAX_CONTEXT_HW_ID; 14318c2ecf20Sopenharmony_ci if (ccid < BITS_PER_LONG) { 14328c2ecf20Sopenharmony_ci GEM_BUG_ON(ccid == 0); 14338c2ecf20Sopenharmony_ci GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); 14348c2ecf20Sopenharmony_ci set_bit(ccid - 1, &engine->context_tag); 14358c2ecf20Sopenharmony_ci } 14368c2ecf20Sopenharmony_ci 14378c2ecf20Sopenharmony_ci intel_context_update_runtime(ce); 14388c2ecf20Sopenharmony_ci intel_engine_context_out(engine); 14398c2ecf20Sopenharmony_ci execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); 14408c2ecf20Sopenharmony_ci if (engine->fw_domain && !atomic_dec_return(&engine->fw_active)) 14418c2ecf20Sopenharmony_ci intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); 14428c2ecf20Sopenharmony_ci intel_gt_pm_put_async(engine->gt); 14438c2ecf20Sopenharmony_ci 14448c2ecf20Sopenharmony_ci /* 14458c2ecf20Sopenharmony_ci * If this is part of a virtual engine, its next request may 14468c2ecf20Sopenharmony_ci * have been blocked waiting for access to the active context. 14478c2ecf20Sopenharmony_ci * We have to kick all the siblings again in case we need to 14488c2ecf20Sopenharmony_ci * switch (e.g. the next request is not runnable on this 14498c2ecf20Sopenharmony_ci * engine). Hopefully, we will already have submitted the next 14508c2ecf20Sopenharmony_ci * request before the tasklet runs and do not need to rebuild 14518c2ecf20Sopenharmony_ci * each virtual tree and kick everyone again. 14528c2ecf20Sopenharmony_ci */ 14538c2ecf20Sopenharmony_ci if (ce->engine != engine) 14548c2ecf20Sopenharmony_ci kick_siblings(rq, ce); 14558c2ecf20Sopenharmony_ci 14568c2ecf20Sopenharmony_ci intel_context_put(ce); 14578c2ecf20Sopenharmony_ci} 14588c2ecf20Sopenharmony_ci 14598c2ecf20Sopenharmony_cistatic inline void 14608c2ecf20Sopenharmony_ciexeclists_schedule_out(struct i915_request *rq) 14618c2ecf20Sopenharmony_ci{ 14628c2ecf20Sopenharmony_ci struct intel_context * const ce = rq->context; 14638c2ecf20Sopenharmony_ci struct intel_engine_cs *cur, *old; 14648c2ecf20Sopenharmony_ci u32 ccid; 14658c2ecf20Sopenharmony_ci 14668c2ecf20Sopenharmony_ci trace_i915_request_out(rq); 14678c2ecf20Sopenharmony_ci 14688c2ecf20Sopenharmony_ci ccid = rq->context->lrc.ccid; 14698c2ecf20Sopenharmony_ci old = READ_ONCE(ce->inflight); 14708c2ecf20Sopenharmony_ci do 14718c2ecf20Sopenharmony_ci cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL; 14728c2ecf20Sopenharmony_ci while (!try_cmpxchg(&ce->inflight, &old, cur)); 14738c2ecf20Sopenharmony_ci if (!cur) 14748c2ecf20Sopenharmony_ci __execlists_schedule_out(rq, old, ccid); 14758c2ecf20Sopenharmony_ci 14768c2ecf20Sopenharmony_ci i915_request_put(rq); 14778c2ecf20Sopenharmony_ci} 14788c2ecf20Sopenharmony_ci 14798c2ecf20Sopenharmony_cistatic u64 execlists_update_context(struct i915_request *rq) 14808c2ecf20Sopenharmony_ci{ 14818c2ecf20Sopenharmony_ci struct intel_context *ce = rq->context; 14828c2ecf20Sopenharmony_ci u64 desc = ce->lrc.desc; 14838c2ecf20Sopenharmony_ci u32 tail, prev; 14848c2ecf20Sopenharmony_ci 14858c2ecf20Sopenharmony_ci /* 14868c2ecf20Sopenharmony_ci * WaIdleLiteRestore:bdw,skl 14878c2ecf20Sopenharmony_ci * 14888c2ecf20Sopenharmony_ci * We should never submit the context with the same RING_TAIL twice 14898c2ecf20Sopenharmony_ci * just in case we submit an empty ring, which confuses the HW. 14908c2ecf20Sopenharmony_ci * 14918c2ecf20Sopenharmony_ci * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of 14928c2ecf20Sopenharmony_ci * the normal request to be able to always advance the RING_TAIL on 14938c2ecf20Sopenharmony_ci * subsequent resubmissions (for lite restore). Should that fail us, 14948c2ecf20Sopenharmony_ci * and we try and submit the same tail again, force the context 14958c2ecf20Sopenharmony_ci * reload. 14968c2ecf20Sopenharmony_ci * 14978c2ecf20Sopenharmony_ci * If we need to return to a preempted context, we need to skip the 14988c2ecf20Sopenharmony_ci * lite-restore and force it to reload the RING_TAIL. Otherwise, the 14998c2ecf20Sopenharmony_ci * HW has a tendency to ignore us rewinding the TAIL to the end of 15008c2ecf20Sopenharmony_ci * an earlier request. 15018c2ecf20Sopenharmony_ci */ 15028c2ecf20Sopenharmony_ci GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail); 15038c2ecf20Sopenharmony_ci prev = rq->ring->tail; 15048c2ecf20Sopenharmony_ci tail = intel_ring_set_tail(rq->ring, rq->tail); 15058c2ecf20Sopenharmony_ci if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) 15068c2ecf20Sopenharmony_ci desc |= CTX_DESC_FORCE_RESTORE; 15078c2ecf20Sopenharmony_ci ce->lrc_reg_state[CTX_RING_TAIL] = tail; 15088c2ecf20Sopenharmony_ci rq->tail = rq->wa_tail; 15098c2ecf20Sopenharmony_ci 15108c2ecf20Sopenharmony_ci /* 15118c2ecf20Sopenharmony_ci * Make sure the context image is complete before we submit it to HW. 15128c2ecf20Sopenharmony_ci * 15138c2ecf20Sopenharmony_ci * Ostensibly, writes (including the WCB) should be flushed prior to 15148c2ecf20Sopenharmony_ci * an uncached write such as our mmio register access, the empirical 15158c2ecf20Sopenharmony_ci * evidence (esp. on Braswell) suggests that the WC write into memory 15168c2ecf20Sopenharmony_ci * may not be visible to the HW prior to the completion of the UC 15178c2ecf20Sopenharmony_ci * register write and that we may begin execution from the context 15188c2ecf20Sopenharmony_ci * before its image is complete leading to invalid PD chasing. 15198c2ecf20Sopenharmony_ci */ 15208c2ecf20Sopenharmony_ci wmb(); 15218c2ecf20Sopenharmony_ci 15228c2ecf20Sopenharmony_ci ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE; 15238c2ecf20Sopenharmony_ci return desc; 15248c2ecf20Sopenharmony_ci} 15258c2ecf20Sopenharmony_ci 15268c2ecf20Sopenharmony_cistatic inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) 15278c2ecf20Sopenharmony_ci{ 15288c2ecf20Sopenharmony_ci if (execlists->ctrl_reg) { 15298c2ecf20Sopenharmony_ci writel(lower_32_bits(desc), execlists->submit_reg + port * 2); 15308c2ecf20Sopenharmony_ci writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); 15318c2ecf20Sopenharmony_ci } else { 15328c2ecf20Sopenharmony_ci writel(upper_32_bits(desc), execlists->submit_reg); 15338c2ecf20Sopenharmony_ci writel(lower_32_bits(desc), execlists->submit_reg); 15348c2ecf20Sopenharmony_ci } 15358c2ecf20Sopenharmony_ci} 15368c2ecf20Sopenharmony_ci 15378c2ecf20Sopenharmony_cistatic __maybe_unused char * 15388c2ecf20Sopenharmony_cidump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq) 15398c2ecf20Sopenharmony_ci{ 15408c2ecf20Sopenharmony_ci if (!rq) 15418c2ecf20Sopenharmony_ci return ""; 15428c2ecf20Sopenharmony_ci 15438c2ecf20Sopenharmony_ci snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d", 15448c2ecf20Sopenharmony_ci prefix, 15458c2ecf20Sopenharmony_ci rq->context->lrc.ccid, 15468c2ecf20Sopenharmony_ci rq->fence.context, rq->fence.seqno, 15478c2ecf20Sopenharmony_ci i915_request_completed(rq) ? "!" : 15488c2ecf20Sopenharmony_ci i915_request_started(rq) ? "*" : 15498c2ecf20Sopenharmony_ci "", 15508c2ecf20Sopenharmony_ci rq_prio(rq)); 15518c2ecf20Sopenharmony_ci 15528c2ecf20Sopenharmony_ci return buf; 15538c2ecf20Sopenharmony_ci} 15548c2ecf20Sopenharmony_ci 15558c2ecf20Sopenharmony_cistatic __maybe_unused void 15568c2ecf20Sopenharmony_citrace_ports(const struct intel_engine_execlists *execlists, 15578c2ecf20Sopenharmony_ci const char *msg, 15588c2ecf20Sopenharmony_ci struct i915_request * const *ports) 15598c2ecf20Sopenharmony_ci{ 15608c2ecf20Sopenharmony_ci const struct intel_engine_cs *engine = 15618c2ecf20Sopenharmony_ci container_of(execlists, typeof(*engine), execlists); 15628c2ecf20Sopenharmony_ci char __maybe_unused p0[40], p1[40]; 15638c2ecf20Sopenharmony_ci 15648c2ecf20Sopenharmony_ci if (!ports[0]) 15658c2ecf20Sopenharmony_ci return; 15668c2ecf20Sopenharmony_ci 15678c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, "%s { %s%s }\n", msg, 15688c2ecf20Sopenharmony_ci dump_port(p0, sizeof(p0), "", ports[0]), 15698c2ecf20Sopenharmony_ci dump_port(p1, sizeof(p1), ", ", ports[1])); 15708c2ecf20Sopenharmony_ci} 15718c2ecf20Sopenharmony_ci 15728c2ecf20Sopenharmony_cistatic inline bool 15738c2ecf20Sopenharmony_cireset_in_progress(const struct intel_engine_execlists *execlists) 15748c2ecf20Sopenharmony_ci{ 15758c2ecf20Sopenharmony_ci return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); 15768c2ecf20Sopenharmony_ci} 15778c2ecf20Sopenharmony_ci 15788c2ecf20Sopenharmony_cistatic __maybe_unused bool 15798c2ecf20Sopenharmony_ciassert_pending_valid(const struct intel_engine_execlists *execlists, 15808c2ecf20Sopenharmony_ci const char *msg) 15818c2ecf20Sopenharmony_ci{ 15828c2ecf20Sopenharmony_ci struct intel_engine_cs *engine = 15838c2ecf20Sopenharmony_ci container_of(execlists, typeof(*engine), execlists); 15848c2ecf20Sopenharmony_ci struct i915_request * const *port, *rq; 15858c2ecf20Sopenharmony_ci struct intel_context *ce = NULL; 15868c2ecf20Sopenharmony_ci bool sentinel = false; 15878c2ecf20Sopenharmony_ci u32 ccid = -1; 15888c2ecf20Sopenharmony_ci 15898c2ecf20Sopenharmony_ci trace_ports(execlists, msg, execlists->pending); 15908c2ecf20Sopenharmony_ci 15918c2ecf20Sopenharmony_ci /* We may be messing around with the lists during reset, lalala */ 15928c2ecf20Sopenharmony_ci if (reset_in_progress(execlists)) 15938c2ecf20Sopenharmony_ci return true; 15948c2ecf20Sopenharmony_ci 15958c2ecf20Sopenharmony_ci if (!execlists->pending[0]) { 15968c2ecf20Sopenharmony_ci GEM_TRACE_ERR("%s: Nothing pending for promotion!\n", 15978c2ecf20Sopenharmony_ci engine->name); 15988c2ecf20Sopenharmony_ci return false; 15998c2ecf20Sopenharmony_ci } 16008c2ecf20Sopenharmony_ci 16018c2ecf20Sopenharmony_ci if (execlists->pending[execlists_num_ports(execlists)]) { 16028c2ecf20Sopenharmony_ci GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n", 16038c2ecf20Sopenharmony_ci engine->name, execlists_num_ports(execlists)); 16048c2ecf20Sopenharmony_ci return false; 16058c2ecf20Sopenharmony_ci } 16068c2ecf20Sopenharmony_ci 16078c2ecf20Sopenharmony_ci for (port = execlists->pending; (rq = *port); port++) { 16088c2ecf20Sopenharmony_ci unsigned long flags; 16098c2ecf20Sopenharmony_ci bool ok = true; 16108c2ecf20Sopenharmony_ci 16118c2ecf20Sopenharmony_ci GEM_BUG_ON(!kref_read(&rq->fence.refcount)); 16128c2ecf20Sopenharmony_ci GEM_BUG_ON(!i915_request_is_active(rq)); 16138c2ecf20Sopenharmony_ci 16148c2ecf20Sopenharmony_ci if (ce == rq->context) { 16158c2ecf20Sopenharmony_ci GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n", 16168c2ecf20Sopenharmony_ci engine->name, 16178c2ecf20Sopenharmony_ci ce->timeline->fence_context, 16188c2ecf20Sopenharmony_ci port - execlists->pending); 16198c2ecf20Sopenharmony_ci return false; 16208c2ecf20Sopenharmony_ci } 16218c2ecf20Sopenharmony_ci ce = rq->context; 16228c2ecf20Sopenharmony_ci 16238c2ecf20Sopenharmony_ci if (ccid == ce->lrc.ccid) { 16248c2ecf20Sopenharmony_ci GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n", 16258c2ecf20Sopenharmony_ci engine->name, 16268c2ecf20Sopenharmony_ci ccid, ce->timeline->fence_context, 16278c2ecf20Sopenharmony_ci port - execlists->pending); 16288c2ecf20Sopenharmony_ci return false; 16298c2ecf20Sopenharmony_ci } 16308c2ecf20Sopenharmony_ci ccid = ce->lrc.ccid; 16318c2ecf20Sopenharmony_ci 16328c2ecf20Sopenharmony_ci /* 16338c2ecf20Sopenharmony_ci * Sentinels are supposed to be the last request so they flush 16348c2ecf20Sopenharmony_ci * the current execution off the HW. Check that they are the only 16358c2ecf20Sopenharmony_ci * request in the pending submission. 16368c2ecf20Sopenharmony_ci */ 16378c2ecf20Sopenharmony_ci if (sentinel) { 16388c2ecf20Sopenharmony_ci GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n", 16398c2ecf20Sopenharmony_ci engine->name, 16408c2ecf20Sopenharmony_ci ce->timeline->fence_context, 16418c2ecf20Sopenharmony_ci port - execlists->pending); 16428c2ecf20Sopenharmony_ci return false; 16438c2ecf20Sopenharmony_ci } 16448c2ecf20Sopenharmony_ci sentinel = i915_request_has_sentinel(rq); 16458c2ecf20Sopenharmony_ci 16468c2ecf20Sopenharmony_ci /* Hold tightly onto the lock to prevent concurrent retires! */ 16478c2ecf20Sopenharmony_ci if (!spin_trylock_irqsave(&rq->lock, flags)) 16488c2ecf20Sopenharmony_ci continue; 16498c2ecf20Sopenharmony_ci 16508c2ecf20Sopenharmony_ci if (i915_request_completed(rq)) 16518c2ecf20Sopenharmony_ci goto unlock; 16528c2ecf20Sopenharmony_ci 16538c2ecf20Sopenharmony_ci if (i915_active_is_idle(&ce->active) && 16548c2ecf20Sopenharmony_ci !intel_context_is_barrier(ce)) { 16558c2ecf20Sopenharmony_ci GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n", 16568c2ecf20Sopenharmony_ci engine->name, 16578c2ecf20Sopenharmony_ci ce->timeline->fence_context, 16588c2ecf20Sopenharmony_ci port - execlists->pending); 16598c2ecf20Sopenharmony_ci ok = false; 16608c2ecf20Sopenharmony_ci goto unlock; 16618c2ecf20Sopenharmony_ci } 16628c2ecf20Sopenharmony_ci 16638c2ecf20Sopenharmony_ci if (!i915_vma_is_pinned(ce->state)) { 16648c2ecf20Sopenharmony_ci GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n", 16658c2ecf20Sopenharmony_ci engine->name, 16668c2ecf20Sopenharmony_ci ce->timeline->fence_context, 16678c2ecf20Sopenharmony_ci port - execlists->pending); 16688c2ecf20Sopenharmony_ci ok = false; 16698c2ecf20Sopenharmony_ci goto unlock; 16708c2ecf20Sopenharmony_ci } 16718c2ecf20Sopenharmony_ci 16728c2ecf20Sopenharmony_ci if (!i915_vma_is_pinned(ce->ring->vma)) { 16738c2ecf20Sopenharmony_ci GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n", 16748c2ecf20Sopenharmony_ci engine->name, 16758c2ecf20Sopenharmony_ci ce->timeline->fence_context, 16768c2ecf20Sopenharmony_ci port - execlists->pending); 16778c2ecf20Sopenharmony_ci ok = false; 16788c2ecf20Sopenharmony_ci goto unlock; 16798c2ecf20Sopenharmony_ci } 16808c2ecf20Sopenharmony_ci 16818c2ecf20Sopenharmony_ciunlock: 16828c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rq->lock, flags); 16838c2ecf20Sopenharmony_ci if (!ok) 16848c2ecf20Sopenharmony_ci return false; 16858c2ecf20Sopenharmony_ci } 16868c2ecf20Sopenharmony_ci 16878c2ecf20Sopenharmony_ci return ce; 16888c2ecf20Sopenharmony_ci} 16898c2ecf20Sopenharmony_ci 16908c2ecf20Sopenharmony_cistatic void execlists_submit_ports(struct intel_engine_cs *engine) 16918c2ecf20Sopenharmony_ci{ 16928c2ecf20Sopenharmony_ci struct intel_engine_execlists *execlists = &engine->execlists; 16938c2ecf20Sopenharmony_ci unsigned int n; 16948c2ecf20Sopenharmony_ci 16958c2ecf20Sopenharmony_ci GEM_BUG_ON(!assert_pending_valid(execlists, "submit")); 16968c2ecf20Sopenharmony_ci 16978c2ecf20Sopenharmony_ci /* 16988c2ecf20Sopenharmony_ci * We can skip acquiring intel_runtime_pm_get() here as it was taken 16998c2ecf20Sopenharmony_ci * on our behalf by the request (see i915_gem_mark_busy()) and it will 17008c2ecf20Sopenharmony_ci * not be relinquished until the device is idle (see 17018c2ecf20Sopenharmony_ci * i915_gem_idle_work_handler()). As a precaution, we make sure 17028c2ecf20Sopenharmony_ci * that all ELSP are drained i.e. we have processed the CSB, 17038c2ecf20Sopenharmony_ci * before allowing ourselves to idle and calling intel_runtime_pm_put(). 17048c2ecf20Sopenharmony_ci */ 17058c2ecf20Sopenharmony_ci GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 17068c2ecf20Sopenharmony_ci 17078c2ecf20Sopenharmony_ci /* 17088c2ecf20Sopenharmony_ci * ELSQ note: the submit queue is not cleared after being submitted 17098c2ecf20Sopenharmony_ci * to the HW so we need to make sure we always clean it up. This is 17108c2ecf20Sopenharmony_ci * currently ensured by the fact that we always write the same number 17118c2ecf20Sopenharmony_ci * of elsq entries, keep this in mind before changing the loop below. 17128c2ecf20Sopenharmony_ci */ 17138c2ecf20Sopenharmony_ci for (n = execlists_num_ports(execlists); n--; ) { 17148c2ecf20Sopenharmony_ci struct i915_request *rq = execlists->pending[n]; 17158c2ecf20Sopenharmony_ci 17168c2ecf20Sopenharmony_ci write_desc(execlists, 17178c2ecf20Sopenharmony_ci rq ? execlists_update_context(rq) : 0, 17188c2ecf20Sopenharmony_ci n); 17198c2ecf20Sopenharmony_ci } 17208c2ecf20Sopenharmony_ci 17218c2ecf20Sopenharmony_ci /* we need to manually load the submit queue */ 17228c2ecf20Sopenharmony_ci if (execlists->ctrl_reg) 17238c2ecf20Sopenharmony_ci writel(EL_CTRL_LOAD, execlists->ctrl_reg); 17248c2ecf20Sopenharmony_ci} 17258c2ecf20Sopenharmony_ci 17268c2ecf20Sopenharmony_cistatic bool ctx_single_port_submission(const struct intel_context *ce) 17278c2ecf20Sopenharmony_ci{ 17288c2ecf20Sopenharmony_ci return (IS_ENABLED(CONFIG_DRM_I915_GVT) && 17298c2ecf20Sopenharmony_ci intel_context_force_single_submission(ce)); 17308c2ecf20Sopenharmony_ci} 17318c2ecf20Sopenharmony_ci 17328c2ecf20Sopenharmony_cistatic bool can_merge_ctx(const struct intel_context *prev, 17338c2ecf20Sopenharmony_ci const struct intel_context *next) 17348c2ecf20Sopenharmony_ci{ 17358c2ecf20Sopenharmony_ci if (prev != next) 17368c2ecf20Sopenharmony_ci return false; 17378c2ecf20Sopenharmony_ci 17388c2ecf20Sopenharmony_ci if (ctx_single_port_submission(prev)) 17398c2ecf20Sopenharmony_ci return false; 17408c2ecf20Sopenharmony_ci 17418c2ecf20Sopenharmony_ci return true; 17428c2ecf20Sopenharmony_ci} 17438c2ecf20Sopenharmony_ci 17448c2ecf20Sopenharmony_cistatic unsigned long i915_request_flags(const struct i915_request *rq) 17458c2ecf20Sopenharmony_ci{ 17468c2ecf20Sopenharmony_ci return READ_ONCE(rq->fence.flags); 17478c2ecf20Sopenharmony_ci} 17488c2ecf20Sopenharmony_ci 17498c2ecf20Sopenharmony_cistatic bool can_merge_rq(const struct i915_request *prev, 17508c2ecf20Sopenharmony_ci const struct i915_request *next) 17518c2ecf20Sopenharmony_ci{ 17528c2ecf20Sopenharmony_ci GEM_BUG_ON(prev == next); 17538c2ecf20Sopenharmony_ci GEM_BUG_ON(!assert_priority_queue(prev, next)); 17548c2ecf20Sopenharmony_ci 17558c2ecf20Sopenharmony_ci /* 17568c2ecf20Sopenharmony_ci * We do not submit known completed requests. Therefore if the next 17578c2ecf20Sopenharmony_ci * request is already completed, we can pretend to merge it in 17588c2ecf20Sopenharmony_ci * with the previous context (and we will skip updating the ELSP 17598c2ecf20Sopenharmony_ci * and tracking). Thus hopefully keeping the ELSP full with active 17608c2ecf20Sopenharmony_ci * contexts, despite the best efforts of preempt-to-busy to confuse 17618c2ecf20Sopenharmony_ci * us. 17628c2ecf20Sopenharmony_ci */ 17638c2ecf20Sopenharmony_ci if (i915_request_completed(next)) 17648c2ecf20Sopenharmony_ci return true; 17658c2ecf20Sopenharmony_ci 17668c2ecf20Sopenharmony_ci if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) & 17678c2ecf20Sopenharmony_ci (BIT(I915_FENCE_FLAG_NOPREEMPT) | 17688c2ecf20Sopenharmony_ci BIT(I915_FENCE_FLAG_SENTINEL)))) 17698c2ecf20Sopenharmony_ci return false; 17708c2ecf20Sopenharmony_ci 17718c2ecf20Sopenharmony_ci if (!can_merge_ctx(prev->context, next->context)) 17728c2ecf20Sopenharmony_ci return false; 17738c2ecf20Sopenharmony_ci 17748c2ecf20Sopenharmony_ci GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno)); 17758c2ecf20Sopenharmony_ci return true; 17768c2ecf20Sopenharmony_ci} 17778c2ecf20Sopenharmony_ci 17788c2ecf20Sopenharmony_cistatic void virtual_update_register_offsets(u32 *regs, 17798c2ecf20Sopenharmony_ci struct intel_engine_cs *engine) 17808c2ecf20Sopenharmony_ci{ 17818c2ecf20Sopenharmony_ci set_offsets(regs, reg_offsets(engine), engine, false); 17828c2ecf20Sopenharmony_ci} 17838c2ecf20Sopenharmony_ci 17848c2ecf20Sopenharmony_cistatic bool virtual_matches(const struct virtual_engine *ve, 17858c2ecf20Sopenharmony_ci const struct i915_request *rq, 17868c2ecf20Sopenharmony_ci const struct intel_engine_cs *engine) 17878c2ecf20Sopenharmony_ci{ 17888c2ecf20Sopenharmony_ci const struct intel_engine_cs *inflight; 17898c2ecf20Sopenharmony_ci 17908c2ecf20Sopenharmony_ci if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */ 17918c2ecf20Sopenharmony_ci return false; 17928c2ecf20Sopenharmony_ci 17938c2ecf20Sopenharmony_ci /* 17948c2ecf20Sopenharmony_ci * We track when the HW has completed saving the context image 17958c2ecf20Sopenharmony_ci * (i.e. when we have seen the final CS event switching out of 17968c2ecf20Sopenharmony_ci * the context) and must not overwrite the context image before 17978c2ecf20Sopenharmony_ci * then. This restricts us to only using the active engine 17988c2ecf20Sopenharmony_ci * while the previous virtualized request is inflight (so 17998c2ecf20Sopenharmony_ci * we reuse the register offsets). This is a very small 18008c2ecf20Sopenharmony_ci * hystersis on the greedy seelction algorithm. 18018c2ecf20Sopenharmony_ci */ 18028c2ecf20Sopenharmony_ci inflight = intel_context_inflight(&ve->context); 18038c2ecf20Sopenharmony_ci if (inflight && inflight != engine) 18048c2ecf20Sopenharmony_ci return false; 18058c2ecf20Sopenharmony_ci 18068c2ecf20Sopenharmony_ci return true; 18078c2ecf20Sopenharmony_ci} 18088c2ecf20Sopenharmony_ci 18098c2ecf20Sopenharmony_cistatic void virtual_xfer_context(struct virtual_engine *ve, 18108c2ecf20Sopenharmony_ci struct intel_engine_cs *engine) 18118c2ecf20Sopenharmony_ci{ 18128c2ecf20Sopenharmony_ci unsigned int n; 18138c2ecf20Sopenharmony_ci 18148c2ecf20Sopenharmony_ci if (likely(engine == ve->siblings[0])) 18158c2ecf20Sopenharmony_ci return; 18168c2ecf20Sopenharmony_ci 18178c2ecf20Sopenharmony_ci GEM_BUG_ON(READ_ONCE(ve->context.inflight)); 18188c2ecf20Sopenharmony_ci if (!intel_engine_has_relative_mmio(engine)) 18198c2ecf20Sopenharmony_ci virtual_update_register_offsets(ve->context.lrc_reg_state, 18208c2ecf20Sopenharmony_ci engine); 18218c2ecf20Sopenharmony_ci 18228c2ecf20Sopenharmony_ci /* 18238c2ecf20Sopenharmony_ci * Move the bound engine to the top of the list for 18248c2ecf20Sopenharmony_ci * future execution. We then kick this tasklet first 18258c2ecf20Sopenharmony_ci * before checking others, so that we preferentially 18268c2ecf20Sopenharmony_ci * reuse this set of bound registers. 18278c2ecf20Sopenharmony_ci */ 18288c2ecf20Sopenharmony_ci for (n = 1; n < ve->num_siblings; n++) { 18298c2ecf20Sopenharmony_ci if (ve->siblings[n] == engine) { 18308c2ecf20Sopenharmony_ci swap(ve->siblings[n], ve->siblings[0]); 18318c2ecf20Sopenharmony_ci break; 18328c2ecf20Sopenharmony_ci } 18338c2ecf20Sopenharmony_ci } 18348c2ecf20Sopenharmony_ci} 18358c2ecf20Sopenharmony_ci 18368c2ecf20Sopenharmony_ci#define for_each_waiter(p__, rq__) \ 18378c2ecf20Sopenharmony_ci list_for_each_entry_lockless(p__, \ 18388c2ecf20Sopenharmony_ci &(rq__)->sched.waiters_list, \ 18398c2ecf20Sopenharmony_ci wait_link) 18408c2ecf20Sopenharmony_ci 18418c2ecf20Sopenharmony_ci#define for_each_signaler(p__, rq__) \ 18428c2ecf20Sopenharmony_ci list_for_each_entry_rcu(p__, \ 18438c2ecf20Sopenharmony_ci &(rq__)->sched.signalers_list, \ 18448c2ecf20Sopenharmony_ci signal_link) 18458c2ecf20Sopenharmony_ci 18468c2ecf20Sopenharmony_cistatic void defer_request(struct i915_request *rq, struct list_head * const pl) 18478c2ecf20Sopenharmony_ci{ 18488c2ecf20Sopenharmony_ci LIST_HEAD(list); 18498c2ecf20Sopenharmony_ci 18508c2ecf20Sopenharmony_ci /* 18518c2ecf20Sopenharmony_ci * We want to move the interrupted request to the back of 18528c2ecf20Sopenharmony_ci * the round-robin list (i.e. its priority level), but 18538c2ecf20Sopenharmony_ci * in doing so, we must then move all requests that were in 18548c2ecf20Sopenharmony_ci * flight and were waiting for the interrupted request to 18558c2ecf20Sopenharmony_ci * be run after it again. 18568c2ecf20Sopenharmony_ci */ 18578c2ecf20Sopenharmony_ci do { 18588c2ecf20Sopenharmony_ci struct i915_dependency *p; 18598c2ecf20Sopenharmony_ci 18608c2ecf20Sopenharmony_ci GEM_BUG_ON(i915_request_is_active(rq)); 18618c2ecf20Sopenharmony_ci list_move_tail(&rq->sched.link, pl); 18628c2ecf20Sopenharmony_ci 18638c2ecf20Sopenharmony_ci for_each_waiter(p, rq) { 18648c2ecf20Sopenharmony_ci struct i915_request *w = 18658c2ecf20Sopenharmony_ci container_of(p->waiter, typeof(*w), sched); 18668c2ecf20Sopenharmony_ci 18678c2ecf20Sopenharmony_ci if (p->flags & I915_DEPENDENCY_WEAK) 18688c2ecf20Sopenharmony_ci continue; 18698c2ecf20Sopenharmony_ci 18708c2ecf20Sopenharmony_ci /* Leave semaphores spinning on the other engines */ 18718c2ecf20Sopenharmony_ci if (w->engine != rq->engine) 18728c2ecf20Sopenharmony_ci continue; 18738c2ecf20Sopenharmony_ci 18748c2ecf20Sopenharmony_ci /* No waiter should start before its signaler */ 18758c2ecf20Sopenharmony_ci GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) && 18768c2ecf20Sopenharmony_ci i915_request_started(w) && 18778c2ecf20Sopenharmony_ci !i915_request_completed(rq)); 18788c2ecf20Sopenharmony_ci 18798c2ecf20Sopenharmony_ci GEM_BUG_ON(i915_request_is_active(w)); 18808c2ecf20Sopenharmony_ci if (!i915_request_is_ready(w)) 18818c2ecf20Sopenharmony_ci continue; 18828c2ecf20Sopenharmony_ci 18838c2ecf20Sopenharmony_ci if (rq_prio(w) < rq_prio(rq)) 18848c2ecf20Sopenharmony_ci continue; 18858c2ecf20Sopenharmony_ci 18868c2ecf20Sopenharmony_ci GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); 18878c2ecf20Sopenharmony_ci list_move_tail(&w->sched.link, &list); 18888c2ecf20Sopenharmony_ci } 18898c2ecf20Sopenharmony_ci 18908c2ecf20Sopenharmony_ci rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); 18918c2ecf20Sopenharmony_ci } while (rq); 18928c2ecf20Sopenharmony_ci} 18938c2ecf20Sopenharmony_ci 18948c2ecf20Sopenharmony_cistatic void defer_active(struct intel_engine_cs *engine) 18958c2ecf20Sopenharmony_ci{ 18968c2ecf20Sopenharmony_ci struct i915_request *rq; 18978c2ecf20Sopenharmony_ci 18988c2ecf20Sopenharmony_ci rq = __unwind_incomplete_requests(engine); 18998c2ecf20Sopenharmony_ci if (!rq) 19008c2ecf20Sopenharmony_ci return; 19018c2ecf20Sopenharmony_ci 19028c2ecf20Sopenharmony_ci defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq))); 19038c2ecf20Sopenharmony_ci} 19048c2ecf20Sopenharmony_ci 19058c2ecf20Sopenharmony_cistatic bool 19068c2ecf20Sopenharmony_cineed_timeslice(const struct intel_engine_cs *engine, 19078c2ecf20Sopenharmony_ci const struct i915_request *rq, 19088c2ecf20Sopenharmony_ci const struct rb_node *rb) 19098c2ecf20Sopenharmony_ci{ 19108c2ecf20Sopenharmony_ci int hint; 19118c2ecf20Sopenharmony_ci 19128c2ecf20Sopenharmony_ci if (!intel_engine_has_timeslices(engine)) 19138c2ecf20Sopenharmony_ci return false; 19148c2ecf20Sopenharmony_ci 19158c2ecf20Sopenharmony_ci hint = engine->execlists.queue_priority_hint; 19168c2ecf20Sopenharmony_ci 19178c2ecf20Sopenharmony_ci if (rb) { 19188c2ecf20Sopenharmony_ci const struct virtual_engine *ve = 19198c2ecf20Sopenharmony_ci rb_entry(rb, typeof(*ve), nodes[engine->id].rb); 19208c2ecf20Sopenharmony_ci const struct intel_engine_cs *inflight = 19218c2ecf20Sopenharmony_ci intel_context_inflight(&ve->context); 19228c2ecf20Sopenharmony_ci 19238c2ecf20Sopenharmony_ci if (!inflight || inflight == engine) { 19248c2ecf20Sopenharmony_ci struct i915_request *next; 19258c2ecf20Sopenharmony_ci 19268c2ecf20Sopenharmony_ci rcu_read_lock(); 19278c2ecf20Sopenharmony_ci next = READ_ONCE(ve->request); 19288c2ecf20Sopenharmony_ci if (next) 19298c2ecf20Sopenharmony_ci hint = max(hint, rq_prio(next)); 19308c2ecf20Sopenharmony_ci rcu_read_unlock(); 19318c2ecf20Sopenharmony_ci } 19328c2ecf20Sopenharmony_ci } 19338c2ecf20Sopenharmony_ci 19348c2ecf20Sopenharmony_ci if (!list_is_last(&rq->sched.link, &engine->active.requests)) 19358c2ecf20Sopenharmony_ci hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); 19368c2ecf20Sopenharmony_ci 19378c2ecf20Sopenharmony_ci GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE); 19388c2ecf20Sopenharmony_ci return hint >= effective_prio(rq); 19398c2ecf20Sopenharmony_ci} 19408c2ecf20Sopenharmony_ci 19418c2ecf20Sopenharmony_cistatic bool 19428c2ecf20Sopenharmony_citimeslice_yield(const struct intel_engine_execlists *el, 19438c2ecf20Sopenharmony_ci const struct i915_request *rq) 19448c2ecf20Sopenharmony_ci{ 19458c2ecf20Sopenharmony_ci /* 19468c2ecf20Sopenharmony_ci * Once bitten, forever smitten! 19478c2ecf20Sopenharmony_ci * 19488c2ecf20Sopenharmony_ci * If the active context ever busy-waited on a semaphore, 19498c2ecf20Sopenharmony_ci * it will be treated as a hog until the end of its timeslice (i.e. 19508c2ecf20Sopenharmony_ci * until it is scheduled out and replaced by a new submission, 19518c2ecf20Sopenharmony_ci * possibly even its own lite-restore). The HW only sends an interrupt 19528c2ecf20Sopenharmony_ci * on the first miss, and we do know if that semaphore has been 19538c2ecf20Sopenharmony_ci * signaled, or even if it is now stuck on another semaphore. Play 19548c2ecf20Sopenharmony_ci * safe, yield if it might be stuck -- it will be given a fresh 19558c2ecf20Sopenharmony_ci * timeslice in the near future. 19568c2ecf20Sopenharmony_ci */ 19578c2ecf20Sopenharmony_ci return rq->context->lrc.ccid == READ_ONCE(el->yield); 19588c2ecf20Sopenharmony_ci} 19598c2ecf20Sopenharmony_ci 19608c2ecf20Sopenharmony_cistatic bool 19618c2ecf20Sopenharmony_citimeslice_expired(const struct intel_engine_execlists *el, 19628c2ecf20Sopenharmony_ci const struct i915_request *rq) 19638c2ecf20Sopenharmony_ci{ 19648c2ecf20Sopenharmony_ci return timer_expired(&el->timer) || timeslice_yield(el, rq); 19658c2ecf20Sopenharmony_ci} 19668c2ecf20Sopenharmony_ci 19678c2ecf20Sopenharmony_cistatic int 19688c2ecf20Sopenharmony_ciswitch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) 19698c2ecf20Sopenharmony_ci{ 19708c2ecf20Sopenharmony_ci if (list_is_last(&rq->sched.link, &engine->active.requests)) 19718c2ecf20Sopenharmony_ci return engine->execlists.queue_priority_hint; 19728c2ecf20Sopenharmony_ci 19738c2ecf20Sopenharmony_ci return rq_prio(list_next_entry(rq, sched.link)); 19748c2ecf20Sopenharmony_ci} 19758c2ecf20Sopenharmony_ci 19768c2ecf20Sopenharmony_cistatic inline unsigned long 19778c2ecf20Sopenharmony_citimeslice(const struct intel_engine_cs *engine) 19788c2ecf20Sopenharmony_ci{ 19798c2ecf20Sopenharmony_ci return READ_ONCE(engine->props.timeslice_duration_ms); 19808c2ecf20Sopenharmony_ci} 19818c2ecf20Sopenharmony_ci 19828c2ecf20Sopenharmony_cistatic unsigned long active_timeslice(const struct intel_engine_cs *engine) 19838c2ecf20Sopenharmony_ci{ 19848c2ecf20Sopenharmony_ci const struct intel_engine_execlists *execlists = &engine->execlists; 19858c2ecf20Sopenharmony_ci const struct i915_request *rq = *execlists->active; 19868c2ecf20Sopenharmony_ci 19878c2ecf20Sopenharmony_ci if (!rq || i915_request_completed(rq)) 19888c2ecf20Sopenharmony_ci return 0; 19898c2ecf20Sopenharmony_ci 19908c2ecf20Sopenharmony_ci if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq)) 19918c2ecf20Sopenharmony_ci return 0; 19928c2ecf20Sopenharmony_ci 19938c2ecf20Sopenharmony_ci return timeslice(engine); 19948c2ecf20Sopenharmony_ci} 19958c2ecf20Sopenharmony_ci 19968c2ecf20Sopenharmony_cistatic void set_timeslice(struct intel_engine_cs *engine) 19978c2ecf20Sopenharmony_ci{ 19988c2ecf20Sopenharmony_ci unsigned long duration; 19998c2ecf20Sopenharmony_ci 20008c2ecf20Sopenharmony_ci if (!intel_engine_has_timeslices(engine)) 20018c2ecf20Sopenharmony_ci return; 20028c2ecf20Sopenharmony_ci 20038c2ecf20Sopenharmony_ci duration = active_timeslice(engine); 20048c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration); 20058c2ecf20Sopenharmony_ci 20068c2ecf20Sopenharmony_ci set_timer_ms(&engine->execlists.timer, duration); 20078c2ecf20Sopenharmony_ci} 20088c2ecf20Sopenharmony_ci 20098c2ecf20Sopenharmony_cistatic void start_timeslice(struct intel_engine_cs *engine, int prio) 20108c2ecf20Sopenharmony_ci{ 20118c2ecf20Sopenharmony_ci struct intel_engine_execlists *execlists = &engine->execlists; 20128c2ecf20Sopenharmony_ci unsigned long duration; 20138c2ecf20Sopenharmony_ci 20148c2ecf20Sopenharmony_ci if (!intel_engine_has_timeslices(engine)) 20158c2ecf20Sopenharmony_ci return; 20168c2ecf20Sopenharmony_ci 20178c2ecf20Sopenharmony_ci WRITE_ONCE(execlists->switch_priority_hint, prio); 20188c2ecf20Sopenharmony_ci if (prio == INT_MIN) 20198c2ecf20Sopenharmony_ci return; 20208c2ecf20Sopenharmony_ci 20218c2ecf20Sopenharmony_ci if (timer_pending(&execlists->timer)) 20228c2ecf20Sopenharmony_ci return; 20238c2ecf20Sopenharmony_ci 20248c2ecf20Sopenharmony_ci duration = timeslice(engine); 20258c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, 20268c2ecf20Sopenharmony_ci "start timeslicing, prio:%d, interval:%lu", 20278c2ecf20Sopenharmony_ci prio, duration); 20288c2ecf20Sopenharmony_ci 20298c2ecf20Sopenharmony_ci set_timer_ms(&execlists->timer, duration); 20308c2ecf20Sopenharmony_ci} 20318c2ecf20Sopenharmony_ci 20328c2ecf20Sopenharmony_cistatic void record_preemption(struct intel_engine_execlists *execlists) 20338c2ecf20Sopenharmony_ci{ 20348c2ecf20Sopenharmony_ci (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); 20358c2ecf20Sopenharmony_ci} 20368c2ecf20Sopenharmony_ci 20378c2ecf20Sopenharmony_cistatic unsigned long active_preempt_timeout(struct intel_engine_cs *engine, 20388c2ecf20Sopenharmony_ci const struct i915_request *rq) 20398c2ecf20Sopenharmony_ci{ 20408c2ecf20Sopenharmony_ci if (!rq) 20418c2ecf20Sopenharmony_ci return 0; 20428c2ecf20Sopenharmony_ci 20438c2ecf20Sopenharmony_ci /* Force a fast reset for terminated contexts (ignoring sysfs!) */ 20448c2ecf20Sopenharmony_ci if (unlikely(intel_context_is_banned(rq->context))) 20458c2ecf20Sopenharmony_ci return 1; 20468c2ecf20Sopenharmony_ci 20478c2ecf20Sopenharmony_ci return READ_ONCE(engine->props.preempt_timeout_ms); 20488c2ecf20Sopenharmony_ci} 20498c2ecf20Sopenharmony_ci 20508c2ecf20Sopenharmony_cistatic void set_preempt_timeout(struct intel_engine_cs *engine, 20518c2ecf20Sopenharmony_ci const struct i915_request *rq) 20528c2ecf20Sopenharmony_ci{ 20538c2ecf20Sopenharmony_ci if (!intel_engine_has_preempt_reset(engine)) 20548c2ecf20Sopenharmony_ci return; 20558c2ecf20Sopenharmony_ci 20568c2ecf20Sopenharmony_ci set_timer_ms(&engine->execlists.preempt, 20578c2ecf20Sopenharmony_ci active_preempt_timeout(engine, rq)); 20588c2ecf20Sopenharmony_ci} 20598c2ecf20Sopenharmony_ci 20608c2ecf20Sopenharmony_cistatic inline void clear_ports(struct i915_request **ports, int count) 20618c2ecf20Sopenharmony_ci{ 20628c2ecf20Sopenharmony_ci memset_p((void **)ports, NULL, count); 20638c2ecf20Sopenharmony_ci} 20648c2ecf20Sopenharmony_ci 20658c2ecf20Sopenharmony_cistatic inline void 20668c2ecf20Sopenharmony_cicopy_ports(struct i915_request **dst, struct i915_request **src, int count) 20678c2ecf20Sopenharmony_ci{ 20688c2ecf20Sopenharmony_ci /* A memcpy_p() would be very useful here! */ 20698c2ecf20Sopenharmony_ci while (count--) 20708c2ecf20Sopenharmony_ci WRITE_ONCE(*dst++, *src++); /* avoid write tearing */ 20718c2ecf20Sopenharmony_ci} 20728c2ecf20Sopenharmony_ci 20738c2ecf20Sopenharmony_cistatic void execlists_dequeue(struct intel_engine_cs *engine) 20748c2ecf20Sopenharmony_ci{ 20758c2ecf20Sopenharmony_ci struct intel_engine_execlists * const execlists = &engine->execlists; 20768c2ecf20Sopenharmony_ci struct i915_request **port = execlists->pending; 20778c2ecf20Sopenharmony_ci struct i915_request ** const last_port = port + execlists->port_mask; 20788c2ecf20Sopenharmony_ci struct i915_request * const *active; 20798c2ecf20Sopenharmony_ci struct i915_request *last; 20808c2ecf20Sopenharmony_ci struct rb_node *rb; 20818c2ecf20Sopenharmony_ci bool submit = false; 20828c2ecf20Sopenharmony_ci 20838c2ecf20Sopenharmony_ci /* 20848c2ecf20Sopenharmony_ci * Hardware submission is through 2 ports. Conceptually each port 20858c2ecf20Sopenharmony_ci * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is 20868c2ecf20Sopenharmony_ci * static for a context, and unique to each, so we only execute 20878c2ecf20Sopenharmony_ci * requests belonging to a single context from each ring. RING_HEAD 20888c2ecf20Sopenharmony_ci * is maintained by the CS in the context image, it marks the place 20898c2ecf20Sopenharmony_ci * where it got up to last time, and through RING_TAIL we tell the CS 20908c2ecf20Sopenharmony_ci * where we want to execute up to this time. 20918c2ecf20Sopenharmony_ci * 20928c2ecf20Sopenharmony_ci * In this list the requests are in order of execution. Consecutive 20938c2ecf20Sopenharmony_ci * requests from the same context are adjacent in the ringbuffer. We 20948c2ecf20Sopenharmony_ci * can combine these requests into a single RING_TAIL update: 20958c2ecf20Sopenharmony_ci * 20968c2ecf20Sopenharmony_ci * RING_HEAD...req1...req2 20978c2ecf20Sopenharmony_ci * ^- RING_TAIL 20988c2ecf20Sopenharmony_ci * since to execute req2 the CS must first execute req1. 20998c2ecf20Sopenharmony_ci * 21008c2ecf20Sopenharmony_ci * Our goal then is to point each port to the end of a consecutive 21018c2ecf20Sopenharmony_ci * sequence of requests as being the most optimal (fewest wake ups 21028c2ecf20Sopenharmony_ci * and context switches) submission. 21038c2ecf20Sopenharmony_ci */ 21048c2ecf20Sopenharmony_ci 21058c2ecf20Sopenharmony_ci for (rb = rb_first_cached(&execlists->virtual); rb; ) { 21068c2ecf20Sopenharmony_ci struct virtual_engine *ve = 21078c2ecf20Sopenharmony_ci rb_entry(rb, typeof(*ve), nodes[engine->id].rb); 21088c2ecf20Sopenharmony_ci struct i915_request *rq = READ_ONCE(ve->request); 21098c2ecf20Sopenharmony_ci 21108c2ecf20Sopenharmony_ci if (!rq) { /* lazily cleanup after another engine handled rq */ 21118c2ecf20Sopenharmony_ci rb_erase_cached(rb, &execlists->virtual); 21128c2ecf20Sopenharmony_ci RB_CLEAR_NODE(rb); 21138c2ecf20Sopenharmony_ci rb = rb_first_cached(&execlists->virtual); 21148c2ecf20Sopenharmony_ci continue; 21158c2ecf20Sopenharmony_ci } 21168c2ecf20Sopenharmony_ci 21178c2ecf20Sopenharmony_ci if (!virtual_matches(ve, rq, engine)) { 21188c2ecf20Sopenharmony_ci rb = rb_next(rb); 21198c2ecf20Sopenharmony_ci continue; 21208c2ecf20Sopenharmony_ci } 21218c2ecf20Sopenharmony_ci 21228c2ecf20Sopenharmony_ci break; 21238c2ecf20Sopenharmony_ci } 21248c2ecf20Sopenharmony_ci 21258c2ecf20Sopenharmony_ci /* 21268c2ecf20Sopenharmony_ci * If the queue is higher priority than the last 21278c2ecf20Sopenharmony_ci * request in the currently active context, submit afresh. 21288c2ecf20Sopenharmony_ci * We will resubmit again afterwards in case we need to split 21298c2ecf20Sopenharmony_ci * the active context to interject the preemption request, 21308c2ecf20Sopenharmony_ci * i.e. we will retrigger preemption following the ack in case 21318c2ecf20Sopenharmony_ci * of trouble. 21328c2ecf20Sopenharmony_ci */ 21338c2ecf20Sopenharmony_ci active = READ_ONCE(execlists->active); 21348c2ecf20Sopenharmony_ci 21358c2ecf20Sopenharmony_ci /* 21368c2ecf20Sopenharmony_ci * In theory we can skip over completed contexts that have not 21378c2ecf20Sopenharmony_ci * yet been processed by events (as those events are in flight): 21388c2ecf20Sopenharmony_ci * 21398c2ecf20Sopenharmony_ci * while ((last = *active) && i915_request_completed(last)) 21408c2ecf20Sopenharmony_ci * active++; 21418c2ecf20Sopenharmony_ci * 21428c2ecf20Sopenharmony_ci * However, the GPU cannot handle this as it will ultimately 21438c2ecf20Sopenharmony_ci * find itself trying to jump back into a context it has just 21448c2ecf20Sopenharmony_ci * completed and barf. 21458c2ecf20Sopenharmony_ci */ 21468c2ecf20Sopenharmony_ci 21478c2ecf20Sopenharmony_ci if ((last = *active)) { 21488c2ecf20Sopenharmony_ci if (need_preempt(engine, last, rb)) { 21498c2ecf20Sopenharmony_ci if (i915_request_completed(last)) { 21508c2ecf20Sopenharmony_ci tasklet_hi_schedule(&execlists->tasklet); 21518c2ecf20Sopenharmony_ci return; 21528c2ecf20Sopenharmony_ci } 21538c2ecf20Sopenharmony_ci 21548c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, 21558c2ecf20Sopenharmony_ci "preempting last=%llx:%lld, prio=%d, hint=%d\n", 21568c2ecf20Sopenharmony_ci last->fence.context, 21578c2ecf20Sopenharmony_ci last->fence.seqno, 21588c2ecf20Sopenharmony_ci last->sched.attr.priority, 21598c2ecf20Sopenharmony_ci execlists->queue_priority_hint); 21608c2ecf20Sopenharmony_ci record_preemption(execlists); 21618c2ecf20Sopenharmony_ci 21628c2ecf20Sopenharmony_ci /* 21638c2ecf20Sopenharmony_ci * Don't let the RING_HEAD advance past the breadcrumb 21648c2ecf20Sopenharmony_ci * as we unwind (and until we resubmit) so that we do 21658c2ecf20Sopenharmony_ci * not accidentally tell it to go backwards. 21668c2ecf20Sopenharmony_ci */ 21678c2ecf20Sopenharmony_ci ring_set_paused(engine, 1); 21688c2ecf20Sopenharmony_ci 21698c2ecf20Sopenharmony_ci /* 21708c2ecf20Sopenharmony_ci * Note that we have not stopped the GPU at this point, 21718c2ecf20Sopenharmony_ci * so we are unwinding the incomplete requests as they 21728c2ecf20Sopenharmony_ci * remain inflight and so by the time we do complete 21738c2ecf20Sopenharmony_ci * the preemption, some of the unwound requests may 21748c2ecf20Sopenharmony_ci * complete! 21758c2ecf20Sopenharmony_ci */ 21768c2ecf20Sopenharmony_ci __unwind_incomplete_requests(engine); 21778c2ecf20Sopenharmony_ci 21788c2ecf20Sopenharmony_ci last = NULL; 21798c2ecf20Sopenharmony_ci } else if (need_timeslice(engine, last, rb) && 21808c2ecf20Sopenharmony_ci timeslice_expired(execlists, last)) { 21818c2ecf20Sopenharmony_ci if (i915_request_completed(last)) { 21828c2ecf20Sopenharmony_ci tasklet_hi_schedule(&execlists->tasklet); 21838c2ecf20Sopenharmony_ci return; 21848c2ecf20Sopenharmony_ci } 21858c2ecf20Sopenharmony_ci 21868c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, 21878c2ecf20Sopenharmony_ci "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", 21888c2ecf20Sopenharmony_ci last->fence.context, 21898c2ecf20Sopenharmony_ci last->fence.seqno, 21908c2ecf20Sopenharmony_ci last->sched.attr.priority, 21918c2ecf20Sopenharmony_ci execlists->queue_priority_hint, 21928c2ecf20Sopenharmony_ci yesno(timeslice_yield(execlists, last))); 21938c2ecf20Sopenharmony_ci 21948c2ecf20Sopenharmony_ci ring_set_paused(engine, 1); 21958c2ecf20Sopenharmony_ci defer_active(engine); 21968c2ecf20Sopenharmony_ci 21978c2ecf20Sopenharmony_ci /* 21988c2ecf20Sopenharmony_ci * Unlike for preemption, if we rewind and continue 21998c2ecf20Sopenharmony_ci * executing the same context as previously active, 22008c2ecf20Sopenharmony_ci * the order of execution will remain the same and 22018c2ecf20Sopenharmony_ci * the tail will only advance. We do not need to 22028c2ecf20Sopenharmony_ci * force a full context restore, as a lite-restore 22038c2ecf20Sopenharmony_ci * is sufficient to resample the monotonic TAIL. 22048c2ecf20Sopenharmony_ci * 22058c2ecf20Sopenharmony_ci * If we switch to any other context, similarly we 22068c2ecf20Sopenharmony_ci * will not rewind TAIL of current context, and 22078c2ecf20Sopenharmony_ci * normal save/restore will preserve state and allow 22088c2ecf20Sopenharmony_ci * us to later continue executing the same request. 22098c2ecf20Sopenharmony_ci */ 22108c2ecf20Sopenharmony_ci last = NULL; 22118c2ecf20Sopenharmony_ci } else { 22128c2ecf20Sopenharmony_ci /* 22138c2ecf20Sopenharmony_ci * Otherwise if we already have a request pending 22148c2ecf20Sopenharmony_ci * for execution after the current one, we can 22158c2ecf20Sopenharmony_ci * just wait until the next CS event before 22168c2ecf20Sopenharmony_ci * queuing more. In either case we will force a 22178c2ecf20Sopenharmony_ci * lite-restore preemption event, but if we wait 22188c2ecf20Sopenharmony_ci * we hopefully coalesce several updates into a single 22198c2ecf20Sopenharmony_ci * submission. 22208c2ecf20Sopenharmony_ci */ 22218c2ecf20Sopenharmony_ci if (!list_is_last(&last->sched.link, 22228c2ecf20Sopenharmony_ci &engine->active.requests)) { 22238c2ecf20Sopenharmony_ci /* 22248c2ecf20Sopenharmony_ci * Even if ELSP[1] is occupied and not worthy 22258c2ecf20Sopenharmony_ci * of timeslices, our queue might be. 22268c2ecf20Sopenharmony_ci */ 22278c2ecf20Sopenharmony_ci start_timeslice(engine, queue_prio(execlists)); 22288c2ecf20Sopenharmony_ci return; 22298c2ecf20Sopenharmony_ci } 22308c2ecf20Sopenharmony_ci } 22318c2ecf20Sopenharmony_ci } 22328c2ecf20Sopenharmony_ci 22338c2ecf20Sopenharmony_ci while (rb) { /* XXX virtual is always taking precedence */ 22348c2ecf20Sopenharmony_ci struct virtual_engine *ve = 22358c2ecf20Sopenharmony_ci rb_entry(rb, typeof(*ve), nodes[engine->id].rb); 22368c2ecf20Sopenharmony_ci struct i915_request *rq; 22378c2ecf20Sopenharmony_ci 22388c2ecf20Sopenharmony_ci spin_lock(&ve->base.active.lock); 22398c2ecf20Sopenharmony_ci 22408c2ecf20Sopenharmony_ci rq = ve->request; 22418c2ecf20Sopenharmony_ci if (unlikely(!rq)) { /* lost the race to a sibling */ 22428c2ecf20Sopenharmony_ci spin_unlock(&ve->base.active.lock); 22438c2ecf20Sopenharmony_ci rb_erase_cached(rb, &execlists->virtual); 22448c2ecf20Sopenharmony_ci RB_CLEAR_NODE(rb); 22458c2ecf20Sopenharmony_ci rb = rb_first_cached(&execlists->virtual); 22468c2ecf20Sopenharmony_ci continue; 22478c2ecf20Sopenharmony_ci } 22488c2ecf20Sopenharmony_ci 22498c2ecf20Sopenharmony_ci GEM_BUG_ON(rq != ve->request); 22508c2ecf20Sopenharmony_ci GEM_BUG_ON(rq->engine != &ve->base); 22518c2ecf20Sopenharmony_ci GEM_BUG_ON(rq->context != &ve->context); 22528c2ecf20Sopenharmony_ci 22538c2ecf20Sopenharmony_ci if (rq_prio(rq) >= queue_prio(execlists)) { 22548c2ecf20Sopenharmony_ci if (!virtual_matches(ve, rq, engine)) { 22558c2ecf20Sopenharmony_ci spin_unlock(&ve->base.active.lock); 22568c2ecf20Sopenharmony_ci rb = rb_next(rb); 22578c2ecf20Sopenharmony_ci continue; 22588c2ecf20Sopenharmony_ci } 22598c2ecf20Sopenharmony_ci 22608c2ecf20Sopenharmony_ci if (last && !can_merge_rq(last, rq)) { 22618c2ecf20Sopenharmony_ci spin_unlock(&ve->base.active.lock); 22628c2ecf20Sopenharmony_ci start_timeslice(engine, rq_prio(rq)); 22638c2ecf20Sopenharmony_ci return; /* leave this for another sibling */ 22648c2ecf20Sopenharmony_ci } 22658c2ecf20Sopenharmony_ci 22668c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, 22678c2ecf20Sopenharmony_ci "virtual rq=%llx:%lld%s, new engine? %s\n", 22688c2ecf20Sopenharmony_ci rq->fence.context, 22698c2ecf20Sopenharmony_ci rq->fence.seqno, 22708c2ecf20Sopenharmony_ci i915_request_completed(rq) ? "!" : 22718c2ecf20Sopenharmony_ci i915_request_started(rq) ? "*" : 22728c2ecf20Sopenharmony_ci "", 22738c2ecf20Sopenharmony_ci yesno(engine != ve->siblings[0])); 22748c2ecf20Sopenharmony_ci 22758c2ecf20Sopenharmony_ci WRITE_ONCE(ve->request, NULL); 22768c2ecf20Sopenharmony_ci WRITE_ONCE(ve->base.execlists.queue_priority_hint, 22778c2ecf20Sopenharmony_ci INT_MIN); 22788c2ecf20Sopenharmony_ci rb_erase_cached(rb, &execlists->virtual); 22798c2ecf20Sopenharmony_ci RB_CLEAR_NODE(rb); 22808c2ecf20Sopenharmony_ci 22818c2ecf20Sopenharmony_ci GEM_BUG_ON(!(rq->execution_mask & engine->mask)); 22828c2ecf20Sopenharmony_ci WRITE_ONCE(rq->engine, engine); 22838c2ecf20Sopenharmony_ci 22848c2ecf20Sopenharmony_ci if (__i915_request_submit(rq)) { 22858c2ecf20Sopenharmony_ci /* 22868c2ecf20Sopenharmony_ci * Only after we confirm that we will submit 22878c2ecf20Sopenharmony_ci * this request (i.e. it has not already 22888c2ecf20Sopenharmony_ci * completed), do we want to update the context. 22898c2ecf20Sopenharmony_ci * 22908c2ecf20Sopenharmony_ci * This serves two purposes. It avoids 22918c2ecf20Sopenharmony_ci * unnecessary work if we are resubmitting an 22928c2ecf20Sopenharmony_ci * already completed request after timeslicing. 22938c2ecf20Sopenharmony_ci * But more importantly, it prevents us altering 22948c2ecf20Sopenharmony_ci * ve->siblings[] on an idle context, where 22958c2ecf20Sopenharmony_ci * we may be using ve->siblings[] in 22968c2ecf20Sopenharmony_ci * virtual_context_enter / virtual_context_exit. 22978c2ecf20Sopenharmony_ci */ 22988c2ecf20Sopenharmony_ci virtual_xfer_context(ve, engine); 22998c2ecf20Sopenharmony_ci GEM_BUG_ON(ve->siblings[0] != engine); 23008c2ecf20Sopenharmony_ci 23018c2ecf20Sopenharmony_ci submit = true; 23028c2ecf20Sopenharmony_ci last = rq; 23038c2ecf20Sopenharmony_ci } 23048c2ecf20Sopenharmony_ci i915_request_put(rq); 23058c2ecf20Sopenharmony_ci 23068c2ecf20Sopenharmony_ci /* 23078c2ecf20Sopenharmony_ci * Hmm, we have a bunch of virtual engine requests, 23088c2ecf20Sopenharmony_ci * but the first one was already completed (thanks 23098c2ecf20Sopenharmony_ci * preempt-to-busy!). Keep looking at the veng queue 23108c2ecf20Sopenharmony_ci * until we have no more relevant requests (i.e. 23118c2ecf20Sopenharmony_ci * the normal submit queue has higher priority). 23128c2ecf20Sopenharmony_ci */ 23138c2ecf20Sopenharmony_ci if (!submit) { 23148c2ecf20Sopenharmony_ci spin_unlock(&ve->base.active.lock); 23158c2ecf20Sopenharmony_ci rb = rb_first_cached(&execlists->virtual); 23168c2ecf20Sopenharmony_ci continue; 23178c2ecf20Sopenharmony_ci } 23188c2ecf20Sopenharmony_ci } 23198c2ecf20Sopenharmony_ci 23208c2ecf20Sopenharmony_ci spin_unlock(&ve->base.active.lock); 23218c2ecf20Sopenharmony_ci break; 23228c2ecf20Sopenharmony_ci } 23238c2ecf20Sopenharmony_ci 23248c2ecf20Sopenharmony_ci while ((rb = rb_first_cached(&execlists->queue))) { 23258c2ecf20Sopenharmony_ci struct i915_priolist *p = to_priolist(rb); 23268c2ecf20Sopenharmony_ci struct i915_request *rq, *rn; 23278c2ecf20Sopenharmony_ci int i; 23288c2ecf20Sopenharmony_ci 23298c2ecf20Sopenharmony_ci priolist_for_each_request_consume(rq, rn, p, i) { 23308c2ecf20Sopenharmony_ci bool merge = true; 23318c2ecf20Sopenharmony_ci 23328c2ecf20Sopenharmony_ci /* 23338c2ecf20Sopenharmony_ci * Can we combine this request with the current port? 23348c2ecf20Sopenharmony_ci * It has to be the same context/ringbuffer and not 23358c2ecf20Sopenharmony_ci * have any exceptions (e.g. GVT saying never to 23368c2ecf20Sopenharmony_ci * combine contexts). 23378c2ecf20Sopenharmony_ci * 23388c2ecf20Sopenharmony_ci * If we can combine the requests, we can execute both 23398c2ecf20Sopenharmony_ci * by updating the RING_TAIL to point to the end of the 23408c2ecf20Sopenharmony_ci * second request, and so we never need to tell the 23418c2ecf20Sopenharmony_ci * hardware about the first. 23428c2ecf20Sopenharmony_ci */ 23438c2ecf20Sopenharmony_ci if (last && !can_merge_rq(last, rq)) { 23448c2ecf20Sopenharmony_ci /* 23458c2ecf20Sopenharmony_ci * If we are on the second port and cannot 23468c2ecf20Sopenharmony_ci * combine this request with the last, then we 23478c2ecf20Sopenharmony_ci * are done. 23488c2ecf20Sopenharmony_ci */ 23498c2ecf20Sopenharmony_ci if (port == last_port) 23508c2ecf20Sopenharmony_ci goto done; 23518c2ecf20Sopenharmony_ci 23528c2ecf20Sopenharmony_ci /* 23538c2ecf20Sopenharmony_ci * We must not populate both ELSP[] with the 23548c2ecf20Sopenharmony_ci * same LRCA, i.e. we must submit 2 different 23558c2ecf20Sopenharmony_ci * contexts if we submit 2 ELSP. 23568c2ecf20Sopenharmony_ci */ 23578c2ecf20Sopenharmony_ci if (last->context == rq->context) 23588c2ecf20Sopenharmony_ci goto done; 23598c2ecf20Sopenharmony_ci 23608c2ecf20Sopenharmony_ci if (i915_request_has_sentinel(last)) 23618c2ecf20Sopenharmony_ci goto done; 23628c2ecf20Sopenharmony_ci 23638c2ecf20Sopenharmony_ci /* 23648c2ecf20Sopenharmony_ci * If GVT overrides us we only ever submit 23658c2ecf20Sopenharmony_ci * port[0], leaving port[1] empty. Note that we 23668c2ecf20Sopenharmony_ci * also have to be careful that we don't queue 23678c2ecf20Sopenharmony_ci * the same context (even though a different 23688c2ecf20Sopenharmony_ci * request) to the second port. 23698c2ecf20Sopenharmony_ci */ 23708c2ecf20Sopenharmony_ci if (ctx_single_port_submission(last->context) || 23718c2ecf20Sopenharmony_ci ctx_single_port_submission(rq->context)) 23728c2ecf20Sopenharmony_ci goto done; 23738c2ecf20Sopenharmony_ci 23748c2ecf20Sopenharmony_ci merge = false; 23758c2ecf20Sopenharmony_ci } 23768c2ecf20Sopenharmony_ci 23778c2ecf20Sopenharmony_ci if (__i915_request_submit(rq)) { 23788c2ecf20Sopenharmony_ci if (!merge) { 23798c2ecf20Sopenharmony_ci *port = execlists_schedule_in(last, port - execlists->pending); 23808c2ecf20Sopenharmony_ci port++; 23818c2ecf20Sopenharmony_ci last = NULL; 23828c2ecf20Sopenharmony_ci } 23838c2ecf20Sopenharmony_ci 23848c2ecf20Sopenharmony_ci GEM_BUG_ON(last && 23858c2ecf20Sopenharmony_ci !can_merge_ctx(last->context, 23868c2ecf20Sopenharmony_ci rq->context)); 23878c2ecf20Sopenharmony_ci GEM_BUG_ON(last && 23888c2ecf20Sopenharmony_ci i915_seqno_passed(last->fence.seqno, 23898c2ecf20Sopenharmony_ci rq->fence.seqno)); 23908c2ecf20Sopenharmony_ci 23918c2ecf20Sopenharmony_ci submit = true; 23928c2ecf20Sopenharmony_ci last = rq; 23938c2ecf20Sopenharmony_ci } 23948c2ecf20Sopenharmony_ci } 23958c2ecf20Sopenharmony_ci 23968c2ecf20Sopenharmony_ci rb_erase_cached(&p->node, &execlists->queue); 23978c2ecf20Sopenharmony_ci i915_priolist_free(p); 23988c2ecf20Sopenharmony_ci } 23998c2ecf20Sopenharmony_ci 24008c2ecf20Sopenharmony_cidone: 24018c2ecf20Sopenharmony_ci /* 24028c2ecf20Sopenharmony_ci * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. 24038c2ecf20Sopenharmony_ci * 24048c2ecf20Sopenharmony_ci * We choose the priority hint such that if we add a request of greater 24058c2ecf20Sopenharmony_ci * priority than this, we kick the submission tasklet to decide on 24068c2ecf20Sopenharmony_ci * the right order of submitting the requests to hardware. We must 24078c2ecf20Sopenharmony_ci * also be prepared to reorder requests as they are in-flight on the 24088c2ecf20Sopenharmony_ci * HW. We derive the priority hint then as the first "hole" in 24098c2ecf20Sopenharmony_ci * the HW submission ports and if there are no available slots, 24108c2ecf20Sopenharmony_ci * the priority of the lowest executing request, i.e. last. 24118c2ecf20Sopenharmony_ci * 24128c2ecf20Sopenharmony_ci * When we do receive a higher priority request ready to run from the 24138c2ecf20Sopenharmony_ci * user, see queue_request(), the priority hint is bumped to that 24148c2ecf20Sopenharmony_ci * request triggering preemption on the next dequeue (or subsequent 24158c2ecf20Sopenharmony_ci * interrupt for secondary ports). 24168c2ecf20Sopenharmony_ci */ 24178c2ecf20Sopenharmony_ci execlists->queue_priority_hint = queue_prio(execlists); 24188c2ecf20Sopenharmony_ci 24198c2ecf20Sopenharmony_ci if (submit) { 24208c2ecf20Sopenharmony_ci *port = execlists_schedule_in(last, port - execlists->pending); 24218c2ecf20Sopenharmony_ci execlists->switch_priority_hint = 24228c2ecf20Sopenharmony_ci switch_prio(engine, *execlists->pending); 24238c2ecf20Sopenharmony_ci 24248c2ecf20Sopenharmony_ci /* 24258c2ecf20Sopenharmony_ci * Skip if we ended up with exactly the same set of requests, 24268c2ecf20Sopenharmony_ci * e.g. trying to timeslice a pair of ordered contexts 24278c2ecf20Sopenharmony_ci */ 24288c2ecf20Sopenharmony_ci if (!memcmp(active, execlists->pending, 24298c2ecf20Sopenharmony_ci (port - execlists->pending + 1) * sizeof(*port))) { 24308c2ecf20Sopenharmony_ci do 24318c2ecf20Sopenharmony_ci execlists_schedule_out(fetch_and_zero(port)); 24328c2ecf20Sopenharmony_ci while (port-- != execlists->pending); 24338c2ecf20Sopenharmony_ci 24348c2ecf20Sopenharmony_ci goto skip_submit; 24358c2ecf20Sopenharmony_ci } 24368c2ecf20Sopenharmony_ci clear_ports(port + 1, last_port - port); 24378c2ecf20Sopenharmony_ci 24388c2ecf20Sopenharmony_ci WRITE_ONCE(execlists->yield, -1); 24398c2ecf20Sopenharmony_ci set_preempt_timeout(engine, *active); 24408c2ecf20Sopenharmony_ci execlists_submit_ports(engine); 24418c2ecf20Sopenharmony_ci } else { 24428c2ecf20Sopenharmony_ci start_timeslice(engine, execlists->queue_priority_hint); 24438c2ecf20Sopenharmony_ciskip_submit: 24448c2ecf20Sopenharmony_ci ring_set_paused(engine, 0); 24458c2ecf20Sopenharmony_ci } 24468c2ecf20Sopenharmony_ci} 24478c2ecf20Sopenharmony_ci 24488c2ecf20Sopenharmony_cistatic void 24498c2ecf20Sopenharmony_cicancel_port_requests(struct intel_engine_execlists * const execlists) 24508c2ecf20Sopenharmony_ci{ 24518c2ecf20Sopenharmony_ci struct i915_request * const *port; 24528c2ecf20Sopenharmony_ci 24538c2ecf20Sopenharmony_ci for (port = execlists->pending; *port; port++) 24548c2ecf20Sopenharmony_ci execlists_schedule_out(*port); 24558c2ecf20Sopenharmony_ci clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending)); 24568c2ecf20Sopenharmony_ci 24578c2ecf20Sopenharmony_ci /* Mark the end of active before we overwrite *active */ 24588c2ecf20Sopenharmony_ci for (port = xchg(&execlists->active, execlists->pending); *port; port++) 24598c2ecf20Sopenharmony_ci execlists_schedule_out(*port); 24608c2ecf20Sopenharmony_ci clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); 24618c2ecf20Sopenharmony_ci 24628c2ecf20Sopenharmony_ci smp_wmb(); /* complete the seqlock for execlists_active() */ 24638c2ecf20Sopenharmony_ci WRITE_ONCE(execlists->active, execlists->inflight); 24648c2ecf20Sopenharmony_ci} 24658c2ecf20Sopenharmony_ci 24668c2ecf20Sopenharmony_cistatic inline void 24678c2ecf20Sopenharmony_ciinvalidate_csb_entries(const u64 *first, const u64 *last) 24688c2ecf20Sopenharmony_ci{ 24698c2ecf20Sopenharmony_ci clflush((void *)first); 24708c2ecf20Sopenharmony_ci clflush((void *)last); 24718c2ecf20Sopenharmony_ci} 24728c2ecf20Sopenharmony_ci 24738c2ecf20Sopenharmony_ci/* 24748c2ecf20Sopenharmony_ci * Starting with Gen12, the status has a new format: 24758c2ecf20Sopenharmony_ci * 24768c2ecf20Sopenharmony_ci * bit 0: switched to new queue 24778c2ecf20Sopenharmony_ci * bit 1: reserved 24788c2ecf20Sopenharmony_ci * bit 2: semaphore wait mode (poll or signal), only valid when 24798c2ecf20Sopenharmony_ci * switch detail is set to "wait on semaphore" 24808c2ecf20Sopenharmony_ci * bits 3-5: engine class 24818c2ecf20Sopenharmony_ci * bits 6-11: engine instance 24828c2ecf20Sopenharmony_ci * bits 12-14: reserved 24838c2ecf20Sopenharmony_ci * bits 15-25: sw context id of the lrc the GT switched to 24848c2ecf20Sopenharmony_ci * bits 26-31: sw counter of the lrc the GT switched to 24858c2ecf20Sopenharmony_ci * bits 32-35: context switch detail 24868c2ecf20Sopenharmony_ci * - 0: ctx complete 24878c2ecf20Sopenharmony_ci * - 1: wait on sync flip 24888c2ecf20Sopenharmony_ci * - 2: wait on vblank 24898c2ecf20Sopenharmony_ci * - 3: wait on scanline 24908c2ecf20Sopenharmony_ci * - 4: wait on semaphore 24918c2ecf20Sopenharmony_ci * - 5: context preempted (not on SEMAPHORE_WAIT or 24928c2ecf20Sopenharmony_ci * WAIT_FOR_EVENT) 24938c2ecf20Sopenharmony_ci * bit 36: reserved 24948c2ecf20Sopenharmony_ci * bits 37-43: wait detail (for switch detail 1 to 4) 24958c2ecf20Sopenharmony_ci * bits 44-46: reserved 24968c2ecf20Sopenharmony_ci * bits 47-57: sw context id of the lrc the GT switched away from 24978c2ecf20Sopenharmony_ci * bits 58-63: sw counter of the lrc the GT switched away from 24988c2ecf20Sopenharmony_ci */ 24998c2ecf20Sopenharmony_cistatic inline bool gen12_csb_parse(const u64 *csb) 25008c2ecf20Sopenharmony_ci{ 25018c2ecf20Sopenharmony_ci bool ctx_away_valid; 25028c2ecf20Sopenharmony_ci bool new_queue; 25038c2ecf20Sopenharmony_ci u64 entry; 25048c2ecf20Sopenharmony_ci 25058c2ecf20Sopenharmony_ci /* HSD#22011248461 */ 25068c2ecf20Sopenharmony_ci entry = READ_ONCE(*csb); 25078c2ecf20Sopenharmony_ci if (unlikely(entry == -1)) { 25088c2ecf20Sopenharmony_ci preempt_disable(); 25098c2ecf20Sopenharmony_ci if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50)) 25108c2ecf20Sopenharmony_ci GEM_WARN_ON("50us CSB timeout"); 25118c2ecf20Sopenharmony_ci preempt_enable(); 25128c2ecf20Sopenharmony_ci } 25138c2ecf20Sopenharmony_ci WRITE_ONCE(*(u64 *)csb, -1); 25148c2ecf20Sopenharmony_ci 25158c2ecf20Sopenharmony_ci ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry)); 25168c2ecf20Sopenharmony_ci new_queue = 25178c2ecf20Sopenharmony_ci lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; 25188c2ecf20Sopenharmony_ci 25198c2ecf20Sopenharmony_ci /* 25208c2ecf20Sopenharmony_ci * The context switch detail is not guaranteed to be 5 when a preemption 25218c2ecf20Sopenharmony_ci * occurs, so we can't just check for that. The check below works for 25228c2ecf20Sopenharmony_ci * all the cases we care about, including preemptions of WAIT 25238c2ecf20Sopenharmony_ci * instructions and lite-restore. Preempt-to-idle via the CTRL register 25248c2ecf20Sopenharmony_ci * would require some extra handling, but we don't support that. 25258c2ecf20Sopenharmony_ci */ 25268c2ecf20Sopenharmony_ci if (!ctx_away_valid || new_queue) { 25278c2ecf20Sopenharmony_ci GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry))); 25288c2ecf20Sopenharmony_ci return true; 25298c2ecf20Sopenharmony_ci } 25308c2ecf20Sopenharmony_ci 25318c2ecf20Sopenharmony_ci /* 25328c2ecf20Sopenharmony_ci * switch detail = 5 is covered by the case above and we do not expect a 25338c2ecf20Sopenharmony_ci * context switch on an unsuccessful wait instruction since we always 25348c2ecf20Sopenharmony_ci * use polling mode. 25358c2ecf20Sopenharmony_ci */ 25368c2ecf20Sopenharmony_ci GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry))); 25378c2ecf20Sopenharmony_ci return false; 25388c2ecf20Sopenharmony_ci} 25398c2ecf20Sopenharmony_ci 25408c2ecf20Sopenharmony_cistatic inline bool gen8_csb_parse(const u64 *csb) 25418c2ecf20Sopenharmony_ci{ 25428c2ecf20Sopenharmony_ci return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); 25438c2ecf20Sopenharmony_ci} 25448c2ecf20Sopenharmony_ci 25458c2ecf20Sopenharmony_cistatic void process_csb(struct intel_engine_cs *engine) 25468c2ecf20Sopenharmony_ci{ 25478c2ecf20Sopenharmony_ci struct intel_engine_execlists * const execlists = &engine->execlists; 25488c2ecf20Sopenharmony_ci const u64 * const buf = execlists->csb_status; 25498c2ecf20Sopenharmony_ci const u8 num_entries = execlists->csb_size; 25508c2ecf20Sopenharmony_ci u8 head, tail; 25518c2ecf20Sopenharmony_ci 25528c2ecf20Sopenharmony_ci /* 25538c2ecf20Sopenharmony_ci * As we modify our execlists state tracking we require exclusive 25548c2ecf20Sopenharmony_ci * access. Either we are inside the tasklet, or the tasklet is disabled 25558c2ecf20Sopenharmony_ci * and we assume that is only inside the reset paths and so serialised. 25568c2ecf20Sopenharmony_ci */ 25578c2ecf20Sopenharmony_ci GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && 25588c2ecf20Sopenharmony_ci !reset_in_progress(execlists)); 25598c2ecf20Sopenharmony_ci GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine)); 25608c2ecf20Sopenharmony_ci 25618c2ecf20Sopenharmony_ci /* 25628c2ecf20Sopenharmony_ci * Note that csb_write, csb_status may be either in HWSP or mmio. 25638c2ecf20Sopenharmony_ci * When reading from the csb_write mmio register, we have to be 25648c2ecf20Sopenharmony_ci * careful to only use the GEN8_CSB_WRITE_PTR portion, which is 25658c2ecf20Sopenharmony_ci * the low 4bits. As it happens we know the next 4bits are always 25668c2ecf20Sopenharmony_ci * zero and so we can simply masked off the low u8 of the register 25678c2ecf20Sopenharmony_ci * and treat it identically to reading from the HWSP (without having 25688c2ecf20Sopenharmony_ci * to use explicit shifting and masking, and probably bifurcating 25698c2ecf20Sopenharmony_ci * the code to handle the legacy mmio read). 25708c2ecf20Sopenharmony_ci */ 25718c2ecf20Sopenharmony_ci head = execlists->csb_head; 25728c2ecf20Sopenharmony_ci tail = READ_ONCE(*execlists->csb_write); 25738c2ecf20Sopenharmony_ci if (unlikely(head == tail)) 25748c2ecf20Sopenharmony_ci return; 25758c2ecf20Sopenharmony_ci 25768c2ecf20Sopenharmony_ci /* 25778c2ecf20Sopenharmony_ci * We will consume all events from HW, or at least pretend to. 25788c2ecf20Sopenharmony_ci * 25798c2ecf20Sopenharmony_ci * The sequence of events from the HW is deterministic, and derived 25808c2ecf20Sopenharmony_ci * from our writes to the ELSP, with a smidgen of variability for 25818c2ecf20Sopenharmony_ci * the arrival of the asynchronous requests wrt to the inflight 25828c2ecf20Sopenharmony_ci * execution. If the HW sends an event that does not correspond with 25838c2ecf20Sopenharmony_ci * the one we are expecting, we have to abandon all hope as we lose 25848c2ecf20Sopenharmony_ci * all tracking of what the engine is actually executing. We will 25858c2ecf20Sopenharmony_ci * only detect we are out of sequence with the HW when we get an 25868c2ecf20Sopenharmony_ci * 'impossible' event because we have already drained our own 25878c2ecf20Sopenharmony_ci * preemption/promotion queue. If this occurs, we know that we likely 25888c2ecf20Sopenharmony_ci * lost track of execution earlier and must unwind and restart, the 25898c2ecf20Sopenharmony_ci * simplest way is by stop processing the event queue and force the 25908c2ecf20Sopenharmony_ci * engine to reset. 25918c2ecf20Sopenharmony_ci */ 25928c2ecf20Sopenharmony_ci execlists->csb_head = tail; 25938c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail); 25948c2ecf20Sopenharmony_ci 25958c2ecf20Sopenharmony_ci /* 25968c2ecf20Sopenharmony_ci * Hopefully paired with a wmb() in HW! 25978c2ecf20Sopenharmony_ci * 25988c2ecf20Sopenharmony_ci * We must complete the read of the write pointer before any reads 25998c2ecf20Sopenharmony_ci * from the CSB, so that we do not see stale values. Without an rmb 26008c2ecf20Sopenharmony_ci * (lfence) the HW may speculatively perform the CSB[] reads *before* 26018c2ecf20Sopenharmony_ci * we perform the READ_ONCE(*csb_write). 26028c2ecf20Sopenharmony_ci */ 26038c2ecf20Sopenharmony_ci rmb(); 26048c2ecf20Sopenharmony_ci do { 26058c2ecf20Sopenharmony_ci bool promote; 26068c2ecf20Sopenharmony_ci 26078c2ecf20Sopenharmony_ci if (++head == num_entries) 26088c2ecf20Sopenharmony_ci head = 0; 26098c2ecf20Sopenharmony_ci 26108c2ecf20Sopenharmony_ci /* 26118c2ecf20Sopenharmony_ci * We are flying near dragons again. 26128c2ecf20Sopenharmony_ci * 26138c2ecf20Sopenharmony_ci * We hold a reference to the request in execlist_port[] 26148c2ecf20Sopenharmony_ci * but no more than that. We are operating in softirq 26158c2ecf20Sopenharmony_ci * context and so cannot hold any mutex or sleep. That 26168c2ecf20Sopenharmony_ci * prevents us stopping the requests we are processing 26178c2ecf20Sopenharmony_ci * in port[] from being retired simultaneously (the 26188c2ecf20Sopenharmony_ci * breadcrumb will be complete before we see the 26198c2ecf20Sopenharmony_ci * context-switch). As we only hold the reference to the 26208c2ecf20Sopenharmony_ci * request, any pointer chasing underneath the request 26218c2ecf20Sopenharmony_ci * is subject to a potential use-after-free. Thus we 26228c2ecf20Sopenharmony_ci * store all of the bookkeeping within port[] as 26238c2ecf20Sopenharmony_ci * required, and avoid using unguarded pointers beneath 26248c2ecf20Sopenharmony_ci * request itself. The same applies to the atomic 26258c2ecf20Sopenharmony_ci * status notifier. 26268c2ecf20Sopenharmony_ci */ 26278c2ecf20Sopenharmony_ci 26288c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", 26298c2ecf20Sopenharmony_ci head, 26308c2ecf20Sopenharmony_ci upper_32_bits(buf[head]), 26318c2ecf20Sopenharmony_ci lower_32_bits(buf[head])); 26328c2ecf20Sopenharmony_ci 26338c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) >= 12) 26348c2ecf20Sopenharmony_ci promote = gen12_csb_parse(buf + head); 26358c2ecf20Sopenharmony_ci else 26368c2ecf20Sopenharmony_ci promote = gen8_csb_parse(buf + head); 26378c2ecf20Sopenharmony_ci if (promote) { 26388c2ecf20Sopenharmony_ci struct i915_request * const *old = execlists->active; 26398c2ecf20Sopenharmony_ci 26408c2ecf20Sopenharmony_ci if (GEM_WARN_ON(!*execlists->pending)) { 26418c2ecf20Sopenharmony_ci execlists->error_interrupt |= ERROR_CSB; 26428c2ecf20Sopenharmony_ci break; 26438c2ecf20Sopenharmony_ci } 26448c2ecf20Sopenharmony_ci 26458c2ecf20Sopenharmony_ci ring_set_paused(engine, 0); 26468c2ecf20Sopenharmony_ci 26478c2ecf20Sopenharmony_ci /* Point active to the new ELSP; prevent overwriting */ 26488c2ecf20Sopenharmony_ci WRITE_ONCE(execlists->active, execlists->pending); 26498c2ecf20Sopenharmony_ci smp_wmb(); /* notify execlists_active() */ 26508c2ecf20Sopenharmony_ci 26518c2ecf20Sopenharmony_ci /* cancel old inflight, prepare for switch */ 26528c2ecf20Sopenharmony_ci trace_ports(execlists, "preempted", old); 26538c2ecf20Sopenharmony_ci while (*old) 26548c2ecf20Sopenharmony_ci execlists_schedule_out(*old++); 26558c2ecf20Sopenharmony_ci 26568c2ecf20Sopenharmony_ci /* switch pending to inflight */ 26578c2ecf20Sopenharmony_ci GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); 26588c2ecf20Sopenharmony_ci copy_ports(execlists->inflight, 26598c2ecf20Sopenharmony_ci execlists->pending, 26608c2ecf20Sopenharmony_ci execlists_num_ports(execlists)); 26618c2ecf20Sopenharmony_ci smp_wmb(); /* complete the seqlock */ 26628c2ecf20Sopenharmony_ci WRITE_ONCE(execlists->active, execlists->inflight); 26638c2ecf20Sopenharmony_ci 26648c2ecf20Sopenharmony_ci /* XXX Magic delay for tgl */ 26658c2ecf20Sopenharmony_ci ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); 26668c2ecf20Sopenharmony_ci 26678c2ecf20Sopenharmony_ci WRITE_ONCE(execlists->pending[0], NULL); 26688c2ecf20Sopenharmony_ci } else { 26698c2ecf20Sopenharmony_ci if (GEM_WARN_ON(!*execlists->active)) { 26708c2ecf20Sopenharmony_ci execlists->error_interrupt |= ERROR_CSB; 26718c2ecf20Sopenharmony_ci break; 26728c2ecf20Sopenharmony_ci } 26738c2ecf20Sopenharmony_ci 26748c2ecf20Sopenharmony_ci /* port0 completed, advanced to port1 */ 26758c2ecf20Sopenharmony_ci trace_ports(execlists, "completed", execlists->active); 26768c2ecf20Sopenharmony_ci 26778c2ecf20Sopenharmony_ci /* 26788c2ecf20Sopenharmony_ci * We rely on the hardware being strongly 26798c2ecf20Sopenharmony_ci * ordered, that the breadcrumb write is 26808c2ecf20Sopenharmony_ci * coherent (visible from the CPU) before the 26818c2ecf20Sopenharmony_ci * user interrupt is processed. One might assume 26828c2ecf20Sopenharmony_ci * that the breadcrumb write being before the 26838c2ecf20Sopenharmony_ci * user interrupt and the CS event for the context 26848c2ecf20Sopenharmony_ci * switch would therefore be before the CS event 26858c2ecf20Sopenharmony_ci * itself... 26868c2ecf20Sopenharmony_ci */ 26878c2ecf20Sopenharmony_ci if (GEM_SHOW_DEBUG() && 26888c2ecf20Sopenharmony_ci !i915_request_completed(*execlists->active)) { 26898c2ecf20Sopenharmony_ci struct i915_request *rq = *execlists->active; 26908c2ecf20Sopenharmony_ci const u32 *regs __maybe_unused = 26918c2ecf20Sopenharmony_ci rq->context->lrc_reg_state; 26928c2ecf20Sopenharmony_ci 26938c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, 26948c2ecf20Sopenharmony_ci "context completed before request!\n"); 26958c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, 26968c2ecf20Sopenharmony_ci "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n", 26978c2ecf20Sopenharmony_ci ENGINE_READ(engine, RING_START), 26988c2ecf20Sopenharmony_ci ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR, 26998c2ecf20Sopenharmony_ci ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR, 27008c2ecf20Sopenharmony_ci ENGINE_READ(engine, RING_CTL), 27018c2ecf20Sopenharmony_ci ENGINE_READ(engine, RING_MI_MODE)); 27028c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, 27038c2ecf20Sopenharmony_ci "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ", 27048c2ecf20Sopenharmony_ci i915_ggtt_offset(rq->ring->vma), 27058c2ecf20Sopenharmony_ci rq->head, rq->tail, 27068c2ecf20Sopenharmony_ci rq->fence.context, 27078c2ecf20Sopenharmony_ci lower_32_bits(rq->fence.seqno), 27088c2ecf20Sopenharmony_ci hwsp_seqno(rq)); 27098c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, 27108c2ecf20Sopenharmony_ci "ctx:{start:%08x, head:%04x, tail:%04x}, ", 27118c2ecf20Sopenharmony_ci regs[CTX_RING_START], 27128c2ecf20Sopenharmony_ci regs[CTX_RING_HEAD], 27138c2ecf20Sopenharmony_ci regs[CTX_RING_TAIL]); 27148c2ecf20Sopenharmony_ci } 27158c2ecf20Sopenharmony_ci 27168c2ecf20Sopenharmony_ci execlists_schedule_out(*execlists->active++); 27178c2ecf20Sopenharmony_ci 27188c2ecf20Sopenharmony_ci GEM_BUG_ON(execlists->active - execlists->inflight > 27198c2ecf20Sopenharmony_ci execlists_num_ports(execlists)); 27208c2ecf20Sopenharmony_ci } 27218c2ecf20Sopenharmony_ci } while (head != tail); 27228c2ecf20Sopenharmony_ci 27238c2ecf20Sopenharmony_ci set_timeslice(engine); 27248c2ecf20Sopenharmony_ci 27258c2ecf20Sopenharmony_ci /* 27268c2ecf20Sopenharmony_ci * Gen11 has proven to fail wrt global observation point between 27278c2ecf20Sopenharmony_ci * entry and tail update, failing on the ordering and thus 27288c2ecf20Sopenharmony_ci * we see an old entry in the context status buffer. 27298c2ecf20Sopenharmony_ci * 27308c2ecf20Sopenharmony_ci * Forcibly evict out entries for the next gpu csb update, 27318c2ecf20Sopenharmony_ci * to increase the odds that we get a fresh entries with non 27328c2ecf20Sopenharmony_ci * working hardware. The cost for doing so comes out mostly with 27338c2ecf20Sopenharmony_ci * the wash as hardware, working or not, will need to do the 27348c2ecf20Sopenharmony_ci * invalidation before. 27358c2ecf20Sopenharmony_ci */ 27368c2ecf20Sopenharmony_ci invalidate_csb_entries(&buf[0], &buf[num_entries - 1]); 27378c2ecf20Sopenharmony_ci} 27388c2ecf20Sopenharmony_ci 27398c2ecf20Sopenharmony_cistatic void __execlists_submission_tasklet(struct intel_engine_cs *const engine) 27408c2ecf20Sopenharmony_ci{ 27418c2ecf20Sopenharmony_ci lockdep_assert_held(&engine->active.lock); 27428c2ecf20Sopenharmony_ci if (!READ_ONCE(engine->execlists.pending[0])) { 27438c2ecf20Sopenharmony_ci rcu_read_lock(); /* protect peeking at execlists->active */ 27448c2ecf20Sopenharmony_ci execlists_dequeue(engine); 27458c2ecf20Sopenharmony_ci rcu_read_unlock(); 27468c2ecf20Sopenharmony_ci } 27478c2ecf20Sopenharmony_ci} 27488c2ecf20Sopenharmony_ci 27498c2ecf20Sopenharmony_cistatic void __execlists_hold(struct i915_request *rq) 27508c2ecf20Sopenharmony_ci{ 27518c2ecf20Sopenharmony_ci LIST_HEAD(list); 27528c2ecf20Sopenharmony_ci 27538c2ecf20Sopenharmony_ci do { 27548c2ecf20Sopenharmony_ci struct i915_dependency *p; 27558c2ecf20Sopenharmony_ci 27568c2ecf20Sopenharmony_ci if (i915_request_is_active(rq)) 27578c2ecf20Sopenharmony_ci __i915_request_unsubmit(rq); 27588c2ecf20Sopenharmony_ci 27598c2ecf20Sopenharmony_ci clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 27608c2ecf20Sopenharmony_ci list_move_tail(&rq->sched.link, &rq->engine->active.hold); 27618c2ecf20Sopenharmony_ci i915_request_set_hold(rq); 27628c2ecf20Sopenharmony_ci RQ_TRACE(rq, "on hold\n"); 27638c2ecf20Sopenharmony_ci 27648c2ecf20Sopenharmony_ci for_each_waiter(p, rq) { 27658c2ecf20Sopenharmony_ci struct i915_request *w = 27668c2ecf20Sopenharmony_ci container_of(p->waiter, typeof(*w), sched); 27678c2ecf20Sopenharmony_ci 27688c2ecf20Sopenharmony_ci /* Leave semaphores spinning on the other engines */ 27698c2ecf20Sopenharmony_ci if (w->engine != rq->engine) 27708c2ecf20Sopenharmony_ci continue; 27718c2ecf20Sopenharmony_ci 27728c2ecf20Sopenharmony_ci if (!i915_request_is_ready(w)) 27738c2ecf20Sopenharmony_ci continue; 27748c2ecf20Sopenharmony_ci 27758c2ecf20Sopenharmony_ci if (i915_request_completed(w)) 27768c2ecf20Sopenharmony_ci continue; 27778c2ecf20Sopenharmony_ci 27788c2ecf20Sopenharmony_ci if (i915_request_on_hold(w)) 27798c2ecf20Sopenharmony_ci continue; 27808c2ecf20Sopenharmony_ci 27818c2ecf20Sopenharmony_ci list_move_tail(&w->sched.link, &list); 27828c2ecf20Sopenharmony_ci } 27838c2ecf20Sopenharmony_ci 27848c2ecf20Sopenharmony_ci rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); 27858c2ecf20Sopenharmony_ci } while (rq); 27868c2ecf20Sopenharmony_ci} 27878c2ecf20Sopenharmony_ci 27888c2ecf20Sopenharmony_cistatic bool execlists_hold(struct intel_engine_cs *engine, 27898c2ecf20Sopenharmony_ci struct i915_request *rq) 27908c2ecf20Sopenharmony_ci{ 27918c2ecf20Sopenharmony_ci if (i915_request_on_hold(rq)) 27928c2ecf20Sopenharmony_ci return false; 27938c2ecf20Sopenharmony_ci 27948c2ecf20Sopenharmony_ci spin_lock_irq(&engine->active.lock); 27958c2ecf20Sopenharmony_ci 27968c2ecf20Sopenharmony_ci if (i915_request_completed(rq)) { /* too late! */ 27978c2ecf20Sopenharmony_ci rq = NULL; 27988c2ecf20Sopenharmony_ci goto unlock; 27998c2ecf20Sopenharmony_ci } 28008c2ecf20Sopenharmony_ci 28018c2ecf20Sopenharmony_ci if (rq->engine != engine) { /* preempted virtual engine */ 28028c2ecf20Sopenharmony_ci struct virtual_engine *ve = to_virtual_engine(rq->engine); 28038c2ecf20Sopenharmony_ci 28048c2ecf20Sopenharmony_ci /* 28058c2ecf20Sopenharmony_ci * intel_context_inflight() is only protected by virtue 28068c2ecf20Sopenharmony_ci * of process_csb() being called only by the tasklet (or 28078c2ecf20Sopenharmony_ci * directly from inside reset while the tasklet is suspended). 28088c2ecf20Sopenharmony_ci * Assert that neither of those are allowed to run while we 28098c2ecf20Sopenharmony_ci * poke at the request queues. 28108c2ecf20Sopenharmony_ci */ 28118c2ecf20Sopenharmony_ci GEM_BUG_ON(!reset_in_progress(&engine->execlists)); 28128c2ecf20Sopenharmony_ci 28138c2ecf20Sopenharmony_ci /* 28148c2ecf20Sopenharmony_ci * An unsubmitted request along a virtual engine will 28158c2ecf20Sopenharmony_ci * remain on the active (this) engine until we are able 28168c2ecf20Sopenharmony_ci * to process the context switch away (and so mark the 28178c2ecf20Sopenharmony_ci * context as no longer in flight). That cannot have happened 28188c2ecf20Sopenharmony_ci * yet, otherwise we would not be hanging! 28198c2ecf20Sopenharmony_ci */ 28208c2ecf20Sopenharmony_ci spin_lock(&ve->base.active.lock); 28218c2ecf20Sopenharmony_ci GEM_BUG_ON(intel_context_inflight(rq->context) != engine); 28228c2ecf20Sopenharmony_ci GEM_BUG_ON(ve->request != rq); 28238c2ecf20Sopenharmony_ci ve->request = NULL; 28248c2ecf20Sopenharmony_ci spin_unlock(&ve->base.active.lock); 28258c2ecf20Sopenharmony_ci i915_request_put(rq); 28268c2ecf20Sopenharmony_ci 28278c2ecf20Sopenharmony_ci rq->engine = engine; 28288c2ecf20Sopenharmony_ci } 28298c2ecf20Sopenharmony_ci 28308c2ecf20Sopenharmony_ci /* 28318c2ecf20Sopenharmony_ci * Transfer this request onto the hold queue to prevent it 28328c2ecf20Sopenharmony_ci * being resumbitted to HW (and potentially completed) before we have 28338c2ecf20Sopenharmony_ci * released it. Since we may have already submitted following 28348c2ecf20Sopenharmony_ci * requests, we need to remove those as well. 28358c2ecf20Sopenharmony_ci */ 28368c2ecf20Sopenharmony_ci GEM_BUG_ON(i915_request_on_hold(rq)); 28378c2ecf20Sopenharmony_ci GEM_BUG_ON(rq->engine != engine); 28388c2ecf20Sopenharmony_ci __execlists_hold(rq); 28398c2ecf20Sopenharmony_ci GEM_BUG_ON(list_empty(&engine->active.hold)); 28408c2ecf20Sopenharmony_ci 28418c2ecf20Sopenharmony_ciunlock: 28428c2ecf20Sopenharmony_ci spin_unlock_irq(&engine->active.lock); 28438c2ecf20Sopenharmony_ci return rq; 28448c2ecf20Sopenharmony_ci} 28458c2ecf20Sopenharmony_ci 28468c2ecf20Sopenharmony_cistatic bool hold_request(const struct i915_request *rq) 28478c2ecf20Sopenharmony_ci{ 28488c2ecf20Sopenharmony_ci struct i915_dependency *p; 28498c2ecf20Sopenharmony_ci bool result = false; 28508c2ecf20Sopenharmony_ci 28518c2ecf20Sopenharmony_ci /* 28528c2ecf20Sopenharmony_ci * If one of our ancestors is on hold, we must also be on hold, 28538c2ecf20Sopenharmony_ci * otherwise we will bypass it and execute before it. 28548c2ecf20Sopenharmony_ci */ 28558c2ecf20Sopenharmony_ci rcu_read_lock(); 28568c2ecf20Sopenharmony_ci for_each_signaler(p, rq) { 28578c2ecf20Sopenharmony_ci const struct i915_request *s = 28588c2ecf20Sopenharmony_ci container_of(p->signaler, typeof(*s), sched); 28598c2ecf20Sopenharmony_ci 28608c2ecf20Sopenharmony_ci if (s->engine != rq->engine) 28618c2ecf20Sopenharmony_ci continue; 28628c2ecf20Sopenharmony_ci 28638c2ecf20Sopenharmony_ci result = i915_request_on_hold(s); 28648c2ecf20Sopenharmony_ci if (result) 28658c2ecf20Sopenharmony_ci break; 28668c2ecf20Sopenharmony_ci } 28678c2ecf20Sopenharmony_ci rcu_read_unlock(); 28688c2ecf20Sopenharmony_ci 28698c2ecf20Sopenharmony_ci return result; 28708c2ecf20Sopenharmony_ci} 28718c2ecf20Sopenharmony_ci 28728c2ecf20Sopenharmony_cistatic void __execlists_unhold(struct i915_request *rq) 28738c2ecf20Sopenharmony_ci{ 28748c2ecf20Sopenharmony_ci LIST_HEAD(list); 28758c2ecf20Sopenharmony_ci 28768c2ecf20Sopenharmony_ci do { 28778c2ecf20Sopenharmony_ci struct i915_dependency *p; 28788c2ecf20Sopenharmony_ci 28798c2ecf20Sopenharmony_ci RQ_TRACE(rq, "hold release\n"); 28808c2ecf20Sopenharmony_ci 28818c2ecf20Sopenharmony_ci GEM_BUG_ON(!i915_request_on_hold(rq)); 28828c2ecf20Sopenharmony_ci GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); 28838c2ecf20Sopenharmony_ci 28848c2ecf20Sopenharmony_ci i915_request_clear_hold(rq); 28858c2ecf20Sopenharmony_ci list_move_tail(&rq->sched.link, 28868c2ecf20Sopenharmony_ci i915_sched_lookup_priolist(rq->engine, 28878c2ecf20Sopenharmony_ci rq_prio(rq))); 28888c2ecf20Sopenharmony_ci set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 28898c2ecf20Sopenharmony_ci 28908c2ecf20Sopenharmony_ci /* Also release any children on this engine that are ready */ 28918c2ecf20Sopenharmony_ci for_each_waiter(p, rq) { 28928c2ecf20Sopenharmony_ci struct i915_request *w = 28938c2ecf20Sopenharmony_ci container_of(p->waiter, typeof(*w), sched); 28948c2ecf20Sopenharmony_ci 28958c2ecf20Sopenharmony_ci /* Propagate any change in error status */ 28968c2ecf20Sopenharmony_ci if (rq->fence.error) 28978c2ecf20Sopenharmony_ci i915_request_set_error_once(w, rq->fence.error); 28988c2ecf20Sopenharmony_ci 28998c2ecf20Sopenharmony_ci if (w->engine != rq->engine) 29008c2ecf20Sopenharmony_ci continue; 29018c2ecf20Sopenharmony_ci 29028c2ecf20Sopenharmony_ci if (!i915_request_on_hold(w)) 29038c2ecf20Sopenharmony_ci continue; 29048c2ecf20Sopenharmony_ci 29058c2ecf20Sopenharmony_ci /* Check that no other parents are also on hold */ 29068c2ecf20Sopenharmony_ci if (hold_request(w)) 29078c2ecf20Sopenharmony_ci continue; 29088c2ecf20Sopenharmony_ci 29098c2ecf20Sopenharmony_ci list_move_tail(&w->sched.link, &list); 29108c2ecf20Sopenharmony_ci } 29118c2ecf20Sopenharmony_ci 29128c2ecf20Sopenharmony_ci rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); 29138c2ecf20Sopenharmony_ci } while (rq); 29148c2ecf20Sopenharmony_ci} 29158c2ecf20Sopenharmony_ci 29168c2ecf20Sopenharmony_cistatic void execlists_unhold(struct intel_engine_cs *engine, 29178c2ecf20Sopenharmony_ci struct i915_request *rq) 29188c2ecf20Sopenharmony_ci{ 29198c2ecf20Sopenharmony_ci spin_lock_irq(&engine->active.lock); 29208c2ecf20Sopenharmony_ci 29218c2ecf20Sopenharmony_ci /* 29228c2ecf20Sopenharmony_ci * Move this request back to the priority queue, and all of its 29238c2ecf20Sopenharmony_ci * children and grandchildren that were suspended along with it. 29248c2ecf20Sopenharmony_ci */ 29258c2ecf20Sopenharmony_ci __execlists_unhold(rq); 29268c2ecf20Sopenharmony_ci 29278c2ecf20Sopenharmony_ci if (rq_prio(rq) > engine->execlists.queue_priority_hint) { 29288c2ecf20Sopenharmony_ci engine->execlists.queue_priority_hint = rq_prio(rq); 29298c2ecf20Sopenharmony_ci tasklet_hi_schedule(&engine->execlists.tasklet); 29308c2ecf20Sopenharmony_ci } 29318c2ecf20Sopenharmony_ci 29328c2ecf20Sopenharmony_ci spin_unlock_irq(&engine->active.lock); 29338c2ecf20Sopenharmony_ci} 29348c2ecf20Sopenharmony_ci 29358c2ecf20Sopenharmony_cistruct execlists_capture { 29368c2ecf20Sopenharmony_ci struct work_struct work; 29378c2ecf20Sopenharmony_ci struct i915_request *rq; 29388c2ecf20Sopenharmony_ci struct i915_gpu_coredump *error; 29398c2ecf20Sopenharmony_ci}; 29408c2ecf20Sopenharmony_ci 29418c2ecf20Sopenharmony_cistatic void execlists_capture_work(struct work_struct *work) 29428c2ecf20Sopenharmony_ci{ 29438c2ecf20Sopenharmony_ci struct execlists_capture *cap = container_of(work, typeof(*cap), work); 29448c2ecf20Sopenharmony_ci const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN; 29458c2ecf20Sopenharmony_ci struct intel_engine_cs *engine = cap->rq->engine; 29468c2ecf20Sopenharmony_ci struct intel_gt_coredump *gt = cap->error->gt; 29478c2ecf20Sopenharmony_ci struct intel_engine_capture_vma *vma; 29488c2ecf20Sopenharmony_ci 29498c2ecf20Sopenharmony_ci /* Compress all the objects attached to the request, slow! */ 29508c2ecf20Sopenharmony_ci vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); 29518c2ecf20Sopenharmony_ci if (vma) { 29528c2ecf20Sopenharmony_ci struct i915_vma_compress *compress = 29538c2ecf20Sopenharmony_ci i915_vma_capture_prepare(gt); 29548c2ecf20Sopenharmony_ci 29558c2ecf20Sopenharmony_ci intel_engine_coredump_add_vma(gt->engine, vma, compress); 29568c2ecf20Sopenharmony_ci i915_vma_capture_finish(gt, compress); 29578c2ecf20Sopenharmony_ci } 29588c2ecf20Sopenharmony_ci 29598c2ecf20Sopenharmony_ci gt->simulated = gt->engine->simulated; 29608c2ecf20Sopenharmony_ci cap->error->simulated = gt->simulated; 29618c2ecf20Sopenharmony_ci 29628c2ecf20Sopenharmony_ci /* Publish the error state, and announce it to the world */ 29638c2ecf20Sopenharmony_ci i915_error_state_store(cap->error); 29648c2ecf20Sopenharmony_ci i915_gpu_coredump_put(cap->error); 29658c2ecf20Sopenharmony_ci 29668c2ecf20Sopenharmony_ci /* Return this request and all that depend upon it for signaling */ 29678c2ecf20Sopenharmony_ci execlists_unhold(engine, cap->rq); 29688c2ecf20Sopenharmony_ci i915_request_put(cap->rq); 29698c2ecf20Sopenharmony_ci 29708c2ecf20Sopenharmony_ci kfree(cap); 29718c2ecf20Sopenharmony_ci} 29728c2ecf20Sopenharmony_ci 29738c2ecf20Sopenharmony_cistatic struct execlists_capture *capture_regs(struct intel_engine_cs *engine) 29748c2ecf20Sopenharmony_ci{ 29758c2ecf20Sopenharmony_ci const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; 29768c2ecf20Sopenharmony_ci struct execlists_capture *cap; 29778c2ecf20Sopenharmony_ci 29788c2ecf20Sopenharmony_ci cap = kmalloc(sizeof(*cap), gfp); 29798c2ecf20Sopenharmony_ci if (!cap) 29808c2ecf20Sopenharmony_ci return NULL; 29818c2ecf20Sopenharmony_ci 29828c2ecf20Sopenharmony_ci cap->error = i915_gpu_coredump_alloc(engine->i915, gfp); 29838c2ecf20Sopenharmony_ci if (!cap->error) 29848c2ecf20Sopenharmony_ci goto err_cap; 29858c2ecf20Sopenharmony_ci 29868c2ecf20Sopenharmony_ci cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp); 29878c2ecf20Sopenharmony_ci if (!cap->error->gt) 29888c2ecf20Sopenharmony_ci goto err_gpu; 29898c2ecf20Sopenharmony_ci 29908c2ecf20Sopenharmony_ci cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp); 29918c2ecf20Sopenharmony_ci if (!cap->error->gt->engine) 29928c2ecf20Sopenharmony_ci goto err_gt; 29938c2ecf20Sopenharmony_ci 29948c2ecf20Sopenharmony_ci return cap; 29958c2ecf20Sopenharmony_ci 29968c2ecf20Sopenharmony_cierr_gt: 29978c2ecf20Sopenharmony_ci kfree(cap->error->gt); 29988c2ecf20Sopenharmony_cierr_gpu: 29998c2ecf20Sopenharmony_ci kfree(cap->error); 30008c2ecf20Sopenharmony_cierr_cap: 30018c2ecf20Sopenharmony_ci kfree(cap); 30028c2ecf20Sopenharmony_ci return NULL; 30038c2ecf20Sopenharmony_ci} 30048c2ecf20Sopenharmony_ci 30058c2ecf20Sopenharmony_cistatic struct i915_request * 30068c2ecf20Sopenharmony_ciactive_context(struct intel_engine_cs *engine, u32 ccid) 30078c2ecf20Sopenharmony_ci{ 30088c2ecf20Sopenharmony_ci const struct intel_engine_execlists * const el = &engine->execlists; 30098c2ecf20Sopenharmony_ci struct i915_request * const *port, *rq; 30108c2ecf20Sopenharmony_ci 30118c2ecf20Sopenharmony_ci /* 30128c2ecf20Sopenharmony_ci * Use the most recent result from process_csb(), but just in case 30138c2ecf20Sopenharmony_ci * we trigger an error (via interrupt) before the first CS event has 30148c2ecf20Sopenharmony_ci * been written, peek at the next submission. 30158c2ecf20Sopenharmony_ci */ 30168c2ecf20Sopenharmony_ci 30178c2ecf20Sopenharmony_ci for (port = el->active; (rq = *port); port++) { 30188c2ecf20Sopenharmony_ci if (rq->context->lrc.ccid == ccid) { 30198c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, 30208c2ecf20Sopenharmony_ci "ccid found at active:%zd\n", 30218c2ecf20Sopenharmony_ci port - el->active); 30228c2ecf20Sopenharmony_ci return rq; 30238c2ecf20Sopenharmony_ci } 30248c2ecf20Sopenharmony_ci } 30258c2ecf20Sopenharmony_ci 30268c2ecf20Sopenharmony_ci for (port = el->pending; (rq = *port); port++) { 30278c2ecf20Sopenharmony_ci if (rq->context->lrc.ccid == ccid) { 30288c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, 30298c2ecf20Sopenharmony_ci "ccid found at pending:%zd\n", 30308c2ecf20Sopenharmony_ci port - el->pending); 30318c2ecf20Sopenharmony_ci return rq; 30328c2ecf20Sopenharmony_ci } 30338c2ecf20Sopenharmony_ci } 30348c2ecf20Sopenharmony_ci 30358c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, "ccid:%x not found\n", ccid); 30368c2ecf20Sopenharmony_ci return NULL; 30378c2ecf20Sopenharmony_ci} 30388c2ecf20Sopenharmony_ci 30398c2ecf20Sopenharmony_cistatic u32 active_ccid(struct intel_engine_cs *engine) 30408c2ecf20Sopenharmony_ci{ 30418c2ecf20Sopenharmony_ci return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI); 30428c2ecf20Sopenharmony_ci} 30438c2ecf20Sopenharmony_ci 30448c2ecf20Sopenharmony_cistatic void execlists_capture(struct intel_engine_cs *engine) 30458c2ecf20Sopenharmony_ci{ 30468c2ecf20Sopenharmony_ci struct execlists_capture *cap; 30478c2ecf20Sopenharmony_ci 30488c2ecf20Sopenharmony_ci if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) 30498c2ecf20Sopenharmony_ci return; 30508c2ecf20Sopenharmony_ci 30518c2ecf20Sopenharmony_ci /* 30528c2ecf20Sopenharmony_ci * We need to _quickly_ capture the engine state before we reset. 30538c2ecf20Sopenharmony_ci * We are inside an atomic section (softirq) here and we are delaying 30548c2ecf20Sopenharmony_ci * the forced preemption event. 30558c2ecf20Sopenharmony_ci */ 30568c2ecf20Sopenharmony_ci cap = capture_regs(engine); 30578c2ecf20Sopenharmony_ci if (!cap) 30588c2ecf20Sopenharmony_ci return; 30598c2ecf20Sopenharmony_ci 30608c2ecf20Sopenharmony_ci spin_lock_irq(&engine->active.lock); 30618c2ecf20Sopenharmony_ci cap->rq = active_context(engine, active_ccid(engine)); 30628c2ecf20Sopenharmony_ci if (cap->rq) { 30638c2ecf20Sopenharmony_ci cap->rq = active_request(cap->rq->context->timeline, cap->rq); 30648c2ecf20Sopenharmony_ci cap->rq = i915_request_get_rcu(cap->rq); 30658c2ecf20Sopenharmony_ci } 30668c2ecf20Sopenharmony_ci spin_unlock_irq(&engine->active.lock); 30678c2ecf20Sopenharmony_ci if (!cap->rq) 30688c2ecf20Sopenharmony_ci goto err_free; 30698c2ecf20Sopenharmony_ci 30708c2ecf20Sopenharmony_ci /* 30718c2ecf20Sopenharmony_ci * Remove the request from the execlists queue, and take ownership 30728c2ecf20Sopenharmony_ci * of the request. We pass it to our worker who will _slowly_ compress 30738c2ecf20Sopenharmony_ci * all the pages the _user_ requested for debugging their batch, after 30748c2ecf20Sopenharmony_ci * which we return it to the queue for signaling. 30758c2ecf20Sopenharmony_ci * 30768c2ecf20Sopenharmony_ci * By removing them from the execlists queue, we also remove the 30778c2ecf20Sopenharmony_ci * requests from being processed by __unwind_incomplete_requests() 30788c2ecf20Sopenharmony_ci * during the intel_engine_reset(), and so they will *not* be replayed 30798c2ecf20Sopenharmony_ci * afterwards. 30808c2ecf20Sopenharmony_ci * 30818c2ecf20Sopenharmony_ci * Note that because we have not yet reset the engine at this point, 30828c2ecf20Sopenharmony_ci * it is possible for the request that we have identified as being 30838c2ecf20Sopenharmony_ci * guilty, did in fact complete and we will then hit an arbitration 30848c2ecf20Sopenharmony_ci * point allowing the outstanding preemption to succeed. The likelihood 30858c2ecf20Sopenharmony_ci * of that is very low (as capturing of the engine registers should be 30868c2ecf20Sopenharmony_ci * fast enough to run inside an irq-off atomic section!), so we will 30878c2ecf20Sopenharmony_ci * simply hold that request accountable for being non-preemptible 30888c2ecf20Sopenharmony_ci * long enough to force the reset. 30898c2ecf20Sopenharmony_ci */ 30908c2ecf20Sopenharmony_ci if (!execlists_hold(engine, cap->rq)) 30918c2ecf20Sopenharmony_ci goto err_rq; 30928c2ecf20Sopenharmony_ci 30938c2ecf20Sopenharmony_ci INIT_WORK(&cap->work, execlists_capture_work); 30948c2ecf20Sopenharmony_ci schedule_work(&cap->work); 30958c2ecf20Sopenharmony_ci return; 30968c2ecf20Sopenharmony_ci 30978c2ecf20Sopenharmony_cierr_rq: 30988c2ecf20Sopenharmony_ci i915_request_put(cap->rq); 30998c2ecf20Sopenharmony_cierr_free: 31008c2ecf20Sopenharmony_ci i915_gpu_coredump_put(cap->error); 31018c2ecf20Sopenharmony_ci kfree(cap); 31028c2ecf20Sopenharmony_ci} 31038c2ecf20Sopenharmony_ci 31048c2ecf20Sopenharmony_cistatic void execlists_reset(struct intel_engine_cs *engine, const char *msg) 31058c2ecf20Sopenharmony_ci{ 31068c2ecf20Sopenharmony_ci const unsigned int bit = I915_RESET_ENGINE + engine->id; 31078c2ecf20Sopenharmony_ci unsigned long *lock = &engine->gt->reset.flags; 31088c2ecf20Sopenharmony_ci 31098c2ecf20Sopenharmony_ci if (!intel_has_reset_engine(engine->gt)) 31108c2ecf20Sopenharmony_ci return; 31118c2ecf20Sopenharmony_ci 31128c2ecf20Sopenharmony_ci if (test_and_set_bit(bit, lock)) 31138c2ecf20Sopenharmony_ci return; 31148c2ecf20Sopenharmony_ci 31158c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, "reset for %s\n", msg); 31168c2ecf20Sopenharmony_ci 31178c2ecf20Sopenharmony_ci /* Mark this tasklet as disabled to avoid waiting for it to complete */ 31188c2ecf20Sopenharmony_ci tasklet_disable_nosync(&engine->execlists.tasklet); 31198c2ecf20Sopenharmony_ci 31208c2ecf20Sopenharmony_ci ring_set_paused(engine, 1); /* Freeze the current request in place */ 31218c2ecf20Sopenharmony_ci execlists_capture(engine); 31228c2ecf20Sopenharmony_ci intel_engine_reset(engine, msg); 31238c2ecf20Sopenharmony_ci 31248c2ecf20Sopenharmony_ci tasklet_enable(&engine->execlists.tasklet); 31258c2ecf20Sopenharmony_ci clear_and_wake_up_bit(bit, lock); 31268c2ecf20Sopenharmony_ci} 31278c2ecf20Sopenharmony_ci 31288c2ecf20Sopenharmony_cistatic bool preempt_timeout(const struct intel_engine_cs *const engine) 31298c2ecf20Sopenharmony_ci{ 31308c2ecf20Sopenharmony_ci const struct timer_list *t = &engine->execlists.preempt; 31318c2ecf20Sopenharmony_ci 31328c2ecf20Sopenharmony_ci if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) 31338c2ecf20Sopenharmony_ci return false; 31348c2ecf20Sopenharmony_ci 31358c2ecf20Sopenharmony_ci if (!timer_expired(t)) 31368c2ecf20Sopenharmony_ci return false; 31378c2ecf20Sopenharmony_ci 31388c2ecf20Sopenharmony_ci return READ_ONCE(engine->execlists.pending[0]); 31398c2ecf20Sopenharmony_ci} 31408c2ecf20Sopenharmony_ci 31418c2ecf20Sopenharmony_ci/* 31428c2ecf20Sopenharmony_ci * Check the unread Context Status Buffers and manage the submission of new 31438c2ecf20Sopenharmony_ci * contexts to the ELSP accordingly. 31448c2ecf20Sopenharmony_ci */ 31458c2ecf20Sopenharmony_cistatic void execlists_submission_tasklet(unsigned long data) 31468c2ecf20Sopenharmony_ci{ 31478c2ecf20Sopenharmony_ci struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; 31488c2ecf20Sopenharmony_ci bool timeout = preempt_timeout(engine); 31498c2ecf20Sopenharmony_ci 31508c2ecf20Sopenharmony_ci process_csb(engine); 31518c2ecf20Sopenharmony_ci 31528c2ecf20Sopenharmony_ci if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) { 31538c2ecf20Sopenharmony_ci const char *msg; 31548c2ecf20Sopenharmony_ci 31558c2ecf20Sopenharmony_ci /* Generate the error message in priority wrt to the user! */ 31568c2ecf20Sopenharmony_ci if (engine->execlists.error_interrupt & GENMASK(15, 0)) 31578c2ecf20Sopenharmony_ci msg = "CS error"; /* thrown by a user payload */ 31588c2ecf20Sopenharmony_ci else if (engine->execlists.error_interrupt & ERROR_CSB) 31598c2ecf20Sopenharmony_ci msg = "invalid CSB event"; 31608c2ecf20Sopenharmony_ci else 31618c2ecf20Sopenharmony_ci msg = "internal error"; 31628c2ecf20Sopenharmony_ci 31638c2ecf20Sopenharmony_ci engine->execlists.error_interrupt = 0; 31648c2ecf20Sopenharmony_ci execlists_reset(engine, msg); 31658c2ecf20Sopenharmony_ci } 31668c2ecf20Sopenharmony_ci 31678c2ecf20Sopenharmony_ci if (!READ_ONCE(engine->execlists.pending[0]) || timeout) { 31688c2ecf20Sopenharmony_ci unsigned long flags; 31698c2ecf20Sopenharmony_ci 31708c2ecf20Sopenharmony_ci spin_lock_irqsave(&engine->active.lock, flags); 31718c2ecf20Sopenharmony_ci __execlists_submission_tasklet(engine); 31728c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&engine->active.lock, flags); 31738c2ecf20Sopenharmony_ci 31748c2ecf20Sopenharmony_ci /* Recheck after serialising with direct-submission */ 31758c2ecf20Sopenharmony_ci if (unlikely(timeout && preempt_timeout(engine))) { 31768c2ecf20Sopenharmony_ci cancel_timer(&engine->execlists.preempt); 31778c2ecf20Sopenharmony_ci execlists_reset(engine, "preemption time out"); 31788c2ecf20Sopenharmony_ci } 31798c2ecf20Sopenharmony_ci } 31808c2ecf20Sopenharmony_ci} 31818c2ecf20Sopenharmony_ci 31828c2ecf20Sopenharmony_cistatic void __execlists_kick(struct intel_engine_execlists *execlists) 31838c2ecf20Sopenharmony_ci{ 31848c2ecf20Sopenharmony_ci /* Kick the tasklet for some interrupt coalescing and reset handling */ 31858c2ecf20Sopenharmony_ci tasklet_hi_schedule(&execlists->tasklet); 31868c2ecf20Sopenharmony_ci} 31878c2ecf20Sopenharmony_ci 31888c2ecf20Sopenharmony_ci#define execlists_kick(t, member) \ 31898c2ecf20Sopenharmony_ci __execlists_kick(container_of(t, struct intel_engine_execlists, member)) 31908c2ecf20Sopenharmony_ci 31918c2ecf20Sopenharmony_cistatic void execlists_timeslice(struct timer_list *timer) 31928c2ecf20Sopenharmony_ci{ 31938c2ecf20Sopenharmony_ci execlists_kick(timer, timer); 31948c2ecf20Sopenharmony_ci} 31958c2ecf20Sopenharmony_ci 31968c2ecf20Sopenharmony_cistatic void execlists_preempt(struct timer_list *timer) 31978c2ecf20Sopenharmony_ci{ 31988c2ecf20Sopenharmony_ci execlists_kick(timer, preempt); 31998c2ecf20Sopenharmony_ci} 32008c2ecf20Sopenharmony_ci 32018c2ecf20Sopenharmony_cistatic void queue_request(struct intel_engine_cs *engine, 32028c2ecf20Sopenharmony_ci struct i915_request *rq) 32038c2ecf20Sopenharmony_ci{ 32048c2ecf20Sopenharmony_ci GEM_BUG_ON(!list_empty(&rq->sched.link)); 32058c2ecf20Sopenharmony_ci list_add_tail(&rq->sched.link, 32068c2ecf20Sopenharmony_ci i915_sched_lookup_priolist(engine, rq_prio(rq))); 32078c2ecf20Sopenharmony_ci set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 32088c2ecf20Sopenharmony_ci} 32098c2ecf20Sopenharmony_ci 32108c2ecf20Sopenharmony_cistatic void __submit_queue_imm(struct intel_engine_cs *engine) 32118c2ecf20Sopenharmony_ci{ 32128c2ecf20Sopenharmony_ci struct intel_engine_execlists * const execlists = &engine->execlists; 32138c2ecf20Sopenharmony_ci 32148c2ecf20Sopenharmony_ci if (reset_in_progress(execlists)) 32158c2ecf20Sopenharmony_ci return; /* defer until we restart the engine following reset */ 32168c2ecf20Sopenharmony_ci 32178c2ecf20Sopenharmony_ci __execlists_submission_tasklet(engine); 32188c2ecf20Sopenharmony_ci} 32198c2ecf20Sopenharmony_ci 32208c2ecf20Sopenharmony_cistatic void submit_queue(struct intel_engine_cs *engine, 32218c2ecf20Sopenharmony_ci const struct i915_request *rq) 32228c2ecf20Sopenharmony_ci{ 32238c2ecf20Sopenharmony_ci struct intel_engine_execlists *execlists = &engine->execlists; 32248c2ecf20Sopenharmony_ci 32258c2ecf20Sopenharmony_ci if (rq_prio(rq) <= execlists->queue_priority_hint) 32268c2ecf20Sopenharmony_ci return; 32278c2ecf20Sopenharmony_ci 32288c2ecf20Sopenharmony_ci execlists->queue_priority_hint = rq_prio(rq); 32298c2ecf20Sopenharmony_ci __submit_queue_imm(engine); 32308c2ecf20Sopenharmony_ci} 32318c2ecf20Sopenharmony_ci 32328c2ecf20Sopenharmony_cistatic bool ancestor_on_hold(const struct intel_engine_cs *engine, 32338c2ecf20Sopenharmony_ci const struct i915_request *rq) 32348c2ecf20Sopenharmony_ci{ 32358c2ecf20Sopenharmony_ci GEM_BUG_ON(i915_request_on_hold(rq)); 32368c2ecf20Sopenharmony_ci return !list_empty(&engine->active.hold) && hold_request(rq); 32378c2ecf20Sopenharmony_ci} 32388c2ecf20Sopenharmony_ci 32398c2ecf20Sopenharmony_cistatic void flush_csb(struct intel_engine_cs *engine) 32408c2ecf20Sopenharmony_ci{ 32418c2ecf20Sopenharmony_ci struct intel_engine_execlists *el = &engine->execlists; 32428c2ecf20Sopenharmony_ci 32438c2ecf20Sopenharmony_ci if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) { 32448c2ecf20Sopenharmony_ci if (!reset_in_progress(el)) 32458c2ecf20Sopenharmony_ci process_csb(engine); 32468c2ecf20Sopenharmony_ci tasklet_unlock(&el->tasklet); 32478c2ecf20Sopenharmony_ci } 32488c2ecf20Sopenharmony_ci} 32498c2ecf20Sopenharmony_ci 32508c2ecf20Sopenharmony_cistatic void execlists_submit_request(struct i915_request *request) 32518c2ecf20Sopenharmony_ci{ 32528c2ecf20Sopenharmony_ci struct intel_engine_cs *engine = request->engine; 32538c2ecf20Sopenharmony_ci unsigned long flags; 32548c2ecf20Sopenharmony_ci 32558c2ecf20Sopenharmony_ci /* Hopefully we clear execlists->pending[] to let us through */ 32568c2ecf20Sopenharmony_ci flush_csb(engine); 32578c2ecf20Sopenharmony_ci 32588c2ecf20Sopenharmony_ci /* Will be called from irq-context when using foreign fences. */ 32598c2ecf20Sopenharmony_ci spin_lock_irqsave(&engine->active.lock, flags); 32608c2ecf20Sopenharmony_ci 32618c2ecf20Sopenharmony_ci if (unlikely(ancestor_on_hold(engine, request))) { 32628c2ecf20Sopenharmony_ci RQ_TRACE(request, "ancestor on hold\n"); 32638c2ecf20Sopenharmony_ci list_add_tail(&request->sched.link, &engine->active.hold); 32648c2ecf20Sopenharmony_ci i915_request_set_hold(request); 32658c2ecf20Sopenharmony_ci } else { 32668c2ecf20Sopenharmony_ci queue_request(engine, request); 32678c2ecf20Sopenharmony_ci 32688c2ecf20Sopenharmony_ci GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); 32698c2ecf20Sopenharmony_ci GEM_BUG_ON(list_empty(&request->sched.link)); 32708c2ecf20Sopenharmony_ci 32718c2ecf20Sopenharmony_ci submit_queue(engine, request); 32728c2ecf20Sopenharmony_ci } 32738c2ecf20Sopenharmony_ci 32748c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&engine->active.lock, flags); 32758c2ecf20Sopenharmony_ci} 32768c2ecf20Sopenharmony_ci 32778c2ecf20Sopenharmony_cistatic void __execlists_context_fini(struct intel_context *ce) 32788c2ecf20Sopenharmony_ci{ 32798c2ecf20Sopenharmony_ci intel_ring_put(ce->ring); 32808c2ecf20Sopenharmony_ci i915_vma_put(ce->state); 32818c2ecf20Sopenharmony_ci} 32828c2ecf20Sopenharmony_ci 32838c2ecf20Sopenharmony_cistatic void execlists_context_destroy(struct kref *kref) 32848c2ecf20Sopenharmony_ci{ 32858c2ecf20Sopenharmony_ci struct intel_context *ce = container_of(kref, typeof(*ce), ref); 32868c2ecf20Sopenharmony_ci 32878c2ecf20Sopenharmony_ci GEM_BUG_ON(!i915_active_is_idle(&ce->active)); 32888c2ecf20Sopenharmony_ci GEM_BUG_ON(intel_context_is_pinned(ce)); 32898c2ecf20Sopenharmony_ci 32908c2ecf20Sopenharmony_ci if (ce->state) 32918c2ecf20Sopenharmony_ci __execlists_context_fini(ce); 32928c2ecf20Sopenharmony_ci 32938c2ecf20Sopenharmony_ci intel_context_fini(ce); 32948c2ecf20Sopenharmony_ci intel_context_free(ce); 32958c2ecf20Sopenharmony_ci} 32968c2ecf20Sopenharmony_ci 32978c2ecf20Sopenharmony_cistatic void 32988c2ecf20Sopenharmony_ciset_redzone(void *vaddr, const struct intel_engine_cs *engine) 32998c2ecf20Sopenharmony_ci{ 33008c2ecf20Sopenharmony_ci if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 33018c2ecf20Sopenharmony_ci return; 33028c2ecf20Sopenharmony_ci 33038c2ecf20Sopenharmony_ci vaddr += engine->context_size; 33048c2ecf20Sopenharmony_ci 33058c2ecf20Sopenharmony_ci memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE); 33068c2ecf20Sopenharmony_ci} 33078c2ecf20Sopenharmony_ci 33088c2ecf20Sopenharmony_cistatic void 33098c2ecf20Sopenharmony_cicheck_redzone(const void *vaddr, const struct intel_engine_cs *engine) 33108c2ecf20Sopenharmony_ci{ 33118c2ecf20Sopenharmony_ci if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 33128c2ecf20Sopenharmony_ci return; 33138c2ecf20Sopenharmony_ci 33148c2ecf20Sopenharmony_ci vaddr += engine->context_size; 33158c2ecf20Sopenharmony_ci 33168c2ecf20Sopenharmony_ci if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE)) 33178c2ecf20Sopenharmony_ci drm_err_once(&engine->i915->drm, 33188c2ecf20Sopenharmony_ci "%s context redzone overwritten!\n", 33198c2ecf20Sopenharmony_ci engine->name); 33208c2ecf20Sopenharmony_ci} 33218c2ecf20Sopenharmony_ci 33228c2ecf20Sopenharmony_cistatic void execlists_context_unpin(struct intel_context *ce) 33238c2ecf20Sopenharmony_ci{ 33248c2ecf20Sopenharmony_ci check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, 33258c2ecf20Sopenharmony_ci ce->engine); 33268c2ecf20Sopenharmony_ci} 33278c2ecf20Sopenharmony_ci 33288c2ecf20Sopenharmony_cistatic void execlists_context_post_unpin(struct intel_context *ce) 33298c2ecf20Sopenharmony_ci{ 33308c2ecf20Sopenharmony_ci i915_gem_object_unpin_map(ce->state->obj); 33318c2ecf20Sopenharmony_ci} 33328c2ecf20Sopenharmony_ci 33338c2ecf20Sopenharmony_cistatic u32 * 33348c2ecf20Sopenharmony_cigen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs) 33358c2ecf20Sopenharmony_ci{ 33368c2ecf20Sopenharmony_ci *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | 33378c2ecf20Sopenharmony_ci MI_SRM_LRM_GLOBAL_GTT | 33388c2ecf20Sopenharmony_ci MI_LRI_LRM_CS_MMIO; 33398c2ecf20Sopenharmony_ci *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); 33408c2ecf20Sopenharmony_ci *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + 33418c2ecf20Sopenharmony_ci CTX_TIMESTAMP * sizeof(u32); 33428c2ecf20Sopenharmony_ci *cs++ = 0; 33438c2ecf20Sopenharmony_ci 33448c2ecf20Sopenharmony_ci *cs++ = MI_LOAD_REGISTER_REG | 33458c2ecf20Sopenharmony_ci MI_LRR_SOURCE_CS_MMIO | 33468c2ecf20Sopenharmony_ci MI_LRI_LRM_CS_MMIO; 33478c2ecf20Sopenharmony_ci *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); 33488c2ecf20Sopenharmony_ci *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0)); 33498c2ecf20Sopenharmony_ci 33508c2ecf20Sopenharmony_ci *cs++ = MI_LOAD_REGISTER_REG | 33518c2ecf20Sopenharmony_ci MI_LRR_SOURCE_CS_MMIO | 33528c2ecf20Sopenharmony_ci MI_LRI_LRM_CS_MMIO; 33538c2ecf20Sopenharmony_ci *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); 33548c2ecf20Sopenharmony_ci *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0)); 33558c2ecf20Sopenharmony_ci 33568c2ecf20Sopenharmony_ci return cs; 33578c2ecf20Sopenharmony_ci} 33588c2ecf20Sopenharmony_ci 33598c2ecf20Sopenharmony_cistatic u32 * 33608c2ecf20Sopenharmony_cigen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs) 33618c2ecf20Sopenharmony_ci{ 33628c2ecf20Sopenharmony_ci GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1); 33638c2ecf20Sopenharmony_ci 33648c2ecf20Sopenharmony_ci *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | 33658c2ecf20Sopenharmony_ci MI_SRM_LRM_GLOBAL_GTT | 33668c2ecf20Sopenharmony_ci MI_LRI_LRM_CS_MMIO; 33678c2ecf20Sopenharmony_ci *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); 33688c2ecf20Sopenharmony_ci *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + 33698c2ecf20Sopenharmony_ci (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32); 33708c2ecf20Sopenharmony_ci *cs++ = 0; 33718c2ecf20Sopenharmony_ci 33728c2ecf20Sopenharmony_ci return cs; 33738c2ecf20Sopenharmony_ci} 33748c2ecf20Sopenharmony_ci 33758c2ecf20Sopenharmony_cistatic u32 * 33768c2ecf20Sopenharmony_cigen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) 33778c2ecf20Sopenharmony_ci{ 33788c2ecf20Sopenharmony_ci GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1); 33798c2ecf20Sopenharmony_ci 33808c2ecf20Sopenharmony_ci *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | 33818c2ecf20Sopenharmony_ci MI_SRM_LRM_GLOBAL_GTT | 33828c2ecf20Sopenharmony_ci MI_LRI_LRM_CS_MMIO; 33838c2ecf20Sopenharmony_ci *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); 33848c2ecf20Sopenharmony_ci *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET + 33858c2ecf20Sopenharmony_ci (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32); 33868c2ecf20Sopenharmony_ci *cs++ = 0; 33878c2ecf20Sopenharmony_ci 33888c2ecf20Sopenharmony_ci *cs++ = MI_LOAD_REGISTER_REG | 33898c2ecf20Sopenharmony_ci MI_LRR_SOURCE_CS_MMIO | 33908c2ecf20Sopenharmony_ci MI_LRI_LRM_CS_MMIO; 33918c2ecf20Sopenharmony_ci *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)); 33928c2ecf20Sopenharmony_ci *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0)); 33938c2ecf20Sopenharmony_ci 33948c2ecf20Sopenharmony_ci return cs; 33958c2ecf20Sopenharmony_ci} 33968c2ecf20Sopenharmony_ci 33978c2ecf20Sopenharmony_cistatic u32 * 33988c2ecf20Sopenharmony_cigen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) 33998c2ecf20Sopenharmony_ci{ 34008c2ecf20Sopenharmony_ci cs = gen12_emit_timestamp_wa(ce, cs); 34018c2ecf20Sopenharmony_ci cs = gen12_emit_cmd_buf_wa(ce, cs); 34028c2ecf20Sopenharmony_ci cs = gen12_emit_restore_scratch(ce, cs); 34038c2ecf20Sopenharmony_ci 34048c2ecf20Sopenharmony_ci return cs; 34058c2ecf20Sopenharmony_ci} 34068c2ecf20Sopenharmony_ci 34078c2ecf20Sopenharmony_cistatic u32 * 34088c2ecf20Sopenharmony_cigen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) 34098c2ecf20Sopenharmony_ci{ 34108c2ecf20Sopenharmony_ci cs = gen12_emit_timestamp_wa(ce, cs); 34118c2ecf20Sopenharmony_ci cs = gen12_emit_restore_scratch(ce, cs); 34128c2ecf20Sopenharmony_ci 34138c2ecf20Sopenharmony_ci return cs; 34148c2ecf20Sopenharmony_ci} 34158c2ecf20Sopenharmony_ci 34168c2ecf20Sopenharmony_cistatic inline u32 context_wa_bb_offset(const struct intel_context *ce) 34178c2ecf20Sopenharmony_ci{ 34188c2ecf20Sopenharmony_ci return PAGE_SIZE * ce->wa_bb_page; 34198c2ecf20Sopenharmony_ci} 34208c2ecf20Sopenharmony_ci 34218c2ecf20Sopenharmony_cistatic u32 *context_indirect_bb(const struct intel_context *ce) 34228c2ecf20Sopenharmony_ci{ 34238c2ecf20Sopenharmony_ci void *ptr; 34248c2ecf20Sopenharmony_ci 34258c2ecf20Sopenharmony_ci GEM_BUG_ON(!ce->wa_bb_page); 34268c2ecf20Sopenharmony_ci 34278c2ecf20Sopenharmony_ci ptr = ce->lrc_reg_state; 34288c2ecf20Sopenharmony_ci ptr -= LRC_STATE_OFFSET; /* back to start of context image */ 34298c2ecf20Sopenharmony_ci ptr += context_wa_bb_offset(ce); 34308c2ecf20Sopenharmony_ci 34318c2ecf20Sopenharmony_ci return ptr; 34328c2ecf20Sopenharmony_ci} 34338c2ecf20Sopenharmony_ci 34348c2ecf20Sopenharmony_cistatic void 34358c2ecf20Sopenharmony_cisetup_indirect_ctx_bb(const struct intel_context *ce, 34368c2ecf20Sopenharmony_ci const struct intel_engine_cs *engine, 34378c2ecf20Sopenharmony_ci u32 *(*emit)(const struct intel_context *, u32 *)) 34388c2ecf20Sopenharmony_ci{ 34398c2ecf20Sopenharmony_ci u32 * const start = context_indirect_bb(ce); 34408c2ecf20Sopenharmony_ci u32 *cs; 34418c2ecf20Sopenharmony_ci 34428c2ecf20Sopenharmony_ci cs = emit(ce, start); 34438c2ecf20Sopenharmony_ci GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs)); 34448c2ecf20Sopenharmony_ci while ((unsigned long)cs % CACHELINE_BYTES) 34458c2ecf20Sopenharmony_ci *cs++ = MI_NOOP; 34468c2ecf20Sopenharmony_ci 34478c2ecf20Sopenharmony_ci lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine, 34488c2ecf20Sopenharmony_ci i915_ggtt_offset(ce->state) + 34498c2ecf20Sopenharmony_ci context_wa_bb_offset(ce), 34508c2ecf20Sopenharmony_ci (cs - start) * sizeof(*cs)); 34518c2ecf20Sopenharmony_ci} 34528c2ecf20Sopenharmony_ci 34538c2ecf20Sopenharmony_cistatic void 34548c2ecf20Sopenharmony_ci__execlists_update_reg_state(const struct intel_context *ce, 34558c2ecf20Sopenharmony_ci const struct intel_engine_cs *engine, 34568c2ecf20Sopenharmony_ci u32 head) 34578c2ecf20Sopenharmony_ci{ 34588c2ecf20Sopenharmony_ci struct intel_ring *ring = ce->ring; 34598c2ecf20Sopenharmony_ci u32 *regs = ce->lrc_reg_state; 34608c2ecf20Sopenharmony_ci 34618c2ecf20Sopenharmony_ci GEM_BUG_ON(!intel_ring_offset_valid(ring, head)); 34628c2ecf20Sopenharmony_ci GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); 34638c2ecf20Sopenharmony_ci 34648c2ecf20Sopenharmony_ci regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); 34658c2ecf20Sopenharmony_ci regs[CTX_RING_HEAD] = head; 34668c2ecf20Sopenharmony_ci regs[CTX_RING_TAIL] = ring->tail; 34678c2ecf20Sopenharmony_ci regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; 34688c2ecf20Sopenharmony_ci 34698c2ecf20Sopenharmony_ci /* RPCS */ 34708c2ecf20Sopenharmony_ci if (engine->class == RENDER_CLASS) { 34718c2ecf20Sopenharmony_ci regs[CTX_R_PWR_CLK_STATE] = 34728c2ecf20Sopenharmony_ci intel_sseu_make_rpcs(engine->gt, &ce->sseu); 34738c2ecf20Sopenharmony_ci 34748c2ecf20Sopenharmony_ci i915_oa_init_reg_state(ce, engine); 34758c2ecf20Sopenharmony_ci } 34768c2ecf20Sopenharmony_ci 34778c2ecf20Sopenharmony_ci if (ce->wa_bb_page) { 34788c2ecf20Sopenharmony_ci u32 *(*fn)(const struct intel_context *ce, u32 *cs); 34798c2ecf20Sopenharmony_ci 34808c2ecf20Sopenharmony_ci fn = gen12_emit_indirect_ctx_xcs; 34818c2ecf20Sopenharmony_ci if (ce->engine->class == RENDER_CLASS) 34828c2ecf20Sopenharmony_ci fn = gen12_emit_indirect_ctx_rcs; 34838c2ecf20Sopenharmony_ci 34848c2ecf20Sopenharmony_ci /* Mutually exclusive wrt to global indirect bb */ 34858c2ecf20Sopenharmony_ci GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size); 34868c2ecf20Sopenharmony_ci setup_indirect_ctx_bb(ce, engine, fn); 34878c2ecf20Sopenharmony_ci } 34888c2ecf20Sopenharmony_ci} 34898c2ecf20Sopenharmony_ci 34908c2ecf20Sopenharmony_cistatic int 34918c2ecf20Sopenharmony_ciexeclists_context_pre_pin(struct intel_context *ce, 34928c2ecf20Sopenharmony_ci struct i915_gem_ww_ctx *ww, void **vaddr) 34938c2ecf20Sopenharmony_ci{ 34948c2ecf20Sopenharmony_ci GEM_BUG_ON(!ce->state); 34958c2ecf20Sopenharmony_ci GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); 34968c2ecf20Sopenharmony_ci 34978c2ecf20Sopenharmony_ci *vaddr = i915_gem_object_pin_map(ce->state->obj, 34988c2ecf20Sopenharmony_ci i915_coherent_map_type(ce->engine->i915) | 34998c2ecf20Sopenharmony_ci I915_MAP_OVERRIDE); 35008c2ecf20Sopenharmony_ci 35018c2ecf20Sopenharmony_ci return PTR_ERR_OR_ZERO(*vaddr); 35028c2ecf20Sopenharmony_ci} 35038c2ecf20Sopenharmony_ci 35048c2ecf20Sopenharmony_cistatic int 35058c2ecf20Sopenharmony_ci__execlists_context_pin(struct intel_context *ce, 35068c2ecf20Sopenharmony_ci struct intel_engine_cs *engine, 35078c2ecf20Sopenharmony_ci void *vaddr) 35088c2ecf20Sopenharmony_ci{ 35098c2ecf20Sopenharmony_ci ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; 35108c2ecf20Sopenharmony_ci ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET; 35118c2ecf20Sopenharmony_ci __execlists_update_reg_state(ce, engine, ce->ring->tail); 35128c2ecf20Sopenharmony_ci 35138c2ecf20Sopenharmony_ci return 0; 35148c2ecf20Sopenharmony_ci} 35158c2ecf20Sopenharmony_ci 35168c2ecf20Sopenharmony_cistatic int execlists_context_pin(struct intel_context *ce, void *vaddr) 35178c2ecf20Sopenharmony_ci{ 35188c2ecf20Sopenharmony_ci return __execlists_context_pin(ce, ce->engine, vaddr); 35198c2ecf20Sopenharmony_ci} 35208c2ecf20Sopenharmony_ci 35218c2ecf20Sopenharmony_cistatic int execlists_context_alloc(struct intel_context *ce) 35228c2ecf20Sopenharmony_ci{ 35238c2ecf20Sopenharmony_ci return __execlists_context_alloc(ce, ce->engine); 35248c2ecf20Sopenharmony_ci} 35258c2ecf20Sopenharmony_ci 35268c2ecf20Sopenharmony_cistatic void execlists_context_reset(struct intel_context *ce) 35278c2ecf20Sopenharmony_ci{ 35288c2ecf20Sopenharmony_ci CE_TRACE(ce, "reset\n"); 35298c2ecf20Sopenharmony_ci GEM_BUG_ON(!intel_context_is_pinned(ce)); 35308c2ecf20Sopenharmony_ci 35318c2ecf20Sopenharmony_ci intel_ring_reset(ce->ring, ce->ring->emit); 35328c2ecf20Sopenharmony_ci 35338c2ecf20Sopenharmony_ci /* Scrub away the garbage */ 35348c2ecf20Sopenharmony_ci execlists_init_reg_state(ce->lrc_reg_state, 35358c2ecf20Sopenharmony_ci ce, ce->engine, ce->ring, true); 35368c2ecf20Sopenharmony_ci __execlists_update_reg_state(ce, ce->engine, ce->ring->tail); 35378c2ecf20Sopenharmony_ci 35388c2ecf20Sopenharmony_ci ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; 35398c2ecf20Sopenharmony_ci} 35408c2ecf20Sopenharmony_ci 35418c2ecf20Sopenharmony_cistatic const struct intel_context_ops execlists_context_ops = { 35428c2ecf20Sopenharmony_ci .alloc = execlists_context_alloc, 35438c2ecf20Sopenharmony_ci 35448c2ecf20Sopenharmony_ci .pre_pin = execlists_context_pre_pin, 35458c2ecf20Sopenharmony_ci .pin = execlists_context_pin, 35468c2ecf20Sopenharmony_ci .unpin = execlists_context_unpin, 35478c2ecf20Sopenharmony_ci .post_unpin = execlists_context_post_unpin, 35488c2ecf20Sopenharmony_ci 35498c2ecf20Sopenharmony_ci .enter = intel_context_enter_engine, 35508c2ecf20Sopenharmony_ci .exit = intel_context_exit_engine, 35518c2ecf20Sopenharmony_ci 35528c2ecf20Sopenharmony_ci .reset = execlists_context_reset, 35538c2ecf20Sopenharmony_ci .destroy = execlists_context_destroy, 35548c2ecf20Sopenharmony_ci}; 35558c2ecf20Sopenharmony_ci 35568c2ecf20Sopenharmony_cistatic u32 hwsp_offset(const struct i915_request *rq) 35578c2ecf20Sopenharmony_ci{ 35588c2ecf20Sopenharmony_ci const struct intel_timeline_cacheline *cl; 35598c2ecf20Sopenharmony_ci 35608c2ecf20Sopenharmony_ci /* Before the request is executed, the timeline/cachline is fixed */ 35618c2ecf20Sopenharmony_ci 35628c2ecf20Sopenharmony_ci cl = rcu_dereference_protected(rq->hwsp_cacheline, 1); 35638c2ecf20Sopenharmony_ci if (cl) 35648c2ecf20Sopenharmony_ci return cl->ggtt_offset; 35658c2ecf20Sopenharmony_ci 35668c2ecf20Sopenharmony_ci return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset; 35678c2ecf20Sopenharmony_ci} 35688c2ecf20Sopenharmony_ci 35698c2ecf20Sopenharmony_cistatic int gen8_emit_init_breadcrumb(struct i915_request *rq) 35708c2ecf20Sopenharmony_ci{ 35718c2ecf20Sopenharmony_ci u32 *cs; 35728c2ecf20Sopenharmony_ci 35738c2ecf20Sopenharmony_ci GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq)); 35748c2ecf20Sopenharmony_ci if (!i915_request_timeline(rq)->has_initial_breadcrumb) 35758c2ecf20Sopenharmony_ci return 0; 35768c2ecf20Sopenharmony_ci 35778c2ecf20Sopenharmony_ci cs = intel_ring_begin(rq, 6); 35788c2ecf20Sopenharmony_ci if (IS_ERR(cs)) 35798c2ecf20Sopenharmony_ci return PTR_ERR(cs); 35808c2ecf20Sopenharmony_ci 35818c2ecf20Sopenharmony_ci /* 35828c2ecf20Sopenharmony_ci * Check if we have been preempted before we even get started. 35838c2ecf20Sopenharmony_ci * 35848c2ecf20Sopenharmony_ci * After this point i915_request_started() reports true, even if 35858c2ecf20Sopenharmony_ci * we get preempted and so are no longer running. 35868c2ecf20Sopenharmony_ci */ 35878c2ecf20Sopenharmony_ci *cs++ = MI_ARB_CHECK; 35888c2ecf20Sopenharmony_ci *cs++ = MI_NOOP; 35898c2ecf20Sopenharmony_ci 35908c2ecf20Sopenharmony_ci *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 35918c2ecf20Sopenharmony_ci *cs++ = hwsp_offset(rq); 35928c2ecf20Sopenharmony_ci *cs++ = 0; 35938c2ecf20Sopenharmony_ci *cs++ = rq->fence.seqno - 1; 35948c2ecf20Sopenharmony_ci 35958c2ecf20Sopenharmony_ci intel_ring_advance(rq, cs); 35968c2ecf20Sopenharmony_ci 35978c2ecf20Sopenharmony_ci /* Record the updated position of the request's payload */ 35988c2ecf20Sopenharmony_ci rq->infix = intel_ring_offset(rq, cs); 35998c2ecf20Sopenharmony_ci 36008c2ecf20Sopenharmony_ci __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags); 36018c2ecf20Sopenharmony_ci 36028c2ecf20Sopenharmony_ci return 0; 36038c2ecf20Sopenharmony_ci} 36048c2ecf20Sopenharmony_ci 36058c2ecf20Sopenharmony_cistatic int emit_pdps(struct i915_request *rq) 36068c2ecf20Sopenharmony_ci{ 36078c2ecf20Sopenharmony_ci const struct intel_engine_cs * const engine = rq->engine; 36088c2ecf20Sopenharmony_ci struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm); 36098c2ecf20Sopenharmony_ci int err, i; 36108c2ecf20Sopenharmony_ci u32 *cs; 36118c2ecf20Sopenharmony_ci 36128c2ecf20Sopenharmony_ci GEM_BUG_ON(intel_vgpu_active(rq->engine->i915)); 36138c2ecf20Sopenharmony_ci 36148c2ecf20Sopenharmony_ci /* 36158c2ecf20Sopenharmony_ci * Beware ye of the dragons, this sequence is magic! 36168c2ecf20Sopenharmony_ci * 36178c2ecf20Sopenharmony_ci * Small changes to this sequence can cause anything from 36188c2ecf20Sopenharmony_ci * GPU hangs to forcewake errors and machine lockups! 36198c2ecf20Sopenharmony_ci */ 36208c2ecf20Sopenharmony_ci 36218c2ecf20Sopenharmony_ci /* Flush any residual operations from the context load */ 36228c2ecf20Sopenharmony_ci err = engine->emit_flush(rq, EMIT_FLUSH); 36238c2ecf20Sopenharmony_ci if (err) 36248c2ecf20Sopenharmony_ci return err; 36258c2ecf20Sopenharmony_ci 36268c2ecf20Sopenharmony_ci /* Magic required to prevent forcewake errors! */ 36278c2ecf20Sopenharmony_ci err = engine->emit_flush(rq, EMIT_INVALIDATE); 36288c2ecf20Sopenharmony_ci if (err) 36298c2ecf20Sopenharmony_ci return err; 36308c2ecf20Sopenharmony_ci 36318c2ecf20Sopenharmony_ci cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); 36328c2ecf20Sopenharmony_ci if (IS_ERR(cs)) 36338c2ecf20Sopenharmony_ci return PTR_ERR(cs); 36348c2ecf20Sopenharmony_ci 36358c2ecf20Sopenharmony_ci /* Ensure the LRI have landed before we invalidate & continue */ 36368c2ecf20Sopenharmony_ci *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; 36378c2ecf20Sopenharmony_ci for (i = GEN8_3LVL_PDPES; i--; ) { 36388c2ecf20Sopenharmony_ci const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 36398c2ecf20Sopenharmony_ci u32 base = engine->mmio_base; 36408c2ecf20Sopenharmony_ci 36418c2ecf20Sopenharmony_ci *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); 36428c2ecf20Sopenharmony_ci *cs++ = upper_32_bits(pd_daddr); 36438c2ecf20Sopenharmony_ci *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); 36448c2ecf20Sopenharmony_ci *cs++ = lower_32_bits(pd_daddr); 36458c2ecf20Sopenharmony_ci } 36468c2ecf20Sopenharmony_ci *cs++ = MI_NOOP; 36478c2ecf20Sopenharmony_ci 36488c2ecf20Sopenharmony_ci intel_ring_advance(rq, cs); 36498c2ecf20Sopenharmony_ci 36508c2ecf20Sopenharmony_ci return 0; 36518c2ecf20Sopenharmony_ci} 36528c2ecf20Sopenharmony_ci 36538c2ecf20Sopenharmony_cistatic int execlists_request_alloc(struct i915_request *request) 36548c2ecf20Sopenharmony_ci{ 36558c2ecf20Sopenharmony_ci int ret; 36568c2ecf20Sopenharmony_ci 36578c2ecf20Sopenharmony_ci GEM_BUG_ON(!intel_context_is_pinned(request->context)); 36588c2ecf20Sopenharmony_ci 36598c2ecf20Sopenharmony_ci /* 36608c2ecf20Sopenharmony_ci * Flush enough space to reduce the likelihood of waiting after 36618c2ecf20Sopenharmony_ci * we start building the request - in which case we will just 36628c2ecf20Sopenharmony_ci * have to repeat work. 36638c2ecf20Sopenharmony_ci */ 36648c2ecf20Sopenharmony_ci request->reserved_space += EXECLISTS_REQUEST_SIZE; 36658c2ecf20Sopenharmony_ci 36668c2ecf20Sopenharmony_ci /* 36678c2ecf20Sopenharmony_ci * Note that after this point, we have committed to using 36688c2ecf20Sopenharmony_ci * this request as it is being used to both track the 36698c2ecf20Sopenharmony_ci * state of engine initialisation and liveness of the 36708c2ecf20Sopenharmony_ci * golden renderstate above. Think twice before you try 36718c2ecf20Sopenharmony_ci * to cancel/unwind this request now. 36728c2ecf20Sopenharmony_ci */ 36738c2ecf20Sopenharmony_ci 36748c2ecf20Sopenharmony_ci if (!i915_vm_is_4lvl(request->context->vm)) { 36758c2ecf20Sopenharmony_ci ret = emit_pdps(request); 36768c2ecf20Sopenharmony_ci if (ret) 36778c2ecf20Sopenharmony_ci return ret; 36788c2ecf20Sopenharmony_ci } 36798c2ecf20Sopenharmony_ci 36808c2ecf20Sopenharmony_ci /* Unconditionally invalidate GPU caches and TLBs. */ 36818c2ecf20Sopenharmony_ci ret = request->engine->emit_flush(request, EMIT_INVALIDATE); 36828c2ecf20Sopenharmony_ci if (ret) 36838c2ecf20Sopenharmony_ci return ret; 36848c2ecf20Sopenharmony_ci 36858c2ecf20Sopenharmony_ci request->reserved_space -= EXECLISTS_REQUEST_SIZE; 36868c2ecf20Sopenharmony_ci return 0; 36878c2ecf20Sopenharmony_ci} 36888c2ecf20Sopenharmony_ci 36898c2ecf20Sopenharmony_ci/* 36908c2ecf20Sopenharmony_ci * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after 36918c2ecf20Sopenharmony_ci * PIPE_CONTROL instruction. This is required for the flush to happen correctly 36928c2ecf20Sopenharmony_ci * but there is a slight complication as this is applied in WA batch where the 36938c2ecf20Sopenharmony_ci * values are only initialized once so we cannot take register value at the 36948c2ecf20Sopenharmony_ci * beginning and reuse it further; hence we save its value to memory, upload a 36958c2ecf20Sopenharmony_ci * constant value with bit21 set and then we restore it back with the saved value. 36968c2ecf20Sopenharmony_ci * To simplify the WA, a constant value is formed by using the default value 36978c2ecf20Sopenharmony_ci * of this register. This shouldn't be a problem because we are only modifying 36988c2ecf20Sopenharmony_ci * it for a short period and this batch in non-premptible. We can ofcourse 36998c2ecf20Sopenharmony_ci * use additional instructions that read the actual value of the register 37008c2ecf20Sopenharmony_ci * at that time and set our bit of interest but it makes the WA complicated. 37018c2ecf20Sopenharmony_ci * 37028c2ecf20Sopenharmony_ci * This WA is also required for Gen9 so extracting as a function avoids 37038c2ecf20Sopenharmony_ci * code duplication. 37048c2ecf20Sopenharmony_ci */ 37058c2ecf20Sopenharmony_cistatic u32 * 37068c2ecf20Sopenharmony_cigen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) 37078c2ecf20Sopenharmony_ci{ 37088c2ecf20Sopenharmony_ci /* NB no one else is allowed to scribble over scratch + 256! */ 37098c2ecf20Sopenharmony_ci *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; 37108c2ecf20Sopenharmony_ci *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); 37118c2ecf20Sopenharmony_ci *batch++ = intel_gt_scratch_offset(engine->gt, 37128c2ecf20Sopenharmony_ci INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); 37138c2ecf20Sopenharmony_ci *batch++ = 0; 37148c2ecf20Sopenharmony_ci 37158c2ecf20Sopenharmony_ci *batch++ = MI_LOAD_REGISTER_IMM(1); 37168c2ecf20Sopenharmony_ci *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); 37178c2ecf20Sopenharmony_ci *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; 37188c2ecf20Sopenharmony_ci 37198c2ecf20Sopenharmony_ci batch = gen8_emit_pipe_control(batch, 37208c2ecf20Sopenharmony_ci PIPE_CONTROL_CS_STALL | 37218c2ecf20Sopenharmony_ci PIPE_CONTROL_DC_FLUSH_ENABLE, 37228c2ecf20Sopenharmony_ci 0); 37238c2ecf20Sopenharmony_ci 37248c2ecf20Sopenharmony_ci *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; 37258c2ecf20Sopenharmony_ci *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); 37268c2ecf20Sopenharmony_ci *batch++ = intel_gt_scratch_offset(engine->gt, 37278c2ecf20Sopenharmony_ci INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); 37288c2ecf20Sopenharmony_ci *batch++ = 0; 37298c2ecf20Sopenharmony_ci 37308c2ecf20Sopenharmony_ci return batch; 37318c2ecf20Sopenharmony_ci} 37328c2ecf20Sopenharmony_ci 37338c2ecf20Sopenharmony_ci/* 37348c2ecf20Sopenharmony_ci * Typically we only have one indirect_ctx and per_ctx batch buffer which are 37358c2ecf20Sopenharmony_ci * initialized at the beginning and shared across all contexts but this field 37368c2ecf20Sopenharmony_ci * helps us to have multiple batches at different offsets and select them based 37378c2ecf20Sopenharmony_ci * on a criteria. At the moment this batch always start at the beginning of the page 37388c2ecf20Sopenharmony_ci * and at this point we don't have multiple wa_ctx batch buffers. 37398c2ecf20Sopenharmony_ci * 37408c2ecf20Sopenharmony_ci * The number of WA applied are not known at the beginning; we use this field 37418c2ecf20Sopenharmony_ci * to return the no of DWORDS written. 37428c2ecf20Sopenharmony_ci * 37438c2ecf20Sopenharmony_ci * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END 37448c2ecf20Sopenharmony_ci * so it adds NOOPs as padding to make it cacheline aligned. 37458c2ecf20Sopenharmony_ci * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together 37468c2ecf20Sopenharmony_ci * makes a complete batch buffer. 37478c2ecf20Sopenharmony_ci */ 37488c2ecf20Sopenharmony_cistatic u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) 37498c2ecf20Sopenharmony_ci{ 37508c2ecf20Sopenharmony_ci /* WaDisableCtxRestoreArbitration:bdw,chv */ 37518c2ecf20Sopenharmony_ci *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 37528c2ecf20Sopenharmony_ci 37538c2ecf20Sopenharmony_ci /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */ 37548c2ecf20Sopenharmony_ci if (IS_BROADWELL(engine->i915)) 37558c2ecf20Sopenharmony_ci batch = gen8_emit_flush_coherentl3_wa(engine, batch); 37568c2ecf20Sopenharmony_ci 37578c2ecf20Sopenharmony_ci /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ 37588c2ecf20Sopenharmony_ci /* Actual scratch location is at 128 bytes offset */ 37598c2ecf20Sopenharmony_ci batch = gen8_emit_pipe_control(batch, 37608c2ecf20Sopenharmony_ci PIPE_CONTROL_FLUSH_L3 | 37618c2ecf20Sopenharmony_ci PIPE_CONTROL_STORE_DATA_INDEX | 37628c2ecf20Sopenharmony_ci PIPE_CONTROL_CS_STALL | 37638c2ecf20Sopenharmony_ci PIPE_CONTROL_QW_WRITE, 37648c2ecf20Sopenharmony_ci LRC_PPHWSP_SCRATCH_ADDR); 37658c2ecf20Sopenharmony_ci 37668c2ecf20Sopenharmony_ci *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 37678c2ecf20Sopenharmony_ci 37688c2ecf20Sopenharmony_ci /* Pad to end of cacheline */ 37698c2ecf20Sopenharmony_ci while ((unsigned long)batch % CACHELINE_BYTES) 37708c2ecf20Sopenharmony_ci *batch++ = MI_NOOP; 37718c2ecf20Sopenharmony_ci 37728c2ecf20Sopenharmony_ci /* 37738c2ecf20Sopenharmony_ci * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because 37748c2ecf20Sopenharmony_ci * execution depends on the length specified in terms of cache lines 37758c2ecf20Sopenharmony_ci * in the register CTX_RCS_INDIRECT_CTX 37768c2ecf20Sopenharmony_ci */ 37778c2ecf20Sopenharmony_ci 37788c2ecf20Sopenharmony_ci return batch; 37798c2ecf20Sopenharmony_ci} 37808c2ecf20Sopenharmony_ci 37818c2ecf20Sopenharmony_cistruct lri { 37828c2ecf20Sopenharmony_ci i915_reg_t reg; 37838c2ecf20Sopenharmony_ci u32 value; 37848c2ecf20Sopenharmony_ci}; 37858c2ecf20Sopenharmony_ci 37868c2ecf20Sopenharmony_cistatic u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count) 37878c2ecf20Sopenharmony_ci{ 37888c2ecf20Sopenharmony_ci GEM_BUG_ON(!count || count > 63); 37898c2ecf20Sopenharmony_ci 37908c2ecf20Sopenharmony_ci *batch++ = MI_LOAD_REGISTER_IMM(count); 37918c2ecf20Sopenharmony_ci do { 37928c2ecf20Sopenharmony_ci *batch++ = i915_mmio_reg_offset(lri->reg); 37938c2ecf20Sopenharmony_ci *batch++ = lri->value; 37948c2ecf20Sopenharmony_ci } while (lri++, --count); 37958c2ecf20Sopenharmony_ci *batch++ = MI_NOOP; 37968c2ecf20Sopenharmony_ci 37978c2ecf20Sopenharmony_ci return batch; 37988c2ecf20Sopenharmony_ci} 37998c2ecf20Sopenharmony_ci 38008c2ecf20Sopenharmony_cistatic u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) 38018c2ecf20Sopenharmony_ci{ 38028c2ecf20Sopenharmony_ci static const struct lri lri[] = { 38038c2ecf20Sopenharmony_ci /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ 38048c2ecf20Sopenharmony_ci { 38058c2ecf20Sopenharmony_ci COMMON_SLICE_CHICKEN2, 38068c2ecf20Sopenharmony_ci __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE, 38078c2ecf20Sopenharmony_ci 0), 38088c2ecf20Sopenharmony_ci }, 38098c2ecf20Sopenharmony_ci 38108c2ecf20Sopenharmony_ci /* BSpec: 11391 */ 38118c2ecf20Sopenharmony_ci { 38128c2ecf20Sopenharmony_ci FF_SLICE_CHICKEN, 38138c2ecf20Sopenharmony_ci __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX, 38148c2ecf20Sopenharmony_ci FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX), 38158c2ecf20Sopenharmony_ci }, 38168c2ecf20Sopenharmony_ci 38178c2ecf20Sopenharmony_ci /* BSpec: 11299 */ 38188c2ecf20Sopenharmony_ci { 38198c2ecf20Sopenharmony_ci _3D_CHICKEN3, 38208c2ecf20Sopenharmony_ci __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX, 38218c2ecf20Sopenharmony_ci _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX), 38228c2ecf20Sopenharmony_ci } 38238c2ecf20Sopenharmony_ci }; 38248c2ecf20Sopenharmony_ci 38258c2ecf20Sopenharmony_ci *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 38268c2ecf20Sopenharmony_ci 38278c2ecf20Sopenharmony_ci /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ 38288c2ecf20Sopenharmony_ci batch = gen8_emit_flush_coherentl3_wa(engine, batch); 38298c2ecf20Sopenharmony_ci 38308c2ecf20Sopenharmony_ci /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */ 38318c2ecf20Sopenharmony_ci batch = gen8_emit_pipe_control(batch, 38328c2ecf20Sopenharmony_ci PIPE_CONTROL_FLUSH_L3 | 38338c2ecf20Sopenharmony_ci PIPE_CONTROL_STORE_DATA_INDEX | 38348c2ecf20Sopenharmony_ci PIPE_CONTROL_CS_STALL | 38358c2ecf20Sopenharmony_ci PIPE_CONTROL_QW_WRITE, 38368c2ecf20Sopenharmony_ci LRC_PPHWSP_SCRATCH_ADDR); 38378c2ecf20Sopenharmony_ci 38388c2ecf20Sopenharmony_ci batch = emit_lri(batch, lri, ARRAY_SIZE(lri)); 38398c2ecf20Sopenharmony_ci 38408c2ecf20Sopenharmony_ci /* WaMediaPoolStateCmdInWABB:bxt,glk */ 38418c2ecf20Sopenharmony_ci if (HAS_POOLED_EU(engine->i915)) { 38428c2ecf20Sopenharmony_ci /* 38438c2ecf20Sopenharmony_ci * EU pool configuration is setup along with golden context 38448c2ecf20Sopenharmony_ci * during context initialization. This value depends on 38458c2ecf20Sopenharmony_ci * device type (2x6 or 3x6) and needs to be updated based 38468c2ecf20Sopenharmony_ci * on which subslice is disabled especially for 2x6 38478c2ecf20Sopenharmony_ci * devices, however it is safe to load default 38488c2ecf20Sopenharmony_ci * configuration of 3x6 device instead of masking off 38498c2ecf20Sopenharmony_ci * corresponding bits because HW ignores bits of a disabled 38508c2ecf20Sopenharmony_ci * subslice and drops down to appropriate config. Please 38518c2ecf20Sopenharmony_ci * see render_state_setup() in i915_gem_render_state.c for 38528c2ecf20Sopenharmony_ci * possible configurations, to avoid duplication they are 38538c2ecf20Sopenharmony_ci * not shown here again. 38548c2ecf20Sopenharmony_ci */ 38558c2ecf20Sopenharmony_ci *batch++ = GEN9_MEDIA_POOL_STATE; 38568c2ecf20Sopenharmony_ci *batch++ = GEN9_MEDIA_POOL_ENABLE; 38578c2ecf20Sopenharmony_ci *batch++ = 0x00777000; 38588c2ecf20Sopenharmony_ci *batch++ = 0; 38598c2ecf20Sopenharmony_ci *batch++ = 0; 38608c2ecf20Sopenharmony_ci *batch++ = 0; 38618c2ecf20Sopenharmony_ci } 38628c2ecf20Sopenharmony_ci 38638c2ecf20Sopenharmony_ci *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 38648c2ecf20Sopenharmony_ci 38658c2ecf20Sopenharmony_ci /* Pad to end of cacheline */ 38668c2ecf20Sopenharmony_ci while ((unsigned long)batch % CACHELINE_BYTES) 38678c2ecf20Sopenharmony_ci *batch++ = MI_NOOP; 38688c2ecf20Sopenharmony_ci 38698c2ecf20Sopenharmony_ci return batch; 38708c2ecf20Sopenharmony_ci} 38718c2ecf20Sopenharmony_ci 38728c2ecf20Sopenharmony_cistatic u32 * 38738c2ecf20Sopenharmony_cigen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) 38748c2ecf20Sopenharmony_ci{ 38758c2ecf20Sopenharmony_ci int i; 38768c2ecf20Sopenharmony_ci 38778c2ecf20Sopenharmony_ci /* 38788c2ecf20Sopenharmony_ci * WaPipeControlBefore3DStateSamplePattern: cnl 38798c2ecf20Sopenharmony_ci * 38808c2ecf20Sopenharmony_ci * Ensure the engine is idle prior to programming a 38818c2ecf20Sopenharmony_ci * 3DSTATE_SAMPLE_PATTERN during a context restore. 38828c2ecf20Sopenharmony_ci */ 38838c2ecf20Sopenharmony_ci batch = gen8_emit_pipe_control(batch, 38848c2ecf20Sopenharmony_ci PIPE_CONTROL_CS_STALL, 38858c2ecf20Sopenharmony_ci 0); 38868c2ecf20Sopenharmony_ci /* 38878c2ecf20Sopenharmony_ci * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for 38888c2ecf20Sopenharmony_ci * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in 38898c2ecf20Sopenharmony_ci * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is 38908c2ecf20Sopenharmony_ci * confusing. Since gen8_emit_pipe_control() already advances the 38918c2ecf20Sopenharmony_ci * batch by 6 dwords, we advance the other 10 here, completing a 38928c2ecf20Sopenharmony_ci * cacheline. It's not clear if the workaround requires this padding 38938c2ecf20Sopenharmony_ci * before other commands, or if it's just the regular padding we would 38948c2ecf20Sopenharmony_ci * already have for the workaround bb, so leave it here for now. 38958c2ecf20Sopenharmony_ci */ 38968c2ecf20Sopenharmony_ci for (i = 0; i < 10; i++) 38978c2ecf20Sopenharmony_ci *batch++ = MI_NOOP; 38988c2ecf20Sopenharmony_ci 38998c2ecf20Sopenharmony_ci /* Pad to end of cacheline */ 39008c2ecf20Sopenharmony_ci while ((unsigned long)batch % CACHELINE_BYTES) 39018c2ecf20Sopenharmony_ci *batch++ = MI_NOOP; 39028c2ecf20Sopenharmony_ci 39038c2ecf20Sopenharmony_ci return batch; 39048c2ecf20Sopenharmony_ci} 39058c2ecf20Sopenharmony_ci 39068c2ecf20Sopenharmony_ci#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE) 39078c2ecf20Sopenharmony_ci 39088c2ecf20Sopenharmony_cistatic int lrc_setup_wa_ctx(struct intel_engine_cs *engine) 39098c2ecf20Sopenharmony_ci{ 39108c2ecf20Sopenharmony_ci struct drm_i915_gem_object *obj; 39118c2ecf20Sopenharmony_ci struct i915_vma *vma; 39128c2ecf20Sopenharmony_ci int err; 39138c2ecf20Sopenharmony_ci 39148c2ecf20Sopenharmony_ci obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE); 39158c2ecf20Sopenharmony_ci if (IS_ERR(obj)) 39168c2ecf20Sopenharmony_ci return PTR_ERR(obj); 39178c2ecf20Sopenharmony_ci 39188c2ecf20Sopenharmony_ci vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); 39198c2ecf20Sopenharmony_ci if (IS_ERR(vma)) { 39208c2ecf20Sopenharmony_ci err = PTR_ERR(vma); 39218c2ecf20Sopenharmony_ci goto err; 39228c2ecf20Sopenharmony_ci } 39238c2ecf20Sopenharmony_ci 39248c2ecf20Sopenharmony_ci err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH); 39258c2ecf20Sopenharmony_ci if (err) 39268c2ecf20Sopenharmony_ci goto err; 39278c2ecf20Sopenharmony_ci 39288c2ecf20Sopenharmony_ci engine->wa_ctx.vma = vma; 39298c2ecf20Sopenharmony_ci return 0; 39308c2ecf20Sopenharmony_ci 39318c2ecf20Sopenharmony_cierr: 39328c2ecf20Sopenharmony_ci i915_gem_object_put(obj); 39338c2ecf20Sopenharmony_ci return err; 39348c2ecf20Sopenharmony_ci} 39358c2ecf20Sopenharmony_ci 39368c2ecf20Sopenharmony_cistatic void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) 39378c2ecf20Sopenharmony_ci{ 39388c2ecf20Sopenharmony_ci i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); 39398c2ecf20Sopenharmony_ci 39408c2ecf20Sopenharmony_ci /* Called on error unwind, clear all flags to prevent further use */ 39418c2ecf20Sopenharmony_ci memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx)); 39428c2ecf20Sopenharmony_ci} 39438c2ecf20Sopenharmony_ci 39448c2ecf20Sopenharmony_citypedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); 39458c2ecf20Sopenharmony_ci 39468c2ecf20Sopenharmony_cistatic int intel_init_workaround_bb(struct intel_engine_cs *engine) 39478c2ecf20Sopenharmony_ci{ 39488c2ecf20Sopenharmony_ci struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; 39498c2ecf20Sopenharmony_ci struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx, 39508c2ecf20Sopenharmony_ci &wa_ctx->per_ctx }; 39518c2ecf20Sopenharmony_ci wa_bb_func_t wa_bb_fn[2]; 39528c2ecf20Sopenharmony_ci void *batch, *batch_ptr; 39538c2ecf20Sopenharmony_ci unsigned int i; 39548c2ecf20Sopenharmony_ci int ret; 39558c2ecf20Sopenharmony_ci 39568c2ecf20Sopenharmony_ci if (engine->class != RENDER_CLASS) 39578c2ecf20Sopenharmony_ci return 0; 39588c2ecf20Sopenharmony_ci 39598c2ecf20Sopenharmony_ci switch (INTEL_GEN(engine->i915)) { 39608c2ecf20Sopenharmony_ci case 12: 39618c2ecf20Sopenharmony_ci case 11: 39628c2ecf20Sopenharmony_ci return 0; 39638c2ecf20Sopenharmony_ci case 10: 39648c2ecf20Sopenharmony_ci wa_bb_fn[0] = gen10_init_indirectctx_bb; 39658c2ecf20Sopenharmony_ci wa_bb_fn[1] = NULL; 39668c2ecf20Sopenharmony_ci break; 39678c2ecf20Sopenharmony_ci case 9: 39688c2ecf20Sopenharmony_ci wa_bb_fn[0] = gen9_init_indirectctx_bb; 39698c2ecf20Sopenharmony_ci wa_bb_fn[1] = NULL; 39708c2ecf20Sopenharmony_ci break; 39718c2ecf20Sopenharmony_ci case 8: 39728c2ecf20Sopenharmony_ci wa_bb_fn[0] = gen8_init_indirectctx_bb; 39738c2ecf20Sopenharmony_ci wa_bb_fn[1] = NULL; 39748c2ecf20Sopenharmony_ci break; 39758c2ecf20Sopenharmony_ci default: 39768c2ecf20Sopenharmony_ci MISSING_CASE(INTEL_GEN(engine->i915)); 39778c2ecf20Sopenharmony_ci return 0; 39788c2ecf20Sopenharmony_ci } 39798c2ecf20Sopenharmony_ci 39808c2ecf20Sopenharmony_ci ret = lrc_setup_wa_ctx(engine); 39818c2ecf20Sopenharmony_ci if (ret) { 39828c2ecf20Sopenharmony_ci drm_dbg(&engine->i915->drm, 39838c2ecf20Sopenharmony_ci "Failed to setup context WA page: %d\n", ret); 39848c2ecf20Sopenharmony_ci return ret; 39858c2ecf20Sopenharmony_ci } 39868c2ecf20Sopenharmony_ci 39878c2ecf20Sopenharmony_ci batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB); 39888c2ecf20Sopenharmony_ci 39898c2ecf20Sopenharmony_ci /* 39908c2ecf20Sopenharmony_ci * Emit the two workaround batch buffers, recording the offset from the 39918c2ecf20Sopenharmony_ci * start of the workaround batch buffer object for each and their 39928c2ecf20Sopenharmony_ci * respective sizes. 39938c2ecf20Sopenharmony_ci */ 39948c2ecf20Sopenharmony_ci batch_ptr = batch; 39958c2ecf20Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { 39968c2ecf20Sopenharmony_ci wa_bb[i]->offset = batch_ptr - batch; 39978c2ecf20Sopenharmony_ci if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, 39988c2ecf20Sopenharmony_ci CACHELINE_BYTES))) { 39998c2ecf20Sopenharmony_ci ret = -EINVAL; 40008c2ecf20Sopenharmony_ci break; 40018c2ecf20Sopenharmony_ci } 40028c2ecf20Sopenharmony_ci if (wa_bb_fn[i]) 40038c2ecf20Sopenharmony_ci batch_ptr = wa_bb_fn[i](engine, batch_ptr); 40048c2ecf20Sopenharmony_ci wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); 40058c2ecf20Sopenharmony_ci } 40068c2ecf20Sopenharmony_ci GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE); 40078c2ecf20Sopenharmony_ci 40088c2ecf20Sopenharmony_ci __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch); 40098c2ecf20Sopenharmony_ci __i915_gem_object_release_map(wa_ctx->vma->obj); 40108c2ecf20Sopenharmony_ci if (ret) 40118c2ecf20Sopenharmony_ci lrc_destroy_wa_ctx(engine); 40128c2ecf20Sopenharmony_ci 40138c2ecf20Sopenharmony_ci return ret; 40148c2ecf20Sopenharmony_ci} 40158c2ecf20Sopenharmony_ci 40168c2ecf20Sopenharmony_cistatic void reset_csb_pointers(struct intel_engine_cs *engine) 40178c2ecf20Sopenharmony_ci{ 40188c2ecf20Sopenharmony_ci struct intel_engine_execlists * const execlists = &engine->execlists; 40198c2ecf20Sopenharmony_ci const unsigned int reset_value = execlists->csb_size - 1; 40208c2ecf20Sopenharmony_ci 40218c2ecf20Sopenharmony_ci ring_set_paused(engine, 0); 40228c2ecf20Sopenharmony_ci 40238c2ecf20Sopenharmony_ci /* 40248c2ecf20Sopenharmony_ci * Sometimes Icelake forgets to reset its pointers on a GPU reset. 40258c2ecf20Sopenharmony_ci * Bludgeon them with a mmio update to be sure. 40268c2ecf20Sopenharmony_ci */ 40278c2ecf20Sopenharmony_ci ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, 40288c2ecf20Sopenharmony_ci 0xffff << 16 | reset_value << 8 | reset_value); 40298c2ecf20Sopenharmony_ci ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); 40308c2ecf20Sopenharmony_ci 40318c2ecf20Sopenharmony_ci /* 40328c2ecf20Sopenharmony_ci * After a reset, the HW starts writing into CSB entry [0]. We 40338c2ecf20Sopenharmony_ci * therefore have to set our HEAD pointer back one entry so that 40348c2ecf20Sopenharmony_ci * the *first* entry we check is entry 0. To complicate this further, 40358c2ecf20Sopenharmony_ci * as we don't wait for the first interrupt after reset, we have to 40368c2ecf20Sopenharmony_ci * fake the HW write to point back to the last entry so that our 40378c2ecf20Sopenharmony_ci * inline comparison of our cached head position against the last HW 40388c2ecf20Sopenharmony_ci * write works even before the first interrupt. 40398c2ecf20Sopenharmony_ci */ 40408c2ecf20Sopenharmony_ci execlists->csb_head = reset_value; 40418c2ecf20Sopenharmony_ci WRITE_ONCE(*execlists->csb_write, reset_value); 40428c2ecf20Sopenharmony_ci wmb(); /* Make sure this is visible to HW (paranoia?) */ 40438c2ecf20Sopenharmony_ci 40448c2ecf20Sopenharmony_ci /* Check that the GPU does indeed update the CSB entries! */ 40458c2ecf20Sopenharmony_ci memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64)); 40468c2ecf20Sopenharmony_ci invalidate_csb_entries(&execlists->csb_status[0], 40478c2ecf20Sopenharmony_ci &execlists->csb_status[reset_value]); 40488c2ecf20Sopenharmony_ci 40498c2ecf20Sopenharmony_ci /* Once more for luck and our trusty paranoia */ 40508c2ecf20Sopenharmony_ci ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, 40518c2ecf20Sopenharmony_ci 0xffff << 16 | reset_value << 8 | reset_value); 40528c2ecf20Sopenharmony_ci ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); 40538c2ecf20Sopenharmony_ci 40548c2ecf20Sopenharmony_ci GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value); 40558c2ecf20Sopenharmony_ci} 40568c2ecf20Sopenharmony_ci 40578c2ecf20Sopenharmony_cistatic void execlists_sanitize(struct intel_engine_cs *engine) 40588c2ecf20Sopenharmony_ci{ 40598c2ecf20Sopenharmony_ci /* 40608c2ecf20Sopenharmony_ci * Poison residual state on resume, in case the suspend didn't! 40618c2ecf20Sopenharmony_ci * 40628c2ecf20Sopenharmony_ci * We have to assume that across suspend/resume (or other loss 40638c2ecf20Sopenharmony_ci * of control) that the contents of our pinned buffers has been 40648c2ecf20Sopenharmony_ci * lost, replaced by garbage. Since this doesn't always happen, 40658c2ecf20Sopenharmony_ci * let's poison such state so that we more quickly spot when 40668c2ecf20Sopenharmony_ci * we falsely assume it has been preserved. 40678c2ecf20Sopenharmony_ci */ 40688c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 40698c2ecf20Sopenharmony_ci memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 40708c2ecf20Sopenharmony_ci 40718c2ecf20Sopenharmony_ci reset_csb_pointers(engine); 40728c2ecf20Sopenharmony_ci 40738c2ecf20Sopenharmony_ci /* 40748c2ecf20Sopenharmony_ci * The kernel_context HWSP is stored in the status_page. As above, 40758c2ecf20Sopenharmony_ci * that may be lost on resume/initialisation, and so we need to 40768c2ecf20Sopenharmony_ci * reset the value in the HWSP. 40778c2ecf20Sopenharmony_ci */ 40788c2ecf20Sopenharmony_ci intel_timeline_reset_seqno(engine->kernel_context->timeline); 40798c2ecf20Sopenharmony_ci 40808c2ecf20Sopenharmony_ci /* And scrub the dirty cachelines for the HWSP */ 40818c2ecf20Sopenharmony_ci clflush_cache_range(engine->status_page.addr, PAGE_SIZE); 40828c2ecf20Sopenharmony_ci} 40838c2ecf20Sopenharmony_ci 40848c2ecf20Sopenharmony_cistatic void enable_error_interrupt(struct intel_engine_cs *engine) 40858c2ecf20Sopenharmony_ci{ 40868c2ecf20Sopenharmony_ci u32 status; 40878c2ecf20Sopenharmony_ci 40888c2ecf20Sopenharmony_ci engine->execlists.error_interrupt = 0; 40898c2ecf20Sopenharmony_ci ENGINE_WRITE(engine, RING_EMR, ~0u); 40908c2ecf20Sopenharmony_ci ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */ 40918c2ecf20Sopenharmony_ci 40928c2ecf20Sopenharmony_ci status = ENGINE_READ(engine, RING_ESR); 40938c2ecf20Sopenharmony_ci if (unlikely(status)) { 40948c2ecf20Sopenharmony_ci drm_err(&engine->i915->drm, 40958c2ecf20Sopenharmony_ci "engine '%s' resumed still in error: %08x\n", 40968c2ecf20Sopenharmony_ci engine->name, status); 40978c2ecf20Sopenharmony_ci __intel_gt_reset(engine->gt, engine->mask); 40988c2ecf20Sopenharmony_ci } 40998c2ecf20Sopenharmony_ci 41008c2ecf20Sopenharmony_ci /* 41018c2ecf20Sopenharmony_ci * On current gen8+, we have 2 signals to play with 41028c2ecf20Sopenharmony_ci * 41038c2ecf20Sopenharmony_ci * - I915_ERROR_INSTUCTION (bit 0) 41048c2ecf20Sopenharmony_ci * 41058c2ecf20Sopenharmony_ci * Generate an error if the command parser encounters an invalid 41068c2ecf20Sopenharmony_ci * instruction 41078c2ecf20Sopenharmony_ci * 41088c2ecf20Sopenharmony_ci * This is a fatal error. 41098c2ecf20Sopenharmony_ci * 41108c2ecf20Sopenharmony_ci * - CP_PRIV (bit 2) 41118c2ecf20Sopenharmony_ci * 41128c2ecf20Sopenharmony_ci * Generate an error on privilege violation (where the CP replaces 41138c2ecf20Sopenharmony_ci * the instruction with a no-op). This also fires for writes into 41148c2ecf20Sopenharmony_ci * read-only scratch pages. 41158c2ecf20Sopenharmony_ci * 41168c2ecf20Sopenharmony_ci * This is a non-fatal error, parsing continues. 41178c2ecf20Sopenharmony_ci * 41188c2ecf20Sopenharmony_ci * * there are a few others defined for odd HW that we do not use 41198c2ecf20Sopenharmony_ci * 41208c2ecf20Sopenharmony_ci * Since CP_PRIV fires for cases where we have chosen to ignore the 41218c2ecf20Sopenharmony_ci * error (as the HW is validating and suppressing the mistakes), we 41228c2ecf20Sopenharmony_ci * only unmask the instruction error bit. 41238c2ecf20Sopenharmony_ci */ 41248c2ecf20Sopenharmony_ci ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION); 41258c2ecf20Sopenharmony_ci} 41268c2ecf20Sopenharmony_ci 41278c2ecf20Sopenharmony_cistatic void enable_execlists(struct intel_engine_cs *engine) 41288c2ecf20Sopenharmony_ci{ 41298c2ecf20Sopenharmony_ci u32 mode; 41308c2ecf20Sopenharmony_ci 41318c2ecf20Sopenharmony_ci assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 41328c2ecf20Sopenharmony_ci 41338c2ecf20Sopenharmony_ci intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 41348c2ecf20Sopenharmony_ci 41358c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) >= 11) 41368c2ecf20Sopenharmony_ci mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE); 41378c2ecf20Sopenharmony_ci else 41388c2ecf20Sopenharmony_ci mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE); 41398c2ecf20Sopenharmony_ci ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode); 41408c2ecf20Sopenharmony_ci 41418c2ecf20Sopenharmony_ci ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 41428c2ecf20Sopenharmony_ci 41438c2ecf20Sopenharmony_ci ENGINE_WRITE_FW(engine, 41448c2ecf20Sopenharmony_ci RING_HWS_PGA, 41458c2ecf20Sopenharmony_ci i915_ggtt_offset(engine->status_page.vma)); 41468c2ecf20Sopenharmony_ci ENGINE_POSTING_READ(engine, RING_HWS_PGA); 41478c2ecf20Sopenharmony_ci 41488c2ecf20Sopenharmony_ci enable_error_interrupt(engine); 41498c2ecf20Sopenharmony_ci 41508c2ecf20Sopenharmony_ci engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); 41518c2ecf20Sopenharmony_ci} 41528c2ecf20Sopenharmony_ci 41538c2ecf20Sopenharmony_cistatic bool unexpected_starting_state(struct intel_engine_cs *engine) 41548c2ecf20Sopenharmony_ci{ 41558c2ecf20Sopenharmony_ci bool unexpected = false; 41568c2ecf20Sopenharmony_ci 41578c2ecf20Sopenharmony_ci if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) { 41588c2ecf20Sopenharmony_ci drm_dbg(&engine->i915->drm, 41598c2ecf20Sopenharmony_ci "STOP_RING still set in RING_MI_MODE\n"); 41608c2ecf20Sopenharmony_ci unexpected = true; 41618c2ecf20Sopenharmony_ci } 41628c2ecf20Sopenharmony_ci 41638c2ecf20Sopenharmony_ci return unexpected; 41648c2ecf20Sopenharmony_ci} 41658c2ecf20Sopenharmony_ci 41668c2ecf20Sopenharmony_cistatic int execlists_resume(struct intel_engine_cs *engine) 41678c2ecf20Sopenharmony_ci{ 41688c2ecf20Sopenharmony_ci intel_mocs_init_engine(engine); 41698c2ecf20Sopenharmony_ci 41708c2ecf20Sopenharmony_ci intel_breadcrumbs_reset(engine->breadcrumbs); 41718c2ecf20Sopenharmony_ci 41728c2ecf20Sopenharmony_ci if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) { 41738c2ecf20Sopenharmony_ci struct drm_printer p = drm_debug_printer(__func__); 41748c2ecf20Sopenharmony_ci 41758c2ecf20Sopenharmony_ci intel_engine_dump(engine, &p, NULL); 41768c2ecf20Sopenharmony_ci } 41778c2ecf20Sopenharmony_ci 41788c2ecf20Sopenharmony_ci enable_execlists(engine); 41798c2ecf20Sopenharmony_ci 41808c2ecf20Sopenharmony_ci return 0; 41818c2ecf20Sopenharmony_ci} 41828c2ecf20Sopenharmony_ci 41838c2ecf20Sopenharmony_cistatic void execlists_reset_prepare(struct intel_engine_cs *engine) 41848c2ecf20Sopenharmony_ci{ 41858c2ecf20Sopenharmony_ci struct intel_engine_execlists * const execlists = &engine->execlists; 41868c2ecf20Sopenharmony_ci unsigned long flags; 41878c2ecf20Sopenharmony_ci 41888c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, "depth<-%d\n", 41898c2ecf20Sopenharmony_ci atomic_read(&execlists->tasklet.count)); 41908c2ecf20Sopenharmony_ci 41918c2ecf20Sopenharmony_ci /* 41928c2ecf20Sopenharmony_ci * Prevent request submission to the hardware until we have 41938c2ecf20Sopenharmony_ci * completed the reset in i915_gem_reset_finish(). If a request 41948c2ecf20Sopenharmony_ci * is completed by one engine, it may then queue a request 41958c2ecf20Sopenharmony_ci * to a second via its execlists->tasklet *just* as we are 41968c2ecf20Sopenharmony_ci * calling engine->resume() and also writing the ELSP. 41978c2ecf20Sopenharmony_ci * Turning off the execlists->tasklet until the reset is over 41988c2ecf20Sopenharmony_ci * prevents the race. 41998c2ecf20Sopenharmony_ci */ 42008c2ecf20Sopenharmony_ci __tasklet_disable_sync_once(&execlists->tasklet); 42018c2ecf20Sopenharmony_ci GEM_BUG_ON(!reset_in_progress(execlists)); 42028c2ecf20Sopenharmony_ci 42038c2ecf20Sopenharmony_ci /* And flush any current direct submission. */ 42048c2ecf20Sopenharmony_ci spin_lock_irqsave(&engine->active.lock, flags); 42058c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&engine->active.lock, flags); 42068c2ecf20Sopenharmony_ci 42078c2ecf20Sopenharmony_ci /* 42088c2ecf20Sopenharmony_ci * We stop engines, otherwise we might get failed reset and a 42098c2ecf20Sopenharmony_ci * dead gpu (on elk). Also as modern gpu as kbl can suffer 42108c2ecf20Sopenharmony_ci * from system hang if batchbuffer is progressing when 42118c2ecf20Sopenharmony_ci * the reset is issued, regardless of READY_TO_RESET ack. 42128c2ecf20Sopenharmony_ci * Thus assume it is best to stop engines on all gens 42138c2ecf20Sopenharmony_ci * where we have a gpu reset. 42148c2ecf20Sopenharmony_ci * 42158c2ecf20Sopenharmony_ci * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) 42168c2ecf20Sopenharmony_ci * 42178c2ecf20Sopenharmony_ci * FIXME: Wa for more modern gens needs to be validated 42188c2ecf20Sopenharmony_ci */ 42198c2ecf20Sopenharmony_ci ring_set_paused(engine, 1); 42208c2ecf20Sopenharmony_ci intel_engine_stop_cs(engine); 42218c2ecf20Sopenharmony_ci 42228c2ecf20Sopenharmony_ci engine->execlists.reset_ccid = active_ccid(engine); 42238c2ecf20Sopenharmony_ci} 42248c2ecf20Sopenharmony_ci 42258c2ecf20Sopenharmony_cistatic void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) 42268c2ecf20Sopenharmony_ci{ 42278c2ecf20Sopenharmony_ci int x; 42288c2ecf20Sopenharmony_ci 42298c2ecf20Sopenharmony_ci x = lrc_ring_mi_mode(engine); 42308c2ecf20Sopenharmony_ci if (x != -1) { 42318c2ecf20Sopenharmony_ci regs[x + 1] &= ~STOP_RING; 42328c2ecf20Sopenharmony_ci regs[x + 1] |= STOP_RING << 16; 42338c2ecf20Sopenharmony_ci } 42348c2ecf20Sopenharmony_ci} 42358c2ecf20Sopenharmony_ci 42368c2ecf20Sopenharmony_cistatic void __execlists_reset_reg_state(const struct intel_context *ce, 42378c2ecf20Sopenharmony_ci const struct intel_engine_cs *engine) 42388c2ecf20Sopenharmony_ci{ 42398c2ecf20Sopenharmony_ci u32 *regs = ce->lrc_reg_state; 42408c2ecf20Sopenharmony_ci 42418c2ecf20Sopenharmony_ci __reset_stop_ring(regs, engine); 42428c2ecf20Sopenharmony_ci} 42438c2ecf20Sopenharmony_ci 42448c2ecf20Sopenharmony_cistatic void __execlists_reset(struct intel_engine_cs *engine, bool stalled) 42458c2ecf20Sopenharmony_ci{ 42468c2ecf20Sopenharmony_ci struct intel_engine_execlists * const execlists = &engine->execlists; 42478c2ecf20Sopenharmony_ci struct intel_context *ce; 42488c2ecf20Sopenharmony_ci struct i915_request *rq; 42498c2ecf20Sopenharmony_ci u32 head; 42508c2ecf20Sopenharmony_ci 42518c2ecf20Sopenharmony_ci mb(); /* paranoia: read the CSB pointers from after the reset */ 42528c2ecf20Sopenharmony_ci clflush(execlists->csb_write); 42538c2ecf20Sopenharmony_ci mb(); 42548c2ecf20Sopenharmony_ci 42558c2ecf20Sopenharmony_ci process_csb(engine); /* drain preemption events */ 42568c2ecf20Sopenharmony_ci 42578c2ecf20Sopenharmony_ci /* Following the reset, we need to reload the CSB read/write pointers */ 42588c2ecf20Sopenharmony_ci reset_csb_pointers(engine); 42598c2ecf20Sopenharmony_ci 42608c2ecf20Sopenharmony_ci /* 42618c2ecf20Sopenharmony_ci * Save the currently executing context, even if we completed 42628c2ecf20Sopenharmony_ci * its request, it was still running at the time of the 42638c2ecf20Sopenharmony_ci * reset and will have been clobbered. 42648c2ecf20Sopenharmony_ci */ 42658c2ecf20Sopenharmony_ci rq = active_context(engine, engine->execlists.reset_ccid); 42668c2ecf20Sopenharmony_ci if (!rq) 42678c2ecf20Sopenharmony_ci goto unwind; 42688c2ecf20Sopenharmony_ci 42698c2ecf20Sopenharmony_ci ce = rq->context; 42708c2ecf20Sopenharmony_ci GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); 42718c2ecf20Sopenharmony_ci 42728c2ecf20Sopenharmony_ci if (i915_request_completed(rq)) { 42738c2ecf20Sopenharmony_ci /* Idle context; tidy up the ring so we can restart afresh */ 42748c2ecf20Sopenharmony_ci head = intel_ring_wrap(ce->ring, rq->tail); 42758c2ecf20Sopenharmony_ci goto out_replay; 42768c2ecf20Sopenharmony_ci } 42778c2ecf20Sopenharmony_ci 42788c2ecf20Sopenharmony_ci /* We still have requests in-flight; the engine should be active */ 42798c2ecf20Sopenharmony_ci GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 42808c2ecf20Sopenharmony_ci 42818c2ecf20Sopenharmony_ci /* Context has requests still in-flight; it should not be idle! */ 42828c2ecf20Sopenharmony_ci GEM_BUG_ON(i915_active_is_idle(&ce->active)); 42838c2ecf20Sopenharmony_ci 42848c2ecf20Sopenharmony_ci rq = active_request(ce->timeline, rq); 42858c2ecf20Sopenharmony_ci head = intel_ring_wrap(ce->ring, rq->head); 42868c2ecf20Sopenharmony_ci GEM_BUG_ON(head == ce->ring->tail); 42878c2ecf20Sopenharmony_ci 42888c2ecf20Sopenharmony_ci /* 42898c2ecf20Sopenharmony_ci * If this request hasn't started yet, e.g. it is waiting on a 42908c2ecf20Sopenharmony_ci * semaphore, we need to avoid skipping the request or else we 42918c2ecf20Sopenharmony_ci * break the signaling chain. However, if the context is corrupt 42928c2ecf20Sopenharmony_ci * the request will not restart and we will be stuck with a wedged 42938c2ecf20Sopenharmony_ci * device. It is quite often the case that if we issue a reset 42948c2ecf20Sopenharmony_ci * while the GPU is loading the context image, that the context 42958c2ecf20Sopenharmony_ci * image becomes corrupt. 42968c2ecf20Sopenharmony_ci * 42978c2ecf20Sopenharmony_ci * Otherwise, if we have not started yet, the request should replay 42988c2ecf20Sopenharmony_ci * perfectly and we do not need to flag the result as being erroneous. 42998c2ecf20Sopenharmony_ci */ 43008c2ecf20Sopenharmony_ci if (!i915_request_started(rq)) 43018c2ecf20Sopenharmony_ci goto out_replay; 43028c2ecf20Sopenharmony_ci 43038c2ecf20Sopenharmony_ci /* 43048c2ecf20Sopenharmony_ci * If the request was innocent, we leave the request in the ELSP 43058c2ecf20Sopenharmony_ci * and will try to replay it on restarting. The context image may 43068c2ecf20Sopenharmony_ci * have been corrupted by the reset, in which case we may have 43078c2ecf20Sopenharmony_ci * to service a new GPU hang, but more likely we can continue on 43088c2ecf20Sopenharmony_ci * without impact. 43098c2ecf20Sopenharmony_ci * 43108c2ecf20Sopenharmony_ci * If the request was guilty, we presume the context is corrupt 43118c2ecf20Sopenharmony_ci * and have to at least restore the RING register in the context 43128c2ecf20Sopenharmony_ci * image back to the expected values to skip over the guilty request. 43138c2ecf20Sopenharmony_ci */ 43148c2ecf20Sopenharmony_ci __i915_request_reset(rq, stalled); 43158c2ecf20Sopenharmony_ci 43168c2ecf20Sopenharmony_ci /* 43178c2ecf20Sopenharmony_ci * We want a simple context + ring to execute the breadcrumb update. 43188c2ecf20Sopenharmony_ci * We cannot rely on the context being intact across the GPU hang, 43198c2ecf20Sopenharmony_ci * so clear it and rebuild just what we need for the breadcrumb. 43208c2ecf20Sopenharmony_ci * All pending requests for this context will be zapped, and any 43218c2ecf20Sopenharmony_ci * future request will be after userspace has had the opportunity 43228c2ecf20Sopenharmony_ci * to recreate its own state. 43238c2ecf20Sopenharmony_ci */ 43248c2ecf20Sopenharmony_ciout_replay: 43258c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", 43268c2ecf20Sopenharmony_ci head, ce->ring->tail); 43278c2ecf20Sopenharmony_ci __execlists_reset_reg_state(ce, engine); 43288c2ecf20Sopenharmony_ci __execlists_update_reg_state(ce, engine, head); 43298c2ecf20Sopenharmony_ci ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ 43308c2ecf20Sopenharmony_ci 43318c2ecf20Sopenharmony_ciunwind: 43328c2ecf20Sopenharmony_ci /* Push back any incomplete requests for replay after the reset. */ 43338c2ecf20Sopenharmony_ci cancel_port_requests(execlists); 43348c2ecf20Sopenharmony_ci __unwind_incomplete_requests(engine); 43358c2ecf20Sopenharmony_ci} 43368c2ecf20Sopenharmony_ci 43378c2ecf20Sopenharmony_cistatic void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) 43388c2ecf20Sopenharmony_ci{ 43398c2ecf20Sopenharmony_ci unsigned long flags; 43408c2ecf20Sopenharmony_ci 43418c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, "\n"); 43428c2ecf20Sopenharmony_ci 43438c2ecf20Sopenharmony_ci spin_lock_irqsave(&engine->active.lock, flags); 43448c2ecf20Sopenharmony_ci 43458c2ecf20Sopenharmony_ci __execlists_reset(engine, stalled); 43468c2ecf20Sopenharmony_ci 43478c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&engine->active.lock, flags); 43488c2ecf20Sopenharmony_ci} 43498c2ecf20Sopenharmony_ci 43508c2ecf20Sopenharmony_cistatic void nop_submission_tasklet(unsigned long data) 43518c2ecf20Sopenharmony_ci{ 43528c2ecf20Sopenharmony_ci struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; 43538c2ecf20Sopenharmony_ci 43548c2ecf20Sopenharmony_ci /* The driver is wedged; don't process any more events. */ 43558c2ecf20Sopenharmony_ci WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN); 43568c2ecf20Sopenharmony_ci} 43578c2ecf20Sopenharmony_ci 43588c2ecf20Sopenharmony_cistatic void execlists_reset_cancel(struct intel_engine_cs *engine) 43598c2ecf20Sopenharmony_ci{ 43608c2ecf20Sopenharmony_ci struct intel_engine_execlists * const execlists = &engine->execlists; 43618c2ecf20Sopenharmony_ci struct i915_request *rq, *rn; 43628c2ecf20Sopenharmony_ci struct rb_node *rb; 43638c2ecf20Sopenharmony_ci unsigned long flags; 43648c2ecf20Sopenharmony_ci 43658c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, "\n"); 43668c2ecf20Sopenharmony_ci 43678c2ecf20Sopenharmony_ci /* 43688c2ecf20Sopenharmony_ci * Before we call engine->cancel_requests(), we should have exclusive 43698c2ecf20Sopenharmony_ci * access to the submission state. This is arranged for us by the 43708c2ecf20Sopenharmony_ci * caller disabling the interrupt generation, the tasklet and other 43718c2ecf20Sopenharmony_ci * threads that may then access the same state, giving us a free hand 43728c2ecf20Sopenharmony_ci * to reset state. However, we still need to let lockdep be aware that 43738c2ecf20Sopenharmony_ci * we know this state may be accessed in hardirq context, so we 43748c2ecf20Sopenharmony_ci * disable the irq around this manipulation and we want to keep 43758c2ecf20Sopenharmony_ci * the spinlock focused on its duties and not accidentally conflate 43768c2ecf20Sopenharmony_ci * coverage to the submission's irq state. (Similarly, although we 43778c2ecf20Sopenharmony_ci * shouldn't need to disable irq around the manipulation of the 43788c2ecf20Sopenharmony_ci * submission's irq state, we also wish to remind ourselves that 43798c2ecf20Sopenharmony_ci * it is irq state.) 43808c2ecf20Sopenharmony_ci */ 43818c2ecf20Sopenharmony_ci spin_lock_irqsave(&engine->active.lock, flags); 43828c2ecf20Sopenharmony_ci 43838c2ecf20Sopenharmony_ci __execlists_reset(engine, true); 43848c2ecf20Sopenharmony_ci 43858c2ecf20Sopenharmony_ci /* Mark all executing requests as skipped. */ 43868c2ecf20Sopenharmony_ci list_for_each_entry(rq, &engine->active.requests, sched.link) 43878c2ecf20Sopenharmony_ci mark_eio(rq); 43888c2ecf20Sopenharmony_ci 43898c2ecf20Sopenharmony_ci /* Flush the queued requests to the timeline list (for retiring). */ 43908c2ecf20Sopenharmony_ci while ((rb = rb_first_cached(&execlists->queue))) { 43918c2ecf20Sopenharmony_ci struct i915_priolist *p = to_priolist(rb); 43928c2ecf20Sopenharmony_ci int i; 43938c2ecf20Sopenharmony_ci 43948c2ecf20Sopenharmony_ci priolist_for_each_request_consume(rq, rn, p, i) { 43958c2ecf20Sopenharmony_ci mark_eio(rq); 43968c2ecf20Sopenharmony_ci __i915_request_submit(rq); 43978c2ecf20Sopenharmony_ci } 43988c2ecf20Sopenharmony_ci 43998c2ecf20Sopenharmony_ci rb_erase_cached(&p->node, &execlists->queue); 44008c2ecf20Sopenharmony_ci i915_priolist_free(p); 44018c2ecf20Sopenharmony_ci } 44028c2ecf20Sopenharmony_ci 44038c2ecf20Sopenharmony_ci /* On-hold requests will be flushed to timeline upon their release */ 44048c2ecf20Sopenharmony_ci list_for_each_entry(rq, &engine->active.hold, sched.link) 44058c2ecf20Sopenharmony_ci mark_eio(rq); 44068c2ecf20Sopenharmony_ci 44078c2ecf20Sopenharmony_ci /* Cancel all attached virtual engines */ 44088c2ecf20Sopenharmony_ci while ((rb = rb_first_cached(&execlists->virtual))) { 44098c2ecf20Sopenharmony_ci struct virtual_engine *ve = 44108c2ecf20Sopenharmony_ci rb_entry(rb, typeof(*ve), nodes[engine->id].rb); 44118c2ecf20Sopenharmony_ci 44128c2ecf20Sopenharmony_ci rb_erase_cached(rb, &execlists->virtual); 44138c2ecf20Sopenharmony_ci RB_CLEAR_NODE(rb); 44148c2ecf20Sopenharmony_ci 44158c2ecf20Sopenharmony_ci spin_lock(&ve->base.active.lock); 44168c2ecf20Sopenharmony_ci rq = fetch_and_zero(&ve->request); 44178c2ecf20Sopenharmony_ci if (rq) { 44188c2ecf20Sopenharmony_ci mark_eio(rq); 44198c2ecf20Sopenharmony_ci 44208c2ecf20Sopenharmony_ci rq->engine = engine; 44218c2ecf20Sopenharmony_ci __i915_request_submit(rq); 44228c2ecf20Sopenharmony_ci i915_request_put(rq); 44238c2ecf20Sopenharmony_ci 44248c2ecf20Sopenharmony_ci ve->base.execlists.queue_priority_hint = INT_MIN; 44258c2ecf20Sopenharmony_ci } 44268c2ecf20Sopenharmony_ci spin_unlock(&ve->base.active.lock); 44278c2ecf20Sopenharmony_ci } 44288c2ecf20Sopenharmony_ci 44298c2ecf20Sopenharmony_ci /* Remaining _unready_ requests will be nop'ed when submitted */ 44308c2ecf20Sopenharmony_ci 44318c2ecf20Sopenharmony_ci execlists->queue_priority_hint = INT_MIN; 44328c2ecf20Sopenharmony_ci execlists->queue = RB_ROOT_CACHED; 44338c2ecf20Sopenharmony_ci 44348c2ecf20Sopenharmony_ci GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); 44358c2ecf20Sopenharmony_ci execlists->tasklet.func = nop_submission_tasklet; 44368c2ecf20Sopenharmony_ci 44378c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&engine->active.lock, flags); 44388c2ecf20Sopenharmony_ci} 44398c2ecf20Sopenharmony_ci 44408c2ecf20Sopenharmony_cistatic void execlists_reset_finish(struct intel_engine_cs *engine) 44418c2ecf20Sopenharmony_ci{ 44428c2ecf20Sopenharmony_ci struct intel_engine_execlists * const execlists = &engine->execlists; 44438c2ecf20Sopenharmony_ci 44448c2ecf20Sopenharmony_ci /* 44458c2ecf20Sopenharmony_ci * After a GPU reset, we may have requests to replay. Do so now while 44468c2ecf20Sopenharmony_ci * we still have the forcewake to be sure that the GPU is not allowed 44478c2ecf20Sopenharmony_ci * to sleep before we restart and reload a context. 44488c2ecf20Sopenharmony_ci */ 44498c2ecf20Sopenharmony_ci GEM_BUG_ON(!reset_in_progress(execlists)); 44508c2ecf20Sopenharmony_ci if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) 44518c2ecf20Sopenharmony_ci execlists->tasklet.func(execlists->tasklet.data); 44528c2ecf20Sopenharmony_ci 44538c2ecf20Sopenharmony_ci if (__tasklet_enable(&execlists->tasklet)) 44548c2ecf20Sopenharmony_ci /* And kick in case we missed a new request submission. */ 44558c2ecf20Sopenharmony_ci tasklet_hi_schedule(&execlists->tasklet); 44568c2ecf20Sopenharmony_ci ENGINE_TRACE(engine, "depth->%d\n", 44578c2ecf20Sopenharmony_ci atomic_read(&execlists->tasklet.count)); 44588c2ecf20Sopenharmony_ci} 44598c2ecf20Sopenharmony_ci 44608c2ecf20Sopenharmony_cistatic int gen8_emit_bb_start_noarb(struct i915_request *rq, 44618c2ecf20Sopenharmony_ci u64 offset, u32 len, 44628c2ecf20Sopenharmony_ci const unsigned int flags) 44638c2ecf20Sopenharmony_ci{ 44648c2ecf20Sopenharmony_ci u32 *cs; 44658c2ecf20Sopenharmony_ci 44668c2ecf20Sopenharmony_ci cs = intel_ring_begin(rq, 4); 44678c2ecf20Sopenharmony_ci if (IS_ERR(cs)) 44688c2ecf20Sopenharmony_ci return PTR_ERR(cs); 44698c2ecf20Sopenharmony_ci 44708c2ecf20Sopenharmony_ci /* 44718c2ecf20Sopenharmony_ci * WaDisableCtxRestoreArbitration:bdw,chv 44728c2ecf20Sopenharmony_ci * 44738c2ecf20Sopenharmony_ci * We don't need to perform MI_ARB_ENABLE as often as we do (in 44748c2ecf20Sopenharmony_ci * particular all the gen that do not need the w/a at all!), if we 44758c2ecf20Sopenharmony_ci * took care to make sure that on every switch into this context 44768c2ecf20Sopenharmony_ci * (both ordinary and for preemption) that arbitrartion was enabled 44778c2ecf20Sopenharmony_ci * we would be fine. However, for gen8 there is another w/a that 44788c2ecf20Sopenharmony_ci * requires us to not preempt inside GPGPU execution, so we keep 44798c2ecf20Sopenharmony_ci * arbitration disabled for gen8 batches. Arbitration will be 44808c2ecf20Sopenharmony_ci * re-enabled before we close the request 44818c2ecf20Sopenharmony_ci * (engine->emit_fini_breadcrumb). 44828c2ecf20Sopenharmony_ci */ 44838c2ecf20Sopenharmony_ci *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 44848c2ecf20Sopenharmony_ci 44858c2ecf20Sopenharmony_ci /* FIXME(BDW+): Address space and security selectors. */ 44868c2ecf20Sopenharmony_ci *cs++ = MI_BATCH_BUFFER_START_GEN8 | 44878c2ecf20Sopenharmony_ci (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); 44888c2ecf20Sopenharmony_ci *cs++ = lower_32_bits(offset); 44898c2ecf20Sopenharmony_ci *cs++ = upper_32_bits(offset); 44908c2ecf20Sopenharmony_ci 44918c2ecf20Sopenharmony_ci intel_ring_advance(rq, cs); 44928c2ecf20Sopenharmony_ci 44938c2ecf20Sopenharmony_ci return 0; 44948c2ecf20Sopenharmony_ci} 44958c2ecf20Sopenharmony_ci 44968c2ecf20Sopenharmony_cistatic int gen8_emit_bb_start(struct i915_request *rq, 44978c2ecf20Sopenharmony_ci u64 offset, u32 len, 44988c2ecf20Sopenharmony_ci const unsigned int flags) 44998c2ecf20Sopenharmony_ci{ 45008c2ecf20Sopenharmony_ci u32 *cs; 45018c2ecf20Sopenharmony_ci 45028c2ecf20Sopenharmony_ci cs = intel_ring_begin(rq, 6); 45038c2ecf20Sopenharmony_ci if (IS_ERR(cs)) 45048c2ecf20Sopenharmony_ci return PTR_ERR(cs); 45058c2ecf20Sopenharmony_ci 45068c2ecf20Sopenharmony_ci *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 45078c2ecf20Sopenharmony_ci 45088c2ecf20Sopenharmony_ci *cs++ = MI_BATCH_BUFFER_START_GEN8 | 45098c2ecf20Sopenharmony_ci (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); 45108c2ecf20Sopenharmony_ci *cs++ = lower_32_bits(offset); 45118c2ecf20Sopenharmony_ci *cs++ = upper_32_bits(offset); 45128c2ecf20Sopenharmony_ci 45138c2ecf20Sopenharmony_ci *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 45148c2ecf20Sopenharmony_ci *cs++ = MI_NOOP; 45158c2ecf20Sopenharmony_ci 45168c2ecf20Sopenharmony_ci intel_ring_advance(rq, cs); 45178c2ecf20Sopenharmony_ci 45188c2ecf20Sopenharmony_ci return 0; 45198c2ecf20Sopenharmony_ci} 45208c2ecf20Sopenharmony_ci 45218c2ecf20Sopenharmony_cistatic void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) 45228c2ecf20Sopenharmony_ci{ 45238c2ecf20Sopenharmony_ci ENGINE_WRITE(engine, RING_IMR, 45248c2ecf20Sopenharmony_ci ~(engine->irq_enable_mask | engine->irq_keep_mask)); 45258c2ecf20Sopenharmony_ci ENGINE_POSTING_READ(engine, RING_IMR); 45268c2ecf20Sopenharmony_ci} 45278c2ecf20Sopenharmony_ci 45288c2ecf20Sopenharmony_cistatic void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) 45298c2ecf20Sopenharmony_ci{ 45308c2ecf20Sopenharmony_ci ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); 45318c2ecf20Sopenharmony_ci} 45328c2ecf20Sopenharmony_ci 45338c2ecf20Sopenharmony_cistatic int gen8_emit_flush(struct i915_request *request, u32 mode) 45348c2ecf20Sopenharmony_ci{ 45358c2ecf20Sopenharmony_ci u32 cmd, *cs; 45368c2ecf20Sopenharmony_ci 45378c2ecf20Sopenharmony_ci cs = intel_ring_begin(request, 4); 45388c2ecf20Sopenharmony_ci if (IS_ERR(cs)) 45398c2ecf20Sopenharmony_ci return PTR_ERR(cs); 45408c2ecf20Sopenharmony_ci 45418c2ecf20Sopenharmony_ci cmd = MI_FLUSH_DW + 1; 45428c2ecf20Sopenharmony_ci 45438c2ecf20Sopenharmony_ci /* We always require a command barrier so that subsequent 45448c2ecf20Sopenharmony_ci * commands, such as breadcrumb interrupts, are strictly ordered 45458c2ecf20Sopenharmony_ci * wrt the contents of the write cache being flushed to memory 45468c2ecf20Sopenharmony_ci * (and thus being coherent from the CPU). 45478c2ecf20Sopenharmony_ci */ 45488c2ecf20Sopenharmony_ci cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; 45498c2ecf20Sopenharmony_ci 45508c2ecf20Sopenharmony_ci if (mode & EMIT_INVALIDATE) { 45518c2ecf20Sopenharmony_ci cmd |= MI_INVALIDATE_TLB; 45528c2ecf20Sopenharmony_ci if (request->engine->class == VIDEO_DECODE_CLASS) 45538c2ecf20Sopenharmony_ci cmd |= MI_INVALIDATE_BSD; 45548c2ecf20Sopenharmony_ci } 45558c2ecf20Sopenharmony_ci 45568c2ecf20Sopenharmony_ci *cs++ = cmd; 45578c2ecf20Sopenharmony_ci *cs++ = LRC_PPHWSP_SCRATCH_ADDR; 45588c2ecf20Sopenharmony_ci *cs++ = 0; /* upper addr */ 45598c2ecf20Sopenharmony_ci *cs++ = 0; /* value */ 45608c2ecf20Sopenharmony_ci intel_ring_advance(request, cs); 45618c2ecf20Sopenharmony_ci 45628c2ecf20Sopenharmony_ci return 0; 45638c2ecf20Sopenharmony_ci} 45648c2ecf20Sopenharmony_ci 45658c2ecf20Sopenharmony_cistatic int gen8_emit_flush_render(struct i915_request *request, 45668c2ecf20Sopenharmony_ci u32 mode) 45678c2ecf20Sopenharmony_ci{ 45688c2ecf20Sopenharmony_ci bool vf_flush_wa = false, dc_flush_wa = false; 45698c2ecf20Sopenharmony_ci u32 *cs, flags = 0; 45708c2ecf20Sopenharmony_ci int len; 45718c2ecf20Sopenharmony_ci 45728c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_CS_STALL; 45738c2ecf20Sopenharmony_ci 45748c2ecf20Sopenharmony_ci if (mode & EMIT_FLUSH) { 45758c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 45768c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 45778c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; 45788c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_FLUSH_ENABLE; 45798c2ecf20Sopenharmony_ci } 45808c2ecf20Sopenharmony_ci 45818c2ecf20Sopenharmony_ci if (mode & EMIT_INVALIDATE) { 45828c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_TLB_INVALIDATE; 45838c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 45848c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 45858c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 45868c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 45878c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 45888c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_QW_WRITE; 45898c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_STORE_DATA_INDEX; 45908c2ecf20Sopenharmony_ci 45918c2ecf20Sopenharmony_ci /* 45928c2ecf20Sopenharmony_ci * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL 45938c2ecf20Sopenharmony_ci * pipe control. 45948c2ecf20Sopenharmony_ci */ 45958c2ecf20Sopenharmony_ci if (IS_GEN(request->engine->i915, 9)) 45968c2ecf20Sopenharmony_ci vf_flush_wa = true; 45978c2ecf20Sopenharmony_ci 45988c2ecf20Sopenharmony_ci /* WaForGAMHang:kbl */ 45998c2ecf20Sopenharmony_ci if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0)) 46008c2ecf20Sopenharmony_ci dc_flush_wa = true; 46018c2ecf20Sopenharmony_ci } 46028c2ecf20Sopenharmony_ci 46038c2ecf20Sopenharmony_ci len = 6; 46048c2ecf20Sopenharmony_ci 46058c2ecf20Sopenharmony_ci if (vf_flush_wa) 46068c2ecf20Sopenharmony_ci len += 6; 46078c2ecf20Sopenharmony_ci 46088c2ecf20Sopenharmony_ci if (dc_flush_wa) 46098c2ecf20Sopenharmony_ci len += 12; 46108c2ecf20Sopenharmony_ci 46118c2ecf20Sopenharmony_ci cs = intel_ring_begin(request, len); 46128c2ecf20Sopenharmony_ci if (IS_ERR(cs)) 46138c2ecf20Sopenharmony_ci return PTR_ERR(cs); 46148c2ecf20Sopenharmony_ci 46158c2ecf20Sopenharmony_ci if (vf_flush_wa) 46168c2ecf20Sopenharmony_ci cs = gen8_emit_pipe_control(cs, 0, 0); 46178c2ecf20Sopenharmony_ci 46188c2ecf20Sopenharmony_ci if (dc_flush_wa) 46198c2ecf20Sopenharmony_ci cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, 46208c2ecf20Sopenharmony_ci 0); 46218c2ecf20Sopenharmony_ci 46228c2ecf20Sopenharmony_ci cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); 46238c2ecf20Sopenharmony_ci 46248c2ecf20Sopenharmony_ci if (dc_flush_wa) 46258c2ecf20Sopenharmony_ci cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); 46268c2ecf20Sopenharmony_ci 46278c2ecf20Sopenharmony_ci intel_ring_advance(request, cs); 46288c2ecf20Sopenharmony_ci 46298c2ecf20Sopenharmony_ci return 0; 46308c2ecf20Sopenharmony_ci} 46318c2ecf20Sopenharmony_ci 46328c2ecf20Sopenharmony_cistatic int gen11_emit_flush_render(struct i915_request *request, 46338c2ecf20Sopenharmony_ci u32 mode) 46348c2ecf20Sopenharmony_ci{ 46358c2ecf20Sopenharmony_ci if (mode & EMIT_FLUSH) { 46368c2ecf20Sopenharmony_ci u32 *cs; 46378c2ecf20Sopenharmony_ci u32 flags = 0; 46388c2ecf20Sopenharmony_ci 46398c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_CS_STALL; 46408c2ecf20Sopenharmony_ci 46418c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; 46428c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 46438c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 46448c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; 46458c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_FLUSH_ENABLE; 46468c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_QW_WRITE; 46478c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_STORE_DATA_INDEX; 46488c2ecf20Sopenharmony_ci 46498c2ecf20Sopenharmony_ci cs = intel_ring_begin(request, 6); 46508c2ecf20Sopenharmony_ci if (IS_ERR(cs)) 46518c2ecf20Sopenharmony_ci return PTR_ERR(cs); 46528c2ecf20Sopenharmony_ci 46538c2ecf20Sopenharmony_ci cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); 46548c2ecf20Sopenharmony_ci intel_ring_advance(request, cs); 46558c2ecf20Sopenharmony_ci } 46568c2ecf20Sopenharmony_ci 46578c2ecf20Sopenharmony_ci if (mode & EMIT_INVALIDATE) { 46588c2ecf20Sopenharmony_ci u32 *cs; 46598c2ecf20Sopenharmony_ci u32 flags = 0; 46608c2ecf20Sopenharmony_ci 46618c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_CS_STALL; 46628c2ecf20Sopenharmony_ci 46638c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; 46648c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_TLB_INVALIDATE; 46658c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 46668c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 46678c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 46688c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 46698c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 46708c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_QW_WRITE; 46718c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_STORE_DATA_INDEX; 46728c2ecf20Sopenharmony_ci 46738c2ecf20Sopenharmony_ci cs = intel_ring_begin(request, 6); 46748c2ecf20Sopenharmony_ci if (IS_ERR(cs)) 46758c2ecf20Sopenharmony_ci return PTR_ERR(cs); 46768c2ecf20Sopenharmony_ci 46778c2ecf20Sopenharmony_ci cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); 46788c2ecf20Sopenharmony_ci intel_ring_advance(request, cs); 46798c2ecf20Sopenharmony_ci } 46808c2ecf20Sopenharmony_ci 46818c2ecf20Sopenharmony_ci return 0; 46828c2ecf20Sopenharmony_ci} 46838c2ecf20Sopenharmony_ci 46848c2ecf20Sopenharmony_cistatic u32 preparser_disable(bool state) 46858c2ecf20Sopenharmony_ci{ 46868c2ecf20Sopenharmony_ci return MI_ARB_CHECK | 1 << 8 | state; 46878c2ecf20Sopenharmony_ci} 46888c2ecf20Sopenharmony_ci 46898c2ecf20Sopenharmony_cistatic i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine) 46908c2ecf20Sopenharmony_ci{ 46918c2ecf20Sopenharmony_ci static const i915_reg_t vd[] = { 46928c2ecf20Sopenharmony_ci GEN12_VD0_AUX_NV, 46938c2ecf20Sopenharmony_ci GEN12_VD1_AUX_NV, 46948c2ecf20Sopenharmony_ci GEN12_VD2_AUX_NV, 46958c2ecf20Sopenharmony_ci GEN12_VD3_AUX_NV, 46968c2ecf20Sopenharmony_ci }; 46978c2ecf20Sopenharmony_ci 46988c2ecf20Sopenharmony_ci static const i915_reg_t ve[] = { 46998c2ecf20Sopenharmony_ci GEN12_VE0_AUX_NV, 47008c2ecf20Sopenharmony_ci GEN12_VE1_AUX_NV, 47018c2ecf20Sopenharmony_ci }; 47028c2ecf20Sopenharmony_ci 47038c2ecf20Sopenharmony_ci if (engine->class == VIDEO_DECODE_CLASS) 47048c2ecf20Sopenharmony_ci return vd[engine->instance]; 47058c2ecf20Sopenharmony_ci 47068c2ecf20Sopenharmony_ci if (engine->class == VIDEO_ENHANCEMENT_CLASS) 47078c2ecf20Sopenharmony_ci return ve[engine->instance]; 47088c2ecf20Sopenharmony_ci 47098c2ecf20Sopenharmony_ci GEM_BUG_ON("unknown aux_inv_reg\n"); 47108c2ecf20Sopenharmony_ci 47118c2ecf20Sopenharmony_ci return INVALID_MMIO_REG; 47128c2ecf20Sopenharmony_ci} 47138c2ecf20Sopenharmony_ci 47148c2ecf20Sopenharmony_cistatic u32 * 47158c2ecf20Sopenharmony_cigen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs) 47168c2ecf20Sopenharmony_ci{ 47178c2ecf20Sopenharmony_ci *cs++ = MI_LOAD_REGISTER_IMM(1); 47188c2ecf20Sopenharmony_ci *cs++ = i915_mmio_reg_offset(inv_reg); 47198c2ecf20Sopenharmony_ci *cs++ = AUX_INV; 47208c2ecf20Sopenharmony_ci *cs++ = MI_NOOP; 47218c2ecf20Sopenharmony_ci 47228c2ecf20Sopenharmony_ci return cs; 47238c2ecf20Sopenharmony_ci} 47248c2ecf20Sopenharmony_ci 47258c2ecf20Sopenharmony_cistatic int gen12_emit_flush_render(struct i915_request *request, 47268c2ecf20Sopenharmony_ci u32 mode) 47278c2ecf20Sopenharmony_ci{ 47288c2ecf20Sopenharmony_ci if (mode & EMIT_FLUSH) { 47298c2ecf20Sopenharmony_ci u32 flags = 0; 47308c2ecf20Sopenharmony_ci u32 *cs; 47318c2ecf20Sopenharmony_ci 47328c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; 47338c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_FLUSH_L3; 47348c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 47358c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 47368c2ecf20Sopenharmony_ci /* Wa_1409600907:tgl */ 47378c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_DEPTH_STALL; 47388c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; 47398c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_FLUSH_ENABLE; 47408c2ecf20Sopenharmony_ci 47418c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_STORE_DATA_INDEX; 47428c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_QW_WRITE; 47438c2ecf20Sopenharmony_ci 47448c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_CS_STALL; 47458c2ecf20Sopenharmony_ci 47468c2ecf20Sopenharmony_ci cs = intel_ring_begin(request, 6); 47478c2ecf20Sopenharmony_ci if (IS_ERR(cs)) 47488c2ecf20Sopenharmony_ci return PTR_ERR(cs); 47498c2ecf20Sopenharmony_ci 47508c2ecf20Sopenharmony_ci cs = gen12_emit_pipe_control(cs, 47518c2ecf20Sopenharmony_ci PIPE_CONTROL0_HDC_PIPELINE_FLUSH, 47528c2ecf20Sopenharmony_ci flags, LRC_PPHWSP_SCRATCH_ADDR); 47538c2ecf20Sopenharmony_ci intel_ring_advance(request, cs); 47548c2ecf20Sopenharmony_ci } 47558c2ecf20Sopenharmony_ci 47568c2ecf20Sopenharmony_ci if (mode & EMIT_INVALIDATE) { 47578c2ecf20Sopenharmony_ci u32 flags = 0; 47588c2ecf20Sopenharmony_ci u32 *cs; 47598c2ecf20Sopenharmony_ci 47608c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; 47618c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_TLB_INVALIDATE; 47628c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 47638c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 47648c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 47658c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 47668c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 47678c2ecf20Sopenharmony_ci 47688c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_STORE_DATA_INDEX; 47698c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_QW_WRITE; 47708c2ecf20Sopenharmony_ci 47718c2ecf20Sopenharmony_ci flags |= PIPE_CONTROL_CS_STALL; 47728c2ecf20Sopenharmony_ci 47738c2ecf20Sopenharmony_ci cs = intel_ring_begin(request, 8 + 4); 47748c2ecf20Sopenharmony_ci if (IS_ERR(cs)) 47758c2ecf20Sopenharmony_ci return PTR_ERR(cs); 47768c2ecf20Sopenharmony_ci 47778c2ecf20Sopenharmony_ci /* 47788c2ecf20Sopenharmony_ci * Prevent the pre-parser from skipping past the TLB 47798c2ecf20Sopenharmony_ci * invalidate and loading a stale page for the batch 47808c2ecf20Sopenharmony_ci * buffer / request payload. 47818c2ecf20Sopenharmony_ci */ 47828c2ecf20Sopenharmony_ci *cs++ = preparser_disable(true); 47838c2ecf20Sopenharmony_ci 47848c2ecf20Sopenharmony_ci cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); 47858c2ecf20Sopenharmony_ci 47868c2ecf20Sopenharmony_ci /* hsdes: 1809175790 */ 47878c2ecf20Sopenharmony_ci cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs); 47888c2ecf20Sopenharmony_ci 47898c2ecf20Sopenharmony_ci *cs++ = preparser_disable(false); 47908c2ecf20Sopenharmony_ci intel_ring_advance(request, cs); 47918c2ecf20Sopenharmony_ci } 47928c2ecf20Sopenharmony_ci 47938c2ecf20Sopenharmony_ci return 0; 47948c2ecf20Sopenharmony_ci} 47958c2ecf20Sopenharmony_ci 47968c2ecf20Sopenharmony_cistatic int gen12_emit_flush(struct i915_request *request, u32 mode) 47978c2ecf20Sopenharmony_ci{ 47988c2ecf20Sopenharmony_ci intel_engine_mask_t aux_inv = 0; 47998c2ecf20Sopenharmony_ci u32 cmd, *cs; 48008c2ecf20Sopenharmony_ci 48018c2ecf20Sopenharmony_ci cmd = 4; 48028c2ecf20Sopenharmony_ci if (mode & EMIT_INVALIDATE) 48038c2ecf20Sopenharmony_ci cmd += 2; 48048c2ecf20Sopenharmony_ci if (mode & EMIT_INVALIDATE) 48058c2ecf20Sopenharmony_ci aux_inv = request->engine->mask & ~BIT(BCS0); 48068c2ecf20Sopenharmony_ci if (aux_inv) 48078c2ecf20Sopenharmony_ci cmd += 2 * hweight8(aux_inv) + 2; 48088c2ecf20Sopenharmony_ci 48098c2ecf20Sopenharmony_ci cs = intel_ring_begin(request, cmd); 48108c2ecf20Sopenharmony_ci if (IS_ERR(cs)) 48118c2ecf20Sopenharmony_ci return PTR_ERR(cs); 48128c2ecf20Sopenharmony_ci 48138c2ecf20Sopenharmony_ci if (mode & EMIT_INVALIDATE) 48148c2ecf20Sopenharmony_ci *cs++ = preparser_disable(true); 48158c2ecf20Sopenharmony_ci 48168c2ecf20Sopenharmony_ci cmd = MI_FLUSH_DW + 1; 48178c2ecf20Sopenharmony_ci 48188c2ecf20Sopenharmony_ci /* We always require a command barrier so that subsequent 48198c2ecf20Sopenharmony_ci * commands, such as breadcrumb interrupts, are strictly ordered 48208c2ecf20Sopenharmony_ci * wrt the contents of the write cache being flushed to memory 48218c2ecf20Sopenharmony_ci * (and thus being coherent from the CPU). 48228c2ecf20Sopenharmony_ci */ 48238c2ecf20Sopenharmony_ci cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; 48248c2ecf20Sopenharmony_ci 48258c2ecf20Sopenharmony_ci if (mode & EMIT_INVALIDATE) { 48268c2ecf20Sopenharmony_ci cmd |= MI_INVALIDATE_TLB; 48278c2ecf20Sopenharmony_ci if (request->engine->class == VIDEO_DECODE_CLASS) 48288c2ecf20Sopenharmony_ci cmd |= MI_INVALIDATE_BSD; 48298c2ecf20Sopenharmony_ci } 48308c2ecf20Sopenharmony_ci 48318c2ecf20Sopenharmony_ci *cs++ = cmd; 48328c2ecf20Sopenharmony_ci *cs++ = LRC_PPHWSP_SCRATCH_ADDR; 48338c2ecf20Sopenharmony_ci *cs++ = 0; /* upper addr */ 48348c2ecf20Sopenharmony_ci *cs++ = 0; /* value */ 48358c2ecf20Sopenharmony_ci 48368c2ecf20Sopenharmony_ci if (aux_inv) { /* hsdes: 1809175790 */ 48378c2ecf20Sopenharmony_ci struct intel_engine_cs *engine; 48388c2ecf20Sopenharmony_ci unsigned int tmp; 48398c2ecf20Sopenharmony_ci 48408c2ecf20Sopenharmony_ci *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv)); 48418c2ecf20Sopenharmony_ci for_each_engine_masked(engine, request->engine->gt, 48428c2ecf20Sopenharmony_ci aux_inv, tmp) { 48438c2ecf20Sopenharmony_ci *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine)); 48448c2ecf20Sopenharmony_ci *cs++ = AUX_INV; 48458c2ecf20Sopenharmony_ci } 48468c2ecf20Sopenharmony_ci *cs++ = MI_NOOP; 48478c2ecf20Sopenharmony_ci } 48488c2ecf20Sopenharmony_ci 48498c2ecf20Sopenharmony_ci if (mode & EMIT_INVALIDATE) 48508c2ecf20Sopenharmony_ci *cs++ = preparser_disable(false); 48518c2ecf20Sopenharmony_ci 48528c2ecf20Sopenharmony_ci intel_ring_advance(request, cs); 48538c2ecf20Sopenharmony_ci 48548c2ecf20Sopenharmony_ci return 0; 48558c2ecf20Sopenharmony_ci} 48568c2ecf20Sopenharmony_ci 48578c2ecf20Sopenharmony_cistatic void assert_request_valid(struct i915_request *rq) 48588c2ecf20Sopenharmony_ci{ 48598c2ecf20Sopenharmony_ci struct intel_ring *ring __maybe_unused = rq->ring; 48608c2ecf20Sopenharmony_ci 48618c2ecf20Sopenharmony_ci /* Can we unwind this request without appearing to go forwards? */ 48628c2ecf20Sopenharmony_ci GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0); 48638c2ecf20Sopenharmony_ci} 48648c2ecf20Sopenharmony_ci 48658c2ecf20Sopenharmony_ci/* 48668c2ecf20Sopenharmony_ci * Reserve space for 2 NOOPs at the end of each request to be 48678c2ecf20Sopenharmony_ci * used as a workaround for not being allowed to do lite 48688c2ecf20Sopenharmony_ci * restore with HEAD==TAIL (WaIdleLiteRestore). 48698c2ecf20Sopenharmony_ci */ 48708c2ecf20Sopenharmony_cistatic u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) 48718c2ecf20Sopenharmony_ci{ 48728c2ecf20Sopenharmony_ci /* Ensure there's always at least one preemption point per-request. */ 48738c2ecf20Sopenharmony_ci *cs++ = MI_ARB_CHECK; 48748c2ecf20Sopenharmony_ci *cs++ = MI_NOOP; 48758c2ecf20Sopenharmony_ci request->wa_tail = intel_ring_offset(request, cs); 48768c2ecf20Sopenharmony_ci 48778c2ecf20Sopenharmony_ci /* Check that entire request is less than half the ring */ 48788c2ecf20Sopenharmony_ci assert_request_valid(request); 48798c2ecf20Sopenharmony_ci 48808c2ecf20Sopenharmony_ci return cs; 48818c2ecf20Sopenharmony_ci} 48828c2ecf20Sopenharmony_ci 48838c2ecf20Sopenharmony_cistatic u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs) 48848c2ecf20Sopenharmony_ci{ 48858c2ecf20Sopenharmony_ci *cs++ = MI_SEMAPHORE_WAIT | 48868c2ecf20Sopenharmony_ci MI_SEMAPHORE_GLOBAL_GTT | 48878c2ecf20Sopenharmony_ci MI_SEMAPHORE_POLL | 48888c2ecf20Sopenharmony_ci MI_SEMAPHORE_SAD_EQ_SDD; 48898c2ecf20Sopenharmony_ci *cs++ = 0; 48908c2ecf20Sopenharmony_ci *cs++ = intel_hws_preempt_address(request->engine); 48918c2ecf20Sopenharmony_ci *cs++ = 0; 48928c2ecf20Sopenharmony_ci 48938c2ecf20Sopenharmony_ci return cs; 48948c2ecf20Sopenharmony_ci} 48958c2ecf20Sopenharmony_ci 48968c2ecf20Sopenharmony_cistatic __always_inline u32* 48978c2ecf20Sopenharmony_cigen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) 48988c2ecf20Sopenharmony_ci{ 48998c2ecf20Sopenharmony_ci *cs++ = MI_USER_INTERRUPT; 49008c2ecf20Sopenharmony_ci 49018c2ecf20Sopenharmony_ci *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 49028c2ecf20Sopenharmony_ci if (intel_engine_has_semaphores(request->engine)) 49038c2ecf20Sopenharmony_ci cs = emit_preempt_busywait(request, cs); 49048c2ecf20Sopenharmony_ci 49058c2ecf20Sopenharmony_ci request->tail = intel_ring_offset(request, cs); 49068c2ecf20Sopenharmony_ci assert_ring_tail_valid(request->ring, request->tail); 49078c2ecf20Sopenharmony_ci 49088c2ecf20Sopenharmony_ci return gen8_emit_wa_tail(request, cs); 49098c2ecf20Sopenharmony_ci} 49108c2ecf20Sopenharmony_ci 49118c2ecf20Sopenharmony_cistatic u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs) 49128c2ecf20Sopenharmony_ci{ 49138c2ecf20Sopenharmony_ci return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0); 49148c2ecf20Sopenharmony_ci} 49158c2ecf20Sopenharmony_ci 49168c2ecf20Sopenharmony_cistatic u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) 49178c2ecf20Sopenharmony_ci{ 49188c2ecf20Sopenharmony_ci return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs)); 49198c2ecf20Sopenharmony_ci} 49208c2ecf20Sopenharmony_ci 49218c2ecf20Sopenharmony_cistatic u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) 49228c2ecf20Sopenharmony_ci{ 49238c2ecf20Sopenharmony_ci cs = gen8_emit_pipe_control(cs, 49248c2ecf20Sopenharmony_ci PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | 49258c2ecf20Sopenharmony_ci PIPE_CONTROL_DEPTH_CACHE_FLUSH | 49268c2ecf20Sopenharmony_ci PIPE_CONTROL_DC_FLUSH_ENABLE, 49278c2ecf20Sopenharmony_ci 0); 49288c2ecf20Sopenharmony_ci 49298c2ecf20Sopenharmony_ci /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ 49308c2ecf20Sopenharmony_ci cs = gen8_emit_ggtt_write_rcs(cs, 49318c2ecf20Sopenharmony_ci request->fence.seqno, 49328c2ecf20Sopenharmony_ci hwsp_offset(request), 49338c2ecf20Sopenharmony_ci PIPE_CONTROL_FLUSH_ENABLE | 49348c2ecf20Sopenharmony_ci PIPE_CONTROL_CS_STALL); 49358c2ecf20Sopenharmony_ci 49368c2ecf20Sopenharmony_ci return gen8_emit_fini_breadcrumb_tail(request, cs); 49378c2ecf20Sopenharmony_ci} 49388c2ecf20Sopenharmony_ci 49398c2ecf20Sopenharmony_cistatic u32 * 49408c2ecf20Sopenharmony_cigen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) 49418c2ecf20Sopenharmony_ci{ 49428c2ecf20Sopenharmony_ci cs = gen8_emit_ggtt_write_rcs(cs, 49438c2ecf20Sopenharmony_ci request->fence.seqno, 49448c2ecf20Sopenharmony_ci hwsp_offset(request), 49458c2ecf20Sopenharmony_ci PIPE_CONTROL_CS_STALL | 49468c2ecf20Sopenharmony_ci PIPE_CONTROL_TILE_CACHE_FLUSH | 49478c2ecf20Sopenharmony_ci PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | 49488c2ecf20Sopenharmony_ci PIPE_CONTROL_DEPTH_CACHE_FLUSH | 49498c2ecf20Sopenharmony_ci PIPE_CONTROL_DC_FLUSH_ENABLE | 49508c2ecf20Sopenharmony_ci PIPE_CONTROL_FLUSH_ENABLE); 49518c2ecf20Sopenharmony_ci 49528c2ecf20Sopenharmony_ci return gen8_emit_fini_breadcrumb_tail(request, cs); 49538c2ecf20Sopenharmony_ci} 49548c2ecf20Sopenharmony_ci 49558c2ecf20Sopenharmony_ci/* 49568c2ecf20Sopenharmony_ci * Note that the CS instruction pre-parser will not stall on the breadcrumb 49578c2ecf20Sopenharmony_ci * flush and will continue pre-fetching the instructions after it before the 49588c2ecf20Sopenharmony_ci * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at 49598c2ecf20Sopenharmony_ci * BB_START/END instructions, so, even though we might pre-fetch the pre-amble 49608c2ecf20Sopenharmony_ci * of the next request before the memory has been flushed, we're guaranteed that 49618c2ecf20Sopenharmony_ci * we won't access the batch itself too early. 49628c2ecf20Sopenharmony_ci * However, on gen12+ the parser can pre-fetch across the BB_START/END commands, 49638c2ecf20Sopenharmony_ci * so, if the current request is modifying an instruction in the next request on 49648c2ecf20Sopenharmony_ci * the same intel_context, we might pre-fetch and then execute the pre-update 49658c2ecf20Sopenharmony_ci * instruction. To avoid this, the users of self-modifying code should either 49668c2ecf20Sopenharmony_ci * disable the parser around the code emitting the memory writes, via a new flag 49678c2ecf20Sopenharmony_ci * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For 49688c2ecf20Sopenharmony_ci * the in-kernel use-cases we've opted to use a separate context, see 49698c2ecf20Sopenharmony_ci * reloc_gpu() as an example. 49708c2ecf20Sopenharmony_ci * All the above applies only to the instructions themselves. Non-inline data 49718c2ecf20Sopenharmony_ci * used by the instructions is not pre-fetched. 49728c2ecf20Sopenharmony_ci */ 49738c2ecf20Sopenharmony_ci 49748c2ecf20Sopenharmony_cistatic u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs) 49758c2ecf20Sopenharmony_ci{ 49768c2ecf20Sopenharmony_ci *cs++ = MI_SEMAPHORE_WAIT_TOKEN | 49778c2ecf20Sopenharmony_ci MI_SEMAPHORE_GLOBAL_GTT | 49788c2ecf20Sopenharmony_ci MI_SEMAPHORE_POLL | 49798c2ecf20Sopenharmony_ci MI_SEMAPHORE_SAD_EQ_SDD; 49808c2ecf20Sopenharmony_ci *cs++ = 0; 49818c2ecf20Sopenharmony_ci *cs++ = intel_hws_preempt_address(request->engine); 49828c2ecf20Sopenharmony_ci *cs++ = 0; 49838c2ecf20Sopenharmony_ci *cs++ = 0; 49848c2ecf20Sopenharmony_ci *cs++ = MI_NOOP; 49858c2ecf20Sopenharmony_ci 49868c2ecf20Sopenharmony_ci return cs; 49878c2ecf20Sopenharmony_ci} 49888c2ecf20Sopenharmony_ci 49898c2ecf20Sopenharmony_cistatic __always_inline u32* 49908c2ecf20Sopenharmony_cigen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) 49918c2ecf20Sopenharmony_ci{ 49928c2ecf20Sopenharmony_ci *cs++ = MI_USER_INTERRUPT; 49938c2ecf20Sopenharmony_ci 49948c2ecf20Sopenharmony_ci *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 49958c2ecf20Sopenharmony_ci if (intel_engine_has_semaphores(request->engine)) 49968c2ecf20Sopenharmony_ci cs = gen12_emit_preempt_busywait(request, cs); 49978c2ecf20Sopenharmony_ci 49988c2ecf20Sopenharmony_ci request->tail = intel_ring_offset(request, cs); 49998c2ecf20Sopenharmony_ci assert_ring_tail_valid(request->ring, request->tail); 50008c2ecf20Sopenharmony_ci 50018c2ecf20Sopenharmony_ci return gen8_emit_wa_tail(request, cs); 50028c2ecf20Sopenharmony_ci} 50038c2ecf20Sopenharmony_ci 50048c2ecf20Sopenharmony_cistatic u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) 50058c2ecf20Sopenharmony_ci{ 50068c2ecf20Sopenharmony_ci /* XXX Stalling flush before seqno write; post-sync not */ 50078c2ecf20Sopenharmony_ci cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0)); 50088c2ecf20Sopenharmony_ci return gen12_emit_fini_breadcrumb_tail(rq, cs); 50098c2ecf20Sopenharmony_ci} 50108c2ecf20Sopenharmony_ci 50118c2ecf20Sopenharmony_cistatic u32 * 50128c2ecf20Sopenharmony_cigen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) 50138c2ecf20Sopenharmony_ci{ 50148c2ecf20Sopenharmony_ci cs = gen12_emit_ggtt_write_rcs(cs, 50158c2ecf20Sopenharmony_ci request->fence.seqno, 50168c2ecf20Sopenharmony_ci hwsp_offset(request), 50178c2ecf20Sopenharmony_ci PIPE_CONTROL0_HDC_PIPELINE_FLUSH, 50188c2ecf20Sopenharmony_ci PIPE_CONTROL_CS_STALL | 50198c2ecf20Sopenharmony_ci PIPE_CONTROL_TILE_CACHE_FLUSH | 50208c2ecf20Sopenharmony_ci PIPE_CONTROL_FLUSH_L3 | 50218c2ecf20Sopenharmony_ci PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | 50228c2ecf20Sopenharmony_ci PIPE_CONTROL_DEPTH_CACHE_FLUSH | 50238c2ecf20Sopenharmony_ci /* Wa_1409600907:tgl */ 50248c2ecf20Sopenharmony_ci PIPE_CONTROL_DEPTH_STALL | 50258c2ecf20Sopenharmony_ci PIPE_CONTROL_DC_FLUSH_ENABLE | 50268c2ecf20Sopenharmony_ci PIPE_CONTROL_FLUSH_ENABLE); 50278c2ecf20Sopenharmony_ci 50288c2ecf20Sopenharmony_ci return gen12_emit_fini_breadcrumb_tail(request, cs); 50298c2ecf20Sopenharmony_ci} 50308c2ecf20Sopenharmony_ci 50318c2ecf20Sopenharmony_cistatic void execlists_park(struct intel_engine_cs *engine) 50328c2ecf20Sopenharmony_ci{ 50338c2ecf20Sopenharmony_ci cancel_timer(&engine->execlists.timer); 50348c2ecf20Sopenharmony_ci cancel_timer(&engine->execlists.preempt); 50358c2ecf20Sopenharmony_ci} 50368c2ecf20Sopenharmony_ci 50378c2ecf20Sopenharmony_civoid intel_execlists_set_default_submission(struct intel_engine_cs *engine) 50388c2ecf20Sopenharmony_ci{ 50398c2ecf20Sopenharmony_ci engine->submit_request = execlists_submit_request; 50408c2ecf20Sopenharmony_ci engine->schedule = i915_schedule; 50418c2ecf20Sopenharmony_ci engine->execlists.tasklet.func = execlists_submission_tasklet; 50428c2ecf20Sopenharmony_ci 50438c2ecf20Sopenharmony_ci engine->reset.prepare = execlists_reset_prepare; 50448c2ecf20Sopenharmony_ci engine->reset.rewind = execlists_reset_rewind; 50458c2ecf20Sopenharmony_ci engine->reset.cancel = execlists_reset_cancel; 50468c2ecf20Sopenharmony_ci engine->reset.finish = execlists_reset_finish; 50478c2ecf20Sopenharmony_ci 50488c2ecf20Sopenharmony_ci engine->park = execlists_park; 50498c2ecf20Sopenharmony_ci engine->unpark = NULL; 50508c2ecf20Sopenharmony_ci 50518c2ecf20Sopenharmony_ci engine->flags |= I915_ENGINE_SUPPORTS_STATS; 50528c2ecf20Sopenharmony_ci if (!intel_vgpu_active(engine->i915)) { 50538c2ecf20Sopenharmony_ci engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 50548c2ecf20Sopenharmony_ci if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { 50558c2ecf20Sopenharmony_ci engine->flags |= I915_ENGINE_HAS_PREEMPTION; 50568c2ecf20Sopenharmony_ci if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) 50578c2ecf20Sopenharmony_ci engine->flags |= I915_ENGINE_HAS_TIMESLICES; 50588c2ecf20Sopenharmony_ci } 50598c2ecf20Sopenharmony_ci } 50608c2ecf20Sopenharmony_ci 50618c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) >= 12) 50628c2ecf20Sopenharmony_ci engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; 50638c2ecf20Sopenharmony_ci 50648c2ecf20Sopenharmony_ci if (intel_engine_has_preemption(engine)) 50658c2ecf20Sopenharmony_ci engine->emit_bb_start = gen8_emit_bb_start; 50668c2ecf20Sopenharmony_ci else 50678c2ecf20Sopenharmony_ci engine->emit_bb_start = gen8_emit_bb_start_noarb; 50688c2ecf20Sopenharmony_ci} 50698c2ecf20Sopenharmony_ci 50708c2ecf20Sopenharmony_cistatic void execlists_shutdown(struct intel_engine_cs *engine) 50718c2ecf20Sopenharmony_ci{ 50728c2ecf20Sopenharmony_ci /* Synchronise with residual timers and any softirq they raise */ 50738c2ecf20Sopenharmony_ci del_timer_sync(&engine->execlists.timer); 50748c2ecf20Sopenharmony_ci del_timer_sync(&engine->execlists.preempt); 50758c2ecf20Sopenharmony_ci tasklet_kill(&engine->execlists.tasklet); 50768c2ecf20Sopenharmony_ci} 50778c2ecf20Sopenharmony_ci 50788c2ecf20Sopenharmony_cistatic void execlists_release(struct intel_engine_cs *engine) 50798c2ecf20Sopenharmony_ci{ 50808c2ecf20Sopenharmony_ci engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 50818c2ecf20Sopenharmony_ci 50828c2ecf20Sopenharmony_ci execlists_shutdown(engine); 50838c2ecf20Sopenharmony_ci 50848c2ecf20Sopenharmony_ci intel_engine_cleanup_common(engine); 50858c2ecf20Sopenharmony_ci lrc_destroy_wa_ctx(engine); 50868c2ecf20Sopenharmony_ci} 50878c2ecf20Sopenharmony_ci 50888c2ecf20Sopenharmony_cistatic void 50898c2ecf20Sopenharmony_cilogical_ring_default_vfuncs(struct intel_engine_cs *engine) 50908c2ecf20Sopenharmony_ci{ 50918c2ecf20Sopenharmony_ci /* Default vfuncs which can be overriden by each engine. */ 50928c2ecf20Sopenharmony_ci 50938c2ecf20Sopenharmony_ci engine->resume = execlists_resume; 50948c2ecf20Sopenharmony_ci 50958c2ecf20Sopenharmony_ci engine->cops = &execlists_context_ops; 50968c2ecf20Sopenharmony_ci engine->request_alloc = execlists_request_alloc; 50978c2ecf20Sopenharmony_ci 50988c2ecf20Sopenharmony_ci engine->emit_flush = gen8_emit_flush; 50998c2ecf20Sopenharmony_ci engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 51008c2ecf20Sopenharmony_ci engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; 51018c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) >= 12) { 51028c2ecf20Sopenharmony_ci engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb; 51038c2ecf20Sopenharmony_ci engine->emit_flush = gen12_emit_flush; 51048c2ecf20Sopenharmony_ci } 51058c2ecf20Sopenharmony_ci engine->set_default_submission = intel_execlists_set_default_submission; 51068c2ecf20Sopenharmony_ci 51078c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) < 11) { 51088c2ecf20Sopenharmony_ci engine->irq_enable = gen8_logical_ring_enable_irq; 51098c2ecf20Sopenharmony_ci engine->irq_disable = gen8_logical_ring_disable_irq; 51108c2ecf20Sopenharmony_ci } else { 51118c2ecf20Sopenharmony_ci /* 51128c2ecf20Sopenharmony_ci * TODO: On Gen11 interrupt masks need to be clear 51138c2ecf20Sopenharmony_ci * to allow C6 entry. Keep interrupts enabled at 51148c2ecf20Sopenharmony_ci * and take the hit of generating extra interrupts 51158c2ecf20Sopenharmony_ci * until a more refined solution exists. 51168c2ecf20Sopenharmony_ci */ 51178c2ecf20Sopenharmony_ci } 51188c2ecf20Sopenharmony_ci} 51198c2ecf20Sopenharmony_ci 51208c2ecf20Sopenharmony_cistatic inline void 51218c2ecf20Sopenharmony_cilogical_ring_default_irqs(struct intel_engine_cs *engine) 51228c2ecf20Sopenharmony_ci{ 51238c2ecf20Sopenharmony_ci unsigned int shift = 0; 51248c2ecf20Sopenharmony_ci 51258c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) < 11) { 51268c2ecf20Sopenharmony_ci const u8 irq_shifts[] = { 51278c2ecf20Sopenharmony_ci [RCS0] = GEN8_RCS_IRQ_SHIFT, 51288c2ecf20Sopenharmony_ci [BCS0] = GEN8_BCS_IRQ_SHIFT, 51298c2ecf20Sopenharmony_ci [VCS0] = GEN8_VCS0_IRQ_SHIFT, 51308c2ecf20Sopenharmony_ci [VCS1] = GEN8_VCS1_IRQ_SHIFT, 51318c2ecf20Sopenharmony_ci [VECS0] = GEN8_VECS_IRQ_SHIFT, 51328c2ecf20Sopenharmony_ci }; 51338c2ecf20Sopenharmony_ci 51348c2ecf20Sopenharmony_ci shift = irq_shifts[engine->id]; 51358c2ecf20Sopenharmony_ci } 51368c2ecf20Sopenharmony_ci 51378c2ecf20Sopenharmony_ci engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; 51388c2ecf20Sopenharmony_ci engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; 51398c2ecf20Sopenharmony_ci engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift; 51408c2ecf20Sopenharmony_ci engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift; 51418c2ecf20Sopenharmony_ci} 51428c2ecf20Sopenharmony_ci 51438c2ecf20Sopenharmony_cistatic void rcs_submission_override(struct intel_engine_cs *engine) 51448c2ecf20Sopenharmony_ci{ 51458c2ecf20Sopenharmony_ci switch (INTEL_GEN(engine->i915)) { 51468c2ecf20Sopenharmony_ci case 12: 51478c2ecf20Sopenharmony_ci engine->emit_flush = gen12_emit_flush_render; 51488c2ecf20Sopenharmony_ci engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 51498c2ecf20Sopenharmony_ci break; 51508c2ecf20Sopenharmony_ci case 11: 51518c2ecf20Sopenharmony_ci engine->emit_flush = gen11_emit_flush_render; 51528c2ecf20Sopenharmony_ci engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 51538c2ecf20Sopenharmony_ci break; 51548c2ecf20Sopenharmony_ci default: 51558c2ecf20Sopenharmony_ci engine->emit_flush = gen8_emit_flush_render; 51568c2ecf20Sopenharmony_ci engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 51578c2ecf20Sopenharmony_ci break; 51588c2ecf20Sopenharmony_ci } 51598c2ecf20Sopenharmony_ci} 51608c2ecf20Sopenharmony_ci 51618c2ecf20Sopenharmony_ciint intel_execlists_submission_setup(struct intel_engine_cs *engine) 51628c2ecf20Sopenharmony_ci{ 51638c2ecf20Sopenharmony_ci struct intel_engine_execlists * const execlists = &engine->execlists; 51648c2ecf20Sopenharmony_ci struct drm_i915_private *i915 = engine->i915; 51658c2ecf20Sopenharmony_ci struct intel_uncore *uncore = engine->uncore; 51668c2ecf20Sopenharmony_ci u32 base = engine->mmio_base; 51678c2ecf20Sopenharmony_ci 51688c2ecf20Sopenharmony_ci tasklet_init(&engine->execlists.tasklet, 51698c2ecf20Sopenharmony_ci execlists_submission_tasklet, (unsigned long)engine); 51708c2ecf20Sopenharmony_ci timer_setup(&engine->execlists.timer, execlists_timeslice, 0); 51718c2ecf20Sopenharmony_ci timer_setup(&engine->execlists.preempt, execlists_preempt, 0); 51728c2ecf20Sopenharmony_ci 51738c2ecf20Sopenharmony_ci logical_ring_default_vfuncs(engine); 51748c2ecf20Sopenharmony_ci logical_ring_default_irqs(engine); 51758c2ecf20Sopenharmony_ci 51768c2ecf20Sopenharmony_ci if (engine->class == RENDER_CLASS) 51778c2ecf20Sopenharmony_ci rcs_submission_override(engine); 51788c2ecf20Sopenharmony_ci 51798c2ecf20Sopenharmony_ci if (intel_init_workaround_bb(engine)) 51808c2ecf20Sopenharmony_ci /* 51818c2ecf20Sopenharmony_ci * We continue even if we fail to initialize WA batch 51828c2ecf20Sopenharmony_ci * because we only expect rare glitches but nothing 51838c2ecf20Sopenharmony_ci * critical to prevent us from using GPU 51848c2ecf20Sopenharmony_ci */ 51858c2ecf20Sopenharmony_ci drm_err(&i915->drm, "WA batch buffer initialization failed\n"); 51868c2ecf20Sopenharmony_ci 51878c2ecf20Sopenharmony_ci if (HAS_LOGICAL_RING_ELSQ(i915)) { 51888c2ecf20Sopenharmony_ci execlists->submit_reg = uncore->regs + 51898c2ecf20Sopenharmony_ci i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)); 51908c2ecf20Sopenharmony_ci execlists->ctrl_reg = uncore->regs + 51918c2ecf20Sopenharmony_ci i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)); 51928c2ecf20Sopenharmony_ci } else { 51938c2ecf20Sopenharmony_ci execlists->submit_reg = uncore->regs + 51948c2ecf20Sopenharmony_ci i915_mmio_reg_offset(RING_ELSP(base)); 51958c2ecf20Sopenharmony_ci } 51968c2ecf20Sopenharmony_ci 51978c2ecf20Sopenharmony_ci execlists->csb_status = 51988c2ecf20Sopenharmony_ci (u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; 51998c2ecf20Sopenharmony_ci 52008c2ecf20Sopenharmony_ci execlists->csb_write = 52018c2ecf20Sopenharmony_ci &engine->status_page.addr[intel_hws_csb_write_index(i915)]; 52028c2ecf20Sopenharmony_ci 52038c2ecf20Sopenharmony_ci if (INTEL_GEN(i915) < 11) 52048c2ecf20Sopenharmony_ci execlists->csb_size = GEN8_CSB_ENTRIES; 52058c2ecf20Sopenharmony_ci else 52068c2ecf20Sopenharmony_ci execlists->csb_size = GEN11_CSB_ENTRIES; 52078c2ecf20Sopenharmony_ci 52088c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) >= 11) { 52098c2ecf20Sopenharmony_ci execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); 52108c2ecf20Sopenharmony_ci execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); 52118c2ecf20Sopenharmony_ci } 52128c2ecf20Sopenharmony_ci 52138c2ecf20Sopenharmony_ci /* Finally, take ownership and responsibility for cleanup! */ 52148c2ecf20Sopenharmony_ci engine->sanitize = execlists_sanitize; 52158c2ecf20Sopenharmony_ci engine->release = execlists_release; 52168c2ecf20Sopenharmony_ci 52178c2ecf20Sopenharmony_ci return 0; 52188c2ecf20Sopenharmony_ci} 52198c2ecf20Sopenharmony_ci 52208c2ecf20Sopenharmony_cistatic void init_common_reg_state(u32 * const regs, 52218c2ecf20Sopenharmony_ci const struct intel_engine_cs *engine, 52228c2ecf20Sopenharmony_ci const struct intel_ring *ring, 52238c2ecf20Sopenharmony_ci bool inhibit) 52248c2ecf20Sopenharmony_ci{ 52258c2ecf20Sopenharmony_ci u32 ctl; 52268c2ecf20Sopenharmony_ci 52278c2ecf20Sopenharmony_ci ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); 52288c2ecf20Sopenharmony_ci ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); 52298c2ecf20Sopenharmony_ci if (inhibit) 52308c2ecf20Sopenharmony_ci ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; 52318c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) < 11) 52328c2ecf20Sopenharmony_ci ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | 52338c2ecf20Sopenharmony_ci CTX_CTRL_RS_CTX_ENABLE); 52348c2ecf20Sopenharmony_ci regs[CTX_CONTEXT_CONTROL] = ctl; 52358c2ecf20Sopenharmony_ci 52368c2ecf20Sopenharmony_ci regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; 52378c2ecf20Sopenharmony_ci regs[CTX_TIMESTAMP] = 0; 52388c2ecf20Sopenharmony_ci} 52398c2ecf20Sopenharmony_ci 52408c2ecf20Sopenharmony_cistatic void init_wa_bb_reg_state(u32 * const regs, 52418c2ecf20Sopenharmony_ci const struct intel_engine_cs *engine) 52428c2ecf20Sopenharmony_ci{ 52438c2ecf20Sopenharmony_ci const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; 52448c2ecf20Sopenharmony_ci 52458c2ecf20Sopenharmony_ci if (wa_ctx->per_ctx.size) { 52468c2ecf20Sopenharmony_ci const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); 52478c2ecf20Sopenharmony_ci 52488c2ecf20Sopenharmony_ci GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1); 52498c2ecf20Sopenharmony_ci regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = 52508c2ecf20Sopenharmony_ci (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; 52518c2ecf20Sopenharmony_ci } 52528c2ecf20Sopenharmony_ci 52538c2ecf20Sopenharmony_ci if (wa_ctx->indirect_ctx.size) { 52548c2ecf20Sopenharmony_ci lrc_ring_setup_indirect_ctx(regs, engine, 52558c2ecf20Sopenharmony_ci i915_ggtt_offset(wa_ctx->vma) + 52568c2ecf20Sopenharmony_ci wa_ctx->indirect_ctx.offset, 52578c2ecf20Sopenharmony_ci wa_ctx->indirect_ctx.size); 52588c2ecf20Sopenharmony_ci } 52598c2ecf20Sopenharmony_ci} 52608c2ecf20Sopenharmony_ci 52618c2ecf20Sopenharmony_cistatic void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt) 52628c2ecf20Sopenharmony_ci{ 52638c2ecf20Sopenharmony_ci if (i915_vm_is_4lvl(&ppgtt->vm)) { 52648c2ecf20Sopenharmony_ci /* 64b PPGTT (48bit canonical) 52658c2ecf20Sopenharmony_ci * PDP0_DESCRIPTOR contains the base address to PML4 and 52668c2ecf20Sopenharmony_ci * other PDP Descriptors are ignored. 52678c2ecf20Sopenharmony_ci */ 52688c2ecf20Sopenharmony_ci ASSIGN_CTX_PML4(ppgtt, regs); 52698c2ecf20Sopenharmony_ci } else { 52708c2ecf20Sopenharmony_ci ASSIGN_CTX_PDP(ppgtt, regs, 3); 52718c2ecf20Sopenharmony_ci ASSIGN_CTX_PDP(ppgtt, regs, 2); 52728c2ecf20Sopenharmony_ci ASSIGN_CTX_PDP(ppgtt, regs, 1); 52738c2ecf20Sopenharmony_ci ASSIGN_CTX_PDP(ppgtt, regs, 0); 52748c2ecf20Sopenharmony_ci } 52758c2ecf20Sopenharmony_ci} 52768c2ecf20Sopenharmony_ci 52778c2ecf20Sopenharmony_cistatic struct i915_ppgtt *vm_alias(struct i915_address_space *vm) 52788c2ecf20Sopenharmony_ci{ 52798c2ecf20Sopenharmony_ci if (i915_is_ggtt(vm)) 52808c2ecf20Sopenharmony_ci return i915_vm_to_ggtt(vm)->alias; 52818c2ecf20Sopenharmony_ci else 52828c2ecf20Sopenharmony_ci return i915_vm_to_ppgtt(vm); 52838c2ecf20Sopenharmony_ci} 52848c2ecf20Sopenharmony_ci 52858c2ecf20Sopenharmony_cistatic void execlists_init_reg_state(u32 *regs, 52868c2ecf20Sopenharmony_ci const struct intel_context *ce, 52878c2ecf20Sopenharmony_ci const struct intel_engine_cs *engine, 52888c2ecf20Sopenharmony_ci const struct intel_ring *ring, 52898c2ecf20Sopenharmony_ci bool inhibit) 52908c2ecf20Sopenharmony_ci{ 52918c2ecf20Sopenharmony_ci /* 52928c2ecf20Sopenharmony_ci * A context is actually a big batch buffer with several 52938c2ecf20Sopenharmony_ci * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The 52948c2ecf20Sopenharmony_ci * values we are setting here are only for the first context restore: 52958c2ecf20Sopenharmony_ci * on a subsequent save, the GPU will recreate this batchbuffer with new 52968c2ecf20Sopenharmony_ci * values (including all the missing MI_LOAD_REGISTER_IMM commands that 52978c2ecf20Sopenharmony_ci * we are not initializing here). 52988c2ecf20Sopenharmony_ci * 52998c2ecf20Sopenharmony_ci * Must keep consistent with virtual_update_register_offsets(). 53008c2ecf20Sopenharmony_ci */ 53018c2ecf20Sopenharmony_ci set_offsets(regs, reg_offsets(engine), engine, inhibit); 53028c2ecf20Sopenharmony_ci 53038c2ecf20Sopenharmony_ci init_common_reg_state(regs, engine, ring, inhibit); 53048c2ecf20Sopenharmony_ci init_ppgtt_reg_state(regs, vm_alias(ce->vm)); 53058c2ecf20Sopenharmony_ci 53068c2ecf20Sopenharmony_ci init_wa_bb_reg_state(regs, engine); 53078c2ecf20Sopenharmony_ci 53088c2ecf20Sopenharmony_ci __reset_stop_ring(regs, engine); 53098c2ecf20Sopenharmony_ci} 53108c2ecf20Sopenharmony_ci 53118c2ecf20Sopenharmony_cistatic int 53128c2ecf20Sopenharmony_cipopulate_lr_context(struct intel_context *ce, 53138c2ecf20Sopenharmony_ci struct drm_i915_gem_object *ctx_obj, 53148c2ecf20Sopenharmony_ci struct intel_engine_cs *engine, 53158c2ecf20Sopenharmony_ci struct intel_ring *ring) 53168c2ecf20Sopenharmony_ci{ 53178c2ecf20Sopenharmony_ci bool inhibit = true; 53188c2ecf20Sopenharmony_ci void *vaddr; 53198c2ecf20Sopenharmony_ci 53208c2ecf20Sopenharmony_ci vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); 53218c2ecf20Sopenharmony_ci if (IS_ERR(vaddr)) { 53228c2ecf20Sopenharmony_ci drm_dbg(&engine->i915->drm, "Could not map object pages!\n"); 53238c2ecf20Sopenharmony_ci return PTR_ERR(vaddr); 53248c2ecf20Sopenharmony_ci } 53258c2ecf20Sopenharmony_ci 53268c2ecf20Sopenharmony_ci set_redzone(vaddr, engine); 53278c2ecf20Sopenharmony_ci 53288c2ecf20Sopenharmony_ci if (engine->default_state) { 53298c2ecf20Sopenharmony_ci shmem_read(engine->default_state, 0, 53308c2ecf20Sopenharmony_ci vaddr, engine->context_size); 53318c2ecf20Sopenharmony_ci __set_bit(CONTEXT_VALID_BIT, &ce->flags); 53328c2ecf20Sopenharmony_ci inhibit = false; 53338c2ecf20Sopenharmony_ci } 53348c2ecf20Sopenharmony_ci 53358c2ecf20Sopenharmony_ci /* Clear the ppHWSP (inc. per-context counters) */ 53368c2ecf20Sopenharmony_ci memset(vaddr, 0, PAGE_SIZE); 53378c2ecf20Sopenharmony_ci 53388c2ecf20Sopenharmony_ci /* 53398c2ecf20Sopenharmony_ci * The second page of the context object contains some registers which 53408c2ecf20Sopenharmony_ci * must be set up prior to the first execution. 53418c2ecf20Sopenharmony_ci */ 53428c2ecf20Sopenharmony_ci execlists_init_reg_state(vaddr + LRC_STATE_OFFSET, 53438c2ecf20Sopenharmony_ci ce, engine, ring, inhibit); 53448c2ecf20Sopenharmony_ci 53458c2ecf20Sopenharmony_ci __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size); 53468c2ecf20Sopenharmony_ci i915_gem_object_unpin_map(ctx_obj); 53478c2ecf20Sopenharmony_ci return 0; 53488c2ecf20Sopenharmony_ci} 53498c2ecf20Sopenharmony_ci 53508c2ecf20Sopenharmony_cistatic struct intel_timeline *pinned_timeline(struct intel_context *ce) 53518c2ecf20Sopenharmony_ci{ 53528c2ecf20Sopenharmony_ci struct intel_timeline *tl = fetch_and_zero(&ce->timeline); 53538c2ecf20Sopenharmony_ci 53548c2ecf20Sopenharmony_ci return intel_timeline_create_from_engine(ce->engine, 53558c2ecf20Sopenharmony_ci page_unmask_bits(tl)); 53568c2ecf20Sopenharmony_ci} 53578c2ecf20Sopenharmony_ci 53588c2ecf20Sopenharmony_cistatic int __execlists_context_alloc(struct intel_context *ce, 53598c2ecf20Sopenharmony_ci struct intel_engine_cs *engine) 53608c2ecf20Sopenharmony_ci{ 53618c2ecf20Sopenharmony_ci struct drm_i915_gem_object *ctx_obj; 53628c2ecf20Sopenharmony_ci struct intel_ring *ring; 53638c2ecf20Sopenharmony_ci struct i915_vma *vma; 53648c2ecf20Sopenharmony_ci u32 context_size; 53658c2ecf20Sopenharmony_ci int ret; 53668c2ecf20Sopenharmony_ci 53678c2ecf20Sopenharmony_ci GEM_BUG_ON(ce->state); 53688c2ecf20Sopenharmony_ci context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); 53698c2ecf20Sopenharmony_ci 53708c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 53718c2ecf20Sopenharmony_ci context_size += I915_GTT_PAGE_SIZE; /* for redzone */ 53728c2ecf20Sopenharmony_ci 53738c2ecf20Sopenharmony_ci if (INTEL_GEN(engine->i915) == 12) { 53748c2ecf20Sopenharmony_ci ce->wa_bb_page = context_size / PAGE_SIZE; 53758c2ecf20Sopenharmony_ci context_size += PAGE_SIZE; 53768c2ecf20Sopenharmony_ci } 53778c2ecf20Sopenharmony_ci 53788c2ecf20Sopenharmony_ci ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size); 53798c2ecf20Sopenharmony_ci if (IS_ERR(ctx_obj)) 53808c2ecf20Sopenharmony_ci return PTR_ERR(ctx_obj); 53818c2ecf20Sopenharmony_ci 53828c2ecf20Sopenharmony_ci vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL); 53838c2ecf20Sopenharmony_ci if (IS_ERR(vma)) { 53848c2ecf20Sopenharmony_ci ret = PTR_ERR(vma); 53858c2ecf20Sopenharmony_ci goto error_deref_obj; 53868c2ecf20Sopenharmony_ci } 53878c2ecf20Sopenharmony_ci 53888c2ecf20Sopenharmony_ci if (!page_mask_bits(ce->timeline)) { 53898c2ecf20Sopenharmony_ci struct intel_timeline *tl; 53908c2ecf20Sopenharmony_ci 53918c2ecf20Sopenharmony_ci /* 53928c2ecf20Sopenharmony_ci * Use the static global HWSP for the kernel context, and 53938c2ecf20Sopenharmony_ci * a dynamically allocated cacheline for everyone else. 53948c2ecf20Sopenharmony_ci */ 53958c2ecf20Sopenharmony_ci if (unlikely(ce->timeline)) 53968c2ecf20Sopenharmony_ci tl = pinned_timeline(ce); 53978c2ecf20Sopenharmony_ci else 53988c2ecf20Sopenharmony_ci tl = intel_timeline_create(engine->gt); 53998c2ecf20Sopenharmony_ci if (IS_ERR(tl)) { 54008c2ecf20Sopenharmony_ci ret = PTR_ERR(tl); 54018c2ecf20Sopenharmony_ci goto error_deref_obj; 54028c2ecf20Sopenharmony_ci } 54038c2ecf20Sopenharmony_ci 54048c2ecf20Sopenharmony_ci ce->timeline = tl; 54058c2ecf20Sopenharmony_ci } 54068c2ecf20Sopenharmony_ci 54078c2ecf20Sopenharmony_ci ring = intel_engine_create_ring(engine, (unsigned long)ce->ring); 54088c2ecf20Sopenharmony_ci if (IS_ERR(ring)) { 54098c2ecf20Sopenharmony_ci ret = PTR_ERR(ring); 54108c2ecf20Sopenharmony_ci goto error_deref_obj; 54118c2ecf20Sopenharmony_ci } 54128c2ecf20Sopenharmony_ci 54138c2ecf20Sopenharmony_ci ret = populate_lr_context(ce, ctx_obj, engine, ring); 54148c2ecf20Sopenharmony_ci if (ret) { 54158c2ecf20Sopenharmony_ci drm_dbg(&engine->i915->drm, 54168c2ecf20Sopenharmony_ci "Failed to populate LRC: %d\n", ret); 54178c2ecf20Sopenharmony_ci goto error_ring_free; 54188c2ecf20Sopenharmony_ci } 54198c2ecf20Sopenharmony_ci 54208c2ecf20Sopenharmony_ci ce->ring = ring; 54218c2ecf20Sopenharmony_ci ce->state = vma; 54228c2ecf20Sopenharmony_ci 54238c2ecf20Sopenharmony_ci return 0; 54248c2ecf20Sopenharmony_ci 54258c2ecf20Sopenharmony_cierror_ring_free: 54268c2ecf20Sopenharmony_ci intel_ring_put(ring); 54278c2ecf20Sopenharmony_cierror_deref_obj: 54288c2ecf20Sopenharmony_ci i915_gem_object_put(ctx_obj); 54298c2ecf20Sopenharmony_ci return ret; 54308c2ecf20Sopenharmony_ci} 54318c2ecf20Sopenharmony_ci 54328c2ecf20Sopenharmony_cistatic struct list_head *virtual_queue(struct virtual_engine *ve) 54338c2ecf20Sopenharmony_ci{ 54348c2ecf20Sopenharmony_ci return &ve->base.execlists.default_priolist.requests[0]; 54358c2ecf20Sopenharmony_ci} 54368c2ecf20Sopenharmony_ci 54378c2ecf20Sopenharmony_cistatic void rcu_virtual_context_destroy(struct work_struct *wrk) 54388c2ecf20Sopenharmony_ci{ 54398c2ecf20Sopenharmony_ci struct virtual_engine *ve = 54408c2ecf20Sopenharmony_ci container_of(wrk, typeof(*ve), rcu.work); 54418c2ecf20Sopenharmony_ci unsigned int n; 54428c2ecf20Sopenharmony_ci 54438c2ecf20Sopenharmony_ci GEM_BUG_ON(ve->context.inflight); 54448c2ecf20Sopenharmony_ci 54458c2ecf20Sopenharmony_ci /* Preempt-to-busy may leave a stale request behind. */ 54468c2ecf20Sopenharmony_ci if (unlikely(ve->request)) { 54478c2ecf20Sopenharmony_ci struct i915_request *old; 54488c2ecf20Sopenharmony_ci 54498c2ecf20Sopenharmony_ci spin_lock_irq(&ve->base.active.lock); 54508c2ecf20Sopenharmony_ci 54518c2ecf20Sopenharmony_ci old = fetch_and_zero(&ve->request); 54528c2ecf20Sopenharmony_ci if (old) { 54538c2ecf20Sopenharmony_ci GEM_BUG_ON(!i915_request_completed(old)); 54548c2ecf20Sopenharmony_ci __i915_request_submit(old); 54558c2ecf20Sopenharmony_ci i915_request_put(old); 54568c2ecf20Sopenharmony_ci } 54578c2ecf20Sopenharmony_ci 54588c2ecf20Sopenharmony_ci spin_unlock_irq(&ve->base.active.lock); 54598c2ecf20Sopenharmony_ci } 54608c2ecf20Sopenharmony_ci 54618c2ecf20Sopenharmony_ci /* 54628c2ecf20Sopenharmony_ci * Flush the tasklet in case it is still running on another core. 54638c2ecf20Sopenharmony_ci * 54648c2ecf20Sopenharmony_ci * This needs to be done before we remove ourselves from the siblings' 54658c2ecf20Sopenharmony_ci * rbtrees as in the case it is running in parallel, it may reinsert 54668c2ecf20Sopenharmony_ci * the rb_node into a sibling. 54678c2ecf20Sopenharmony_ci */ 54688c2ecf20Sopenharmony_ci tasklet_kill(&ve->base.execlists.tasklet); 54698c2ecf20Sopenharmony_ci 54708c2ecf20Sopenharmony_ci /* Decouple ourselves from the siblings, no more access allowed. */ 54718c2ecf20Sopenharmony_ci for (n = 0; n < ve->num_siblings; n++) { 54728c2ecf20Sopenharmony_ci struct intel_engine_cs *sibling = ve->siblings[n]; 54738c2ecf20Sopenharmony_ci struct rb_node *node = &ve->nodes[sibling->id].rb; 54748c2ecf20Sopenharmony_ci 54758c2ecf20Sopenharmony_ci if (RB_EMPTY_NODE(node)) 54768c2ecf20Sopenharmony_ci continue; 54778c2ecf20Sopenharmony_ci 54788c2ecf20Sopenharmony_ci spin_lock_irq(&sibling->active.lock); 54798c2ecf20Sopenharmony_ci 54808c2ecf20Sopenharmony_ci /* Detachment is lazily performed in the execlists tasklet */ 54818c2ecf20Sopenharmony_ci if (!RB_EMPTY_NODE(node)) 54828c2ecf20Sopenharmony_ci rb_erase_cached(node, &sibling->execlists.virtual); 54838c2ecf20Sopenharmony_ci 54848c2ecf20Sopenharmony_ci spin_unlock_irq(&sibling->active.lock); 54858c2ecf20Sopenharmony_ci } 54868c2ecf20Sopenharmony_ci GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); 54878c2ecf20Sopenharmony_ci GEM_BUG_ON(!list_empty(virtual_queue(ve))); 54888c2ecf20Sopenharmony_ci 54898c2ecf20Sopenharmony_ci if (ve->context.state) 54908c2ecf20Sopenharmony_ci __execlists_context_fini(&ve->context); 54918c2ecf20Sopenharmony_ci intel_context_fini(&ve->context); 54928c2ecf20Sopenharmony_ci 54938c2ecf20Sopenharmony_ci intel_breadcrumbs_free(ve->base.breadcrumbs); 54948c2ecf20Sopenharmony_ci intel_engine_free_request_pool(&ve->base); 54958c2ecf20Sopenharmony_ci 54968c2ecf20Sopenharmony_ci kfree(ve->bonds); 54978c2ecf20Sopenharmony_ci kfree(ve); 54988c2ecf20Sopenharmony_ci} 54998c2ecf20Sopenharmony_ci 55008c2ecf20Sopenharmony_cistatic void virtual_context_destroy(struct kref *kref) 55018c2ecf20Sopenharmony_ci{ 55028c2ecf20Sopenharmony_ci struct virtual_engine *ve = 55038c2ecf20Sopenharmony_ci container_of(kref, typeof(*ve), context.ref); 55048c2ecf20Sopenharmony_ci 55058c2ecf20Sopenharmony_ci GEM_BUG_ON(!list_empty(&ve->context.signals)); 55068c2ecf20Sopenharmony_ci 55078c2ecf20Sopenharmony_ci /* 55088c2ecf20Sopenharmony_ci * When destroying the virtual engine, we have to be aware that 55098c2ecf20Sopenharmony_ci * it may still be in use from an hardirq/softirq context causing 55108c2ecf20Sopenharmony_ci * the resubmission of a completed request (background completion 55118c2ecf20Sopenharmony_ci * due to preempt-to-busy). Before we can free the engine, we need 55128c2ecf20Sopenharmony_ci * to flush the submission code and tasklets that are still potentially 55138c2ecf20Sopenharmony_ci * accessing the engine. Flushing the tasklets requires process context, 55148c2ecf20Sopenharmony_ci * and since we can guard the resubmit onto the engine with an RCU read 55158c2ecf20Sopenharmony_ci * lock, we can delegate the free of the engine to an RCU worker. 55168c2ecf20Sopenharmony_ci */ 55178c2ecf20Sopenharmony_ci INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy); 55188c2ecf20Sopenharmony_ci queue_rcu_work(system_wq, &ve->rcu); 55198c2ecf20Sopenharmony_ci} 55208c2ecf20Sopenharmony_ci 55218c2ecf20Sopenharmony_cistatic void virtual_engine_initial_hint(struct virtual_engine *ve) 55228c2ecf20Sopenharmony_ci{ 55238c2ecf20Sopenharmony_ci int swp; 55248c2ecf20Sopenharmony_ci 55258c2ecf20Sopenharmony_ci /* 55268c2ecf20Sopenharmony_ci * Pick a random sibling on starting to help spread the load around. 55278c2ecf20Sopenharmony_ci * 55288c2ecf20Sopenharmony_ci * New contexts are typically created with exactly the same order 55298c2ecf20Sopenharmony_ci * of siblings, and often started in batches. Due to the way we iterate 55308c2ecf20Sopenharmony_ci * the array of sibling when submitting requests, sibling[0] is 55318c2ecf20Sopenharmony_ci * prioritised for dequeuing. If we make sure that sibling[0] is fairly 55328c2ecf20Sopenharmony_ci * randomised across the system, we also help spread the load by the 55338c2ecf20Sopenharmony_ci * first engine we inspect being different each time. 55348c2ecf20Sopenharmony_ci * 55358c2ecf20Sopenharmony_ci * NB This does not force us to execute on this engine, it will just 55368c2ecf20Sopenharmony_ci * typically be the first we inspect for submission. 55378c2ecf20Sopenharmony_ci */ 55388c2ecf20Sopenharmony_ci swp = prandom_u32_max(ve->num_siblings); 55398c2ecf20Sopenharmony_ci if (swp) 55408c2ecf20Sopenharmony_ci swap(ve->siblings[swp], ve->siblings[0]); 55418c2ecf20Sopenharmony_ci} 55428c2ecf20Sopenharmony_ci 55438c2ecf20Sopenharmony_cistatic int virtual_context_alloc(struct intel_context *ce) 55448c2ecf20Sopenharmony_ci{ 55458c2ecf20Sopenharmony_ci struct virtual_engine *ve = container_of(ce, typeof(*ve), context); 55468c2ecf20Sopenharmony_ci 55478c2ecf20Sopenharmony_ci return __execlists_context_alloc(ce, ve->siblings[0]); 55488c2ecf20Sopenharmony_ci} 55498c2ecf20Sopenharmony_ci 55508c2ecf20Sopenharmony_cistatic int virtual_context_pin(struct intel_context *ce, void *vaddr) 55518c2ecf20Sopenharmony_ci{ 55528c2ecf20Sopenharmony_ci struct virtual_engine *ve = container_of(ce, typeof(*ve), context); 55538c2ecf20Sopenharmony_ci 55548c2ecf20Sopenharmony_ci /* Note: we must use a real engine class for setting up reg state */ 55558c2ecf20Sopenharmony_ci return __execlists_context_pin(ce, ve->siblings[0], vaddr); 55568c2ecf20Sopenharmony_ci} 55578c2ecf20Sopenharmony_ci 55588c2ecf20Sopenharmony_cistatic void virtual_context_enter(struct intel_context *ce) 55598c2ecf20Sopenharmony_ci{ 55608c2ecf20Sopenharmony_ci struct virtual_engine *ve = container_of(ce, typeof(*ve), context); 55618c2ecf20Sopenharmony_ci unsigned int n; 55628c2ecf20Sopenharmony_ci 55638c2ecf20Sopenharmony_ci for (n = 0; n < ve->num_siblings; n++) 55648c2ecf20Sopenharmony_ci intel_engine_pm_get(ve->siblings[n]); 55658c2ecf20Sopenharmony_ci 55668c2ecf20Sopenharmony_ci intel_timeline_enter(ce->timeline); 55678c2ecf20Sopenharmony_ci} 55688c2ecf20Sopenharmony_ci 55698c2ecf20Sopenharmony_cistatic void virtual_context_exit(struct intel_context *ce) 55708c2ecf20Sopenharmony_ci{ 55718c2ecf20Sopenharmony_ci struct virtual_engine *ve = container_of(ce, typeof(*ve), context); 55728c2ecf20Sopenharmony_ci unsigned int n; 55738c2ecf20Sopenharmony_ci 55748c2ecf20Sopenharmony_ci intel_timeline_exit(ce->timeline); 55758c2ecf20Sopenharmony_ci 55768c2ecf20Sopenharmony_ci for (n = 0; n < ve->num_siblings; n++) 55778c2ecf20Sopenharmony_ci intel_engine_pm_put(ve->siblings[n]); 55788c2ecf20Sopenharmony_ci} 55798c2ecf20Sopenharmony_ci 55808c2ecf20Sopenharmony_cistatic const struct intel_context_ops virtual_context_ops = { 55818c2ecf20Sopenharmony_ci .alloc = virtual_context_alloc, 55828c2ecf20Sopenharmony_ci 55838c2ecf20Sopenharmony_ci .pre_pin = execlists_context_pre_pin, 55848c2ecf20Sopenharmony_ci .pin = virtual_context_pin, 55858c2ecf20Sopenharmony_ci .unpin = execlists_context_unpin, 55868c2ecf20Sopenharmony_ci .post_unpin = execlists_context_post_unpin, 55878c2ecf20Sopenharmony_ci 55888c2ecf20Sopenharmony_ci .enter = virtual_context_enter, 55898c2ecf20Sopenharmony_ci .exit = virtual_context_exit, 55908c2ecf20Sopenharmony_ci 55918c2ecf20Sopenharmony_ci .destroy = virtual_context_destroy, 55928c2ecf20Sopenharmony_ci}; 55938c2ecf20Sopenharmony_ci 55948c2ecf20Sopenharmony_cistatic intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) 55958c2ecf20Sopenharmony_ci{ 55968c2ecf20Sopenharmony_ci struct i915_request *rq; 55978c2ecf20Sopenharmony_ci intel_engine_mask_t mask; 55988c2ecf20Sopenharmony_ci 55998c2ecf20Sopenharmony_ci rq = READ_ONCE(ve->request); 56008c2ecf20Sopenharmony_ci if (!rq) 56018c2ecf20Sopenharmony_ci return 0; 56028c2ecf20Sopenharmony_ci 56038c2ecf20Sopenharmony_ci /* The rq is ready for submission; rq->execution_mask is now stable. */ 56048c2ecf20Sopenharmony_ci mask = rq->execution_mask; 56058c2ecf20Sopenharmony_ci if (unlikely(!mask)) { 56068c2ecf20Sopenharmony_ci /* Invalid selection, submit to a random engine in error */ 56078c2ecf20Sopenharmony_ci i915_request_set_error_once(rq, -ENODEV); 56088c2ecf20Sopenharmony_ci mask = ve->siblings[0]->mask; 56098c2ecf20Sopenharmony_ci } 56108c2ecf20Sopenharmony_ci 56118c2ecf20Sopenharmony_ci ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n", 56128c2ecf20Sopenharmony_ci rq->fence.context, rq->fence.seqno, 56138c2ecf20Sopenharmony_ci mask, ve->base.execlists.queue_priority_hint); 56148c2ecf20Sopenharmony_ci 56158c2ecf20Sopenharmony_ci return mask; 56168c2ecf20Sopenharmony_ci} 56178c2ecf20Sopenharmony_ci 56188c2ecf20Sopenharmony_cistatic void virtual_submission_tasklet(unsigned long data) 56198c2ecf20Sopenharmony_ci{ 56208c2ecf20Sopenharmony_ci struct virtual_engine * const ve = (struct virtual_engine *)data; 56218c2ecf20Sopenharmony_ci const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint); 56228c2ecf20Sopenharmony_ci intel_engine_mask_t mask; 56238c2ecf20Sopenharmony_ci unsigned int n; 56248c2ecf20Sopenharmony_ci 56258c2ecf20Sopenharmony_ci rcu_read_lock(); 56268c2ecf20Sopenharmony_ci mask = virtual_submission_mask(ve); 56278c2ecf20Sopenharmony_ci rcu_read_unlock(); 56288c2ecf20Sopenharmony_ci if (unlikely(!mask)) 56298c2ecf20Sopenharmony_ci return; 56308c2ecf20Sopenharmony_ci 56318c2ecf20Sopenharmony_ci local_irq_disable(); 56328c2ecf20Sopenharmony_ci for (n = 0; n < ve->num_siblings; n++) { 56338c2ecf20Sopenharmony_ci struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]); 56348c2ecf20Sopenharmony_ci struct ve_node * const node = &ve->nodes[sibling->id]; 56358c2ecf20Sopenharmony_ci struct rb_node **parent, *rb; 56368c2ecf20Sopenharmony_ci bool first; 56378c2ecf20Sopenharmony_ci 56388c2ecf20Sopenharmony_ci if (!READ_ONCE(ve->request)) 56398c2ecf20Sopenharmony_ci break; /* already handled by a sibling's tasklet */ 56408c2ecf20Sopenharmony_ci 56418c2ecf20Sopenharmony_ci if (unlikely(!(mask & sibling->mask))) { 56428c2ecf20Sopenharmony_ci if (!RB_EMPTY_NODE(&node->rb)) { 56438c2ecf20Sopenharmony_ci spin_lock(&sibling->active.lock); 56448c2ecf20Sopenharmony_ci rb_erase_cached(&node->rb, 56458c2ecf20Sopenharmony_ci &sibling->execlists.virtual); 56468c2ecf20Sopenharmony_ci RB_CLEAR_NODE(&node->rb); 56478c2ecf20Sopenharmony_ci spin_unlock(&sibling->active.lock); 56488c2ecf20Sopenharmony_ci } 56498c2ecf20Sopenharmony_ci continue; 56508c2ecf20Sopenharmony_ci } 56518c2ecf20Sopenharmony_ci 56528c2ecf20Sopenharmony_ci spin_lock(&sibling->active.lock); 56538c2ecf20Sopenharmony_ci 56548c2ecf20Sopenharmony_ci if (!RB_EMPTY_NODE(&node->rb)) { 56558c2ecf20Sopenharmony_ci /* 56568c2ecf20Sopenharmony_ci * Cheat and avoid rebalancing the tree if we can 56578c2ecf20Sopenharmony_ci * reuse this node in situ. 56588c2ecf20Sopenharmony_ci */ 56598c2ecf20Sopenharmony_ci first = rb_first_cached(&sibling->execlists.virtual) == 56608c2ecf20Sopenharmony_ci &node->rb; 56618c2ecf20Sopenharmony_ci if (prio == node->prio || (prio > node->prio && first)) 56628c2ecf20Sopenharmony_ci goto submit_engine; 56638c2ecf20Sopenharmony_ci 56648c2ecf20Sopenharmony_ci rb_erase_cached(&node->rb, &sibling->execlists.virtual); 56658c2ecf20Sopenharmony_ci } 56668c2ecf20Sopenharmony_ci 56678c2ecf20Sopenharmony_ci rb = NULL; 56688c2ecf20Sopenharmony_ci first = true; 56698c2ecf20Sopenharmony_ci parent = &sibling->execlists.virtual.rb_root.rb_node; 56708c2ecf20Sopenharmony_ci while (*parent) { 56718c2ecf20Sopenharmony_ci struct ve_node *other; 56728c2ecf20Sopenharmony_ci 56738c2ecf20Sopenharmony_ci rb = *parent; 56748c2ecf20Sopenharmony_ci other = rb_entry(rb, typeof(*other), rb); 56758c2ecf20Sopenharmony_ci if (prio > other->prio) { 56768c2ecf20Sopenharmony_ci parent = &rb->rb_left; 56778c2ecf20Sopenharmony_ci } else { 56788c2ecf20Sopenharmony_ci parent = &rb->rb_right; 56798c2ecf20Sopenharmony_ci first = false; 56808c2ecf20Sopenharmony_ci } 56818c2ecf20Sopenharmony_ci } 56828c2ecf20Sopenharmony_ci 56838c2ecf20Sopenharmony_ci rb_link_node(&node->rb, rb, parent); 56848c2ecf20Sopenharmony_ci rb_insert_color_cached(&node->rb, 56858c2ecf20Sopenharmony_ci &sibling->execlists.virtual, 56868c2ecf20Sopenharmony_ci first); 56878c2ecf20Sopenharmony_ci 56888c2ecf20Sopenharmony_cisubmit_engine: 56898c2ecf20Sopenharmony_ci GEM_BUG_ON(RB_EMPTY_NODE(&node->rb)); 56908c2ecf20Sopenharmony_ci node->prio = prio; 56918c2ecf20Sopenharmony_ci if (first && prio > sibling->execlists.queue_priority_hint) 56928c2ecf20Sopenharmony_ci tasklet_hi_schedule(&sibling->execlists.tasklet); 56938c2ecf20Sopenharmony_ci 56948c2ecf20Sopenharmony_ci spin_unlock(&sibling->active.lock); 56958c2ecf20Sopenharmony_ci } 56968c2ecf20Sopenharmony_ci local_irq_enable(); 56978c2ecf20Sopenharmony_ci} 56988c2ecf20Sopenharmony_ci 56998c2ecf20Sopenharmony_cistatic void virtual_submit_request(struct i915_request *rq) 57008c2ecf20Sopenharmony_ci{ 57018c2ecf20Sopenharmony_ci struct virtual_engine *ve = to_virtual_engine(rq->engine); 57028c2ecf20Sopenharmony_ci struct i915_request *old; 57038c2ecf20Sopenharmony_ci unsigned long flags; 57048c2ecf20Sopenharmony_ci 57058c2ecf20Sopenharmony_ci ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n", 57068c2ecf20Sopenharmony_ci rq->fence.context, 57078c2ecf20Sopenharmony_ci rq->fence.seqno); 57088c2ecf20Sopenharmony_ci 57098c2ecf20Sopenharmony_ci GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); 57108c2ecf20Sopenharmony_ci 57118c2ecf20Sopenharmony_ci spin_lock_irqsave(&ve->base.active.lock, flags); 57128c2ecf20Sopenharmony_ci 57138c2ecf20Sopenharmony_ci old = ve->request; 57148c2ecf20Sopenharmony_ci if (old) { /* background completion event from preempt-to-busy */ 57158c2ecf20Sopenharmony_ci GEM_BUG_ON(!i915_request_completed(old)); 57168c2ecf20Sopenharmony_ci __i915_request_submit(old); 57178c2ecf20Sopenharmony_ci i915_request_put(old); 57188c2ecf20Sopenharmony_ci } 57198c2ecf20Sopenharmony_ci 57208c2ecf20Sopenharmony_ci if (i915_request_completed(rq)) { 57218c2ecf20Sopenharmony_ci __i915_request_submit(rq); 57228c2ecf20Sopenharmony_ci 57238c2ecf20Sopenharmony_ci ve->base.execlists.queue_priority_hint = INT_MIN; 57248c2ecf20Sopenharmony_ci ve->request = NULL; 57258c2ecf20Sopenharmony_ci } else { 57268c2ecf20Sopenharmony_ci ve->base.execlists.queue_priority_hint = rq_prio(rq); 57278c2ecf20Sopenharmony_ci ve->request = i915_request_get(rq); 57288c2ecf20Sopenharmony_ci 57298c2ecf20Sopenharmony_ci GEM_BUG_ON(!list_empty(virtual_queue(ve))); 57308c2ecf20Sopenharmony_ci list_move_tail(&rq->sched.link, virtual_queue(ve)); 57318c2ecf20Sopenharmony_ci 57328c2ecf20Sopenharmony_ci tasklet_hi_schedule(&ve->base.execlists.tasklet); 57338c2ecf20Sopenharmony_ci } 57348c2ecf20Sopenharmony_ci 57358c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&ve->base.active.lock, flags); 57368c2ecf20Sopenharmony_ci} 57378c2ecf20Sopenharmony_ci 57388c2ecf20Sopenharmony_cistatic struct ve_bond * 57398c2ecf20Sopenharmony_civirtual_find_bond(struct virtual_engine *ve, 57408c2ecf20Sopenharmony_ci const struct intel_engine_cs *master) 57418c2ecf20Sopenharmony_ci{ 57428c2ecf20Sopenharmony_ci int i; 57438c2ecf20Sopenharmony_ci 57448c2ecf20Sopenharmony_ci for (i = 0; i < ve->num_bonds; i++) { 57458c2ecf20Sopenharmony_ci if (ve->bonds[i].master == master) 57468c2ecf20Sopenharmony_ci return &ve->bonds[i]; 57478c2ecf20Sopenharmony_ci } 57488c2ecf20Sopenharmony_ci 57498c2ecf20Sopenharmony_ci return NULL; 57508c2ecf20Sopenharmony_ci} 57518c2ecf20Sopenharmony_ci 57528c2ecf20Sopenharmony_cistatic void 57538c2ecf20Sopenharmony_civirtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) 57548c2ecf20Sopenharmony_ci{ 57558c2ecf20Sopenharmony_ci struct virtual_engine *ve = to_virtual_engine(rq->engine); 57568c2ecf20Sopenharmony_ci intel_engine_mask_t allowed, exec; 57578c2ecf20Sopenharmony_ci struct ve_bond *bond; 57588c2ecf20Sopenharmony_ci 57598c2ecf20Sopenharmony_ci allowed = ~to_request(signal)->engine->mask; 57608c2ecf20Sopenharmony_ci 57618c2ecf20Sopenharmony_ci bond = virtual_find_bond(ve, to_request(signal)->engine); 57628c2ecf20Sopenharmony_ci if (bond) 57638c2ecf20Sopenharmony_ci allowed &= bond->sibling_mask; 57648c2ecf20Sopenharmony_ci 57658c2ecf20Sopenharmony_ci /* Restrict the bonded request to run on only the available engines */ 57668c2ecf20Sopenharmony_ci exec = READ_ONCE(rq->execution_mask); 57678c2ecf20Sopenharmony_ci while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)) 57688c2ecf20Sopenharmony_ci ; 57698c2ecf20Sopenharmony_ci 57708c2ecf20Sopenharmony_ci /* Prevent the master from being re-run on the bonded engines */ 57718c2ecf20Sopenharmony_ci to_request(signal)->execution_mask &= ~allowed; 57728c2ecf20Sopenharmony_ci} 57738c2ecf20Sopenharmony_ci 57748c2ecf20Sopenharmony_cistruct intel_context * 57758c2ecf20Sopenharmony_ciintel_execlists_create_virtual(struct intel_engine_cs **siblings, 57768c2ecf20Sopenharmony_ci unsigned int count) 57778c2ecf20Sopenharmony_ci{ 57788c2ecf20Sopenharmony_ci struct virtual_engine *ve; 57798c2ecf20Sopenharmony_ci unsigned int n; 57808c2ecf20Sopenharmony_ci int err; 57818c2ecf20Sopenharmony_ci 57828c2ecf20Sopenharmony_ci if (count == 0) 57838c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 57848c2ecf20Sopenharmony_ci 57858c2ecf20Sopenharmony_ci if (count == 1) 57868c2ecf20Sopenharmony_ci return intel_context_create(siblings[0]); 57878c2ecf20Sopenharmony_ci 57888c2ecf20Sopenharmony_ci ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL); 57898c2ecf20Sopenharmony_ci if (!ve) 57908c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 57918c2ecf20Sopenharmony_ci 57928c2ecf20Sopenharmony_ci ve->base.i915 = siblings[0]->i915; 57938c2ecf20Sopenharmony_ci ve->base.gt = siblings[0]->gt; 57948c2ecf20Sopenharmony_ci ve->base.uncore = siblings[0]->uncore; 57958c2ecf20Sopenharmony_ci ve->base.id = -1; 57968c2ecf20Sopenharmony_ci 57978c2ecf20Sopenharmony_ci ve->base.class = OTHER_CLASS; 57988c2ecf20Sopenharmony_ci ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 57998c2ecf20Sopenharmony_ci ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 58008c2ecf20Sopenharmony_ci ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 58018c2ecf20Sopenharmony_ci 58028c2ecf20Sopenharmony_ci /* 58038c2ecf20Sopenharmony_ci * The decision on whether to submit a request using semaphores 58048c2ecf20Sopenharmony_ci * depends on the saturated state of the engine. We only compute 58058c2ecf20Sopenharmony_ci * this during HW submission of the request, and we need for this 58068c2ecf20Sopenharmony_ci * state to be globally applied to all requests being submitted 58078c2ecf20Sopenharmony_ci * to this engine. Virtual engines encompass more than one physical 58088c2ecf20Sopenharmony_ci * engine and so we cannot accurately tell in advance if one of those 58098c2ecf20Sopenharmony_ci * engines is already saturated and so cannot afford to use a semaphore 58108c2ecf20Sopenharmony_ci * and be pessimized in priority for doing so -- if we are the only 58118c2ecf20Sopenharmony_ci * context using semaphores after all other clients have stopped, we 58128c2ecf20Sopenharmony_ci * will be starved on the saturated system. Such a global switch for 58138c2ecf20Sopenharmony_ci * semaphores is less than ideal, but alas is the current compromise. 58148c2ecf20Sopenharmony_ci */ 58158c2ecf20Sopenharmony_ci ve->base.saturated = ALL_ENGINES; 58168c2ecf20Sopenharmony_ci 58178c2ecf20Sopenharmony_ci snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 58188c2ecf20Sopenharmony_ci 58198c2ecf20Sopenharmony_ci intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); 58208c2ecf20Sopenharmony_ci intel_engine_init_execlists(&ve->base); 58218c2ecf20Sopenharmony_ci 58228c2ecf20Sopenharmony_ci ve->base.cops = &virtual_context_ops; 58238c2ecf20Sopenharmony_ci ve->base.request_alloc = execlists_request_alloc; 58248c2ecf20Sopenharmony_ci 58258c2ecf20Sopenharmony_ci ve->base.schedule = i915_schedule; 58268c2ecf20Sopenharmony_ci ve->base.submit_request = virtual_submit_request; 58278c2ecf20Sopenharmony_ci ve->base.bond_execute = virtual_bond_execute; 58288c2ecf20Sopenharmony_ci 58298c2ecf20Sopenharmony_ci INIT_LIST_HEAD(virtual_queue(ve)); 58308c2ecf20Sopenharmony_ci ve->base.execlists.queue_priority_hint = INT_MIN; 58318c2ecf20Sopenharmony_ci tasklet_init(&ve->base.execlists.tasklet, 58328c2ecf20Sopenharmony_ci virtual_submission_tasklet, 58338c2ecf20Sopenharmony_ci (unsigned long)ve); 58348c2ecf20Sopenharmony_ci 58358c2ecf20Sopenharmony_ci intel_context_init(&ve->context, &ve->base); 58368c2ecf20Sopenharmony_ci 58378c2ecf20Sopenharmony_ci ve->base.breadcrumbs = intel_breadcrumbs_create(NULL); 58388c2ecf20Sopenharmony_ci if (!ve->base.breadcrumbs) { 58398c2ecf20Sopenharmony_ci err = -ENOMEM; 58408c2ecf20Sopenharmony_ci goto err_put; 58418c2ecf20Sopenharmony_ci } 58428c2ecf20Sopenharmony_ci 58438c2ecf20Sopenharmony_ci for (n = 0; n < count; n++) { 58448c2ecf20Sopenharmony_ci struct intel_engine_cs *sibling = siblings[n]; 58458c2ecf20Sopenharmony_ci 58468c2ecf20Sopenharmony_ci GEM_BUG_ON(!is_power_of_2(sibling->mask)); 58478c2ecf20Sopenharmony_ci if (sibling->mask & ve->base.mask) { 58488c2ecf20Sopenharmony_ci DRM_DEBUG("duplicate %s entry in load balancer\n", 58498c2ecf20Sopenharmony_ci sibling->name); 58508c2ecf20Sopenharmony_ci err = -EINVAL; 58518c2ecf20Sopenharmony_ci goto err_put; 58528c2ecf20Sopenharmony_ci } 58538c2ecf20Sopenharmony_ci 58548c2ecf20Sopenharmony_ci /* 58558c2ecf20Sopenharmony_ci * The virtual engine implementation is tightly coupled to 58568c2ecf20Sopenharmony_ci * the execlists backend -- we push out request directly 58578c2ecf20Sopenharmony_ci * into a tree inside each physical engine. We could support 58588c2ecf20Sopenharmony_ci * layering if we handle cloning of the requests and 58598c2ecf20Sopenharmony_ci * submitting a copy into each backend. 58608c2ecf20Sopenharmony_ci */ 58618c2ecf20Sopenharmony_ci if (sibling->execlists.tasklet.func != 58628c2ecf20Sopenharmony_ci execlists_submission_tasklet) { 58638c2ecf20Sopenharmony_ci err = -ENODEV; 58648c2ecf20Sopenharmony_ci goto err_put; 58658c2ecf20Sopenharmony_ci } 58668c2ecf20Sopenharmony_ci 58678c2ecf20Sopenharmony_ci GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb)); 58688c2ecf20Sopenharmony_ci RB_CLEAR_NODE(&ve->nodes[sibling->id].rb); 58698c2ecf20Sopenharmony_ci 58708c2ecf20Sopenharmony_ci ve->siblings[ve->num_siblings++] = sibling; 58718c2ecf20Sopenharmony_ci ve->base.mask |= sibling->mask; 58728c2ecf20Sopenharmony_ci 58738c2ecf20Sopenharmony_ci /* 58748c2ecf20Sopenharmony_ci * All physical engines must be compatible for their emission 58758c2ecf20Sopenharmony_ci * functions (as we build the instructions during request 58768c2ecf20Sopenharmony_ci * construction and do not alter them before submission 58778c2ecf20Sopenharmony_ci * on the physical engine). We use the engine class as a guide 58788c2ecf20Sopenharmony_ci * here, although that could be refined. 58798c2ecf20Sopenharmony_ci */ 58808c2ecf20Sopenharmony_ci if (ve->base.class != OTHER_CLASS) { 58818c2ecf20Sopenharmony_ci if (ve->base.class != sibling->class) { 58828c2ecf20Sopenharmony_ci DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", 58838c2ecf20Sopenharmony_ci sibling->class, ve->base.class); 58848c2ecf20Sopenharmony_ci err = -EINVAL; 58858c2ecf20Sopenharmony_ci goto err_put; 58868c2ecf20Sopenharmony_ci } 58878c2ecf20Sopenharmony_ci continue; 58888c2ecf20Sopenharmony_ci } 58898c2ecf20Sopenharmony_ci 58908c2ecf20Sopenharmony_ci ve->base.class = sibling->class; 58918c2ecf20Sopenharmony_ci ve->base.uabi_class = sibling->uabi_class; 58928c2ecf20Sopenharmony_ci snprintf(ve->base.name, sizeof(ve->base.name), 58938c2ecf20Sopenharmony_ci "v%dx%d", ve->base.class, count); 58948c2ecf20Sopenharmony_ci ve->base.context_size = sibling->context_size; 58958c2ecf20Sopenharmony_ci 58968c2ecf20Sopenharmony_ci ve->base.emit_bb_start = sibling->emit_bb_start; 58978c2ecf20Sopenharmony_ci ve->base.emit_flush = sibling->emit_flush; 58988c2ecf20Sopenharmony_ci ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb; 58998c2ecf20Sopenharmony_ci ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb; 59008c2ecf20Sopenharmony_ci ve->base.emit_fini_breadcrumb_dw = 59018c2ecf20Sopenharmony_ci sibling->emit_fini_breadcrumb_dw; 59028c2ecf20Sopenharmony_ci 59038c2ecf20Sopenharmony_ci ve->base.flags = sibling->flags; 59048c2ecf20Sopenharmony_ci } 59058c2ecf20Sopenharmony_ci 59068c2ecf20Sopenharmony_ci ve->base.flags |= I915_ENGINE_IS_VIRTUAL; 59078c2ecf20Sopenharmony_ci 59088c2ecf20Sopenharmony_ci virtual_engine_initial_hint(ve); 59098c2ecf20Sopenharmony_ci return &ve->context; 59108c2ecf20Sopenharmony_ci 59118c2ecf20Sopenharmony_cierr_put: 59128c2ecf20Sopenharmony_ci intel_context_put(&ve->context); 59138c2ecf20Sopenharmony_ci return ERR_PTR(err); 59148c2ecf20Sopenharmony_ci} 59158c2ecf20Sopenharmony_ci 59168c2ecf20Sopenharmony_cistruct intel_context * 59178c2ecf20Sopenharmony_ciintel_execlists_clone_virtual(struct intel_engine_cs *src) 59188c2ecf20Sopenharmony_ci{ 59198c2ecf20Sopenharmony_ci struct virtual_engine *se = to_virtual_engine(src); 59208c2ecf20Sopenharmony_ci struct intel_context *dst; 59218c2ecf20Sopenharmony_ci 59228c2ecf20Sopenharmony_ci dst = intel_execlists_create_virtual(se->siblings, 59238c2ecf20Sopenharmony_ci se->num_siblings); 59248c2ecf20Sopenharmony_ci if (IS_ERR(dst)) 59258c2ecf20Sopenharmony_ci return dst; 59268c2ecf20Sopenharmony_ci 59278c2ecf20Sopenharmony_ci if (se->num_bonds) { 59288c2ecf20Sopenharmony_ci struct virtual_engine *de = to_virtual_engine(dst->engine); 59298c2ecf20Sopenharmony_ci 59308c2ecf20Sopenharmony_ci de->bonds = kmemdup(se->bonds, 59318c2ecf20Sopenharmony_ci sizeof(*se->bonds) * se->num_bonds, 59328c2ecf20Sopenharmony_ci GFP_KERNEL); 59338c2ecf20Sopenharmony_ci if (!de->bonds) { 59348c2ecf20Sopenharmony_ci intel_context_put(dst); 59358c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 59368c2ecf20Sopenharmony_ci } 59378c2ecf20Sopenharmony_ci 59388c2ecf20Sopenharmony_ci de->num_bonds = se->num_bonds; 59398c2ecf20Sopenharmony_ci } 59408c2ecf20Sopenharmony_ci 59418c2ecf20Sopenharmony_ci return dst; 59428c2ecf20Sopenharmony_ci} 59438c2ecf20Sopenharmony_ci 59448c2ecf20Sopenharmony_ciint intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, 59458c2ecf20Sopenharmony_ci const struct intel_engine_cs *master, 59468c2ecf20Sopenharmony_ci const struct intel_engine_cs *sibling) 59478c2ecf20Sopenharmony_ci{ 59488c2ecf20Sopenharmony_ci struct virtual_engine *ve = to_virtual_engine(engine); 59498c2ecf20Sopenharmony_ci struct ve_bond *bond; 59508c2ecf20Sopenharmony_ci int n; 59518c2ecf20Sopenharmony_ci 59528c2ecf20Sopenharmony_ci /* Sanity check the sibling is part of the virtual engine */ 59538c2ecf20Sopenharmony_ci for (n = 0; n < ve->num_siblings; n++) 59548c2ecf20Sopenharmony_ci if (sibling == ve->siblings[n]) 59558c2ecf20Sopenharmony_ci break; 59568c2ecf20Sopenharmony_ci if (n == ve->num_siblings) 59578c2ecf20Sopenharmony_ci return -EINVAL; 59588c2ecf20Sopenharmony_ci 59598c2ecf20Sopenharmony_ci bond = virtual_find_bond(ve, master); 59608c2ecf20Sopenharmony_ci if (bond) { 59618c2ecf20Sopenharmony_ci bond->sibling_mask |= sibling->mask; 59628c2ecf20Sopenharmony_ci return 0; 59638c2ecf20Sopenharmony_ci } 59648c2ecf20Sopenharmony_ci 59658c2ecf20Sopenharmony_ci bond = krealloc(ve->bonds, 59668c2ecf20Sopenharmony_ci sizeof(*bond) * (ve->num_bonds + 1), 59678c2ecf20Sopenharmony_ci GFP_KERNEL); 59688c2ecf20Sopenharmony_ci if (!bond) 59698c2ecf20Sopenharmony_ci return -ENOMEM; 59708c2ecf20Sopenharmony_ci 59718c2ecf20Sopenharmony_ci bond[ve->num_bonds].master = master; 59728c2ecf20Sopenharmony_ci bond[ve->num_bonds].sibling_mask = sibling->mask; 59738c2ecf20Sopenharmony_ci 59748c2ecf20Sopenharmony_ci ve->bonds = bond; 59758c2ecf20Sopenharmony_ci ve->num_bonds++; 59768c2ecf20Sopenharmony_ci 59778c2ecf20Sopenharmony_ci return 0; 59788c2ecf20Sopenharmony_ci} 59798c2ecf20Sopenharmony_ci 59808c2ecf20Sopenharmony_cistruct intel_engine_cs * 59818c2ecf20Sopenharmony_ciintel_virtual_engine_get_sibling(struct intel_engine_cs *engine, 59828c2ecf20Sopenharmony_ci unsigned int sibling) 59838c2ecf20Sopenharmony_ci{ 59848c2ecf20Sopenharmony_ci struct virtual_engine *ve = to_virtual_engine(engine); 59858c2ecf20Sopenharmony_ci 59868c2ecf20Sopenharmony_ci if (sibling >= ve->num_siblings) 59878c2ecf20Sopenharmony_ci return NULL; 59888c2ecf20Sopenharmony_ci 59898c2ecf20Sopenharmony_ci return ve->siblings[sibling]; 59908c2ecf20Sopenharmony_ci} 59918c2ecf20Sopenharmony_ci 59928c2ecf20Sopenharmony_civoid intel_execlists_show_requests(struct intel_engine_cs *engine, 59938c2ecf20Sopenharmony_ci struct drm_printer *m, 59948c2ecf20Sopenharmony_ci void (*show_request)(struct drm_printer *m, 59958c2ecf20Sopenharmony_ci struct i915_request *rq, 59968c2ecf20Sopenharmony_ci const char *prefix), 59978c2ecf20Sopenharmony_ci unsigned int max) 59988c2ecf20Sopenharmony_ci{ 59998c2ecf20Sopenharmony_ci const struct intel_engine_execlists *execlists = &engine->execlists; 60008c2ecf20Sopenharmony_ci struct i915_request *rq, *last; 60018c2ecf20Sopenharmony_ci unsigned long flags; 60028c2ecf20Sopenharmony_ci unsigned int count; 60038c2ecf20Sopenharmony_ci struct rb_node *rb; 60048c2ecf20Sopenharmony_ci 60058c2ecf20Sopenharmony_ci spin_lock_irqsave(&engine->active.lock, flags); 60068c2ecf20Sopenharmony_ci 60078c2ecf20Sopenharmony_ci last = NULL; 60088c2ecf20Sopenharmony_ci count = 0; 60098c2ecf20Sopenharmony_ci list_for_each_entry(rq, &engine->active.requests, sched.link) { 60108c2ecf20Sopenharmony_ci if (count++ < max - 1) 60118c2ecf20Sopenharmony_ci show_request(m, rq, "\t\tE "); 60128c2ecf20Sopenharmony_ci else 60138c2ecf20Sopenharmony_ci last = rq; 60148c2ecf20Sopenharmony_ci } 60158c2ecf20Sopenharmony_ci if (last) { 60168c2ecf20Sopenharmony_ci if (count > max) { 60178c2ecf20Sopenharmony_ci drm_printf(m, 60188c2ecf20Sopenharmony_ci "\t\t...skipping %d executing requests...\n", 60198c2ecf20Sopenharmony_ci count - max); 60208c2ecf20Sopenharmony_ci } 60218c2ecf20Sopenharmony_ci show_request(m, last, "\t\tE "); 60228c2ecf20Sopenharmony_ci } 60238c2ecf20Sopenharmony_ci 60248c2ecf20Sopenharmony_ci if (execlists->switch_priority_hint != INT_MIN) 60258c2ecf20Sopenharmony_ci drm_printf(m, "\t\tSwitch priority hint: %d\n", 60268c2ecf20Sopenharmony_ci READ_ONCE(execlists->switch_priority_hint)); 60278c2ecf20Sopenharmony_ci if (execlists->queue_priority_hint != INT_MIN) 60288c2ecf20Sopenharmony_ci drm_printf(m, "\t\tQueue priority hint: %d\n", 60298c2ecf20Sopenharmony_ci READ_ONCE(execlists->queue_priority_hint)); 60308c2ecf20Sopenharmony_ci 60318c2ecf20Sopenharmony_ci last = NULL; 60328c2ecf20Sopenharmony_ci count = 0; 60338c2ecf20Sopenharmony_ci for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { 60348c2ecf20Sopenharmony_ci struct i915_priolist *p = rb_entry(rb, typeof(*p), node); 60358c2ecf20Sopenharmony_ci int i; 60368c2ecf20Sopenharmony_ci 60378c2ecf20Sopenharmony_ci priolist_for_each_request(rq, p, i) { 60388c2ecf20Sopenharmony_ci if (count++ < max - 1) 60398c2ecf20Sopenharmony_ci show_request(m, rq, "\t\tQ "); 60408c2ecf20Sopenharmony_ci else 60418c2ecf20Sopenharmony_ci last = rq; 60428c2ecf20Sopenharmony_ci } 60438c2ecf20Sopenharmony_ci } 60448c2ecf20Sopenharmony_ci if (last) { 60458c2ecf20Sopenharmony_ci if (count > max) { 60468c2ecf20Sopenharmony_ci drm_printf(m, 60478c2ecf20Sopenharmony_ci "\t\t...skipping %d queued requests...\n", 60488c2ecf20Sopenharmony_ci count - max); 60498c2ecf20Sopenharmony_ci } 60508c2ecf20Sopenharmony_ci show_request(m, last, "\t\tQ "); 60518c2ecf20Sopenharmony_ci } 60528c2ecf20Sopenharmony_ci 60538c2ecf20Sopenharmony_ci last = NULL; 60548c2ecf20Sopenharmony_ci count = 0; 60558c2ecf20Sopenharmony_ci for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) { 60568c2ecf20Sopenharmony_ci struct virtual_engine *ve = 60578c2ecf20Sopenharmony_ci rb_entry(rb, typeof(*ve), nodes[engine->id].rb); 60588c2ecf20Sopenharmony_ci struct i915_request *rq = READ_ONCE(ve->request); 60598c2ecf20Sopenharmony_ci 60608c2ecf20Sopenharmony_ci if (rq) { 60618c2ecf20Sopenharmony_ci if (count++ < max - 1) 60628c2ecf20Sopenharmony_ci show_request(m, rq, "\t\tV "); 60638c2ecf20Sopenharmony_ci else 60648c2ecf20Sopenharmony_ci last = rq; 60658c2ecf20Sopenharmony_ci } 60668c2ecf20Sopenharmony_ci } 60678c2ecf20Sopenharmony_ci if (last) { 60688c2ecf20Sopenharmony_ci if (count > max) { 60698c2ecf20Sopenharmony_ci drm_printf(m, 60708c2ecf20Sopenharmony_ci "\t\t...skipping %d virtual requests...\n", 60718c2ecf20Sopenharmony_ci count - max); 60728c2ecf20Sopenharmony_ci } 60738c2ecf20Sopenharmony_ci show_request(m, last, "\t\tV "); 60748c2ecf20Sopenharmony_ci } 60758c2ecf20Sopenharmony_ci 60768c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&engine->active.lock, flags); 60778c2ecf20Sopenharmony_ci} 60788c2ecf20Sopenharmony_ci 60798c2ecf20Sopenharmony_civoid intel_lr_context_reset(struct intel_engine_cs *engine, 60808c2ecf20Sopenharmony_ci struct intel_context *ce, 60818c2ecf20Sopenharmony_ci u32 head, 60828c2ecf20Sopenharmony_ci bool scrub) 60838c2ecf20Sopenharmony_ci{ 60848c2ecf20Sopenharmony_ci GEM_BUG_ON(!intel_context_is_pinned(ce)); 60858c2ecf20Sopenharmony_ci 60868c2ecf20Sopenharmony_ci /* 60878c2ecf20Sopenharmony_ci * We want a simple context + ring to execute the breadcrumb update. 60888c2ecf20Sopenharmony_ci * We cannot rely on the context being intact across the GPU hang, 60898c2ecf20Sopenharmony_ci * so clear it and rebuild just what we need for the breadcrumb. 60908c2ecf20Sopenharmony_ci * All pending requests for this context will be zapped, and any 60918c2ecf20Sopenharmony_ci * future request will be after userspace has had the opportunity 60928c2ecf20Sopenharmony_ci * to recreate its own state. 60938c2ecf20Sopenharmony_ci */ 60948c2ecf20Sopenharmony_ci if (scrub) 60958c2ecf20Sopenharmony_ci restore_default_state(ce, engine); 60968c2ecf20Sopenharmony_ci 60978c2ecf20Sopenharmony_ci /* Rerun the request; its payload has been neutered (if guilty). */ 60988c2ecf20Sopenharmony_ci __execlists_update_reg_state(ce, engine, head); 60998c2ecf20Sopenharmony_ci} 61008c2ecf20Sopenharmony_ci 61018c2ecf20Sopenharmony_cibool 61028c2ecf20Sopenharmony_ciintel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine) 61038c2ecf20Sopenharmony_ci{ 61048c2ecf20Sopenharmony_ci return engine->set_default_submission == 61058c2ecf20Sopenharmony_ci intel_execlists_set_default_submission; 61068c2ecf20Sopenharmony_ci} 61078c2ecf20Sopenharmony_ci 61088c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 61098c2ecf20Sopenharmony_ci#include "selftest_lrc.c" 61108c2ecf20Sopenharmony_ci#endif 6111