/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/dma-fence-array.h>
#include <linux/dma-fence-chain.h>
#include <linux/irq_work.h>
#include <linux/prefetch.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/signal.h>

#include "gem/i915_gem_context.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_ring.h"
#include "gt/intel_rps.h"

#include "i915_active.h"
#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_trace.h"
#include "intel_pm.h"

struct execute_cb {
	struct irq_work work;
	struct i915_sw_fence *fence;
	void (*hook)(struct i915_request *rq, struct dma_fence *signal);
	struct i915_request *signal;
};

static struct i915_global_request {
	struct i915_global base;
	struct kmem_cache *slab_requests;
	struct kmem_cache *slab_execute_cbs;
} global;

static const char *i915_fence_get_driver_name(struct dma_fence *fence)
{
	return dev_name(to_request(fence)->engine->i915->drm.dev);
}

static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
{
	const struct i915_gem_context *ctx;

	/*
	 * The timeline struct (as part of the ppgtt underneath a context)
	 * may be freed when the request is no longer in use by the GPU.
	 * We could extend the life of a context to beyond that of all
	 * fences, possibly keeping the hw resource around indefinitely,
	 * or we just give them a false name. Since
	 * dma_fence_ops.get_timeline_name is a debug feature, the occasional
	 * lie seems justifiable.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return "signaled";

	ctx = i915_request_gem_context(to_request(fence));
	if (!ctx)
		return "[" DRIVER_NAME "]";

	return ctx->name;
}

static bool i915_fence_signaled(struct dma_fence *fence)
{
	return i915_request_completed(to_request(fence));
}

static bool i915_fence_enable_signaling(struct dma_fence *fence)
{
	return i915_request_enable_breadcrumb(to_request(fence));
}

static signed long i915_fence_wait(struct dma_fence *fence,
				   bool interruptible,
				   signed long timeout)
{
	return i915_request_wait(to_request(fence),
				 interruptible | I915_WAIT_PRIORITY,
				 timeout);
}

struct kmem_cache *i915_request_slab_cache(void)
{
	return global.slab_requests;
}

static void i915_fence_release(struct dma_fence *fence)
{
	struct i915_request *rq = to_request(fence);

	/*
	 * The request is put onto an RCU freelist (i.e. the address
	 * is immediately reused), mark the fences as being freed now.
	 * Otherwise the debugobjects for the fences are only marked as
	 * freed when the slab cache itself is freed, and so we would get
	 * caught trying to reuse dead objects.
	 */
	i915_sw_fence_fini(&rq->submit);
	i915_sw_fence_fini(&rq->semaphore);

	/*
	 * Keep one request on each engine for reserved use under mempressure.
	 *
	 * We do not hold a reference to the engine here and so have to be
	 * very careful in what rq->engine we poke. The virtual engine is
	 * referenced via the rq->context and we released that ref during
	 * i915_request_retire(), ergo we must not dereference a virtual
	 * engine here. Not that we would want to, as the only consumer of
	 * the reserved engine->request_pool is the power management parking,
	 * which must-not-fail, and that is only run on the physical engines.
	 *
	 * Since the request must have been executed to have completed,
	 * we know that it will have been processed by the HW and will
	 * not be unsubmitted again, so rq->engine and rq->execution_mask
	 * at this point are stable. rq->execution_mask will be a single
	 * bit if the last and _only_ engine it could execute on was a
	 * physical engine, if it's multiple bits then it started on and
	 * could still be on a virtual engine. Thus if the mask is not a
	 * power-of-two we assume that rq->engine may still be a virtual
	 * engine and so a dangling invalid pointer that we cannot dereference.
	 *
	 * For example, consider the flow of a bonded request through a virtual
	 * engine. The request is created with a wide engine mask (all engines
	 * that we might execute on). On processing the bond, the request mask
	 * is reduced to one or more engines. If the request is subsequently
	 * bound to a single engine, it will then be constrained to only
	 * execute on that engine and never returned to the virtual engine
	 * after timeslicing away, see __unwind_incomplete_requests(). Thus we
	 * know that if the rq->execution_mask is a single bit, rq->engine
	 * can be a physical engine with the exact corresponding mask.
	 */
	if (is_power_of_2(rq->execution_mask) &&
	    !cmpxchg(&rq->engine->request_pool, NULL, rq))
		return;

	kmem_cache_free(global.slab_requests, rq);
}

const struct dma_fence_ops i915_fence_ops = {
	.get_driver_name = i915_fence_get_driver_name,
	.get_timeline_name = i915_fence_get_timeline_name,
	.enable_signaling = i915_fence_enable_signaling,
	.signaled = i915_fence_signaled,
	.wait = i915_fence_wait,
	.release = i915_fence_release,
};

static void irq_execute_cb(struct irq_work *wrk)
{
	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);

	i915_sw_fence_complete(cb->fence);
	kmem_cache_free(global.slab_execute_cbs, cb);
}

static void irq_execute_cb_hook(struct irq_work *wrk)
{
	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);

	cb->hook(container_of(cb->fence, struct i915_request, submit),
		 &cb->signal->fence);
	i915_request_put(cb->signal);

	irq_execute_cb(wrk);
}

static __always_inline void
__notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk))
{
	struct execute_cb *cb, *cn;

	if (llist_empty(&rq->execute_cb))
		return;

	llist_for_each_entry_safe(cb, cn,
				  llist_del_all(&rq->execute_cb),
				  work.llnode)
		fn(&cb->work);
}

static void __notify_execute_cb_irq(struct i915_request *rq)
{
	__notify_execute_cb(rq, irq_work_queue);
}

/* Invoke the irq_work callback synchronously instead of queueing it */
static bool irq_work_imm(struct irq_work *wrk)
{
	wrk->func(wrk);
	return false;
}

static void __notify_execute_cb_imm(struct i915_request *rq)
{
	__notify_execute_cb(rq, irq_work_imm);
}

static void free_capture_list(struct i915_request *request)
{
	struct i915_capture_list *capture;

	capture = fetch_and_zero(&request->capture_list);
	while (capture) {
		struct i915_capture_list *next = capture->next;

		kfree(capture);
		capture = next;
	}
}

static void __i915_request_fill(struct i915_request *rq, u8 val)
{
	void *vaddr = rq->ring->vaddr;
	u32 head;

	head = rq->infix;
	if (rq->postfix < head) {
		memset(vaddr + head, val, rq->ring->size - head);
		head = 0;
	}
	memset(vaddr + head, val, rq->postfix - head);
}

static void remove_from_engine(struct i915_request *rq)
{
	struct intel_engine_cs *engine, *locked;

	/*
	 * Virtual engines complicate acquiring the engine timeline lock,
	 * as their rq->engine pointer is not stable until under that
	 * engine lock. The simple ploy we use is to take the lock then
	 * check that the rq still belongs to the newly locked engine.
	 */
	locked = READ_ONCE(rq->engine);
	spin_lock_irq(&locked->active.lock);
	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
		spin_unlock(&locked->active.lock);
		spin_lock(&engine->active.lock);
		locked = engine;
	}
	list_del_init(&rq->sched.link);

	clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
	clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);

	/* Prevent further __await_execution() registering a cb, then flush */
	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);

	spin_unlock_irq(&locked->active.lock);

	__notify_execute_cb_imm(rq);
}

bool i915_request_retire(struct i915_request *rq)
{
	if (!i915_request_completed(rq))
		return false;

	RQ_TRACE(rq, "\n");

	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
	trace_i915_request_retire(rq);
	i915_request_mark_complete(rq);

	/*
	 * We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of the tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	GEM_BUG_ON(!list_is_first(&rq->link,
				  &i915_request_timeline(rq)->requests));
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		/* Poison before we release our space in the ring */
		__i915_request_fill(rq, POISON_FREE);
	rq->ring->head = rq->postfix;

	if (!i915_request_signaled(rq)) {
		spin_lock_irq(&rq->lock);
		dma_fence_signal_locked(&rq->fence);
		spin_unlock_irq(&rq->lock);
	}

	if (i915_request_has_waitboost(rq)) {
		GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
		atomic_dec(&rq->engine->gt->rps.num_waiters);
	}

	/*
	 * We only loosely track inflight requests across preemption,
	 * and so we may find ourselves attempting to retire a _completed_
	 * request that we have removed from the HW and put back on a run
	 * queue.
	 *
	 * As we set I915_FENCE_FLAG_ACTIVE on the request, this should be
	 * after removing the breadcrumb and signaling it, so that we do not
	 * inadvertently attach the breadcrumb to a completed request.
	 */
	remove_from_engine(rq);
	GEM_BUG_ON(!llist_empty(&rq->execute_cb));

	__list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */

	intel_context_exit(rq->context);
	intel_context_unpin(rq->context);

	free_capture_list(rq);
	i915_sched_node_fini(&rq->sched);
	i915_request_put(rq);

	return true;
}

void i915_request_retire_upto(struct i915_request *rq)
{
	struct intel_timeline * const tl = i915_request_timeline(rq);
	struct i915_request *tmp;

	RQ_TRACE(rq, "\n");

	GEM_BUG_ON(!i915_request_completed(rq));

	do {
		tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
	} while (i915_request_retire(tmp) && tmp != rq);
}

static struct i915_request * const *
__engine_active(struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->execlists.active);
}

static bool __request_in_flight(const struct i915_request *signal)
{
	struct i915_request * const *port, *rq;
	bool inflight = false;

	if (!i915_request_is_ready(signal))
		return false;

	/*
	 * Even if we have unwound the request, it may still be on
	 * the GPU (preempt-to-busy). If that request is inside an
	 * unpreemptible critical section, it will not be removed. Some
	 * GPU functions may even be stuck waiting for the paired request
	 * (__await_execution) to be submitted and cannot be preempted
	 * until the bond is executing.
	 *
	 * As we know that there are always preemption points between
	 * requests, we know that only the currently executing request
	 * may be still active even though we have cleared the flag.
	 * However, we can't rely on our tracking of ELSP[0] to know
	 * which request is currently active and so may be stuck, as
	 * the tracking may be an event behind. Instead assume that
	 * if the context is still inflight, then it is still active
	 * even if the active flag has been cleared.
	 *
	 * To further complicate matters, if there is a pending promotion, the HW
	 * may either perform a context switch to the second inflight execlists,
	 * or it may switch to the pending set of execlists. In the case of the
	 * latter, it may send the ACK and we process the event copying the
	 * pending[] over top of inflight[], _overwriting_ our *active. Since
	 * this implies the HW is arbitrating and not stuck in *active, we do
	 * not worry about complete accuracy, but we do require no read/write
	 * tearing of the pointer [the read of the pointer must be valid, even
	 * as the array is being overwritten, for which we require the writes
	 * to avoid tearing.]
	 *
	 * Note that the read of *execlists->active may race with the promotion
	 * of execlists->pending[] to execlists->inflight[], overwriting
	 * the value at *execlists->active. This is fine. The promotion implies
	 * that we received an ACK from the HW, and so the context is not
	 * stuck -- if we do not see ourselves in *active, the inflight status
	 * is valid. If instead we see ourselves being copied into *active,
	 * we are inflight and may signal the callback.
	 */
	if (!intel_context_inflight(signal->context))
		return false;

	rcu_read_lock();
	for (port = __engine_active(signal->engine);
	     (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
	     port++) {
		if (rq->context == signal->context) {
			inflight = i915_seqno_passed(rq->fence.seqno,
						     signal->fence.seqno);
			break;
		}
	}
	rcu_read_unlock();

	return inflight;
}

static int
__await_execution(struct i915_request *rq,
		  struct i915_request *signal,
		  void (*hook)(struct i915_request *rq,
			       struct dma_fence *signal),
		  gfp_t gfp)
{
	struct execute_cb *cb;

	if (i915_request_is_active(signal)) {
		if (hook)
			hook(rq, &signal->fence);
		return 0;
	}

	cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
	if (!cb)
		return -ENOMEM;

	cb->fence = &rq->submit;
	i915_sw_fence_await(cb->fence);
	init_irq_work(&cb->work, irq_execute_cb);
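
	/*
	 * If the caller supplied a hook, switch to the hook variant of the
	 * irq_work callback and hold a reference on the signaling request;
	 * irq_execute_cb_hook() invokes the hook and drops that reference
	 * before completing the submit fence and freeing the callback.
	 */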
	if (hook) {
		cb->hook = hook;
		cb->signal = i915_request_get(signal);
		cb->work.func = irq_execute_cb_hook;
	}

	/*
	 * Register the callback first, then see if the signaler is already
	 * active. This ensures that if we race with the
	 * __notify_execute_cb from i915_request_submit() and we are not
	 * included in that list, we get a second bite of the cherry and
	 * execute it ourselves. After this point, a future
	 * i915_request_submit() will notify us.
	 *
	 * In i915_request_retire() we set the ACTIVE bit on a completed
	 * request (then flush the execute_cb). So by registering the
	 * callback first, then checking the ACTIVE bit, we serialise with
	 * the completed/retired request.
	 */
	if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
		if (i915_request_is_active(signal) ||
		    __request_in_flight(signal))
			__notify_execute_cb_imm(signal);
	}

	return 0;
}

static bool fatal_error(int error)
{
	switch (error) {
	case 0: /* not an error! */
	case -EAGAIN: /* innocent victim of a GT reset (__i915_request_reset) */
	case -ETIMEDOUT: /* waiting for Godot (timer_i915_sw_fence_wake) */
		return false;
	default:
		return true;
	}
}

void __i915_request_skip(struct i915_request *rq)
{
	GEM_BUG_ON(!fatal_error(rq->fence.error));

	if (rq->infix == rq->postfix)
		return;

	/*
	 * As this request likely depends on state from the lost
	 * context, clear out all the user operations leaving the
	 * breadcrumb at the end (so we get the fence notifications).
	 */
	__i915_request_fill(rq, 0);
	rq->infix = rq->postfix;
}

void i915_request_set_error_once(struct i915_request *rq, int error)
{
	int old;

	GEM_BUG_ON(!IS_ERR_VALUE((long)error));

	if (i915_request_signaled(rq))
		return;

	old = READ_ONCE(rq->fence.error);
	do {
		if (fatal_error(old))
			return;
	} while (!try_cmpxchg(&rq->fence.error, &old, error));
}

bool __i915_request_submit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	bool result = false;

	RQ_TRACE(request, "\n");

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->active.lock);

	/*
	 * With the advent of preempt-to-busy, we frequently encounter
	 * requests that we have unsubmitted from HW, but left running
	 * until the next ack and so have completed in the meantime. On
	 * resubmission of that completed request, we can skip
	 * updating the payload, and execlists can even skip submitting
	 * the request.
	 *
	 * We must remove the request from the caller's priority queue,
	 * and the caller must only call us when the request is in their
	 * priority queue, under the active.lock. This ensures that the
	 * request has *not* yet been retired and we can safely move
	 * the request into the engine->active.list where it will be
	 * dropped upon retiring. (Otherwise if we resubmit a *retired*
	 * request, this would be a horrible use-after-free.)
	 */
	if (i915_request_completed(request))
		goto xfer;

	if (unlikely(intel_context_is_closed(request->context) &&
		     !intel_engine_has_heartbeat(engine)))
		intel_context_set_banned(request->context);

	if (unlikely(intel_context_is_banned(request->context)))
		i915_request_set_error_once(request, -EIO);

	if (unlikely(fatal_error(request->fence.error)))
		__i915_request_skip(request);

	/*
	 * Are we using semaphores when the gpu is already saturated?
	 *
	 * Using semaphores incurs a cost in having the GPU poll a
	 * memory location, busywaiting for it to change. The continual
	 * memory reads can have a noticeable impact on the rest of the
	 * system with the extra bus traffic, stalling the cpu as it too
	 * tries to access memory across the bus (perf stat -e bus-cycles).
	 *
	 * If we installed a semaphore on this request and we only submit
	 * the request after the signaler completed, that indicates the
	 * system is overloaded and using semaphores at this time only
	 * increases the amount of work we are doing. If so, we disable
	 * further use of semaphores until we are idle again, whence we
	 * optimistically try again.
	 */
	if (request->sched.semaphores &&
	    i915_sw_fence_signaled(&request->semaphore))
		engine->saturated |= request->sched.semaphores;

	engine->emit_fini_breadcrumb(request,
				     request->ring->vaddr + request->postfix);

	trace_i915_request_execute(request);
	engine->serial++;
	result = true;

xfer:
	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) {
		list_move_tail(&request->sched.link, &engine->active.requests);
		clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
	}

	/*
	 * XXX Rollback bonded-execution on __i915_request_unsubmit()?
	 *
	 * In the future, perhaps when we have an active time-slicing scheduler,
	 * it will be interesting to unsubmit parallel execution and remove
	 * busywaits from the GPU until their master is restarted. This is
	 * quite hairy, we have to carefully rollback the fence and do a
	 * preempt-to-idle cycle on the target engine, all the while the
	 * master execute_cb may refire.
	 */
	__notify_execute_cb_irq(request);

	/* We may be recursing from the signal callback of another i915 fence */
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		i915_request_enable_breadcrumb(request);

	return result;
}

void i915_request_submit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->active.lock, flags);

	__i915_request_submit(request);

	spin_unlock_irqrestore(&engine->active.lock, flags);
}

void __i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;

	/*
	 * Only unwind in reverse order, required so that the per-context list
	 * is kept in seqno/ring order.
	 */
	RQ_TRACE(request, "\n");

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->active.lock);

	/*
	 * Before we remove this breadcrumb from the signal list, we have
	 * to ensure that a concurrent dma_fence_enable_signaling() does not
	 * attach itself. We first mark the request as no longer active and
	 * make sure that is visible to other cores, and then remove the
	 * breadcrumb if attached.
	 */
	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
	clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		i915_request_cancel_breadcrumb(request);

	/* We've already spun, don't charge on resubmitting. */
	if (request->sched.semaphores && i915_request_started(request))
		request->sched.semaphores = 0;

	/*
	 * We don't need to wake_up any waiters on request->execute, they
	 * will get woken by any other event or us re-adding this request
	 * to the engine timeline (__i915_request_submit()). The waiters
	 * should be quite adept at finding that the request now has a new
	 * global_seqno to the one they went to sleep on.
	 */
}

void i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->active.lock, flags);

	__i915_request_unsubmit(request);

	spin_unlock_irqrestore(&engine->active.lock, flags);
}

static int __i915_sw_fence_call
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct i915_request *request =
		container_of(fence, typeof(*request), submit);

	switch (state) {
	case FENCE_COMPLETE:
		trace_i915_request_submit(request);

		if (unlikely(fence->error))
			i915_request_set_error_once(request, fence->error);

		/*
		 * We need to serialize use of the submit_request() callback
		 * with its hotplugging performed during an emergency
		 * i915_gem_set_wedged(). We use the RCU mechanism to mark the
		 * critical section in order to force i915_gem_set_wedged() to
		 * wait until the submit_request() is completed before
		 * proceeding.
		 */
		rcu_read_lock();
		request->engine->submit_request(request);
		rcu_read_unlock();
		break;

	case FENCE_FREE:
		i915_request_put(request);
		break;
	}

	return NOTIFY_DONE;
}

static int __i915_sw_fence_call
semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct i915_request *rq = container_of(fence, typeof(*rq), semaphore);

	switch (state) {
	case FENCE_COMPLETE:
		break;

	case FENCE_FREE:
		i915_request_put(rq);
		break;
	}

	return NOTIFY_DONE;
}

static void retire_requests(struct intel_timeline *tl)
{
	struct i915_request *rq, *rn;

	list_for_each_entry_safe(rq, rn, &tl->requests, link)
		if (!i915_request_retire(rq))
			break;
}

static noinline struct i915_request *
request_alloc_slow(struct intel_timeline *tl,
		   struct i915_request **rsvd,
		   gfp_t gfp)
{
	struct i915_request *rq;
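
	/*
	 * We only land here after kmem_cache_alloc() failed for the normal
	 * allocation in __i915_request_create(), so escalate gradually:
	 * dip into the per-engine reserve when we cannot block, otherwise
	 * retire the oldest request and retry the allocation, finally
	 * throttling on RCU and retiring the whole timeline before one
	 * last attempt.
	 */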

	/* If we cannot wait, dip into our reserves */
	if (!gfpflags_allow_blocking(gfp)) {
		rq = xchg(rsvd, NULL);
		if (!rq) /* Use the normal failure path for one final WARN */
			goto out;

		return rq;
	}

	if (list_empty(&tl->requests))
		goto out;

	/* Move our oldest request to the slab-cache (if not in use!) */
	rq = list_first_entry(&tl->requests, typeof(*rq), link);
	i915_request_retire(rq);

	rq = kmem_cache_alloc(global.slab_requests,
			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (rq)
		return rq;

	/* Ratelimit ourselves to prevent oom from malicious clients */
	rq = list_last_entry(&tl->requests, typeof(*rq), link);
	cond_synchronize_rcu(rq->rcustate);

	/* Retire our old requests in the hope that we free some */
	retire_requests(tl);

out:
	return kmem_cache_alloc(global.slab_requests, gfp);
}

static void __i915_request_ctor(void *arg)
{
	struct i915_request *rq = arg;

	spin_lock_init(&rq->lock);
	i915_sched_node_init(&rq->sched);
	i915_sw_fence_init(&rq->submit, submit_notify);
	i915_sw_fence_init(&rq->semaphore, semaphore_notify);

	rq->capture_list = NULL;

	init_llist_head(&rq->execute_cb);
}

struct i915_request *
__i915_request_create(struct intel_context *ce, gfp_t gfp)
{
	struct intel_timeline *tl = ce->timeline;
	struct i915_request *rq;
	u32 seqno;
	int ret;

	might_sleep_if(gfpflags_allow_blocking(gfp));

	/* Check that the caller provided an already pinned context */
	__intel_context_pin(ce);

	/*
	 * Beware: Dragons be flying overhead.
	 *
	 * We use RCU to look up requests in flight. The lookups may
	 * race with the request being allocated from the slab freelist.
	 * That is, the request we are writing to here may be in the process
	 * of being read by __i915_active_request_get_rcu(). As such,
	 * we have to be very careful when overwriting the contents. During
	 * the RCU lookup, we chase the request->engine pointer,
	 * read the request->global_seqno and increment the reference count.
	 *
	 * The reference count is incremented atomically. If it is zero,
	 * the lookup knows the request is unallocated and complete. Otherwise,
	 * it is either still in use, or has been reallocated and reset
	 * with dma_fence_init(). This increment is safe for release as we
	 * check that the request we have a reference to matches the active
	 * request.
	 *
	 * Before we increment the refcount, we chase the request->engine
	 * pointer. We must not call kmem_cache_zalloc() or else we set
	 * that pointer to NULL and cause a crash during the lookup. If
	 * we see the request is completed (based on the value of the
	 * old engine and seqno), the lookup is complete and reports NULL.
	 * If we decide the request is not completed (new engine or seqno),
	 * then we grab a reference and double check that it is still the
	 * active request - which it won't be and restart the lookup.
	 *
	 * Do not use kmem_cache_zalloc() here!
	 */
	rq = kmem_cache_alloc(global.slab_requests,
			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (unlikely(!rq)) {
		rq = request_alloc_slow(tl, &ce->engine->request_pool, gfp);
		if (!rq) {
			ret = -ENOMEM;
			goto err_unreserve;
		}
	}

	rq->context = ce;
	rq->engine = ce->engine;
	rq->ring = ce->ring;
	rq->execution_mask = ce->engine->mask;

	ret = intel_timeline_get_seqno(tl, rq, &seqno);
	if (ret)
		goto err_free;

	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
		       tl->fence_context, seqno);

	RCU_INIT_POINTER(rq->timeline, tl);
	RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
	rq->hwsp_seqno = tl->hwsp_seqno;
	GEM_BUG_ON(i915_request_completed(rq));

	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */

	/* We bump the ref for the fence chain */
	i915_sw_fence_reinit(&i915_request_get(rq)->submit);
	i915_sw_fence_reinit(&i915_request_get(rq)->semaphore);

	i915_sched_node_reinit(&rq->sched);

	/* No zalloc, everything must be cleared after use */
	rq->batch = NULL;
	GEM_BUG_ON(rq->capture_list);
	GEM_BUG_ON(!llist_empty(&rq->execute_cb));

	/*
	 * Reserve space in the ring buffer for all the commands required to
	 * eventually emit this request. This is to guarantee that the
	 * i915_request_add() call can't fail. Note that the reserve may need
	 * to be redone if the request is not actually submitted straight
	 * away, e.g. because a GPU scheduler has deferred it.
	 *
	 * Note that due to how we add reserved_space to intel_ring_begin()
	 * we need to double our request to ensure that if we need to wrap
	 * around inside i915_request_add() there is sufficient space at
	 * the beginning of the ring as well.
	 */
	rq->reserved_space =
		2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);

	/*
	 * Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	rq->head = rq->ring->emit;

	ret = rq->engine->request_alloc(rq);
	if (ret)
		goto err_unwind;

	rq->infix = rq->ring->emit; /* end of header; start of user payload */

	intel_context_mark_active(ce);
	list_add_tail_rcu(&rq->link, &tl->requests);

	return rq;

err_unwind:
	ce->ring->emit = rq->head;

	/* Make sure we didn't add ourselves to external state before freeing */
	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));

err_free:
	kmem_cache_free(global.slab_requests, rq);
err_unreserve:
	intel_context_unpin(ce);
	return ERR_PTR(ret);
}

struct i915_request *
i915_request_create(struct intel_context *ce)
{
	struct i915_request *rq;
	struct intel_timeline *tl;

	tl = intel_context_timeline_lock(ce);
	if (IS_ERR(tl))
		return ERR_CAST(tl);

	/* Move our oldest request to the slab-cache (if not in use!) */
	rq = list_first_entry(&tl->requests, typeof(*rq), link);
	if (!list_is_last(&rq->link, &tl->requests))
		i915_request_retire(rq);

	intel_context_enter(ce);
	rq = __i915_request_create(ce, GFP_KERNEL);
	intel_context_exit(ce); /* active reference transferred to request */
	if (IS_ERR(rq))
		goto err_unlock;

	/* Check that we do not interrupt ourselves with a new request */
	rq->cookie = lockdep_pin_lock(&tl->mutex);

	return rq;

err_unlock:
	intel_context_timeline_unlock(tl);
	return rq;
}

static int
i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
{
	struct dma_fence *fence;
	int err;

	if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline))
		return 0;

	if (i915_request_started(signal))
		return 0;

	fence = NULL;
	rcu_read_lock();
	spin_lock_irq(&signal->lock);
	do {
		struct list_head *pos = READ_ONCE(signal->link.prev);
		struct i915_request *prev;

		/* Confirm signal has not been retired, the link is valid */
		if (unlikely(i915_request_started(signal)))
			break;

		/* Is signal the earliest request on its timeline? */
		if (pos == &rcu_dereference(signal->timeline)->requests)
			break;

		/*
		 * Peek at the request before us in the timeline. That
		 * request will only be valid before it is retired, so
		 * after acquiring a reference to it, confirm that it is
		 * still part of the signaler's timeline.
		 */
		prev = list_entry(pos, typeof(*prev), link);
		if (!i915_request_get_rcu(prev))
			break;

		/* After the strong barrier, confirm prev is still attached */
		if (unlikely(READ_ONCE(prev->link.next) != &signal->link)) {
			i915_request_put(prev);
			break;
		}

		fence = &prev->fence;
	} while (0);
	spin_unlock_irq(&signal->lock);
	rcu_read_unlock();
	if (!fence)
		return 0;

	err = 0;
	if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
		err = i915_sw_fence_await_dma_fence(&rq->submit,
						    fence, 0,
						    I915_FENCE_GFP);
	dma_fence_put(fence);

	return err;
}

static intel_engine_mask_t
already_busywaiting(struct i915_request *rq)
{
	/*
	 * Polling a semaphore causes bus traffic, delaying other users of
	 * both the GPU and CPU. We want to limit the impact on others,
	 * while taking advantage of early submission to reduce GPU
	 * latency. Therefore we restrict ourselves to not using more
	 * than one semaphore from each source, and not using a semaphore
	 * if we have detected the engine is saturated (i.e. would not be
	 * submitted early and cause bus traffic reading an already passed
	 * semaphore).
	 *
	 * See the are-we-too-late? check in __i915_request_submit().
	 */
	return rq->sched.semaphores | READ_ONCE(rq->engine->saturated);
}

static int
__emit_semaphore_wait(struct i915_request *to,
		      struct i915_request *from,
		      u32 seqno)
{
	const int has_token = INTEL_GEN(to->engine->i915) >= 12;
	u32 hwsp_offset;
	int len, err;
	u32 *cs;

	GEM_BUG_ON(INTEL_GEN(to->engine->i915) < 8);
	GEM_BUG_ON(i915_request_has_initial_breadcrumb(to));

	/* We need to pin the signaler's HWSP until we are finished reading. */
	err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
	if (err)
		return err;

	len = 4;
	if (has_token)
		len += 2;

	cs = intel_ring_begin(to, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * Using greater-than-or-equal here means we have to worry
	 * about seqno wraparound. To side step that issue, we swap
	 * the timeline HWSP upon wrapping, so that anyone listening
	 * for the old (pre-wrap) values does not see much smaller
	 * (post-wrap) values than expected (and so wait forever).
	 */
	*cs++ = (MI_SEMAPHORE_WAIT |
		 MI_SEMAPHORE_GLOBAL_GTT |
		 MI_SEMAPHORE_POLL |
		 MI_SEMAPHORE_SAD_GTE_SDD) +
		has_token;
	*cs++ = seqno;
	*cs++ = hwsp_offset;
	*cs++ = 0;
	if (has_token) {
		*cs++ = 0;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(to, cs);
	return 0;
}
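
/*
 * Illustrative sketch, not part of the driver: the wraparound hazard
 * described above stems from the unsigned greater-or-equal poll of
 * MI_SEMAPHORE_WAIT. On the CPU side the driver avoids the same hazard
 * with a signed-difference comparison (cf. i915_seqno_passed()); the
 * hypothetical helper below merely restates that idiom for clarity.
 */
static inline bool __maybe_unused example_seqno_passed(u32 a, u32 b)
{
	/* True if seqno a is at, or after, seqno b, tolerating u32 wrap. */
	return (s32)(a - b) >= 0;
}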

static int
emit_semaphore_wait(struct i915_request *to,
		    struct i915_request *from,
		    gfp_t gfp)
{
	const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask;
	struct i915_sw_fence *wait = &to->submit;

	if (!intel_context_use_semaphores(to->context))
		goto await_fence;

	if (i915_request_has_initial_breadcrumb(to))
		goto await_fence;

	if (!rcu_access_pointer(from->hwsp_cacheline))
		goto await_fence;

	/*
	 * If this or its dependents are waiting on an external fence
	 * that may fail catastrophically, then we want to avoid using
	 * semaphores as they bypass the fence signaling metadata, and we
	 * lose the fence->error propagation.
	 */
	if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN)
		goto await_fence;

	/* Just emit the first semaphore we see as request space is limited. */
	if (already_busywaiting(to) & mask)
		goto await_fence;

	if (i915_request_await_start(to, from) < 0)
		goto await_fence;

	/* Only submit our spinner after the signaler is running! */
	if (__await_execution(to, from, NULL, gfp))
		goto await_fence;

	if (__emit_semaphore_wait(to, from, from->fence.seqno))
		goto await_fence;

	to->sched.semaphores |= mask;
	wait = &to->semaphore;

await_fence:
	return i915_sw_fence_await_dma_fence(wait,
					     &from->fence, 0,
					     I915_FENCE_GFP);
}

static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
					  struct dma_fence *fence)
{
	return __intel_timeline_sync_is_later(tl,
					      fence->context,
					      fence->seqno - 1);
}

static int intel_timeline_sync_set_start(struct intel_timeline *tl,
					 const struct dma_fence *fence)
{
	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
}

static int
__i915_request_await_execution(struct i915_request *to,
			       struct i915_request *from,
			       void (*hook)(struct i915_request *rq,
					    struct dma_fence *signal))
{
	int err;

	GEM_BUG_ON(intel_context_is_barrier(from->context));

	/* Submit both requests at the same time */
	err = __await_execution(to, from, hook, I915_FENCE_GFP);
	if (err)
		return err;

	/* Squash repeated dependencies to the same timelines */
	if (intel_timeline_sync_has_start(i915_request_timeline(to),
					  &from->fence))
		return 0;

	/*
	 * Wait until the start of this request.
	 *
	 * The execution cb fires when we submit the request to HW. But in
	 * many cases this may be long before the request itself is ready to
	 * run (consider that we submit 2 requests for the same context, where
	 * the request of interest is behind an indefinite spinner).
	 * So we hook up to both to reduce our queues and keep the execution
	 * lag minimised in the worst case, though we hope that the
	 * await_start is elided.
	 */
	err = i915_request_await_start(to, from);
	if (err < 0)
		return err;

	/*
	 * Ensure both start together [after all semaphores in signal]
	 *
	 * Now that we are queued to the HW at roughly the same time (thanks
	 * to the execute cb) and are ready to run at roughly the same time
	 * (thanks to the await start), our signaler may still be indefinitely
	 * delayed by waiting on a semaphore from a remote engine. If our
	 * signaler depends on a semaphore, so indirectly do we, and we do not
	 * want to start our payload until our signaler also starts theirs.
	 * So we wait.
	 *
	 * However, there is also a second condition for which we need to wait
	 * for the precise start of the signaler. Consider that the signaler
	 * was submitted in a chain of requests following another context
	 * (with just an ordinary intra-engine fence dependency between the
	 * two). In this case the signaler is queued to HW, but not for
	 * immediate execution, and so we must wait until it reaches the
	 * active slot.
	 */
	if (intel_engine_has_semaphores(to->engine) &&
	    !i915_request_has_initial_breadcrumb(to)) {
		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
		if (err < 0)
			return err;
	}

	/* Couple the dependency tree for PI on this exposed to->fence */
	if (to->engine->schedule) {
		err = i915_sched_node_add_dependency(&to->sched,
						     &from->sched,
						     I915_DEPENDENCY_WEAK);
		if (err < 0)
			return err;
	}

	return intel_timeline_sync_set_start(i915_request_timeline(to),
					     &from->fence);
}
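
/*
 * Illustrative sketch, not part of the driver: holding back one request's
 * payload until another request has actually reached the HW, which is what
 * the execution-await above provides. The helper name is hypothetical and
 * error handling is left to the caller.
 */
static int __maybe_unused example_start_after(struct i915_request *waiter,
					      struct i915_request *signaler)
{
	/* Wait for @signaler to be submitted/started, not for it to complete. */
	return __i915_request_await_execution(waiter, signaler, NULL);
}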

static void mark_external(struct i915_request *rq)
{
	/*
	 * The downside of using semaphores is that we lose metadata passing
	 * along the signaling chain. This is particularly nasty when we
	 * need to pass along a fatal error such as EFAULT or EDEADLK. For
	 * fatal errors we want to scrub the request before it is executed,
	 * which means that we cannot preload the request onto HW and have
	 * it wait upon a semaphore.
	 */
	rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN;
}

static int
__i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
{
	mark_external(rq);
	return i915_sw_fence_await_dma_fence(&rq->submit, fence,
					     i915_fence_context_timeout(rq->engine->i915,
									fence->context),
					     I915_FENCE_GFP);
}

static int
i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
{
	struct dma_fence *iter;
	int err = 0;

	if (!to_dma_fence_chain(fence))
		return __i915_request_await_external(rq, fence);

	dma_fence_chain_for_each(iter, fence) {
		struct dma_fence_chain *chain = to_dma_fence_chain(iter);

		if (!dma_fence_is_i915(chain->fence)) {
			err = __i915_request_await_external(rq, iter);
			break;
		}

		err = i915_request_await_dma_fence(rq, chain->fence);
		if (err < 0)
			break;
	}

	dma_fence_put(iter);
	return err;
}

int
i915_request_await_execution(struct i915_request *rq,
			     struct dma_fence *fence,
			     void (*hook)(struct i915_request *rq,
					  struct dma_fence *signal))
{
	struct dma_fence **child = &fence;
	unsigned int nchild = 1;
	int ret;

	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		/* XXX Error for signal-on-any fence arrays */

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
	}

	do {
		fence = *child++;
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
			continue;

		if (fence->context == rq->fence.context)
			continue;

		/*
		 * We don't squash repeated fence dependencies here as we
		 * want to run our callback in all cases.
		 */

		if (dma_fence_is_i915(fence))
			ret = __i915_request_await_execution(rq,
							     to_request(fence),
							     hook);
		else
			ret = i915_request_await_external(rq, fence);
		if (ret < 0)
			return ret;
	} while (--nchild);

	return 0;
}

static int
await_request_submit(struct i915_request *to, struct i915_request *from)
{
	/*
	 * If we are waiting on a virtual engine, then it may be
	 * constrained to execute on a single engine *prior* to submission.
	 * When it is submitted, it will be first submitted to the virtual
	 * engine and then passed to the physical engine. We cannot allow
	 * the waiter to be submitted immediately to the physical engine
	 * as it may then bypass the virtual request.
	 */
	if (to->engine == READ_ONCE(from->engine))
		return i915_sw_fence_await_sw_fence_gfp(&to->submit,
							&from->submit,
							I915_FENCE_GFP);
	else
		return __i915_request_await_execution(to, from, NULL);
}

static int
i915_request_await_request(struct i915_request *to, struct i915_request *from)
{
	int ret;

	GEM_BUG_ON(to == from);
	GEM_BUG_ON(to->timeline == from->timeline);

	if (i915_request_completed(from)) {
		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
		return 0;
	}

	if (to->engine->schedule) {
		ret = i915_sched_node_add_dependency(&to->sched,
						     &from->sched,
						     I915_DEPENDENCY_EXTERNAL);
		if (ret < 0)
			return ret;
	}

	if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
		ret = await_request_submit(to, from);
	else
		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
	if (ret < 0)
		return ret;

	return 0;
}

int
i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
{
	struct dma_fence **child = &fence;
	unsigned int nchild = 1;
	int ret;

	/*
	 * Note that if the fence-array was created in signal-on-any mode,
	 * we should *not* decompose it into its individual fences. However,
	 * we don't currently store which mode the fence-array is operating
	 * in. Fortunately, the only user of signal-on-any is private to
	 * amdgpu and we should not see any incoming fence-array from
	 * sync-file being in signal-on-any mode.
	 */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
	}

	do {
		fence = *child++;
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
			continue;

		/*
		 * Requests on the same timeline are explicitly ordered, along
		 * with their dependencies, by i915_request_add() which ensures
		 * that requests are submitted in-order through each ring.
		 */
		if (fence->context == rq->fence.context)
			continue;

		/* Squash repeated waits to the same timelines */
		if (fence->context &&
		    intel_timeline_sync_is_later(i915_request_timeline(rq),
						 fence))
			continue;

		if (dma_fence_is_i915(fence))
			ret = i915_request_await_request(rq, to_request(fence));
		else
			ret = i915_request_await_external(rq, fence);
		if (ret < 0)
			return ret;

		/* Record the latest fence used against each timeline */
		if (fence->context)
			intel_timeline_sync_set(i915_request_timeline(rq),
						fence);
	} while (--nchild);

	return 0;
}
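
/*
 * Illustrative sketch, not part of the driver: a caller that has imported a
 * fence (e.g. from a sync_file) and wants the new request to wait for it
 * before executing. The decomposition of fence-arrays and dma-fence-chains,
 * and the squashing of repeated waits, all happen inside
 * i915_request_await_dma_fence(); the helper name is hypothetical.
 */
static int __maybe_unused example_await_in_fence(struct i915_request *rq,
						 struct dma_fence *in_fence)
{
	/* Any error (e.g. -ENOMEM) must be reported before the request runs. */
	return i915_request_await_dma_fence(rq, in_fence);
}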

/**
 * i915_request_await_object - set this request to (async) wait upon a bo
 * @to: request we are wishing to use
 * @obj: object which may be in use on another ring.
 * @write: whether the wait is on behalf of a writer
 *
 * This code is meant to abstract object synchronization with the GPU.
 * Conceptually we serialise writes between engines inside the GPU.
 * We only allow one engine to write into a buffer at any time, but
 * multiple readers. To ensure each has a coherent view of memory, we must:
 *
 * - If there is an outstanding write request to the object, the new
 *   request must wait for it to complete (either CPU or in hw, requests
 *   on the same ring will be naturally ordered).
 *
 * - If we are a write request (pending_write_domain is set), the new
 *   request must wait for outstanding read requests to complete.
 *
 * Returns 0 if successful, else propagates up the lower layer error.
 */
int
i915_request_await_object(struct i915_request *to,
			  struct drm_i915_gem_object *obj,
			  bool write)
{
	struct dma_fence *excl;
	int ret = 0;

	if (write) {
		struct dma_fence **shared;
		unsigned int count, i;

		ret = dma_resv_get_fences_rcu(obj->base.resv,
					      &excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			ret = i915_request_await_dma_fence(to, shared[i]);
			if (ret)
				break;

			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);
	} else {
		excl = dma_resv_get_excl_rcu(obj->base.resv);
	}

	if (excl) {
		if (ret == 0)
			ret = i915_request_await_dma_fence(to, excl);

		dma_fence_put(excl);
	}

	return ret;
}
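
/*
 * Illustrative sketch, not part of the driver: the @write flag above selects
 * which reservation-object fences the request must wait on. A pure reader
 * only needs to serialise against the last writer, so it passes write=false;
 * a writer passes write=true and also waits for all readers. The helper name
 * is hypothetical.
 */
static int __maybe_unused example_await_for_read(struct i915_request *rq,
						 struct drm_i915_gem_object *obj)
{
	/* Reader: wait only for the exclusive (write) fence on @obj. */
	return i915_request_await_object(rq, obj, false);
}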

static struct i915_request *
__i915_request_add_to_timeline(struct i915_request *rq)
{
	struct intel_timeline *timeline = i915_request_timeline(rq);
	struct i915_request *prev;

	/*
	 * Dependency tracking and request ordering along the timeline
	 * is special cased so that we can eliminate redundant ordering
	 * operations while building the request (we know that the timeline
	 * itself is ordered, and here we guarantee it).
	 *
	 * As we know we will need to emit tracking along the timeline,
	 * we embed the hooks into our request struct -- at the cost of
	 * having to have specialised no-allocation interfaces (which will
	 * be beneficial elsewhere).
	 *
	 * A second benefit to open-coding i915_request_await_request is
	 * that we can apply a slight variant of the rules specialised
	 * for timelines that jump between engines (such as virtual engines).
	 * If we consider the case of a virtual engine, we must emit a
	 * dma-fence to prevent scheduling of the second request until the
	 * first is complete (to maximise our greedy late load balancing) and
	 * this precludes optimising to use semaphore serialisation of a
	 * single timeline across engines.
	 */
	prev = to_request(__i915_active_fence_set(&timeline->last_request,
						  &rq->fence));
	if (prev && !i915_request_completed(prev)) {
		/*
		 * The requests are supposed to be kept in order. However,
		 * we need to be wary in case the timeline->last_request
		 * is used as a barrier for external modification to this
		 * context.
		 */
		GEM_BUG_ON(prev->context == rq->context &&
			   i915_seqno_passed(prev->fence.seqno,
					     rq->fence.seqno));

		if (is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask))
			i915_sw_fence_await_sw_fence(&rq->submit,
						     &prev->submit,
						     &rq->submitq);
		else
			__i915_sw_fence_await_dma_fence(&rq->submit,
							&prev->fence,
							&rq->dmaq);
		if (rq->engine->schedule)
			__i915_sched_node_add_dependency(&rq->sched,
							 &prev->sched,
							 &rq->dep,
							 0);
	}
	if (prev)
		i915_request_put(prev);

	/*
	 * Make sure that no request gazumped us - if it was allocated after
	 * our i915_request_alloc() and called __i915_request_add() before
	 * us, the timeline will hold its seqno which is later than ours.
	 */
	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);

	return prev;
}

/*
 * NB: This function is not allowed to fail. Doing so would mean the
 * request is not being tracked for completion but the work itself is
 * going to happen on the hardware. This would be a Bad Thing(tm).
 */
struct i915_request *__i915_request_commit(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_ring *ring = rq->ring;
	u32 *cs;

	RQ_TRACE(rq, "\n");

	/*
	 * To ensure that this call will not fail, space for its emissions
	 * should already have been reserved in the ring buffer. Let the ring
	 * know that it is time to use that space up.
	 */
	GEM_BUG_ON(rq->reserved_space > ring->space);
	rq->reserved_space = 0;
	rq->emitted_jiffies = jiffies;

	/*
	 * Record the position of the start of the breadcrumb so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the ring's HEAD.
	 */
	cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw);
	GEM_BUG_ON(IS_ERR(cs));
	rq->postfix = intel_ring_offset(rq, cs);

	return __i915_request_add_to_timeline(rq);
}

void __i915_request_queue(struct i915_request *rq,
			  const struct i915_sched_attr *attr)
{
	/*
	 * Let the backend know a new request has arrived that may need
	 * to adjust the existing execution schedule due to a high priority
	 * request - i.e. we may want to preempt the current request in order
	 * to run a high priority dependency chain *before* we can execute this
	 * request.
	 *
	 * This is called before the request is ready to run so that we can
	 * decide whether to preempt the entire chain so that it is ready to
	 * run at the earliest possible convenience.
	 */
	if (attr && rq->engine->schedule)
		rq->engine->schedule(rq, attr);
	i915_sw_fence_commit(&rq->semaphore);
	i915_sw_fence_commit(&rq->submit);
}

void i915_request_add(struct i915_request *rq)
{
	struct intel_timeline * const tl = i915_request_timeline(rq);
	struct i915_sched_attr attr = {};
	struct i915_gem_context *ctx;

	lockdep_assert_held(&tl->mutex);
	lockdep_unpin_lock(&tl->mutex, rq->cookie);

	trace_i915_request_add(rq);
	__i915_request_commit(rq);

	/* XXX placeholder for selftests */
	rcu_read_lock();
	ctx = rcu_dereference(rq->context->gem_context);
	if (ctx)
		attr = ctx->sched;
	rcu_read_unlock();

	__i915_request_queue(rq, &attr);

	mutex_unlock(&tl->mutex);
}
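
/*
 * Illustrative sketch, not part of the driver: the canonical request
 * lifecycle as seen by a caller. Once i915_request_create() has succeeded,
 * the request must always be handed back via i915_request_add(); there is no
 * way to abort it, which is why __i915_request_commit() above is not allowed
 * to fail. The helper name is hypothetical and payload emission is elided.
 */
static int __maybe_unused example_submit_nop(struct intel_context *ce)
{
	struct i915_request *rq;

	rq = i915_request_create(ce);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* ... emit commands into rq->ring here ... */

	i915_request_add(rq); /* publishes the request; cannot fail */
	return 0;
}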

static unsigned long local_clock_ns(unsigned int *cpu)
{
	unsigned long t;

	/*
	 * Cheaply read the CPU-local clock; the result and all subsequent
	 * calculations are in nanoseconds.
	 *
	 * Note that local_clock() is only defined wrt to the current CPU;
	 * the comparisons are no longer valid if we switch CPUs. Instead of
	 * blocking preemption for the entire busywait, we can detect the CPU
	 * switch and use that as indicator of system load and a reason to
	 * stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock();
	put_cpu();

	return t;
}

static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
	unsigned int this_cpu;

	if (time_after(local_clock_ns(&this_cpu), timeout))
		return true;

	return this_cpu != cpu;
}

static bool __i915_spin_request(struct i915_request * const rq, int state)
{
	unsigned long timeout_ns;
	unsigned int cpu;

	/*
	 * Only wait for the request if we know it is likely to complete.
	 *
	 * We don't track the timestamps around requests, nor the average
	 * request length, so we do not have a good indicator that this
	 * request will complete within the timeout. What we do know is the
	 * order in which requests are executed by the context and so we can
	 * tell if the request has been started. If the request is not even
	 * running yet, it is a fair assumption that it will not complete
	 * within our relatively short timeout.
	 */
	if (!i915_request_is_running(rq))
		return false;

	/*
	 * When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits with minimal latency. However,
	 * if it is a slow request, we want to sleep as quickly as possible.
	 * The tradeoff between waiting and sleeping is roughly the time it
	 * takes to sleep on a request, on the order of a microsecond.
	 */

	timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
	timeout_ns += local_clock_ns(&cpu);
	do {
		if (dma_fence_is_signaled(&rq->fence))
			return true;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout_ns, cpu))
			break;

		cpu_relax();
	} while (!need_resched());

	return false;
}

struct request_wait {
	struct dma_fence_cb cb;
	struct task_struct *tsk;
};

static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct request_wait *wait = container_of(cb, typeof(*wait), cb);

	wake_up_process(fetch_and_zero(&wait->tsk));
}

/**
 * i915_request_wait - wait until execution of request has finished
 * @rq: the request to wait upon
 * @flags: how to wait
 * @timeout: how long to wait in jiffies
 *
 * i915_request_wait() waits for the request to be completed, for a
 * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
 * unbounded wait).
 *
 * Returns the remaining time (in jiffies) if the request completed, which may
 * be zero, or -ETIME if the request is unfinished after the timeout expires.
 * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
 * pending before the request completes.
 */
long i915_request_wait(struct i915_request *rq,
		       unsigned int flags,
		       long timeout)
{
	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	struct request_wait wait;

	might_sleep();
	GEM_BUG_ON(timeout < 0);

	if (dma_fence_is_signaled(&rq->fence))
		return timeout;

	if (!timeout)
		return -ETIME;

	trace_i915_request_wait_begin(rq, flags);

	/*
	 * We must never wait on the GPU while holding a lock as we
	 * may need to perform a GPU reset. So while we don't need to
	 * serialise wait/reset with an explicit lock, we do want
	 * lockdep to detect potential dependency cycles.
	 */
	mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_);

	/*
	 * Optimistic spin before touching IRQs.
	 *
	 * We may use a rather large value here to offset the penalty of
	 * switching away from the active task. Frequently, the client will
	 * wait upon an old swapbuffer to throttle itself to remain within a
	 * frame of the gpu. If the client is running in lockstep with the gpu,
	 * then it should not be waiting long at all, and a sleep now will incur
	 * extra scheduler latency in producing the next frame. To try to
	 * avoid adding the cost of enabling/disabling the interrupt to the
	 * short wait, we first spin to see if the request would have completed
	 * in the time taken to set up the interrupt.
	 *
	 * We need up to 5us to enable the irq, and up to 20us to hide the
	 * scheduler latency of a context switch, ignoring the secondary
	 * impacts from a context switch such as cache eviction.
	 *
	 * The scheme used for low-latency IO is called "hybrid interrupt
	 * polling". The suggestion there is to sleep until just before you
	 * expect to be woken by the device interrupt and then poll for its
	 * completion. That requires having a good predictor for the request
	 * duration, which we currently lack.
	 */
	if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
	    __i915_spin_request(rq, state))
		goto out;

	/*
	 * This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we sleep. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery).
	 */
	if (flags & I915_WAIT_PRIORITY && !i915_request_started(rq))
		intel_rps_boost(rq);

	wait.tsk = current;
	if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
		goto out;

	/*
	 * Flush the submission tasklet, but only if it may help this request.
	 *
	 * We sometimes experience some latency between the HW interrupts and
	 * tasklet execution (mostly due to ksoftirqd latency, but it can also
	 * be due to lazy CS events), so let's run the tasklet manually if there
	 * is a chance it may submit this request. If the request is not ready
	 * to run, as it is waiting for other fences to be signaled, flushing
	 * the tasklet is busy work without any advantage for this client.
	 *
	 * If the HW is being lazy, this is the last chance before we go to
	 * sleep to catch any pending events. We will check periodically in
	 * the heartbeat to flush the submission tasklets as a last resort
	 * for unhappy HW.
	 */
	if (i915_request_is_ready(rq))
		intel_engine_flush_submission(rq->engine);

	for (;;) {
		set_current_state(state);

		if (dma_fence_is_signaled(&rq->fence))
			break;

		if (signal_pending_state(state, current)) {
			timeout = -ERESTARTSYS;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);
	}
	__set_current_state(TASK_RUNNING);

	if (READ_ONCE(wait.tsk))
		dma_fence_remove_callback(&rq->fence, &wait.cb);
	GEM_BUG_ON(!list_empty(&wait.cb.node));

out:
	mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
	trace_i915_request_wait_end(rq);
	return timeout;
}
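
/*
 * Illustrative sketch, not part of the driver: a bounded, interruptible wait
 * on a request from process context. The timeout is expressed in jiffies, so
 * callers usually convert from milliseconds; the helper name is hypothetical.
 */
static int __maybe_unused example_wait_request(struct i915_request *rq)
{
	long ret;

	ret = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
				msecs_to_jiffies(100));
	if (ret < 0)
		return ret;	/* -ETIME on timeout, or a signal error */

	return 0;		/* completed; ret held the remaining jiffies */
}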

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_request.c"
#include "selftests/i915_request.c"
#endif

static void i915_global_request_shrink(void)
{
	kmem_cache_shrink(global.slab_execute_cbs);
	kmem_cache_shrink(global.slab_requests);
}

static void i915_global_request_exit(void)
{
	kmem_cache_destroy(global.slab_execute_cbs);
	kmem_cache_destroy(global.slab_requests);
}

static struct i915_global_request global = { {
	.shrink = i915_global_request_shrink,
	.exit = i915_global_request_exit,
} };

int __init i915_global_request_init(void)
{
	global.slab_requests =
		kmem_cache_create("i915_request",
				  sizeof(struct i915_request),
				  __alignof__(struct i915_request),
				  SLAB_HWCACHE_ALIGN |
				  SLAB_RECLAIM_ACCOUNT |
				  SLAB_TYPESAFE_BY_RCU,
				  __i915_request_ctor);
	if (!global.slab_requests)
		return -ENOMEM;

	global.slab_execute_cbs = KMEM_CACHE(execute_cb,
					     SLAB_HWCACHE_ALIGN |
					     SLAB_RECLAIM_ACCOUNT |
					     SLAB_TYPESAFE_BY_RCU);
	if (!global.slab_execute_cbs)
		goto err_requests;

	i915_global_register(&global.base);
	return 0;

err_requests:
	kmem_cache_destroy(global.slab_requests);
	return -ENOMEM;
}