/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/debugobjects.h>

#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
	struct i915_global base;
	struct kmem_cache *slab_cache;
} global;

struct active_node {
	struct rb_node node;
	struct i915_active_fence base;
	struct i915_active *ref;
	u64 timeline;
};

#define fetch_node(x) rb_entry(READ_ONCE(x), typeof(struct active_node), node)

static inline struct active_node *
node_from_active(struct i915_active_fence *active)
{
	return container_of(active, struct active_node, base);
}

#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)

static inline bool is_barrier(const struct i915_active_fence *active)
{
	return IS_ERR(rcu_access_pointer(active->fence));
}

static inline struct llist_node *barrier_to_ll(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return (struct llist_node *)&node->base.cb.node;
}

static inline struct intel_engine_cs *
__barrier_to_engine(struct active_node *node)
{
	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
}

static inline struct intel_engine_cs *
barrier_to_engine(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return __barrier_to_engine(node);
}

static inline struct active_node *barrier_from_ll(struct llist_node *x)
{
	return container_of((struct list_head *)x,
			    struct active_node, base.cb.node);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
	struct i915_active *ref = addr;

	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static const struct debug_obj_descr active_debug_desc = {
	.name = "i915_active",
	.debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
	debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	debug_object_activate(ref, &active_debug_desc);
}

static void debug_active_deactivate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->tree_lock);
	if (!atomic_read(&ref->count)) /* after the last dec */
		debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
	debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
	debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif
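/*
 * Usage sketch (editor's illustration, not part of the driver): a typical
 * owner embeds an i915_active in a longer-lived object and brackets request
 * tracking with acquire/release. The "foo" names here are hypothetical:
 *
 *	struct foo {
 *		struct i915_active active;
 *	};
 *
 *	i915_active_init(&foo->active, NULL, foo_retire);
 *
 *	err = i915_active_acquire(&foo->active);
 *	if (!err) {
 *		err = i915_active_ref(&foo->active, idx, &rq->fence);
 *		i915_active_release(&foo->active);
 *	}
 *
 * Once every tracked fence has signaled and the acquire count returns to
 * zero, foo_retire() is invoked and the node tree is reaped back into the
 * local slab cache.
 */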
static void
__active_retire(struct i915_active *ref)
{
	struct rb_root root = RB_ROOT;
	struct active_node *it, *n;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* return the unused nodes to our slabcache -- flushing the allocator */
	if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
		return;

	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
	debug_active_deactivate(ref);

	/* Even if we have not used the cache, we may still have a barrier */
	if (!ref->cache)
		ref->cache = fetch_node(ref->tree.rb_node);

	/* Keep the MRU cached node for reuse */
	if (ref->cache) {
		/* Discard all other nodes in the tree */
		rb_erase(&ref->cache->node, &ref->tree);
		root = ref->tree;

		/* Rebuild the tree with only the cached node */
		rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node);
		rb_insert_color(&ref->cache->node, &ref->tree);
		GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node);

		/* Make the cached node available for reuse with any timeline */
		if (IS_ENABLED(CONFIG_64BIT))
			ref->cache->timeline = 0; /* needs cmpxchg(u64) */
	}

	spin_unlock_irqrestore(&ref->tree_lock, flags);

	/* After the final retire, the entire struct may be freed */
	if (ref->retire)
		ref->retire(ref);

	/* ... except if you wait on it, you must manage your own references! */
	wake_up_var(ref);

	/* Finally free the discarded timeline tree */
	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
		GEM_BUG_ON(i915_active_fence_isset(&it->base));
		kmem_cache_free(global.slab_cache, it);
	}
}

static void
active_work(struct work_struct *wrk)
{
	struct i915_active *ref = container_of(wrk, typeof(*ref), work);

	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	__active_retire(ref);
}

static void
active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
		queue_work(system_unbound_wq, &ref->work);
		return;
	}

	__active_retire(ref);
}

static inline struct dma_fence **
__active_fence_slot(struct i915_active_fence *active)
{
	return (struct dma_fence ** __force)&active->fence;
}

static inline bool
active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct i915_active_fence *active =
		container_of(cb, typeof(*active), cb);

	return cmpxchg(__active_fence_slot(active), fence, NULL) == fence;
}

static void
node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct active_node, base.cb)->ref);
}

static void
excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	if (active_fence_cb(fence, cb))
		active_retire(container_of(cb, struct i915_active, excl.cb));
}
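/*
 * A note on the two retirement paths above: active_retire() runs the last-ref
 * retirement inline unless the retire callback was registered as one that may
 * sleep, in which case __active_retire() is deferred to system_unbound_wq.
 * A sketch of opting in, assuming the i915_active_may_sleep() helper and
 * __i915_active_call annotation from i915_active.h (foo_retire is a
 * hypothetical callback):
 *
 *	__i915_active_call static void foo_retire(struct i915_active *ref)
 *	{
 *		...	(may sleep, e.g. takes a mutex)
 *	}
 *
 *	i915_active_init(&foo->active, NULL, i915_active_may_sleep(foo_retire));
 *
 * The may-sleep flag travels in the low bits of the retire pointer and is
 * unpacked by __i915_active_init() via ptr_unpack_bits().
 */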
static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
{
	struct active_node *it;

	GEM_BUG_ON(idx == 0); /* 0 is the unordered timeline, rsvd for cache */

	/*
	 * We track the most recently used timeline to skip an rbtree search
	 * for the common case; under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty, that is
	 * after the previous activity has been retired, or if it matches the
	 * current timeline.
	 */
	it = READ_ONCE(ref->cache);
	if (it) {
		u64 cached = READ_ONCE(it->timeline);

		/* Once claimed, this slot will only belong to this idx */
		if (cached == idx)
			return it;

#ifdef CONFIG_64BIT /* for cmpxchg(u64) */
		/*
		 * An unclaimed cache [.timeline=0] can only be claimed once.
		 *
		 * If the value is already non-zero, some other thread has
		 * claimed the cache and we know that it does not match our
		 * idx. If, and only if, the timeline is currently zero is it
		 * worth competing to claim it atomically for ourselves (for
		 * only the winner of that race will cmpxchg return the old
		 * value of 0).
		 */
		if (!cached && !cmpxchg(&it->timeline, 0, idx))
			return it;
#endif
	}

	BUILD_BUG_ON(offsetof(typeof(*it), node));

	/* While active, the tree can only be built; not destroyed */
	GEM_BUG_ON(i915_active_is_idle(ref));

	it = fetch_node(ref->tree.rb_node);
	while (it) {
		if (it->timeline < idx) {
			it = fetch_node(it->node.rb_right);
		} else if (it->timeline > idx) {
			it = fetch_node(it->node.rb_left);
		} else {
			WRITE_ONCE(ref->cache, it);
			break;
		}
	}

	/* NB: If the tree rotated beneath us, we may miss our target. */
	return it;
}

static struct i915_active_fence *
active_instance(struct i915_active *ref, u64 idx)
{
	struct active_node *node, *prealloc;
	struct rb_node **p, *parent;

	node = __active_lookup(ref, idx);
	if (likely(node))
		return &node->base;

	/* Preallocate a replacement, just in case */
	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
	if (!prealloc)
		return NULL;

	spin_lock_irq(&ref->tree_lock);
	GEM_BUG_ON(i915_active_is_idle(ref));

	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx) {
			kmem_cache_free(global.slab_cache, prealloc);
			goto out;
		}

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	node = prealloc;
	__i915_active_fence_init(&node->base, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);

out:
	WRITE_ONCE(ref->cache, node);
	spin_unlock_irq(&ref->tree_lock);

	return &node->base;
}
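/*
 * active_instance() is the allocation path behind both i915_active_ref() and
 * i915_active_acquire_for_context() further below. A caller that must not
 * allocate at fence-set time can preallocate the slot for its timeline and
 * then use the __i915_active_ref() fast path; a sketch under that assumption:
 *
 *	err = i915_active_acquire_for_context(ref, idx);
 *	if (err)
 *		return err;
 *	...
 *	prev = __i915_active_ref(ref, idx, &rq->fence);
 *	...
 *	i915_active_release(ref);
 *
 * which is why __active_fence() later asserts that the slot already exists.
 */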
void __i915_active_init(struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *mkey,
			struct lock_class_key *wkey)
{
	unsigned long bits;

	debug_active_init(ref);

	ref->flags = 0;
	ref->active = active;
	ref->retire = ptr_unpack_bits(retire, &bits, 2);
	if (bits & I915_ACTIVE_MAY_SLEEP)
		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;

	spin_lock_init(&ref->tree_lock);
	ref->tree = RB_ROOT;
	ref->cache = NULL;

	init_llist_head(&ref->preallocated_barriers);
	atomic_set(&ref->count, 0);
	__mutex_init(&ref->mutex, "i915_active", mkey);
	__i915_active_fence_init(&ref->excl, NULL, excl_retire);
	INIT_WORK(&ref->work, active_work);
#if IS_ENABLED(CONFIG_LOCKDEP)
	lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
#endif
}

static bool ____active_del_barrier(struct i915_active *ref,
				   struct active_node *node,
				   struct intel_engine_cs *engine)
{
	struct llist_node *head = NULL, *tail = NULL;
	struct llist_node *pos, *next;

	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);

	/*
	 * Rebuild the llist excluding our node. We may perform this
	 * outside of the kernel_context timeline mutex and so someone
	 * else may be manipulating the engine->barrier_tasks, in
	 * which case either we or they will be upset :)
	 *
	 * A second __active_del_barrier() will report failure to claim
	 * the active_node and the caller will just shrug and know not to
	 * claim ownership of its node.
	 *
	 * A concurrent i915_request_add_active_barriers() will miss adding
	 * any of the tasks, but we will try again on the next -- and since
	 * we are actively using the barrier, we know that there will be
	 * at least another opportunity when we idle.
	 */
	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
		if (node == barrier_from_ll(pos)) {
			node = NULL;
			continue;
		}

		pos->next = head;
		head = pos;
		if (!tail)
			tail = pos;
	}
	if (head)
		llist_add_batch(head, tail, &engine->barrier_tasks);

	return !node;
}

static bool
__active_del_barrier(struct i915_active *ref, struct active_node *node)
{
	return ____active_del_barrier(ref, node, barrier_to_engine(node));
}

static bool
replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
{
	if (!is_barrier(active)) /* proto-node used by our idle barrier? */
		return false;

	/*
	 * This request is on the kernel_context timeline, and so
	 * we can use it to substitute for the pending idle-barrier
	 * request that we want to emit on the kernel_context.
	 */
	return __active_del_barrier(ref, node_from_active(active));
}

int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
{
	struct i915_active_fence *active;
	int err;

	/* Prevent reaping in case we malloc/wait while building the tree */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	do {
		active = active_instance(ref, idx);
		if (!active) {
			err = -ENOMEM;
			goto out;
		}

		if (replace_barrier(ref, active)) {
			RCU_INIT_POINTER(active->fence, NULL);
			atomic_dec(&ref->count);
		}
	} while (unlikely(is_barrier(active)));

	fence = __i915_active_fence_set(active, fence);
	if (!fence)
		__i915_active_acquire(ref);
	else
		dma_fence_put(fence);

out:
	i915_active_release(ref);
	return err;
}
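/*
 * Sketch of the common caller pattern for i915_active_ref() (editor's
 * illustration; "vma" stands in for any tracked object): record the last
 * request touching the object, keyed by the request's timeline:
 *
 *	u64 idx = i915_request_timeline(rq)->fence_context;
 *
 *	err = i915_active_ref(&vma->active, idx, &rq->fence);
 *
 * Only the most recent fence per timeline is kept; an older fence on the
 * same timeline is superseded and its tracking callback moved onto the new
 * fence by __i915_active_fence_set().
 */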
static struct dma_fence *
__i915_active_set_fence(struct i915_active *ref,
			struct i915_active_fence *active,
			struct dma_fence *fence)
{
	struct dma_fence *prev;

	if (replace_barrier(ref, active)) {
		RCU_INIT_POINTER(active->fence, fence);
		return NULL;
	}

	prev = __i915_active_fence_set(active, fence);
	if (!prev)
		__i915_active_acquire(ref);

	return prev;
}

static struct i915_active_fence *
__active_fence(struct i915_active *ref, u64 idx)
{
	struct active_node *it;

	it = __active_lookup(ref, idx);
	if (unlikely(!it)) { /* Contention with parallel tree builders! */
		spin_lock_irq(&ref->tree_lock);
		it = __active_lookup(ref, idx);
		spin_unlock_irq(&ref->tree_lock);
	}
	GEM_BUG_ON(!it); /* slot must be preallocated */

	return &it->base;
}

struct dma_fence *
__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
{
	/* Only valid while active, see i915_active_acquire_for_context() */
	return __i915_active_set_fence(ref, __active_fence(ref, idx), fence);
}

struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
	/* We expect the caller to manage the exclusive timeline ordering */
	return __i915_active_set_fence(ref, &ref->excl, f);
}

bool i915_active_acquire_if_busy(struct i915_active *ref)
{
	debug_active_assert(ref);
	return atomic_add_unless(&ref->count, 1, 0);
}

static void __i915_active_activate(struct i915_active *ref)
{
	spin_lock_irq(&ref->tree_lock); /* __active_retire() */
	if (!atomic_fetch_inc(&ref->count))
		debug_active_activate(ref);
	spin_unlock_irq(&ref->tree_lock);
}

int i915_active_acquire(struct i915_active *ref)
{
	int err;

	if (i915_active_acquire_if_busy(ref))
		return 0;

	if (!ref->active) {
		__i915_active_activate(ref);
		return 0;
	}

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (likely(!i915_active_acquire_if_busy(ref))) {
		err = ref->active(ref);
		if (!err)
			__i915_active_activate(ref);
	}

	mutex_unlock(&ref->mutex);

	return err;
}

int i915_active_acquire_for_context(struct i915_active *ref, u64 idx)
{
	struct i915_active_fence *active;
	int err;

	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, idx);
	if (!active) {
		i915_active_release(ref);
		return -ENOMEM;
	}

	return 0; /* return with active ref */
}

void i915_active_release(struct i915_active *ref)
{
	debug_active_assert(ref);
	active_retire(ref);
}

static void enable_signaling(struct i915_active_fence *active)
{
	struct dma_fence *fence;

	if (unlikely(is_barrier(active)))
		return;

	fence = i915_active_fence_get(active);
	if (!fence)
		return;

	dma_fence_enable_sw_signaling(fence);
	dma_fence_put(fence);
}

static int flush_barrier(struct active_node *it)
{
	struct intel_engine_cs *engine;

	if (likely(!is_barrier(&it->base)))
		return 0;

	engine = __barrier_to_engine(it);
	smp_rmb(); /* serialise with add_active_barriers */
	if (!is_barrier(&it->base))
		return 0;

	return intel_engine_flush_barriers(engine);
}
static int flush_lazy_signals(struct i915_active *ref)
{
	struct active_node *it, *n;
	int err = 0;

	enable_signaling(&ref->excl);
	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		err = flush_barrier(it); /* unconnected idle barrier? */
		if (err)
			break;

		enable_signaling(&it->base);
	}

	return err;
}

int __i915_active_wait(struct i915_active *ref, int state)
{
	might_sleep();

	/* Any fence added after the wait begins will not be auto-signaled */
	if (i915_active_acquire_if_busy(ref)) {
		int err;

		err = flush_lazy_signals(ref);
		i915_active_release(ref);
		if (err)
			return err;

		if (___wait_var_event(ref, i915_active_is_idle(ref),
				      state, 0, 0, schedule()))
			return -EINTR;
	}

	/*
	 * After the wait is complete, the caller may free the active.
	 * We have to flush any concurrent retirement before returning.
	 */
	flush_work(&ref->work);
	return 0;
}
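/*
 * A sketch of a blocking flush built on __i915_active_wait(), assuming the
 * i915_active_wait() wrapper in i915_active.h that passes TASK_INTERRUPTIBLE:
 *
 *	err = i915_active_wait(&foo->active);
 *	if (err)
 *		return err;	(interrupted)
 *
 * Note the comment above: only fences present when the wait begins are
 * auto-signaled; anything added afterwards must be flushed by the caller.
 */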
static int __await_active(struct i915_active_fence *active,
			  int (*fn)(void *arg, struct dma_fence *fence),
			  void *arg)
{
	struct dma_fence *fence;

	if (is_barrier(active)) /* XXX flush the barrier? */
		return 0;

	fence = i915_active_fence_get(active);
	if (fence) {
		int err;

		err = fn(arg, fence);
		dma_fence_put(fence);
		if (err < 0)
			return err;
	}

	return 0;
}

struct wait_barrier {
	struct wait_queue_entry base;
	struct i915_active *ref;
};

static int
barrier_wake(wait_queue_entry_t *wq, unsigned int mode, int flags, void *key)
{
	struct wait_barrier *wb = container_of(wq, typeof(*wb), base);

	if (i915_active_is_idle(wb->ref)) {
		list_del(&wq->entry);
		i915_sw_fence_complete(wq->private);
		kfree(wq);
	}

	return 0;
}

static int __await_barrier(struct i915_active *ref, struct i915_sw_fence *fence)
{
	struct wait_barrier *wb;

	wb = kmalloc(sizeof(*wb), GFP_KERNEL);
	if (unlikely(!wb))
		return -ENOMEM;

	GEM_BUG_ON(i915_active_is_idle(ref));
	if (!i915_sw_fence_await(fence)) {
		kfree(wb);
		return -EINVAL;
	}

	wb->base.flags = 0;
	wb->base.func = barrier_wake;
	wb->base.private = fence;
	wb->ref = ref;

	add_wait_queue(__var_waitqueue(ref), &wb->base);
	return 0;
}

static int await_active(struct i915_active *ref,
			unsigned int flags,
			int (*fn)(void *arg, struct dma_fence *fence),
			void *arg, struct i915_sw_fence *barrier)
{
	int err = 0;

	if (!i915_active_acquire_if_busy(ref))
		return 0;

	if (flags & I915_ACTIVE_AWAIT_EXCL &&
	    rcu_access_pointer(ref->excl.fence)) {
		err = __await_active(&ref->excl, fn, arg);
		if (err)
			goto out;
	}

	if (flags & I915_ACTIVE_AWAIT_ACTIVE) {
		struct active_node *it, *n;

		rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
			err = __await_active(&it->base, fn, arg);
			if (err)
				goto out;
		}
	}

	if (flags & I915_ACTIVE_AWAIT_BARRIER) {
		err = flush_lazy_signals(ref);
		if (err)
			goto out;

		err = __await_barrier(ref, barrier);
		if (err)
			goto out;
	}

out:
	i915_active_release(ref);
	return err;
}

static int rq_await_fence(void *arg, struct dma_fence *fence)
{
	return i915_request_await_dma_fence(arg, fence);
}

int i915_request_await_active(struct i915_request *rq,
			      struct i915_active *ref,
			      unsigned int flags)
{
	return await_active(ref, flags, rq_await_fence, rq, &rq->submit);
}

static int sw_await_fence(void *arg, struct dma_fence *fence)
{
	return i915_sw_fence_await_dma_fence(arg, fence, 0,
					     GFP_NOWAIT | __GFP_NOWARN);
}

int i915_sw_fence_await_active(struct i915_sw_fence *fence,
			       struct i915_active *ref,
			       unsigned int flags)
{
	return await_active(ref, flags, sw_await_fence, fence, fence);
}

void i915_active_fini(struct i915_active *ref)
{
	debug_active_fini(ref);
	GEM_BUG_ON(atomic_read(&ref->count));
	GEM_BUG_ON(work_pending(&ref->work));
	mutex_destroy(&ref->mutex);

	if (ref->cache)
		kmem_cache_free(global.slab_cache, ref->cache);
}

static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
	return node->timeline == idx && !i915_active_fence_isset(&node->base);
}

static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
{
	struct rb_node *prev, *p;

	if (RB_EMPTY_ROOT(&ref->tree))
		return NULL;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Try to reuse any existing barrier nodes already allocated for this
	 * i915_active; due to overlapping active phases there is likely a
	 * node kept alive (as we reuse before parking). We prefer to reuse
	 * completely idle barriers (less hassle in manipulating the llists),
	 * but otherwise any will do.
	 */
	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
		p = &ref->cache->node;
		goto match;
	}

	prev = NULL;
	p = ref->tree.rb_node;
	while (p) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);

		if (is_idle_barrier(node, idx))
			goto match;

		prev = p;
		if (node->timeline < idx)
			p = READ_ONCE(p->rb_right);
		else
			p = READ_ONCE(p->rb_left);
	}

	/*
	 * No quick match, but we did find the leftmost rb_node for the
	 * kernel_context. Walk the rb_tree in-order to see if there were
	 * any idle-barriers on this timeline that we missed, or just use
	 * the first pending barrier.
	 */
	for (p = prev; p; p = rb_next(p)) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);
		struct intel_engine_cs *engine;

		if (node->timeline > idx)
			break;

		if (node->timeline < idx)
			continue;

		if (is_idle_barrier(node, idx))
			goto match;

		/*
		 * The list of pending barriers is protected by the
		 * kernel_context timeline, which notably we do not hold
		 * here. i915_request_add_active_barriers() may consume
		 * the barrier before we claim it, so we have to check
		 * for success.
		 */
		engine = __barrier_to_engine(node);
		smp_rmb(); /* serialise with add_active_barriers */
		if (is_barrier(&node->base) &&
		    ____active_del_barrier(ref, node, engine))
			goto match;
	}

	return NULL;

match:
	spin_lock_irq(&ref->tree_lock);
	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
	if (p == &ref->cache->node)
		WRITE_ONCE(ref->cache, NULL);
	spin_unlock_irq(&ref->tree_lock);

	return rb_entry(p, struct active_node, node);
}
int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	intel_engine_mask_t tmp, mask = engine->mask;
	struct llist_node *first = NULL, *last = NULL;
	struct intel_gt *gt = engine->gt;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/* Wait until the previous preallocation is completed */
	while (!llist_empty(&ref->preallocated_barriers))
		cond_resched();

	/*
	 * Preallocate a node for each physical engine supporting the target
	 * engine (remember virtual engines have more than one sibling).
	 * We can then use the preallocated nodes in
	 * i915_active_acquire_barrier()
	 */
	GEM_BUG_ON(!mask);
	for_each_engine_masked(engine, gt, mask, tmp) {
		u64 idx = engine->kernel_context->timeline->fence_context;
		struct llist_node *prev = first;
		struct active_node *node;

		rcu_read_lock();
		node = reuse_idle_barrier(ref, idx);
		rcu_read_unlock();
		if (!node) {
			node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
			if (!node)
				goto unwind;

			RCU_INIT_POINTER(node->base.fence, NULL);
			node->base.cb.func = node_retire;
			node->timeline = idx;
			node->ref = ref;
		}

		if (!i915_active_fence_isset(&node->base)) {
			/*
			 * Mark this as being *our* unconnected proto-node.
			 *
			 * Since this node is not in any list, and we have
			 * decoupled it from the rbtree, we can reuse the
			 * request to indicate this is an idle-barrier node
			 * and then we can use the rb_node and list pointers
			 * for our tracking of the pending barrier.
			 */
			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
			node->base.cb.node.prev = (void *)engine;
			__i915_active_acquire(ref);
		}
		GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));

		GEM_BUG_ON(barrier_to_engine(node) != engine);
		first = barrier_to_ll(node);
		first->next = prev;
		if (!last)
			last = first;
		intel_engine_pm_get(engine);
	}

	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
	llist_add_batch(first, last, &ref->preallocated_barriers);

	return 0;

unwind:
	while (first) {
		struct active_node *node = barrier_from_ll(first);

		first = first->next;

		atomic_dec(&ref->count);
		intel_engine_pm_put(barrier_to_engine(node));

		kmem_cache_free(global.slab_cache, node);
	}
	return -ENOMEM;
}

void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;
	unsigned long flags;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Transfer the list of preallocated barriers into the
	 * i915_active rbtree, but only as proto-nodes. They will be
	 * populated by i915_request_add_active_barriers() to point to the
	 * request that will eventually release them.
	 */
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);
		struct intel_engine_cs *engine = barrier_to_engine(node);
		struct rb_node **p, *parent;

		spin_lock_irqsave_nested(&ref->tree_lock, flags,
					 SINGLE_DEPTH_NESTING);
		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			struct active_node *it;

			parent = *p;

			it = rb_entry(parent, struct active_node, node);
			if (it->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);
		spin_unlock_irqrestore(&ref->tree_lock, flags);

		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
		intel_engine_pm_put_delay(engine, 1);
	}
}

static struct dma_fence **ll_to_fence_slot(struct llist_node *node)
{
	return __active_fence_slot(&barrier_from_ll(node)->base);
}

void i915_request_add_active_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;
	unsigned long flags;

	GEM_BUG_ON(!intel_context_is_barrier(rq->context));
	GEM_BUG_ON(intel_engine_is_virtual(engine));
	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);

	node = llist_del_all(&engine->barrier_tasks);
	if (!node)
		return;
	/*
	 * Attach the list of proto-fences to the in-flight request such
	 * that the parent i915_active will be released when this request
	 * is retired.
	 */
	spin_lock_irqsave(&rq->lock, flags);
	llist_for_each_safe(node, next, node) {
		/* serialise with reuse_idle_barrier */
		smp_store_mb(*ll_to_fence_slot(node), &rq->fence);
		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
	}
	spin_unlock_irqrestore(&rq->lock, flags);
}
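/*
 * Putting the three barrier entry points together, the lifecycle runs
 * (sketch of the calling convention; in-tree the callers live on the
 * engine parking / kernel-context switch path):
 *
 *	i915_active_acquire_preallocate_barrier(ref, engine);
 *		- allocate or reuse one proto-node per physical engine
 *	i915_active_acquire_barrier(ref);
 *		- publish the proto-nodes into the rbtree and onto
 *		  engine->barrier_tasks
 *	i915_request_add_active_barriers(rq);
 *		- bind the proto-nodes to the next kernel-context request
 *
 * The i915_active is then held busy until that request retires, at which
 * point node_retire() drops the reference taken during preallocation.
 */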
10358c2ecf20Sopenharmony_ci */ 10368c2ecf20Sopenharmony_ci spin_lock_irqsave(&rq->lock, flags); 10378c2ecf20Sopenharmony_ci llist_for_each_safe(node, next, node) { 10388c2ecf20Sopenharmony_ci /* serialise with reuse_idle_barrier */ 10398c2ecf20Sopenharmony_ci smp_store_mb(*ll_to_fence_slot(node), &rq->fence); 10408c2ecf20Sopenharmony_ci list_add_tail((struct list_head *)node, &rq->fence.cb_list); 10418c2ecf20Sopenharmony_ci } 10428c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rq->lock, flags); 10438c2ecf20Sopenharmony_ci} 10448c2ecf20Sopenharmony_ci 10458c2ecf20Sopenharmony_ci/* 10468c2ecf20Sopenharmony_ci * __i915_active_fence_set: Update the last active fence along its timeline 10478c2ecf20Sopenharmony_ci * @active: the active tracker 10488c2ecf20Sopenharmony_ci * @fence: the new fence (under construction) 10498c2ecf20Sopenharmony_ci * 10508c2ecf20Sopenharmony_ci * Records the new @fence as the last active fence along its timeline in 10518c2ecf20Sopenharmony_ci * this active tracker, moving the tracking callbacks from the previous 10528c2ecf20Sopenharmony_ci * fence onto this one. Gets and returns a reference to the previous fence 10538c2ecf20Sopenharmony_ci * (if not already completed), which the caller must put after making sure 10548c2ecf20Sopenharmony_ci * that it is executed before the new fence. To ensure that the order of 10558c2ecf20Sopenharmony_ci * fences within the timeline of the i915_active_fence is understood, it 10568c2ecf20Sopenharmony_ci * should be locked by the caller. 10578c2ecf20Sopenharmony_ci */ 10588c2ecf20Sopenharmony_cistruct dma_fence * 10598c2ecf20Sopenharmony_ci__i915_active_fence_set(struct i915_active_fence *active, 10608c2ecf20Sopenharmony_ci struct dma_fence *fence) 10618c2ecf20Sopenharmony_ci{ 10628c2ecf20Sopenharmony_ci struct dma_fence *prev; 10638c2ecf20Sopenharmony_ci unsigned long flags; 10648c2ecf20Sopenharmony_ci 10658c2ecf20Sopenharmony_ci /* 10668c2ecf20Sopenharmony_ci * In case of fences embedded in i915_requests, their memory is 10678c2ecf20Sopenharmony_ci * SLAB_FAILSAFE_BY_RCU, then it can be reused right after release 10688c2ecf20Sopenharmony_ci * by new requests. Then, there is a risk of passing back a pointer 10698c2ecf20Sopenharmony_ci * to a new, completely unrelated fence that reuses the same memory 10708c2ecf20Sopenharmony_ci * while tracked under a different active tracker. Combined with i915 10718c2ecf20Sopenharmony_ci * perf open/close operations that build await dependencies between 10728c2ecf20Sopenharmony_ci * engine kernel context requests and user requests from different 10738c2ecf20Sopenharmony_ci * timelines, this can lead to dependency loops and infinite waits. 10748c2ecf20Sopenharmony_ci * 10758c2ecf20Sopenharmony_ci * As a countermeasure, we try to get a reference to the active->fence 10768c2ecf20Sopenharmony_ci * first, so if we succeed and pass it back to our user then it is not 10778c2ecf20Sopenharmony_ci * released and potentially reused by an unrelated request before the 10788c2ecf20Sopenharmony_ci * user has a chance to set up an await dependency on it. 
10798c2ecf20Sopenharmony_ci */ 10808c2ecf20Sopenharmony_ci prev = i915_active_fence_get(active); 10818c2ecf20Sopenharmony_ci if (fence == prev) 10828c2ecf20Sopenharmony_ci return fence; 10838c2ecf20Sopenharmony_ci 10848c2ecf20Sopenharmony_ci GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); 10858c2ecf20Sopenharmony_ci 10868c2ecf20Sopenharmony_ci /* 10878c2ecf20Sopenharmony_ci * Consider that we have two threads arriving (A and B), with 10888c2ecf20Sopenharmony_ci * C already resident as the active->fence. 10898c2ecf20Sopenharmony_ci * 10908c2ecf20Sopenharmony_ci * Both A and B have got a reference to C or NULL, depending on the 10918c2ecf20Sopenharmony_ci * timing of the interrupt handler. Let's assume that if A has got C 10928c2ecf20Sopenharmony_ci * then it has locked C first (before B). 10938c2ecf20Sopenharmony_ci * 10948c2ecf20Sopenharmony_ci * Note the strong ordering of the timeline also provides consistent 10958c2ecf20Sopenharmony_ci * nesting rules for the fence->lock; the inner lock is always the 10968c2ecf20Sopenharmony_ci * older lock. 10978c2ecf20Sopenharmony_ci */ 10988c2ecf20Sopenharmony_ci spin_lock_irqsave(fence->lock, flags); 10998c2ecf20Sopenharmony_ci if (prev) 11008c2ecf20Sopenharmony_ci spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); 11018c2ecf20Sopenharmony_ci 11028c2ecf20Sopenharmony_ci /* 11038c2ecf20Sopenharmony_ci * A does the cmpxchg first, and so it sees C or NULL, as before, or 11048c2ecf20Sopenharmony_ci * something else, depending on the timing of other threads and/or 11058c2ecf20Sopenharmony_ci * interrupt handler. If not the same as before then A unlocks C if 11068c2ecf20Sopenharmony_ci * applicable and retries, starting from an attempt to get a new 11078c2ecf20Sopenharmony_ci * active->fence. Meanwhile, B follows the same path as A. 11088c2ecf20Sopenharmony_ci * Once A succeeds with cmpxch, B fails again, retires, gets A from 11098c2ecf20Sopenharmony_ci * active->fence, locks it as soon as A completes, and possibly 11108c2ecf20Sopenharmony_ci * succeeds with cmpxchg. 11118c2ecf20Sopenharmony_ci */ 11128c2ecf20Sopenharmony_ci while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) { 11138c2ecf20Sopenharmony_ci if (prev) { 11148c2ecf20Sopenharmony_ci spin_unlock(prev->lock); 11158c2ecf20Sopenharmony_ci dma_fence_put(prev); 11168c2ecf20Sopenharmony_ci } 11178c2ecf20Sopenharmony_ci spin_unlock_irqrestore(fence->lock, flags); 11188c2ecf20Sopenharmony_ci 11198c2ecf20Sopenharmony_ci prev = i915_active_fence_get(active); 11208c2ecf20Sopenharmony_ci GEM_BUG_ON(prev == fence); 11218c2ecf20Sopenharmony_ci 11228c2ecf20Sopenharmony_ci spin_lock_irqsave(fence->lock, flags); 11238c2ecf20Sopenharmony_ci if (prev) 11248c2ecf20Sopenharmony_ci spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); 11258c2ecf20Sopenharmony_ci } 11268c2ecf20Sopenharmony_ci 11278c2ecf20Sopenharmony_ci /* 11288c2ecf20Sopenharmony_ci * If prev is NULL then the previous fence must have been signaled 11298c2ecf20Sopenharmony_ci * and we know that we are first on the timeline. If it is still 11308c2ecf20Sopenharmony_ci * present then, having the lock on that fence already acquired, we 11318c2ecf20Sopenharmony_ci * serialise with the interrupt handler, in the process of removing it 11328c2ecf20Sopenharmony_ci * from any future interrupt callback. A will then wait on C before 11338c2ecf20Sopenharmony_ci * executing (if present). 
	 *
	 * As B is second, it sees A as the previous fence and so waits for
	 * it to complete its transition and takes over the occupancy for
	 * itself -- remembering that it needs to wait on A before executing.
	 */
	if (prev) {
		__list_del_entry(&active->cb.node);
		spin_unlock(prev->lock); /* serialise with prev->cb_list */
	}
	list_add_tail(&active->cb.node, &fence->cb_list);
	spin_unlock_irqrestore(fence->lock, flags);

	return prev;
}

int i915_active_fence_set(struct i915_active_fence *active,
			  struct i915_request *rq)
{
	struct dma_fence *fence;
	int err = 0;

	/* Must maintain timeline ordering wrt previous active requests */
	fence = __i915_active_fence_set(active, &rq->fence);
	if (fence) {
		err = i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	return err;
}

void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	active_fence_cb(fence, cb);
}

struct auto_active {
	struct i915_active base;
	struct kref ref;
};

struct i915_active *i915_active_get(struct i915_active *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), base);

	kref_get(&aa->ref);
	return &aa->base;
}

static void auto_release(struct kref *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), ref);

	i915_active_fini(&aa->base);
	kfree(aa);
}

void i915_active_put(struct i915_active *ref)
{
	struct auto_active *aa = container_of(ref, typeof(*aa), base);

	kref_put(&aa->ref, auto_release);
}

static int auto_active(struct i915_active *ref)
{
	i915_active_get(ref);
	return 0;
}
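
/*
 * auto_active() above and auto_retire() below bracket the tracker's busy
 * period: an extra kref is taken when the tracker first becomes active and
 * dropped again once the last tracked request is retired, so a self-managed
 * tracker is never freed while it still has requests in flight.
 */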
__i915_active_call static void
auto_retire(struct i915_active *ref)
{
	i915_active_put(ref);
}

struct i915_active *i915_active_create(void)
{
	struct auto_active *aa;

	aa = kmalloc(sizeof(*aa), GFP_KERNEL);
	if (!aa)
		return NULL;

	kref_init(&aa->ref);
	i915_active_init(&aa->base, auto_active, auto_retire);

	return &aa->base;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

static void i915_global_active_shrink(void)
{
	kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
	kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
	.shrink = i915_global_active_shrink,
	.exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}
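
/*
 * A minimal usage sketch for the self-managed tracker created by
 * i915_active_create() above, assuming a caller that already holds a
 * request @rq (illustrative only, not exercised by this file):
 *
 *	struct i915_active *ref;
 *	int err;
 *
 *	ref = i915_active_create();
 *	if (!ref)
 *		return -ENOMEM;
 *
 *	err = i915_active_acquire(ref);
 *	if (!err) {
 *		err = i915_active_add_request(ref, rq);
 *		i915_active_release(ref);
 *	}
 *	i915_active_put(ref);
 *
 * After the final i915_active_put(), the kref taken by auto_active()
 * keeps the tracker alive until @rq is retired, at which point
 * auto_retire() drops it and auto_release() frees the allocation.
 */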