18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * SPDX-License-Identifier: MIT
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * Copyright © 2019 Intel Corporation
58c2ecf20Sopenharmony_ci */
68c2ecf20Sopenharmony_ci
78c2ecf20Sopenharmony_ci#ifndef _I915_ACTIVE_H_
88c2ecf20Sopenharmony_ci#define _I915_ACTIVE_H_
98c2ecf20Sopenharmony_ci
108c2ecf20Sopenharmony_ci#include <linux/lockdep.h>
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#include "i915_active_types.h"
138c2ecf20Sopenharmony_ci#include "i915_request.h"
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_cistruct i915_request;
168c2ecf20Sopenharmony_cistruct intel_engine_cs;
178c2ecf20Sopenharmony_cistruct intel_timeline;
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ci/*
 * We treat requests as fences. This is not to be confused with our
218c2ecf20Sopenharmony_ci * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
228c2ecf20Sopenharmony_ci * We use the fences to synchronize access from the CPU with activity on the
238c2ecf20Sopenharmony_ci * GPU, for example, we should not rewrite an object's PTE whilst the GPU
248c2ecf20Sopenharmony_ci * is reading them. We also track fences at a higher level to provide
258c2ecf20Sopenharmony_ci * implicit synchronisation around GEM objects, e.g. set-domain will wait
268c2ecf20Sopenharmony_ci * for outstanding GPU rendering before marking the object ready for CPU
278c2ecf20Sopenharmony_ci * access, or a pageflip will wait until the GPU is complete before showing
288c2ecf20Sopenharmony_ci * the frame on the scanout.
298c2ecf20Sopenharmony_ci *
308c2ecf20Sopenharmony_ci * In order to use a fence, the object must track the fence it needs to
318c2ecf20Sopenharmony_ci * serialise with. For example, GEM objects want to track both read and
328c2ecf20Sopenharmony_ci * write access so that we can perform concurrent read operations between
338c2ecf20Sopenharmony_ci * the CPU and GPU engines, as well as waiting for all rendering to
348c2ecf20Sopenharmony_ci * complete, or waiting for the last GPU user of a "fence register". The
358c2ecf20Sopenharmony_ci * object then embeds a #i915_active_fence to track the most recent (in
368c2ecf20Sopenharmony_ci * retirement order) request relevant for the desired mode of access.
378c2ecf20Sopenharmony_ci * The #i915_active_fence is updated with i915_active_fence_set() to
388c2ecf20Sopenharmony_ci * track the most recent fence request, typically this is done as part of
398c2ecf20Sopenharmony_ci * i915_vma_move_to_active().
408c2ecf20Sopenharmony_ci *
418c2ecf20Sopenharmony_ci * When the #i915_active_fence completes (is retired), it will
428c2ecf20Sopenharmony_ci * signal its completion to the owner through a callback as well as mark
 * itself as idle (i915_active_fence.fence == NULL). The owner
448c2ecf20Sopenharmony_ci * can then perform any action, such as delayed freeing of an active
458c2ecf20Sopenharmony_ci * resource including itself.
468c2ecf20Sopenharmony_ci */
478c2ecf20Sopenharmony_ci
/*
 * Declared here, defined elsewhere. This is the callback that
 * __i915_active_fence_init() installs in the tracker when the caller passes
 * a NULL callback.
 * NOTE(review): going by the name it presumably does nothing - confirm
 * against the definition.
 */
void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb);
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci/**
518c2ecf20Sopenharmony_ci * __i915_active_fence_init - prepares the activity tracker for use
528c2ecf20Sopenharmony_ci * @active - the active tracker
538c2ecf20Sopenharmony_ci * @fence - initial fence to track, can be NULL
548c2ecf20Sopenharmony_ci * @func - a callback when then the tracker is retired (becomes idle),
558c2ecf20Sopenharmony_ci *         can be NULL
568c2ecf20Sopenharmony_ci *
578c2ecf20Sopenharmony_ci * i915_active_fence_init() prepares the embedded @active struct for use as
588c2ecf20Sopenharmony_ci * an activity tracker, that is for tracking the last known active fence
598c2ecf20Sopenharmony_ci * associated with it. When the last fence becomes idle, when it is retired
608c2ecf20Sopenharmony_ci * after completion, the optional callback @func is invoked.
618c2ecf20Sopenharmony_ci */
628c2ecf20Sopenharmony_cistatic inline void
638c2ecf20Sopenharmony_ci__i915_active_fence_init(struct i915_active_fence *active,
648c2ecf20Sopenharmony_ci			 void *fence,
658c2ecf20Sopenharmony_ci			 dma_fence_func_t fn)
668c2ecf20Sopenharmony_ci{
678c2ecf20Sopenharmony_ci	RCU_INIT_POINTER(active->fence, fence);
688c2ecf20Sopenharmony_ci	active->cb.func = fn ?: i915_active_noop;
698c2ecf20Sopenharmony_ci}
708c2ecf20Sopenharmony_ci
/*
 * Initialise tracker @A with no initial fence and no caller-supplied
 * callback (__i915_active_fence_init() then installs i915_active_noop()).
 */
#define INIT_ACTIVE_FENCE(A) __i915_active_fence_init((A), NULL, NULL)
738c2ecf20Sopenharmony_ci
/*
 * Variant of i915_active_fence_set() that takes a bare struct dma_fence
 * instead of an i915_request and hands back a struct dma_fence pointer
 * rather than an int. Declared here, defined elsewhere.
 * NOTE(review): the returned pointer is presumably the fence that was
 * tracked before the update - confirm against the definition, including
 * whether the caller owns a reference to it.
 */
struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
			struct dma_fence *fence);
778c2ecf20Sopenharmony_ci
/**
 * i915_active_fence_set - updates the tracker to watch the current fence
 * @active: the active tracker
 * @rq: the request to watch
 *
 * i915_active_fence_set() watches the given @rq for completion. While
 * that @rq is busy, the @active reports busy. When that @rq is signaled
 * (or else retired) the @active tracker is updated to report idle.
 *
 * The int result is annotated __must_check, so callers may not ignore it.
 */
int __must_check
i915_active_fence_set(struct i915_active_fence *active,
		      struct i915_request *rq);
908c2ecf20Sopenharmony_ci/**
918c2ecf20Sopenharmony_ci * i915_active_fence_get - return a reference to the active fence
928c2ecf20Sopenharmony_ci * @active - the active tracker
938c2ecf20Sopenharmony_ci *
948c2ecf20Sopenharmony_ci * i915_active_fence_get() returns a reference to the active fence,
958c2ecf20Sopenharmony_ci * or NULL if the active tracker is idle. The reference is obtained under RCU,
968c2ecf20Sopenharmony_ci * so no locking is required by the caller.
978c2ecf20Sopenharmony_ci *
988c2ecf20Sopenharmony_ci * The reference should be freed with dma_fence_put().
998c2ecf20Sopenharmony_ci */
1008c2ecf20Sopenharmony_cistatic inline struct dma_fence *
1018c2ecf20Sopenharmony_cii915_active_fence_get(struct i915_active_fence *active)
1028c2ecf20Sopenharmony_ci{
1038c2ecf20Sopenharmony_ci	struct dma_fence *fence;
1048c2ecf20Sopenharmony_ci
1058c2ecf20Sopenharmony_ci	rcu_read_lock();
1068c2ecf20Sopenharmony_ci	fence = dma_fence_get_rcu_safe(&active->fence);
1078c2ecf20Sopenharmony_ci	rcu_read_unlock();
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci	return fence;
1108c2ecf20Sopenharmony_ci}
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci/**
1138c2ecf20Sopenharmony_ci * i915_active_fence_isset - report whether the active tracker is assigned
1148c2ecf20Sopenharmony_ci * @active - the active tracker
1158c2ecf20Sopenharmony_ci *
1168c2ecf20Sopenharmony_ci * i915_active_fence_isset() returns true if the active tracker is currently
1178c2ecf20Sopenharmony_ci * assigned to a fence. Due to the lazy retiring, that fence may be idle
1188c2ecf20Sopenharmony_ci * and this may report stale information.
1198c2ecf20Sopenharmony_ci */
1208c2ecf20Sopenharmony_cistatic inline bool
1218c2ecf20Sopenharmony_cii915_active_fence_isset(const struct i915_active_fence *active)
1228c2ecf20Sopenharmony_ci{
1238c2ecf20Sopenharmony_ci	return rcu_access_pointer(active->fence);
1248c2ecf20Sopenharmony_ci}
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci/*
1278c2ecf20Sopenharmony_ci * GPU activity tracking
1288c2ecf20Sopenharmony_ci *
 * Each set of commands submitted to the GPU comprises a single request that
1308c2ecf20Sopenharmony_ci * signals a fence upon completion. struct i915_request combines the
1318c2ecf20Sopenharmony_ci * command submission, scheduling and fence signaling roles. If we want to see
1328c2ecf20Sopenharmony_ci * if a particular task is complete, we need to grab the fence (struct
1338c2ecf20Sopenharmony_ci * i915_request) for that task and check or wait for it to be signaled. More
1348c2ecf20Sopenharmony_ci * often though we want to track the status of a bunch of tasks, for example
1358c2ecf20Sopenharmony_ci * to wait for the GPU to finish accessing some memory across a variety of
1368c2ecf20Sopenharmony_ci * different command pipelines from different clients. We could choose to
1378c2ecf20Sopenharmony_ci * track every single request associated with the task, but knowing that
1388c2ecf20Sopenharmony_ci * each request belongs to an ordered timeline (later requests within a
1398c2ecf20Sopenharmony_ci * timeline must wait for earlier requests), we need only track the
1408c2ecf20Sopenharmony_ci * latest request in each timeline to determine the overall status of the
1418c2ecf20Sopenharmony_ci * task.
1428c2ecf20Sopenharmony_ci *
1438c2ecf20Sopenharmony_ci * struct i915_active provides this tracking across timelines. It builds a
1448c2ecf20Sopenharmony_ci * composite shared-fence, and is updated as new work is submitted to the task,
1458c2ecf20Sopenharmony_ci * forming a snapshot of the current status. It should be embedded into the
1468c2ecf20Sopenharmony_ci * different resources that need to track their associated GPU activity to
1478c2ecf20Sopenharmony_ci * provide a callback when that GPU activity has ceased, or otherwise to
1488c2ecf20Sopenharmony_ci * provide a serialisation point either for request submission or for CPU
1498c2ecf20Sopenharmony_ci * synchronisation.
1508c2ecf20Sopenharmony_ci */
1518c2ecf20Sopenharmony_ci
/*
 * Out-of-line initialiser behind the i915_active_init() macro.
 *
 * @ref:    the i915_active to initialise
 * @active: callback taking @ref and returning an int
 * @retire: callback taking @ref and returning nothing
 * @mkey:   first lock class key (i915_active_init() passes a static one)
 * @wkey:   second lock class key (likewise)
 *
 * NOTE(review): when the two callbacks run, whether they may be NULL, and
 * which locks the two keys annotate are not visible from this header -
 * confirm against the definition.
 */
void __i915_active_init(struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *mkey,
			struct lock_class_key *wkey);
1578c2ecf20Sopenharmony_ci
/*
 * Specialise each class of i915_active to avoid impossible lockdep cycles:
 * every expansion of this macro declares its own pair of static
 * lock_class_key objects and passes their addresses, together with @ref,
 * @active and @retire, to __i915_active_init().
 */
#define i915_active_init(ref, active, retire) do {			\
	static struct lock_class_key __mkey;				\
	static struct lock_class_key __wkey;				\
									\
	__i915_active_init((ref), (active), (retire),			\
			   &__mkey, &__wkey);				\
} while (0)
1658c2ecf20Sopenharmony_ci
/*
 * Both functions take the tracker @ref, a 64-bit index @idx and a @fence;
 * they are declared here and defined elsewhere. i915_active_add_request()
 * calls i915_active_ref() with the fence_context of the request's timeline
 * as @idx.
 *
 * __i915_active_ref() returns a struct dma_fence pointer, i915_active_ref()
 * an int.
 * NOTE(review): the meaning of the returned fence (presumably the one
 * previously tracked for @idx) and the error convention of the int result
 * should be confirmed against the definitions.
 */
struct dma_fence *
__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);
int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);
1698c2ecf20Sopenharmony_ci
1708c2ecf20Sopenharmony_cistatic inline int
1718c2ecf20Sopenharmony_cii915_active_add_request(struct i915_active *ref, struct i915_request *rq)
1728c2ecf20Sopenharmony_ci{
1738c2ecf20Sopenharmony_ci	return i915_active_ref(ref,
1748c2ecf20Sopenharmony_ci			       i915_request_timeline(rq)->fence_context,
1758c2ecf20Sopenharmony_ci			       &rq->fence);
1768c2ecf20Sopenharmony_ci}
1778c2ecf20Sopenharmony_ci
/*
 * Declared here, defined elsewhere; takes the tracker @ref and a fence @f
 * and returns a struct dma_fence pointer.
 * NOTE(review): presumably installs @f in ref->excl (the slot tested by
 * i915_active_has_exclusive()) and returns the fence it replaced - confirm
 * against the definition.
 */
struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
1808c2ecf20Sopenharmony_ci
1818c2ecf20Sopenharmony_cistatic inline bool i915_active_has_exclusive(struct i915_active *ref)
1828c2ecf20Sopenharmony_ci{
1838c2ecf20Sopenharmony_ci	return rcu_access_pointer(ref->excl.fence);
1848c2ecf20Sopenharmony_ci}
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_ciint __i915_active_wait(struct i915_active *ref, int state);
1878c2ecf20Sopenharmony_cistatic inline int i915_active_wait(struct i915_active *ref)
1888c2ecf20Sopenharmony_ci{
1898c2ecf20Sopenharmony_ci	return __i915_active_wait(ref, TASK_INTERRUPTIBLE);
1908c2ecf20Sopenharmony_ci}
1918c2ecf20Sopenharmony_ci
/*
 * Two entry points that take an i915_active @ref plus a @flags bitmask, one
 * on behalf of an i915_sw_fence and one on behalf of an i915_request. Both
 * are declared here and defined elsewhere, and both return an int.
 *
 * NOTE(review): the I915_ACTIVE_AWAIT_* bits below are presumably the
 * values accepted in @flags; what each bit selects is not visible from
 * this header - confirm against the definitions.
 */
int i915_sw_fence_await_active(struct i915_sw_fence *fence,
			       struct i915_active *ref,
			       unsigned int flags);
int i915_request_await_active(struct i915_request *rq,
			      struct i915_active *ref,
			      unsigned int flags);
#define I915_ACTIVE_AWAIT_EXCL BIT(0)
#define I915_ACTIVE_AWAIT_ACTIVE BIT(1)
#define I915_ACTIVE_AWAIT_BARRIER BIT(2)
2018c2ecf20Sopenharmony_ci
/*
 * Acquire/release entry points for an i915_active, declared here and
 * defined elsewhere. The two int-returning variants can report a result
 * to the caller, the _if_busy variant reports a bool, and release returns
 * nothing.
 * NOTE(review): presumably these manipulate ref->count (the counter that
 * __i915_active_acquire() increments and i915_active_is_idle() tests) -
 * confirm against the definitions, along with the error convention.
 */
int i915_active_acquire(struct i915_active *ref);
int i915_active_acquire_for_context(struct i915_active *ref, u64 idx);
bool i915_active_acquire_if_busy(struct i915_active *ref);

void i915_active_release(struct i915_active *ref);
2078c2ecf20Sopenharmony_ci
/*
 * __i915_active_acquire - take an extra reference on a tracker already in use
 * @ref: the tracker whose count is incremented
 *
 * Atomically increments ref->count. The count must already be non-zero on
 * entry; a zero count trips the GEM_BUG_ON() assertion.
 */
static inline void __i915_active_acquire(struct i915_active *ref)
{
	/* Only legal on a tracker that is already held by someone. */
	GEM_BUG_ON(!atomic_read(&ref->count));
	atomic_inc(&ref->count);
}
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_cistatic inline bool
2158c2ecf20Sopenharmony_cii915_active_is_idle(const struct i915_active *ref)
2168c2ecf20Sopenharmony_ci{
2178c2ecf20Sopenharmony_ci	return !atomic_read(&ref->count);
2188c2ecf20Sopenharmony_ci}
2198c2ecf20Sopenharmony_ci
/*
 * Remaining out-of-line entry points; all are declared here and defined
 * elsewhere. NOTE(review): the one-line summaries below are inferred from
 * the names and signatures only - confirm against the definitions.
 */

/* Presumably the teardown counterpart of i915_active_init(). */
void i915_active_fini(struct i915_active *ref);

/*
 * Barrier helpers: the preallocate step takes the tracker and an engine and
 * can report a result through its int return; the other two return nothing.
 */
int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine);
void i915_active_acquire_barrier(struct i915_active *ref);
void i915_request_add_active_barriers(struct i915_request *rq);

/* Output goes through the drm_printer @m. */
void i915_active_print(struct i915_active *ref, struct drm_printer *m);
void i915_active_unlock_wait(struct i915_active *ref);

/*
 * Create/get/put for an i915_active handled by pointer; create takes no
 * arguments, and get returns an i915_active pointer.
 */
struct i915_active *i915_active_create(void);
struct i915_active *i915_active_get(struct i915_active *ref);
void i915_active_put(struct i915_active *ref);
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_cistatic inline int __i915_request_await_exclusive(struct i915_request *rq,
2358c2ecf20Sopenharmony_ci						 struct i915_active *active)
2368c2ecf20Sopenharmony_ci{
2378c2ecf20Sopenharmony_ci	struct dma_fence *fence;
2388c2ecf20Sopenharmony_ci	int err = 0;
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci	fence = i915_active_fence_get(&active->excl);
2418c2ecf20Sopenharmony_ci	if (fence) {
2428c2ecf20Sopenharmony_ci		err = i915_request_await_dma_fence(rq, fence);
2438c2ecf20Sopenharmony_ci		dma_fence_put(fence);
2448c2ecf20Sopenharmony_ci	}
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_ci	return err;
2478c2ecf20Sopenharmony_ci}
2488c2ecf20Sopenharmony_ci
2498c2ecf20Sopenharmony_ci#endif /* _I915_ACTIVE_H_ */
250