/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * DOC: Overview
 *
 * The GPU scheduler provides entities which allow userspace to push jobs
 * into software queues which are then scheduled on a hardware run queue.
 * The software queues have a priority among them. The scheduler selects the
 * entities from the run queue using a FIFO. The scheduler provides dependency
 * handling features among jobs. The driver is supposed to provide callback
 * functions for backend operations to the scheduler, like submitting a job to
 * the hardware run queue, returning the dependencies of a job, etc.
 *
 * The organisation of the scheduler is the following:
 *
 * 1. Each hw run queue has one scheduler
 * 2. Each scheduler has multiple run queues with different priorities
 *    (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL)
 * 3. Each scheduler run queue has a queue of entities to schedule
 * 4. Entities themselves maintain a queue of jobs that will be scheduled on
 *    the hardware.
 *
 * The jobs in an entity are always scheduled in the order in which they were
 * pushed.
 *
 * Note that once a job has been taken from the entity's queue and pushed to
 * the hardware, i.e. the pending queue, the entity must not be referenced
 * any more through the job's entity pointer.
 */
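
/*
 * A rough sketch of how these pieces fit together in a driver; the
 * drm_sched_*() calls below are the real exported API, while the surrounding
 * structure (my_sched, my_ops, my_entity, my_job, sched_list) is hypothetical:
 *
 *	struct drm_gpu_scheduler *sched_list = &my_sched;
 *
 *	drm_sched_init(&my_sched, &my_ops, hw_submission, hang_limit,
 *		       msecs_to_jiffies(timeout_ms), NULL, NULL, "my_ring", dev);
 *	drm_sched_entity_init(&my_entity, DRM_SCHED_PRIORITY_NORMAL,
 *			      &sched_list, 1, NULL);
 *
 *	drm_sched_job_init(&my_job->base, &my_entity, my_owner);
 *	drm_sched_job_arm(&my_job->base);
 *	drm_sched_entity_push_job(&my_job->base);
 */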

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
#include <linux/dma-resv.h>
#include <uapi/linux/sched/types.h>

#include <drm/drm_print.h>
#include <drm/drm_gem.h>
#include <drm/drm_syncobj.h>
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job)		\
		container_of((sched_job), struct drm_sched_job, queue_node)

int drm_sched_policy = DRM_SCHED_POLICY_FIFO;

/**
 * DOC: sched_policy (int)
 * Used to override the default entity scheduling policy in a run queue.
 */
MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default).");
module_param_named(sched_policy, drm_sched_policy, int, 0444);

static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a,
							    const struct rb_node *b)
{
	struct drm_sched_entity *ent_a = rb_entry((a), struct drm_sched_entity, rb_tree_node);
	struct drm_sched_entity *ent_b = rb_entry((b), struct drm_sched_entity, rb_tree_node);

	return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting);
}

static inline void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity)
{
	struct drm_sched_rq *rq = entity->rq;

	if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
		rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root);
		RB_CLEAR_NODE(&entity->rb_tree_node);
	}
}

void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts)
{
	/*
	 * Both locks need to be grabbed, one to protect against entity->rq
	 * being changed by a concurrent drm_sched_entity_select_rq() and the
	 * other to update the rb tree structure.
	 */
	spin_lock(&entity->rq_lock);
	spin_lock(&entity->rq->lock);

	drm_sched_rq_remove_fifo_locked(entity);

	entity->oldest_job_waiting = ts;

	rb_add_cached(&entity->rb_tree_node, &entity->rq->rb_tree_root,
		      drm_sched_entity_compare_before);

	spin_unlock(&entity->rq->lock);
	spin_unlock(&entity->rq_lock);
}

/**
 * drm_sched_rq_init - initialize a given run queue struct
 *
 * @sched: scheduler instance to associate with this run queue
 * @rq: scheduler run queue
 *
 * Initializes a scheduler runqueue.
 */
static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
			      struct drm_sched_rq *rq)
{
	spin_lock_init(&rq->lock);
	INIT_LIST_HEAD(&rq->entities);
	rq->rb_tree_root = RB_ROOT_CACHED;
	rq->current_entity = NULL;
	rq->sched = sched;
}

/**
 * drm_sched_rq_add_entity - add an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Adds a scheduler entity to the run queue.
 */
void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
			     struct drm_sched_entity *entity)
{
	if (!list_empty(&entity->list))
		return;

	spin_lock(&rq->lock);

	atomic_inc(rq->sched->score);
	list_add_tail(&entity->list, &rq->entities);

	spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_remove_entity - remove an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Removes a scheduler entity from the run queue.
 */
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
				struct drm_sched_entity *entity)
{
	if (list_empty(&entity->list))
		return;

	spin_lock(&rq->lock);

	atomic_dec(rq->sched->score);
	list_del_init(&entity->list);

	if (rq->current_entity == entity)
		rq->current_entity = NULL;

	if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
		drm_sched_rq_remove_fifo_locked(entity);

	spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
 *
 * @rq: scheduler run queue to check.
 *
 * Try to find a ready entity, returns NULL if none found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
{
	struct drm_sched_entity *entity;

	spin_lock(&rq->lock);

	entity = rq->current_entity;
	if (entity) {
		list_for_each_entry_continue(entity, &rq->entities, list) {
			if (drm_sched_entity_is_ready(entity)) {
				rq->current_entity = entity;
				reinit_completion(&entity->entity_idle);
				spin_unlock(&rq->lock);
				return entity;
			}
		}
	}

	list_for_each_entry(entity, &rq->entities, list) {
		if (drm_sched_entity_is_ready(entity)) {
			rq->current_entity = entity;
			reinit_completion(&entity->entity_idle);
			spin_unlock(&rq->lock);
			return entity;
		}

		if (entity == rq->current_entity)
			break;
	}

	spin_unlock(&rq->lock);

	return NULL;
}

/**
 * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
 *
 * @rq: scheduler run queue to check.
 *
 * Find oldest waiting ready entity, returns NULL if none found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
{
	struct rb_node *rb;

	spin_lock(&rq->lock);
	for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) {
		struct drm_sched_entity *entity;

		entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node);
		if (drm_sched_entity_is_ready(entity)) {
			rq->current_entity = entity;
			reinit_completion(&entity->entity_idle);
			break;
		}
	}
	spin_unlock(&rq->lock);

	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
}

/**
 * drm_sched_job_done - complete a job
 * @s_job: pointer to the job which is done
 *
 * Finish the job's fence and wake up the worker thread.
 */
static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
{
	struct drm_sched_fence *s_fence = s_job->s_fence;
	struct drm_gpu_scheduler *sched = s_fence->sched;

	atomic_dec(&sched->hw_rq_count);
	atomic_dec(sched->score);

	trace_drm_sched_process_job(s_fence);

	dma_fence_get(&s_fence->finished);
	drm_sched_fence_finished(s_fence, result);
	dma_fence_put(&s_fence->finished);
	wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_job_done_cb - the callback for a done job
 * @f: fence
 * @cb: fence callbacks
 */
static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);

	drm_sched_job_done(s_job, f->error);
}

/**
 * drm_sched_start_timeout - start timeout for reset worker
 *
 * @sched: scheduler instance to start the worker for
 *
 * Start the timeout for the given scheduler.
 */
static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !list_empty(&sched->pending_list))
		queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
}

/**
 * drm_sched_fault - immediately start timeout handler
 *
 * @sched: scheduler where the timeout handling should be started.
 *
 * Start timeout handling immediately when the driver detects a hardware fault.
 */
void drm_sched_fault(struct drm_gpu_scheduler *sched)
{
	if (sched->timeout_wq)
		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0);
}
EXPORT_SYMBOL(drm_sched_fault);

/**
 * drm_sched_suspend_timeout - Suspend scheduler job timeout
 *
 * @sched: scheduler instance for which to suspend the timeout
 *
 * Suspend the delayed work timeout for the scheduler. This is done by
 * modifying the delayed work timeout to an arbitrarily large value,
 * MAX_SCHEDULE_TIMEOUT in this case.
 *
 * Returns the remaining timeout.
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
{
	unsigned long sched_timeout, now = jiffies;

	sched_timeout = sched->work_tdr.timer.expires;

	/*
	 * Modify the timeout to an arbitrarily large value. This also prevents
	 * the timeout from being restarted when new submissions arrive.
	 */
	if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
			&& time_after(sched_timeout, now))
		return sched_timeout - now;
	else
		return sched->timeout;
}
EXPORT_SYMBOL(drm_sched_suspend_timeout);

/**
 * drm_sched_resume_timeout - Resume scheduler job timeout
 *
 * @sched: scheduler instance for which to resume the timeout
 * @remaining: remaining timeout
 *
 * Resume the delayed work timeout for the scheduler.
 */
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
		unsigned long remaining)
{
	spin_lock(&sched->job_list_lock);

	if (list_empty(&sched->pending_list))
		cancel_delayed_work(&sched->work_tdr);
	else
		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining);

	spin_unlock(&sched->job_list_lock);
}
EXPORT_SYMBOL(drm_sched_resume_timeout);
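
/*
 * A minimal sketch of how drm_sched_suspend_timeout() and
 * drm_sched_resume_timeout() are meant to be paired by a driver around an
 * operation during which the job timeout must not fire; the step in the
 * middle is hypothetical, only the two helper calls are the real API:
 *
 *	unsigned long remaining;
 *
 *	remaining = drm_sched_suspend_timeout(sched);
 *	my_preempt_ring(ring);
 *	drm_sched_resume_timeout(sched, remaining);
 */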

static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
	struct drm_gpu_scheduler *sched = s_job->sched;

	spin_lock(&sched->job_list_lock);
	list_add_tail(&s_job->list, &sched->pending_list);
	drm_sched_start_timeout(sched);
	spin_unlock(&sched->job_list_lock);
}

static void drm_sched_job_timedout(struct work_struct *work)
{
	struct drm_gpu_scheduler *sched;
	struct drm_sched_job *job;
	enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL;

	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);

	/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
	spin_lock(&sched->job_list_lock);
	job = list_first_entry_or_null(&sched->pending_list,
				       struct drm_sched_job, list);

	if (job) {
		/*
		 * Remove the bad job so it cannot be freed by a concurrent
		 * drm_sched_get_cleanup_job(). It will be reinserted after
		 * sched->thread is parked, at which point it is safe.
		 */
		list_del_init(&job->list);
		spin_unlock(&sched->job_list_lock);

		status = job->sched->ops->timedout_job(job);

		/*
		 * The guilty job did complete and hence needs to be removed
		 * manually; see the drm_sched_stop() documentation.
		 */
		if (sched->free_guilty) {
			job->sched->ops->free_job(job);
			sched->free_guilty = false;
		}
	} else {
		spin_unlock(&sched->job_list_lock);
	}

	if (status != DRM_GPU_SCHED_STAT_ENODEV) {
		spin_lock(&sched->job_list_lock);
		drm_sched_start_timeout(sched);
		spin_unlock(&sched->job_list_lock);
	}
}

/**
 * drm_sched_stop - stop the scheduler
 *
 * @sched: scheduler instance
 * @bad: job which caused the time out
 *
 * Stop the scheduler and also remove and free all completed jobs.
 * Note: the bad job will not be freed, as it might be used later, so it is the
 * caller's responsibility to release it manually if it is no longer part of
 * the pending list.
 */
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
{
	struct drm_sched_job *s_job, *tmp;

	kthread_park(sched->thread);

	/*
	 * Reinsert back the bad job here - now it's safe as
	 * drm_sched_get_cleanup_job cannot race against us and release the
	 * bad job at this point - we parked (waited for) any in progress
	 * (earlier) cleanups and drm_sched_get_cleanup_job will not be called
	 * now until the scheduler thread is unparked.
	 */
	if (bad && bad->sched == sched)
		/*
		 * Add at the head of the queue to reflect it was the earliest
		 * job extracted.
		 */
		list_add(&bad->list, &sched->pending_list);

	/*
	 * Iterate the job list from later to earlier and either deactivate
	 * their HW callbacks or remove the jobs from the pending list if they
	 * have already signaled.
	 * This iteration is thread safe as the sched thread is stopped.
	 */
	list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list,
					 list) {
		if (s_job->s_fence->parent &&
		    dma_fence_remove_callback(s_job->s_fence->parent,
					      &s_job->cb)) {
			dma_fence_put(s_job->s_fence->parent);
			s_job->s_fence->parent = NULL;
			atomic_dec(&sched->hw_rq_count);
		} else {
			/*
			 * remove job from pending_list.
			 * Locking here is for concurrent resume timeout
			 */
			spin_lock(&sched->job_list_lock);
			list_del_init(&s_job->list);
			spin_unlock(&sched->job_list_lock);

			/*
			 * Wait for job's HW fence callback to finish using s_job
			 * before releasing it.
			 *
			 * Job is still alive so fence refcount at least 1
			 */
			dma_fence_wait(&s_job->s_fence->finished, false);

			/*
			 * We must keep bad job alive for later use during
			 * recovery by some of the drivers but leave a hint
			 * that the guilty job must be released.
			 */
			if (bad != s_job)
				sched->ops->free_job(s_job);
			else
				sched->free_guilty = true;
		}
	}

	/*
	 * Stop the pending timer in flight, as we rearm it in drm_sched_start().
	 * This prevents the pending timeout work in progress from firing right
	 * away after this TDR finished and before the newly restarted jobs have
	 * had a chance to complete.
	 */
	cancel_delayed_work(&sched->work_tdr);
}
EXPORT_SYMBOL(drm_sched_stop);

/**
 * drm_sched_start - recover jobs after a reset
 *
 * @sched: scheduler instance
 * @full_recovery: proceed with complete sched restart
 */
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
{
	struct drm_sched_job *s_job, *tmp;
	int r;

	/*
	 * Locking the list is not required here as the sched thread is parked
	 * so no new jobs are being inserted or removed. Also concurrent
	 * GPU recovers can't run in parallel.
	 */
	list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
		struct dma_fence *fence = s_job->s_fence->parent;

		atomic_inc(&sched->hw_rq_count);

		if (!full_recovery)
			continue;

		if (fence) {
			r = dma_fence_add_callback(fence, &s_job->cb,
						   drm_sched_job_done_cb);
			if (r == -ENOENT)
				drm_sched_job_done(s_job, fence->error);
			else if (r)
				DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
					      r);
		} else
			drm_sched_job_done(s_job, -ECANCELED);
	}

	if (full_recovery) {
		spin_lock(&sched->job_list_lock);
		drm_sched_start_timeout(sched);
		spin_unlock(&sched->job_list_lock);
	}

	kthread_unpark(sched->thread);
}
EXPORT_SYMBOL(drm_sched_start);

/**
 * drm_sched_resubmit_jobs - Deprecated, don't use in new code!
 *
 * @sched: scheduler instance
 *
 * Re-submitting jobs was a concept AMD came up with as a cheap way to
 * implement recovery after a job timeout.
 *
 * This turned out to not work very well. First of all there are many
 * problems with the dma_fence implementation and requirements. Either the
 * implementation is risking deadlocks with core memory management or violating
 * documented implementation details of the dma_fence object.
 *
 * Drivers can still save and restore their state for recovery operations, but
 * we shouldn't make this a general scheduler feature around the dma_fence
 * interface.
 */
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *s_job, *tmp;
	uint64_t guilty_context;
	bool found_guilty = false;
	struct dma_fence *fence;

	list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
		struct drm_sched_fence *s_fence = s_job->s_fence;

		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
			found_guilty = true;
			guilty_context = s_job->s_fence->scheduled.context;
		}

		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
			dma_fence_set_error(&s_fence->finished, -ECANCELED);

		fence = sched->ops->run_job(s_job);

		if (IS_ERR_OR_NULL(fence)) {
			if (IS_ERR(fence))
				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

			s_job->s_fence->parent = NULL;
		} else {
			s_job->s_fence->parent = dma_fence_get(fence);

			/* Drop for original kref_init */
			dma_fence_put(fence);
		}
	}
}
EXPORT_SYMBOL(drm_sched_resubmit_jobs);
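
/*
 * For reference only, the legacy recovery sequence that used the helper above
 * looked roughly like this inside a driver's
 * &drm_sched_backend_ops.timedout_job callback; this is a sketch, and
 * my_reset_hw() stands in for driver-specific reset code:
 *
 *	drm_sched_stop(sched, bad_job);
 *	my_reset_hw(dev_priv);
 *	drm_sched_resubmit_jobs(sched);
 *	drm_sched_start(sched, true);
 *	return DRM_GPU_SCHED_STAT_NOMINAL;
 */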

/**
 * drm_sched_job_init - init a scheduler job
 * @job: scheduler job to init
 * @entity: scheduler entity to use
 * @owner: job owner for debugging
 *
 * Refer to drm_sched_entity_push_job() documentation
 * for locking considerations.
 *
 * Drivers must make sure to call drm_sched_job_cleanup() if this function
 * returns successfully, even when @job is aborted before drm_sched_job_arm()
 * is called.
 *
 * WARNING: amdgpu abuses &drm_sched.ready to signal when the hardware
 * has died, which can mean that there's no valid runqueue for @entity.
 * This function returns -ENOENT in this case (which probably should be -EIO as
 * a more meaningful return value).
 *
 * Returns 0 for success, negative error code otherwise.
 */
int drm_sched_job_init(struct drm_sched_job *job,
		       struct drm_sched_entity *entity,
		       void *owner)
{
	if (!entity->rq)
		return -ENOENT;

	job->entity = entity;
	job->s_fence = drm_sched_fence_alloc(entity, owner);
	if (!job->s_fence)
		return -ENOMEM;

	INIT_LIST_HEAD(&job->list);

	xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC);

	return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);
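
/*
 * A sketch of the expected error unwinding around drm_sched_job_init(); the
 * my_prepare_job() step is hypothetical, the drm_sched_*() calls are real:
 *
 *	r = drm_sched_job_init(&job->base, entity, owner);
 *	if (r)
 *		return r;
 *
 *	r = my_prepare_job(job);
 *	if (r) {
 *		drm_sched_job_cleanup(&job->base);
 *		return r;
 *	}
 *
 *	drm_sched_job_arm(&job->base);
 *	drm_sched_entity_push_job(&job->base);
 */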

/**
 * drm_sched_job_arm - arm a scheduler job for execution
 * @job: scheduler job to arm
 *
 * This arms a scheduler job for execution. Specifically it initializes the
 * &drm_sched_job.s_fence of @job, so that it can be attached to struct dma_resv
 * or other places that need to track the completion of this job.
 *
 * Refer to drm_sched_entity_push_job() documentation for locking
 * considerations.
 *
 * This can only be called if drm_sched_job_init() succeeded.
 */
void drm_sched_job_arm(struct drm_sched_job *job)
{
	struct drm_gpu_scheduler *sched;
	struct drm_sched_entity *entity = job->entity;

	BUG_ON(!entity);
	drm_sched_entity_select_rq(entity);
	sched = entity->rq->sched;

	job->sched = sched;
	job->s_priority = entity->rq - sched->sched_rq;
	job->id = atomic64_inc_return(&sched->job_id_count);

	drm_sched_fence_init(job->s_fence, job->entity);
}
EXPORT_SYMBOL(drm_sched_job_arm);

/**
 * drm_sched_job_add_dependency - adds the fence as a job dependency
 * @job: scheduler job to add the dependencies to
 * @fence: the dma_fence to add to the list of dependencies.
 *
 * Note that @fence is consumed in both the success and error cases.
 *
 * Returns:
 * 0 on success, or an error on failing to expand the array.
 */
int drm_sched_job_add_dependency(struct drm_sched_job *job,
				 struct dma_fence *fence)
{
	struct dma_fence *entry;
	unsigned long index;
	u32 id = 0;
	int ret;

	if (!fence)
		return 0;

	/* Deduplicate if we already depend on a fence from the same context.
	 * This lets the size of the array of deps scale with the number of
	 * engines involved, rather than the number of BOs.
	 */
	xa_for_each(&job->dependencies, index, entry) {
		if (entry->context != fence->context)
			continue;

		if (dma_fence_is_later(fence, entry)) {
			dma_fence_put(entry);
			xa_store(&job->dependencies, index, fence, GFP_KERNEL);
		} else {
			dma_fence_put(fence);
		}
		return 0;
	}

	ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL);
	if (ret != 0)
		dma_fence_put(fence);

	return ret;
}
EXPORT_SYMBOL(drm_sched_job_add_dependency);

/**
 * drm_sched_job_add_syncobj_dependency - adds a syncobj's fence as a job dependency
 * @job: scheduler job to add the dependencies to
 * @file: drm file private pointer
 * @handle: syncobj handle to lookup
 * @point: timeline point
 *
 * This adds the fence matching the given syncobj to @job.
 *
 * Returns:
 * 0 on success, or an error on failing to expand the array.
 */
int drm_sched_job_add_syncobj_dependency(struct drm_sched_job *job,
					 struct drm_file *file,
					 u32 handle,
					 u32 point)
{
	struct dma_fence *fence;
	int ret;

	ret = drm_syncobj_find_fence(file, handle, point, 0, &fence);
	if (ret)
		return ret;

	return drm_sched_job_add_dependency(job, fence);
}
EXPORT_SYMBOL(drm_sched_job_add_syncobj_dependency);

/**
 * drm_sched_job_add_resv_dependencies - add all fences from the resv to the job
 * @job: scheduler job to add the dependencies to
 * @resv: the dma_resv object to get the fences from
 * @usage: the dma_resv_usage to use to filter the fences
 *
 * This adds all fences matching the given usage from @resv to @job.
 * Must be called with the @resv lock held.
 *
 * Returns:
 * 0 on success, or an error on failing to expand the array.
 */
int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job,
					struct dma_resv *resv,
					enum dma_resv_usage usage)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	int ret;

	dma_resv_assert_held(resv);

	dma_resv_for_each_fence(&cursor, resv, usage, fence) {
		/* Make sure to grab an additional ref on the added fence */
		dma_fence_get(fence);
		ret = drm_sched_job_add_dependency(job, fence);
		if (ret) {
			dma_fence_put(fence);
			return ret;
		}
	}
	return 0;
}
EXPORT_SYMBOL(drm_sched_job_add_resv_dependencies);

/**
 * drm_sched_job_add_implicit_dependencies - adds implicit dependencies as job
 *   dependencies
 * @job: scheduler job to add the dependencies to
 * @obj: the gem object to add new dependencies from.
 * @write: whether the job might write the object (so we need to depend on
 * shared fences in the reservation object).
 *
 * This should be called after drm_gem_lock_reservations() on your array of
 * GEM objects used in the job but before updating the reservations with your
 * own fences.
 *
 * Returns:
 * 0 on success, or an error on failing to expand the array.
 */
int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
					    struct drm_gem_object *obj,
					    bool write)
{
	return drm_sched_job_add_resv_dependencies(job, obj->resv,
						   dma_resv_usage_rw(write));
}
EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
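
/*
 * A sketch of where this fits in a submission path; bo_array, bo_count, ctx
 * and the write flag are hypothetical driver state, while the locking helper
 * and the dependency call are the real API:
 *
 *	r = drm_gem_lock_reservations(bo_array, bo_count, &ctx);
 *	if (r)
 *		return r;
 *
 *	for (i = 0; i < bo_count; i++) {
 *		r = drm_sched_job_add_implicit_dependencies(&job->base,
 *							    bo_array[i], write);
 *		if (r)
 *			goto out_unlock;
 *	}
 */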

/**
 * drm_sched_job_cleanup - clean up scheduler job resources
 * @job: scheduler job to clean up
 *
 * Cleans up the resources allocated with drm_sched_job_init().
 *
 * Drivers should call this from their error unwind code if @job is aborted
 * before drm_sched_job_arm() is called.
 *
 * After that point of no return @job is committed to be executed by the
 * scheduler, and this function should be called from the
 * &drm_sched_backend_ops.free_job callback.
 */
void drm_sched_job_cleanup(struct drm_sched_job *job)
{
	struct dma_fence *fence;
	unsigned long index;

	if (kref_read(&job->s_fence->finished.refcount)) {
		/* drm_sched_job_arm() has been called */
		dma_fence_put(&job->s_fence->finished);
	} else {
		/* aborted job before committing to run it */
		drm_sched_fence_free(job->s_fence);
	}

	job->s_fence = NULL;

	xa_for_each(&job->dependencies, index, fence) {
		dma_fence_put(fence);
	}
	xa_destroy(&job->dependencies);
}
EXPORT_SYMBOL(drm_sched_job_cleanup);

/**
 * drm_sched_can_queue - Can we queue more to the hardware?
 * @sched: scheduler instance
 *
 * Return true if we can push more jobs to the hw, otherwise false.
 */
static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
{
	return atomic_read(&sched->hw_rq_count) <
		sched->hw_submission_limit;
}

/**
 * drm_sched_wakeup_if_can_queue - Wake up the scheduler
 * @sched: scheduler instance
 *
 * Wake up the scheduler if we can queue jobs.
 */
void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched)
{
	if (drm_sched_can_queue(sched))
		wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_select_entity - Select next entity to process
 *
 * @sched: scheduler instance
 *
 * Returns the entity to process or NULL if none are found.
 */
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_entity *entity;
	int i;

	if (!drm_sched_can_queue(sched))
		return NULL;

	/* Kernel run queue has higher priority than normal run queue */
	for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
		entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ?
			drm_sched_rq_select_entity_fifo(&sched->sched_rq[i]) :
			drm_sched_rq_select_entity_rr(&sched->sched_rq[i]);
		if (entity)
			break;
	}

	return entity;
}

/**
 * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
 *
 * @sched: scheduler instance
 *
 * Returns the next finished job from the pending list (if there is one)
 * ready for it to be destroyed.
 */
static struct drm_sched_job *
drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *job, *next;

	spin_lock(&sched->job_list_lock);

	job = list_first_entry_or_null(&sched->pending_list,
				       struct drm_sched_job, list);

	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
		/* remove job from pending_list */
		list_del_init(&job->list);

		/* cancel this job's TO timer */
		cancel_delayed_work(&sched->work_tdr);
		/* make the scheduled timestamp more accurate */
		next = list_first_entry_or_null(&sched->pending_list,
						typeof(*next), list);

		if (next) {
			next->s_fence->scheduled.timestamp =
				dma_fence_timestamp(&job->s_fence->finished);
			/* start TO timer for next job */
			drm_sched_start_timeout(sched);
		}
	} else {
		job = NULL;
	}

	spin_unlock(&sched->job_list_lock);

	return job;
}

/**
 * drm_sched_pick_best - Get a drm sched from a sched_list with the least load
 * @sched_list: list of drm_gpu_schedulers
 * @num_sched_list: number of drm_gpu_schedulers in the sched_list
 *
 * Returns pointer of the sched with the least load or NULL if none of the
 * drm_gpu_schedulers are ready
 */
struct drm_gpu_scheduler *
drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
		     unsigned int num_sched_list)
{
	struct drm_gpu_scheduler *sched, *picked_sched = NULL;
	int i;
	unsigned int min_score = UINT_MAX, num_score;

	for (i = 0; i < num_sched_list; ++i) {
		sched = sched_list[i];

		if (!sched->ready) {
			DRM_WARN("scheduler %s is not ready, skipping",
				 sched->name);
			continue;
		}

		num_score = atomic_read(sched->score);
		if (num_score < min_score) {
			min_score = num_score;
			picked_sched = sched;
		}
	}

	return picked_sched;
}
EXPORT_SYMBOL(drm_sched_pick_best);
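
/*
 * One place this load balancing shows up for drivers: when an entity is
 * initialized with a list of schedulers, it migrates to the least loaded one
 * between jobs. A sketch, with the ring structures being hypothetical:
 *
 *	struct drm_gpu_scheduler *list[] = { &ring0->sched, &ring1->sched };
 *
 *	drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
 *			      list, ARRAY_SIZE(list), NULL);
 */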
98062306a36Sopenharmony_ci
98162306a36Sopenharmony_ci/**
98262306a36Sopenharmony_ci * drm_sched_blocked - check if the scheduler is blocked
98362306a36Sopenharmony_ci *
98462306a36Sopenharmony_ci * @sched: scheduler instance
98562306a36Sopenharmony_ci *
98662306a36Sopenharmony_ci * Returns true if blocked, otherwise false.
98762306a36Sopenharmony_ci */
98862306a36Sopenharmony_cistatic bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
98962306a36Sopenharmony_ci{
99062306a36Sopenharmony_ci	if (kthread_should_park()) {
99162306a36Sopenharmony_ci		kthread_parkme();
99262306a36Sopenharmony_ci		return true;
99362306a36Sopenharmony_ci	}
99462306a36Sopenharmony_ci
99562306a36Sopenharmony_ci	return false;
99662306a36Sopenharmony_ci}
99762306a36Sopenharmony_ci
99862306a36Sopenharmony_ci/**
99962306a36Sopenharmony_ci * drm_sched_main - main scheduler thread
100062306a36Sopenharmony_ci *
100162306a36Sopenharmony_ci * @param: scheduler instance
100262306a36Sopenharmony_ci *
100362306a36Sopenharmony_ci * Returns 0.
100462306a36Sopenharmony_ci */
100562306a36Sopenharmony_cistatic int drm_sched_main(void *param)
100662306a36Sopenharmony_ci{
100762306a36Sopenharmony_ci	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
100862306a36Sopenharmony_ci	int r;
100962306a36Sopenharmony_ci
101062306a36Sopenharmony_ci	sched_set_fifo_low(current);
101162306a36Sopenharmony_ci
101262306a36Sopenharmony_ci	while (!kthread_should_stop()) {
101362306a36Sopenharmony_ci		struct drm_sched_entity *entity = NULL;
101462306a36Sopenharmony_ci		struct drm_sched_fence *s_fence;
101562306a36Sopenharmony_ci		struct drm_sched_job *sched_job;
101662306a36Sopenharmony_ci		struct dma_fence *fence;
101762306a36Sopenharmony_ci		struct drm_sched_job *cleanup_job = NULL;
101862306a36Sopenharmony_ci
101962306a36Sopenharmony_ci		wait_event_interruptible(sched->wake_up_worker,
102062306a36Sopenharmony_ci					 (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
102162306a36Sopenharmony_ci					 (!drm_sched_blocked(sched) &&
102262306a36Sopenharmony_ci					  (entity = drm_sched_select_entity(sched))) ||
102362306a36Sopenharmony_ci					 kthread_should_stop());
102462306a36Sopenharmony_ci
102562306a36Sopenharmony_ci		if (cleanup_job)
102662306a36Sopenharmony_ci			sched->ops->free_job(cleanup_job);
102762306a36Sopenharmony_ci
102862306a36Sopenharmony_ci		if (!entity)
102962306a36Sopenharmony_ci			continue;
103062306a36Sopenharmony_ci
103162306a36Sopenharmony_ci		sched_job = drm_sched_entity_pop_job(entity);
103262306a36Sopenharmony_ci
103362306a36Sopenharmony_ci		if (!sched_job) {
103462306a36Sopenharmony_ci			complete_all(&entity->entity_idle);
103562306a36Sopenharmony_ci			continue;
103662306a36Sopenharmony_ci		}
103762306a36Sopenharmony_ci
103862306a36Sopenharmony_ci		s_fence = sched_job->s_fence;
103962306a36Sopenharmony_ci
104062306a36Sopenharmony_ci		atomic_inc(&sched->hw_rq_count);
104162306a36Sopenharmony_ci		drm_sched_job_begin(sched_job);
104262306a36Sopenharmony_ci
104362306a36Sopenharmony_ci		trace_drm_run_job(sched_job, entity);
104462306a36Sopenharmony_ci		fence = sched->ops->run_job(sched_job);
104562306a36Sopenharmony_ci		complete_all(&entity->entity_idle);
104662306a36Sopenharmony_ci		drm_sched_fence_scheduled(s_fence, fence);
104762306a36Sopenharmony_ci
		if (!IS_ERR_OR_NULL(fence)) {
			/*
			 * Drop the reference from run_job()'s original
			 * kref_init; drm_sched_fence_scheduled() took its
			 * own reference for s_fence->parent, which keeps
			 * the fence alive for the callback below.
			 */
			dma_fence_put(fence);

			r = dma_fence_add_callback(fence, &sched_job->cb,
						   drm_sched_job_done_cb);
			if (r == -ENOENT)
				drm_sched_job_done(sched_job, fence->error);
			else if (r)
				DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
					      r);
		} else {
			drm_sched_job_done(sched_job, IS_ERR(fence) ?
					   PTR_ERR(fence) : 0);
		}

		wake_up(&sched->job_scheduled);
	}
	return 0;
}

/**
 * drm_sched_init - Init a gpu scheduler instance
 *
 * @sched: scheduler instance
 * @ops: backend operations for this scheduler
 * @hw_submission: number of hw submissions that can be in flight
 * @hang_limit: number of times to allow a job to hang before dropping it
 * @timeout: timeout value in jiffies for the scheduler
 * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is
 *		used
 * @score: optional score atomic shared with other schedulers
 * @name: name used for debugging
 * @dev: target &struct device
 *
 * Returns 0 on success, otherwise an error code.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
		   const struct drm_sched_backend_ops *ops,
		   unsigned hw_submission, unsigned hang_limit,
		   long timeout, struct workqueue_struct *timeout_wq,
		   atomic_t *score, const char *name, struct device *dev)
{
	int i, ret;

	sched->ops = ops;
	sched->hw_submission_limit = hw_submission;
	sched->name = name;
	sched->timeout = timeout;
	sched->timeout_wq = timeout_wq ? : system_wq;
	sched->hang_limit = hang_limit;
	sched->score = score ? score : &sched->_score;
	sched->dev = dev;
	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
		drm_sched_rq_init(sched, &sched->sched_rq[i]);

	init_waitqueue_head(&sched->wake_up_worker);
	init_waitqueue_head(&sched->job_scheduled);
	INIT_LIST_HEAD(&sched->pending_list);
	spin_lock_init(&sched->job_list_lock);
	atomic_set(&sched->hw_rq_count, 0);
	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
	atomic_set(&sched->_score, 0);
	atomic64_set(&sched->job_id_count, 0);

	/* Each scheduler will run on a separate kernel thread */
	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
	if (IS_ERR(sched->thread)) {
		ret = PTR_ERR(sched->thread);
		sched->thread = NULL;
		DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name);
		return ret;
	}

	sched->ready = true;
	return 0;
}
EXPORT_SYMBOL(drm_sched_init);

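/*
 * Usage example (an illustrative sketch, not part of this file's API):
 * a driver typically embeds one scheduler per hardware ring and initializes
 * it at device init time. "my_ring", "my_sched_ops" and the numeric limits
 * below are hypothetical.
 *
 *	int my_ring_init(struct my_ring *ring, struct device *dev)
 *	{
 *		return drm_sched_init(&ring->sched, &my_sched_ops,
 *				      64,			// hw_submission
 *				      2,			// hang_limit
 *				      msecs_to_jiffies(10000),	// timeout
 *				      NULL,			// timeout_wq
 *				      NULL,			// score
 *				      "my-ring", dev);
 *	}
 */
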
/**
 * drm_sched_fini - Destroy a gpu scheduler
 *
 * @sched: scheduler instance
 *
 * Tears down and cleans up the scheduler.
 */
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_entity *s_entity;
	int i;

	if (sched->thread)
		kthread_stop(sched->thread);

	for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
		struct drm_sched_rq *rq = &sched->sched_rq[i];

		spin_lock(&rq->lock);
		list_for_each_entry(s_entity, &rq->entities, list)
			/*
			 * Prevents reinsertion and marks job_queue as idle;
			 * the entity will eventually be removed from the rq
			 * in drm_sched_entity_fini().
			 */
			s_entity->stopped = true;
		spin_unlock(&rq->lock);
	}

	/* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */
	wake_up_all(&sched->job_scheduled);

	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&sched->work_tdr);

	sched->ready = false;
}
EXPORT_SYMBOL(drm_sched_fini);

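/*
 * Teardown example (an illustrative sketch, not part of this file's API):
 * entities feeding this scheduler should be destroyed first, via
 * drm_sched_entity_destroy(), before the scheduler itself is finalized.
 * "my_ring" is a hypothetical name.
 *
 *	void my_ring_fini(struct my_ring *ring)
 *	{
 *		drm_sched_entity_destroy(&ring->entity);
 *		drm_sched_fini(&ring->sched);
 *	}
 */
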
/**
 * drm_sched_increase_karma - Update sched_entity guilty flag
 *
 * @bad: The job guilty of the timeout
 *
 * Increments @bad's karma on every hang it causes. If this exceeds the hang
 * limit of the scheduler, the respective sched entity is marked guilty and
 * jobs from it will not be scheduled further.
 */
void drm_sched_increase_karma(struct drm_sched_job *bad)
{
	int i;
	struct drm_sched_entity *tmp;
	struct drm_sched_entity *entity;
	struct drm_gpu_scheduler *sched = bad->sched;

	/* Don't change @bad's karma if it's from the KERNEL RQ: a GPU hang
	 * can sometimes corrupt kernel jobs (like VM updating jobs), but
	 * kernel jobs are always considered good.
	 */
	if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
		atomic_inc(&bad->karma);

		for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
		     i++) {
			struct drm_sched_rq *rq = &sched->sched_rq[i];

			spin_lock(&rq->lock);
			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
				if (bad->s_fence->scheduled.context ==
				    entity->fence_context) {
					if (entity->guilty)
						atomic_set(entity->guilty, 1);
					break;
				}
			}
			spin_unlock(&rq->lock);
			if (&entity->list != &rq->entities)
				break;
		}
	}
}
EXPORT_SYMBOL(drm_sched_increase_karma);
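
/*
 * Usage example (an illustrative sketch, not part of this file's API):
 * drivers typically call drm_sched_increase_karma() from their
 * &drm_sched_backend_ops.timedout_job callback before resetting the
 * hardware. "my_reset_hw" is a hypothetical helper.
 *
 *	static enum drm_gpu_sched_stat
 *	my_timedout_job(struct drm_sched_job *job)
 *	{
 *		drm_sched_increase_karma(job);
 *		my_reset_hw(job->sched);
 *		return DRM_GPU_SCHED_STAT_NOMINAL;
 *	}
 */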