162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci
362306a36Sopenharmony_ci/*
462306a36Sopenharmony_ci * Copyright 2016-2019 HabanaLabs, Ltd.
562306a36Sopenharmony_ci * All Rights Reserved.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include "habanalabs.h"
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/slab.h>
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci/*
 * hl_hw_queue_add_ptr - add to pi or ci and check if it wraps around
1462306a36Sopenharmony_ci *
1562306a36Sopenharmony_ci * @ptr: the current pi/ci value
1662306a36Sopenharmony_ci * @val: the amount to add
1762306a36Sopenharmony_ci *
1862306a36Sopenharmony_ci * Add val to ptr. It can go until twice the queue length.
1962306a36Sopenharmony_ci */
2062306a36Sopenharmony_ciinline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
2162306a36Sopenharmony_ci{
2262306a36Sopenharmony_ci	ptr += val;
2362306a36Sopenharmony_ci	ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
2462306a36Sopenharmony_ci	return ptr;
2562306a36Sopenharmony_ci}
2662306a36Sopenharmony_cistatic inline int queue_ci_get(atomic_t *ci, u32 queue_len)
2762306a36Sopenharmony_ci{
2862306a36Sopenharmony_ci	return atomic_read(ci) & ((queue_len << 1) - 1);
2962306a36Sopenharmony_ci}
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_cistatic inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
3262306a36Sopenharmony_ci{
3362306a36Sopenharmony_ci	int delta = (q->pi - queue_ci_get(&q->ci, queue_len));
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci	if (delta >= 0)
3662306a36Sopenharmony_ci		return (queue_len - delta);
3762306a36Sopenharmony_ci	else
3862306a36Sopenharmony_ci		return (abs(delta) - queue_len);
3962306a36Sopenharmony_ci}
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_civoid hl_hw_queue_update_ci(struct hl_cs *cs)
4262306a36Sopenharmony_ci{
4362306a36Sopenharmony_ci	struct hl_device *hdev = cs->ctx->hdev;
4462306a36Sopenharmony_ci	struct hl_hw_queue *q;
4562306a36Sopenharmony_ci	int i;
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci	if (hdev->disabled)
4862306a36Sopenharmony_ci		return;
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci	q = &hdev->kernel_queues[0];
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci	/* There are no internal queues if H/W queues are being used */
5362306a36Sopenharmony_ci	if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW)
5462306a36Sopenharmony_ci		return;
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci	/* We must increment CI for every queue that will never get a
5762306a36Sopenharmony_ci	 * completion, there are 2 scenarios this can happen:
5862306a36Sopenharmony_ci	 * 1. All queues of a non completion CS will never get a completion.
5962306a36Sopenharmony_ci	 * 2. Internal queues never gets completion.
6062306a36Sopenharmony_ci	 */
6162306a36Sopenharmony_ci	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
6262306a36Sopenharmony_ci		if (!cs_needs_completion(cs) || q->queue_type == QUEUE_TYPE_INT)
6362306a36Sopenharmony_ci			atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
6462306a36Sopenharmony_ci	}
6562306a36Sopenharmony_ci}
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci/*
6862306a36Sopenharmony_ci * hl_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
6962306a36Sopenharmony_ci *                                H/W queue.
7062306a36Sopenharmony_ci * @hdev: pointer to habanalabs device structure
7162306a36Sopenharmony_ci * @q: pointer to habanalabs queue structure
7262306a36Sopenharmony_ci * @ctl: BD's control word
7362306a36Sopenharmony_ci * @len: BD's length
7462306a36Sopenharmony_ci * @ptr: BD's pointer
7562306a36Sopenharmony_ci *
7662306a36Sopenharmony_ci * This function assumes there is enough space on the queue to submit a new
7762306a36Sopenharmony_ci * BD to it. It initializes the next BD and calls the device specific
7862306a36Sopenharmony_ci * function to set the pi (and doorbell)
7962306a36Sopenharmony_ci *
8062306a36Sopenharmony_ci * This function must be called when the scheduler mutex is taken
8162306a36Sopenharmony_ci *
8262306a36Sopenharmony_ci */
8362306a36Sopenharmony_civoid hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
8462306a36Sopenharmony_ci		u32 ctl, u32 len, u64 ptr)
8562306a36Sopenharmony_ci{
8662306a36Sopenharmony_ci	struct hl_bd *bd;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	bd = q->kernel_address;
8962306a36Sopenharmony_ci	bd += hl_pi_2_offset(q->pi);
9062306a36Sopenharmony_ci	bd->ctl = cpu_to_le32(ctl);
9162306a36Sopenharmony_ci	bd->len = cpu_to_le32(len);
9262306a36Sopenharmony_ci	bd->ptr = cpu_to_le64(ptr);
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	q->pi = hl_queue_inc_ptr(q->pi);
9562306a36Sopenharmony_ci	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
9662306a36Sopenharmony_ci}
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci/*
9962306a36Sopenharmony_ci * ext_queue_sanity_checks - perform some sanity checks on external queue
10062306a36Sopenharmony_ci *
10162306a36Sopenharmony_ci * @hdev              : pointer to hl_device structure
10262306a36Sopenharmony_ci * @q                 :	pointer to hl_hw_queue structure
10362306a36Sopenharmony_ci * @num_of_entries    : how many entries to check for space
10462306a36Sopenharmony_ci * @reserve_cq_entry  :	whether to reserve an entry in the cq
10562306a36Sopenharmony_ci *
10662306a36Sopenharmony_ci * H/W queues spinlock should be taken before calling this function
10762306a36Sopenharmony_ci *
10862306a36Sopenharmony_ci * Perform the following:
10962306a36Sopenharmony_ci * - Make sure we have enough space in the h/w queue
11062306a36Sopenharmony_ci * - Make sure we have enough space in the completion queue
11162306a36Sopenharmony_ci * - Reserve space in the completion queue (needs to be reversed if there
11262306a36Sopenharmony_ci *   is a failure down the road before the actual submission of work). Only
11362306a36Sopenharmony_ci *   do this action if reserve_cq_entry is true
11462306a36Sopenharmony_ci *
11562306a36Sopenharmony_ci */
11662306a36Sopenharmony_cistatic int ext_queue_sanity_checks(struct hl_device *hdev,
11762306a36Sopenharmony_ci				struct hl_hw_queue *q, int num_of_entries,
11862306a36Sopenharmony_ci				bool reserve_cq_entry)
11962306a36Sopenharmony_ci{
12062306a36Sopenharmony_ci	atomic_t *free_slots =
12162306a36Sopenharmony_ci			&hdev->completion_queue[q->cq_id].free_slots_cnt;
12262306a36Sopenharmony_ci	int free_slots_cnt;
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci	/* Check we have enough space in the queue */
12562306a36Sopenharmony_ci	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	if (free_slots_cnt < num_of_entries) {
12862306a36Sopenharmony_ci		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
12962306a36Sopenharmony_ci			q->hw_queue_id, num_of_entries);
13062306a36Sopenharmony_ci		return -EAGAIN;
13162306a36Sopenharmony_ci	}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	if (reserve_cq_entry) {
13462306a36Sopenharmony_ci		/*
13562306a36Sopenharmony_ci		 * Check we have enough space in the completion queue
13662306a36Sopenharmony_ci		 * Add -1 to counter (decrement) unless counter was already 0
13762306a36Sopenharmony_ci		 * In that case, CQ is full so we can't submit a new CB because
13862306a36Sopenharmony_ci		 * we won't get ack on its completion
13962306a36Sopenharmony_ci		 * atomic_add_unless will return 0 if counter was already 0
14062306a36Sopenharmony_ci		 */
14162306a36Sopenharmony_ci		if (atomic_add_negative(num_of_entries * -1, free_slots)) {
14262306a36Sopenharmony_ci			dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
14362306a36Sopenharmony_ci				num_of_entries, q->hw_queue_id);
14462306a36Sopenharmony_ci			atomic_add(num_of_entries, free_slots);
14562306a36Sopenharmony_ci			return -EAGAIN;
14662306a36Sopenharmony_ci		}
14762306a36Sopenharmony_ci	}
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci	return 0;
15062306a36Sopenharmony_ci}
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci/*
15362306a36Sopenharmony_ci * int_queue_sanity_checks - perform some sanity checks on internal queue
15462306a36Sopenharmony_ci *
15562306a36Sopenharmony_ci * @hdev              : pointer to hl_device structure
15662306a36Sopenharmony_ci * @q                 :	pointer to hl_hw_queue structure
15762306a36Sopenharmony_ci * @num_of_entries    : how many entries to check for space
15862306a36Sopenharmony_ci *
15962306a36Sopenharmony_ci * H/W queues spinlock should be taken before calling this function
16062306a36Sopenharmony_ci *
16162306a36Sopenharmony_ci * Perform the following:
16262306a36Sopenharmony_ci * - Make sure we have enough space in the h/w queue
16362306a36Sopenharmony_ci *
16462306a36Sopenharmony_ci */
16562306a36Sopenharmony_cistatic int int_queue_sanity_checks(struct hl_device *hdev,
16662306a36Sopenharmony_ci					struct hl_hw_queue *q,
16762306a36Sopenharmony_ci					int num_of_entries)
16862306a36Sopenharmony_ci{
16962306a36Sopenharmony_ci	int free_slots_cnt;
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci	if (num_of_entries > q->int_queue_len) {
17262306a36Sopenharmony_ci		dev_err(hdev->dev,
17362306a36Sopenharmony_ci			"Cannot populate queue %u with %u jobs\n",
17462306a36Sopenharmony_ci			q->hw_queue_id, num_of_entries);
17562306a36Sopenharmony_ci		return -ENOMEM;
17662306a36Sopenharmony_ci	}
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci	/* Check we have enough space in the queue */
17962306a36Sopenharmony_ci	free_slots_cnt = queue_free_slots(q, q->int_queue_len);
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	if (free_slots_cnt < num_of_entries) {
18262306a36Sopenharmony_ci		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
18362306a36Sopenharmony_ci			q->hw_queue_id, num_of_entries);
18462306a36Sopenharmony_ci		return -EAGAIN;
18562306a36Sopenharmony_ci	}
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci	return 0;
18862306a36Sopenharmony_ci}
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci/*
19162306a36Sopenharmony_ci * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
19262306a36Sopenharmony_ci * @hdev: Pointer to hl_device structure.
19362306a36Sopenharmony_ci * @q: Pointer to hl_hw_queue structure.
19462306a36Sopenharmony_ci * @num_of_entries: How many entries to check for space.
19562306a36Sopenharmony_ci *
19662306a36Sopenharmony_ci * Notice: We do not reserve queue entries so this function mustn't be called
19762306a36Sopenharmony_ci *         more than once per CS for the same queue
19862306a36Sopenharmony_ci *
19962306a36Sopenharmony_ci */
20062306a36Sopenharmony_cistatic int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
20162306a36Sopenharmony_ci					int num_of_entries)
20262306a36Sopenharmony_ci{
20362306a36Sopenharmony_ci	int free_slots_cnt;
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	/* Check we have enough space in the queue */
20662306a36Sopenharmony_ci	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	if (free_slots_cnt < num_of_entries) {
20962306a36Sopenharmony_ci		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
21062306a36Sopenharmony_ci			q->hw_queue_id, num_of_entries);
21162306a36Sopenharmony_ci		return -EAGAIN;
21262306a36Sopenharmony_ci	}
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	return 0;
21562306a36Sopenharmony_ci}
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci/*
21862306a36Sopenharmony_ci * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
21962306a36Sopenharmony_ci *
22062306a36Sopenharmony_ci * @hdev: pointer to hl_device structure
22162306a36Sopenharmony_ci * @hw_queue_id: Queue's type
22262306a36Sopenharmony_ci * @cb_size: size of CB
22362306a36Sopenharmony_ci * @cb_ptr: pointer to CB location
22462306a36Sopenharmony_ci *
22562306a36Sopenharmony_ci * This function sends a single CB, that must NOT generate a completion entry.
22662306a36Sopenharmony_ci * Sending CPU messages can be done instead via 'hl_hw_queue_submit_bd()'
22762306a36Sopenharmony_ci */
22862306a36Sopenharmony_ciint hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
22962306a36Sopenharmony_ci				u32 cb_size, u64 cb_ptr)
23062306a36Sopenharmony_ci{
23162306a36Sopenharmony_ci	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
23262306a36Sopenharmony_ci	int rc = 0;
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci	hdev->asic_funcs->hw_queues_lock(hdev);
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	if (hdev->disabled) {
23762306a36Sopenharmony_ci		rc = -EPERM;
23862306a36Sopenharmony_ci		goto out;
23962306a36Sopenharmony_ci	}
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	/*
24262306a36Sopenharmony_ci	 * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue
24362306a36Sopenharmony_ci	 * type only on init phase, when the queues are empty and being tested,
24462306a36Sopenharmony_ci	 * so there is no need for sanity checks.
24562306a36Sopenharmony_ci	 */
24662306a36Sopenharmony_ci	if (q->queue_type != QUEUE_TYPE_HW) {
24762306a36Sopenharmony_ci		rc = ext_queue_sanity_checks(hdev, q, 1, false);
24862306a36Sopenharmony_ci		if (rc)
24962306a36Sopenharmony_ci			goto out;
25062306a36Sopenharmony_ci	}
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	hl_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ciout:
25562306a36Sopenharmony_ci	hdev->asic_funcs->hw_queues_unlock(hdev);
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_ci	return rc;
25862306a36Sopenharmony_ci}
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci/*
26162306a36Sopenharmony_ci * ext_queue_schedule_job - submit a JOB to an external queue
26262306a36Sopenharmony_ci *
26362306a36Sopenharmony_ci * @job: pointer to the job that needs to be submitted to the queue
26462306a36Sopenharmony_ci *
26562306a36Sopenharmony_ci * This function must be called when the scheduler mutex is taken
26662306a36Sopenharmony_ci *
26762306a36Sopenharmony_ci */
static void ext_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_cq_entry cq_pkt;
	struct hl_cq *cq;
	u64 cq_addr;
	struct hl_cb *cb;
	u32 ctl;
	u32 len;
	u64 ptr;

	/*
	 * Update the JOB ID inside the BD CTL so the device would know what
	 * to write in the completion queue
	 */
	ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);

	cb = job->patched_cb;
	len = job->job_cb_size;
	ptr = cb->bus_address;

	/* Skip completion flow in case this is a non completion CS */
	if (!cs_needs_completion(job->cs))
		goto submit_bd;

	/* Build the CQ entry: this job's shadow index, marked valid + ready */
	cq_pkt.data = cpu_to_le32(
			((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
				& CQ_ENTRY_SHADOW_INDEX_MASK) |
			FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK, 1) |
			FIELD_PREP(CQ_ENTRY_READY_MASK, 1));

	/*
	 * No need to protect pi_offset because scheduling to the
	 * H/W queues is done under the scheduler mutex
	 *
	 * No need to check if CQ is full because it was already
	 * checked in ext_queue_sanity_checks
	 */
	cq = &hdev->completion_queue[q->cq_id];
	cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);

	/* Append ASIC-specific packets that write the CQ entry on completion */
	hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
						job->user_cb_size,
						cq_addr,
						le32_to_cpu(cq_pkt.data),
						q->msi_vec,
						job->contains_dma_pkt);

	/* Remember the job at its PI slot so the completion flow can find it */
	q->shadow_queue[hl_pi_2_offset(q->pi)] = job;

	cq->pi = hl_cq_inc_ptr(cq->pi);

submit_bd:
	hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci/*
32662306a36Sopenharmony_ci * int_queue_schedule_job - submit a JOB to an internal queue
32762306a36Sopenharmony_ci *
32862306a36Sopenharmony_ci * @job: pointer to the job that needs to be submitted to the queue
32962306a36Sopenharmony_ci *
33062306a36Sopenharmony_ci * This function must be called when the scheduler mutex is taken
33162306a36Sopenharmony_ci *
33262306a36Sopenharmony_ci */
static void int_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_bd bd;
	__le64 *pi;

	bd.ctl = 0;
	bd.len = cpu_to_le32(job->job_cb_size);

	if (job->is_kernel_allocated_cb)
		/* bus_address is actually a mmu mapped address
		 * allocated from an internal pool
		 */
		bd.ptr = cpu_to_le64(job->user_cb->bus_address);
	else
		bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);

	/* Address of the BD slot corresponding to the current PI */
	pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd);

	/* PI counts up to twice the queue length before wrapping */
	q->pi++;
	q->pi &= ((q->int_queue_len << 1) - 1);

	/* Write the BD through the ASIC-specific method, then ring doorbell */
	hdev->asic_funcs->pqe_write(hdev, pi, &bd);

	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci/*
36262306a36Sopenharmony_ci * hw_queue_schedule_job - submit a JOB to a H/W queue
36362306a36Sopenharmony_ci *
36462306a36Sopenharmony_ci * @job: pointer to the job that needs to be submitted to the queue
36562306a36Sopenharmony_ci *
36662306a36Sopenharmony_ci * This function must be called when the scheduler mutex is taken
36762306a36Sopenharmony_ci *
36862306a36Sopenharmony_ci */
36962306a36Sopenharmony_cistatic void hw_queue_schedule_job(struct hl_cs_job *job)
37062306a36Sopenharmony_ci{
37162306a36Sopenharmony_ci	struct hl_device *hdev = job->cs->ctx->hdev;
37262306a36Sopenharmony_ci	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
37362306a36Sopenharmony_ci	u64 ptr;
37462306a36Sopenharmony_ci	u32 offset, ctl, len;
37562306a36Sopenharmony_ci
37662306a36Sopenharmony_ci	/*
37762306a36Sopenharmony_ci	 * Upon PQE completion, COMP_DATA is used as the write data to the
37862306a36Sopenharmony_ci	 * completion queue (QMAN HBW message), and COMP_OFFSET is used as the
37962306a36Sopenharmony_ci	 * write address offset in the SM block (QMAN LBW message).
38062306a36Sopenharmony_ci	 * The write address offset is calculated as "COMP_OFFSET << 2".
38162306a36Sopenharmony_ci	 */
38262306a36Sopenharmony_ci	offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
38362306a36Sopenharmony_ci	ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
38462306a36Sopenharmony_ci		((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci	len = job->job_cb_size;
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_ci	/*
38962306a36Sopenharmony_ci	 * A patched CB is created only if a user CB was allocated by driver and
39062306a36Sopenharmony_ci	 * MMU is disabled. If MMU is enabled, the user CB should be used
39162306a36Sopenharmony_ci	 * instead. If the user CB wasn't allocated by driver, assume that it
39262306a36Sopenharmony_ci	 * holds an address.
39362306a36Sopenharmony_ci	 */
39462306a36Sopenharmony_ci	if (job->patched_cb)
39562306a36Sopenharmony_ci		ptr = job->patched_cb->bus_address;
39662306a36Sopenharmony_ci	else if (job->is_kernel_allocated_cb)
39762306a36Sopenharmony_ci		ptr = job->user_cb->bus_address;
39862306a36Sopenharmony_ci	else
39962306a36Sopenharmony_ci		ptr = (u64) (uintptr_t) job->user_cb;
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci	hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
40262306a36Sopenharmony_ci}
40362306a36Sopenharmony_ci
/* Initialize a signal CS: pick the current SOB and generate the signal CB */
static int init_signal_cs(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_sync_stream_properties *prop;
	struct hl_hw_sob *hw_sob;
	u32 q_idx;
	int rc = 0;

	q_idx = job->hw_queue_id;
	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
	hw_sob = &prop->hw_sob[prop->curr_sob_offset];

	/* Record which SOB and value this CS will signal, for completion flow */
	cs_cmpl->hw_sob = hw_sob;
	cs_cmpl->sob_val = prop->next_sob_val;

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: %u, q_idx: %d, seq: %llu\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx,
		cs_cmpl->cs_seq);

	/* we set an EB since we must make sure all operations are done
	 * when sending the signal
	 */
	hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
				cs_cmpl->hw_sob->sob_id, 0, true);

	/* NOTE(review): presumably advances next_sob_val and may switch to the
	 * next SOB on wraparound, updating hw_sob through the pointer - verify
	 * against hl_cs_signal_sob_wraparound_handler
	 */
	rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1,
								false);

	job->cs->sob_addr_offset = hw_sob->sob_addr;
	/* NOTE(review): the -1 suggests next_sob_val was already incremented by
	 * the wraparound handler above - confirm against its implementation
	 */
	job->cs->initial_sob_count = prop->next_sob_val - 1;

	return rc;
}
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_civoid hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
44062306a36Sopenharmony_ci			struct hl_cs *cs, struct hl_cs_job *job,
44162306a36Sopenharmony_ci			struct hl_cs_compl *cs_cmpl)
44262306a36Sopenharmony_ci{
44362306a36Sopenharmony_ci	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
44462306a36Sopenharmony_ci	u32 offset = 0;
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_ci	cs_cmpl->hw_sob = handle->hw_sob;
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_ci	/* Note that encaps_sig_wait_offset was validated earlier in the flow
44962306a36Sopenharmony_ci	 * for offset value which exceeds the max reserved signal count.
45062306a36Sopenharmony_ci	 * always decrement 1 of the offset since when the user
45162306a36Sopenharmony_ci	 * set offset 1 for example he mean to wait only for the first
45262306a36Sopenharmony_ci	 * signal only, which will be pre_sob_val, and if he set offset 2
45362306a36Sopenharmony_ci	 * then the value required is (pre_sob_val + 1) and so on...
45462306a36Sopenharmony_ci	 * if user set wait offset to 0, then treat it as legacy wait cs,
45562306a36Sopenharmony_ci	 * wait for the next signal.
45662306a36Sopenharmony_ci	 */
45762306a36Sopenharmony_ci	if (job->encaps_sig_wait_offset)
45862306a36Sopenharmony_ci		offset = job->encaps_sig_wait_offset - 1;
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci	cs_cmpl->sob_val = handle->pre_sob_val + offset;
46162306a36Sopenharmony_ci}
46262306a36Sopenharmony_ci
/* Initialize a wait CS: resolve the SOB/value to wait on and generate the
 * wait CB. Returns -EINVAL if the signal CS already completed (no wait is
 * needed and the SOB may already be in reset).
 */
static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct hl_cs_compl *signal_cs_cmpl;
	u32 q_idx;

	q_idx = job->hw_queue_id;
	prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

	/* Completion object of the signal CS this wait CS depends on */
	signal_cs_cmpl = container_of(cs->signal_fence,
					struct hl_cs_compl,
					base_fence);

	if (cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, cs, job, cs_cmpl);

		dev_dbg(hdev->dev, "Wait for encaps signals handle, qidx(%u), CS sequence(%llu), sob val: 0x%x, offset: %u\n",
				cs->encaps_sig_hdl->q_idx,
				cs->encaps_sig_hdl->cs_seq,
				cs_cmpl->sob_val,
				job->encaps_sig_wait_offset);
	} else {
		/* Copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevent out of sync of hw_sob
	 * refcount value, changed by signal/wait flows.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}

	/* Pin the SOB so it isn't reset before this wait CS is submitted */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	dev_dbg(hdev->dev,
		"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d, seq: %llu\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->base_mon_id, q_idx, cs->sequence);

	/* Parameters for the ASIC-specific wait CB generator */
	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->base_mon_id;
	wait_prop.q_idx = q_idx;
	wait_prop.size = 0;

	hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);

	/* Barrier before dropping the signal fence reference */
	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}
54062306a36Sopenharmony_ci
54162306a36Sopenharmony_ci/*
54262306a36Sopenharmony_ci * init_signal_wait_cs - initialize a signal/wait CS
54362306a36Sopenharmony_ci * @cs: pointer to the signal/wait CS
54462306a36Sopenharmony_ci *
54562306a36Sopenharmony_ci * H/W queues spinlock should be taken before calling this function
54662306a36Sopenharmony_ci */
54762306a36Sopenharmony_cistatic int init_signal_wait_cs(struct hl_cs *cs)
54862306a36Sopenharmony_ci{
54962306a36Sopenharmony_ci	struct hl_ctx *ctx = cs->ctx;
55062306a36Sopenharmony_ci	struct hl_device *hdev = ctx->hdev;
55162306a36Sopenharmony_ci	struct hl_cs_job *job;
55262306a36Sopenharmony_ci	struct hl_cs_compl *cs_cmpl =
55362306a36Sopenharmony_ci			container_of(cs->fence, struct hl_cs_compl, base_fence);
55462306a36Sopenharmony_ci	int rc = 0;
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_ci	/* There is only one job in a signal/wait CS */
55762306a36Sopenharmony_ci	job = list_first_entry(&cs->job_list, struct hl_cs_job,
55862306a36Sopenharmony_ci				cs_node);
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci	if (cs->type & CS_TYPE_SIGNAL)
56162306a36Sopenharmony_ci		rc = init_signal_cs(hdev, job, cs_cmpl);
56262306a36Sopenharmony_ci	else if (cs->type & CS_TYPE_WAIT)
56362306a36Sopenharmony_ci		rc = init_wait_cs(hdev, cs, job, cs_cmpl);
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci	return rc;
56662306a36Sopenharmony_ci}
56762306a36Sopenharmony_ci
/* Bind the first staged CS to its encapsulated-signals handle: look the
 * handle up by id under the signals manager lock, record the CS sequence in
 * it and mirror its SOB info into the CS completion object. Returns -EINVAL
 * if the handle id is not found.
 */
static int encaps_sig_first_staged_cs_handler
			(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs_compl *cs_cmpl =
			container_of(cs->fence,
					struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
	struct hl_encaps_signals_mgr *mgr;
	int rc = 0;

	mgr = &cs->ctx->sig_mgr;

	/* Lock protects both the idr lookup and the handle's fields below */
	spin_lock(&mgr->lock);
	encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id);
	if (encaps_sig_hdl) {
		/*
		 * Set handler CS sequence,
		 * the CS which contains the encapsulated signals.
		 */
		encaps_sig_hdl->cs_seq = cs->sequence;
		/* store the handle and set encaps signal indication,
		 * to be used later in cs_do_release to put the last
		 * reference to encaps signals handlers.
		 */
		cs_cmpl->encaps_signals = true;
		cs_cmpl->encaps_sig_hdl = encaps_sig_hdl;

		/* set hw_sob pointer in completion object
		 * since it's used in cs_do_release flow to put
		 * refcount to sob
		 */
		cs_cmpl->hw_sob = encaps_sig_hdl->hw_sob;
		cs_cmpl->sob_val = encaps_sig_hdl->pre_sob_val +
						encaps_sig_hdl->count;

		dev_dbg(hdev->dev, "CS seq (%llu) added to encaps signal handler id (%u), count(%u), qidx(%u), sob(%u), val(%u)\n",
				cs->sequence, encaps_sig_hdl->id,
				encaps_sig_hdl->count,
				encaps_sig_hdl->q_idx,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);

	} else {
		dev_err(hdev->dev, "encaps handle id(%u) wasn't found!\n",
				cs->encaps_sig_hdl_id);
		rc = -EINVAL;
	}

	spin_unlock(&mgr->lock);

	return rc;
}
62062306a36Sopenharmony_ci
/*
 * hl_hw_queue_schedule_cs - schedule a command submission
 * @cs: pointer to the CS
 *
 * Return: 0 on success, negative error code otherwise.
 */
int hl_hw_queue_schedule_cs(struct hl_cs *cs)
{
	enum hl_device_status status;
	struct hl_cs_counters_atomic *cntr;
	struct hl_ctx *ctx = cs->ctx;
	struct hl_device *hdev = ctx->hdev;
	struct hl_cs_job *job, *tmp;
	struct hl_hw_queue *q;
	int rc = 0, i, cq_cnt;
	bool first_entry;
	u32 max_queues;

	cntr = &hdev->aggregated_cs_counters;

	hdev->asic_funcs->hw_queues_lock(hdev);

	/* Reject the CS while the device is in reset / not operational */
	if (!hl_device_operational(hdev, &status)) {
		atomic64_inc(&cntr->device_in_reset_drop_cnt);
		atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt);
		dev_err(hdev->dev,
			"device is %s, CS rejected!\n", hdev->status[status]);
		rc = -EPERM;
		goto out;
	}

	max_queues = hdev->asic_prop.max_queues;

	/*
	 * Per-type sanity checks for every queue this CS submits to.
	 * The external-queue check also reserves completion-queue entries
	 * (returned by the unroll_cq_resv path below on failure), so cq_cnt
	 * tracks how many external queues have such a reservation.
	 */
	q = &hdev->kernel_queues[0];
	for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
		if (cs->jobs_in_queue_cnt[i]) {
			switch (q->queue_type) {
			case QUEUE_TYPE_EXT:
				rc = ext_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i],
						cs_needs_completion(cs) ?
								true : false);
				break;
			case QUEUE_TYPE_INT:
				rc = int_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			case QUEUE_TYPE_HW:
				rc = hw_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			default:
				dev_err(hdev->dev, "Queue type %d is invalid\n",
					q->queue_type);
				rc = -EINVAL;
				break;
			}

			if (rc) {
				atomic64_inc(
					&ctx->cs_counters.queue_full_drop_cnt);
				atomic64_inc(&cntr->queue_full_drop_cnt);
				goto unroll_cq_resv;
			}

			if (q->queue_type == QUEUE_TYPE_EXT)
				cq_cnt++;
		}
	}

	/* Special CS types need extra initialization before scheduling */
	if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) {
		rc = init_signal_wait_cs(cs);
		if (rc)
			goto unroll_cq_resv;
	} else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) {
		rc = hdev->asic_funcs->collective_wait_init_cs(cs);
		if (rc)
			goto unroll_cq_resv;
	}

	rc = hdev->asic_funcs->pre_schedule_cs(cs);
	if (rc) {
		dev_err(hdev->dev,
			"Failed in pre-submission operations of CS %d.%llu\n",
			ctx->asid, cs->sequence);
		goto unroll_cq_resv;
	}

	/* Publish the CS in the shadow queue, indexed by its sequence number
	 * modulo the max number of pending CS (power of two - TODO confirm).
	 */
	hdev->shadow_cs_queue[cs->sequence &
				(hdev->asic_prop.max_pending_cs - 1)] = cs;

	if (cs->encaps_signals && cs->staged_first) {
		rc = encaps_sig_first_staged_cs_handler(hdev, cs);
		if (rc)
			goto unroll_cq_resv;
	}

	/* Staged-list and mirror-list updates are serialized by this lock */
	spin_lock(&hdev->cs_mirror_lock);

	/* Verify staged CS exists and add to the staged list */
	if (cs->staged_cs && !cs->staged_first) {
		struct hl_cs *staged_cs;

		staged_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
		if (!staged_cs) {
			dev_err(hdev->dev,
				"Cannot find staged submission sequence %llu",
				cs->staged_sequence);
			rc = -EINVAL;
			goto unlock_cs_mirror;
		}

		if (is_staged_cs_last_exists(hdev, staged_cs)) {
			dev_err(hdev->dev,
				"Staged submission sequence %llu already submitted",
				cs->staged_sequence);
			rc = -EINVAL;
			goto unlock_cs_mirror;
		}

		list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);

		/* update stream map of the first CS */
		if (hdev->supports_wait_for_multi_cs)
			staged_cs->fence->stream_master_qid_map |=
					cs->fence->stream_master_qid_map;
	}

	list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);

	/* Queue TDR if the CS is the first entry and if timeout is wanted */
	first_entry = list_first_entry(&hdev->cs_mirror_list,
					struct hl_cs, mirror_node) == cs;
	if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
				first_entry && cs_needs_timeout(cs)) {
		cs->tdr_active = true;
		schedule_delayed_work(&cs->work_tdr, cs->timeout_jiffies);

	}

	spin_unlock(&hdev->cs_mirror_lock);

	/* All checks passed - push every job to its hardware queue */
	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		switch (job->queue_type) {
		case QUEUE_TYPE_EXT:
			ext_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_INT:
			int_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_HW:
			hw_queue_schedule_job(job);
			break;
		default:
			break;
		}

	cs->submitted = true;

	goto out;

unlock_cs_mirror:
	spin_unlock(&hdev->cs_mirror_lock);
unroll_cq_resv:
	/* Give back the completion-queue slots reserved by the external-queue
	 * sanity checks above; cq_cnt bounds the walk to what was reserved.
	 */
	q = &hdev->kernel_queues[0];
	for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
		if ((q->queue_type == QUEUE_TYPE_EXT) &&
						(cs->jobs_in_queue_cnt[i])) {
			atomic_t *free_slots =
				&hdev->completion_queue[i].free_slots_cnt;
			atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
			cq_cnt--;
		}
	}

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	return rc;
}
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_ci/*
80162306a36Sopenharmony_ci * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
80262306a36Sopenharmony_ci *
80362306a36Sopenharmony_ci * @hdev: pointer to hl_device structure
80462306a36Sopenharmony_ci * @hw_queue_id: which queue to increment its ci
80562306a36Sopenharmony_ci */
80662306a36Sopenharmony_civoid hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
80762306a36Sopenharmony_ci{
80862306a36Sopenharmony_ci	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_ci	atomic_inc(&q->ci);
81162306a36Sopenharmony_ci}
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_cistatic int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
81462306a36Sopenharmony_ci					bool is_cpu_queue)
81562306a36Sopenharmony_ci{
81662306a36Sopenharmony_ci	void *p;
81762306a36Sopenharmony_ci	int rc;
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_ci	if (is_cpu_queue)
82062306a36Sopenharmony_ci		p = hl_cpu_accessible_dma_pool_alloc(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address);
82162306a36Sopenharmony_ci	else
82262306a36Sopenharmony_ci		p = hl_asic_dma_alloc_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address,
82362306a36Sopenharmony_ci						GFP_KERNEL | __GFP_ZERO);
82462306a36Sopenharmony_ci	if (!p)
82562306a36Sopenharmony_ci		return -ENOMEM;
82662306a36Sopenharmony_ci
82762306a36Sopenharmony_ci	q->kernel_address = p;
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_ci	q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL);
83062306a36Sopenharmony_ci	if (!q->shadow_queue) {
83162306a36Sopenharmony_ci		dev_err(hdev->dev,
83262306a36Sopenharmony_ci			"Failed to allocate shadow queue for H/W queue %d\n",
83362306a36Sopenharmony_ci			q->hw_queue_id);
83462306a36Sopenharmony_ci		rc = -ENOMEM;
83562306a36Sopenharmony_ci		goto free_queue;
83662306a36Sopenharmony_ci	}
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_ci	/* Make sure read/write pointers are initialized to start of queue */
83962306a36Sopenharmony_ci	atomic_set(&q->ci, 0);
84062306a36Sopenharmony_ci	q->pi = 0;
84162306a36Sopenharmony_ci
84262306a36Sopenharmony_ci	return 0;
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_cifree_queue:
84562306a36Sopenharmony_ci	if (is_cpu_queue)
84662306a36Sopenharmony_ci		hl_cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address);
84762306a36Sopenharmony_ci	else
84862306a36Sopenharmony_ci		hl_asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address,
84962306a36Sopenharmony_ci						q->bus_address);
85062306a36Sopenharmony_ci
85162306a36Sopenharmony_ci	return rc;
85262306a36Sopenharmony_ci}
85362306a36Sopenharmony_ci
85462306a36Sopenharmony_cistatic int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
85562306a36Sopenharmony_ci{
85662306a36Sopenharmony_ci	void *p;
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_ci	p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
85962306a36Sopenharmony_ci					&q->bus_address, &q->int_queue_len);
86062306a36Sopenharmony_ci	if (!p) {
86162306a36Sopenharmony_ci		dev_err(hdev->dev,
86262306a36Sopenharmony_ci			"Failed to get base address for internal queue %d\n",
86362306a36Sopenharmony_ci			q->hw_queue_id);
86462306a36Sopenharmony_ci		return -EFAULT;
86562306a36Sopenharmony_ci	}
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_ci	q->kernel_address = p;
86862306a36Sopenharmony_ci	q->pi = 0;
86962306a36Sopenharmony_ci	atomic_set(&q->ci, 0);
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci	return 0;
87262306a36Sopenharmony_ci}
87362306a36Sopenharmony_ci
/* Initialize a CPU queue: shared init with memory from the CPU-accessible pool */
static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, true);
}
87862306a36Sopenharmony_ci
/* Initialize an external queue: shared init with coherent DMA memory */
static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, false);
}
88362306a36Sopenharmony_ci
88462306a36Sopenharmony_cistatic int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
88562306a36Sopenharmony_ci{
88662306a36Sopenharmony_ci	void *p;
88762306a36Sopenharmony_ci
88862306a36Sopenharmony_ci	p = hl_asic_dma_alloc_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address,
88962306a36Sopenharmony_ci					GFP_KERNEL | __GFP_ZERO);
89062306a36Sopenharmony_ci	if (!p)
89162306a36Sopenharmony_ci		return -ENOMEM;
89262306a36Sopenharmony_ci
89362306a36Sopenharmony_ci	q->kernel_address = p;
89462306a36Sopenharmony_ci
89562306a36Sopenharmony_ci	/* Make sure read/write pointers are initialized to start of queue */
89662306a36Sopenharmony_ci	atomic_set(&q->ci, 0);
89762306a36Sopenharmony_ci	q->pi = 0;
89862306a36Sopenharmony_ci
89962306a36Sopenharmony_ci	return 0;
90062306a36Sopenharmony_ci}
90162306a36Sopenharmony_ci
/*
 * sync_stream_queue_init - reserve monitors and SOBs for a queue.
 *
 * @hdev: pointer to hl_device structure
 * @q_idx: index of the queue in hdev->kernel_queues
 *
 * Reserves collective monitors according to the queue's collective mode and,
 * for sync-stream capable queues, assigns a block of SOBs and monitors and
 * initializes the per-SOB state.
 *
 * NOTE(review): advances the device-wide running indices
 * (collective_mon_idx, sync_stream_queue_idx), so it must be called exactly
 * once per queue index - TODO confirm callers guarantee this.
 */
static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
{
	struct hl_sync_stream_properties *sync_stream_prop;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_hw_sob *hw_sob;
	int sob, reserved_mon_idx, queue_idx;

	sync_stream_prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

	/* We use 'collective_mon_idx' as a running index in order to reserve
	 * monitors for collective master/slave queues.
	 * collective master queue gets 2 reserved monitors
	 * collective slave queue gets 1 reserved monitor
	 */
	if (hdev->kernel_queues[q_idx].collective_mode ==
			HL_COLLECTIVE_MASTER) {
		reserved_mon_idx = hdev->collective_mon_idx;

		/* reserve the first monitor for collective master queue */
		sync_stream_prop->collective_mstr_mon_id[0] =
			prop->collective_first_mon + reserved_mon_idx;

		/* reserve the second monitor for collective master queue */
		sync_stream_prop->collective_mstr_mon_id[1] =
			prop->collective_first_mon + reserved_mon_idx + 1;

		hdev->collective_mon_idx += HL_COLLECTIVE_RSVD_MSTR_MONS;
	} else if (hdev->kernel_queues[q_idx].collective_mode ==
			HL_COLLECTIVE_SLAVE) {
		reserved_mon_idx = hdev->collective_mon_idx++;

		/* reserve a monitor for collective slave queue */
		sync_stream_prop->collective_slave_mon_id =
			prop->collective_first_mon + reserved_mon_idx;
	}

	/* The remaining setup applies only to sync-stream capable queues */
	if (!hdev->kernel_queues[q_idx].supports_sync_stream)
		return;

	queue_idx = hdev->sync_stream_queue_idx++;

	/* Each sync-stream queue gets a contiguous block of SOBs and monitors */
	sync_stream_prop->base_sob_id = prop->sync_stream_first_sob +
			(queue_idx * HL_RSVD_SOBS);
	sync_stream_prop->base_mon_id = prop->sync_stream_first_mon +
			(queue_idx * HL_RSVD_MONS);
	sync_stream_prop->next_sob_val = 1;
	sync_stream_prop->curr_sob_offset = 0;

	/* Initialize each reserved SOB: id, MMIO address, owning queue, refcount */
	for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
		hw_sob = &sync_stream_prop->hw_sob[sob];
		hw_sob->hdev = hdev;
		hw_sob->sob_id = sync_stream_prop->base_sob_id + sob;
		hw_sob->sob_addr =
			hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
		hw_sob->q_idx = q_idx;
		kref_init(&hw_sob->kref);
	}
}
96062306a36Sopenharmony_ci
96162306a36Sopenharmony_cistatic void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
96262306a36Sopenharmony_ci{
96362306a36Sopenharmony_ci	struct hl_sync_stream_properties *prop =
96462306a36Sopenharmony_ci			&hdev->kernel_queues[q_idx].sync_stream_prop;
96562306a36Sopenharmony_ci
96662306a36Sopenharmony_ci	/*
96762306a36Sopenharmony_ci	 * In case we got here due to a stuck CS, the refcnt might be bigger
96862306a36Sopenharmony_ci	 * than 1 and therefore we reset it.
96962306a36Sopenharmony_ci	 */
97062306a36Sopenharmony_ci	kref_init(&prop->hw_sob[prop->curr_sob_offset].kref);
97162306a36Sopenharmony_ci	prop->curr_sob_offset = 0;
97262306a36Sopenharmony_ci	prop->next_sob_val = 1;
97362306a36Sopenharmony_ci}
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_ci/*
97662306a36Sopenharmony_ci * queue_init - main initialization function for H/W queue object
97762306a36Sopenharmony_ci *
97862306a36Sopenharmony_ci * @hdev: pointer to hl_device device structure
97962306a36Sopenharmony_ci * @q: pointer to hl_hw_queue queue structure
98062306a36Sopenharmony_ci * @hw_queue_id: The id of the H/W queue
98162306a36Sopenharmony_ci *
98262306a36Sopenharmony_ci * Allocate dma-able memory for the queue and initialize fields
98362306a36Sopenharmony_ci * Returns 0 on success
98462306a36Sopenharmony_ci */
static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
			u32 hw_queue_id)
{
	int rc;

	q->hw_queue_id = hw_queue_id;

	/* Dispatch to the per-type initialization routine */
	switch (q->queue_type) {
	case QUEUE_TYPE_EXT:
		rc = ext_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_INT:
		rc = int_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_CPU:
		rc = cpu_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_HW:
		rc = hw_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_NA:
		/* Queue id not backed by hardware - not an error */
		q->valid = 0;
		return 0;
	default:
		dev_crit(hdev->dev, "wrong queue type %d during init\n",
			q->queue_type);
		rc = -EINVAL;
		break;
	}

	/* NOTE(review): called even when the type-specific init failed -
	 * presumably to keep the device-wide monitor/SOB running indices
	 * consistent per queue id. TODO confirm this is intentional.
	 */
	sync_stream_queue_init(hdev, q->hw_queue_id);

	if (rc)
		return rc;

	q->valid = 1;

	return 0;
}
102462306a36Sopenharmony_ci
/*
 * queue_fini - destroy queue
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 *
 * Free the queue memory
 */
static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
{
	/* QUEUE_TYPE_NA queues (and failed inits) are never marked valid */
	if (!q->valid)
		return;

	/*
	 * If we arrived here, there are no jobs waiting on this queue
	 * so we can safely remove it.
	 * This is because this function can only called when:
	 * 1. Either a context is deleted, which only can occur if all its
	 *    jobs were finished
	 * 2. A context wasn't able to be created due to failure or timeout,
	 *    which means there are no jobs on the queue yet
	 *
	 * The only exception are the queues of the kernel context, but
	 * if they are being destroyed, it means that the entire module is
	 * being removed. If the module is removed, it means there is no open
	 * user context. It also means that if a job was submitted by
	 * the kernel driver (e.g. context creation), the job itself was
	 * released by the kernel driver when a timeout occurred on its
	 * Completion. Thus, we don't need to release it again.
	 */

	/* Internal queue memory comes from get_int_queue_base(), not
	 * allocated by this file - nothing to free here.
	 */
	if (q->queue_type == QUEUE_TYPE_INT)
		return;

	/* NULL for queue types without a shadow queue; kfree(NULL) is a no-op */
	kfree(q->shadow_queue);

	/* Free the queue buffer from the pool it was allocated from */
	if (q->queue_type == QUEUE_TYPE_CPU)
		hl_cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address);
	else
		hl_asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address,
						q->bus_address);
}
106762306a36Sopenharmony_ci
/*
 * hl_hw_queues_create - allocate and initialize all H/W queues.
 *
 * @hdev: pointer to hl_device structure
 *
 * Allocates the kernel_queues array and initializes each queue according to
 * the ASIC's queue properties. On failure, tears down every queue that was
 * fully initialized before the failing one.
 *
 * Returns 0 on success, negative error code otherwise.
 */
int hl_hw_queues_create(struct hl_device *hdev)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hl_hw_queue *q;
	int i, rc, q_ready_cnt;

	hdev->kernel_queues = kcalloc(asic->max_queues,
				sizeof(*hdev->kernel_queues), GFP_KERNEL);

	if (!hdev->kernel_queues) {
		dev_err(hdev->dev, "Not enough memory for H/W queues\n");
		return -ENOMEM;
	}

	/* Initialize the H/W queues */
	for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
			i < asic->max_queues ; i++, q_ready_cnt++, q++) {

		q->queue_type = asic->hw_queues_props[i].type;
		q->supports_sync_stream =
				asic->hw_queues_props[i].supports_sync_stream;
		q->collective_mode = asic->hw_queues_props[i].collective_mode;
		rc = queue_init(hdev, q, i);
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize queue %d\n", i);
			goto release_queues;
		}
	}

	return 0;

release_queues:
	/* q_ready_cnt == i of the failing queue, so only the queues that
	 * completed queue_init() successfully are torn down here; queue_init()
	 * cleans up after itself on failure.
	 */
	for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);

	return rc;
}
110862306a36Sopenharmony_ci
110962306a36Sopenharmony_civoid hl_hw_queues_destroy(struct hl_device *hdev)
111062306a36Sopenharmony_ci{
111162306a36Sopenharmony_ci	struct hl_hw_queue *q;
111262306a36Sopenharmony_ci	u32 max_queues = hdev->asic_prop.max_queues;
111362306a36Sopenharmony_ci	int i;
111462306a36Sopenharmony_ci
111562306a36Sopenharmony_ci	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
111662306a36Sopenharmony_ci		queue_fini(hdev, q);
111762306a36Sopenharmony_ci
111862306a36Sopenharmony_ci	kfree(hdev->kernel_queues);
111962306a36Sopenharmony_ci}
112062306a36Sopenharmony_ci
112162306a36Sopenharmony_civoid hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
112262306a36Sopenharmony_ci{
112362306a36Sopenharmony_ci	struct hl_hw_queue *q;
112462306a36Sopenharmony_ci	u32 max_queues = hdev->asic_prop.max_queues;
112562306a36Sopenharmony_ci	int i;
112662306a36Sopenharmony_ci
112762306a36Sopenharmony_ci	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
112862306a36Sopenharmony_ci		if ((!q->valid) ||
112962306a36Sopenharmony_ci			((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
113062306a36Sopenharmony_ci			continue;
113162306a36Sopenharmony_ci		q->pi = 0;
113262306a36Sopenharmony_ci		atomic_set(&q->ci, 0);
113362306a36Sopenharmony_ci
113462306a36Sopenharmony_ci		if (q->supports_sync_stream)
113562306a36Sopenharmony_ci			sync_stream_queue_reset(hdev, q->hw_queue_id);
113662306a36Sopenharmony_ci	}
113762306a36Sopenharmony_ci}
1138