// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"

#include <linux/slab.h>

/*
 * hl_hw_queue_add_ptr - add to pi or ci and check if it wraps around
 *
 * @ptr: the current pi/ci value
 * @val: the amount to add
 *
 * Add val to ptr. It can go until twice the queue length.
 */
inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
{
        ptr += val;
        ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
        return ptr;
}

static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
{
        return atomic_read(ci) & ((queue_len << 1) - 1);
}

static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
{
        int delta = (q->pi - queue_ci_get(&q->ci, queue_len));

        if (delta >= 0)
                return (queue_len - delta);
        else
                return (abs(delta) - queue_len);
}
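
/*
 * Illustrative note (the length value is assumed, not quoted from
 * habanalabs.h): with HL_QUEUE_LENGTH = 256, pi and ci count modulo 512,
 * i.e. twice the queue length. Keeping that extra wrap bit is what lets
 * queue_free_slots() distinguish a full queue from an empty one:
 *
 *   pi = 300, ci = 300  ->  delta = 0    ->  256 free slots (empty)
 *   pi = 300, ci = 44   ->  delta = 256  ->  0 free slots (full)
 *   pi = 10,  ci = 300  ->  delta = -290 ->  290 - 256 = 34 free slots
 */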

void hl_hw_queue_update_ci(struct hl_cs *cs)
{
        struct hl_device *hdev = cs->ctx->hdev;
        struct hl_hw_queue *q;
        int i;

        if (hdev->disabled)
                return;

        q = &hdev->kernel_queues[0];

        /* There are no internal queues if H/W queues are being used */
        if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW)
                return;

        /* We must increment CI for every queue that will never get a
         * completion, there are 2 scenarios this can happen:
         * 1. All queues of a non completion CS will never get a completion.
         * 2. Internal queues never get a completion.
         */
        for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
                if (!cs_needs_completion(cs) || q->queue_type == QUEUE_TYPE_INT)
                        atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
        }
}

/*
 * hl_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
 *                           H/W queue.
 * @hdev: pointer to habanalabs device structure
 * @q: pointer to habanalabs queue structure
 * @ctl: BD's control word
 * @len: BD's length
 * @ptr: BD's pointer
 *
 * This function assumes there is enough space on the queue to submit a new
 * BD to it. It initializes the next BD and calls the device specific
 * function to set the pi (and doorbell)
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
                u32 ctl, u32 len, u64 ptr)
{
        struct hl_bd *bd;

        bd = q->kernel_address;
        bd += hl_pi_2_offset(q->pi);
        bd->ctl = cpu_to_le32(ctl);
        bd->len = cpu_to_le32(len);
        bd->ptr = cpu_to_le64(ptr);

        q->pi = hl_queue_inc_ptr(q->pi);
        hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}

/*
 * ext_queue_sanity_checks - perform some sanity checks on external queue
 *
 * @hdev: pointer to hl_device structure
 * @q: pointer to hl_hw_queue structure
 * @num_of_entries: how many entries to check for space
 * @reserve_cq_entry: whether to reserve an entry in the cq
 *
 * H/W queues spinlock should be taken before calling this function
 *
 * Perform the following:
 * - Make sure we have enough space in the h/w queue
 * - Make sure we have enough space in the completion queue
 * - Reserve space in the completion queue (needs to be reversed if there
 *   is a failure down the road before the actual submission of work). Only
 *   do this action if reserve_cq_entry is true
 *
 */
static int ext_queue_sanity_checks(struct hl_device *hdev,
                                struct hl_hw_queue *q, int num_of_entries,
                                bool reserve_cq_entry)
{
        atomic_t *free_slots =
                        &hdev->completion_queue[q->cq_id].free_slots_cnt;
        int free_slots_cnt;

        /* Check we have enough space in the queue */
        free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);

        if (free_slots_cnt < num_of_entries) {
                dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
                        q->hw_queue_id, num_of_entries);
                return -EAGAIN;
        }

        if (reserve_cq_entry) {
                /*
                 * Check we have enough space in the completion queue.
                 * Subtract num_of_entries from the counter; if the result is
                 * negative, the CQ doesn't have enough free entries to ack
                 * the completion of a new CB, so roll the counter back and
                 * fail the submission.
                 * atomic_add_negative() returns true if the result is negative.
                 */
                if (atomic_add_negative(num_of_entries * -1, free_slots)) {
                        dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
                                num_of_entries, q->hw_queue_id);
                        atomic_add(num_of_entries, free_slots);
                        return -EAGAIN;
                }
        }

        return 0;
}
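
/*
 * Illustrative walk-through of the CQ reservation above (numbers are
 * arbitrary): if free_slots_cnt is 3 and a CS needs 5 entries,
 * atomic_add_negative(-5, ...) drives the counter to -2 and returns true,
 * so the counter is rolled back to 3 and -EAGAIN is returned. On success
 * the counter stays decremented until the reserved entries are freed again,
 * e.g. by the unroll_cq_resv path in hl_hw_queue_schedule_cs() when a later
 * step of the submission fails.
 */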

/*
 * int_queue_sanity_checks - perform some sanity checks on internal queue
 *
 * @hdev: pointer to hl_device structure
 * @q: pointer to hl_hw_queue structure
 * @num_of_entries: how many entries to check for space
 *
 * H/W queues spinlock should be taken before calling this function
 *
 * Perform the following:
 * - Make sure we have enough space in the h/w queue
 *
 */
static int int_queue_sanity_checks(struct hl_device *hdev,
                                        struct hl_hw_queue *q,
                                        int num_of_entries)
{
        int free_slots_cnt;

        if (num_of_entries > q->int_queue_len) {
                dev_err(hdev->dev,
                        "Cannot populate queue %u with %u jobs\n",
                        q->hw_queue_id, num_of_entries);
                return -ENOMEM;
        }

        /* Check we have enough space in the queue */
        free_slots_cnt = queue_free_slots(q, q->int_queue_len);

        if (free_slots_cnt < num_of_entries) {
                dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
                        q->hw_queue_id, num_of_entries);
                return -EAGAIN;
        }

        return 0;
}

/*
 * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
 * @hdev: Pointer to hl_device structure.
 * @q: Pointer to hl_hw_queue structure.
 * @num_of_entries: How many entries to check for space.
 *
 * Notice: We do not reserve queue entries so this function mustn't be called
 *         more than once per CS for the same queue
 *
 */
static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
                                        int num_of_entries)
{
        int free_slots_cnt;

        /* Check we have enough space in the queue */
        free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);

        if (free_slots_cnt < num_of_entries) {
                dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
                        q->hw_queue_id, num_of_entries);
                return -EAGAIN;
        }

        return 0;
}

/*
 * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: ID of the queue to send the CB to
 * @cb_size: size of CB
 * @cb_ptr: pointer to CB location
 *
 * This function sends a single CB, that must NOT generate a completion entry.
 * Sending CPU messages can be done instead via 'hl_hw_queue_submit_bd()'
 */
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
                                u32 cb_size, u64 cb_ptr)
{
        struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
        int rc = 0;

        hdev->asic_funcs->hw_queues_lock(hdev);

        if (hdev->disabled) {
                rc = -EPERM;
                goto out;
        }

        /*
         * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue
         * type only on init phase, when the queues are empty and being tested,
         * so there is no need for sanity checks.
         */
        if (q->queue_type != QUEUE_TYPE_HW) {
                rc = ext_queue_sanity_checks(hdev, q, 1, false);
                if (rc)
                        goto out;
        }

        hl_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);

out:
        hdev->asic_funcs->hw_queues_unlock(hdev);

        return rc;
}

/*
 * ext_queue_schedule_job - submit a JOB to an external queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void ext_queue_schedule_job(struct hl_cs_job *job)
{
        struct hl_device *hdev = job->cs->ctx->hdev;
        struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
        struct hl_cq_entry cq_pkt;
        struct hl_cq *cq;
        u64 cq_addr;
        struct hl_cb *cb;
        u32 ctl;
        u32 len;
        u64 ptr;

        /*
         * Update the JOB ID inside the BD CTL so the device would know what
         * to write in the completion queue
         */
        ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);

        cb = job->patched_cb;
        len = job->job_cb_size;
        ptr = cb->bus_address;

        /* Skip completion flow in case this is a non completion CS */
        if (!cs_needs_completion(job->cs))
                goto submit_bd;

        cq_pkt.data = cpu_to_le32(
                        ((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
                                & CQ_ENTRY_SHADOW_INDEX_MASK) |
                        FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK, 1) |
                        FIELD_PREP(CQ_ENTRY_READY_MASK, 1));

        /*
         * No need to protect pi_offset because scheduling to the
         * H/W queues is done under the scheduler mutex
         *
         * No need to check if CQ is full because it was already
         * checked in ext_queue_sanity_checks
         */
        cq = &hdev->completion_queue[q->cq_id];
        cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);

        hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
                                                job->user_cb_size,
                                                cq_addr,
                                                le32_to_cpu(cq_pkt.data),
                                                q->msi_vec,
                                                job->contains_dma_pkt);

        q->shadow_queue[hl_pi_2_offset(q->pi)] = job;

        cq->pi = hl_cq_inc_ptr(cq->pi);

submit_bd:
        hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}

/*
 * int_queue_schedule_job - submit a JOB to an internal queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void int_queue_schedule_job(struct hl_cs_job *job)
{
        struct hl_device *hdev = job->cs->ctx->hdev;
        struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
        struct hl_bd bd;
        __le64 *pi;

        bd.ctl = 0;
        bd.len = cpu_to_le32(job->job_cb_size);

        if (job->is_kernel_allocated_cb)
                /* bus_address is actually a mmu mapped address
                 * allocated from an internal pool
                 */
                bd.ptr = cpu_to_le64(job->user_cb->bus_address);
        else
                bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);

        pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd);

        q->pi++;
        q->pi &= ((q->int_queue_len << 1) - 1);

        hdev->asic_funcs->pqe_write(hdev, pi, &bd);

        hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}
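
/*
 * Pointer arithmetic example for the internal-queue flow above (values are
 * illustrative): with int_queue_len = 16, q->pi counts modulo 32 while the
 * BD slot is q->pi & 15. For q->pi = 17 the BD is written to slot 1, pi
 * becomes 18, and the doorbell is rung with that double-length value so
 * queue_free_slots() can still tell a full queue from an empty one.
 */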

/*
 * hw_queue_schedule_job - submit a JOB to a H/W queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void hw_queue_schedule_job(struct hl_cs_job *job)
{
        struct hl_device *hdev = job->cs->ctx->hdev;
        struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
        u64 ptr;
        u32 offset, ctl, len;

        /*
         * Upon PQE completion, COMP_DATA is used as the write data to the
         * completion queue (QMAN HBW message), and COMP_OFFSET is used as the
         * write address offset in the SM block (QMAN LBW message).
         * The write address offset is calculated as "COMP_OFFSET << 2".
         */
        offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
        ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
                ((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);

        len = job->job_cb_size;

        /*
         * A patched CB is created only if a user CB was allocated by driver and
         * MMU is disabled. If MMU is enabled, the user CB should be used
         * instead. If the user CB wasn't allocated by driver, assume that it
         * holds an address.
         */
        if (job->patched_cb)
                ptr = job->patched_cb->bus_address;
        else if (job->is_kernel_allocated_cb)
                ptr = job->user_cb->bus_address;
        else
                ptr = (u64) (uintptr_t) job->user_cb;

        hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}
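
/*
 * Example of the control-word composition above (numbers are illustrative):
 * with max_pending_cs = 64 and cs->sequence = 70, offset = 70 & 63 = 6, so
 * the completion of this PQE is written at SM offset 6 << 2 = 24 bytes,
 * while COMP_DATA carries the queue PI value at submission time. The
 * BD_CTL_COMP_* shift/mask values themselves are ASIC-specific.
 */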

static int init_signal_cs(struct hl_device *hdev,
                struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
        struct hl_sync_stream_properties *prop;
        struct hl_hw_sob *hw_sob;
        u32 q_idx;
        int rc = 0;

        q_idx = job->hw_queue_id;
        prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
        hw_sob = &prop->hw_sob[prop->curr_sob_offset];

        cs_cmpl->hw_sob = hw_sob;
        cs_cmpl->sob_val = prop->next_sob_val;

        dev_dbg(hdev->dev,
                "generate signal CB, sob_id: %d, sob val: %u, q_idx: %d, seq: %llu\n",
                cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx,
                cs_cmpl->cs_seq);

        /* we set an EB since we must make sure all operations are done
         * when sending the signal
         */
        hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
                                cs_cmpl->hw_sob->sob_id, 0, true);

        rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1,
                                                                false);

        job->cs->sob_addr_offset = hw_sob->sob_addr;
        job->cs->initial_sob_count = prop->next_sob_val - 1;

        return rc;
}

void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
                        struct hl_cs *cs, struct hl_cs_job *job,
                        struct hl_cs_compl *cs_cmpl)
{
        struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
        u32 offset = 0;

        cs_cmpl->hw_sob = handle->hw_sob;

        /* Note that encaps_sig_wait_offset was validated earlier in the flow
         * for an offset value which exceeds the max reserved signal count.
         * Always decrement the offset by 1, since when the user sets offset 1,
         * for example, he means to wait only for the first signal, which will
         * be pre_sob_val, and if he sets offset 2 then the required value is
         * (pre_sob_val + 1) and so on...
         * If the user sets wait offset to 0, treat it as a legacy wait CS,
         * i.e. wait for the next signal.
         */
        if (job->encaps_sig_wait_offset)
                offset = job->encaps_sig_wait_offset - 1;

        cs_cmpl->sob_val = handle->pre_sob_val + offset;
}
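
/*
 * Example of the wait-offset adjustment above (values are illustrative):
 * for a handle with pre_sob_val = 8, encaps_sig_wait_offset = 1 waits for
 * SOB value 8 (the first reserved signal), offset 2 waits for 9, and
 * offset 0 keeps the legacy behavior, which here also resolves to 8.
 */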

static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
                struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
        struct hl_gen_wait_properties wait_prop;
        struct hl_sync_stream_properties *prop;
        struct hl_cs_compl *signal_cs_cmpl;
        u32 q_idx;

        q_idx = job->hw_queue_id;
        prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

        signal_cs_cmpl = container_of(cs->signal_fence,
                                        struct hl_cs_compl,
                                        base_fence);

        if (cs->encaps_signals) {
                /* use the encaps signal handle stored earlier in the flow
                 * and set the SOB information from the encaps
                 * signals handle
                 */
                hl_hw_queue_encaps_sig_set_sob_info(hdev, cs, job, cs_cmpl);

                dev_dbg(hdev->dev, "Wait for encaps signals handle, qidx(%u), CS sequence(%llu), sob val: 0x%x, offset: %u\n",
                                cs->encaps_sig_hdl->q_idx,
                                cs->encaps_sig_hdl->cs_seq,
                                cs_cmpl->sob_val,
                                job->encaps_sig_wait_offset);
        } else {
                /* Copy the SOB id and value of the signal CS */
                cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
                cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
        }

        /* check again if the signal cs already completed.
         * if yes then don't send any wait cs since the hw_sob
         * could be in reset already. if signal is not completed
         * then get refcount to hw_sob to prevent resetting the sob
         * while wait cs is not submitted.
         * note that this check is protected by two locks,
         * hw queue lock and completion object lock,
         * and the same completion object lock also protects
         * the hw_sob reset handler function.
         * The hw_queue lock prevents the hw_sob refcount value, which is
         * changed by the signal/wait flows, from going out of sync.
         */
        spin_lock(&signal_cs_cmpl->lock);

        if (completion_done(&cs->signal_fence->completion)) {
                spin_unlock(&signal_cs_cmpl->lock);
                return -EINVAL;
        }

        kref_get(&cs_cmpl->hw_sob->kref);

        spin_unlock(&signal_cs_cmpl->lock);

        dev_dbg(hdev->dev,
                "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d, seq: %llu\n",
                cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
                prop->base_mon_id, q_idx, cs->sequence);

        wait_prop.data = (void *) job->patched_cb;
        wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
        wait_prop.sob_mask = 0x1;
        wait_prop.sob_val = cs_cmpl->sob_val;
        wait_prop.mon_id = prop->base_mon_id;
        wait_prop.q_idx = q_idx;
        wait_prop.size = 0;

        hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);

        mb();
        hl_fence_put(cs->signal_fence);
        cs->signal_fence = NULL;

        return 0;
}

/*
 * init_signal_wait_cs - initialize a signal/wait CS
 * @cs: pointer to the signal/wait CS
 *
 * H/W queues spinlock should be taken before calling this function
 */
static int init_signal_wait_cs(struct hl_cs *cs)
{
        struct hl_ctx *ctx = cs->ctx;
        struct hl_device *hdev = ctx->hdev;
        struct hl_cs_job *job;
        struct hl_cs_compl *cs_cmpl =
                        container_of(cs->fence, struct hl_cs_compl, base_fence);
        int rc = 0;

        /* There is only one job in a signal/wait CS */
        job = list_first_entry(&cs->job_list, struct hl_cs_job,
                                cs_node);

        if (cs->type & CS_TYPE_SIGNAL)
                rc = init_signal_cs(hdev, job, cs_cmpl);
        else if (cs->type & CS_TYPE_WAIT)
                rc = init_wait_cs(hdev, cs, job, cs_cmpl);

        return rc;
}

static int encaps_sig_first_staged_cs_handler
                        (struct hl_device *hdev, struct hl_cs *cs)
{
        struct hl_cs_compl *cs_cmpl =
                        container_of(cs->fence,
                                        struct hl_cs_compl, base_fence);
        struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
        struct hl_encaps_signals_mgr *mgr;
        int rc = 0;

        mgr = &cs->ctx->sig_mgr;

        spin_lock(&mgr->lock);
        encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id);
        if (encaps_sig_hdl) {
                /*
                 * Set handler CS sequence,
                 * the CS which contains the encapsulated signals.
                 */
                encaps_sig_hdl->cs_seq = cs->sequence;
                /* store the handle and set encaps signal indication,
                 * to be used later in cs_do_release to put the last
                 * reference to encaps signals handlers.
                 */
                cs_cmpl->encaps_signals = true;
                cs_cmpl->encaps_sig_hdl = encaps_sig_hdl;

                /* set hw_sob pointer in completion object
                 * since it's used in cs_do_release flow to put
                 * refcount to sob
                 */
                cs_cmpl->hw_sob = encaps_sig_hdl->hw_sob;
                cs_cmpl->sob_val = encaps_sig_hdl->pre_sob_val +
                                                encaps_sig_hdl->count;

                dev_dbg(hdev->dev, "CS seq (%llu) added to encaps signal handler id (%u), count(%u), qidx(%u), sob(%u), val(%u)\n",
                                cs->sequence, encaps_sig_hdl->id,
                                encaps_sig_hdl->count,
                                encaps_sig_hdl->q_idx,
                                cs_cmpl->hw_sob->sob_id,
                                cs_cmpl->sob_val);

        } else {
                dev_err(hdev->dev, "encaps handle id(%u) wasn't found!\n",
                                cs->encaps_sig_hdl_id);
                rc = -EINVAL;
        }

        spin_unlock(&mgr->lock);

        return rc;
}
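
/*
 * Example of the completion value set above (numbers are illustrative):
 * for a handle that reserved count = 4 signals starting at pre_sob_val = 8,
 * the staged-first CS is matched against SOB value 8 + 4 = 12, i.e. the
 * point at which all of the reserved signals are assumed to have been
 * signaled (each signal incrementing the SOB by one).
 */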

/*
 * hl_hw_queue_schedule_cs - schedule a command submission
 * @cs: pointer to the CS
 */
int hl_hw_queue_schedule_cs(struct hl_cs *cs)
{
        enum hl_device_status status;
        struct hl_cs_counters_atomic *cntr;
        struct hl_ctx *ctx = cs->ctx;
        struct hl_device *hdev = ctx->hdev;
        struct hl_cs_job *job, *tmp;
        struct hl_hw_queue *q;
        int rc = 0, i, cq_cnt;
        bool first_entry;
        u32 max_queues;

        cntr = &hdev->aggregated_cs_counters;

        hdev->asic_funcs->hw_queues_lock(hdev);

        if (!hl_device_operational(hdev, &status)) {
                atomic64_inc(&cntr->device_in_reset_drop_cnt);
                atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt);
                dev_err(hdev->dev,
                        "device is %s, CS rejected!\n", hdev->status[status]);
                rc = -EPERM;
                goto out;
        }

        max_queues = hdev->asic_prop.max_queues;

        q = &hdev->kernel_queues[0];
        for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
                if (cs->jobs_in_queue_cnt[i]) {
                        switch (q->queue_type) {
                        case QUEUE_TYPE_EXT:
                                rc = ext_queue_sanity_checks(hdev, q,
                                                cs->jobs_in_queue_cnt[i],
                                                cs_needs_completion(cs) ?
                                                                true : false);
                                break;
                        case QUEUE_TYPE_INT:
                                rc = int_queue_sanity_checks(hdev, q,
                                                cs->jobs_in_queue_cnt[i]);
                                break;
                        case QUEUE_TYPE_HW:
                                rc = hw_queue_sanity_checks(hdev, q,
                                                cs->jobs_in_queue_cnt[i]);
                                break;
                        default:
                                dev_err(hdev->dev, "Queue type %d is invalid\n",
                                        q->queue_type);
                                rc = -EINVAL;
                                break;
                        }

                        if (rc) {
                                atomic64_inc(
                                        &ctx->cs_counters.queue_full_drop_cnt);
                                atomic64_inc(&cntr->queue_full_drop_cnt);
                                goto unroll_cq_resv;
                        }

                        if (q->queue_type == QUEUE_TYPE_EXT)
                                cq_cnt++;
                }
        }

        if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) {
                rc = init_signal_wait_cs(cs);
                if (rc)
                        goto unroll_cq_resv;
        } else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) {
                rc = hdev->asic_funcs->collective_wait_init_cs(cs);
                if (rc)
                        goto unroll_cq_resv;
        }

        rc = hdev->asic_funcs->pre_schedule_cs(cs);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed in pre-submission operations of CS %d.%llu\n",
                        ctx->asid, cs->sequence);
                goto unroll_cq_resv;
        }

        hdev->shadow_cs_queue[cs->sequence &
                                (hdev->asic_prop.max_pending_cs - 1)] = cs;

        if (cs->encaps_signals && cs->staged_first) {
                rc = encaps_sig_first_staged_cs_handler(hdev, cs);
                if (rc)
                        goto unroll_cq_resv;
        }

        spin_lock(&hdev->cs_mirror_lock);

        /* Verify staged CS exists and add to the staged list */
        if (cs->staged_cs && !cs->staged_first) {
                struct hl_cs *staged_cs;

                staged_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
                if (!staged_cs) {
                        dev_err(hdev->dev,
                                "Cannot find staged submission sequence %llu",
                                cs->staged_sequence);
                        rc = -EINVAL;
                        goto unlock_cs_mirror;
                }

                if (is_staged_cs_last_exists(hdev, staged_cs)) {
                        dev_err(hdev->dev,
                                "Staged submission sequence %llu already submitted",
                                cs->staged_sequence);
                        rc = -EINVAL;
                        goto unlock_cs_mirror;
                }

                list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);

                /* update stream map of the first CS */
                if (hdev->supports_wait_for_multi_cs)
                        staged_cs->fence->stream_master_qid_map |=
                                        cs->fence->stream_master_qid_map;
        }

        list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);

        /* Queue TDR if the CS is the first entry and if timeout is wanted */
        first_entry = list_first_entry(&hdev->cs_mirror_list,
                                        struct hl_cs, mirror_node) == cs;
        if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
                                first_entry && cs_needs_timeout(cs)) {
                cs->tdr_active = true;
                schedule_delayed_work(&cs->work_tdr, cs->timeout_jiffies);

        }

        spin_unlock(&hdev->cs_mirror_lock);

        list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
                switch (job->queue_type) {
                case QUEUE_TYPE_EXT:
                        ext_queue_schedule_job(job);
                        break;
                case QUEUE_TYPE_INT:
                        int_queue_schedule_job(job);
                        break;
                case QUEUE_TYPE_HW:
                        hw_queue_schedule_job(job);
                        break;
                default:
                        break;
                }

        cs->submitted = true;

        goto out;

unlock_cs_mirror:
        spin_unlock(&hdev->cs_mirror_lock);
unroll_cq_resv:
        q = &hdev->kernel_queues[0];
        for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
                if ((q->queue_type == QUEUE_TYPE_EXT) &&
                                                (cs->jobs_in_queue_cnt[i])) {
                        atomic_t *free_slots =
                                &hdev->completion_queue[i].free_slots_cnt;
                        atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
                        cq_cnt--;
                }
        }

out:
        hdev->asic_funcs->hw_queues_unlock(hdev);

        return rc;
}

/*
 * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: which queue to increment its ci
 */
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
{
        struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];

        atomic_inc(&q->ci);
}

static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
                                        bool is_cpu_queue)
{
        void *p;
        int rc;

        if (is_cpu_queue)
                p = hl_cpu_accessible_dma_pool_alloc(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address);
        else
                p = hl_asic_dma_alloc_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address,
                                                GFP_KERNEL | __GFP_ZERO);
        if (!p)
                return -ENOMEM;

        q->kernel_address = p;

        q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL);
        if (!q->shadow_queue) {
                dev_err(hdev->dev,
                        "Failed to allocate shadow queue for H/W queue %d\n",
                        q->hw_queue_id);
                rc = -ENOMEM;
                goto free_queue;
        }

        /* Make sure read/write pointers are initialized to start of queue */
        atomic_set(&q->ci, 0);
        q->pi = 0;

        return 0;

free_queue:
        if (is_cpu_queue)
                hl_cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address);
        else
                hl_asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address,
                                                q->bus_address);

        return rc;
}

static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
        void *p;

        p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
                                        &q->bus_address, &q->int_queue_len);
        if (!p) {
                dev_err(hdev->dev,
                        "Failed to get base address for internal queue %d\n",
                        q->hw_queue_id);
                return -EFAULT;
        }

        q->kernel_address = p;
        q->pi = 0;
        atomic_set(&q->ci, 0);

        return 0;
}

static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
        return ext_and_cpu_queue_init(hdev, q, true);
}

static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
        return ext_and_cpu_queue_init(hdev, q, false);
}

static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
        void *p;

        p = hl_asic_dma_alloc_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address,
                                        GFP_KERNEL | __GFP_ZERO);
        if (!p)
                return -ENOMEM;

        q->kernel_address = p;

        /* Make sure read/write pointers are initialized to start of queue */
        atomic_set(&q->ci, 0);
        q->pi = 0;

        return 0;
}

static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
{
        struct hl_sync_stream_properties *sync_stream_prop;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_hw_sob *hw_sob;
        int sob, reserved_mon_idx, queue_idx;

        sync_stream_prop = &hdev->kernel_queues[q_idx].sync_stream_prop;

        /* We use 'collective_mon_idx' as a running index in order to reserve
         * monitors for collective master/slave queues.
         * collective master queue gets 2 reserved monitors
         * collective slave queue gets 1 reserved monitor
         */
        if (hdev->kernel_queues[q_idx].collective_mode ==
                        HL_COLLECTIVE_MASTER) {
                reserved_mon_idx = hdev->collective_mon_idx;

                /* reserve the first monitor for collective master queue */
                sync_stream_prop->collective_mstr_mon_id[0] =
                        prop->collective_first_mon + reserved_mon_idx;

                /* reserve the second monitor for collective master queue */
                sync_stream_prop->collective_mstr_mon_id[1] =
                        prop->collective_first_mon + reserved_mon_idx + 1;

                hdev->collective_mon_idx += HL_COLLECTIVE_RSVD_MSTR_MONS;
        } else if (hdev->kernel_queues[q_idx].collective_mode ==
                        HL_COLLECTIVE_SLAVE) {
                reserved_mon_idx = hdev->collective_mon_idx++;

                /* reserve a monitor for collective slave queue */
                sync_stream_prop->collective_slave_mon_id =
                        prop->collective_first_mon + reserved_mon_idx;
        }

        if (!hdev->kernel_queues[q_idx].supports_sync_stream)
                return;

        queue_idx = hdev->sync_stream_queue_idx++;

        sync_stream_prop->base_sob_id = prop->sync_stream_first_sob +
                        (queue_idx * HL_RSVD_SOBS);
        sync_stream_prop->base_mon_id = prop->sync_stream_first_mon +
                        (queue_idx * HL_RSVD_MONS);
        sync_stream_prop->next_sob_val = 1;
        sync_stream_prop->curr_sob_offset = 0;

        for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
                hw_sob = &sync_stream_prop->hw_sob[sob];
                hw_sob->hdev = hdev;
                hw_sob->sob_id = sync_stream_prop->base_sob_id + sob;
                hw_sob->sob_addr =
                        hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
                hw_sob->q_idx = q_idx;
                kref_init(&hw_sob->kref);
        }
}
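
/*
 * Reservation example for the sync-stream properties set above (the macro
 * values are assumed, not quoted from habanalabs.h): if HL_RSVD_SOBS = 4
 * and HL_RSVD_MONS = 2, the third sync-stream queue (queue_idx = 2) gets
 * base_sob_id = sync_stream_first_sob + 8 and base_mon_id =
 * sync_stream_first_mon + 4, so every such queue owns a disjoint block of
 * SOBs and monitors.
 */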

static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
{
        struct hl_sync_stream_properties *prop =
                        &hdev->kernel_queues[q_idx].sync_stream_prop;

        /*
         * In case we got here due to a stuck CS, the refcnt might be bigger
         * than 1 and therefore we reset it.
         */
        kref_init(&prop->hw_sob[prop->curr_sob_offset].kref);
        prop->curr_sob_offset = 0;
        prop->next_sob_val = 1;
}

/*
 * queue_init - main initialization function for H/W queue object
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 * @hw_queue_id: The id of the H/W queue
 *
 * Allocate dma-able memory for the queue and initialize fields
 * Returns 0 on success
 */
static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
                        u32 hw_queue_id)
{
        int rc;

        q->hw_queue_id = hw_queue_id;

        switch (q->queue_type) {
        case QUEUE_TYPE_EXT:
                rc = ext_queue_init(hdev, q);
                break;
        case QUEUE_TYPE_INT:
                rc = int_queue_init(hdev, q);
                break;
        case QUEUE_TYPE_CPU:
                rc = cpu_queue_init(hdev, q);
                break;
        case QUEUE_TYPE_HW:
                rc = hw_queue_init(hdev, q);
                break;
        case QUEUE_TYPE_NA:
                q->valid = 0;
                return 0;
        default:
                dev_crit(hdev->dev, "wrong queue type %d during init\n",
                        q->queue_type);
                rc = -EINVAL;
                break;
        }

        sync_stream_queue_init(hdev, q->hw_queue_id);

        if (rc)
                return rc;

        q->valid = 1;

        return 0;
}

/*
 * queue_fini - destroy queue
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 *
 * Free the queue memory
 */
static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
{
        if (!q->valid)
                return;

        /*
         * If we arrived here, there are no jobs waiting on this queue
         * so we can safely remove it.
         * This is because this function can only be called when:
         * 1. Either a context is deleted, which only can occur if all its
         *    jobs were finished
         * 2. A context wasn't able to be created due to failure or timeout,
         *    which means there are no jobs on the queue yet
         *
         * The only exceptions are the queues of the kernel context, but
         * if they are being destroyed, it means that the entire module is
         * being removed. If the module is removed, it means there is no open
         * user context. It also means that if a job was submitted by
         * the kernel driver (e.g. context creation), the job itself was
         * released by the kernel driver when a timeout occurred on its
         * Completion. Thus, we don't need to release it again.
         */

        if (q->queue_type == QUEUE_TYPE_INT)
                return;

        kfree(q->shadow_queue);

        if (q->queue_type == QUEUE_TYPE_CPU)
                hl_cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address);
        else
                hl_asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address,
                                                q->bus_address);
}

int hl_hw_queues_create(struct hl_device *hdev)
{
        struct asic_fixed_properties *asic = &hdev->asic_prop;
        struct hl_hw_queue *q;
        int i, rc, q_ready_cnt;

        hdev->kernel_queues = kcalloc(asic->max_queues,
                                sizeof(*hdev->kernel_queues), GFP_KERNEL);

        if (!hdev->kernel_queues) {
                dev_err(hdev->dev, "Not enough memory for H/W queues\n");
                return -ENOMEM;
        }

        /* Initialize the H/W queues */
        for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
                        i < asic->max_queues ; i++, q_ready_cnt++, q++) {

                q->queue_type = asic->hw_queues_props[i].type;
                q->supports_sync_stream =
                                asic->hw_queues_props[i].supports_sync_stream;
                q->collective_mode = asic->hw_queues_props[i].collective_mode;
                rc = queue_init(hdev, q, i);
                if (rc) {
                        dev_err(hdev->dev,
                                "failed to initialize queue %d\n", i);
                        goto release_queues;
                }
        }

        return 0;

release_queues:
        for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
                queue_fini(hdev, q);

        kfree(hdev->kernel_queues);

        return rc;
}

void hl_hw_queues_destroy(struct hl_device *hdev)
{
        struct hl_hw_queue *q;
        u32 max_queues = hdev->asic_prop.max_queues;
        int i;

        for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
                queue_fini(hdev, q);

        kfree(hdev->kernel_queues);
}

void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
{
        struct hl_hw_queue *q;
        u32 max_queues = hdev->asic_prop.max_queues;
        int i;

        for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
                if ((!q->valid) ||
                        ((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
                        continue;
                q->pi = 0;
                atomic_set(&q->ci, 0);

                if (q->supports_sync_stream)
                        sync_stream_queue_reset(hdev, q->hw_queue_id);
        }
}