162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci/* 462306a36Sopenharmony_ci * Copyright 2016-2021 HabanaLabs, Ltd. 562306a36Sopenharmony_ci * All Rights Reserved. 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <uapi/drm/habanalabs_accel.h> 962306a36Sopenharmony_ci#include "habanalabs.h" 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci#include <linux/uaccess.h> 1262306a36Sopenharmony_ci#include <linux/slab.h> 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ 1562306a36Sopenharmony_ci HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \ 1662306a36Sopenharmony_ci HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \ 1762306a36Sopenharmony_ci HL_CS_FLAGS_ENGINES_COMMAND | HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#define MAX_TS_ITER_NUM 100 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_ci/** 2362306a36Sopenharmony_ci * enum hl_cs_wait_status - cs wait status 2462306a36Sopenharmony_ci * @CS_WAIT_STATUS_BUSY: cs was not completed yet 2562306a36Sopenharmony_ci * @CS_WAIT_STATUS_COMPLETED: cs completed 2662306a36Sopenharmony_ci * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone 2762306a36Sopenharmony_ci */ 2862306a36Sopenharmony_cienum hl_cs_wait_status { 2962306a36Sopenharmony_ci CS_WAIT_STATUS_BUSY, 3062306a36Sopenharmony_ci CS_WAIT_STATUS_COMPLETED, 3162306a36Sopenharmony_ci CS_WAIT_STATUS_GONE 3262306a36Sopenharmony_ci}; 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_cistatic void job_wq_completion(struct work_struct *work); 3562306a36Sopenharmony_cistatic int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq, 3662306a36Sopenharmony_ci enum hl_cs_wait_status *status, s64 *timestamp); 3762306a36Sopenharmony_cistatic void cs_do_release(struct kref *ref); 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_cistatic void hl_push_cs_outcome(struct hl_device *hdev, 4062306a36Sopenharmony_ci struct hl_cs_outcome_store *outcome_store, 4162306a36Sopenharmony_ci u64 seq, ktime_t ts, int error) 4262306a36Sopenharmony_ci{ 4362306a36Sopenharmony_ci struct hl_cs_outcome *node; 4462306a36Sopenharmony_ci unsigned long flags; 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci /* 4762306a36Sopenharmony_ci * CS outcome store supports the following operations: 4862306a36Sopenharmony_ci * push outcome - store a recent CS outcome in the store 4962306a36Sopenharmony_ci * pop outcome - retrieve a SPECIFIC (by seq) CS outcome from the store 5062306a36Sopenharmony_ci * It uses 2 lists: used list and free list. 5162306a36Sopenharmony_ci * It has a pre-allocated amount of nodes, each node stores 5262306a36Sopenharmony_ci * a single CS outcome. 5362306a36Sopenharmony_ci * Initially, all the nodes are in the free list. 5462306a36Sopenharmony_ci * On push outcome, a node (any) is taken from the free list, its 5562306a36Sopenharmony_ci * information is filled in, and the node is moved to the used list. 5662306a36Sopenharmony_ci * It is possible, that there are no nodes left in the free list. 5762306a36Sopenharmony_ci * In this case, we will lose some information about old outcomes. We 5862306a36Sopenharmony_ci * will pop the OLDEST node from the used list, and make it free. 5962306a36Sopenharmony_ci * On pop, the node is searched for in the used list (using a search 6062306a36Sopenharmony_ci * index). 6162306a36Sopenharmony_ci * If found, the node is then removed from the used list, and moved 6262306a36Sopenharmony_ci * back to the free list. The outcome data that the node contained is 6362306a36Sopenharmony_ci * returned back to the user. 6462306a36Sopenharmony_ci */ 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci spin_lock_irqsave(&outcome_store->db_lock, flags); 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci if (list_empty(&outcome_store->free_list)) { 6962306a36Sopenharmony_ci node = list_last_entry(&outcome_store->used_list, 7062306a36Sopenharmony_ci struct hl_cs_outcome, list_link); 7162306a36Sopenharmony_ci hash_del(&node->map_link); 7262306a36Sopenharmony_ci dev_dbg(hdev->dev, "CS %llu outcome was lost\n", node->seq); 7362306a36Sopenharmony_ci } else { 7462306a36Sopenharmony_ci node = list_last_entry(&outcome_store->free_list, 7562306a36Sopenharmony_ci struct hl_cs_outcome, list_link); 7662306a36Sopenharmony_ci } 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci list_del_init(&node->list_link); 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci node->seq = seq; 8162306a36Sopenharmony_ci node->ts = ts; 8262306a36Sopenharmony_ci node->error = error; 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci list_add(&node->list_link, &outcome_store->used_list); 8562306a36Sopenharmony_ci hash_add(outcome_store->outcome_map, &node->map_link, node->seq); 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci spin_unlock_irqrestore(&outcome_store->db_lock, flags); 8862306a36Sopenharmony_ci} 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_cistatic bool hl_pop_cs_outcome(struct hl_cs_outcome_store *outcome_store, 9162306a36Sopenharmony_ci u64 seq, ktime_t *ts, int *error) 9262306a36Sopenharmony_ci{ 9362306a36Sopenharmony_ci struct hl_cs_outcome *node; 9462306a36Sopenharmony_ci unsigned long flags; 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci spin_lock_irqsave(&outcome_store->db_lock, flags); 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_ci hash_for_each_possible(outcome_store->outcome_map, node, map_link, seq) 9962306a36Sopenharmony_ci if (node->seq == seq) { 10062306a36Sopenharmony_ci *ts = node->ts; 10162306a36Sopenharmony_ci *error = node->error; 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci hash_del(&node->map_link); 10462306a36Sopenharmony_ci list_del_init(&node->list_link); 10562306a36Sopenharmony_ci list_add(&node->list_link, &outcome_store->free_list); 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci spin_unlock_irqrestore(&outcome_store->db_lock, flags); 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci return true; 11062306a36Sopenharmony_ci } 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci spin_unlock_irqrestore(&outcome_store->db_lock, flags); 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci return false; 11562306a36Sopenharmony_ci} 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_cistatic void hl_sob_reset(struct kref *ref) 11862306a36Sopenharmony_ci{ 11962306a36Sopenharmony_ci struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob, 12062306a36Sopenharmony_ci kref); 12162306a36Sopenharmony_ci struct hl_device *hdev = hw_sob->hdev; 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id); 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci hdev->asic_funcs->reset_sob(hdev, hw_sob); 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci hw_sob->need_reset = false; 12862306a36Sopenharmony_ci} 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_civoid hl_sob_reset_error(struct kref *ref) 13162306a36Sopenharmony_ci{ 13262306a36Sopenharmony_ci struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob, 13362306a36Sopenharmony_ci kref); 13462306a36Sopenharmony_ci struct hl_device *hdev = hw_sob->hdev; 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci dev_crit(hdev->dev, 13762306a36Sopenharmony_ci "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n", 13862306a36Sopenharmony_ci hw_sob->q_idx, hw_sob->sob_id); 13962306a36Sopenharmony_ci} 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_civoid hw_sob_put(struct hl_hw_sob *hw_sob) 14262306a36Sopenharmony_ci{ 14362306a36Sopenharmony_ci if (hw_sob) 14462306a36Sopenharmony_ci kref_put(&hw_sob->kref, hl_sob_reset); 14562306a36Sopenharmony_ci} 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_cistatic void hw_sob_put_err(struct hl_hw_sob *hw_sob) 14862306a36Sopenharmony_ci{ 14962306a36Sopenharmony_ci if (hw_sob) 15062306a36Sopenharmony_ci kref_put(&hw_sob->kref, hl_sob_reset_error); 15162306a36Sopenharmony_ci} 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_civoid hw_sob_get(struct hl_hw_sob *hw_sob) 15462306a36Sopenharmony_ci{ 15562306a36Sopenharmony_ci if (hw_sob) 15662306a36Sopenharmony_ci kref_get(&hw_sob->kref); 15762306a36Sopenharmony_ci} 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci/** 16062306a36Sopenharmony_ci * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet 16162306a36Sopenharmony_ci * @sob_base: sob base id 16262306a36Sopenharmony_ci * @sob_mask: sob user mask, each bit represents a sob offset from sob base 16362306a36Sopenharmony_ci * @mask: generated mask 16462306a36Sopenharmony_ci * 16562306a36Sopenharmony_ci * Return: 0 if given parameters are valid 16662306a36Sopenharmony_ci */ 16762306a36Sopenharmony_ciint hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask) 16862306a36Sopenharmony_ci{ 16962306a36Sopenharmony_ci int i; 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci if (sob_mask == 0) 17262306a36Sopenharmony_ci return -EINVAL; 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci if (sob_mask == 0x1) { 17562306a36Sopenharmony_ci *mask = ~(1 << (sob_base & 0x7)); 17662306a36Sopenharmony_ci } else { 17762306a36Sopenharmony_ci /* find msb in order to verify sob range is valid */ 17862306a36Sopenharmony_ci for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--) 17962306a36Sopenharmony_ci if (BIT(i) & sob_mask) 18062306a36Sopenharmony_ci break; 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1)) 18362306a36Sopenharmony_ci return -EINVAL; 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci *mask = ~sob_mask; 18662306a36Sopenharmony_ci } 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci return 0; 18962306a36Sopenharmony_ci} 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_cistatic void hl_fence_release(struct kref *kref) 19262306a36Sopenharmony_ci{ 19362306a36Sopenharmony_ci struct hl_fence *fence = 19462306a36Sopenharmony_ci container_of(kref, struct hl_fence, refcount); 19562306a36Sopenharmony_ci struct hl_cs_compl *hl_cs_cmpl = 19662306a36Sopenharmony_ci container_of(fence, struct hl_cs_compl, base_fence); 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci kfree(hl_cs_cmpl); 19962306a36Sopenharmony_ci} 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_civoid hl_fence_put(struct hl_fence *fence) 20262306a36Sopenharmony_ci{ 20362306a36Sopenharmony_ci if (IS_ERR_OR_NULL(fence)) 20462306a36Sopenharmony_ci return; 20562306a36Sopenharmony_ci kref_put(&fence->refcount, hl_fence_release); 20662306a36Sopenharmony_ci} 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_civoid hl_fences_put(struct hl_fence **fence, int len) 20962306a36Sopenharmony_ci{ 21062306a36Sopenharmony_ci int i; 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci for (i = 0; i < len; i++, fence++) 21362306a36Sopenharmony_ci hl_fence_put(*fence); 21462306a36Sopenharmony_ci} 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_civoid hl_fence_get(struct hl_fence *fence) 21762306a36Sopenharmony_ci{ 21862306a36Sopenharmony_ci if (fence) 21962306a36Sopenharmony_ci kref_get(&fence->refcount); 22062306a36Sopenharmony_ci} 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_cistatic void hl_fence_init(struct hl_fence *fence, u64 sequence) 22362306a36Sopenharmony_ci{ 22462306a36Sopenharmony_ci kref_init(&fence->refcount); 22562306a36Sopenharmony_ci fence->cs_sequence = sequence; 22662306a36Sopenharmony_ci fence->error = 0; 22762306a36Sopenharmony_ci fence->timestamp = ktime_set(0, 0); 22862306a36Sopenharmony_ci fence->mcs_handling_done = false; 22962306a36Sopenharmony_ci init_completion(&fence->completion); 23062306a36Sopenharmony_ci} 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_civoid cs_get(struct hl_cs *cs) 23362306a36Sopenharmony_ci{ 23462306a36Sopenharmony_ci kref_get(&cs->refcount); 23562306a36Sopenharmony_ci} 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_cistatic int cs_get_unless_zero(struct hl_cs *cs) 23862306a36Sopenharmony_ci{ 23962306a36Sopenharmony_ci return kref_get_unless_zero(&cs->refcount); 24062306a36Sopenharmony_ci} 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_cistatic void cs_put(struct hl_cs *cs) 24362306a36Sopenharmony_ci{ 24462306a36Sopenharmony_ci kref_put(&cs->refcount, cs_do_release); 24562306a36Sopenharmony_ci} 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_cistatic void cs_job_do_release(struct kref *ref) 24862306a36Sopenharmony_ci{ 24962306a36Sopenharmony_ci struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount); 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci kfree(job); 25262306a36Sopenharmony_ci} 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_cistatic void hl_cs_job_put(struct hl_cs_job *job) 25562306a36Sopenharmony_ci{ 25662306a36Sopenharmony_ci kref_put(&job->refcount, cs_job_do_release); 25762306a36Sopenharmony_ci} 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_cibool cs_needs_completion(struct hl_cs *cs) 26062306a36Sopenharmony_ci{ 26162306a36Sopenharmony_ci /* In case this is a staged CS, only the last CS in sequence should 26262306a36Sopenharmony_ci * get a completion, any non staged CS will always get a completion 26362306a36Sopenharmony_ci */ 26462306a36Sopenharmony_ci if (cs->staged_cs && !cs->staged_last) 26562306a36Sopenharmony_ci return false; 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci return true; 26862306a36Sopenharmony_ci} 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_cibool cs_needs_timeout(struct hl_cs *cs) 27162306a36Sopenharmony_ci{ 27262306a36Sopenharmony_ci /* In case this is a staged CS, only the first CS in sequence should 27362306a36Sopenharmony_ci * get a timeout, any non staged CS will always get a timeout 27462306a36Sopenharmony_ci */ 27562306a36Sopenharmony_ci if (cs->staged_cs && !cs->staged_first) 27662306a36Sopenharmony_ci return false; 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci return true; 27962306a36Sopenharmony_ci} 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_cistatic bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job) 28262306a36Sopenharmony_ci{ 28362306a36Sopenharmony_ci /* Patched CB is created for external queues jobs */ 28462306a36Sopenharmony_ci return (job->queue_type == QUEUE_TYPE_EXT); 28562306a36Sopenharmony_ci} 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci/* 28862306a36Sopenharmony_ci * cs_parser - parse the user command submission 28962306a36Sopenharmony_ci * 29062306a36Sopenharmony_ci * @hpriv : pointer to the private data of the fd 29162306a36Sopenharmony_ci * @job : pointer to the job that holds the command submission info 29262306a36Sopenharmony_ci * 29362306a36Sopenharmony_ci * The function parses the command submission of the user. It calls the 29462306a36Sopenharmony_ci * ASIC specific parser, which returns a list of memory blocks to send 29562306a36Sopenharmony_ci * to the device as different command buffers 29662306a36Sopenharmony_ci * 29762306a36Sopenharmony_ci */ 29862306a36Sopenharmony_cistatic int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) 29962306a36Sopenharmony_ci{ 30062306a36Sopenharmony_ci struct hl_device *hdev = hpriv->hdev; 30162306a36Sopenharmony_ci struct hl_cs_parser parser; 30262306a36Sopenharmony_ci int rc; 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci parser.ctx_id = job->cs->ctx->asid; 30562306a36Sopenharmony_ci parser.cs_sequence = job->cs->sequence; 30662306a36Sopenharmony_ci parser.job_id = job->id; 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci parser.hw_queue_id = job->hw_queue_id; 30962306a36Sopenharmony_ci parser.job_userptr_list = &job->userptr_list; 31062306a36Sopenharmony_ci parser.patched_cb = NULL; 31162306a36Sopenharmony_ci parser.user_cb = job->user_cb; 31262306a36Sopenharmony_ci parser.user_cb_size = job->user_cb_size; 31362306a36Sopenharmony_ci parser.queue_type = job->queue_type; 31462306a36Sopenharmony_ci parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; 31562306a36Sopenharmony_ci job->patched_cb = NULL; 31662306a36Sopenharmony_ci parser.completion = cs_needs_completion(job->cs); 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci rc = hdev->asic_funcs->cs_parser(hdev, &parser); 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_ci if (is_cb_patched(hdev, job)) { 32162306a36Sopenharmony_ci if (!rc) { 32262306a36Sopenharmony_ci job->patched_cb = parser.patched_cb; 32362306a36Sopenharmony_ci job->job_cb_size = parser.patched_cb_size; 32462306a36Sopenharmony_ci job->contains_dma_pkt = parser.contains_dma_pkt; 32562306a36Sopenharmony_ci atomic_inc(&job->patched_cb->cs_cnt); 32662306a36Sopenharmony_ci } 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci /* 32962306a36Sopenharmony_ci * Whether the parsing worked or not, we don't need the 33062306a36Sopenharmony_ci * original CB anymore because it was already parsed and 33162306a36Sopenharmony_ci * won't be accessed again for this CS 33262306a36Sopenharmony_ci */ 33362306a36Sopenharmony_ci atomic_dec(&job->user_cb->cs_cnt); 33462306a36Sopenharmony_ci hl_cb_put(job->user_cb); 33562306a36Sopenharmony_ci job->user_cb = NULL; 33662306a36Sopenharmony_ci } else if (!rc) { 33762306a36Sopenharmony_ci job->job_cb_size = job->user_cb_size; 33862306a36Sopenharmony_ci } 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci return rc; 34162306a36Sopenharmony_ci} 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_cistatic void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job) 34462306a36Sopenharmony_ci{ 34562306a36Sopenharmony_ci struct hl_cs *cs = job->cs; 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci if (is_cb_patched(hdev, job)) { 34862306a36Sopenharmony_ci hl_userptr_delete_list(hdev, &job->userptr_list); 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_ci /* 35162306a36Sopenharmony_ci * We might arrive here from rollback and patched CB wasn't 35262306a36Sopenharmony_ci * created, so we need to check it's not NULL 35362306a36Sopenharmony_ci */ 35462306a36Sopenharmony_ci if (job->patched_cb) { 35562306a36Sopenharmony_ci atomic_dec(&job->patched_cb->cs_cnt); 35662306a36Sopenharmony_ci hl_cb_put(job->patched_cb); 35762306a36Sopenharmony_ci } 35862306a36Sopenharmony_ci } 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci /* For H/W queue jobs, if a user CB was allocated by driver, 36162306a36Sopenharmony_ci * the user CB isn't released in cs_parser() and thus should be 36262306a36Sopenharmony_ci * released here. This is also true for INT queues jobs which were 36362306a36Sopenharmony_ci * allocated by driver. 36462306a36Sopenharmony_ci */ 36562306a36Sopenharmony_ci if (job->is_kernel_allocated_cb && 36662306a36Sopenharmony_ci (job->queue_type == QUEUE_TYPE_HW || job->queue_type == QUEUE_TYPE_INT)) { 36762306a36Sopenharmony_ci atomic_dec(&job->user_cb->cs_cnt); 36862306a36Sopenharmony_ci hl_cb_put(job->user_cb); 36962306a36Sopenharmony_ci } 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci /* 37262306a36Sopenharmony_ci * This is the only place where there can be multiple threads 37362306a36Sopenharmony_ci * modifying the list at the same time 37462306a36Sopenharmony_ci */ 37562306a36Sopenharmony_ci spin_lock(&cs->job_lock); 37662306a36Sopenharmony_ci list_del(&job->cs_node); 37762306a36Sopenharmony_ci spin_unlock(&cs->job_lock); 37862306a36Sopenharmony_ci 37962306a36Sopenharmony_ci hl_debugfs_remove_job(hdev, job); 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_ci /* We decrement reference only for a CS that gets completion 38262306a36Sopenharmony_ci * because the reference was incremented only for this kind of CS 38362306a36Sopenharmony_ci * right before it was scheduled. 38462306a36Sopenharmony_ci * 38562306a36Sopenharmony_ci * In staged submission, only the last CS marked as 'staged_last' 38662306a36Sopenharmony_ci * gets completion, hence its release function will be called from here. 38762306a36Sopenharmony_ci * As for all the rest CS's in the staged submission which do not get 38862306a36Sopenharmony_ci * completion, their CS reference will be decremented by the 38962306a36Sopenharmony_ci * 'staged_last' CS during the CS release flow. 39062306a36Sopenharmony_ci * All relevant PQ CI counters will be incremented during the CS release 39162306a36Sopenharmony_ci * flow by calling 'hl_hw_queue_update_ci'. 39262306a36Sopenharmony_ci */ 39362306a36Sopenharmony_ci if (cs_needs_completion(cs) && 39462306a36Sopenharmony_ci (job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) { 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_ci /* In CS based completions, the timestamp is already available, 39762306a36Sopenharmony_ci * so no need to extract it from job 39862306a36Sopenharmony_ci */ 39962306a36Sopenharmony_ci if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB) 40062306a36Sopenharmony_ci cs->completion_timestamp = job->timestamp; 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_ci cs_put(cs); 40362306a36Sopenharmony_ci } 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci hl_cs_job_put(job); 40662306a36Sopenharmony_ci} 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ci/* 40962306a36Sopenharmony_ci * hl_staged_cs_find_first - locate the first CS in this staged submission 41062306a36Sopenharmony_ci * 41162306a36Sopenharmony_ci * @hdev: pointer to device structure 41262306a36Sopenharmony_ci * @cs_seq: staged submission sequence number 41362306a36Sopenharmony_ci * 41462306a36Sopenharmony_ci * @note: This function must be called under 'hdev->cs_mirror_lock' 41562306a36Sopenharmony_ci * 41662306a36Sopenharmony_ci * Find and return a CS pointer with the given sequence 41762306a36Sopenharmony_ci */ 41862306a36Sopenharmony_cistruct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq) 41962306a36Sopenharmony_ci{ 42062306a36Sopenharmony_ci struct hl_cs *cs; 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ci list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node) 42362306a36Sopenharmony_ci if (cs->staged_cs && cs->staged_first && 42462306a36Sopenharmony_ci cs->sequence == cs_seq) 42562306a36Sopenharmony_ci return cs; 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci return NULL; 42862306a36Sopenharmony_ci} 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci/* 43162306a36Sopenharmony_ci * is_staged_cs_last_exists - returns true if the last CS in sequence exists 43262306a36Sopenharmony_ci * 43362306a36Sopenharmony_ci * @hdev: pointer to device structure 43462306a36Sopenharmony_ci * @cs: staged submission member 43562306a36Sopenharmony_ci * 43662306a36Sopenharmony_ci */ 43762306a36Sopenharmony_cibool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs) 43862306a36Sopenharmony_ci{ 43962306a36Sopenharmony_ci struct hl_cs *last_entry; 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs, 44262306a36Sopenharmony_ci staged_cs_node); 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci if (last_entry->staged_last) 44562306a36Sopenharmony_ci return true; 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci return false; 44862306a36Sopenharmony_ci} 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci/* 45162306a36Sopenharmony_ci * staged_cs_get - get CS reference if this CS is a part of a staged CS 45262306a36Sopenharmony_ci * 45362306a36Sopenharmony_ci * @hdev: pointer to device structure 45462306a36Sopenharmony_ci * @cs: current CS 45562306a36Sopenharmony_ci * @cs_seq: staged submission sequence number 45662306a36Sopenharmony_ci * 45762306a36Sopenharmony_ci * Increment CS reference for every CS in this staged submission except for 45862306a36Sopenharmony_ci * the CS which get completion. 45962306a36Sopenharmony_ci */ 46062306a36Sopenharmony_cistatic void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs) 46162306a36Sopenharmony_ci{ 46262306a36Sopenharmony_ci /* Only the last CS in this staged submission will get a completion. 46362306a36Sopenharmony_ci * We must increment the reference for all other CS's in this 46462306a36Sopenharmony_ci * staged submission. 46562306a36Sopenharmony_ci * Once we get a completion we will release the whole staged submission. 46662306a36Sopenharmony_ci */ 46762306a36Sopenharmony_ci if (!cs->staged_last) 46862306a36Sopenharmony_ci cs_get(cs); 46962306a36Sopenharmony_ci} 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci/* 47262306a36Sopenharmony_ci * staged_cs_put - put a CS in case it is part of staged submission 47362306a36Sopenharmony_ci * 47462306a36Sopenharmony_ci * @hdev: pointer to device structure 47562306a36Sopenharmony_ci * @cs: CS to put 47662306a36Sopenharmony_ci * 47762306a36Sopenharmony_ci * This function decrements a CS reference (for a non completion CS) 47862306a36Sopenharmony_ci */ 47962306a36Sopenharmony_cistatic void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs) 48062306a36Sopenharmony_ci{ 48162306a36Sopenharmony_ci /* We release all CS's in a staged submission except the last 48262306a36Sopenharmony_ci * CS which we have never incremented its reference. 48362306a36Sopenharmony_ci */ 48462306a36Sopenharmony_ci if (!cs_needs_completion(cs)) 48562306a36Sopenharmony_ci cs_put(cs); 48662306a36Sopenharmony_ci} 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_cistatic void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) 48962306a36Sopenharmony_ci{ 49062306a36Sopenharmony_ci struct hl_cs *next = NULL, *iter, *first_cs; 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci if (!cs_needs_timeout(cs)) 49362306a36Sopenharmony_ci return; 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci spin_lock(&hdev->cs_mirror_lock); 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_ci /* We need to handle tdr only once for the complete staged submission. 49862306a36Sopenharmony_ci * Hence, we choose the CS that reaches this function first which is 49962306a36Sopenharmony_ci * the CS marked as 'staged_last'. 50062306a36Sopenharmony_ci * In case single staged cs was submitted which has both first and last 50162306a36Sopenharmony_ci * indications, then "cs_find_first" below will return NULL, since we 50262306a36Sopenharmony_ci * removed the cs node from the list before getting here, 50362306a36Sopenharmony_ci * in such cases just continue with the cs to cancel it's TDR work. 50462306a36Sopenharmony_ci */ 50562306a36Sopenharmony_ci if (cs->staged_cs && cs->staged_last) { 50662306a36Sopenharmony_ci first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence); 50762306a36Sopenharmony_ci if (first_cs) 50862306a36Sopenharmony_ci cs = first_cs; 50962306a36Sopenharmony_ci } 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_ci spin_unlock(&hdev->cs_mirror_lock); 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ci /* Don't cancel TDR in case this CS was timedout because we might be 51462306a36Sopenharmony_ci * running from the TDR context 51562306a36Sopenharmony_ci */ 51662306a36Sopenharmony_ci if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT) 51762306a36Sopenharmony_ci return; 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci if (cs->tdr_active) 52062306a36Sopenharmony_ci cancel_delayed_work_sync(&cs->work_tdr); 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci spin_lock(&hdev->cs_mirror_lock); 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci /* queue TDR for next CS */ 52562306a36Sopenharmony_ci list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node) 52662306a36Sopenharmony_ci if (cs_needs_timeout(iter)) { 52762306a36Sopenharmony_ci next = iter; 52862306a36Sopenharmony_ci break; 52962306a36Sopenharmony_ci } 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci if (next && !next->tdr_active) { 53262306a36Sopenharmony_ci next->tdr_active = true; 53362306a36Sopenharmony_ci schedule_delayed_work(&next->work_tdr, next->timeout_jiffies); 53462306a36Sopenharmony_ci } 53562306a36Sopenharmony_ci 53662306a36Sopenharmony_ci spin_unlock(&hdev->cs_mirror_lock); 53762306a36Sopenharmony_ci} 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci/* 54062306a36Sopenharmony_ci * force_complete_multi_cs - complete all contexts that wait on multi-CS 54162306a36Sopenharmony_ci * 54262306a36Sopenharmony_ci * @hdev: pointer to habanalabs device structure 54362306a36Sopenharmony_ci */ 54462306a36Sopenharmony_cistatic void force_complete_multi_cs(struct hl_device *hdev) 54562306a36Sopenharmony_ci{ 54662306a36Sopenharmony_ci int i; 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 54962306a36Sopenharmony_ci struct multi_cs_completion *mcs_compl; 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci mcs_compl = &hdev->multi_cs_completion[i]; 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_ci spin_lock(&mcs_compl->lock); 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci if (!mcs_compl->used) { 55662306a36Sopenharmony_ci spin_unlock(&mcs_compl->lock); 55762306a36Sopenharmony_ci continue; 55862306a36Sopenharmony_ci } 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci /* when calling force complete no context should be waiting on 56162306a36Sopenharmony_ci * multi-cS. 56262306a36Sopenharmony_ci * We are calling the function as a protection for such case 56362306a36Sopenharmony_ci * to free any pending context and print error message 56462306a36Sopenharmony_ci */ 56562306a36Sopenharmony_ci dev_err(hdev->dev, 56662306a36Sopenharmony_ci "multi-CS completion context %d still waiting when calling force completion\n", 56762306a36Sopenharmony_ci i); 56862306a36Sopenharmony_ci complete_all(&mcs_compl->completion); 56962306a36Sopenharmony_ci spin_unlock(&mcs_compl->lock); 57062306a36Sopenharmony_ci } 57162306a36Sopenharmony_ci} 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci/* 57462306a36Sopenharmony_ci * complete_multi_cs - complete all waiting entities on multi-CS 57562306a36Sopenharmony_ci * 57662306a36Sopenharmony_ci * @hdev: pointer to habanalabs device structure 57762306a36Sopenharmony_ci * @cs: CS structure 57862306a36Sopenharmony_ci * The function signals a waiting entity that has an overlapping stream masters 57962306a36Sopenharmony_ci * with the completed CS. 58062306a36Sopenharmony_ci * For example: 58162306a36Sopenharmony_ci * - a completed CS worked on stream master QID 4, multi CS completion 58262306a36Sopenharmony_ci * is actively waiting on stream master QIDs 3, 5. don't send signal as no 58362306a36Sopenharmony_ci * common stream master QID 58462306a36Sopenharmony_ci * - a completed CS worked on stream master QID 4, multi CS completion 58562306a36Sopenharmony_ci * is actively waiting on stream master QIDs 3, 4. send signal as stream 58662306a36Sopenharmony_ci * master QID 4 is common 58762306a36Sopenharmony_ci */ 58862306a36Sopenharmony_cistatic void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) 58962306a36Sopenharmony_ci{ 59062306a36Sopenharmony_ci struct hl_fence *fence = cs->fence; 59162306a36Sopenharmony_ci int i; 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci /* in case of multi CS check for completion only for the first CS */ 59462306a36Sopenharmony_ci if (cs->staged_cs && !cs->staged_first) 59562306a36Sopenharmony_ci return; 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_ci for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 59862306a36Sopenharmony_ci struct multi_cs_completion *mcs_compl; 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ci mcs_compl = &hdev->multi_cs_completion[i]; 60162306a36Sopenharmony_ci if (!mcs_compl->used) 60262306a36Sopenharmony_ci continue; 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ci spin_lock(&mcs_compl->lock); 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci /* 60762306a36Sopenharmony_ci * complete if: 60862306a36Sopenharmony_ci * 1. still waiting for completion 60962306a36Sopenharmony_ci * 2. the completed CS has at least one overlapping stream 61062306a36Sopenharmony_ci * master with the stream masters in the completion 61162306a36Sopenharmony_ci */ 61262306a36Sopenharmony_ci if (mcs_compl->used && 61362306a36Sopenharmony_ci (fence->stream_master_qid_map & 61462306a36Sopenharmony_ci mcs_compl->stream_master_qid_map)) { 61562306a36Sopenharmony_ci /* extract the timestamp only of first completed CS */ 61662306a36Sopenharmony_ci if (!mcs_compl->timestamp) 61762306a36Sopenharmony_ci mcs_compl->timestamp = ktime_to_ns(fence->timestamp); 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci complete_all(&mcs_compl->completion); 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci /* 62262306a36Sopenharmony_ci * Setting mcs_handling_done inside the lock ensures 62362306a36Sopenharmony_ci * at least one fence have mcs_handling_done set to 62462306a36Sopenharmony_ci * true before wait for mcs finish. This ensures at 62562306a36Sopenharmony_ci * least one CS will be set as completed when polling 62662306a36Sopenharmony_ci * mcs fences. 62762306a36Sopenharmony_ci */ 62862306a36Sopenharmony_ci fence->mcs_handling_done = true; 62962306a36Sopenharmony_ci } 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_ci spin_unlock(&mcs_compl->lock); 63262306a36Sopenharmony_ci } 63362306a36Sopenharmony_ci /* In case CS completed without mcs completion initialized */ 63462306a36Sopenharmony_ci fence->mcs_handling_done = true; 63562306a36Sopenharmony_ci} 63662306a36Sopenharmony_ci 63762306a36Sopenharmony_cistatic inline void cs_release_sob_reset_handler(struct hl_device *hdev, 63862306a36Sopenharmony_ci struct hl_cs *cs, 63962306a36Sopenharmony_ci struct hl_cs_compl *hl_cs_cmpl) 64062306a36Sopenharmony_ci{ 64162306a36Sopenharmony_ci /* Skip this handler if the cs wasn't submitted, to avoid putting 64262306a36Sopenharmony_ci * the hw_sob twice, since this case already handled at this point, 64362306a36Sopenharmony_ci * also skip if the hw_sob pointer wasn't set. 64462306a36Sopenharmony_ci */ 64562306a36Sopenharmony_ci if (!hl_cs_cmpl->hw_sob || !cs->submitted) 64662306a36Sopenharmony_ci return; 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci spin_lock(&hl_cs_cmpl->lock); 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_ci /* 65162306a36Sopenharmony_ci * we get refcount upon reservation of signals or signal/wait cs for the 65262306a36Sopenharmony_ci * hw_sob object, and need to put it when the first staged cs 65362306a36Sopenharmony_ci * (which contains the encaps signals) or cs signal/wait is completed. 65462306a36Sopenharmony_ci */ 65562306a36Sopenharmony_ci if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || 65662306a36Sopenharmony_ci (hl_cs_cmpl->type == CS_TYPE_WAIT) || 65762306a36Sopenharmony_ci (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) || 65862306a36Sopenharmony_ci (!!hl_cs_cmpl->encaps_signals)) { 65962306a36Sopenharmony_ci dev_dbg(hdev->dev, 66062306a36Sopenharmony_ci "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n", 66162306a36Sopenharmony_ci hl_cs_cmpl->cs_seq, 66262306a36Sopenharmony_ci hl_cs_cmpl->type, 66362306a36Sopenharmony_ci hl_cs_cmpl->hw_sob->sob_id, 66462306a36Sopenharmony_ci hl_cs_cmpl->sob_val); 66562306a36Sopenharmony_ci 66662306a36Sopenharmony_ci hw_sob_put(hl_cs_cmpl->hw_sob); 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_ci if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) 66962306a36Sopenharmony_ci hdev->asic_funcs->reset_sob_group(hdev, 67062306a36Sopenharmony_ci hl_cs_cmpl->sob_group); 67162306a36Sopenharmony_ci } 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ci spin_unlock(&hl_cs_cmpl->lock); 67462306a36Sopenharmony_ci} 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_cistatic void cs_do_release(struct kref *ref) 67762306a36Sopenharmony_ci{ 67862306a36Sopenharmony_ci struct hl_cs *cs = container_of(ref, struct hl_cs, refcount); 67962306a36Sopenharmony_ci struct hl_device *hdev = cs->ctx->hdev; 68062306a36Sopenharmony_ci struct hl_cs_job *job, *tmp; 68162306a36Sopenharmony_ci struct hl_cs_compl *hl_cs_cmpl = 68262306a36Sopenharmony_ci container_of(cs->fence, struct hl_cs_compl, base_fence); 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ci cs->completed = true; 68562306a36Sopenharmony_ci 68662306a36Sopenharmony_ci /* 68762306a36Sopenharmony_ci * Although if we reached here it means that all external jobs have 68862306a36Sopenharmony_ci * finished, because each one of them took refcnt to CS, we still 68962306a36Sopenharmony_ci * need to go over the internal jobs and complete them. Otherwise, we 69062306a36Sopenharmony_ci * will have leaked memory and what's worse, the CS object (and 69162306a36Sopenharmony_ci * potentially the CTX object) could be released, while the JOB 69262306a36Sopenharmony_ci * still holds a pointer to them (but no reference). 69362306a36Sopenharmony_ci */ 69462306a36Sopenharmony_ci list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) 69562306a36Sopenharmony_ci hl_complete_job(hdev, job); 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_ci if (!cs->submitted) { 69862306a36Sopenharmony_ci /* 69962306a36Sopenharmony_ci * In case the wait for signal CS was submitted, the fence put 70062306a36Sopenharmony_ci * occurs in init_signal_wait_cs() or collective_wait_init_cs() 70162306a36Sopenharmony_ci * right before hanging on the PQ. 70262306a36Sopenharmony_ci */ 70362306a36Sopenharmony_ci if (cs->type == CS_TYPE_WAIT || 70462306a36Sopenharmony_ci cs->type == CS_TYPE_COLLECTIVE_WAIT) 70562306a36Sopenharmony_ci hl_fence_put(cs->signal_fence); 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_ci goto out; 70862306a36Sopenharmony_ci } 70962306a36Sopenharmony_ci 71062306a36Sopenharmony_ci /* Need to update CI for all queue jobs that does not get completion */ 71162306a36Sopenharmony_ci hl_hw_queue_update_ci(cs); 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_ci /* remove CS from CS mirror list */ 71462306a36Sopenharmony_ci spin_lock(&hdev->cs_mirror_lock); 71562306a36Sopenharmony_ci list_del_init(&cs->mirror_node); 71662306a36Sopenharmony_ci spin_unlock(&hdev->cs_mirror_lock); 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_ci cs_handle_tdr(hdev, cs); 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ci if (cs->staged_cs) { 72162306a36Sopenharmony_ci /* the completion CS decrements reference for the entire 72262306a36Sopenharmony_ci * staged submission 72362306a36Sopenharmony_ci */ 72462306a36Sopenharmony_ci if (cs->staged_last) { 72562306a36Sopenharmony_ci struct hl_cs *staged_cs, *tmp_cs; 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci list_for_each_entry_safe(staged_cs, tmp_cs, 72862306a36Sopenharmony_ci &cs->staged_cs_node, staged_cs_node) 72962306a36Sopenharmony_ci staged_cs_put(hdev, staged_cs); 73062306a36Sopenharmony_ci } 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci /* A staged CS will be a member in the list only after it 73362306a36Sopenharmony_ci * was submitted. We used 'cs_mirror_lock' when inserting 73462306a36Sopenharmony_ci * it to list so we will use it again when removing it 73562306a36Sopenharmony_ci */ 73662306a36Sopenharmony_ci if (cs->submitted) { 73762306a36Sopenharmony_ci spin_lock(&hdev->cs_mirror_lock); 73862306a36Sopenharmony_ci list_del(&cs->staged_cs_node); 73962306a36Sopenharmony_ci spin_unlock(&hdev->cs_mirror_lock); 74062306a36Sopenharmony_ci } 74162306a36Sopenharmony_ci 74262306a36Sopenharmony_ci /* decrement refcount to handle when first staged cs 74362306a36Sopenharmony_ci * with encaps signals is completed. 74462306a36Sopenharmony_ci */ 74562306a36Sopenharmony_ci if (hl_cs_cmpl->encaps_signals) 74662306a36Sopenharmony_ci kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount, 74762306a36Sopenharmony_ci hl_encaps_release_handle_and_put_ctx); 74862306a36Sopenharmony_ci } 74962306a36Sopenharmony_ci 75062306a36Sopenharmony_ci if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals) 75162306a36Sopenharmony_ci kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ciout: 75462306a36Sopenharmony_ci /* Must be called before hl_ctx_put because inside we use ctx to get 75562306a36Sopenharmony_ci * the device 75662306a36Sopenharmony_ci */ 75762306a36Sopenharmony_ci hl_debugfs_remove_cs(cs); 75862306a36Sopenharmony_ci 75962306a36Sopenharmony_ci hdev->shadow_cs_queue[cs->sequence & (hdev->asic_prop.max_pending_cs - 1)] = NULL; 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_ci /* We need to mark an error for not submitted because in that case 76262306a36Sopenharmony_ci * the hl fence release flow is different. Mainly, we don't need 76362306a36Sopenharmony_ci * to handle hw_sob for signal/wait 76462306a36Sopenharmony_ci */ 76562306a36Sopenharmony_ci if (cs->timedout) 76662306a36Sopenharmony_ci cs->fence->error = -ETIMEDOUT; 76762306a36Sopenharmony_ci else if (cs->aborted) 76862306a36Sopenharmony_ci cs->fence->error = -EIO; 76962306a36Sopenharmony_ci else if (!cs->submitted) 77062306a36Sopenharmony_ci cs->fence->error = -EBUSY; 77162306a36Sopenharmony_ci 77262306a36Sopenharmony_ci if (unlikely(cs->skip_reset_on_timeout)) { 77362306a36Sopenharmony_ci dev_err(hdev->dev, 77462306a36Sopenharmony_ci "Command submission %llu completed after %llu (s)\n", 77562306a36Sopenharmony_ci cs->sequence, 77662306a36Sopenharmony_ci div_u64(jiffies - cs->submission_time_jiffies, HZ)); 77762306a36Sopenharmony_ci } 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ci if (cs->timestamp) { 78062306a36Sopenharmony_ci cs->fence->timestamp = cs->completion_timestamp; 78162306a36Sopenharmony_ci hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence, 78262306a36Sopenharmony_ci cs->fence->timestamp, cs->fence->error); 78362306a36Sopenharmony_ci } 78462306a36Sopenharmony_ci 78562306a36Sopenharmony_ci hl_ctx_put(cs->ctx); 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci complete_all(&cs->fence->completion); 78862306a36Sopenharmony_ci complete_multi_cs(hdev, cs); 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl); 79162306a36Sopenharmony_ci 79262306a36Sopenharmony_ci hl_fence_put(cs->fence); 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci kfree(cs->jobs_in_queue_cnt); 79562306a36Sopenharmony_ci kfree(cs); 79662306a36Sopenharmony_ci} 79762306a36Sopenharmony_ci 79862306a36Sopenharmony_cistatic void cs_timedout(struct work_struct *work) 79962306a36Sopenharmony_ci{ 80062306a36Sopenharmony_ci struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work); 80162306a36Sopenharmony_ci bool skip_reset_on_timeout, device_reset = false; 80262306a36Sopenharmony_ci struct hl_device *hdev; 80362306a36Sopenharmony_ci u64 event_mask = 0x0; 80462306a36Sopenharmony_ci uint timeout_sec; 80562306a36Sopenharmony_ci int rc; 80662306a36Sopenharmony_ci 80762306a36Sopenharmony_ci skip_reset_on_timeout = cs->skip_reset_on_timeout; 80862306a36Sopenharmony_ci 80962306a36Sopenharmony_ci rc = cs_get_unless_zero(cs); 81062306a36Sopenharmony_ci if (!rc) 81162306a36Sopenharmony_ci return; 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_ci if ((!cs->submitted) || (cs->completed)) { 81462306a36Sopenharmony_ci cs_put(cs); 81562306a36Sopenharmony_ci return; 81662306a36Sopenharmony_ci } 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci hdev = cs->ctx->hdev; 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci if (likely(!skip_reset_on_timeout)) { 82162306a36Sopenharmony_ci if (hdev->reset_on_lockup) 82262306a36Sopenharmony_ci device_reset = true; 82362306a36Sopenharmony_ci else 82462306a36Sopenharmony_ci hdev->reset_info.needs_reset = true; 82562306a36Sopenharmony_ci 82662306a36Sopenharmony_ci /* Mark the CS is timed out so we won't try to cancel its TDR */ 82762306a36Sopenharmony_ci cs->timedout = true; 82862306a36Sopenharmony_ci } 82962306a36Sopenharmony_ci 83062306a36Sopenharmony_ci /* Save only the first CS timeout parameters */ 83162306a36Sopenharmony_ci rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0); 83262306a36Sopenharmony_ci if (rc) { 83362306a36Sopenharmony_ci hdev->captured_err_info.cs_timeout.timestamp = ktime_get(); 83462306a36Sopenharmony_ci hdev->captured_err_info.cs_timeout.seq = cs->sequence; 83562306a36Sopenharmony_ci event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT; 83662306a36Sopenharmony_ci } 83762306a36Sopenharmony_ci 83862306a36Sopenharmony_ci timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000; 83962306a36Sopenharmony_ci 84062306a36Sopenharmony_ci switch (cs->type) { 84162306a36Sopenharmony_ci case CS_TYPE_SIGNAL: 84262306a36Sopenharmony_ci dev_err(hdev->dev, 84362306a36Sopenharmony_ci "Signal command submission %llu has not finished in %u seconds!\n", 84462306a36Sopenharmony_ci cs->sequence, timeout_sec); 84562306a36Sopenharmony_ci break; 84662306a36Sopenharmony_ci 84762306a36Sopenharmony_ci case CS_TYPE_WAIT: 84862306a36Sopenharmony_ci dev_err(hdev->dev, 84962306a36Sopenharmony_ci "Wait command submission %llu has not finished in %u seconds!\n", 85062306a36Sopenharmony_ci cs->sequence, timeout_sec); 85162306a36Sopenharmony_ci break; 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci case CS_TYPE_COLLECTIVE_WAIT: 85462306a36Sopenharmony_ci dev_err(hdev->dev, 85562306a36Sopenharmony_ci "Collective Wait command submission %llu has not finished in %u seconds!\n", 85662306a36Sopenharmony_ci cs->sequence, timeout_sec); 85762306a36Sopenharmony_ci break; 85862306a36Sopenharmony_ci 85962306a36Sopenharmony_ci default: 86062306a36Sopenharmony_ci dev_err(hdev->dev, 86162306a36Sopenharmony_ci "Command submission %llu has not finished in %u seconds!\n", 86262306a36Sopenharmony_ci cs->sequence, timeout_sec); 86362306a36Sopenharmony_ci break; 86462306a36Sopenharmony_ci } 86562306a36Sopenharmony_ci 86662306a36Sopenharmony_ci rc = hl_state_dump(hdev); 86762306a36Sopenharmony_ci if (rc) 86862306a36Sopenharmony_ci dev_err(hdev->dev, "Error during system state dump %d\n", rc); 86962306a36Sopenharmony_ci 87062306a36Sopenharmony_ci cs_put(cs); 87162306a36Sopenharmony_ci 87262306a36Sopenharmony_ci if (device_reset) { 87362306a36Sopenharmony_ci event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 87462306a36Sopenharmony_ci hl_device_cond_reset(hdev, HL_DRV_RESET_TDR, event_mask); 87562306a36Sopenharmony_ci } else if (event_mask) { 87662306a36Sopenharmony_ci hl_notifier_event_send_all(hdev, event_mask); 87762306a36Sopenharmony_ci } 87862306a36Sopenharmony_ci} 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_cistatic int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, 88162306a36Sopenharmony_ci enum hl_cs_type cs_type, u64 user_sequence, 88262306a36Sopenharmony_ci struct hl_cs **cs_new, u32 flags, u32 timeout) 88362306a36Sopenharmony_ci{ 88462306a36Sopenharmony_ci struct hl_cs_counters_atomic *cntr; 88562306a36Sopenharmony_ci struct hl_fence *other = NULL; 88662306a36Sopenharmony_ci struct hl_cs_compl *cs_cmpl; 88762306a36Sopenharmony_ci struct hl_cs *cs; 88862306a36Sopenharmony_ci int rc; 88962306a36Sopenharmony_ci 89062306a36Sopenharmony_ci cntr = &hdev->aggregated_cs_counters; 89162306a36Sopenharmony_ci 89262306a36Sopenharmony_ci cs = kzalloc(sizeof(*cs), GFP_ATOMIC); 89362306a36Sopenharmony_ci if (!cs) 89462306a36Sopenharmony_ci cs = kzalloc(sizeof(*cs), GFP_KERNEL); 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ci if (!cs) { 89762306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 89862306a36Sopenharmony_ci atomic64_inc(&cntr->out_of_mem_drop_cnt); 89962306a36Sopenharmony_ci return -ENOMEM; 90062306a36Sopenharmony_ci } 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci /* increment refcnt for context */ 90362306a36Sopenharmony_ci hl_ctx_get(ctx); 90462306a36Sopenharmony_ci 90562306a36Sopenharmony_ci cs->ctx = ctx; 90662306a36Sopenharmony_ci cs->submitted = false; 90762306a36Sopenharmony_ci cs->completed = false; 90862306a36Sopenharmony_ci cs->type = cs_type; 90962306a36Sopenharmony_ci cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); 91062306a36Sopenharmony_ci cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); 91162306a36Sopenharmony_ci cs->timeout_jiffies = timeout; 91262306a36Sopenharmony_ci cs->skip_reset_on_timeout = 91362306a36Sopenharmony_ci hdev->reset_info.skip_reset_on_timeout || 91462306a36Sopenharmony_ci !!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT); 91562306a36Sopenharmony_ci cs->submission_time_jiffies = jiffies; 91662306a36Sopenharmony_ci INIT_LIST_HEAD(&cs->job_list); 91762306a36Sopenharmony_ci INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); 91862306a36Sopenharmony_ci kref_init(&cs->refcount); 91962306a36Sopenharmony_ci spin_lock_init(&cs->job_lock); 92062306a36Sopenharmony_ci 92162306a36Sopenharmony_ci cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC); 92262306a36Sopenharmony_ci if (!cs_cmpl) 92362306a36Sopenharmony_ci cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL); 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_ci if (!cs_cmpl) { 92662306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 92762306a36Sopenharmony_ci atomic64_inc(&cntr->out_of_mem_drop_cnt); 92862306a36Sopenharmony_ci rc = -ENOMEM; 92962306a36Sopenharmony_ci goto free_cs; 93062306a36Sopenharmony_ci } 93162306a36Sopenharmony_ci 93262306a36Sopenharmony_ci cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, 93362306a36Sopenharmony_ci sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); 93462306a36Sopenharmony_ci if (!cs->jobs_in_queue_cnt) 93562306a36Sopenharmony_ci cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, 93662306a36Sopenharmony_ci sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); 93762306a36Sopenharmony_ci 93862306a36Sopenharmony_ci if (!cs->jobs_in_queue_cnt) { 93962306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 94062306a36Sopenharmony_ci atomic64_inc(&cntr->out_of_mem_drop_cnt); 94162306a36Sopenharmony_ci rc = -ENOMEM; 94262306a36Sopenharmony_ci goto free_cs_cmpl; 94362306a36Sopenharmony_ci } 94462306a36Sopenharmony_ci 94562306a36Sopenharmony_ci cs_cmpl->hdev = hdev; 94662306a36Sopenharmony_ci cs_cmpl->type = cs->type; 94762306a36Sopenharmony_ci spin_lock_init(&cs_cmpl->lock); 94862306a36Sopenharmony_ci cs->fence = &cs_cmpl->base_fence; 94962306a36Sopenharmony_ci 95062306a36Sopenharmony_ci spin_lock(&ctx->cs_lock); 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci cs_cmpl->cs_seq = ctx->cs_sequence; 95362306a36Sopenharmony_ci other = ctx->cs_pending[cs_cmpl->cs_seq & 95462306a36Sopenharmony_ci (hdev->asic_prop.max_pending_cs - 1)]; 95562306a36Sopenharmony_ci 95662306a36Sopenharmony_ci if (other && !completion_done(&other->completion)) { 95762306a36Sopenharmony_ci /* If the following statement is true, it means we have reached 95862306a36Sopenharmony_ci * a point in which only part of the staged submission was 95962306a36Sopenharmony_ci * submitted and we don't have enough room in the 'cs_pending' 96062306a36Sopenharmony_ci * array for the rest of the submission. 96162306a36Sopenharmony_ci * This causes a deadlock because this CS will never be 96262306a36Sopenharmony_ci * completed as it depends on future CS's for completion. 96362306a36Sopenharmony_ci */ 96462306a36Sopenharmony_ci if (other->cs_sequence == user_sequence) 96562306a36Sopenharmony_ci dev_crit_ratelimited(hdev->dev, 96662306a36Sopenharmony_ci "Staged CS %llu deadlock due to lack of resources", 96762306a36Sopenharmony_ci user_sequence); 96862306a36Sopenharmony_ci 96962306a36Sopenharmony_ci dev_dbg_ratelimited(hdev->dev, 97062306a36Sopenharmony_ci "Rejecting CS because of too many in-flights CS\n"); 97162306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt); 97262306a36Sopenharmony_ci atomic64_inc(&cntr->max_cs_in_flight_drop_cnt); 97362306a36Sopenharmony_ci rc = -EAGAIN; 97462306a36Sopenharmony_ci goto free_fence; 97562306a36Sopenharmony_ci } 97662306a36Sopenharmony_ci 97762306a36Sopenharmony_ci /* init hl_fence */ 97862306a36Sopenharmony_ci hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq); 97962306a36Sopenharmony_ci 98062306a36Sopenharmony_ci cs->sequence = cs_cmpl->cs_seq; 98162306a36Sopenharmony_ci 98262306a36Sopenharmony_ci ctx->cs_pending[cs_cmpl->cs_seq & 98362306a36Sopenharmony_ci (hdev->asic_prop.max_pending_cs - 1)] = 98462306a36Sopenharmony_ci &cs_cmpl->base_fence; 98562306a36Sopenharmony_ci ctx->cs_sequence++; 98662306a36Sopenharmony_ci 98762306a36Sopenharmony_ci hl_fence_get(&cs_cmpl->base_fence); 98862306a36Sopenharmony_ci 98962306a36Sopenharmony_ci hl_fence_put(other); 99062306a36Sopenharmony_ci 99162306a36Sopenharmony_ci spin_unlock(&ctx->cs_lock); 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_ci *cs_new = cs; 99462306a36Sopenharmony_ci 99562306a36Sopenharmony_ci return 0; 99662306a36Sopenharmony_ci 99762306a36Sopenharmony_cifree_fence: 99862306a36Sopenharmony_ci spin_unlock(&ctx->cs_lock); 99962306a36Sopenharmony_ci kfree(cs->jobs_in_queue_cnt); 100062306a36Sopenharmony_cifree_cs_cmpl: 100162306a36Sopenharmony_ci kfree(cs_cmpl); 100262306a36Sopenharmony_cifree_cs: 100362306a36Sopenharmony_ci kfree(cs); 100462306a36Sopenharmony_ci hl_ctx_put(ctx); 100562306a36Sopenharmony_ci return rc; 100662306a36Sopenharmony_ci} 100762306a36Sopenharmony_ci 100862306a36Sopenharmony_cistatic void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) 100962306a36Sopenharmony_ci{ 101062306a36Sopenharmony_ci struct hl_cs_job *job, *tmp; 101162306a36Sopenharmony_ci 101262306a36Sopenharmony_ci staged_cs_put(hdev, cs); 101362306a36Sopenharmony_ci 101462306a36Sopenharmony_ci list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) 101562306a36Sopenharmony_ci hl_complete_job(hdev, job); 101662306a36Sopenharmony_ci} 101762306a36Sopenharmony_ci 101862306a36Sopenharmony_ci/* 101962306a36Sopenharmony_ci * release_reserved_encaps_signals() - release reserved encapsulated signals. 102062306a36Sopenharmony_ci * @hdev: pointer to habanalabs device structure 102162306a36Sopenharmony_ci * 102262306a36Sopenharmony_ci * Release reserved encapsulated signals which weren't un-reserved, or for which a CS with 102362306a36Sopenharmony_ci * encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back. 102462306a36Sopenharmony_ci * For these signals need also to put the refcount of the H/W SOB which was taken at the 102562306a36Sopenharmony_ci * reservation. 102662306a36Sopenharmony_ci */ 102762306a36Sopenharmony_cistatic void release_reserved_encaps_signals(struct hl_device *hdev) 102862306a36Sopenharmony_ci{ 102962306a36Sopenharmony_ci struct hl_ctx *ctx = hl_get_compute_ctx(hdev); 103062306a36Sopenharmony_ci struct hl_cs_encaps_sig_handle *handle; 103162306a36Sopenharmony_ci struct hl_encaps_signals_mgr *mgr; 103262306a36Sopenharmony_ci u32 id; 103362306a36Sopenharmony_ci 103462306a36Sopenharmony_ci if (!ctx) 103562306a36Sopenharmony_ci return; 103662306a36Sopenharmony_ci 103762306a36Sopenharmony_ci mgr = &ctx->sig_mgr; 103862306a36Sopenharmony_ci 103962306a36Sopenharmony_ci idr_for_each_entry(&mgr->handles, handle, id) 104062306a36Sopenharmony_ci if (handle->cs_seq == ULLONG_MAX) 104162306a36Sopenharmony_ci kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx); 104262306a36Sopenharmony_ci 104362306a36Sopenharmony_ci hl_ctx_put(ctx); 104462306a36Sopenharmony_ci} 104562306a36Sopenharmony_ci 104662306a36Sopenharmony_civoid hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush) 104762306a36Sopenharmony_ci{ 104862306a36Sopenharmony_ci int i; 104962306a36Sopenharmony_ci struct hl_cs *cs, *tmp; 105062306a36Sopenharmony_ci 105162306a36Sopenharmony_ci if (!skip_wq_flush) { 105262306a36Sopenharmony_ci flush_workqueue(hdev->ts_free_obj_wq); 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_ci /* flush all completions before iterating over the CS mirror list in 105562306a36Sopenharmony_ci * order to avoid a race with the release functions 105662306a36Sopenharmony_ci */ 105762306a36Sopenharmony_ci for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 105862306a36Sopenharmony_ci flush_workqueue(hdev->cq_wq[i]); 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci flush_workqueue(hdev->cs_cmplt_wq); 106162306a36Sopenharmony_ci } 106262306a36Sopenharmony_ci 106362306a36Sopenharmony_ci /* Make sure we don't have leftovers in the CS mirror list */ 106462306a36Sopenharmony_ci list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { 106562306a36Sopenharmony_ci cs_get(cs); 106662306a36Sopenharmony_ci cs->aborted = true; 106762306a36Sopenharmony_ci dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n", 106862306a36Sopenharmony_ci cs->ctx->asid, cs->sequence); 106962306a36Sopenharmony_ci cs_rollback(hdev, cs); 107062306a36Sopenharmony_ci cs_put(cs); 107162306a36Sopenharmony_ci } 107262306a36Sopenharmony_ci 107362306a36Sopenharmony_ci force_complete_multi_cs(hdev); 107462306a36Sopenharmony_ci 107562306a36Sopenharmony_ci release_reserved_encaps_signals(hdev); 107662306a36Sopenharmony_ci} 107762306a36Sopenharmony_ci 107862306a36Sopenharmony_cistatic void 107962306a36Sopenharmony_ciwake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt) 108062306a36Sopenharmony_ci{ 108162306a36Sopenharmony_ci struct hl_user_pending_interrupt *pend, *temp; 108262306a36Sopenharmony_ci 108362306a36Sopenharmony_ci spin_lock(&interrupt->wait_list_lock); 108462306a36Sopenharmony_ci list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) { 108562306a36Sopenharmony_ci if (pend->ts_reg_info.buf) { 108662306a36Sopenharmony_ci list_del(&pend->wait_list_node); 108762306a36Sopenharmony_ci hl_mmap_mem_buf_put(pend->ts_reg_info.buf); 108862306a36Sopenharmony_ci hl_cb_put(pend->ts_reg_info.cq_cb); 108962306a36Sopenharmony_ci } else { 109062306a36Sopenharmony_ci pend->fence.error = -EIO; 109162306a36Sopenharmony_ci complete_all(&pend->fence.completion); 109262306a36Sopenharmony_ci } 109362306a36Sopenharmony_ci } 109462306a36Sopenharmony_ci spin_unlock(&interrupt->wait_list_lock); 109562306a36Sopenharmony_ci} 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_civoid hl_release_pending_user_interrupts(struct hl_device *hdev) 109862306a36Sopenharmony_ci{ 109962306a36Sopenharmony_ci struct asic_fixed_properties *prop = &hdev->asic_prop; 110062306a36Sopenharmony_ci struct hl_user_interrupt *interrupt; 110162306a36Sopenharmony_ci int i; 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci if (!prop->user_interrupt_count) 110462306a36Sopenharmony_ci return; 110562306a36Sopenharmony_ci 110662306a36Sopenharmony_ci /* We iterate through the user interrupt requests and waking up all 110762306a36Sopenharmony_ci * user threads waiting for interrupt completion. We iterate the 110862306a36Sopenharmony_ci * list under a lock, this is why all user threads, once awake, 110962306a36Sopenharmony_ci * will wait on the same lock and will release the waiting object upon 111062306a36Sopenharmony_ci * unlock. 111162306a36Sopenharmony_ci */ 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_ci for (i = 0 ; i < prop->user_interrupt_count ; i++) { 111462306a36Sopenharmony_ci interrupt = &hdev->user_interrupt[i]; 111562306a36Sopenharmony_ci wake_pending_user_interrupt_threads(interrupt); 111662306a36Sopenharmony_ci } 111762306a36Sopenharmony_ci 111862306a36Sopenharmony_ci interrupt = &hdev->common_user_cq_interrupt; 111962306a36Sopenharmony_ci wake_pending_user_interrupt_threads(interrupt); 112062306a36Sopenharmony_ci 112162306a36Sopenharmony_ci interrupt = &hdev->common_decoder_interrupt; 112262306a36Sopenharmony_ci wake_pending_user_interrupt_threads(interrupt); 112362306a36Sopenharmony_ci} 112462306a36Sopenharmony_ci 112562306a36Sopenharmony_cistatic void force_complete_cs(struct hl_device *hdev) 112662306a36Sopenharmony_ci{ 112762306a36Sopenharmony_ci struct hl_cs *cs; 112862306a36Sopenharmony_ci 112962306a36Sopenharmony_ci spin_lock(&hdev->cs_mirror_lock); 113062306a36Sopenharmony_ci 113162306a36Sopenharmony_ci list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) { 113262306a36Sopenharmony_ci cs->fence->error = -EIO; 113362306a36Sopenharmony_ci complete_all(&cs->fence->completion); 113462306a36Sopenharmony_ci } 113562306a36Sopenharmony_ci 113662306a36Sopenharmony_ci spin_unlock(&hdev->cs_mirror_lock); 113762306a36Sopenharmony_ci} 113862306a36Sopenharmony_ci 113962306a36Sopenharmony_civoid hl_abort_waiting_for_cs_completions(struct hl_device *hdev) 114062306a36Sopenharmony_ci{ 114162306a36Sopenharmony_ci force_complete_cs(hdev); 114262306a36Sopenharmony_ci force_complete_multi_cs(hdev); 114362306a36Sopenharmony_ci} 114462306a36Sopenharmony_ci 114562306a36Sopenharmony_cistatic void job_wq_completion(struct work_struct *work) 114662306a36Sopenharmony_ci{ 114762306a36Sopenharmony_ci struct hl_cs_job *job = container_of(work, struct hl_cs_job, 114862306a36Sopenharmony_ci finish_work); 114962306a36Sopenharmony_ci struct hl_cs *cs = job->cs; 115062306a36Sopenharmony_ci struct hl_device *hdev = cs->ctx->hdev; 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci /* job is no longer needed */ 115362306a36Sopenharmony_ci hl_complete_job(hdev, job); 115462306a36Sopenharmony_ci} 115562306a36Sopenharmony_ci 115662306a36Sopenharmony_cistatic void cs_completion(struct work_struct *work) 115762306a36Sopenharmony_ci{ 115862306a36Sopenharmony_ci struct hl_cs *cs = container_of(work, struct hl_cs, finish_work); 115962306a36Sopenharmony_ci struct hl_device *hdev = cs->ctx->hdev; 116062306a36Sopenharmony_ci struct hl_cs_job *job, *tmp; 116162306a36Sopenharmony_ci 116262306a36Sopenharmony_ci list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) 116362306a36Sopenharmony_ci hl_complete_job(hdev, job); 116462306a36Sopenharmony_ci} 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_ciu32 hl_get_active_cs_num(struct hl_device *hdev) 116762306a36Sopenharmony_ci{ 116862306a36Sopenharmony_ci u32 active_cs_num = 0; 116962306a36Sopenharmony_ci struct hl_cs *cs; 117062306a36Sopenharmony_ci 117162306a36Sopenharmony_ci spin_lock(&hdev->cs_mirror_lock); 117262306a36Sopenharmony_ci 117362306a36Sopenharmony_ci list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) 117462306a36Sopenharmony_ci if (!cs->completed) 117562306a36Sopenharmony_ci active_cs_num++; 117662306a36Sopenharmony_ci 117762306a36Sopenharmony_ci spin_unlock(&hdev->cs_mirror_lock); 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci return active_cs_num; 118062306a36Sopenharmony_ci} 118162306a36Sopenharmony_ci 118262306a36Sopenharmony_cistatic int validate_queue_index(struct hl_device *hdev, 118362306a36Sopenharmony_ci struct hl_cs_chunk *chunk, 118462306a36Sopenharmony_ci enum hl_queue_type *queue_type, 118562306a36Sopenharmony_ci bool *is_kernel_allocated_cb) 118662306a36Sopenharmony_ci{ 118762306a36Sopenharmony_ci struct asic_fixed_properties *asic = &hdev->asic_prop; 118862306a36Sopenharmony_ci struct hw_queue_properties *hw_queue_prop; 118962306a36Sopenharmony_ci 119062306a36Sopenharmony_ci /* This must be checked here to prevent out-of-bounds access to 119162306a36Sopenharmony_ci * hw_queues_props array 119262306a36Sopenharmony_ci */ 119362306a36Sopenharmony_ci if (chunk->queue_index >= asic->max_queues) { 119462306a36Sopenharmony_ci dev_err(hdev->dev, "Queue index %d is invalid\n", 119562306a36Sopenharmony_ci chunk->queue_index); 119662306a36Sopenharmony_ci return -EINVAL; 119762306a36Sopenharmony_ci } 119862306a36Sopenharmony_ci 119962306a36Sopenharmony_ci hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; 120062306a36Sopenharmony_ci 120162306a36Sopenharmony_ci if (hw_queue_prop->type == QUEUE_TYPE_NA) { 120262306a36Sopenharmony_ci dev_err(hdev->dev, "Queue index %d is not applicable\n", 120362306a36Sopenharmony_ci chunk->queue_index); 120462306a36Sopenharmony_ci return -EINVAL; 120562306a36Sopenharmony_ci } 120662306a36Sopenharmony_ci 120762306a36Sopenharmony_ci if (hw_queue_prop->binned) { 120862306a36Sopenharmony_ci dev_err(hdev->dev, "Queue index %d is binned out\n", 120962306a36Sopenharmony_ci chunk->queue_index); 121062306a36Sopenharmony_ci return -EINVAL; 121162306a36Sopenharmony_ci } 121262306a36Sopenharmony_ci 121362306a36Sopenharmony_ci if (hw_queue_prop->driver_only) { 121462306a36Sopenharmony_ci dev_err(hdev->dev, 121562306a36Sopenharmony_ci "Queue index %d is restricted for the kernel driver\n", 121662306a36Sopenharmony_ci chunk->queue_index); 121762306a36Sopenharmony_ci return -EINVAL; 121862306a36Sopenharmony_ci } 121962306a36Sopenharmony_ci 122062306a36Sopenharmony_ci /* When hw queue type isn't QUEUE_TYPE_HW, 122162306a36Sopenharmony_ci * USER_ALLOC_CB flag shall be referred as "don't care". 122262306a36Sopenharmony_ci */ 122362306a36Sopenharmony_ci if (hw_queue_prop->type == QUEUE_TYPE_HW) { 122462306a36Sopenharmony_ci if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) { 122562306a36Sopenharmony_ci if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) { 122662306a36Sopenharmony_ci dev_err(hdev->dev, 122762306a36Sopenharmony_ci "Queue index %d doesn't support user CB\n", 122862306a36Sopenharmony_ci chunk->queue_index); 122962306a36Sopenharmony_ci return -EINVAL; 123062306a36Sopenharmony_ci } 123162306a36Sopenharmony_ci 123262306a36Sopenharmony_ci *is_kernel_allocated_cb = false; 123362306a36Sopenharmony_ci } else { 123462306a36Sopenharmony_ci if (!(hw_queue_prop->cb_alloc_flags & 123562306a36Sopenharmony_ci CB_ALLOC_KERNEL)) { 123662306a36Sopenharmony_ci dev_err(hdev->dev, 123762306a36Sopenharmony_ci "Queue index %d doesn't support kernel CB\n", 123862306a36Sopenharmony_ci chunk->queue_index); 123962306a36Sopenharmony_ci return -EINVAL; 124062306a36Sopenharmony_ci } 124162306a36Sopenharmony_ci 124262306a36Sopenharmony_ci *is_kernel_allocated_cb = true; 124362306a36Sopenharmony_ci } 124462306a36Sopenharmony_ci } else { 124562306a36Sopenharmony_ci *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags 124662306a36Sopenharmony_ci & CB_ALLOC_KERNEL); 124762306a36Sopenharmony_ci } 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_ci *queue_type = hw_queue_prop->type; 125062306a36Sopenharmony_ci return 0; 125162306a36Sopenharmony_ci} 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_cistatic struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev, 125462306a36Sopenharmony_ci struct hl_mem_mgr *mmg, 125562306a36Sopenharmony_ci struct hl_cs_chunk *chunk) 125662306a36Sopenharmony_ci{ 125762306a36Sopenharmony_ci struct hl_cb *cb; 125862306a36Sopenharmony_ci 125962306a36Sopenharmony_ci cb = hl_cb_get(mmg, chunk->cb_handle); 126062306a36Sopenharmony_ci if (!cb) { 126162306a36Sopenharmony_ci dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle); 126262306a36Sopenharmony_ci return NULL; 126362306a36Sopenharmony_ci } 126462306a36Sopenharmony_ci 126562306a36Sopenharmony_ci if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) { 126662306a36Sopenharmony_ci dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size); 126762306a36Sopenharmony_ci goto release_cb; 126862306a36Sopenharmony_ci } 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_ci atomic_inc(&cb->cs_cnt); 127162306a36Sopenharmony_ci 127262306a36Sopenharmony_ci return cb; 127362306a36Sopenharmony_ci 127462306a36Sopenharmony_cirelease_cb: 127562306a36Sopenharmony_ci hl_cb_put(cb); 127662306a36Sopenharmony_ci return NULL; 127762306a36Sopenharmony_ci} 127862306a36Sopenharmony_ci 127962306a36Sopenharmony_cistruct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, 128062306a36Sopenharmony_ci enum hl_queue_type queue_type, bool is_kernel_allocated_cb) 128162306a36Sopenharmony_ci{ 128262306a36Sopenharmony_ci struct hl_cs_job *job; 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci job = kzalloc(sizeof(*job), GFP_ATOMIC); 128562306a36Sopenharmony_ci if (!job) 128662306a36Sopenharmony_ci job = kzalloc(sizeof(*job), GFP_KERNEL); 128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ci if (!job) 128962306a36Sopenharmony_ci return NULL; 129062306a36Sopenharmony_ci 129162306a36Sopenharmony_ci kref_init(&job->refcount); 129262306a36Sopenharmony_ci job->queue_type = queue_type; 129362306a36Sopenharmony_ci job->is_kernel_allocated_cb = is_kernel_allocated_cb; 129462306a36Sopenharmony_ci 129562306a36Sopenharmony_ci if (is_cb_patched(hdev, job)) 129662306a36Sopenharmony_ci INIT_LIST_HEAD(&job->userptr_list); 129762306a36Sopenharmony_ci 129862306a36Sopenharmony_ci if (job->queue_type == QUEUE_TYPE_EXT) 129962306a36Sopenharmony_ci INIT_WORK(&job->finish_work, job_wq_completion); 130062306a36Sopenharmony_ci 130162306a36Sopenharmony_ci return job; 130262306a36Sopenharmony_ci} 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_cistatic enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags) 130562306a36Sopenharmony_ci{ 130662306a36Sopenharmony_ci if (cs_type_flags & HL_CS_FLAGS_SIGNAL) 130762306a36Sopenharmony_ci return CS_TYPE_SIGNAL; 130862306a36Sopenharmony_ci else if (cs_type_flags & HL_CS_FLAGS_WAIT) 130962306a36Sopenharmony_ci return CS_TYPE_WAIT; 131062306a36Sopenharmony_ci else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT) 131162306a36Sopenharmony_ci return CS_TYPE_COLLECTIVE_WAIT; 131262306a36Sopenharmony_ci else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY) 131362306a36Sopenharmony_ci return CS_RESERVE_SIGNALS; 131462306a36Sopenharmony_ci else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY) 131562306a36Sopenharmony_ci return CS_UNRESERVE_SIGNALS; 131662306a36Sopenharmony_ci else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND) 131762306a36Sopenharmony_ci return CS_TYPE_ENGINE_CORE; 131862306a36Sopenharmony_ci else if (cs_type_flags & HL_CS_FLAGS_ENGINES_COMMAND) 131962306a36Sopenharmony_ci return CS_TYPE_ENGINES; 132062306a36Sopenharmony_ci else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) 132162306a36Sopenharmony_ci return CS_TYPE_FLUSH_PCI_HBW_WRITES; 132262306a36Sopenharmony_ci else 132362306a36Sopenharmony_ci return CS_TYPE_DEFAULT; 132462306a36Sopenharmony_ci} 132562306a36Sopenharmony_ci 132662306a36Sopenharmony_cistatic int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) 132762306a36Sopenharmony_ci{ 132862306a36Sopenharmony_ci struct hl_device *hdev = hpriv->hdev; 132962306a36Sopenharmony_ci struct hl_ctx *ctx = hpriv->ctx; 133062306a36Sopenharmony_ci u32 cs_type_flags, num_chunks; 133162306a36Sopenharmony_ci enum hl_device_status status; 133262306a36Sopenharmony_ci enum hl_cs_type cs_type; 133362306a36Sopenharmony_ci bool is_sync_stream; 133462306a36Sopenharmony_ci int i; 133562306a36Sopenharmony_ci 133662306a36Sopenharmony_ci for (i = 0 ; i < sizeof(args->in.pad) ; i++) 133762306a36Sopenharmony_ci if (args->in.pad[i]) { 133862306a36Sopenharmony_ci dev_dbg(hdev->dev, "Padding bytes must be 0\n"); 133962306a36Sopenharmony_ci return -EINVAL; 134062306a36Sopenharmony_ci } 134162306a36Sopenharmony_ci 134262306a36Sopenharmony_ci if (!hl_device_operational(hdev, &status)) { 134362306a36Sopenharmony_ci return -EBUSY; 134462306a36Sopenharmony_ci } 134562306a36Sopenharmony_ci 134662306a36Sopenharmony_ci if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) && 134762306a36Sopenharmony_ci !hdev->supports_staged_submission) { 134862306a36Sopenharmony_ci dev_err(hdev->dev, "staged submission not supported"); 134962306a36Sopenharmony_ci return -EPERM; 135062306a36Sopenharmony_ci } 135162306a36Sopenharmony_ci 135262306a36Sopenharmony_ci cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK; 135362306a36Sopenharmony_ci 135462306a36Sopenharmony_ci if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) { 135562306a36Sopenharmony_ci dev_err(hdev->dev, 135662306a36Sopenharmony_ci "CS type flags are mutually exclusive, context %d\n", 135762306a36Sopenharmony_ci ctx->asid); 135862306a36Sopenharmony_ci return -EINVAL; 135962306a36Sopenharmony_ci } 136062306a36Sopenharmony_ci 136162306a36Sopenharmony_ci cs_type = hl_cs_get_cs_type(cs_type_flags); 136262306a36Sopenharmony_ci num_chunks = args->in.num_chunks_execute; 136362306a36Sopenharmony_ci 136462306a36Sopenharmony_ci is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT || 136562306a36Sopenharmony_ci cs_type == CS_TYPE_COLLECTIVE_WAIT); 136662306a36Sopenharmony_ci 136762306a36Sopenharmony_ci if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) { 136862306a36Sopenharmony_ci dev_err(hdev->dev, "Sync stream CS is not supported\n"); 136962306a36Sopenharmony_ci return -EINVAL; 137062306a36Sopenharmony_ci } 137162306a36Sopenharmony_ci 137262306a36Sopenharmony_ci if (cs_type == CS_TYPE_DEFAULT) { 137362306a36Sopenharmony_ci if (!num_chunks) { 137462306a36Sopenharmony_ci dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid); 137562306a36Sopenharmony_ci return -EINVAL; 137662306a36Sopenharmony_ci } 137762306a36Sopenharmony_ci } else if (is_sync_stream && num_chunks != 1) { 137862306a36Sopenharmony_ci dev_err(hdev->dev, 137962306a36Sopenharmony_ci "Sync stream CS mandates one chunk only, context %d\n", 138062306a36Sopenharmony_ci ctx->asid); 138162306a36Sopenharmony_ci return -EINVAL; 138262306a36Sopenharmony_ci } 138362306a36Sopenharmony_ci 138462306a36Sopenharmony_ci return 0; 138562306a36Sopenharmony_ci} 138662306a36Sopenharmony_ci 138762306a36Sopenharmony_cistatic int hl_cs_copy_chunk_array(struct hl_device *hdev, 138862306a36Sopenharmony_ci struct hl_cs_chunk **cs_chunk_array, 138962306a36Sopenharmony_ci void __user *chunks, u32 num_chunks, 139062306a36Sopenharmony_ci struct hl_ctx *ctx) 139162306a36Sopenharmony_ci{ 139262306a36Sopenharmony_ci u32 size_to_copy; 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_ci if (num_chunks > HL_MAX_JOBS_PER_CS) { 139562306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 139662306a36Sopenharmony_ci atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 139762306a36Sopenharmony_ci dev_err(hdev->dev, 139862306a36Sopenharmony_ci "Number of chunks can NOT be larger than %d\n", 139962306a36Sopenharmony_ci HL_MAX_JOBS_PER_CS); 140062306a36Sopenharmony_ci return -EINVAL; 140162306a36Sopenharmony_ci } 140262306a36Sopenharmony_ci 140362306a36Sopenharmony_ci *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array), 140462306a36Sopenharmony_ci GFP_ATOMIC); 140562306a36Sopenharmony_ci if (!*cs_chunk_array) 140662306a36Sopenharmony_ci *cs_chunk_array = kmalloc_array(num_chunks, 140762306a36Sopenharmony_ci sizeof(**cs_chunk_array), GFP_KERNEL); 140862306a36Sopenharmony_ci if (!*cs_chunk_array) { 140962306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 141062306a36Sopenharmony_ci atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); 141162306a36Sopenharmony_ci return -ENOMEM; 141262306a36Sopenharmony_ci } 141362306a36Sopenharmony_ci 141462306a36Sopenharmony_ci size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); 141562306a36Sopenharmony_ci if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) { 141662306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 141762306a36Sopenharmony_ci atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 141862306a36Sopenharmony_ci dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); 141962306a36Sopenharmony_ci kfree(*cs_chunk_array); 142062306a36Sopenharmony_ci return -EFAULT; 142162306a36Sopenharmony_ci } 142262306a36Sopenharmony_ci 142362306a36Sopenharmony_ci return 0; 142462306a36Sopenharmony_ci} 142562306a36Sopenharmony_ci 142662306a36Sopenharmony_cistatic int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs, 142762306a36Sopenharmony_ci u64 sequence, u32 flags, 142862306a36Sopenharmony_ci u32 encaps_signal_handle) 142962306a36Sopenharmony_ci{ 143062306a36Sopenharmony_ci if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION)) 143162306a36Sopenharmony_ci return 0; 143262306a36Sopenharmony_ci 143362306a36Sopenharmony_ci cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST); 143462306a36Sopenharmony_ci cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST); 143562306a36Sopenharmony_ci 143662306a36Sopenharmony_ci if (cs->staged_first) { 143762306a36Sopenharmony_ci /* Staged CS sequence is the first CS sequence */ 143862306a36Sopenharmony_ci INIT_LIST_HEAD(&cs->staged_cs_node); 143962306a36Sopenharmony_ci cs->staged_sequence = cs->sequence; 144062306a36Sopenharmony_ci 144162306a36Sopenharmony_ci if (cs->encaps_signals) 144262306a36Sopenharmony_ci cs->encaps_sig_hdl_id = encaps_signal_handle; 144362306a36Sopenharmony_ci } else { 144462306a36Sopenharmony_ci /* User sequence will be validated in 'hl_hw_queue_schedule_cs' 144562306a36Sopenharmony_ci * under the cs_mirror_lock 144662306a36Sopenharmony_ci */ 144762306a36Sopenharmony_ci cs->staged_sequence = sequence; 144862306a36Sopenharmony_ci } 144962306a36Sopenharmony_ci 145062306a36Sopenharmony_ci /* Increment CS reference if needed */ 145162306a36Sopenharmony_ci staged_cs_get(hdev, cs); 145262306a36Sopenharmony_ci 145362306a36Sopenharmony_ci cs->staged_cs = true; 145462306a36Sopenharmony_ci 145562306a36Sopenharmony_ci return 0; 145662306a36Sopenharmony_ci} 145762306a36Sopenharmony_ci 145862306a36Sopenharmony_cistatic u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid) 145962306a36Sopenharmony_ci{ 146062306a36Sopenharmony_ci int i; 146162306a36Sopenharmony_ci 146262306a36Sopenharmony_ci for (i = 0; i < hdev->stream_master_qid_arr_size; i++) 146362306a36Sopenharmony_ci if (qid == hdev->stream_master_qid_arr[i]) 146462306a36Sopenharmony_ci return BIT(i); 146562306a36Sopenharmony_ci 146662306a36Sopenharmony_ci return 0; 146762306a36Sopenharmony_ci} 146862306a36Sopenharmony_ci 146962306a36Sopenharmony_cistatic int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, 147062306a36Sopenharmony_ci u32 num_chunks, u64 *cs_seq, u32 flags, 147162306a36Sopenharmony_ci u32 encaps_signals_handle, u32 timeout, 147262306a36Sopenharmony_ci u16 *signal_initial_sob_count) 147362306a36Sopenharmony_ci{ 147462306a36Sopenharmony_ci bool staged_mid, int_queues_only = true, using_hw_queues = false; 147562306a36Sopenharmony_ci struct hl_device *hdev = hpriv->hdev; 147662306a36Sopenharmony_ci struct hl_cs_chunk *cs_chunk_array; 147762306a36Sopenharmony_ci struct hl_cs_counters_atomic *cntr; 147862306a36Sopenharmony_ci struct hl_ctx *ctx = hpriv->ctx; 147962306a36Sopenharmony_ci struct hl_cs_job *job; 148062306a36Sopenharmony_ci struct hl_cs *cs; 148162306a36Sopenharmony_ci struct hl_cb *cb; 148262306a36Sopenharmony_ci u64 user_sequence; 148362306a36Sopenharmony_ci u8 stream_master_qid_map = 0; 148462306a36Sopenharmony_ci int rc, i; 148562306a36Sopenharmony_ci 148662306a36Sopenharmony_ci cntr = &hdev->aggregated_cs_counters; 148762306a36Sopenharmony_ci user_sequence = *cs_seq; 148862306a36Sopenharmony_ci *cs_seq = ULLONG_MAX; 148962306a36Sopenharmony_ci 149062306a36Sopenharmony_ci rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks, 149162306a36Sopenharmony_ci hpriv->ctx); 149262306a36Sopenharmony_ci if (rc) 149362306a36Sopenharmony_ci goto out; 149462306a36Sopenharmony_ci 149562306a36Sopenharmony_ci if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) && 149662306a36Sopenharmony_ci !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST)) 149762306a36Sopenharmony_ci staged_mid = true; 149862306a36Sopenharmony_ci else 149962306a36Sopenharmony_ci staged_mid = false; 150062306a36Sopenharmony_ci 150162306a36Sopenharmony_ci rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, 150262306a36Sopenharmony_ci staged_mid ? user_sequence : ULLONG_MAX, &cs, flags, 150362306a36Sopenharmony_ci timeout); 150462306a36Sopenharmony_ci if (rc) 150562306a36Sopenharmony_ci goto free_cs_chunk_array; 150662306a36Sopenharmony_ci 150762306a36Sopenharmony_ci *cs_seq = cs->sequence; 150862306a36Sopenharmony_ci 150962306a36Sopenharmony_ci hl_debugfs_add_cs(cs); 151062306a36Sopenharmony_ci 151162306a36Sopenharmony_ci rc = cs_staged_submission(hdev, cs, user_sequence, flags, 151262306a36Sopenharmony_ci encaps_signals_handle); 151362306a36Sopenharmony_ci if (rc) 151462306a36Sopenharmony_ci goto free_cs_object; 151562306a36Sopenharmony_ci 151662306a36Sopenharmony_ci /* If this is a staged submission we must return the staged sequence 151762306a36Sopenharmony_ci * rather than the internal CS sequence 151862306a36Sopenharmony_ci */ 151962306a36Sopenharmony_ci if (cs->staged_cs) 152062306a36Sopenharmony_ci *cs_seq = cs->staged_sequence; 152162306a36Sopenharmony_ci 152262306a36Sopenharmony_ci /* Validate ALL the CS chunks before submitting the CS */ 152362306a36Sopenharmony_ci for (i = 0 ; i < num_chunks ; i++) { 152462306a36Sopenharmony_ci struct hl_cs_chunk *chunk = &cs_chunk_array[i]; 152562306a36Sopenharmony_ci enum hl_queue_type queue_type; 152662306a36Sopenharmony_ci bool is_kernel_allocated_cb; 152762306a36Sopenharmony_ci 152862306a36Sopenharmony_ci rc = validate_queue_index(hdev, chunk, &queue_type, 152962306a36Sopenharmony_ci &is_kernel_allocated_cb); 153062306a36Sopenharmony_ci if (rc) { 153162306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 153262306a36Sopenharmony_ci atomic64_inc(&cntr->validation_drop_cnt); 153362306a36Sopenharmony_ci goto free_cs_object; 153462306a36Sopenharmony_ci } 153562306a36Sopenharmony_ci 153662306a36Sopenharmony_ci if (is_kernel_allocated_cb) { 153762306a36Sopenharmony_ci cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk); 153862306a36Sopenharmony_ci if (!cb) { 153962306a36Sopenharmony_ci atomic64_inc( 154062306a36Sopenharmony_ci &ctx->cs_counters.validation_drop_cnt); 154162306a36Sopenharmony_ci atomic64_inc(&cntr->validation_drop_cnt); 154262306a36Sopenharmony_ci rc = -EINVAL; 154362306a36Sopenharmony_ci goto free_cs_object; 154462306a36Sopenharmony_ci } 154562306a36Sopenharmony_ci } else { 154662306a36Sopenharmony_ci cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; 154762306a36Sopenharmony_ci } 154862306a36Sopenharmony_ci 154962306a36Sopenharmony_ci if (queue_type == QUEUE_TYPE_EXT || 155062306a36Sopenharmony_ci queue_type == QUEUE_TYPE_HW) { 155162306a36Sopenharmony_ci int_queues_only = false; 155262306a36Sopenharmony_ci 155362306a36Sopenharmony_ci /* 155462306a36Sopenharmony_ci * store which stream are being used for external/HW 155562306a36Sopenharmony_ci * queues of this CS 155662306a36Sopenharmony_ci */ 155762306a36Sopenharmony_ci if (hdev->supports_wait_for_multi_cs) 155862306a36Sopenharmony_ci stream_master_qid_map |= 155962306a36Sopenharmony_ci get_stream_master_qid_mask(hdev, 156062306a36Sopenharmony_ci chunk->queue_index); 156162306a36Sopenharmony_ci } 156262306a36Sopenharmony_ci 156362306a36Sopenharmony_ci if (queue_type == QUEUE_TYPE_HW) 156462306a36Sopenharmony_ci using_hw_queues = true; 156562306a36Sopenharmony_ci 156662306a36Sopenharmony_ci job = hl_cs_allocate_job(hdev, queue_type, 156762306a36Sopenharmony_ci is_kernel_allocated_cb); 156862306a36Sopenharmony_ci if (!job) { 156962306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 157062306a36Sopenharmony_ci atomic64_inc(&cntr->out_of_mem_drop_cnt); 157162306a36Sopenharmony_ci dev_err(hdev->dev, "Failed to allocate a new job\n"); 157262306a36Sopenharmony_ci rc = -ENOMEM; 157362306a36Sopenharmony_ci if (is_kernel_allocated_cb) 157462306a36Sopenharmony_ci goto release_cb; 157562306a36Sopenharmony_ci 157662306a36Sopenharmony_ci goto free_cs_object; 157762306a36Sopenharmony_ci } 157862306a36Sopenharmony_ci 157962306a36Sopenharmony_ci job->id = i + 1; 158062306a36Sopenharmony_ci job->cs = cs; 158162306a36Sopenharmony_ci job->user_cb = cb; 158262306a36Sopenharmony_ci job->user_cb_size = chunk->cb_size; 158362306a36Sopenharmony_ci job->hw_queue_id = chunk->queue_index; 158462306a36Sopenharmony_ci 158562306a36Sopenharmony_ci cs->jobs_in_queue_cnt[job->hw_queue_id]++; 158662306a36Sopenharmony_ci cs->jobs_cnt++; 158762306a36Sopenharmony_ci 158862306a36Sopenharmony_ci list_add_tail(&job->cs_node, &cs->job_list); 158962306a36Sopenharmony_ci 159062306a36Sopenharmony_ci /* 159162306a36Sopenharmony_ci * Increment CS reference. When CS reference is 0, CS is 159262306a36Sopenharmony_ci * done and can be signaled to user and free all its resources 159362306a36Sopenharmony_ci * Only increment for JOB on external or H/W queues, because 159462306a36Sopenharmony_ci * only for those JOBs we get completion 159562306a36Sopenharmony_ci */ 159662306a36Sopenharmony_ci if (cs_needs_completion(cs) && 159762306a36Sopenharmony_ci (job->queue_type == QUEUE_TYPE_EXT || 159862306a36Sopenharmony_ci job->queue_type == QUEUE_TYPE_HW)) 159962306a36Sopenharmony_ci cs_get(cs); 160062306a36Sopenharmony_ci 160162306a36Sopenharmony_ci hl_debugfs_add_job(hdev, job); 160262306a36Sopenharmony_ci 160362306a36Sopenharmony_ci rc = cs_parser(hpriv, job); 160462306a36Sopenharmony_ci if (rc) { 160562306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.parsing_drop_cnt); 160662306a36Sopenharmony_ci atomic64_inc(&cntr->parsing_drop_cnt); 160762306a36Sopenharmony_ci dev_err(hdev->dev, 160862306a36Sopenharmony_ci "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", 160962306a36Sopenharmony_ci cs->ctx->asid, cs->sequence, job->id, rc); 161062306a36Sopenharmony_ci goto free_cs_object; 161162306a36Sopenharmony_ci } 161262306a36Sopenharmony_ci } 161362306a36Sopenharmony_ci 161462306a36Sopenharmony_ci /* We allow a CS with any queue type combination as long as it does 161562306a36Sopenharmony_ci * not get a completion 161662306a36Sopenharmony_ci */ 161762306a36Sopenharmony_ci if (int_queues_only && cs_needs_completion(cs)) { 161862306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 161962306a36Sopenharmony_ci atomic64_inc(&cntr->validation_drop_cnt); 162062306a36Sopenharmony_ci dev_err(hdev->dev, 162162306a36Sopenharmony_ci "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n", 162262306a36Sopenharmony_ci cs->ctx->asid, cs->sequence); 162362306a36Sopenharmony_ci rc = -EINVAL; 162462306a36Sopenharmony_ci goto free_cs_object; 162562306a36Sopenharmony_ci } 162662306a36Sopenharmony_ci 162762306a36Sopenharmony_ci if (using_hw_queues) 162862306a36Sopenharmony_ci INIT_WORK(&cs->finish_work, cs_completion); 162962306a36Sopenharmony_ci 163062306a36Sopenharmony_ci /* 163162306a36Sopenharmony_ci * store the (external/HW queues) streams used by the CS in the 163262306a36Sopenharmony_ci * fence object for multi-CS completion 163362306a36Sopenharmony_ci */ 163462306a36Sopenharmony_ci if (hdev->supports_wait_for_multi_cs) 163562306a36Sopenharmony_ci cs->fence->stream_master_qid_map = stream_master_qid_map; 163662306a36Sopenharmony_ci 163762306a36Sopenharmony_ci rc = hl_hw_queue_schedule_cs(cs); 163862306a36Sopenharmony_ci if (rc) { 163962306a36Sopenharmony_ci if (rc != -EAGAIN) 164062306a36Sopenharmony_ci dev_err(hdev->dev, 164162306a36Sopenharmony_ci "Failed to submit CS %d.%llu to H/W queues, error %d\n", 164262306a36Sopenharmony_ci cs->ctx->asid, cs->sequence, rc); 164362306a36Sopenharmony_ci goto free_cs_object; 164462306a36Sopenharmony_ci } 164562306a36Sopenharmony_ci 164662306a36Sopenharmony_ci *signal_initial_sob_count = cs->initial_sob_count; 164762306a36Sopenharmony_ci 164862306a36Sopenharmony_ci rc = HL_CS_STATUS_SUCCESS; 164962306a36Sopenharmony_ci goto put_cs; 165062306a36Sopenharmony_ci 165162306a36Sopenharmony_cirelease_cb: 165262306a36Sopenharmony_ci atomic_dec(&cb->cs_cnt); 165362306a36Sopenharmony_ci hl_cb_put(cb); 165462306a36Sopenharmony_cifree_cs_object: 165562306a36Sopenharmony_ci cs_rollback(hdev, cs); 165662306a36Sopenharmony_ci *cs_seq = ULLONG_MAX; 165762306a36Sopenharmony_ci /* The path below is both for good and erroneous exits */ 165862306a36Sopenharmony_ciput_cs: 165962306a36Sopenharmony_ci /* We finished with the CS in this function, so put the ref */ 166062306a36Sopenharmony_ci cs_put(cs); 166162306a36Sopenharmony_cifree_cs_chunk_array: 166262306a36Sopenharmony_ci kfree(cs_chunk_array); 166362306a36Sopenharmony_ciout: 166462306a36Sopenharmony_ci return rc; 166562306a36Sopenharmony_ci} 166662306a36Sopenharmony_ci 166762306a36Sopenharmony_cistatic int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, 166862306a36Sopenharmony_ci u64 *cs_seq) 166962306a36Sopenharmony_ci{ 167062306a36Sopenharmony_ci struct hl_device *hdev = hpriv->hdev; 167162306a36Sopenharmony_ci struct hl_ctx *ctx = hpriv->ctx; 167262306a36Sopenharmony_ci bool need_soft_reset = false; 167362306a36Sopenharmony_ci int rc = 0, do_ctx_switch = 0; 167462306a36Sopenharmony_ci void __user *chunks; 167562306a36Sopenharmony_ci u32 num_chunks, tmp; 167662306a36Sopenharmony_ci u16 sob_count; 167762306a36Sopenharmony_ci int ret; 167862306a36Sopenharmony_ci 167962306a36Sopenharmony_ci if (hdev->supports_ctx_switch) 168062306a36Sopenharmony_ci do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); 168162306a36Sopenharmony_ci 168262306a36Sopenharmony_ci if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { 168362306a36Sopenharmony_ci mutex_lock(&hpriv->restore_phase_mutex); 168462306a36Sopenharmony_ci 168562306a36Sopenharmony_ci if (do_ctx_switch) { 168662306a36Sopenharmony_ci rc = hdev->asic_funcs->context_switch(hdev, ctx->asid); 168762306a36Sopenharmony_ci if (rc) { 168862306a36Sopenharmony_ci dev_err_ratelimited(hdev->dev, 168962306a36Sopenharmony_ci "Failed to switch to context %d, rejecting CS! %d\n", 169062306a36Sopenharmony_ci ctx->asid, rc); 169162306a36Sopenharmony_ci /* 169262306a36Sopenharmony_ci * If we timedout, or if the device is not IDLE 169362306a36Sopenharmony_ci * while we want to do context-switch (-EBUSY), 169462306a36Sopenharmony_ci * we need to soft-reset because QMAN is 169562306a36Sopenharmony_ci * probably stuck. However, we can't call to 169662306a36Sopenharmony_ci * reset here directly because of deadlock, so 169762306a36Sopenharmony_ci * need to do it at the very end of this 169862306a36Sopenharmony_ci * function 169962306a36Sopenharmony_ci */ 170062306a36Sopenharmony_ci if ((rc == -ETIMEDOUT) || (rc == -EBUSY)) 170162306a36Sopenharmony_ci need_soft_reset = true; 170262306a36Sopenharmony_ci mutex_unlock(&hpriv->restore_phase_mutex); 170362306a36Sopenharmony_ci goto out; 170462306a36Sopenharmony_ci } 170562306a36Sopenharmony_ci } 170662306a36Sopenharmony_ci 170762306a36Sopenharmony_ci hdev->asic_funcs->restore_phase_topology(hdev); 170862306a36Sopenharmony_ci 170962306a36Sopenharmony_ci chunks = (void __user *) (uintptr_t) args->in.chunks_restore; 171062306a36Sopenharmony_ci num_chunks = args->in.num_chunks_restore; 171162306a36Sopenharmony_ci 171262306a36Sopenharmony_ci if (!num_chunks) { 171362306a36Sopenharmony_ci dev_dbg(hdev->dev, 171462306a36Sopenharmony_ci "Need to run restore phase but restore CS is empty\n"); 171562306a36Sopenharmony_ci rc = 0; 171662306a36Sopenharmony_ci } else { 171762306a36Sopenharmony_ci rc = cs_ioctl_default(hpriv, chunks, num_chunks, 171862306a36Sopenharmony_ci cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count); 171962306a36Sopenharmony_ci } 172062306a36Sopenharmony_ci 172162306a36Sopenharmony_ci mutex_unlock(&hpriv->restore_phase_mutex); 172262306a36Sopenharmony_ci 172362306a36Sopenharmony_ci if (rc) { 172462306a36Sopenharmony_ci dev_err(hdev->dev, 172562306a36Sopenharmony_ci "Failed to submit restore CS for context %d (%d)\n", 172662306a36Sopenharmony_ci ctx->asid, rc); 172762306a36Sopenharmony_ci goto out; 172862306a36Sopenharmony_ci } 172962306a36Sopenharmony_ci 173062306a36Sopenharmony_ci /* Need to wait for restore completion before execution phase */ 173162306a36Sopenharmony_ci if (num_chunks) { 173262306a36Sopenharmony_ci enum hl_cs_wait_status status; 173362306a36Sopenharmony_ciwait_again: 173462306a36Sopenharmony_ci ret = _hl_cs_wait_ioctl(hdev, ctx, 173562306a36Sopenharmony_ci jiffies_to_usecs(hdev->timeout_jiffies), 173662306a36Sopenharmony_ci *cs_seq, &status, NULL); 173762306a36Sopenharmony_ci if (ret) { 173862306a36Sopenharmony_ci if (ret == -ERESTARTSYS) { 173962306a36Sopenharmony_ci usleep_range(100, 200); 174062306a36Sopenharmony_ci goto wait_again; 174162306a36Sopenharmony_ci } 174262306a36Sopenharmony_ci 174362306a36Sopenharmony_ci dev_err(hdev->dev, 174462306a36Sopenharmony_ci "Restore CS for context %d failed to complete %d\n", 174562306a36Sopenharmony_ci ctx->asid, ret); 174662306a36Sopenharmony_ci rc = -ENOEXEC; 174762306a36Sopenharmony_ci goto out; 174862306a36Sopenharmony_ci } 174962306a36Sopenharmony_ci } 175062306a36Sopenharmony_ci 175162306a36Sopenharmony_ci if (hdev->supports_ctx_switch) 175262306a36Sopenharmony_ci ctx->thread_ctx_switch_wait_token = 1; 175362306a36Sopenharmony_ci 175462306a36Sopenharmony_ci } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) { 175562306a36Sopenharmony_ci rc = hl_poll_timeout_memory(hdev, 175662306a36Sopenharmony_ci &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), 175762306a36Sopenharmony_ci 100, jiffies_to_usecs(hdev->timeout_jiffies), false); 175862306a36Sopenharmony_ci 175962306a36Sopenharmony_ci if (rc == -ETIMEDOUT) { 176062306a36Sopenharmony_ci dev_err(hdev->dev, 176162306a36Sopenharmony_ci "context switch phase timeout (%d)\n", tmp); 176262306a36Sopenharmony_ci goto out; 176362306a36Sopenharmony_ci } 176462306a36Sopenharmony_ci } 176562306a36Sopenharmony_ci 176662306a36Sopenharmony_ciout: 176762306a36Sopenharmony_ci if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset)) 176862306a36Sopenharmony_ci hl_device_reset(hdev, 0); 176962306a36Sopenharmony_ci 177062306a36Sopenharmony_ci return rc; 177162306a36Sopenharmony_ci} 177262306a36Sopenharmony_ci 177362306a36Sopenharmony_ci/* 177462306a36Sopenharmony_ci * hl_cs_signal_sob_wraparound_handler: handle SOB value wrapaound case. 177562306a36Sopenharmony_ci * if the SOB value reaches the max value move to the other SOB reserved 177662306a36Sopenharmony_ci * to the queue. 177762306a36Sopenharmony_ci * @hdev: pointer to device structure 177862306a36Sopenharmony_ci * @q_idx: stream queue index 177962306a36Sopenharmony_ci * @hw_sob: the H/W SOB used in this signal CS. 178062306a36Sopenharmony_ci * @count: signals count 178162306a36Sopenharmony_ci * @encaps_sig: tells whether it's reservation for encaps signals or not. 178262306a36Sopenharmony_ci * 178362306a36Sopenharmony_ci * Note that this function must be called while hw_queues_lock is taken. 178462306a36Sopenharmony_ci */ 178562306a36Sopenharmony_ciint hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx, 178662306a36Sopenharmony_ci struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig) 178762306a36Sopenharmony_ci 178862306a36Sopenharmony_ci{ 178962306a36Sopenharmony_ci struct hl_sync_stream_properties *prop; 179062306a36Sopenharmony_ci struct hl_hw_sob *sob = *hw_sob, *other_sob; 179162306a36Sopenharmony_ci u8 other_sob_offset; 179262306a36Sopenharmony_ci 179362306a36Sopenharmony_ci prop = &hdev->kernel_queues[q_idx].sync_stream_prop; 179462306a36Sopenharmony_ci 179562306a36Sopenharmony_ci hw_sob_get(sob); 179662306a36Sopenharmony_ci 179762306a36Sopenharmony_ci /* check for wraparound */ 179862306a36Sopenharmony_ci if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) { 179962306a36Sopenharmony_ci /* 180062306a36Sopenharmony_ci * Decrement as we reached the max value. 180162306a36Sopenharmony_ci * The release function won't be called here as we've 180262306a36Sopenharmony_ci * just incremented the refcount right before calling this 180362306a36Sopenharmony_ci * function. 180462306a36Sopenharmony_ci */ 180562306a36Sopenharmony_ci hw_sob_put_err(sob); 180662306a36Sopenharmony_ci 180762306a36Sopenharmony_ci /* 180862306a36Sopenharmony_ci * check the other sob value, if it still in use then fail 180962306a36Sopenharmony_ci * otherwise make the switch 181062306a36Sopenharmony_ci */ 181162306a36Sopenharmony_ci other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS; 181262306a36Sopenharmony_ci other_sob = &prop->hw_sob[other_sob_offset]; 181362306a36Sopenharmony_ci 181462306a36Sopenharmony_ci if (kref_read(&other_sob->kref) != 1) { 181562306a36Sopenharmony_ci dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n", 181662306a36Sopenharmony_ci q_idx); 181762306a36Sopenharmony_ci return -EINVAL; 181862306a36Sopenharmony_ci } 181962306a36Sopenharmony_ci 182062306a36Sopenharmony_ci /* 182162306a36Sopenharmony_ci * next_sob_val always points to the next available signal 182262306a36Sopenharmony_ci * in the sob, so in encaps signals it will be the next one 182362306a36Sopenharmony_ci * after reserving the required amount. 182462306a36Sopenharmony_ci */ 182562306a36Sopenharmony_ci if (encaps_sig) 182662306a36Sopenharmony_ci prop->next_sob_val = count + 1; 182762306a36Sopenharmony_ci else 182862306a36Sopenharmony_ci prop->next_sob_val = count; 182962306a36Sopenharmony_ci 183062306a36Sopenharmony_ci /* only two SOBs are currently in use */ 183162306a36Sopenharmony_ci prop->curr_sob_offset = other_sob_offset; 183262306a36Sopenharmony_ci *hw_sob = other_sob; 183362306a36Sopenharmony_ci 183462306a36Sopenharmony_ci /* 183562306a36Sopenharmony_ci * check if other_sob needs reset, then do it before using it 183662306a36Sopenharmony_ci * for the reservation or the next signal cs. 183762306a36Sopenharmony_ci * we do it here, and for both encaps and regular signal cs 183862306a36Sopenharmony_ci * cases in order to avoid possible races of two kref_put 183962306a36Sopenharmony_ci * of the sob which can occur at the same time if we move the 184062306a36Sopenharmony_ci * sob reset(kref_put) to cs_do_release function. 184162306a36Sopenharmony_ci * in addition, if we have combination of cs signal and 184262306a36Sopenharmony_ci * encaps, and at the point we need to reset the sob there was 184362306a36Sopenharmony_ci * no more reservations and only signal cs keep coming, 184462306a36Sopenharmony_ci * in such case we need signal_cs to put the refcount and 184562306a36Sopenharmony_ci * reset the sob. 184662306a36Sopenharmony_ci */ 184762306a36Sopenharmony_ci if (other_sob->need_reset) 184862306a36Sopenharmony_ci hw_sob_put(other_sob); 184962306a36Sopenharmony_ci 185062306a36Sopenharmony_ci if (encaps_sig) { 185162306a36Sopenharmony_ci /* set reset indication for the sob */ 185262306a36Sopenharmony_ci sob->need_reset = true; 185362306a36Sopenharmony_ci hw_sob_get(other_sob); 185462306a36Sopenharmony_ci } 185562306a36Sopenharmony_ci 185662306a36Sopenharmony_ci dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n", 185762306a36Sopenharmony_ci prop->curr_sob_offset, q_idx); 185862306a36Sopenharmony_ci } else { 185962306a36Sopenharmony_ci prop->next_sob_val += count; 186062306a36Sopenharmony_ci } 186162306a36Sopenharmony_ci 186262306a36Sopenharmony_ci return 0; 186362306a36Sopenharmony_ci} 186462306a36Sopenharmony_ci 186562306a36Sopenharmony_cistatic int cs_ioctl_extract_signal_seq(struct hl_device *hdev, 186662306a36Sopenharmony_ci struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx, 186762306a36Sopenharmony_ci bool encaps_signals) 186862306a36Sopenharmony_ci{ 186962306a36Sopenharmony_ci u64 *signal_seq_arr = NULL; 187062306a36Sopenharmony_ci u32 size_to_copy, signal_seq_arr_len; 187162306a36Sopenharmony_ci int rc = 0; 187262306a36Sopenharmony_ci 187362306a36Sopenharmony_ci if (encaps_signals) { 187462306a36Sopenharmony_ci *signal_seq = chunk->encaps_signal_seq; 187562306a36Sopenharmony_ci return 0; 187662306a36Sopenharmony_ci } 187762306a36Sopenharmony_ci 187862306a36Sopenharmony_ci signal_seq_arr_len = chunk->num_signal_seq_arr; 187962306a36Sopenharmony_ci 188062306a36Sopenharmony_ci /* currently only one signal seq is supported */ 188162306a36Sopenharmony_ci if (signal_seq_arr_len != 1) { 188262306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 188362306a36Sopenharmony_ci atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 188462306a36Sopenharmony_ci dev_err(hdev->dev, 188562306a36Sopenharmony_ci "Wait for signal CS supports only one signal CS seq\n"); 188662306a36Sopenharmony_ci return -EINVAL; 188762306a36Sopenharmony_ci } 188862306a36Sopenharmony_ci 188962306a36Sopenharmony_ci signal_seq_arr = kmalloc_array(signal_seq_arr_len, 189062306a36Sopenharmony_ci sizeof(*signal_seq_arr), 189162306a36Sopenharmony_ci GFP_ATOMIC); 189262306a36Sopenharmony_ci if (!signal_seq_arr) 189362306a36Sopenharmony_ci signal_seq_arr = kmalloc_array(signal_seq_arr_len, 189462306a36Sopenharmony_ci sizeof(*signal_seq_arr), 189562306a36Sopenharmony_ci GFP_KERNEL); 189662306a36Sopenharmony_ci if (!signal_seq_arr) { 189762306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 189862306a36Sopenharmony_ci atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); 189962306a36Sopenharmony_ci return -ENOMEM; 190062306a36Sopenharmony_ci } 190162306a36Sopenharmony_ci 190262306a36Sopenharmony_ci size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr); 190362306a36Sopenharmony_ci if (copy_from_user(signal_seq_arr, 190462306a36Sopenharmony_ci u64_to_user_ptr(chunk->signal_seq_arr), 190562306a36Sopenharmony_ci size_to_copy)) { 190662306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 190762306a36Sopenharmony_ci atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 190862306a36Sopenharmony_ci dev_err(hdev->dev, 190962306a36Sopenharmony_ci "Failed to copy signal seq array from user\n"); 191062306a36Sopenharmony_ci rc = -EFAULT; 191162306a36Sopenharmony_ci goto out; 191262306a36Sopenharmony_ci } 191362306a36Sopenharmony_ci 191462306a36Sopenharmony_ci /* currently it is guaranteed to have only one signal seq */ 191562306a36Sopenharmony_ci *signal_seq = signal_seq_arr[0]; 191662306a36Sopenharmony_ci 191762306a36Sopenharmony_ciout: 191862306a36Sopenharmony_ci kfree(signal_seq_arr); 191962306a36Sopenharmony_ci 192062306a36Sopenharmony_ci return rc; 192162306a36Sopenharmony_ci} 192262306a36Sopenharmony_ci 192362306a36Sopenharmony_cistatic int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, 192462306a36Sopenharmony_ci struct hl_ctx *ctx, struct hl_cs *cs, 192562306a36Sopenharmony_ci enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset) 192662306a36Sopenharmony_ci{ 192762306a36Sopenharmony_ci struct hl_cs_counters_atomic *cntr; 192862306a36Sopenharmony_ci struct hl_cs_job *job; 192962306a36Sopenharmony_ci struct hl_cb *cb; 193062306a36Sopenharmony_ci u32 cb_size; 193162306a36Sopenharmony_ci 193262306a36Sopenharmony_ci cntr = &hdev->aggregated_cs_counters; 193362306a36Sopenharmony_ci 193462306a36Sopenharmony_ci job = hl_cs_allocate_job(hdev, q_type, true); 193562306a36Sopenharmony_ci if (!job) { 193662306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 193762306a36Sopenharmony_ci atomic64_inc(&cntr->out_of_mem_drop_cnt); 193862306a36Sopenharmony_ci dev_err(hdev->dev, "Failed to allocate a new job\n"); 193962306a36Sopenharmony_ci return -ENOMEM; 194062306a36Sopenharmony_ci } 194162306a36Sopenharmony_ci 194262306a36Sopenharmony_ci if (cs->type == CS_TYPE_WAIT) 194362306a36Sopenharmony_ci cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); 194462306a36Sopenharmony_ci else 194562306a36Sopenharmony_ci cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); 194662306a36Sopenharmony_ci 194762306a36Sopenharmony_ci cb = hl_cb_kernel_create(hdev, cb_size, q_type == QUEUE_TYPE_HW); 194862306a36Sopenharmony_ci if (!cb) { 194962306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 195062306a36Sopenharmony_ci atomic64_inc(&cntr->out_of_mem_drop_cnt); 195162306a36Sopenharmony_ci kfree(job); 195262306a36Sopenharmony_ci return -EFAULT; 195362306a36Sopenharmony_ci } 195462306a36Sopenharmony_ci 195562306a36Sopenharmony_ci job->id = 0; 195662306a36Sopenharmony_ci job->cs = cs; 195762306a36Sopenharmony_ci job->user_cb = cb; 195862306a36Sopenharmony_ci atomic_inc(&job->user_cb->cs_cnt); 195962306a36Sopenharmony_ci job->user_cb_size = cb_size; 196062306a36Sopenharmony_ci job->hw_queue_id = q_idx; 196162306a36Sopenharmony_ci 196262306a36Sopenharmony_ci if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) 196362306a36Sopenharmony_ci && cs->encaps_signals) 196462306a36Sopenharmony_ci job->encaps_sig_wait_offset = encaps_signal_offset; 196562306a36Sopenharmony_ci /* 196662306a36Sopenharmony_ci * No need in parsing, user CB is the patched CB. 196762306a36Sopenharmony_ci * We call hl_cb_destroy() out of two reasons - we don't need the CB in 196862306a36Sopenharmony_ci * the CB idr anymore and to decrement its refcount as it was 196962306a36Sopenharmony_ci * incremented inside hl_cb_kernel_create(). 197062306a36Sopenharmony_ci */ 197162306a36Sopenharmony_ci job->patched_cb = job->user_cb; 197262306a36Sopenharmony_ci job->job_cb_size = job->user_cb_size; 197362306a36Sopenharmony_ci hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 197462306a36Sopenharmony_ci 197562306a36Sopenharmony_ci /* increment refcount as for external queues we get completion */ 197662306a36Sopenharmony_ci cs_get(cs); 197762306a36Sopenharmony_ci 197862306a36Sopenharmony_ci cs->jobs_in_queue_cnt[job->hw_queue_id]++; 197962306a36Sopenharmony_ci cs->jobs_cnt++; 198062306a36Sopenharmony_ci 198162306a36Sopenharmony_ci list_add_tail(&job->cs_node, &cs->job_list); 198262306a36Sopenharmony_ci 198362306a36Sopenharmony_ci hl_debugfs_add_job(hdev, job); 198462306a36Sopenharmony_ci 198562306a36Sopenharmony_ci return 0; 198662306a36Sopenharmony_ci} 198762306a36Sopenharmony_ci 198862306a36Sopenharmony_cistatic int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv, 198962306a36Sopenharmony_ci u32 q_idx, u32 count, 199062306a36Sopenharmony_ci u32 *handle_id, u32 *sob_addr, 199162306a36Sopenharmony_ci u32 *signals_count) 199262306a36Sopenharmony_ci{ 199362306a36Sopenharmony_ci struct hw_queue_properties *hw_queue_prop; 199462306a36Sopenharmony_ci struct hl_sync_stream_properties *prop; 199562306a36Sopenharmony_ci struct hl_device *hdev = hpriv->hdev; 199662306a36Sopenharmony_ci struct hl_cs_encaps_sig_handle *handle; 199762306a36Sopenharmony_ci struct hl_encaps_signals_mgr *mgr; 199862306a36Sopenharmony_ci struct hl_hw_sob *hw_sob; 199962306a36Sopenharmony_ci int hdl_id; 200062306a36Sopenharmony_ci int rc = 0; 200162306a36Sopenharmony_ci 200262306a36Sopenharmony_ci if (count >= HL_MAX_SOB_VAL) { 200362306a36Sopenharmony_ci dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n", 200462306a36Sopenharmony_ci count); 200562306a36Sopenharmony_ci rc = -EINVAL; 200662306a36Sopenharmony_ci goto out; 200762306a36Sopenharmony_ci } 200862306a36Sopenharmony_ci 200962306a36Sopenharmony_ci if (q_idx >= hdev->asic_prop.max_queues) { 201062306a36Sopenharmony_ci dev_err(hdev->dev, "Queue index %d is invalid\n", 201162306a36Sopenharmony_ci q_idx); 201262306a36Sopenharmony_ci rc = -EINVAL; 201362306a36Sopenharmony_ci goto out; 201462306a36Sopenharmony_ci } 201562306a36Sopenharmony_ci 201662306a36Sopenharmony_ci hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; 201762306a36Sopenharmony_ci 201862306a36Sopenharmony_ci if (!hw_queue_prop->supports_sync_stream) { 201962306a36Sopenharmony_ci dev_err(hdev->dev, 202062306a36Sopenharmony_ci "Queue index %d does not support sync stream operations\n", 202162306a36Sopenharmony_ci q_idx); 202262306a36Sopenharmony_ci rc = -EINVAL; 202362306a36Sopenharmony_ci goto out; 202462306a36Sopenharmony_ci } 202562306a36Sopenharmony_ci 202662306a36Sopenharmony_ci prop = &hdev->kernel_queues[q_idx].sync_stream_prop; 202762306a36Sopenharmony_ci 202862306a36Sopenharmony_ci handle = kzalloc(sizeof(*handle), GFP_KERNEL); 202962306a36Sopenharmony_ci if (!handle) { 203062306a36Sopenharmony_ci rc = -ENOMEM; 203162306a36Sopenharmony_ci goto out; 203262306a36Sopenharmony_ci } 203362306a36Sopenharmony_ci 203462306a36Sopenharmony_ci handle->count = count; 203562306a36Sopenharmony_ci 203662306a36Sopenharmony_ci hl_ctx_get(hpriv->ctx); 203762306a36Sopenharmony_ci handle->ctx = hpriv->ctx; 203862306a36Sopenharmony_ci mgr = &hpriv->ctx->sig_mgr; 203962306a36Sopenharmony_ci 204062306a36Sopenharmony_ci spin_lock(&mgr->lock); 204162306a36Sopenharmony_ci hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC); 204262306a36Sopenharmony_ci spin_unlock(&mgr->lock); 204362306a36Sopenharmony_ci 204462306a36Sopenharmony_ci if (hdl_id < 0) { 204562306a36Sopenharmony_ci dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n"); 204662306a36Sopenharmony_ci rc = -EINVAL; 204762306a36Sopenharmony_ci goto put_ctx; 204862306a36Sopenharmony_ci } 204962306a36Sopenharmony_ci 205062306a36Sopenharmony_ci handle->id = hdl_id; 205162306a36Sopenharmony_ci handle->q_idx = q_idx; 205262306a36Sopenharmony_ci handle->hdev = hdev; 205362306a36Sopenharmony_ci kref_init(&handle->refcount); 205462306a36Sopenharmony_ci 205562306a36Sopenharmony_ci hdev->asic_funcs->hw_queues_lock(hdev); 205662306a36Sopenharmony_ci 205762306a36Sopenharmony_ci hw_sob = &prop->hw_sob[prop->curr_sob_offset]; 205862306a36Sopenharmony_ci 205962306a36Sopenharmony_ci /* 206062306a36Sopenharmony_ci * Increment the SOB value by count by user request 206162306a36Sopenharmony_ci * to reserve those signals 206262306a36Sopenharmony_ci * check if the signals amount to reserve is not exceeding the max sob 206362306a36Sopenharmony_ci * value, if yes then switch sob. 206462306a36Sopenharmony_ci */ 206562306a36Sopenharmony_ci rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count, 206662306a36Sopenharmony_ci true); 206762306a36Sopenharmony_ci if (rc) { 206862306a36Sopenharmony_ci dev_err(hdev->dev, "Failed to switch SOB\n"); 206962306a36Sopenharmony_ci hdev->asic_funcs->hw_queues_unlock(hdev); 207062306a36Sopenharmony_ci rc = -EINVAL; 207162306a36Sopenharmony_ci goto remove_idr; 207262306a36Sopenharmony_ci } 207362306a36Sopenharmony_ci /* set the hw_sob to the handle after calling the sob wraparound handler 207462306a36Sopenharmony_ci * since sob could have changed. 207562306a36Sopenharmony_ci */ 207662306a36Sopenharmony_ci handle->hw_sob = hw_sob; 207762306a36Sopenharmony_ci 207862306a36Sopenharmony_ci /* store the current sob value for unreserve validity check, and 207962306a36Sopenharmony_ci * signal offset support 208062306a36Sopenharmony_ci */ 208162306a36Sopenharmony_ci handle->pre_sob_val = prop->next_sob_val - handle->count; 208262306a36Sopenharmony_ci 208362306a36Sopenharmony_ci handle->cs_seq = ULLONG_MAX; 208462306a36Sopenharmony_ci 208562306a36Sopenharmony_ci *signals_count = prop->next_sob_val; 208662306a36Sopenharmony_ci hdev->asic_funcs->hw_queues_unlock(hdev); 208762306a36Sopenharmony_ci 208862306a36Sopenharmony_ci *sob_addr = handle->hw_sob->sob_addr; 208962306a36Sopenharmony_ci *handle_id = hdl_id; 209062306a36Sopenharmony_ci 209162306a36Sopenharmony_ci dev_dbg(hdev->dev, 209262306a36Sopenharmony_ci "Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n", 209362306a36Sopenharmony_ci hw_sob->sob_id, handle->hw_sob->sob_addr, 209462306a36Sopenharmony_ci prop->next_sob_val - 1, q_idx, hdl_id); 209562306a36Sopenharmony_ci goto out; 209662306a36Sopenharmony_ci 209762306a36Sopenharmony_ciremove_idr: 209862306a36Sopenharmony_ci spin_lock(&mgr->lock); 209962306a36Sopenharmony_ci idr_remove(&mgr->handles, hdl_id); 210062306a36Sopenharmony_ci spin_unlock(&mgr->lock); 210162306a36Sopenharmony_ci 210262306a36Sopenharmony_ciput_ctx: 210362306a36Sopenharmony_ci hl_ctx_put(handle->ctx); 210462306a36Sopenharmony_ci kfree(handle); 210562306a36Sopenharmony_ci 210662306a36Sopenharmony_ciout: 210762306a36Sopenharmony_ci return rc; 210862306a36Sopenharmony_ci} 210962306a36Sopenharmony_ci 211062306a36Sopenharmony_cistatic int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id) 211162306a36Sopenharmony_ci{ 211262306a36Sopenharmony_ci struct hl_cs_encaps_sig_handle *encaps_sig_hdl; 211362306a36Sopenharmony_ci struct hl_sync_stream_properties *prop; 211462306a36Sopenharmony_ci struct hl_device *hdev = hpriv->hdev; 211562306a36Sopenharmony_ci struct hl_encaps_signals_mgr *mgr; 211662306a36Sopenharmony_ci struct hl_hw_sob *hw_sob; 211762306a36Sopenharmony_ci u32 q_idx, sob_addr; 211862306a36Sopenharmony_ci int rc = 0; 211962306a36Sopenharmony_ci 212062306a36Sopenharmony_ci mgr = &hpriv->ctx->sig_mgr; 212162306a36Sopenharmony_ci 212262306a36Sopenharmony_ci spin_lock(&mgr->lock); 212362306a36Sopenharmony_ci encaps_sig_hdl = idr_find(&mgr->handles, handle_id); 212462306a36Sopenharmony_ci if (encaps_sig_hdl) { 212562306a36Sopenharmony_ci dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n", 212662306a36Sopenharmony_ci handle_id, encaps_sig_hdl->hw_sob->sob_addr, 212762306a36Sopenharmony_ci encaps_sig_hdl->count); 212862306a36Sopenharmony_ci 212962306a36Sopenharmony_ci hdev->asic_funcs->hw_queues_lock(hdev); 213062306a36Sopenharmony_ci 213162306a36Sopenharmony_ci q_idx = encaps_sig_hdl->q_idx; 213262306a36Sopenharmony_ci prop = &hdev->kernel_queues[q_idx].sync_stream_prop; 213362306a36Sopenharmony_ci hw_sob = &prop->hw_sob[prop->curr_sob_offset]; 213462306a36Sopenharmony_ci sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id); 213562306a36Sopenharmony_ci 213662306a36Sopenharmony_ci /* Check if sob_val got out of sync due to other 213762306a36Sopenharmony_ci * signal submission requests which were handled 213862306a36Sopenharmony_ci * between the reserve-unreserve calls or SOB switch 213962306a36Sopenharmony_ci * upon reaching SOB max value. 214062306a36Sopenharmony_ci */ 214162306a36Sopenharmony_ci if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count 214262306a36Sopenharmony_ci != prop->next_sob_val || 214362306a36Sopenharmony_ci sob_addr != encaps_sig_hdl->hw_sob->sob_addr) { 214462306a36Sopenharmony_ci dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n", 214562306a36Sopenharmony_ci encaps_sig_hdl->pre_sob_val, 214662306a36Sopenharmony_ci (prop->next_sob_val - encaps_sig_hdl->count)); 214762306a36Sopenharmony_ci 214862306a36Sopenharmony_ci hdev->asic_funcs->hw_queues_unlock(hdev); 214962306a36Sopenharmony_ci rc = -EINVAL; 215062306a36Sopenharmony_ci goto out_unlock; 215162306a36Sopenharmony_ci } 215262306a36Sopenharmony_ci 215362306a36Sopenharmony_ci /* 215462306a36Sopenharmony_ci * Decrement the SOB value by count by user request 215562306a36Sopenharmony_ci * to unreserve those signals 215662306a36Sopenharmony_ci */ 215762306a36Sopenharmony_ci prop->next_sob_val -= encaps_sig_hdl->count; 215862306a36Sopenharmony_ci 215962306a36Sopenharmony_ci hdev->asic_funcs->hw_queues_unlock(hdev); 216062306a36Sopenharmony_ci 216162306a36Sopenharmony_ci hw_sob_put(hw_sob); 216262306a36Sopenharmony_ci 216362306a36Sopenharmony_ci /* Release the id and free allocated memory of the handle */ 216462306a36Sopenharmony_ci idr_remove(&mgr->handles, handle_id); 216562306a36Sopenharmony_ci 216662306a36Sopenharmony_ci /* unlock before calling ctx_put, where we might sleep */ 216762306a36Sopenharmony_ci spin_unlock(&mgr->lock); 216862306a36Sopenharmony_ci hl_ctx_put(encaps_sig_hdl->ctx); 216962306a36Sopenharmony_ci kfree(encaps_sig_hdl); 217062306a36Sopenharmony_ci goto out; 217162306a36Sopenharmony_ci } else { 217262306a36Sopenharmony_ci rc = -EINVAL; 217362306a36Sopenharmony_ci dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n"); 217462306a36Sopenharmony_ci } 217562306a36Sopenharmony_ci 217662306a36Sopenharmony_ciout_unlock: 217762306a36Sopenharmony_ci spin_unlock(&mgr->lock); 217862306a36Sopenharmony_ci 217962306a36Sopenharmony_ciout: 218062306a36Sopenharmony_ci return rc; 218162306a36Sopenharmony_ci} 218262306a36Sopenharmony_ci 218362306a36Sopenharmony_cistatic int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, 218462306a36Sopenharmony_ci void __user *chunks, u32 num_chunks, 218562306a36Sopenharmony_ci u64 *cs_seq, u32 flags, u32 timeout, 218662306a36Sopenharmony_ci u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count) 218762306a36Sopenharmony_ci{ 218862306a36Sopenharmony_ci struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL; 218962306a36Sopenharmony_ci bool handle_found = false, is_wait_cs = false, 219062306a36Sopenharmony_ci wait_cs_submitted = false, 219162306a36Sopenharmony_ci cs_encaps_signals = false; 219262306a36Sopenharmony_ci struct hl_cs_chunk *cs_chunk_array, *chunk; 219362306a36Sopenharmony_ci bool staged_cs_with_encaps_signals = false; 219462306a36Sopenharmony_ci struct hw_queue_properties *hw_queue_prop; 219562306a36Sopenharmony_ci struct hl_device *hdev = hpriv->hdev; 219662306a36Sopenharmony_ci struct hl_cs_compl *sig_waitcs_cmpl; 219762306a36Sopenharmony_ci u32 q_idx, collective_engine_id = 0; 219862306a36Sopenharmony_ci struct hl_cs_counters_atomic *cntr; 219962306a36Sopenharmony_ci struct hl_fence *sig_fence = NULL; 220062306a36Sopenharmony_ci struct hl_ctx *ctx = hpriv->ctx; 220162306a36Sopenharmony_ci enum hl_queue_type q_type; 220262306a36Sopenharmony_ci struct hl_cs *cs; 220362306a36Sopenharmony_ci u64 signal_seq; 220462306a36Sopenharmony_ci int rc; 220562306a36Sopenharmony_ci 220662306a36Sopenharmony_ci cntr = &hdev->aggregated_cs_counters; 220762306a36Sopenharmony_ci *cs_seq = ULLONG_MAX; 220862306a36Sopenharmony_ci 220962306a36Sopenharmony_ci rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks, 221062306a36Sopenharmony_ci ctx); 221162306a36Sopenharmony_ci if (rc) 221262306a36Sopenharmony_ci goto out; 221362306a36Sopenharmony_ci 221462306a36Sopenharmony_ci /* currently it is guaranteed to have only one chunk */ 221562306a36Sopenharmony_ci chunk = &cs_chunk_array[0]; 221662306a36Sopenharmony_ci 221762306a36Sopenharmony_ci if (chunk->queue_index >= hdev->asic_prop.max_queues) { 221862306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 221962306a36Sopenharmony_ci atomic64_inc(&cntr->validation_drop_cnt); 222062306a36Sopenharmony_ci dev_err(hdev->dev, "Queue index %d is invalid\n", 222162306a36Sopenharmony_ci chunk->queue_index); 222262306a36Sopenharmony_ci rc = -EINVAL; 222362306a36Sopenharmony_ci goto free_cs_chunk_array; 222462306a36Sopenharmony_ci } 222562306a36Sopenharmony_ci 222662306a36Sopenharmony_ci q_idx = chunk->queue_index; 222762306a36Sopenharmony_ci hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; 222862306a36Sopenharmony_ci q_type = hw_queue_prop->type; 222962306a36Sopenharmony_ci 223062306a36Sopenharmony_ci if (!hw_queue_prop->supports_sync_stream) { 223162306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 223262306a36Sopenharmony_ci atomic64_inc(&cntr->validation_drop_cnt); 223362306a36Sopenharmony_ci dev_err(hdev->dev, 223462306a36Sopenharmony_ci "Queue index %d does not support sync stream operations\n", 223562306a36Sopenharmony_ci q_idx); 223662306a36Sopenharmony_ci rc = -EINVAL; 223762306a36Sopenharmony_ci goto free_cs_chunk_array; 223862306a36Sopenharmony_ci } 223962306a36Sopenharmony_ci 224062306a36Sopenharmony_ci if (cs_type == CS_TYPE_COLLECTIVE_WAIT) { 224162306a36Sopenharmony_ci if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { 224262306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 224362306a36Sopenharmony_ci atomic64_inc(&cntr->validation_drop_cnt); 224462306a36Sopenharmony_ci dev_err(hdev->dev, 224562306a36Sopenharmony_ci "Queue index %d is invalid\n", q_idx); 224662306a36Sopenharmony_ci rc = -EINVAL; 224762306a36Sopenharmony_ci goto free_cs_chunk_array; 224862306a36Sopenharmony_ci } 224962306a36Sopenharmony_ci 225062306a36Sopenharmony_ci if (!hdev->nic_ports_mask) { 225162306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 225262306a36Sopenharmony_ci atomic64_inc(&cntr->validation_drop_cnt); 225362306a36Sopenharmony_ci dev_err(hdev->dev, 225462306a36Sopenharmony_ci "Collective operations not supported when NIC ports are disabled"); 225562306a36Sopenharmony_ci rc = -EINVAL; 225662306a36Sopenharmony_ci goto free_cs_chunk_array; 225762306a36Sopenharmony_ci } 225862306a36Sopenharmony_ci 225962306a36Sopenharmony_ci collective_engine_id = chunk->collective_engine_id; 226062306a36Sopenharmony_ci } 226162306a36Sopenharmony_ci 226262306a36Sopenharmony_ci is_wait_cs = !!(cs_type == CS_TYPE_WAIT || 226362306a36Sopenharmony_ci cs_type == CS_TYPE_COLLECTIVE_WAIT); 226462306a36Sopenharmony_ci 226562306a36Sopenharmony_ci cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); 226662306a36Sopenharmony_ci 226762306a36Sopenharmony_ci if (is_wait_cs) { 226862306a36Sopenharmony_ci rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, 226962306a36Sopenharmony_ci ctx, cs_encaps_signals); 227062306a36Sopenharmony_ci if (rc) 227162306a36Sopenharmony_ci goto free_cs_chunk_array; 227262306a36Sopenharmony_ci 227362306a36Sopenharmony_ci if (cs_encaps_signals) { 227462306a36Sopenharmony_ci /* check if cs sequence has encapsulated 227562306a36Sopenharmony_ci * signals handle 227662306a36Sopenharmony_ci */ 227762306a36Sopenharmony_ci struct idr *idp; 227862306a36Sopenharmony_ci u32 id; 227962306a36Sopenharmony_ci 228062306a36Sopenharmony_ci spin_lock(&ctx->sig_mgr.lock); 228162306a36Sopenharmony_ci idp = &ctx->sig_mgr.handles; 228262306a36Sopenharmony_ci idr_for_each_entry(idp, encaps_sig_hdl, id) { 228362306a36Sopenharmony_ci if (encaps_sig_hdl->cs_seq == signal_seq) { 228462306a36Sopenharmony_ci /* get refcount to protect removing this handle from idr, 228562306a36Sopenharmony_ci * needed when multiple wait cs are used with offset 228662306a36Sopenharmony_ci * to wait on reserved encaps signals. 228762306a36Sopenharmony_ci * Since kref_put of this handle is executed outside the 228862306a36Sopenharmony_ci * current lock, it is possible that the handle refcount 228962306a36Sopenharmony_ci * is 0 but it yet to be removed from the list. In this 229062306a36Sopenharmony_ci * case need to consider the handle as not valid. 229162306a36Sopenharmony_ci */ 229262306a36Sopenharmony_ci if (kref_get_unless_zero(&encaps_sig_hdl->refcount)) 229362306a36Sopenharmony_ci handle_found = true; 229462306a36Sopenharmony_ci break; 229562306a36Sopenharmony_ci } 229662306a36Sopenharmony_ci } 229762306a36Sopenharmony_ci spin_unlock(&ctx->sig_mgr.lock); 229862306a36Sopenharmony_ci 229962306a36Sopenharmony_ci if (!handle_found) { 230062306a36Sopenharmony_ci /* treat as signal CS already finished */ 230162306a36Sopenharmony_ci dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n", 230262306a36Sopenharmony_ci signal_seq); 230362306a36Sopenharmony_ci rc = 0; 230462306a36Sopenharmony_ci goto free_cs_chunk_array; 230562306a36Sopenharmony_ci } 230662306a36Sopenharmony_ci 230762306a36Sopenharmony_ci /* validate also the signal offset value */ 230862306a36Sopenharmony_ci if (chunk->encaps_signal_offset > 230962306a36Sopenharmony_ci encaps_sig_hdl->count) { 231062306a36Sopenharmony_ci dev_err(hdev->dev, "offset(%u) value exceed max reserved signals count(%u)!\n", 231162306a36Sopenharmony_ci chunk->encaps_signal_offset, 231262306a36Sopenharmony_ci encaps_sig_hdl->count); 231362306a36Sopenharmony_ci rc = -EINVAL; 231462306a36Sopenharmony_ci goto free_cs_chunk_array; 231562306a36Sopenharmony_ci } 231662306a36Sopenharmony_ci } 231762306a36Sopenharmony_ci 231862306a36Sopenharmony_ci sig_fence = hl_ctx_get_fence(ctx, signal_seq); 231962306a36Sopenharmony_ci if (IS_ERR(sig_fence)) { 232062306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 232162306a36Sopenharmony_ci atomic64_inc(&cntr->validation_drop_cnt); 232262306a36Sopenharmony_ci dev_err(hdev->dev, 232362306a36Sopenharmony_ci "Failed to get signal CS with seq 0x%llx\n", 232462306a36Sopenharmony_ci signal_seq); 232562306a36Sopenharmony_ci rc = PTR_ERR(sig_fence); 232662306a36Sopenharmony_ci goto free_cs_chunk_array; 232762306a36Sopenharmony_ci } 232862306a36Sopenharmony_ci 232962306a36Sopenharmony_ci if (!sig_fence) { 233062306a36Sopenharmony_ci /* signal CS already finished */ 233162306a36Sopenharmony_ci rc = 0; 233262306a36Sopenharmony_ci goto free_cs_chunk_array; 233362306a36Sopenharmony_ci } 233462306a36Sopenharmony_ci 233562306a36Sopenharmony_ci sig_waitcs_cmpl = 233662306a36Sopenharmony_ci container_of(sig_fence, struct hl_cs_compl, base_fence); 233762306a36Sopenharmony_ci 233862306a36Sopenharmony_ci staged_cs_with_encaps_signals = !! 233962306a36Sopenharmony_ci (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT && 234062306a36Sopenharmony_ci (flags & HL_CS_FLAGS_ENCAP_SIGNALS)); 234162306a36Sopenharmony_ci 234262306a36Sopenharmony_ci if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL && 234362306a36Sopenharmony_ci !staged_cs_with_encaps_signals) { 234462306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 234562306a36Sopenharmony_ci atomic64_inc(&cntr->validation_drop_cnt); 234662306a36Sopenharmony_ci dev_err(hdev->dev, 234762306a36Sopenharmony_ci "CS seq 0x%llx is not of a signal/encaps-signal CS\n", 234862306a36Sopenharmony_ci signal_seq); 234962306a36Sopenharmony_ci hl_fence_put(sig_fence); 235062306a36Sopenharmony_ci rc = -EINVAL; 235162306a36Sopenharmony_ci goto free_cs_chunk_array; 235262306a36Sopenharmony_ci } 235362306a36Sopenharmony_ci 235462306a36Sopenharmony_ci if (completion_done(&sig_fence->completion)) { 235562306a36Sopenharmony_ci /* signal CS already finished */ 235662306a36Sopenharmony_ci hl_fence_put(sig_fence); 235762306a36Sopenharmony_ci rc = 0; 235862306a36Sopenharmony_ci goto free_cs_chunk_array; 235962306a36Sopenharmony_ci } 236062306a36Sopenharmony_ci } 236162306a36Sopenharmony_ci 236262306a36Sopenharmony_ci rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout); 236362306a36Sopenharmony_ci if (rc) { 236462306a36Sopenharmony_ci if (is_wait_cs) 236562306a36Sopenharmony_ci hl_fence_put(sig_fence); 236662306a36Sopenharmony_ci 236762306a36Sopenharmony_ci goto free_cs_chunk_array; 236862306a36Sopenharmony_ci } 236962306a36Sopenharmony_ci 237062306a36Sopenharmony_ci /* 237162306a36Sopenharmony_ci * Save the signal CS fence for later initialization right before 237262306a36Sopenharmony_ci * hanging the wait CS on the queue. 237362306a36Sopenharmony_ci * for encaps signals case, we save the cs sequence and handle pointer 237462306a36Sopenharmony_ci * for later initialization. 237562306a36Sopenharmony_ci */ 237662306a36Sopenharmony_ci if (is_wait_cs) { 237762306a36Sopenharmony_ci cs->signal_fence = sig_fence; 237862306a36Sopenharmony_ci /* store the handle pointer, so we don't have to 237962306a36Sopenharmony_ci * look for it again, later on the flow 238062306a36Sopenharmony_ci * when we need to set SOB info in hw_queue. 238162306a36Sopenharmony_ci */ 238262306a36Sopenharmony_ci if (cs->encaps_signals) 238362306a36Sopenharmony_ci cs->encaps_sig_hdl = encaps_sig_hdl; 238462306a36Sopenharmony_ci } 238562306a36Sopenharmony_ci 238662306a36Sopenharmony_ci hl_debugfs_add_cs(cs); 238762306a36Sopenharmony_ci 238862306a36Sopenharmony_ci *cs_seq = cs->sequence; 238962306a36Sopenharmony_ci 239062306a36Sopenharmony_ci if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL) 239162306a36Sopenharmony_ci rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type, 239262306a36Sopenharmony_ci q_idx, chunk->encaps_signal_offset); 239362306a36Sopenharmony_ci else if (cs_type == CS_TYPE_COLLECTIVE_WAIT) 239462306a36Sopenharmony_ci rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx, 239562306a36Sopenharmony_ci cs, q_idx, collective_engine_id, 239662306a36Sopenharmony_ci chunk->encaps_signal_offset); 239762306a36Sopenharmony_ci else { 239862306a36Sopenharmony_ci atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 239962306a36Sopenharmony_ci atomic64_inc(&cntr->validation_drop_cnt); 240062306a36Sopenharmony_ci rc = -EINVAL; 240162306a36Sopenharmony_ci } 240262306a36Sopenharmony_ci 240362306a36Sopenharmony_ci if (rc) 240462306a36Sopenharmony_ci goto free_cs_object; 240562306a36Sopenharmony_ci 240662306a36Sopenharmony_ci if (q_type == QUEUE_TYPE_HW) 240762306a36Sopenharmony_ci INIT_WORK(&cs->finish_work, cs_completion); 240862306a36Sopenharmony_ci 240962306a36Sopenharmony_ci rc = hl_hw_queue_schedule_cs(cs); 241062306a36Sopenharmony_ci if (rc) { 241162306a36Sopenharmony_ci /* In case wait cs failed here, it means the signal cs 241262306a36Sopenharmony_ci * already completed. we want to free all it's related objects 241362306a36Sopenharmony_ci * but we don't want to fail the ioctl. 241462306a36Sopenharmony_ci */ 241562306a36Sopenharmony_ci if (is_wait_cs) 241662306a36Sopenharmony_ci rc = 0; 241762306a36Sopenharmony_ci else if (rc != -EAGAIN) 241862306a36Sopenharmony_ci dev_err(hdev->dev, 241962306a36Sopenharmony_ci "Failed to submit CS %d.%llu to H/W queues, error %d\n", 242062306a36Sopenharmony_ci ctx->asid, cs->sequence, rc); 242162306a36Sopenharmony_ci goto free_cs_object; 242262306a36Sopenharmony_ci } 242362306a36Sopenharmony_ci 242462306a36Sopenharmony_ci *signal_sob_addr_offset = cs->sob_addr_offset; 242562306a36Sopenharmony_ci *signal_initial_sob_count = cs->initial_sob_count; 242662306a36Sopenharmony_ci 242762306a36Sopenharmony_ci rc = HL_CS_STATUS_SUCCESS; 242862306a36Sopenharmony_ci if (is_wait_cs) 242962306a36Sopenharmony_ci wait_cs_submitted = true; 243062306a36Sopenharmony_ci goto put_cs; 243162306a36Sopenharmony_ci 243262306a36Sopenharmony_cifree_cs_object: 243362306a36Sopenharmony_ci cs_rollback(hdev, cs); 243462306a36Sopenharmony_ci *cs_seq = ULLONG_MAX; 243562306a36Sopenharmony_ci /* The path below is both for good and erroneous exits */ 243662306a36Sopenharmony_ciput_cs: 243762306a36Sopenharmony_ci /* We finished with the CS in this function, so put the ref */ 243862306a36Sopenharmony_ci cs_put(cs); 243962306a36Sopenharmony_cifree_cs_chunk_array: 244062306a36Sopenharmony_ci if (!wait_cs_submitted && cs_encaps_signals && handle_found && is_wait_cs) 244162306a36Sopenharmony_ci kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); 244262306a36Sopenharmony_ci kfree(cs_chunk_array); 244362306a36Sopenharmony_ciout: 244462306a36Sopenharmony_ci return rc; 244562306a36Sopenharmony_ci} 244662306a36Sopenharmony_ci 244762306a36Sopenharmony_cistatic int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores, 244862306a36Sopenharmony_ci u32 num_engine_cores, u32 core_command) 244962306a36Sopenharmony_ci{ 245062306a36Sopenharmony_ci struct hl_device *hdev = hpriv->hdev; 245162306a36Sopenharmony_ci void __user *engine_cores_arr; 245262306a36Sopenharmony_ci u32 *cores; 245362306a36Sopenharmony_ci int rc; 245462306a36Sopenharmony_ci 245562306a36Sopenharmony_ci if (!hdev->asic_prop.supports_engine_modes) 245662306a36Sopenharmony_ci return -EPERM; 245762306a36Sopenharmony_ci 245862306a36Sopenharmony_ci if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) { 245962306a36Sopenharmony_ci dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores); 246062306a36Sopenharmony_ci return -EINVAL; 246162306a36Sopenharmony_ci } 246262306a36Sopenharmony_ci 246362306a36Sopenharmony_ci if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) { 246462306a36Sopenharmony_ci dev_err(hdev->dev, "Engine core command is invalid\n"); 246562306a36Sopenharmony_ci return -EINVAL; 246662306a36Sopenharmony_ci } 246762306a36Sopenharmony_ci 246862306a36Sopenharmony_ci engine_cores_arr = (void __user *) (uintptr_t) engine_cores; 246962306a36Sopenharmony_ci cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL); 247062306a36Sopenharmony_ci if (!cores) 247162306a36Sopenharmony_ci return -ENOMEM; 247262306a36Sopenharmony_ci 247362306a36Sopenharmony_ci if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) { 247462306a36Sopenharmony_ci dev_err(hdev->dev, "Failed to copy core-ids array from user\n"); 247562306a36Sopenharmony_ci kfree(cores); 247662306a36Sopenharmony_ci return -EFAULT; 247762306a36Sopenharmony_ci } 247862306a36Sopenharmony_ci 247962306a36Sopenharmony_ci rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command); 248062306a36Sopenharmony_ci kfree(cores); 248162306a36Sopenharmony_ci 248262306a36Sopenharmony_ci return rc; 248362306a36Sopenharmony_ci} 248462306a36Sopenharmony_ci 248562306a36Sopenharmony_cistatic int cs_ioctl_engines(struct hl_fpriv *hpriv, u64 engines_arr_user_addr, 248662306a36Sopenharmony_ci u32 num_engines, enum hl_engine_command command) 248762306a36Sopenharmony_ci{ 248862306a36Sopenharmony_ci struct hl_device *hdev = hpriv->hdev; 248962306a36Sopenharmony_ci u32 *engines, max_num_of_engines; 249062306a36Sopenharmony_ci void __user *engines_arr; 249162306a36Sopenharmony_ci int rc; 249262306a36Sopenharmony_ci 249362306a36Sopenharmony_ci if (!hdev->asic_prop.supports_engine_modes) 249462306a36Sopenharmony_ci return -EPERM; 249562306a36Sopenharmony_ci 249662306a36Sopenharmony_ci if (command >= HL_ENGINE_COMMAND_MAX) { 249762306a36Sopenharmony_ci dev_err(hdev->dev, "Engine command is invalid\n"); 249862306a36Sopenharmony_ci return -EINVAL; 249962306a36Sopenharmony_ci } 250062306a36Sopenharmony_ci 250162306a36Sopenharmony_ci max_num_of_engines = hdev->asic_prop.max_num_of_engines; 250262306a36Sopenharmony_ci if (command == HL_ENGINE_CORE_RUN || command == HL_ENGINE_CORE_HALT) 250362306a36Sopenharmony_ci max_num_of_engines = hdev->asic_prop.num_engine_cores; 250462306a36Sopenharmony_ci 250562306a36Sopenharmony_ci if (!num_engines || num_engines > max_num_of_engines) { 250662306a36Sopenharmony_ci dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines); 250762306a36Sopenharmony_ci return -EINVAL; 250862306a36Sopenharmony_ci } 250962306a36Sopenharmony_ci 251062306a36Sopenharmony_ci engines_arr = (void __user *) (uintptr_t) engines_arr_user_addr; 251162306a36Sopenharmony_ci engines = kmalloc_array(num_engines, sizeof(u32), GFP_KERNEL); 251262306a36Sopenharmony_ci if (!engines) 251362306a36Sopenharmony_ci return -ENOMEM; 251462306a36Sopenharmony_ci 251562306a36Sopenharmony_ci if (copy_from_user(engines, engines_arr, num_engines * sizeof(u32))) { 251662306a36Sopenharmony_ci dev_err(hdev->dev, "Failed to copy engine-ids array from user\n"); 251762306a36Sopenharmony_ci kfree(engines); 251862306a36Sopenharmony_ci return -EFAULT; 251962306a36Sopenharmony_ci } 252062306a36Sopenharmony_ci 252162306a36Sopenharmony_ci rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command); 252262306a36Sopenharmony_ci kfree(engines); 252362306a36Sopenharmony_ci 252462306a36Sopenharmony_ci return rc; 252562306a36Sopenharmony_ci} 252662306a36Sopenharmony_ci 252762306a36Sopenharmony_cistatic int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv) 252862306a36Sopenharmony_ci{ 252962306a36Sopenharmony_ci struct hl_device *hdev = hpriv->hdev; 253062306a36Sopenharmony_ci struct asic_fixed_properties *prop = &hdev->asic_prop; 253162306a36Sopenharmony_ci 253262306a36Sopenharmony_ci if (!prop->hbw_flush_reg) { 253362306a36Sopenharmony_ci dev_dbg(hdev->dev, "HBW flush is not supported\n"); 253462306a36Sopenharmony_ci return -EOPNOTSUPP; 253562306a36Sopenharmony_ci } 253662306a36Sopenharmony_ci 253762306a36Sopenharmony_ci RREG32(prop->hbw_flush_reg); 253862306a36Sopenharmony_ci 253962306a36Sopenharmony_ci return 0; 254062306a36Sopenharmony_ci} 254162306a36Sopenharmony_ci 254262306a36Sopenharmony_ciint hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) 254362306a36Sopenharmony_ci{ 254462306a36Sopenharmony_ci union hl_cs_args *args = data; 254562306a36Sopenharmony_ci enum hl_cs_type cs_type = 0; 254662306a36Sopenharmony_ci u64 cs_seq = ULONG_MAX; 254762306a36Sopenharmony_ci void __user *chunks; 254862306a36Sopenharmony_ci u32 num_chunks, flags, timeout, 254962306a36Sopenharmony_ci signals_count = 0, sob_addr = 0, handle_id = 0; 255062306a36Sopenharmony_ci u16 sob_initial_count = 0; 255162306a36Sopenharmony_ci int rc; 255262306a36Sopenharmony_ci 255362306a36Sopenharmony_ci rc = hl_cs_sanity_checks(hpriv, args); 255462306a36Sopenharmony_ci if (rc) 255562306a36Sopenharmony_ci goto out; 255662306a36Sopenharmony_ci 255762306a36Sopenharmony_ci rc = hl_cs_ctx_switch(hpriv, args, &cs_seq); 255862306a36Sopenharmony_ci if (rc) 255962306a36Sopenharmony_ci goto out; 256062306a36Sopenharmony_ci 256162306a36Sopenharmony_ci cs_type = hl_cs_get_cs_type(args->in.cs_flags & 256262306a36Sopenharmony_ci ~HL_CS_FLAGS_FORCE_RESTORE); 256362306a36Sopenharmony_ci chunks = (void __user *) (uintptr_t) args->in.chunks_execute; 256462306a36Sopenharmony_ci num_chunks = args->in.num_chunks_execute; 256562306a36Sopenharmony_ci flags = args->in.cs_flags; 256662306a36Sopenharmony_ci 256762306a36Sopenharmony_ci /* In case this is a staged CS, user should supply the CS sequence */ 256862306a36Sopenharmony_ci if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) && 256962306a36Sopenharmony_ci !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST)) 257062306a36Sopenharmony_ci cs_seq = args->in.seq; 257162306a36Sopenharmony_ci 257262306a36Sopenharmony_ci timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT 257362306a36Sopenharmony_ci ? msecs_to_jiffies(args->in.timeout * 1000) 257462306a36Sopenharmony_ci : hpriv->hdev->timeout_jiffies; 257562306a36Sopenharmony_ci 257662306a36Sopenharmony_ci switch (cs_type) { 257762306a36Sopenharmony_ci case CS_TYPE_SIGNAL: 257862306a36Sopenharmony_ci case CS_TYPE_WAIT: 257962306a36Sopenharmony_ci case CS_TYPE_COLLECTIVE_WAIT: 258062306a36Sopenharmony_ci rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks, 258162306a36Sopenharmony_ci &cs_seq, args->in.cs_flags, timeout, 258262306a36Sopenharmony_ci &sob_addr, &sob_initial_count); 258362306a36Sopenharmony_ci break; 258462306a36Sopenharmony_ci case CS_RESERVE_SIGNALS: 258562306a36Sopenharmony_ci rc = cs_ioctl_reserve_signals(hpriv, 258662306a36Sopenharmony_ci args->in.encaps_signals_q_idx, 258762306a36Sopenharmony_ci args->in.encaps_signals_count, 258862306a36Sopenharmony_ci &handle_id, &sob_addr, &signals_count); 258962306a36Sopenharmony_ci break; 259062306a36Sopenharmony_ci case CS_UNRESERVE_SIGNALS: 259162306a36Sopenharmony_ci rc = cs_ioctl_unreserve_signals(hpriv, 259262306a36Sopenharmony_ci args->in.encaps_sig_handle_id); 259362306a36Sopenharmony_ci break; 259462306a36Sopenharmony_ci case CS_TYPE_ENGINE_CORE: 259562306a36Sopenharmony_ci rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, 259662306a36Sopenharmony_ci args->in.num_engine_cores, args->in.core_command); 259762306a36Sopenharmony_ci break; 259862306a36Sopenharmony_ci case CS_TYPE_ENGINES: 259962306a36Sopenharmony_ci rc = cs_ioctl_engines(hpriv, args->in.engines, 260062306a36Sopenharmony_ci args->in.num_engines, args->in.engine_command); 260162306a36Sopenharmony_ci break; 260262306a36Sopenharmony_ci case CS_TYPE_FLUSH_PCI_HBW_WRITES: 260362306a36Sopenharmony_ci rc = cs_ioctl_flush_pci_hbw_writes(hpriv); 260462306a36Sopenharmony_ci break; 260562306a36Sopenharmony_ci default: 260662306a36Sopenharmony_ci rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, 260762306a36Sopenharmony_ci args->in.cs_flags, 260862306a36Sopenharmony_ci args->in.encaps_sig_handle_id, 260962306a36Sopenharmony_ci timeout, &sob_initial_count); 261062306a36Sopenharmony_ci break; 261162306a36Sopenharmony_ci } 261262306a36Sopenharmony_ciout: 261362306a36Sopenharmony_ci if (rc != -EAGAIN) { 261462306a36Sopenharmony_ci memset(args, 0, sizeof(*args)); 261562306a36Sopenharmony_ci 261662306a36Sopenharmony_ci switch (cs_type) { 261762306a36Sopenharmony_ci case CS_RESERVE_SIGNALS: 261862306a36Sopenharmony_ci args->out.handle_id = handle_id; 261962306a36Sopenharmony_ci args->out.sob_base_addr_offset = sob_addr; 262062306a36Sopenharmony_ci args->out.count = signals_count; 262162306a36Sopenharmony_ci break; 262262306a36Sopenharmony_ci case CS_TYPE_SIGNAL: 262362306a36Sopenharmony_ci args->out.sob_base_addr_offset = sob_addr; 262462306a36Sopenharmony_ci args->out.sob_count_before_submission = sob_initial_count; 262562306a36Sopenharmony_ci args->out.seq = cs_seq; 262662306a36Sopenharmony_ci break; 262762306a36Sopenharmony_ci case CS_TYPE_DEFAULT: 262862306a36Sopenharmony_ci args->out.sob_count_before_submission = sob_initial_count; 262962306a36Sopenharmony_ci args->out.seq = cs_seq; 263062306a36Sopenharmony_ci break; 263162306a36Sopenharmony_ci default: 263262306a36Sopenharmony_ci args->out.seq = cs_seq; 263362306a36Sopenharmony_ci break; 263462306a36Sopenharmony_ci } 263562306a36Sopenharmony_ci 263662306a36Sopenharmony_ci args->out.status = rc; 263762306a36Sopenharmony_ci } 263862306a36Sopenharmony_ci 263962306a36Sopenharmony_ci return rc; 264062306a36Sopenharmony_ci} 264162306a36Sopenharmony_ci 264262306a36Sopenharmony_cistatic int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence, 264362306a36Sopenharmony_ci enum hl_cs_wait_status *status, u64 timeout_us, s64 *timestamp) 264462306a36Sopenharmony_ci{ 264562306a36Sopenharmony_ci struct hl_device *hdev = ctx->hdev; 264662306a36Sopenharmony_ci ktime_t timestamp_kt; 264762306a36Sopenharmony_ci long completion_rc; 264862306a36Sopenharmony_ci int rc = 0, error; 264962306a36Sopenharmony_ci 265062306a36Sopenharmony_ci if (IS_ERR(fence)) { 265162306a36Sopenharmony_ci rc = PTR_ERR(fence); 265262306a36Sopenharmony_ci if (rc == -EINVAL) 265362306a36Sopenharmony_ci dev_notice_ratelimited(hdev->dev, 265462306a36Sopenharmony_ci "Can't wait on CS %llu because current CS is at seq %llu\n", 265562306a36Sopenharmony_ci seq, ctx->cs_sequence); 265662306a36Sopenharmony_ci return rc; 265762306a36Sopenharmony_ci } 265862306a36Sopenharmony_ci 265962306a36Sopenharmony_ci if (!fence) { 266062306a36Sopenharmony_ci if (!hl_pop_cs_outcome(&ctx->outcome_store, seq, ×tamp_kt, &error)) { 266162306a36Sopenharmony_ci dev_dbg(hdev->dev, 266262306a36Sopenharmony_ci "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", 266362306a36Sopenharmony_ci seq, ctx->cs_sequence); 266462306a36Sopenharmony_ci *status = CS_WAIT_STATUS_GONE; 266562306a36Sopenharmony_ci return 0; 266662306a36Sopenharmony_ci } 266762306a36Sopenharmony_ci 266862306a36Sopenharmony_ci completion_rc = 1; 266962306a36Sopenharmony_ci goto report_results; 267062306a36Sopenharmony_ci } 267162306a36Sopenharmony_ci 267262306a36Sopenharmony_ci if (!timeout_us) { 267362306a36Sopenharmony_ci completion_rc = completion_done(&fence->completion); 267462306a36Sopenharmony_ci } else { 267562306a36Sopenharmony_ci unsigned long timeout; 267662306a36Sopenharmony_ci 267762306a36Sopenharmony_ci timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ? 267862306a36Sopenharmony_ci timeout_us : usecs_to_jiffies(timeout_us); 267962306a36Sopenharmony_ci completion_rc = 268062306a36Sopenharmony_ci wait_for_completion_interruptible_timeout( 268162306a36Sopenharmony_ci &fence->completion, timeout); 268262306a36Sopenharmony_ci } 268362306a36Sopenharmony_ci 268462306a36Sopenharmony_ci error = fence->error; 268562306a36Sopenharmony_ci timestamp_kt = fence->timestamp; 268662306a36Sopenharmony_ci 268762306a36Sopenharmony_cireport_results: 268862306a36Sopenharmony_ci if (completion_rc > 0) { 268962306a36Sopenharmony_ci *status = CS_WAIT_STATUS_COMPLETED; 269062306a36Sopenharmony_ci if (timestamp) 269162306a36Sopenharmony_ci *timestamp = ktime_to_ns(timestamp_kt); 269262306a36Sopenharmony_ci } else { 269362306a36Sopenharmony_ci *status = CS_WAIT_STATUS_BUSY; 269462306a36Sopenharmony_ci } 269562306a36Sopenharmony_ci 269662306a36Sopenharmony_ci if (completion_rc == -ERESTARTSYS) 269762306a36Sopenharmony_ci rc = completion_rc; 269862306a36Sopenharmony_ci else if (error == -ETIMEDOUT || error == -EIO) 269962306a36Sopenharmony_ci rc = error; 270062306a36Sopenharmony_ci 270162306a36Sopenharmony_ci return rc; 270262306a36Sopenharmony_ci} 270362306a36Sopenharmony_ci 270462306a36Sopenharmony_ci/* 270562306a36Sopenharmony_ci * hl_cs_poll_fences - iterate CS fences to check for CS completion 270662306a36Sopenharmony_ci * 270762306a36Sopenharmony_ci * @mcs_data: multi-CS internal data 270862306a36Sopenharmony_ci * @mcs_compl: multi-CS completion structure 270962306a36Sopenharmony_ci * 271062306a36Sopenharmony_ci * @return 0 on success, otherwise non 0 error code 271162306a36Sopenharmony_ci * 271262306a36Sopenharmony_ci * The function iterates on all CS sequence in the list and set bit in 271362306a36Sopenharmony_ci * completion_bitmap for each completed CS. 271462306a36Sopenharmony_ci * While iterating, the function sets the stream map of each fence in the fence 271562306a36Sopenharmony_ci * array in the completion QID stream map to be used by CSs to perform 271662306a36Sopenharmony_ci * completion to the multi-CS context. 271762306a36Sopenharmony_ci * This function shall be called after taking context ref 271862306a36Sopenharmony_ci */ 271962306a36Sopenharmony_cistatic int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl) 272062306a36Sopenharmony_ci{ 272162306a36Sopenharmony_ci struct hl_fence **fence_ptr = mcs_data->fence_arr; 272262306a36Sopenharmony_ci struct hl_device *hdev = mcs_data->ctx->hdev; 272362306a36Sopenharmony_ci int i, rc, arr_len = mcs_data->arr_len; 272462306a36Sopenharmony_ci u64 *seq_arr = mcs_data->seq_arr; 272562306a36Sopenharmony_ci ktime_t max_ktime, first_cs_time; 272662306a36Sopenharmony_ci enum hl_cs_wait_status status; 272762306a36Sopenharmony_ci 272862306a36Sopenharmony_ci memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *)); 272962306a36Sopenharmony_ci 273062306a36Sopenharmony_ci /* get all fences under the same lock */ 273162306a36Sopenharmony_ci rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len); 273262306a36Sopenharmony_ci if (rc) 273362306a36Sopenharmony_ci return rc; 273462306a36Sopenharmony_ci 273562306a36Sopenharmony_ci /* 273662306a36Sopenharmony_ci * re-initialize the completion here to handle 2 possible cases: 273762306a36Sopenharmony_ci * 1. CS will complete the multi-CS prior clearing the completion. in which 273862306a36Sopenharmony_ci * case the fence iteration is guaranteed to catch the CS completion. 273962306a36Sopenharmony_ci * 2. the completion will occur after re-init of the completion. 274062306a36Sopenharmony_ci * in which case we will wake up immediately in wait_for_completion. 274162306a36Sopenharmony_ci */ 274262306a36Sopenharmony_ci reinit_completion(&mcs_compl->completion); 274362306a36Sopenharmony_ci 274462306a36Sopenharmony_ci /* 274562306a36Sopenharmony_ci * set to maximum time to verify timestamp is valid: if at the end 274662306a36Sopenharmony_ci * this value is maintained- no timestamp was updated 274762306a36Sopenharmony_ci */ 274862306a36Sopenharmony_ci max_ktime = ktime_set(KTIME_SEC_MAX, 0); 274962306a36Sopenharmony_ci first_cs_time = max_ktime; 275062306a36Sopenharmony_ci 275162306a36Sopenharmony_ci for (i = 0; i < arr_len; i++, fence_ptr++) { 275262306a36Sopenharmony_ci struct hl_fence *fence = *fence_ptr; 275362306a36Sopenharmony_ci 275462306a36Sopenharmony_ci /* 275562306a36Sopenharmony_ci * In order to prevent case where we wait until timeout even though a CS associated 275662306a36Sopenharmony_ci * with the multi-CS actually completed we do things in the below order: 275762306a36Sopenharmony_ci * 1. for each fence set it's QID map in the multi-CS completion QID map. This way 275862306a36Sopenharmony_ci * any CS can, potentially, complete the multi CS for the specific QID (note 275962306a36Sopenharmony_ci * that once completion is initialized, calling complete* and then wait on the 276062306a36Sopenharmony_ci * completion will cause it to return at once) 276162306a36Sopenharmony_ci * 2. only after allowing multi-CS completion for the specific QID we check whether 276262306a36Sopenharmony_ci * the specific CS already completed (and thus the wait for completion part will 276362306a36Sopenharmony_ci * be skipped). if the CS not completed it is guaranteed that completing CS will 276462306a36Sopenharmony_ci * wake up the completion. 276562306a36Sopenharmony_ci */ 276662306a36Sopenharmony_ci if (fence) 276762306a36Sopenharmony_ci mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map; 276862306a36Sopenharmony_ci 276962306a36Sopenharmony_ci /* 277062306a36Sopenharmony_ci * function won't sleep as it is called with timeout 0 (i.e. 277162306a36Sopenharmony_ci * poll the fence) 277262306a36Sopenharmony_ci */ 277362306a36Sopenharmony_ci rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence, &status, 0, NULL); 277462306a36Sopenharmony_ci if (rc) { 277562306a36Sopenharmony_ci dev_err(hdev->dev, 277662306a36Sopenharmony_ci "wait_for_fence error :%d for CS seq %llu\n", 277762306a36Sopenharmony_ci rc, seq_arr[i]); 277862306a36Sopenharmony_ci break; 277962306a36Sopenharmony_ci } 278062306a36Sopenharmony_ci 278162306a36Sopenharmony_ci switch (status) { 278262306a36Sopenharmony_ci case CS_WAIT_STATUS_BUSY: 278362306a36Sopenharmony_ci /* CS did not finished, QID to wait on already stored */ 278462306a36Sopenharmony_ci break; 278562306a36Sopenharmony_ci case CS_WAIT_STATUS_COMPLETED: 278662306a36Sopenharmony_ci /* 278762306a36Sopenharmony_ci * Using mcs_handling_done to avoid possibility of mcs_data 278862306a36Sopenharmony_ci * returns to user indicating CS completed before it finished 278962306a36Sopenharmony_ci * all of its mcs handling, to avoid race the next time the 279062306a36Sopenharmony_ci * user waits for mcs. 279162306a36Sopenharmony_ci * note: when reaching this case fence is definitely not NULL 279262306a36Sopenharmony_ci * but NULL check was added to overcome static analysis 279362306a36Sopenharmony_ci */ 279462306a36Sopenharmony_ci if (fence && !fence->mcs_handling_done) { 279562306a36Sopenharmony_ci /* 279662306a36Sopenharmony_ci * in case multi CS is completed but MCS handling not done 279762306a36Sopenharmony_ci * we "complete" the multi CS to prevent it from waiting 279862306a36Sopenharmony_ci * until time-out and the "multi-CS handling done" will have 279962306a36Sopenharmony_ci * another chance at the next iteration 280062306a36Sopenharmony_ci */ 280162306a36Sopenharmony_ci complete_all(&mcs_compl->completion); 280262306a36Sopenharmony_ci break; 280362306a36Sopenharmony_ci } 280462306a36Sopenharmony_ci 280562306a36Sopenharmony_ci mcs_data->completion_bitmap |= BIT(i); 280662306a36Sopenharmony_ci /* 280762306a36Sopenharmony_ci * For all completed CSs we take the earliest timestamp. 280862306a36Sopenharmony_ci * For this we have to validate that the timestamp is 280962306a36Sopenharmony_ci * earliest of all timestamps so far. 281062306a36Sopenharmony_ci */ 281162306a36Sopenharmony_ci if (fence && mcs_data->update_ts && 281262306a36Sopenharmony_ci (ktime_compare(fence->timestamp, first_cs_time) < 0)) 281362306a36Sopenharmony_ci first_cs_time = fence->timestamp; 281462306a36Sopenharmony_ci break; 281562306a36Sopenharmony_ci case CS_WAIT_STATUS_GONE: 281662306a36Sopenharmony_ci mcs_data->update_ts = false; 281762306a36Sopenharmony_ci mcs_data->gone_cs = true; 281862306a36Sopenharmony_ci /* 281962306a36Sopenharmony_ci * It is possible to get an old sequence numbers from user 282062306a36Sopenharmony_ci * which related to already completed CSs and their fences 282162306a36Sopenharmony_ci * already gone. In this case, CS set as completed but 282262306a36Sopenharmony_ci * no need to consider its QID for mcs completion. 282362306a36Sopenharmony_ci */ 282462306a36Sopenharmony_ci mcs_data->completion_bitmap |= BIT(i); 282562306a36Sopenharmony_ci break; 282662306a36Sopenharmony_ci default: 282762306a36Sopenharmony_ci dev_err(hdev->dev, "Invalid fence status\n"); 282862306a36Sopenharmony_ci rc = -EINVAL; 282962306a36Sopenharmony_ci break; 283062306a36Sopenharmony_ci } 283162306a36Sopenharmony_ci 283262306a36Sopenharmony_ci } 283362306a36Sopenharmony_ci 283462306a36Sopenharmony_ci hl_fences_put(mcs_data->fence_arr, arr_len); 283562306a36Sopenharmony_ci 283662306a36Sopenharmony_ci if (mcs_data->update_ts && 283762306a36Sopenharmony_ci (ktime_compare(first_cs_time, max_ktime) != 0)) 283862306a36Sopenharmony_ci mcs_data->timestamp = ktime_to_ns(first_cs_time); 283962306a36Sopenharmony_ci 284062306a36Sopenharmony_ci return rc; 284162306a36Sopenharmony_ci} 284262306a36Sopenharmony_ci 284362306a36Sopenharmony_cistatic int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq, 284462306a36Sopenharmony_ci enum hl_cs_wait_status *status, s64 *timestamp) 284562306a36Sopenharmony_ci{ 284662306a36Sopenharmony_ci struct hl_fence *fence; 284762306a36Sopenharmony_ci int rc = 0; 284862306a36Sopenharmony_ci 284962306a36Sopenharmony_ci if (timestamp) 285062306a36Sopenharmony_ci *timestamp = 0; 285162306a36Sopenharmony_ci 285262306a36Sopenharmony_ci hl_ctx_get(ctx); 285362306a36Sopenharmony_ci 285462306a36Sopenharmony_ci fence = hl_ctx_get_fence(ctx, seq); 285562306a36Sopenharmony_ci 285662306a36Sopenharmony_ci rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp); 285762306a36Sopenharmony_ci hl_fence_put(fence); 285862306a36Sopenharmony_ci hl_ctx_put(ctx); 285962306a36Sopenharmony_ci 286062306a36Sopenharmony_ci return rc; 286162306a36Sopenharmony_ci} 286262306a36Sopenharmony_ci 286362306a36Sopenharmony_cistatic inline unsigned long hl_usecs64_to_jiffies(const u64 usecs) 286462306a36Sopenharmony_ci{ 286562306a36Sopenharmony_ci if (usecs <= U32_MAX) 286662306a36Sopenharmony_ci return usecs_to_jiffies(usecs); 286762306a36Sopenharmony_ci 286862306a36Sopenharmony_ci /* 286962306a36Sopenharmony_ci * If the value in nanoseconds is larger than 64 bit, use the largest 287062306a36Sopenharmony_ci * 64 bit value. 287162306a36Sopenharmony_ci */ 287262306a36Sopenharmony_ci if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC))) 287362306a36Sopenharmony_ci return nsecs_to_jiffies(U64_MAX); 287462306a36Sopenharmony_ci 287562306a36Sopenharmony_ci return nsecs_to_jiffies(usecs * NSEC_PER_USEC); 287662306a36Sopenharmony_ci} 287762306a36Sopenharmony_ci 287862306a36Sopenharmony_ci/* 287962306a36Sopenharmony_ci * hl_wait_multi_cs_completion_init - init completion structure 288062306a36Sopenharmony_ci * 288162306a36Sopenharmony_ci * @hdev: pointer to habanalabs device structure 288262306a36Sopenharmony_ci * @stream_master_bitmap: stream master QIDs map, set bit indicates stream 288362306a36Sopenharmony_ci * master QID to wait on 288462306a36Sopenharmony_ci * 288562306a36Sopenharmony_ci * @return valid completion struct pointer on success, otherwise error pointer 288662306a36Sopenharmony_ci * 288762306a36Sopenharmony_ci * up to MULTI_CS_MAX_USER_CTX calls can be done concurrently to the driver. 288862306a36Sopenharmony_ci * the function gets the first available completion (by marking it "used") 288962306a36Sopenharmony_ci * and initialize its values. 289062306a36Sopenharmony_ci */ 289162306a36Sopenharmony_cistatic struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev) 289262306a36Sopenharmony_ci{ 289362306a36Sopenharmony_ci struct multi_cs_completion *mcs_compl; 289462306a36Sopenharmony_ci int i; 289562306a36Sopenharmony_ci 289662306a36Sopenharmony_ci /* find free multi_cs completion structure */ 289762306a36Sopenharmony_ci for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 289862306a36Sopenharmony_ci mcs_compl = &hdev->multi_cs_completion[i]; 289962306a36Sopenharmony_ci spin_lock(&mcs_compl->lock); 290062306a36Sopenharmony_ci if (!mcs_compl->used) { 290162306a36Sopenharmony_ci mcs_compl->used = 1; 290262306a36Sopenharmony_ci mcs_compl->timestamp = 0; 290362306a36Sopenharmony_ci /* 290462306a36Sopenharmony_ci * init QID map to 0 to avoid completion by CSs. the actual QID map 290562306a36Sopenharmony_ci * to multi-CS CSs will be set incrementally at a later stage 290662306a36Sopenharmony_ci */ 290762306a36Sopenharmony_ci mcs_compl->stream_master_qid_map = 0; 290862306a36Sopenharmony_ci spin_unlock(&mcs_compl->lock); 290962306a36Sopenharmony_ci break; 291062306a36Sopenharmony_ci } 291162306a36Sopenharmony_ci spin_unlock(&mcs_compl->lock); 291262306a36Sopenharmony_ci } 291362306a36Sopenharmony_ci 291462306a36Sopenharmony_ci if (i == MULTI_CS_MAX_USER_CTX) { 291562306a36Sopenharmony_ci dev_err(hdev->dev, "no available multi-CS completion structure\n"); 291662306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 291762306a36Sopenharmony_ci } 291862306a36Sopenharmony_ci return mcs_compl; 291962306a36Sopenharmony_ci} 292062306a36Sopenharmony_ci 292162306a36Sopenharmony_ci/* 292262306a36Sopenharmony_ci * hl_wait_multi_cs_completion_fini - return completion structure and set as 292362306a36Sopenharmony_ci * unused 292462306a36Sopenharmony_ci * 292562306a36Sopenharmony_ci * @mcs_compl: pointer to the completion structure 292662306a36Sopenharmony_ci */ 292762306a36Sopenharmony_cistatic void hl_wait_multi_cs_completion_fini( 292862306a36Sopenharmony_ci struct multi_cs_completion *mcs_compl) 292962306a36Sopenharmony_ci{ 293062306a36Sopenharmony_ci /* 293162306a36Sopenharmony_ci * free completion structure, do it under lock to be in-sync with the 293262306a36Sopenharmony_ci * thread that signals completion 293362306a36Sopenharmony_ci */ 293462306a36Sopenharmony_ci spin_lock(&mcs_compl->lock); 293562306a36Sopenharmony_ci mcs_compl->used = 0; 293662306a36Sopenharmony_ci spin_unlock(&mcs_compl->lock); 293762306a36Sopenharmony_ci} 293862306a36Sopenharmony_ci 293962306a36Sopenharmony_ci/* 294062306a36Sopenharmony_ci * hl_wait_multi_cs_completion - wait for first CS to complete 294162306a36Sopenharmony_ci * 294262306a36Sopenharmony_ci * @mcs_data: multi-CS internal data 294362306a36Sopenharmony_ci * 294462306a36Sopenharmony_ci * @return 0 on success, otherwise non 0 error code 294562306a36Sopenharmony_ci */ 294662306a36Sopenharmony_cistatic int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data, 294762306a36Sopenharmony_ci struct multi_cs_completion *mcs_compl) 294862306a36Sopenharmony_ci{ 294962306a36Sopenharmony_ci long completion_rc; 295062306a36Sopenharmony_ci 295162306a36Sopenharmony_ci completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion, 295262306a36Sopenharmony_ci mcs_data->timeout_jiffies); 295362306a36Sopenharmony_ci 295462306a36Sopenharmony_ci /* update timestamp */ 295562306a36Sopenharmony_ci if (completion_rc > 0) 295662306a36Sopenharmony_ci mcs_data->timestamp = mcs_compl->timestamp; 295762306a36Sopenharmony_ci 295862306a36Sopenharmony_ci if (completion_rc == -ERESTARTSYS) 295962306a36Sopenharmony_ci return completion_rc; 296062306a36Sopenharmony_ci 296162306a36Sopenharmony_ci mcs_data->wait_status = completion_rc; 296262306a36Sopenharmony_ci 296362306a36Sopenharmony_ci return 0; 296462306a36Sopenharmony_ci} 296562306a36Sopenharmony_ci 296662306a36Sopenharmony_ci/* 296762306a36Sopenharmony_ci * hl_multi_cs_completion_init - init array of multi-CS completion structures 296862306a36Sopenharmony_ci * 296962306a36Sopenharmony_ci * @hdev: pointer to habanalabs device structure 297062306a36Sopenharmony_ci */ 297162306a36Sopenharmony_civoid hl_multi_cs_completion_init(struct hl_device *hdev) 297262306a36Sopenharmony_ci{ 297362306a36Sopenharmony_ci struct multi_cs_completion *mcs_cmpl; 297462306a36Sopenharmony_ci int i; 297562306a36Sopenharmony_ci 297662306a36Sopenharmony_ci for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 297762306a36Sopenharmony_ci mcs_cmpl = &hdev->multi_cs_completion[i]; 297862306a36Sopenharmony_ci mcs_cmpl->used = 0; 297962306a36Sopenharmony_ci spin_lock_init(&mcs_cmpl->lock); 298062306a36Sopenharmony_ci init_completion(&mcs_cmpl->completion); 298162306a36Sopenharmony_ci } 298262306a36Sopenharmony_ci} 298362306a36Sopenharmony_ci 298462306a36Sopenharmony_ci/* 298562306a36Sopenharmony_ci * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl 298662306a36Sopenharmony_ci * 298762306a36Sopenharmony_ci * @hpriv: pointer to the private data of the fd 298862306a36Sopenharmony_ci * @data: pointer to multi-CS wait ioctl in/out args 298962306a36Sopenharmony_ci * 299062306a36Sopenharmony_ci */ 299162306a36Sopenharmony_cistatic int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) 299262306a36Sopenharmony_ci{ 299362306a36Sopenharmony_ci struct multi_cs_completion *mcs_compl; 299462306a36Sopenharmony_ci struct hl_device *hdev = hpriv->hdev; 299562306a36Sopenharmony_ci struct multi_cs_data mcs_data = {}; 299662306a36Sopenharmony_ci union hl_wait_cs_args *args = data; 299762306a36Sopenharmony_ci struct hl_ctx *ctx = hpriv->ctx; 299862306a36Sopenharmony_ci struct hl_fence **fence_arr; 299962306a36Sopenharmony_ci void __user *seq_arr; 300062306a36Sopenharmony_ci u32 size_to_copy; 300162306a36Sopenharmony_ci u64 *cs_seq_arr; 300262306a36Sopenharmony_ci u8 seq_arr_len; 300362306a36Sopenharmony_ci int rc, i; 300462306a36Sopenharmony_ci 300562306a36Sopenharmony_ci for (i = 0 ; i < sizeof(args->in.pad) ; i++) 300662306a36Sopenharmony_ci if (args->in.pad[i]) { 300762306a36Sopenharmony_ci dev_dbg(hdev->dev, "Padding bytes must be 0\n"); 300862306a36Sopenharmony_ci return -EINVAL; 300962306a36Sopenharmony_ci } 301062306a36Sopenharmony_ci 301162306a36Sopenharmony_ci if (!hdev->supports_wait_for_multi_cs) { 301262306a36Sopenharmony_ci dev_err(hdev->dev, "Wait for multi CS is not supported\n"); 301362306a36Sopenharmony_ci return -EPERM; 301462306a36Sopenharmony_ci } 301562306a36Sopenharmony_ci 301662306a36Sopenharmony_ci seq_arr_len = args->in.seq_arr_len; 301762306a36Sopenharmony_ci 301862306a36Sopenharmony_ci if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) { 301962306a36Sopenharmony_ci dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n", 302062306a36Sopenharmony_ci HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len); 302162306a36Sopenharmony_ci return -EINVAL; 302262306a36Sopenharmony_ci } 302362306a36Sopenharmony_ci 302462306a36Sopenharmony_ci /* allocate memory for sequence array */ 302562306a36Sopenharmony_ci cs_seq_arr = 302662306a36Sopenharmony_ci kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL); 302762306a36Sopenharmony_ci if (!cs_seq_arr) 302862306a36Sopenharmony_ci return -ENOMEM; 302962306a36Sopenharmony_ci 303062306a36Sopenharmony_ci /* copy CS sequence array from user */ 303162306a36Sopenharmony_ci seq_arr = (void __user *) (uintptr_t) args->in.seq; 303262306a36Sopenharmony_ci size_to_copy = seq_arr_len * sizeof(*cs_seq_arr); 303362306a36Sopenharmony_ci if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) { 303462306a36Sopenharmony_ci dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n"); 303562306a36Sopenharmony_ci rc = -EFAULT; 303662306a36Sopenharmony_ci goto free_seq_arr; 303762306a36Sopenharmony_ci } 303862306a36Sopenharmony_ci 303962306a36Sopenharmony_ci /* allocate array for the fences */ 304062306a36Sopenharmony_ci fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL); 304162306a36Sopenharmony_ci if (!fence_arr) { 304262306a36Sopenharmony_ci rc = -ENOMEM; 304362306a36Sopenharmony_ci goto free_seq_arr; 304462306a36Sopenharmony_ci } 304562306a36Sopenharmony_ci 304662306a36Sopenharmony_ci /* initialize the multi-CS internal data */ 304762306a36Sopenharmony_ci mcs_data.ctx = ctx; 304862306a36Sopenharmony_ci mcs_data.seq_arr = cs_seq_arr; 304962306a36Sopenharmony_ci mcs_data.fence_arr = fence_arr; 305062306a36Sopenharmony_ci mcs_data.arr_len = seq_arr_len; 305162306a36Sopenharmony_ci 305262306a36Sopenharmony_ci hl_ctx_get(ctx); 305362306a36Sopenharmony_ci 305462306a36Sopenharmony_ci /* wait (with timeout) for the first CS to be completed */ 305562306a36Sopenharmony_ci mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us); 305662306a36Sopenharmony_ci mcs_compl = hl_wait_multi_cs_completion_init(hdev); 305762306a36Sopenharmony_ci if (IS_ERR(mcs_compl)) { 305862306a36Sopenharmony_ci rc = PTR_ERR(mcs_compl); 305962306a36Sopenharmony_ci goto put_ctx; 306062306a36Sopenharmony_ci } 306162306a36Sopenharmony_ci 306262306a36Sopenharmony_ci /* poll all CS fences, extract timestamp */ 306362306a36Sopenharmony_ci mcs_data.update_ts = true; 306462306a36Sopenharmony_ci rc = hl_cs_poll_fences(&mcs_data, mcs_compl); 306562306a36Sopenharmony_ci /* 306662306a36Sopenharmony_ci * skip wait for CS completion when one of the below is true: 306762306a36Sopenharmony_ci * - an error on the poll function 306862306a36Sopenharmony_ci * - one or more CS in the list completed 306962306a36Sopenharmony_ci * - the user called ioctl with timeout 0 307062306a36Sopenharmony_ci */ 307162306a36Sopenharmony_ci if (rc || mcs_data.completion_bitmap || !args->in.timeout_us) 307262306a36Sopenharmony_ci goto completion_fini; 307362306a36Sopenharmony_ci 307462306a36Sopenharmony_ci while (true) { 307562306a36Sopenharmony_ci rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl); 307662306a36Sopenharmony_ci if (rc || (mcs_data.wait_status == 0)) 307762306a36Sopenharmony_ci break; 307862306a36Sopenharmony_ci 307962306a36Sopenharmony_ci /* 308062306a36Sopenharmony_ci * poll fences once again to update the CS map. 308162306a36Sopenharmony_ci * no timestamp should be updated this time. 308262306a36Sopenharmony_ci */ 308362306a36Sopenharmony_ci mcs_data.update_ts = false; 308462306a36Sopenharmony_ci rc = hl_cs_poll_fences(&mcs_data, mcs_compl); 308562306a36Sopenharmony_ci 308662306a36Sopenharmony_ci if (rc || mcs_data.completion_bitmap) 308762306a36Sopenharmony_ci break; 308862306a36Sopenharmony_ci 308962306a36Sopenharmony_ci /* 309062306a36Sopenharmony_ci * if hl_wait_multi_cs_completion returned before timeout (i.e. 309162306a36Sopenharmony_ci * it got a completion) it either got completed by CS in the multi CS list 309262306a36Sopenharmony_ci * (in which case the indication will be non empty completion_bitmap) or it 309362306a36Sopenharmony_ci * got completed by CS submitted to one of the shared stream master but 309462306a36Sopenharmony_ci * not in the multi CS list (in which case we should wait again but modify 309562306a36Sopenharmony_ci * the timeout and set timestamp as zero to let a CS related to the current 309662306a36Sopenharmony_ci * multi-CS set a new, relevant, timestamp) 309762306a36Sopenharmony_ci */ 309862306a36Sopenharmony_ci mcs_data.timeout_jiffies = mcs_data.wait_status; 309962306a36Sopenharmony_ci mcs_compl->timestamp = 0; 310062306a36Sopenharmony_ci } 310162306a36Sopenharmony_ci 310262306a36Sopenharmony_cicompletion_fini: 310362306a36Sopenharmony_ci hl_wait_multi_cs_completion_fini(mcs_compl); 310462306a36Sopenharmony_ci 310562306a36Sopenharmony_ciput_ctx: 310662306a36Sopenharmony_ci hl_ctx_put(ctx); 310762306a36Sopenharmony_ci kfree(fence_arr); 310862306a36Sopenharmony_ci 310962306a36Sopenharmony_cifree_seq_arr: 311062306a36Sopenharmony_ci kfree(cs_seq_arr); 311162306a36Sopenharmony_ci 311262306a36Sopenharmony_ci if (rc == -ERESTARTSYS) { 311362306a36Sopenharmony_ci dev_err_ratelimited(hdev->dev, 311462306a36Sopenharmony_ci "user process got signal while waiting for Multi-CS\n"); 311562306a36Sopenharmony_ci rc = -EINTR; 311662306a36Sopenharmony_ci } 311762306a36Sopenharmony_ci 311862306a36Sopenharmony_ci if (rc) 311962306a36Sopenharmony_ci return rc; 312062306a36Sopenharmony_ci 312162306a36Sopenharmony_ci /* update output args */ 312262306a36Sopenharmony_ci memset(args, 0, sizeof(*args)); 312362306a36Sopenharmony_ci 312462306a36Sopenharmony_ci if (mcs_data.completion_bitmap) { 312562306a36Sopenharmony_ci args->out.status = HL_WAIT_CS_STATUS_COMPLETED; 312662306a36Sopenharmony_ci args->out.cs_completion_map = mcs_data.completion_bitmap; 312762306a36Sopenharmony_ci 312862306a36Sopenharmony_ci /* if timestamp not 0- it's valid */ 312962306a36Sopenharmony_ci if (mcs_data.timestamp) { 313062306a36Sopenharmony_ci args->out.timestamp_nsec = mcs_data.timestamp; 313162306a36Sopenharmony_ci args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; 313262306a36Sopenharmony_ci } 313362306a36Sopenharmony_ci 313462306a36Sopenharmony_ci /* update if some CS was gone */ 313562306a36Sopenharmony_ci if (!mcs_data.timestamp) 313662306a36Sopenharmony_ci args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; 313762306a36Sopenharmony_ci } else { 313862306a36Sopenharmony_ci args->out.status = HL_WAIT_CS_STATUS_BUSY; 313962306a36Sopenharmony_ci } 314062306a36Sopenharmony_ci 314162306a36Sopenharmony_ci return 0; 314262306a36Sopenharmony_ci} 314362306a36Sopenharmony_ci 314462306a36Sopenharmony_cistatic int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) 314562306a36Sopenharmony_ci{ 314662306a36Sopenharmony_ci struct hl_device *hdev = hpriv->hdev; 314762306a36Sopenharmony_ci union hl_wait_cs_args *args = data; 314862306a36Sopenharmony_ci enum hl_cs_wait_status status; 314962306a36Sopenharmony_ci u64 seq = args->in.seq; 315062306a36Sopenharmony_ci s64 timestamp; 315162306a36Sopenharmony_ci int rc; 315262306a36Sopenharmony_ci 315362306a36Sopenharmony_ci rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, &status, ×tamp); 315462306a36Sopenharmony_ci 315562306a36Sopenharmony_ci if (rc == -ERESTARTSYS) { 315662306a36Sopenharmony_ci dev_err_ratelimited(hdev->dev, 315762306a36Sopenharmony_ci "user process got signal while waiting for CS handle %llu\n", 315862306a36Sopenharmony_ci seq); 315962306a36Sopenharmony_ci return -EINTR; 316062306a36Sopenharmony_ci } 316162306a36Sopenharmony_ci 316262306a36Sopenharmony_ci memset(args, 0, sizeof(*args)); 316362306a36Sopenharmony_ci 316462306a36Sopenharmony_ci if (rc) { 316562306a36Sopenharmony_ci if (rc == -ETIMEDOUT) { 316662306a36Sopenharmony_ci dev_err_ratelimited(hdev->dev, 316762306a36Sopenharmony_ci "CS %llu has timed-out while user process is waiting for it\n", 316862306a36Sopenharmony_ci seq); 316962306a36Sopenharmony_ci args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT; 317062306a36Sopenharmony_ci } else if (rc == -EIO) { 317162306a36Sopenharmony_ci dev_err_ratelimited(hdev->dev, 317262306a36Sopenharmony_ci "CS %llu has been aborted while user process is waiting for it\n", 317362306a36Sopenharmony_ci seq); 317462306a36Sopenharmony_ci args->out.status = HL_WAIT_CS_STATUS_ABORTED; 317562306a36Sopenharmony_ci } 317662306a36Sopenharmony_ci return rc; 317762306a36Sopenharmony_ci } 317862306a36Sopenharmony_ci 317962306a36Sopenharmony_ci if (timestamp) { 318062306a36Sopenharmony_ci args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; 318162306a36Sopenharmony_ci args->out.timestamp_nsec = timestamp; 318262306a36Sopenharmony_ci } 318362306a36Sopenharmony_ci 318462306a36Sopenharmony_ci switch (status) { 318562306a36Sopenharmony_ci case CS_WAIT_STATUS_GONE: 318662306a36Sopenharmony_ci args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; 318762306a36Sopenharmony_ci fallthrough; 318862306a36Sopenharmony_ci case CS_WAIT_STATUS_COMPLETED: 318962306a36Sopenharmony_ci args->out.status = HL_WAIT_CS_STATUS_COMPLETED; 319062306a36Sopenharmony_ci break; 319162306a36Sopenharmony_ci case CS_WAIT_STATUS_BUSY: 319262306a36Sopenharmony_ci default: 319362306a36Sopenharmony_ci args->out.status = HL_WAIT_CS_STATUS_BUSY; 319462306a36Sopenharmony_ci break; 319562306a36Sopenharmony_ci } 319662306a36Sopenharmony_ci 319762306a36Sopenharmony_ci return 0; 319862306a36Sopenharmony_ci} 319962306a36Sopenharmony_ci 320062306a36Sopenharmony_cistatic int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf, 320162306a36Sopenharmony_ci struct hl_cb *cq_cb, 320262306a36Sopenharmony_ci u64 ts_offset, u64 cq_offset, u64 target_value, 320362306a36Sopenharmony_ci spinlock_t *wait_list_lock, 320462306a36Sopenharmony_ci struct hl_user_pending_interrupt **pend) 320562306a36Sopenharmony_ci{ 320662306a36Sopenharmony_ci struct hl_ts_buff *ts_buff = buf->private; 320762306a36Sopenharmony_ci struct hl_user_pending_interrupt *requested_offset_record = 320862306a36Sopenharmony_ci (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + 320962306a36Sopenharmony_ci ts_offset; 321062306a36Sopenharmony_ci struct hl_user_pending_interrupt *cb_last = 321162306a36Sopenharmony_ci (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + 321262306a36Sopenharmony_ci (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt)); 321362306a36Sopenharmony_ci unsigned long iter_counter = 0; 321462306a36Sopenharmony_ci u64 current_cq_counter; 321562306a36Sopenharmony_ci ktime_t timestamp; 321662306a36Sopenharmony_ci 321762306a36Sopenharmony_ci /* Validate ts_offset not exceeding last max */ 321862306a36Sopenharmony_ci if (requested_offset_record >= cb_last) { 321962306a36Sopenharmony_ci dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n", 322062306a36Sopenharmony_ci (u64)(uintptr_t)cb_last); 322162306a36Sopenharmony_ci return -EINVAL; 322262306a36Sopenharmony_ci } 322362306a36Sopenharmony_ci 322462306a36Sopenharmony_ci timestamp = ktime_get(); 322562306a36Sopenharmony_ci 322662306a36Sopenharmony_cistart_over: 322762306a36Sopenharmony_ci spin_lock(wait_list_lock); 322862306a36Sopenharmony_ci 322962306a36Sopenharmony_ci /* Unregister only if we didn't reach the target value 323062306a36Sopenharmony_ci * since in this case there will be no handling in irq context 323162306a36Sopenharmony_ci * and then it's safe to delete the node out of the interrupt list 323262306a36Sopenharmony_ci * then re-use it on other interrupt 323362306a36Sopenharmony_ci */ 323462306a36Sopenharmony_ci if (requested_offset_record->ts_reg_info.in_use) { 323562306a36Sopenharmony_ci current_cq_counter = *requested_offset_record->cq_kernel_addr; 323662306a36Sopenharmony_ci if (current_cq_counter < requested_offset_record->cq_target_value) { 323762306a36Sopenharmony_ci list_del(&requested_offset_record->wait_list_node); 323862306a36Sopenharmony_ci spin_unlock(wait_list_lock); 323962306a36Sopenharmony_ci 324062306a36Sopenharmony_ci hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf); 324162306a36Sopenharmony_ci hl_cb_put(requested_offset_record->ts_reg_info.cq_cb); 324262306a36Sopenharmony_ci 324362306a36Sopenharmony_ci dev_dbg(buf->mmg->dev, 324462306a36Sopenharmony_ci "ts node removed from interrupt list now can re-use\n"); 324562306a36Sopenharmony_ci } else { 324662306a36Sopenharmony_ci dev_dbg(buf->mmg->dev, 324762306a36Sopenharmony_ci "ts node in middle of irq handling\n"); 324862306a36Sopenharmony_ci 324962306a36Sopenharmony_ci /* irq thread handling in the middle give it time to finish */ 325062306a36Sopenharmony_ci spin_unlock(wait_list_lock); 325162306a36Sopenharmony_ci usleep_range(100, 1000); 325262306a36Sopenharmony_ci if (++iter_counter == MAX_TS_ITER_NUM) { 325362306a36Sopenharmony_ci dev_err(buf->mmg->dev, 325462306a36Sopenharmony_ci "Timestamp offset processing reached timeout of %lld ms\n", 325562306a36Sopenharmony_ci ktime_ms_delta(ktime_get(), timestamp)); 325662306a36Sopenharmony_ci return -EAGAIN; 325762306a36Sopenharmony_ci } 325862306a36Sopenharmony_ci 325962306a36Sopenharmony_ci goto start_over; 326062306a36Sopenharmony_ci } 326162306a36Sopenharmony_ci } else { 326262306a36Sopenharmony_ci /* Fill up the new registration node info */ 326362306a36Sopenharmony_ci requested_offset_record->ts_reg_info.buf = buf; 326462306a36Sopenharmony_ci requested_offset_record->ts_reg_info.cq_cb = cq_cb; 326562306a36Sopenharmony_ci requested_offset_record->ts_reg_info.timestamp_kernel_addr = 326662306a36Sopenharmony_ci (u64 *) ts_buff->user_buff_address + ts_offset; 326762306a36Sopenharmony_ci requested_offset_record->cq_kernel_addr = 326862306a36Sopenharmony_ci (u64 *) cq_cb->kernel_address + cq_offset; 326962306a36Sopenharmony_ci requested_offset_record->cq_target_value = target_value; 327062306a36Sopenharmony_ci 327162306a36Sopenharmony_ci spin_unlock(wait_list_lock); 327262306a36Sopenharmony_ci } 327362306a36Sopenharmony_ci 327462306a36Sopenharmony_ci *pend = requested_offset_record; 327562306a36Sopenharmony_ci 327662306a36Sopenharmony_ci dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n", 327762306a36Sopenharmony_ci requested_offset_record); 327862306a36Sopenharmony_ci return 0; 327962306a36Sopenharmony_ci} 328062306a36Sopenharmony_ci 328162306a36Sopenharmony_cistatic int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, 328262306a36Sopenharmony_ci struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg, 328362306a36Sopenharmony_ci u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset, 328462306a36Sopenharmony_ci u64 target_value, struct hl_user_interrupt *interrupt, 328562306a36Sopenharmony_ci bool register_ts_record, u64 ts_handle, u64 ts_offset, 328662306a36Sopenharmony_ci u32 *status, u64 *timestamp) 328762306a36Sopenharmony_ci{ 328862306a36Sopenharmony_ci struct hl_user_pending_interrupt *pend; 328962306a36Sopenharmony_ci struct hl_mmap_mem_buf *buf; 329062306a36Sopenharmony_ci struct hl_cb *cq_cb; 329162306a36Sopenharmony_ci unsigned long timeout; 329262306a36Sopenharmony_ci long completion_rc; 329362306a36Sopenharmony_ci int rc = 0; 329462306a36Sopenharmony_ci 329562306a36Sopenharmony_ci timeout = hl_usecs64_to_jiffies(timeout_us); 329662306a36Sopenharmony_ci 329762306a36Sopenharmony_ci hl_ctx_get(ctx); 329862306a36Sopenharmony_ci 329962306a36Sopenharmony_ci cq_cb = hl_cb_get(cb_mmg, cq_counters_handle); 330062306a36Sopenharmony_ci if (!cq_cb) { 330162306a36Sopenharmony_ci rc = -EINVAL; 330262306a36Sopenharmony_ci goto put_ctx; 330362306a36Sopenharmony_ci } 330462306a36Sopenharmony_ci 330562306a36Sopenharmony_ci /* Validate the cq offset */ 330662306a36Sopenharmony_ci if (((u64 *) cq_cb->kernel_address + cq_counters_offset) >= 330762306a36Sopenharmony_ci ((u64 *) cq_cb->kernel_address + (cq_cb->size / sizeof(u64)))) { 330862306a36Sopenharmony_ci rc = -EINVAL; 330962306a36Sopenharmony_ci goto put_cq_cb; 331062306a36Sopenharmony_ci } 331162306a36Sopenharmony_ci 331262306a36Sopenharmony_ci if (register_ts_record) { 331362306a36Sopenharmony_ci dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n", 331462306a36Sopenharmony_ci interrupt->interrupt_id, ts_offset, cq_counters_offset); 331562306a36Sopenharmony_ci buf = hl_mmap_mem_buf_get(mmg, ts_handle); 331662306a36Sopenharmony_ci if (!buf) { 331762306a36Sopenharmony_ci rc = -EINVAL; 331862306a36Sopenharmony_ci goto put_cq_cb; 331962306a36Sopenharmony_ci } 332062306a36Sopenharmony_ci 332162306a36Sopenharmony_ci /* get ts buffer record */ 332262306a36Sopenharmony_ci rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset, 332362306a36Sopenharmony_ci cq_counters_offset, target_value, 332462306a36Sopenharmony_ci &interrupt->wait_list_lock, &pend); 332562306a36Sopenharmony_ci if (rc) 332662306a36Sopenharmony_ci goto put_ts_buff; 332762306a36Sopenharmony_ci } else { 332862306a36Sopenharmony_ci pend = kzalloc(sizeof(*pend), GFP_KERNEL); 332962306a36Sopenharmony_ci if (!pend) { 333062306a36Sopenharmony_ci rc = -ENOMEM; 333162306a36Sopenharmony_ci goto put_cq_cb; 333262306a36Sopenharmony_ci } 333362306a36Sopenharmony_ci hl_fence_init(&pend->fence, ULONG_MAX); 333462306a36Sopenharmony_ci pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset; 333562306a36Sopenharmony_ci pend->cq_target_value = target_value; 333662306a36Sopenharmony_ci } 333762306a36Sopenharmony_ci 333862306a36Sopenharmony_ci spin_lock(&interrupt->wait_list_lock); 333962306a36Sopenharmony_ci 334062306a36Sopenharmony_ci /* We check for completion value as interrupt could have been received 334162306a36Sopenharmony_ci * before we added the node to the wait list 334262306a36Sopenharmony_ci */ 334362306a36Sopenharmony_ci if (*pend->cq_kernel_addr >= target_value) { 334462306a36Sopenharmony_ci if (register_ts_record) 334562306a36Sopenharmony_ci pend->ts_reg_info.in_use = 0; 334662306a36Sopenharmony_ci spin_unlock(&interrupt->wait_list_lock); 334762306a36Sopenharmony_ci 334862306a36Sopenharmony_ci *status = HL_WAIT_CS_STATUS_COMPLETED; 334962306a36Sopenharmony_ci 335062306a36Sopenharmony_ci if (register_ts_record) { 335162306a36Sopenharmony_ci *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); 335262306a36Sopenharmony_ci goto put_ts_buff; 335362306a36Sopenharmony_ci } else { 335462306a36Sopenharmony_ci pend->fence.timestamp = ktime_get(); 335562306a36Sopenharmony_ci goto set_timestamp; 335662306a36Sopenharmony_ci } 335762306a36Sopenharmony_ci } else if (!timeout_us) { 335862306a36Sopenharmony_ci spin_unlock(&interrupt->wait_list_lock); 335962306a36Sopenharmony_ci *status = HL_WAIT_CS_STATUS_BUSY; 336062306a36Sopenharmony_ci pend->fence.timestamp = ktime_get(); 336162306a36Sopenharmony_ci goto set_timestamp; 336262306a36Sopenharmony_ci } 336362306a36Sopenharmony_ci 336462306a36Sopenharmony_ci /* Add pending user interrupt to relevant list for the interrupt 336562306a36Sopenharmony_ci * handler to monitor. 336662306a36Sopenharmony_ci * Note that we cannot have sorted list by target value, 336762306a36Sopenharmony_ci * in order to shorten the list pass loop, since 336862306a36Sopenharmony_ci * same list could have nodes for different cq counter handle. 336962306a36Sopenharmony_ci * Note: 337062306a36Sopenharmony_ci * Mark ts buff offset as in use here in the spinlock protection area 337162306a36Sopenharmony_ci * to avoid getting in the re-use section in ts_buff_get_kernel_ts_record 337262306a36Sopenharmony_ci * before adding the node to the list. this scenario might happen when 337362306a36Sopenharmony_ci * multiple threads are racing on same offset and one thread could 337462306a36Sopenharmony_ci * set the ts buff in ts_buff_get_kernel_ts_record then the other thread 337562306a36Sopenharmony_ci * takes over and get to ts_buff_get_kernel_ts_record and then we will try 337662306a36Sopenharmony_ci * to re-use the same ts buff offset, and will try to delete a non existing 337762306a36Sopenharmony_ci * node from the list. 337862306a36Sopenharmony_ci */ 337962306a36Sopenharmony_ci if (register_ts_record) 338062306a36Sopenharmony_ci pend->ts_reg_info.in_use = 1; 338162306a36Sopenharmony_ci 338262306a36Sopenharmony_ci list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); 338362306a36Sopenharmony_ci spin_unlock(&interrupt->wait_list_lock); 338462306a36Sopenharmony_ci 338562306a36Sopenharmony_ci if (register_ts_record) { 338662306a36Sopenharmony_ci rc = *status = HL_WAIT_CS_STATUS_COMPLETED; 338762306a36Sopenharmony_ci goto ts_registration_exit; 338862306a36Sopenharmony_ci } 338962306a36Sopenharmony_ci 339062306a36Sopenharmony_ci /* Wait for interrupt handler to signal completion */ 339162306a36Sopenharmony_ci completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, 339262306a36Sopenharmony_ci timeout); 339362306a36Sopenharmony_ci if (completion_rc > 0) { 339462306a36Sopenharmony_ci *status = HL_WAIT_CS_STATUS_COMPLETED; 339562306a36Sopenharmony_ci } else { 339662306a36Sopenharmony_ci if (completion_rc == -ERESTARTSYS) { 339762306a36Sopenharmony_ci dev_err_ratelimited(hdev->dev, 339862306a36Sopenharmony_ci "user process got signal while waiting for interrupt ID %d\n", 339962306a36Sopenharmony_ci interrupt->interrupt_id); 340062306a36Sopenharmony_ci rc = -EINTR; 340162306a36Sopenharmony_ci *status = HL_WAIT_CS_STATUS_ABORTED; 340262306a36Sopenharmony_ci } else { 340362306a36Sopenharmony_ci if (pend->fence.error == -EIO) { 340462306a36Sopenharmony_ci dev_err_ratelimited(hdev->dev, 340562306a36Sopenharmony_ci "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", 340662306a36Sopenharmony_ci pend->fence.error); 340762306a36Sopenharmony_ci rc = -EIO; 340862306a36Sopenharmony_ci *status = HL_WAIT_CS_STATUS_ABORTED; 340962306a36Sopenharmony_ci } else { 341062306a36Sopenharmony_ci /* The wait has timed-out. We don't know anything beyond that 341162306a36Sopenharmony_ci * because the workload wasn't submitted through the driver. 341262306a36Sopenharmony_ci * Therefore, from driver's perspective, the workload is still 341362306a36Sopenharmony_ci * executing. 341462306a36Sopenharmony_ci */ 341562306a36Sopenharmony_ci rc = 0; 341662306a36Sopenharmony_ci *status = HL_WAIT_CS_STATUS_BUSY; 341762306a36Sopenharmony_ci } 341862306a36Sopenharmony_ci } 341962306a36Sopenharmony_ci } 342062306a36Sopenharmony_ci 342162306a36Sopenharmony_ci /* 342262306a36Sopenharmony_ci * We keep removing the node from list here, and not at the irq handler 342362306a36Sopenharmony_ci * for completion timeout case. and if it's a registration 342462306a36Sopenharmony_ci * for ts record, the node will be deleted in the irq handler after 342562306a36Sopenharmony_ci * we reach the target value. 342662306a36Sopenharmony_ci */ 342762306a36Sopenharmony_ci spin_lock(&interrupt->wait_list_lock); 342862306a36Sopenharmony_ci list_del(&pend->wait_list_node); 342962306a36Sopenharmony_ci spin_unlock(&interrupt->wait_list_lock); 343062306a36Sopenharmony_ci 343162306a36Sopenharmony_ciset_timestamp: 343262306a36Sopenharmony_ci *timestamp = ktime_to_ns(pend->fence.timestamp); 343362306a36Sopenharmony_ci kfree(pend); 343462306a36Sopenharmony_ci hl_cb_put(cq_cb); 343562306a36Sopenharmony_cits_registration_exit: 343662306a36Sopenharmony_ci hl_ctx_put(ctx); 343762306a36Sopenharmony_ci 343862306a36Sopenharmony_ci return rc; 343962306a36Sopenharmony_ci 344062306a36Sopenharmony_ciput_ts_buff: 344162306a36Sopenharmony_ci hl_mmap_mem_buf_put(buf); 344262306a36Sopenharmony_ciput_cq_cb: 344362306a36Sopenharmony_ci hl_cb_put(cq_cb); 344462306a36Sopenharmony_ciput_ctx: 344562306a36Sopenharmony_ci hl_ctx_put(ctx); 344662306a36Sopenharmony_ci 344762306a36Sopenharmony_ci return rc; 344862306a36Sopenharmony_ci} 344962306a36Sopenharmony_ci 345062306a36Sopenharmony_cistatic int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx, 345162306a36Sopenharmony_ci u64 timeout_us, u64 user_address, 345262306a36Sopenharmony_ci u64 target_value, struct hl_user_interrupt *interrupt, 345362306a36Sopenharmony_ci u32 *status, 345462306a36Sopenharmony_ci u64 *timestamp) 345562306a36Sopenharmony_ci{ 345662306a36Sopenharmony_ci struct hl_user_pending_interrupt *pend; 345762306a36Sopenharmony_ci unsigned long timeout; 345862306a36Sopenharmony_ci u64 completion_value; 345962306a36Sopenharmony_ci long completion_rc; 346062306a36Sopenharmony_ci int rc = 0; 346162306a36Sopenharmony_ci 346262306a36Sopenharmony_ci timeout = hl_usecs64_to_jiffies(timeout_us); 346362306a36Sopenharmony_ci 346462306a36Sopenharmony_ci hl_ctx_get(ctx); 346562306a36Sopenharmony_ci 346662306a36Sopenharmony_ci pend = kzalloc(sizeof(*pend), GFP_KERNEL); 346762306a36Sopenharmony_ci if (!pend) { 346862306a36Sopenharmony_ci hl_ctx_put(ctx); 346962306a36Sopenharmony_ci return -ENOMEM; 347062306a36Sopenharmony_ci } 347162306a36Sopenharmony_ci 347262306a36Sopenharmony_ci hl_fence_init(&pend->fence, ULONG_MAX); 347362306a36Sopenharmony_ci 347462306a36Sopenharmony_ci /* Add pending user interrupt to relevant list for the interrupt 347562306a36Sopenharmony_ci * handler to monitor 347662306a36Sopenharmony_ci */ 347762306a36Sopenharmony_ci spin_lock(&interrupt->wait_list_lock); 347862306a36Sopenharmony_ci list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); 347962306a36Sopenharmony_ci spin_unlock(&interrupt->wait_list_lock); 348062306a36Sopenharmony_ci 348162306a36Sopenharmony_ci /* We check for completion value as interrupt could have been received 348262306a36Sopenharmony_ci * before we added the node to the wait list 348362306a36Sopenharmony_ci */ 348462306a36Sopenharmony_ci if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) { 348562306a36Sopenharmony_ci dev_err(hdev->dev, "Failed to copy completion value from user\n"); 348662306a36Sopenharmony_ci rc = -EFAULT; 348762306a36Sopenharmony_ci goto remove_pending_user_interrupt; 348862306a36Sopenharmony_ci } 348962306a36Sopenharmony_ci 349062306a36Sopenharmony_ci if (completion_value >= target_value) { 349162306a36Sopenharmony_ci *status = HL_WAIT_CS_STATUS_COMPLETED; 349262306a36Sopenharmony_ci /* There was no interrupt, we assume the completion is now. */ 349362306a36Sopenharmony_ci pend->fence.timestamp = ktime_get(); 349462306a36Sopenharmony_ci } else { 349562306a36Sopenharmony_ci *status = HL_WAIT_CS_STATUS_BUSY; 349662306a36Sopenharmony_ci } 349762306a36Sopenharmony_ci 349862306a36Sopenharmony_ci if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED)) 349962306a36Sopenharmony_ci goto remove_pending_user_interrupt; 350062306a36Sopenharmony_ci 350162306a36Sopenharmony_ciwait_again: 350262306a36Sopenharmony_ci /* Wait for interrupt handler to signal completion */ 350362306a36Sopenharmony_ci completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, 350462306a36Sopenharmony_ci timeout); 350562306a36Sopenharmony_ci 350662306a36Sopenharmony_ci /* If timeout did not expire we need to perform the comparison. 350762306a36Sopenharmony_ci * If comparison fails, keep waiting until timeout expires 350862306a36Sopenharmony_ci */ 350962306a36Sopenharmony_ci if (completion_rc > 0) { 351062306a36Sopenharmony_ci spin_lock(&interrupt->wait_list_lock); 351162306a36Sopenharmony_ci /* reinit_completion must be called before we check for user 351262306a36Sopenharmony_ci * completion value, otherwise, if interrupt is received after 351362306a36Sopenharmony_ci * the comparison and before the next wait_for_completion, 351462306a36Sopenharmony_ci * we will reach timeout and fail 351562306a36Sopenharmony_ci */ 351662306a36Sopenharmony_ci reinit_completion(&pend->fence.completion); 351762306a36Sopenharmony_ci spin_unlock(&interrupt->wait_list_lock); 351862306a36Sopenharmony_ci 351962306a36Sopenharmony_ci if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) { 352062306a36Sopenharmony_ci dev_err(hdev->dev, "Failed to copy completion value from user\n"); 352162306a36Sopenharmony_ci rc = -EFAULT; 352262306a36Sopenharmony_ci 352362306a36Sopenharmony_ci goto remove_pending_user_interrupt; 352462306a36Sopenharmony_ci } 352562306a36Sopenharmony_ci 352662306a36Sopenharmony_ci if (completion_value >= target_value) { 352762306a36Sopenharmony_ci *status = HL_WAIT_CS_STATUS_COMPLETED; 352862306a36Sopenharmony_ci } else if (pend->fence.error) { 352962306a36Sopenharmony_ci dev_err_ratelimited(hdev->dev, 353062306a36Sopenharmony_ci "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", 353162306a36Sopenharmony_ci pend->fence.error); 353262306a36Sopenharmony_ci /* set the command completion status as ABORTED */ 353362306a36Sopenharmony_ci *status = HL_WAIT_CS_STATUS_ABORTED; 353462306a36Sopenharmony_ci } else { 353562306a36Sopenharmony_ci timeout = completion_rc; 353662306a36Sopenharmony_ci goto wait_again; 353762306a36Sopenharmony_ci } 353862306a36Sopenharmony_ci } else if (completion_rc == -ERESTARTSYS) { 353962306a36Sopenharmony_ci dev_err_ratelimited(hdev->dev, 354062306a36Sopenharmony_ci "user process got signal while waiting for interrupt ID %d\n", 354162306a36Sopenharmony_ci interrupt->interrupt_id); 354262306a36Sopenharmony_ci rc = -EINTR; 354362306a36Sopenharmony_ci } else { 354462306a36Sopenharmony_ci /* The wait has timed-out. We don't know anything beyond that 354562306a36Sopenharmony_ci * because the workload wasn't submitted through the driver. 354662306a36Sopenharmony_ci * Therefore, from driver's perspective, the workload is still 354762306a36Sopenharmony_ci * executing. 354862306a36Sopenharmony_ci */ 354962306a36Sopenharmony_ci rc = 0; 355062306a36Sopenharmony_ci *status = HL_WAIT_CS_STATUS_BUSY; 355162306a36Sopenharmony_ci } 355262306a36Sopenharmony_ci 355362306a36Sopenharmony_ciremove_pending_user_interrupt: 355462306a36Sopenharmony_ci spin_lock(&interrupt->wait_list_lock); 355562306a36Sopenharmony_ci list_del(&pend->wait_list_node); 355662306a36Sopenharmony_ci spin_unlock(&interrupt->wait_list_lock); 355762306a36Sopenharmony_ci 355862306a36Sopenharmony_ci *timestamp = ktime_to_ns(pend->fence.timestamp); 355962306a36Sopenharmony_ci 356062306a36Sopenharmony_ci kfree(pend); 356162306a36Sopenharmony_ci hl_ctx_put(ctx); 356262306a36Sopenharmony_ci 356362306a36Sopenharmony_ci return rc; 356462306a36Sopenharmony_ci} 356562306a36Sopenharmony_ci 356662306a36Sopenharmony_cistatic int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) 356762306a36Sopenharmony_ci{ 356862306a36Sopenharmony_ci u16 interrupt_id, first_interrupt, last_interrupt; 356962306a36Sopenharmony_ci struct hl_device *hdev = hpriv->hdev; 357062306a36Sopenharmony_ci struct asic_fixed_properties *prop; 357162306a36Sopenharmony_ci struct hl_user_interrupt *interrupt; 357262306a36Sopenharmony_ci union hl_wait_cs_args *args = data; 357362306a36Sopenharmony_ci u32 status = HL_WAIT_CS_STATUS_BUSY; 357462306a36Sopenharmony_ci u64 timestamp = 0; 357562306a36Sopenharmony_ci int rc, int_idx; 357662306a36Sopenharmony_ci 357762306a36Sopenharmony_ci prop = &hdev->asic_prop; 357862306a36Sopenharmony_ci 357962306a36Sopenharmony_ci if (!(prop->user_interrupt_count + prop->user_dec_intr_count)) { 358062306a36Sopenharmony_ci dev_err(hdev->dev, "no user interrupts allowed"); 358162306a36Sopenharmony_ci return -EPERM; 358262306a36Sopenharmony_ci } 358362306a36Sopenharmony_ci 358462306a36Sopenharmony_ci interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags); 358562306a36Sopenharmony_ci 358662306a36Sopenharmony_ci first_interrupt = prop->first_available_user_interrupt; 358762306a36Sopenharmony_ci last_interrupt = prop->first_available_user_interrupt + prop->user_interrupt_count - 1; 358862306a36Sopenharmony_ci 358962306a36Sopenharmony_ci if (interrupt_id < prop->user_dec_intr_count) { 359062306a36Sopenharmony_ci 359162306a36Sopenharmony_ci /* Check if the requested core is enabled */ 359262306a36Sopenharmony_ci if (!(prop->decoder_enabled_mask & BIT(interrupt_id))) { 359362306a36Sopenharmony_ci dev_err(hdev->dev, "interrupt on a disabled core(%u) not allowed", 359462306a36Sopenharmony_ci interrupt_id); 359562306a36Sopenharmony_ci return -EINVAL; 359662306a36Sopenharmony_ci } 359762306a36Sopenharmony_ci 359862306a36Sopenharmony_ci interrupt = &hdev->user_interrupt[interrupt_id]; 359962306a36Sopenharmony_ci 360062306a36Sopenharmony_ci } else if (interrupt_id >= first_interrupt && interrupt_id <= last_interrupt) { 360162306a36Sopenharmony_ci 360262306a36Sopenharmony_ci int_idx = interrupt_id - first_interrupt + prop->user_dec_intr_count; 360362306a36Sopenharmony_ci interrupt = &hdev->user_interrupt[int_idx]; 360462306a36Sopenharmony_ci 360562306a36Sopenharmony_ci } else if (interrupt_id == HL_COMMON_USER_CQ_INTERRUPT_ID) { 360662306a36Sopenharmony_ci interrupt = &hdev->common_user_cq_interrupt; 360762306a36Sopenharmony_ci } else if (interrupt_id == HL_COMMON_DEC_INTERRUPT_ID) { 360862306a36Sopenharmony_ci interrupt = &hdev->common_decoder_interrupt; 360962306a36Sopenharmony_ci } else { 361062306a36Sopenharmony_ci dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id); 361162306a36Sopenharmony_ci return -EINVAL; 361262306a36Sopenharmony_ci } 361362306a36Sopenharmony_ci 361462306a36Sopenharmony_ci if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) 361562306a36Sopenharmony_ci rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr, 361662306a36Sopenharmony_ci args->in.interrupt_timeout_us, args->in.cq_counters_handle, 361762306a36Sopenharmony_ci args->in.cq_counters_offset, 361862306a36Sopenharmony_ci args->in.target, interrupt, 361962306a36Sopenharmony_ci !!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT), 362062306a36Sopenharmony_ci args->in.timestamp_handle, args->in.timestamp_offset, 362162306a36Sopenharmony_ci &status, ×tamp); 362262306a36Sopenharmony_ci else 362362306a36Sopenharmony_ci rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx, 362462306a36Sopenharmony_ci args->in.interrupt_timeout_us, args->in.addr, 362562306a36Sopenharmony_ci args->in.target, interrupt, &status, 362662306a36Sopenharmony_ci ×tamp); 362762306a36Sopenharmony_ci if (rc) 362862306a36Sopenharmony_ci return rc; 362962306a36Sopenharmony_ci 363062306a36Sopenharmony_ci memset(args, 0, sizeof(*args)); 363162306a36Sopenharmony_ci args->out.status = status; 363262306a36Sopenharmony_ci 363362306a36Sopenharmony_ci if (timestamp) { 363462306a36Sopenharmony_ci args->out.timestamp_nsec = timestamp; 363562306a36Sopenharmony_ci args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; 363662306a36Sopenharmony_ci } 363762306a36Sopenharmony_ci 363862306a36Sopenharmony_ci return 0; 363962306a36Sopenharmony_ci} 364062306a36Sopenharmony_ci 364162306a36Sopenharmony_ciint hl_wait_ioctl(struct hl_fpriv *hpriv, void *data) 364262306a36Sopenharmony_ci{ 364362306a36Sopenharmony_ci struct hl_device *hdev = hpriv->hdev; 364462306a36Sopenharmony_ci union hl_wait_cs_args *args = data; 364562306a36Sopenharmony_ci u32 flags = args->in.flags; 364662306a36Sopenharmony_ci int rc; 364762306a36Sopenharmony_ci 364862306a36Sopenharmony_ci /* If the device is not operational, or if an error has happened and user should release the 364962306a36Sopenharmony_ci * device, there is no point in waiting for any command submission or user interrupt. 365062306a36Sopenharmony_ci */ 365162306a36Sopenharmony_ci if (!hl_device_operational(hpriv->hdev, NULL) || hdev->reset_info.watchdog_active) 365262306a36Sopenharmony_ci return -EBUSY; 365362306a36Sopenharmony_ci 365462306a36Sopenharmony_ci if (flags & HL_WAIT_CS_FLAGS_INTERRUPT) 365562306a36Sopenharmony_ci rc = hl_interrupt_wait_ioctl(hpriv, data); 365662306a36Sopenharmony_ci else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS) 365762306a36Sopenharmony_ci rc = hl_multi_cs_wait_ioctl(hpriv, data); 365862306a36Sopenharmony_ci else 365962306a36Sopenharmony_ci rc = hl_cs_wait_ioctl(hpriv, data); 366062306a36Sopenharmony_ci 366162306a36Sopenharmony_ci return rc; 366262306a36Sopenharmony_ci} 3663