18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci 38c2ecf20Sopenharmony_ci/* 48c2ecf20Sopenharmony_ci * Copyright 2016-2019 HabanaLabs, Ltd. 58c2ecf20Sopenharmony_ci * All Rights Reserved. 68c2ecf20Sopenharmony_ci */ 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci#include "habanalabs.h" 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_ci#include <linux/slab.h> 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci/** 138c2ecf20Sopenharmony_ci * struct hl_eqe_work - This structure is used to schedule work of EQ 148c2ecf20Sopenharmony_ci * entry and cpucp_reset event 158c2ecf20Sopenharmony_ci * 168c2ecf20Sopenharmony_ci * @eq_work: workqueue object to run when EQ entry is received 178c2ecf20Sopenharmony_ci * @hdev: pointer to device structure 188c2ecf20Sopenharmony_ci * @eq_entry: copy of the EQ entry 198c2ecf20Sopenharmony_ci */ 208c2ecf20Sopenharmony_cistruct hl_eqe_work { 218c2ecf20Sopenharmony_ci struct work_struct eq_work; 228c2ecf20Sopenharmony_ci struct hl_device *hdev; 238c2ecf20Sopenharmony_ci struct hl_eq_entry eq_entry; 248c2ecf20Sopenharmony_ci}; 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_ci/** 278c2ecf20Sopenharmony_ci * hl_cq_inc_ptr - increment ci or pi of cq 288c2ecf20Sopenharmony_ci * 298c2ecf20Sopenharmony_ci * @ptr: the current ci or pi value of the completion queue 308c2ecf20Sopenharmony_ci * 318c2ecf20Sopenharmony_ci * Increment ptr by 1. If it reaches the number of completion queue 328c2ecf20Sopenharmony_ci * entries, set it to 0 338c2ecf20Sopenharmony_ci */ 348c2ecf20Sopenharmony_ciinline u32 hl_cq_inc_ptr(u32 ptr) 358c2ecf20Sopenharmony_ci{ 368c2ecf20Sopenharmony_ci ptr++; 378c2ecf20Sopenharmony_ci if (unlikely(ptr == HL_CQ_LENGTH)) 388c2ecf20Sopenharmony_ci ptr = 0; 398c2ecf20Sopenharmony_ci return ptr; 408c2ecf20Sopenharmony_ci} 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci/** 438c2ecf20Sopenharmony_ci * hl_eq_inc_ptr - increment ci of eq 448c2ecf20Sopenharmony_ci * 458c2ecf20Sopenharmony_ci * @ptr: the current ci value of the event queue 468c2ecf20Sopenharmony_ci * 478c2ecf20Sopenharmony_ci * Increment ptr by 1. If it reaches the number of event queue 488c2ecf20Sopenharmony_ci * entries, set it to 0 498c2ecf20Sopenharmony_ci */ 508c2ecf20Sopenharmony_ciinline u32 hl_eq_inc_ptr(u32 ptr) 518c2ecf20Sopenharmony_ci{ 528c2ecf20Sopenharmony_ci ptr++; 538c2ecf20Sopenharmony_ci if (unlikely(ptr == HL_EQ_LENGTH)) 548c2ecf20Sopenharmony_ci ptr = 0; 558c2ecf20Sopenharmony_ci return ptr; 568c2ecf20Sopenharmony_ci} 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_cistatic void irq_handle_eqe(struct work_struct *work) 598c2ecf20Sopenharmony_ci{ 608c2ecf20Sopenharmony_ci struct hl_eqe_work *eqe_work = container_of(work, struct hl_eqe_work, 618c2ecf20Sopenharmony_ci eq_work); 628c2ecf20Sopenharmony_ci struct hl_device *hdev = eqe_work->hdev; 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci hdev->asic_funcs->handle_eqe(hdev, &eqe_work->eq_entry); 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci kfree(eqe_work); 678c2ecf20Sopenharmony_ci} 688c2ecf20Sopenharmony_ci 698c2ecf20Sopenharmony_ci/** 708c2ecf20Sopenharmony_ci * hl_irq_handler_cq - irq handler for completion queue 718c2ecf20Sopenharmony_ci * 728c2ecf20Sopenharmony_ci * @irq: irq number 738c2ecf20Sopenharmony_ci * @arg: pointer to completion queue structure 748c2ecf20Sopenharmony_ci * 758c2ecf20Sopenharmony_ci */ 768c2ecf20Sopenharmony_ciirqreturn_t hl_irq_handler_cq(int irq, void *arg) 778c2ecf20Sopenharmony_ci{ 788c2ecf20Sopenharmony_ci struct hl_cq *cq = arg; 798c2ecf20Sopenharmony_ci struct hl_device *hdev = cq->hdev; 808c2ecf20Sopenharmony_ci struct hl_hw_queue *queue; 818c2ecf20Sopenharmony_ci struct hl_cs_job *job; 828c2ecf20Sopenharmony_ci bool shadow_index_valid; 838c2ecf20Sopenharmony_ci u16 shadow_index; 848c2ecf20Sopenharmony_ci struct hl_cq_entry *cq_entry, *cq_base; 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci if (hdev->disabled) { 878c2ecf20Sopenharmony_ci dev_dbg(hdev->dev, 888c2ecf20Sopenharmony_ci "Device disabled but received IRQ %d for CQ %d\n", 898c2ecf20Sopenharmony_ci irq, cq->hw_queue_id); 908c2ecf20Sopenharmony_ci return IRQ_HANDLED; 918c2ecf20Sopenharmony_ci } 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci cq_base = cq->kernel_address; 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci while (1) { 968c2ecf20Sopenharmony_ci bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) & 978c2ecf20Sopenharmony_ci CQ_ENTRY_READY_MASK) 988c2ecf20Sopenharmony_ci >> CQ_ENTRY_READY_SHIFT); 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci if (!entry_ready) 1018c2ecf20Sopenharmony_ci break; 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_ci cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci]; 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci /* Make sure we read CQ entry contents after we've 1068c2ecf20Sopenharmony_ci * checked the ownership bit. 1078c2ecf20Sopenharmony_ci */ 1088c2ecf20Sopenharmony_ci dma_rmb(); 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_ci shadow_index_valid = ((le32_to_cpu(cq_entry->data) & 1118c2ecf20Sopenharmony_ci CQ_ENTRY_SHADOW_INDEX_VALID_MASK) 1128c2ecf20Sopenharmony_ci >> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT); 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci shadow_index = (u16) ((le32_to_cpu(cq_entry->data) & 1158c2ecf20Sopenharmony_ci CQ_ENTRY_SHADOW_INDEX_MASK) 1168c2ecf20Sopenharmony_ci >> CQ_ENTRY_SHADOW_INDEX_SHIFT); 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci queue = &hdev->kernel_queues[cq->hw_queue_id]; 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci if ((shadow_index_valid) && (!hdev->disabled)) { 1218c2ecf20Sopenharmony_ci job = queue->shadow_queue[hl_pi_2_offset(shadow_index)]; 1228c2ecf20Sopenharmony_ci queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work); 1238c2ecf20Sopenharmony_ci } 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci atomic_inc(&queue->ci); 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci /* Clear CQ entry ready bit */ 1288c2ecf20Sopenharmony_ci cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) & 1298c2ecf20Sopenharmony_ci ~CQ_ENTRY_READY_MASK); 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci cq->ci = hl_cq_inc_ptr(cq->ci); 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci /* Increment free slots */ 1348c2ecf20Sopenharmony_ci atomic_inc(&cq->free_slots_cnt); 1358c2ecf20Sopenharmony_ci } 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_ci return IRQ_HANDLED; 1388c2ecf20Sopenharmony_ci} 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci/** 1418c2ecf20Sopenharmony_ci * hl_irq_handler_eq - irq handler for event queue 1428c2ecf20Sopenharmony_ci * 1438c2ecf20Sopenharmony_ci * @irq: irq number 1448c2ecf20Sopenharmony_ci * @arg: pointer to event queue structure 1458c2ecf20Sopenharmony_ci * 1468c2ecf20Sopenharmony_ci */ 1478c2ecf20Sopenharmony_ciirqreturn_t hl_irq_handler_eq(int irq, void *arg) 1488c2ecf20Sopenharmony_ci{ 1498c2ecf20Sopenharmony_ci struct hl_eq *eq = arg; 1508c2ecf20Sopenharmony_ci struct hl_device *hdev = eq->hdev; 1518c2ecf20Sopenharmony_ci struct hl_eq_entry *eq_entry; 1528c2ecf20Sopenharmony_ci struct hl_eq_entry *eq_base; 1538c2ecf20Sopenharmony_ci struct hl_eqe_work *handle_eqe_work; 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ci eq_base = eq->kernel_address; 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci while (1) { 1588c2ecf20Sopenharmony_ci bool entry_ready = 1598c2ecf20Sopenharmony_ci ((le32_to_cpu(eq_base[eq->ci].hdr.ctl) & 1608c2ecf20Sopenharmony_ci EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT); 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci if (!entry_ready) 1638c2ecf20Sopenharmony_ci break; 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_ci eq_entry = &eq_base[eq->ci]; 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_ci /* 1688c2ecf20Sopenharmony_ci * Make sure we read EQ entry contents after we've 1698c2ecf20Sopenharmony_ci * checked the ownership bit. 1708c2ecf20Sopenharmony_ci */ 1718c2ecf20Sopenharmony_ci dma_rmb(); 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci if (hdev->disabled) { 1748c2ecf20Sopenharmony_ci dev_warn(hdev->dev, 1758c2ecf20Sopenharmony_ci "Device disabled but received IRQ %d for EQ\n", 1768c2ecf20Sopenharmony_ci irq); 1778c2ecf20Sopenharmony_ci goto skip_irq; 1788c2ecf20Sopenharmony_ci } 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci handle_eqe_work = kmalloc(sizeof(*handle_eqe_work), GFP_ATOMIC); 1818c2ecf20Sopenharmony_ci if (handle_eqe_work) { 1828c2ecf20Sopenharmony_ci INIT_WORK(&handle_eqe_work->eq_work, irq_handle_eqe); 1838c2ecf20Sopenharmony_ci handle_eqe_work->hdev = hdev; 1848c2ecf20Sopenharmony_ci 1858c2ecf20Sopenharmony_ci memcpy(&handle_eqe_work->eq_entry, eq_entry, 1868c2ecf20Sopenharmony_ci sizeof(*eq_entry)); 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci queue_work(hdev->eq_wq, &handle_eqe_work->eq_work); 1898c2ecf20Sopenharmony_ci } 1908c2ecf20Sopenharmony_ciskip_irq: 1918c2ecf20Sopenharmony_ci /* Clear EQ entry ready bit */ 1928c2ecf20Sopenharmony_ci eq_entry->hdr.ctl = 1938c2ecf20Sopenharmony_ci cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) & 1948c2ecf20Sopenharmony_ci ~EQ_CTL_READY_MASK); 1958c2ecf20Sopenharmony_ci 1968c2ecf20Sopenharmony_ci eq->ci = hl_eq_inc_ptr(eq->ci); 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci hdev->asic_funcs->update_eq_ci(hdev, eq->ci); 1998c2ecf20Sopenharmony_ci } 2008c2ecf20Sopenharmony_ci 2018c2ecf20Sopenharmony_ci return IRQ_HANDLED; 2028c2ecf20Sopenharmony_ci} 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_ci/** 2058c2ecf20Sopenharmony_ci * hl_cq_init - main initialization function for an cq object 2068c2ecf20Sopenharmony_ci * 2078c2ecf20Sopenharmony_ci * @hdev: pointer to device structure 2088c2ecf20Sopenharmony_ci * @q: pointer to cq structure 2098c2ecf20Sopenharmony_ci * @hw_queue_id: The H/W queue ID this completion queue belongs to 2108c2ecf20Sopenharmony_ci * 2118c2ecf20Sopenharmony_ci * Allocate dma-able memory for the completion queue and initialize fields 2128c2ecf20Sopenharmony_ci * Returns 0 on success 2138c2ecf20Sopenharmony_ci */ 2148c2ecf20Sopenharmony_ciint hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id) 2158c2ecf20Sopenharmony_ci{ 2168c2ecf20Sopenharmony_ci void *p; 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES, 2198c2ecf20Sopenharmony_ci &q->bus_address, GFP_KERNEL | __GFP_ZERO); 2208c2ecf20Sopenharmony_ci if (!p) 2218c2ecf20Sopenharmony_ci return -ENOMEM; 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci q->hdev = hdev; 2248c2ecf20Sopenharmony_ci q->kernel_address = p; 2258c2ecf20Sopenharmony_ci q->hw_queue_id = hw_queue_id; 2268c2ecf20Sopenharmony_ci q->ci = 0; 2278c2ecf20Sopenharmony_ci q->pi = 0; 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_ci atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH); 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci return 0; 2328c2ecf20Sopenharmony_ci} 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_ci/** 2358c2ecf20Sopenharmony_ci * hl_cq_fini - destroy completion queue 2368c2ecf20Sopenharmony_ci * 2378c2ecf20Sopenharmony_ci * @hdev: pointer to device structure 2388c2ecf20Sopenharmony_ci * @q: pointer to cq structure 2398c2ecf20Sopenharmony_ci * 2408c2ecf20Sopenharmony_ci * Free the completion queue memory 2418c2ecf20Sopenharmony_ci */ 2428c2ecf20Sopenharmony_civoid hl_cq_fini(struct hl_device *hdev, struct hl_cq *q) 2438c2ecf20Sopenharmony_ci{ 2448c2ecf20Sopenharmony_ci hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES, 2458c2ecf20Sopenharmony_ci q->kernel_address, 2468c2ecf20Sopenharmony_ci q->bus_address); 2478c2ecf20Sopenharmony_ci} 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_civoid hl_cq_reset(struct hl_device *hdev, struct hl_cq *q) 2508c2ecf20Sopenharmony_ci{ 2518c2ecf20Sopenharmony_ci q->ci = 0; 2528c2ecf20Sopenharmony_ci q->pi = 0; 2538c2ecf20Sopenharmony_ci 2548c2ecf20Sopenharmony_ci atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH); 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_ci /* 2578c2ecf20Sopenharmony_ci * It's not enough to just reset the PI/CI because the H/W may have 2588c2ecf20Sopenharmony_ci * written valid completion entries before it was halted and therefore 2598c2ecf20Sopenharmony_ci * we need to clean the actual queues so we won't process old entries 2608c2ecf20Sopenharmony_ci * when the device is operational again 2618c2ecf20Sopenharmony_ci */ 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_ci memset(q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES); 2648c2ecf20Sopenharmony_ci} 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci/** 2678c2ecf20Sopenharmony_ci * hl_eq_init - main initialization function for an event queue object 2688c2ecf20Sopenharmony_ci * 2698c2ecf20Sopenharmony_ci * @hdev: pointer to device structure 2708c2ecf20Sopenharmony_ci * @q: pointer to eq structure 2718c2ecf20Sopenharmony_ci * 2728c2ecf20Sopenharmony_ci * Allocate dma-able memory for the event queue and initialize fields 2738c2ecf20Sopenharmony_ci * Returns 0 on success 2748c2ecf20Sopenharmony_ci */ 2758c2ecf20Sopenharmony_ciint hl_eq_init(struct hl_device *hdev, struct hl_eq *q) 2768c2ecf20Sopenharmony_ci{ 2778c2ecf20Sopenharmony_ci void *p; 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ci p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, 2808c2ecf20Sopenharmony_ci HL_EQ_SIZE_IN_BYTES, 2818c2ecf20Sopenharmony_ci &q->bus_address); 2828c2ecf20Sopenharmony_ci if (!p) 2838c2ecf20Sopenharmony_ci return -ENOMEM; 2848c2ecf20Sopenharmony_ci 2858c2ecf20Sopenharmony_ci q->hdev = hdev; 2868c2ecf20Sopenharmony_ci q->kernel_address = p; 2878c2ecf20Sopenharmony_ci q->ci = 0; 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci return 0; 2908c2ecf20Sopenharmony_ci} 2918c2ecf20Sopenharmony_ci 2928c2ecf20Sopenharmony_ci/** 2938c2ecf20Sopenharmony_ci * hl_eq_fini - destroy event queue 2948c2ecf20Sopenharmony_ci * 2958c2ecf20Sopenharmony_ci * @hdev: pointer to device structure 2968c2ecf20Sopenharmony_ci * @q: pointer to eq structure 2978c2ecf20Sopenharmony_ci * 2988c2ecf20Sopenharmony_ci * Free the event queue memory 2998c2ecf20Sopenharmony_ci */ 3008c2ecf20Sopenharmony_civoid hl_eq_fini(struct hl_device *hdev, struct hl_eq *q) 3018c2ecf20Sopenharmony_ci{ 3028c2ecf20Sopenharmony_ci flush_workqueue(hdev->eq_wq); 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ci hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, 3058c2ecf20Sopenharmony_ci HL_EQ_SIZE_IN_BYTES, 3068c2ecf20Sopenharmony_ci q->kernel_address); 3078c2ecf20Sopenharmony_ci} 3088c2ecf20Sopenharmony_ci 3098c2ecf20Sopenharmony_civoid hl_eq_reset(struct hl_device *hdev, struct hl_eq *q) 3108c2ecf20Sopenharmony_ci{ 3118c2ecf20Sopenharmony_ci q->ci = 0; 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_ci /* 3148c2ecf20Sopenharmony_ci * It's not enough to just reset the PI/CI because the H/W may have 3158c2ecf20Sopenharmony_ci * written valid completion entries before it was halted and therefore 3168c2ecf20Sopenharmony_ci * we need to clean the actual queues so we won't process old entries 3178c2ecf20Sopenharmony_ci * when the device is operational again 3188c2ecf20Sopenharmony_ci */ 3198c2ecf20Sopenharmony_ci 3208c2ecf20Sopenharmony_ci memset(q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES); 3218c2ecf20Sopenharmony_ci} 322