18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci
38c2ecf20Sopenharmony_ci/*
48c2ecf20Sopenharmony_ci * Copyright 2016-2019 HabanaLabs, Ltd.
58c2ecf20Sopenharmony_ci * All Rights Reserved.
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include "habanalabs.h"
98c2ecf20Sopenharmony_ci
108c2ecf20Sopenharmony_ci#include <linux/slab.h>
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci/**
138c2ecf20Sopenharmony_ci * struct hl_eqe_work - This structure is used to schedule work of EQ
148c2ecf20Sopenharmony_ci *                      entry and cpucp_reset event
158c2ecf20Sopenharmony_ci *
168c2ecf20Sopenharmony_ci * @eq_work:          workqueue object to run when EQ entry is received
178c2ecf20Sopenharmony_ci * @hdev:             pointer to device structure
188c2ecf20Sopenharmony_ci * @eq_entry:         copy of the EQ entry
198c2ecf20Sopenharmony_ci */
208c2ecf20Sopenharmony_cistruct hl_eqe_work {
218c2ecf20Sopenharmony_ci	struct work_struct	eq_work;
228c2ecf20Sopenharmony_ci	struct hl_device	*hdev;
238c2ecf20Sopenharmony_ci	struct hl_eq_entry	eq_entry;
248c2ecf20Sopenharmony_ci};
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_ci/**
278c2ecf20Sopenharmony_ci * hl_cq_inc_ptr - increment ci or pi of cq
288c2ecf20Sopenharmony_ci *
298c2ecf20Sopenharmony_ci * @ptr: the current ci or pi value of the completion queue
308c2ecf20Sopenharmony_ci *
318c2ecf20Sopenharmony_ci * Increment ptr by 1. If it reaches the number of completion queue
328c2ecf20Sopenharmony_ci * entries, set it to 0
338c2ecf20Sopenharmony_ci */
348c2ecf20Sopenharmony_ciinline u32 hl_cq_inc_ptr(u32 ptr)
358c2ecf20Sopenharmony_ci{
368c2ecf20Sopenharmony_ci	ptr++;
378c2ecf20Sopenharmony_ci	if (unlikely(ptr == HL_CQ_LENGTH))
388c2ecf20Sopenharmony_ci		ptr = 0;
398c2ecf20Sopenharmony_ci	return ptr;
408c2ecf20Sopenharmony_ci}
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_ci/**
438c2ecf20Sopenharmony_ci * hl_eq_inc_ptr - increment ci of eq
448c2ecf20Sopenharmony_ci *
458c2ecf20Sopenharmony_ci * @ptr: the current ci value of the event queue
468c2ecf20Sopenharmony_ci *
478c2ecf20Sopenharmony_ci * Increment ptr by 1. If it reaches the number of event queue
488c2ecf20Sopenharmony_ci * entries, set it to 0
498c2ecf20Sopenharmony_ci */
508c2ecf20Sopenharmony_ciinline u32 hl_eq_inc_ptr(u32 ptr)
518c2ecf20Sopenharmony_ci{
528c2ecf20Sopenharmony_ci	ptr++;
538c2ecf20Sopenharmony_ci	if (unlikely(ptr == HL_EQ_LENGTH))
548c2ecf20Sopenharmony_ci		ptr = 0;
558c2ecf20Sopenharmony_ci	return ptr;
568c2ecf20Sopenharmony_ci}
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_cistatic void irq_handle_eqe(struct work_struct *work)
598c2ecf20Sopenharmony_ci{
608c2ecf20Sopenharmony_ci	struct hl_eqe_work *eqe_work = container_of(work, struct hl_eqe_work,
618c2ecf20Sopenharmony_ci							eq_work);
628c2ecf20Sopenharmony_ci	struct hl_device *hdev = eqe_work->hdev;
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	hdev->asic_funcs->handle_eqe(hdev, &eqe_work->eq_entry);
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	kfree(eqe_work);
678c2ecf20Sopenharmony_ci}
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_ci/**
708c2ecf20Sopenharmony_ci * hl_irq_handler_cq - irq handler for completion queue
718c2ecf20Sopenharmony_ci *
728c2ecf20Sopenharmony_ci * @irq: irq number
738c2ecf20Sopenharmony_ci * @arg: pointer to completion queue structure
748c2ecf20Sopenharmony_ci *
758c2ecf20Sopenharmony_ci */
768c2ecf20Sopenharmony_ciirqreturn_t hl_irq_handler_cq(int irq, void *arg)
778c2ecf20Sopenharmony_ci{
788c2ecf20Sopenharmony_ci	struct hl_cq *cq = arg;
798c2ecf20Sopenharmony_ci	struct hl_device *hdev = cq->hdev;
808c2ecf20Sopenharmony_ci	struct hl_hw_queue *queue;
818c2ecf20Sopenharmony_ci	struct hl_cs_job *job;
828c2ecf20Sopenharmony_ci	bool shadow_index_valid;
838c2ecf20Sopenharmony_ci	u16 shadow_index;
848c2ecf20Sopenharmony_ci	struct hl_cq_entry *cq_entry, *cq_base;
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_ci	if (hdev->disabled) {
878c2ecf20Sopenharmony_ci		dev_dbg(hdev->dev,
888c2ecf20Sopenharmony_ci			"Device disabled but received IRQ %d for CQ %d\n",
898c2ecf20Sopenharmony_ci			irq, cq->hw_queue_id);
908c2ecf20Sopenharmony_ci		return IRQ_HANDLED;
918c2ecf20Sopenharmony_ci	}
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci	cq_base = cq->kernel_address;
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ci	while (1) {
968c2ecf20Sopenharmony_ci		bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) &
978c2ecf20Sopenharmony_ci					CQ_ENTRY_READY_MASK)
988c2ecf20Sopenharmony_ci						>> CQ_ENTRY_READY_SHIFT);
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci		if (!entry_ready)
1018c2ecf20Sopenharmony_ci			break;
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci		cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci];
1048c2ecf20Sopenharmony_ci
1058c2ecf20Sopenharmony_ci		/* Make sure we read CQ entry contents after we've
1068c2ecf20Sopenharmony_ci		 * checked the ownership bit.
1078c2ecf20Sopenharmony_ci		 */
1088c2ecf20Sopenharmony_ci		dma_rmb();
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci		shadow_index_valid = ((le32_to_cpu(cq_entry->data) &
1118c2ecf20Sopenharmony_ci					CQ_ENTRY_SHADOW_INDEX_VALID_MASK)
1128c2ecf20Sopenharmony_ci					>> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT);
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_ci		shadow_index = (u16) ((le32_to_cpu(cq_entry->data) &
1158c2ecf20Sopenharmony_ci					CQ_ENTRY_SHADOW_INDEX_MASK)
1168c2ecf20Sopenharmony_ci					>> CQ_ENTRY_SHADOW_INDEX_SHIFT);
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci		queue = &hdev->kernel_queues[cq->hw_queue_id];
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci		if ((shadow_index_valid) && (!hdev->disabled)) {
1218c2ecf20Sopenharmony_ci			job = queue->shadow_queue[hl_pi_2_offset(shadow_index)];
1228c2ecf20Sopenharmony_ci			queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
1238c2ecf20Sopenharmony_ci		}
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci		atomic_inc(&queue->ci);
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci		/* Clear CQ entry ready bit */
1288c2ecf20Sopenharmony_ci		cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) &
1298c2ecf20Sopenharmony_ci						~CQ_ENTRY_READY_MASK);
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_ci		cq->ci = hl_cq_inc_ptr(cq->ci);
1328c2ecf20Sopenharmony_ci
1338c2ecf20Sopenharmony_ci		/* Increment free slots */
1348c2ecf20Sopenharmony_ci		atomic_inc(&cq->free_slots_cnt);
1358c2ecf20Sopenharmony_ci	}
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_ci	return IRQ_HANDLED;
1388c2ecf20Sopenharmony_ci}
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci/**
1418c2ecf20Sopenharmony_ci * hl_irq_handler_eq - irq handler for event queue
1428c2ecf20Sopenharmony_ci *
1438c2ecf20Sopenharmony_ci * @irq: irq number
1448c2ecf20Sopenharmony_ci * @arg: pointer to event queue structure
1458c2ecf20Sopenharmony_ci *
1468c2ecf20Sopenharmony_ci */
1478c2ecf20Sopenharmony_ciirqreturn_t hl_irq_handler_eq(int irq, void *arg)
1488c2ecf20Sopenharmony_ci{
1498c2ecf20Sopenharmony_ci	struct hl_eq *eq = arg;
1508c2ecf20Sopenharmony_ci	struct hl_device *hdev = eq->hdev;
1518c2ecf20Sopenharmony_ci	struct hl_eq_entry *eq_entry;
1528c2ecf20Sopenharmony_ci	struct hl_eq_entry *eq_base;
1538c2ecf20Sopenharmony_ci	struct hl_eqe_work *handle_eqe_work;
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci	eq_base = eq->kernel_address;
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci	while (1) {
1588c2ecf20Sopenharmony_ci		bool entry_ready =
1598c2ecf20Sopenharmony_ci			((le32_to_cpu(eq_base[eq->ci].hdr.ctl) &
1608c2ecf20Sopenharmony_ci				EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT);
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci		if (!entry_ready)
1638c2ecf20Sopenharmony_ci			break;
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_ci		eq_entry = &eq_base[eq->ci];
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_ci		/*
1688c2ecf20Sopenharmony_ci		 * Make sure we read EQ entry contents after we've
1698c2ecf20Sopenharmony_ci		 * checked the ownership bit.
1708c2ecf20Sopenharmony_ci		 */
1718c2ecf20Sopenharmony_ci		dma_rmb();
1728c2ecf20Sopenharmony_ci
1738c2ecf20Sopenharmony_ci		if (hdev->disabled) {
1748c2ecf20Sopenharmony_ci			dev_warn(hdev->dev,
1758c2ecf20Sopenharmony_ci				"Device disabled but received IRQ %d for EQ\n",
1768c2ecf20Sopenharmony_ci					irq);
1778c2ecf20Sopenharmony_ci			goto skip_irq;
1788c2ecf20Sopenharmony_ci		}
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_ci		handle_eqe_work = kmalloc(sizeof(*handle_eqe_work), GFP_ATOMIC);
1818c2ecf20Sopenharmony_ci		if (handle_eqe_work) {
1828c2ecf20Sopenharmony_ci			INIT_WORK(&handle_eqe_work->eq_work, irq_handle_eqe);
1838c2ecf20Sopenharmony_ci			handle_eqe_work->hdev = hdev;
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_ci			memcpy(&handle_eqe_work->eq_entry, eq_entry,
1868c2ecf20Sopenharmony_ci					sizeof(*eq_entry));
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci			queue_work(hdev->eq_wq, &handle_eqe_work->eq_work);
1898c2ecf20Sopenharmony_ci		}
1908c2ecf20Sopenharmony_ciskip_irq:
1918c2ecf20Sopenharmony_ci		/* Clear EQ entry ready bit */
1928c2ecf20Sopenharmony_ci		eq_entry->hdr.ctl =
1938c2ecf20Sopenharmony_ci			cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) &
1948c2ecf20Sopenharmony_ci							~EQ_CTL_READY_MASK);
1958c2ecf20Sopenharmony_ci
1968c2ecf20Sopenharmony_ci		eq->ci = hl_eq_inc_ptr(eq->ci);
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci		hdev->asic_funcs->update_eq_ci(hdev, eq->ci);
1998c2ecf20Sopenharmony_ci	}
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_ci	return IRQ_HANDLED;
2028c2ecf20Sopenharmony_ci}
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci/**
2058c2ecf20Sopenharmony_ci * hl_cq_init - main initialization function for an cq object
2068c2ecf20Sopenharmony_ci *
2078c2ecf20Sopenharmony_ci * @hdev: pointer to device structure
2088c2ecf20Sopenharmony_ci * @q: pointer to cq structure
2098c2ecf20Sopenharmony_ci * @hw_queue_id: The H/W queue ID this completion queue belongs to
2108c2ecf20Sopenharmony_ci *
2118c2ecf20Sopenharmony_ci * Allocate dma-able memory for the completion queue and initialize fields
2128c2ecf20Sopenharmony_ci * Returns 0 on success
2138c2ecf20Sopenharmony_ci */
2148c2ecf20Sopenharmony_ciint hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
2158c2ecf20Sopenharmony_ci{
2168c2ecf20Sopenharmony_ci	void *p;
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
2198c2ecf20Sopenharmony_ci				&q->bus_address, GFP_KERNEL | __GFP_ZERO);
2208c2ecf20Sopenharmony_ci	if (!p)
2218c2ecf20Sopenharmony_ci		return -ENOMEM;
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_ci	q->hdev = hdev;
2248c2ecf20Sopenharmony_ci	q->kernel_address = p;
2258c2ecf20Sopenharmony_ci	q->hw_queue_id = hw_queue_id;
2268c2ecf20Sopenharmony_ci	q->ci = 0;
2278c2ecf20Sopenharmony_ci	q->pi = 0;
2288c2ecf20Sopenharmony_ci
2298c2ecf20Sopenharmony_ci	atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci	return 0;
2328c2ecf20Sopenharmony_ci}
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_ci/**
2358c2ecf20Sopenharmony_ci * hl_cq_fini - destroy completion queue
2368c2ecf20Sopenharmony_ci *
2378c2ecf20Sopenharmony_ci * @hdev: pointer to device structure
2388c2ecf20Sopenharmony_ci * @q: pointer to cq structure
2398c2ecf20Sopenharmony_ci *
2408c2ecf20Sopenharmony_ci * Free the completion queue memory
2418c2ecf20Sopenharmony_ci */
2428c2ecf20Sopenharmony_civoid hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
2438c2ecf20Sopenharmony_ci{
2448c2ecf20Sopenharmony_ci	hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
2458c2ecf20Sopenharmony_ci						 q->kernel_address,
2468c2ecf20Sopenharmony_ci						 q->bus_address);
2478c2ecf20Sopenharmony_ci}
2488c2ecf20Sopenharmony_ci
2498c2ecf20Sopenharmony_civoid hl_cq_reset(struct hl_device *hdev, struct hl_cq *q)
2508c2ecf20Sopenharmony_ci{
2518c2ecf20Sopenharmony_ci	q->ci = 0;
2528c2ecf20Sopenharmony_ci	q->pi = 0;
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_ci	atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);
2558c2ecf20Sopenharmony_ci
2568c2ecf20Sopenharmony_ci	/*
2578c2ecf20Sopenharmony_ci	 * It's not enough to just reset the PI/CI because the H/W may have
2588c2ecf20Sopenharmony_ci	 * written valid completion entries before it was halted and therefore
2598c2ecf20Sopenharmony_ci	 * we need to clean the actual queues so we won't process old entries
2608c2ecf20Sopenharmony_ci	 * when the device is operational again
2618c2ecf20Sopenharmony_ci	 */
2628c2ecf20Sopenharmony_ci
2638c2ecf20Sopenharmony_ci	memset(q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES);
2648c2ecf20Sopenharmony_ci}
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_ci/**
2678c2ecf20Sopenharmony_ci * hl_eq_init - main initialization function for an event queue object
2688c2ecf20Sopenharmony_ci *
2698c2ecf20Sopenharmony_ci * @hdev: pointer to device structure
2708c2ecf20Sopenharmony_ci * @q: pointer to eq structure
2718c2ecf20Sopenharmony_ci *
2728c2ecf20Sopenharmony_ci * Allocate dma-able memory for the event queue and initialize fields
2738c2ecf20Sopenharmony_ci * Returns 0 on success
2748c2ecf20Sopenharmony_ci */
2758c2ecf20Sopenharmony_ciint hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
2768c2ecf20Sopenharmony_ci{
2778c2ecf20Sopenharmony_ci	void *p;
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_ci	p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
2808c2ecf20Sopenharmony_ci							HL_EQ_SIZE_IN_BYTES,
2818c2ecf20Sopenharmony_ci							&q->bus_address);
2828c2ecf20Sopenharmony_ci	if (!p)
2838c2ecf20Sopenharmony_ci		return -ENOMEM;
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_ci	q->hdev = hdev;
2868c2ecf20Sopenharmony_ci	q->kernel_address = p;
2878c2ecf20Sopenharmony_ci	q->ci = 0;
2888c2ecf20Sopenharmony_ci
2898c2ecf20Sopenharmony_ci	return 0;
2908c2ecf20Sopenharmony_ci}
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_ci/**
2938c2ecf20Sopenharmony_ci * hl_eq_fini - destroy event queue
2948c2ecf20Sopenharmony_ci *
2958c2ecf20Sopenharmony_ci * @hdev: pointer to device structure
2968c2ecf20Sopenharmony_ci * @q: pointer to eq structure
2978c2ecf20Sopenharmony_ci *
2988c2ecf20Sopenharmony_ci * Free the event queue memory
2998c2ecf20Sopenharmony_ci */
3008c2ecf20Sopenharmony_civoid hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
3018c2ecf20Sopenharmony_ci{
3028c2ecf20Sopenharmony_ci	flush_workqueue(hdev->eq_wq);
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
3058c2ecf20Sopenharmony_ci					HL_EQ_SIZE_IN_BYTES,
3068c2ecf20Sopenharmony_ci					q->kernel_address);
3078c2ecf20Sopenharmony_ci}
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_civoid hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
3108c2ecf20Sopenharmony_ci{
3118c2ecf20Sopenharmony_ci	q->ci = 0;
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_ci	/*
3148c2ecf20Sopenharmony_ci	 * It's not enough to just reset the PI/CI because the H/W may have
3158c2ecf20Sopenharmony_ci	 * written valid completion entries before it was halted and therefore
3168c2ecf20Sopenharmony_ci	 * we need to clean the actual queues so we won't process old entries
3178c2ecf20Sopenharmony_ci	 * when the device is operational again
3188c2ecf20Sopenharmony_ci	 */
3198c2ecf20Sopenharmony_ci
3208c2ecf20Sopenharmony_ci	memset(q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES);
3218c2ecf20Sopenharmony_ci}
322