1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2019 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#include "goyaP.h"
9#include "../include/hw_ip/mmu/mmu_general.h"
10#include "../include/hw_ip/mmu/mmu_v1_0.h"
11#include "../include/goya/asic_reg/goya_masks.h"
12#include "../include/goya/goya_reg_map.h"
13
14#include <linux/pci.h>
15#include <linux/genalloc.h>
16#include <linux/hwmon.h>
17#include <linux/io-64-nonatomic-lo-hi.h>
18#include <linux/iommu.h>
19#include <linux/seq_file.h>
20
21/*
22 * GOYA security scheme:
23 *
24 * 1. Host is protected by:
25 *        - Range registers (When MMU is enabled, DMA RR does NOT protect host)
26 *        - MMU
27 *
28 * 2. DRAM is protected by:
29 *        - Range registers (protect the first 512MB)
30 *        - MMU (isolation between users)
31 *
32 * 3. Configuration is protected by:
33 *        - Range registers
34 *        - Protection bits
35 *
36 * When MMU is disabled:
37 *
38 * QMAN DMA: PQ, CQ, CP, DMA are secured.
39 * PQ, CB and the data are on the host.
40 *
41 * QMAN TPC/MME:
42 * PQ, CQ and CP are not secured.
43 * PQ, CB and the data are on the SRAM/DRAM.
44 *
45 * Since QMAN DMA is secured, the driver is parsing the DMA CB:
46 *     - checks DMA pointer
47 *     - WREG, MSG_PROT are not allowed.
48 *     - MSG_LONG/SHORT are allowed.
49 *
50 * A read/write transaction by the QMAN to a protected area will succeed if
51 * and only if the QMAN's CP is secured and MSG_PROT is used
52 *
53 *
54 * When MMU is enabled:
55 *
56 * QMAN DMA: PQ, CQ and CP are secured.
57 * MMU is set to bypass on the Secure props register of the QMAN.
58 * The reasons we don't enable MMU for PQ, CQ and CP are:
59 *     - PQ entry is in kernel address space and the driver doesn't map it.
60 *     - CP writes to MSIX register and to kernel address space (completion
61 *       queue).
62 *
63 * DMA is not secured but because CP is secured, the driver still needs to parse
64 * the CB, but doesn't need to check the DMA addresses.
65 *
66 * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA and
67 * the driver doesn't map memory in MMU.
68 *
69 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
70 *
71 * DMA RR does NOT protect host because DMA is not secured
72 *
73 */
74
75#define GOYA_BOOT_FIT_FILE	"habanalabs/goya/goya-boot-fit.itb"
76#define GOYA_LINUX_FW_FILE	"habanalabs/goya/goya-fit.itb"
77
78#define GOYA_MMU_REGS_NUM		63
79
80#define GOYA_DMA_POOL_BLK_SIZE		0x100		/* 256 bytes */
81
82#define GOYA_RESET_TIMEOUT_MSEC		500		/* 500ms */
83#define GOYA_PLDM_RESET_TIMEOUT_MSEC	20000		/* 20s */
84#define GOYA_RESET_WAIT_MSEC		1		/* 1ms */
85#define GOYA_CPU_RESET_WAIT_MSEC	100		/* 100ms */
86#define GOYA_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
87#define GOYA_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
88#define GOYA_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
89#define GOYA_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
90#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
91#define GOYA_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
92
93#define GOYA_QMAN0_FENCE_VAL		0xD169B243
94
95#define GOYA_MAX_STRING_LEN		20
96
97#define GOYA_CB_POOL_CB_CNT		512
98#define GOYA_CB_POOL_CB_SIZE		0x20000		/* 128KB */
99
100#define IS_QM_IDLE(engine, qm_glbl_sts0) \
101	(((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
102#define IS_DMA_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(DMA, qm_glbl_sts0)
103#define IS_TPC_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(TPC, qm_glbl_sts0)
104#define IS_MME_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(MME, qm_glbl_sts0)
105
106#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
107	(((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
108			engine##_CMDQ_IDLE_MASK)
109#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
110	IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
111#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
112	IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)
113
114#define IS_DMA_IDLE(dma_core_sts0) \
115	!((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)
116
117#define IS_TPC_IDLE(tpc_cfg_sts) \
118	(((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)
119
120#define IS_MME_IDLE(mme_arch_sts) \
121	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
122
123
124static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
125		"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
126		"goya cq 4", "goya cpu eq"
127};
128
129static u16 goya_packet_sizes[MAX_PACKET_ID] = {
130	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
131	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
132	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
133	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
134	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
135	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
136	[PACKET_FENCE]		= sizeof(struct packet_fence),
137	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
138	[PACKET_NOP]		= sizeof(struct packet_nop),
139	[PACKET_STOP]		= sizeof(struct packet_stop)
140};
141
142static inline bool validate_packet_id(enum packet_id id)
143{
144	switch (id) {
145	case PACKET_WREG_32:
146	case PACKET_WREG_BULK:
147	case PACKET_MSG_LONG:
148	case PACKET_MSG_SHORT:
149	case PACKET_CP_DMA:
150	case PACKET_MSG_PROT:
151	case PACKET_FENCE:
152	case PACKET_LIN_DMA:
153	case PACKET_NOP:
154	case PACKET_STOP:
155		return true;
156	default:
157		return false;
158	}
159}
160
161static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
162	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
163	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
164	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
165	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
166	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
167	mmTPC0_QM_GLBL_SECURE_PROPS,
168	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
169	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
170	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
171	mmTPC0_CFG_ARUSER,
172	mmTPC0_CFG_AWUSER,
173	mmTPC1_QM_GLBL_SECURE_PROPS,
174	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
175	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
176	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
177	mmTPC1_CFG_ARUSER,
178	mmTPC1_CFG_AWUSER,
179	mmTPC2_QM_GLBL_SECURE_PROPS,
180	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
181	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
182	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
183	mmTPC2_CFG_ARUSER,
184	mmTPC2_CFG_AWUSER,
185	mmTPC3_QM_GLBL_SECURE_PROPS,
186	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
187	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
188	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
189	mmTPC3_CFG_ARUSER,
190	mmTPC3_CFG_AWUSER,
191	mmTPC4_QM_GLBL_SECURE_PROPS,
192	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
193	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
194	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
195	mmTPC4_CFG_ARUSER,
196	mmTPC4_CFG_AWUSER,
197	mmTPC5_QM_GLBL_SECURE_PROPS,
198	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
199	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
200	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
201	mmTPC5_CFG_ARUSER,
202	mmTPC5_CFG_AWUSER,
203	mmTPC6_QM_GLBL_SECURE_PROPS,
204	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
205	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
206	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
207	mmTPC6_CFG_ARUSER,
208	mmTPC6_CFG_AWUSER,
209	mmTPC7_QM_GLBL_SECURE_PROPS,
210	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
211	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
212	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
213	mmTPC7_CFG_ARUSER,
214	mmTPC7_CFG_AWUSER,
215	mmMME_QM_GLBL_SECURE_PROPS,
216	mmMME_QM_GLBL_NON_SECURE_PROPS,
217	mmMME_CMDQ_GLBL_SECURE_PROPS,
218	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
219	mmMME_SBA_CONTROL_DATA,
220	mmMME_SBB_CONTROL_DATA,
221	mmMME_SBC_CONTROL_DATA,
222	mmMME_WBC_CONTROL_DATA,
223	mmPCIE_WRAP_PSOC_ARUSER,
224	mmPCIE_WRAP_PSOC_AWUSER
225};
226
227static u32 goya_all_events[] = {
228	GOYA_ASYNC_EVENT_ID_PCIE_IF,
229	GOYA_ASYNC_EVENT_ID_TPC0_ECC,
230	GOYA_ASYNC_EVENT_ID_TPC1_ECC,
231	GOYA_ASYNC_EVENT_ID_TPC2_ECC,
232	GOYA_ASYNC_EVENT_ID_TPC3_ECC,
233	GOYA_ASYNC_EVENT_ID_TPC4_ECC,
234	GOYA_ASYNC_EVENT_ID_TPC5_ECC,
235	GOYA_ASYNC_EVENT_ID_TPC6_ECC,
236	GOYA_ASYNC_EVENT_ID_TPC7_ECC,
237	GOYA_ASYNC_EVENT_ID_MME_ECC,
238	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
239	GOYA_ASYNC_EVENT_ID_MMU_ECC,
240	GOYA_ASYNC_EVENT_ID_DMA_MACRO,
241	GOYA_ASYNC_EVENT_ID_DMA_ECC,
242	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
243	GOYA_ASYNC_EVENT_ID_PSOC_MEM,
244	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
245	GOYA_ASYNC_EVENT_ID_SRAM0,
246	GOYA_ASYNC_EVENT_ID_SRAM1,
247	GOYA_ASYNC_EVENT_ID_SRAM2,
248	GOYA_ASYNC_EVENT_ID_SRAM3,
249	GOYA_ASYNC_EVENT_ID_SRAM4,
250	GOYA_ASYNC_EVENT_ID_SRAM5,
251	GOYA_ASYNC_EVENT_ID_SRAM6,
252	GOYA_ASYNC_EVENT_ID_SRAM7,
253	GOYA_ASYNC_EVENT_ID_SRAM8,
254	GOYA_ASYNC_EVENT_ID_SRAM9,
255	GOYA_ASYNC_EVENT_ID_SRAM10,
256	GOYA_ASYNC_EVENT_ID_SRAM11,
257	GOYA_ASYNC_EVENT_ID_SRAM12,
258	GOYA_ASYNC_EVENT_ID_SRAM13,
259	GOYA_ASYNC_EVENT_ID_SRAM14,
260	GOYA_ASYNC_EVENT_ID_SRAM15,
261	GOYA_ASYNC_EVENT_ID_SRAM16,
262	GOYA_ASYNC_EVENT_ID_SRAM17,
263	GOYA_ASYNC_EVENT_ID_SRAM18,
264	GOYA_ASYNC_EVENT_ID_SRAM19,
265	GOYA_ASYNC_EVENT_ID_SRAM20,
266	GOYA_ASYNC_EVENT_ID_SRAM21,
267	GOYA_ASYNC_EVENT_ID_SRAM22,
268	GOYA_ASYNC_EVENT_ID_SRAM23,
269	GOYA_ASYNC_EVENT_ID_SRAM24,
270	GOYA_ASYNC_EVENT_ID_SRAM25,
271	GOYA_ASYNC_EVENT_ID_SRAM26,
272	GOYA_ASYNC_EVENT_ID_SRAM27,
273	GOYA_ASYNC_EVENT_ID_SRAM28,
274	GOYA_ASYNC_EVENT_ID_SRAM29,
275	GOYA_ASYNC_EVENT_ID_GIC500,
276	GOYA_ASYNC_EVENT_ID_PLL0,
277	GOYA_ASYNC_EVENT_ID_PLL1,
278	GOYA_ASYNC_EVENT_ID_PLL3,
279	GOYA_ASYNC_EVENT_ID_PLL4,
280	GOYA_ASYNC_EVENT_ID_PLL5,
281	GOYA_ASYNC_EVENT_ID_PLL6,
282	GOYA_ASYNC_EVENT_ID_AXI_ECC,
283	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
284	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
285	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
286	GOYA_ASYNC_EVENT_ID_PCIE_DEC,
287	GOYA_ASYNC_EVENT_ID_TPC0_DEC,
288	GOYA_ASYNC_EVENT_ID_TPC1_DEC,
289	GOYA_ASYNC_EVENT_ID_TPC2_DEC,
290	GOYA_ASYNC_EVENT_ID_TPC3_DEC,
291	GOYA_ASYNC_EVENT_ID_TPC4_DEC,
292	GOYA_ASYNC_EVENT_ID_TPC5_DEC,
293	GOYA_ASYNC_EVENT_ID_TPC6_DEC,
294	GOYA_ASYNC_EVENT_ID_TPC7_DEC,
295	GOYA_ASYNC_EVENT_ID_MME_WACS,
296	GOYA_ASYNC_EVENT_ID_MME_WACSD,
297	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
298	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
299	GOYA_ASYNC_EVENT_ID_PSOC,
300	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
301	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
302	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
303	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
304	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
305	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
306	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
307	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
308	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
309	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
310	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
311	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
312	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
313	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
314	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
315	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
316	GOYA_ASYNC_EVENT_ID_TPC0_QM,
317	GOYA_ASYNC_EVENT_ID_TPC1_QM,
318	GOYA_ASYNC_EVENT_ID_TPC2_QM,
319	GOYA_ASYNC_EVENT_ID_TPC3_QM,
320	GOYA_ASYNC_EVENT_ID_TPC4_QM,
321	GOYA_ASYNC_EVENT_ID_TPC5_QM,
322	GOYA_ASYNC_EVENT_ID_TPC6_QM,
323	GOYA_ASYNC_EVENT_ID_TPC7_QM,
324	GOYA_ASYNC_EVENT_ID_MME_QM,
325	GOYA_ASYNC_EVENT_ID_MME_CMDQ,
326	GOYA_ASYNC_EVENT_ID_DMA0_QM,
327	GOYA_ASYNC_EVENT_ID_DMA1_QM,
328	GOYA_ASYNC_EVENT_ID_DMA2_QM,
329	GOYA_ASYNC_EVENT_ID_DMA3_QM,
330	GOYA_ASYNC_EVENT_ID_DMA4_QM,
331	GOYA_ASYNC_EVENT_ID_DMA0_CH,
332	GOYA_ASYNC_EVENT_ID_DMA1_CH,
333	GOYA_ASYNC_EVENT_ID_DMA2_CH,
334	GOYA_ASYNC_EVENT_ID_DMA3_CH,
335	GOYA_ASYNC_EVENT_ID_DMA4_CH,
336	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
337	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
338	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
339	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
340	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
341	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
342	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
343	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
344	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
345	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
346	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
347	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
348	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
349	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
350	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
351	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
352	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
353};
354
355static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
356static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
357static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
358static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
359
360int goya_get_fixed_properties(struct hl_device *hdev)
361{
362	struct asic_fixed_properties *prop = &hdev->asic_prop;
363	int i;
364
365	prop->max_queues = GOYA_QUEUE_ID_SIZE;
366	prop->hw_queues_props = kcalloc(prop->max_queues,
367			sizeof(struct hw_queue_properties),
368			GFP_KERNEL);
369
370	if (!prop->hw_queues_props)
371		return -ENOMEM;
372
373	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
374		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
375		prop->hw_queues_props[i].driver_only = 0;
376		prop->hw_queues_props[i].requires_kernel_cb = 1;
377	}
378
379	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
380		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
381		prop->hw_queues_props[i].driver_only = 1;
382		prop->hw_queues_props[i].requires_kernel_cb = 0;
383	}
384
385	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
386			NUMBER_OF_INT_HW_QUEUES; i++) {
387		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
388		prop->hw_queues_props[i].driver_only = 0;
389		prop->hw_queues_props[i].requires_kernel_cb = 0;
390	}
391
392	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
393
394	prop->dram_base_address = DRAM_PHYS_BASE;
395	prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
396	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
397	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
398
399	prop->sram_base_address = SRAM_BASE_ADDR;
400	prop->sram_size = SRAM_SIZE;
401	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
402	prop->sram_user_base_address = prop->sram_base_address +
403						SRAM_USER_BASE_OFFSET;
404
405	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
406	prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
407	if (hdev->pldm)
408		prop->mmu_pgt_size = 0x800000; /* 8MB */
409	else
410		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
411	prop->mmu_pte_size = HL_PTE_SIZE;
412	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
413	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
414	prop->dram_page_size = PAGE_SIZE_2MB;
415
416	prop->dmmu.hop0_shift = HOP0_SHIFT;
417	prop->dmmu.hop1_shift = HOP1_SHIFT;
418	prop->dmmu.hop2_shift = HOP2_SHIFT;
419	prop->dmmu.hop3_shift = HOP3_SHIFT;
420	prop->dmmu.hop4_shift = HOP4_SHIFT;
421	prop->dmmu.hop0_mask = HOP0_MASK;
422	prop->dmmu.hop1_mask = HOP1_MASK;
423	prop->dmmu.hop2_mask = HOP2_MASK;
424	prop->dmmu.hop3_mask = HOP3_MASK;
425	prop->dmmu.hop4_mask = HOP4_MASK;
426	prop->dmmu.start_addr = VA_DDR_SPACE_START;
427	prop->dmmu.end_addr = VA_DDR_SPACE_END;
428	prop->dmmu.page_size = PAGE_SIZE_2MB;
429	prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
430
431	/* shifts and masks are the same in PMMU and DMMU */
432	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
433	prop->pmmu.start_addr = VA_HOST_SPACE_START;
434	prop->pmmu.end_addr = VA_HOST_SPACE_END;
435	prop->pmmu.page_size = PAGE_SIZE_4KB;
436	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
437
438	/* PMMU and HPMMU are the same except of page size */
439	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
440	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
441
442	prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
443	prop->cfg_size = CFG_SIZE;
444	prop->max_asid = MAX_ASID;
445	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
446	prop->high_pll = PLL_HIGH_DEFAULT;
447	prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
448	prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
449	prop->max_power_default = MAX_POWER_DEFAULT;
450	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
451	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
452	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
453
454	strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
455		CARD_NAME_MAX_LEN);
456
457	prop->max_pending_cs = GOYA_MAX_PENDING_CS;
458
459	return 0;
460}
461
462/*
463 * goya_pci_bars_map - Map PCI BARS of Goya device
464 *
465 * @hdev: pointer to hl_device structure
466 *
467 * Request PCI regions and map them to kernel virtual addresses.
468 * Returns 0 on success
469 *
470 */
471static int goya_pci_bars_map(struct hl_device *hdev)
472{
473	static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
474	bool is_wc[3] = {false, false, true};
475	int rc;
476
477	rc = hl_pci_bars_map(hdev, name, is_wc);
478	if (rc)
479		return rc;
480
481	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
482			(CFG_BASE - SRAM_BASE_ADDR);
483
484	return 0;
485}
486
487static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
488{
489	struct goya_device *goya = hdev->asic_specific;
490	struct hl_inbound_pci_region pci_region;
491	u64 old_addr = addr;
492	int rc;
493
494	if ((goya) && (goya->ddr_bar_cur_addr == addr))
495		return old_addr;
496
497	/* Inbound Region 1 - Bar 4 - Point to DDR */
498	pci_region.mode = PCI_BAR_MATCH_MODE;
499	pci_region.bar = DDR_BAR_ID;
500	pci_region.addr = addr;
501	rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
502	if (rc)
503		return U64_MAX;
504
505	if (goya) {
506		old_addr = goya->ddr_bar_cur_addr;
507		goya->ddr_bar_cur_addr = addr;
508	}
509
510	return old_addr;
511}
512
513/*
514 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
515 *
516 * @hdev: pointer to hl_device structure
517 *
518 * This is needed in case the firmware doesn't initialize the iATU
519 *
520 */
521static int goya_init_iatu(struct hl_device *hdev)
522{
523	struct hl_inbound_pci_region inbound_region;
524	struct hl_outbound_pci_region outbound_region;
525	int rc;
526
527	/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
528	inbound_region.mode = PCI_BAR_MATCH_MODE;
529	inbound_region.bar = SRAM_CFG_BAR_ID;
530	inbound_region.addr = SRAM_BASE_ADDR;
531	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
532	if (rc)
533		goto done;
534
535	/* Inbound Region 1 - Bar 4 - Point to DDR */
536	inbound_region.mode = PCI_BAR_MATCH_MODE;
537	inbound_region.bar = DDR_BAR_ID;
538	inbound_region.addr = DRAM_PHYS_BASE;
539	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
540	if (rc)
541		goto done;
542
543	hdev->asic_funcs->set_dma_mask_from_fw(hdev);
544
545	/* Outbound Region 0 - Point to Host  */
546	outbound_region.addr = HOST_PHYS_BASE;
547	outbound_region.size = HOST_PHYS_SIZE;
548	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
549
550done:
551	return rc;
552}
553
554/*
555 * goya_early_init - GOYA early initialization code
556 *
557 * @hdev: pointer to hl_device structure
558 *
559 * Verify PCI bars
560 * Set DMA masks
561 * PCI controller initialization
562 * Map PCI bars
563 *
564 */
565static int goya_early_init(struct hl_device *hdev)
566{
567	struct asic_fixed_properties *prop = &hdev->asic_prop;
568	struct pci_dev *pdev = hdev->pdev;
569	u32 val;
570	int rc;
571
572	rc = goya_get_fixed_properties(hdev);
573	if (rc) {
574		dev_err(hdev->dev, "Failed to get fixed properties\n");
575		return rc;
576	}
577
578	/* Check BAR sizes */
579	if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
580		dev_err(hdev->dev,
581			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
582			SRAM_CFG_BAR_ID,
583			(unsigned long long) pci_resource_len(pdev,
584							SRAM_CFG_BAR_ID),
585			CFG_BAR_SIZE);
586		rc = -ENODEV;
587		goto free_queue_props;
588	}
589
590	if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
591		dev_err(hdev->dev,
592			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
593			MSIX_BAR_ID,
594			(unsigned long long) pci_resource_len(pdev,
595								MSIX_BAR_ID),
596			MSIX_BAR_SIZE);
597		rc = -ENODEV;
598		goto free_queue_props;
599	}
600
601	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
602
603	rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
604			mmCPU_BOOT_ERR0, GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
605	if (rc)
606		goto free_queue_props;
607
608	/* Goya Firmware does not support security */
609	prop->fw_security_disabled = true;
610	dev_info(hdev->dev, "firmware-level security is disabled\n");
611
612	if (!hdev->pldm) {
613		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
614		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
615			dev_warn(hdev->dev,
616				"PCI strap is not configured correctly, PCI bus errors may occur\n");
617	}
618
619	return 0;
620
621free_queue_props:
622	kfree(hdev->asic_prop.hw_queues_props);
623	return rc;
624}
625
626/*
627 * goya_early_fini - GOYA early finalization code
628 *
629 * @hdev: pointer to hl_device structure
630 *
631 * Unmap PCI bars
632 *
633 */
634static int goya_early_fini(struct hl_device *hdev)
635{
636	kfree(hdev->asic_prop.hw_queues_props);
637	hl_pci_fini(hdev);
638
639	return 0;
640}
641
642static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
643{
644	/* mask to zero the MMBP and ASID bits */
645	WREG32_AND(reg, ~0x7FF);
646	WREG32_OR(reg, asid);
647}
648
649static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
650{
651	struct goya_device *goya = hdev->asic_specific;
652
653	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
654		return;
655
656	if (secure)
657		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
658	else
659		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);
660
661	RREG32(mmDMA_QM_0_GLBL_PROT);
662}
663
664/*
665 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
666 *
667 * @hdev: pointer to hl_device structure
668 *
669 */
670static void goya_fetch_psoc_frequency(struct hl_device *hdev)
671{
672	struct asic_fixed_properties *prop = &hdev->asic_prop;
673	u32 trace_freq = 0;
674	u32 pll_clk = 0;
675	u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
676	u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
677	u32 nr = RREG32(mmPSOC_PCI_PLL_NR);
678	u32 nf = RREG32(mmPSOC_PCI_PLL_NF);
679	u32 od = RREG32(mmPSOC_PCI_PLL_OD);
680
681	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
682		if (div_sel == DIV_SEL_REF_CLK)
683			trace_freq = PLL_REF_CLK;
684		else
685			trace_freq = PLL_REF_CLK / (div_fctr + 1);
686	} else if (div_sel == DIV_SEL_PLL_CLK ||
687					div_sel == DIV_SEL_DIVIDED_PLL) {
688		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
689		if (div_sel == DIV_SEL_PLL_CLK)
690			trace_freq = pll_clk;
691		else
692			trace_freq = pll_clk / (div_fctr + 1);
693	} else {
694		dev_warn(hdev->dev,
695			"Received invalid div select value: %d", div_sel);
696	}
697
698	prop->psoc_timestamp_frequency = trace_freq;
699	prop->psoc_pci_pll_nr = nr;
700	prop->psoc_pci_pll_nf = nf;
701	prop->psoc_pci_pll_od = od;
702	prop->psoc_pci_pll_div_factor = div_fctr;
703}
704
705int goya_late_init(struct hl_device *hdev)
706{
707	struct asic_fixed_properties *prop = &hdev->asic_prop;
708	int rc;
709
710	goya_fetch_psoc_frequency(hdev);
711
712	rc = goya_mmu_clear_pgt_range(hdev);
713	if (rc) {
714		dev_err(hdev->dev,
715			"Failed to clear MMU page tables range %d\n", rc);
716		return rc;
717	}
718
719	rc = goya_mmu_set_dram_default_page(hdev);
720	if (rc) {
721		dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
722		return rc;
723	}
724
725	rc = goya_mmu_add_mappings_for_device_cpu(hdev);
726	if (rc)
727		return rc;
728
729	rc = goya_init_cpu_queues(hdev);
730	if (rc)
731		return rc;
732
733	rc = goya_test_cpu_queue(hdev);
734	if (rc)
735		return rc;
736
737	rc = goya_cpucp_info_get(hdev);
738	if (rc) {
739		dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
740		return rc;
741	}
742
743	/* Now that we have the DRAM size in ASIC prop, we need to check
744	 * its size and configure the DMA_IF DDR wrap protection (which is in
745	 * the MMU block) accordingly. The value is the log2 of the DRAM size
746	 */
747	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));
748
749	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
750	if (rc) {
751		dev_err(hdev->dev,
752			"Failed to enable PCI access from CPU %d\n", rc);
753		return rc;
754	}
755
756	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
757			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
758
759	return 0;
760}
761
762/*
763 * goya_late_fini - GOYA late tear-down code
764 *
765 * @hdev: pointer to hl_device structure
766 *
767 * Free sensors allocated structures
768 */
769void goya_late_fini(struct hl_device *hdev)
770{
771	const struct hwmon_channel_info **channel_info_arr;
772	int i = 0;
773
774	if (!hdev->hl_chip_info->info)
775		return;
776
777	channel_info_arr = hdev->hl_chip_info->info;
778
779	while (channel_info_arr[i]) {
780		kfree(channel_info_arr[i]->config);
781		kfree(channel_info_arr[i]);
782		i++;
783	}
784
785	kfree(channel_info_arr);
786
787	hdev->hl_chip_info->info = NULL;
788}
789
790/*
791 * goya_sw_init - Goya software initialization code
792 *
793 * @hdev: pointer to hl_device structure
794 *
795 */
796static int goya_sw_init(struct hl_device *hdev)
797{
798	struct goya_device *goya;
799	int rc;
800
801	/* Allocate device structure */
802	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
803	if (!goya)
804		return -ENOMEM;
805
806	/* according to goya_init_iatu */
807	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;
808
809	goya->mme_clk = GOYA_PLL_FREQ_LOW;
810	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
811	goya->ic_clk = GOYA_PLL_FREQ_LOW;
812
813	hdev->asic_specific = goya;
814
815	/* Create DMA pool for small allocations */
816	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
817			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
818	if (!hdev->dma_pool) {
819		dev_err(hdev->dev, "failed to create DMA pool\n");
820		rc = -ENOMEM;
821		goto free_goya_device;
822	}
823
824	hdev->cpu_accessible_dma_mem =
825			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
826					HL_CPU_ACCESSIBLE_MEM_SIZE,
827					&hdev->cpu_accessible_dma_address,
828					GFP_KERNEL | __GFP_ZERO);
829
830	if (!hdev->cpu_accessible_dma_mem) {
831		rc = -ENOMEM;
832		goto free_dma_pool;
833	}
834
835	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
836		&hdev->cpu_accessible_dma_address);
837
838	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
839	if (!hdev->cpu_accessible_dma_pool) {
840		dev_err(hdev->dev,
841			"Failed to create CPU accessible DMA pool\n");
842		rc = -ENOMEM;
843		goto free_cpu_dma_mem;
844	}
845
846	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
847				(uintptr_t) hdev->cpu_accessible_dma_mem,
848				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
849	if (rc) {
850		dev_err(hdev->dev,
851			"Failed to add memory to CPU accessible DMA pool\n");
852		rc = -EFAULT;
853		goto free_cpu_accessible_dma_pool;
854	}
855
856	spin_lock_init(&goya->hw_queues_lock);
857	hdev->supports_coresight = true;
858	hdev->supports_soft_reset = true;
859
860	return 0;
861
862free_cpu_accessible_dma_pool:
863	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
864free_cpu_dma_mem:
865	hdev->asic_funcs->asic_dma_free_coherent(hdev,
866			HL_CPU_ACCESSIBLE_MEM_SIZE,
867			hdev->cpu_accessible_dma_mem,
868			hdev->cpu_accessible_dma_address);
869free_dma_pool:
870	dma_pool_destroy(hdev->dma_pool);
871free_goya_device:
872	kfree(goya);
873
874	return rc;
875}
876
877/*
878 * goya_sw_fini - Goya software tear-down code
879 *
880 * @hdev: pointer to hl_device structure
881 *
882 */
883static int goya_sw_fini(struct hl_device *hdev)
884{
885	struct goya_device *goya = hdev->asic_specific;
886
887	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
888
889	hdev->asic_funcs->asic_dma_free_coherent(hdev,
890			HL_CPU_ACCESSIBLE_MEM_SIZE,
891			hdev->cpu_accessible_dma_mem,
892			hdev->cpu_accessible_dma_address);
893
894	dma_pool_destroy(hdev->dma_pool);
895
896	kfree(goya);
897
898	return 0;
899}
900
901static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
902		dma_addr_t bus_address)
903{
904	struct goya_device *goya = hdev->asic_specific;
905	u32 mtr_base_lo, mtr_base_hi;
906	u32 so_base_lo, so_base_hi;
907	u32 gic_base_lo, gic_base_hi;
908	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
909	u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;
910
911	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
912	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
913	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
914	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
915
916	gic_base_lo =
917		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
918	gic_base_hi =
919		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
920
921	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
922	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));
923
924	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
925	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
926	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);
927
928	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
929	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
930	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
931	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
932	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
933	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
934	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
935			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);
936
937	/* PQ has buffer of 2 cache lines, while CQ has 8 lines */
938	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
939	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);
940
941	if (goya->hw_cap_initialized & HW_CAP_MMU)
942		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
943	else
944		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);
945
946	if (hdev->stop_on_err)
947		dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;
948
949	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
950	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
951}
952
953static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
954{
955	u32 gic_base_lo, gic_base_hi;
956	u64 sob_addr;
957	u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);
958
959	gic_base_lo =
960		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
961	gic_base_hi =
962		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
963
964	WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
965	WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
966	WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
967			GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);
968
969	if (dma_id)
970		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
971				(dma_id - 1) * 4;
972	else
973		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
974
975	WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
976	WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
977}
978
979/*
980 * goya_init_dma_qmans - Initialize QMAN DMA registers
981 *
982 * @hdev: pointer to hl_device structure
983 *
984 * Initialize the H/W registers of the QMAN DMA channels
985 *
986 */
987void goya_init_dma_qmans(struct hl_device *hdev)
988{
989	struct goya_device *goya = hdev->asic_specific;
990	struct hl_hw_queue *q;
991	int i;
992
993	if (goya->hw_cap_initialized & HW_CAP_DMA)
994		return;
995
996	q = &hdev->kernel_queues[0];
997
998	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
999		q->cq_id = q->msi_vec = i;
1000		goya_init_dma_qman(hdev, i, q->bus_address);
1001		goya_init_dma_ch(hdev, i);
1002	}
1003
1004	goya->hw_cap_initialized |= HW_CAP_DMA;
1005}
1006
1007/*
1008 * goya_disable_external_queues - Disable external queues
1009 *
1010 * @hdev: pointer to hl_device structure
1011 *
1012 */
1013static void goya_disable_external_queues(struct hl_device *hdev)
1014{
1015	struct goya_device *goya = hdev->asic_specific;
1016
1017	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
1018		return;
1019
1020	WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
1021	WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
1022	WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
1023	WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
1024	WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
1025}
1026
1027static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
1028				u32 cp_sts_reg, u32 glbl_sts0_reg)
1029{
1030	int rc;
1031	u32 status;
1032
1033	/* use the values of TPC0 as they are all the same*/
1034
1035	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
1036
1037	status = RREG32(cp_sts_reg);
1038	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
1039		rc = hl_poll_timeout(
1040			hdev,
1041			cp_sts_reg,
1042			status,
1043			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
1044			1000,
1045			QMAN_FENCE_TIMEOUT_USEC);
1046
1047		/* if QMAN is stuck in fence no need to check for stop */
1048		if (rc)
1049			return 0;
1050	}
1051
1052	rc = hl_poll_timeout(
1053		hdev,
1054		glbl_sts0_reg,
1055		status,
1056		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
1057		1000,
1058		QMAN_STOP_TIMEOUT_USEC);
1059
1060	if (rc) {
1061		dev_err(hdev->dev,
1062			"Timeout while waiting for QMAN to stop\n");
1063		return -EINVAL;
1064	}
1065
1066	return 0;
1067}
1068
1069/*
1070 * goya_stop_external_queues - Stop external queues
1071 *
1072 * @hdev: pointer to hl_device structure
1073 *
1074 * Returns 0 on success
1075 *
1076 */
1077static int goya_stop_external_queues(struct hl_device *hdev)
1078{
1079	int rc, retval = 0;
1080
1081	struct goya_device *goya = hdev->asic_specific;
1082
1083	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
1084		return retval;
1085
1086	rc = goya_stop_queue(hdev,
1087			mmDMA_QM_0_GLBL_CFG1,
1088			mmDMA_QM_0_CP_STS,
1089			mmDMA_QM_0_GLBL_STS0);
1090
1091	if (rc) {
1092		dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
1093		retval = -EIO;
1094	}
1095
1096	rc = goya_stop_queue(hdev,
1097			mmDMA_QM_1_GLBL_CFG1,
1098			mmDMA_QM_1_CP_STS,
1099			mmDMA_QM_1_GLBL_STS0);
1100
1101	if (rc) {
1102		dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
1103		retval = -EIO;
1104	}
1105
1106	rc = goya_stop_queue(hdev,
1107			mmDMA_QM_2_GLBL_CFG1,
1108			mmDMA_QM_2_CP_STS,
1109			mmDMA_QM_2_GLBL_STS0);
1110
1111	if (rc) {
1112		dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
1113		retval = -EIO;
1114	}
1115
1116	rc = goya_stop_queue(hdev,
1117			mmDMA_QM_3_GLBL_CFG1,
1118			mmDMA_QM_3_CP_STS,
1119			mmDMA_QM_3_GLBL_STS0);
1120
1121	if (rc) {
1122		dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
1123		retval = -EIO;
1124	}
1125
1126	rc = goya_stop_queue(hdev,
1127			mmDMA_QM_4_GLBL_CFG1,
1128			mmDMA_QM_4_CP_STS,
1129			mmDMA_QM_4_GLBL_STS0);
1130
1131	if (rc) {
1132		dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
1133		retval = -EIO;
1134	}
1135
1136	return retval;
1137}
1138
1139/*
1140 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
1141 *
1142 * @hdev: pointer to hl_device structure
1143 *
1144 * Returns 0 on success
1145 *
1146 */
1147int goya_init_cpu_queues(struct hl_device *hdev)
1148{
1149	struct goya_device *goya = hdev->asic_specific;
1150	struct hl_eq *eq;
1151	u32 status;
1152	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
1153	int err;
1154
1155	if (!hdev->cpu_queues_enable)
1156		return 0;
1157
1158	if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
1159		return 0;
1160
1161	eq = &hdev->event_queue;
1162
1163	WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
1164	WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
1165
1166	WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
1167	WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
1168
1169	WREG32(mmCPU_CQ_BASE_ADDR_LOW,
1170			lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
1171	WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
1172			upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
1173
1174	WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
1175	WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
1176	WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
1177
1178	/* Used for EQ CI */
1179	WREG32(mmCPU_EQ_CI, 0);
1180
1181	WREG32(mmCPU_IF_PF_PQ_PI, 0);
1182
1183	WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);
1184
1185	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
1186			GOYA_ASYNC_EVENT_ID_PI_UPDATE);
1187
1188	err = hl_poll_timeout(
1189		hdev,
1190		mmCPU_PQ_INIT_STATUS,
1191		status,
1192		(status == PQ_INIT_STATUS_READY_FOR_HOST),
1193		1000,
1194		GOYA_CPU_TIMEOUT_USEC);
1195
1196	if (err) {
1197		dev_err(hdev->dev,
1198			"Failed to setup communication with device CPU\n");
1199		return -EIO;
1200	}
1201
1202	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
1203	return 0;
1204}
1205
1206static void goya_set_pll_refclk(struct hl_device *hdev)
1207{
1208	WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
1209	WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
1210	WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
1211	WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);
1212
1213	WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
1214	WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
1215	WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
1216	WREG32(mmIC_PLL_DIV_SEL_3, 0x0);
1217
1218	WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
1219	WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
1220	WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
1221	WREG32(mmMC_PLL_DIV_SEL_3, 0x0);
1222
1223	WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
1224	WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
1225	WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
1226	WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);
1227
1228	WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
1229	WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
1230	WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
1231	WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);
1232
1233	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
1234	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
1235	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
1236	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);
1237
1238	WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
1239	WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
1240	WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
1241	WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
1242}
1243
1244static void goya_disable_clk_rlx(struct hl_device *hdev)
1245{
1246	WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
1247	WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
1248}
1249
1250static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
1251{
1252	u64 tpc_eml_address;
1253	u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
1254	int err, slm_index;
1255
1256	tpc_offset = tpc_id * 0x40000;
1257	tpc_eml_offset = tpc_id * 0x200000;
1258	tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
1259	tpc_slm_offset = tpc_eml_address + 0x100000;
1260
1261	/*
1262	 * Workaround for Bug H2 #2443 :
1263	 * "TPC SB is not initialized on chip reset"
1264	 */
1265
1266	val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
1267	if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
1268		dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
1269			tpc_id);
1270
1271	WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);
1272
1273	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
1274	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
1275	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
1276	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
1277	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
1278	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
1279	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
1280	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
1281	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
1282	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);
1283
1284	WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
1285		1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);
1286
1287	err = hl_poll_timeout(
1288		hdev,
1289		mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
1290		val,
1291		(val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
1292		1000,
1293		HL_DEVICE_TIMEOUT_USEC);
1294
1295	if (err)
1296		dev_err(hdev->dev,
1297			"Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);
1298
1299	WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
1300		1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);
1301
1302	msleep(GOYA_RESET_WAIT_MSEC);
1303
1304	WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
1305		~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));
1306
1307	msleep(GOYA_RESET_WAIT_MSEC);
1308
1309	for (slm_index = 0 ; slm_index < 256 ; slm_index++)
1310		WREG32(tpc_slm_offset + (slm_index << 2), 0);
1311
1312	val = RREG32(tpc_slm_offset);
1313}
1314
1315static void goya_tpc_mbist_workaround(struct hl_device *hdev)
1316{
1317	struct goya_device *goya = hdev->asic_specific;
1318	int i;
1319
1320	if (hdev->pldm)
1321		return;
1322
1323	if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
1324		return;
1325
1326	/* Workaround for H2 #2443 */
1327
1328	for (i = 0 ; i < TPC_MAX_NUM ; i++)
1329		_goya_tpc_mbist_workaround(hdev, i);
1330
1331	goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
1332}
1333
1334/*
1335 * goya_init_golden_registers - Initialize golden registers
1336 *
1337 * @hdev: pointer to hl_device structure
1338 *
1339 * Initialize the H/W registers of the device
1340 *
1341 */
1342static void goya_init_golden_registers(struct hl_device *hdev)
1343{
1344	struct goya_device *goya = hdev->asic_specific;
1345	u32 polynom[10], tpc_intr_mask, offset;
1346	int i;
1347
1348	if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
1349		return;
1350
1351	polynom[0] = 0x00020080;
1352	polynom[1] = 0x00401000;
1353	polynom[2] = 0x00200800;
1354	polynom[3] = 0x00002000;
1355	polynom[4] = 0x00080200;
1356	polynom[5] = 0x00040100;
1357	polynom[6] = 0x00100400;
1358	polynom[7] = 0x00004000;
1359	polynom[8] = 0x00010000;
1360	polynom[9] = 0x00008000;
1361
1362	/* Mask all arithmetic interrupts from TPC */
1363	tpc_intr_mask = 0x7FFF;
1364
1365	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
1366		WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1367		WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1368		WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1369		WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1370		WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1371
1372		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
1373		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
1374		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
1375		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
1376		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);
1377
1378
1379		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
1380		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
1381		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
1382		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
1383		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);
1384
1385		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
1386		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
1387		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
1388		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
1389		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);
1390
1391		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
1392		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
1393		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
1394		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
1395		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);
1396
1397		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
1398		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
1399		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
1400		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
1401		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
1402	}
1403
1404	WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
1405	WREG32(mmMME_AGU, 0x0f0f0f10);
1406	WREG32(mmMME_SEI_MASK, ~0x0);
1407
1408	WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1409	WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1410	WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1411	WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1412	WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1413	WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
1414	WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
1415	WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
1416	WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
1417	WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1418	WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1419	WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
1420	WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
1421	WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
1422	WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
1423	WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
1424	WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
1425	WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
1426	WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
1427	WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
1428	WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
1429	WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
1430	WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
1431	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1432	WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1433	WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1434	WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
1435	WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
1436	WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1437	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
1438	WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1439	WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1440	WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
1441	WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
1442	WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1443	WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1444	WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1445	WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1446	WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
1447	WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
1448	WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
1449	WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
1450	WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
1451	WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
1452	WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
1453	WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
1454	WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1455	WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1456	WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1457	WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1458	WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
1459	WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
1460	WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
1461	WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
1462	WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1463	WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1464	WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1465	WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1466	WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1467	WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1468	WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1469	WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1470	WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1471	WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1472	WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1473	WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
1474	WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
1475	WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1476	WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1477	WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1478	WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1479	WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1480	WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1481	WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1482	WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
1483	WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
1484	WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
1485	WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
1486	WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1487	WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1488	WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1489	WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1490	WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1491	WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1492
1493	WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1494	WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1495	WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
1496	WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
1497	WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1498	WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
1499	WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1500	WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1501	WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1502	WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1503	WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1504	WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);
1505
1506	WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1507	WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
1508	WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
1509	WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
1510	WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
1511	WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
1512	WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1513	WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1514	WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1515	WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1516	WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1517	WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);
1518
1519	WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1520	WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1521	WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
1522	WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
1523	WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
1524	WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
1525	WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
1526	WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
1527	WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
1528	WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1529	WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1530	WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);
1531
1532	WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1533	WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1534	WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
1535	WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1536	WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
1537	WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
1538	WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
1539	WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
1540	WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
1541	WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1542	WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1543	WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);
1544
1545	WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
1546	WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
1547	WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
1548	WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1549	WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
1550	WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
1551	WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
1552	WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
1553	WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1554	WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1555	WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1556	WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1557
1558	WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1559	WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1560	WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
1561	WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
1562	WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1563	WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
1564	WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
1565	WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
1566	WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1567	WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1568	WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1569	WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1570
1571	for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
1572		WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1573		WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1574		WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1575		WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1576		WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1577		WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1578
1579		WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1580		WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1581		WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1582		WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1583		WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1584		WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1585		WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1586		WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1587
1588		WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1589		WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1590	}
1591
1592	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
1593		WREG32(mmMME1_RTR_SCRAMB_EN + offset,
1594				1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
1595		WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
1596				1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
1597	}
1598
1599	for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
1600		/*
1601		 * Workaround for Bug H2 #2441 :
1602		 * "ST.NOP set trace event illegal opcode"
1603		 */
1604		WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);
1605
1606		WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
1607				1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
1608		WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
1609				1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1610
1611		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
1612				ICACHE_FETCH_LINE_NUM, 2);
1613	}
1614
1615	WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
1616	WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
1617			1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1618
1619	WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
1620	WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
1621			1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1622
1623	/*
1624	 * Workaround for H2 #HW-23 bug
1625	 * Set DMA max outstanding read requests to 240 on DMA CH 1.
1626	 * This limitation is still large enough to not affect Gen4 bandwidth.
1627	 * We need to only limit that DMA channel because the user can only read
1628	 * from Host using DMA CH 1
1629	 */
1630	WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
1631
1632	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
1633
1634	goya->hw_cap_initialized |= HW_CAP_GOLDEN;
1635}
1636
1637static void goya_init_mme_qman(struct hl_device *hdev)
1638{
1639	u32 mtr_base_lo, mtr_base_hi;
1640	u32 so_base_lo, so_base_hi;
1641	u32 gic_base_lo, gic_base_hi;
1642	u64 qman_base_addr;
1643
1644	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1645	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1646	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1647	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1648
1649	gic_base_lo =
1650		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1651	gic_base_hi =
1652		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1653
1654	qman_base_addr = hdev->asic_prop.sram_base_address +
1655				MME_QMAN_BASE_OFFSET;
1656
1657	WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
1658	WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
1659	WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
1660	WREG32(mmMME_QM_PQ_PI, 0);
1661	WREG32(mmMME_QM_PQ_CI, 0);
1662	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
1663	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
1664	WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
1665	WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);
1666
1667	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1668	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1669	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1670	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1671
1672	/* QMAN CQ has 8 cache lines */
1673	WREG32(mmMME_QM_CQ_CFG1, 0x00080008);
1674
1675	WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
1676	WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);
1677
1678	WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);
1679
1680	WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);
1681
1682	WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);
1683
1684	WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
1685}
1686
1687static void goya_init_mme_cmdq(struct hl_device *hdev)
1688{
1689	u32 mtr_base_lo, mtr_base_hi;
1690	u32 so_base_lo, so_base_hi;
1691	u32 gic_base_lo, gic_base_hi;
1692
1693	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1694	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1695	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1696	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1697
1698	gic_base_lo =
1699		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1700	gic_base_hi =
1701		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1702
1703	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1704	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1705	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO,	so_base_lo);
1706	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1707
1708	/* CMDQ CQ has 20 cache lines */
1709	WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
1710
1711	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
1712	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
1713
1714	WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
1715
1716	WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
1717
1718	WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
1719
1720	WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
1721}
1722
1723void goya_init_mme_qmans(struct hl_device *hdev)
1724{
1725	struct goya_device *goya = hdev->asic_specific;
1726	u32 so_base_lo, so_base_hi;
1727
1728	if (goya->hw_cap_initialized & HW_CAP_MME)
1729		return;
1730
1731	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1732	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1733
1734	WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
1735	WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
1736
1737	goya_init_mme_qman(hdev);
1738	goya_init_mme_cmdq(hdev);
1739
1740	goya->hw_cap_initialized |= HW_CAP_MME;
1741}
1742
1743static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
1744{
1745	u32 mtr_base_lo, mtr_base_hi;
1746	u32 so_base_lo, so_base_hi;
1747	u32 gic_base_lo, gic_base_hi;
1748	u64 qman_base_addr;
1749	u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
1750
1751	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1752	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1753	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1754	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1755
1756	gic_base_lo =
1757		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1758	gic_base_hi =
1759		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1760
1761	qman_base_addr = hdev->asic_prop.sram_base_address + base_off;
1762
1763	WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
1764	WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
1765	WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
1766	WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
1767	WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
1768	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
1769	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
1770	WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
1771	WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);
1772
1773	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1774	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1775	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1776	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1777
1778	WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);
1779
1780	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1781	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1782
1783	WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
1784			GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);
1785
1786	WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);
1787
1788	WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);
1789
1790	WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
1791}
1792
1793static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
1794{
1795	u32 mtr_base_lo, mtr_base_hi;
1796	u32 so_base_lo, so_base_hi;
1797	u32 gic_base_lo, gic_base_hi;
1798	u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);
1799
1800	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1801	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1802	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1803	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1804
1805	gic_base_lo =
1806		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1807	gic_base_hi =
1808		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1809
1810	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1811	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1812	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1813	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1814
1815	WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);
1816
1817	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1818	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1819
1820	WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
1821			GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);
1822
1823	WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);
1824
1825	WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);
1826
1827	WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
1828}
1829
1830void goya_init_tpc_qmans(struct hl_device *hdev)
1831{
1832	struct goya_device *goya = hdev->asic_specific;
1833	u32 so_base_lo, so_base_hi;
1834	u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
1835			mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
1836	int i;
1837
1838	if (goya->hw_cap_initialized & HW_CAP_TPC)
1839		return;
1840
1841	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1842	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1843
1844	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
1845		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
1846				so_base_lo);
1847		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
1848				so_base_hi);
1849	}
1850
1851	goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
1852	goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
1853	goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
1854	goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
1855	goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
1856	goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
1857	goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
1858	goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
1859
1860	for (i = 0 ; i < TPC_MAX_NUM ; i++)
1861		goya_init_tpc_cmdq(hdev, i);
1862
1863	goya->hw_cap_initialized |= HW_CAP_TPC;
1864}
1865
1866/*
1867 * goya_disable_internal_queues - Disable internal queues
1868 *
1869 * @hdev: pointer to hl_device structure
1870 *
1871 */
1872static void goya_disable_internal_queues(struct hl_device *hdev)
1873{
1874	struct goya_device *goya = hdev->asic_specific;
1875
1876	if (!(goya->hw_cap_initialized & HW_CAP_MME))
1877		goto disable_tpc;
1878
1879	WREG32(mmMME_QM_GLBL_CFG0, 0);
1880	WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
1881
1882disable_tpc:
1883	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1884		return;
1885
1886	WREG32(mmTPC0_QM_GLBL_CFG0, 0);
1887	WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
1888
1889	WREG32(mmTPC1_QM_GLBL_CFG0, 0);
1890	WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
1891
1892	WREG32(mmTPC2_QM_GLBL_CFG0, 0);
1893	WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
1894
1895	WREG32(mmTPC3_QM_GLBL_CFG0, 0);
1896	WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
1897
1898	WREG32(mmTPC4_QM_GLBL_CFG0, 0);
1899	WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
1900
1901	WREG32(mmTPC5_QM_GLBL_CFG0, 0);
1902	WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
1903
1904	WREG32(mmTPC6_QM_GLBL_CFG0, 0);
1905	WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
1906
1907	WREG32(mmTPC7_QM_GLBL_CFG0, 0);
1908	WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
1909}
1910
1911/*
1912 * goya_stop_internal_queues - Stop internal queues
1913 *
1914 * @hdev: pointer to hl_device structure
1915 *
1916 * Returns 0 on success
1917 *
1918 */
1919static int goya_stop_internal_queues(struct hl_device *hdev)
1920{
1921	struct goya_device *goya = hdev->asic_specific;
1922	int rc, retval = 0;
1923
1924	if (!(goya->hw_cap_initialized & HW_CAP_MME))
1925		goto stop_tpc;
1926
1927	/*
1928	 * Each queue (QMAN) is a separate H/W logic. That means that each
1929	 * QMAN can be stopped independently and failure to stop one does NOT
1930	 * mandate we should not try to stop other QMANs
1931	 */
1932
1933	rc = goya_stop_queue(hdev,
1934			mmMME_QM_GLBL_CFG1,
1935			mmMME_QM_CP_STS,
1936			mmMME_QM_GLBL_STS0);
1937
1938	if (rc) {
1939		dev_err(hdev->dev, "failed to stop MME QMAN\n");
1940		retval = -EIO;
1941	}
1942
1943	rc = goya_stop_queue(hdev,
1944			mmMME_CMDQ_GLBL_CFG1,
1945			mmMME_CMDQ_CP_STS,
1946			mmMME_CMDQ_GLBL_STS0);
1947
1948	if (rc) {
1949		dev_err(hdev->dev, "failed to stop MME CMDQ\n");
1950		retval = -EIO;
1951	}
1952
1953stop_tpc:
1954	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
1955		return retval;
1956
1957	rc = goya_stop_queue(hdev,
1958			mmTPC0_QM_GLBL_CFG1,
1959			mmTPC0_QM_CP_STS,
1960			mmTPC0_QM_GLBL_STS0);
1961
1962	if (rc) {
1963		dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
1964		retval = -EIO;
1965	}
1966
1967	rc = goya_stop_queue(hdev,
1968			mmTPC0_CMDQ_GLBL_CFG1,
1969			mmTPC0_CMDQ_CP_STS,
1970			mmTPC0_CMDQ_GLBL_STS0);
1971
1972	if (rc) {
1973		dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
1974		retval = -EIO;
1975	}
1976
1977	rc = goya_stop_queue(hdev,
1978			mmTPC1_QM_GLBL_CFG1,
1979			mmTPC1_QM_CP_STS,
1980			mmTPC1_QM_GLBL_STS0);
1981
1982	if (rc) {
1983		dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
1984		retval = -EIO;
1985	}
1986
1987	rc = goya_stop_queue(hdev,
1988			mmTPC1_CMDQ_GLBL_CFG1,
1989			mmTPC1_CMDQ_CP_STS,
1990			mmTPC1_CMDQ_GLBL_STS0);
1991
1992	if (rc) {
1993		dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
1994		retval = -EIO;
1995	}
1996
1997	rc = goya_stop_queue(hdev,
1998			mmTPC2_QM_GLBL_CFG1,
1999			mmTPC2_QM_CP_STS,
2000			mmTPC2_QM_GLBL_STS0);
2001
2002	if (rc) {
2003		dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
2004		retval = -EIO;
2005	}
2006
2007	rc = goya_stop_queue(hdev,
2008			mmTPC2_CMDQ_GLBL_CFG1,
2009			mmTPC2_CMDQ_CP_STS,
2010			mmTPC2_CMDQ_GLBL_STS0);
2011
2012	if (rc) {
2013		dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
2014		retval = -EIO;
2015	}
2016
2017	rc = goya_stop_queue(hdev,
2018			mmTPC3_QM_GLBL_CFG1,
2019			mmTPC3_QM_CP_STS,
2020			mmTPC3_QM_GLBL_STS0);
2021
2022	if (rc) {
2023		dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
2024		retval = -EIO;
2025	}
2026
2027	rc = goya_stop_queue(hdev,
2028			mmTPC3_CMDQ_GLBL_CFG1,
2029			mmTPC3_CMDQ_CP_STS,
2030			mmTPC3_CMDQ_GLBL_STS0);
2031
2032	if (rc) {
2033		dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
2034		retval = -EIO;
2035	}
2036
2037	rc = goya_stop_queue(hdev,
2038			mmTPC4_QM_GLBL_CFG1,
2039			mmTPC4_QM_CP_STS,
2040			mmTPC4_QM_GLBL_STS0);
2041
2042	if (rc) {
2043		dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
2044		retval = -EIO;
2045	}
2046
2047	rc = goya_stop_queue(hdev,
2048			mmTPC4_CMDQ_GLBL_CFG1,
2049			mmTPC4_CMDQ_CP_STS,
2050			mmTPC4_CMDQ_GLBL_STS0);
2051
2052	if (rc) {
2053		dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
2054		retval = -EIO;
2055	}
2056
2057	rc = goya_stop_queue(hdev,
2058			mmTPC5_QM_GLBL_CFG1,
2059			mmTPC5_QM_CP_STS,
2060			mmTPC5_QM_GLBL_STS0);
2061
2062	if (rc) {
2063		dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
2064		retval = -EIO;
2065	}
2066
2067	rc = goya_stop_queue(hdev,
2068			mmTPC5_CMDQ_GLBL_CFG1,
2069			mmTPC5_CMDQ_CP_STS,
2070			mmTPC5_CMDQ_GLBL_STS0);
2071
2072	if (rc) {
2073		dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
2074		retval = -EIO;
2075	}
2076
2077	rc = goya_stop_queue(hdev,
2078			mmTPC6_QM_GLBL_CFG1,
2079			mmTPC6_QM_CP_STS,
2080			mmTPC6_QM_GLBL_STS0);
2081
2082	if (rc) {
2083		dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
2084		retval = -EIO;
2085	}
2086
2087	rc = goya_stop_queue(hdev,
2088			mmTPC6_CMDQ_GLBL_CFG1,
2089			mmTPC6_CMDQ_CP_STS,
2090			mmTPC6_CMDQ_GLBL_STS0);
2091
2092	if (rc) {
2093		dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
2094		retval = -EIO;
2095	}
2096
2097	rc = goya_stop_queue(hdev,
2098			mmTPC7_QM_GLBL_CFG1,
2099			mmTPC7_QM_CP_STS,
2100			mmTPC7_QM_GLBL_STS0);
2101
2102	if (rc) {
2103		dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
2104		retval = -EIO;
2105	}
2106
2107	rc = goya_stop_queue(hdev,
2108			mmTPC7_CMDQ_GLBL_CFG1,
2109			mmTPC7_CMDQ_CP_STS,
2110			mmTPC7_CMDQ_GLBL_STS0);
2111
2112	if (rc) {
2113		dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
2114		retval = -EIO;
2115	}
2116
2117	return retval;
2118}
2119
2120static void goya_dma_stall(struct hl_device *hdev)
2121{
2122	struct goya_device *goya = hdev->asic_specific;
2123
2124	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
2125		return;
2126
2127	WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
2128	WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
2129	WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
2130	WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
2131	WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
2132}
2133
2134static void goya_tpc_stall(struct hl_device *hdev)
2135{
2136	struct goya_device *goya = hdev->asic_specific;
2137
2138	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2139		return;
2140
2141	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2142	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
2143	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
2144	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
2145	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
2146	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
2147	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
2148	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
2149}
2150
2151static void goya_mme_stall(struct hl_device *hdev)
2152{
2153	struct goya_device *goya = hdev->asic_specific;
2154
2155	if (!(goya->hw_cap_initialized & HW_CAP_MME))
2156		return;
2157
2158	WREG32(mmMME_STALL, 0xFFFFFFFF);
2159}
2160
2161static int goya_enable_msix(struct hl_device *hdev)
2162{
2163	struct goya_device *goya = hdev->asic_specific;
2164	int cq_cnt = hdev->asic_prop.completion_queues_count;
2165	int rc, i, irq_cnt_init, irq;
2166
2167	if (goya->hw_cap_initialized & HW_CAP_MSIX)
2168		return 0;
2169
2170	rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
2171				GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
2172	if (rc < 0) {
2173		dev_err(hdev->dev,
2174			"MSI-X: Failed to enable support -- %d/%d\n",
2175			GOYA_MSIX_ENTRIES, rc);
2176		return rc;
2177	}
2178
2179	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2180		irq = pci_irq_vector(hdev->pdev, i);
2181		rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
2182				&hdev->completion_queue[i]);
2183		if (rc) {
2184			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2185			goto free_irqs;
2186		}
2187	}
2188
2189	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2190
2191	rc = request_irq(irq, hl_irq_handler_eq, 0,
2192			goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
2193			&hdev->event_queue);
2194	if (rc) {
2195		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2196		goto free_irqs;
2197	}
2198
2199	goya->hw_cap_initialized |= HW_CAP_MSIX;
2200	return 0;
2201
2202free_irqs:
2203	for (i = 0 ; i < irq_cnt_init ; i++)
2204		free_irq(pci_irq_vector(hdev->pdev, i),
2205			&hdev->completion_queue[i]);
2206
2207	pci_free_irq_vectors(hdev->pdev);
2208	return rc;
2209}
2210
2211static void goya_sync_irqs(struct hl_device *hdev)
2212{
2213	struct goya_device *goya = hdev->asic_specific;
2214	int i;
2215
2216	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2217		return;
2218
2219	/* Wait for all pending IRQs to be finished */
2220	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2221		synchronize_irq(pci_irq_vector(hdev->pdev, i));
2222
2223	synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
2224}
2225
2226static void goya_disable_msix(struct hl_device *hdev)
2227{
2228	struct goya_device *goya = hdev->asic_specific;
2229	int i, irq;
2230
2231	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2232		return;
2233
2234	goya_sync_irqs(hdev);
2235
2236	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2237	free_irq(irq, &hdev->event_queue);
2238
2239	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
2240		irq = pci_irq_vector(hdev->pdev, i);
2241		free_irq(irq, &hdev->completion_queue[i]);
2242	}
2243
2244	pci_free_irq_vectors(hdev->pdev);
2245
2246	goya->hw_cap_initialized &= ~HW_CAP_MSIX;
2247}
2248
2249static void goya_enable_timestamp(struct hl_device *hdev)
2250{
2251	/* Disable the timestamp counter */
2252	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2253
2254	/* Zero the lower/upper parts of the 64-bit counter */
2255	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2256	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2257
2258	/* Enable the counter */
2259	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2260}
2261
2262static void goya_disable_timestamp(struct hl_device *hdev)
2263{
2264	/* Disable the timestamp counter */
2265	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2266}
2267
2268static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
2269{
2270	u32 wait_timeout_ms;
2271
2272	dev_info(hdev->dev,
2273		"Halting compute engines and disabling interrupts\n");
2274
2275	if (hdev->pldm)
2276		wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2277	else
2278		wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
2279
2280	goya_stop_external_queues(hdev);
2281	goya_stop_internal_queues(hdev);
2282
2283	msleep(wait_timeout_ms);
2284
2285	goya_dma_stall(hdev);
2286	goya_tpc_stall(hdev);
2287	goya_mme_stall(hdev);
2288
2289	msleep(wait_timeout_ms);
2290
2291	goya_disable_external_queues(hdev);
2292	goya_disable_internal_queues(hdev);
2293
2294	goya_disable_timestamp(hdev);
2295
2296	if (hard_reset) {
2297		goya_disable_msix(hdev);
2298		goya_mmu_remove_device_cpu_mappings(hdev);
2299	} else {
2300		goya_sync_irqs(hdev);
2301	}
2302}
2303
2304/*
2305 * goya_load_firmware_to_device() - Load LINUX FW code to device.
2306 * @hdev: Pointer to hl_device structure.
2307 *
2308 * Copy LINUX fw code from firmware file to HBM BAR.
2309 *
2310 * Return: 0 on success, non-zero for failure.
2311 */
2312static int goya_load_firmware_to_device(struct hl_device *hdev)
2313{
2314	void __iomem *dst;
2315
2316	dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2317
2318	return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst);
2319}
2320
2321/*
2322 * goya_load_boot_fit_to_device() - Load boot fit to device.
2323 * @hdev: Pointer to hl_device structure.
2324 *
2325 * Copy boot fit file to SRAM BAR.
2326 *
2327 * Return: 0 on success, non-zero for failure.
2328 */
2329static int goya_load_boot_fit_to_device(struct hl_device *hdev)
2330{
2331	void __iomem *dst;
2332
2333	dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2334
2335	return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst);
2336}
2337
2338/*
2339 * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
2340 * The version string should be located by that offset.
2341 */
2342static void goya_read_device_fw_version(struct hl_device *hdev,
2343					enum hl_fw_component fwc)
2344{
2345	const char *name;
2346	u32 ver_off;
2347	char *dest;
2348
2349	switch (fwc) {
2350	case FW_COMP_UBOOT:
2351		ver_off = RREG32(mmUBOOT_VER_OFFSET);
2352		dest = hdev->asic_prop.uboot_ver;
2353		name = "U-Boot";
2354		break;
2355	case FW_COMP_PREBOOT:
2356		ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2357		dest = hdev->asic_prop.preboot_ver;
2358		name = "Preboot";
2359		break;
2360	default:
2361		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2362		return;
2363	}
2364
2365	ver_off &= ~((u32)SRAM_BASE_ADDR);
2366
2367	if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2368		memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
2369							VERSION_MAX_LEN);
2370	} else {
2371		dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2372								name, ver_off);
2373		strcpy(dest, "unavailable");
2374	}
2375}
2376
2377static int goya_init_cpu(struct hl_device *hdev)
2378{
2379	struct goya_device *goya = hdev->asic_specific;
2380	int rc;
2381
2382	if (!hdev->cpu_enable)
2383		return 0;
2384
2385	if (goya->hw_cap_initialized & HW_CAP_CPU)
2386		return 0;
2387
2388	/*
2389	 * Before pushing u-boot/linux to device, need to set the ddr bar to
2390	 * base address of dram
2391	 */
2392	if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2393		dev_err(hdev->dev,
2394			"failed to map DDR bar to DRAM base address\n");
2395		return -EIO;
2396	}
2397
2398	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2399			mmPSOC_GLOBAL_CONF_UBOOT_MAGIC,
2400			mmCPU_CMD_STATUS_TO_HOST, mmCPU_BOOT_ERR0,
2401			false, GOYA_CPU_TIMEOUT_USEC,
2402			GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
2403
2404	if (rc)
2405		return rc;
2406
2407	goya->hw_cap_initialized |= HW_CAP_CPU;
2408
2409	return 0;
2410}
2411
2412static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
2413						u64 phys_addr)
2414{
2415	u32 status, timeout_usec;
2416	int rc;
2417
2418	if (hdev->pldm)
2419		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
2420	else
2421		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
2422
2423	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
2424	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
2425	WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
2426
2427	rc = hl_poll_timeout(
2428		hdev,
2429		MMU_ASID_BUSY,
2430		status,
2431		!(status & 0x80000000),
2432		1000,
2433		timeout_usec);
2434
2435	if (rc) {
2436		dev_err(hdev->dev,
2437			"Timeout during MMU hop0 config of asid %d\n", asid);
2438		return rc;
2439	}
2440
2441	return 0;
2442}
2443
2444int goya_mmu_init(struct hl_device *hdev)
2445{
2446	struct asic_fixed_properties *prop = &hdev->asic_prop;
2447	struct goya_device *goya = hdev->asic_specific;
2448	u64 hop0_addr;
2449	int rc, i;
2450
2451	if (!hdev->mmu_enable)
2452		return 0;
2453
2454	if (goya->hw_cap_initialized & HW_CAP_MMU)
2455		return 0;
2456
2457	hdev->dram_supports_virtual_memory = true;
2458	hdev->dram_default_page_mapping = true;
2459
2460	for (i = 0 ; i < prop->max_asid ; i++) {
2461		hop0_addr = prop->mmu_pgt_addr +
2462				(i * prop->mmu_hop_table_size);
2463
2464		rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2465		if (rc) {
2466			dev_err(hdev->dev,
2467				"failed to set hop0 addr for asid %d\n", i);
2468			goto err;
2469		}
2470	}
2471
2472	goya->hw_cap_initialized |= HW_CAP_MMU;
2473
2474	/* init MMU cache manage page */
2475	WREG32(mmSTLB_CACHE_INV_BASE_39_8,
2476				lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
2477	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2478
2479	/* Remove follower feature due to performance bug */
2480	WREG32_AND(mmSTLB_STLB_FEATURE_EN,
2481			(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
2482
2483	hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
2484					VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
2485
2486	WREG32(mmMMU_MMU_ENABLE, 1);
2487	WREG32(mmMMU_SPI_MASK, 0xF);
2488
2489	return 0;
2490
2491err:
2492	return rc;
2493}
2494
2495/*
2496 * goya_hw_init - Goya hardware initialization code
2497 *
2498 * @hdev: pointer to hl_device structure
2499 *
2500 * Returns 0 on success
2501 *
2502 */
2503static int goya_hw_init(struct hl_device *hdev)
2504{
2505	struct asic_fixed_properties *prop = &hdev->asic_prop;
2506	int rc;
2507
2508	dev_info(hdev->dev, "Starting initialization of H/W\n");
2509
2510	/* Perform read from the device to make sure device is up */
2511	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2512
2513	/*
2514	 * Let's mark in the H/W that we have reached this point. We check
2515	 * this value in the reset_before_init function to understand whether
2516	 * we need to reset the chip before doing H/W init. This register is
2517	 * cleared by the H/W upon H/W reset
2518	 */
2519	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2520
2521	rc = goya_init_cpu(hdev);
2522	if (rc) {
2523		dev_err(hdev->dev, "failed to initialize CPU\n");
2524		return rc;
2525	}
2526
2527	goya_tpc_mbist_workaround(hdev);
2528
2529	goya_init_golden_registers(hdev);
2530
2531	/*
2532	 * After CPU initialization is finished, change DDR bar mapping inside
2533	 * iATU to point to the start address of the MMU page tables
2534	 */
2535	if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
2536			~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
2537		dev_err(hdev->dev,
2538			"failed to map DDR bar to MMU page tables\n");
2539		return -EIO;
2540	}
2541
2542	rc = goya_mmu_init(hdev);
2543	if (rc)
2544		return rc;
2545
2546	goya_init_security(hdev);
2547
2548	goya_init_dma_qmans(hdev);
2549
2550	goya_init_mme_qmans(hdev);
2551
2552	goya_init_tpc_qmans(hdev);
2553
2554	goya_enable_timestamp(hdev);
2555
2556	/* MSI-X must be enabled before CPU queues are initialized */
2557	rc = goya_enable_msix(hdev);
2558	if (rc)
2559		goto disable_queues;
2560
2561	/* Perform read from the device to flush all MSI-X configuration */
2562	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2563
2564	return 0;
2565
2566disable_queues:
2567	goya_disable_internal_queues(hdev);
2568	goya_disable_external_queues(hdev);
2569
2570	return rc;
2571}
2572
2573/*
2574 * goya_hw_fini - Goya hardware tear-down code
2575 *
2576 * @hdev: pointer to hl_device structure
2577 * @hard_reset: should we do hard reset to all engines or just reset the
2578 *              compute/dma engines
2579 */
2580static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
2581{
2582	struct goya_device *goya = hdev->asic_specific;
2583	u32 reset_timeout_ms, cpu_timeout_ms, status;
2584
2585	if (hdev->pldm) {
2586		reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
2587		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2588	} else {
2589		reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
2590		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
2591	}
2592
2593	if (hard_reset) {
2594		/* I don't know what is the state of the CPU so make sure it is
2595		 * stopped in any means necessary
2596		 */
2597		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
2598		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2599			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
2600
2601		msleep(cpu_timeout_ms);
2602
2603		goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
2604		goya_disable_clk_rlx(hdev);
2605		goya_set_pll_refclk(hdev);
2606
2607		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
2608		dev_info(hdev->dev,
2609			"Issued HARD reset command, going to wait %dms\n",
2610			reset_timeout_ms);
2611	} else {
2612		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
2613		dev_info(hdev->dev,
2614			"Issued SOFT reset command, going to wait %dms\n",
2615			reset_timeout_ms);
2616	}
2617
2618	/*
2619	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
2620	 * itself is in reset. In either reset we need to wait until the reset
2621	 * is deasserted
2622	 */
2623	msleep(reset_timeout_ms);
2624
2625	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
2626	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
2627		dev_err(hdev->dev,
2628			"Timeout while waiting for device to reset 0x%x\n",
2629			status);
2630
2631	if (!hard_reset) {
2632		goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
2633						HW_CAP_GOLDEN | HW_CAP_TPC);
2634		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2635				GOYA_ASYNC_EVENT_ID_SOFT_RESET);
2636		return;
2637	}
2638
2639	/* Chicken bit to re-initiate boot sequencer flow */
2640	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
2641		1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
2642	/* Move boot manager FSM to pre boot sequencer init state */
2643	WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
2644			0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);
2645
2646	goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
2647					HW_CAP_DDR_0 | HW_CAP_DDR_1 |
2648					HW_CAP_DMA | HW_CAP_MME |
2649					HW_CAP_MMU | HW_CAP_TPC_MBIST |
2650					HW_CAP_GOLDEN | HW_CAP_TPC);
2651	memset(goya->events_stat, 0, sizeof(goya->events_stat));
2652}
2653
2654int goya_suspend(struct hl_device *hdev)
2655{
2656	int rc;
2657
2658	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
2659	if (rc)
2660		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
2661
2662	return rc;
2663}
2664
2665int goya_resume(struct hl_device *hdev)
2666{
2667	return goya_init_iatu(hdev);
2668}
2669
2670static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
2671			void *cpu_addr, dma_addr_t dma_addr, size_t size)
2672{
2673	int rc;
2674
2675	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
2676			VM_DONTCOPY | VM_NORESERVE;
2677
2678	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
2679				(dma_addr - HOST_PHYS_BASE), size);
2680	if (rc)
2681		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
2682
2683	return rc;
2684}
2685
2686void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
2687{
2688	u32 db_reg_offset, db_value;
2689
2690	switch (hw_queue_id) {
2691	case GOYA_QUEUE_ID_DMA_0:
2692		db_reg_offset = mmDMA_QM_0_PQ_PI;
2693		break;
2694
2695	case GOYA_QUEUE_ID_DMA_1:
2696		db_reg_offset = mmDMA_QM_1_PQ_PI;
2697		break;
2698
2699	case GOYA_QUEUE_ID_DMA_2:
2700		db_reg_offset = mmDMA_QM_2_PQ_PI;
2701		break;
2702
2703	case GOYA_QUEUE_ID_DMA_3:
2704		db_reg_offset = mmDMA_QM_3_PQ_PI;
2705		break;
2706
2707	case GOYA_QUEUE_ID_DMA_4:
2708		db_reg_offset = mmDMA_QM_4_PQ_PI;
2709		break;
2710
2711	case GOYA_QUEUE_ID_CPU_PQ:
2712		db_reg_offset = mmCPU_IF_PF_PQ_PI;
2713		break;
2714
2715	case GOYA_QUEUE_ID_MME:
2716		db_reg_offset = mmMME_QM_PQ_PI;
2717		break;
2718
2719	case GOYA_QUEUE_ID_TPC0:
2720		db_reg_offset = mmTPC0_QM_PQ_PI;
2721		break;
2722
2723	case GOYA_QUEUE_ID_TPC1:
2724		db_reg_offset = mmTPC1_QM_PQ_PI;
2725		break;
2726
2727	case GOYA_QUEUE_ID_TPC2:
2728		db_reg_offset = mmTPC2_QM_PQ_PI;
2729		break;
2730
2731	case GOYA_QUEUE_ID_TPC3:
2732		db_reg_offset = mmTPC3_QM_PQ_PI;
2733		break;
2734
2735	case GOYA_QUEUE_ID_TPC4:
2736		db_reg_offset = mmTPC4_QM_PQ_PI;
2737		break;
2738
2739	case GOYA_QUEUE_ID_TPC5:
2740		db_reg_offset = mmTPC5_QM_PQ_PI;
2741		break;
2742
2743	case GOYA_QUEUE_ID_TPC6:
2744		db_reg_offset = mmTPC6_QM_PQ_PI;
2745		break;
2746
2747	case GOYA_QUEUE_ID_TPC7:
2748		db_reg_offset = mmTPC7_QM_PQ_PI;
2749		break;
2750
2751	default:
2752		/* Should never get here */
2753		dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
2754			hw_queue_id);
2755		return;
2756	}
2757
2758	db_value = pi;
2759
2760	/* ring the doorbell */
2761	WREG32(db_reg_offset, db_value);
2762
2763	if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
2764		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2765				GOYA_ASYNC_EVENT_ID_PI_UPDATE);
2766}
2767
2768void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
2769{
2770	/* The QMANs are on the SRAM so need to copy to IO space */
2771	memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
2772}
2773
2774static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
2775					dma_addr_t *dma_handle, gfp_t flags)
2776{
2777	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
2778						dma_handle, flags);
2779
2780	/* Shift to the device's base physical address of host memory */
2781	if (kernel_addr)
2782		*dma_handle += HOST_PHYS_BASE;
2783
2784	return kernel_addr;
2785}
2786
2787static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
2788					void *cpu_addr, dma_addr_t dma_handle)
2789{
2790	/* Cancel the device's base physical address of host memory */
2791	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
2792
2793	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
2794}
2795
2796void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
2797				dma_addr_t *dma_handle,	u16 *queue_len)
2798{
2799	void *base;
2800	u32 offset;
2801
2802	*dma_handle = hdev->asic_prop.sram_base_address;
2803
2804	base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
2805
2806	switch (queue_id) {
2807	case GOYA_QUEUE_ID_MME:
2808		offset = MME_QMAN_BASE_OFFSET;
2809		*queue_len = MME_QMAN_LENGTH;
2810		break;
2811	case GOYA_QUEUE_ID_TPC0:
2812		offset = TPC0_QMAN_BASE_OFFSET;
2813		*queue_len = TPC_QMAN_LENGTH;
2814		break;
2815	case GOYA_QUEUE_ID_TPC1:
2816		offset = TPC1_QMAN_BASE_OFFSET;
2817		*queue_len = TPC_QMAN_LENGTH;
2818		break;
2819	case GOYA_QUEUE_ID_TPC2:
2820		offset = TPC2_QMAN_BASE_OFFSET;
2821		*queue_len = TPC_QMAN_LENGTH;
2822		break;
2823	case GOYA_QUEUE_ID_TPC3:
2824		offset = TPC3_QMAN_BASE_OFFSET;
2825		*queue_len = TPC_QMAN_LENGTH;
2826		break;
2827	case GOYA_QUEUE_ID_TPC4:
2828		offset = TPC4_QMAN_BASE_OFFSET;
2829		*queue_len = TPC_QMAN_LENGTH;
2830		break;
2831	case GOYA_QUEUE_ID_TPC5:
2832		offset = TPC5_QMAN_BASE_OFFSET;
2833		*queue_len = TPC_QMAN_LENGTH;
2834		break;
2835	case GOYA_QUEUE_ID_TPC6:
2836		offset = TPC6_QMAN_BASE_OFFSET;
2837		*queue_len = TPC_QMAN_LENGTH;
2838		break;
2839	case GOYA_QUEUE_ID_TPC7:
2840		offset = TPC7_QMAN_BASE_OFFSET;
2841		*queue_len = TPC_QMAN_LENGTH;
2842		break;
2843	default:
2844		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
2845		return NULL;
2846	}
2847
2848	base += offset;
2849	*dma_handle += offset;
2850
2851	return base;
2852}
2853
2854static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
2855{
2856	struct packet_msg_prot *fence_pkt;
2857	u32 *fence_ptr;
2858	dma_addr_t fence_dma_addr;
2859	struct hl_cb *cb;
2860	u32 tmp, timeout;
2861	int rc;
2862
2863	if (hdev->pldm)
2864		timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
2865	else
2866		timeout = HL_DEVICE_TIMEOUT_USEC;
2867
2868	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
2869		dev_err_ratelimited(hdev->dev,
2870			"Can't send driver job on QMAN0 because the device is not idle\n");
2871		return -EBUSY;
2872	}
2873
2874	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
2875							&fence_dma_addr);
2876	if (!fence_ptr) {
2877		dev_err(hdev->dev,
2878			"Failed to allocate fence memory for QMAN0\n");
2879		return -ENOMEM;
2880	}
2881
2882	goya_qman0_set_security(hdev, true);
2883
2884	cb = job->patched_cb;
2885
2886	fence_pkt = cb->kernel_address +
2887			job->job_cb_size - sizeof(struct packet_msg_prot);
2888
2889	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
2890			(1 << GOYA_PKT_CTL_EB_SHIFT) |
2891			(1 << GOYA_PKT_CTL_MB_SHIFT);
2892	fence_pkt->ctl = cpu_to_le32(tmp);
2893	fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
2894	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
2895
2896	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
2897					job->job_cb_size, cb->bus_address);
2898	if (rc) {
2899		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
2900		goto free_fence_ptr;
2901	}
2902
2903	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
2904				(tmp == GOYA_QMAN0_FENCE_VAL), 1000,
2905				timeout, true);
2906
2907	hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
2908
2909	if (rc == -ETIMEDOUT) {
2910		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
2911		goto free_fence_ptr;
2912	}
2913
2914free_fence_ptr:
2915	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
2916					fence_dma_addr);
2917
2918	goya_qman0_set_security(hdev, false);
2919
2920	return rc;
2921}
2922
2923int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
2924				u32 timeout, long *result)
2925{
2926	struct goya_device *goya = hdev->asic_specific;
2927
2928	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
2929		if (result)
2930			*result = 0;
2931		return 0;
2932	}
2933
2934	if (!timeout)
2935		timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;
2936
2937	return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
2938					timeout, result);
2939}
2940
2941int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
2942{
2943	struct packet_msg_prot *fence_pkt;
2944	dma_addr_t pkt_dma_addr;
2945	u32 fence_val, tmp;
2946	dma_addr_t fence_dma_addr;
2947	u32 *fence_ptr;
2948	int rc;
2949
2950	fence_val = GOYA_QMAN0_FENCE_VAL;
2951
2952	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
2953							&fence_dma_addr);
2954	if (!fence_ptr) {
2955		dev_err(hdev->dev,
2956			"Failed to allocate memory for H/W queue %d testing\n",
2957			hw_queue_id);
2958		return -ENOMEM;
2959	}
2960
2961	*fence_ptr = 0;
2962
2963	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
2964					sizeof(struct packet_msg_prot),
2965					GFP_KERNEL, &pkt_dma_addr);
2966	if (!fence_pkt) {
2967		dev_err(hdev->dev,
2968			"Failed to allocate packet for H/W queue %d testing\n",
2969			hw_queue_id);
2970		rc = -ENOMEM;
2971		goto free_fence_ptr;
2972	}
2973
2974	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
2975			(1 << GOYA_PKT_CTL_EB_SHIFT) |
2976			(1 << GOYA_PKT_CTL_MB_SHIFT);
2977	fence_pkt->ctl = cpu_to_le32(tmp);
2978	fence_pkt->value = cpu_to_le32(fence_val);
2979	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
2980
2981	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
2982					sizeof(struct packet_msg_prot),
2983					pkt_dma_addr);
2984	if (rc) {
2985		dev_err(hdev->dev,
2986			"Failed to send fence packet to H/W queue %d\n",
2987			hw_queue_id);
2988		goto free_pkt;
2989	}
2990
2991	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
2992					1000, GOYA_TEST_QUEUE_WAIT_USEC, true);
2993
2994	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
2995
2996	if (rc == -ETIMEDOUT) {
2997		dev_err(hdev->dev,
2998			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
2999			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3000		rc = -EIO;
3001	}
3002
3003free_pkt:
3004	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3005					pkt_dma_addr);
3006free_fence_ptr:
3007	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3008					fence_dma_addr);
3009	return rc;
3010}
3011
3012int goya_test_cpu_queue(struct hl_device *hdev)
3013{
3014	struct goya_device *goya = hdev->asic_specific;
3015
3016	/*
3017	 * check capability here as send_cpu_message() won't update the result
3018	 * value if no capability
3019	 */
3020	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
3021		return 0;
3022
3023	return hl_fw_test_cpu_queue(hdev);
3024}
3025
3026int goya_test_queues(struct hl_device *hdev)
3027{
3028	int i, rc, ret_val = 0;
3029
3030	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
3031		rc = goya_test_queue(hdev, i);
3032		if (rc)
3033			ret_val = -EINVAL;
3034	}
3035
3036	return ret_val;
3037}
3038
3039static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3040					gfp_t mem_flags, dma_addr_t *dma_handle)
3041{
3042	void *kernel_addr;
3043
3044	if (size > GOYA_DMA_POOL_BLK_SIZE)
3045		return NULL;
3046
3047	kernel_addr =  dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3048
3049	/* Shift to the device's base physical address of host memory */
3050	if (kernel_addr)
3051		*dma_handle += HOST_PHYS_BASE;
3052
3053	return kernel_addr;
3054}
3055
3056static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
3057				dma_addr_t dma_addr)
3058{
3059	/* Cancel the device's base physical address of host memory */
3060	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3061
3062	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3063}
3064
3065void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
3066					dma_addr_t *dma_handle)
3067{
3068	void *vaddr;
3069
3070	vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3071	*dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
3072			VA_CPU_ACCESSIBLE_MEM_ADDR;
3073
3074	return vaddr;
3075}
3076
3077void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
3078					void *vaddr)
3079{
3080	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3081}
3082
3083static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3084				int nents, enum dma_data_direction dir)
3085{
3086	struct scatterlist *sg;
3087	int i;
3088
3089	if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3090		return -ENOMEM;
3091
3092	/* Shift to the device's base physical address of host memory */
3093	for_each_sg(sgl, sg, nents, i)
3094		sg->dma_address += HOST_PHYS_BASE;
3095
3096	return 0;
3097}
3098
3099static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3100				int nents, enum dma_data_direction dir)
3101{
3102	struct scatterlist *sg;
3103	int i;
3104
3105	/* Cancel the device's base physical address of host memory */
3106	for_each_sg(sgl, sg, nents, i)
3107		sg->dma_address -= HOST_PHYS_BASE;
3108
3109	dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3110}
3111
3112u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
3113{
3114	struct scatterlist *sg, *sg_next_iter;
3115	u32 count, dma_desc_cnt;
3116	u64 len, len_next;
3117	dma_addr_t addr, addr_next;
3118
3119	dma_desc_cnt = 0;
3120
3121	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3122
3123		len = sg_dma_len(sg);
3124		addr = sg_dma_address(sg);
3125
3126		if (len == 0)
3127			break;
3128
3129		while ((count + 1) < sgt->nents) {
3130			sg_next_iter = sg_next(sg);
3131			len_next = sg_dma_len(sg_next_iter);
3132			addr_next = sg_dma_address(sg_next_iter);
3133
3134			if (len_next == 0)
3135				break;
3136
3137			if ((addr + len == addr_next) &&
3138				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3139				len += len_next;
3140				count++;
3141				sg = sg_next_iter;
3142			} else {
3143				break;
3144			}
3145		}
3146
3147		dma_desc_cnt++;
3148	}
3149
3150	return dma_desc_cnt * sizeof(struct packet_lin_dma);
3151}
3152
3153static int goya_pin_memory_before_cs(struct hl_device *hdev,
3154				struct hl_cs_parser *parser,
3155				struct packet_lin_dma *user_dma_pkt,
3156				u64 addr, enum dma_data_direction dir)
3157{
3158	struct hl_userptr *userptr;
3159	int rc;
3160
3161	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3162			parser->job_userptr_list, &userptr))
3163		goto already_pinned;
3164
3165	userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3166	if (!userptr)
3167		return -ENOMEM;
3168
3169	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3170				userptr);
3171	if (rc)
3172		goto free_userptr;
3173
3174	list_add_tail(&userptr->job_node, parser->job_userptr_list);
3175
3176	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3177					userptr->sgt->nents, dir);
3178	if (rc) {
3179		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3180		goto unpin_memory;
3181	}
3182
3183	userptr->dma_mapped = true;
3184	userptr->dir = dir;
3185
3186already_pinned:
3187	parser->patched_cb_size +=
3188			goya_get_dma_desc_list_size(hdev, userptr->sgt);
3189
3190	return 0;
3191
3192unpin_memory:
3193	list_del(&userptr->job_node);
3194	hl_unpin_host_memory(hdev, userptr);
3195free_userptr:
3196	kfree(userptr);
3197	return rc;
3198}
3199
3200static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3201				struct hl_cs_parser *parser,
3202				struct packet_lin_dma *user_dma_pkt)
3203{
3204	u64 device_memory_addr, addr;
3205	enum dma_data_direction dir;
3206	enum goya_dma_direction user_dir;
3207	bool sram_addr = true;
3208	bool skip_host_mem_pin = false;
3209	bool user_memset;
3210	u32 ctl;
3211	int rc = 0;
3212
3213	ctl = le32_to_cpu(user_dma_pkt->ctl);
3214
3215	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3216			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3217
3218	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3219			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3220
3221	switch (user_dir) {
3222	case DMA_HOST_TO_DRAM:
3223		dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3224		dir = DMA_TO_DEVICE;
3225		sram_addr = false;
3226		addr = le64_to_cpu(user_dma_pkt->src_addr);
3227		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3228		if (user_memset)
3229			skip_host_mem_pin = true;
3230		break;
3231
3232	case DMA_DRAM_TO_HOST:
3233		dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3234		dir = DMA_FROM_DEVICE;
3235		sram_addr = false;
3236		addr = le64_to_cpu(user_dma_pkt->dst_addr);
3237		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3238		break;
3239
3240	case DMA_HOST_TO_SRAM:
3241		dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3242		dir = DMA_TO_DEVICE;
3243		addr = le64_to_cpu(user_dma_pkt->src_addr);
3244		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3245		if (user_memset)
3246			skip_host_mem_pin = true;
3247		break;
3248
3249	case DMA_SRAM_TO_HOST:
3250		dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3251		dir = DMA_FROM_DEVICE;
3252		addr = le64_to_cpu(user_dma_pkt->dst_addr);
3253		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3254		break;
3255	default:
3256		dev_err(hdev->dev, "DMA direction is undefined\n");
3257		return -EFAULT;
3258	}
3259
3260	if (sram_addr) {
3261		if (!hl_mem_area_inside_range(device_memory_addr,
3262				le32_to_cpu(user_dma_pkt->tsize),
3263				hdev->asic_prop.sram_user_base_address,
3264				hdev->asic_prop.sram_end_address)) {
3265
3266			dev_err(hdev->dev,
3267				"SRAM address 0x%llx + 0x%x is invalid\n",
3268				device_memory_addr,
3269				user_dma_pkt->tsize);
3270			return -EFAULT;
3271		}
3272	} else {
3273		if (!hl_mem_area_inside_range(device_memory_addr,
3274				le32_to_cpu(user_dma_pkt->tsize),
3275				hdev->asic_prop.dram_user_base_address,
3276				hdev->asic_prop.dram_end_address)) {
3277
3278			dev_err(hdev->dev,
3279				"DRAM address 0x%llx + 0x%x is invalid\n",
3280				device_memory_addr,
3281				user_dma_pkt->tsize);
3282			return -EFAULT;
3283		}
3284	}
3285
3286	if (skip_host_mem_pin)
3287		parser->patched_cb_size += sizeof(*user_dma_pkt);
3288	else {
3289		if ((dir == DMA_TO_DEVICE) &&
3290				(parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3291			dev_err(hdev->dev,
3292				"Can't DMA from host on queue other then 1\n");
3293			return -EFAULT;
3294		}
3295
3296		rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3297						addr, dir);
3298	}
3299
3300	return rc;
3301}
3302
3303static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3304				struct hl_cs_parser *parser,
3305				struct packet_lin_dma *user_dma_pkt)
3306{
3307	u64 sram_memory_addr, dram_memory_addr;
3308	enum goya_dma_direction user_dir;
3309	u32 ctl;
3310
3311	ctl = le32_to_cpu(user_dma_pkt->ctl);
3312	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3313			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3314
3315	if (user_dir == DMA_DRAM_TO_SRAM) {
3316		dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3317		dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3318		sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3319	} else {
3320		dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3321		sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3322		dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3323	}
3324
3325	if (!hl_mem_area_inside_range(sram_memory_addr,
3326				le32_to_cpu(user_dma_pkt->tsize),
3327				hdev->asic_prop.sram_user_base_address,
3328				hdev->asic_prop.sram_end_address)) {
3329		dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
3330			sram_memory_addr, user_dma_pkt->tsize);
3331		return -EFAULT;
3332	}
3333
3334	if (!hl_mem_area_inside_range(dram_memory_addr,
3335				le32_to_cpu(user_dma_pkt->tsize),
3336				hdev->asic_prop.dram_user_base_address,
3337				hdev->asic_prop.dram_end_address)) {
3338		dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
3339			dram_memory_addr, user_dma_pkt->tsize);
3340		return -EFAULT;
3341	}
3342
3343	parser->patched_cb_size += sizeof(*user_dma_pkt);
3344
3345	return 0;
3346}
3347
3348static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3349				struct hl_cs_parser *parser,
3350				struct packet_lin_dma *user_dma_pkt)
3351{
3352	enum goya_dma_direction user_dir;
3353	u32 ctl;
3354	int rc;
3355
3356	dev_dbg(hdev->dev, "DMA packet details:\n");
3357	dev_dbg(hdev->dev, "source == 0x%llx\n",
3358		le64_to_cpu(user_dma_pkt->src_addr));
3359	dev_dbg(hdev->dev, "destination == 0x%llx\n",
3360		le64_to_cpu(user_dma_pkt->dst_addr));
3361	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3362
3363	ctl = le32_to_cpu(user_dma_pkt->ctl);
3364	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3365			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3366
3367	/*
3368	 * Special handling for DMA with size 0. The H/W has a bug where
3369	 * this can cause the QMAN DMA to get stuck, so block it here.
3370	 */
3371	if (user_dma_pkt->tsize == 0) {
3372		dev_err(hdev->dev,
3373			"Got DMA with size 0, might reset the device\n");
3374		return -EINVAL;
3375	}
3376
3377	if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
3378		rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
3379	else
3380		rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
3381
3382	return rc;
3383}
3384
3385static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
3386				struct hl_cs_parser *parser,
3387				struct packet_lin_dma *user_dma_pkt)
3388{
3389	dev_dbg(hdev->dev, "DMA packet details:\n");
3390	dev_dbg(hdev->dev, "source == 0x%llx\n",
3391		le64_to_cpu(user_dma_pkt->src_addr));
3392	dev_dbg(hdev->dev, "destination == 0x%llx\n",
3393		le64_to_cpu(user_dma_pkt->dst_addr));
3394	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3395
3396	/*
3397	 * WA for HW-23.
3398	 * We can't allow user to read from Host using QMANs other than 1.
3399	 * PMMU and HPMMU addresses are equal, check only one of them.
3400	 */
3401	if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
3402		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
3403				le32_to_cpu(user_dma_pkt->tsize),
3404				hdev->asic_prop.pmmu.start_addr,
3405				hdev->asic_prop.pmmu.end_addr)) {
3406		dev_err(hdev->dev,
3407			"Can't DMA from host on queue other then 1\n");
3408		return -EFAULT;
3409	}
3410
3411	if (user_dma_pkt->tsize == 0) {
3412		dev_err(hdev->dev,
3413			"Got DMA with size 0, might reset the device\n");
3414		return -EINVAL;
3415	}
3416
3417	parser->patched_cb_size += sizeof(*user_dma_pkt);
3418
3419	return 0;
3420}
3421
3422static int goya_validate_wreg32(struct hl_device *hdev,
3423				struct hl_cs_parser *parser,
3424				struct packet_wreg32 *wreg_pkt)
3425{
3426	struct goya_device *goya = hdev->asic_specific;
3427	u32 sob_start_addr, sob_end_addr;
3428	u16 reg_offset;
3429
3430	reg_offset = le32_to_cpu(wreg_pkt->ctl) &
3431			GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
3432
3433	dev_dbg(hdev->dev, "WREG32 packet details:\n");
3434	dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
3435	dev_dbg(hdev->dev, "value      == 0x%x\n",
3436		le32_to_cpu(wreg_pkt->value));
3437
3438	if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
3439		dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3440			reg_offset);
3441		return -EPERM;
3442	}
3443
3444	/*
3445	 * With MMU, DMA channels are not secured, so it doesn't matter where
3446	 * the WR COMP will be written to because it will go out with
3447	 * non-secured property
3448	 */
3449	if (goya->hw_cap_initialized & HW_CAP_MMU)
3450		return 0;
3451
3452	sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3453	sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3454
3455	if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
3456			(le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {
3457
3458		dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
3459			wreg_pkt->value);
3460		return -EPERM;
3461	}
3462
3463	return 0;
3464}
3465
3466static int goya_validate_cb(struct hl_device *hdev,
3467			struct hl_cs_parser *parser, bool is_mmu)
3468{
3469	u32 cb_parsed_length = 0;
3470	int rc = 0;
3471
3472	parser->patched_cb_size = 0;
3473
3474	/* cb_user_size is more than 0 so loop will always be executed */
3475	while (cb_parsed_length < parser->user_cb_size) {
3476		enum packet_id pkt_id;
3477		u16 pkt_size;
3478		struct goya_packet *user_pkt;
3479
3480		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3481
3482		pkt_id = (enum packet_id) (
3483				(le64_to_cpu(user_pkt->header) &
3484				PACKET_HEADER_PACKET_ID_MASK) >>
3485					PACKET_HEADER_PACKET_ID_SHIFT);
3486
3487		if (!validate_packet_id(pkt_id)) {
3488			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3489			rc = -EINVAL;
3490			break;
3491		}
3492
3493		pkt_size = goya_packet_sizes[pkt_id];
3494		cb_parsed_length += pkt_size;
3495		if (cb_parsed_length > parser->user_cb_size) {
3496			dev_err(hdev->dev,
3497				"packet 0x%x is out of CB boundary\n", pkt_id);
3498			rc = -EINVAL;
3499			break;
3500		}
3501
3502		switch (pkt_id) {
3503		case PACKET_WREG_32:
3504			/*
3505			 * Although it is validated after copy in patch_cb(),
3506			 * need to validate here as well because patch_cb() is
3507			 * not called in MMU path while this function is called
3508			 */
3509			rc = goya_validate_wreg32(hdev,
3510				parser, (struct packet_wreg32 *) user_pkt);
3511			parser->patched_cb_size += pkt_size;
3512			break;
3513
3514		case PACKET_WREG_BULK:
3515			dev_err(hdev->dev,
3516				"User not allowed to use WREG_BULK\n");
3517			rc = -EPERM;
3518			break;
3519
3520		case PACKET_MSG_PROT:
3521			dev_err(hdev->dev,
3522				"User not allowed to use MSG_PROT\n");
3523			rc = -EPERM;
3524			break;
3525
3526		case PACKET_CP_DMA:
3527			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3528			rc = -EPERM;
3529			break;
3530
3531		case PACKET_STOP:
3532			dev_err(hdev->dev, "User not allowed to use STOP\n");
3533			rc = -EPERM;
3534			break;
3535
3536		case PACKET_LIN_DMA:
3537			if (is_mmu)
3538				rc = goya_validate_dma_pkt_mmu(hdev, parser,
3539					(struct packet_lin_dma *) user_pkt);
3540			else
3541				rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
3542					(struct packet_lin_dma *) user_pkt);
3543			break;
3544
3545		case PACKET_MSG_LONG:
3546		case PACKET_MSG_SHORT:
3547		case PACKET_FENCE:
3548		case PACKET_NOP:
3549			parser->patched_cb_size += pkt_size;
3550			break;
3551
3552		default:
3553			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3554				pkt_id);
3555			rc = -EINVAL;
3556			break;
3557		}
3558
3559		if (rc)
3560			break;
3561	}
3562
3563	/*
3564	 * The new CB should have space at the end for two MSG_PROT packets:
3565	 * 1. A packet that will act as a completion packet
3566	 * 2. A packet that will generate MSI-X interrupt
3567	 */
3568	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3569
3570	return rc;
3571}
3572
3573static int goya_patch_dma_packet(struct hl_device *hdev,
3574				struct hl_cs_parser *parser,
3575				struct packet_lin_dma *user_dma_pkt,
3576				struct packet_lin_dma *new_dma_pkt,
3577				u32 *new_dma_pkt_size)
3578{
3579	struct hl_userptr *userptr;
3580	struct scatterlist *sg, *sg_next_iter;
3581	u32 count, dma_desc_cnt;
3582	u64 len, len_next;
3583	dma_addr_t dma_addr, dma_addr_next;
3584	enum goya_dma_direction user_dir;
3585	u64 device_memory_addr, addr;
3586	enum dma_data_direction dir;
3587	struct sg_table *sgt;
3588	bool skip_host_mem_pin = false;
3589	bool user_memset;
3590	u32 user_rdcomp_mask, user_wrcomp_mask, ctl;
3591
3592	ctl = le32_to_cpu(user_dma_pkt->ctl);
3593
3594	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3595			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3596
3597	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3598			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3599
3600	if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
3601			(user_dma_pkt->tsize == 0)) {
3602		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
3603		*new_dma_pkt_size = sizeof(*new_dma_pkt);
3604		return 0;
3605	}
3606
3607	if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
3608		addr = le64_to_cpu(user_dma_pkt->src_addr);
3609		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3610		dir = DMA_TO_DEVICE;
3611		if (user_memset)
3612			skip_host_mem_pin = true;
3613	} else {
3614		addr = le64_to_cpu(user_dma_pkt->dst_addr);
3615		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3616		dir = DMA_FROM_DEVICE;
3617	}
3618
3619	if ((!skip_host_mem_pin) &&
3620		(hl_userptr_is_pinned(hdev, addr,
3621			le32_to_cpu(user_dma_pkt->tsize),
3622			parser->job_userptr_list, &userptr) == false)) {
3623		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3624				addr, user_dma_pkt->tsize);
3625		return -EFAULT;
3626	}
3627
3628	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3629		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3630		*new_dma_pkt_size = sizeof(*user_dma_pkt);
3631		return 0;
3632	}
3633
3634	user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;
3635
3636	user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;
3637
3638	sgt = userptr->sgt;
3639	dma_desc_cnt = 0;
3640
3641	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3642		len = sg_dma_len(sg);
3643		dma_addr = sg_dma_address(sg);
3644
3645		if (len == 0)
3646			break;
3647
3648		while ((count + 1) < sgt->nents) {
3649			sg_next_iter = sg_next(sg);
3650			len_next = sg_dma_len(sg_next_iter);
3651			dma_addr_next = sg_dma_address(sg_next_iter);
3652
3653			if (len_next == 0)
3654				break;
3655
3656			if ((dma_addr + len == dma_addr_next) &&
3657				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3658				len += len_next;
3659				count++;
3660				sg = sg_next_iter;
3661			} else {
3662				break;
3663			}
3664		}
3665
3666		ctl = le32_to_cpu(user_dma_pkt->ctl);
3667		if (likely(dma_desc_cnt))
3668			ctl &= ~GOYA_PKT_CTL_EB_MASK;
3669		ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
3670				GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
3671		new_dma_pkt->ctl = cpu_to_le32(ctl);
3672		new_dma_pkt->tsize = cpu_to_le32((u32) len);
3673
3674		if (dir == DMA_TO_DEVICE) {
3675			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3676			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3677		} else {
3678			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3679			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3680		}
3681
3682		if (!user_memset)
3683			device_memory_addr += len;
3684		dma_desc_cnt++;
3685		new_dma_pkt++;
3686	}
3687
3688	if (!dma_desc_cnt) {
3689		dev_err(hdev->dev,
3690			"Error of 0 SG entries when patching DMA packet\n");
3691		return -EFAULT;
3692	}
3693
3694	/* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
3695	new_dma_pkt--;
3696	new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);
3697
3698	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
3699
3700	return 0;
3701}
3702
3703static int goya_patch_cb(struct hl_device *hdev,
3704				struct hl_cs_parser *parser)
3705{
3706	u32 cb_parsed_length = 0;
3707	u32 cb_patched_cur_length = 0;
3708	int rc = 0;
3709
3710	/* cb_user_size is more than 0 so loop will always be executed */
3711	while (cb_parsed_length < parser->user_cb_size) {
3712		enum packet_id pkt_id;
3713		u16 pkt_size;
3714		u32 new_pkt_size = 0;
3715		struct goya_packet *user_pkt, *kernel_pkt;
3716
3717		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3718		kernel_pkt = parser->patched_cb->kernel_address +
3719					cb_patched_cur_length;
3720
3721		pkt_id = (enum packet_id) (
3722				(le64_to_cpu(user_pkt->header) &
3723				PACKET_HEADER_PACKET_ID_MASK) >>
3724					PACKET_HEADER_PACKET_ID_SHIFT);
3725
3726		if (!validate_packet_id(pkt_id)) {
3727			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3728			rc = -EINVAL;
3729			break;
3730		}
3731
3732		pkt_size = goya_packet_sizes[pkt_id];
3733		cb_parsed_length += pkt_size;
3734		if (cb_parsed_length > parser->user_cb_size) {
3735			dev_err(hdev->dev,
3736				"packet 0x%x is out of CB boundary\n", pkt_id);
3737			rc = -EINVAL;
3738			break;
3739		}
3740
3741		switch (pkt_id) {
3742		case PACKET_LIN_DMA:
3743			rc = goya_patch_dma_packet(hdev, parser,
3744					(struct packet_lin_dma *) user_pkt,
3745					(struct packet_lin_dma *) kernel_pkt,
3746					&new_pkt_size);
3747			cb_patched_cur_length += new_pkt_size;
3748			break;
3749
3750		case PACKET_WREG_32:
3751			memcpy(kernel_pkt, user_pkt, pkt_size);
3752			cb_patched_cur_length += pkt_size;
3753			rc = goya_validate_wreg32(hdev, parser,
3754					(struct packet_wreg32 *) kernel_pkt);
3755			break;
3756
3757		case PACKET_WREG_BULK:
3758			dev_err(hdev->dev,
3759				"User not allowed to use WREG_BULK\n");
3760			rc = -EPERM;
3761			break;
3762
3763		case PACKET_MSG_PROT:
3764			dev_err(hdev->dev,
3765				"User not allowed to use MSG_PROT\n");
3766			rc = -EPERM;
3767			break;
3768
3769		case PACKET_CP_DMA:
3770			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3771			rc = -EPERM;
3772			break;
3773
3774		case PACKET_STOP:
3775			dev_err(hdev->dev, "User not allowed to use STOP\n");
3776			rc = -EPERM;
3777			break;
3778
3779		case PACKET_MSG_LONG:
3780		case PACKET_MSG_SHORT:
3781		case PACKET_FENCE:
3782		case PACKET_NOP:
3783			memcpy(kernel_pkt, user_pkt, pkt_size);
3784			cb_patched_cur_length += pkt_size;
3785			break;
3786
3787		default:
3788			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3789				pkt_id);
3790			rc = -EINVAL;
3791			break;
3792		}
3793
3794		if (rc)
3795			break;
3796	}
3797
3798	return rc;
3799}
3800
3801static int goya_parse_cb_mmu(struct hl_device *hdev,
3802		struct hl_cs_parser *parser)
3803{
3804	u64 patched_cb_handle;
3805	u32 patched_cb_size;
3806	struct hl_cb *user_cb;
3807	int rc;
3808
3809	/*
3810	 * The new CB should have space at the end for two MSG_PROT pkt:
3811	 * 1. A packet that will act as a completion packet
3812	 * 2. A packet that will generate MSI-X interrupt
3813	 */
3814	parser->patched_cb_size = parser->user_cb_size +
3815			sizeof(struct packet_msg_prot) * 2;
3816
3817	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
3818				parser->patched_cb_size, false, false,
3819				&patched_cb_handle);
3820
3821	if (rc) {
3822		dev_err(hdev->dev,
3823			"Failed to allocate patched CB for DMA CS %d\n",
3824			rc);
3825		return rc;
3826	}
3827
3828	patched_cb_handle >>= PAGE_SHIFT;
3829	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
3830				(u32) patched_cb_handle);
3831	/* hl_cb_get should never fail here so use kernel WARN */
3832	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
3833			(u32) patched_cb_handle);
3834	if (!parser->patched_cb) {
3835		rc = -EFAULT;
3836		goto out;
3837	}
3838
3839	/*
3840	 * The check that parser->user_cb_size <= parser->user_cb->size was done
3841	 * in validate_queue_index().
3842	 */
3843	memcpy(parser->patched_cb->kernel_address,
3844		parser->user_cb->kernel_address,
3845		parser->user_cb_size);
3846
3847	patched_cb_size = parser->patched_cb_size;
3848
3849	/* validate patched CB instead of user CB */
3850	user_cb = parser->user_cb;
3851	parser->user_cb = parser->patched_cb;
3852	rc = goya_validate_cb(hdev, parser, true);
3853	parser->user_cb = user_cb;
3854
3855	if (rc) {
3856		hl_cb_put(parser->patched_cb);
3857		goto out;
3858	}
3859
3860	if (patched_cb_size != parser->patched_cb_size) {
3861		dev_err(hdev->dev, "user CB size mismatch\n");
3862		hl_cb_put(parser->patched_cb);
3863		rc = -EINVAL;
3864		goto out;
3865	}
3866
3867out:
3868	/*
3869	 * Always call cb destroy here because we still have 1 reference
3870	 * to it by calling cb_get earlier. After the job will be completed,
3871	 * cb_put will release it, but here we want to remove it from the
3872	 * idr
3873	 */
3874	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
3875					patched_cb_handle << PAGE_SHIFT);
3876
3877	return rc;
3878}
3879
3880static int goya_parse_cb_no_mmu(struct hl_device *hdev,
3881				struct hl_cs_parser *parser)
3882{
3883	u64 patched_cb_handle;
3884	int rc;
3885
3886	rc = goya_validate_cb(hdev, parser, false);
3887
3888	if (rc)
3889		goto free_userptr;
3890
3891	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
3892				parser->patched_cb_size, false, false,
3893				&patched_cb_handle);
3894	if (rc) {
3895		dev_err(hdev->dev,
3896			"Failed to allocate patched CB for DMA CS %d\n", rc);
3897		goto free_userptr;
3898	}
3899
3900	patched_cb_handle >>= PAGE_SHIFT;
3901	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
3902				(u32) patched_cb_handle);
3903	/* hl_cb_get should never fail here so use kernel WARN */
3904	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
3905			(u32) patched_cb_handle);
3906	if (!parser->patched_cb) {
3907		rc = -EFAULT;
3908		goto out;
3909	}
3910
3911	rc = goya_patch_cb(hdev, parser);
3912
3913	if (rc)
3914		hl_cb_put(parser->patched_cb);
3915
3916out:
3917	/*
3918	 * Always call cb destroy here because we still have 1 reference
3919	 * to it by calling cb_get earlier. After the job will be completed,
3920	 * cb_put will release it, but here we want to remove it from the
3921	 * idr
3922	 */
3923	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
3924				patched_cb_handle << PAGE_SHIFT);
3925
3926free_userptr:
3927	if (rc)
3928		hl_userptr_delete_list(hdev, parser->job_userptr_list);
3929	return rc;
3930}
3931
3932static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
3933					struct hl_cs_parser *parser)
3934{
3935	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
3936	struct goya_device *goya = hdev->asic_specific;
3937
3938	if (goya->hw_cap_initialized & HW_CAP_MMU)
3939		return 0;
3940
3941	/* For internal queue jobs, just check if CB address is valid */
3942	if (hl_mem_area_inside_range(
3943			(u64) (uintptr_t) parser->user_cb,
3944			parser->user_cb_size,
3945			asic_prop->sram_user_base_address,
3946			asic_prop->sram_end_address))
3947		return 0;
3948
3949	if (hl_mem_area_inside_range(
3950			(u64) (uintptr_t) parser->user_cb,
3951			parser->user_cb_size,
3952			asic_prop->dram_user_base_address,
3953			asic_prop->dram_end_address))
3954		return 0;
3955
3956	dev_err(hdev->dev,
3957		"Internal CB address 0x%px + 0x%x is not in SRAM nor in DRAM\n",
3958		parser->user_cb, parser->user_cb_size);
3959
3960	return -EFAULT;
3961}
3962
3963int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
3964{
3965	struct goya_device *goya = hdev->asic_specific;
3966
3967	if (parser->queue_type == QUEUE_TYPE_INT)
3968		return goya_parse_cb_no_ext_queue(hdev, parser);
3969
3970	if (goya->hw_cap_initialized & HW_CAP_MMU)
3971		return goya_parse_cb_mmu(hdev, parser);
3972	else
3973		return goya_parse_cb_no_mmu(hdev, parser);
3974}
3975
3976void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
3977				u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
3978				bool eb)
3979{
3980	struct packet_msg_prot *cq_pkt;
3981	u32 tmp;
3982
3983	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
3984
3985	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3986			(1 << GOYA_PKT_CTL_EB_SHIFT) |
3987			(1 << GOYA_PKT_CTL_MB_SHIFT);
3988	cq_pkt->ctl = cpu_to_le32(tmp);
3989	cq_pkt->value = cpu_to_le32(cq_val);
3990	cq_pkt->addr = cpu_to_le64(cq_addr);
3991
3992	cq_pkt++;
3993
3994	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3995			(1 << GOYA_PKT_CTL_MB_SHIFT);
3996	cq_pkt->ctl = cpu_to_le32(tmp);
3997	cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
3998	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
3999}
4000
4001void goya_update_eq_ci(struct hl_device *hdev, u32 val)
4002{
4003	WREG32(mmCPU_EQ_CI, val);
4004}
4005
4006void goya_restore_phase_topology(struct hl_device *hdev)
4007{
4008
4009}
4010
4011static void goya_clear_sm_regs(struct hl_device *hdev)
4012{
4013	int i, num_of_sob_in_longs, num_of_mon_in_longs;
4014
4015	num_of_sob_in_longs =
4016		((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);
4017
4018	num_of_mon_in_longs =
4019		((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);
4020
4021	for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
4022		WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);
4023
4024	for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
4025		WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);
4026
4027	/* Flush all WREG to prevent race */
4028	i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
4029}
4030
4031/*
4032 * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped
4033 *                       address.
4034 *
4035 * @hdev:	pointer to hl_device structure
4036 * @addr:	device or host mapped address
4037 * @val:	returned value
4038 *
4039 * In case of DDR address that is not mapped into the default aperture that
4040 * the DDR bar exposes, the function will configure the iATU so that the DDR
4041 * bar will be positioned at a base address that allows reading from the
4042 * required address. Configuring the iATU during normal operation can
4043 * lead to undefined behavior and therefore, should be done with extreme care
4044 *
4045 */
4046static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4047{
4048	struct asic_fixed_properties *prop = &hdev->asic_prop;
4049	u64 ddr_bar_addr;
4050	int rc = 0;
4051
4052	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4053		*val = RREG32(addr - CFG_BASE);
4054
4055	} else if ((addr >= SRAM_BASE_ADDR) &&
4056			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
4057
4058		*val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4059				(addr - SRAM_BASE_ADDR));
4060
4061	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4062
4063		u64 bar_base_addr = DRAM_PHYS_BASE +
4064				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4065
4066		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4067		if (ddr_bar_addr != U64_MAX) {
4068			*val = readl(hdev->pcie_bar[DDR_BAR_ID] +
4069						(addr - bar_base_addr));
4070
4071			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4072							ddr_bar_addr);
4073		}
4074		if (ddr_bar_addr == U64_MAX)
4075			rc = -EIO;
4076
4077	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4078		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4079
4080	} else {
4081		rc = -EFAULT;
4082	}
4083
4084	return rc;
4085}
4086
4087/*
4088 * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
4089 *                        address.
4090 *
4091 * @hdev:	pointer to hl_device structure
4092 * @addr:	device or host mapped address
4093 * @val:	returned value
4094 *
4095 * In case of DDR address that is not mapped into the default aperture that
4096 * the DDR bar exposes, the function will configure the iATU so that the DDR
4097 * bar will be positioned at a base address that allows writing to the
4098 * required address. Configuring the iATU during normal operation can
4099 * lead to undefined behavior and therefore, should be done with extreme care
4100 *
4101 */
4102static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4103{
4104	struct asic_fixed_properties *prop = &hdev->asic_prop;
4105	u64 ddr_bar_addr;
4106	int rc = 0;
4107
4108	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4109		WREG32(addr - CFG_BASE, val);
4110
4111	} else if ((addr >= SRAM_BASE_ADDR) &&
4112			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
4113
4114		writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4115					(addr - SRAM_BASE_ADDR));
4116
4117	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4118
4119		u64 bar_base_addr = DRAM_PHYS_BASE +
4120				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4121
4122		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4123		if (ddr_bar_addr != U64_MAX) {
4124			writel(val, hdev->pcie_bar[DDR_BAR_ID] +
4125						(addr - bar_base_addr));
4126
4127			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4128							ddr_bar_addr);
4129		}
4130		if (ddr_bar_addr == U64_MAX)
4131			rc = -EIO;
4132
4133	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4134		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4135
4136	} else {
4137		rc = -EFAULT;
4138	}
4139
4140	return rc;
4141}
4142
4143static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4144{
4145	struct asic_fixed_properties *prop = &hdev->asic_prop;
4146	u64 ddr_bar_addr;
4147	int rc = 0;
4148
4149	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4150		u32 val_l = RREG32(addr - CFG_BASE);
4151		u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4152
4153		*val = (((u64) val_h) << 32) | val_l;
4154
4155	} else if ((addr >= SRAM_BASE_ADDR) &&
4156			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
4157
4158		*val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4159				(addr - SRAM_BASE_ADDR));
4160
4161	} else if (addr <=
4162		   DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4163
4164		u64 bar_base_addr = DRAM_PHYS_BASE +
4165				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4166
4167		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4168		if (ddr_bar_addr != U64_MAX) {
4169			*val = readq(hdev->pcie_bar[DDR_BAR_ID] +
4170						(addr - bar_base_addr));
4171
4172			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4173							ddr_bar_addr);
4174		}
4175		if (ddr_bar_addr == U64_MAX)
4176			rc = -EIO;
4177
4178	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4179		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4180
4181	} else {
4182		rc = -EFAULT;
4183	}
4184
4185	return rc;
4186}
4187
4188static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4189{
4190	struct asic_fixed_properties *prop = &hdev->asic_prop;
4191	u64 ddr_bar_addr;
4192	int rc = 0;
4193
4194	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4195		WREG32(addr - CFG_BASE, lower_32_bits(val));
4196		WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
4197
4198	} else if ((addr >= SRAM_BASE_ADDR) &&
4199			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
4200
4201		writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4202					(addr - SRAM_BASE_ADDR));
4203
4204	} else if (addr <=
4205		   DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4206
4207		u64 bar_base_addr = DRAM_PHYS_BASE +
4208				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4209
4210		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4211		if (ddr_bar_addr != U64_MAX) {
4212			writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4213						(addr - bar_base_addr));
4214
4215			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4216							ddr_bar_addr);
4217		}
4218		if (ddr_bar_addr == U64_MAX)
4219			rc = -EIO;
4220
4221	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4222		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4223
4224	} else {
4225		rc = -EFAULT;
4226	}
4227
4228	return rc;
4229}
4230
4231static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
4232{
4233	struct goya_device *goya = hdev->asic_specific;
4234
4235	if (hdev->hard_reset_pending)
4236		return U64_MAX;
4237
4238	return readq(hdev->pcie_bar[DDR_BAR_ID] +
4239			(addr - goya->ddr_bar_cur_addr));
4240}
4241
4242static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4243{
4244	struct goya_device *goya = hdev->asic_specific;
4245
4246	if (hdev->hard_reset_pending)
4247		return;
4248
4249	writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4250			(addr - goya->ddr_bar_cur_addr));
4251}
4252
4253static const char *_goya_get_event_desc(u16 event_type)
4254{
4255	switch (event_type) {
4256	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4257		return "PCIe_if";
4258	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4259	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4260	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4261	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4262	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4263	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4264	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4265	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4266		return "TPC%d_ecc";
4267	case GOYA_ASYNC_EVENT_ID_MME_ECC:
4268		return "MME_ecc";
4269	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4270		return "MME_ecc_ext";
4271	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4272		return "MMU_ecc";
4273	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4274		return "DMA_macro";
4275	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4276		return "DMA_ecc";
4277	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4278		return "CPU_if_ecc";
4279	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4280		return "PSOC_mem";
4281	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4282		return "PSOC_coresight";
4283	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4284		return "SRAM%d";
4285	case GOYA_ASYNC_EVENT_ID_GIC500:
4286		return "GIC500";
4287	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4288		return "PLL%d";
4289	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4290		return "AXI_ecc";
4291	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4292		return "L2_ram_ecc";
4293	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4294		return "PSOC_gpio_05_sw_reset";
4295	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4296		return "PSOC_gpio_10_vrhot_icrit";
4297	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4298		return "PCIe_dec";
4299	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4300	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4301	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4302	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4303	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4304	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4305	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4306	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4307		return "TPC%d_dec";
4308	case GOYA_ASYNC_EVENT_ID_MME_WACS:
4309		return "MME_wacs";
4310	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4311		return "MME_wacsd";
4312	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4313		return "CPU_axi_splitter";
4314	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4315		return "PSOC_axi_dec";
4316	case GOYA_ASYNC_EVENT_ID_PSOC:
4317		return "PSOC";
4318	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4319	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4320	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4321	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4322	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4323	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4324	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4325	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4326		return "TPC%d_krn_err";
4327	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4328		return "TPC%d_cq";
4329	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4330		return "TPC%d_qm";
4331	case GOYA_ASYNC_EVENT_ID_MME_QM:
4332		return "MME_qm";
4333	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4334		return "MME_cq";
4335	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4336		return "DMA%d_qm";
4337	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4338		return "DMA%d_ch";
4339	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4340	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4341	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4342	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4343	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4344	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4345	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4346	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4347		return "TPC%d_bmon_spmu";
4348	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4349		return "DMA_bm_ch%d";
4350	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4351		return "POWER_ENV_S";
4352	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4353		return "POWER_ENV_E";
4354	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4355		return "THERMAL_ENV_S";
4356	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4357		return "THERMAL_ENV_E";
4358	default:
4359		return "N/A";
4360	}
4361}
4362
4363static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
4364{
4365	u8 index;
4366
4367	switch (event_type) {
4368	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4369	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4370	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4371	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4372	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4373	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4374	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4375	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4376		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
4377		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4378		break;
4379	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4380		index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
4381		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4382		break;
4383	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4384		index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
4385		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4386		break;
4387	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4388	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4389	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4390	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4391	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4392	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4393	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4394	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4395		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
4396		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4397		break;
4398	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4399	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4400	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4401	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4402	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4403	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4404	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4405	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4406		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
4407		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4408		break;
4409	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4410		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
4411		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4412		break;
4413	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4414		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
4415		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4416		break;
4417	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4418		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
4419		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4420		break;
4421	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4422		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
4423		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4424		break;
4425	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4426	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4427	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4428	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4429	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4430	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4431	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4432	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4433		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
4434		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4435		break;
4436	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4437		index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
4438		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4439		break;
4440	default:
4441		snprintf(desc, size, _goya_get_event_desc(event_type));
4442		break;
4443	}
4444}
4445
4446static void goya_print_razwi_info(struct hl_device *hdev)
4447{
4448	if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
4449		dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
4450		WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
4451	}
4452
4453	if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
4454		dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
4455		WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
4456	}
4457
4458	if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
4459		dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
4460		WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
4461	}
4462
4463	if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
4464		dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
4465		WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
4466	}
4467}
4468
4469static void goya_print_mmu_error_info(struct hl_device *hdev)
4470{
4471	struct goya_device *goya = hdev->asic_specific;
4472	u64 addr;
4473	u32 val;
4474
4475	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4476		return;
4477
4478	val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
4479	if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
4480		addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
4481		addr <<= 32;
4482		addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
4483
4484		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
4485					addr);
4486
4487		WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
4488	}
4489}
4490
4491static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
4492				bool razwi)
4493{
4494	char desc[20] = "";
4495
4496	goya_get_event_desc(event_type, desc, sizeof(desc));
4497	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
4498		event_type, desc);
4499
4500	if (razwi) {
4501		goya_print_razwi_info(hdev);
4502		goya_print_mmu_error_info(hdev);
4503	}
4504}
4505
4506static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
4507		size_t irq_arr_size)
4508{
4509	struct cpucp_unmask_irq_arr_packet *pkt;
4510	size_t total_pkt_size;
4511	long result;
4512	int rc;
4513	int irq_num_entries, irq_arr_index;
4514	__le32 *goya_irq_arr;
4515
4516	total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
4517			irq_arr_size;
4518
4519	/* data should be aligned to 8 bytes in order to CPU-CP to copy it */
4520	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
4521
4522	/* total_pkt_size is casted to u16 later on */
4523	if (total_pkt_size > USHRT_MAX) {
4524		dev_err(hdev->dev, "too many elements in IRQ array\n");
4525		return -EINVAL;
4526	}
4527
4528	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
4529	if (!pkt)
4530		return -ENOMEM;
4531
4532	irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
4533	pkt->length = cpu_to_le32(irq_num_entries);
4534
4535	/* We must perform any necessary endianness conversation on the irq
4536	 * array being passed to the goya hardware
4537	 */
4538	for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
4539			irq_arr_index < irq_num_entries ; irq_arr_index++)
4540		goya_irq_arr[irq_arr_index] =
4541				cpu_to_le32(irq_arr[irq_arr_index]);
4542
4543	pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
4544						CPUCP_PKT_CTL_OPCODE_SHIFT);
4545
4546	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
4547						total_pkt_size,	0, &result);
4548
4549	if (rc)
4550		dev_err(hdev->dev, "failed to unmask IRQ array\n");
4551
4552	kfree(pkt);
4553
4554	return rc;
4555}
4556
4557static int goya_soft_reset_late_init(struct hl_device *hdev)
4558{
4559	/*
4560	 * Unmask all IRQs since some could have been received
4561	 * during the soft reset
4562	 */
4563	return goya_unmask_irq_arr(hdev, goya_all_events,
4564					sizeof(goya_all_events));
4565}
4566
4567static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
4568{
4569	struct cpucp_packet pkt;
4570	long result;
4571	int rc;
4572
4573	memset(&pkt, 0, sizeof(pkt));
4574
4575	pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
4576				CPUCP_PKT_CTL_OPCODE_SHIFT);
4577	pkt.value = cpu_to_le64(event_type);
4578
4579	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
4580						0, &result);
4581
4582	if (rc)
4583		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
4584
4585	return rc;
4586}
4587
4588static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
4589{
4590	switch (event_type) {
4591	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4592		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
4593		dev_info_ratelimited(hdev->dev,
4594			"Clock throttling due to power consumption\n");
4595		break;
4596	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4597		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
4598		dev_info_ratelimited(hdev->dev,
4599			"Power envelop is safe, back to optimal clock\n");
4600		break;
4601	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4602		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
4603		dev_info_ratelimited(hdev->dev,
4604			"Clock throttling due to overheating\n");
4605		break;
4606	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4607		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
4608		dev_info_ratelimited(hdev->dev,
4609			"Thermal envelop is safe, back to optimal clock\n");
4610		break;
4611
4612	default:
4613		dev_err(hdev->dev, "Received invalid clock change event %d\n",
4614			event_type);
4615		break;
4616	}
4617}
4618
4619void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
4620{
4621	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
4622	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
4623				>> EQ_CTL_EVENT_TYPE_SHIFT);
4624	struct goya_device *goya = hdev->asic_specific;
4625
4626	if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) {
4627		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
4628				event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1);
4629		return;
4630	}
4631
4632	goya->events_stat[event_type]++;
4633	goya->events_stat_aggregate[event_type]++;
4634
4635	switch (event_type) {
4636	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4637	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4638	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4639	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4640	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4641	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4642	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4643	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4644	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4645	case GOYA_ASYNC_EVENT_ID_MME_ECC:
4646	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4647	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4648	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4649	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4650	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4651	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4652	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4653	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4654	case GOYA_ASYNC_EVENT_ID_GIC500:
4655	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4656	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4657	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4658	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4659		goya_print_irq_info(hdev, event_type, false);
4660		if (hdev->hard_reset_on_fw_events)
4661			hl_device_reset(hdev, true, false);
4662		break;
4663
4664	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4665	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4666	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4667	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4668	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4669	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4670	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4671	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4672	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4673	case GOYA_ASYNC_EVENT_ID_MME_WACS:
4674	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4675	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4676	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4677	case GOYA_ASYNC_EVENT_ID_PSOC:
4678	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4679	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4680	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4681	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4682	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4683	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4684	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4685	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4686	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4687	case GOYA_ASYNC_EVENT_ID_MME_QM:
4688	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4689	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4690	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4691		goya_print_irq_info(hdev, event_type, true);
4692		goya_unmask_irq(hdev, event_type);
4693		break;
4694
4695	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4696	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4697	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4698	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4699	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4700	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4701	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4702	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4703	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4704	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4705		goya_print_irq_info(hdev, event_type, false);
4706		goya_unmask_irq(hdev, event_type);
4707		break;
4708
4709	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4710	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4711	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4712	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4713		goya_print_clk_change_info(hdev, event_type);
4714		goya_unmask_irq(hdev, event_type);
4715		break;
4716
4717	default:
4718		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
4719				event_type);
4720		break;
4721	}
4722}
4723
4724void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
4725{
4726	struct goya_device *goya = hdev->asic_specific;
4727
4728	if (aggregate) {
4729		*size = (u32) sizeof(goya->events_stat_aggregate);
4730		return goya->events_stat_aggregate;
4731	}
4732
4733	*size = (u32) sizeof(goya->events_stat);
4734	return goya->events_stat;
4735}
4736
4737static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
4738				u64 val, bool is_dram)
4739{
4740	struct packet_lin_dma *lin_dma_pkt;
4741	struct hl_cs_job *job;
4742	u32 cb_size, ctl;
4743	struct hl_cb *cb;
4744	int rc, lin_dma_pkts_cnt;
4745
4746	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
4747	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
4748						sizeof(struct packet_msg_prot);
4749	cb = hl_cb_kernel_create(hdev, cb_size, false);
4750	if (!cb)
4751		return -ENOMEM;
4752
4753	lin_dma_pkt = cb->kernel_address;
4754
4755	do {
4756		memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4757
4758		ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
4759				(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4760				(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
4761				(1 << GOYA_PKT_CTL_RB_SHIFT) |
4762				(1 << GOYA_PKT_CTL_MB_SHIFT));
4763		ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
4764				GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
4765		lin_dma_pkt->ctl = cpu_to_le32(ctl);
4766
4767		lin_dma_pkt->src_addr = cpu_to_le64(val);
4768		lin_dma_pkt->dst_addr = cpu_to_le64(addr);
4769		if (lin_dma_pkts_cnt > 1)
4770			lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
4771		else
4772			lin_dma_pkt->tsize = cpu_to_le32(size);
4773
4774		size -= SZ_2G;
4775		addr += SZ_2G;
4776		lin_dma_pkt++;
4777	} while (--lin_dma_pkts_cnt);
4778
4779	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4780	if (!job) {
4781		dev_err(hdev->dev, "Failed to allocate a new job\n");
4782		rc = -ENOMEM;
4783		goto release_cb;
4784	}
4785
4786	job->id = 0;
4787	job->user_cb = cb;
4788	job->user_cb->cs_cnt++;
4789	job->user_cb_size = cb_size;
4790	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
4791	job->patched_cb = job->user_cb;
4792	job->job_cb_size = job->user_cb_size;
4793
4794	hl_debugfs_add_job(hdev, job);
4795
4796	rc = goya_send_job_on_qman0(hdev, job);
4797
4798	hl_debugfs_remove_job(hdev, job);
4799	kfree(job);
4800	cb->cs_cnt--;
4801
4802release_cb:
4803	hl_cb_put(cb);
4804	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4805
4806	return rc;
4807}
4808
4809int goya_context_switch(struct hl_device *hdev, u32 asid)
4810{
4811	struct asic_fixed_properties *prop = &hdev->asic_prop;
4812	u64 addr = prop->sram_base_address, sob_addr;
4813	u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
4814	u64 val = 0x7777777777777777ull;
4815	int rc, dma_id;
4816	u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
4817					mmDMA_CH_0_WR_COMP_ADDR_LO;
4818
4819	rc = goya_memset_device_memory(hdev, addr, size, val, false);
4820	if (rc) {
4821		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4822		return rc;
4823	}
4824
4825	/* we need to reset registers that the user is allowed to change */
4826	sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
4827	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));
4828
4829	for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
4830		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
4831							(dma_id - 1) * 4;
4832		WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
4833						lower_32_bits(sob_addr));
4834	}
4835
4836	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
4837
4838	goya_mmu_prepare(hdev, asid);
4839
4840	goya_clear_sm_regs(hdev);
4841
4842	return 0;
4843}
4844
4845static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
4846{
4847	struct asic_fixed_properties *prop = &hdev->asic_prop;
4848	struct goya_device *goya = hdev->asic_specific;
4849	u64 addr = prop->mmu_pgt_addr;
4850	u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
4851			MMU_CACHE_MNG_SIZE;
4852
4853	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4854		return 0;
4855
4856	return goya_memset_device_memory(hdev, addr, size, 0, true);
4857}
4858
4859static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
4860{
4861	struct goya_device *goya = hdev->asic_specific;
4862	u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
4863	u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
4864	u64 val = 0x9999999999999999ull;
4865
4866	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4867		return 0;
4868
4869	return goya_memset_device_memory(hdev, addr, size, val, true);
4870}
4871
4872static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
4873{
4874	struct asic_fixed_properties *prop = &hdev->asic_prop;
4875	struct goya_device *goya = hdev->asic_specific;
4876	s64 off, cpu_off;
4877	int rc;
4878
4879	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4880		return 0;
4881
4882	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
4883		rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
4884				prop->dram_base_address + off, PAGE_SIZE_2MB,
4885				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
4886		if (rc) {
4887			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
4888				prop->dram_base_address + off);
4889			goto unmap;
4890		}
4891	}
4892
4893	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
4894		rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
4895			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);
4896
4897		if (rc) {
4898			dev_err(hdev->dev,
4899				"Map failed for CPU accessible memory\n");
4900			off -= PAGE_SIZE_2MB;
4901			goto unmap;
4902		}
4903	} else {
4904		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
4905			rc = hl_mmu_map(hdev->kernel_ctx,
4906				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4907				hdev->cpu_accessible_dma_address + cpu_off,
4908				PAGE_SIZE_4KB, true);
4909			if (rc) {
4910				dev_err(hdev->dev,
4911					"Map failed for CPU accessible memory\n");
4912				cpu_off -= PAGE_SIZE_4KB;
4913				goto unmap_cpu;
4914			}
4915		}
4916	}
4917
4918	goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
4919	goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
4920	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
4921	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);
4922
4923	/* Make sure configuration is flushed to device */
4924	RREG32(mmCPU_IF_AWUSER_OVR_EN);
4925
4926	goya->device_cpu_mmu_mappings_done = true;
4927
4928	return 0;
4929
4930unmap_cpu:
4931	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
4932		if (hl_mmu_unmap(hdev->kernel_ctx,
4933				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4934				PAGE_SIZE_4KB, true))
4935			dev_warn_ratelimited(hdev->dev,
4936				"failed to unmap address 0x%llx\n",
4937				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
4938unmap:
4939	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
4940		if (hl_mmu_unmap(hdev->kernel_ctx,
4941				prop->dram_base_address + off, PAGE_SIZE_2MB,
4942				true))
4943			dev_warn_ratelimited(hdev->dev,
4944				"failed to unmap address 0x%llx\n",
4945				prop->dram_base_address + off);
4946
4947	return rc;
4948}
4949
4950void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
4951{
4952	struct asic_fixed_properties *prop = &hdev->asic_prop;
4953	struct goya_device *goya = hdev->asic_specific;
4954	u32 off, cpu_off;
4955
4956	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4957		return;
4958
4959	if (!goya->device_cpu_mmu_mappings_done)
4960		return;
4961
4962	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
4963	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
4964
4965	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
4966		if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
4967				PAGE_SIZE_2MB, true))
4968			dev_warn(hdev->dev,
4969				"Failed to unmap CPU accessible memory\n");
4970	} else {
4971		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
4972			if (hl_mmu_unmap(hdev->kernel_ctx,
4973					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4974					PAGE_SIZE_4KB,
4975					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
4976				dev_warn_ratelimited(hdev->dev,
4977					"failed to unmap address 0x%llx\n",
4978					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
4979	}
4980
4981	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
4982		if (hl_mmu_unmap(hdev->kernel_ctx,
4983				prop->dram_base_address + off, PAGE_SIZE_2MB,
4984				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
4985			dev_warn_ratelimited(hdev->dev,
4986					"Failed to unmap address 0x%llx\n",
4987					prop->dram_base_address + off);
4988
4989	goya->device_cpu_mmu_mappings_done = false;
4990}
4991
4992static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
4993{
4994	struct goya_device *goya = hdev->asic_specific;
4995	int i;
4996
4997	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4998		return;
4999
5000	if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
5001		WARN(1, "asid %u is too big\n", asid);
5002		return;
5003	}
5004
5005	/* zero the MMBP and ASID bits and then set the ASID */
5006	for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
5007		goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
5008}
5009
5010static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5011					u32 flags)
5012{
5013	struct goya_device *goya = hdev->asic_specific;
5014	u32 status, timeout_usec;
5015	int rc;
5016
5017	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
5018		hdev->hard_reset_pending)
5019		return 0;
5020
5021	/* no need in L1 only invalidation in Goya */
5022	if (!is_hard)
5023		return 0;
5024
5025	if (hdev->pldm)
5026		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
5027	else
5028		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5029
5030	mutex_lock(&hdev->mmu_cache_lock);
5031
5032	/* L0 & L1 invalidation */
5033	WREG32(mmSTLB_INV_ALL_START, 1);
5034
5035	rc = hl_poll_timeout(
5036		hdev,
5037		mmSTLB_INV_ALL_START,
5038		status,
5039		!status,
5040		1000,
5041		timeout_usec);
5042
5043	mutex_unlock(&hdev->mmu_cache_lock);
5044
5045	if (rc) {
5046		dev_err_ratelimited(hdev->dev,
5047					"MMU cache invalidation timeout\n");
5048		hl_device_reset(hdev, true, false);
5049	}
5050
5051	return rc;
5052}
5053
5054static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
5055				bool is_hard, u32 asid, u64 va, u64 size)
5056{
5057	struct goya_device *goya = hdev->asic_specific;
5058	u32 status, timeout_usec, inv_data, pi;
5059	int rc;
5060
5061	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
5062		hdev->hard_reset_pending)
5063		return 0;
5064
5065	/* no need in L1 only invalidation in Goya */
5066	if (!is_hard)
5067		return 0;
5068
5069	if (hdev->pldm)
5070		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
5071	else
5072		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5073
5074	mutex_lock(&hdev->mmu_cache_lock);
5075
5076	/*
5077	 * TODO: currently invalidate entire L0 & L1 as in regular hard
5078	 * invalidation. Need to apply invalidation of specific cache lines with
5079	 * mask of ASID & VA & size.
5080	 * Note that L1 with be flushed entirely in any case.
5081	 */
5082
5083	/* L0 & L1 invalidation */
5084	inv_data = RREG32(mmSTLB_CACHE_INV);
5085	/* PI is 8 bit */
5086	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
5087	WREG32(mmSTLB_CACHE_INV,
5088			(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
5089
5090	rc = hl_poll_timeout(
5091		hdev,
5092		mmSTLB_INV_CONSUMER_INDEX,
5093		status,
5094		status == pi,
5095		1000,
5096		timeout_usec);
5097
5098	mutex_unlock(&hdev->mmu_cache_lock);
5099
5100	if (rc) {
5101		dev_err_ratelimited(hdev->dev,
5102					"MMU cache invalidation timeout\n");
5103		hl_device_reset(hdev, true, false);
5104	}
5105
5106	return rc;
5107}
5108
5109int goya_send_heartbeat(struct hl_device *hdev)
5110{
5111	struct goya_device *goya = hdev->asic_specific;
5112
5113	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5114		return 0;
5115
5116	return hl_fw_send_heartbeat(hdev);
5117}
5118
5119int goya_cpucp_info_get(struct hl_device *hdev)
5120{
5121	struct goya_device *goya = hdev->asic_specific;
5122	struct asic_fixed_properties *prop = &hdev->asic_prop;
5123	u64 dram_size;
5124	int rc;
5125
5126	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5127		return 0;
5128
5129	rc = hl_fw_cpucp_info_get(hdev);
5130	if (rc)
5131		return rc;
5132
5133	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
5134	if (dram_size) {
5135		if ((!is_power_of_2(dram_size)) ||
5136				(dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
5137			dev_err(hdev->dev,
5138				"F/W reported invalid DRAM size %llu. Trying to use default size\n",
5139				dram_size);
5140			dram_size = DRAM_PHYS_DEFAULT_SIZE;
5141		}
5142
5143		prop->dram_size = dram_size;
5144		prop->dram_end_address = prop->dram_base_address + dram_size;
5145	}
5146
5147	if (!strlen(prop->cpucp_info.card_name))
5148		strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
5149				CARD_NAME_MAX_LEN);
5150
5151	return 0;
5152}
5153
5154static void goya_set_clock_gating(struct hl_device *hdev)
5155{
5156	/* clock gating not supported in Goya */
5157}
5158
5159static void goya_disable_clock_gating(struct hl_device *hdev)
5160{
5161	/* clock gating not supported in Goya */
5162}
5163
5164static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
5165				struct seq_file *s)
5166{
5167	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
5168	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
5169	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
5170		mme_arch_sts;
5171	bool is_idle = true, is_eng_idle;
5172	u64 offset;
5173	int i;
5174
5175	if (s)
5176		seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
5177				"---  -------  ------------  -------------\n");
5178
5179	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
5180
5181	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
5182		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
5183		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
5184		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
5185				IS_DMA_IDLE(dma_core_sts0);
5186		is_idle &= is_eng_idle;
5187
5188		if (mask)
5189			*mask |= ((u64) !is_eng_idle) <<
5190						(GOYA_ENGINE_ID_DMA_0 + i);
5191		if (s)
5192			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
5193					qm_glbl_sts0, dma_core_sts0);
5194	}
5195
5196	if (s)
5197		seq_puts(s,
5198			"\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
5199			"---  -------  ------------  --------------  ----------\n");
5200
5201	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
5202
5203	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
5204		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
5205		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
5206		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
5207		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
5208				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
5209				IS_TPC_IDLE(tpc_cfg_sts);
5210		is_idle &= is_eng_idle;
5211
5212		if (mask)
5213			*mask |= ((u64) !is_eng_idle) <<
5214						(GOYA_ENGINE_ID_TPC_0 + i);
5215		if (s)
5216			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
5217				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
5218	}
5219
5220	if (s)
5221		seq_puts(s,
5222			"\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
5223			"---  -------  ------------  --------------  -----------\n");
5224
5225	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
5226	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
5227	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
5228	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
5229			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
5230			IS_MME_IDLE(mme_arch_sts);
5231	is_idle &= is_eng_idle;
5232
5233	if (mask)
5234		*mask |= ((u64) !is_eng_idle) << GOYA_ENGINE_ID_MME_0;
5235	if (s) {
5236		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
5237				cmdq_glbl_sts0, mme_arch_sts);
5238		seq_puts(s, "\n");
5239	}
5240
5241	return is_idle;
5242}
5243
5244static void goya_hw_queues_lock(struct hl_device *hdev)
5245	__acquires(&goya->hw_queues_lock)
5246{
5247	struct goya_device *goya = hdev->asic_specific;
5248
5249	spin_lock(&goya->hw_queues_lock);
5250}
5251
5252static void goya_hw_queues_unlock(struct hl_device *hdev)
5253	__releases(&goya->hw_queues_lock)
5254{
5255	struct goya_device *goya = hdev->asic_specific;
5256
5257	spin_unlock(&goya->hw_queues_lock);
5258}
5259
5260static u32 goya_get_pci_id(struct hl_device *hdev)
5261{
5262	return hdev->pdev->device;
5263}
5264
5265static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
5266				size_t max_size)
5267{
5268	struct goya_device *goya = hdev->asic_specific;
5269
5270	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5271		return 0;
5272
5273	return hl_fw_get_eeprom_data(hdev, data, max_size);
5274}
5275
5276static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
5277{
5278	return RREG32(mmHW_STATE);
5279}
5280
5281static int goya_ctx_init(struct hl_ctx *ctx)
5282{
5283	return 0;
5284}
5285
5286u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
5287{
5288	return cq_idx;
5289}
5290
5291static u32 goya_get_signal_cb_size(struct hl_device *hdev)
5292{
5293	return 0;
5294}
5295
5296static u32 goya_get_wait_cb_size(struct hl_device *hdev)
5297{
5298	return 0;
5299}
5300
5301static void goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
5302{
5303
5304}
5305
5306static void goya_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
5307			u16 sob_val, u16 mon_id, u32 q_idx)
5308{
5309
5310}
5311
5312static void goya_reset_sob(struct hl_device *hdev, void *data)
5313{
5314
5315}
5316
5317static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
5318{
5319	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
5320							HL_POWER9_HOST_MAGIC) {
5321		dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n");
5322		hdev->power9_64bit_dma_enable = 1;
5323		hdev->dma_mask = 64;
5324	} else {
5325		dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n");
5326		hdev->power9_64bit_dma_enable = 0;
5327		hdev->dma_mask = 48;
5328	}
5329}
5330
5331u64 goya_get_device_time(struct hl_device *hdev)
5332{
5333	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
5334
5335	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
5336}
5337
5338static const struct hl_asic_funcs goya_funcs = {
5339	.early_init = goya_early_init,
5340	.early_fini = goya_early_fini,
5341	.late_init = goya_late_init,
5342	.late_fini = goya_late_fini,
5343	.sw_init = goya_sw_init,
5344	.sw_fini = goya_sw_fini,
5345	.hw_init = goya_hw_init,
5346	.hw_fini = goya_hw_fini,
5347	.halt_engines = goya_halt_engines,
5348	.suspend = goya_suspend,
5349	.resume = goya_resume,
5350	.cb_mmap = goya_cb_mmap,
5351	.ring_doorbell = goya_ring_doorbell,
5352	.pqe_write = goya_pqe_write,
5353	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
5354	.asic_dma_free_coherent = goya_dma_free_coherent,
5355	.get_int_queue_base = goya_get_int_queue_base,
5356	.test_queues = goya_test_queues,
5357	.asic_dma_pool_zalloc = goya_dma_pool_zalloc,
5358	.asic_dma_pool_free = goya_dma_pool_free,
5359	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
5360	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
5361	.hl_dma_unmap_sg = goya_dma_unmap_sg,
5362	.cs_parser = goya_cs_parser,
5363	.asic_dma_map_sg = goya_dma_map_sg,
5364	.get_dma_desc_list_size = goya_get_dma_desc_list_size,
5365	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
5366	.update_eq_ci = goya_update_eq_ci,
5367	.context_switch = goya_context_switch,
5368	.restore_phase_topology = goya_restore_phase_topology,
5369	.debugfs_read32 = goya_debugfs_read32,
5370	.debugfs_write32 = goya_debugfs_write32,
5371	.debugfs_read64 = goya_debugfs_read64,
5372	.debugfs_write64 = goya_debugfs_write64,
5373	.add_device_attr = goya_add_device_attr,
5374	.handle_eqe = goya_handle_eqe,
5375	.set_pll_profile = goya_set_pll_profile,
5376	.get_events_stat = goya_get_events_stat,
5377	.read_pte = goya_read_pte,
5378	.write_pte = goya_write_pte,
5379	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
5380	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
5381	.send_heartbeat = goya_send_heartbeat,
5382	.set_clock_gating = goya_set_clock_gating,
5383	.disable_clock_gating = goya_disable_clock_gating,
5384	.debug_coresight = goya_debug_coresight,
5385	.is_device_idle = goya_is_device_idle,
5386	.soft_reset_late_init = goya_soft_reset_late_init,
5387	.hw_queues_lock = goya_hw_queues_lock,
5388	.hw_queues_unlock = goya_hw_queues_unlock,
5389	.get_pci_id = goya_get_pci_id,
5390	.get_eeprom_data = goya_get_eeprom_data,
5391	.send_cpu_message = goya_send_cpu_message,
5392	.get_hw_state = goya_get_hw_state,
5393	.pci_bars_map = goya_pci_bars_map,
5394	.init_iatu = goya_init_iatu,
5395	.rreg = hl_rreg,
5396	.wreg = hl_wreg,
5397	.halt_coresight = goya_halt_coresight,
5398	.ctx_init = goya_ctx_init,
5399	.get_clk_rate = goya_get_clk_rate,
5400	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
5401	.read_device_fw_version = goya_read_device_fw_version,
5402	.load_firmware_to_device = goya_load_firmware_to_device,
5403	.load_boot_fit_to_device = goya_load_boot_fit_to_device,
5404	.get_signal_cb_size = goya_get_signal_cb_size,
5405	.get_wait_cb_size = goya_get_wait_cb_size,
5406	.gen_signal_cb = goya_gen_signal_cb,
5407	.gen_wait_cb = goya_gen_wait_cb,
5408	.reset_sob = goya_reset_sob,
5409	.set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
5410	.get_device_time = goya_get_device_time
5411};
5412
5413/*
5414 * goya_set_asic_funcs - set Goya function pointers
5415 *
5416 * @*hdev: pointer to hl_device structure
5417 *
5418 */
5419void goya_set_asic_funcs(struct hl_device *hdev)
5420{
5421	hdev->asic_funcs = &goya_funcs;
5422}
5423