// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "goyaP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_0.h"
#include "../include/goya/asic_reg/goya_masks.h"
#include "../include/goya/goya_reg_map.h"

#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * GOYA security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers (when the MMU is enabled, the DMA RR does NOT
 *          protect the host)
 *        - MMU
 *
 * 2. DRAM is protected by:
 *        - Range registers (protect the first 512MB)
 *        - MMU (isolation between users)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * When the MMU is disabled:
 *
 * QMAN DMA: PQ, CQ, CP and DMA are secured.
 * PQ, CB and the data are on the host.
 *
 * QMAN TPC/MME:
 * PQ, CQ and CP are not secured.
 * PQ, CB and the data are in SRAM/DRAM.
 *
 * Since QMAN DMA is secured, the driver parses the DMA CB:
 *     - checks the DMA pointer
 *     - WREG, MSG_PROT are not allowed.
 *     - MSG_LONG/SHORT are allowed.
 *
 * A read/write transaction by the QMAN to a protected area will succeed if
 * and only if the QMAN's CP is secured and MSG_PROT is used.
 *
 *
 * When the MMU is enabled:
 *
 * QMAN DMA: PQ, CQ and CP are secured.
 * MMU is set to bypass on the Secure props register of the QMAN.
 * The reasons we don't enable MMU for PQ, CQ and CP are:
 *     - PQ entry is in kernel address space and the driver doesn't map it.
 *     - CP writes to MSIX register and to kernel address space (completion
 *       queue).
 *
 * DMA itself is not secured, but because CP is secured, the driver still
 * needs to parse the CB; it just doesn't need to check the DMA addresses.
 *
 * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA
 * and the driver doesn't map memory in the MMU.
 *
 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU-disabled
 * mode).
 *
 * DMA RR does NOT protect the host because DMA is not secured.
 *
 */

#define GOYA_BOOT_FIT_FILE	"habanalabs/goya/goya-boot-fit.itb"
#define GOYA_LINUX_FW_FILE	"habanalabs/goya/goya-fit.itb"

#define GOYA_MMU_REGS_NUM		63

#define GOYA_DMA_POOL_BLK_SIZE		0x100		/* 256 bytes */

#define GOYA_RESET_TIMEOUT_MSEC		500		/* 500ms */
#define GOYA_PLDM_RESET_TIMEOUT_MSEC	20000		/* 20s */
#define GOYA_RESET_WAIT_MSEC		1		/* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC	100		/* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GOYA_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
#define GOYA_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GOYA_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GOYA_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GOYA_QMAN0_FENCE_VAL		0xD169B243

#define GOYA_MAX_STRING_LEN		20

#define GOYA_CB_POOL_CB_CNT		512
#define GOYA_CB_POOL_CB_SIZE		0x20000		/* 128KB */

#define IS_QM_IDLE(engine, qm_glbl_sts0) \
	(((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
	(((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
			engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

#define IS_DMA_IDLE(dma_core_sts0) \
	!((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
	(((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
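
/*
 * Illustrative combination of the idle checks above (a hypothetical helper,
 * not part of the original driver flow): a DMA engine is only fully idle
 * when both its QMAN and its channel report idle. The arguments are assumed
 * to be raw reads of the matching DMA_QM_x_GLBL_STS0 and DMA_CH_x_STS0
 * registers.
 */
static inline bool goya_dma_engine_idle_example(u32 qm_glbl_sts0,
						u32 dma_core_sts0)
{
	/* Both conditions must hold; either mask alone is not enough */
	return IS_DMA_QM_IDLE(qm_glbl_sts0) && IS_DMA_IDLE(dma_core_sts0);
}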

static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
		"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
		"goya cq 4", "goya cpu eq"
};

static u16 goya_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
		return true;
	default:
		return false;
	}
}
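
/*
 * Sketch tying the two tables above together (a hypothetical helper, not
 * the actual CB parser): validate the opcode first and only then trust the
 * size lookup, so a corrupt opcode can never index goya_packet_sizes out of
 * bounds.
 */
static inline u16 goya_packet_size_example(enum packet_id id)
{
	/* 0 signals an invalid opcode to the (hypothetical) caller */
	return validate_packet_id(id) ? goya_packet_sizes[id] : 0;
}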

static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
	mmTPC0_QM_GLBL_SECURE_PROPS,
	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC0_CFG_ARUSER,
	mmTPC0_CFG_AWUSER,
	mmTPC1_QM_GLBL_SECURE_PROPS,
	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC1_CFG_ARUSER,
	mmTPC1_CFG_AWUSER,
	mmTPC2_QM_GLBL_SECURE_PROPS,
	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC2_CFG_ARUSER,
	mmTPC2_CFG_AWUSER,
	mmTPC3_QM_GLBL_SECURE_PROPS,
	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC3_CFG_ARUSER,
	mmTPC3_CFG_AWUSER,
	mmTPC4_QM_GLBL_SECURE_PROPS,
	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC4_CFG_ARUSER,
	mmTPC4_CFG_AWUSER,
	mmTPC5_QM_GLBL_SECURE_PROPS,
	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC5_CFG_ARUSER,
	mmTPC5_CFG_AWUSER,
	mmTPC6_QM_GLBL_SECURE_PROPS,
	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC6_CFG_ARUSER,
	mmTPC6_CFG_AWUSER,
	mmTPC7_QM_GLBL_SECURE_PROPS,
	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC7_CFG_ARUSER,
	mmTPC7_CFG_AWUSER,
	mmMME_QM_GLBL_SECURE_PROPS,
	mmMME_QM_GLBL_NON_SECURE_PROPS,
	mmMME_CMDQ_GLBL_SECURE_PROPS,
	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
	mmMME_SBA_CONTROL_DATA,
	mmMME_SBB_CONTROL_DATA,
	mmMME_SBC_CONTROL_DATA,
	mmMME_WBC_CONTROL_DATA,
	mmPCIE_WRAP_PSOC_ARUSER,
	mmPCIE_WRAP_PSOC_AWUSER
};

static u32 goya_all_events[] = {
	GOYA_ASYNC_EVENT_ID_PCIE_IF,
	GOYA_ASYNC_EVENT_ID_TPC0_ECC,
	GOYA_ASYNC_EVENT_ID_TPC1_ECC,
	GOYA_ASYNC_EVENT_ID_TPC2_ECC,
	GOYA_ASYNC_EVENT_ID_TPC3_ECC,
	GOYA_ASYNC_EVENT_ID_TPC4_ECC,
	GOYA_ASYNC_EVENT_ID_TPC5_ECC,
	GOYA_ASYNC_EVENT_ID_TPC6_ECC,
	GOYA_ASYNC_EVENT_ID_TPC7_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
	GOYA_ASYNC_EVENT_ID_MMU_ECC,
	GOYA_ASYNC_EVENT_ID_DMA_MACRO,
	GOYA_ASYNC_EVENT_ID_DMA_ECC,
	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_MEM,
	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
	GOYA_ASYNC_EVENT_ID_SRAM0,
	GOYA_ASYNC_EVENT_ID_SRAM1,
	GOYA_ASYNC_EVENT_ID_SRAM2,
	GOYA_ASYNC_EVENT_ID_SRAM3,
	GOYA_ASYNC_EVENT_ID_SRAM4,
	GOYA_ASYNC_EVENT_ID_SRAM5,
	GOYA_ASYNC_EVENT_ID_SRAM6,
	GOYA_ASYNC_EVENT_ID_SRAM7,
	GOYA_ASYNC_EVENT_ID_SRAM8,
	GOYA_ASYNC_EVENT_ID_SRAM9,
	GOYA_ASYNC_EVENT_ID_SRAM10,
	GOYA_ASYNC_EVENT_ID_SRAM11,
	GOYA_ASYNC_EVENT_ID_SRAM12,
	GOYA_ASYNC_EVENT_ID_SRAM13,
	GOYA_ASYNC_EVENT_ID_SRAM14,
	GOYA_ASYNC_EVENT_ID_SRAM15,
	GOYA_ASYNC_EVENT_ID_SRAM16,
	GOYA_ASYNC_EVENT_ID_SRAM17,
	GOYA_ASYNC_EVENT_ID_SRAM18,
	GOYA_ASYNC_EVENT_ID_SRAM19,
	GOYA_ASYNC_EVENT_ID_SRAM20,
	GOYA_ASYNC_EVENT_ID_SRAM21,
	GOYA_ASYNC_EVENT_ID_SRAM22,
	GOYA_ASYNC_EVENT_ID_SRAM23,
	GOYA_ASYNC_EVENT_ID_SRAM24,
	GOYA_ASYNC_EVENT_ID_SRAM25,
	GOYA_ASYNC_EVENT_ID_SRAM26,
	GOYA_ASYNC_EVENT_ID_SRAM27,
	GOYA_ASYNC_EVENT_ID_SRAM28,
	GOYA_ASYNC_EVENT_ID_SRAM29,
	GOYA_ASYNC_EVENT_ID_GIC500,
	GOYA_ASYNC_EVENT_ID_PLL0,
	GOYA_ASYNC_EVENT_ID_PLL1,
	GOYA_ASYNC_EVENT_ID_PLL3,
	GOYA_ASYNC_EVENT_ID_PLL4,
	GOYA_ASYNC_EVENT_ID_PLL5,
	GOYA_ASYNC_EVENT_ID_PLL6,
	GOYA_ASYNC_EVENT_ID_AXI_ECC,
	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
	GOYA_ASYNC_EVENT_ID_PCIE_DEC,
	GOYA_ASYNC_EVENT_ID_TPC0_DEC,
	GOYA_ASYNC_EVENT_ID_TPC1_DEC,
	GOYA_ASYNC_EVENT_ID_TPC2_DEC,
	GOYA_ASYNC_EVENT_ID_TPC3_DEC,
	GOYA_ASYNC_EVENT_ID_TPC4_DEC,
	GOYA_ASYNC_EVENT_ID_TPC5_DEC,
	GOYA_ASYNC_EVENT_ID_TPC6_DEC,
	GOYA_ASYNC_EVENT_ID_TPC7_DEC,
	GOYA_ASYNC_EVENT_ID_MME_WACS,
	GOYA_ASYNC_EVENT_ID_MME_WACSD,
	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
	GOYA_ASYNC_EVENT_ID_PSOC,
	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC0_QM,
	GOYA_ASYNC_EVENT_ID_TPC1_QM,
	GOYA_ASYNC_EVENT_ID_TPC2_QM,
	GOYA_ASYNC_EVENT_ID_TPC3_QM,
	GOYA_ASYNC_EVENT_ID_TPC4_QM,
	GOYA_ASYNC_EVENT_ID_TPC5_QM,
	GOYA_ASYNC_EVENT_ID_TPC6_QM,
	GOYA_ASYNC_EVENT_ID_TPC7_QM,
	GOYA_ASYNC_EVENT_ID_MME_QM,
	GOYA_ASYNC_EVENT_ID_MME_CMDQ,
	GOYA_ASYNC_EVENT_ID_DMA0_QM,
	GOYA_ASYNC_EVENT_ID_DMA1_QM,
	GOYA_ASYNC_EVENT_ID_DMA2_QM,
	GOYA_ASYNC_EVENT_ID_DMA3_QM,
	GOYA_ASYNC_EVENT_ID_DMA4_QM,
	GOYA_ASYNC_EVENT_ID_DMA0_CH,
	GOYA_ASYNC_EVENT_ID_DMA1_CH,
	GOYA_ASYNC_EVENT_ID_DMA2_CH,
	GOYA_ASYNC_EVENT_ID_DMA3_CH,
	GOYA_ASYNC_EVENT_ID_DMA4_CH,
	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};

static s64 goya_state_dump_specs_props[SP_MAX] = {0};

static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);

int goya_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int i;

	prop->max_queues = GOYA_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
		prop->hw_queues_props[i].driver_only = 0;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
		prop->hw_queues_props[i].driver_only = 1;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
			NUMBER_OF_INT_HW_QUEUES; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
		prop->hw_queues_props[i].driver_only = 0;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
	}

	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
						SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = true;

	prop->dmmu.hop_shifts[MMU_HOP0] = MMU_V1_0_HOP0_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP1] = MMU_V1_0_HOP1_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP2] = MMU_V1_0_HOP2_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP3] = MMU_V1_0_HOP3_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP4] = MMU_V1_0_HOP4_SHIFT;
	prop->dmmu.hop_masks[MMU_HOP0] = MMU_V1_0_HOP0_MASK;
	prop->dmmu.hop_masks[MMU_HOP1] = MMU_V1_0_HOP1_MASK;
	prop->dmmu.hop_masks[MMU_HOP2] = MMU_V1_0_HOP2_MASK;
	prop->dmmu.hop_masks[MMU_HOP3] = MMU_V1_0_HOP3_MASK;
	prop->dmmu.hop_masks[MMU_HOP4] = MMU_V1_0_HOP4_MASK;
	prop->dmmu.start_addr = VA_DDR_SPACE_START;
	prop->dmmu.end_addr = VA_DDR_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->dmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
	prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr = VA_HOST_SPACE_END;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
	prop->high_pll = PLL_HIGH_DEFAULT;
	prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
	prop->max_power_default = MAX_POWER_DEFAULT;
	prop->dc_power_default = DC_POWER_DEFAULT;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
		CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GOYA_MAX_PENDING_CS;

	prop->first_available_user_interrupt = USHRT_MAX;
	prop->tpc_interrupt_id = USHRT_MAX;
	prop->eq_interrupt_id = GOYA_EVENT_QUEUE_MSIX_IDX;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GOYA_MME_PLL;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->dma_mask = 48;

	return 0;
}

/*
 * goya_pci_bars_map - Map PCI BARS of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Request PCI regions and map them to kernel virtual addresses.
 * Returns 0 on success
 *
 */
static int goya_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
			(CFG_BASE - SRAM_BASE_ADDR);

	return 0;
}

static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((goya) && (goya->ddr_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = DDR_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
	if (rc)
		return U64_MAX;

	if (goya) {
		old_addr = goya->ddr_bar_cur_addr;
		goya->ddr_bar_cur_addr = addr;
	}

	return old_addr;
}
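
/*
 * Usage sketch for the sliding DDR BAR window above (illustrative only;
 * bar_size stands for prop->dram_pci_bar_size, and handling of an old
 * value of U64_MAX, i.e. a failed region change, is omitted):
 *
 *	old = goya_set_ddr_bar_base(hdev, addr & ~(bar_size - 1));
 *	val = readl(hdev->pcie_bar[DDR_BAR_ID] + (addr & (bar_size - 1)));
 *	goya_set_ddr_bar_base(hdev, old);
 *
 * Since moving the window reprograms the iATU inbound region, batching
 * accesses that fall inside one window is preferable.
 */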

/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU
 *
 */
static int goya_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_CFG_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = DDR_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 */
static int goya_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status, val;
	int rc;

	rc = goya_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);

	if (pci_bar_size != MSIX_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, DDR_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	if (!hdev->pldm) {
		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
			dev_warn(hdev->dev,
				"PCI strap is not configured correctly, PCI bus errors may occur\n");
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
static int goya_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
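
/*
 * Field layout assumed by the 0x7FF mask above: bits [9:0] hold the ASID
 * and bit 10 is the MMU-bypass (MMBP) bit. For example, with asid == 5:
 *
 *	WREG32_AND(reg, ~0x7FF);	- MMBP and ASID are cleared
 *	WREG32_OR(reg, 5);		- ASID becomes 5, MMBP stays 0
 */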

static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (secure)
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);

	RREG32(mmDMA_QM_0_GLBL_PROT);
}

/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_enabled) {
		struct goya_device *goya = hdev->asic_specific;

		if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
			return;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
				pll_freq_arr);

		if (rc)
			return;

		freq = pll_freq_arr[1];
	} else {
		div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
		div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
		nr = RREG32(mmPSOC_PCI_PLL_NR);
		nf = RREG32(mmPSOC_PCI_PLL_NF);
		od = RREG32(mmPSOC_PCI_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d\n",
				div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;
}
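
/*
 * Worked example of the PLL formula above, with illustrative values that
 * are not taken from real hardware: for PLL_REF_CLK = 50, nf = 31, nr = 0
 * and od = 1:
 *
 *	pll_clk = 50 * (31 + 1) / ((0 + 1) * (1 + 1)) = 800
 *
 * and in DIV_SEL_DIVIDED_PLL mode with div_fctr = 3 the resulting
 * frequency is 800 / (3 + 1) = 200.
 */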

/*
 * goya_set_frequency - set the frequency of the device
 *
 * @hdev: pointer to habanalabs device structure
 * @freq: the new frequency value
 *
 * Change the frequency if needed. This function is not protected against
 * concurrent access; the caller is assumed to serialize calls to it,
 * especially calls from multiple threads with different values.
 *
 * Returns 0 if no change was made, otherwise returns 1
 */
int goya_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
{
	struct goya_device *goya = hdev->asic_specific;

	if ((goya->pm_mng_profile == PM_MANUAL) ||
			(goya->curr_pll_profile == freq))
		return 0;

	dev_dbg(hdev->dev, "Changing device frequency to %s\n",
		freq == PLL_HIGH ? "high" : "low");

	goya_set_pll_profile(hdev, freq);

	goya->curr_pll_profile = freq;

	return 1;
}

static void goya_set_freq_to_low_job(struct work_struct *work)
{
	struct goya_work_freq *goya_work = container_of(work,
						struct goya_work_freq,
						work_freq.work);
	struct hl_device *hdev = goya_work->hdev;

	mutex_lock(&hdev->fpriv_list_lock);

	if (!hdev->is_compute_ctx_active)
		goya_set_frequency(hdev, PLL_LOW);

	mutex_unlock(&hdev->fpriv_list_lock);

	schedule_delayed_work(&goya_work->work_freq,
			usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
}

int goya_late_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	int rc;

	goya_fetch_psoc_frequency(hdev);

	rc = goya_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to clear MMU page tables range %d\n", rc);
		return rc;
	}

	rc = goya_mmu_set_dram_default_page(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
		return rc;
	}

	rc = goya_mmu_add_mappings_for_device_cpu(hdev);
	if (rc)
		return rc;

	rc = goya_init_cpu_queues(hdev);
	if (rc)
		return rc;

	rc = goya_test_cpu_queue(hdev);
	if (rc)
		return rc;

	rc = goya_cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
		return rc;
	}

	/* Now that we have the DRAM size in ASIC prop, we can configure the
	 * DMA_IF DDR wrap protection (which resides in the MMU block)
	 * accordingly. The value written is the log2 of the DRAM size.
	 */
	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI access from CPU %d\n", rc);
		return rc;
	}

	/* force setting to low frequency */
	goya->curr_pll_profile = PLL_LOW;

	goya->pm_mng_profile = PM_AUTO;

	goya_set_pll_profile(hdev, PLL_LOW);

	schedule_delayed_work(&goya->goya_work->work_freq,
		usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));

	return 0;
}

/*
 * goya_late_fini - GOYA late tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Free allocated sensor structures
 */
void goya_late_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	cancel_delayed_work_sync(&goya->goya_work->work_freq);

	hl_hwmon_release_resources(hdev);
}

static void goya_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *region;

	/* CFG */
	region = &hdev->pci_mem_region[PCI_REGION_CFG];
	region->region_base = CFG_BASE;
	region->region_size = CFG_SIZE;
	region->offset_in_bar = CFG_BASE - SRAM_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = SRAM_CFG_BAR_ID;
	region->used = 1;

	/* SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
	region->region_base = SRAM_BASE_ADDR;
	region->region_size = SRAM_SIZE;
	region->offset_in_bar = 0;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = SRAM_CFG_BAR_ID;
	region->used = 1;

	/* DRAM */
	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	region->region_base = DRAM_PHYS_BASE;
	region->region_size = hdev->asic_prop.dram_size;
	region->offset_in_bar = 0;
	region->bar_size = prop->dram_pci_bar_size;
	region->bar_id = DDR_BAR_ID;
	region->used = 1;
}

/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	/* Allocate device structure */
	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	/* according to goya_init_iatu */
	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

	goya->mme_clk = GOYA_PLL_FREQ_LOW;
	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
	goya->ic_clk = GOYA_PLL_FREQ_LOW;

	hdev->asic_specific = goya;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	hdev->cpu_accessible_dma_mem = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
							&hdev->cpu_accessible_dma_address,
							GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		rc = -ENOMEM;
		goto free_dma_pool;
	}

	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
		&hdev->cpu_accessible_dma_address);

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);
	hdev->supports_coresight = true;
	hdev->asic_prop.supports_compute_reset = true;
	hdev->asic_prop.allow_inference_soft_reset = true;
	hdev->supports_wait_for_multi_cs = false;
	hdev->supports_ctx_switch = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);

	goya->goya_work = kmalloc(sizeof(struct goya_work_freq), GFP_KERNEL);
	if (!goya->goya_work) {
		rc = -ENOMEM;
		goto free_cpu_accessible_dma_pool;
	}

	goya->goya_work->hdev = hdev;
	INIT_DELAYED_WORK(&goya->goya_work->work_freq, goya_set_freq_to_low_job);

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}

/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(goya->goya_work);
	kfree(goya);

	return 0;
}

static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
		dma_addr_t bus_address)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
	u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

	/* PQ has buffer of 2 cache lines, while CQ has 8 lines */
	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}

static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
	u32 gic_base_lo, gic_base_hi;
	u64 sob_addr;
	u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

	if (dma_id)
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
				(dma_id - 1) * 4;
	else
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;

	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off, lower_32_bits(sob_addr));
	WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
	WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}

/*
 * goya_init_dma_qmans - Initialize QMAN DMA registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the QMAN DMA channels
 *
 */
void goya_init_dma_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_DMA)
		return;

	q = &hdev->kernel_queues[0];

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
		q->cq_id = q->msi_vec = i;
		goya_init_dma_qman(hdev, i, q->bus_address);
		goya_init_dma_ch(hdev, i);
	}

	goya->hw_cap_initialized |= HW_CAP_DMA;
}

/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}

static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
				u32 cp_sts_reg, u32 glbl_sts0_reg)
{
	int rc;
	u32 status;

	/* Use the TPC0 masks and shifts; they are identical for all QMANs */

	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	status = RREG32(cp_sts_reg);
	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
		rc = hl_poll_timeout(
			hdev,
			cp_sts_reg,
			status,
			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
			1000,
			QMAN_FENCE_TIMEOUT_USEC);

		/* if the QMAN is stuck on a fence, no need to check for stop */
		if (rc)
			return 0;
	}

	rc = hl_poll_timeout(
		hdev,
		glbl_sts0_reg,
		status,
		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
		1000,
		QMAN_STOP_TIMEOUT_USEC);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for QMAN to stop\n");
		return -EINVAL;
	}

	return 0;
}

/*
 * goya_stop_external_queues - Stop external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_external_queues(struct hl_device *hdev)
{
	int rc, retval = 0;

	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return retval;

	rc = goya_stop_queue(hdev,
			mmDMA_QM_0_GLBL_CFG1,
			mmDMA_QM_0_CP_STS,
			mmDMA_QM_0_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_1_GLBL_CFG1,
			mmDMA_QM_1_CP_STS,
			mmDMA_QM_1_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_2_GLBL_CFG1,
			mmDMA_QM_2_CP_STS,
			mmDMA_QM_2_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_3_GLBL_CFG1,
			mmDMA_QM_3_CP_STS,
			mmDMA_QM_3_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_4_GLBL_CFG1,
			mmDMA_QM_4_CP_STS,
			mmDMA_QM_4_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
		retval = -EIO;
	}

	return retval;
}

/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_eq *eq;
	u32 status;
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_CQ_BASE_ADDR_LOW,
			lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
	WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
			upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

	WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_EQ_CI, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);

	err = hl_poll_timeout(
		hdev,
		mmCPU_PQ_INIT_STATUS,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		GOYA_CPU_TIMEOUT_USEC);

	if (err) {
		dev_err(hdev->dev,
			"Failed to setup communication with device CPU\n");
		return -EIO;
	}

	/* update FW application security bits */
	if (prop->fw_cpu_boot_dev_sts0_valid)
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);

	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
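
/*
 * The handshake above in short (a restatement of the code for readability):
 * the driver publishes the PQ/EQ/CQ base addresses and lengths, marks
 * itself ready via PQ_INIT_STATUS_READY_FOR_CP, kicks the device CPU
 * through the GIC, and then polls mmCPU_PQ_INIT_STATUS until the CPU
 * answers with PQ_INIT_STATUS_READY_FOR_HOST or GOYA_CPU_TIMEOUT_USEC
 * expires.
 */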

static void goya_set_pll_refclk(struct hl_device *hdev)
{
	WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

	WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}

static void goya_disable_clk_rlx(struct hl_device *hdev)
{
	WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
	WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}

static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
	u64 tpc_eml_address;
	u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
	int err, slm_index;

	tpc_offset = tpc_id * 0x40000;
	tpc_eml_offset = tpc_id * 0x200000;
	tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
	tpc_slm_offset = tpc_eml_address + 0x100000;

	/*
	 * Workaround for Bug H2 #2443:
	 * "TPC SB is not initialized on chip reset"
	 */

	val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
	if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
		dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
			tpc_id);

	WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

	WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

	err = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		val,
		(val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
		1000,
		HL_DEVICE_TIMEOUT_USEC);

	if (err)
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

	WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

	msleep(GOYA_RESET_WAIT_MSEC);

	WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

	msleep(GOYA_RESET_WAIT_MSEC);

	for (slm_index = 0 ; slm_index < 256 ; slm_index++)
		WREG32(tpc_slm_offset + (slm_index << 2), 0);

	val = RREG32(tpc_slm_offset);
}

static void goya_tpc_mbist_workaround(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (hdev->pldm)
		return;

	if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
		return;

	/* Workaround for H2 #2443 */

	for (i = 0 ; i < TPC_MAX_NUM ; i++)
		_goya_tpc_mbist_workaround(hdev, i);

	goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
}

/*
 * goya_init_golden_registers - Initialize golden registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the device
 *
 */
static void goya_init_golden_registers(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 polynom[10], tpc_intr_mask, offset;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
		return;

	polynom[0] = 0x00020080;
	polynom[1] = 0x00401000;
	polynom[2] = 0x00200800;
	polynom[3] = 0x00002000;
	polynom[4] = 0x00080200;
	polynom[5] = 0x00040100;
	polynom[6] = 0x00100400;
	polynom[7] = 0x00004000;
	polynom[8] = 0x00010000;
	polynom[9] = 0x00008000;

	/* Mask all arithmetic interrupts from TPC */
	tpc_intr_mask = 0x7FFF;

	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
		WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
	}

	WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
	WREG32(mmMME_AGU, 0x0f0f0f10);
	WREG32(mmMME_SEI_MASK, ~0x0);

	WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
	WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
	WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
	WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
	WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
	WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
	WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
	WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
	WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
	WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
	WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
	WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
	WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
	WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
	WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
	WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
	WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
	WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
	WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
	WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
	WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
	WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
	WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
	WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
	WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
	WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
	WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
	WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
	WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
	WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
	WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
	WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
	WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
	WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
	WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
	WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
	WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
	WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
	WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
	WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
	WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
	WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
	WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
	WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
	WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
	WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
	WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
	WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
	WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
	WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
	WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
	WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
	WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
	WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
	WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
	WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
	WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);

	WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
	WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
	WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
	WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
	WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
	WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);

	WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
	WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
	WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
	WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
	WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
	WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
	WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
	WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);

	WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
	WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
	WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
	WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
	WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
	WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
	WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
	WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
	WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);

	WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
	WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
	WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
	WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
	WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
	WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
	WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);

	WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
	WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
	WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
	WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
	WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
	WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
	WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
	WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);

	WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
	WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
	WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
	WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1763
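	/*
	 * Program the same set of ten HBW split coefficients into every
	 * router; the polynom[] entries are written shifted right by 7 bits
	 * (presumably fixed-point values whose 7 LSBs are fractional).
	 */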
1764	for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
1765		WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1766		WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1767		WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1768		WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1769		WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1770		WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1771
1772		WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1773		WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1774		WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1775		WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1776		WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1777		WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1778		WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1779		WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1780
1781		WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1782		WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1783	}
1784
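	/*
	 * The six MME routers have identical register files spaced 0x40000
	 * apart, so they are all programmed relative to the MME1 block.
	 */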
1785	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
1786		WREG32(mmMME1_RTR_SCRAMB_EN + offset,
1787				1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
1788		WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
1789				1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
1790	}
1791
1792	for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
1793		/*
1794		 * Workaround for Bug H2 #2441 :
1795		 * "ST.NOP set trace event illegal opcode"
1796		 */
1797		WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);
1798
1799		WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
1800				1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
1801		WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
1802				1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1803
1804		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
1805				ICACHE_FETCH_LINE_NUM, 2);
1806	}
1807
1808	WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
1809	WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
1810			1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1811
1812	WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
1813	WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
1814			1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1815
1816	/*
1817	 * Workaround for H2 #HW-23 bug
1818	 * Set DMA max outstanding read requests to 240 on DMA CH 1.
1819	 * This limitation is still large enough to not affect Gen4 bandwidth.
	 * We need to limit only that DMA channel because the user can only
	 * read from the host using DMA CH 1
1822	 */
1823	WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
1824
1825	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
1826
1827	goya->hw_cap_initialized |= HW_CAP_GOLDEN;
1828}
1829
1830static void goya_init_mme_qman(struct hl_device *hdev)
1831{
1832	u32 mtr_base_lo, mtr_base_hi;
1833	u32 so_base_lo, so_base_hi;
1834	u32 gic_base_lo, gic_base_hi;
1835	u64 qman_base_addr;
1836
1837	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1838	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1839	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1840	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1841
1842	gic_base_lo =
1843		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1844	gic_base_hi =
1845		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1846
1847	qman_base_addr = hdev->asic_prop.sram_base_address +
1848				MME_QMAN_BASE_OFFSET;
1849
1850	WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
1851	WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
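	/*
	 * The PQ size register expects log2 of the number of PQ entries,
	 * e.g. a 256-entry queue is programmed as ilog2(256) == 8
	 */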
1852	WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
1853	WREG32(mmMME_QM_PQ_PI, 0);
1854	WREG32(mmMME_QM_PQ_CI, 0);
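	/*
	 * These registers most likely hold the offsets, inside the QMAN
	 * register file, to which the CP forwards the fields of an LDMA
	 * descriptor (inferred from the register names; 0x10C0-0x10CC are
	 * four consecutive 32-bit slots).
	 */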
1855	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
1856	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
1857	WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
1858	WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);
1859
1860	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1861	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1862	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1863	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1864
1865	/* QMAN CQ has 8 cache lines */
1866	WREG32(mmMME_QM_CQ_CFG1, 0x00080008);
1867
1868	WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
1869	WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);
1870
1871	WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);
1872
1873	WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);
1874
1875	WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);
1876
1877	WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
1878}
1879
1880static void goya_init_mme_cmdq(struct hl_device *hdev)
1881{
1882	u32 mtr_base_lo, mtr_base_hi;
1883	u32 so_base_lo, so_base_hi;
1884	u32 gic_base_lo, gic_base_hi;
1885
1886	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1887	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1888	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1889	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1890
1891	gic_base_lo =
1892		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1893	gic_base_hi =
1894		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1895
1896	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1897	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1899	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1900
1901	/* CMDQ CQ has 20 cache lines */
1902	WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
1903
1904	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
1905	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
1906
1907	WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
1908
1909	WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
1910
1911	WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
1912
1913	WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
1914}
1915
1916void goya_init_mme_qmans(struct hl_device *hdev)
1917{
1918	struct goya_device *goya = hdev->asic_specific;
1919	u32 so_base_lo, so_base_hi;
1920
1921	if (goya->hw_cap_initialized & HW_CAP_MME)
1922		return;
1923
1924	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1925	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1926
1927	WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
1928	WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
1929
1930	goya_init_mme_qman(hdev);
1931	goya_init_mme_cmdq(hdev);
1932
1933	goya->hw_cap_initialized |= HW_CAP_MME;
1934}
1935
1936static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
1937{
1938	u32 mtr_base_lo, mtr_base_hi;
1939	u32 so_base_lo, so_base_hi;
1940	u32 gic_base_lo, gic_base_hi;
1941	u64 qman_base_addr;
1942	u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
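	/*
	 * All TPC QMAN register blocks share one layout, so reg_off scales
	 * the distance between two adjacent blocks by the TPC index,
	 * e.g. TPC3 registers live at mmTPC0_QM_* + 3 * that stride.
	 */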
1943
1944	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1945	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1946	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1947	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1948
1949	gic_base_lo =
1950		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1951	gic_base_hi =
1952		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1953
1954	qman_base_addr = hdev->asic_prop.sram_base_address + base_off;
1955
1956	WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
1957	WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
1958	WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
1959	WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
1960	WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
1961	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
1962	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
1963	WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
1964	WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);
1965
1966	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1967	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1968	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1969	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1970
1971	WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);
1972
1973	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1974	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1975
1976	WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
1977			GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);
1978
1979	WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);
1980
1981	WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);
1982
1983	WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
1984}
1985
1986static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
1987{
1988	u32 mtr_base_lo, mtr_base_hi;
1989	u32 so_base_lo, so_base_hi;
1990	u32 gic_base_lo, gic_base_hi;
1991	u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);
1992
1993	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1994	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1995	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1996	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1997
1998	gic_base_lo =
1999		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
2000	gic_base_hi =
2001		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
2002
2003	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
2004	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
2005	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
2006	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
2007
2008	WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);
2009
2010	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
2011	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
2012
2013	WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
2014			GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);
2015
2016	WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);
2017
2018	WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);
2019
2020	WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
2021}
2022
2023void goya_init_tpc_qmans(struct hl_device *hdev)
2024{
2025	struct goya_device *goya = hdev->asic_specific;
2026	u32 so_base_lo, so_base_hi;
2027	u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
2028			mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
2029	int i;
2030
2031	if (goya->hw_cap_initialized & HW_CAP_TPC)
2032		return;
2033
2034	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
2035	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
2036
2037	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
2038		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
2039				so_base_lo);
2040		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
2041				so_base_hi);
2042	}
2043
2044	goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
2045	goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
2046	goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
2047	goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
2048	goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
2049	goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
2050	goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
2051	goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
2052
2053	for (i = 0 ; i < TPC_MAX_NUM ; i++)
2054		goya_init_tpc_cmdq(hdev, i);
2055
2056	goya->hw_cap_initialized |= HW_CAP_TPC;
2057}
2058
2059/*
2060 * goya_disable_internal_queues - Disable internal queues
2061 *
2062 * @hdev: pointer to hl_device structure
2063 *
2064 */
2065static void goya_disable_internal_queues(struct hl_device *hdev)
2066{
2067	struct goya_device *goya = hdev->asic_specific;
2068
2069	if (!(goya->hw_cap_initialized & HW_CAP_MME))
2070		goto disable_tpc;
2071
2072	WREG32(mmMME_QM_GLBL_CFG0, 0);
2073	WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
2074
2075disable_tpc:
2076	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2077		return;
2078
2079	WREG32(mmTPC0_QM_GLBL_CFG0, 0);
2080	WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
2081
2082	WREG32(mmTPC1_QM_GLBL_CFG0, 0);
2083	WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
2084
2085	WREG32(mmTPC2_QM_GLBL_CFG0, 0);
2086	WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
2087
2088	WREG32(mmTPC3_QM_GLBL_CFG0, 0);
2089	WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
2090
2091	WREG32(mmTPC4_QM_GLBL_CFG0, 0);
2092	WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
2093
2094	WREG32(mmTPC5_QM_GLBL_CFG0, 0);
2095	WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
2096
2097	WREG32(mmTPC6_QM_GLBL_CFG0, 0);
2098	WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
2099
2100	WREG32(mmTPC7_QM_GLBL_CFG0, 0);
2101	WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
2102}
2103
2104/*
2105 * goya_stop_internal_queues - Stop internal queues
2106 *
2107 * @hdev: pointer to hl_device structure
2108 *
2109 * Returns 0 on success
2110 *
2111 */
2112static int goya_stop_internal_queues(struct hl_device *hdev)
2113{
2114	struct goya_device *goya = hdev->asic_specific;
2115	int rc, retval = 0;
2116
2117	if (!(goya->hw_cap_initialized & HW_CAP_MME))
2118		goto stop_tpc;
2119
2120	/*
	 * Each queue (QMAN) is a separate piece of H/W logic, which means
	 * each QMAN can be stopped independently; a failure to stop one does
	 * NOT mean we shouldn't try to stop the other QMANs
2124	 */
2125
2126	rc = goya_stop_queue(hdev,
2127			mmMME_QM_GLBL_CFG1,
2128			mmMME_QM_CP_STS,
2129			mmMME_QM_GLBL_STS0);
2130
2131	if (rc) {
2132		dev_err(hdev->dev, "failed to stop MME QMAN\n");
2133		retval = -EIO;
2134	}
2135
2136	rc = goya_stop_queue(hdev,
2137			mmMME_CMDQ_GLBL_CFG1,
2138			mmMME_CMDQ_CP_STS,
2139			mmMME_CMDQ_GLBL_STS0);
2140
2141	if (rc) {
2142		dev_err(hdev->dev, "failed to stop MME CMDQ\n");
2143		retval = -EIO;
2144	}
2145
2146stop_tpc:
2147	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2148		return retval;
2149
2150	rc = goya_stop_queue(hdev,
2151			mmTPC0_QM_GLBL_CFG1,
2152			mmTPC0_QM_CP_STS,
2153			mmTPC0_QM_GLBL_STS0);
2154
2155	if (rc) {
2156		dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
2157		retval = -EIO;
2158	}
2159
2160	rc = goya_stop_queue(hdev,
2161			mmTPC0_CMDQ_GLBL_CFG1,
2162			mmTPC0_CMDQ_CP_STS,
2163			mmTPC0_CMDQ_GLBL_STS0);
2164
2165	if (rc) {
2166		dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
2167		retval = -EIO;
2168	}
2169
2170	rc = goya_stop_queue(hdev,
2171			mmTPC1_QM_GLBL_CFG1,
2172			mmTPC1_QM_CP_STS,
2173			mmTPC1_QM_GLBL_STS0);
2174
2175	if (rc) {
2176		dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
2177		retval = -EIO;
2178	}
2179
2180	rc = goya_stop_queue(hdev,
2181			mmTPC1_CMDQ_GLBL_CFG1,
2182			mmTPC1_CMDQ_CP_STS,
2183			mmTPC1_CMDQ_GLBL_STS0);
2184
2185	if (rc) {
2186		dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
2187		retval = -EIO;
2188	}
2189
2190	rc = goya_stop_queue(hdev,
2191			mmTPC2_QM_GLBL_CFG1,
2192			mmTPC2_QM_CP_STS,
2193			mmTPC2_QM_GLBL_STS0);
2194
2195	if (rc) {
2196		dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
2197		retval = -EIO;
2198	}
2199
2200	rc = goya_stop_queue(hdev,
2201			mmTPC2_CMDQ_GLBL_CFG1,
2202			mmTPC2_CMDQ_CP_STS,
2203			mmTPC2_CMDQ_GLBL_STS0);
2204
2205	if (rc) {
2206		dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
2207		retval = -EIO;
2208	}
2209
2210	rc = goya_stop_queue(hdev,
2211			mmTPC3_QM_GLBL_CFG1,
2212			mmTPC3_QM_CP_STS,
2213			mmTPC3_QM_GLBL_STS0);
2214
2215	if (rc) {
2216		dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
2217		retval = -EIO;
2218	}
2219
2220	rc = goya_stop_queue(hdev,
2221			mmTPC3_CMDQ_GLBL_CFG1,
2222			mmTPC3_CMDQ_CP_STS,
2223			mmTPC3_CMDQ_GLBL_STS0);
2224
2225	if (rc) {
2226		dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
2227		retval = -EIO;
2228	}
2229
2230	rc = goya_stop_queue(hdev,
2231			mmTPC4_QM_GLBL_CFG1,
2232			mmTPC4_QM_CP_STS,
2233			mmTPC4_QM_GLBL_STS0);
2234
2235	if (rc) {
2236		dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
2237		retval = -EIO;
2238	}
2239
2240	rc = goya_stop_queue(hdev,
2241			mmTPC4_CMDQ_GLBL_CFG1,
2242			mmTPC4_CMDQ_CP_STS,
2243			mmTPC4_CMDQ_GLBL_STS0);
2244
2245	if (rc) {
2246		dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
2247		retval = -EIO;
2248	}
2249
2250	rc = goya_stop_queue(hdev,
2251			mmTPC5_QM_GLBL_CFG1,
2252			mmTPC5_QM_CP_STS,
2253			mmTPC5_QM_GLBL_STS0);
2254
2255	if (rc) {
2256		dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
2257		retval = -EIO;
2258	}
2259
2260	rc = goya_stop_queue(hdev,
2261			mmTPC5_CMDQ_GLBL_CFG1,
2262			mmTPC5_CMDQ_CP_STS,
2263			mmTPC5_CMDQ_GLBL_STS0);
2264
2265	if (rc) {
2266		dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
2267		retval = -EIO;
2268	}
2269
2270	rc = goya_stop_queue(hdev,
2271			mmTPC6_QM_GLBL_CFG1,
2272			mmTPC6_QM_CP_STS,
2273			mmTPC6_QM_GLBL_STS0);
2274
2275	if (rc) {
2276		dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
2277		retval = -EIO;
2278	}
2279
2280	rc = goya_stop_queue(hdev,
2281			mmTPC6_CMDQ_GLBL_CFG1,
2282			mmTPC6_CMDQ_CP_STS,
2283			mmTPC6_CMDQ_GLBL_STS0);
2284
2285	if (rc) {
2286		dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
2287		retval = -EIO;
2288	}
2289
2290	rc = goya_stop_queue(hdev,
2291			mmTPC7_QM_GLBL_CFG1,
2292			mmTPC7_QM_CP_STS,
2293			mmTPC7_QM_GLBL_STS0);
2294
2295	if (rc) {
2296		dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
2297		retval = -EIO;
2298	}
2299
2300	rc = goya_stop_queue(hdev,
2301			mmTPC7_CMDQ_GLBL_CFG1,
2302			mmTPC7_CMDQ_CP_STS,
2303			mmTPC7_CMDQ_GLBL_STS0);
2304
2305	if (rc) {
2306		dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
2307		retval = -EIO;
2308	}
2309
2310	return retval;
2311}
2312
2313static void goya_dma_stall(struct hl_device *hdev)
2314{
2315	struct goya_device *goya = hdev->asic_specific;
2316
2317	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
2318		return;
2319
2320	WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
2321	WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
2322	WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
2323	WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
2324	WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
2325}
2326
2327static void goya_tpc_stall(struct hl_device *hdev)
2328{
2329	struct goya_device *goya = hdev->asic_specific;
2330
2331	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
2332		return;
2333
2334	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2335	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
2336	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
2337	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
2338	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
2339	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
2340	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
2341	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
2342}
2343
2344static void goya_mme_stall(struct hl_device *hdev)
2345{
2346	struct goya_device *goya = hdev->asic_specific;
2347
2348	if (!(goya->hw_cap_initialized & HW_CAP_MME))
2349		return;
2350
2351	WREG32(mmMME_STALL, 0xFFFFFFFF);
2352}
2353
2354static int goya_enable_msix(struct hl_device *hdev)
2355{
2356	struct goya_device *goya = hdev->asic_specific;
2357	int cq_cnt = hdev->asic_prop.completion_queues_count;
2358	int rc, i, irq_cnt_init, irq;
2359
2360	if (goya->hw_cap_initialized & HW_CAP_MSIX)
2361		return 0;
2362
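	/*
	 * Ask for exactly GOYA_MSIX_ENTRIES vectors (min == max), so the
	 * allocation either provides all of them or fails outright.
	 */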
2363	rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
2364				GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
2365	if (rc < 0) {
2366		dev_err(hdev->dev,
2367			"MSI-X: Failed to enable support -- %d/%d\n",
2368			GOYA_MSIX_ENTRIES, rc);
2369		return rc;
2370	}
2371
2372	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2373		irq = pci_irq_vector(hdev->pdev, i);
2374		rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
2375				&hdev->completion_queue[i]);
2376		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2378			goto free_irqs;
2379		}
2380	}
2381
2382	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2383
2384	rc = request_irq(irq, hl_irq_handler_eq, 0,
2385			goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
2386			&hdev->event_queue);
2387	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2389		goto free_irqs;
2390	}
2391
2392	goya->hw_cap_initialized |= HW_CAP_MSIX;
2393	return 0;
2394
2395free_irqs:
2396	for (i = 0 ; i < irq_cnt_init ; i++)
2397		free_irq(pci_irq_vector(hdev->pdev, i),
2398			&hdev->completion_queue[i]);
2399
2400	pci_free_irq_vectors(hdev->pdev);
2401	return rc;
2402}
2403
2404static void goya_sync_irqs(struct hl_device *hdev)
2405{
2406	struct goya_device *goya = hdev->asic_specific;
2407	int i;
2408
2409	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2410		return;
2411
	/* Wait for all pending IRQs to finish */
2413	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2414		synchronize_irq(pci_irq_vector(hdev->pdev, i));
2415
2416	synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
2417}
2418
2419static void goya_disable_msix(struct hl_device *hdev)
2420{
2421	struct goya_device *goya = hdev->asic_specific;
2422	int i, irq;
2423
2424	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2425		return;
2426
2427	goya_sync_irqs(hdev);
2428
2429	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
2430	free_irq(irq, &hdev->event_queue);
2431
2432	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
2433		irq = pci_irq_vector(hdev->pdev, i);
2434		free_irq(irq, &hdev->completion_queue[i]);
2435	}
2436
2437	pci_free_irq_vectors(hdev->pdev);
2438
2439	goya->hw_cap_initialized &= ~HW_CAP_MSIX;
2440}
2441
2442static void goya_enable_timestamp(struct hl_device *hdev)
2443{
2444	/* Disable the timestamp counter */
2445	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2446
	/* Zero both halves of the 64-bit counter while it is disabled */
2448	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2449	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2450
2451	/* Enable the counter */
2452	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2453}
2454
2455static void goya_disable_timestamp(struct hl_device *hdev)
2456{
2457	/* Disable the timestamp counter */
2458	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2459}
2460
2461static void goya_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
2462{
2463	u32 wait_timeout_ms;
2464
2465	if (hdev->pldm)
2466		wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2467	else
2468		wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
2469
2470	goya_stop_external_queues(hdev);
2471	goya_stop_internal_queues(hdev);
2472
2473	msleep(wait_timeout_ms);
2474
2475	goya_dma_stall(hdev);
2476	goya_tpc_stall(hdev);
2477	goya_mme_stall(hdev);
2478
2479	msleep(wait_timeout_ms);
2480
2481	goya_disable_external_queues(hdev);
2482	goya_disable_internal_queues(hdev);
2483
2484	goya_disable_timestamp(hdev);
2485
2486	if (hard_reset) {
2487		goya_disable_msix(hdev);
2488		goya_mmu_remove_device_cpu_mappings(hdev);
2489	} else {
2490		goya_sync_irqs(hdev);
2491	}
2492}
2493
2494/*
2495 * goya_load_firmware_to_device() - Load LINUX FW code to device.
2496 * @hdev: Pointer to hl_device structure.
2497 *
2498 * Copy LINUX fw code from firmware file to HBM BAR.
2499 *
2500 * Return: 0 on success, non-zero for failure.
2501 */
2502static int goya_load_firmware_to_device(struct hl_device *hdev)
2503{
2504	void __iomem *dst;
2505
2506	dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2507
2508	return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst, 0, 0);
2509}
2510
2511/*
2512 * goya_load_boot_fit_to_device() - Load boot fit to device.
2513 * @hdev: Pointer to hl_device structure.
2514 *
2515 * Copy boot fit file to SRAM BAR.
2516 *
2517 * Return: 0 on success, non-zero for failure.
2518 */
2519static int goya_load_boot_fit_to_device(struct hl_device *hdev)
2520{
2521	void __iomem *dst;
2522
2523	dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2524
2525	return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst, 0, 0);
2526}
2527
2528static void goya_init_dynamic_firmware_loader(struct hl_device *hdev)
2529{
2530	struct dynamic_fw_load_mgr *dynamic_loader;
2531	struct cpu_dyn_regs *dyn_regs;
2532
2533	dynamic_loader = &hdev->fw_loader.dynamic_loader;
2534
2535	/*
	 * Here we set initial values for a few specific dynamic registers (as
	 * those values have to be hard-coded before the first descriptor is
	 * read from the FW). In later stages of the protocol these values are
	 * updated automatically by reading the FW descriptor, so the data
	 * there is always up-to-date
2541	 */
2542	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
2543	dyn_regs->kmd_msg_to_cpu =
2544				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
2545	dyn_regs->cpu_cmd_status_to_host =
2546				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
2547
2548	dynamic_loader->wait_for_bl_timeout = GOYA_WAIT_FOR_BL_TIMEOUT_USEC;
2549}
2550
2551static void goya_init_static_firmware_loader(struct hl_device *hdev)
2552{
2553	struct static_fw_load_mgr *static_loader;
2554
2555	static_loader = &hdev->fw_loader.static_loader;
2556
2557	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
2558	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
2559	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
2560	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
2561	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
2562	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
2563	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
2564	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
2565	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
2566	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
2567	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
2568	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
2569}
2570
2571static void goya_init_firmware_preload_params(struct hl_device *hdev)
2572{
2573	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
2574
2575	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
2576	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
2577	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
2578	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
2579	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
2580	pre_fw_load->wait_for_preboot_timeout = GOYA_BOOT_FIT_REQ_TIMEOUT_USEC;
2581}
2582
2583static void goya_init_firmware_loader(struct hl_device *hdev)
2584{
2585	struct asic_fixed_properties *prop = &hdev->asic_prop;
2586	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
2587
2588	/* fill common fields */
2589	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
2590	fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
2591	fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
2592	fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;
2593	fw_loader->boot_fit_timeout = GOYA_BOOT_FIT_REQ_TIMEOUT_USEC;
2594	fw_loader->skip_bmc = false;
2595	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
2596	fw_loader->dram_bar_id = DDR_BAR_ID;
2597
2598	if (prop->dynamic_fw_load)
2599		goya_init_dynamic_firmware_loader(hdev);
2600	else
2601		goya_init_static_firmware_loader(hdev);
2602}
2603
2604static int goya_init_cpu(struct hl_device *hdev)
2605{
2606	struct goya_device *goya = hdev->asic_specific;
2607	int rc;
2608
2609	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
2610		return 0;
2611
2612	if (goya->hw_cap_initialized & HW_CAP_CPU)
2613		return 0;
2614
2615	/*
	 * Before pushing u-boot/linux to the device, we need to set the DDR
	 * bar to the base address of the DRAM
2618	 */
2619	if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2620		dev_err(hdev->dev,
2621			"failed to map DDR bar to DRAM base address\n");
2622		return -EIO;
2623	}
2624
2625	rc = hl_fw_init_cpu(hdev);
2626
2627	if (rc)
2628		return rc;
2629
2630	goya->hw_cap_initialized |= HW_CAP_CPU;
2631
2632	return 0;
2633}
2634
2635static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
2636						u64 phys_addr)
2637{
2638	u32 status, timeout_usec;
2639	int rc;
2640
2641	if (hdev->pldm)
2642		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
2643	else
2644		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
2645
2646	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
2647	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
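	/*
	 * Bit 31 of MMU_ASID_BUSY is the busy flag: setting it kicks off the
	 * hop0 update, and the H/W clears it once the update is done, which
	 * is what the poll below waits for.
	 */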
2648	WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
2649
2650	rc = hl_poll_timeout(
2651		hdev,
2652		MMU_ASID_BUSY,
2653		status,
2654		!(status & 0x80000000),
2655		1000,
2656		timeout_usec);
2657
2658	if (rc) {
2659		dev_err(hdev->dev,
2660			"Timeout during MMU hop0 config of asid %d\n", asid);
2661		return rc;
2662	}
2663
2664	return 0;
2665}
2666
2667int goya_mmu_init(struct hl_device *hdev)
2668{
2669	struct asic_fixed_properties *prop = &hdev->asic_prop;
2670	struct goya_device *goya = hdev->asic_specific;
2671	u64 hop0_addr;
2672	int rc, i;
2673
2674	if (goya->hw_cap_initialized & HW_CAP_MMU)
2675		return 0;
2676
2677	hdev->dram_default_page_mapping = true;
2678
2679	for (i = 0 ; i < prop->max_asid ; i++) {
2680		hop0_addr = prop->mmu_pgt_addr +
2681				(i * prop->mmu_hop_table_size);
2682
2683		rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2684		if (rc) {
2685			dev_err(hdev->dev,
2686				"failed to set hop0 addr for asid %d\n", i);
2687			goto err;
2688		}
2689	}
2690
2691	goya->hw_cap_initialized |= HW_CAP_MMU;
2692
2693	/* init MMU cache manage page */
2694	WREG32(mmSTLB_CACHE_INV_BASE_39_8,
2695				lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
2696	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2697
2698	/* Remove follower feature due to performance bug */
2699	WREG32_AND(mmSTLB_STLB_FEATURE_EN,
2700			(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
2701
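	/* Invalidate the MMU cache so no stale translations survive init */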
2702	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR | MMU_OP_PHYS_PACK);
2703
2704	WREG32(mmMMU_MMU_ENABLE, 1);
2705	WREG32(mmMMU_SPI_MASK, 0xF);
2706
2707	return 0;
2708
2709err:
2710	return rc;
2711}
2712
2713/*
2714 * goya_hw_init - Goya hardware initialization code
2715 *
2716 * @hdev: pointer to hl_device structure
2717 *
2718 * Returns 0 on success
2719 *
2720 */
2721static int goya_hw_init(struct hl_device *hdev)
2722{
2723	struct asic_fixed_properties *prop = &hdev->asic_prop;
2724	int rc;
2725
2726	/* Perform read from the device to make sure device is up */
2727	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2728
2729	/*
2730	 * Let's mark in the H/W that we have reached this point. We check
2731	 * this value in the reset_before_init function to understand whether
2732	 * we need to reset the chip before doing H/W init. This register is
2733	 * cleared by the H/W upon H/W reset
2734	 */
2735	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2736
2737	rc = goya_init_cpu(hdev);
2738	if (rc) {
2739		dev_err(hdev->dev, "failed to initialize CPU\n");
2740		return rc;
2741	}
2742
2743	goya_tpc_mbist_workaround(hdev);
2744
2745	goya_init_golden_registers(hdev);
2746
2747	/*
2748	 * After CPU initialization is finished, change DDR bar mapping inside
2749	 * iATU to point to the start address of the MMU page tables
2750	 */
2751	if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
2752			~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
2753		dev_err(hdev->dev,
2754			"failed to map DDR bar to MMU page tables\n");
2755		return -EIO;
2756	}
2757
2758	rc = goya_mmu_init(hdev);
2759	if (rc)
2760		return rc;
2761
2762	goya_init_security(hdev);
2763
2764	goya_init_dma_qmans(hdev);
2765
2766	goya_init_mme_qmans(hdev);
2767
2768	goya_init_tpc_qmans(hdev);
2769
2770	goya_enable_timestamp(hdev);
2771
2772	/* MSI-X must be enabled before CPU queues are initialized */
2773	rc = goya_enable_msix(hdev);
2774	if (rc)
2775		goto disable_queues;
2776
2777	/* Perform read from the device to flush all MSI-X configuration */
2778	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2779
2780	return 0;
2781
2782disable_queues:
2783	goya_disable_internal_queues(hdev);
2784	goya_disable_external_queues(hdev);
2785
2786	return rc;
2787}
2788
2789static int goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
2790{
2791	struct goya_device *goya = hdev->asic_specific;
2792	u32 reset_timeout_ms, cpu_timeout_ms, status;
2793
2794	if (hdev->pldm) {
2795		reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
2796		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2797	} else {
2798		reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
2799		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
2800	}
2801
2802	if (hard_reset) {
		/* We don't know what the state of the CPU is, so make sure it
		 * is stopped by any means necessary
2805		 */
2806		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
2807		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2808			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
2809
2810		msleep(cpu_timeout_ms);
2811
2812		goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
2813		goya_disable_clk_rlx(hdev);
2814		goya_set_pll_refclk(hdev);
2815
2816		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
2817		dev_dbg(hdev->dev,
2818			"Issued HARD reset command, going to wait %dms\n",
2819			reset_timeout_ms);
2820	} else {
2821		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
2822		dev_dbg(hdev->dev,
2823			"Issued SOFT reset command, going to wait %dms\n",
2824			reset_timeout_ms);
2825	}
2826
2827	/*
	 * After a hard reset, we can't poll the BTM_FSM register because the
	 * PSOC itself is in reset. In either type of reset, we need to wait
	 * until the reset is deasserted
2831	 */
2832	msleep(reset_timeout_ms);
2833
2834	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
2835	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
2836		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
2837		return -ETIMEDOUT;
2838	}
2839
2840	if (!hard_reset && goya) {
2841		goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
2842						HW_CAP_GOLDEN | HW_CAP_TPC);
2843		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2844				GOYA_ASYNC_EVENT_ID_SOFT_RESET);
2845		return 0;
2846	}
2847
2848	/* Chicken bit to re-initiate boot sequencer flow */
2849	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
2850		1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
2851	/* Move boot manager FSM to pre boot sequencer init state */
2852	WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
2853			0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);
2854
2855	if (goya) {
2856		goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
2857				HW_CAP_DDR_0 | HW_CAP_DDR_1 |
2858				HW_CAP_DMA | HW_CAP_MME |
2859				HW_CAP_MMU | HW_CAP_TPC_MBIST |
2860				HW_CAP_GOLDEN | HW_CAP_TPC);
2861
2862		memset(goya->events_stat, 0, sizeof(goya->events_stat));
2863	}
2864	return 0;
2865}
2866
2867int goya_suspend(struct hl_device *hdev)
2868{
2869	int rc;
2870
2871	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
2872	if (rc)
2873		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
2874
2875	return rc;
2876}
2877
2878int goya_resume(struct hl_device *hdev)
2879{
2880	return goya_init_iatu(hdev);
2881}
2882
2883static int goya_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
2884			void *cpu_addr, dma_addr_t dma_addr, size_t size)
2885{
2886	int rc;
2887
2888	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
2889			VM_DONTCOPY | VM_NORESERVE);
2890
2891	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
2892				(dma_addr - HOST_PHYS_BASE), size);
2893	if (rc)
		dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
2895
2896	return rc;
2897}
2898
2899void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
2900{
2901	u32 db_reg_offset, db_value;
2902
2903	switch (hw_queue_id) {
2904	case GOYA_QUEUE_ID_DMA_0:
2905		db_reg_offset = mmDMA_QM_0_PQ_PI;
2906		break;
2907
2908	case GOYA_QUEUE_ID_DMA_1:
2909		db_reg_offset = mmDMA_QM_1_PQ_PI;
2910		break;
2911
2912	case GOYA_QUEUE_ID_DMA_2:
2913		db_reg_offset = mmDMA_QM_2_PQ_PI;
2914		break;
2915
2916	case GOYA_QUEUE_ID_DMA_3:
2917		db_reg_offset = mmDMA_QM_3_PQ_PI;
2918		break;
2919
2920	case GOYA_QUEUE_ID_DMA_4:
2921		db_reg_offset = mmDMA_QM_4_PQ_PI;
2922		break;
2923
2924	case GOYA_QUEUE_ID_CPU_PQ:
2925		db_reg_offset = mmCPU_IF_PF_PQ_PI;
2926		break;
2927
2928	case GOYA_QUEUE_ID_MME:
2929		db_reg_offset = mmMME_QM_PQ_PI;
2930		break;
2931
2932	case GOYA_QUEUE_ID_TPC0:
2933		db_reg_offset = mmTPC0_QM_PQ_PI;
2934		break;
2935
2936	case GOYA_QUEUE_ID_TPC1:
2937		db_reg_offset = mmTPC1_QM_PQ_PI;
2938		break;
2939
2940	case GOYA_QUEUE_ID_TPC2:
2941		db_reg_offset = mmTPC2_QM_PQ_PI;
2942		break;
2943
2944	case GOYA_QUEUE_ID_TPC3:
2945		db_reg_offset = mmTPC3_QM_PQ_PI;
2946		break;
2947
2948	case GOYA_QUEUE_ID_TPC4:
2949		db_reg_offset = mmTPC4_QM_PQ_PI;
2950		break;
2951
2952	case GOYA_QUEUE_ID_TPC5:
2953		db_reg_offset = mmTPC5_QM_PQ_PI;
2954		break;
2955
2956	case GOYA_QUEUE_ID_TPC6:
2957		db_reg_offset = mmTPC6_QM_PQ_PI;
2958		break;
2959
2960	case GOYA_QUEUE_ID_TPC7:
2961		db_reg_offset = mmTPC7_QM_PQ_PI;
2962		break;
2963
2964	default:
2965		/* Should never get here */
2966		dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
2967			hw_queue_id);
2968		return;
2969	}
2970
2971	db_value = pi;
2972
2973	/* ring the doorbell */
2974	WREG32(db_reg_offset, db_value);
2975
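	/*
	 * Unlike the H/W queues, the device CPU must also be notified
	 * explicitly that its PI has moved, hence the PI_UPDATE event below.
	 */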
2976	if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ) {
2977		/* make sure device CPU will read latest data from host */
2978		mb();
2979		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2980				GOYA_ASYNC_EVENT_ID_PI_UPDATE);
2981	}
2982}
2983
2984void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
2985{
	/* The QMAN PQs reside in SRAM, so the BD must be copied via IO space */
2987	memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
2988}
2989
2990static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
2991					dma_addr_t *dma_handle, gfp_t flags)
2992{
2993	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
2994						dma_handle, flags);
2995
2996	/* Shift to the device's base physical address of host memory */
2997	if (kernel_addr)
2998		*dma_handle += HOST_PHYS_BASE;
2999
3000	return kernel_addr;
3001}
3002
3003static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
3004					void *cpu_addr, dma_addr_t dma_handle)
3005{
3006	/* Cancel the device's base physical address of host memory */
3007	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3008
3009	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3010}
3011
3012int goya_scrub_device_mem(struct hl_device *hdev)
3013{
3014	return 0;
3015}
3016
3017void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
				dma_addr_t *dma_handle, u16 *queue_len)
3019{
3020	void *base;
3021	u32 offset;
3022
3023	*dma_handle = hdev->asic_prop.sram_base_address;
3024
3025	base = (__force void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
3026
3027	switch (queue_id) {
3028	case GOYA_QUEUE_ID_MME:
3029		offset = MME_QMAN_BASE_OFFSET;
3030		*queue_len = MME_QMAN_LENGTH;
3031		break;
3032	case GOYA_QUEUE_ID_TPC0:
3033		offset = TPC0_QMAN_BASE_OFFSET;
3034		*queue_len = TPC_QMAN_LENGTH;
3035		break;
3036	case GOYA_QUEUE_ID_TPC1:
3037		offset = TPC1_QMAN_BASE_OFFSET;
3038		*queue_len = TPC_QMAN_LENGTH;
3039		break;
3040	case GOYA_QUEUE_ID_TPC2:
3041		offset = TPC2_QMAN_BASE_OFFSET;
3042		*queue_len = TPC_QMAN_LENGTH;
3043		break;
3044	case GOYA_QUEUE_ID_TPC3:
3045		offset = TPC3_QMAN_BASE_OFFSET;
3046		*queue_len = TPC_QMAN_LENGTH;
3047		break;
3048	case GOYA_QUEUE_ID_TPC4:
3049		offset = TPC4_QMAN_BASE_OFFSET;
3050		*queue_len = TPC_QMAN_LENGTH;
3051		break;
3052	case GOYA_QUEUE_ID_TPC5:
3053		offset = TPC5_QMAN_BASE_OFFSET;
3054		*queue_len = TPC_QMAN_LENGTH;
3055		break;
3056	case GOYA_QUEUE_ID_TPC6:
3057		offset = TPC6_QMAN_BASE_OFFSET;
3058		*queue_len = TPC_QMAN_LENGTH;
3059		break;
3060	case GOYA_QUEUE_ID_TPC7:
3061		offset = TPC7_QMAN_BASE_OFFSET;
3062		*queue_len = TPC_QMAN_LENGTH;
3063		break;
3064	default:
3065		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3066		return NULL;
3067	}
3068
3069	base += offset;
3070	*dma_handle += offset;
3071
3072	return base;
3073}
3074
3075static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
3076{
3077	struct packet_msg_prot *fence_pkt;
3078	u32 *fence_ptr;
3079	dma_addr_t fence_dma_addr;
3080	struct hl_cb *cb;
3081	u32 tmp, timeout;
3082	int rc;
3083
3084	if (hdev->pldm)
3085		timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
3086	else
3087		timeout = HL_DEVICE_TIMEOUT_USEC;
3088
3089	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
3090		dev_err_ratelimited(hdev->dev,
3091			"Can't send driver job on QMAN0 because the device is not idle\n");
3092		return -EBUSY;
3093	}
3094
3095	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
3096	if (!fence_ptr) {
3097		dev_err(hdev->dev,
3098			"Failed to allocate fence memory for QMAN0\n");
3099		return -ENOMEM;
3100	}
3101
3102	goya_qman0_set_security(hdev, true);
3103
3104	cb = job->patched_cb;
3105
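	/*
	 * Completion is detected with a fence: the MSG_PROT packet at the
	 * end of the CB writes a known value to host memory, and we poll
	 * that location below until the value shows up.
	 */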
3106	fence_pkt = cb->kernel_address +
3107			job->job_cb_size - sizeof(struct packet_msg_prot);
3108
3109	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3110			(1 << GOYA_PKT_CTL_EB_SHIFT) |
3111			(1 << GOYA_PKT_CTL_MB_SHIFT);
3112	fence_pkt->ctl = cpu_to_le32(tmp);
3113	fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
3114	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3115
3116	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
3117					job->job_cb_size, cb->bus_address);
3118	if (rc) {
3119		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
3120		goto free_fence_ptr;
3121	}
3122
3123	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
3124				(tmp == GOYA_QMAN0_FENCE_VAL), 1000,
3125				timeout, true);
3126
3127	hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
3128
3129	if (rc == -ETIMEDOUT) {
3130		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
3131		goto free_fence_ptr;
3132	}
3133
3134free_fence_ptr:
3135	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
3136
3137	goya_qman0_set_security(hdev, false);
3138
3139	return rc;
3140}
3141
3142int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
3143				u32 timeout, u64 *result)
3144{
3145	struct goya_device *goya = hdev->asic_specific;
3146
3147	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
3148		if (result)
3149			*result = 0;
3150		return 0;
3151	}
3152
3153	if (!timeout)
3154		timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;
3155
3156	return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
3157					timeout, result);
3158}
3159
3160int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3161{
3162	struct packet_msg_prot *fence_pkt;
3163	dma_addr_t pkt_dma_addr;
3164	u32 fence_val, tmp;
3165	dma_addr_t fence_dma_addr;
3166	u32 *fence_ptr;
3167	int rc;
3168
3169	fence_val = GOYA_QMAN0_FENCE_VAL;
3170
3171	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
3172	if (!fence_ptr) {
3173		dev_err(hdev->dev,
3174			"Failed to allocate memory for H/W queue %d testing\n",
3175			hw_queue_id);
3176		return -ENOMEM;
3177	}
3178
3179	*fence_ptr = 0;
3180
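	/*
	 * The test sends a single MSG_PROT packet that writes fence_val to
	 * fence_ptr; seeing that value appear proves the queue fetched and
	 * executed the CB.
	 */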
3181	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
3182						&pkt_dma_addr);
3183	if (!fence_pkt) {
3184		dev_err(hdev->dev,
3185			"Failed to allocate packet for H/W queue %d testing\n",
3186			hw_queue_id);
3187		rc = -ENOMEM;
3188		goto free_fence_ptr;
3189	}
3190
3191	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3192			(1 << GOYA_PKT_CTL_EB_SHIFT) |
3193			(1 << GOYA_PKT_CTL_MB_SHIFT);
3194	fence_pkt->ctl = cpu_to_le32(tmp);
3195	fence_pkt->value = cpu_to_le32(fence_val);
3196	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3197
3198	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3199					sizeof(struct packet_msg_prot),
3200					pkt_dma_addr);
3201	if (rc) {
3202		dev_err(hdev->dev,
3203			"Failed to send fence packet to H/W queue %d\n",
3204			hw_queue_id);
3205		goto free_pkt;
3206	}
3207
3208	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3209					1000, GOYA_TEST_QUEUE_WAIT_USEC, true);
3210
3211	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3212
3213	if (rc == -ETIMEDOUT) {
3214		dev_err(hdev->dev,
3215			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3216			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3217		rc = -EIO;
3218	}
3219
3220free_pkt:
3221	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
3222free_fence_ptr:
3223	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
3224	return rc;
3225}
3226
3227int goya_test_cpu_queue(struct hl_device *hdev)
3228{
3229	struct goya_device *goya = hdev->asic_specific;
3230
3231	/*
	 * Check the capability here because send_cpu_message() won't update
	 * the result value if the capability isn't initialized
3234	 */
3235	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
3236		return 0;
3237
3238	return hl_fw_test_cpu_queue(hdev);
3239}
3240
3241int goya_test_queues(struct hl_device *hdev)
3242{
3243	int i, rc, ret_val = 0;
3244
3245	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
3246		rc = goya_test_queue(hdev, i);
3247		if (rc)
3248			ret_val = -EINVAL;
3249	}
3250
3251	return ret_val;
3252}
3253
3254static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3255					gfp_t mem_flags, dma_addr_t *dma_handle)
3256{
3257	void *kernel_addr;
3258
3259	if (size > GOYA_DMA_POOL_BLK_SIZE)
3260		return NULL;
3261
	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3263
3264	/* Shift to the device's base physical address of host memory */
3265	if (kernel_addr)
3266		*dma_handle += HOST_PHYS_BASE;
3267
3268	return kernel_addr;
3269}
3270
3271static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
3272				dma_addr_t dma_addr)
3273{
3274	/* Cancel the device's base physical address of host memory */
3275	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3276
3277	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3278}
3279
3280void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
3281					dma_addr_t *dma_handle)
3282{
3283	void *vaddr;
3284
3285	vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3286	*dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
3287			VA_CPU_ACCESSIBLE_MEM_ADDR;
3288
3289	return vaddr;
3290}
3291
3292void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
3293					void *vaddr)
3294{
3295	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3296}
3297
3298u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
3299{
3300	struct scatterlist *sg, *sg_next_iter;
3301	u32 count, dma_desc_cnt;
3302	u64 len, len_next;
3303	dma_addr_t addr, addr_next;
3304
3305	dma_desc_cnt = 0;
3306
3307	for_each_sgtable_dma_sg(sgt, sg, count) {
3308		len = sg_dma_len(sg);
3309		addr = sg_dma_address(sg);
3310
3311		if (len == 0)
3312			break;
3313
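		/*
		 * Merge the following entries as long as they are physically
		 * contiguous and the combined size still fits in a single
		 * LIN_DMA transfer.
		 */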
3314		while ((count + 1) < sgt->nents) {
3315			sg_next_iter = sg_next(sg);
3316			len_next = sg_dma_len(sg_next_iter);
3317			addr_next = sg_dma_address(sg_next_iter);
3318
3319			if (len_next == 0)
3320				break;
3321
3322			if ((addr + len == addr_next) &&
3323				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3324				len += len_next;
3325				count++;
3326				sg = sg_next_iter;
3327			} else {
3328				break;
3329			}
3330		}
3331
3332		dma_desc_cnt++;
3333	}
3334
3335	return dma_desc_cnt * sizeof(struct packet_lin_dma);
3336}
3337
3338static int goya_pin_memory_before_cs(struct hl_device *hdev,
3339				struct hl_cs_parser *parser,
3340				struct packet_lin_dma *user_dma_pkt,
3341				u64 addr, enum dma_data_direction dir)
3342{
3343	struct hl_userptr *userptr;
3344	int rc;
3345
3346	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3347			parser->job_userptr_list, &userptr))
3348		goto already_pinned;
3349
3350	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
3351	if (!userptr)
3352		return -ENOMEM;
3353
3354	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3355				userptr);
3356	if (rc)
3357		goto free_userptr;
3358
3359	list_add_tail(&userptr->job_node, parser->job_userptr_list);
3360
3361	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
3362	if (rc) {
3363		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3364		goto unpin_memory;
3365	}
3366
3367	userptr->dma_mapped = true;
3368	userptr->dir = dir;
3369
3370already_pinned:
3371	parser->patched_cb_size +=
3372			goya_get_dma_desc_list_size(hdev, userptr->sgt);
3373
3374	return 0;
3375
3376unpin_memory:
3377	list_del(&userptr->job_node);
3378	hl_unpin_host_memory(hdev, userptr);
3379free_userptr:
3380	kfree(userptr);
3381	return rc;
3382}
3383
3384static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3385				struct hl_cs_parser *parser,
3386				struct packet_lin_dma *user_dma_pkt)
3387{
3388	u64 device_memory_addr, addr;
3389	enum dma_data_direction dir;
3390	enum hl_goya_dma_direction user_dir;
3391	bool sram_addr = true;
3392	bool skip_host_mem_pin = false;
3393	bool user_memset;
3394	u32 ctl;
3395	int rc = 0;
3396
3397	ctl = le32_to_cpu(user_dma_pkt->ctl);
3398
3399	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3400			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3401
3402	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3403			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3404
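	/*
	 * When the memset bit is set, the "source" is an immediate value
	 * rather than a host address, so there is no host memory to pin.
	 */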
3405	switch (user_dir) {
3406	case HL_DMA_HOST_TO_DRAM:
3407		dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3408		dir = DMA_TO_DEVICE;
3409		sram_addr = false;
3410		addr = le64_to_cpu(user_dma_pkt->src_addr);
3411		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3412		if (user_memset)
3413			skip_host_mem_pin = true;
3414		break;
3415
3416	case HL_DMA_DRAM_TO_HOST:
3417		dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3418		dir = DMA_FROM_DEVICE;
3419		sram_addr = false;
3420		addr = le64_to_cpu(user_dma_pkt->dst_addr);
3421		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3422		break;
3423
3424	case HL_DMA_HOST_TO_SRAM:
3425		dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3426		dir = DMA_TO_DEVICE;
3427		addr = le64_to_cpu(user_dma_pkt->src_addr);
3428		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3429		if (user_memset)
3430			skip_host_mem_pin = true;
3431		break;
3432
3433	case HL_DMA_SRAM_TO_HOST:
3434		dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3435		dir = DMA_FROM_DEVICE;
3436		addr = le64_to_cpu(user_dma_pkt->dst_addr);
3437		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3438		break;
3439	default:
3440		dev_err(hdev->dev, "DMA direction %d is unsupported/undefined\n", user_dir);
3441		return -EFAULT;
3442	}
3443
3444	if (sram_addr) {
3445		if (!hl_mem_area_inside_range(device_memory_addr,
3446				le32_to_cpu(user_dma_pkt->tsize),
3447				hdev->asic_prop.sram_user_base_address,
3448				hdev->asic_prop.sram_end_address)) {
3449
3450			dev_err(hdev->dev,
3451				"SRAM address 0x%llx + 0x%x is invalid\n",
3452				device_memory_addr,
				le32_to_cpu(user_dma_pkt->tsize));
3454			return -EFAULT;
3455		}
3456	} else {
3457		if (!hl_mem_area_inside_range(device_memory_addr,
3458				le32_to_cpu(user_dma_pkt->tsize),
3459				hdev->asic_prop.dram_user_base_address,
3460				hdev->asic_prop.dram_end_address)) {
3461
3462			dev_err(hdev->dev,
3463				"DRAM address 0x%llx + 0x%x is invalid\n",
3464				device_memory_addr,
				le32_to_cpu(user_dma_pkt->tsize));
3466			return -EFAULT;
3467		}
3468	}
3469
3470	if (skip_host_mem_pin)
3471		parser->patched_cb_size += sizeof(*user_dma_pkt);
3472	else {
3473		if ((dir == DMA_TO_DEVICE) &&
3474				(parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3475			dev_err(hdev->dev,
				"Can't DMA from host on queue other than 1\n");
3477			return -EFAULT;
3478		}
3479
3480		rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3481						addr, dir);
3482	}
3483
3484	return rc;
3485}
3486
3487static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3488				struct hl_cs_parser *parser,
3489				struct packet_lin_dma *user_dma_pkt)
3490{
3491	u64 sram_memory_addr, dram_memory_addr;
3492	enum hl_goya_dma_direction user_dir;
3493	u32 ctl;
3494
3495	ctl = le32_to_cpu(user_dma_pkt->ctl);
3496	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3497			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3498
3499	if (user_dir == HL_DMA_DRAM_TO_SRAM) {
3500		dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3501		dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3502		sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3503	} else {
3504		dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3505		sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3506		dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3507	}
3508
3509	if (!hl_mem_area_inside_range(sram_memory_addr,
3510				le32_to_cpu(user_dma_pkt->tsize),
3511				hdev->asic_prop.sram_user_base_address,
3512				hdev->asic_prop.sram_end_address)) {
3513		dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
			sram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
3515		return -EFAULT;
3516	}
3517
3518	if (!hl_mem_area_inside_range(dram_memory_addr,
3519				le32_to_cpu(user_dma_pkt->tsize),
3520				hdev->asic_prop.dram_user_base_address,
3521				hdev->asic_prop.dram_end_address)) {
3522		dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
			dram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
3524		return -EFAULT;
3525	}
3526
3527	parser->patched_cb_size += sizeof(*user_dma_pkt);
3528
3529	return 0;
3530}
3531
3532static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3533				struct hl_cs_parser *parser,
3534				struct packet_lin_dma *user_dma_pkt)
3535{
3536	enum hl_goya_dma_direction user_dir;
3537	u32 ctl;
3538	int rc;
3539
3540	dev_dbg(hdev->dev, "DMA packet details:\n");
3541	dev_dbg(hdev->dev, "source == 0x%llx\n",
3542		le64_to_cpu(user_dma_pkt->src_addr));
3543	dev_dbg(hdev->dev, "destination == 0x%llx\n",
3544		le64_to_cpu(user_dma_pkt->dst_addr));
3545	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3546
3547	ctl = le32_to_cpu(user_dma_pkt->ctl);
3548	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3549			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3550
3551	/*
3552	 * Special handling for DMA with size 0. The H/W has a bug where
3553	 * this can cause the QMAN DMA to get stuck, so block it here.
3554	 */
3555	if (user_dma_pkt->tsize == 0) {
3556		dev_err(hdev->dev,
3557			"Got DMA with size 0, might reset the device\n");
3558		return -EINVAL;
3559	}
3560
3561	if ((user_dir == HL_DMA_DRAM_TO_SRAM) || (user_dir == HL_DMA_SRAM_TO_DRAM))
3562		rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
3563	else
3564		rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
3565
3566	return rc;
3567}
3568
3569static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
3570				struct hl_cs_parser *parser,
3571				struct packet_lin_dma *user_dma_pkt)
3572{
3573	dev_dbg(hdev->dev, "DMA packet details:\n");
3574	dev_dbg(hdev->dev, "source == 0x%llx\n",
3575		le64_to_cpu(user_dma_pkt->src_addr));
3576	dev_dbg(hdev->dev, "destination == 0x%llx\n",
3577		le64_to_cpu(user_dma_pkt->dst_addr));
3578	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3579
3580	/*
3581	 * WA for HW-23.
3582	 * We can't allow user to read from Host using QMANs other than 1.
3583	 * PMMU and HPMMU addresses are equal, check only one of them.
3584	 */
3585	if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
3586		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
3587				le32_to_cpu(user_dma_pkt->tsize),
3588				hdev->asic_prop.pmmu.start_addr,
3589				hdev->asic_prop.pmmu.end_addr)) {
		dev_err(hdev->dev,
			"Can't DMA from host on queue other than 1\n");
3592		return -EFAULT;
3593	}
3594
3595	if (user_dma_pkt->tsize == 0) {
3596		dev_err(hdev->dev,
3597			"Got DMA with size 0, might reset the device\n");
3598		return -EINVAL;
3599	}
3600
3601	parser->patched_cb_size += sizeof(*user_dma_pkt);
3602
3603	return 0;
3604}
3605
3606static int goya_validate_wreg32(struct hl_device *hdev,
3607				struct hl_cs_parser *parser,
3608				struct packet_wreg32 *wreg_pkt)
3609{
3610	struct goya_device *goya = hdev->asic_specific;
3611	u32 sob_start_addr, sob_end_addr;
3612	u16 reg_offset;
3613
3614	reg_offset = le32_to_cpu(wreg_pkt->ctl) &
3615			GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
3616
3617	dev_dbg(hdev->dev, "WREG32 packet details:\n");
3618	dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
3619	dev_dbg(hdev->dev, "value      == 0x%x\n",
3620		le32_to_cpu(wreg_pkt->value));
3621
3622	if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
3623		dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3624			reg_offset);
3625		return -EPERM;
3626	}
3627
3628	/*
3629	 * With MMU, DMA channels are not secured, so it doesn't matter where
3630	 * the WR COMP will be written to because it will go out with
3631	 * non-secured property
3632	 */
3633	if (goya->hw_cap_initialized & HW_CAP_MMU)
3634		return 0;
3635
3636	sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3637	sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3638
3639	if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
3640			(le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {
3641
		dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
			le32_to_cpu(wreg_pkt->value));
3644		return -EPERM;
3645	}
3646
3647	return 0;
3648}
3649
3650static int goya_validate_cb(struct hl_device *hdev,
3651			struct hl_cs_parser *parser, bool is_mmu)
3652{
3653	u32 cb_parsed_length = 0;
3654	int rc = 0;
3655
3656	parser->patched_cb_size = 0;
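	/*
	 * This pass walks the user CB to compute the size of the patched CB
	 * and to reject illegal packets. In the non-MMU path the actual
	 * patching is done later, in goya_patch_cb().
	 */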
3657
	/* user_cb_size is greater than 0 so the loop will always execute */
3659	while (cb_parsed_length < parser->user_cb_size) {
3660		enum packet_id pkt_id;
3661		u16 pkt_size;
3662		struct goya_packet *user_pkt;
3663
3664		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3665
3666		pkt_id = (enum packet_id) (
3667				(le64_to_cpu(user_pkt->header) &
3668				PACKET_HEADER_PACKET_ID_MASK) >>
3669					PACKET_HEADER_PACKET_ID_SHIFT);
3670
3671		if (!validate_packet_id(pkt_id)) {
3672			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3673			rc = -EINVAL;
3674			break;
3675		}
3676
3677		pkt_size = goya_packet_sizes[pkt_id];
3678		cb_parsed_length += pkt_size;
3679		if (cb_parsed_length > parser->user_cb_size) {
3680			dev_err(hdev->dev,
3681				"packet 0x%x is out of CB boundary\n", pkt_id);
3682			rc = -EINVAL;
3683			break;
3684		}
3685
3686		switch (pkt_id) {
3687		case PACKET_WREG_32:
3688			/*
3689			 * Although it is validated after copy in patch_cb(),
3690			 * need to validate here as well because patch_cb() is
3691			 * not called in MMU path while this function is called
3692			 */
3693			rc = goya_validate_wreg32(hdev,
3694				parser, (struct packet_wreg32 *) user_pkt);
3695			parser->patched_cb_size += pkt_size;
3696			break;
3697
3698		case PACKET_WREG_BULK:
3699			dev_err(hdev->dev,
3700				"User not allowed to use WREG_BULK\n");
3701			rc = -EPERM;
3702			break;
3703
3704		case PACKET_MSG_PROT:
3705			dev_err(hdev->dev,
3706				"User not allowed to use MSG_PROT\n");
3707			rc = -EPERM;
3708			break;
3709
3710		case PACKET_CP_DMA:
3711			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3712			rc = -EPERM;
3713			break;
3714
3715		case PACKET_STOP:
3716			dev_err(hdev->dev, "User not allowed to use STOP\n");
3717			rc = -EPERM;
3718			break;
3719
3720		case PACKET_LIN_DMA:
3721			if (is_mmu)
3722				rc = goya_validate_dma_pkt_mmu(hdev, parser,
3723					(struct packet_lin_dma *) user_pkt);
3724			else
3725				rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
3726					(struct packet_lin_dma *) user_pkt);
3727			break;
3728
3729		case PACKET_MSG_LONG:
3730		case PACKET_MSG_SHORT:
3731		case PACKET_FENCE:
3732		case PACKET_NOP:
3733			parser->patched_cb_size += pkt_size;
3734			break;
3735
3736		default:
3737			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3738				pkt_id);
3739			rc = -EINVAL;
3740			break;
3741		}
3742
3743		if (rc)
3744			break;
3745	}
3746
3747	/*
3748	 * The new CB should have space at the end for two MSG_PROT packets:
3749	 * 1. A packet that will act as a completion packet
3750	 * 2. A packet that will generate MSI-X interrupt
3751	 */
3752	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3753
3754	return rc;
3755}
3756
3757static int goya_patch_dma_packet(struct hl_device *hdev,
3758				struct hl_cs_parser *parser,
3759				struct packet_lin_dma *user_dma_pkt,
3760				struct packet_lin_dma *new_dma_pkt,
3761				u32 *new_dma_pkt_size)
3762{
3763	struct hl_userptr *userptr;
3764	struct scatterlist *sg, *sg_next_iter;
3765	u32 count, dma_desc_cnt;
3766	u64 len, len_next;
3767	dma_addr_t dma_addr, dma_addr_next;
3768	enum hl_goya_dma_direction user_dir;
3769	u64 device_memory_addr, addr;
3770	enum dma_data_direction dir;
3771	struct sg_table *sgt;
3772	bool skip_host_mem_pin = false;
3773	bool user_memset;
3774	u32 user_rdcomp_mask, user_wrcomp_mask, ctl;
3775
3776	ctl = le32_to_cpu(user_dma_pkt->ctl);
3777
3778	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3779			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3780
3781	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3782			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3783
3784	if ((user_dir == HL_DMA_DRAM_TO_SRAM) || (user_dir == HL_DMA_SRAM_TO_DRAM) ||
3785			(user_dma_pkt->tsize == 0)) {
3786		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
3787		*new_dma_pkt_size = sizeof(*new_dma_pkt);
3788		return 0;
3789	}
3790
3791	if ((user_dir == HL_DMA_HOST_TO_DRAM) || (user_dir == HL_DMA_HOST_TO_SRAM)) {
3792		addr = le64_to_cpu(user_dma_pkt->src_addr);
3793		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3794		dir = DMA_TO_DEVICE;
3795		if (user_memset)
3796			skip_host_mem_pin = true;
3797	} else {
3798		addr = le64_to_cpu(user_dma_pkt->dst_addr);
3799		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3800		dir = DMA_FROM_DEVICE;
3801	}
3802
	if ((!skip_host_mem_pin) &&
		(!hl_userptr_is_pinned(hdev, addr,
			le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr))) {
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, le32_to_cpu(user_dma_pkt->tsize));
3809		return -EFAULT;
3810	}
3811
3812	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3813		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3814		*new_dma_pkt_size = sizeof(*user_dma_pkt);
3815		return 0;
3816	}
3817
3818	user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;
3819
3820	user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;
3821
3822	sgt = userptr->sgt;
3823	dma_desc_cnt = 0;
3824
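	/*
	 * Walk the pinned buffer's DMA scatter-gather list and greedily merge
	 * physically contiguous entries, as long as the merged chunk does not
	 * exceed DMA_MAX_TRANSFER_SIZE. Each resulting chunk becomes one
	 * LIN_DMA packet in the patched CB.
	 */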
3825	for_each_sgtable_dma_sg(sgt, sg, count) {
3826		len = sg_dma_len(sg);
3827		dma_addr = sg_dma_address(sg);
3828
3829		if (len == 0)
3830			break;
3831
3832		while ((count + 1) < sgt->nents) {
3833			sg_next_iter = sg_next(sg);
3834			len_next = sg_dma_len(sg_next_iter);
3835			dma_addr_next = sg_dma_address(sg_next_iter);
3836
3837			if (len_next == 0)
3838				break;
3839
3840			if ((dma_addr + len == dma_addr_next) &&
3841				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3842				len += len_next;
3843				count++;
3844				sg = sg_next_iter;
3845			} else {
3846				break;
3847			}
3848		}
3849
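		/*
		 * Only the first descriptor keeps the user's engine-barrier
		 * (EB) setting. RDCOMP/WRCOMP are cleared on every descriptor
		 * and restored on the last one after the loop, so completion
		 * is signalled only once for the whole transfer.
		 */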
3850		ctl = le32_to_cpu(user_dma_pkt->ctl);
3851		if (likely(dma_desc_cnt))
3852			ctl &= ~GOYA_PKT_CTL_EB_MASK;
3853		ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
3854				GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
3855		new_dma_pkt->ctl = cpu_to_le32(ctl);
3856		new_dma_pkt->tsize = cpu_to_le32((u32) len);
3857
3858		if (dir == DMA_TO_DEVICE) {
3859			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3860			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3861		} else {
3862			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3863			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3864		}
3865
3866		if (!user_memset)
3867			device_memory_addr += len;
3868		dma_desc_cnt++;
3869		new_dma_pkt++;
3870	}
3871
3872	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"Got 0 SG entries when patching DMA packet\n");
3875		return -EFAULT;
3876	}
3877
3878	/* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
3879	new_dma_pkt--;
3880	new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);
3881
3882	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
3883
3884	return 0;
3885}
3886
3887static int goya_patch_cb(struct hl_device *hdev,
3888				struct hl_cs_parser *parser)
3889{
3890	u32 cb_parsed_length = 0;
3891	u32 cb_patched_cur_length = 0;
3892	int rc = 0;
3893
	/* user_cb_size is greater than 0 so the loop will always execute */
3895	while (cb_parsed_length < parser->user_cb_size) {
3896		enum packet_id pkt_id;
3897		u16 pkt_size;
3898		u32 new_pkt_size = 0;
3899		struct goya_packet *user_pkt, *kernel_pkt;
3900
3901		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3902		kernel_pkt = parser->patched_cb->kernel_address +
3903					cb_patched_cur_length;
3904
3905		pkt_id = (enum packet_id) (
3906				(le64_to_cpu(user_pkt->header) &
3907				PACKET_HEADER_PACKET_ID_MASK) >>
3908					PACKET_HEADER_PACKET_ID_SHIFT);
3909
3910		if (!validate_packet_id(pkt_id)) {
3911			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3912			rc = -EINVAL;
3913			break;
3914		}
3915
3916		pkt_size = goya_packet_sizes[pkt_id];
3917		cb_parsed_length += pkt_size;
3918		if (cb_parsed_length > parser->user_cb_size) {
3919			dev_err(hdev->dev,
3920				"packet 0x%x is out of CB boundary\n", pkt_id);
3921			rc = -EINVAL;
3922			break;
3923		}
3924
3925		switch (pkt_id) {
3926		case PACKET_LIN_DMA:
3927			rc = goya_patch_dma_packet(hdev, parser,
3928					(struct packet_lin_dma *) user_pkt,
3929					(struct packet_lin_dma *) kernel_pkt,
3930					&new_pkt_size);
3931			cb_patched_cur_length += new_pkt_size;
3932			break;
3933
3934		case PACKET_WREG_32:
3935			memcpy(kernel_pkt, user_pkt, pkt_size);
3936			cb_patched_cur_length += pkt_size;
3937			rc = goya_validate_wreg32(hdev, parser,
3938					(struct packet_wreg32 *) kernel_pkt);
3939			break;
3940
3941		case PACKET_WREG_BULK:
3942			dev_err(hdev->dev,
3943				"User not allowed to use WREG_BULK\n");
3944			rc = -EPERM;
3945			break;
3946
3947		case PACKET_MSG_PROT:
3948			dev_err(hdev->dev,
3949				"User not allowed to use MSG_PROT\n");
3950			rc = -EPERM;
3951			break;
3952
3953		case PACKET_CP_DMA:
3954			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3955			rc = -EPERM;
3956			break;
3957
3958		case PACKET_STOP:
3959			dev_err(hdev->dev, "User not allowed to use STOP\n");
3960			rc = -EPERM;
3961			break;
3962
3963		case PACKET_MSG_LONG:
3964		case PACKET_MSG_SHORT:
3965		case PACKET_FENCE:
3966		case PACKET_NOP:
3967			memcpy(kernel_pkt, user_pkt, pkt_size);
3968			cb_patched_cur_length += pkt_size;
3969			break;
3970
3971		default:
3972			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3973				pkt_id);
3974			rc = -EINVAL;
3975			break;
3976		}
3977
3978		if (rc)
3979			break;
3980	}
3981
3982	return rc;
3983}
3984
3985static int goya_parse_cb_mmu(struct hl_device *hdev,
3986		struct hl_cs_parser *parser)
3987{
3988	u64 handle;
3989	u32 patched_cb_size;
3990	struct hl_cb *user_cb;
3991	int rc;
3992
3993	/*
3994	 * The new CB should have space at the end for two MSG_PROT pkt:
3995	 * 1. A packet that will act as a completion packet
3996	 * 2. A packet that will generate MSI-X interrupt
3997	 */
3998	parser->patched_cb_size = parser->user_cb_size +
3999			sizeof(struct packet_msg_prot) * 2;
4000
4001	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
4002				parser->patched_cb_size, false, false,
4003				&handle);
4004
4005	if (rc) {
4006		dev_err(hdev->dev,
4007			"Failed to allocate patched CB for DMA CS %d\n",
4008			rc);
4009		return rc;
4010	}
4011
4012	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
4013	/* hl_cb_get should never fail here */
4014	if (!parser->patched_cb) {
4015		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
4016		rc = -EFAULT;
4017		goto out;
4018	}
4019
4020	/*
4021	 * The check that parser->user_cb_size <= parser->user_cb->size was done
4022	 * in validate_queue_index().
4023	 */
4024	memcpy(parser->patched_cb->kernel_address,
4025		parser->user_cb->kernel_address,
4026		parser->user_cb_size);
4027
4028	patched_cb_size = parser->patched_cb_size;
4029
4030	/* validate patched CB instead of user CB */
4031	user_cb = parser->user_cb;
4032	parser->user_cb = parser->patched_cb;
4033	rc = goya_validate_cb(hdev, parser, true);
4034	parser->user_cb = user_cb;
4035
4036	if (rc) {
4037		hl_cb_put(parser->patched_cb);
4038		goto out;
4039	}
4040
4041	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "patched CB size mismatch\n");
4043		hl_cb_put(parser->patched_cb);
4044		rc = -EINVAL;
4045		goto out;
4046	}
4047
4048out:
	/*
	 * Always call cb_destroy here because we still hold one reference
	 * from the earlier cb_get. After the job completes, cb_put will
	 * release it, but here we want to remove the CB from the IDR now.
	 */
4055	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
4056
4057	return rc;
4058}
4059
4060static int goya_parse_cb_no_mmu(struct hl_device *hdev,
4061				struct hl_cs_parser *parser)
4062{
4063	u64 handle;
4064	int rc;
4065
4066	rc = goya_validate_cb(hdev, parser, false);
4067
4068	if (rc)
4069		goto free_userptr;
4070
4071	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
4072				parser->patched_cb_size, false, false,
4073				&handle);
4074	if (rc) {
4075		dev_err(hdev->dev,
4076			"Failed to allocate patched CB for DMA CS %d\n", rc);
4077		goto free_userptr;
4078	}
4079
4080	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
4081	/* hl_cb_get should never fail here */
4082	if (!parser->patched_cb) {
4083		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
4084		rc = -EFAULT;
4085		goto out;
4086	}
4087
4088	rc = goya_patch_cb(hdev, parser);
4089
4090	if (rc)
4091		hl_cb_put(parser->patched_cb);
4092
4093out:
	/*
	 * Always call cb_destroy here because we still hold one reference
	 * from the earlier cb_get. After the job completes, cb_put will
	 * release it, but here we want to remove the CB from the IDR now.
	 */
4100	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
4101
4102free_userptr:
4103	if (rc)
4104		hl_userptr_delete_list(hdev, parser->job_userptr_list);
4105	return rc;
4106}
4107
4108static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
4109					struct hl_cs_parser *parser)
4110{
4111	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4112	struct goya_device *goya = hdev->asic_specific;
4113
4114	if (goya->hw_cap_initialized & HW_CAP_MMU)
4115		return 0;
4116
4117	/* For internal queue jobs, just check if CB address is valid */
4118	if (hl_mem_area_inside_range(
4119			(u64) (uintptr_t) parser->user_cb,
4120			parser->user_cb_size,
4121			asic_prop->sram_user_base_address,
4122			asic_prop->sram_end_address))
4123		return 0;
4124
4125	if (hl_mem_area_inside_range(
4126			(u64) (uintptr_t) parser->user_cb,
4127			parser->user_cb_size,
4128			asic_prop->dram_user_base_address,
4129			asic_prop->dram_end_address))
4130		return 0;
4131
	dev_err(hdev->dev,
		"Internal CB address 0x%px + 0x%x is neither in SRAM nor in DRAM\n",
		parser->user_cb, parser->user_cb_size);
4135
4136	return -EFAULT;
4137}
4138
4139int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4140{
4141	struct goya_device *goya = hdev->asic_specific;
4142
4143	if (parser->queue_type == QUEUE_TYPE_INT)
4144		return goya_parse_cb_no_ext_queue(hdev, parser);
4145
4146	if (goya->hw_cap_initialized & HW_CAP_MMU)
4147		return goya_parse_cb_mmu(hdev, parser);
4148	else
4149		return goya_parse_cb_no_mmu(hdev, parser);
4150}
4151
4152void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
4153				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
4154				u32 msix_vec, bool eb)
4155{
4156	struct packet_msg_prot *cq_pkt;
4157	u32 tmp;
4158
4159	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
4160
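	/*
	 * First MSG_PROT packet: writes the completion value to the CQ
	 * address, with both the engine barrier and message barrier set.
	 */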
4161	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
4162			(1 << GOYA_PKT_CTL_EB_SHIFT) |
4163			(1 << GOYA_PKT_CTL_MB_SHIFT);
4164	cq_pkt->ctl = cpu_to_le32(tmp);
4165	cq_pkt->value = cpu_to_le32(cq_val);
4166	cq_pkt->addr = cpu_to_le64(cq_addr);
4167
4168	cq_pkt++;
4169
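	/*
	 * Second MSG_PROT packet: rings the MSI-X doorbell to raise the
	 * completion interrupt once the CQ update above has gone through.
	 */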
4170	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
4171			(1 << GOYA_PKT_CTL_MB_SHIFT);
4172	cq_pkt->ctl = cpu_to_le32(tmp);
4173	cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
4174	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
4175}
4176
4177void goya_update_eq_ci(struct hl_device *hdev, u32 val)
4178{
4179	WREG32(mmCPU_EQ_CI, val);
4180}
4181
4182void goya_restore_phase_topology(struct hl_device *hdev)
4183{
4184
4185}
4186
4187static void goya_clear_sm_regs(struct hl_device *hdev)
4188{
4189	int i, num_of_sob_in_longs, num_of_mon_in_longs;
4190
4191	num_of_sob_in_longs =
4192		((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);
4193
4194	num_of_mon_in_longs =
4195		((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);
4196
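	/*
	 * Despite their names, the two values above are the byte spans of
	 * the SOB and monitor-status register files; the loops below stride
	 * by 4 bytes, i.e. one 32-bit register at a time.
	 */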
4197	for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
4198		WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);
4199
4200	for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
4201		WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);
4202
4203	/* Flush all WREG to prevent race */
4204	i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
4205}
4206
4207static int goya_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
4208{
	dev_err(hdev->dev, "Reading via DMA is not implemented yet\n");
4210	return -EPERM;
4211}
4212
4213static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
4214{
4215	struct goya_device *goya = hdev->asic_specific;
4216
4217	if (hdev->reset_info.hard_reset_pending)
4218		return U64_MAX;
4219
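	/*
	 * PTEs reside in DRAM and are accessed through the DDR BAR. The BAR
	 * window is assumed to already cover addr; ddr_bar_cur_addr tracks
	 * the current base of that window.
	 */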
4220	return readq(hdev->pcie_bar[DDR_BAR_ID] +
4221			(addr - goya->ddr_bar_cur_addr));
4222}
4223
4224static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4225{
4226	struct goya_device *goya = hdev->asic_specific;
4227
4228	if (hdev->reset_info.hard_reset_pending)
4229		return;
4230
4231	writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4232			(addr - goya->ddr_bar_cur_addr));
4233}
4234
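/*
 * Returns a printf-style template for the event description. For per-engine
 * events the template contains a %d placeholder that goya_get_event_desc()
 * fills in with the engine/channel index.
 */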
4235static const char *_goya_get_event_desc(u16 event_type)
4236{
4237	switch (event_type) {
4238	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4239		return "PCIe_if";
4240	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4241	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4242	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4243	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4244	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4245	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4246	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4247	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4248		return "TPC%d_ecc";
4249	case GOYA_ASYNC_EVENT_ID_MME_ECC:
4250		return "MME_ecc";
4251	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4252		return "MME_ecc_ext";
4253	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4254		return "MMU_ecc";
4255	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4256		return "DMA_macro";
4257	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4258		return "DMA_ecc";
4259	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4260		return "CPU_if_ecc";
4261	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4262		return "PSOC_mem";
4263	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4264		return "PSOC_coresight";
4265	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4266		return "SRAM%d";
4267	case GOYA_ASYNC_EVENT_ID_GIC500:
4268		return "GIC500";
4269	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4270		return "PLL%d";
4271	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4272		return "AXI_ecc";
4273	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4274		return "L2_ram_ecc";
4275	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4276		return "PSOC_gpio_05_sw_reset";
4277	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4278		return "PSOC_gpio_10_vrhot_icrit";
4279	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4280		return "PCIe_dec";
4281	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4282	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4283	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4284	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4285	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4286	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4287	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4288	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4289		return "TPC%d_dec";
4290	case GOYA_ASYNC_EVENT_ID_MME_WACS:
4291		return "MME_wacs";
4292	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4293		return "MME_wacsd";
4294	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4295		return "CPU_axi_splitter";
4296	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4297		return "PSOC_axi_dec";
4298	case GOYA_ASYNC_EVENT_ID_PSOC:
4299		return "PSOC";
4300	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4301	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4302	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4303	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4304	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4305	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4306	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4307	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4308		return "TPC%d_krn_err";
4309	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4310		return "TPC%d_cq";
4311	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4312		return "TPC%d_qm";
4313	case GOYA_ASYNC_EVENT_ID_MME_QM:
4314		return "MME_qm";
4315	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4316		return "MME_cq";
4317	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4318		return "DMA%d_qm";
4319	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4320		return "DMA%d_ch";
4321	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4322	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4323	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4324	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4325	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4326	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4327	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4328	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4329		return "TPC%d_bmon_spmu";
4330	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4331		return "DMA_bm_ch%d";
4332	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4333		return "POWER_ENV_S";
4334	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4335		return "POWER_ENV_E";
4336	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4337		return "THERMAL_ENV_S";
4338	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4339		return "THERMAL_ENV_E";
4340	case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
4341		return "QUEUE_OUT_OF_SYNC";
4342	default:
4343		return "N/A";
4344	}
4345}
4346
4347static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
4348{
4349	u8 index;
4350
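	/*
	 * The divisors below reflect the spacing of the event IDs in the
	 * event map: per-TPC ECC/DEC events appear to be 3 IDs apart and
	 * KRN_ERR/BMON_SPMU events 10 IDs apart, so dividing the offset
	 * recovers the TPC index.
	 */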
4351	switch (event_type) {
4352	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4353	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4354	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4355	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4356	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4357	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4358	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4359	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4360		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
4361		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4362		break;
4363	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4364		index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
4365		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4366		break;
4367	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4368		index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
4369		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4370		break;
4371	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4372	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4373	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4374	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4375	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4376	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4377	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4378	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4379		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
4380		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4381		break;
4382	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4383	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4384	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4385	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4386	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4387	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4388	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4389	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4390		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
4391		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4392		break;
4393	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4394		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
4395		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4396		break;
4397	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4398		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
4399		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4400		break;
4401	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4402		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
4403		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4404		break;
4405	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4406		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
4407		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4408		break;
4409	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4410	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4411	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4412	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4413	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4414	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4415	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4416	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4417		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
4418		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4419		break;
4420	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4421		index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
4422		snprintf(desc, size, _goya_get_event_desc(event_type), index);
4423		break;
	case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
	default:
		snprintf(desc, size, "%s", _goya_get_event_desc(event_type));
		break;
4430	}
4431}
4432
4433static void goya_print_razwi_info(struct hl_device *hdev)
4434{
4435	if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
4436		dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
4437		WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
4438	}
4439
4440	if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
4441		dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
4442		WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
4443	}
4444
4445	if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
4446		dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
4447		WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
4448	}
4449
4450	if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
4451		dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
4452		WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
4453	}
4454}
4455
4456static void goya_print_mmu_error_info(struct hl_device *hdev)
4457{
4458	struct goya_device *goya = hdev->asic_specific;
4459	u64 addr;
4460	u32 val;
4461
4462	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4463		return;
4464
4465	val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
4466	if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
4467		addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
4468		addr <<= 32;
4469		addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
4470
4471		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
4472					addr);
4473
4474		WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
4475	}
4476}
4477
4478static void goya_print_out_of_sync_info(struct hl_device *hdev,
4479					struct cpucp_pkt_sync_err *sync_err)
4480{
4481	struct hl_hw_queue *q = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
4482
4483	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
4484		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
4485}
4486
4487static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
4488				bool razwi)
4489{
4490	char desc[20] = "";
4491
4492	goya_get_event_desc(event_type, desc, sizeof(desc));
4493	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
4494		event_type, desc);
4495
4496	if (razwi) {
4497		goya_print_razwi_info(hdev);
4498		goya_print_mmu_error_info(hdev);
4499	}
4500}
4501
4502static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
4503		size_t irq_arr_size)
4504{
4505	struct cpucp_unmask_irq_arr_packet *pkt;
4506	size_t total_pkt_size;
4507	u64 result;
4508	int rc;
4509	int irq_num_entries, irq_arr_index;
4510	__le32 *goya_irq_arr;
4511
4512	total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
4513			irq_arr_size;
4514
	/* data should be aligned to 8 bytes in order for CPU-CP to copy it */
4516	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
4517
	/* total_pkt_size is cast to u16 later on */
4519	if (total_pkt_size > USHRT_MAX) {
4520		dev_err(hdev->dev, "too many elements in IRQ array\n");
4521		return -EINVAL;
4522	}
4523
4524	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
4525	if (!pkt)
4526		return -ENOMEM;
4527
4528	irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
4529	pkt->length = cpu_to_le32(irq_num_entries);
4530
	/* We must perform any necessary endianness conversion on the irq
	 * array being passed to the goya hardware
	 */
4534	for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
4535			irq_arr_index < irq_num_entries ; irq_arr_index++)
4536		goya_irq_arr[irq_arr_index] =
4537				cpu_to_le32(irq_arr[irq_arr_index]);
4538
4539	pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
4540						CPUCP_PKT_CTL_OPCODE_SHIFT);
4541
4542	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
4543						total_pkt_size,	0, &result);
4544
4545	if (rc)
4546		dev_err(hdev->dev, "failed to unmask IRQ array\n");
4547
4548	kfree(pkt);
4549
4550	return rc;
4551}
4552
4553static int goya_compute_reset_late_init(struct hl_device *hdev)
4554{
4555	/*
4556	 * Unmask all IRQs since some could have been received
4557	 * during the soft reset
4558	 */
4559	return goya_unmask_irq_arr(hdev, goya_all_events,
4560					sizeof(goya_all_events));
4561}
4562
4563static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
4564{
4565	struct cpucp_packet pkt;
4566	u64 result;
4567	int rc;
4568
4569	memset(&pkt, 0, sizeof(pkt));
4570
4571	pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
4572				CPUCP_PKT_CTL_OPCODE_SHIFT);
4573	pkt.value = cpu_to_le64(event_type);
4574
4575	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
4576						0, &result);
4577
4578	if (rc)
		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d\n", event_type);
4580
4581	return rc;
4582}
4583
4584static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
4585{
4586	ktime_t zero_time = ktime_set(0, 0);
4587
4588	mutex_lock(&hdev->clk_throttling.lock);
4589
4590	switch (event_type) {
4591	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4592		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
4593		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
4594		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
4595		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
4596		dev_info_ratelimited(hdev->dev,
4597			"Clock throttling due to power consumption\n");
4598		break;
4599
4600	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4601		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
4602		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
4605		break;
4606
4607	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4608		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
4609		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
4610		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
4611		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
4612		dev_info_ratelimited(hdev->dev,
4613			"Clock throttling due to overheating\n");
4614		break;
4615
4616	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4617		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
4618		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
4621		break;
4622
4623	default:
4624		dev_err(hdev->dev, "Received invalid clock change event %d\n",
4625			event_type);
4626		break;
4627	}
4628
4629	mutex_unlock(&hdev->clk_throttling.lock);
4630}
4631
4632void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
4633{
4634	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
4635	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
4636				>> EQ_CTL_EVENT_TYPE_SHIFT);
4637	struct goya_device *goya = hdev->asic_specific;
4638
4639	if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
				event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1);
4642		return;
4643	}
4644
4645	goya->events_stat[event_type]++;
4646	goya->events_stat_aggregate[event_type]++;
4647
4648	switch (event_type) {
4649	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4650	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4651	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4652	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4653	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4654	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4655	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4656	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4657	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4658	case GOYA_ASYNC_EVENT_ID_MME_ECC:
4659	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4660	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4661	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4662	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4663	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4664	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4665	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4666	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4667	case GOYA_ASYNC_EVENT_ID_GIC500:
4668	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4669	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4670	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4671		goya_print_irq_info(hdev, event_type, false);
4672		if (hdev->hard_reset_on_fw_events)
4673			hl_device_reset(hdev, (HL_DRV_RESET_HARD |
4674						HL_DRV_RESET_FW_FATAL_ERR));
4675		break;
4676
4677	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4678		goya_print_irq_info(hdev, event_type, false);
4679		if (hdev->hard_reset_on_fw_events)
4680			hl_device_reset(hdev, HL_DRV_RESET_HARD);
4681		break;
4682
4683	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4684	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4685	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4686	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4687	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4688	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4689	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4690	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4691	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4692	case GOYA_ASYNC_EVENT_ID_MME_WACS:
4693	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4694	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4695	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4696	case GOYA_ASYNC_EVENT_ID_PSOC:
4697	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4698	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4699	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4700	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4701	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4702	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4703	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4704	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4705	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4706	case GOYA_ASYNC_EVENT_ID_MME_QM:
4707	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4708	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4709	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4710		goya_print_irq_info(hdev, event_type, true);
4711		goya_unmask_irq(hdev, event_type);
4712		break;
4713
4714	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4715	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4716	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4717	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4718	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4719	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4720	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4721	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4722	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4723	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4724		goya_print_irq_info(hdev, event_type, false);
4725		goya_unmask_irq(hdev, event_type);
4726		break;
4727
4728	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4729	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4730	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4731	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4732		goya_print_clk_change_info(hdev, event_type);
4733		goya_unmask_irq(hdev, event_type);
4734		break;
4735
4736	case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
4737		goya_print_irq_info(hdev, event_type, false);
4738		goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
4739		if (hdev->hard_reset_on_fw_events)
4740			hl_device_reset(hdev, HL_DRV_RESET_HARD);
4741		else
4742			hl_fw_unmask_irq(hdev, event_type);
4743		break;
4744
4745	default:
4746		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
4747				event_type);
4748		break;
4749	}
4750}
4751
4752void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
4753{
4754	struct goya_device *goya = hdev->asic_specific;
4755
4756	if (aggregate) {
4757		*size = (u32) sizeof(goya->events_stat_aggregate);
4758		return goya->events_stat_aggregate;
4759	}
4760
4761	*size = (u32) sizeof(goya->events_stat);
4762	return goya->events_stat;
4763}
4764
4765static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
4766				u64 val, bool is_dram)
4767{
4768	struct packet_lin_dma *lin_dma_pkt;
4769	struct hl_cs_job *job;
4770	u32 cb_size, ctl;
4771	struct hl_cb *cb;
4772	int rc, lin_dma_pkts_cnt;
4773
4774	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
4775	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
4776						sizeof(struct packet_msg_prot);
4777	cb = hl_cb_kernel_create(hdev, cb_size, false);
4778	if (!cb)
4779		return -ENOMEM;
4780
4781	lin_dma_pkt = cb->kernel_address;
4782
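	/*
	 * Build one LIN_DMA packet per 2GB chunk. The packets run in memset
	 * mode, so src_addr carries the 64-bit fill value rather than a
	 * source address. cb_size above also reserves room for a trailing
	 * MSG_PROT completion packet.
	 */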
4783	do {
4784		memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4785
4786		ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
4787				(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4788				(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
4789				(1 << GOYA_PKT_CTL_RB_SHIFT) |
4790				(1 << GOYA_PKT_CTL_MB_SHIFT));
4791		ctl |= (is_dram ? HL_DMA_HOST_TO_DRAM : HL_DMA_HOST_TO_SRAM) <<
4792				GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
4793		lin_dma_pkt->ctl = cpu_to_le32(ctl);
4794
4795		lin_dma_pkt->src_addr = cpu_to_le64(val);
4796		lin_dma_pkt->dst_addr = cpu_to_le64(addr);
4797		if (lin_dma_pkts_cnt > 1)
4798			lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
4799		else
4800			lin_dma_pkt->tsize = cpu_to_le32(size);
4801
4802		size -= SZ_2G;
4803		addr += SZ_2G;
4804		lin_dma_pkt++;
4805	} while (--lin_dma_pkts_cnt);
4806
4807	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4808	if (!job) {
4809		dev_err(hdev->dev, "Failed to allocate a new job\n");
4810		rc = -ENOMEM;
4811		goto release_cb;
4812	}
4813
4814	job->id = 0;
4815	job->user_cb = cb;
4816	atomic_inc(&job->user_cb->cs_cnt);
4817	job->user_cb_size = cb_size;
4818	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
4819	job->patched_cb = job->user_cb;
4820	job->job_cb_size = job->user_cb_size;
4821
4822	hl_debugfs_add_job(hdev, job);
4823
4824	rc = goya_send_job_on_qman0(hdev, job);
4825
4826	hl_debugfs_remove_job(hdev, job);
4827	kfree(job);
4828	atomic_dec(&cb->cs_cnt);
4829
4830release_cb:
4831	hl_cb_put(cb);
4832	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
4833
4834	return rc;
4835}
4836
4837int goya_context_switch(struct hl_device *hdev, u32 asid)
4838{
4839	struct asic_fixed_properties *prop = &hdev->asic_prop;
4840	u64 addr = prop->sram_base_address, sob_addr;
4841	u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
4842	u64 val = 0x7777777777777777ull;
4843	int rc, dma_id;
4844	u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
4845					mmDMA_CH_0_WR_COMP_ADDR_LO;
4846
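	/*
	 * Scrub the SRAM with a fixed pattern, presumably so no data leaks
	 * between contexts; on pldm only 64KB is scrubbed, presumably to
	 * keep emulation time reasonable.
	 */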
4847	rc = goya_memset_device_memory(hdev, addr, size, val, false);
4848	if (rc) {
4849		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4850		return rc;
4851	}
4852
4853	/* we need to reset registers that the user is allowed to change */
4854	sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
4855	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));
4856
4857	for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
4858		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
4859							(dma_id - 1) * 4;
4860		WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
4861						lower_32_bits(sob_addr));
4862	}
4863
4864	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
4865
4866	goya_clear_sm_regs(hdev);
4867
4868	return 0;
4869}
4870
4871static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
4872{
4873	struct asic_fixed_properties *prop = &hdev->asic_prop;
4874	struct goya_device *goya = hdev->asic_specific;
4875	u64 addr = prop->mmu_pgt_addr;
4876	u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
4877			MMU_CACHE_MNG_SIZE;
4878
4879	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4880		return 0;
4881
4882	return goya_memset_device_memory(hdev, addr, size, 0, true);
4883}
4884
4885static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
4886{
4887	struct goya_device *goya = hdev->asic_specific;
4888	u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
4889	u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
4890	u64 val = 0x9999999999999999ull;
4891
4892	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4893		return 0;
4894
4895	return goya_memset_device_memory(hdev, addr, size, val, true);
4896}
4897
4898static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
4899{
4900	struct asic_fixed_properties *prop = &hdev->asic_prop;
4901	struct goya_device *goya = hdev->asic_specific;
4902	s64 off, cpu_off;
4903	int rc;
4904
4905	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4906		return 0;
4907
4908	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
4909		rc = hl_mmu_map_page(hdev->kernel_ctx,
4910			prop->dram_base_address + off,
4911			prop->dram_base_address + off, PAGE_SIZE_2MB,
4912			(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
4913		if (rc) {
4914			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
4915				prop->dram_base_address + off);
4916			goto unmap;
4917		}
4918	}
4919
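	/*
	 * Map the CPU accessible DMA region: if the host address is 2MB
	 * aligned, use a single 2MB page; otherwise fall back to covering
	 * the 2MB region with 4KB pages.
	 */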
4920	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
4921		rc = hl_mmu_map_page(hdev->kernel_ctx,
4922			VA_CPU_ACCESSIBLE_MEM_ADDR,
4923			hdev->cpu_accessible_dma_address,
4924			PAGE_SIZE_2MB, true);
4925
4926		if (rc) {
4927			dev_err(hdev->dev,
4928				"Map failed for CPU accessible memory\n");
4929			off -= PAGE_SIZE_2MB;
4930			goto unmap;
4931		}
4932	} else {
4933		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
4934			rc = hl_mmu_map_page(hdev->kernel_ctx,
4935				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4936				hdev->cpu_accessible_dma_address + cpu_off,
4937				PAGE_SIZE_4KB, true);
4938			if (rc) {
4939				dev_err(hdev->dev,
4940					"Map failed for CPU accessible memory\n");
4941				cpu_off -= PAGE_SIZE_4KB;
4942				goto unmap_cpu;
4943			}
4944		}
4945	}
4946
4947	goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
4948	goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
4949	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
4950	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);
4951
4952	/* Make sure configuration is flushed to device */
4953	RREG32(mmCPU_IF_AWUSER_OVR_EN);
4954
4955	goya->device_cpu_mmu_mappings_done = true;
4956
4957	return 0;
4958
4959unmap_cpu:
4960	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
4961		if (hl_mmu_unmap_page(hdev->kernel_ctx,
4962				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
4963				PAGE_SIZE_4KB, true))
4964			dev_warn_ratelimited(hdev->dev,
4965				"failed to unmap address 0x%llx\n",
4966				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
4967unmap:
4968	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
4969		if (hl_mmu_unmap_page(hdev->kernel_ctx,
4970				prop->dram_base_address + off, PAGE_SIZE_2MB,
4971				true))
4972			dev_warn_ratelimited(hdev->dev,
4973				"failed to unmap address 0x%llx\n",
4974				prop->dram_base_address + off);
4975
4976	return rc;
4977}
4978
4979void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
4980{
4981	struct asic_fixed_properties *prop = &hdev->asic_prop;
4982	struct goya_device *goya = hdev->asic_specific;
4983	u32 off, cpu_off;
4984
4985	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4986		return;
4987
4988	if (!goya->device_cpu_mmu_mappings_done)
4989		return;
4990
4991	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
4992	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
4993
4994	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
4995		if (hl_mmu_unmap_page(hdev->kernel_ctx,
4996				VA_CPU_ACCESSIBLE_MEM_ADDR,
4997				PAGE_SIZE_2MB, true))
4998			dev_warn(hdev->dev,
4999				"Failed to unmap CPU accessible memory\n");
5000	} else {
5001		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
5002			if (hl_mmu_unmap_page(hdev->kernel_ctx,
5003					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
5004					PAGE_SIZE_4KB,
5005					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
5006				dev_warn_ratelimited(hdev->dev,
5007					"failed to unmap address 0x%llx\n",
5008					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
5009	}
5010
5011	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
5012		if (hl_mmu_unmap_page(hdev->kernel_ctx,
5013				prop->dram_base_address + off, PAGE_SIZE_2MB,
5014				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
5015			dev_warn_ratelimited(hdev->dev,
5016					"Failed to unmap address 0x%llx\n",
5017					prop->dram_base_address + off);
5018
5019	goya->device_cpu_mmu_mappings_done = false;
5020}
5021
5022static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
5023{
5024	struct goya_device *goya = hdev->asic_specific;
5025	int i;
5026
5027	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
5028		return;
5029
5030	if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
5031		dev_crit(hdev->dev, "asid %u is too big\n", asid);
5032		return;
5033	}
5034
5035	/* zero the MMBP and ASID bits and then set the ASID */
5036	for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
5037		goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
5038}
5039
5040static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5041					u32 flags)
5042{
5043	struct goya_device *goya = hdev->asic_specific;
5044	u32 status, timeout_usec;
5045	int rc;
5046
5047	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
5048		hdev->reset_info.hard_reset_pending)
5049		return 0;
5050
	/* No need for L1-only invalidation in Goya */
5052	if (!is_hard)
5053		return 0;
5054
5055	if (hdev->pldm)
5056		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
5057	else
5058		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5059
5060	/* L0 & L1 invalidation */
5061	WREG32(mmSTLB_INV_ALL_START, 1);
5062
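	/*
	 * The H/W is expected to clear STLB_INV_ALL_START when the
	 * invalidation completes, so poll until it reads back as 0.
	 */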
5063	rc = hl_poll_timeout(
5064		hdev,
5065		mmSTLB_INV_ALL_START,
5066		status,
5067		!status,
5068		1000,
5069		timeout_usec);
5070
5071	return rc;
5072}
5073
5074static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
5075						bool is_hard, u32 flags,
5076						u32 asid, u64 va, u64 size)
5077{
5078	/* Treat as invalidate all because there is no range invalidation
5079	 * in Goya
5080	 */
5081	return hl_mmu_invalidate_cache(hdev, is_hard, flags);
5082}
5083
5084int goya_send_heartbeat(struct hl_device *hdev)
5085{
5086	struct goya_device *goya = hdev->asic_specific;
5087
5088	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5089		return 0;
5090
5091	return hl_fw_send_heartbeat(hdev);
5092}
5093
5094int goya_cpucp_info_get(struct hl_device *hdev)
5095{
5096	struct goya_device *goya = hdev->asic_specific;
5097	struct asic_fixed_properties *prop = &hdev->asic_prop;
5098	u64 dram_size;
5099	int rc;
5100
5101	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5102		return 0;
5103
5104	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
5105					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
5106					mmCPU_BOOT_ERR1);
5107	if (rc)
5108		return rc;
5109
5110	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
5111	if (dram_size) {
5112		if ((!is_power_of_2(dram_size)) ||
5113				(dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
5114			dev_err(hdev->dev,
5115				"F/W reported invalid DRAM size %llu. Trying to use default size\n",
5116				dram_size);
5117			dram_size = DRAM_PHYS_DEFAULT_SIZE;
5118		}
5119
5120		prop->dram_size = dram_size;
5121		prop->dram_end_address = prop->dram_base_address + dram_size;
5122	}
5123
5124	if (!strlen(prop->cpucp_info.card_name))
5125		strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
5126				CARD_NAME_MAX_LEN);
5127
5128	return 0;
5129}
5130
5131static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
5132				struct engines_data *e)
5133{
5134	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
5135	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
5136	unsigned long *mask = (unsigned long *)mask_arr;
5137	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
5138		mme_arch_sts;
5139	bool is_idle = true, is_eng_idle;
5140	u64 offset;
5141	int i;
5142
5143	if (e)
5144		hl_engine_data_sprintf(e, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
5145					"---  -------  ------------  -------------\n");
5146
5147	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
5148
5149	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
5150		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
5151		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
5152		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
5153				IS_DMA_IDLE(dma_core_sts0);
5154		is_idle &= is_eng_idle;
5155
5156		if (mask && !is_eng_idle)
5157			set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask);
5158		if (e)
5159			hl_engine_data_sprintf(e, dma_fmt, i, is_eng_idle ? "Y" : "N",
5160					qm_glbl_sts0, dma_core_sts0);
5161	}
5162
5163	if (e)
5164		hl_engine_data_sprintf(e,
5165			"\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
5166			"---  -------  ------------  --------------  ----------\n");
5167
5168	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
5169
5170	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
5171		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
5172		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
5173		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
5174		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
5175				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
5176				IS_TPC_IDLE(tpc_cfg_sts);
5177		is_idle &= is_eng_idle;
5178
5179		if (mask && !is_eng_idle)
5180			set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask);
5181		if (e)
5182			hl_engine_data_sprintf(e, fmt, i, is_eng_idle ? "Y" : "N",
5183				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
5184	}
5185
5186	if (e)
5187		hl_engine_data_sprintf(e,
5188			"\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
5189			"---  -------  ------------  --------------  -----------\n");
5190
5191	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
5192	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
5193	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
5194	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
5195			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
5196			IS_MME_IDLE(mme_arch_sts);
5197	is_idle &= is_eng_idle;
5198
5199	if (mask && !is_eng_idle)
5200		set_bit(GOYA_ENGINE_ID_MME_0, mask);
5201	if (e) {
5202		hl_engine_data_sprintf(e, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
5203				cmdq_glbl_sts0, mme_arch_sts);
5204		hl_engine_data_sprintf(e, "\n");
5205	}
5206
5207	return is_idle;
5208}
5209
5210static void goya_hw_queues_lock(struct hl_device *hdev)
5211	__acquires(&goya->hw_queues_lock)
5212{
5213	struct goya_device *goya = hdev->asic_specific;
5214
5215	spin_lock(&goya->hw_queues_lock);
5216}
5217
5218static void goya_hw_queues_unlock(struct hl_device *hdev)
5219	__releases(&goya->hw_queues_lock)
5220{
5221	struct goya_device *goya = hdev->asic_specific;
5222
5223	spin_unlock(&goya->hw_queues_lock);
5224}
5225
5226static u32 goya_get_pci_id(struct hl_device *hdev)
5227{
5228	return hdev->pdev->device;
5229}
5230
5231static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
5232				size_t max_size)
5233{
5234	struct goya_device *goya = hdev->asic_specific;
5235
5236	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5237		return 0;
5238
5239	return hl_fw_get_eeprom_data(hdev, data, max_size);
5240}
5241
5242static void goya_cpu_init_scrambler_dram(struct hl_device *hdev)
5243{
5244
5245}
5246
5247static int goya_ctx_init(struct hl_ctx *ctx)
5248{
5249	if (ctx->asid != HL_KERNEL_ASID_ID)
5250		goya_mmu_prepare(ctx->hdev, ctx->asid);
5251
5252	return 0;
5253}
5254
5255static int goya_pre_schedule_cs(struct hl_cs *cs)
5256{
5257	return 0;
5258}
5259
5260u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
5261{
5262	return cq_idx;
5263}
5264
5265static u32 goya_get_signal_cb_size(struct hl_device *hdev)
5266{
5267	return 0;
5268}
5269
5270static u32 goya_get_wait_cb_size(struct hl_device *hdev)
5271{
5272	return 0;
5273}
5274
5275static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
5276				u32 size, bool eb)
5277{
5278	return 0;
5279}
5280
5281static u32 goya_gen_wait_cb(struct hl_device *hdev,
5282		struct hl_gen_wait_properties *prop)
5283{
5284	return 0;
5285}
5286
5287static void goya_reset_sob(struct hl_device *hdev, void *data)
5288{
5289
5290}
5291
5292static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)
5293{
5294
5295}
5296
5297u64 goya_get_device_time(struct hl_device *hdev)
5298{
5299	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
5300
5301	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
5302}
5303
5304static int goya_collective_wait_init_cs(struct hl_cs *cs)
5305{
5306	return 0;
5307}
5308
5309static int goya_collective_wait_create_jobs(struct hl_device *hdev,
5310		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
5311		u32 collective_engine_id, u32 encaps_signal_offset)
5312{
5313	return -EINVAL;
5314}
5315
5316static void goya_ctx_fini(struct hl_ctx *ctx)
5317{
5318
5319}
5320
5321static int goya_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
5322			u32 *block_size, u32 *block_id)
5323{
5324	return -EPERM;
5325}
5326
5327static int goya_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
5328				u32 block_id, u32 block_size)
5329{
5330	return -EPERM;
5331}
5332
5333static void goya_enable_events_from_fw(struct hl_device *hdev)
5334{
5335	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
5336			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
5337}
5338
5339static int goya_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
5340{
5341	return -EINVAL;
5342}
5343
5344static int goya_map_pll_idx_to_fw_idx(u32 pll_idx)
5345{
5346	switch (pll_idx) {
5347	case HL_GOYA_CPU_PLL: return CPU_PLL;
5348	case HL_GOYA_PCI_PLL: return PCI_PLL;
5349	case HL_GOYA_MME_PLL: return MME_PLL;
5350	case HL_GOYA_TPC_PLL: return TPC_PLL;
5351	case HL_GOYA_IC_PLL: return IC_PLL;
5352	case HL_GOYA_MC_PLL: return MC_PLL;
5353	case HL_GOYA_EMMC_PLL: return EMMC_PLL;
5354	default: return -EINVAL;
5355	}
5356}
5357
static int goya_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	/* Not implemented */
	return 0;
}

static int goya_monitor_valid(struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int goya_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int goya_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	/* Not implemented */
	return 0;
}

static struct hl_state_dump_specs_funcs goya_state_dump_funcs = {
	.monitor_valid = goya_monitor_valid,
	.print_single_monitor = goya_print_single_monitor,
	.gen_sync_to_engine_map = goya_gen_sync_to_engine_map,
	.print_fences_single_engine = goya_print_fences_single_engine,
};

static void goya_state_dump_init(struct hl_device *hdev)
{
	/*
	 * The state-dump callbacks are stubs, but the common code still
	 * expects valid props/funcs pointers.
	 */
	hdev->state_dump_specs.props = goya_state_dump_specs_props;
	hdev->state_dump_specs.funcs = goya_state_dump_funcs;
}

static u32 goya_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return 0;
}

static u32 *goya_get_stream_master_qid_arr(void)
{
	return NULL;
}

static int goya_get_monitor_dump(struct hl_device *hdev, void *data)
{
	return -EOPNOTSUPP;
}

static void goya_check_if_razwi_happened(struct hl_device *hdev)
{
}

static int goya_scrub_device_dram(struct hl_device *hdev, u64 val)
{
	return -EOPNOTSUPP;
}

static int goya_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}

static int goya_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}

static int goya_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}

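/*
 * goya_funcs - the ASIC-specific callbacks through which the common
 * habanalabs code drives Goya. Capabilities that don't exist on this
 * ASIC are wired to stubs or to NULL.
 */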
static const struct hl_asic_funcs goya_funcs = {
	.early_init = goya_early_init,
	.early_fini = goya_early_fini,
	.late_init = goya_late_init,
	.late_fini = goya_late_fini,
	.sw_init = goya_sw_init,
	.sw_fini = goya_sw_fini,
	.hw_init = goya_hw_init,
	.hw_fini = goya_hw_fini,
	.halt_engines = goya_halt_engines,
	.suspend = goya_suspend,
	.resume = goya_resume,
	.mmap = goya_mmap,
	.ring_doorbell = goya_ring_doorbell,
	.pqe_write = goya_pqe_write,
	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
	.asic_dma_free_coherent = goya_dma_free_coherent,
	.scrub_device_mem = goya_scrub_device_mem,
	.scrub_device_dram = goya_scrub_device_dram,
	.get_int_queue_base = goya_get_int_queue_base,
	.test_queues = goya_test_queues,
	.asic_dma_pool_zalloc = goya_dma_pool_zalloc,
	.asic_dma_pool_free = goya_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = goya_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
	.update_eq_ci = goya_update_eq_ci,
	.context_switch = goya_context_switch,
	.restore_phase_topology = goya_restore_phase_topology,
	.debugfs_read_dma = goya_debugfs_read_dma,
	.add_device_attr = goya_add_device_attr,
	.handle_eqe = goya_handle_eqe,
	.get_events_stat = goya_get_events_stat,
	.read_pte = goya_read_pte,
	.write_pte = goya_write_pte,
	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = goya_send_heartbeat,
	.debug_coresight = goya_debug_coresight,
	.is_device_idle = goya_is_device_idle,
	.compute_reset_late_init = goya_compute_reset_late_init,
	.hw_queues_lock = goya_hw_queues_lock,
	.hw_queues_unlock = goya_hw_queues_unlock,
	.get_pci_id = goya_get_pci_id,
	.get_eeprom_data = goya_get_eeprom_data,
	.get_monitor_dump = goya_get_monitor_dump,
	.send_cpu_message = goya_send_cpu_message,
	.pci_bars_map = goya_pci_bars_map,
	.init_iatu = goya_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = goya_halt_coresight,
	.ctx_init = goya_ctx_init,
	.ctx_fini = goya_ctx_fini,
	.pre_schedule_cs = goya_pre_schedule_cs,
	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
	.load_firmware_to_device = goya_load_firmware_to_device,
	.load_boot_fit_to_device = goya_load_boot_fit_to_device,
	.get_signal_cb_size = goya_get_signal_cb_size,
	.get_wait_cb_size = goya_get_wait_cb_size,
	.gen_signal_cb = goya_gen_signal_cb,
	.gen_wait_cb = goya_gen_wait_cb,
	.reset_sob = goya_reset_sob,
	.reset_sob_group = goya_reset_sob_group,
	.get_device_time = goya_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = goya_collective_wait_init_cs,
	.collective_wait_create_jobs = goya_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = goya_ack_protection_bits_errors,
	.get_hw_block_id = goya_get_hw_block_id,
	.hw_block_mmap = goya_block_mmap,
	.enable_events_from_fw = goya_enable_events_from_fw,
	.ack_mmu_errors = goya_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = goya_init_firmware_preload_params,
	.init_firmware_loader = goya_init_firmware_loader,
	.init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram,
	.state_dump_init = goya_state_dump_init,
	.get_sob_addr = goya_get_sob_addr,
	.set_pci_memory_regions = goya_set_pci_memory_regions,
	.get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
	.check_if_razwi_happened = goya_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = goya_set_ddr_bar_base,
	.send_device_activity = goya_send_device_activity,
	.set_dram_properties = goya_set_dram_properties,
	.set_binning_masks = goya_set_binning_masks,
};

/*
 * goya_set_asic_funcs - set Goya function pointers
 *
 * @hdev: pointer to hl_device structure
 */
void goya_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &goya_funcs;
}