1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2020 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#include "gaudiP.h"
9#include "../include/hw_ip/mmu/mmu_general.h"
10#include "../include/hw_ip/mmu/mmu_v1_1.h"
11#include "../include/gaudi/gaudi_masks.h"
12#include "../include/gaudi/gaudi_fw_if.h"
13#include "../include/gaudi/gaudi_reg_map.h"
14#include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16#include <linux/module.h>
17#include <linux/pci.h>
18#include <linux/firmware.h>
19#include <linux/hwmon.h>
20#include <linux/genalloc.h>
21#include <linux/io-64-nonatomic-lo-hi.h>
22#include <linux/iommu.h>
23#include <linux/seq_file.h>
24
25/*
26 * Gaudi security scheme:
27 *
28 * 1. Host is protected by:
29 *        - Range registers
30 *        - MMU
31 *
32 * 2. DDR is protected by:
33 *        - Range registers (protect the first 512MB)
34 *
35 * 3. Configuration is protected by:
36 *        - Range registers
37 *        - Protection bits
38 *
39 * MMU is always enabled.
40 *
41 * QMAN DMA channels 0,1,5 (PCI DMAN):
42 *     - DMA is not secured.
43 *     - PQ and CQ are secured.
44 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
45 *                      because of TDMA (tensor DMA). Hence, WREG is always not
46 *                      secured.
47 *
48 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
49 * channel 0 to be secured, execute the DMA and change it back to not secured.
50 * Currently, the driver doesn't use the DMA while there are compute jobs
51 * running.
52 *
53 * The current use cases for the driver to use the DMA are:
54 *     - Clear SRAM on context switch (happens on context switch when device is
55 *       idle)
56 *     - MMU page tables area clear (happens on init)
57 *
58 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
59 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
60 * CQ, CP and the engine are not secured
61 *
62 */
63
64#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
65#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
66#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
67
68#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
69
70#define GAUDI_RESET_TIMEOUT_MSEC	1000		/* 1000ms */
71#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
72#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
73#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
74
75#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
76#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
77#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
78#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
79#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
80#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
81#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
82#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
83
84#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
85
86#define GAUDI_MAX_STRING_LEN		20
87
88#define GAUDI_CB_POOL_CB_CNT		512
89#define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
90
91#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
92
93#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
94
95#define GAUDI_NUM_OF_QM_ERR_CAUSE	16
96
97#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
98
99#define GAUDI_ARB_WDT_TIMEOUT		0x1000000
100
101#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
102		BIT(GAUDI_ENGINE_ID_MME_0) |\
103		BIT(GAUDI_ENGINE_ID_MME_2) |\
104		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
105
106static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
107		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
108		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
109		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
110		"gaudi cpu eq"
111};
112
113static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
114	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
115	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
116	[GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
117	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
118	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
119	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
120	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
121	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
122};
123
124static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
125	[0] = GAUDI_QUEUE_ID_DMA_0_0,
126	[1] = GAUDI_QUEUE_ID_DMA_0_1,
127	[2] = GAUDI_QUEUE_ID_DMA_0_2,
128	[3] = GAUDI_QUEUE_ID_DMA_0_3,
129	[4] = GAUDI_QUEUE_ID_DMA_1_0,
130	[5] = GAUDI_QUEUE_ID_DMA_1_1,
131	[6] = GAUDI_QUEUE_ID_DMA_1_2,
132	[7] = GAUDI_QUEUE_ID_DMA_1_3,
133	[8] = GAUDI_QUEUE_ID_DMA_5_0,
134	[9] = GAUDI_QUEUE_ID_DMA_5_1,
135	[10] = GAUDI_QUEUE_ID_DMA_5_2,
136	[11] = GAUDI_QUEUE_ID_DMA_5_3
137};
138
139static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
140	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
141	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
142	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
143	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
144	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
145	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
146	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
147	[PACKET_FENCE]		= sizeof(struct packet_fence),
148	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
149	[PACKET_NOP]		= sizeof(struct packet_nop),
150	[PACKET_STOP]		= sizeof(struct packet_stop),
151	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
152	[PACKET_WAIT]		= sizeof(struct packet_wait),
153	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
154};
155
156static inline bool validate_packet_id(enum packet_id id)
157{
158	switch (id) {
159	case PACKET_WREG_32:
160	case PACKET_WREG_BULK:
161	case PACKET_MSG_LONG:
162	case PACKET_MSG_SHORT:
163	case PACKET_CP_DMA:
164	case PACKET_REPEAT:
165	case PACKET_MSG_PROT:
166	case PACKET_FENCE:
167	case PACKET_LIN_DMA:
168	case PACKET_NOP:
169	case PACKET_STOP:
170	case PACKET_ARB_POINT:
171	case PACKET_WAIT:
172	case PACKET_LOAD_AND_EXE:
173		return true;
174	default:
175		return false;
176	}
177}
178
179static const char * const
180gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
181	"tpc_address_exceed_slm",
182	"tpc_div_by_0",
183	"tpc_spu_mac_overflow",
184	"tpc_spu_addsub_overflow",
185	"tpc_spu_abs_overflow",
186	"tpc_spu_fp_dst_nan_inf",
187	"tpc_spu_fp_dst_denorm",
188	"tpc_vpu_mac_overflow",
189	"tpc_vpu_addsub_overflow",
190	"tpc_vpu_abs_overflow",
191	"tpc_vpu_fp_dst_nan_inf",
192	"tpc_vpu_fp_dst_denorm",
193	"tpc_assertions",
194	"tpc_illegal_instruction",
195	"tpc_pc_wrap_around",
196	"tpc_qm_sw_err",
197	"tpc_hbw_rresp_err",
198	"tpc_hbw_bresp_err",
199	"tpc_lbw_rresp_err",
200	"tpc_lbw_bresp_err"
201};
202
203static const char * const
204gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
205	"PQ AXI HBW error",
206	"CQ AXI HBW error",
207	"CP AXI HBW error",
208	"CP error due to undefined OPCODE",
209	"CP encountered STOP OPCODE",
210	"CP AXI LBW error",
211	"CP WRREG32 or WRBULK returned error",
212	"N/A",
213	"FENCE 0 inc over max value and clipped",
214	"FENCE 1 inc over max value and clipped",
215	"FENCE 2 inc over max value and clipped",
216	"FENCE 3 inc over max value and clipped",
217	"FENCE 0 dec under min value and clipped",
218	"FENCE 1 dec under min value and clipped",
219	"FENCE 2 dec under min value and clipped",
220	"FENCE 3 dec under min value and clipped"
221};
222
223static const char * const
224gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
225	"Choice push while full error",
226	"Choice Q watchdog error",
227	"MSG AXI LBW returned with error"
228};
229
230static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
231	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
232	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
233	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
234	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
235	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
236	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
237	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
238	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
239	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
240	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
241	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
242	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
243	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
244	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
245	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
246	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
247	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
248	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
249	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
250	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
251	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
252	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
253	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
254	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
255	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
256	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
257	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
258	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
259	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
260	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
261	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
262	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
263	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
264	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
265	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
266	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
267	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
268	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
269	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
270	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
271	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
272	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
273	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
274	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
275	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
276	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
277	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
278	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
279	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
280	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
281	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
282	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
283	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
284	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
285	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
286	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
287	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
288	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
289	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
290	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
291	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
292	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
293	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
294	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
295	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
296	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
297	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
298	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
299	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
300	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
301	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
302	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
303	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
304	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_0 */
305	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_1 */
306	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_2 */
307	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_3 */
308	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_0 */
309	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_1 */
310	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_2 */
311	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_3 */
312	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_0 */
313	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_1 */
314	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_2 */
315	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_3 */
316	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_0 */
317	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_1 */
318	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_2 */
319	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_3 */
320	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_0 */
321	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_1 */
322	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_2 */
323	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_3 */
324	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_0 */
325	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_1 */
326	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_2 */
327	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_3 */
328	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_0 */
329	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_1 */
330	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_2 */
331	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_3 */
332	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_0 */
333	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_1 */
334	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_2 */
335	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_3 */
336	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_0 */
337	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_1 */
338	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_2 */
339	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_3 */
340	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_0 */
341	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_1 */
342	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_2 */
343	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_3 */
344};
345
346struct ecc_info_extract_params {
347	u64 block_address;
348	u32 num_memories;
349	bool derr;
350	bool disable_clock_gating;
351};
352
353static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
354								u64 phys_addr);
355static int gaudi_send_job_on_qman0(struct hl_device *hdev,
356					struct hl_cs_job *job);
357static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
358					u32 size, u64 val);
359static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
360				u32 tpc_id);
361static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
362static int gaudi_cpucp_info_get(struct hl_device *hdev);
363static void gaudi_disable_clock_gating(struct hl_device *hdev);
364static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
365
366static int gaudi_get_fixed_properties(struct hl_device *hdev)
367{
368	struct asic_fixed_properties *prop = &hdev->asic_prop;
369	u32 num_sync_stream_queues = 0;
370	int i;
371
372	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
373	prop->hw_queues_props = kcalloc(prop->max_queues,
374			sizeof(struct hw_queue_properties),
375			GFP_KERNEL);
376
377	if (!prop->hw_queues_props)
378		return -ENOMEM;
379
380	for (i = 0 ; i < prop->max_queues ; i++) {
381		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
382			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
383			prop->hw_queues_props[i].driver_only = 0;
384			prop->hw_queues_props[i].requires_kernel_cb = 1;
385			prop->hw_queues_props[i].supports_sync_stream = 1;
386			num_sync_stream_queues++;
387		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
388			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
389			prop->hw_queues_props[i].driver_only = 1;
390			prop->hw_queues_props[i].requires_kernel_cb = 0;
391			prop->hw_queues_props[i].supports_sync_stream = 0;
392		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
393			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
394			prop->hw_queues_props[i].driver_only = 0;
395			prop->hw_queues_props[i].requires_kernel_cb = 0;
396		} else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
397			prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
398			prop->hw_queues_props[i].driver_only = 0;
399			prop->hw_queues_props[i].requires_kernel_cb = 0;
400			prop->hw_queues_props[i].supports_sync_stream = 0;
401		}
402	}
403
404	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
405	prop->sync_stream_first_sob = 0;
406	prop->sync_stream_first_mon = 0;
407	prop->dram_base_address = DRAM_PHYS_BASE;
408	prop->dram_size = GAUDI_HBM_SIZE_32GB;
409	prop->dram_end_address = prop->dram_base_address +
410					prop->dram_size;
411	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
412
413	prop->sram_base_address = SRAM_BASE_ADDR;
414	prop->sram_size = SRAM_SIZE;
415	prop->sram_end_address = prop->sram_base_address +
416					prop->sram_size;
417	prop->sram_user_base_address = prop->sram_base_address +
418					SRAM_USER_BASE_OFFSET;
419
420	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
421	if (hdev->pldm)
422		prop->mmu_pgt_size = 0x800000; /* 8MB */
423	else
424		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
425	prop->mmu_pte_size = HL_PTE_SIZE;
426	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
427	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
428	prop->dram_page_size = PAGE_SIZE_2MB;
429
430	prop->pmmu.hop0_shift = HOP0_SHIFT;
431	prop->pmmu.hop1_shift = HOP1_SHIFT;
432	prop->pmmu.hop2_shift = HOP2_SHIFT;
433	prop->pmmu.hop3_shift = HOP3_SHIFT;
434	prop->pmmu.hop4_shift = HOP4_SHIFT;
435	prop->pmmu.hop0_mask = HOP0_MASK;
436	prop->pmmu.hop1_mask = HOP1_MASK;
437	prop->pmmu.hop2_mask = HOP2_MASK;
438	prop->pmmu.hop3_mask = HOP3_MASK;
439	prop->pmmu.hop4_mask = HOP4_MASK;
440	prop->pmmu.start_addr = VA_HOST_SPACE_START;
441	prop->pmmu.end_addr =
442			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
443	prop->pmmu.page_size = PAGE_SIZE_4KB;
444	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
445
446	/* PMMU and HPMMU are the same except of page size */
447	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
448	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
449
450	/* shifts and masks are the same in PMMU and DMMU */
451	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
452	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
453	prop->dmmu.end_addr = VA_HOST_SPACE_END;
454	prop->dmmu.page_size = PAGE_SIZE_2MB;
455
456	prop->cfg_size = CFG_SIZE;
457	prop->max_asid = MAX_ASID;
458	prop->num_of_events = GAUDI_EVENT_SIZE;
459	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
460
461	prop->max_power_default = MAX_POWER_DEFAULT_PCI;
462
463	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
464	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
465
466	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
467	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
468
469	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
470					CARD_NAME_MAX_LEN);
471
472	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
473
474	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
475			num_sync_stream_queues * HL_RSVD_SOBS;
476	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
477			num_sync_stream_queues * HL_RSVD_MONS;
478
479	return 0;
480}
481
482static int gaudi_pci_bars_map(struct hl_device *hdev)
483{
484	static const char * const name[] = {"SRAM", "CFG", "HBM"};
485	bool is_wc[3] = {false, false, true};
486	int rc;
487
488	rc = hl_pci_bars_map(hdev, name, is_wc);
489	if (rc)
490		return rc;
491
492	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
493			(CFG_BASE - SPI_FLASH_BASE_ADDR);
494
495	return 0;
496}
497
498static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
499{
500	struct gaudi_device *gaudi = hdev->asic_specific;
501	struct hl_inbound_pci_region pci_region;
502	u64 old_addr = addr;
503	int rc;
504
505	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
506		return old_addr;
507
508	/* Inbound Region 2 - Bar 4 - Point to HBM */
509	pci_region.mode = PCI_BAR_MATCH_MODE;
510	pci_region.bar = HBM_BAR_ID;
511	pci_region.addr = addr;
512	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
513	if (rc)
514		return U64_MAX;
515
516	if (gaudi) {
517		old_addr = gaudi->hbm_bar_cur_addr;
518		gaudi->hbm_bar_cur_addr = addr;
519	}
520
521	return old_addr;
522}
523
524static int gaudi_init_iatu(struct hl_device *hdev)
525{
526	struct hl_inbound_pci_region inbound_region;
527	struct hl_outbound_pci_region outbound_region;
528	int rc;
529
530	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
531	inbound_region.mode = PCI_BAR_MATCH_MODE;
532	inbound_region.bar = SRAM_BAR_ID;
533	inbound_region.addr = SRAM_BASE_ADDR;
534	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
535	if (rc)
536		goto done;
537
538	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
539	inbound_region.mode = PCI_BAR_MATCH_MODE;
540	inbound_region.bar = CFG_BAR_ID;
541	inbound_region.addr = SPI_FLASH_BASE_ADDR;
542	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
543	if (rc)
544		goto done;
545
546	/* Inbound Region 2 - Bar 4 - Point to HBM */
547	inbound_region.mode = PCI_BAR_MATCH_MODE;
548	inbound_region.bar = HBM_BAR_ID;
549	inbound_region.addr = DRAM_PHYS_BASE;
550	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
551	if (rc)
552		goto done;
553
554	hdev->asic_funcs->set_dma_mask_from_fw(hdev);
555
556	/* Outbound Region 0 - Point to Host */
557	outbound_region.addr = HOST_PHYS_BASE;
558	outbound_region.size = HOST_PHYS_SIZE;
559	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
560
561done:
562	return rc;
563}
564
565static int gaudi_early_init(struct hl_device *hdev)
566{
567	struct asic_fixed_properties *prop = &hdev->asic_prop;
568	struct pci_dev *pdev = hdev->pdev;
569	int rc;
570
571	rc = gaudi_get_fixed_properties(hdev);
572	if (rc) {
573		dev_err(hdev->dev, "Failed to get fixed properties\n");
574		return rc;
575	}
576
577	/* Check BAR sizes */
578	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
579		dev_err(hdev->dev,
580			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
581			SRAM_BAR_ID,
582			(unsigned long long) pci_resource_len(pdev,
583							SRAM_BAR_ID),
584			SRAM_BAR_SIZE);
585		rc = -ENODEV;
586		goto free_queue_props;
587	}
588
589	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
590		dev_err(hdev->dev,
591			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
592			CFG_BAR_ID,
593			(unsigned long long) pci_resource_len(pdev,
594								CFG_BAR_ID),
595			CFG_BAR_SIZE);
596		rc = -ENODEV;
597		goto free_queue_props;
598	}
599
600	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
601
602	rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
603			mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
604	if (rc)
605		goto free_queue_props;
606
607	/* GAUDI Firmware does not yet support security */
608	prop->fw_security_disabled = true;
609	dev_info(hdev->dev, "firmware-level security is disabled\n");
610
611	return 0;
612
613free_queue_props:
614	kfree(hdev->asic_prop.hw_queues_props);
615	return rc;
616}
617
618static int gaudi_early_fini(struct hl_device *hdev)
619{
620	kfree(hdev->asic_prop.hw_queues_props);
621	hl_pci_fini(hdev);
622
623	return 0;
624}
625
626/**
627 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
628 *
629 * @hdev: pointer to hl_device structure
630 *
631 */
632static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
633{
634	struct asic_fixed_properties *prop = &hdev->asic_prop;
635	u32 trace_freq = 0;
636	u32 pll_clk = 0;
637	u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
638	u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
639	u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
640	u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
641	u32 od = RREG32(mmPSOC_CPU_PLL_OD);
642
643	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
644		if (div_sel == DIV_SEL_REF_CLK)
645			trace_freq = PLL_REF_CLK;
646		else
647			trace_freq = PLL_REF_CLK / (div_fctr + 1);
648	} else if (div_sel == DIV_SEL_PLL_CLK ||
649					div_sel == DIV_SEL_DIVIDED_PLL) {
650		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
651		if (div_sel == DIV_SEL_PLL_CLK)
652			trace_freq = pll_clk;
653		else
654			trace_freq = pll_clk / (div_fctr + 1);
655	} else {
656		dev_warn(hdev->dev,
657			"Received invalid div select value: %d", div_sel);
658	}
659
660	prop->psoc_timestamp_frequency = trace_freq;
661	prop->psoc_pci_pll_nr = nr;
662	prop->psoc_pci_pll_nf = nf;
663	prop->psoc_pci_pll_od = od;
664	prop->psoc_pci_pll_div_factor = div_fctr;
665}
666
667static int _gaudi_init_tpc_mem(struct hl_device *hdev,
668		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
669{
670	struct asic_fixed_properties *prop = &hdev->asic_prop;
671	struct packet_lin_dma *init_tpc_mem_pkt;
672	struct hl_cs_job *job;
673	struct hl_cb *cb;
674	u64 dst_addr;
675	u32 cb_size, ctl;
676	u8 tpc_id;
677	int rc;
678
679	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
680	if (!cb)
681		return -EFAULT;
682
683	init_tpc_mem_pkt = cb->kernel_address;
684	cb_size = sizeof(*init_tpc_mem_pkt);
685	memset(init_tpc_mem_pkt, 0, cb_size);
686
687	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
688
689	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
690	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
691	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
692	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
693
694	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
695
696	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
697	dst_addr = (prop->sram_user_base_address &
698			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
699			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
700	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
701
702	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
703	if (!job) {
704		dev_err(hdev->dev, "Failed to allocate a new job\n");
705		rc = -ENOMEM;
706		goto release_cb;
707	}
708
709	job->id = 0;
710	job->user_cb = cb;
711	job->user_cb->cs_cnt++;
712	job->user_cb_size = cb_size;
713	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
714	job->patched_cb = job->user_cb;
715	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
716
717	hl_debugfs_add_job(hdev, job);
718
719	rc = gaudi_send_job_on_qman0(hdev, job);
720
721	if (rc)
722		goto free_job;
723
724	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
725		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
726		if (rc)
727			break;
728	}
729
730free_job:
731	hl_userptr_delete_list(hdev, &job->userptr_list);
732	hl_debugfs_remove_job(hdev, job);
733	kfree(job);
734	cb->cs_cnt--;
735
736release_cb:
737	hl_cb_put(cb);
738	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
739
740	return rc;
741}
742
743/*
744 * gaudi_init_tpc_mem() - Initialize TPC memories.
745 * @hdev: Pointer to hl_device structure.
746 *
747 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
748 *
749 * Return: 0 for success, negative value for error.
750 */
751static int gaudi_init_tpc_mem(struct hl_device *hdev)
752{
753	const struct firmware *fw;
754	size_t fw_size;
755	void *cpu_addr;
756	dma_addr_t dma_handle;
757	int rc, count = 5;
758
759again:
760	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
761	if (rc == -EINTR && count-- > 0) {
762		msleep(50);
763		goto again;
764	}
765
766	if (rc) {
767		dev_err(hdev->dev, "Failed to load firmware file %s\n",
768				GAUDI_TPC_FW_FILE);
769		goto out;
770	}
771
772	fw_size = fw->size;
773	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
774			&dma_handle, GFP_KERNEL | __GFP_ZERO);
775	if (!cpu_addr) {
776		dev_err(hdev->dev,
777			"Failed to allocate %zu of dma memory for TPC kernel\n",
778			fw_size);
779		rc = -ENOMEM;
780		goto out;
781	}
782
783	memcpy(cpu_addr, fw->data, fw_size);
784
785	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
786
787	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
788			dma_handle);
789
790out:
791	release_firmware(fw);
792	return rc;
793}
794
795static int gaudi_late_init(struct hl_device *hdev)
796{
797	struct gaudi_device *gaudi = hdev->asic_specific;
798	int rc;
799
800	rc = gaudi->cpucp_info_get(hdev);
801	if (rc) {
802		dev_err(hdev->dev, "Failed to get cpucp info\n");
803		return rc;
804	}
805
806	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
807	if (rc) {
808		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
809		return rc;
810	}
811
812	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
813
814	gaudi_fetch_psoc_frequency(hdev);
815
816	rc = gaudi_mmu_clear_pgt_range(hdev);
817	if (rc) {
818		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
819		goto disable_pci_access;
820	}
821
822	rc = gaudi_init_tpc_mem(hdev);
823	if (rc) {
824		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
825		goto disable_pci_access;
826	}
827
828	return 0;
829
830disable_pci_access:
831	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
832
833	return rc;
834}
835
836static void gaudi_late_fini(struct hl_device *hdev)
837{
838	const struct hwmon_channel_info **channel_info_arr;
839	int i = 0;
840
841	if (!hdev->hl_chip_info->info)
842		return;
843
844	channel_info_arr = hdev->hl_chip_info->info;
845
846	while (channel_info_arr[i]) {
847		kfree(channel_info_arr[i]->config);
848		kfree(channel_info_arr[i]);
849		i++;
850	}
851
852	kfree(channel_info_arr);
853
854	hdev->hl_chip_info->info = NULL;
855}
856
857static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
858{
859	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
860	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
861	int i, j, rc = 0;
862
863	/*
864	 * The device CPU works with 40-bits addresses, while bit 39 must be set
865	 * to '1' when accessing the host.
866	 * Bits 49:39 of the full host address are saved for a later
867	 * configuration of the HW to perform extension to 50 bits.
868	 * Because there is a single HW register that holds the extension bits,
869	 * these bits must be identical in all allocated range.
870	 */
871
872	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
873		virt_addr_arr[i] =
874			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
875						HL_CPU_ACCESSIBLE_MEM_SIZE,
876						&dma_addr_arr[i],
877						GFP_KERNEL | __GFP_ZERO);
878		if (!virt_addr_arr[i]) {
879			rc = -ENOMEM;
880			goto free_dma_mem_arr;
881		}
882
883		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
884		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
885				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
886			break;
887	}
888
889	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
890		dev_err(hdev->dev,
891			"MSB of CPU accessible DMA memory are not identical in all range\n");
892		rc = -EFAULT;
893		goto free_dma_mem_arr;
894	}
895
896	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
897	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
898	hdev->cpu_pci_msb_addr =
899		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
900
901	GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
902
903free_dma_mem_arr:
904	for (j = 0 ; j < i ; j++)
905		hdev->asic_funcs->asic_dma_free_coherent(hdev,
906						HL_CPU_ACCESSIBLE_MEM_SIZE,
907						virt_addr_arr[j],
908						dma_addr_arr[j]);
909
910	return rc;
911}
912
913static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
914{
915	struct gaudi_device *gaudi = hdev->asic_specific;
916	struct gaudi_internal_qman_info *q;
917	u32 i;
918
919	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
920		q = &gaudi->internal_qmans[i];
921		if (!q->pq_kernel_addr)
922			continue;
923		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
924							q->pq_kernel_addr,
925							q->pq_dma_addr);
926	}
927}
928
929static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
930{
931	struct gaudi_device *gaudi = hdev->asic_specific;
932	struct gaudi_internal_qman_info *q;
933	int rc, i;
934
935	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
936		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
937			continue;
938
939		q = &gaudi->internal_qmans[i];
940
941		switch (i) {
942		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
943		case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
944			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
945			break;
946		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
947			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
948			break;
949		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
950			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
951			break;
952		default:
953			dev_err(hdev->dev, "Bad internal queue index %d", i);
954			rc = -EINVAL;
955			goto free_internal_qmans_pq_mem;
956		}
957
958		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
959						hdev, q->pq_size,
960						&q->pq_dma_addr,
961						GFP_KERNEL | __GFP_ZERO);
962		if (!q->pq_kernel_addr) {
963			rc = -ENOMEM;
964			goto free_internal_qmans_pq_mem;
965		}
966	}
967
968	return 0;
969
970free_internal_qmans_pq_mem:
971	gaudi_free_internal_qmans_pq_mem(hdev);
972	return rc;
973}
974
975static int gaudi_sw_init(struct hl_device *hdev)
976{
977	struct gaudi_device *gaudi;
978	u32 i, event_id = 0;
979	int rc;
980
981	/* Allocate device structure */
982	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
983	if (!gaudi)
984		return -ENOMEM;
985
986	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
987		if (gaudi_irq_map_table[i].valid) {
988			if (event_id == GAUDI_EVENT_SIZE) {
989				dev_err(hdev->dev,
990					"Event array exceeds the limit of %u events\n",
991					GAUDI_EVENT_SIZE);
992				rc = -EINVAL;
993				goto free_gaudi_device;
994			}
995
996			gaudi->events[event_id++] =
997					gaudi_irq_map_table[i].fc_id;
998		}
999	}
1000
1001	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1002
1003	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1004
1005	hdev->asic_specific = gaudi;
1006
1007	/* Create DMA pool for small allocations */
1008	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1009			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1010	if (!hdev->dma_pool) {
1011		dev_err(hdev->dev, "failed to create DMA pool\n");
1012		rc = -ENOMEM;
1013		goto free_gaudi_device;
1014	}
1015
1016	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1017	if (rc)
1018		goto free_dma_pool;
1019
1020	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1021	if (!hdev->cpu_accessible_dma_pool) {
1022		dev_err(hdev->dev,
1023			"Failed to create CPU accessible DMA pool\n");
1024		rc = -ENOMEM;
1025		goto free_cpu_dma_mem;
1026	}
1027
1028	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1029				(uintptr_t) hdev->cpu_accessible_dma_mem,
1030				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1031	if (rc) {
1032		dev_err(hdev->dev,
1033			"Failed to add memory to CPU accessible DMA pool\n");
1034		rc = -EFAULT;
1035		goto free_cpu_accessible_dma_pool;
1036	}
1037
1038	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1039	if (rc)
1040		goto free_cpu_accessible_dma_pool;
1041
1042	spin_lock_init(&gaudi->hw_queues_lock);
1043	mutex_init(&gaudi->clk_gate_mutex);
1044
1045	hdev->supports_sync_stream = true;
1046	hdev->supports_coresight = true;
1047
1048	return 0;
1049
1050free_cpu_accessible_dma_pool:
1051	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1052free_cpu_dma_mem:
1053	GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1054				hdev->cpu_pci_msb_addr);
1055	hdev->asic_funcs->asic_dma_free_coherent(hdev,
1056			HL_CPU_ACCESSIBLE_MEM_SIZE,
1057			hdev->cpu_accessible_dma_mem,
1058			hdev->cpu_accessible_dma_address);
1059free_dma_pool:
1060	dma_pool_destroy(hdev->dma_pool);
1061free_gaudi_device:
1062	kfree(gaudi);
1063	return rc;
1064}
1065
1066static int gaudi_sw_fini(struct hl_device *hdev)
1067{
1068	struct gaudi_device *gaudi = hdev->asic_specific;
1069
1070	gaudi_free_internal_qmans_pq_mem(hdev);
1071
1072	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1073
1074	GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1075					hdev->cpu_pci_msb_addr);
1076	hdev->asic_funcs->asic_dma_free_coherent(hdev,
1077			HL_CPU_ACCESSIBLE_MEM_SIZE,
1078			hdev->cpu_accessible_dma_mem,
1079			hdev->cpu_accessible_dma_address);
1080
1081	dma_pool_destroy(hdev->dma_pool);
1082
1083	mutex_destroy(&gaudi->clk_gate_mutex);
1084
1085	kfree(gaudi);
1086
1087	return 0;
1088}
1089
1090static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1091{
1092	struct hl_device *hdev = arg;
1093	int i;
1094
1095	if (hdev->disabled)
1096		return IRQ_HANDLED;
1097
1098	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1099		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1100
1101	hl_irq_handler_eq(irq, &hdev->event_queue);
1102
1103	return IRQ_HANDLED;
1104}
1105
1106/*
1107 * For backward compatibility, new MSI interrupts should be set after the
1108 * existing CPU and NIC interrupts.
1109 */
1110static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1111				bool cpu_eq)
1112{
1113	int msi_vec;
1114
1115	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1116		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1117				GAUDI_EVENT_QUEUE_MSI_IDX);
1118
1119	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1120			(nr + NIC_NUMBER_OF_ENGINES + 1);
1121
1122	return pci_irq_vector(hdev->pdev, msi_vec);
1123}
1124
1125static int gaudi_enable_msi_single(struct hl_device *hdev)
1126{
1127	int rc, irq;
1128
1129	dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1130
1131	irq = gaudi_pci_irq_vector(hdev, 0, false);
1132	rc = request_irq(irq, gaudi_irq_handler_single, 0,
1133			"gaudi single msi", hdev);
1134	if (rc)
1135		dev_err(hdev->dev,
1136			"Failed to request single MSI IRQ\n");
1137
1138	return rc;
1139}
1140
1141static int gaudi_enable_msi_multi(struct hl_device *hdev)
1142{
1143	int cq_cnt = hdev->asic_prop.completion_queues_count;
1144	int rc, i, irq_cnt_init, irq;
1145
1146	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1147		irq = gaudi_pci_irq_vector(hdev, i, false);
1148		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1149				&hdev->completion_queue[i]);
1150		if (rc) {
1151			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1152			goto free_irqs;
1153		}
1154	}
1155
1156	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1157	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1158				&hdev->event_queue);
1159	if (rc) {
1160		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1161		goto free_irqs;
1162	}
1163
1164	return 0;
1165
1166free_irqs:
1167	for (i = 0 ; i < irq_cnt_init ; i++)
1168		free_irq(gaudi_pci_irq_vector(hdev, i, false),
1169				&hdev->completion_queue[i]);
1170	return rc;
1171}
1172
1173static int gaudi_enable_msi(struct hl_device *hdev)
1174{
1175	struct gaudi_device *gaudi = hdev->asic_specific;
1176	int rc;
1177
1178	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1179		return 0;
1180
1181	rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1182					PCI_IRQ_MSI);
1183	if (rc < 0) {
1184		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1185		return rc;
1186	}
1187
1188	if (rc < NUMBER_OF_INTERRUPTS) {
1189		gaudi->multi_msi_mode = false;
1190		rc = gaudi_enable_msi_single(hdev);
1191	} else {
1192		gaudi->multi_msi_mode = true;
1193		rc = gaudi_enable_msi_multi(hdev);
1194	}
1195
1196	if (rc)
1197		goto free_pci_irq_vectors;
1198
1199	gaudi->hw_cap_initialized |= HW_CAP_MSI;
1200
1201	return 0;
1202
1203free_pci_irq_vectors:
1204	pci_free_irq_vectors(hdev->pdev);
1205	return rc;
1206}
1207
1208static void gaudi_sync_irqs(struct hl_device *hdev)
1209{
1210	struct gaudi_device *gaudi = hdev->asic_specific;
1211	int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1212
1213	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1214		return;
1215
1216	/* Wait for all pending IRQs to be finished */
1217	if (gaudi->multi_msi_mode) {
1218		for (i = 0 ; i < cq_cnt ; i++)
1219			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1220
1221		synchronize_irq(gaudi_pci_irq_vector(hdev,
1222						GAUDI_EVENT_QUEUE_MSI_IDX,
1223						true));
1224	} else {
1225		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1226	}
1227}
1228
1229static void gaudi_disable_msi(struct hl_device *hdev)
1230{
1231	struct gaudi_device *gaudi = hdev->asic_specific;
1232	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1233
1234	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1235		return;
1236
1237	gaudi_sync_irqs(hdev);
1238
1239	if (gaudi->multi_msi_mode) {
1240		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1241						true);
1242		free_irq(irq, &hdev->event_queue);
1243
1244		for (i = 0 ; i < cq_cnt ; i++) {
1245			irq = gaudi_pci_irq_vector(hdev, i, false);
1246			free_irq(irq, &hdev->completion_queue[i]);
1247		}
1248	} else {
1249		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1250	}
1251
1252	pci_free_irq_vectors(hdev->pdev);
1253
1254	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1255}
1256
1257static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1258{
1259	struct gaudi_device *gaudi = hdev->asic_specific;
1260
1261	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1262		return;
1263
1264	if (!hdev->sram_scrambler_enable)
1265		return;
1266
1267	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1268			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1269	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1270			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1271	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1272			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1273	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1274			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1275	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1276			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1277	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1278			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1279	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1280			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1281	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1282			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1283
1284	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1285			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1286	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1287			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1288	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1289			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1290	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1291			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1292	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1293			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1294	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1295			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1296	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1297			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1298	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1299			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1300
1301	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1302			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1303	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1304			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1305	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1306			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1307	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1308			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1309	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1310			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1311	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1312			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1313	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1314			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1315	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1316			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1317
1318	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1319}
1320
1321static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1322{
1323	struct gaudi_device *gaudi = hdev->asic_specific;
1324
1325	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1326		return;
1327
1328	if (!hdev->dram_scrambler_enable)
1329		return;
1330
1331	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1332			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1333	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1334			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1335	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1336			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1337	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1338			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1339	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1340			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1341	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1342			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1343	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1344			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1345	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1346			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1347
1348	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1349			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1350	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1351			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1352	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1353			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1354	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1355			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1356	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1357			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1358	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1359			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1360	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1361			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1362	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1363			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1364
1365	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1366			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1367	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1368			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1369	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1370			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1371	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1372			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1373	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1374			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1375	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1376			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1377	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1378			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1379	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1380			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1381
1382	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1383}
1384
1385static void gaudi_init_e2e(struct hl_device *hdev)
1386{
1387	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1388	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1389	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1390	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1391
1392	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1393	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1394	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1395	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1396
1397	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1398	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1399	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1400	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1401
1402	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1403	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1404	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1405	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1406
1407	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1408	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1409	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1410	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1411
1412	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1413	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1414	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1415	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1416
1417	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1418	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1419	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1420	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1421
1422	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1423	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1424	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1425	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1426
1427	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1428	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1429	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1430	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1431
1432	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1433	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1434	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1435	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1436
1437	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1438	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1439	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1440	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1441
1442	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1443	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1444	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1445	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1446
1447	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1448	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1449	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1450	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1451
1452	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1453	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1454	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1455	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1456
1457	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1458	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1459	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1460	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1461
1462	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1463	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1464	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1465	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1466
1467	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1468	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1469	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1470	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1471
1472	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1473	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1474	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1475	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1476
1477	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1478	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1479	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1480	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1481
1482	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1483	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1484	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1485	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1486
1487	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1488	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1489	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1490	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1491
1492	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1493	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1494	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1495	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1496
1497	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1498	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1499	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1500	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1501
1502	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1503	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1504	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1505	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1506
1507	if (!hdev->dram_scrambler_enable) {
1508		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1509		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1510		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1511		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1512
1513		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1514		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1515		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1516		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1517
1518		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1519		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1520		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1521		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1522
1523		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1524		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1525		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1526		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1527
1528		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1529		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1530		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1531		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1532
1533		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1534		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1535		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1536		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1537
1538		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1539		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1540		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1541		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1542
1543		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1544		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1545		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1546		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1547
1548		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1549		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1550		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1551		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1552
1553		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1554		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1555		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1556		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1557
1558		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1559		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1560		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1561		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1562
1563		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1564		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1565		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1566		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1567
1568		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1569		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1570		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1571		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1572
1573		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1574		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1575		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1576		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1577
1578		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1579		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1580		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1581		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1582
1583		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1584		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1585		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1586		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1587
1588		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1589		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1590		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1591		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1592
1593		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1594		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1595		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1596		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1597
1598		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1599		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1600		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1601		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1602
1603		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1604		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1605		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1606		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1607
1608		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1609		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1610		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1611		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1612
1613		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1614		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1615		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1616		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1617
1618		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1619		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1620		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1621		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1622
1623		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1624		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1625		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1626		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1627	}
1628
1629	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1630			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1631	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1632			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1633
1634	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1635			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1636	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1637			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1638
1639	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1640			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1641	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1642			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1643
1644	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1645			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1646	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1647			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1648
1649	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1650			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1651	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1652			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1653
1654	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1655			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1656	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1657			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1658
1659	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1660			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1661	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1662			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1663
1664	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1665			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1666	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1667			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1668
1669	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1670			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1671	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1672			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1673
1674	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1675			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1676	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1677			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1678
1679	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1680			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1681	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1682			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1683
1684	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1685			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1686	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1687			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1688
1689	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1690			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1691	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1692			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1693
1694	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1695			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1696	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1697			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1698
1699	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1700			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1701	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1702			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1703
1704	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1705			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1706	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1707			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1708
1709	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1710			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1711	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1712			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1713
1714	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1715			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1716	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1717			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1718
1719	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1720			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1721	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1722			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1723
1724	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1725			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1726	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1727			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1728
1729	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1730			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1731	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1732			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1733
1734	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1735			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1736	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1737			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1738
1739	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1740			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1741	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1742			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1743
1744	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1745			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1746	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1747			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1748}
1749
1750static void gaudi_init_hbm_cred(struct hl_device *hdev)
1751{
1752	uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1753
1754	hbm0_wr = 0x33333333;
1755	hbm0_rd = 0x77777777;
1756	hbm1_wr = 0x55555555;
1757	hbm1_rd = 0xDDDDDDDD;
1758
1759	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1760	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1761	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1762	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1763
1764	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1765	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1766	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1767	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1768
1769	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1770	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1771	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1772	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1773
1774	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1775	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1776	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1777	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1778
1779	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1780			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1781			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1782	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1783			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1784			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1785	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1786			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1787			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1788	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1789			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1790			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1791
1792	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1793			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1794			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1795	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1796			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1797			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1798	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1799			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1800			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1801	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1802			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1803			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1804}
1805
1806static void gaudi_init_golden_registers(struct hl_device *hdev)
1807{
1808	u32 tpc_offset;
1809	int tpc_id, i;
1810
1811	gaudi_init_e2e(hdev);
1812
1813	gaudi_init_hbm_cred(hdev);
1814
1815	hdev->asic_funcs->disable_clock_gating(hdev);
1816
1817	for (tpc_id = 0, tpc_offset = 0;
1818				tpc_id < TPC_NUMBER_OF_ENGINES;
1819				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1820		/* Mask all arithmetic interrupts from TPC */
1821		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1822		/* Set 16 cache lines */
1823		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1824				ICACHE_FETCH_LINE_NUM, 2);
1825	}
1826
1827	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1828	for (i = 0 ; i < 128 ; i += 8)
1829		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1830
1831	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1832	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1833	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1834	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1835}
1836
1837static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1838					int qman_id, dma_addr_t qman_pq_addr)
1839{
1840	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1841	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1842	u32 q_off, dma_qm_offset;
1843	u32 dma_qm_err_cfg;
1844
1845	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1846
1847	mtr_base_en_lo = lower_32_bits(CFG_BASE +
1848				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1849	mtr_base_en_hi = upper_32_bits(CFG_BASE +
1850				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1851	so_base_en_lo = lower_32_bits(CFG_BASE +
1852				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1853	so_base_en_hi = upper_32_bits(CFG_BASE +
1854				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1855	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1856				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1857	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1858				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1859	so_base_ws_lo = lower_32_bits(CFG_BASE +
1860				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1861	so_base_ws_hi = upper_32_bits(CFG_BASE +
1862				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1863
1864	q_off = dma_qm_offset + qman_id * 4;
1865
1866	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1867	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1868
1869	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1870	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1871	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1872
1873	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
1874	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
1875							QMAN_LDMA_SRC_OFFSET);
1876	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
1877							QMAN_LDMA_DST_OFFSET);
1878
1879	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1880	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1881	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1882	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1883	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1884	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1885	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1886	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1887
1888	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1889
1890	/* The following configuration is needed only once per QMAN */
1891	if (qman_id == 0) {
1892		/* Configure RAZWI IRQ */
1893		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1894		if (hdev->stop_on_err) {
1895			dma_qm_err_cfg |=
1896				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1897		}
1898
1899		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1900		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1901			lower_32_bits(CFG_BASE +
1902					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1903		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1904			upper_32_bits(CFG_BASE +
1905					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1906		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1907			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1908									dma_id);
1909
1910		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1911				QM_ARB_ERR_MSG_EN_MASK);
1912
1913		/* Increase ARB WDT to support streams architecture */
1914		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1915				GAUDI_ARB_WDT_TIMEOUT);
1916
1917		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1918				QMAN_EXTERNAL_MAKE_TRUSTED);
1919
1920		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1921	}
1922}
1923
1924static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1925{
1926	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1927	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1928
1929	/* Set to maximum possible according to physical size */
1930	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1931	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1932
1933	/* WA for H/W bug H3-2116 */
1934	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
1935
1936	/* STOP_ON bit implies no completion to operation in case of RAZWI */
1937	if (hdev->stop_on_err)
1938		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1939
1940	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1941	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1942		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1943	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1944		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1945	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1946		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1947	WREG32(mmDMA0_CORE_PROT + dma_offset,
1948			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1949	/* If the channel is secured, it should be in MMU bypass mode */
1950	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1951			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1952	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1953}
1954
1955static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1956				u32 enable_mask)
1957{
1958	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1959
1960	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1961}
1962
1963static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1964{
1965	struct gaudi_device *gaudi = hdev->asic_specific;
1966	struct hl_hw_queue *q;
1967	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1968
1969	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1970		return;
1971
1972	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1973		dma_id = gaudi_dma_assignment[i];
1974		/*
1975		 * For queues after the CPU Q need to add 1 to get the correct
1976		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
1977		 * order to get the correct MSI register.
1978		 */
1979		if (dma_id > 1) {
1980			cpu_skip = 1;
1981			nic_skip = NIC_NUMBER_OF_ENGINES;
1982		} else {
1983			cpu_skip = 0;
1984			nic_skip = 0;
1985		}
1986
1987		for (j = 0 ; j < QMAN_STREAMS ; j++) {
1988			q_idx = 4 * dma_id + j + cpu_skip;
1989			q = &hdev->kernel_queues[q_idx];
1990			q->cq_id = cq_id++;
1991			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1992			gaudi_init_pci_dma_qman(hdev, dma_id, j,
1993						q->bus_address);
1994		}
1995
1996		gaudi_init_dma_core(hdev, dma_id);
1997
1998		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1999	}
2000
2001	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2002}
2003
2004static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2005					int qman_id, u64 qman_base_addr)
2006{
2007	u32 mtr_base_lo, mtr_base_hi;
2008	u32 so_base_lo, so_base_hi;
2009	u32 q_off, dma_qm_offset;
2010	u32 dma_qm_err_cfg;
2011
2012	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2013
2014	mtr_base_lo = lower_32_bits(CFG_BASE +
2015				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2016	mtr_base_hi = upper_32_bits(CFG_BASE +
2017				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2018	so_base_lo = lower_32_bits(CFG_BASE +
2019				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2020	so_base_hi = upper_32_bits(CFG_BASE +
2021				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2022
2023	q_off = dma_qm_offset + qman_id * 4;
2024
2025	if (qman_id < 4) {
2026		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2027					lower_32_bits(qman_base_addr));
2028		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2029					upper_32_bits(qman_base_addr));
2030
2031		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2032		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2033		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2034
2035		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2036							QMAN_CPDMA_SIZE_OFFSET);
2037		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2038							QMAN_CPDMA_SRC_OFFSET);
2039		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2040							QMAN_CPDMA_DST_OFFSET);
2041	} else {
2042		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2043							QMAN_LDMA_SIZE_OFFSET);
2044		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2045							QMAN_LDMA_SRC_OFFSET);
2046		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2047							QMAN_LDMA_DST_OFFSET);
2048
2049		/* Configure RAZWI IRQ */
2050		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2051		if (hdev->stop_on_err) {
2052			dma_qm_err_cfg |=
2053				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2054		}
2055		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2056
2057		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2058			lower_32_bits(CFG_BASE +
2059					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2060		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2061			upper_32_bits(CFG_BASE +
2062					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2063		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2064			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2065									dma_id);
2066
2067		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2068				QM_ARB_ERR_MSG_EN_MASK);
2069
2070		/* Increase ARB WDT to support streams architecture */
2071		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2072				GAUDI_ARB_WDT_TIMEOUT);
2073
2074		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2075		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2076				QMAN_INTERNAL_MAKE_TRUSTED);
2077	}
2078
2079	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2080	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2081	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2082	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2083}
2084
2085static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2086{
2087	struct gaudi_device *gaudi = hdev->asic_specific;
2088	struct gaudi_internal_qman_info *q;
2089	u64 qman_base_addr;
2090	int i, j, dma_id, internal_q_index;
2091
2092	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2093		return;
2094
2095	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2096		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2097
2098		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2099			 /*
2100			  * Add the CPU queue in order to get the correct queue
2101			  * number as all internal queue are placed after it
2102			  */
2103			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2104
2105			q = &gaudi->internal_qmans[internal_q_index];
2106			qman_base_addr = (u64) q->pq_dma_addr;
2107			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2108						qman_base_addr);
2109		}
2110
2111		/* Initializing lower CP for HBM DMA QMAN */
2112		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2113
2114		gaudi_init_dma_core(hdev, dma_id);
2115
2116		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2117	}
2118
2119	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2120}
2121
2122static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2123					int qman_id, u64 qman_base_addr)
2124{
2125	u32 mtr_base_lo, mtr_base_hi;
2126	u32 so_base_lo, so_base_hi;
2127	u32 q_off, mme_id;
2128	u32 mme_qm_err_cfg;
2129
2130	mtr_base_lo = lower_32_bits(CFG_BASE +
2131				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2132	mtr_base_hi = upper_32_bits(CFG_BASE +
2133				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2134	so_base_lo = lower_32_bits(CFG_BASE +
2135				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2136	so_base_hi = upper_32_bits(CFG_BASE +
2137				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2138
2139	q_off = mme_offset + qman_id * 4;
2140
2141	if (qman_id < 4) {
2142		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2143					lower_32_bits(qman_base_addr));
2144		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2145					upper_32_bits(qman_base_addr));
2146
2147		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2148		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2149		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2150
2151		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2152							QMAN_CPDMA_SIZE_OFFSET);
2153		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2154							QMAN_CPDMA_SRC_OFFSET);
2155		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2156							QMAN_CPDMA_DST_OFFSET);
2157	} else {
2158		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2159							QMAN_LDMA_SIZE_OFFSET);
2160		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2161							QMAN_LDMA_SRC_OFFSET);
2162		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2163							QMAN_LDMA_DST_OFFSET);
2164
2165		/* Configure RAZWI IRQ */
2166		mme_id = mme_offset /
2167				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2168
2169		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2170		if (hdev->stop_on_err) {
2171			mme_qm_err_cfg |=
2172				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2173		}
2174		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2175		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2176			lower_32_bits(CFG_BASE +
2177					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2178		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2179			upper_32_bits(CFG_BASE +
2180					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2181		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2182			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2183									mme_id);
2184
2185		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2186				QM_ARB_ERR_MSG_EN_MASK);
2187
2188		/* Increase ARB WDT to support streams architecture */
2189		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2190				GAUDI_ARB_WDT_TIMEOUT);
2191
2192		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2193		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2194				QMAN_INTERNAL_MAKE_TRUSTED);
2195	}
2196
2197	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2198	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2199	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2200	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2201}
2202
2203static void gaudi_init_mme_qmans(struct hl_device *hdev)
2204{
2205	struct gaudi_device *gaudi = hdev->asic_specific;
2206	struct gaudi_internal_qman_info *q;
2207	u64 qman_base_addr;
2208	u32 mme_offset;
2209	int i, internal_q_index;
2210
2211	if (gaudi->hw_cap_initialized & HW_CAP_MME)
2212		return;
2213
2214	/*
2215	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2216	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2217	 */
2218
2219	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2220
2221	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2222		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2223		q = &gaudi->internal_qmans[internal_q_index];
2224		qman_base_addr = (u64) q->pq_dma_addr;
2225		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2226					qman_base_addr);
2227		if (i == 3)
2228			mme_offset = 0;
2229	}
2230
2231	/* Initializing lower CP for MME QMANs */
2232	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2233	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2234	gaudi_init_mme_qman(hdev, 0, 4, 0);
2235
2236	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2237	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2238
2239	gaudi->hw_cap_initialized |= HW_CAP_MME;
2240}
2241
2242static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2243				int qman_id, u64 qman_base_addr)
2244{
2245	u32 mtr_base_lo, mtr_base_hi;
2246	u32 so_base_lo, so_base_hi;
2247	u32 q_off, tpc_id;
2248	u32 tpc_qm_err_cfg;
2249
2250	mtr_base_lo = lower_32_bits(CFG_BASE +
2251				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2252	mtr_base_hi = upper_32_bits(CFG_BASE +
2253				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2254	so_base_lo = lower_32_bits(CFG_BASE +
2255				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2256	so_base_hi = upper_32_bits(CFG_BASE +
2257				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2258
2259	q_off = tpc_offset + qman_id * 4;
2260
2261	if (qman_id < 4) {
2262		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2263					lower_32_bits(qman_base_addr));
2264		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2265					upper_32_bits(qman_base_addr));
2266
2267		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2268		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2269		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2270
2271		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2272							QMAN_CPDMA_SIZE_OFFSET);
2273		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2274							QMAN_CPDMA_SRC_OFFSET);
2275		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2276							QMAN_CPDMA_DST_OFFSET);
2277	} else {
2278		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2279							QMAN_LDMA_SIZE_OFFSET);
2280		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2281							QMAN_LDMA_SRC_OFFSET);
2282		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2283							QMAN_LDMA_DST_OFFSET);
2284
2285		/* Configure RAZWI IRQ */
2286		tpc_id = tpc_offset /
2287				(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2288
2289		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2290		if (hdev->stop_on_err) {
2291			tpc_qm_err_cfg |=
2292				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2293		}
2294
2295		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2296		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2297			lower_32_bits(CFG_BASE +
2298				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2299		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2300			upper_32_bits(CFG_BASE +
2301				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2302		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2303			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2304									tpc_id);
2305
2306		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2307				QM_ARB_ERR_MSG_EN_MASK);
2308
2309		/* Increase ARB WDT to support streams architecture */
2310		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2311				GAUDI_ARB_WDT_TIMEOUT);
2312
2313		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2314		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2315				QMAN_INTERNAL_MAKE_TRUSTED);
2316	}
2317
2318	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2319	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2320	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2321	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2322}
2323
2324static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2325{
2326	struct gaudi_device *gaudi = hdev->asic_specific;
2327	struct gaudi_internal_qman_info *q;
2328	u64 qman_base_addr;
2329	u32 so_base_hi, tpc_offset = 0;
2330	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2331			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2332	int i, tpc_id, internal_q_index;
2333
2334	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2335		return;
2336
2337	so_base_hi = upper_32_bits(CFG_BASE +
2338				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2339
2340	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2341		for (i = 0 ; i < QMAN_STREAMS ; i++) {
2342			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2343						tpc_id * QMAN_STREAMS + i;
2344			q = &gaudi->internal_qmans[internal_q_index];
2345			qman_base_addr = (u64) q->pq_dma_addr;
2346			gaudi_init_tpc_qman(hdev, tpc_offset, i,
2347						qman_base_addr);
2348
2349			if (i == 3) {
2350				/* Initializing lower CP for TPC QMAN */
2351				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2352
2353				/* Enable the QMAN and TPC channel */
2354				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2355						QMAN_TPC_ENABLE);
2356			}
2357		}
2358
2359		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2360				so_base_hi);
2361
2362		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2363
2364		gaudi->hw_cap_initialized |=
2365				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
2366	}
2367}
2368
2369static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2370{
2371	struct gaudi_device *gaudi = hdev->asic_specific;
2372
2373	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2374		return;
2375
2376	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2377	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2378	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2379}
2380
2381static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2382{
2383	struct gaudi_device *gaudi = hdev->asic_specific;
2384
2385	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2386		return;
2387
2388	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2389	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2390	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2391	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2392	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2393}
2394
2395static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2396{
2397	struct gaudi_device *gaudi = hdev->asic_specific;
2398
2399	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2400		return;
2401
2402	WREG32(mmMME2_QM_GLBL_CFG0, 0);
2403	WREG32(mmMME0_QM_GLBL_CFG0, 0);
2404}
2405
2406static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2407{
2408	struct gaudi_device *gaudi = hdev->asic_specific;
2409	u32 tpc_offset = 0;
2410	int tpc_id;
2411
2412	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2413		return;
2414
2415	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2416		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2417		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2418	}
2419}
2420
2421static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2422{
2423	struct gaudi_device *gaudi = hdev->asic_specific;
2424
2425	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2426		return;
2427
2428	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2429	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2430	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2431	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2432}
2433
2434static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2435{
2436	struct gaudi_device *gaudi = hdev->asic_specific;
2437
2438	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2439		return;
2440
2441	/* Stop CPs of HBM DMA QMANs */
2442
2443	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2444	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2445	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2446	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2447	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2448}
2449
2450static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2451{
2452	struct gaudi_device *gaudi = hdev->asic_specific;
2453
2454	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2455		return;
2456
2457	/* Stop CPs of MME QMANs */
2458	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2459	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2460}
2461
2462static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2463{
2464	struct gaudi_device *gaudi = hdev->asic_specific;
2465
2466	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2467		return;
2468
2469	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2470	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2471	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2472	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2473	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2474	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2475	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2476	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2477}
2478
2479static void gaudi_pci_dma_stall(struct hl_device *hdev)
2480{
2481	struct gaudi_device *gaudi = hdev->asic_specific;
2482
2483	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2484		return;
2485
2486	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2487	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2488	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2489}
2490
2491static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2492{
2493	struct gaudi_device *gaudi = hdev->asic_specific;
2494
2495	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2496		return;
2497
2498	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2499	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2500	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2501	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2502	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2503}
2504
2505static void gaudi_mme_stall(struct hl_device *hdev)
2506{
2507	struct gaudi_device *gaudi = hdev->asic_specific;
2508
2509	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2510		return;
2511
2512	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
2513	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2514	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2515	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2516	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2517	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2518	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2519	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2520	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2521	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2522	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2523	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2524	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2525	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2526	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2527	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2528	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2529}
2530
2531static void gaudi_tpc_stall(struct hl_device *hdev)
2532{
2533	struct gaudi_device *gaudi = hdev->asic_specific;
2534
2535	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2536		return;
2537
2538	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2539	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2540	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2541	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2542	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2543	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2544	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2545	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2546}
2547
2548static void gaudi_set_clock_gating(struct hl_device *hdev)
2549{
2550	struct gaudi_device *gaudi = hdev->asic_specific;
2551	u32 qman_offset;
2552	bool enable;
2553	int i;
2554
2555	/* In case we are during debug session, don't enable the clock gate
2556	 * as it may interfere
2557	 */
2558	if (hdev->in_debug)
2559		return;
2560
2561	for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
2562		enable = !!(hdev->clock_gating_mask &
2563				(BIT_ULL(gaudi_dma_assignment[i])));
2564
2565		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2566		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2567				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2568		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2569				enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
2570	}
2571
2572	for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
2573		enable = !!(hdev->clock_gating_mask &
2574				(BIT_ULL(gaudi_dma_assignment[i])));
2575
2576		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2577		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2578				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2579		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2580				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2581	}
2582
2583	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
2584	WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2585	WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2586
2587	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
2588	WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2589	WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2590
2591	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2592		enable = !!(hdev->clock_gating_mask &
2593				(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
2594
2595		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2596				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2597		WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2598				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2599
2600		qman_offset += TPC_QMAN_OFFSET;
2601	}
2602
2603	gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2604}
2605
2606static void gaudi_disable_clock_gating(struct hl_device *hdev)
2607{
2608	struct gaudi_device *gaudi = hdev->asic_specific;
2609	u32 qman_offset;
2610	int i;
2611
2612	if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2613		return;
2614
2615	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2616		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2617		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2618
2619		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2620	}
2621
2622	WREG32(mmMME0_QM_CGM_CFG, 0);
2623	WREG32(mmMME0_QM_CGM_CFG1, 0);
2624	WREG32(mmMME2_QM_CGM_CFG, 0);
2625	WREG32(mmMME2_QM_CGM_CFG1, 0);
2626
2627	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2628		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2629		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2630
2631		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2632	}
2633
2634	gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2635}
2636
2637static void gaudi_enable_timestamp(struct hl_device *hdev)
2638{
2639	/* Disable the timestamp counter */
2640	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2641
2642	/* Zero the lower/upper parts of the 64-bit counter */
2643	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2644	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2645
2646	/* Enable the counter */
2647	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2648}
2649
2650static void gaudi_disable_timestamp(struct hl_device *hdev)
2651{
2652	/* Disable the timestamp counter */
2653	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2654}
2655
2656static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2657{
2658	u32 wait_timeout_ms;
2659
2660	dev_info(hdev->dev,
2661		"Halting compute engines and disabling interrupts\n");
2662
2663	if (hdev->pldm)
2664		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2665	else
2666		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2667
2668
2669	gaudi_stop_mme_qmans(hdev);
2670	gaudi_stop_tpc_qmans(hdev);
2671	gaudi_stop_hbm_dma_qmans(hdev);
2672	gaudi_stop_pci_dma_qmans(hdev);
2673
2674	hdev->asic_funcs->disable_clock_gating(hdev);
2675
2676	msleep(wait_timeout_ms);
2677
2678	gaudi_pci_dma_stall(hdev);
2679	gaudi_hbm_dma_stall(hdev);
2680	gaudi_tpc_stall(hdev);
2681	gaudi_mme_stall(hdev);
2682
2683	msleep(wait_timeout_ms);
2684
2685	gaudi_disable_mme_qmans(hdev);
2686	gaudi_disable_tpc_qmans(hdev);
2687	gaudi_disable_hbm_dma_qmans(hdev);
2688	gaudi_disable_pci_dma_qmans(hdev);
2689
2690	gaudi_disable_timestamp(hdev);
2691
2692	gaudi_disable_msi(hdev);
2693}
2694
2695static int gaudi_mmu_init(struct hl_device *hdev)
2696{
2697	struct asic_fixed_properties *prop = &hdev->asic_prop;
2698	struct gaudi_device *gaudi = hdev->asic_specific;
2699	u64 hop0_addr;
2700	int rc, i;
2701
2702	if (!hdev->mmu_enable)
2703		return 0;
2704
2705	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2706		return 0;
2707
2708	hdev->dram_supports_virtual_memory = false;
2709
2710	for (i = 0 ; i < prop->max_asid ; i++) {
2711		hop0_addr = prop->mmu_pgt_addr +
2712				(i * prop->mmu_hop_table_size);
2713
2714		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2715		if (rc) {
2716			dev_err(hdev->dev,
2717				"failed to set hop0 addr for asid %d\n", i);
2718			goto err;
2719		}
2720	}
2721
2722	/* init MMU cache manage page */
2723	WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2724	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2725
2726	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
2727
2728	WREG32(mmMMU_UP_MMU_ENABLE, 1);
2729	WREG32(mmMMU_UP_SPI_MASK, 0xF);
2730
2731	WREG32(mmSTLB_HOP_CONFIGURATION,
2732			hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2733
2734	/*
2735	 * The H/W expects the first PI after init to be 1. After wraparound
2736	 * we'll write 0.
2737	 */
2738	gaudi->mmu_cache_inv_pi = 1;
2739
2740	gaudi->hw_cap_initialized |= HW_CAP_MMU;
2741
2742	return 0;
2743
2744err:
2745	return rc;
2746}
2747
2748static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2749{
2750	void __iomem *dst;
2751
2752	/* HBM scrambler must be initialized before pushing F/W to HBM */
2753	gaudi_init_scrambler_hbm(hdev);
2754
2755	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2756
2757	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2758}
2759
2760static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2761{
2762	void __iomem *dst;
2763
2764	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2765
2766	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2767}
2768
2769static void gaudi_read_device_fw_version(struct hl_device *hdev,
2770					enum hl_fw_component fwc)
2771{
2772	const char *name;
2773	u32 ver_off;
2774	char *dest;
2775
2776	switch (fwc) {
2777	case FW_COMP_UBOOT:
2778		ver_off = RREG32(mmUBOOT_VER_OFFSET);
2779		dest = hdev->asic_prop.uboot_ver;
2780		name = "U-Boot";
2781		break;
2782	case FW_COMP_PREBOOT:
2783		ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2784		dest = hdev->asic_prop.preboot_ver;
2785		name = "Preboot";
2786		break;
2787	default:
2788		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2789		return;
2790	}
2791
2792	ver_off &= ~((u32)SRAM_BASE_ADDR);
2793
2794	if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2795		memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2796							VERSION_MAX_LEN);
2797	} else {
2798		dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2799								name, ver_off);
2800		strcpy(dest, "unavailable");
2801	}
2802}
2803
2804static int gaudi_init_cpu(struct hl_device *hdev)
2805{
2806	struct gaudi_device *gaudi = hdev->asic_specific;
2807	int rc;
2808
2809	if (!hdev->cpu_enable)
2810		return 0;
2811
2812	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2813		return 0;
2814
2815	/*
2816	 * The device CPU works with 40 bits addresses.
2817	 * This register sets the extension to 50 bits.
2818	 */
2819	WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2820
2821	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2822			mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2823			mmCPU_CMD_STATUS_TO_HOST,
2824			mmCPU_BOOT_ERR0,
2825			!hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2826			GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2827
2828	if (rc)
2829		return rc;
2830
2831	gaudi->hw_cap_initialized |= HW_CAP_CPU;
2832
2833	return 0;
2834}
2835
2836static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2837{
2838	struct gaudi_device *gaudi = hdev->asic_specific;
2839	struct hl_eq *eq;
2840	u32 status;
2841	struct hl_hw_queue *cpu_pq =
2842			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2843	int err;
2844
2845	if (!hdev->cpu_queues_enable)
2846		return 0;
2847
2848	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2849		return 0;
2850
2851	eq = &hdev->event_queue;
2852
2853	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2854	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2855
2856	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2857	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2858
2859	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2860			lower_32_bits(hdev->cpu_accessible_dma_address));
2861	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2862			upper_32_bits(hdev->cpu_accessible_dma_address));
2863
2864	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2865	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2866	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2867
2868	/* Used for EQ CI */
2869	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2870
2871	WREG32(mmCPU_IF_PF_PQ_PI, 0);
2872
2873	if (gaudi->multi_msi_mode)
2874		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2875	else
2876		WREG32(mmCPU_IF_QUEUE_INIT,
2877			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2878
2879	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2880
2881	err = hl_poll_timeout(
2882		hdev,
2883		mmCPU_IF_QUEUE_INIT,
2884		status,
2885		(status == PQ_INIT_STATUS_READY_FOR_HOST),
2886		1000,
2887		cpu_timeout);
2888
2889	if (err) {
2890		dev_err(hdev->dev,
2891			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
2892		return -EIO;
2893	}
2894
2895	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2896	return 0;
2897}
2898
2899static void gaudi_pre_hw_init(struct hl_device *hdev)
2900{
2901	/* Perform read from the device to make sure device is up */
2902	RREG32(mmHW_STATE);
2903
2904	/* Set the access through PCI bars (Linux driver only) as
2905	 * secured
2906	 */
2907	WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
2908			(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2909			PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2910
2911	/* Perform read to flush the waiting writes to ensure
2912	 * configuration was set in the device
2913	 */
2914	RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2915
2916	/*
2917	 * Let's mark in the H/W that we have reached this point. We check
2918	 * this value in the reset_before_init function to understand whether
2919	 * we need to reset the chip before doing H/W init. This register is
2920	 * cleared by the H/W upon H/W reset
2921	 */
2922	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2923
2924	/* Configure the reset registers. Must be done as early as possible
2925	 * in case we fail during H/W initialization
2926	 */
2927	WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2928					(CFG_RST_H_DMA_MASK |
2929					CFG_RST_H_MME_MASK |
2930					CFG_RST_H_SM_MASK |
2931					CFG_RST_H_TPC_7_MASK));
2932
2933	WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2934
2935	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2936					(CFG_RST_H_HBM_MASK |
2937					CFG_RST_H_TPC_7_MASK |
2938					CFG_RST_H_NIC_MASK |
2939					CFG_RST_H_SM_MASK |
2940					CFG_RST_H_DMA_MASK |
2941					CFG_RST_H_MME_MASK |
2942					CFG_RST_H_CPU_MASK |
2943					CFG_RST_H_MMU_MASK));
2944
2945	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2946					(CFG_RST_L_IF_MASK |
2947					CFG_RST_L_PSOC_MASK |
2948					CFG_RST_L_TPC_MASK));
2949}
2950
2951static int gaudi_hw_init(struct hl_device *hdev)
2952{
2953	int rc;
2954
2955	dev_info(hdev->dev, "Starting initialization of H/W\n");
2956
2957	gaudi_pre_hw_init(hdev);
2958
2959	gaudi_init_pci_dma_qmans(hdev);
2960
2961	gaudi_init_hbm_dma_qmans(hdev);
2962
2963	rc = gaudi_init_cpu(hdev);
2964	if (rc) {
2965		dev_err(hdev->dev, "failed to initialize CPU\n");
2966		return rc;
2967	}
2968
2969	/* SRAM scrambler must be initialized after CPU is running from HBM */
2970	gaudi_init_scrambler_sram(hdev);
2971
2972	/* This is here just in case we are working without CPU */
2973	gaudi_init_scrambler_hbm(hdev);
2974
2975	gaudi_init_golden_registers(hdev);
2976
2977	rc = gaudi_mmu_init(hdev);
2978	if (rc)
2979		return rc;
2980
2981	gaudi_init_security(hdev);
2982
2983	gaudi_init_mme_qmans(hdev);
2984
2985	gaudi_init_tpc_qmans(hdev);
2986
2987	hdev->asic_funcs->set_clock_gating(hdev);
2988
2989	gaudi_enable_timestamp(hdev);
2990
2991	/* MSI must be enabled before CPU queues are initialized */
2992	rc = gaudi_enable_msi(hdev);
2993	if (rc)
2994		goto disable_queues;
2995
2996	/* must be called after MSI was enabled */
2997	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2998	if (rc) {
2999		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3000			rc);
3001		goto disable_msi;
3002	}
3003
3004	/* Perform read from the device to flush all configuration */
3005	RREG32(mmHW_STATE);
3006
3007	return 0;
3008
3009disable_msi:
3010	gaudi_disable_msi(hdev);
3011disable_queues:
3012	gaudi_disable_mme_qmans(hdev);
3013	gaudi_disable_pci_dma_qmans(hdev);
3014
3015	return rc;
3016}
3017
3018static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3019{
3020	struct gaudi_device *gaudi = hdev->asic_specific;
3021	u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
3022
3023	if (!hard_reset) {
3024		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3025		return;
3026	}
3027
3028	if (hdev->pldm) {
3029		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3030		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3031	} else {
3032		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3033		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3034	}
3035
3036	/* Set device to handle FLR by H/W as we will put the device CPU to
3037	 * halt mode
3038	 */
3039	WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3040					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3041
3042	/* I don't know what is the state of the CPU so make sure it is
3043	 * stopped in any means necessary
3044	 */
3045	WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3046	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3047
3048	msleep(cpu_timeout_ms);
3049
3050	/* Tell ASIC not to re-initialize PCIe */
3051	WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3052
3053	boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3054
3055	/* H/W bug WA:
3056	 * rdata[31:0] = strap_read_val;
3057	 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3058	 */
3059	boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3060			(boot_strap & 0x001FFFFF));
3061	WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3062
3063	/* Restart BTL/BLR upon hard-reset */
3064	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3065
3066	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3067			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3068	dev_info(hdev->dev,
3069		"Issued HARD reset command, going to wait %dms\n",
3070		reset_timeout_ms);
3071
3072	/*
3073	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3074	 * itself is in reset. Need to wait until the reset is deasserted
3075	 */
3076	msleep(reset_timeout_ms);
3077
3078	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3079	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3080		dev_err(hdev->dev,
3081			"Timeout while waiting for device to reset 0x%x\n",
3082			status);
3083
3084	WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3085
3086	gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3087					HW_CAP_HBM | HW_CAP_PCI_DMA |
3088					HW_CAP_MME | HW_CAP_TPC_MASK |
3089					HW_CAP_HBM_DMA | HW_CAP_PLL |
3090					HW_CAP_MMU |
3091					HW_CAP_SRAM_SCRAMBLER |
3092					HW_CAP_HBM_SCRAMBLER |
3093					HW_CAP_CLK_GATE);
3094
3095	memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3096}
3097
3098static int gaudi_suspend(struct hl_device *hdev)
3099{
3100	int rc;
3101
3102	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
3103	if (rc)
3104		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3105
3106	return rc;
3107}
3108
3109static int gaudi_resume(struct hl_device *hdev)
3110{
3111	return gaudi_init_iatu(hdev);
3112}
3113
3114static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3115			void *cpu_addr, dma_addr_t dma_addr, size_t size)
3116{
3117	int rc;
3118
3119	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3120			VM_DONTCOPY | VM_NORESERVE;
3121
3122	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
3123				(dma_addr - HOST_PHYS_BASE), size);
3124	if (rc)
3125		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
3126
3127	return rc;
3128}
3129
3130static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3131{
3132	struct gaudi_device *gaudi = hdev->asic_specific;
3133	u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3134	int dma_id;
3135	bool invalid_queue = false;
3136
3137	switch (hw_queue_id) {
3138	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3139		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3140		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3141		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3142		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3143		break;
3144
3145	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3146		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3147		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3148		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3149		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3150		break;
3151
3152	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3153		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3154		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3155		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3156		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3157		break;
3158
3159	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3160		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3161		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3162		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3163		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3164		break;
3165
3166	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3167		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3168		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3169		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3170		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3171		break;
3172
3173	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3174		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3175		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3176		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3177		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3178		break;
3179
3180	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3181		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3182		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3183		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3184		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3185		break;
3186
3187	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3188		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3189		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3190		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3191		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3192		break;
3193
3194	case GAUDI_QUEUE_ID_CPU_PQ:
3195		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3196			db_reg_offset = mmCPU_IF_PF_PQ_PI;
3197		else
3198			invalid_queue = true;
3199		break;
3200
3201	case GAUDI_QUEUE_ID_MME_0_0:
3202		db_reg_offset = mmMME2_QM_PQ_PI_0;
3203		break;
3204
3205	case GAUDI_QUEUE_ID_MME_0_1:
3206		db_reg_offset = mmMME2_QM_PQ_PI_1;
3207		break;
3208
3209	case GAUDI_QUEUE_ID_MME_0_2:
3210		db_reg_offset = mmMME2_QM_PQ_PI_2;
3211		break;
3212
3213	case GAUDI_QUEUE_ID_MME_0_3:
3214		db_reg_offset = mmMME2_QM_PQ_PI_3;
3215		break;
3216
3217	case GAUDI_QUEUE_ID_MME_1_0:
3218		db_reg_offset = mmMME0_QM_PQ_PI_0;
3219		break;
3220
3221	case GAUDI_QUEUE_ID_MME_1_1:
3222		db_reg_offset = mmMME0_QM_PQ_PI_1;
3223		break;
3224
3225	case GAUDI_QUEUE_ID_MME_1_2:
3226		db_reg_offset = mmMME0_QM_PQ_PI_2;
3227		break;
3228
3229	case GAUDI_QUEUE_ID_MME_1_3:
3230		db_reg_offset = mmMME0_QM_PQ_PI_3;
3231		break;
3232
3233	case GAUDI_QUEUE_ID_TPC_0_0:
3234		db_reg_offset = mmTPC0_QM_PQ_PI_0;
3235		break;
3236
3237	case GAUDI_QUEUE_ID_TPC_0_1:
3238		db_reg_offset = mmTPC0_QM_PQ_PI_1;
3239		break;
3240
3241	case GAUDI_QUEUE_ID_TPC_0_2:
3242		db_reg_offset = mmTPC0_QM_PQ_PI_2;
3243		break;
3244
3245	case GAUDI_QUEUE_ID_TPC_0_3:
3246		db_reg_offset = mmTPC0_QM_PQ_PI_3;
3247		break;
3248
3249	case GAUDI_QUEUE_ID_TPC_1_0:
3250		db_reg_offset = mmTPC1_QM_PQ_PI_0;
3251		break;
3252
3253	case GAUDI_QUEUE_ID_TPC_1_1:
3254		db_reg_offset = mmTPC1_QM_PQ_PI_1;
3255		break;
3256
3257	case GAUDI_QUEUE_ID_TPC_1_2:
3258		db_reg_offset = mmTPC1_QM_PQ_PI_2;
3259		break;
3260
3261	case GAUDI_QUEUE_ID_TPC_1_3:
3262		db_reg_offset = mmTPC1_QM_PQ_PI_3;
3263		break;
3264
3265	case GAUDI_QUEUE_ID_TPC_2_0:
3266		db_reg_offset = mmTPC2_QM_PQ_PI_0;
3267		break;
3268
3269	case GAUDI_QUEUE_ID_TPC_2_1:
3270		db_reg_offset = mmTPC2_QM_PQ_PI_1;
3271		break;
3272
3273	case GAUDI_QUEUE_ID_TPC_2_2:
3274		db_reg_offset = mmTPC2_QM_PQ_PI_2;
3275		break;
3276
3277	case GAUDI_QUEUE_ID_TPC_2_3:
3278		db_reg_offset = mmTPC2_QM_PQ_PI_3;
3279		break;
3280
3281	case GAUDI_QUEUE_ID_TPC_3_0:
3282		db_reg_offset = mmTPC3_QM_PQ_PI_0;
3283		break;
3284
3285	case GAUDI_QUEUE_ID_TPC_3_1:
3286		db_reg_offset = mmTPC3_QM_PQ_PI_1;
3287		break;
3288
3289	case GAUDI_QUEUE_ID_TPC_3_2:
3290		db_reg_offset = mmTPC3_QM_PQ_PI_2;
3291		break;
3292
3293	case GAUDI_QUEUE_ID_TPC_3_3:
3294		db_reg_offset = mmTPC3_QM_PQ_PI_3;
3295		break;
3296
3297	case GAUDI_QUEUE_ID_TPC_4_0:
3298		db_reg_offset = mmTPC4_QM_PQ_PI_0;
3299		break;
3300
3301	case GAUDI_QUEUE_ID_TPC_4_1:
3302		db_reg_offset = mmTPC4_QM_PQ_PI_1;
3303		break;
3304
3305	case GAUDI_QUEUE_ID_TPC_4_2:
3306		db_reg_offset = mmTPC4_QM_PQ_PI_2;
3307		break;
3308
3309	case GAUDI_QUEUE_ID_TPC_4_3:
3310		db_reg_offset = mmTPC4_QM_PQ_PI_3;
3311		break;
3312
3313	case GAUDI_QUEUE_ID_TPC_5_0:
3314		db_reg_offset = mmTPC5_QM_PQ_PI_0;
3315		break;
3316
3317	case GAUDI_QUEUE_ID_TPC_5_1:
3318		db_reg_offset = mmTPC5_QM_PQ_PI_1;
3319		break;
3320
3321	case GAUDI_QUEUE_ID_TPC_5_2:
3322		db_reg_offset = mmTPC5_QM_PQ_PI_2;
3323		break;
3324
3325	case GAUDI_QUEUE_ID_TPC_5_3:
3326		db_reg_offset = mmTPC5_QM_PQ_PI_3;
3327		break;
3328
3329	case GAUDI_QUEUE_ID_TPC_6_0:
3330		db_reg_offset = mmTPC6_QM_PQ_PI_0;
3331		break;
3332
3333	case GAUDI_QUEUE_ID_TPC_6_1:
3334		db_reg_offset = mmTPC6_QM_PQ_PI_1;
3335		break;
3336
3337	case GAUDI_QUEUE_ID_TPC_6_2:
3338		db_reg_offset = mmTPC6_QM_PQ_PI_2;
3339		break;
3340
3341	case GAUDI_QUEUE_ID_TPC_6_3:
3342		db_reg_offset = mmTPC6_QM_PQ_PI_3;
3343		break;
3344
3345	case GAUDI_QUEUE_ID_TPC_7_0:
3346		db_reg_offset = mmTPC7_QM_PQ_PI_0;
3347		break;
3348
3349	case GAUDI_QUEUE_ID_TPC_7_1:
3350		db_reg_offset = mmTPC7_QM_PQ_PI_1;
3351		break;
3352
3353	case GAUDI_QUEUE_ID_TPC_7_2:
3354		db_reg_offset = mmTPC7_QM_PQ_PI_2;
3355		break;
3356
3357	case GAUDI_QUEUE_ID_TPC_7_3:
3358		db_reg_offset = mmTPC7_QM_PQ_PI_3;
3359		break;
3360
3361	default:
3362		invalid_queue = true;
3363	}
3364
3365	if (invalid_queue) {
3366		/* Should never get here */
3367		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3368			hw_queue_id);
3369		return;
3370	}
3371
3372	db_value = pi;
3373
3374	/* ring the doorbell */
3375	WREG32(db_reg_offset, db_value);
3376
3377	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3378		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3379				GAUDI_EVENT_PI_UPDATE);
3380}
3381
3382static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3383				struct hl_bd *bd)
3384{
3385	__le64 *pbd = (__le64 *) bd;
3386
3387	/* The QMANs are on the host memory so a simple copy suffice */
3388	pqe[0] = pbd[0];
3389	pqe[1] = pbd[1];
3390}
3391
3392static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3393					dma_addr_t *dma_handle, gfp_t flags)
3394{
3395	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3396						dma_handle, flags);
3397
3398	/* Shift to the device's base physical address of host memory */
3399	if (kernel_addr)
3400		*dma_handle += HOST_PHYS_BASE;
3401
3402	return kernel_addr;
3403}
3404
3405static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3406		void *cpu_addr, dma_addr_t dma_handle)
3407{
3408	/* Cancel the device's base physical address of host memory */
3409	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3410
3411	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3412}
3413
3414static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3415				u32 queue_id, dma_addr_t *dma_handle,
3416				u16 *queue_len)
3417{
3418	struct gaudi_device *gaudi = hdev->asic_specific;
3419	struct gaudi_internal_qman_info *q;
3420
3421	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3422			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3423		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3424		return NULL;
3425	}
3426
3427	q = &gaudi->internal_qmans[queue_id];
3428	*dma_handle = q->pq_dma_addr;
3429	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3430
3431	return q->pq_kernel_addr;
3432}
3433
3434static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3435				u16 len, u32 timeout, long *result)
3436{
3437	struct gaudi_device *gaudi = hdev->asic_specific;
3438
3439	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3440		if (result)
3441			*result = 0;
3442		return 0;
3443	}
3444
3445	if (!timeout)
3446		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
3447
3448	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3449						timeout, result);
3450}
3451
3452static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3453{
3454	struct packet_msg_prot *fence_pkt;
3455	dma_addr_t pkt_dma_addr;
3456	u32 fence_val, tmp, timeout_usec;
3457	dma_addr_t fence_dma_addr;
3458	u32 *fence_ptr;
3459	int rc;
3460
3461	if (hdev->pldm)
3462		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3463	else
3464		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3465
3466	fence_val = GAUDI_QMAN0_FENCE_VAL;
3467
3468	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3469							&fence_dma_addr);
3470	if (!fence_ptr) {
3471		dev_err(hdev->dev,
3472			"Failed to allocate memory for H/W queue %d testing\n",
3473			hw_queue_id);
3474		return -ENOMEM;
3475	}
3476
3477	*fence_ptr = 0;
3478
3479	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3480					sizeof(struct packet_msg_prot),
3481					GFP_KERNEL, &pkt_dma_addr);
3482	if (!fence_pkt) {
3483		dev_err(hdev->dev,
3484			"Failed to allocate packet for H/W queue %d testing\n",
3485			hw_queue_id);
3486		rc = -ENOMEM;
3487		goto free_fence_ptr;
3488	}
3489
3490	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
3491	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
3492	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
3493
3494	fence_pkt->ctl = cpu_to_le32(tmp);
3495	fence_pkt->value = cpu_to_le32(fence_val);
3496	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3497
3498	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3499					sizeof(struct packet_msg_prot),
3500					pkt_dma_addr);
3501	if (rc) {
3502		dev_err(hdev->dev,
3503			"Failed to send fence packet to H/W queue %d\n",
3504			hw_queue_id);
3505		goto free_pkt;
3506	}
3507
3508	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3509					1000, timeout_usec, true);
3510
3511	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3512
3513	if (rc == -ETIMEDOUT) {
3514		dev_err(hdev->dev,
3515			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3516			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3517		rc = -EIO;
3518	}
3519
3520free_pkt:
3521	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3522					pkt_dma_addr);
3523free_fence_ptr:
3524	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3525					fence_dma_addr);
3526	return rc;
3527}
3528
3529static int gaudi_test_cpu_queue(struct hl_device *hdev)
3530{
3531	struct gaudi_device *gaudi = hdev->asic_specific;
3532
3533	/*
3534	 * check capability here as send_cpu_message() won't update the result
3535	 * value if no capability
3536	 */
3537	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3538		return 0;
3539
3540	return hl_fw_test_cpu_queue(hdev);
3541}
3542
3543static int gaudi_test_queues(struct hl_device *hdev)
3544{
3545	int i, rc, ret_val = 0;
3546
3547	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
3548		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3549			rc = gaudi_test_queue(hdev, i);
3550			if (rc)
3551				ret_val = -EINVAL;
3552		}
3553	}
3554
3555	rc = gaudi_test_cpu_queue(hdev);
3556	if (rc)
3557		ret_val = -EINVAL;
3558
3559	return ret_val;
3560}
3561
3562static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3563		gfp_t mem_flags, dma_addr_t *dma_handle)
3564{
3565	void *kernel_addr;
3566
3567	if (size > GAUDI_DMA_POOL_BLK_SIZE)
3568		return NULL;
3569
3570	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3571
3572	/* Shift to the device's base physical address of host memory */
3573	if (kernel_addr)
3574		*dma_handle += HOST_PHYS_BASE;
3575
3576	return kernel_addr;
3577}
3578
3579static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3580			dma_addr_t dma_addr)
3581{
3582	/* Cancel the device's base physical address of host memory */
3583	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3584
3585	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3586}
3587
3588static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3589					size_t size, dma_addr_t *dma_handle)
3590{
3591	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3592}
3593
3594static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3595						size_t size, void *vaddr)
3596{
3597	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3598}
3599
3600static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3601			int nents, enum dma_data_direction dir)
3602{
3603	struct scatterlist *sg;
3604	int i;
3605
3606	if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3607		return -ENOMEM;
3608
3609	/* Shift to the device's base physical address of host memory */
3610	for_each_sg(sgl, sg, nents, i)
3611		sg->dma_address += HOST_PHYS_BASE;
3612
3613	return 0;
3614}
3615
3616static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3617			int nents, enum dma_data_direction dir)
3618{
3619	struct scatterlist *sg;
3620	int i;
3621
3622	/* Cancel the device's base physical address of host memory */
3623	for_each_sg(sgl, sg, nents, i)
3624		sg->dma_address -= HOST_PHYS_BASE;
3625
3626	dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3627}
3628
3629static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3630					struct sg_table *sgt)
3631{
3632	struct scatterlist *sg, *sg_next_iter;
3633	u32 count, dma_desc_cnt;
3634	u64 len, len_next;
3635	dma_addr_t addr, addr_next;
3636
3637	dma_desc_cnt = 0;
3638
3639	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3640
3641		len = sg_dma_len(sg);
3642		addr = sg_dma_address(sg);
3643
3644		if (len == 0)
3645			break;
3646
3647		while ((count + 1) < sgt->nents) {
3648			sg_next_iter = sg_next(sg);
3649			len_next = sg_dma_len(sg_next_iter);
3650			addr_next = sg_dma_address(sg_next_iter);
3651
3652			if (len_next == 0)
3653				break;
3654
3655			if ((addr + len == addr_next) &&
3656				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3657				len += len_next;
3658				count++;
3659				sg = sg_next_iter;
3660			} else {
3661				break;
3662			}
3663		}
3664
3665		dma_desc_cnt++;
3666	}
3667
3668	return dma_desc_cnt * sizeof(struct packet_lin_dma);
3669}
3670
3671static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3672				struct hl_cs_parser *parser,
3673				struct packet_lin_dma *user_dma_pkt,
3674				u64 addr, enum dma_data_direction dir)
3675{
3676	struct hl_userptr *userptr;
3677	int rc;
3678
3679	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3680			parser->job_userptr_list, &userptr))
3681		goto already_pinned;
3682
3683	userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3684	if (!userptr)
3685		return -ENOMEM;
3686
3687	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3688				userptr);
3689	if (rc)
3690		goto free_userptr;
3691
3692	list_add_tail(&userptr->job_node, parser->job_userptr_list);
3693
3694	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3695					userptr->sgt->nents, dir);
3696	if (rc) {
3697		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3698		goto unpin_memory;
3699	}
3700
3701	userptr->dma_mapped = true;
3702	userptr->dir = dir;
3703
3704already_pinned:
3705	parser->patched_cb_size +=
3706			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3707
3708	return 0;
3709
3710unpin_memory:
3711	list_del(&userptr->job_node);
3712	hl_unpin_host_memory(hdev, userptr);
3713free_userptr:
3714	kfree(userptr);
3715	return rc;
3716}
3717
3718static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3719				struct hl_cs_parser *parser,
3720				struct packet_lin_dma *user_dma_pkt,
3721				bool src_in_host)
3722{
3723	enum dma_data_direction dir;
3724	bool skip_host_mem_pin = false, user_memset;
3725	u64 addr;
3726	int rc = 0;
3727
3728	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3729			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3730			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3731
3732	if (src_in_host) {
3733		if (user_memset)
3734			skip_host_mem_pin = true;
3735
3736		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3737		dir = DMA_TO_DEVICE;
3738		addr = le64_to_cpu(user_dma_pkt->src_addr);
3739	} else {
3740		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3741		dir = DMA_FROM_DEVICE;
3742		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3743				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3744				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3745	}
3746
3747	if (skip_host_mem_pin)
3748		parser->patched_cb_size += sizeof(*user_dma_pkt);
3749	else
3750		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3751						addr, dir);
3752
3753	return rc;
3754}
3755
3756static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3757				struct hl_cs_parser *parser,
3758				struct packet_lin_dma *user_dma_pkt)
3759{
3760	bool src_in_host = false;
3761	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3762			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3763			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3764
3765	dev_dbg(hdev->dev, "DMA packet details:\n");
3766	dev_dbg(hdev->dev, "source == 0x%llx\n",
3767				le64_to_cpu(user_dma_pkt->src_addr));
3768	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3769	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3770
3771	/*
3772	 * Special handling for DMA with size 0. Bypass all validations
3773	 * because no transactions will be done except for WR_COMP, which
3774	 * is not a security issue
3775	 */
3776	if (!le32_to_cpu(user_dma_pkt->tsize)) {
3777		parser->patched_cb_size += sizeof(*user_dma_pkt);
3778		return 0;
3779	}
3780
3781	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3782		src_in_host = true;
3783
3784	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3785						src_in_host);
3786}
3787
3788static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3789					struct hl_cs_parser *parser,
3790					struct packet_load_and_exe *user_pkt)
3791{
3792	u32 cfg;
3793
3794	cfg = le32_to_cpu(user_pkt->cfg);
3795
3796	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3797		dev_err(hdev->dev,
3798			"User not allowed to use Load and Execute\n");
3799		return -EPERM;
3800	}
3801
3802	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3803
3804	return 0;
3805}
3806
3807static int gaudi_validate_cb(struct hl_device *hdev,
3808			struct hl_cs_parser *parser, bool is_mmu)
3809{
3810	u32 cb_parsed_length = 0;
3811	int rc = 0;
3812
3813	parser->patched_cb_size = 0;
3814
3815	/* cb_user_size is more than 0 so loop will always be executed */
3816	while (cb_parsed_length < parser->user_cb_size) {
3817		enum packet_id pkt_id;
3818		u16 pkt_size;
3819		struct gaudi_packet *user_pkt;
3820
3821		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3822
3823		pkt_id = (enum packet_id) (
3824				(le64_to_cpu(user_pkt->header) &
3825				PACKET_HEADER_PACKET_ID_MASK) >>
3826					PACKET_HEADER_PACKET_ID_SHIFT);
3827
3828		if (!validate_packet_id(pkt_id)) {
3829			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3830			rc = -EINVAL;
3831			break;
3832		}
3833
3834		pkt_size = gaudi_packet_sizes[pkt_id];
3835		cb_parsed_length += pkt_size;
3836		if (cb_parsed_length > parser->user_cb_size) {
3837			dev_err(hdev->dev,
3838				"packet 0x%x is out of CB boundary\n", pkt_id);
3839			rc = -EINVAL;
3840			break;
3841		}
3842
3843		switch (pkt_id) {
3844		case PACKET_MSG_PROT:
3845			dev_err(hdev->dev,
3846				"User not allowed to use MSG_PROT\n");
3847			rc = -EPERM;
3848			break;
3849
3850		case PACKET_CP_DMA:
3851			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3852			rc = -EPERM;
3853			break;
3854
3855		case PACKET_STOP:
3856			dev_err(hdev->dev, "User not allowed to use STOP\n");
3857			rc = -EPERM;
3858			break;
3859
3860		case PACKET_WREG_BULK:
3861			dev_err(hdev->dev,
3862				"User not allowed to use WREG_BULK\n");
3863			rc = -EPERM;
3864			break;
3865
3866		case PACKET_LOAD_AND_EXE:
3867			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3868				(struct packet_load_and_exe *) user_pkt);
3869			break;
3870
3871		case PACKET_LIN_DMA:
3872			parser->contains_dma_pkt = true;
3873			if (is_mmu)
3874				parser->patched_cb_size += pkt_size;
3875			else
3876				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3877					(struct packet_lin_dma *) user_pkt);
3878			break;
3879
3880		case PACKET_WREG_32:
3881		case PACKET_MSG_LONG:
3882		case PACKET_MSG_SHORT:
3883		case PACKET_REPEAT:
3884		case PACKET_FENCE:
3885		case PACKET_NOP:
3886		case PACKET_ARB_POINT:
3887			parser->patched_cb_size += pkt_size;
3888			break;
3889
3890		default:
3891			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3892				pkt_id);
3893			rc = -EINVAL;
3894			break;
3895		}
3896
3897		if (rc)
3898			break;
3899	}
3900
3901	/*
3902	 * The new CB should have space at the end for two MSG_PROT packets:
3903	 * 1. A packet that will act as a completion packet
3904	 * 2. A packet that will generate MSI-X interrupt
3905	 */
3906	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3907
3908	return rc;
3909}
3910
3911static int gaudi_patch_dma_packet(struct hl_device *hdev,
3912				struct hl_cs_parser *parser,
3913				struct packet_lin_dma *user_dma_pkt,
3914				struct packet_lin_dma *new_dma_pkt,
3915				u32 *new_dma_pkt_size)
3916{
3917	struct hl_userptr *userptr;
3918	struct scatterlist *sg, *sg_next_iter;
3919	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3920	u64 len, len_next;
3921	dma_addr_t dma_addr, dma_addr_next;
3922	u64 device_memory_addr, addr;
3923	enum dma_data_direction dir;
3924	struct sg_table *sgt;
3925	bool src_in_host = false;
3926	bool skip_host_mem_pin = false;
3927	bool user_memset;
3928
3929	ctl = le32_to_cpu(user_dma_pkt->ctl);
3930
3931	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3932		src_in_host = true;
3933
3934	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3935			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3936
3937	if (src_in_host) {
3938		addr = le64_to_cpu(user_dma_pkt->src_addr);
3939		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3940		dir = DMA_TO_DEVICE;
3941		if (user_memset)
3942			skip_host_mem_pin = true;
3943	} else {
3944		addr = le64_to_cpu(user_dma_pkt->dst_addr);
3945		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3946		dir = DMA_FROM_DEVICE;
3947	}
3948
3949	if ((!skip_host_mem_pin) &&
3950		(!hl_userptr_is_pinned(hdev, addr,
3951					le32_to_cpu(user_dma_pkt->tsize),
3952					parser->job_userptr_list, &userptr))) {
3953		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3954				addr, user_dma_pkt->tsize);
3955		return -EFAULT;
3956	}
3957
3958	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3959		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3960		*new_dma_pkt_size = sizeof(*user_dma_pkt);
3961		return 0;
3962	}
3963
3964	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3965
3966	sgt = userptr->sgt;
3967	dma_desc_cnt = 0;
3968
3969	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3970		len = sg_dma_len(sg);
3971		dma_addr = sg_dma_address(sg);
3972
3973		if (len == 0)
3974			break;
3975
3976		while ((count + 1) < sgt->nents) {
3977			sg_next_iter = sg_next(sg);
3978			len_next = sg_dma_len(sg_next_iter);
3979			dma_addr_next = sg_dma_address(sg_next_iter);
3980
3981			if (len_next == 0)
3982				break;
3983
3984			if ((dma_addr + len == dma_addr_next) &&
3985				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3986				len += len_next;
3987				count++;
3988				sg = sg_next_iter;
3989			} else {
3990				break;
3991			}
3992		}
3993
3994		ctl = le32_to_cpu(user_dma_pkt->ctl);
3995		if (likely(dma_desc_cnt))
3996			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3997		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3998		new_dma_pkt->ctl = cpu_to_le32(ctl);
3999		new_dma_pkt->tsize = cpu_to_le32(len);
4000
4001		if (dir == DMA_TO_DEVICE) {
4002			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
4003			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
4004		} else {
4005			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
4006			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
4007		}
4008
4009		if (!user_memset)
4010			device_memory_addr += len;
4011		dma_desc_cnt++;
4012		new_dma_pkt++;
4013	}
4014
4015	if (!dma_desc_cnt) {
4016		dev_err(hdev->dev,
4017			"Error of 0 SG entries when patching DMA packet\n");
4018		return -EFAULT;
4019	}
4020
4021	/* Fix the last dma packet - wrcomp must be as user set it */
4022	new_dma_pkt--;
4023	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4024
4025	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4026
4027	return 0;
4028}
4029
4030static int gaudi_patch_cb(struct hl_device *hdev,
4031				struct hl_cs_parser *parser)
4032{
4033	u32 cb_parsed_length = 0;
4034	u32 cb_patched_cur_length = 0;
4035	int rc = 0;
4036
4037	/* cb_user_size is more than 0 so loop will always be executed */
4038	while (cb_parsed_length < parser->user_cb_size) {
4039		enum packet_id pkt_id;
4040		u16 pkt_size;
4041		u32 new_pkt_size = 0;
4042		struct gaudi_packet *user_pkt, *kernel_pkt;
4043
4044		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
4045		kernel_pkt = parser->patched_cb->kernel_address +
4046					cb_patched_cur_length;
4047
4048		pkt_id = (enum packet_id) (
4049				(le64_to_cpu(user_pkt->header) &
4050				PACKET_HEADER_PACKET_ID_MASK) >>
4051					PACKET_HEADER_PACKET_ID_SHIFT);
4052
4053		if (!validate_packet_id(pkt_id)) {
4054			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4055			rc = -EINVAL;
4056			break;
4057		}
4058
4059		pkt_size = gaudi_packet_sizes[pkt_id];
4060		cb_parsed_length += pkt_size;
4061		if (cb_parsed_length > parser->user_cb_size) {
4062			dev_err(hdev->dev,
4063				"packet 0x%x is out of CB boundary\n", pkt_id);
4064			rc = -EINVAL;
4065			break;
4066		}
4067
4068		switch (pkt_id) {
4069		case PACKET_LIN_DMA:
4070			rc = gaudi_patch_dma_packet(hdev, parser,
4071					(struct packet_lin_dma *) user_pkt,
4072					(struct packet_lin_dma *) kernel_pkt,
4073					&new_pkt_size);
4074			cb_patched_cur_length += new_pkt_size;
4075			break;
4076
4077		case PACKET_MSG_PROT:
4078			dev_err(hdev->dev,
4079				"User not allowed to use MSG_PROT\n");
4080			rc = -EPERM;
4081			break;
4082
4083		case PACKET_CP_DMA:
4084			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4085			rc = -EPERM;
4086			break;
4087
4088		case PACKET_STOP:
4089			dev_err(hdev->dev, "User not allowed to use STOP\n");
4090			rc = -EPERM;
4091			break;
4092
4093		case PACKET_WREG_32:
4094		case PACKET_WREG_BULK:
4095		case PACKET_MSG_LONG:
4096		case PACKET_MSG_SHORT:
4097		case PACKET_REPEAT:
4098		case PACKET_FENCE:
4099		case PACKET_NOP:
4100		case PACKET_ARB_POINT:
4101		case PACKET_LOAD_AND_EXE:
4102			memcpy(kernel_pkt, user_pkt, pkt_size);
4103			cb_patched_cur_length += pkt_size;
4104			break;
4105
4106		default:
4107			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4108				pkt_id);
4109			rc = -EINVAL;
4110			break;
4111		}
4112
4113		if (rc)
4114			break;
4115	}
4116
4117	return rc;
4118}
4119
4120static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4121		struct hl_cs_parser *parser)
4122{
4123	u64 patched_cb_handle;
4124	u32 patched_cb_size;
4125	struct hl_cb *user_cb;
4126	int rc;
4127
4128	/*
4129	 * The new CB should have space at the end for two MSG_PROT pkt:
4130	 * 1. A packet that will act as a completion packet
4131	 * 2. A packet that will generate MSI interrupt
4132	 */
4133	parser->patched_cb_size = parser->user_cb_size +
4134			sizeof(struct packet_msg_prot) * 2;
4135
4136	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4137				parser->patched_cb_size, false, false,
4138				&patched_cb_handle);
4139
4140	if (rc) {
4141		dev_err(hdev->dev,
4142			"Failed to allocate patched CB for DMA CS %d\n",
4143			rc);
4144		return rc;
4145	}
4146
4147	patched_cb_handle >>= PAGE_SHIFT;
4148	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4149				(u32) patched_cb_handle);
4150	/* hl_cb_get should never fail here so use kernel WARN */
4151	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4152			(u32) patched_cb_handle);
4153	if (!parser->patched_cb) {
4154		rc = -EFAULT;
4155		goto out;
4156	}
4157
4158	/*
4159	 * The check that parser->user_cb_size <= parser->user_cb->size was done
4160	 * in validate_queue_index().
4161	 */
4162	memcpy(parser->patched_cb->kernel_address,
4163		parser->user_cb->kernel_address,
4164		parser->user_cb_size);
4165
4166	patched_cb_size = parser->patched_cb_size;
4167
4168	/* Validate patched CB instead of user CB */
4169	user_cb = parser->user_cb;
4170	parser->user_cb = parser->patched_cb;
4171	rc = gaudi_validate_cb(hdev, parser, true);
4172	parser->user_cb = user_cb;
4173
4174	if (rc) {
4175		hl_cb_put(parser->patched_cb);
4176		goto out;
4177	}
4178
4179	if (patched_cb_size != parser->patched_cb_size) {
4180		dev_err(hdev->dev, "user CB size mismatch\n");
4181		hl_cb_put(parser->patched_cb);
4182		rc = -EINVAL;
4183		goto out;
4184	}
4185
4186out:
4187	/*
4188	 * Always call cb destroy here because we still have 1 reference
4189	 * to it by calling cb_get earlier. After the job will be completed,
4190	 * cb_put will release it, but here we want to remove it from the
4191	 * idr
4192	 */
4193	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4194					patched_cb_handle << PAGE_SHIFT);
4195
4196	return rc;
4197}
4198
4199static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4200		struct hl_cs_parser *parser)
4201{
4202	u64 patched_cb_handle;
4203	int rc;
4204
4205	rc = gaudi_validate_cb(hdev, parser, false);
4206
4207	if (rc)
4208		goto free_userptr;
4209
4210	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4211				parser->patched_cb_size, false, false,
4212				&patched_cb_handle);
4213	if (rc) {
4214		dev_err(hdev->dev,
4215			"Failed to allocate patched CB for DMA CS %d\n", rc);
4216		goto free_userptr;
4217	}
4218
4219	patched_cb_handle >>= PAGE_SHIFT;
4220	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4221				(u32) patched_cb_handle);
4222	/* hl_cb_get should never fail here so use kernel WARN */
4223	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4224			(u32) patched_cb_handle);
4225	if (!parser->patched_cb) {
4226		rc = -EFAULT;
4227		goto out;
4228	}
4229
4230	rc = gaudi_patch_cb(hdev, parser);
4231
4232	if (rc)
4233		hl_cb_put(parser->patched_cb);
4234
4235out:
4236	/*
4237	 * Always call cb destroy here because we still have 1 reference
4238	 * to it by calling cb_get earlier. After the job will be completed,
4239	 * cb_put will release it, but here we want to remove it from the
4240	 * idr
4241	 */
4242	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4243				patched_cb_handle << PAGE_SHIFT);
4244
4245free_userptr:
4246	if (rc)
4247		hl_userptr_delete_list(hdev, parser->job_userptr_list);
4248	return rc;
4249}
4250
4251static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4252					struct hl_cs_parser *parser)
4253{
4254	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4255
4256	/* For internal queue jobs just check if CB address is valid */
4257	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4258					parser->user_cb_size,
4259					asic_prop->sram_user_base_address,
4260					asic_prop->sram_end_address))
4261		return 0;
4262
4263	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4264					parser->user_cb_size,
4265					asic_prop->dram_user_base_address,
4266					asic_prop->dram_end_address))
4267		return 0;
4268
4269	/* PMMU and HPMMU addresses are equal, check only one of them */
4270	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4271					parser->user_cb_size,
4272					asic_prop->pmmu.start_addr,
4273					asic_prop->pmmu.end_addr))
4274		return 0;
4275
4276	dev_err(hdev->dev,
4277		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4278		parser->user_cb, parser->user_cb_size);
4279
4280	return -EFAULT;
4281}
4282
4283static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4284{
4285	struct gaudi_device *gaudi = hdev->asic_specific;
4286
4287	if (parser->queue_type == QUEUE_TYPE_INT)
4288		return gaudi_parse_cb_no_ext_queue(hdev, parser);
4289
4290	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4291		return gaudi_parse_cb_mmu(hdev, parser);
4292	else
4293		return gaudi_parse_cb_no_mmu(hdev, parser);
4294}
4295
4296static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4297					void *kernel_address, u32 len,
4298					u64 cq_addr, u32 cq_val, u32 msi_vec,
4299					bool eb)
4300{
4301	struct gaudi_device *gaudi = hdev->asic_specific;
4302	struct packet_msg_prot *cq_pkt;
4303	u32 tmp;
4304
4305	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
4306
4307	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4308	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4309
4310	if (eb)
4311		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4312
4313	cq_pkt->ctl = cpu_to_le32(tmp);
4314	cq_pkt->value = cpu_to_le32(cq_val);
4315	cq_pkt->addr = cpu_to_le64(cq_addr);
4316
4317	cq_pkt++;
4318
4319	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4320	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4321	cq_pkt->ctl = cpu_to_le32(tmp);
4322	cq_pkt->value = cpu_to_le32(1);
4323
4324	if (!gaudi->multi_msi_mode)
4325		msi_vec = 0;
4326
4327	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4328}
4329
4330static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4331{
4332	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4333}
4334
4335static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4336					u32 size, u64 val)
4337{
4338	struct packet_lin_dma *lin_dma_pkt;
4339	struct hl_cs_job *job;
4340	u32 cb_size, ctl, err_cause;
4341	struct hl_cb *cb;
4342	int rc;
4343
4344	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
4345	if (!cb)
4346		return -EFAULT;
4347
4348	lin_dma_pkt = cb->kernel_address;
4349	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4350	cb_size = sizeof(*lin_dma_pkt);
4351
4352	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
4353	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
4354	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
4355	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4356	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
4357
4358	lin_dma_pkt->ctl = cpu_to_le32(ctl);
4359	lin_dma_pkt->src_addr = cpu_to_le64(val);
4360	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4361	lin_dma_pkt->tsize = cpu_to_le32(size);
4362
4363	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4364	if (!job) {
4365		dev_err(hdev->dev, "Failed to allocate a new job\n");
4366		rc = -ENOMEM;
4367		goto release_cb;
4368	}
4369
4370	/* Verify DMA is OK */
4371	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4372	if (err_cause && !hdev->init_done) {
4373		dev_dbg(hdev->dev,
4374			"Clearing DMA0 engine from errors (cause 0x%x)\n",
4375			err_cause);
4376		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4377	}
4378
4379	job->id = 0;
4380	job->user_cb = cb;
4381	job->user_cb->cs_cnt++;
4382	job->user_cb_size = cb_size;
4383	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4384	job->patched_cb = job->user_cb;
4385	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4386
4387	hl_debugfs_add_job(hdev, job);
4388
4389	rc = gaudi_send_job_on_qman0(hdev, job);
4390	hl_debugfs_remove_job(hdev, job);
4391	kfree(job);
4392	cb->cs_cnt--;
4393
4394	/* Verify DMA is OK */
4395	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4396	if (err_cause) {
4397		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
4398		rc = -EIO;
4399		if (!hdev->init_done) {
4400			dev_dbg(hdev->dev,
4401				"Clearing DMA0 engine from errors (cause 0x%x)\n",
4402				err_cause);
4403			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4404		}
4405	}
4406
4407release_cb:
4408	hl_cb_put(cb);
4409	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4410
4411	return rc;
4412}
4413
4414static void gaudi_restore_sm_registers(struct hl_device *hdev)
4415{
4416	int i;
4417
4418	for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4419		WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4420		WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4421		WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4422	}
4423
4424	for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4425		WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4426		WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4427		WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4428	}
4429
4430	i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4431
4432	for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4433		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4434
4435	i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4436
4437	for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4438		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4439}
4440
4441static void gaudi_restore_dma_registers(struct hl_device *hdev)
4442{
4443	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4444			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4445	int i;
4446
4447	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4448		u64 sob_addr = CFG_BASE +
4449				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4450				(i * sob_delta);
4451		u32 dma_offset = i * DMA_CORE_OFFSET;
4452
4453		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4454				lower_32_bits(sob_addr));
4455		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4456				upper_32_bits(sob_addr));
4457		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4458
4459		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4460		 * modified by the user for SRAM reduction
4461		 */
4462		if (i > 1)
4463			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4464								0x00000001);
4465	}
4466}
4467
4468static void gaudi_restore_qm_registers(struct hl_device *hdev)
4469{
4470	u32 qman_offset;
4471	int i;
4472
4473	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4474		qman_offset = i * DMA_QMAN_OFFSET;
4475		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4476	}
4477
4478	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4479		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4480		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4481	}
4482
4483	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4484		qman_offset = i * TPC_QMAN_OFFSET;
4485		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4486	}
4487}
4488
4489static void gaudi_restore_user_registers(struct hl_device *hdev)
4490{
4491	gaudi_restore_sm_registers(hdev);
4492	gaudi_restore_dma_registers(hdev);
4493	gaudi_restore_qm_registers(hdev);
4494}
4495
4496static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4497{
4498	struct asic_fixed_properties *prop = &hdev->asic_prop;
4499	u64 addr = prop->sram_user_base_address;
4500	u32 size = hdev->pldm ? 0x10000 :
4501			(prop->sram_size - SRAM_USER_BASE_OFFSET);
4502	u64 val = 0x7777777777777777ull;
4503	int rc;
4504
4505	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4506	if (rc) {
4507		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4508		return rc;
4509	}
4510
4511	gaudi_mmu_prepare(hdev, asid);
4512
4513	gaudi_restore_user_registers(hdev);
4514
4515	return 0;
4516}
4517
4518static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4519{
4520	struct asic_fixed_properties *prop = &hdev->asic_prop;
4521	struct gaudi_device *gaudi = hdev->asic_specific;
4522	u64 addr = prop->mmu_pgt_addr;
4523	u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4524
4525	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4526		return 0;
4527
4528	return gaudi_memset_device_memory(hdev, addr, size, 0);
4529}
4530
4531static void gaudi_restore_phase_topology(struct hl_device *hdev)
4532{
4533
4534}
4535
4536static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4537{
4538	struct asic_fixed_properties *prop = &hdev->asic_prop;
4539	struct gaudi_device *gaudi = hdev->asic_specific;
4540	u64 hbm_bar_addr;
4541	int rc = 0;
4542
4543	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4544
4545		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4546				(hdev->clock_gating_mask &
4547						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4548
4549			dev_err_ratelimited(hdev->dev,
4550				"Can't read register - clock gating is enabled!\n");
4551			rc = -EFAULT;
4552		} else {
4553			*val = RREG32(addr - CFG_BASE);
4554		}
4555
4556	} else if ((addr >= SRAM_BASE_ADDR) &&
4557			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4558		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4559				(addr - SRAM_BASE_ADDR));
4560	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4561		u64 bar_base_addr = DRAM_PHYS_BASE +
4562				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4563
4564		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4565		if (hbm_bar_addr != U64_MAX) {
4566			*val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4567						(addr - bar_base_addr));
4568
4569			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4570						hbm_bar_addr);
4571		}
4572		if (hbm_bar_addr == U64_MAX)
4573			rc = -EIO;
4574	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4575		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4576	} else {
4577		rc = -EFAULT;
4578	}
4579
4580	return rc;
4581}
4582
4583static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4584{
4585	struct asic_fixed_properties *prop = &hdev->asic_prop;
4586	struct gaudi_device *gaudi = hdev->asic_specific;
4587	u64 hbm_bar_addr;
4588	int rc = 0;
4589
4590	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4591
4592		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4593				(hdev->clock_gating_mask &
4594						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4595
4596			dev_err_ratelimited(hdev->dev,
4597				"Can't write register - clock gating is enabled!\n");
4598			rc = -EFAULT;
4599		} else {
4600			WREG32(addr - CFG_BASE, val);
4601		}
4602
4603	} else if ((addr >= SRAM_BASE_ADDR) &&
4604			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4605		writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4606					(addr - SRAM_BASE_ADDR));
4607	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4608		u64 bar_base_addr = DRAM_PHYS_BASE +
4609				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4610
4611		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4612		if (hbm_bar_addr != U64_MAX) {
4613			writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4614						(addr - bar_base_addr));
4615
4616			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4617						hbm_bar_addr);
4618		}
4619		if (hbm_bar_addr == U64_MAX)
4620			rc = -EIO;
4621	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4622		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4623	} else {
4624		rc = -EFAULT;
4625	}
4626
4627	return rc;
4628}
4629
4630static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4631{
4632	struct asic_fixed_properties *prop = &hdev->asic_prop;
4633	struct gaudi_device *gaudi = hdev->asic_specific;
4634	u64 hbm_bar_addr;
4635	int rc = 0;
4636
4637	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4638
4639		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4640				(hdev->clock_gating_mask &
4641						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4642
4643			dev_err_ratelimited(hdev->dev,
4644				"Can't read register - clock gating is enabled!\n");
4645			rc = -EFAULT;
4646		} else {
4647			u32 val_l = RREG32(addr - CFG_BASE);
4648			u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4649
4650			*val = (((u64) val_h) << 32) | val_l;
4651		}
4652
4653	} else if ((addr >= SRAM_BASE_ADDR) &&
4654		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4655		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4656				(addr - SRAM_BASE_ADDR));
4657	} else if (addr <=
4658		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4659		u64 bar_base_addr = DRAM_PHYS_BASE +
4660				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4661
4662		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4663		if (hbm_bar_addr != U64_MAX) {
4664			*val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4665						(addr - bar_base_addr));
4666
4667			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4668						hbm_bar_addr);
4669		}
4670		if (hbm_bar_addr == U64_MAX)
4671			rc = -EIO;
4672	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4673		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4674	} else {
4675		rc = -EFAULT;
4676	}
4677
4678	return rc;
4679}
4680
4681static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4682{
4683	struct asic_fixed_properties *prop = &hdev->asic_prop;
4684	struct gaudi_device *gaudi = hdev->asic_specific;
4685	u64 hbm_bar_addr;
4686	int rc = 0;
4687
4688	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4689
4690		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4691				(hdev->clock_gating_mask &
4692						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4693
4694			dev_err_ratelimited(hdev->dev,
4695				"Can't write register - clock gating is enabled!\n");
4696			rc = -EFAULT;
4697		} else {
4698			WREG32(addr - CFG_BASE, lower_32_bits(val));
4699			WREG32(addr + sizeof(u32) - CFG_BASE,
4700				upper_32_bits(val));
4701		}
4702
4703	} else if ((addr >= SRAM_BASE_ADDR) &&
4704		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4705		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4706					(addr - SRAM_BASE_ADDR));
4707	} else if (addr <=
4708		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4709		u64 bar_base_addr = DRAM_PHYS_BASE +
4710				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4711
4712		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4713		if (hbm_bar_addr != U64_MAX) {
4714			writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4715						(addr - bar_base_addr));
4716
4717			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4718						hbm_bar_addr);
4719		}
4720		if (hbm_bar_addr == U64_MAX)
4721			rc = -EIO;
4722	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4723		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4724	} else {
4725		rc = -EFAULT;
4726	}
4727
4728	return rc;
4729}
4730
4731static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4732{
4733	struct gaudi_device *gaudi = hdev->asic_specific;
4734
4735	if (hdev->hard_reset_pending)
4736		return U64_MAX;
4737
4738	return readq(hdev->pcie_bar[HBM_BAR_ID] +
4739			(addr - gaudi->hbm_bar_cur_addr));
4740}
4741
4742static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4743{
4744	struct gaudi_device *gaudi = hdev->asic_specific;
4745
4746	if (hdev->hard_reset_pending)
4747		return;
4748
4749	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4750			(addr - gaudi->hbm_bar_cur_addr));
4751}
4752
4753void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4754{
4755	/* mask to zero the MMBP and ASID bits */
4756	WREG32_AND(reg, ~0x7FF);
4757	WREG32_OR(reg, asid);
4758}
4759
4760static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4761{
4762	struct gaudi_device *gaudi = hdev->asic_specific;
4763
4764	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4765		return;
4766
4767	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4768		WARN(1, "asid %u is too big\n", asid);
4769		return;
4770	}
4771
4772	mutex_lock(&gaudi->clk_gate_mutex);
4773
4774	hdev->asic_funcs->disable_clock_gating(hdev);
4775
4776	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4777	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4778	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4779	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4780	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4781
4782	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4783	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4784	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4785	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4786	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4787
4788	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4789	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4790	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4791	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4792	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4793
4794	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4795	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4796	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4797	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4798	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4799
4800	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4801	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4802	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4803	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4804	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4805
4806	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4807	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4808	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4809	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4810	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4811
4812	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4813	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4814	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4815	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4816	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4817
4818	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4819	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4820	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4821	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4822	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4823
4824	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4825	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4826	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4827	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4828	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4829	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4830	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4831	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4832
4833	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4834	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4835	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4836	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4837	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4838	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4839	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4840
4841	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4842	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4843	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4844	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4845	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4846	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4847	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4848
4849	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4850	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4851	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4852	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4853	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4854	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4855	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4856
4857	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4858	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4859	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4860	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4861	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4862	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4863	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4864
4865	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4866	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4867	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4868	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4869	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4870	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4871	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4872
4873	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4874	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4875	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4876	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4877	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4878	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4879	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4880
4881	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4882	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4883	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4884	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4885	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4886	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4887	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4888
4889	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4890	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4891	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4892	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4893	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4894	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4895	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4896
4897	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4898	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4899	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4900	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4901	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4902	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4903	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4904	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4905	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4906	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4907
4908	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4909	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4910	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4911	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4912	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4913	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4914	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4915	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4916	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4917	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4918	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4919	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4920
4921	hdev->asic_funcs->set_clock_gating(hdev);
4922
4923	mutex_unlock(&gaudi->clk_gate_mutex);
4924}
4925
4926static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4927		struct hl_cs_job *job)
4928{
4929	struct packet_msg_prot *fence_pkt;
4930	u32 *fence_ptr;
4931	dma_addr_t fence_dma_addr;
4932	struct hl_cb *cb;
4933	u32 tmp, timeout, dma_offset;
4934	int rc;
4935
4936	if (hdev->pldm)
4937		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4938	else
4939		timeout = HL_DEVICE_TIMEOUT_USEC;
4940
4941	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4942		dev_err_ratelimited(hdev->dev,
4943			"Can't send driver job on QMAN0 because the device is not idle\n");
4944		return -EBUSY;
4945	}
4946
4947	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4948							&fence_dma_addr);
4949	if (!fence_ptr) {
4950		dev_err(hdev->dev,
4951			"Failed to allocate fence memory for QMAN0\n");
4952		return -ENOMEM;
4953	}
4954
4955	cb = job->patched_cb;
4956
4957	fence_pkt = cb->kernel_address +
4958			job->job_cb_size - sizeof(struct packet_msg_prot);
4959
4960	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4961	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4962	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4963
4964	fence_pkt->ctl = cpu_to_le32(tmp);
4965	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4966	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4967
4968	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4969
4970	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4971
4972	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4973					job->job_cb_size, cb->bus_address);
4974	if (rc) {
4975		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4976		goto free_fence_ptr;
4977	}
4978
4979	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4980				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4981				timeout, true);
4982
4983	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4984
4985	if (rc == -ETIMEDOUT) {
4986		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4987		goto free_fence_ptr;
4988	}
4989
4990free_fence_ptr:
4991	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4992			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4993
4994	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4995					fence_dma_addr);
4996	return rc;
4997}
4998
4999static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
5000{
5001	if (event_type >= GAUDI_EVENT_SIZE)
5002		goto event_not_supported;
5003
5004	if (!gaudi_irq_map_table[event_type].valid)
5005		goto event_not_supported;
5006
5007	snprintf(desc, size, gaudi_irq_map_table[event_type].name);
5008
5009	return;
5010
5011event_not_supported:
5012	snprintf(desc, size, "N/A");
5013}
5014
5015static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
5016							u32 x_y, bool is_write)
5017{
5018	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
5019
5020	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5021				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5022
5023	switch (x_y) {
5024	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5025	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5026		dma_id[0] = 0;
5027		dma_id[1] = 2;
5028		break;
5029	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5030	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5031		dma_id[0] = 1;
5032		dma_id[1] = 3;
5033		break;
5034	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5035	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5036		dma_id[0] = 4;
5037		dma_id[1] = 6;
5038		break;
5039	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5040	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5041		dma_id[0] = 5;
5042		dma_id[1] = 7;
5043		break;
5044	default:
5045		goto unknown_initiator;
5046	}
5047
5048	for (i = 0 ; i < 2 ; i++) {
5049		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5050		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5051	}
5052
5053	switch (x_y) {
5054	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5055	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5056		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5057			return "DMA0";
5058		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5059			return "DMA2";
5060		else
5061			return "DMA0 or DMA2";
5062	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5063	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5064		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5065			return "DMA1";
5066		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5067			return "DMA3";
5068		else
5069			return "DMA1 or DMA3";
5070	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5071	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5072		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5073			return "DMA4";
5074		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5075			return "DMA6";
5076		else
5077			return "DMA4 or DMA6";
5078	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5079	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5080		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5081			return "DMA5";
5082		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5083			return "DMA7";
5084		else
5085			return "DMA5 or DMA7";
5086	}
5087
5088unknown_initiator:
5089	return "unknown initiator";
5090}
5091
5092static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5093							bool is_write)
5094{
5095	u32 val, x_y, axi_id;
5096
5097	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5098				RREG32(mmMMU_UP_RAZWI_READ_ID);
5099	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5100			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5101	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5102			RAZWI_INITIATOR_AXI_ID_SHIFT);
5103
5104	switch (x_y) {
5105	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5106		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5107			return "TPC0";
5108		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5109			return "NIC0";
5110		break;
5111	case RAZWI_INITIATOR_ID_X_Y_TPC1:
5112		return "TPC1";
5113	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5114	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5115		return "MME0";
5116	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5117	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5118		return "MME1";
5119	case RAZWI_INITIATOR_ID_X_Y_TPC2:
5120		return "TPC2";
5121	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5122		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5123			return "TPC3";
5124		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5125			return "PCI";
5126		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5127			return "CPU";
5128		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5129			return "PSOC";
5130		break;
5131	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5132	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5133	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5134	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5135	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5136	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5137	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5138	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5139		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5140	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5141		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5142			return "TPC4";
5143		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5144			return "NIC1";
5145		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5146			return "NIC2";
5147		break;
5148	case RAZWI_INITIATOR_ID_X_Y_TPC5:
5149		return "TPC5";
5150	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5151	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5152		return "MME2";
5153	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5154	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5155		return "MME3";
5156	case RAZWI_INITIATOR_ID_X_Y_TPC6:
5157		return "TPC6";
5158	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5159		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5160			return "TPC7";
5161		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5162			return "NIC4";
5163		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5164			return "NIC5";
5165		break;
5166	default:
5167		break;
5168	}
5169
5170	dev_err(hdev->dev,
5171		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5172		val,
5173		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5174		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5175		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5176			RAZWI_INITIATOR_AXI_ID_MASK);
5177
5178	return "unknown initiator";
5179}
5180
5181static void gaudi_print_razwi_info(struct hl_device *hdev)
5182{
5183	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5184		dev_err_ratelimited(hdev->dev,
5185			"RAZWI event caused by illegal write of %s\n",
5186			gaudi_get_razwi_initiator_name(hdev, true));
5187		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5188	}
5189
5190	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5191		dev_err_ratelimited(hdev->dev,
5192			"RAZWI event caused by illegal read of %s\n",
5193			gaudi_get_razwi_initiator_name(hdev, false));
5194		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5195	}
5196}
5197
5198static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5199{
5200	struct gaudi_device *gaudi = hdev->asic_specific;
5201	u64 addr;
5202	u32 val;
5203
5204	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5205		return;
5206
5207	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5208	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5209		addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5210		addr <<= 32;
5211		addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5212
5213		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5214					addr);
5215
5216		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5217	}
5218
5219	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5220	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5221		addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5222		addr <<= 32;
5223		addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5224
5225		dev_err_ratelimited(hdev->dev,
5226				"MMU access error on va 0x%llx\n", addr);
5227
5228		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5229	}
5230}
5231
5232/*
5233 *  +-------------------+------------------------------------------------------+
5234 *  | Configuration Reg |                     Description                      |
5235 *  |      Address      |                                                      |
5236 *  +-------------------+------------------------------------------------------+
5237 *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
5238 *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
5239 *  |                   |0xF34 memory wrappers 63:32                           |
5240 *  |                   |0xF38 memory wrappers 95:64                           |
5241 *  |                   |0xF3C memory wrappers 127:96                          |
5242 *  +-------------------+------------------------------------------------------+
5243 *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
5244 *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
5245 *  |                   |0xF44 memory wrappers 63:32                           |
5246 *  |                   |0xF48 memory wrappers 95:64                           |
5247 *  |                   |0xF4C memory wrappers 127:96                          |
5248 *  +-------------------+------------------------------------------------------+
5249 */
5250static int gaudi_extract_ecc_info(struct hl_device *hdev,
5251		struct ecc_info_extract_params *params, u64 *ecc_address,
5252		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
5253{
5254	struct gaudi_device *gaudi = hdev->asic_specific;
5255	u32 i, num_mem_regs, reg, err_bit;
5256	u64 err_addr, err_word = 0;
5257	int rc = 0;
5258
5259	num_mem_regs = params->num_memories / 32 +
5260			((params->num_memories % 32) ? 1 : 0);
5261
5262	if (params->block_address >= CFG_BASE)
5263		params->block_address -= CFG_BASE;
5264
5265	if (params->derr)
5266		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
5267	else
5268		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
5269
5270	if (params->disable_clock_gating) {
5271		mutex_lock(&gaudi->clk_gate_mutex);
5272		hdev->asic_funcs->disable_clock_gating(hdev);
5273	}
5274
5275	/* Set invalid wrapper index */
5276	*memory_wrapper_idx = 0xFF;
5277
5278	/* Iterate through memory wrappers, a single bit must be set */
5279	for (i = 0 ; i < num_mem_regs ; i++) {
5280		err_addr += i * 4;
5281		err_word = RREG32(err_addr);
5282		if (err_word) {
5283			err_bit = __ffs(err_word);
5284			*memory_wrapper_idx = err_bit + (32 * i);
5285			break;
5286		}
5287	}
5288
5289	if (*memory_wrapper_idx == 0xFF) {
5290		dev_err(hdev->dev, "ECC error information cannot be found\n");
5291		rc = -EINVAL;
5292		goto enable_clk_gate;
5293	}
5294
5295	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
5296			*memory_wrapper_idx);
5297
5298	*ecc_address =
5299		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
5300	*ecc_syndrom =
5301		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
5302
5303	/* Clear error indication */
5304	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
5305	if (params->derr)
5306		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
5307	else
5308		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
5309
5310	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
5311
5312enable_clk_gate:
5313	if (params->disable_clock_gating) {
5314		hdev->asic_funcs->set_clock_gating(hdev);
5315
5316		mutex_unlock(&gaudi->clk_gate_mutex);
5317	}
5318
5319	return rc;
5320}
5321
5322static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5323					  const char *qm_name,
5324					  u64 glbl_sts_addr,
5325					  u64 arb_err_addr)
5326{
5327	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5328	char reg_desc[32];
5329
5330	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
5331	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5332		glbl_sts_clr_val = 0;
5333		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5334
5335		if (!glbl_sts_val)
5336			continue;
5337
5338		if (i == QMAN_STREAMS)
5339			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5340		else
5341			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5342
5343		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5344			if (glbl_sts_val & BIT(j)) {
5345				dev_err_ratelimited(hdev->dev,
5346						"%s %s. err cause: %s\n",
5347						qm_name, reg_desc,
5348						gaudi_qman_error_cause[j]);
5349				glbl_sts_clr_val |= BIT(j);
5350			}
5351		}
5352
5353		/* Write 1 clear errors */
5354		WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5355	}
5356
5357	arb_err_val = RREG32(arb_err_addr);
5358
5359	if (!arb_err_val)
5360		return;
5361
5362	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5363		if (arb_err_val & BIT(j)) {
5364			dev_err_ratelimited(hdev->dev,
5365					"%s ARB_ERR. err cause: %s\n",
5366					qm_name,
5367					gaudi_qman_arb_error_cause[j]);
5368		}
5369	}
5370}
5371
5372static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
5373		struct hl_eq_ecc_data *ecc_data)
5374{
5375	struct ecc_info_extract_params params;
5376	u64 ecc_address = 0, ecc_syndrom = 0;
5377	u8 index, memory_wrapper_idx = 0;
5378	bool extract_info_from_fw;
5379	int rc;
5380
5381	switch (event_type) {
5382	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
5383	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
5384		extract_info_from_fw = true;
5385		break;
5386	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5387		index = event_type - GAUDI_EVENT_TPC0_SERR;
5388		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5389		params.num_memories = 90;
5390		params.derr = false;
5391		params.disable_clock_gating = true;
5392		extract_info_from_fw = false;
5393		break;
5394	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5395		index = event_type - GAUDI_EVENT_TPC0_DERR;
5396		params.block_address =
5397			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5398		params.num_memories = 90;
5399		params.derr = true;
5400		params.disable_clock_gating = true;
5401		extract_info_from_fw = false;
5402		break;
5403	case GAUDI_EVENT_MME0_ACC_SERR:
5404	case GAUDI_EVENT_MME1_ACC_SERR:
5405	case GAUDI_EVENT_MME2_ACC_SERR:
5406	case GAUDI_EVENT_MME3_ACC_SERR:
5407		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5408		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5409		params.num_memories = 128;
5410		params.derr = false;
5411		params.disable_clock_gating = true;
5412		extract_info_from_fw = false;
5413		break;
5414	case GAUDI_EVENT_MME0_ACC_DERR:
5415	case GAUDI_EVENT_MME1_ACC_DERR:
5416	case GAUDI_EVENT_MME2_ACC_DERR:
5417	case GAUDI_EVENT_MME3_ACC_DERR:
5418		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5419		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5420		params.num_memories = 128;
5421		params.derr = true;
5422		params.disable_clock_gating = true;
5423		extract_info_from_fw = false;
5424		break;
5425	case GAUDI_EVENT_MME0_SBAB_SERR:
5426	case GAUDI_EVENT_MME1_SBAB_SERR:
5427	case GAUDI_EVENT_MME2_SBAB_SERR:
5428	case GAUDI_EVENT_MME3_SBAB_SERR:
5429		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5430		params.block_address =
5431			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5432		params.num_memories = 33;
5433		params.derr = false;
5434		params.disable_clock_gating = true;
5435		extract_info_from_fw = false;
5436		break;
5437	case GAUDI_EVENT_MME0_SBAB_DERR:
5438	case GAUDI_EVENT_MME1_SBAB_DERR:
5439	case GAUDI_EVENT_MME2_SBAB_DERR:
5440	case GAUDI_EVENT_MME3_SBAB_DERR:
5441		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5442		params.block_address =
5443			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5444		params.num_memories = 33;
5445		params.derr = true;
5446		params.disable_clock_gating = true;
5447		extract_info_from_fw = false;
5448		break;
5449	default:
5450		return;
5451	}
5452
5453	if (extract_info_from_fw) {
5454		ecc_address = le64_to_cpu(ecc_data->ecc_address);
5455		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
5456		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
5457	} else {
5458		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
5459				&ecc_syndrom, &memory_wrapper_idx);
5460		if (rc)
5461			return;
5462	}
5463
5464	dev_err(hdev->dev,
5465		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
5466		ecc_address, ecc_syndrom, memory_wrapper_idx);
5467}
5468
5469static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5470{
5471	u64 glbl_sts_addr, arb_err_addr;
5472	u8 index;
5473	char desc[32];
5474
5475	switch (event_type) {
5476	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5477		index = event_type - GAUDI_EVENT_TPC0_QM;
5478		glbl_sts_addr =
5479			mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5480		arb_err_addr =
5481			mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5482		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5483		break;
5484	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5485		index = event_type - GAUDI_EVENT_MME0_QM;
5486		glbl_sts_addr =
5487			mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5488		arb_err_addr =
5489			mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5490		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5491		break;
5492	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5493		index = event_type - GAUDI_EVENT_DMA0_QM;
5494		glbl_sts_addr =
5495			mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5496		arb_err_addr =
5497			mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5498		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5499		break;
5500	default:
5501		return;
5502	}
5503
5504	gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5505}
5506
5507static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5508					bool razwi)
5509{
5510	char desc[64] = "";
5511
5512	gaudi_get_event_desc(event_type, desc, sizeof(desc));
5513	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5514		event_type, desc);
5515
5516	if (razwi) {
5517		gaudi_print_razwi_info(hdev);
5518		gaudi_print_mmu_error_info(hdev);
5519	}
5520}
5521
5522static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5523{
5524	struct gaudi_device *gaudi = hdev->asic_specific;
5525
5526	/* Unmask all IRQs since some could have been received
5527	 * during the soft reset
5528	 */
5529	return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
5530}
5531
5532static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5533{
5534	int ch, err = 0;
5535	u32 base, val, val2;
5536
5537	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
5538	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5539		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5540		val = (val & 0xFF) | ((val >> 8) & 0xFF);
5541		if (val) {
5542			err = 1;
5543			dev_err(hdev->dev,
5544				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5545				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5546				(val >> 2) & 0x1, (val >> 3) & 0x1,
5547				(val >> 4) & 0x1);
5548
5549			val2 = RREG32(base + ch * 0x1000 + 0x060);
5550			dev_err(hdev->dev,
5551				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5552				device, ch * 2,
5553				RREG32(base + ch * 0x1000 + 0x064),
5554				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5555				(val2 & 0xFF0000) >> 16,
5556				(val2 & 0xFF000000) >> 24);
5557		}
5558
5559		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5560		val = (val & 0xFF) | ((val >> 8) & 0xFF);
5561		if (val) {
5562			err = 1;
5563			dev_err(hdev->dev,
5564				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5565				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5566				(val >> 2) & 0x1, (val >> 3) & 0x1,
5567				(val >> 4) & 0x1);
5568
5569			val2 = RREG32(base + ch * 0x1000 + 0x070);
5570			dev_err(hdev->dev,
5571				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5572				device, ch * 2 + 1,
5573				RREG32(base + ch * 0x1000 + 0x074),
5574				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5575				(val2 & 0xFF0000) >> 16,
5576				(val2 & 0xFF000000) >> 24);
5577		}
5578
5579		/* Clear interrupts */
5580		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5581		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5582		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5583		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5584		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5585		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5586	}
5587
5588	val  = RREG32(base + 0x8F30);
5589	val2 = RREG32(base + 0x8F34);
5590	if (val | val2) {
5591		err = 1;
5592		dev_err(hdev->dev,
5593			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5594			device, val, val2);
5595	}
5596	val  = RREG32(base + 0x8F40);
5597	val2 = RREG32(base + 0x8F44);
5598	if (val | val2) {
5599		err = 1;
5600		dev_err(hdev->dev,
5601			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5602			device, val, val2);
5603	}
5604
5605	return err;
5606}
5607
5608static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5609{
5610	switch (hbm_event_type) {
5611	case GAUDI_EVENT_HBM0_SPI_0:
5612	case GAUDI_EVENT_HBM0_SPI_1:
5613		return 0;
5614	case GAUDI_EVENT_HBM1_SPI_0:
5615	case GAUDI_EVENT_HBM1_SPI_1:
5616		return 1;
5617	case GAUDI_EVENT_HBM2_SPI_0:
5618	case GAUDI_EVENT_HBM2_SPI_1:
5619		return 2;
5620	case GAUDI_EVENT_HBM3_SPI_0:
5621	case GAUDI_EVENT_HBM3_SPI_1:
5622		return 3;
5623	default:
5624		break;
5625	}
5626
5627	/* Should never happen */
5628	return 0;
5629}
5630
5631static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5632					char *interrupt_name)
5633{
5634	struct gaudi_device *gaudi = hdev->asic_specific;
5635	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5636	bool soft_reset_required = false;
5637
5638	/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
5639	 * gating, and thus cannot be done in CPU-CP and should be done instead
5640	 * by the driver.
5641	 */
5642
5643	mutex_lock(&gaudi->clk_gate_mutex);
5644
5645	hdev->asic_funcs->disable_clock_gating(hdev);
5646
5647	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5648				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5649
5650	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5651		if (tpc_interrupts_cause & BIT(i)) {
5652			dev_err_ratelimited(hdev->dev,
5653					"TPC%d_%s interrupt cause: %s\n",
5654					tpc_id, interrupt_name,
5655					gaudi_tpc_interrupts_cause[i]);
5656			/* If this is QM error, we need to soft-reset */
5657			if (i == 15)
5658				soft_reset_required = true;
5659		}
5660
5661	/* Clear interrupts */
5662	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5663
5664	hdev->asic_funcs->set_clock_gating(hdev);
5665
5666	mutex_unlock(&gaudi->clk_gate_mutex);
5667
5668	return soft_reset_required;
5669}
5670
5671static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5672{
5673	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5674}
5675
5676static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5677{
5678	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5679}
5680
5681static void gaudi_print_clk_change_info(struct hl_device *hdev,
5682					u16 event_type)
5683{
5684	switch (event_type) {
5685	case GAUDI_EVENT_FIX_POWER_ENV_S:
5686		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
5687		dev_info_ratelimited(hdev->dev,
5688			"Clock throttling due to power consumption\n");
5689		break;
5690
5691	case GAUDI_EVENT_FIX_POWER_ENV_E:
5692		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
5693		dev_info_ratelimited(hdev->dev,
5694			"Power envelop is safe, back to optimal clock\n");
5695		break;
5696
5697	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5698		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
5699		dev_info_ratelimited(hdev->dev,
5700			"Clock throttling due to overheating\n");
5701		break;
5702
5703	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5704		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
5705		dev_info_ratelimited(hdev->dev,
5706			"Thermal envelop is safe, back to optimal clock\n");
5707		break;
5708
5709	default:
5710		dev_err(hdev->dev, "Received invalid clock change event %d\n",
5711			event_type);
5712		break;
5713	}
5714}
5715
5716static void gaudi_handle_eqe(struct hl_device *hdev,
5717				struct hl_eq_entry *eq_entry)
5718{
5719	struct gaudi_device *gaudi = hdev->asic_specific;
5720	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5721	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5722			>> EQ_CTL_EVENT_TYPE_SHIFT);
5723	u8 cause;
5724	bool reset_required;
5725
5726	if (event_type >= GAUDI_EVENT_SIZE) {
5727		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
5728				event_type, GAUDI_EVENT_SIZE - 1);
5729		return;
5730	}
5731
5732	gaudi->events_stat[event_type]++;
5733	gaudi->events_stat_aggregate[event_type]++;
5734
5735	switch (event_type) {
5736	case GAUDI_EVENT_PCIE_CORE_DERR:
5737	case GAUDI_EVENT_PCIE_IF_DERR:
5738	case GAUDI_EVENT_PCIE_PHY_DERR:
5739	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5740	case GAUDI_EVENT_MME0_ACC_DERR:
5741	case GAUDI_EVENT_MME0_SBAB_DERR:
5742	case GAUDI_EVENT_MME1_ACC_DERR:
5743	case GAUDI_EVENT_MME1_SBAB_DERR:
5744	case GAUDI_EVENT_MME2_ACC_DERR:
5745	case GAUDI_EVENT_MME2_SBAB_DERR:
5746	case GAUDI_EVENT_MME3_ACC_DERR:
5747	case GAUDI_EVENT_MME3_SBAB_DERR:
5748	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5749		fallthrough;
5750	case GAUDI_EVENT_CPU_IF_ECC_DERR:
5751	case GAUDI_EVENT_PSOC_MEM_DERR:
5752	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5753	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5754	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5755	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5756	case GAUDI_EVENT_MMU_DERR:
5757		gaudi_print_irq_info(hdev, event_type, true);
5758		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5759		if (hdev->hard_reset_on_fw_events)
5760			hl_device_reset(hdev, true, false);
5761		break;
5762
5763	case GAUDI_EVENT_GIC500:
5764	case GAUDI_EVENT_AXI_ECC:
5765	case GAUDI_EVENT_L2_RAM_ECC:
5766	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5767		gaudi_print_irq_info(hdev, event_type, false);
5768		if (hdev->hard_reset_on_fw_events)
5769			hl_device_reset(hdev, true, false);
5770		break;
5771
5772	case GAUDI_EVENT_HBM0_SPI_0:
5773	case GAUDI_EVENT_HBM1_SPI_0:
5774	case GAUDI_EVENT_HBM2_SPI_0:
5775	case GAUDI_EVENT_HBM3_SPI_0:
5776		gaudi_print_irq_info(hdev, event_type, false);
5777		gaudi_hbm_read_interrupts(hdev,
5778					  gaudi_hbm_event_to_dev(event_type));
5779		if (hdev->hard_reset_on_fw_events)
5780			hl_device_reset(hdev, true, false);
5781		break;
5782
5783	case GAUDI_EVENT_HBM0_SPI_1:
5784	case GAUDI_EVENT_HBM1_SPI_1:
5785	case GAUDI_EVENT_HBM2_SPI_1:
5786	case GAUDI_EVENT_HBM3_SPI_1:
5787		gaudi_print_irq_info(hdev, event_type, false);
5788		gaudi_hbm_read_interrupts(hdev,
5789					  gaudi_hbm_event_to_dev(event_type));
5790		break;
5791
5792	case GAUDI_EVENT_TPC0_DEC:
5793	case GAUDI_EVENT_TPC1_DEC:
5794	case GAUDI_EVENT_TPC2_DEC:
5795	case GAUDI_EVENT_TPC3_DEC:
5796	case GAUDI_EVENT_TPC4_DEC:
5797	case GAUDI_EVENT_TPC5_DEC:
5798	case GAUDI_EVENT_TPC6_DEC:
5799	case GAUDI_EVENT_TPC7_DEC:
5800		gaudi_print_irq_info(hdev, event_type, true);
5801		reset_required = gaudi_tpc_read_interrupts(hdev,
5802					tpc_dec_event_to_tpc_id(event_type),
5803					"AXI_SLV_DEC_Error");
5804		if (reset_required) {
5805			dev_err(hdev->dev, "hard reset required due to %s\n",
5806				gaudi_irq_map_table[event_type].name);
5807
5808			if (hdev->hard_reset_on_fw_events)
5809				hl_device_reset(hdev, true, false);
5810		} else {
5811			hl_fw_unmask_irq(hdev, event_type);
5812		}
5813		break;
5814
5815	case GAUDI_EVENT_TPC0_KRN_ERR:
5816	case GAUDI_EVENT_TPC1_KRN_ERR:
5817	case GAUDI_EVENT_TPC2_KRN_ERR:
5818	case GAUDI_EVENT_TPC3_KRN_ERR:
5819	case GAUDI_EVENT_TPC4_KRN_ERR:
5820	case GAUDI_EVENT_TPC5_KRN_ERR:
5821	case GAUDI_EVENT_TPC6_KRN_ERR:
5822	case GAUDI_EVENT_TPC7_KRN_ERR:
5823		gaudi_print_irq_info(hdev, event_type, true);
5824		reset_required = gaudi_tpc_read_interrupts(hdev,
5825					tpc_krn_event_to_tpc_id(event_type),
5826					"KRN_ERR");
5827		if (reset_required) {
5828			dev_err(hdev->dev, "hard reset required due to %s\n",
5829				gaudi_irq_map_table[event_type].name);
5830
5831			if (hdev->hard_reset_on_fw_events)
5832				hl_device_reset(hdev, true, false);
5833		} else {
5834			hl_fw_unmask_irq(hdev, event_type);
5835		}
5836		break;
5837
5838	case GAUDI_EVENT_PCIE_CORE_SERR:
5839	case GAUDI_EVENT_PCIE_IF_SERR:
5840	case GAUDI_EVENT_PCIE_PHY_SERR:
5841	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5842	case GAUDI_EVENT_MME0_ACC_SERR:
5843	case GAUDI_EVENT_MME0_SBAB_SERR:
5844	case GAUDI_EVENT_MME1_ACC_SERR:
5845	case GAUDI_EVENT_MME1_SBAB_SERR:
5846	case GAUDI_EVENT_MME2_ACC_SERR:
5847	case GAUDI_EVENT_MME2_SBAB_SERR:
5848	case GAUDI_EVENT_MME3_ACC_SERR:
5849	case GAUDI_EVENT_MME3_SBAB_SERR:
5850	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5851	case GAUDI_EVENT_CPU_IF_ECC_SERR:
5852	case GAUDI_EVENT_PSOC_MEM_SERR:
5853	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5854	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5855	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5856	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5857		fallthrough;
5858	case GAUDI_EVENT_MMU_SERR:
5859		gaudi_print_irq_info(hdev, event_type, true);
5860		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5861		hl_fw_unmask_irq(hdev, event_type);
5862		break;
5863
5864	case GAUDI_EVENT_PCIE_DEC:
5865	case GAUDI_EVENT_MME0_WBC_RSP:
5866	case GAUDI_EVENT_MME0_SBAB0_RSP:
5867	case GAUDI_EVENT_MME1_WBC_RSP:
5868	case GAUDI_EVENT_MME1_SBAB0_RSP:
5869	case GAUDI_EVENT_MME2_WBC_RSP:
5870	case GAUDI_EVENT_MME2_SBAB0_RSP:
5871	case GAUDI_EVENT_MME3_WBC_RSP:
5872	case GAUDI_EVENT_MME3_SBAB0_RSP:
5873	case GAUDI_EVENT_CPU_AXI_SPLITTER:
5874	case GAUDI_EVENT_PSOC_AXI_DEC:
5875	case GAUDI_EVENT_PSOC_PRSTN_FALL:
5876	case GAUDI_EVENT_MMU_PAGE_FAULT:
5877	case GAUDI_EVENT_MMU_WR_PERM:
5878	case GAUDI_EVENT_RAZWI_OR_ADC:
5879	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5880	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5881	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5882		fallthrough;
5883	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5884		gaudi_print_irq_info(hdev, event_type, true);
5885		gaudi_handle_qman_err(hdev, event_type);
5886		hl_fw_unmask_irq(hdev, event_type);
5887		break;
5888
5889	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5890		gaudi_print_irq_info(hdev, event_type, true);
5891		if (hdev->hard_reset_on_fw_events)
5892			hl_device_reset(hdev, true, false);
5893		break;
5894
5895	case GAUDI_EVENT_TPC0_BMON_SPMU:
5896	case GAUDI_EVENT_TPC1_BMON_SPMU:
5897	case GAUDI_EVENT_TPC2_BMON_SPMU:
5898	case GAUDI_EVENT_TPC3_BMON_SPMU:
5899	case GAUDI_EVENT_TPC4_BMON_SPMU:
5900	case GAUDI_EVENT_TPC5_BMON_SPMU:
5901	case GAUDI_EVENT_TPC6_BMON_SPMU:
5902	case GAUDI_EVENT_TPC7_BMON_SPMU:
5903	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5904		gaudi_print_irq_info(hdev, event_type, false);
5905		hl_fw_unmask_irq(hdev, event_type);
5906		break;
5907
5908	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5909		gaudi_print_clk_change_info(hdev, event_type);
5910		hl_fw_unmask_irq(hdev, event_type);
5911		break;
5912
5913	case GAUDI_EVENT_PSOC_GPIO_U16_0:
5914		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5915		dev_err(hdev->dev,
5916			"Received high temp H/W interrupt %d (cause %d)\n",
5917			event_type, cause);
5918		break;
5919
5920	default:
5921		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5922				event_type);
5923		break;
5924	}
5925}
5926
5927static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5928					u32 *size)
5929{
5930	struct gaudi_device *gaudi = hdev->asic_specific;
5931
5932	if (aggregate) {
5933		*size = (u32) sizeof(gaudi->events_stat_aggregate);
5934		return gaudi->events_stat_aggregate;
5935	}
5936
5937	*size = (u32) sizeof(gaudi->events_stat);
5938	return gaudi->events_stat;
5939}
5940
5941static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5942					u32 flags)
5943{
5944	struct gaudi_device *gaudi = hdev->asic_specific;
5945	u32 status, timeout_usec;
5946	int rc;
5947
5948	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5949		hdev->hard_reset_pending)
5950		return 0;
5951
5952	if (hdev->pldm)
5953		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5954	else
5955		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5956
5957	mutex_lock(&hdev->mmu_cache_lock);
5958
5959	/* L0 & L1 invalidation */
5960	WREG32(mmSTLB_INV_PS, 3);
5961	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
5962	WREG32(mmSTLB_INV_PS, 2);
5963
5964	rc = hl_poll_timeout(
5965		hdev,
5966		mmSTLB_INV_PS,
5967		status,
5968		!status,
5969		1000,
5970		timeout_usec);
5971
5972	WREG32(mmSTLB_INV_SET, 0);
5973
5974	mutex_unlock(&hdev->mmu_cache_lock);
5975
5976	if (rc) {
5977		dev_err_ratelimited(hdev->dev,
5978					"MMU cache invalidation timeout\n");
5979		hl_device_reset(hdev, true, false);
5980	}
5981
5982	return rc;
5983}
5984
5985static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
5986				bool is_hard, u32 asid, u64 va, u64 size)
5987{
5988	struct gaudi_device *gaudi = hdev->asic_specific;
5989	u32 status, timeout_usec;
5990	u32 inv_data;
5991	u32 pi;
5992	int rc;
5993
5994	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5995		hdev->hard_reset_pending)
5996		return 0;
5997
5998	mutex_lock(&hdev->mmu_cache_lock);
5999
6000	if (hdev->pldm)
6001		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6002	else
6003		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6004
6005	/*
6006	 * TODO: currently invalidate entire L0 & L1 as in regular hard
6007	 * invalidation. Need to apply invalidation of specific cache
6008	 * lines with mask of ASID & VA & size.
6009	 * Note that L1 with be flushed entirely in any case.
6010	 */
6011
6012	/* L0 & L1 invalidation */
6013	inv_data = RREG32(mmSTLB_CACHE_INV);
6014	/* PI is 8 bit */
6015	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
6016	WREG32(mmSTLB_CACHE_INV,
6017		(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
6018
6019	rc = hl_poll_timeout(
6020		hdev,
6021		mmSTLB_INV_CONSUMER_INDEX,
6022		status,
6023		status == pi,
6024		1000,
6025		timeout_usec);
6026
6027	mutex_unlock(&hdev->mmu_cache_lock);
6028
6029	if (rc) {
6030		dev_err_ratelimited(hdev->dev,
6031					"MMU cache invalidation timeout\n");
6032		hl_device_reset(hdev, true, false);
6033	}
6034
6035	return rc;
6036}
6037
6038static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6039					u32 asid, u64 phys_addr)
6040{
6041	u32 status, timeout_usec;
6042	int rc;
6043
6044	if (hdev->pldm)
6045		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6046	else
6047		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6048
6049	WREG32(MMU_ASID, asid);
6050	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6051	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6052	WREG32(MMU_BUSY, 0x80000000);
6053
6054	rc = hl_poll_timeout(
6055		hdev,
6056		MMU_BUSY,
6057		status,
6058		!(status & 0x80000000),
6059		1000,
6060		timeout_usec);
6061
6062	if (rc) {
6063		dev_err(hdev->dev,
6064			"Timeout during MMU hop0 config of asid %d\n", asid);
6065		return rc;
6066	}
6067
6068	return 0;
6069}
6070
6071static int gaudi_send_heartbeat(struct hl_device *hdev)
6072{
6073	struct gaudi_device *gaudi = hdev->asic_specific;
6074
6075	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6076		return 0;
6077
6078	return hl_fw_send_heartbeat(hdev);
6079}
6080
6081static int gaudi_cpucp_info_get(struct hl_device *hdev)
6082{
6083	struct gaudi_device *gaudi = hdev->asic_specific;
6084	struct asic_fixed_properties *prop = &hdev->asic_prop;
6085	int rc;
6086
6087	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6088		return 0;
6089
6090	rc = hl_fw_cpucp_info_get(hdev);
6091	if (rc)
6092		return rc;
6093
6094	if (!strlen(prop->cpucp_info.card_name))
6095		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6096				CARD_NAME_MAX_LEN);
6097
6098	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
6099
6100	if (hdev->card_type == cpucp_card_type_pci)
6101		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
6102	else if (hdev->card_type == cpucp_card_type_pmc)
6103		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
6104
6105	hdev->max_power = prop->max_power_default;
6106
6107	return 0;
6108}
6109
6110static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
6111					struct seq_file *s)
6112{
6113	struct gaudi_device *gaudi = hdev->asic_specific;
6114	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6115	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6116	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6117	bool is_idle = true, is_eng_idle, is_slave;
6118	u64 offset;
6119	int i, dma_id;
6120
6121	mutex_lock(&gaudi->clk_gate_mutex);
6122
6123	hdev->asic_funcs->disable_clock_gating(hdev);
6124
6125	if (s)
6126		seq_puts(s,
6127			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
6128			"---  -------  ------------  ----------  -------------\n");
6129
6130	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6131		dma_id = gaudi_dma_assignment[i];
6132		offset = dma_id * DMA_QMAN_OFFSET;
6133
6134		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6135		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6136		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6137		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6138				IS_DMA_IDLE(dma_core_sts0);
6139		is_idle &= is_eng_idle;
6140
6141		if (mask)
6142			*mask |= ((u64) !is_eng_idle) <<
6143					(GAUDI_ENGINE_ID_DMA_0 + dma_id);
6144		if (s)
6145			seq_printf(s, fmt, dma_id,
6146				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6147				qm_cgm_sts, dma_core_sts0);
6148	}
6149
6150	if (s)
6151		seq_puts(s,
6152			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
6153			"---  -------  ------------  ----------  ----------\n");
6154
6155	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6156		offset = i * TPC_QMAN_OFFSET;
6157		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6158		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6159		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6160		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6161				IS_TPC_IDLE(tpc_cfg_sts);
6162		is_idle &= is_eng_idle;
6163
6164		if (mask)
6165			*mask |= ((u64) !is_eng_idle) <<
6166						(GAUDI_ENGINE_ID_TPC_0 + i);
6167		if (s)
6168			seq_printf(s, fmt, i,
6169				is_eng_idle ? "Y" : "N",
6170				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6171	}
6172
6173	if (s)
6174		seq_puts(s,
6175			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
6176			"---  -------  ------------  ----------  -----------\n");
6177
6178	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6179		offset = i * MME_QMAN_OFFSET;
6180		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6181		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6182
6183		/* MME 1 & 3 are slaves, no need to check their QMANs */
6184		is_slave = i % 2;
6185		if (!is_slave) {
6186			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6187			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6188			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6189		}
6190
6191		is_idle &= is_eng_idle;
6192
6193		if (mask)
6194			*mask |= ((u64) !is_eng_idle) <<
6195						(GAUDI_ENGINE_ID_MME_0 + i);
6196		if (s) {
6197			if (!is_slave)
6198				seq_printf(s, fmt, i,
6199					is_eng_idle ? "Y" : "N",
6200					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6201			else
6202				seq_printf(s, mme_slave_fmt, i,
6203					is_eng_idle ? "Y" : "N", "-",
6204					"-", mme_arch_sts);
6205		}
6206	}
6207
6208	if (s)
6209		seq_puts(s, "\n");
6210
6211	hdev->asic_funcs->set_clock_gating(hdev);
6212
6213	mutex_unlock(&gaudi->clk_gate_mutex);
6214
6215	return is_idle;
6216}
6217
6218static void gaudi_hw_queues_lock(struct hl_device *hdev)
6219	__acquires(&gaudi->hw_queues_lock)
6220{
6221	struct gaudi_device *gaudi = hdev->asic_specific;
6222
6223	spin_lock(&gaudi->hw_queues_lock);
6224}
6225
6226static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6227	__releases(&gaudi->hw_queues_lock)
6228{
6229	struct gaudi_device *gaudi = hdev->asic_specific;
6230
6231	spin_unlock(&gaudi->hw_queues_lock);
6232}
6233
6234static u32 gaudi_get_pci_id(struct hl_device *hdev)
6235{
6236	return hdev->pdev->device;
6237}
6238
6239static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6240				size_t max_size)
6241{
6242	struct gaudi_device *gaudi = hdev->asic_specific;
6243
6244	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6245		return 0;
6246
6247	return hl_fw_get_eeprom_data(hdev, data, max_size);
6248}
6249
6250/*
6251 * this function should be used only during initialization and/or after reset,
6252 * when there are no active users.
6253 */
6254static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6255				u32 tpc_id)
6256{
6257	struct gaudi_device *gaudi = hdev->asic_specific;
6258	u64 kernel_timeout;
6259	u32 status, offset;
6260	int rc;
6261
6262	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6263
6264	if (hdev->pldm)
6265		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6266	else
6267		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6268
6269	mutex_lock(&gaudi->clk_gate_mutex);
6270
6271	hdev->asic_funcs->disable_clock_gating(hdev);
6272
6273	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6274			lower_32_bits(tpc_kernel));
6275	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6276			upper_32_bits(tpc_kernel));
6277
6278	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6279			lower_32_bits(tpc_kernel));
6280	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6281			upper_32_bits(tpc_kernel));
6282	/* set a valid LUT pointer, content is of no significance */
6283	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6284			lower_32_bits(tpc_kernel));
6285	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6286			upper_32_bits(tpc_kernel));
6287
6288	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6289			lower_32_bits(CFG_BASE +
6290				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6291
6292	WREG32(mmTPC0_CFG_TPC_CMD + offset,
6293			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6294			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6295	/* wait a bit for the engine to start executing */
6296	usleep_range(1000, 1500);
6297
6298	/* wait until engine has finished executing */
6299	rc = hl_poll_timeout(
6300		hdev,
6301		mmTPC0_CFG_STATUS + offset,
6302		status,
6303		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6304				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6305		1000,
6306		kernel_timeout);
6307
6308	if (rc) {
6309		dev_err(hdev->dev,
6310			"Timeout while waiting for TPC%d icache prefetch\n",
6311			tpc_id);
6312		hdev->asic_funcs->set_clock_gating(hdev);
6313		mutex_unlock(&gaudi->clk_gate_mutex);
6314		return -EIO;
6315	}
6316
6317	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6318			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6319
6320	/* wait a bit for the engine to start executing */
6321	usleep_range(1000, 1500);
6322
6323	/* wait until engine has finished executing */
6324	rc = hl_poll_timeout(
6325		hdev,
6326		mmTPC0_CFG_STATUS + offset,
6327		status,
6328		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6329				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6330		1000,
6331		kernel_timeout);
6332
6333	if (rc) {
6334		dev_err(hdev->dev,
6335			"Timeout while waiting for TPC%d vector pipe\n",
6336			tpc_id);
6337		hdev->asic_funcs->set_clock_gating(hdev);
6338		mutex_unlock(&gaudi->clk_gate_mutex);
6339		return -EIO;
6340	}
6341
6342	rc = hl_poll_timeout(
6343		hdev,
6344		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6345		status,
6346		(status == 0),
6347		1000,
6348		kernel_timeout);
6349
6350	hdev->asic_funcs->set_clock_gating(hdev);
6351	mutex_unlock(&gaudi->clk_gate_mutex);
6352
6353	if (rc) {
6354		dev_err(hdev->dev,
6355			"Timeout while waiting for TPC%d kernel to execute\n",
6356			tpc_id);
6357		return -EIO;
6358	}
6359
6360	return 0;
6361}
6362
6363static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6364{
6365	return RREG32(mmHW_STATE);
6366}
6367
6368static int gaudi_ctx_init(struct hl_ctx *ctx)
6369{
6370	return 0;
6371}
6372
6373static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6374{
6375	return gaudi_cq_assignment[cq_idx];
6376}
6377
6378static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6379{
6380	return sizeof(struct packet_msg_short) +
6381			sizeof(struct packet_msg_prot) * 2;
6382}
6383
6384static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6385{
6386	return sizeof(struct packet_msg_short) * 4 +
6387			sizeof(struct packet_fence) +
6388			sizeof(struct packet_msg_prot) * 2;
6389}
6390
6391static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6392{
6393	struct hl_cb *cb = (struct hl_cb *) data;
6394	struct packet_msg_short *pkt;
6395	u32 value, ctl;
6396
6397	pkt = cb->kernel_address;
6398	memset(pkt, 0, sizeof(*pkt));
6399
6400	/* Inc by 1, Mode ADD */
6401	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
6402	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
6403
6404	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
6405	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6406	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
6407	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6408	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
6409	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6410	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6411
6412	pkt->value = cpu_to_le32(value);
6413	pkt->ctl = cpu_to_le32(ctl);
6414}
6415
6416static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6417					u16 addr)
6418{
6419	u32 ctl, pkt_size = sizeof(*pkt);
6420
6421	memset(pkt, 0, pkt_size);
6422
6423	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6424	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
6425	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6426	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6427	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6428	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
6429
6430	pkt->value = cpu_to_le32(value);
6431	pkt->ctl = cpu_to_le32(ctl);
6432
6433	return pkt_size;
6434}
6435
6436static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6437					u16 sob_val, u16 addr)
6438{
6439	u32 ctl, value, pkt_size = sizeof(*pkt);
6440	u8 mask = ~(1 << (sob_id & 0x7));
6441
6442	memset(pkt, 0, pkt_size);
6443
6444	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
6445	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
6446	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
6447			0); /* GREATER OR EQUAL*/
6448	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
6449
6450	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6451	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6452	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6453	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6454	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6455	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6456	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6457
6458	pkt->value = cpu_to_le32(value);
6459	pkt->ctl = cpu_to_le32(ctl);
6460
6461	return pkt_size;
6462}
6463
6464static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6465{
6466	u32 ctl, cfg, pkt_size = sizeof(*pkt);
6467
6468	memset(pkt, 0, pkt_size);
6469
6470	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
6471	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
6472	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
6473
6474	ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
6475	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6476	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6477	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6478
6479	pkt->cfg = cpu_to_le32(cfg);
6480	pkt->ctl = cpu_to_le32(ctl);
6481
6482	return pkt_size;
6483}
6484
6485static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6486			u16 sob_val, u16 mon_id, u32 q_idx)
6487{
6488	struct hl_cb *cb = (struct hl_cb *) data;
6489	void *buf = cb->kernel_address;
6490	u64 monitor_base, fence_addr = 0;
6491	u32 size = 0;
6492	u16 msg_addr_offset;
6493
6494	switch (q_idx) {
6495	case GAUDI_QUEUE_ID_DMA_0_0:
6496		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6497		break;
6498	case GAUDI_QUEUE_ID_DMA_0_1:
6499		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6500		break;
6501	case GAUDI_QUEUE_ID_DMA_0_2:
6502		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6503		break;
6504	case GAUDI_QUEUE_ID_DMA_0_3:
6505		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6506		break;
6507	case GAUDI_QUEUE_ID_DMA_1_0:
6508		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6509		break;
6510	case GAUDI_QUEUE_ID_DMA_1_1:
6511		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6512		break;
6513	case GAUDI_QUEUE_ID_DMA_1_2:
6514		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6515		break;
6516	case GAUDI_QUEUE_ID_DMA_1_3:
6517		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6518		break;
6519	case GAUDI_QUEUE_ID_DMA_5_0:
6520		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6521		break;
6522	case GAUDI_QUEUE_ID_DMA_5_1:
6523		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6524		break;
6525	case GAUDI_QUEUE_ID_DMA_5_2:
6526		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6527		break;
6528	case GAUDI_QUEUE_ID_DMA_5_3:
6529		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6530		break;
6531	default:
6532		/* queue index should be valid here */
6533		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6534				q_idx);
6535		return;
6536	}
6537
6538	fence_addr += CFG_BASE;
6539
6540	/*
6541	 * monitor_base should be the content of the base0 address registers,
6542	 * so it will be added to the msg short offsets
6543	 */
6544	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6545
6546	/* First monitor config packet: low address of the sync */
6547	msg_addr_offset =
6548		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6549				monitor_base;
6550
6551	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6552					msg_addr_offset);
6553
6554	/* Second monitor config packet: high address of the sync */
6555	msg_addr_offset =
6556		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6557				monitor_base;
6558
6559	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6560					msg_addr_offset);
6561
6562	/*
6563	 * Third monitor config packet: the payload, i.e. what to write when the
6564	 * sync triggers
6565	 */
6566	msg_addr_offset =
6567		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6568				monitor_base;
6569
6570	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6571
6572	/* Fourth monitor config packet: bind the monitor to a sync object */
6573	msg_addr_offset =
6574		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6575				monitor_base;
6576	size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6577						msg_addr_offset);
6578
6579	/* Fence packet */
6580	size += gaudi_add_fence_pkt(buf + size);
6581}
6582
6583static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6584{
6585	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6586
6587	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6588		hw_sob->sob_id);
6589
6590	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6591		0);
6592
6593	kref_init(&hw_sob->kref);
6594}
6595
6596static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6597{
6598	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6599							HL_POWER9_HOST_MAGIC) {
6600		hdev->power9_64bit_dma_enable = 1;
6601		hdev->dma_mask = 64;
6602	} else {
6603		hdev->power9_64bit_dma_enable = 0;
6604		hdev->dma_mask = 48;
6605	}
6606}
6607
6608static u64 gaudi_get_device_time(struct hl_device *hdev)
6609{
6610	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6611
6612	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6613}
6614
6615static const struct hl_asic_funcs gaudi_funcs = {
6616	.early_init = gaudi_early_init,
6617	.early_fini = gaudi_early_fini,
6618	.late_init = gaudi_late_init,
6619	.late_fini = gaudi_late_fini,
6620	.sw_init = gaudi_sw_init,
6621	.sw_fini = gaudi_sw_fini,
6622	.hw_init = gaudi_hw_init,
6623	.hw_fini = gaudi_hw_fini,
6624	.halt_engines = gaudi_halt_engines,
6625	.suspend = gaudi_suspend,
6626	.resume = gaudi_resume,
6627	.cb_mmap = gaudi_cb_mmap,
6628	.ring_doorbell = gaudi_ring_doorbell,
6629	.pqe_write = gaudi_pqe_write,
6630	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6631	.asic_dma_free_coherent = gaudi_dma_free_coherent,
6632	.get_int_queue_base = gaudi_get_int_queue_base,
6633	.test_queues = gaudi_test_queues,
6634	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6635	.asic_dma_pool_free = gaudi_dma_pool_free,
6636	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6637	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6638	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6639	.cs_parser = gaudi_cs_parser,
6640	.asic_dma_map_sg = gaudi_dma_map_sg,
6641	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6642	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6643	.update_eq_ci = gaudi_update_eq_ci,
6644	.context_switch = gaudi_context_switch,
6645	.restore_phase_topology = gaudi_restore_phase_topology,
6646	.debugfs_read32 = gaudi_debugfs_read32,
6647	.debugfs_write32 = gaudi_debugfs_write32,
6648	.debugfs_read64 = gaudi_debugfs_read64,
6649	.debugfs_write64 = gaudi_debugfs_write64,
6650	.add_device_attr = gaudi_add_device_attr,
6651	.handle_eqe = gaudi_handle_eqe,
6652	.set_pll_profile = gaudi_set_pll_profile,
6653	.get_events_stat = gaudi_get_events_stat,
6654	.read_pte = gaudi_read_pte,
6655	.write_pte = gaudi_write_pte,
6656	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6657	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6658	.send_heartbeat = gaudi_send_heartbeat,
6659	.set_clock_gating = gaudi_set_clock_gating,
6660	.disable_clock_gating = gaudi_disable_clock_gating,
6661	.debug_coresight = gaudi_debug_coresight,
6662	.is_device_idle = gaudi_is_device_idle,
6663	.soft_reset_late_init = gaudi_soft_reset_late_init,
6664	.hw_queues_lock = gaudi_hw_queues_lock,
6665	.hw_queues_unlock = gaudi_hw_queues_unlock,
6666	.get_pci_id = gaudi_get_pci_id,
6667	.get_eeprom_data = gaudi_get_eeprom_data,
6668	.send_cpu_message = gaudi_send_cpu_message,
6669	.get_hw_state = gaudi_get_hw_state,
6670	.pci_bars_map = gaudi_pci_bars_map,
6671	.init_iatu = gaudi_init_iatu,
6672	.rreg = hl_rreg,
6673	.wreg = hl_wreg,
6674	.halt_coresight = gaudi_halt_coresight,
6675	.ctx_init = gaudi_ctx_init,
6676	.get_clk_rate = gaudi_get_clk_rate,
6677	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6678	.read_device_fw_version = gaudi_read_device_fw_version,
6679	.load_firmware_to_device = gaudi_load_firmware_to_device,
6680	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
6681	.get_signal_cb_size = gaudi_get_signal_cb_size,
6682	.get_wait_cb_size = gaudi_get_wait_cb_size,
6683	.gen_signal_cb = gaudi_gen_signal_cb,
6684	.gen_wait_cb = gaudi_gen_wait_cb,
6685	.reset_sob = gaudi_reset_sob,
6686	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6687	.get_device_time = gaudi_get_device_time
6688};
6689
6690/**
6691 * gaudi_set_asic_funcs - set GAUDI function pointers
6692 *
6693 * @hdev: pointer to hl_device structure
6694 *
6695 */
6696void gaudi_set_asic_funcs(struct hl_device *hdev)
6697{
6698	hdev->asic_funcs = &gaudi_funcs;
6699}
6700