1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#include "gaudiP.h"
9#include "../include/hw_ip/mmu/mmu_general.h"
10#include "../include/hw_ip/mmu/mmu_v1_1.h"
11#include "../include/gaudi/gaudi_masks.h"
12#include "../include/gaudi/gaudi_fw_if.h"
13#include "../include/gaudi/gaudi_reg_map.h"
14#include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16#include <linux/module.h>
17#include <linux/pci.h>
18#include <linux/firmware.h>
19#include <linux/hwmon.h>
20#include <linux/iommu.h>
21#include <linux/seq_file.h>
22
23/*
24 * Gaudi security scheme:
25 *
26 * 1. Host is protected by:
27 *        - Range registers
28 *        - MMU
29 *
30 * 2. DDR is protected by:
31 *        - Range registers (protect the first 512MB)
32 *
33 * 3. Configuration is protected by:
34 *        - Range registers
35 *        - Protection bits
36 *
37 * MMU is always enabled.
38 *
39 * QMAN DMA channels 0,1 (PCI DMA):
40 *     - DMA is not secured.
41 *     - PQ and CQ are secured.
42 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43 *                      because of TDMA (tensor DMA). Hence, WREG is never
44 *                      secured.
45 *
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to not secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
49 * running.
50 *
51 * The current use cases for the driver to use the DMA are:
52 *     - Clear SRAM on context switch (happens when the device is idle)
54 *     - MMU page tables area clear (happens on init)
55 *
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
59 *
60 */
61
62#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65
66#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
67
68#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
69#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
70#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
71#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
72
73#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
74#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
75#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
76#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
77#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
78#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
79#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
80#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
81#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
82
83#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
84
85#define GAUDI_MAX_STRING_LEN		20
86
87#define GAUDI_CB_POOL_CB_CNT		512
88#define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
89
90#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
91
92#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
93
94#define GAUDI_NUM_OF_QM_ERR_CAUSE	16
95
96#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
97
98#define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */
99
100#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
101
102#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
103
104#define MONITOR_SOB_STRING_SIZE		256
105
106static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
107	GAUDI_QUEUE_ID_DMA_0_0,
108	GAUDI_QUEUE_ID_DMA_0_1,
109	GAUDI_QUEUE_ID_DMA_0_2,
110	GAUDI_QUEUE_ID_DMA_0_3,
111	GAUDI_QUEUE_ID_DMA_1_0,
112	GAUDI_QUEUE_ID_DMA_1_1,
113	GAUDI_QUEUE_ID_DMA_1_2,
114	GAUDI_QUEUE_ID_DMA_1_3
115};
116
117static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
118	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
119	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
120	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
121	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
122	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
123	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
124	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
125	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
126};
127
128static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
129	[0] = GAUDI_QUEUE_ID_DMA_0_0,
130	[1] = GAUDI_QUEUE_ID_DMA_0_1,
131	[2] = GAUDI_QUEUE_ID_DMA_0_2,
132	[3] = GAUDI_QUEUE_ID_DMA_0_3,
133	[4] = GAUDI_QUEUE_ID_DMA_1_0,
134	[5] = GAUDI_QUEUE_ID_DMA_1_1,
135	[6] = GAUDI_QUEUE_ID_DMA_1_2,
136	[7] = GAUDI_QUEUE_ID_DMA_1_3,
137};
138
139static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
140	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
141	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
142	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
143	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
144	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
145	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
146	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
147	[PACKET_FENCE]		= sizeof(struct packet_fence),
148	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
149	[PACKET_NOP]		= sizeof(struct packet_nop),
150	[PACKET_STOP]		= sizeof(struct packet_stop),
151	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
152	[PACKET_WAIT]		= sizeof(struct packet_wait),
153	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
154};
155
156static inline bool validate_packet_id(enum packet_id id)
157{
158	switch (id) {
159	case PACKET_WREG_32:
160	case PACKET_WREG_BULK:
161	case PACKET_MSG_LONG:
162	case PACKET_MSG_SHORT:
163	case PACKET_CP_DMA:
164	case PACKET_REPEAT:
165	case PACKET_MSG_PROT:
166	case PACKET_FENCE:
167	case PACKET_LIN_DMA:
168	case PACKET_NOP:
169	case PACKET_STOP:
170	case PACKET_ARB_POINT:
171	case PACKET_WAIT:
172	case PACKET_LOAD_AND_EXE:
173		return true;
174	default:
175		return false;
176	}
177}
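
/*
 * Illustrative sketch only (not used by the driver flows below): combining
 * validate_packet_id() with gaudi_packet_sizes[] to compute how many bytes a
 * CB parser would advance for a given packet ctl word. The ctl word is assumed
 * to already be in CPU byte order, and variable-length packets (e.g.
 * PACKET_WREG_BULK) would need extra handling on top of this.
 */
static inline u32 gaudi_example_pkt_size(u32 ctl)
{
	enum packet_id pkt_id = FIELD_GET(GAUDI_PKT_CTL_OPCODE_MASK, ctl);

	if (!validate_packet_id(pkt_id))
		return 0; /* caller should treat 0 as a malformed packet */

	return gaudi_packet_sizes[pkt_id];
}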
178
179static const char * const
180gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
181	"tpc_address_exceed_slm",
182	"tpc_div_by_0",
183	"tpc_spu_mac_overflow",
184	"tpc_spu_addsub_overflow",
185	"tpc_spu_abs_overflow",
186	"tpc_spu_fp_dst_nan_inf",
187	"tpc_spu_fp_dst_denorm",
188	"tpc_vpu_mac_overflow",
189	"tpc_vpu_addsub_overflow",
190	"tpc_vpu_abs_overflow",
191	"tpc_vpu_fp_dst_nan_inf",
192	"tpc_vpu_fp_dst_denorm",
193	"tpc_assertions",
194	"tpc_illegal_instruction",
195	"tpc_pc_wrap_around",
196	"tpc_qm_sw_err",
197	"tpc_hbw_rresp_err",
198	"tpc_hbw_bresp_err",
199	"tpc_lbw_rresp_err",
200	"tpc_lbw_bresp_err"
201};
202
203static const char * const
204gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
205	"PQ AXI HBW error",
206	"CQ AXI HBW error",
207	"CP AXI HBW error",
208	"CP error due to undefined OPCODE",
209	"CP encountered STOP OPCODE",
210	"CP AXI LBW error",
211	"CP WRREG32 or WRBULK returned error",
212	"N/A",
213	"FENCE 0 inc over max value and clipped",
214	"FENCE 1 inc over max value and clipped",
215	"FENCE 2 inc over max value and clipped",
216	"FENCE 3 inc over max value and clipped",
217	"FENCE 0 dec under min value and clipped",
218	"FENCE 1 dec under min value and clipped",
219	"FENCE 2 dec under min value and clipped",
220	"FENCE 3 dec under min value and clipped"
221};
222
223static const char * const
224gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
225	"Choice push while full error",
226	"Choice Q watchdog error",
227	"MSG AXI LBW returned with error"
228};
229
230static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
231	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
232	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
233	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
234	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
235	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
236	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
237	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
238	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
239	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
240	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
241	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
242	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
243	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
244	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
245	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
246	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
247	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
248	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
249	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
250	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
251	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
252	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
253	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
254	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
255	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
256	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
257	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
258	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
259	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
260	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
261	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
262	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
263	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
264	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
265	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
266	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
267	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
268	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
269	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
270	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
271	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
272	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
273	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
274	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
275	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
276	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
277	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
278	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
279	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
280	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
281	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
282	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
283	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
284	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
285	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
286	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
287	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
288	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
289	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
290	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
291	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
292	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
293	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
294	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
295	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
296	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
297	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
298	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
299	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
300	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
301	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
302	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
303	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
304	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
305	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
306	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
307	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
308	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
309	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
310	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
311	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
312	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
313	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
314	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
315	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
316	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
317	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
318	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
319	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
320	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
321	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
322	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
323	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
324	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
325	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
326	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
327	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
328	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
329	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
330	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
331	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
332	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
333	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
334	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
335	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
336	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
337	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
338	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
339	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
340	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
341	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
342	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
343	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
344};
345
346static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
347	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
348	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
349	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
350	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
351	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
352	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
353	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
354	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
355	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
356	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
357	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
358	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
359	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
360	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
361	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
362	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
363	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
364	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
365	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
366	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
367	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
368	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
369	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
370	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
371	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
372	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
373	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
374};
375
376static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
377	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
378	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
379	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
380	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
381	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
382	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
383	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
384	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
385	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
386	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
387	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
388};
389
390static s64 gaudi_state_dump_specs_props[] = {
391	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
392	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
393	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
394	[SP_MON_OBJ_WR_ADDR_LOW] =
395		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
396	[SP_MON_OBJ_WR_ADDR_HIGH] =
397		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
398	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
399	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
400	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
401	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
402	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
403	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
404	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
405	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
406	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
407	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
408	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
409	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
410	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
411	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
412	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
413	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
414	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
415	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
416	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
417	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
418	[SP_FENCE0_CNT_OFFSET] =
419		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
420	[SP_FENCE0_RDATA_OFFSET] =
421		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
422	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
423	[SP_NUM_CORES] = 1,
424};
425
426static const int gaudi_queue_id_to_engine_id[] = {
427	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
428	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
429	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
430	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
431	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
432	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
433	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
434	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
435	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
436	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
437	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
438	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
439	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
440	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
441	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
442	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
443	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
444	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
445	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
446	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
447	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
448	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
449	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
450	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
451	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
452	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
453	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
454	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
455	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
456};
457
458/* The order here is opposite to the order of the indexing in the h/w.
459 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
460 */
461static const char * const gaudi_sync_manager_names[] = {
462	"SYNC_MGR_E_N",
463	"SYNC_MGR_W_N",
464	"SYNC_MGR_E_S",
465	"SYNC_MGR_W_S",
466	NULL
467};
468
469struct ecc_info_extract_params {
470	u64 block_address;
471	u32 num_memories;
472	bool derr;
473};
474
475static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
476								u64 phys_addr);
477static int gaudi_send_job_on_qman0(struct hl_device *hdev,
478					struct hl_cs_job *job);
479static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
480					u32 size, u64 val);
481static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
482					u32 num_regs, u32 val);
483static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
484				u32 tpc_id);
485static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
486static int gaudi_cpucp_info_get(struct hl_device *hdev);
487static void gaudi_disable_clock_gating(struct hl_device *hdev);
488static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
489static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
490				u32 size, bool eb);
491static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
492				struct hl_gen_wait_properties *prop);
493static inline enum hl_collective_mode
494get_collective_mode(struct hl_device *hdev, u32 queue_id)
495{
496	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
497		return HL_COLLECTIVE_MASTER;
498
499	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
500			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
501		return HL_COLLECTIVE_SLAVE;
502
503	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
504			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
505		return HL_COLLECTIVE_SLAVE;
506
507	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
508			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
509		return HL_COLLECTIVE_SLAVE;
510
511	return HL_COLLECTIVE_NOT_SUPPORTED;
512}
513
514static inline void set_default_power_values(struct hl_device *hdev)
515{
516	struct asic_fixed_properties *prop = &hdev->asic_prop;
517
518	if (hdev->card_type == cpucp_card_type_pmc) {
519		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
520
521		if (prop->fw_security_enabled)
522			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
523		else
524			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
525	} else {
526		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
527		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
528	}
529}
530
531static int gaudi_set_fixed_properties(struct hl_device *hdev)
532{
533	struct asic_fixed_properties *prop = &hdev->asic_prop;
534	u32 num_sync_stream_queues = 0;
535	int i;
536
537	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
538	prop->hw_queues_props = kcalloc(prop->max_queues,
539			sizeof(struct hw_queue_properties),
540			GFP_KERNEL);
541
542	if (!prop->hw_queues_props)
543		return -ENOMEM;
544
545	for (i = 0 ; i < prop->max_queues ; i++) {
546		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
547			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
548			prop->hw_queues_props[i].driver_only = 0;
549			prop->hw_queues_props[i].supports_sync_stream = 1;
550			prop->hw_queues_props[i].cb_alloc_flags =
551				CB_ALLOC_KERNEL;
552			num_sync_stream_queues++;
553		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
554			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
555			prop->hw_queues_props[i].driver_only = 1;
556			prop->hw_queues_props[i].supports_sync_stream = 0;
557			prop->hw_queues_props[i].cb_alloc_flags =
558				CB_ALLOC_KERNEL;
559		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
560			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
561			prop->hw_queues_props[i].driver_only = 0;
562			prop->hw_queues_props[i].supports_sync_stream = 0;
563			prop->hw_queues_props[i].cb_alloc_flags =
564				CB_ALLOC_USER;
565
566		}
567		prop->hw_queues_props[i].collective_mode =
568						get_collective_mode(hdev, i);
569	}
570
571	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
572	prop->cfg_base_address = CFG_BASE;
573	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
574	prop->host_base_address = HOST_PHYS_BASE;
575	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
576	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
577	prop->completion_mode = HL_COMPLETION_MODE_JOB;
578	prop->collective_first_sob = 0;
579	prop->collective_first_mon = 0;
580
581	/* 2 SOBs per internal queue stream are reserved for collective */
582	prop->sync_stream_first_sob =
583			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
584			* QMAN_STREAMS * HL_RSVD_SOBS;
585
586	/* 1 monitor per internal queue stream is reserved for collective
587	 * 2 monitors per external queue stream are reserved for collective
588	 */
589	prop->sync_stream_first_mon =
590			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
591			(NUMBER_OF_EXT_HW_QUEUES * 2);
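	/*
	 * Worked example for the sync_stream_first_sob computation above, with
	 * purely illustrative values (the actual constants live in the headers):
	 * if NUMBER_OF_SOBS_IN_GRP were 11 and HL_MAX_SOBS_PER_MONITOR were 8,
	 * each group would be padded to 16 SOBs, so with 4 QMAN streams and
	 * HL_RSVD_SOBS == 2 the collective flow would reserve 16 * 4 * 2 = 128
	 * SOBs and the user sync-stream SOBs would start at index 128.
	 */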
592
593	prop->dram_base_address = DRAM_PHYS_BASE;
594	prop->dram_size = GAUDI_HBM_SIZE_32GB;
595	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
596	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
597
598	prop->sram_base_address = SRAM_BASE_ADDR;
599	prop->sram_size = SRAM_SIZE;
600	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
601	prop->sram_user_base_address =
602			prop->sram_base_address + SRAM_USER_BASE_OFFSET;
603
604	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
605	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
606
607	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
608	if (hdev->pldm)
609		prop->mmu_pgt_size = 0x800000; /* 8MB */
610	else
611		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
612	prop->mmu_pte_size = HL_PTE_SIZE;
613	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
614	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
615	prop->dram_page_size = PAGE_SIZE_2MB;
616	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
617	prop->dram_supports_virtual_memory = false;
618
619	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
620	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
621	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
622	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
623	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
624	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
625	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
626	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
627	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
628	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
629	prop->pmmu.start_addr = VA_HOST_SPACE_START;
630	prop->pmmu.end_addr =
631			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
632	prop->pmmu.page_size = PAGE_SIZE_4KB;
633	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
634	prop->pmmu.last_mask = LAST_MASK;
635	/* TODO: will be duplicated until implementing per-MMU props */
636	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
637	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
638
639	/* PMMU and HPMMU are the same except for the page size */
640	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
641	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
642
643	/* shifts and masks are the same in PMMU and DMMU */
644	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
645	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
646	prop->dmmu.end_addr = VA_HOST_SPACE_END;
647	prop->dmmu.page_size = PAGE_SIZE_2MB;
648
649	prop->cfg_size = CFG_SIZE;
650	prop->max_asid = MAX_ASID;
651	prop->num_of_events = GAUDI_EVENT_SIZE;
652	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
653	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
654
655	set_default_power_values(hdev);
656
657	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
658	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
659
660	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
661	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
662
663	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
664					CARD_NAME_MAX_LEN);
665
666	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
667
668	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
669			prop->sync_stream_first_sob +
670			(num_sync_stream_queues * HL_RSVD_SOBS);
671	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
672			prop->sync_stream_first_mon +
673			(num_sync_stream_queues * HL_RSVD_MONS);
674
675	prop->first_available_user_interrupt = USHRT_MAX;
676	prop->tpc_interrupt_id = USHRT_MAX;
677
678	/* single msi */
679	prop->eq_interrupt_id = 0;
680
681	for (i = 0 ; i < HL_MAX_DCORES ; i++)
682		prop->first_available_cq[i] = USHRT_MAX;
683
684	prop->fw_cpu_boot_dev_sts0_valid = false;
685	prop->fw_cpu_boot_dev_sts1_valid = false;
686	prop->hard_reset_done_by_fw = false;
687	prop->gic_interrupts_enable = true;
688
689	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
690
691	prop->clk_pll_index = HL_GAUDI_MME_PLL;
692	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
693
694	prop->use_get_power_for_reset_history = true;
695
696	prop->configurable_stop_on_err = true;
697
698	prop->set_max_power_on_device_init = true;
699
700	prop->dma_mask = 48;
701
702	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
703
704	return 0;
705}
706
707static int gaudi_pci_bars_map(struct hl_device *hdev)
708{
709	static const char * const name[] = {"SRAM", "CFG", "HBM"};
710	bool is_wc[3] = {false, false, true};
711	int rc;
712
713	rc = hl_pci_bars_map(hdev, name, is_wc);
714	if (rc)
715		return rc;
716
717	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
718			(CFG_BASE - SPI_FLASH_BASE_ADDR);
719
720	return 0;
721}
722
723static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
724{
725	struct gaudi_device *gaudi = hdev->asic_specific;
726	struct hl_inbound_pci_region pci_region;
727	u64 old_addr = addr;
728	int rc;
729
730	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
731		return old_addr;
732
733	if (hdev->asic_prop.iatu_done_by_fw)
734		return U64_MAX;
735
736	/* Inbound Region 2 - Bar 4 - Point to HBM */
737	pci_region.mode = PCI_BAR_MATCH_MODE;
738	pci_region.bar = HBM_BAR_ID;
739	pci_region.addr = addr;
740	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
741	if (rc)
742		return U64_MAX;
743
744	if (gaudi) {
745		old_addr = gaudi->hbm_bar_cur_addr;
746		gaudi->hbm_bar_cur_addr = addr;
747	}
748
749	return old_addr;
750}
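
/*
 * Minimal usage sketch (assumptions: dram_pci_bar_size is a power of two and
 * the caller doesn't care about restoring the previous window): read one u32
 * from an arbitrary HBM address by sliding the HBM BAR window with
 * gaudi_set_hbm_bar_base() and reading through the mapped BAR. The real access
 * helpers elsewhere in the driver also restore the window when they are done.
 */
static inline u32 gaudi_example_read_hbm_u32(struct hl_device *hdev, u64 addr)
{
	u64 bar_size = hdev->asic_prop.dram_pci_bar_size;
	u64 bar_base = addr & ~(bar_size - 1);

	if (gaudi_set_hbm_bar_base(hdev, bar_base) == U64_MAX)
		return 0;	/* window could not be moved */

	return readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base));
}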
751
752static int gaudi_init_iatu(struct hl_device *hdev)
753{
754	struct hl_inbound_pci_region inbound_region;
755	struct hl_outbound_pci_region outbound_region;
756	int rc;
757
758	if (hdev->asic_prop.iatu_done_by_fw)
759		return 0;
760
761	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
762	inbound_region.mode = PCI_BAR_MATCH_MODE;
763	inbound_region.bar = SRAM_BAR_ID;
764	inbound_region.addr = SRAM_BASE_ADDR;
765	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
766	if (rc)
767		goto done;
768
769	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
770	inbound_region.mode = PCI_BAR_MATCH_MODE;
771	inbound_region.bar = CFG_BAR_ID;
772	inbound_region.addr = SPI_FLASH_BASE_ADDR;
773	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
774	if (rc)
775		goto done;
776
777	/* Inbound Region 2 - Bar 4 - Point to HBM */
778	inbound_region.mode = PCI_BAR_MATCH_MODE;
779	inbound_region.bar = HBM_BAR_ID;
780	inbound_region.addr = DRAM_PHYS_BASE;
781	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
782	if (rc)
783		goto done;
784
785	/* Outbound Region 0 - Point to Host */
786	outbound_region.addr = HOST_PHYS_BASE;
787	outbound_region.size = HOST_PHYS_SIZE;
788	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
789
790done:
791	return rc;
792}
793
794static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
795{
796	return RREG32(mmHW_STATE);
797}
798
799static int gaudi_early_init(struct hl_device *hdev)
800{
801	struct asic_fixed_properties *prop = &hdev->asic_prop;
802	struct pci_dev *pdev = hdev->pdev;
803	resource_size_t pci_bar_size;
804	u32 fw_boot_status;
805	int rc;
806
807	rc = gaudi_set_fixed_properties(hdev);
808	if (rc) {
809		dev_err(hdev->dev, "Failed setting fixed properties\n");
810		return rc;
811	}
812
813	/* Check BAR sizes */
814	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
815
816	if (pci_bar_size != SRAM_BAR_SIZE) {
817		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
818			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
819		rc = -ENODEV;
820		goto free_queue_props;
821	}
822
823	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
824
825	if (pci_bar_size != CFG_BAR_SIZE) {
826		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
827			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
828		rc = -ENODEV;
829		goto free_queue_props;
830	}
831
832	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
833	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
834
835	/* If FW security is enabled at this point it means no access to ELBI */
836	if (hdev->asic_prop.fw_security_enabled) {
837		hdev->asic_prop.iatu_done_by_fw = true;
838
839		/*
840		 * GIC-security-bit can ONLY be set by CPUCP, so at this stage
841		 * the decision can only be taken based on PCI ID security.
842		 */
843		hdev->asic_prop.gic_interrupts_enable = false;
844		goto pci_init;
845	}
846
847	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
848				&fw_boot_status);
849	if (rc)
850		goto free_queue_props;
851
852	/* Check whether FW is configuring iATU */
853	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
854			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
855		hdev->asic_prop.iatu_done_by_fw = true;
856
857pci_init:
858	rc = hl_pci_init(hdev);
859	if (rc)
860		goto free_queue_props;
861
862	/* Before continuing with the initialization, we need to read the preboot
863	 * version to determine whether we are running with security-enabled firmware
864	 */
865	rc = hl_fw_read_preboot_status(hdev);
866	if (rc) {
867		if (hdev->reset_on_preboot_fail)
868			/* we are already on failure flow, so don't check if hw_fini fails. */
869			hdev->asic_funcs->hw_fini(hdev, true, false);
870		goto pci_fini;
871	}
872
873	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
874		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
875		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
876		if (rc) {
877			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
878			goto pci_fini;
879		}
880	}
881
882	return 0;
883
884pci_fini:
885	hl_pci_fini(hdev);
886free_queue_props:
887	kfree(hdev->asic_prop.hw_queues_props);
888	return rc;
889}
890
891static int gaudi_early_fini(struct hl_device *hdev)
892{
893	kfree(hdev->asic_prop.hw_queues_props);
894	hl_pci_fini(hdev);
895
896	return 0;
897}
898
899/**
900 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
901 *
902 * @hdev: pointer to hl_device structure
903 *
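 * Return: 0 for success, negative value for error.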
904 */
905static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
906{
907	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
908	struct asic_fixed_properties *prop = &hdev->asic_prop;
909	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
910	int rc;
911
912	if ((hdev->fw_components & FW_TYPE_LINUX) &&
913			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
914		struct gaudi_device *gaudi = hdev->asic_specific;
915
916		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
917			return 0;
918
919		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
920
921		if (rc)
922			return rc;
923
924		freq = pll_freq_arr[2];
925	} else {
926		/* Backward compatibility */
927		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
928		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
929		nr = RREG32(mmPSOC_CPU_PLL_NR);
930		nf = RREG32(mmPSOC_CPU_PLL_NF);
931		od = RREG32(mmPSOC_CPU_PLL_OD);
932
933		if (div_sel == DIV_SEL_REF_CLK ||
934				div_sel == DIV_SEL_DIVIDED_REF) {
935			if (div_sel == DIV_SEL_REF_CLK)
936				freq = PLL_REF_CLK;
937			else
938				freq = PLL_REF_CLK / (div_fctr + 1);
939		} else if (div_sel == DIV_SEL_PLL_CLK ||
940			div_sel == DIV_SEL_DIVIDED_PLL) {
941			pll_clk = PLL_REF_CLK * (nf + 1) /
942					((nr + 1) * (od + 1));
943			if (div_sel == DIV_SEL_PLL_CLK)
944				freq = pll_clk;
945			else
946				freq = pll_clk / (div_fctr + 1);
947		} else {
948			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
949			freq = 0;
950		}
951	}
952
953	prop->psoc_timestamp_frequency = freq;
954	prop->psoc_pci_pll_nr = nr;
955	prop->psoc_pci_pll_nf = nf;
956	prop->psoc_pci_pll_od = od;
957	prop->psoc_pci_pll_div_factor = div_fctr;
958
959	return 0;
960}
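
/*
 * Worked example of the legacy PLL readout above, with purely illustrative
 * register values: if PLL_REF_CLK were 50 (MHz), nr = 0, nf = 31, od = 0 and
 * div_fctr = 1, then pll_clk = 50 * 32 / (1 * 1) = 1600 MHz, so
 * DIV_SEL_PLL_CLK would report 1600 MHz while DIV_SEL_DIVIDED_PLL would
 * report 1600 / (1 + 1) = 800 MHz.
 */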
961
962static int _gaudi_init_tpc_mem(struct hl_device *hdev,
963		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
964{
965	struct asic_fixed_properties *prop = &hdev->asic_prop;
966	struct packet_lin_dma *init_tpc_mem_pkt;
967	struct hl_cs_job *job;
968	struct hl_cb *cb;
969	u64 dst_addr;
970	u32 cb_size, ctl;
971	u8 tpc_id;
972	int rc;
973
974	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
975	if (!cb)
976		return -EFAULT;
977
978	init_tpc_mem_pkt = cb->kernel_address;
979	cb_size = sizeof(*init_tpc_mem_pkt);
980	memset(init_tpc_mem_pkt, 0, cb_size);
981
982	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
983
984	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
985	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
986	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
987	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
988
989	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
990
991	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
992
993	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
994	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
995				round_up(prop->sram_user_base_address, SZ_8K));
996	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
997
998	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
999	if (!job) {
1000		dev_err(hdev->dev, "Failed to allocate a new job\n");
1001		rc = -ENOMEM;
1002		goto release_cb;
1003	}
1004
1005	job->id = 0;
1006	job->user_cb = cb;
1007	atomic_inc(&job->user_cb->cs_cnt);
1008	job->user_cb_size = cb_size;
1009	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1010	job->patched_cb = job->user_cb;
1011	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1012
1013	hl_debugfs_add_job(hdev, job);
1014
1015	rc = gaudi_send_job_on_qman0(hdev, job);
1016
1017	if (rc)
1018		goto free_job;
1019
1020	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1021		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1022		if (rc)
1023			break;
1024	}
1025
1026free_job:
1027	hl_userptr_delete_list(hdev, &job->userptr_list);
1028	hl_debugfs_remove_job(hdev, job);
1029	kfree(job);
1030	atomic_dec(&cb->cs_cnt);
1031
1032release_cb:
1033	hl_cb_put(cb);
1034	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1035
1036	return rc;
1037}
1038
1039/*
1040 * gaudi_init_tpc_mem() - Initialize TPC memories.
1041 * @hdev: Pointer to hl_device structure.
1042 *
1043 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1044 *
1045 * Return: 0 for success, negative value for error.
1046 */
1047static int gaudi_init_tpc_mem(struct hl_device *hdev)
1048{
1049	const struct firmware *fw;
1050	size_t fw_size;
1051	void *cpu_addr;
1052	dma_addr_t dma_handle;
1053	int rc, count = 5;
1054
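	/*
	 * request_firmware() can be interrupted and return -EINTR (e.g. when a
	 * signal is delivered while the firmware is being fetched), hence the
	 * small bounded retry loop below.
	 */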
1055again:
1056	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1057	if (rc == -EINTR && count-- > 0) {
1058		msleep(50);
1059		goto again;
1060	}
1061
1062	if (rc) {
1063		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1064				GAUDI_TPC_FW_FILE);
1065		goto out;
1066	}
1067
1068	fw_size = fw->size;
1069	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1070	if (!cpu_addr) {
1071		dev_err(hdev->dev,
1072			"Failed to allocate %zu of dma memory for TPC kernel\n",
1073			fw_size);
1074		rc = -ENOMEM;
1075		goto out;
1076	}
1077
1078	memcpy(cpu_addr, fw->data, fw_size);
1079
1080	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1081
1082	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1083
1084out:
1085	release_firmware(fw);
1086	return rc;
1087}
1088
1089static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1090{
1091	struct gaudi_device *gaudi = hdev->asic_specific;
1092	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1093	struct hl_hw_queue *q;
1094	u32 i, sob_id, sob_group_id, queue_id;
1095
1096	/* Iterate through SOB groups and assign a SOB for each slave queue */
1097	sob_group_id =
1098		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1099	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1100
1101	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1102	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1103		q = &hdev->kernel_queues[queue_id + (4 * i)];
1104		q->sync_stream_prop.collective_sob_id = sob_id + i;
1105	}
1106
1107	/* Both DMA5 and TPC7 use the same resources since only a single
1108	 * engine needs to participate in the reduction process
1109	 */
1110	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1111	q = &hdev->kernel_queues[queue_id];
1112	q->sync_stream_prop.collective_sob_id =
1113			sob_id + NIC_NUMBER_OF_ENGINES;
1114
1115	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1116	q = &hdev->kernel_queues[queue_id];
1117	q->sync_stream_prop.collective_sob_id =
1118			sob_id + NIC_NUMBER_OF_ENGINES;
1119}
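
/*
 * Illustrative mapping (assuming HL_RSVD_SOBS == 2): for stream 3 with
 * curr_sob_group_idx[3] == 1, sob_group_id is 3 * 2 + 1 = 7, the group's base
 * SOB goes to the NIC_0 stream 3 queue, base + 1 to NIC_1 stream 3 and so on,
 * while base + NIC_NUMBER_OF_ENGINES is shared by the DMA5/TPC7 stream 3
 * queues that act as the reduction engine.
 */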
1120
1121static void gaudi_sob_group_hw_reset(struct kref *ref)
1122{
1123	struct gaudi_hw_sob_group *hw_sob_group =
1124		container_of(ref, struct gaudi_hw_sob_group, kref);
1125	struct hl_device *hdev = hw_sob_group->hdev;
1126	int i;
1127
1128	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1129		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1130			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1131
1132	kref_init(&hw_sob_group->kref);
1133}
1134
1135static void gaudi_sob_group_reset_error(struct kref *ref)
1136{
1137	struct gaudi_hw_sob_group *hw_sob_group =
1138		container_of(ref, struct gaudi_hw_sob_group, kref);
1139	struct hl_device *hdev = hw_sob_group->hdev;
1140
1141	dev_crit(hdev->dev,
1142		"SOB release shouldn't be called here, base_sob_id: %d\n",
1143		hw_sob_group->base_sob_id);
1144}
1145
1146static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1147{
1148	struct gaudi_collective_properties *prop;
1149	int i;
1150
1151	prop = &gaudi->collective_props;
1152
1153	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1154
1155	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1156		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1157			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1158					BIT(i % HL_MAX_SOBS_PER_MONITOR);
1159	/* Set collective engine bit */
1160	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1161				BIT(i % HL_MAX_SOBS_PER_MONITOR);
1162}
1163
1164static int gaudi_collective_init(struct hl_device *hdev)
1165{
1166	u32 i, sob_id, reserved_sobs_per_group;
1167	struct gaudi_collective_properties *prop;
1168	struct gaudi_device *gaudi;
1169
1170	gaudi = hdev->asic_specific;
1171	prop = &gaudi->collective_props;
1172	sob_id = hdev->asic_prop.collective_first_sob;
1173
1174	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1175	reserved_sobs_per_group =
1176		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1177
1178	/* Init SOB groups */
1179	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1180		prop->hw_sob_group[i].hdev = hdev;
1181		prop->hw_sob_group[i].base_sob_id = sob_id;
1182		sob_id += reserved_sobs_per_group;
1183		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1184	}
1185
1186	for (i = 0 ; i < QMAN_STREAMS; i++) {
1187		prop->next_sob_group_val[i] = 1;
1188		prop->curr_sob_group_idx[i] = 0;
1189		gaudi_collective_map_sobs(hdev, i);
1190	}
1191
1192	gaudi_collective_mstr_sob_mask_set(gaudi);
1193
1194	return 0;
1195}
1196
1197static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1198{
1199	struct gaudi_device *gaudi = hdev->asic_specific;
1200	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1201
1202	kref_put(&cprop->hw_sob_group[sob_group].kref,
1203					gaudi_sob_group_hw_reset);
1204}
1205
1206static void gaudi_collective_master_init_job(struct hl_device *hdev,
1207		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1208{
1209	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1210	struct gaudi_collective_properties *cprop;
1211	struct hl_gen_wait_properties wait_prop;
1212	struct hl_sync_stream_properties *prop;
1213	struct gaudi_device *gaudi;
1214
1215	gaudi = hdev->asic_specific;
1216	cprop = &gaudi->collective_props;
1217	queue_id = job->hw_queue_id;
1218	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1219
1220	master_sob_base =
1221		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1222	master_monitor = prop->collective_mstr_mon_id[0];
1223
1224	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1225
1226	dev_dbg(hdev->dev,
1227		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1228		master_sob_base, cprop->mstr_sob_mask[0],
1229		cprop->next_sob_group_val[stream],
1230		master_monitor, queue_id);
1231
1232	wait_prop.data = (void *) job->patched_cb;
1233	wait_prop.sob_base = master_sob_base;
1234	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1235	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1236	wait_prop.mon_id = master_monitor;
1237	wait_prop.q_idx = queue_id;
1238	wait_prop.size = cb_size;
1239	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1240
1241	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1242	master_monitor = prop->collective_mstr_mon_id[1];
1243
1244	dev_dbg(hdev->dev,
1245		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1246		master_sob_base, cprop->mstr_sob_mask[1],
1247		cprop->next_sob_group_val[stream],
1248		master_monitor, queue_id);
1249
1250	wait_prop.sob_base = master_sob_base;
1251	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1252	wait_prop.mon_id = master_monitor;
1253	wait_prop.size = cb_size;
1254	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1255}
1256
1257static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1258		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1259{
1260	struct hl_gen_wait_properties wait_prop;
1261	struct hl_sync_stream_properties *prop;
1262	u32 queue_id, cb_size = 0;
1263
1264	queue_id = job->hw_queue_id;
1265	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1266
1267	if (job->cs->encaps_signals) {
1268		/* use the encaps signal handle stored earlier in the flow
1269		 * and set the SOB information from the encaps
1270		 * signals handle
1271		 */
1272		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1273						cs_cmpl);
1274
1275		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1276				job->cs->sequence,
1277				cs_cmpl->hw_sob->sob_id,
1278				cs_cmpl->sob_val);
1279	}
1280
1281	/* Add to wait CBs using slave monitor */
1282	wait_prop.data = (void *) job->user_cb;
1283	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1284	wait_prop.sob_mask = 0x1;
1285	wait_prop.sob_val = cs_cmpl->sob_val;
1286	wait_prop.mon_id = prop->collective_slave_mon_id;
1287	wait_prop.q_idx = queue_id;
1288	wait_prop.size = cb_size;
1289
1290	dev_dbg(hdev->dev,
1291		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1292		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1293		prop->collective_slave_mon_id, queue_id);
1294
1295	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1296
1297	dev_dbg(hdev->dev,
1298		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1299		prop->collective_sob_id, queue_id);
1300
1301	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1302			prop->collective_sob_id, cb_size, false);
1303}
1304
1305static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1306{
1307	struct hl_cs_compl *signal_cs_cmpl =
1308		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1309	struct hl_cs_compl *cs_cmpl =
1310		container_of(cs->fence, struct hl_cs_compl, base_fence);
1311	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1312	struct gaudi_collective_properties *cprop;
1313	u32 stream, queue_id, sob_group_offset;
1314	struct gaudi_device *gaudi;
1315	struct hl_device *hdev;
1316	struct hl_cs_job *job;
1317	struct hl_ctx *ctx;
1318
1319	ctx = cs->ctx;
1320	hdev = ctx->hdev;
1321	gaudi = hdev->asic_specific;
1322	cprop = &gaudi->collective_props;
1323
1324	if (cs->encaps_signals) {
1325		cs_cmpl->hw_sob = handle->hw_sob;
1326		/* at this checkpoint we only need the hw_sob pointer
1327		 * for the completion check before starting to go over the jobs
1328		 * of the master/slaves. The sob_value will be taken later on
1329		 * in gaudi_collective_slave_init_job, depending on each
1330		 * job's wait offset value.
1331		 */
1332		cs_cmpl->sob_val = 0;
1333	} else {
1334		/* copy the SOB id and value of the signal CS */
1335		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1336		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1337	}
1338
1339	/* Check again if the signal cs has already completed.
1340	 * If yes then don't send any wait cs since the hw_sob
1341	 * could be in reset already. If the signal is not completed
1342	 * then get a refcount to the hw_sob to prevent resetting the sob
1343	 * while the wait cs is not submitted.
1344	 * Note that this check is protected by two locks,
1345	 * the hw queue lock and the completion object lock,
1346	 * and the same completion object lock also protects
1347	 * the hw_sob reset handler function.
1348	 * The hw_queue lock prevents the hw_sob refcount value, which is
1349	 * changed by the signal/wait flows, from going out of sync.
1350	 */
1351	spin_lock(&signal_cs_cmpl->lock);
1352
1353	if (completion_done(&cs->signal_fence->completion)) {
1354		spin_unlock(&signal_cs_cmpl->lock);
1355		return -EINVAL;
1356	}
1357	/* Increment kref since all slave queues are now waiting on it */
1358	kref_get(&cs_cmpl->hw_sob->kref);
1359
1360	spin_unlock(&signal_cs_cmpl->lock);
1361
1362	/* Calculate the stream from collective master queue (1st job) */
1363	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1364	stream = job->hw_queue_id % 4;
1365	sob_group_offset =
1366		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1367
1368	list_for_each_entry(job, &cs->job_list, cs_node) {
1369		queue_id = job->hw_queue_id;
1370
1371		if (hdev->kernel_queues[queue_id].collective_mode ==
1372				HL_COLLECTIVE_MASTER)
1373			gaudi_collective_master_init_job(hdev, job, stream,
1374						sob_group_offset);
1375		else
1376			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1377	}
1378
1379	cs_cmpl->sob_group = sob_group_offset;
1380
1381	/* Handle sob group kref and wraparound */
1382	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1383	cprop->next_sob_group_val[stream]++;
1384
1385	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1386		/*
1387		 * Decrement as we reached the max value.
1388		 * The release function won't be called here as we've
1389		 * just incremented the refcount.
1390		 */
1391		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1392				gaudi_sob_group_reset_error);
1393		cprop->next_sob_group_val[stream] = 1;
1394		/* only two SOB groups are currently in use */
1395		cprop->curr_sob_group_idx[stream] =
1396			(cprop->curr_sob_group_idx[stream] + 1) &
1397							(HL_RSVD_SOBS - 1);
1398
1399		gaudi_collective_map_sobs(hdev, stream);
1400
1401		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1402				cprop->curr_sob_group_idx[stream], stream);
1403	}
1404
1405	mb();
1406	hl_fence_put(cs->signal_fence);
1407	cs->signal_fence = NULL;
1408
1409	return 0;
1410}
1411
1412static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1413{
1414	u32 cacheline_end, additional_commands;
1415
1416	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1417	additional_commands = sizeof(struct packet_msg_prot) * 2;
1418
1419	if (user_cb_size + additional_commands > cacheline_end)
1420		return cacheline_end - user_cb_size + additional_commands;
1421	else
1422		return additional_commands;
1423}
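
/*
 * Worked example (assuming a 128-byte DEVICE_CACHE_LINE_SIZE and a 16-byte
 * packet_msg_prot): for a 120-byte user CB, cacheline_end is 128 and the two
 * MSG_PROT packets add 32 bytes, so 120 + 32 > 128 and the function returns
 * 128 - 120 + 32 = 40; for a 64-byte user CB, 64 + 32 <= 128, so only the 32
 * additional bytes are returned.
 */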
1424
1425static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1426		struct hl_ctx *ctx, struct hl_cs *cs,
1427		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1428		u32 encaps_signal_offset)
1429{
1430	struct hw_queue_properties *hw_queue_prop;
1431	struct hl_cs_counters_atomic *cntr;
1432	struct hl_cs_job *job;
1433	struct hl_cb *cb;
1434	u32 cb_size;
1435	bool patched_cb;
1436
1437	cntr = &hdev->aggregated_cs_counters;
1438
1439	if (mode == HL_COLLECTIVE_MASTER) {
1440		/* CB size of collective master queue contains
1441		 * 4 msg short packets for monitor 1 configuration
1442		 * 1 fence packet
1443		 * 4 msg short packets for monitor 2 configuration
1444		 * 1 fence packet
1445		 * 2 msg prot packets for completion and MSI
1446		 */
1447		cb_size = sizeof(struct packet_msg_short) * 8 +
1448				sizeof(struct packet_fence) * 2 +
1449				sizeof(struct packet_msg_prot) * 2;
1450		patched_cb = true;
1451	} else {
1452		/* CB size of collective slave queues contains
1453		 * 4 msg short packets for monitor configuration
1454		 * 1 fence packet
1455		 * 1 additional msg short packet for sob signal
1456		 */
1457		cb_size = sizeof(struct packet_msg_short) * 5 +
1458				sizeof(struct packet_fence);
1459		patched_cb = false;
1460	}
1461
1462	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1463	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1464	if (!job) {
1465		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1466		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1467		dev_err(hdev->dev, "Failed to allocate a new job\n");
1468		return -ENOMEM;
1469	}
1470
1471	/* Allocate internal mapped CB for non patched CBs */
1472	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
1473	if (!cb) {
1474		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1475		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1476		kfree(job);
1477		return -EFAULT;
1478	}
1479
1480	job->id = 0;
1481	job->cs = cs;
1482	job->user_cb = cb;
1483	atomic_inc(&job->user_cb->cs_cnt);
1484	job->user_cb_size = cb_size;
1485	job->hw_queue_id = queue_id;
1486
1487	/* since it's guaranteed to have only one chunk in the collective wait
1488	 * cs, we can use this chunk to set the encapsulated signal offset
1489	 * in the jobs.
1490	 */
1491	if (cs->encaps_signals)
1492		job->encaps_sig_wait_offset = encaps_signal_offset;
1493
1494	/*
1495	 * No need for parsing, the user CB is the patched CB.
1496	 * We call hl_cb_destroy() for two reasons - we don't need
1497	 * the CB in the CB idr anymore, and we need to decrement its
1498	 * refcount as it was incremented inside hl_cb_kernel_create().
1499	 */
1500	if (patched_cb)
1501		job->patched_cb = job->user_cb;
1502	else
1503		job->patched_cb = NULL;
1504
1505	job->job_cb_size = job->user_cb_size;
1506	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1507
1508	/* increment refcount as for external queues we get completion */
1509	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1510		cs_get(cs);
1511
1512	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1513
1514	list_add_tail(&job->cs_node, &cs->job_list);
1515
1516	hl_debugfs_add_job(hdev, job);
1517
1518	return 0;
1519}
1520
1521static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1522		struct hl_ctx *ctx, struct hl_cs *cs,
1523		u32 wait_queue_id, u32 collective_engine_id,
1524		u32 encaps_signal_offset)
1525{
1526	struct gaudi_device *gaudi = hdev->asic_specific;
1527	struct hw_queue_properties *hw_queue_prop;
1528	u32 queue_id, collective_queue, num_jobs;
1529	u32 stream, nic_queue, nic_idx = 0;
1530	bool skip;
1531	int i, rc = 0;
1532
1533	/* Verify wait queue id is configured as master */
1534	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1535	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1536		dev_err(hdev->dev,
1537			"Queue %d is not configured as collective master\n",
1538			wait_queue_id);
1539		return -EINVAL;
1540	}
1541
1542	/* Verify engine id is supported */
1543	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1544			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1545		dev_err(hdev->dev,
1546			"Collective wait does not support engine %u\n",
1547			collective_engine_id);
1548		return -EINVAL;
1549	}
1550
1551	stream = wait_queue_id % 4;
1552
1553	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1554		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1555	else
1556		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1557
1558	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1559	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1560
1561	/* The first job goes to the collective master queue; it will wait for
1562	 * the collective slave queues to finish execution.
1563	 * The synchronization is done using two monitors:
1564	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1565	 * reduction engine (DMA5/TPC7).
1566	 *
1567	 * The rest of the jobs go to the collective slave queues, which will
1568	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1569	 */
1570	for (i = 0 ; i < num_jobs ; i++) {
1571		if (i == 0) {
1572			queue_id = wait_queue_id;
1573			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1574				HL_COLLECTIVE_MASTER, queue_id,
1575				wait_queue_id, encaps_signal_offset);
1576		} else {
1577			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1578				if (gaudi->hw_cap_initialized &
1579					BIT(HW_CAP_NIC_SHIFT + nic_idx))
1580					skip = false;
1581				else
1582					skip = true;
1583
1584				queue_id = nic_queue;
1585				nic_queue += 4;
1586				nic_idx++;
1587
1588				if (skip)
1589					continue;
1590			} else {
1591				queue_id = collective_queue;
1592			}
1593
1594			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1595				HL_COLLECTIVE_SLAVE, queue_id,
1596				wait_queue_id, encaps_signal_offset);
1597		}
1598
1599		if (rc)
1600			return rc;
1601	}
1602
1603	return rc;
1604}
1605
1606static int gaudi_late_init(struct hl_device *hdev)
1607{
1608	struct gaudi_device *gaudi = hdev->asic_specific;
1609	int rc;
1610
1611	rc = gaudi->cpucp_info_get(hdev);
1612	if (rc) {
1613		dev_err(hdev->dev, "Failed to get cpucp info\n");
1614		return rc;
1615	}
1616
1617	if ((hdev->card_type == cpucp_card_type_pci) &&
1618			(hdev->nic_ports_mask & 0x3)) {
1619		dev_info(hdev->dev,
1620			"PCI card detected, only 8 ports are enabled\n");
1621		hdev->nic_ports_mask &= ~0x3;
1622
1623		/* Stop and disable unused NIC QMANs */
1624		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1625					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1626					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1627
1628		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1629					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1630					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1631
1632		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1633		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1634
1635		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1636	}
1637
1638	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1639	if (rc) {
1640		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1641		return rc;
1642	}
1643
1644	/* Scrub both SRAM and DRAM */
1645	rc = hdev->asic_funcs->scrub_device_mem(hdev);
1646	if (rc)
1647		goto disable_pci_access;
1648
1649	rc = gaudi_fetch_psoc_frequency(hdev);
1650	if (rc) {
1651		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1652		goto disable_pci_access;
1653	}
1654
1655	rc = gaudi_mmu_clear_pgt_range(hdev);
1656	if (rc) {
1657		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1658		goto disable_pci_access;
1659	}
1660
1661	rc = gaudi_init_tpc_mem(hdev);
1662	if (rc) {
1663		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1664		goto disable_pci_access;
1665	}
1666
1667	rc = gaudi_collective_init(hdev);
1668	if (rc) {
1669		dev_err(hdev->dev, "Failed to init collective\n");
1670		goto disable_pci_access;
1671	}
1672
	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID once during device initialization with the fixed value of 1.
	 */
1676	gaudi_mmu_prepare(hdev, 1);
1677
1678	hl_fw_set_pll_profile(hdev);
1679
1680	return 0;
1681
1682disable_pci_access:
1683	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1684
1685	return rc;
1686}
1687
1688static void gaudi_late_fini(struct hl_device *hdev)
1689{
1690	hl_hwmon_release_resources(hdev);
1691}
1692
1693static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1694{
1695	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1696	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1697	int i, j, rc = 0;
1698
	/*
	 * The device CPU works with 40-bit addresses, and bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to extend the address to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical across the entire allocated range.
	 */
1707
1708	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1709		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1710								&dma_addr_arr[i],
1711								GFP_KERNEL | __GFP_ZERO);
1712		if (!virt_addr_arr[i]) {
1713			rc = -ENOMEM;
1714			goto free_dma_mem_arr;
1715		}
1716
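		/* Accept the allocation only if its first and last bytes
		 * share the same MSB extension bits
		 */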
1717		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1718		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1719				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1720			break;
1721	}
1722
1723	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1724		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory is not identical across the entire range\n");
1726		rc = -EFAULT;
1727		goto free_dma_mem_arr;
1728	}
1729
1730	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1731	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1732	hdev->cpu_pci_msb_addr =
1733		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1734
1735	if (!hdev->asic_prop.fw_security_enabled)
1736		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1737
1738free_dma_mem_arr:
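	/* Free only the rejected allocations; on success, entry 'i' is kept */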
1739	for (j = 0 ; j < i ; j++)
1740		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1741						dma_addr_arr[j]);
1742
1743	return rc;
1744}
1745
1746static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1747{
1748	struct gaudi_device *gaudi = hdev->asic_specific;
1749	struct gaudi_internal_qman_info *q;
1750	u32 i;
1751
1752	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1753		q = &gaudi->internal_qmans[i];
1754		if (!q->pq_kernel_addr)
1755			continue;
1756		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1757	}
1758}
1759
1760static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1761{
1762	struct gaudi_device *gaudi = hdev->asic_specific;
1763	struct gaudi_internal_qman_info *q;
1764	int rc, i;
1765
1766	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1767		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1768			continue;
1769
1770		q = &gaudi->internal_qmans[i];
1771
1772		switch (i) {
1773		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1774			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1775			break;
1776		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1777			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1778			break;
1779		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1780			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1781			break;
1782		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1783			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1784			break;
1785		default:
			dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1787			rc = -EINVAL;
1788			goto free_internal_qmans_pq_mem;
1789		}
1790
1791		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1792								GFP_KERNEL | __GFP_ZERO);
1793		if (!q->pq_kernel_addr) {
1794			rc = -ENOMEM;
1795			goto free_internal_qmans_pq_mem;
1796		}
1797	}
1798
1799	return 0;
1800
1801free_internal_qmans_pq_mem:
1802	gaudi_free_internal_qmans_pq_mem(hdev);
1803	return rc;
1804}
1805
1806static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1807{
1808	struct asic_fixed_properties *prop = &hdev->asic_prop;
1809	struct pci_mem_region *region;
1810
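	/*
	 * Describe each region exposed over PCI: its device address range,
	 * the BAR it is mapped through and its offset within that BAR.
	 */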
1811	/* CFG */
1812	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1813	region->region_base = CFG_BASE;
1814	region->region_size = CFG_SIZE;
1815	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1816	region->bar_size = CFG_BAR_SIZE;
1817	region->bar_id = CFG_BAR_ID;
1818	region->used = 1;
1819
1820	/* SRAM */
1821	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1822	region->region_base = SRAM_BASE_ADDR;
1823	region->region_size = SRAM_SIZE;
1824	region->offset_in_bar = 0;
1825	region->bar_size = SRAM_BAR_SIZE;
1826	region->bar_id = SRAM_BAR_ID;
1827	region->used = 1;
1828
1829	/* DRAM */
1830	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1831	region->region_base = DRAM_PHYS_BASE;
1832	region->region_size = hdev->asic_prop.dram_size;
1833	region->offset_in_bar = 0;
1834	region->bar_size = prop->dram_pci_bar_size;
1835	region->bar_id = HBM_BAR_ID;
1836	region->used = 1;
1837
1838	/* SP SRAM */
1839	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1840	region->region_base = PSOC_SCRATCHPAD_ADDR;
1841	region->region_size = PSOC_SCRATCHPAD_SIZE;
1842	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1843	region->bar_size = CFG_BAR_SIZE;
1844	region->bar_id = CFG_BAR_ID;
1845	region->used = 1;
1846}
1847
1848static int gaudi_sw_init(struct hl_device *hdev)
1849{
1850	struct gaudi_device *gaudi;
1851	u32 i, event_id = 0;
1852	int rc;
1853
1854	/* Allocate device structure */
1855	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1856	if (!gaudi)
1857		return -ENOMEM;
1858
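	/* Build the event table by mapping each valid IRQ map entry to its firmware fc_id */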
1859	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1860		if (gaudi_irq_map_table[i].valid) {
1861			if (event_id == GAUDI_EVENT_SIZE) {
1862				dev_err(hdev->dev,
1863					"Event array exceeds the limit of %u events\n",
1864					GAUDI_EVENT_SIZE);
1865				rc = -EINVAL;
1866				goto free_gaudi_device;
1867			}
1868
1869			gaudi->events[event_id++] =
1870					gaudi_irq_map_table[i].fc_id;
1871		}
1872	}
1873
1874	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1875
1876	hdev->asic_specific = gaudi;
1877
1878	/* Create DMA pool for small allocations */
1879	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1880			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1881	if (!hdev->dma_pool) {
1882		dev_err(hdev->dev, "failed to create DMA pool\n");
1883		rc = -ENOMEM;
1884		goto free_gaudi_device;
1885	}
1886
1887	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1888	if (rc)
1889		goto free_dma_pool;
1890
1891	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1892	if (!hdev->cpu_accessible_dma_pool) {
1893		dev_err(hdev->dev,
1894			"Failed to create CPU accessible DMA pool\n");
1895		rc = -ENOMEM;
1896		goto free_cpu_dma_mem;
1897	}
1898
1899	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1900				(uintptr_t) hdev->cpu_accessible_dma_mem,
1901				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1902	if (rc) {
1903		dev_err(hdev->dev,
1904			"Failed to add memory to CPU accessible DMA pool\n");
1905		rc = -EFAULT;
1906		goto free_cpu_accessible_dma_pool;
1907	}
1908
1909	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1910	if (rc)
1911		goto free_cpu_accessible_dma_pool;
1912
1913	spin_lock_init(&gaudi->hw_queues_lock);
1914
1915	hdev->supports_sync_stream = true;
1916	hdev->supports_coresight = true;
1917	hdev->supports_staged_submission = true;
1918	hdev->supports_wait_for_multi_cs = true;
1919
1920	hdev->asic_funcs->set_pci_memory_regions(hdev);
1921	hdev->stream_master_qid_arr =
1922				hdev->asic_funcs->get_stream_master_qid_arr();
1923	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1924
1925	return 0;
1926
1927free_cpu_accessible_dma_pool:
1928	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1929free_cpu_dma_mem:
1930	if (!hdev->asic_prop.fw_security_enabled)
1931		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1932					hdev->cpu_pci_msb_addr);
1933	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1934					hdev->cpu_accessible_dma_address);
1935free_dma_pool:
1936	dma_pool_destroy(hdev->dma_pool);
1937free_gaudi_device:
1938	kfree(gaudi);
1939	return rc;
1940}
1941
1942static int gaudi_sw_fini(struct hl_device *hdev)
1943{
1944	struct gaudi_device *gaudi = hdev->asic_specific;
1945
1946	gaudi_free_internal_qmans_pq_mem(hdev);
1947
1948	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1949
1950	if (!hdev->asic_prop.fw_security_enabled)
1951		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1952					hdev->cpu_pci_msb_addr);
1953
1954	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1955					hdev->cpu_accessible_dma_address);
1956
1957	dma_pool_destroy(hdev->dma_pool);
1958
1959	kfree(gaudi);
1960
1961	return 0;
1962}
1963
1964static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1965{
1966	struct hl_device *hdev = arg;
1967	int i;
1968
1969	if (hdev->disabled)
1970		return IRQ_HANDLED;
1971
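	/* A single MSI vector services all completion queues and the event queue */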
1972	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1973		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1974
1975	hl_irq_handler_eq(irq, &hdev->event_queue);
1976
1977	return IRQ_HANDLED;
1978}
1979
1980/*
1981 * For backward compatibility, new MSI interrupts should be set after the
1982 * existing CPU and NIC interrupts.
1983 */
1984static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1985				bool cpu_eq)
1986{
1987	int msi_vec;
1988
1989	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1990		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1991				GAUDI_EVENT_QUEUE_MSI_IDX);
1992
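	/* Vectors below the CPU EQ index map 1:1 to the MSI vector; higher
	 * vectors are shifted past the per-NIC vectors and the CPU EQ vector.
	 */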
1993	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1994			(nr + NIC_NUMBER_OF_ENGINES + 1);
1995
1996	return pci_irq_vector(hdev->pdev, msi_vec);
1997}
1998
1999static int gaudi_enable_msi_single(struct hl_device *hdev)
2000{
2001	int rc, irq;
2002
2003	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2004
2005	irq = gaudi_pci_irq_vector(hdev, 0, false);
2006	rc = request_irq(irq, gaudi_irq_handler_single, 0,
2007			"gaudi single msi", hdev);
2008	if (rc)
2009		dev_err(hdev->dev,
2010			"Failed to request single MSI IRQ\n");
2011
2012	return rc;
2013}
2014
2015static int gaudi_enable_msi(struct hl_device *hdev)
2016{
2017	struct gaudi_device *gaudi = hdev->asic_specific;
2018	int rc;
2019
2020	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2021		return 0;
2022
2023	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2024	if (rc < 0) {
2025		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2026		return rc;
2027	}
2028
2029	rc = gaudi_enable_msi_single(hdev);
2030	if (rc)
2031		goto free_pci_irq_vectors;
2032
2033	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2034
2035	return 0;
2036
2037free_pci_irq_vectors:
2038	pci_free_irq_vectors(hdev->pdev);
2039	return rc;
2040}
2041
2042static void gaudi_sync_irqs(struct hl_device *hdev)
2043{
2044	struct gaudi_device *gaudi = hdev->asic_specific;
2045
2046	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2047		return;
2048
2049	/* Wait for all pending IRQs to be finished */
2050	synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2051}
2052
2053static void gaudi_disable_msi(struct hl_device *hdev)
2054{
2055	struct gaudi_device *gaudi = hdev->asic_specific;
2056
2057	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2058		return;
2059
2060	gaudi_sync_irqs(hdev);
2061	free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2062	pci_free_irq_vectors(hdev->pdev);
2063
2064	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2065}
2066
2067static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2068{
2069	struct gaudi_device *gaudi = hdev->asic_specific;
2070
2071	if (hdev->asic_prop.fw_security_enabled)
2072		return;
2073
2074	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2075						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2076		return;
2077
2078	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2079		return;
2080
2081	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2082			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2083	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2084			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2085	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2086			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2087	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2088			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2089	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2090			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2091	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2092			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2093	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2094			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2095	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2096			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2097
2098	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2099			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2100	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2101			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2102	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2103			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2104	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2105			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2106	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2107			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2108	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2109			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2110	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2111			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2112	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2113			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2114
2115	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2116			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2117	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2118			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2119	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2120			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2121	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2122			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2123	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2124			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2125	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2126			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2127	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2128			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2129	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2130			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2131
2132	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2133}
2134
2135static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2136{
2137	struct gaudi_device *gaudi = hdev->asic_specific;
2138
2139	if (hdev->asic_prop.fw_security_enabled)
2140		return;
2141
2142	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2143					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2144		return;
2145
2146	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2147		return;
2148
2149	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2150			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2151	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2152			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2153	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2154			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2155	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2156			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2157	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2158			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2159	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2160			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2161	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2162			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2163	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2164			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2165
2166	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2167			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2168	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2169			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2170	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2171			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2172	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2173			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2174	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2175			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2176	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2177			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2178	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2179			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2180	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2181			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2182
2183	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2184			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2185	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2186			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2187	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2188			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2189	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2190			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2191	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2192			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2193	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2194			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2195	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2196			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2197	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2198			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2199
2200	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2201}
2202
2203static void gaudi_init_e2e(struct hl_device *hdev)
2204{
2205	if (hdev->asic_prop.fw_security_enabled)
2206		return;
2207
2208	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2209					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2210		return;
2211
2212	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2213	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2214	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2215	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2216
2217	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2218	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2219	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2220	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2221
2222	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2223	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2224	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2225	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2226
2227	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2228	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2229	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2230	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2231
2232	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2233	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2234	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2235	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2236
2237	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2238	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2239	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2240	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2241
2242	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2243	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2244	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2245	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2246
2247	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2248	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2249	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2250	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2251
2252	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2253	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2254	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2255	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2256
2257	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2258	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2259	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2260	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2261
2262	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2263	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2264	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2265	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2266
2267	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2268	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2269	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2270	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2271
2272	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2273	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2274	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2275	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2276
2277	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2278	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2279	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2280	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2281
2282	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2283	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2284	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2285	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2286
2287	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2288	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2289	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2290	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2291
2292	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2293	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2294	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2295	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2296
2297	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2298	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2299	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2300	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2301
2302	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2303	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2304	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2305	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2306
2307	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2308	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2309	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2310	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2311
2312	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2313	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2314	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2315	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2316
2317	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2318	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2319	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2320	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2321
2322	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2323	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2324	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2325	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2326
2327	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2328	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2329	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2330	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2331
2332	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2333			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2334	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2335			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2336
2337	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2338			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2339	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2340			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2341
2342	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2343			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2344	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2345			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2346
2347	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2348			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2349	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2350			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2351
2352	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2353			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2354	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2355			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2356
2357	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2358			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2359	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2360			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2361
2362	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2363			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2364	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2365			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2366
2367	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2368			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2369	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2370			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2371
2372	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2373			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2374	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2375			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2376
2377	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2378			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2379	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2380			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2381
2382	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2383			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2384	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2385			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2386
2387	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2388			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2389	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2390			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2391
2392	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2393			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2394	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2395			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2396
2397	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2398			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2399	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2400			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2401
2402	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2403			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2404	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2405			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2406
2407	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2408			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2409	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2410			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2411
2412	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2413			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2414	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2415			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2416
2417	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2418			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2419	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2420			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2421
2422	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2423			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2424	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2425			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2426
2427	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2428			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2429	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2430			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2431
2432	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2433			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2434	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2435			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2436
2437	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2438			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2439	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2440			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2441
2442	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2443			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2444	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2445			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2446
2447	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2448			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2449	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2450			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2451}
2452
2453static void gaudi_init_hbm_cred(struct hl_device *hdev)
2454{
2455	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2456
2457	if (hdev->asic_prop.fw_security_enabled)
2458		return;
2459
2460	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2461						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2462		return;
2463
2464	hbm0_wr = 0x33333333;
2465	hbm0_rd = 0x77777777;
2466	hbm1_wr = 0x55555555;
2467	hbm1_rd = 0xDDDDDDDD;
2468
2469	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2470	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2471	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2472	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2473
2474	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2475	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2476	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2477	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2478
2479	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2480	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2481	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2482	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2483
2484	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2485	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2486	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2487	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2488
2489	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2490			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2491			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2492	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2493			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2494			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2495	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2496			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2497			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2498	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2499			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2500			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2501
2502	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2503			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2504			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2505	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2506			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2507			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2508	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2509			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2510			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2511	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2512			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2513			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2514}
2515
2516static void gaudi_init_golden_registers(struct hl_device *hdev)
2517{
2518	u32 tpc_offset;
2519	int tpc_id, i;
2520
2521	gaudi_init_e2e(hdev);
2522	gaudi_init_hbm_cred(hdev);
2523
2524	for (tpc_id = 0, tpc_offset = 0;
2525				tpc_id < TPC_NUMBER_OF_ENGINES;
2526				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2527		/* Mask all arithmetic interrupts from TPC */
2528		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2529		/* Set 16 cache lines */
2530		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2531				ICACHE_FETCH_LINE_NUM, 2);
2532	}
2533
2534	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2535	for (i = 0 ; i < 128 ; i += 8)
2536		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2537
2538	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2539	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2541	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2542}
2543
2544static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2545					int qman_id, dma_addr_t qman_pq_addr)
2546{
2547	struct cpu_dyn_regs *dyn_regs =
2548			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2549	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2550	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2551	u32 q_off, dma_qm_offset;
2552	u32 dma_qm_err_cfg, irq_handler_offset;
2553
2554	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2555
2556	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2557				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2558	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2559				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2560	so_base_en_lo = lower_32_bits(CFG_BASE +
2561				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2562	so_base_en_hi = upper_32_bits(CFG_BASE +
2563				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2564	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2565				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2566	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2567				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2568	so_base_ws_lo = lower_32_bits(CFG_BASE +
2569				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2570	so_base_ws_hi = upper_32_bits(CFG_BASE +
2571				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2572
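	/* Per-stream QMAN registers are spaced 4 bytes apart */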
2573	q_off = dma_qm_offset + qman_id * 4;
2574
2575	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2576	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2577
2578	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2579	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2580	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2581
2582	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2583	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2584							QMAN_LDMA_SRC_OFFSET);
2585	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2586							QMAN_LDMA_DST_OFFSET);
2587
2588	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2589	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2590	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2591	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2592	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2593	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2594	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2595	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2596
2597	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2598
2599	/* The following configuration is needed only once per QMAN */
2600	if (qman_id == 0) {
2601		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2602				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2603				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2604
2605		/* Configure RAZWI IRQ */
2606		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2607		if (hdev->stop_on_err)
2608			dma_qm_err_cfg |=
2609				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2610
2611		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2612
2613		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2614			lower_32_bits(CFG_BASE + irq_handler_offset));
2615		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2616			upper_32_bits(CFG_BASE + irq_handler_offset));
2617
2618		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2619			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2620									dma_id);
2621
2622		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2623				QM_ARB_ERR_MSG_EN_MASK);
2624
2625		/* Set timeout to maximum */
2626		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2627
2628		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2629				QMAN_EXTERNAL_MAKE_TRUSTED);
2630
2631		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2632	}
2633}
2634
2635static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2636{
2637	struct cpu_dyn_regs *dyn_regs =
2638			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2639	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2640	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2641	u32 irq_handler_offset;
2642
2643	/* Set to maximum possible according to physical size */
2644	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2645	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2646
2647	/* WA for H/W bug H3-2116 */
2648	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2649
	/* The STOP_ON bit implies no completion of the operation in case of RAZWI */
2651	if (hdev->stop_on_err)
2652		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2653
2654	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2655
2656	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2657			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2658			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2659
2660	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2661		lower_32_bits(CFG_BASE + irq_handler_offset));
2662	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2663		upper_32_bits(CFG_BASE + irq_handler_offset));
2664
2665	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2666		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2667	WREG32(mmDMA0_CORE_PROT + dma_offset,
2668			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2669	/* If the channel is secured, it should be in MMU bypass mode */
2670	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2671			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2672	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2673}
2674
2675static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2676				u32 enable_mask)
2677{
2678	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2679
2680	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2681}
2682
2683static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2684{
2685	struct gaudi_device *gaudi = hdev->asic_specific;
2686	struct hl_hw_queue *q;
2687	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2688
2689	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2690		return;
2691
2692	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2693		dma_id = gaudi_dma_assignment[i];
		/*
		 * For queues after the CPU Q, we need to add 1 to get the
		 * correct queue index. In addition, we need to account for the
		 * CPU EQ and NIC IRQs in order to get the correct MSI register.
		 */
2699		if (dma_id > 1) {
2700			cpu_skip = 1;
2701			nic_skip = NIC_NUMBER_OF_ENGINES;
2702		} else {
2703			cpu_skip = 0;
2704			nic_skip = 0;
2705		}
2706
2707		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2708			q_idx = 4 * dma_id + j + cpu_skip;
2709			q = &hdev->kernel_queues[q_idx];
2710			q->cq_id = cq_id++;
2711			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2712			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2713						q->bus_address);
2714		}
2715
2716		gaudi_init_dma_core(hdev, dma_id);
2717
2718		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2719	}
2720
2721	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2722}
2723
2724static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2725					int qman_id, u64 qman_base_addr)
2726{
2727	struct cpu_dyn_regs *dyn_regs =
2728			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2729	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2730	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2731	u32 dma_qm_err_cfg, irq_handler_offset;
2732	u32 q_off, dma_qm_offset;
2733
2734	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2735
2736	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2737			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2738	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2739				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2740	so_base_en_lo = lower_32_bits(CFG_BASE +
2741				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2742	so_base_en_hi = upper_32_bits(CFG_BASE +
2743				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2744	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2745				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2746	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2747				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2748	so_base_ws_lo = lower_32_bits(CFG_BASE +
2749				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2750	so_base_ws_hi = upper_32_bits(CFG_BASE +
2751				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2752
2753	q_off = dma_qm_offset + qman_id * 4;
2754
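	/* Streams 0-3 are the upper CPs and have a PQ; qman_id 4 is the lower CP */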
2755	if (qman_id < 4) {
2756		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2757					lower_32_bits(qman_base_addr));
2758		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2759					upper_32_bits(qman_base_addr));
2760
2761		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2762		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2763		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2764
2765		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2766							QMAN_CPDMA_SIZE_OFFSET);
2767		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2768							QMAN_CPDMA_SRC_OFFSET);
2769		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2770							QMAN_CPDMA_DST_OFFSET);
2771	} else {
2772		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2773				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2774				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2775
2776		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2777							QMAN_LDMA_SIZE_OFFSET);
2778		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2779							QMAN_LDMA_SRC_OFFSET);
2780		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2781							QMAN_LDMA_DST_OFFSET);
2782
2783		/* Configure RAZWI IRQ */
2784		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2785		if (hdev->stop_on_err)
2786			dma_qm_err_cfg |=
2787				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2788
2789		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2790
2791		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2792			lower_32_bits(CFG_BASE + irq_handler_offset));
2793		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2794			upper_32_bits(CFG_BASE + irq_handler_offset));
2795
2796		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2797			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2798									dma_id);
2799
2800		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2801				QM_ARB_ERR_MSG_EN_MASK);
2802
2803		/* Set timeout to maximum */
2804		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2805
2806		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2807		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2808				QMAN_INTERNAL_MAKE_TRUSTED);
2809	}
2810
2811	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2812	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2813	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2814	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2815
2816	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2817	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2818		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2819				mtr_base_ws_lo);
2820		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2821				mtr_base_ws_hi);
2822		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2823				so_base_ws_lo);
2824		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2825				so_base_ws_hi);
2826	}
2827}
2828
2829static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2830{
2831	struct gaudi_device *gaudi = hdev->asic_specific;
2832	struct gaudi_internal_qman_info *q;
2833	u64 qman_base_addr;
2834	int i, j, dma_id, internal_q_index;
2835
2836	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2837		return;
2838
2839	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2840		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2841
2842		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			/*
			 * Add the CPU queue in order to get the correct queue
			 * number, as all internal queues are placed after it.
			 */
2847			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2848
2849			q = &gaudi->internal_qmans[internal_q_index];
2850			qman_base_addr = (u64) q->pq_dma_addr;
2851			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2852						qman_base_addr);
2853		}
2854
2855		/* Initializing lower CP for HBM DMA QMAN */
2856		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2857
2858		gaudi_init_dma_core(hdev, dma_id);
2859
2860		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2861	}
2862
2863	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2864}
2865
2866static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2867					int qman_id, u64 qman_base_addr)
2868{
2869	struct cpu_dyn_regs *dyn_regs =
2870			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2871	u32 mtr_base_lo, mtr_base_hi;
2872	u32 so_base_lo, so_base_hi;
2873	u32 irq_handler_offset;
2874	u32 q_off, mme_id;
2875	u32 mme_qm_err_cfg;
2876
2877	mtr_base_lo = lower_32_bits(CFG_BASE +
2878				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2879	mtr_base_hi = upper_32_bits(CFG_BASE +
2880				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2881	so_base_lo = lower_32_bits(CFG_BASE +
2882				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2883	so_base_hi = upper_32_bits(CFG_BASE +
2884				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2885
2886	q_off = mme_offset + qman_id * 4;
2887
2888	if (qman_id < 4) {
2889		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2890					lower_32_bits(qman_base_addr));
2891		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2892					upper_32_bits(qman_base_addr));
2893
2894		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2895		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2896		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2897
2898		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2899							QMAN_CPDMA_SIZE_OFFSET);
2900		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2901							QMAN_CPDMA_SRC_OFFSET);
2902		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2903							QMAN_CPDMA_DST_OFFSET);
2904	} else {
2905		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2906				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2907				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2908
2909		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2910							QMAN_LDMA_SIZE_OFFSET);
2911		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2912							QMAN_LDMA_SRC_OFFSET);
2913		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2914							QMAN_LDMA_DST_OFFSET);
2915
2916		/* Configure RAZWI IRQ */
2917		mme_id = mme_offset /
2918				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2919
2920		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2921		if (hdev->stop_on_err)
2922			mme_qm_err_cfg |=
2923				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2924
2925		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2926
2927		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2928			lower_32_bits(CFG_BASE + irq_handler_offset));
2929		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2930			upper_32_bits(CFG_BASE + irq_handler_offset));
2931
2932		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2933			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2934									mme_id);
2935
2936		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2937				QM_ARB_ERR_MSG_EN_MASK);
2938
2939		/* Set timeout to maximum */
2940		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
2941
2942		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2943		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2944				QMAN_INTERNAL_MAKE_TRUSTED);
2945	}
2946
2947	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2948	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2949	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2950	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2951}
2952
2953static void gaudi_init_mme_qmans(struct hl_device *hdev)
2954{
2955	struct gaudi_device *gaudi = hdev->asic_specific;
2956	struct gaudi_internal_qman_info *q;
2957	u64 qman_base_addr;
2958	u32 mme_offset;
2959	int i, internal_q_index;
2960
2961	if (gaudi->hw_cap_initialized & HW_CAP_MME)
2962		return;
2963
2964	/*
2965	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2966	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2967	 */
2968
2969	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2970
2971	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2972		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2973		q = &gaudi->internal_qmans[internal_q_index];
2974		qman_base_addr = (u64) q->pq_dma_addr;
2975		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2976					qman_base_addr);
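		/* After the N_W MME's 4 streams, switch to the S_W MME at offset 0 */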
2977		if (i == 3)
2978			mme_offset = 0;
2979	}
2980
2981	/* Initializing lower CP for MME QMANs */
2982	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2983	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2984	gaudi_init_mme_qman(hdev, 0, 4, 0);
2985
2986	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2987	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2988
2989	gaudi->hw_cap_initialized |= HW_CAP_MME;
2990}
2991
2992static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2993				int qman_id, u64 qman_base_addr)
2994{
2995	struct cpu_dyn_regs *dyn_regs =
2996			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2997	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2998	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2999	u32 tpc_qm_err_cfg, irq_handler_offset;
3000	u32 q_off, tpc_id;
3001
3002	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3003			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3004	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3005				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3006	so_base_en_lo = lower_32_bits(CFG_BASE +
3007				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3008	so_base_en_hi = upper_32_bits(CFG_BASE +
3009				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3010	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3011				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3012	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3013				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3014	so_base_ws_lo = lower_32_bits(CFG_BASE +
3015				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3016	so_base_ws_hi = upper_32_bits(CFG_BASE +
3017				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3018
3019	q_off = tpc_offset + qman_id * 4;
3020
3021	tpc_id = tpc_offset /
3022			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3023
3024	if (qman_id < 4) {
3025		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3026					lower_32_bits(qman_base_addr));
3027		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3028					upper_32_bits(qman_base_addr));
3029
3030		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3031		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3032		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3033
3034		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3035							QMAN_CPDMA_SIZE_OFFSET);
3036		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3037							QMAN_CPDMA_SRC_OFFSET);
3038		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3039							QMAN_CPDMA_DST_OFFSET);
3040	} else {
3041		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3042				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3043				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3044
3045		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3046							QMAN_LDMA_SIZE_OFFSET);
3047		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3048							QMAN_LDMA_SRC_OFFSET);
3049		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3050							QMAN_LDMA_DST_OFFSET);
3051
3052		/* Configure RAZWI IRQ */
3053		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3054		if (hdev->stop_on_err)
3055			tpc_qm_err_cfg |=
3056				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3057
3058		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3059
3060		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3061			lower_32_bits(CFG_BASE + irq_handler_offset));
3062		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3063			upper_32_bits(CFG_BASE + irq_handler_offset));
3064
3065		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3066			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3067									tpc_id);
3068
3069		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3070				QM_ARB_ERR_MSG_EN_MASK);
3071
3072		/* Set timeout to maximum */
3073		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3074
3075		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3076		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3077				QMAN_INTERNAL_MAKE_TRUSTED);
3078	}
3079
3080	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3081	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3082	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3083	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3084
3085	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3086	if (tpc_id == 6) {
3087		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3088				mtr_base_ws_lo);
3089		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3090				mtr_base_ws_hi);
3091		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3092				so_base_ws_lo);
3093		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3094				so_base_ws_hi);
3095	}
3096}
3097
3098static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3099{
3100	struct gaudi_device *gaudi = hdev->asic_specific;
3101	struct gaudi_internal_qman_info *q;
3102	u64 qman_base_addr;
3103	u32 so_base_hi, tpc_offset = 0;
3104	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3105			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3106	int i, tpc_id, internal_q_index;
3107
3108	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3109		return;
3110
3111	so_base_hi = upper_32_bits(CFG_BASE +
3112				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3113
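	/* For each TPC: init the 4 stream QMANs and the lower CP, then enable the QMAN */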
3114	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3115		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3116			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3117						tpc_id * QMAN_STREAMS + i;
3118			q = &gaudi->internal_qmans[internal_q_index];
3119			qman_base_addr = (u64) q->pq_dma_addr;
3120			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3121						qman_base_addr);
3122
3123			if (i == 3) {
3124				/* Initializing lower CP for TPC QMAN */
3125				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3126
3127				/* Enable the QMAN and TPC channel */
3128				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3129						QMAN_TPC_ENABLE);
3130			}
3131		}
3132
3133		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3134				so_base_hi);
3135
3136		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3137
3138		gaudi->hw_cap_initialized |=
3139				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3140	}
3141}
3142
3143static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3144				int qman_id, u64 qman_base_addr, int nic_id)
3145{
3146	struct cpu_dyn_regs *dyn_regs =
3147			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3148	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3149	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3150	u32 nic_qm_err_cfg, irq_handler_offset;
3151	u32 q_off;
3152
3153	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3154			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3155	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3156				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3157	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3158				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3159	so_base_en_hi = upper_32_bits(CFG_BASE +
3160				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3161	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3162				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3163	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3164				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3165	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3166				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3167	so_base_ws_hi = upper_32_bits(CFG_BASE +
3168				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3169
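	/* Per-stream register instances (_0 to _3) are 4 bytes apart, hence the qman_id * 4 offset */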
3170	q_off = nic_offset + qman_id * 4;
3171
3172	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3173	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3174
3175	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3176	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3177	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3178
3179	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3180							QMAN_LDMA_SIZE_OFFSET);
3181	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3182							QMAN_LDMA_SRC_OFFSET);
3183	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3184							QMAN_LDMA_DST_OFFSET);
3185
3186	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3187	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3188	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3189	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3190
3191	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3192	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3193	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3194	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3195	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3196
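	/*
	 * The registers below are per-QMAN (shared by all four streams), so
	 * program them only once, when initializing stream 0
	 */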
3197	if (qman_id == 0) {
3198		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3199				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3200				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3201
3202		/* Configure RAZWI IRQ */
3203		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3204		if (hdev->stop_on_err)
3205			nic_qm_err_cfg |=
3206				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3207
3208		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3209
3210		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3211			lower_32_bits(CFG_BASE + irq_handler_offset));
3212		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3213			upper_32_bits(CFG_BASE + irq_handler_offset));
3214
3215		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3216			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3217									nic_id);
3218
3219		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3220				QM_ARB_ERR_MSG_EN_MASK);
3221
		/* Set the arbiter watchdog timeout (8 seconds) */
3223		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3224
3225		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3226		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3227				QMAN_INTERNAL_MAKE_TRUSTED);
3228	}
3229}
3230
3231static void gaudi_init_nic_qmans(struct hl_device *hdev)
3232{
3233	struct gaudi_device *gaudi = hdev->asic_specific;
3234	struct gaudi_internal_qman_info *q;
3235	u64 qman_base_addr;
3236	u32 nic_offset = 0;
3237	u32 nic_delta_between_qmans =
3238			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3239	u32 nic_delta_between_nics =
3240			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3241	int i, nic_id, internal_q_index;
3242
3243	if (!hdev->nic_ports_mask)
3244		return;
3245
3246	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3247		return;
3248
3249	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3250
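	/*
	 * Each NIC macro hosts two QMANs. Walk nic_offset across both QMANs
	 * of every macro, and keep it in sync even for ports that are
	 * masked out
	 */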
3251	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3252		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3253			nic_offset += nic_delta_between_qmans;
3254			if (nic_id & 1) {
3255				nic_offset -= (nic_delta_between_qmans * 2);
3256				nic_offset += nic_delta_between_nics;
3257			}
3258			continue;
3259		}
3260
3261		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3262			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3263						nic_id * QMAN_STREAMS + i;
3264			q = &gaudi->internal_qmans[internal_q_index];
3265			qman_base_addr = (u64) q->pq_dma_addr;
3266			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3267						qman_base_addr, nic_id);
3268		}
3269
3270		/* Enable the QMAN */
3271		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3272
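		/* Advance to the next QMAN; after the second QMAN of a macro,
		 * jump to the next NIC macro base
		 */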
3273		nic_offset += nic_delta_between_qmans;
3274		if (nic_id & 1) {
3275			nic_offset -= (nic_delta_between_qmans * 2);
3276			nic_offset += nic_delta_between_nics;
3277		}
3278
3279		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3280	}
3281}
3282
3283static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3284{
3285	struct gaudi_device *gaudi = hdev->asic_specific;
3286
3287	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3288		return;
3289
3290	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3291	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3292	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3293}
3294
3295static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3296{
3297	struct gaudi_device *gaudi = hdev->asic_specific;
3298
3299	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3300		return;
3301
3302	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3303	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3304	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3305	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3306	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3307}
3308
3309static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3310{
3311	struct gaudi_device *gaudi = hdev->asic_specific;
3312
3313	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3314		return;
3315
3316	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3317	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3318}
3319
3320static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3321{
3322	struct gaudi_device *gaudi = hdev->asic_specific;
3323	u32 tpc_offset = 0;
3324	int tpc_id;
3325
3326	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3327		return;
3328
3329	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3330		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3331		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3332	}
3333}
3334
3335static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3336{
3337	struct gaudi_device *gaudi = hdev->asic_specific;
3338	u32 nic_mask, nic_offset = 0;
3339	u32 nic_delta_between_qmans =
3340			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3341	u32 nic_delta_between_nics =
3342			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3343	int nic_id;
3344
3345	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3346		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3347
3348		if (gaudi->hw_cap_initialized & nic_mask)
3349			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3350
3351		nic_offset += nic_delta_between_qmans;
3352		if (nic_id & 1) {
3353			nic_offset -= (nic_delta_between_qmans * 2);
3354			nic_offset += nic_delta_between_nics;
3355		}
3356	}
3357}
3358
3359static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3360{
3361	struct gaudi_device *gaudi = hdev->asic_specific;
3362
3363	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3364		return;
3365
3366	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3367	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3368	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3369	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3370}
3371
3372static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3373{
3374	struct gaudi_device *gaudi = hdev->asic_specific;
3375
3376	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3377		return;
3378
3379	/* Stop CPs of HBM DMA QMANs */
3380
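	/* 0x1F stops all five CPs: the four upper (stream) CPs and the lower CP */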
3381	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3382	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3383	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3384	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386}
3387
3388static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3389{
3390	struct gaudi_device *gaudi = hdev->asic_specific;
3391
3392	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3393		return;
3394
3395	/* Stop CPs of MME QMANs */
3396	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3397	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3398}
3399
3400static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3401{
3402	struct gaudi_device *gaudi = hdev->asic_specific;
3403
3404	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3405		return;
3406
3407	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3408	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3409	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3410	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415}
3416
3417static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3418{
3419	struct gaudi_device *gaudi = hdev->asic_specific;
3420
	/* Stop the PQ fetchers, CQ fetchers and CPs of the NIC QMANs */
3422
3423	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3424		WREG32(mmNIC0_QM0_GLBL_CFG1,
3425				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3426				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3427				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3428
3429	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3430		WREG32(mmNIC0_QM1_GLBL_CFG1,
3431				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3432				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3433				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3434
3435	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3436		WREG32(mmNIC1_QM0_GLBL_CFG1,
3437				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3438				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3439				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3440
3441	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3442		WREG32(mmNIC1_QM1_GLBL_CFG1,
3443				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3444				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3445				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3446
3447	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3448		WREG32(mmNIC2_QM0_GLBL_CFG1,
3449				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3450				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3451				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3452
3453	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3454		WREG32(mmNIC2_QM1_GLBL_CFG1,
3455				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3456				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3457				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3458
3459	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3460		WREG32(mmNIC3_QM0_GLBL_CFG1,
3461				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3462				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3463				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3464
3465	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3466		WREG32(mmNIC3_QM1_GLBL_CFG1,
3467				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3468				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3469				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3470
3471	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3472		WREG32(mmNIC4_QM0_GLBL_CFG1,
3473				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3474				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3475				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3476
3477	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3478		WREG32(mmNIC4_QM1_GLBL_CFG1,
3479				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3480				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3481				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3482}
3483
3484static void gaudi_pci_dma_stall(struct hl_device *hdev)
3485{
3486	struct gaudi_device *gaudi = hdev->asic_specific;
3487
3488	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3489		return;
3490
3491	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3492	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3493	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3494}
3495
3496static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3497{
3498	struct gaudi_device *gaudi = hdev->asic_specific;
3499
3500	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3501		return;
3502
3503	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3504	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3505	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3506	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508}
3509
3510static void gaudi_mme_stall(struct hl_device *hdev)
3511{
3512	struct gaudi_device *gaudi = hdev->asic_specific;
3513
3514	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3515		return;
3516
3517	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3518	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3519	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3520	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3521	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3522	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3523	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3524	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3525	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3526	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3527	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3529	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3531	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3533	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534}
3535
3536static void gaudi_tpc_stall(struct hl_device *hdev)
3537{
3538	struct gaudi_device *gaudi = hdev->asic_specific;
3539
3540	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3541		return;
3542
3543	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3544	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3545	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3546	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551}
3552
3553static void gaudi_disable_clock_gating(struct hl_device *hdev)
3554{
3555	u32 qman_offset;
3556	int i;
3557
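	/* When FW security is enabled, the CGM registers are not accessible
	 * by the driver, so clock gating is left to the FW
	 */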
3558	if (hdev->asic_prop.fw_security_enabled)
3559		return;
3560
3561	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3562		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3563		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3564
3565		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3566	}
3567
3568	WREG32(mmMME0_QM_CGM_CFG, 0);
3569	WREG32(mmMME0_QM_CGM_CFG1, 0);
3570	WREG32(mmMME2_QM_CGM_CFG, 0);
3571	WREG32(mmMME2_QM_CGM_CFG1, 0);
3572
3573	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3574		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3575		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3576
3577		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3578	}
3579}
3580
3581static void gaudi_enable_timestamp(struct hl_device *hdev)
3582{
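	/* mmPSOC_TIMESTAMP_BASE is an absolute address, so subtract CFG_BASE
	 * to get the register offset that WREG32 expects
	 */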
3583	/* Disable the timestamp counter */
3584	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3585
3586	/* Zero the lower/upper parts of the 64-bit counter */
3587	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3588	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3589
3590	/* Enable the counter */
3591	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3592}
3593
3594static void gaudi_disable_timestamp(struct hl_device *hdev)
3595{
3596	/* Disable the timestamp counter */
3597	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3598}
3599
3600static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3601{
3602	u32 wait_timeout_ms;
3603
3604	if (hdev->pldm)
3605		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3606	else
3607		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3608
3609	if (fw_reset)
3610		goto skip_engines;
3611
3612	gaudi_stop_nic_qmans(hdev);
3613	gaudi_stop_mme_qmans(hdev);
3614	gaudi_stop_tpc_qmans(hdev);
3615	gaudi_stop_hbm_dma_qmans(hdev);
3616	gaudi_stop_pci_dma_qmans(hdev);
3617
3618	msleep(wait_timeout_ms);
3619
3620	gaudi_pci_dma_stall(hdev);
3621	gaudi_hbm_dma_stall(hdev);
3622	gaudi_tpc_stall(hdev);
3623	gaudi_mme_stall(hdev);
3624
3625	msleep(wait_timeout_ms);
3626
3627	gaudi_disable_nic_qmans(hdev);
3628	gaudi_disable_mme_qmans(hdev);
3629	gaudi_disable_tpc_qmans(hdev);
3630	gaudi_disable_hbm_dma_qmans(hdev);
3631	gaudi_disable_pci_dma_qmans(hdev);
3632
3633	gaudi_disable_timestamp(hdev);
3634
3635skip_engines:
3636	gaudi_disable_msi(hdev);
3637}
3638
3639static int gaudi_mmu_init(struct hl_device *hdev)
3640{
3641	struct asic_fixed_properties *prop = &hdev->asic_prop;
3642	struct gaudi_device *gaudi = hdev->asic_specific;
3643	u64 hop0_addr;
3644	int rc, i;
3645
3646	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3647		return 0;
3648
3649	for (i = 0 ; i < prop->max_asid ; i++) {
3650		hop0_addr = prop->mmu_pgt_addr +
3651				(i * prop->mmu_hop_table_size);
3652
3653		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3654		if (rc) {
3655			dev_err(hdev->dev,
3656				"failed to set hop0 addr for asid %d\n", i);
3657			return rc;
3658		}
3659	}
3660
	/* Init the MMU cache management page */
3662	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3663	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3664
3665	/* mem cache invalidation */
3666	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3667
3668	rc = hl_mmu_invalidate_cache(hdev, true, 0);
3669	if (rc)
3670		return rc;
3671
3672	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3673	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3674
3675	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3676
3677	/*
3678	 * The H/W expects the first PI after init to be 1. After wraparound
3679	 * we'll write 0.
3680	 */
3681	gaudi->mmu_cache_inv_pi = 1;
3682
3683	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3684
3685	return 0;
3686}
3687
3688static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3689{
3690	void __iomem *dst;
3691
3692	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3693
3694	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3695}
3696
3697static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3698{
3699	void __iomem *dst;
3700
3701	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3702
3703	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3704}
3705
3706static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3707{
3708	struct dynamic_fw_load_mgr *dynamic_loader;
3709	struct cpu_dyn_regs *dyn_regs;
3710
3711	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3712
	/*
	 * Here we update the initial values of a few specific dynamic regs,
	 * because before reading the first descriptor from the FW those
	 * values have to be hard-coded. In later stages of the protocol
	 * these values are updated automatically by reading the FW
	 * descriptor, so the data there is always up-to-date.
	 */
3720	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3721	dyn_regs->kmd_msg_to_cpu =
3722				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3723	dyn_regs->cpu_cmd_status_to_host =
3724				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3725
3726	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3727}
3728
3729static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3730{
3731	struct static_fw_load_mgr *static_loader;
3732
3733	static_loader = &hdev->fw_loader.static_loader;
3734
3735	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3736	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3737	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3738	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3739	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3740	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3741	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3742	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3743	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3744	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3745	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3746	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3747	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3748			GAUDI_PLDM_RESET_WAIT_MSEC :
3749			GAUDI_CPU_RESET_WAIT_MSEC;
3750}
3751
3752static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3753{
3754	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3755
3756	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3757	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3758	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3759	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3760	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3761	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3762}
3763
3764static void gaudi_init_firmware_loader(struct hl_device *hdev)
3765{
3766	struct asic_fixed_properties *prop = &hdev->asic_prop;
3767	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3768
3769	/* fill common fields */
3770	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3771	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3772	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3773	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3774	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3775	fw_loader->skip_bmc = !hdev->bmc_enable;
3776	fw_loader->sram_bar_id = SRAM_BAR_ID;
3777	fw_loader->dram_bar_id = HBM_BAR_ID;
3778
3779	if (prop->dynamic_fw_load)
3780		gaudi_init_dynamic_firmware_loader(hdev);
3781	else
3782		gaudi_init_static_firmware_loader(hdev);
3783}
3784
3785static int gaudi_init_cpu(struct hl_device *hdev)
3786{
3787	struct gaudi_device *gaudi = hdev->asic_specific;
3788	int rc;
3789
3790	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3791		return 0;
3792
3793	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3794		return 0;
3795
	/*
	 * The device CPU works with 40-bit addresses.
	 * This register sets the extension to 50 bits.
	 */
3800	if (!hdev->asic_prop.fw_security_enabled)
3801		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3802
3803	rc = hl_fw_init_cpu(hdev);
3804
3805	if (rc)
3806		return rc;
3807
3808	gaudi->hw_cap_initialized |= HW_CAP_CPU;
3809
3810	return 0;
3811}
3812
3813static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3814{
3815	struct cpu_dyn_regs *dyn_regs =
3816			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3817	struct asic_fixed_properties *prop = &hdev->asic_prop;
3818	struct gaudi_device *gaudi = hdev->asic_specific;
3819	u32 status, irq_handler_offset;
3820	struct hl_eq *eq;
3821	struct hl_hw_queue *cpu_pq =
3822			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3823	int err;
3824
3825	if (!hdev->cpu_queues_enable)
3826		return 0;
3827
3828	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3829		return 0;
3830
3831	eq = &hdev->event_queue;
3832
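	/* Let the device CPU know where the PQ, EQ and the CPU-accessible
	 * memory (CQ) reside in host memory, and their sizes
	 */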
3833	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3834	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3835
3836	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3837	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3838
3839	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3840			lower_32_bits(hdev->cpu_accessible_dma_address));
3841	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3842			upper_32_bits(hdev->cpu_accessible_dma_address));
3843
3844	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3845	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3846	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3847
3848	/* Used for EQ CI */
3849	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3850
3851	WREG32(mmCPU_IF_PF_PQ_PI, 0);
3852
3853	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3854
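	/* Kick the device CPU to sample the new queue configuration, either
	 * directly via the GIC or through the FW-provided register
	 */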
3855	irq_handler_offset = prop->gic_interrupts_enable ?
3856			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3857			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3858
3859	WREG32(irq_handler_offset,
3860		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3861
3862	err = hl_poll_timeout(
3863		hdev,
3864		mmCPU_IF_QUEUE_INIT,
3865		status,
3866		(status == PQ_INIT_STATUS_READY_FOR_HOST),
3867		1000,
3868		cpu_timeout);
3869
3870	if (err) {
3871		dev_err(hdev->dev,
3872			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
3873		return -EIO;
3874	}
3875
3876	/* update FW application security bits */
3877	if (prop->fw_cpu_boot_dev_sts0_valid)
3878		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3879	if (prop->fw_cpu_boot_dev_sts1_valid)
3880		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3881
3882	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3883	return 0;
3884}
3885
3886static void gaudi_pre_hw_init(struct hl_device *hdev)
3887{
3888	/* Perform read from the device to make sure device is up */
3889	RREG32(mmHW_STATE);
3890
3891	if (!hdev->asic_prop.fw_security_enabled) {
3892		/* Set the access through PCI bars (Linux driver only) as
3893		 * secured
3894		 */
3895		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3896				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3897				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3898
		/* Perform a read to flush the pending writes, to ensure the
		 * configuration was set in the device
		 */
3902		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3903	}
3904
3905	/*
3906	 * Let's mark in the H/W that we have reached this point. We check
3907	 * this value in the reset_before_init function to understand whether
3908	 * we need to reset the chip before doing H/W init. This register is
3909	 * cleared by the H/W upon H/W reset
3910	 */
3911	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3912}
3913
3914static int gaudi_hw_init(struct hl_device *hdev)
3915{
3916	struct gaudi_device *gaudi = hdev->asic_specific;
3917	int rc;
3918
3919	gaudi_pre_hw_init(hdev);
3920
	/* If the iATU is configured by the FW, the HBM BAR ALWAYS points to
	 * DRAM_PHYS_BASE. So we set it here, and if anyone tries to move it
	 * later to a different address, an error will be returned
	 */
3925	if (hdev->asic_prop.iatu_done_by_fw)
3926		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3927
	/*
	 * Before pushing u-boot/Linux to the device, we need to set the HBM
	 * BAR to the base address of the DRAM
	 */
3932	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3933		dev_err(hdev->dev,
3934			"failed to map HBM bar to DRAM base address\n");
3935		return -EIO;
3936	}
3937
3938	rc = gaudi_init_cpu(hdev);
3939	if (rc) {
3940		dev_err(hdev->dev, "failed to initialize CPU\n");
3941		return rc;
3942	}
3943
3944	/* In case the clock gating was enabled in preboot we need to disable
3945	 * it here before touching the MME/TPC registers.
3946	 */
3947	gaudi_disable_clock_gating(hdev);
3948
3949	/* SRAM scrambler must be initialized after CPU is running from HBM */
3950	gaudi_init_scrambler_sram(hdev);
3951
3952	/* This is here just in case we are working without CPU */
3953	gaudi_init_scrambler_hbm(hdev);
3954
3955	gaudi_init_golden_registers(hdev);
3956
3957	rc = gaudi_mmu_init(hdev);
3958	if (rc)
3959		return rc;
3960
3961	gaudi_init_security(hdev);
3962
3963	gaudi_init_pci_dma_qmans(hdev);
3964
3965	gaudi_init_hbm_dma_qmans(hdev);
3966
3967	gaudi_init_mme_qmans(hdev);
3968
3969	gaudi_init_tpc_qmans(hdev);
3970
3971	gaudi_init_nic_qmans(hdev);
3972
3973	gaudi_enable_timestamp(hdev);
3974
3975	/* MSI must be enabled before CPU queues and NIC are initialized */
3976	rc = gaudi_enable_msi(hdev);
3977	if (rc)
3978		goto disable_queues;
3979
3980	/* must be called after MSI was enabled */
3981	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3982	if (rc) {
3983		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3984			rc);
3985		goto disable_msi;
3986	}
3987
3988	/* Perform read from the device to flush all configuration */
3989	RREG32(mmHW_STATE);
3990
3991	return 0;
3992
3993disable_msi:
3994	gaudi_disable_msi(hdev);
3995disable_queues:
3996	gaudi_disable_mme_qmans(hdev);
3997	gaudi_disable_pci_dma_qmans(hdev);
3998
3999	return rc;
4000}
4001
4002static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4003{
4004	struct cpu_dyn_regs *dyn_regs =
4005			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4006	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4007	struct gaudi_device *gaudi = hdev->asic_specific;
4008	bool driver_performs_reset;
4009
4010	if (!hard_reset) {
4011		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4012		return 0;
4013	}
4014
4015	if (hdev->pldm) {
4016		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4017		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4018	} else {
4019		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4020		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4021	}
4022
4023	if (fw_reset) {
4024		dev_dbg(hdev->dev,
4025			"Firmware performs HARD reset, going to wait %dms\n",
4026			reset_timeout_ms);
4027
4028		goto skip_reset;
4029	}
4030
4031	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4032					!hdev->asic_prop.hard_reset_done_by_fw);
4033
4034	/* Set device to handle FLR by H/W as we will put the device CPU to
4035	 * halt mode
4036	 */
4037	if (driver_performs_reset)
4038		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4039					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4040
	/* If Linux is loaded in the device CPU, we need to communicate with
	 * it via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
	 * registers in case of old F/Ws
	 */
4045	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4046		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4047				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4048				le32_to_cpu(dyn_regs->gic_host_halt_irq);
4049
4050		WREG32(irq_handler_offset,
4051			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4052
4053		/* This is a hail-mary attempt to revive the card in the small chance that the
4054		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4055		 * In that case, triggering reset through GIC won't help. We need to trigger the
4056		 * reset as if Linux wasn't loaded.
4057		 *
4058		 * We do it only if the reset cause was HB, because that would be the indication
4059		 * of such an event.
4060		 *
4061		 * In case watchdog hasn't expired but we still got HB, then this won't do any
4062		 * damage.
4063		 */
4064		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4065			if (hdev->asic_prop.hard_reset_done_by_fw)
4066				hl_fw_ask_hard_reset_without_linux(hdev);
4067			else
4068				hl_fw_ask_halt_machine_without_linux(hdev);
4069		}
4070	} else {
4071		if (hdev->asic_prop.hard_reset_done_by_fw)
4072			hl_fw_ask_hard_reset_without_linux(hdev);
4073		else
4074			hl_fw_ask_halt_machine_without_linux(hdev);
4075	}
4076
4077	if (driver_performs_reset) {
4078
4079		/* Configure the reset registers. Must be done as early as
4080		 * possible in case we fail during H/W initialization
4081		 */
4082		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4083						(CFG_RST_H_DMA_MASK |
4084						CFG_RST_H_MME_MASK |
4085						CFG_RST_H_SM_MASK |
4086						CFG_RST_H_TPC_7_MASK));
4087
4088		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4089
4090		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4091						(CFG_RST_H_HBM_MASK |
4092						CFG_RST_H_TPC_7_MASK |
4093						CFG_RST_H_NIC_MASK |
4094						CFG_RST_H_SM_MASK |
4095						CFG_RST_H_DMA_MASK |
4096						CFG_RST_H_MME_MASK |
4097						CFG_RST_H_CPU_MASK |
4098						CFG_RST_H_MMU_MASK));
4099
4100		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4101						(CFG_RST_L_IF_MASK |
4102						CFG_RST_L_PSOC_MASK |
4103						CFG_RST_L_TPC_MASK));
4104
4105		msleep(cpu_timeout_ms);
4106
4107		/* Tell ASIC not to re-initialize PCIe */
4108		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4109
4110		/* Restart BTL/BLR upon hard-reset */
4111		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4112
4113		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4114			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4115
4116		dev_dbg(hdev->dev,
4117			"Issued HARD reset command, going to wait %dms\n",
4118			reset_timeout_ms);
4119	} else {
4120		dev_dbg(hdev->dev,
4121			"Firmware performs HARD reset, going to wait %dms\n",
4122			reset_timeout_ms);
4123	}
4124
4125skip_reset:
4126	/*
4127	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4128	 * itself is in reset. Need to wait until the reset is deasserted
4129	 */
4130	msleep(reset_timeout_ms);
4131
4132	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4133	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4134		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4135		return -ETIMEDOUT;
4136	}
4137
4138	if (gaudi) {
4139		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4140						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4141						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4142						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4143						HW_CAP_HBM_SCRAMBLER);
4144
4145		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4146
4147		hdev->device_cpu_is_halted = false;
4148	}
4149	return 0;
4150}
4151
4152static int gaudi_suspend(struct hl_device *hdev)
4153{
4154	int rc;
4155
4156	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4157	if (rc)
4158		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4159
4160	return rc;
4161}
4162
4163static int gaudi_resume(struct hl_device *hdev)
4164{
4165	return gaudi_init_iatu(hdev);
4166}
4167
4168static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4169			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4170{
4171	int rc;
4172
4173	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4174			VM_DONTCOPY | VM_NORESERVE);
4175
4176	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4177				(dma_addr - HOST_PHYS_BASE), size);
4178	if (rc)
		dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
4180
4181	return rc;
4182}
4183
4184static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4185{
4186	struct cpu_dyn_regs *dyn_regs =
4187			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4188	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4189	struct gaudi_device *gaudi = hdev->asic_specific;
4190	bool invalid_queue = false;
4191	int dma_id;
4192
4193	switch (hw_queue_id) {
4194	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4195		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4196		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4197		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4198		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4199		break;
4200
4201	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4202		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4203		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4204		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4205		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4206		break;
4207
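	/* GAUDI_QUEUE_ID_CPU_PQ sits between DMA_1_3 and DMA_2_0 in the queue
	 * ID enum, hence the (hw_queue_id - 1) below when extracting the
	 * stream index
	 */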
4208	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4209		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4210		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4211		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4212		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4213		break;
4214
4215	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4216		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4217		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4218		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4219		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4220		break;
4221
4222	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4223		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4224		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4225		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4226		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4227		break;
4228
4229	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4230		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4231		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4232		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4233		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4234		break;
4235
4236	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4237		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4238		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4239		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4240		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4241		break;
4242
4243	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4244		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4245		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4246		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4247		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4248		break;
4249
4250	case GAUDI_QUEUE_ID_CPU_PQ:
4251		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4252			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4253		else
4254			invalid_queue = true;
4255		break;
4256
4257	case GAUDI_QUEUE_ID_MME_0_0:
4258		db_reg_offset = mmMME2_QM_PQ_PI_0;
4259		break;
4260
4261	case GAUDI_QUEUE_ID_MME_0_1:
4262		db_reg_offset = mmMME2_QM_PQ_PI_1;
4263		break;
4264
4265	case GAUDI_QUEUE_ID_MME_0_2:
4266		db_reg_offset = mmMME2_QM_PQ_PI_2;
4267		break;
4268
4269	case GAUDI_QUEUE_ID_MME_0_3:
4270		db_reg_offset = mmMME2_QM_PQ_PI_3;
4271		break;
4272
4273	case GAUDI_QUEUE_ID_MME_1_0:
4274		db_reg_offset = mmMME0_QM_PQ_PI_0;
4275		break;
4276
4277	case GAUDI_QUEUE_ID_MME_1_1:
4278		db_reg_offset = mmMME0_QM_PQ_PI_1;
4279		break;
4280
4281	case GAUDI_QUEUE_ID_MME_1_2:
4282		db_reg_offset = mmMME0_QM_PQ_PI_2;
4283		break;
4284
4285	case GAUDI_QUEUE_ID_MME_1_3:
4286		db_reg_offset = mmMME0_QM_PQ_PI_3;
4287		break;
4288
4289	case GAUDI_QUEUE_ID_TPC_0_0:
4290		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4291		break;
4292
4293	case GAUDI_QUEUE_ID_TPC_0_1:
4294		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4295		break;
4296
4297	case GAUDI_QUEUE_ID_TPC_0_2:
4298		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4299		break;
4300
4301	case GAUDI_QUEUE_ID_TPC_0_3:
4302		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4303		break;
4304
4305	case GAUDI_QUEUE_ID_TPC_1_0:
4306		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4307		break;
4308
4309	case GAUDI_QUEUE_ID_TPC_1_1:
4310		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4311		break;
4312
4313	case GAUDI_QUEUE_ID_TPC_1_2:
4314		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4315		break;
4316
4317	case GAUDI_QUEUE_ID_TPC_1_3:
4318		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4319		break;
4320
4321	case GAUDI_QUEUE_ID_TPC_2_0:
4322		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4323		break;
4324
4325	case GAUDI_QUEUE_ID_TPC_2_1:
4326		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4327		break;
4328
4329	case GAUDI_QUEUE_ID_TPC_2_2:
4330		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4331		break;
4332
4333	case GAUDI_QUEUE_ID_TPC_2_3:
4334		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4335		break;
4336
4337	case GAUDI_QUEUE_ID_TPC_3_0:
4338		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4339		break;
4340
4341	case GAUDI_QUEUE_ID_TPC_3_1:
4342		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4343		break;
4344
4345	case GAUDI_QUEUE_ID_TPC_3_2:
4346		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4347		break;
4348
4349	case GAUDI_QUEUE_ID_TPC_3_3:
4350		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4351		break;
4352
4353	case GAUDI_QUEUE_ID_TPC_4_0:
4354		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4355		break;
4356
4357	case GAUDI_QUEUE_ID_TPC_4_1:
4358		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4359		break;
4360
4361	case GAUDI_QUEUE_ID_TPC_4_2:
4362		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4363		break;
4364
4365	case GAUDI_QUEUE_ID_TPC_4_3:
4366		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4367		break;
4368
4369	case GAUDI_QUEUE_ID_TPC_5_0:
4370		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4371		break;
4372
4373	case GAUDI_QUEUE_ID_TPC_5_1:
4374		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4375		break;
4376
4377	case GAUDI_QUEUE_ID_TPC_5_2:
4378		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4379		break;
4380
4381	case GAUDI_QUEUE_ID_TPC_5_3:
4382		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4383		break;
4384
4385	case GAUDI_QUEUE_ID_TPC_6_0:
4386		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4387		break;
4388
4389	case GAUDI_QUEUE_ID_TPC_6_1:
4390		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4391		break;
4392
4393	case GAUDI_QUEUE_ID_TPC_6_2:
4394		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4395		break;
4396
4397	case GAUDI_QUEUE_ID_TPC_6_3:
4398		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4399		break;
4400
4401	case GAUDI_QUEUE_ID_TPC_7_0:
4402		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4403		break;
4404
4405	case GAUDI_QUEUE_ID_TPC_7_1:
4406		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4407		break;
4408
4409	case GAUDI_QUEUE_ID_TPC_7_2:
4410		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4411		break;
4412
4413	case GAUDI_QUEUE_ID_TPC_7_3:
4414		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4415		break;
4416
4417	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4418		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4419			invalid_queue = true;
4420
4421		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4422		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4423		break;
4424
4425	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4426		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4427			invalid_queue = true;
4428
4429		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4430		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4431		break;
4432
4433	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4434		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4435			invalid_queue = true;
4436
4437		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4438		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4439		break;
4440
4441	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4442		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4443			invalid_queue = true;
4444
4445		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4446		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4447		break;
4448
4449	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4450		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4451			invalid_queue = true;
4452
4453		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4454		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4455		break;
4456
4457	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4458		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4459			invalid_queue = true;
4460
4461		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4462		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4463		break;
4464
4465	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4466		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4467			invalid_queue = true;
4468
4469		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4470		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4471		break;
4472
4473	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4474		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4475			invalid_queue = true;
4476
4477		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4478		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4479		break;
4480
4481	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4482		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4483			invalid_queue = true;
4484
4485		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4486		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4487		break;
4488
4489	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4490		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4491			invalid_queue = true;
4492
4493		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4494		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4495		break;
4496
4497	default:
4498		invalid_queue = true;
4499	}
4500
4501	if (invalid_queue) {
4502		/* Should never get here */
4503		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4504			hw_queue_id);
4505		return;
4506	}
4507
4508	db_value = pi;
4509
4510	/* ring the doorbell */
4511	WREG32(db_reg_offset, db_value);
4512
4513	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4514		/* make sure device CPU will read latest data from host */
4515		mb();
4516
4517		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4518				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4519				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4520
4521		WREG32(irq_handler_offset,
4522			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4523	}
4524}
4525
4526static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4527				struct hl_bd *bd)
4528{
4529	__le64 *pbd = (__le64 *) bd;
4530
	/* These queues' PQs reside in host memory, so a simple copy suffices */
4532	pqe[0] = pbd[0];
4533	pqe[1] = pbd[1];
4534}
4535
4536static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4537					dma_addr_t *dma_handle, gfp_t flags)
4538{
4539	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4540						dma_handle, flags);
4541
4542	/* Shift to the device's base physical address of host memory */
4543	if (kernel_addr)
4544		*dma_handle += HOST_PHYS_BASE;
4545
4546	return kernel_addr;
4547}
4548
4549static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4550		void *cpu_addr, dma_addr_t dma_handle)
4551{
4552	/* Cancel the device's base physical address of host memory */
4553	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4554
4555	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4556}
4557
4558static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4559{
4560	struct asic_fixed_properties *prop = &hdev->asic_prop;
4561	u64 cur_addr = prop->dram_user_base_address;
4562	u32 chunk_size, busy;
4563	int rc, dma_id;
4564
4565	while (cur_addr < prop->dram_end_address) {
4566		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4567			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4568
4569			chunk_size =
4570			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4571
4572			dev_dbg(hdev->dev,
4573				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4574				cur_addr, cur_addr + chunk_size);
4575
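			/* Program a memset-like DMA: 'val' is the fill
			 * pattern, cur_addr/chunk_size is the destination,
			 * committed with the MEM_SET bit
			 */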
4576			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4577					lower_32_bits(val));
4578			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4579					upper_32_bits(val));
4580			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4581						lower_32_bits(cur_addr));
4582			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4583						upper_32_bits(cur_addr));
4584			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4585					chunk_size);
4586			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4587					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4588					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4589
4590			cur_addr += chunk_size;
4591
4592			if (cur_addr == prop->dram_end_address)
4593				break;
4594		}
4595
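		/* Wait for all DMA channels to finish scrubbing their chunks */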
4596		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4597			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4598
4599			rc = hl_poll_timeout(
4600				hdev,
4601				mmDMA0_CORE_STS0 + dma_offset,
4602				busy,
4603				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4604				1000,
4605				HBM_SCRUBBING_TIMEOUT_US);
4606
4607			if (rc) {
4608				dev_err(hdev->dev,
4609					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4610					dma_id);
4611				return -EIO;
4612			}
4613		}
4614	}
4615
4616	return 0;
4617}
4618
4619static int gaudi_scrub_device_mem(struct hl_device *hdev)
4620{
4621	struct asic_fixed_properties *prop = &hdev->asic_prop;
4622	u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4623			min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4624	u64 addr, size, val = hdev->memory_scrub_val;
4625	ktime_t timeout;
4626	int rc = 0;
4627
4628	if (!hdev->memory_scrub)
4629		return 0;
4630
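	/* Wait (up to wait_to_idle_time) for the device to become idle before scrubbing */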
4631	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4632	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4633		if (ktime_compare(ktime_get(), timeout) > 0) {
4634			dev_err(hdev->dev, "waiting for idle timeout\n");
4635			return -ETIMEDOUT;
4636		}
4637		usleep_range((1000 >> 2) + 1, 1000);
4638	}
4639
4640	/* Scrub SRAM */
4641	addr = prop->sram_user_base_address;
4642	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4643
4644	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4645			addr, addr + size, val);
4646	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4647	if (rc) {
4648		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4649		return rc;
4650	}
4651
4652	/* Scrub HBM using all DMA channels in parallel */
4653	rc = gaudi_scrub_device_dram(hdev, val);
4654	if (rc) {
4655		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4656		return rc;
4657	}
4658
4659	return 0;
4660}
4661
4662static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4663				u32 queue_id, dma_addr_t *dma_handle,
4664				u16 *queue_len)
4665{
4666	struct gaudi_device *gaudi = hdev->asic_specific;
4667	struct gaudi_internal_qman_info *q;
4668
4669	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4670			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4671		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4672		return NULL;
4673	}
4674
4675	q = &gaudi->internal_qmans[queue_id];
4676	*dma_handle = q->pq_dma_addr;
4677	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4678
4679	return q->pq_kernel_addr;
4680}
4681
4682static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4683				u16 len, u32 timeout, u64 *result)
4684{
4685	struct gaudi_device *gaudi = hdev->asic_specific;
4686
4687	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4688		if (result)
4689			*result = 0;
4690		return 0;
4691	}
4692
4693	if (!timeout)
4694		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4695
4696	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4697						timeout, result);
4698}
4699
4700static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4701{
4702	struct packet_msg_prot *fence_pkt;
4703	dma_addr_t pkt_dma_addr;
4704	u32 fence_val, tmp, timeout_usec;
4705	dma_addr_t fence_dma_addr;
4706	u32 *fence_ptr;
4707	int rc;
4708
4709	if (hdev->pldm)
4710		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4711	else
4712		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4713
4714	fence_val = GAUDI_QMAN0_FENCE_VAL;
4715
4716	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4717	if (!fence_ptr) {
4718		dev_err(hdev->dev,
4719			"Failed to allocate memory for H/W queue %d testing\n",
4720			hw_queue_id);
4721		return -ENOMEM;
4722	}
4723
4724	*fence_ptr = 0;
4725
4726	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4727						&pkt_dma_addr);
4728	if (!fence_pkt) {
4729		dev_err(hdev->dev,
4730			"Failed to allocate packet for H/W queue %d testing\n",
4731			hw_queue_id);
4732		rc = -ENOMEM;
4733		goto free_fence_ptr;
4734	}
4735
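	/* Build a MSG_PROT packet that writes fence_val to fence_dma_addr.
	 * Polling that address below tells us the queue consumed the packet
	 */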
4736	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4737	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4738	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4739
4740	fence_pkt->ctl = cpu_to_le32(tmp);
4741	fence_pkt->value = cpu_to_le32(fence_val);
4742	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4743
4744	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4745					sizeof(struct packet_msg_prot),
4746					pkt_dma_addr);
4747	if (rc) {
4748		dev_err(hdev->dev,
4749			"Failed to send fence packet to H/W queue %d\n",
4750			hw_queue_id);
4751		goto free_pkt;
4752	}
4753
4754	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4755					1000, timeout_usec, true);
4756
4757	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4758
4759	if (rc == -ETIMEDOUT) {
4760		dev_err(hdev->dev,
4761			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4762			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4763		rc = -EIO;
4764	}
4765
4766free_pkt:
4767	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4768free_fence_ptr:
4769	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4770	return rc;
4771}
4772
4773static int gaudi_test_cpu_queue(struct hl_device *hdev)
4774{
4775	struct gaudi_device *gaudi = hdev->asic_specific;
4776
	/*
	 * Check the capability here because send_cpu_message() won't update
	 * the result value if the CPU queue capability is not initialized
	 */
4781	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4782		return 0;
4783
4784	return hl_fw_test_cpu_queue(hdev);
4785}
4786
4787static int gaudi_test_queues(struct hl_device *hdev)
4788{
4789	int i, rc, ret_val = 0;
4790
4791	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4792		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4793			rc = gaudi_test_queue(hdev, i);
4794			if (rc)
4795				ret_val = -EINVAL;
4796		}
4797	}
4798
4799	rc = gaudi_test_cpu_queue(hdev);
4800	if (rc)
4801		ret_val = -EINVAL;
4802
4803	return ret_val;
4804}
4805
4806static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4807		gfp_t mem_flags, dma_addr_t *dma_handle)
4808{
4809	void *kernel_addr;
4810
4811	if (size > GAUDI_DMA_POOL_BLK_SIZE)
4812		return NULL;
4813
4814	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4815
4816	/* Shift to the device's base physical address of host memory */
4817	if (kernel_addr)
4818		*dma_handle += HOST_PHYS_BASE;
4819
4820	return kernel_addr;
4821}
4822
4823static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4824			dma_addr_t dma_addr)
4825{
4826	/* Cancel the device's base physical address of host memory */
4827	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4828
4829	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4830}
4831
4832static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4833					size_t size, dma_addr_t *dma_handle)
4834{
4835	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4836}
4837
4838static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4839						size_t size, void *vaddr)
4840{
4841	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4842}
4843
4844static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4845{
4846	struct scatterlist *sg, *sg_next_iter;
4847	u32 count, dma_desc_cnt;
4848	u64 len, len_next;
4849	dma_addr_t addr, addr_next;
4850
4851	dma_desc_cnt = 0;
4852
4853	for_each_sgtable_dma_sg(sgt, sg, count) {
4854		len = sg_dma_len(sg);
4855		addr = sg_dma_address(sg);
4856
4857		if (len == 0)
4858			break;
4859
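		/* Merge physically contiguous entries as long as the combined
		 * length fits in a single LIN_DMA transfer
		 */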
4860		while ((count + 1) < sgt->nents) {
4861			sg_next_iter = sg_next(sg);
4862			len_next = sg_dma_len(sg_next_iter);
4863			addr_next = sg_dma_address(sg_next_iter);
4864
4865			if (len_next == 0)
4866				break;
4867
4868			if ((addr + len == addr_next) &&
4869				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4870				len += len_next;
4871				count++;
4872				sg = sg_next_iter;
4873			} else {
4874				break;
4875			}
4876		}
4877
4878		dma_desc_cnt++;
4879	}
4880
4881	return dma_desc_cnt * sizeof(struct packet_lin_dma);
4882}
4883
4884static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4885				struct hl_cs_parser *parser,
4886				struct packet_lin_dma *user_dma_pkt,
4887				u64 addr, enum dma_data_direction dir)
4888{
4889	struct hl_userptr *userptr;
4890	int rc;
4891
4892	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4893			parser->job_userptr_list, &userptr))
4894		goto already_pinned;
4895
4896	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4897	if (!userptr)
4898		return -ENOMEM;
4899
4900	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4901				userptr);
4902	if (rc)
4903		goto free_userptr;
4904
4905	list_add_tail(&userptr->job_node, parser->job_userptr_list);
4906
4907	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4908	if (rc) {
4909		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4910		goto unpin_memory;
4911	}
4912
4913	userptr->dma_mapped = true;
4914	userptr->dir = dir;
4915
4916already_pinned:
4917	parser->patched_cb_size +=
4918			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4919
4920	return 0;
4921
4922unpin_memory:
4923	list_del(&userptr->job_node);
4924	hl_unpin_host_memory(hdev, userptr);
4925free_userptr:
4926	kfree(userptr);
4927	return rc;
4928}
4929
4930static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4931				struct hl_cs_parser *parser,
4932				struct packet_lin_dma *user_dma_pkt,
4933				bool src_in_host)
4934{
4935	enum dma_data_direction dir;
4936	bool skip_host_mem_pin = false, user_memset;
4937	u64 addr;
4938	int rc = 0;
4939
4940	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4941			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4942			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4943
4944	if (src_in_host) {
4945		if (user_memset)
4946			skip_host_mem_pin = true;
4947
4948		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4949		dir = DMA_TO_DEVICE;
4950		addr = le64_to_cpu(user_dma_pkt->src_addr);
4951	} else {
4952		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4953		dir = DMA_FROM_DEVICE;
4954		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4955				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4956				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4957	}
4958
4959	if (skip_host_mem_pin)
4960		parser->patched_cb_size += sizeof(*user_dma_pkt);
4961	else
4962		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4963						addr, dir);
4964
4965	return rc;
4966}
4967
4968static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4969				struct hl_cs_parser *parser,
4970				struct packet_lin_dma *user_dma_pkt)
4971{
4972	bool src_in_host = false;
4973	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4974			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4975			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4976
4977	dev_dbg(hdev->dev, "DMA packet details:\n");
4978	dev_dbg(hdev->dev, "source == 0x%llx\n",
4979				le64_to_cpu(user_dma_pkt->src_addr));
4980	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4981	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4982
4983	/*
4984	 * Special handling for DMA with size 0. Bypass all validations
4985	 * because no transactions will be done except for WR_COMP, which
4986	 * is not a security issue
4987	 */
4988	if (!le32_to_cpu(user_dma_pkt->tsize)) {
4989		parser->patched_cb_size += sizeof(*user_dma_pkt);
4990		return 0;
4991	}
4992
4993	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4994		src_in_host = true;
4995
4996	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4997						src_in_host);
4998}
4999
5000static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5001					struct hl_cs_parser *parser,
5002					struct packet_load_and_exe *user_pkt)
5003{
5004	u32 cfg;
5005
5006	cfg = le32_to_cpu(user_pkt->cfg);
5007
5008	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5009		dev_err(hdev->dev,
5010			"User not allowed to use Load and Execute\n");
5011		return -EPERM;
5012	}
5013
5014	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5015
5016	return 0;
5017}
5018
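/*
 * First pass over the user CB: verify packet ids and CB boundaries, reject
 * packets the user is not allowed to submit and compute the patched CB size.
 * With MMU enabled, LIN_DMA packets are counted as-is; without MMU, the host
 * memory they reference is pinned and accounted for descriptor by descriptor.
 */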
5019static int gaudi_validate_cb(struct hl_device *hdev,
5020			struct hl_cs_parser *parser, bool is_mmu)
5021{
5022	u32 cb_parsed_length = 0;
5023	int rc = 0;
5024
5025	parser->patched_cb_size = 0;
5026
5027	/* user_cb_size is more than 0 so the loop will always be executed */
5028	while (cb_parsed_length < parser->user_cb_size) {
5029		enum packet_id pkt_id;
5030		u16 pkt_size;
5031		struct gaudi_packet *user_pkt;
5032
5033		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5034
5035		pkt_id = (enum packet_id) (
5036				(le64_to_cpu(user_pkt->header) &
5037				PACKET_HEADER_PACKET_ID_MASK) >>
5038					PACKET_HEADER_PACKET_ID_SHIFT);
5039
5040		if (!validate_packet_id(pkt_id)) {
5041			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5042			rc = -EINVAL;
5043			break;
5044		}
5045
5046		pkt_size = gaudi_packet_sizes[pkt_id];
5047		cb_parsed_length += pkt_size;
5048		if (cb_parsed_length > parser->user_cb_size) {
5049			dev_err(hdev->dev,
5050				"packet 0x%x is out of CB boundary\n", pkt_id);
5051			rc = -EINVAL;
5052			break;
5053		}
5054
5055		switch (pkt_id) {
5056		case PACKET_MSG_PROT:
5057			dev_err(hdev->dev,
5058				"User not allowed to use MSG_PROT\n");
5059			rc = -EPERM;
5060			break;
5061
5062		case PACKET_CP_DMA:
5063			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5064			rc = -EPERM;
5065			break;
5066
5067		case PACKET_STOP:
5068			dev_err(hdev->dev, "User not allowed to use STOP\n");
5069			rc = -EPERM;
5070			break;
5071
5072		case PACKET_WREG_BULK:
5073			dev_err(hdev->dev,
5074				"User not allowed to use WREG_BULK\n");
5075			rc = -EPERM;
5076			break;
5077
5078		case PACKET_LOAD_AND_EXE:
5079			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5080				(struct packet_load_and_exe *) user_pkt);
5081			break;
5082
5083		case PACKET_LIN_DMA:
5084			parser->contains_dma_pkt = true;
5085			if (is_mmu)
5086				parser->patched_cb_size += pkt_size;
5087			else
5088				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5089					(struct packet_lin_dma *) user_pkt);
5090			break;
5091
5092		case PACKET_WREG_32:
5093		case PACKET_MSG_LONG:
5094		case PACKET_MSG_SHORT:
5095		case PACKET_REPEAT:
5096		case PACKET_FENCE:
5097		case PACKET_NOP:
5098		case PACKET_ARB_POINT:
5099			parser->patched_cb_size += pkt_size;
5100			break;
5101
5102		default:
5103			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5104				pkt_id);
5105			rc = -EINVAL;
5106			break;
5107		}
5108
5109		if (rc)
5110			break;
5111	}
5112
5113	/*
5114	 * The new CB should have space at the end for:
5115	 * 1. Optional NOP padding for cacheline alignment
5116	 * 2. A MSG_PROT packet that will act as a completion packet
5117	 * 3. A MSG_PROT packet that will generate the MSI interrupt
5118	 */
5119	if (parser->completion)
5120		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5121			parser->patched_cb_size);
5122
5123	return rc;
5124}
5125
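/*
 * Expand a single user LIN_DMA packet that accesses host memory into one
 * LIN_DMA packet per (merged) SG entry of the pinned buffer. A host-to-device
 * memset packet is copied as-is.
 */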
5126static int gaudi_patch_dma_packet(struct hl_device *hdev,
5127				struct hl_cs_parser *parser,
5128				struct packet_lin_dma *user_dma_pkt,
5129				struct packet_lin_dma *new_dma_pkt,
5130				u32 *new_dma_pkt_size)
5131{
5132	struct hl_userptr *userptr;
5133	struct scatterlist *sg, *sg_next_iter;
5134	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5135	u64 len, len_next;
5136	dma_addr_t dma_addr, dma_addr_next;
5137	u64 device_memory_addr, addr;
5138	enum dma_data_direction dir;
5139	struct sg_table *sgt;
5140	bool src_in_host = false;
5141	bool skip_host_mem_pin = false;
5142	bool user_memset;
5143
5144	ctl = le32_to_cpu(user_dma_pkt->ctl);
5145
5146	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5147		src_in_host = true;
5148
5149	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5150			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5151
5152	if (src_in_host) {
5153		addr = le64_to_cpu(user_dma_pkt->src_addr);
5154		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5155		dir = DMA_TO_DEVICE;
5156		if (user_memset)
5157			skip_host_mem_pin = true;
5158	} else {
5159		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5160		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5161		dir = DMA_FROM_DEVICE;
5162	}
5163
5164	if ((!skip_host_mem_pin) &&
5165		(!hl_userptr_is_pinned(hdev, addr,
5166					le32_to_cpu(user_dma_pkt->tsize),
5167					parser->job_userptr_list, &userptr))) {
5168		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5169				addr, le32_to_cpu(user_dma_pkt->tsize));
5170		return -EFAULT;
5171	}
5172
5173	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5174		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5175		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5176		return 0;
5177	}
5178
5179	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5180
5181	sgt = userptr->sgt;
5182	dma_desc_cnt = 0;
5183
5184	for_each_sgtable_dma_sg(sgt, sg, count) {
5185		len = sg_dma_len(sg);
5186		dma_addr = sg_dma_address(sg);
5187
5188		if (len == 0)
5189			break;
5190
5191		while ((count + 1) < sgt->nents) {
5192			sg_next_iter = sg_next(sg);
5193			len_next = sg_dma_len(sg_next_iter);
5194			dma_addr_next = sg_dma_address(sg_next_iter);
5195
5196			if (len_next == 0)
5197				break;
5198
5199			if ((dma_addr + len == dma_addr_next) &&
5200				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5201				len += len_next;
5202				count++;
5203				sg = sg_next_iter;
5204			} else {
5205				break;
5206			}
5207		}
5208
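		/*
		 * Only the first generated packet keeps the engine-barrier
		 * bit, and WR_COMP is disabled on all packets; the last one
		 * gets the user's WR_COMP setting restored after the loop.
		 */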
5209		ctl = le32_to_cpu(user_dma_pkt->ctl);
5210		if (likely(dma_desc_cnt))
5211			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5212		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5213		new_dma_pkt->ctl = cpu_to_le32(ctl);
5214		new_dma_pkt->tsize = cpu_to_le32(len);
5215
5216		if (dir == DMA_TO_DEVICE) {
5217			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5218			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5219		} else {
5220			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5221			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5222		}
5223
5224		if (!user_memset)
5225			device_memory_addr += len;
5226		dma_desc_cnt++;
5227		new_dma_pkt++;
5228	}
5229
5230	if (!dma_desc_cnt) {
5231		dev_err(hdev->dev,
5232			"No SG entries found when patching DMA packet\n");
5233		return -EFAULT;
5234	}
5235
5236	/* Fix the last dma packet - wrcomp must be as user set it */
5237	new_dma_pkt--;
5238	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5239
5240	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5241
5242	return 0;
5243}
5244
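/*
 * Second pass over the user CB (no-MMU path): copy packets into the patched
 * CB, expanding each LIN_DMA packet that accesses host memory into
 * per-SG-entry packets.
 */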
5245static int gaudi_patch_cb(struct hl_device *hdev,
5246				struct hl_cs_parser *parser)
5247{
5248	u32 cb_parsed_length = 0;
5249	u32 cb_patched_cur_length = 0;
5250	int rc = 0;
5251
5252	/* user_cb_size is more than 0 so the loop will always be executed */
5253	while (cb_parsed_length < parser->user_cb_size) {
5254		enum packet_id pkt_id;
5255		u16 pkt_size;
5256		u32 new_pkt_size = 0;
5257		struct gaudi_packet *user_pkt, *kernel_pkt;
5258
5259		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5260		kernel_pkt = parser->patched_cb->kernel_address +
5261					cb_patched_cur_length;
5262
5263		pkt_id = (enum packet_id) (
5264				(le64_to_cpu(user_pkt->header) &
5265				PACKET_HEADER_PACKET_ID_MASK) >>
5266					PACKET_HEADER_PACKET_ID_SHIFT);
5267
5268		if (!validate_packet_id(pkt_id)) {
5269			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5270			rc = -EINVAL;
5271			break;
5272		}
5273
5274		pkt_size = gaudi_packet_sizes[pkt_id];
5275		cb_parsed_length += pkt_size;
5276		if (cb_parsed_length > parser->user_cb_size) {
5277			dev_err(hdev->dev,
5278				"packet 0x%x is out of CB boundary\n", pkt_id);
5279			rc = -EINVAL;
5280			break;
5281		}
5282
5283		switch (pkt_id) {
5284		case PACKET_LIN_DMA:
5285			rc = gaudi_patch_dma_packet(hdev, parser,
5286					(struct packet_lin_dma *) user_pkt,
5287					(struct packet_lin_dma *) kernel_pkt,
5288					&new_pkt_size);
5289			cb_patched_cur_length += new_pkt_size;
5290			break;
5291
5292		case PACKET_MSG_PROT:
5293			dev_err(hdev->dev,
5294				"User not allowed to use MSG_PROT\n");
5295			rc = -EPERM;
5296			break;
5297
5298		case PACKET_CP_DMA:
5299			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5300			rc = -EPERM;
5301			break;
5302
5303		case PACKET_STOP:
5304			dev_err(hdev->dev, "User not allowed to use STOP\n");
5305			rc = -EPERM;
5306			break;
5307
5308		case PACKET_WREG_32:
5309		case PACKET_WREG_BULK:
5310		case PACKET_MSG_LONG:
5311		case PACKET_MSG_SHORT:
5312		case PACKET_REPEAT:
5313		case PACKET_FENCE:
5314		case PACKET_NOP:
5315		case PACKET_ARB_POINT:
5316		case PACKET_LOAD_AND_EXE:
5317			memcpy(kernel_pkt, user_pkt, pkt_size);
5318			cb_patched_cur_length += pkt_size;
5319			break;
5320
5321		default:
5322			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5323				pkt_id);
5324			rc = -EINVAL;
5325			break;
5326		}
5327
5328		if (rc)
5329			break;
5330	}
5331
5332	return rc;
5333}
5334
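/*
 * MMU path: the user CB is copied as-is into a larger patched CB (leaving
 * room for the end-of-CB packets) and then validated. LIN_DMA packets need
 * no expansion because host memory is accessed through the MMU.
 */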
5335static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5336		struct hl_cs_parser *parser)
5337{
5338	u64 handle;
5339	u32 patched_cb_size;
5340	struct hl_cb *user_cb;
5341	int rc;
5342
5343	/*
5344	 * The new CB should have space at the end for:
5345	 * 1. Optional NOP padding for cacheline alignment
5346	 * 2. A MSG_PROT packet that will act as a completion packet
5347	 * 3. A MSG_PROT packet that will generate the MSI interrupt
5348	 */
5349	if (parser->completion)
5350		parser->patched_cb_size = parser->user_cb_size +
5351				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5352	else
5353		parser->patched_cb_size = parser->user_cb_size;
5354
5355	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5356				parser->patched_cb_size, false, false,
5357				&handle);
5358
5359	if (rc) {
5360		dev_err(hdev->dev,
5361			"Failed to allocate patched CB for DMA CS %d\n",
5362			rc);
5363		return rc;
5364	}
5365
5366	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5367	/* hl_cb_get should never fail */
5368	if (!parser->patched_cb) {
5369		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5370		rc = -EFAULT;
5371		goto out;
5372	}
5373
5374	/*
5375	 * We are protected from overflow because the check
5376	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5377	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5378	 *
5379	 * There is no option to reach here without going through that check because:
5380	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5381	 *    an external queue.
5382	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5383	 */
5384	memcpy(parser->patched_cb->kernel_address,
5385		parser->user_cb->kernel_address,
5386		parser->user_cb_size);
5387
5388	patched_cb_size = parser->patched_cb_size;
5389
5390	/* Validate patched CB instead of user CB */
5391	user_cb = parser->user_cb;
5392	parser->user_cb = parser->patched_cb;
5393	rc = gaudi_validate_cb(hdev, parser, true);
5394	parser->user_cb = user_cb;
5395
5396	if (rc) {
5397		hl_cb_put(parser->patched_cb);
5398		goto out;
5399	}
5400
5401	if (patched_cb_size != parser->patched_cb_size) {
5402		dev_err(hdev->dev, "patched CB size mismatch\n");
5403		hl_cb_put(parser->patched_cb);
5404		rc = -EINVAL;
5405		goto out;
5406	}
5407
5408out:
5409	/*
5410	 * Always call cb destroy here because we still hold one reference
5411	 * to it from the earlier cb_get. After the job completes, cb_put
5412	 * will release it, but here we only want to remove it from the
5413	 * idr
5414	 */
5415	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5416
5417	return rc;
5418}
5419
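/*
 * No-MMU path: validate the user CB, then build a patched CB in which
 * LIN_DMA packets that access host memory are expanded against the pinned
 * user pages.
 */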
5420static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5421		struct hl_cs_parser *parser)
5422{
5423	u64 handle;
5424	int rc;
5425
5426	rc = gaudi_validate_cb(hdev, parser, false);
5427
5428	if (rc)
5429		goto free_userptr;
5430
5431	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5432				parser->patched_cb_size, false, false,
5433				&handle);
5434	if (rc) {
5435		dev_err(hdev->dev,
5436			"Failed to allocate patched CB for DMA CS %d\n", rc);
5437		goto free_userptr;
5438	}
5439
5440	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5441	/* hl_cb_get should never fail here */
5442	if (!parser->patched_cb) {
5443		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5444		rc = -EFAULT;
5445		goto out;
5446	}
5447
5448	rc = gaudi_patch_cb(hdev, parser);
5449
5450	if (rc)
5451		hl_cb_put(parser->patched_cb);
5452
5453out:
5454	/*
5455	 * Always call cb destroy here because we still hold one reference
5456	 * to it from the earlier cb_get. After the job completes, cb_put
5457	 * will release it, but here we only want to remove it from the
5458	 * idr
5459	 */
5460	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5461
5462free_userptr:
5463	if (rc)
5464		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5465	return rc;
5466}
5467
5468static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5469					struct hl_cs_parser *parser)
5470{
5471	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5472	struct gaudi_device *gaudi = hdev->asic_specific;
5473	u32 nic_queue_offset, nic_mask_q_id;
5474
5475	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5476			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5477		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5478		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5479
5480		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5481			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5482			return -EINVAL;
5483		}
5484	}
5485
5486	/* For internal queue jobs just check if CB address is valid */
5487	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5488					parser->user_cb_size,
5489					asic_prop->sram_user_base_address,
5490					asic_prop->sram_end_address))
5491		return 0;
5492
5493	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5494					parser->user_cb_size,
5495					asic_prop->dram_user_base_address,
5496					asic_prop->dram_end_address))
5497		return 0;
5498
5499	/* PMMU and HPMMU addresses are equal, check only one of them */
5500	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5501					parser->user_cb_size,
5502					asic_prop->pmmu.start_addr,
5503					asic_prop->pmmu.end_addr))
5504		return 0;
5505
5506	dev_err(hdev->dev,
5507		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5508		parser->user_cb, parser->user_cb_size);
5509
5510	return -EFAULT;
5511}
5512
5513static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5514{
5515	struct gaudi_device *gaudi = hdev->asic_specific;
5516
5517	if (parser->queue_type == QUEUE_TYPE_INT)
5518		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5519
5520	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5521		return gaudi_parse_cb_mmu(hdev, parser);
5522	else
5523		return gaudi_parse_cb_no_mmu(hdev, parser);
5524}
5525
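/*
 * Append the end-of-CB packets: NOP padding from the original CB end up to
 * the reserved area, then a MSG_PROT packet that writes the completion value
 * to the CQ and a second MSG_PROT packet that triggers the MSI interrupt.
 */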
5526static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5527				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5528				u32 msi_vec, bool eb)
5529{
5530	struct packet_msg_prot *cq_pkt;
5531	struct packet_nop *cq_padding;
5532	u64 msi_addr;
5533	u32 tmp;
5534
5535	cq_padding = kernel_address + original_len;
5536	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5537
5538	while ((void *)cq_padding < (void *)cq_pkt) {
5539		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5540		cq_padding++;
5541	}
5542
5543	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5544	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5545
5546	if (eb)
5547		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5548
5549	cq_pkt->ctl = cpu_to_le32(tmp);
5550	cq_pkt->value = cpu_to_le32(cq_val);
5551	cq_pkt->addr = cpu_to_le64(cq_addr);
5552
5553	cq_pkt++;
5554
5555	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5556	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5557	cq_pkt->ctl = cpu_to_le32(tmp);
5558	cq_pkt->value = cpu_to_le32(1);
5559	msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5560	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5561}
5562
5563static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5564{
5565	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5566}
5567
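/*
 * Fill a device memory range with a value by building a single memset
 * LIN_DMA packet in a kernel CB and sending it as a job on QMAN0 of DMA
 * channel 0.
 */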
5568static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5569					u32 size, u64 val)
5570{
5571	struct packet_lin_dma *lin_dma_pkt;
5572	struct hl_cs_job *job;
5573	u32 cb_size, ctl, err_cause;
5574	struct hl_cb *cb;
5575	int rc;
5576
5577	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5578	if (!cb)
5579		return -EFAULT;
5580
5581	lin_dma_pkt = cb->kernel_address;
5582	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5583	cb_size = sizeof(*lin_dma_pkt);
5584
5585	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5586	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5587	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5588	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5589	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5590
5591	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5592	lin_dma_pkt->src_addr = cpu_to_le64(val);
5593	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5594	lin_dma_pkt->tsize = cpu_to_le32(size);
5595
5596	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5597	if (!job) {
5598		dev_err(hdev->dev, "Failed to allocate a new job\n");
5599		rc = -ENOMEM;
5600		goto release_cb;
5601	}
5602
5603	/* Verify DMA is OK */
5604	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5605	if (err_cause && !hdev->init_done) {
5606		dev_dbg(hdev->dev,
5607			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5608			err_cause);
5609		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5610	}
5611
5612	job->id = 0;
5613	job->user_cb = cb;
5614	atomic_inc(&job->user_cb->cs_cnt);
5615	job->user_cb_size = cb_size;
5616	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5617	job->patched_cb = job->user_cb;
5618	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5619
5620	hl_debugfs_add_job(hdev, job);
5621
5622	rc = gaudi_send_job_on_qman0(hdev, job);
5623	hl_debugfs_remove_job(hdev, job);
5624	kfree(job);
5625	atomic_dec(&cb->cs_cnt);
5626
5627	/* Verify DMA is OK */
5628	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5629	if (err_cause) {
5630		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5631		rc = -EIO;
5632		if (!hdev->init_done) {
5633			dev_dbg(hdev->dev,
5634				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5635				err_cause);
5636			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5637		}
5638	}
5639
5640release_cb:
5641	hl_cb_put(cb);
5642	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5643
5644	return rc;
5645}
5646
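/*
 * Write the same value to num_regs consecutive 32-bit registers by building
 * one MSG_LONG packet per register and sending them as a single kernel job
 * on QMAN0.
 */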
5647static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5648					u32 num_regs, u32 val)
5649{
5650	struct packet_msg_long *pkt;
5651	struct hl_cs_job *job;
5652	u32 cb_size, ctl;
5653	struct hl_cb *cb;
5654	int i, rc;
5655
5656	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5657
5658	if (cb_size > SZ_2M) {
5659		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M >> 20);
5660		return -ENOMEM;
5661	}
5662
5663	cb = hl_cb_kernel_create(hdev, cb_size, false);
5664	if (!cb)
5665		return -EFAULT;
5666
5667	pkt = cb->kernel_address;
5668
5669	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5670	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5671	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5672	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5673	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5674
5675	for (i = 0; i < num_regs ; i++, pkt++) {
5676		pkt->ctl = cpu_to_le32(ctl);
5677		pkt->value = cpu_to_le32(val);
5678		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5679	}
5680
5681	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5682	if (!job) {
5683		dev_err(hdev->dev, "Failed to allocate a new job\n");
5684		rc = -ENOMEM;
5685		goto release_cb;
5686	}
5687
5688	job->id = 0;
5689	job->user_cb = cb;
5690	atomic_inc(&job->user_cb->cs_cnt);
5691	job->user_cb_size = cb_size;
5692	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5693	job->patched_cb = job->user_cb;
5694	job->job_cb_size = cb_size;
5695
5696	hl_debugfs_add_job(hdev, job);
5697
5698	rc = gaudi_send_job_on_qman0(hdev, job);
5699	hl_debugfs_remove_job(hdev, job);
5700	kfree(job);
5701	atomic_dec(&cb->cs_cnt);
5702
5703release_cb:
5704	hl_cb_put(cb);
5705	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5706
5707	return rc;
5708}
5709
5710static int gaudi_restore_sm_registers(struct hl_device *hdev)
5711{
5712	u64 base_addr;
5713	u32 num_regs;
5714	int rc;
5715
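	/*
	 * Clear all sync objects and monitor status registers in the E_N,
	 * E_S and W_N SYNC_MNGR blocks. In the W_S block, clear only the
	 * range beyond the sync objects and monitors reserved for the
	 * driver.
	 */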
5716	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5717	num_regs = NUM_OF_SOB_IN_BLOCK;
5718	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5719	if (rc) {
5720		dev_err(hdev->dev, "failed resetting SM registers");
5721		return -ENOMEM;
5722	}
5723
5724	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5725	num_regs = NUM_OF_SOB_IN_BLOCK;
5726	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5727	if (rc) {
5728		dev_err(hdev->dev, "failed resetting SM registers");
5729		return -ENOMEM;
5730	}
5731
5732	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5733	num_regs = NUM_OF_SOB_IN_BLOCK;
5734	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5735	if (rc) {
5736		dev_err(hdev->dev, "failed resetting SM registers");
5737		return -ENOMEM;
5738	}
5739
5740	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5741	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5742	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5743	if (rc) {
5744		dev_err(hdev->dev, "failed resetting SM registers");
5745		return -ENOMEM;
5746	}
5747
5748	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5749	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5750	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5751	if (rc) {
5752		dev_err(hdev->dev, "failed resetting SM registers");
5753		return -ENOMEM;
5754	}
5755
5756	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5757	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5758	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5759	if (rc) {
5760		dev_err(hdev->dev, "failed resetting SM registers");
5761		return -ENOMEM;
5762	}
5763
5764	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5765			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5766	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5767	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5768	if (rc) {
5769		dev_err(hdev->dev, "failed resetting SM registers");
5770		return -ENOMEM;
5771	}
5772
5773	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5774			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5775	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5776	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5777	if (rc) {
5778		dev_err(hdev->dev, "failed resetting SM registers");
5779		return -ENOMEM;
5780	}
5781
5782	return 0;
5783}
5784
5785static void gaudi_restore_dma_registers(struct hl_device *hdev)
5786{
5787	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5788			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5789	int i;
5790
5791	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5792		u64 sob_addr = CFG_BASE +
5793				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5794				(i * sob_delta);
5795		u32 dma_offset = i * DMA_CORE_OFFSET;
5796
5797		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5798				lower_32_bits(sob_addr));
5799		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5800				upper_32_bits(sob_addr));
5801		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5802
5803		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5804		 * modified by the user for SRAM reduction
5805		 */
5806		if (i > 1)
5807			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5808								0x00000001);
5809	}
5810}
5811
5812static void gaudi_restore_qm_registers(struct hl_device *hdev)
5813{
5814	u32 qman_offset;
5815	int i;
5816
5817	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5818		qman_offset = i * DMA_QMAN_OFFSET;
5819		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5820	}
5821
5822	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5823		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5824		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5825	}
5826
5827	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5828		qman_offset = i * TPC_QMAN_OFFSET;
5829		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5830	}
5831
5832	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5833		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5834				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5835		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5836	}
5837}
5838
5839static int gaudi_restore_user_registers(struct hl_device *hdev)
5840{
5841	int rc;
5842
5843	rc = gaudi_restore_sm_registers(hdev);
5844	if (rc)
5845		return rc;
5846
5847	gaudi_restore_dma_registers(hdev);
5848	gaudi_restore_qm_registers(hdev);
5849
5850	return 0;
5851}
5852
5853static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5854{
5855	return 0;
5856}
5857
5858static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5859{
5860	u32 size = hdev->asic_prop.mmu_pgt_size +
5861			hdev->asic_prop.mmu_cache_mng_size;
5862	struct gaudi_device *gaudi = hdev->asic_specific;
5863	u64 addr = hdev->asic_prop.mmu_pgt_addr;
5864
5865	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5866		return 0;
5867
5868	return gaudi_memset_device_memory(hdev, addr, size, 0);
5869}
5870
5871static void gaudi_restore_phase_topology(struct hl_device *hdev)
5872{
5873
5874}
5875
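/*
 * Program a DMA core directly (bypassing its QMAN) to copy size_to_dma bytes
 * from a device address into a host DMA address, then poll until the engine
 * is no longer busy.
 */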
5876static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5877					u32 size_to_dma, dma_addr_t dma_addr)
5878{
5879	u32 err_cause, val;
5880	u64 dma_offset;
5881	int rc;
5882
5883	dma_offset = dma_id * DMA_CORE_OFFSET;
5884
5885	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5886	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5887	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5888	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5889	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5890	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5891			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5892
5893	rc = hl_poll_timeout(
5894		hdev,
5895		mmDMA0_CORE_STS0 + dma_offset,
5896		val,
5897		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5898		0,
5899		1000000);
5900
5901	if (rc) {
5902		dev_err(hdev->dev,
5903			"DMA %d timed-out during reading of 0x%llx\n",
5904			dma_id, addr);
5905		return -EIO;
5906	}
5907
5908	/* Verify DMA is OK */
5909	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5910	if (err_cause) {
5911		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5912		dev_dbg(hdev->dev,
5913			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5914			err_cause);
5915		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5916
5917		return -EIO;
5918	}
5919
5920	return 0;
5921}
5922
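/*
 * Read a device memory range for debugfs by bouncing it through a 2MB
 * coherent host buffer, using whichever of the two PCI DMA engines is
 * currently idle.
 */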
5923static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5924				void *blob_addr)
5925{
5926	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5927	u32 qm_glbl_sts0, qm_cgm_sts;
5928	u64 dma_offset, qm_offset;
5929	dma_addr_t dma_addr;
5930	void *kernel_addr;
5931	bool is_eng_idle;
5932	int rc = 0, dma_id;
5933
5934	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
5935
5936	if (!kernel_addr)
5937		return -ENOMEM;
5938
5939	hdev->asic_funcs->hw_queues_lock(hdev);
5940
5941	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
5942	dma_offset = dma_id * DMA_CORE_OFFSET;
5943	qm_offset = dma_id * DMA_QMAN_OFFSET;
5944	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5945	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5946	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5947	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5948		      IS_DMA_IDLE(dma_core_sts0);
5949
5950	if (!is_eng_idle) {
5951		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
5952		dma_offset = dma_id * DMA_CORE_OFFSET;
5953		qm_offset = dma_id * DMA_QMAN_OFFSET;
5954		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5955		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5956		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5957		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5958			      IS_DMA_IDLE(dma_core_sts0);
5959
5960		if (!is_eng_idle) {
5961			dev_err_ratelimited(hdev->dev,
5962				"Can't read via DMA because it is BUSY\n");
5963			rc = -EAGAIN;
5964			goto out;
5965		}
5966	}
5967
5968	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
5969	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
5970			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
5971
5972	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
5973	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
5974	 * ASID
5975	 */
5976	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5977
5978	/* Verify DMA is OK */
5979	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5980	if (err_cause) {
5981		dev_dbg(hdev->dev,
5982			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5983			err_cause);
5984		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5985	}
5986
5987	pos = 0;
5988	size_left = size;
5989	size_to_dma = SZ_2M;
5990
5991	while (size_left > 0) {
5992
5993		if (size_left < SZ_2M)
5994			size_to_dma = size_left;
5995
5996		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
5997						dma_addr);
5998		if (rc)
5999			break;
6000
6001		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6002
6003		if (size_left <= SZ_2M)
6004			break;
6005
6006		pos += SZ_2M;
6007		addr += SZ_2M;
6008		size_left -= SZ_2M;
6009	}
6010
6011	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6012	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6013	 * ASID
6014	 */
6015	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6016			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6017
6018	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6019
6020out:
6021	hdev->asic_funcs->hw_queues_unlock(hdev);
6022
6023	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6024
6025	return rc;
6026}
6027
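/*
 * MMU page tables reside in HBM, so PTEs are read and written through the
 * HBM BAR, relative to the address the BAR is currently mapped to
 * (hbm_bar_cur_addr).
 */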
6028static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6029{
6030	struct gaudi_device *gaudi = hdev->asic_specific;
6031
6032	if (hdev->reset_info.hard_reset_pending)
6033		return U64_MAX;
6034
6035	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6036			(addr - gaudi->hbm_bar_cur_addr));
6037}
6038
6039static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6040{
6041	struct gaudi_device *gaudi = hdev->asic_specific;
6042
6043	if (hdev->reset_info.hard_reset_pending)
6044		return;
6045
6046	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6047			(addr - gaudi->hbm_bar_cur_addr));
6048}
6049
6050void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6051{
6052	/* mask to zero the MMBP and ASID bits */
6053	WREG32_AND(reg, ~0x7FF);
6054	WREG32_OR(reg, asid);
6055}
6056
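/*
 * Program the given ASID (and clear the MMU-bypass bit) in the AXI user
 * registers of all DMA, TPC and MME engines, of every initialized NIC and of
 * the PSOC trace unit, so their transactions are translated by the MMU in
 * that ASID's context.
 */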
6057static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6058{
6059	struct gaudi_device *gaudi = hdev->asic_specific;
6060
6061	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6062		return;
6063
6064	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6065		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6066		return;
6067	}
6068
6069	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6070	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6071	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6072	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6073	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6074
6075	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6076	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6077	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6078	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6079	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6080
6081	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6082	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6083	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6084	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6085	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6086
6087	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6088	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6089	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6090	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6091	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6092
6093	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6094	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6095	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6096	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6097	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6098
6099	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6100	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6101	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6102	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6103	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6104
6105	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6106	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6107	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6108	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6109	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6110
6111	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6112	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6113	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6114	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6115	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6116
6117	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6118	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6119	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6120	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6121	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6122	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6123	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6124	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6125
6126	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6127	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6128	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6129	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6130	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6131	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6132	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6133
6134	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6135	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6136	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6137	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6138	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6139	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6140	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6141
6142	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6143	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6144	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6145	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6146	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6147	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6148	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6149
6150	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6151	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6152	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6153	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6154	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6155	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6156	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6157
6158	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6159	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6160	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6161	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6162	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6163	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6164	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6165
6166	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6167	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6168	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6169	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6170	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6171	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6172	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6173
6174	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6175	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6176	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6177	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6178	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6179	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6180	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6181
6182	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6183	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6184	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6185	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6186	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6187	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6188	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6189
6190	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6191	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6192	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6193	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6194	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6195	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6196	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6197	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6198	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6199	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6200
6201	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6202	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6203	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6204	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6205	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6206	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6207	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6208	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6209	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6210	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6211	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6212	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6213
6214	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6215		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6216				asid);
6217		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6218				asid);
6219		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6220				asid);
6221		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6222				asid);
6223		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6224				asid);
6225	}
6226
6227	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6228		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6229				asid);
6230		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6231				asid);
6232		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6233				asid);
6234		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6235				asid);
6236		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6237				asid);
6238	}
6239
6240	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6241		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6242				asid);
6243		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6244				asid);
6245		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6246				asid);
6247		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6248				asid);
6249		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6250				asid);
6251	}
6252
6253	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6254		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6255				asid);
6256		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6257				asid);
6258		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6259				asid);
6260		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6261				asid);
6262		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6263				asid);
6264	}
6265
6266	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6267		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6268				asid);
6269		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6270				asid);
6271		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6272				asid);
6273		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6274				asid);
6275		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6276				asid);
6277	}
6278
6279	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6280		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6281				asid);
6282		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6283				asid);
6284		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6285				asid);
6286		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6287				asid);
6288		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6289				asid);
6290	}
6291
6292	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6293		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6294				asid);
6295		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6296				asid);
6297		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6298				asid);
6299		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6300				asid);
6301		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6302				asid);
6303	}
6304
6305	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6306		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6307				asid);
6308		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6309				asid);
6310		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6311				asid);
6312		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6313				asid);
6314		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6315				asid);
6316	}
6317
6318	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6319		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6320				asid);
6321		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6322				asid);
6323		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6324				asid);
6325		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6326				asid);
6327		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6328				asid);
6329	}
6330
6331	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6332		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6333				asid);
6334		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6335				asid);
6336		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6337				asid);
6338		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6339				asid);
6340		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6341				asid);
6342	}
6343
6344	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6345	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6346}
6347
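/*
 * Send a kernel-owned CB on stream 0 of the DMA0 QMAN and busy-wait on a
 * fence value that the CB's trailing MSG_PROT packet writes into a small
 * DMA pool allocation.
 */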
6348static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6349		struct hl_cs_job *job)
6350{
6351	struct packet_msg_prot *fence_pkt;
6352	u32 *fence_ptr;
6353	dma_addr_t fence_dma_addr;
6354	struct hl_cb *cb;
6355	u32 tmp, timeout, dma_offset;
6356	int rc;
6357
6358	if (hdev->pldm)
6359		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6360	else
6361		timeout = HL_DEVICE_TIMEOUT_USEC;
6362
6363	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6364	if (!fence_ptr) {
6365		dev_err(hdev->dev,
6366			"Failed to allocate fence memory for QMAN0\n");
6367		return -ENOMEM;
6368	}
6369
6370	cb = job->patched_cb;
6371
6372	fence_pkt = cb->kernel_address +
6373			job->job_cb_size - sizeof(struct packet_msg_prot);
6374
6375	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6376	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6377	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6378
6379	fence_pkt->ctl = cpu_to_le32(tmp);
6380	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6381	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6382
6383	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6384
6385	WREG32(mmDMA0_CORE_PROT + dma_offset,
6386			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6387
6388	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6389					job->job_cb_size, cb->bus_address);
6390	if (rc) {
6391		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6392		goto free_fence_ptr;
6393	}
6394
6395	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6396				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6397				timeout, true);
6398
6399	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6400
6401	if (rc == -ETIMEDOUT) {
6402		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6403		goto free_fence_ptr;
6404	}
6405
6406free_fence_ptr:
6407	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6408
6409	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6410	return rc;
6411}
6412
6413static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6414{
6415	if (event_type >= GAUDI_EVENT_SIZE)
6416		goto event_not_supported;
6417
6418	if (!gaudi_irq_map_table[event_type].valid)
6419		goto event_not_supported;
6420
6421	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6422
6423	return;
6424
6425event_not_supported:
6426	snprintf(desc, size, "N/A");
6427}
6428
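/*
 * Each DMA_IF routing point is shared by two DMA engines, so inspect the
 * engines' error cause registers to determine which of the pair (possibly
 * both) triggered the RAZWI.
 */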
6429static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6430							bool is_write, u16 *engine_id_1,
6431							u16 *engine_id_2)
6432{
6433	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6434
6435	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6436				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6437
6438	switch (x_y) {
6439	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6440	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6441		dma_id[0] = 0;
6442		dma_id[1] = 2;
6443		break;
6444	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6445	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6446		dma_id[0] = 1;
6447		dma_id[1] = 3;
6448		break;
6449	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6450	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6451		dma_id[0] = 4;
6452		dma_id[1] = 6;
6453		break;
6454	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6455	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6456		dma_id[0] = 5;
6457		dma_id[1] = 7;
6458		break;
6459	default:
6460		goto unknown_initiator;
6461	}
6462
6463	for (i = 0 ; i < 2 ; i++) {
6464		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6465		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6466	}
6467
6468	switch (x_y) {
6469	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6470	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6471		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6472			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6473			return "DMA0";
6474		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6475			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6476			return "DMA2";
6477		} else {
6478			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6479			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6480			return "DMA0 or DMA2";
6481		}
6482	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6483	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6484		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6485			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6486			return "DMA1";
6487		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6488			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6489			return "DMA3";
6490		} else {
6491			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6492			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6493			return "DMA1 or DMA3";
6494		}
6495	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6496	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6497		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6498			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6499			return "DMA4";
6500		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6501			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6502			return "DMA6";
6503		} else {
6504			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6505			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6506			return "DMA4 or DMA6";
6507		}
6508	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6509	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6510		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6511			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6512			return "DMA5";
6513		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6514			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6515			return "DMA7";
6516		} else {
6517			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6518			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6519			return "DMA5 or DMA7";
6520		}
6521	}
6522
6523unknown_initiator:
6524	return "unknown initiator";
6525}
6526
6527static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6528							u16 *engine_id_1, u16 *engine_id_2)
6529{
6530	u32 val, x_y, axi_id;
6531
6532	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6533				RREG32(mmMMU_UP_RAZWI_READ_ID);
6534	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6535			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6536	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6537			RAZWI_INITIATOR_AXI_ID_SHIFT);
6538
6539	switch (x_y) {
6540	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6541		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6542			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6543			return "TPC0";
6544		}
6545		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6546			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6547			return "NIC0";
6548		}
6549		break;
6550	case RAZWI_INITIATOR_ID_X_Y_TPC1:
6551		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6552		return "TPC1";
6553	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6554	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6555		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6556		return "MME0";
6557	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6558	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6559		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6560		return "MME1";
6561	case RAZWI_INITIATOR_ID_X_Y_TPC2:
6562		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6563		return "TPC2";
6564	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6565		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6566			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6567			return "TPC3";
6568		}
6569		/* PCI, CPU and PSOC do not have an engine id */
6570		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6571			return "PCI";
6572		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6573			return "CPU";
6574		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6575			return "PSOC";
6576		break;
6577	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6578	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6579	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6580	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6581	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6582	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6583	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6584	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6585		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6586				engine_id_1, engine_id_2);
6587	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6588		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6589			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6590			return "TPC4";
6591		}
6592		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6593			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6594			return "NIC1";
6595		}
6596		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6597			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6598			return "NIC2";
6599		}
6600		break;
6601	case RAZWI_INITIATOR_ID_X_Y_TPC5:
6602		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6603		return "TPC5";
6604	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6605	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6606		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6607		return "MME2";
6608	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6609	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6610		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6611		return "MME3";
6612	case RAZWI_INITIATOR_ID_X_Y_TPC6:
6613		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6614		return "TPC6";
6615	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6616		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6617			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6618			return "TPC7";
6619		}
6620		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6621			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6622			return "NIC4";
6623		}
6624		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6625			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6626			return "NIC5";
6627		}
6628		break;
6629	default:
6630		break;
6631	}
6632
6633	dev_err(hdev->dev,
6634		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6635		val,
6636		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6637		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6638		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6639			RAZWI_INITIATOR_AXI_ID_MASK);
6640
6641	return "unknown initiator";
6642}
6643
6644static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6645						u16 *engine_id_2, bool *is_read, bool *is_write)
6646{
6647
6648	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6649		dev_err_ratelimited(hdev->dev,
6650			"RAZWI event caused by illegal write of %s\n",
6651			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6652		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6653		*is_write = true;
6654	}
6655
6656	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6657		dev_err_ratelimited(hdev->dev,
6658			"RAZWI event caused by illegal read of %s\n",
6659			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6660		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6661		*is_read = true;
6662	}
6663}
6664
6665static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6666{
6667	struct gaudi_device *gaudi = hdev->asic_specific;
6668	u32 val;
6669
6670	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6671		return;
6672
6673	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6674	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6675		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6676		*addr <<= 32;
6677		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6678
6679		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6680		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6681
6682		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6683	}
6684
6685	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6686	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6687		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6688		*addr <<= 32;
6689		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6690
6691		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6692
6693		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6694	}
6695}
6696
6697/*
6698 *  +-------------------+------------------------------------------------------+
6699 *  | Configuration Reg |                     Description                      |
6700 *  |      Address      |                                                      |
6701 *  +-------------------+------------------------------------------------------+
6702 *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6703 *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6704 *  |                   |0xF34 memory wrappers 63:32                           |
6705 *  |                   |0xF38 memory wrappers 95:64                           |
6706 *  |                   |0xF3C memory wrappers 127:96                          |
6707 *  +-------------------+------------------------------------------------------+
6708 *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6709 *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6710 *  |                   |0xF44 memory wrappers 63:32                           |
6711 *  |                   |0xF48 memory wrappers 95:64                           |
6712 *  |                   |0xF4C memory wrappers 127:96                          |
6713 *  +-------------------+------------------------------------------------------+
6714 */
6715static int gaudi_extract_ecc_info(struct hl_device *hdev,
6716		struct ecc_info_extract_params *params, u64 *ecc_address,
6717		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6718{
6719	u32 i, num_mem_regs, reg, err_bit;
6720	u64 err_addr, err_word = 0;
6721
6722	num_mem_regs = params->num_memories / 32 +
6723			((params->num_memories % 32) ? 1 : 0);
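	/* each 32-bit indication register covers 32 memory wrappers, hence the round-up above */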
6724
6725	if (params->block_address >= CFG_BASE)
6726		params->block_address -= CFG_BASE;
6727
6728	if (params->derr)
6729		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6730	else
6731		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6732
6733	/* Set invalid wrapper index */
6734	*memory_wrapper_idx = 0xFF;
6735
6736	/* Iterate through memory wrappers, a single bit must be set */
	for (i = 0 ; i < num_mem_regs ; i++) {
		err_word = RREG32(err_addr + i * 4);
6740		if (err_word) {
6741			err_bit = __ffs(err_word);
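			/* wrapper index = bit position in this register plus 32 per preceding register */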
6742			*memory_wrapper_idx = err_bit + (32 * i);
6743			break;
6744		}
6745	}
6746
6747	if (*memory_wrapper_idx == 0xFF) {
6748		dev_err(hdev->dev, "ECC error information cannot be found\n");
6749		return -EINVAL;
6750	}
6751
6752	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6753			*memory_wrapper_idx);
6754
6755	*ecc_address =
6756		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6757	*ecc_syndrom =
6758		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6759
6760	/* Clear error indication */
6761	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6762	if (params->derr)
6763		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6764	else
6765		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6766
6767	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6768
6769	return 0;
6770}
6771
6772/*
6773 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6774 *
6775 * @idx: the current pi/ci value
6776 * @q_len: the queue length (power of 2)
6777 *
6778 * @return the cyclically decremented index
6779 */
6780static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6781{
6782	u32 mask = q_len - 1;
6783
6784	/*
	 * Modular decrement is equivalent to adding (q_len - 1);
	 * we then take the LSBs to make sure the value stays in the
	 * range [0, q_len - 1]
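	 * e.g. for q_len = 8: idx 0 -> 7, idx 5 -> 4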
6788	 */
6789	return (idx + q_len - 1) & mask;
6790}
6791
6792/**
6793 * gaudi_handle_sw_config_stream_data - print SW config stream data
6794 *
6795 * @hdev: pointer to the habanalabs device structure
6796 * @stream: the QMAN's stream
6797 * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the last events that occurred
6799 */
6800static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6801						u64 qman_base, u64 event_mask)
6802{
6803	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6804	u32 cq_ptr_lo_off, size;
6805
6806	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
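	/* per-stream stride between consecutive CQ_PTR_LO registers */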
6807
6808	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6809						stream * cq_ptr_lo_off;
6810	cq_ptr_hi = cq_ptr_lo +
6811				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6812	cq_tsize = cq_ptr_lo +
6813				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6814
6815	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6816	size = RREG32(cq_tsize);
6817	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6818							stream, cq_ptr, size);
6819
6820	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6821		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6822		hdev->captured_err_info.undef_opcode.cq_size = size;
6823		hdev->captured_err_info.undef_opcode.stream_id = stream;
6824	}
6825}
6826
6827/**
6828 * gaudi_handle_last_pqes_on_err - print last PQEs on error
6829 *
6830 * @hdev: pointer to the habanalabs device structure
6831 * @qid_base: first QID of the QMAN (out of 4 streams)
6832 * @stream: the QMAN's stream
6833 * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the last events that occurred
 * @pr_sw_conf: if true, print the SW config stream data (CQ PTR and SIZE)
6836 */
6837static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6838						u32 stream, u64 qman_base,
6839						u64 event_mask,
6840						bool pr_sw_conf)
6841{
6842	u32 ci, qm_ci_stream_off, queue_len;
6843	struct hl_hw_queue *q;
6844	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6845	int i;
6846
6847	q = &hdev->kernel_queues[qid_base + stream];
6848
6849	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6850	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6851						stream * qm_ci_stream_off;
6852
6853	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6854					q->int_queue_len : HL_QUEUE_LENGTH;
6855
6856	hdev->asic_funcs->hw_queues_lock(hdev);
6857
6858	if (pr_sw_conf)
6859		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6860
6861	ci = RREG32(pq_ci);
6862
	/* we should start printing from ci - 1 */
6864	ci = gaudi_queue_idx_dec(ci, queue_len);
6865	memset(addr, 0, sizeof(addr));
6866
6867	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6868		struct hl_bd *bd;
6869		u32 len;
6870
6871		bd = q->kernel_address;
6872		bd += ci;
6873
6874		len = le32_to_cpu(bd->len);
		/* len 0 means an uninitialized entry - break */
6876		if (!len)
6877			break;
6878
6879		addr[i] = le64_to_cpu(bd->ptr);
6880
6881		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6882							stream, ci, addr[i], len);
6883
6884		/* get previous ci, wrap if needed */
6885		ci = gaudi_queue_idx_dec(ci, queue_len);
6886	}
6887
6888	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6889		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6890		u32 arr_idx = undef_opcode->cb_addr_streams_len;
6891
6892		if (arr_idx == 0) {
6893			undef_opcode->timestamp = ktime_get();
6894			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6895		}
6896
6897		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6898		undef_opcode->cb_addr_streams_len++;
6899	}
6900
6901	hdev->asic_funcs->hw_queues_unlock(hdev);
6902}
6903
6904/**
6905 * handle_qman_data_on_err - extract QMAN data on error
6906 *
6907 * @hdev: pointer to the habanalabs device structure
6908 * @qid_base: first QID of the QMAN (out of 4 streams)
6909 * @stream: the QMAN's stream
6910 * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the last events that occurred
 *
 * This function attempts to extract as much data as possible on a QMAN error.
 * For an upper CP, print the SW config stream data and the last 8 PQEs.
 * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
6916 */
6917static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6918				   u32 stream, u64 qman_base, u64 event_mask)
6919{
6920	u32 i;
6921
6922	if (stream != QMAN_STREAMS) {
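		/* a stream value below QMAN_STREAMS refers to an upper CP - dump only its own data */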
6923		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6924			qman_base, event_mask, true);
6925		return;
6926	}
6927
6928	/* handle Lower-CP */
6929	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6930
6931	for (i = 0; i < QMAN_STREAMS; i++)
6932		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6933			qman_base, event_mask, false);
6934}
6935
6936static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6937					  const char *qm_name,
6938					  u64 qman_base,
6939					  u32 qid_base,
6940					  u64 *event_mask)
6941{
6942	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6943	u64 glbl_sts_addr, arb_err_addr;
6944	char reg_desc[32];
6945
6946	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6947	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6948
6949	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
6950	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6951		glbl_sts_clr_val = 0;
6952		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6953
6954		if (!glbl_sts_val)
6955			continue;
6956
6957		if (i == QMAN_STREAMS)
6958			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6959		else
6960			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6961
6962		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6963			if (glbl_sts_val & BIT(j)) {
6964				dev_err_ratelimited(hdev->dev,
6965						"%s %s. err cause: %s\n",
6966						qm_name, reg_desc,
6967						gaudi_qman_error_cause[j]);
6968				glbl_sts_clr_val |= BIT(j);
6969			}
6970		}
6971		/* check for undefined opcode */
6972		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6973				hdev->captured_err_info.undef_opcode.write_enable) {
6974			memset(&hdev->captured_err_info.undef_opcode, 0,
6975						sizeof(hdev->captured_err_info.undef_opcode));
6976
6977			hdev->captured_err_info.undef_opcode.write_enable = false;
6978			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6979		}
6980
		/* Write 1 to clear errors */
6982		if (!hdev->stop_on_err)
6983			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6984		else
6985			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
6986	}
6987
6988	arb_err_val = RREG32(arb_err_addr);
6989
6990	if (!arb_err_val)
6991		return;
6992
6993	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6994		if (arb_err_val & BIT(j)) {
6995			dev_err_ratelimited(hdev->dev,
6996					"%s ARB_ERR. err cause: %s\n",
6997					qm_name,
6998					gaudi_qman_arb_error_cause[j]);
6999		}
7000	}
7001}
7002
7003static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7004		struct hl_eq_sm_sei_data *sei_data)
7005{
7006	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7007
7008	/* Flip the bits as the enum is ordered in the opposite way */
7009	index = (index ^ 0x3) & 0x3;
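	/* e.g. index 0 <-> 3 and index 1 <-> 2 */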
7010
7011	switch (sei_data->sei_cause) {
7012	case SM_SEI_SO_OVERFLOW:
7013		dev_err_ratelimited(hdev->dev,
7014			"%s SEI Error: SOB Group %u overflow/underflow",
7015			gaudi_sync_manager_names[index],
7016			le32_to_cpu(sei_data->sei_log));
7017		break;
7018	case SM_SEI_LBW_4B_UNALIGNED:
7019		dev_err_ratelimited(hdev->dev,
7020			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7021			gaudi_sync_manager_names[index],
7022			le32_to_cpu(sei_data->sei_log));
7023		break;
7024	case SM_SEI_AXI_RESPONSE_ERR:
7025		dev_err_ratelimited(hdev->dev,
7026			"%s SEI Error: AXI ID %u response error",
7027			gaudi_sync_manager_names[index],
7028			le32_to_cpu(sei_data->sei_log));
7029		break;
7030	default:
7031		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7032				le32_to_cpu(sei_data->sei_log));
7033		break;
7034	}
7035}
7036
7037static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7038		struct hl_eq_ecc_data *ecc_data)
7039{
7040	struct ecc_info_extract_params params;
7041	u64 ecc_address = 0, ecc_syndrom = 0;
7042	u8 index, memory_wrapper_idx = 0;
7043	bool extract_info_from_fw;
7044	int rc;
7045
7046	if (hdev->asic_prop.fw_security_enabled) {
7047		extract_info_from_fw = true;
7048		goto extract_ecc_info;
7049	}
7050
7051	switch (event_type) {
7052	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7053	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7054		extract_info_from_fw = true;
7055		break;
7056	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7057		index = event_type - GAUDI_EVENT_TPC0_SERR;
7058		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7059		params.num_memories = 90;
7060		params.derr = false;
7061		extract_info_from_fw = false;
7062		break;
7063	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7064		index = event_type - GAUDI_EVENT_TPC0_DERR;
7065		params.block_address =
7066			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7067		params.num_memories = 90;
7068		params.derr = true;
7069		extract_info_from_fw = false;
7070		break;
7071	case GAUDI_EVENT_MME0_ACC_SERR:
7072	case GAUDI_EVENT_MME1_ACC_SERR:
7073	case GAUDI_EVENT_MME2_ACC_SERR:
7074	case GAUDI_EVENT_MME3_ACC_SERR:
7075		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7076		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7077		params.num_memories = 128;
7078		params.derr = false;
7079		extract_info_from_fw = false;
7080		break;
7081	case GAUDI_EVENT_MME0_ACC_DERR:
7082	case GAUDI_EVENT_MME1_ACC_DERR:
7083	case GAUDI_EVENT_MME2_ACC_DERR:
7084	case GAUDI_EVENT_MME3_ACC_DERR:
7085		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7086		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7087		params.num_memories = 128;
7088		params.derr = true;
7089		extract_info_from_fw = false;
7090		break;
7091	case GAUDI_EVENT_MME0_SBAB_SERR:
7092	case GAUDI_EVENT_MME1_SBAB_SERR:
7093	case GAUDI_EVENT_MME2_SBAB_SERR:
7094	case GAUDI_EVENT_MME3_SBAB_SERR:
7095		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7096		params.block_address =
7097			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7098		params.num_memories = 33;
7099		params.derr = false;
7100		extract_info_from_fw = false;
7101		break;
7102	case GAUDI_EVENT_MME0_SBAB_DERR:
7103	case GAUDI_EVENT_MME1_SBAB_DERR:
7104	case GAUDI_EVENT_MME2_SBAB_DERR:
7105	case GAUDI_EVENT_MME3_SBAB_DERR:
7106		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7107		params.block_address =
7108			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7109		params.num_memories = 33;
7110		params.derr = true;
7111		extract_info_from_fw = false;
7112		break;
7113	default:
7114		return;
7115	}
7116
7117extract_ecc_info:
7118	if (extract_info_from_fw) {
7119		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7120		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7121		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7122	} else {
7123		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7124				&ecc_syndrom, &memory_wrapper_idx);
7125		if (rc)
7126			return;
7127	}
7128
7129	dev_err(hdev->dev,
7130		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7131		ecc_address, ecc_syndrom, memory_wrapper_idx);
7132}
7133
7134static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7135{
7136	u64 qman_base;
7137	char desc[32];
7138	u32 qid_base;
7139	u8 index;
7140
7141	switch (event_type) {
7142	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7143		index = event_type - GAUDI_EVENT_TPC0_QM;
7144		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
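		/* each QMAN exposes QMAN_STREAMS consecutive queue IDs, one per stream */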
7145		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7146		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7147		break;
7148	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7149		if (event_type == GAUDI_EVENT_MME0_QM) {
7150			index = 0;
7151			qid_base = GAUDI_QUEUE_ID_MME_0_0;
7152		} else { /* event_type == GAUDI_EVENT_MME2_QM */
7153			index = 2;
7154			qid_base = GAUDI_QUEUE_ID_MME_1_0;
7155		}
7156		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7157		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7158		break;
7159	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7160		index = event_type - GAUDI_EVENT_DMA0_QM;
7161		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary: the CPU PQ sits right
		 * after the DMA1 queues, so DMA QMANs 2-7 are shifted by one
		 */
7163		if (index > 1)
7164			qid_base++;
7165		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7166		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7167		break;
7168	case GAUDI_EVENT_NIC0_QM0:
7169		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7170		qman_base = mmNIC0_QM0_BASE;
7171		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7172		break;
7173	case GAUDI_EVENT_NIC0_QM1:
7174		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7175		qman_base = mmNIC0_QM1_BASE;
7176		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7177		break;
7178	case GAUDI_EVENT_NIC1_QM0:
7179		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7180		qman_base = mmNIC1_QM0_BASE;
7181		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7182		break;
7183	case GAUDI_EVENT_NIC1_QM1:
7184		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7185		qman_base = mmNIC1_QM1_BASE;
7186		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7187		break;
7188	case GAUDI_EVENT_NIC2_QM0:
7189		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7190		qman_base = mmNIC2_QM0_BASE;
7191		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7192		break;
7193	case GAUDI_EVENT_NIC2_QM1:
7194		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7195		qman_base = mmNIC2_QM1_BASE;
7196		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7197		break;
7198	case GAUDI_EVENT_NIC3_QM0:
7199		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7200		qman_base = mmNIC3_QM0_BASE;
7201		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7202		break;
7203	case GAUDI_EVENT_NIC3_QM1:
7204		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7205		qman_base = mmNIC3_QM1_BASE;
7206		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7207		break;
7208	case GAUDI_EVENT_NIC4_QM0:
7209		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7210		qman_base = mmNIC4_QM0_BASE;
7211		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7212		break;
7213	case GAUDI_EVENT_NIC4_QM1:
7214		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7215		qman_base = mmNIC4_QM1_BASE;
7216		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7217		break;
7218	default:
7219		return;
7220	}
7221
7222	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7223}
7224
7225static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7226					bool check_razwi, u64 *event_mask)
7227{
7228	bool is_read = false, is_write = false;
7229	u16 engine_id[2], num_of_razwi_eng = 0;
7230	char desc[64] = "";
7231	u64 razwi_addr = 0;
7232	u8 razwi_flags = 0;
7233
7234	/*
7235	 * Init engine id by default as not valid and only if razwi initiated from engine with
7236	 * engine id it will get valid value.
7237	 */
7238	engine_id[0] = HL_RAZWI_NA_ENG_ID;
7239	engine_id[1] = HL_RAZWI_NA_ENG_ID;
7240
7241	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7242	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7243		event_type, desc);
7244
7245	if (check_razwi) {
7246		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7247						&is_write);
7248		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7249
7250		if (is_read)
7251			razwi_flags |= HL_RAZWI_READ;
7252		if (is_write)
7253			razwi_flags |= HL_RAZWI_WRITE;
7254
7255		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7256			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7257				num_of_razwi_eng = 2;
7258			else
7259				num_of_razwi_eng = 1;
7260		}
7261
7262		if (razwi_flags)
7263			hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7264					razwi_flags, event_mask);
7265	}
7266}
7267
7268static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7269					struct cpucp_pkt_sync_err *sync_err)
7270{
7271	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7272
7273	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7274		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7275}
7276
7277static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7278					struct hl_eq_fw_alive *fw_alive)
7279{
7280	dev_err(hdev->dev,
7281		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7282		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7283		le32_to_cpu(fw_alive->process_id),
7284		le32_to_cpu(fw_alive->thread_id),
7285		le64_to_cpu(fw_alive->uptime_seconds));
7286}
7287
7288static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7289						void *data)
7290{
7291	char desc[64] = "", *type;
7292	struct eq_nic_sei_event *eq_nic_sei = data;
7293	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7294
7295	switch (eq_nic_sei->axi_error_cause) {
7296	case RXB:
7297		type = "RXB";
7298		break;
7299	case RXE:
7300		type = "RXE";
7301		break;
7302	case TXS:
7303		type = "TXS";
7304		break;
7305	case TXE:
7306		type = "TXE";
7307		break;
7308	case QPC_RESP:
7309		type = "QPC_RESP";
7310		break;
7311	case NON_AXI_ERR:
7312		type = "NON_AXI_ERR";
7313		break;
7314	case TMR:
7315		type = "TMR";
7316		break;
7317	default:
7318		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7319			eq_nic_sei->axi_error_cause);
7320		type = "N/A";
7321		break;
7322	}
7323
7324	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7325			eq_nic_sei->id);
7326	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7327		event_type, desc);
7328}
7329
7330static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7331{
7332	/* GAUDI doesn't support any reset except hard-reset */
7333	return -EPERM;
7334}
7335
7336static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7337			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7338{
7339	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7340	int rc = 0;
7341
7342	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7343					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7344		if (!hbm_ecc_data) {
7345			dev_err(hdev->dev, "No FW ECC data");
7346			return 0;
7347		}
7348
7349		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7350				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7351		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7352				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7353		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7354				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7355		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7356				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7357		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7358				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7359		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7360				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7361		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7362				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7363
7364		dev_err(hdev->dev,
7365			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7366			device, ch, wr_par, rd_par, ca_par, serr, derr);
7367		dev_err(hdev->dev,
7368			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7369			device, ch, hbm_ecc_data->first_addr, type,
7370			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7371			hbm_ecc_data->dec_cnt);
7372		return 0;
7373	}
7374
7375	if (hdev->asic_prop.fw_security_enabled) {
7376		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7377		return 0;
7378	}
7379
7380	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7381	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7382		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7383		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7384		if (val) {
7385			rc = -EIO;
7386			dev_err(hdev->dev,
7387				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7388				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7389				(val >> 2) & 0x1, (val >> 3) & 0x1,
7390				(val >> 4) & 0x1);
7391
7392			val2 = RREG32(base + ch * 0x1000 + 0x060);
7393			dev_err(hdev->dev,
7394				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7395				device, ch * 2,
7396				RREG32(base + ch * 0x1000 + 0x064),
7397				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7398				(val2 & 0xFF0000) >> 16,
7399				(val2 & 0xFF000000) >> 24);
7400		}
7401
7402		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7403		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7404		if (val) {
7405			rc = -EIO;
7406			dev_err(hdev->dev,
7407				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7408				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7409				(val >> 2) & 0x1, (val >> 3) & 0x1,
7410				(val >> 4) & 0x1);
7411
7412			val2 = RREG32(base + ch * 0x1000 + 0x070);
7413			dev_err(hdev->dev,
7414				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7415				device, ch * 2 + 1,
7416				RREG32(base + ch * 0x1000 + 0x074),
7417				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7418				(val2 & 0xFF0000) >> 16,
7419				(val2 & 0xFF000000) >> 24);
7420		}
7421
7422		/* Clear interrupts */
7423		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7424		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7425		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7426		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7427		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7428		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7429	}
7430
7431	val  = RREG32(base + 0x8F30);
7432	val2 = RREG32(base + 0x8F34);
7433	if (val | val2) {
7434		rc = -EIO;
7435		dev_err(hdev->dev,
7436			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7437			device, val, val2);
7438	}
7439	val  = RREG32(base + 0x8F40);
7440	val2 = RREG32(base + 0x8F44);
7441	if (val | val2) {
7442		rc = -EIO;
7443		dev_err(hdev->dev,
7444			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7445			device, val, val2);
7446	}
7447
7448	return rc;
7449}
7450
7451static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7452{
7453	switch (hbm_event_type) {
7454	case GAUDI_EVENT_HBM0_SPI_0:
7455	case GAUDI_EVENT_HBM0_SPI_1:
7456		return 0;
7457	case GAUDI_EVENT_HBM1_SPI_0:
7458	case GAUDI_EVENT_HBM1_SPI_1:
7459		return 1;
7460	case GAUDI_EVENT_HBM2_SPI_0:
7461	case GAUDI_EVENT_HBM2_SPI_1:
7462		return 2;
7463	case GAUDI_EVENT_HBM3_SPI_0:
7464	case GAUDI_EVENT_HBM3_SPI_1:
7465		return 3;
7466	default:
7467		break;
7468	}
7469
7470	/* Should never happen */
7471	return 0;
7472}
7473
7474static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7475					char *interrupt_name)
7476{
7477	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7478	bool soft_reset_required = false;
7479
7480	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7481				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7482
7483	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7484		if (tpc_interrupts_cause & BIT(i)) {
7485			dev_err_ratelimited(hdev->dev,
7486					"TPC%d_%s interrupt cause: %s\n",
7487					tpc_id, interrupt_name,
7488					gaudi_tpc_interrupts_cause[i]);
			/* If this is a QM error, we need to soft-reset */
7490			if (i == 15)
7491				soft_reset_required = true;
7492		}
7493
7494	/* Clear interrupts */
7495	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7496
7497	return soft_reset_required;
7498}
7499
7500static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7501{
7502	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7503}
7504
7505static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7506{
7507	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7508}
7509
7510static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7511{
7512	ktime_t zero_time = ktime_set(0, 0);
7513
7514	mutex_lock(&hdev->clk_throttling.lock);
7515
7516	switch (event_type) {
7517	case GAUDI_EVENT_FIX_POWER_ENV_S:
7518		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7519		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7520		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7521		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7522		dev_info_ratelimited(hdev->dev,
7523			"Clock throttling due to power consumption\n");
7524		break;
7525
7526	case GAUDI_EVENT_FIX_POWER_ENV_E:
7527		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7528		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7529		dev_info_ratelimited(hdev->dev,
7530			"Power envelop is safe, back to optimal clock\n");
7531		break;
7532
7533	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7534		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7535		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7536		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7537		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7538		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7539		dev_info_ratelimited(hdev->dev,
7540			"Clock throttling due to overheating\n");
7541		break;
7542
7543	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7544		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7545		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7546		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7547		dev_info_ratelimited(hdev->dev,
7548			"Thermal envelop is safe, back to optimal clock\n");
7549		break;
7550
7551	default:
7552		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7553			event_type);
7554		break;
7555	}
7556
7557	mutex_unlock(&hdev->clk_throttling.lock);
7558}
7559
7560static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7561{
7562	struct gaudi_device *gaudi = hdev->asic_specific;
7563	struct hl_info_fw_err_info fw_err_info;
7564	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7565	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7566	u32 fw_fatal_err_flag = 0, flags = 0;
7567	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7568			>> EQ_CTL_EVENT_TYPE_SHIFT);
7569	bool reset_required, reset_direct = false;
7570	u8 cause;
7571	int rc;
7572
7573	if (event_type >= GAUDI_EVENT_SIZE) {
7574		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7575				event_type, GAUDI_EVENT_SIZE - 1);
7576		return;
7577	}
7578
7579	gaudi->events_stat[event_type]++;
7580	gaudi->events_stat_aggregate[event_type]++;
7581
7582	switch (event_type) {
7583	case GAUDI_EVENT_PCIE_CORE_DERR:
7584	case GAUDI_EVENT_PCIE_IF_DERR:
7585	case GAUDI_EVENT_PCIE_PHY_DERR:
7586	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7587	case GAUDI_EVENT_MME0_ACC_DERR:
7588	case GAUDI_EVENT_MME0_SBAB_DERR:
7589	case GAUDI_EVENT_MME1_ACC_DERR:
7590	case GAUDI_EVENT_MME1_SBAB_DERR:
7591	case GAUDI_EVENT_MME2_ACC_DERR:
7592	case GAUDI_EVENT_MME2_SBAB_DERR:
7593	case GAUDI_EVENT_MME3_ACC_DERR:
7594	case GAUDI_EVENT_MME3_SBAB_DERR:
7595	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7596		fallthrough;
7597	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7598	case GAUDI_EVENT_PSOC_MEM_DERR:
7599	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7600	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7601	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7602	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7603	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7604	case GAUDI_EVENT_MMU_DERR:
7605	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7606		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7607		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7608		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7609		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7610		goto reset_device;
7611
7612	case GAUDI_EVENT_GIC500:
7613	case GAUDI_EVENT_AXI_ECC:
7614	case GAUDI_EVENT_L2_RAM_ECC:
7615	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7616		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7617		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7618		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7619		goto reset_device;
7620
7621	case GAUDI_EVENT_HBM0_SPI_0:
7622	case GAUDI_EVENT_HBM1_SPI_0:
7623	case GAUDI_EVENT_HBM2_SPI_0:
7624	case GAUDI_EVENT_HBM3_SPI_0:
7625		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7626		gaudi_hbm_read_interrupts(hdev,
7627				gaudi_hbm_event_to_dev(event_type),
7628				&eq_entry->hbm_ecc_data);
7629		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7630		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7631		goto reset_device;
7632
7633	case GAUDI_EVENT_HBM0_SPI_1:
7634	case GAUDI_EVENT_HBM1_SPI_1:
7635	case GAUDI_EVENT_HBM2_SPI_1:
7636	case GAUDI_EVENT_HBM3_SPI_1:
7637		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7638		gaudi_hbm_read_interrupts(hdev,
7639				gaudi_hbm_event_to_dev(event_type),
7640				&eq_entry->hbm_ecc_data);
7641		hl_fw_unmask_irq(hdev, event_type);
7642		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7643		break;
7644
7645	case GAUDI_EVENT_TPC0_DEC:
7646	case GAUDI_EVENT_TPC1_DEC:
7647	case GAUDI_EVENT_TPC2_DEC:
7648	case GAUDI_EVENT_TPC3_DEC:
7649	case GAUDI_EVENT_TPC4_DEC:
7650	case GAUDI_EVENT_TPC5_DEC:
7651	case GAUDI_EVENT_TPC6_DEC:
7652	case GAUDI_EVENT_TPC7_DEC:
		/* On a TPC DEC event, notify on a TPC assertion. While there isn't
		 * a specific event for an assertion yet, the FW generates a TPC DEC
		 * event for it. The SW upper layer will inspect an internal mapped
		 * area to determine whether the event is a TPC assertion or a
		 * "real" TPC DEC.
		 */
7658		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7659		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7660		reset_required = gaudi_tpc_read_interrupts(hdev,
7661					tpc_dec_event_to_tpc_id(event_type),
7662					"AXI_SLV_DEC_Error");
7663		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7664		if (reset_required) {
7665			dev_err(hdev->dev, "reset required due to %s\n",
7666				gaudi_irq_map_table[event_type].name);
7667
7668			reset_direct = true;
7669			goto reset_device;
7670		} else {
7671			hl_fw_unmask_irq(hdev, event_type);
7672			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7673		}
7674		break;
7675
7676	case GAUDI_EVENT_TPC0_KRN_ERR:
7677	case GAUDI_EVENT_TPC1_KRN_ERR:
7678	case GAUDI_EVENT_TPC2_KRN_ERR:
7679	case GAUDI_EVENT_TPC3_KRN_ERR:
7680	case GAUDI_EVENT_TPC4_KRN_ERR:
7681	case GAUDI_EVENT_TPC5_KRN_ERR:
7682	case GAUDI_EVENT_TPC6_KRN_ERR:
7683	case GAUDI_EVENT_TPC7_KRN_ERR:
7684		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7685		reset_required = gaudi_tpc_read_interrupts(hdev,
7686					tpc_krn_event_to_tpc_id(event_type),
7687					"KRN_ERR");
7688		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7689		if (reset_required) {
7690			dev_err(hdev->dev, "reset required due to %s\n",
7691				gaudi_irq_map_table[event_type].name);
7692
7693			reset_direct = true;
7694			goto reset_device;
7695		} else {
7696			hl_fw_unmask_irq(hdev, event_type);
7697			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7698		}
7699		break;
7700
7701	case GAUDI_EVENT_PCIE_CORE_SERR:
7702	case GAUDI_EVENT_PCIE_IF_SERR:
7703	case GAUDI_EVENT_PCIE_PHY_SERR:
7704	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7705	case GAUDI_EVENT_MME0_ACC_SERR:
7706	case GAUDI_EVENT_MME0_SBAB_SERR:
7707	case GAUDI_EVENT_MME1_ACC_SERR:
7708	case GAUDI_EVENT_MME1_SBAB_SERR:
7709	case GAUDI_EVENT_MME2_ACC_SERR:
7710	case GAUDI_EVENT_MME2_SBAB_SERR:
7711	case GAUDI_EVENT_MME3_ACC_SERR:
7712	case GAUDI_EVENT_MME3_SBAB_SERR:
7713	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7714	case GAUDI_EVENT_CPU_IF_ECC_SERR:
7715	case GAUDI_EVENT_PSOC_MEM_SERR:
7716	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7717	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7718	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7719	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7720	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7721		fallthrough;
7722	case GAUDI_EVENT_MMU_SERR:
7723		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7724		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7725		hl_fw_unmask_irq(hdev, event_type);
7726		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7727		break;
7728
7729	case GAUDI_EVENT_PCIE_DEC:
7730	case GAUDI_EVENT_CPU_AXI_SPLITTER:
7731	case GAUDI_EVENT_PSOC_AXI_DEC:
7732	case GAUDI_EVENT_PSOC_PRSTN_FALL:
7733		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7734		hl_fw_unmask_irq(hdev, event_type);
7735		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7736		break;
7737
7738	case GAUDI_EVENT_MMU_PAGE_FAULT:
7739	case GAUDI_EVENT_MMU_WR_PERM:
7740		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7741		hl_fw_unmask_irq(hdev, event_type);
7742		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7743		break;
7744
7745	case GAUDI_EVENT_MME0_WBC_RSP:
7746	case GAUDI_EVENT_MME0_SBAB0_RSP:
7747	case GAUDI_EVENT_MME1_WBC_RSP:
7748	case GAUDI_EVENT_MME1_SBAB0_RSP:
7749	case GAUDI_EVENT_MME2_WBC_RSP:
7750	case GAUDI_EVENT_MME2_SBAB0_RSP:
7751	case GAUDI_EVENT_MME3_WBC_RSP:
7752	case GAUDI_EVENT_MME3_SBAB0_RSP:
7753	case GAUDI_EVENT_RAZWI_OR_ADC:
7754	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7755	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7756		fallthrough;
7757	case GAUDI_EVENT_NIC0_QM0:
7758	case GAUDI_EVENT_NIC0_QM1:
7759	case GAUDI_EVENT_NIC1_QM0:
7760	case GAUDI_EVENT_NIC1_QM1:
7761	case GAUDI_EVENT_NIC2_QM0:
7762	case GAUDI_EVENT_NIC2_QM1:
7763	case GAUDI_EVENT_NIC3_QM0:
7764	case GAUDI_EVENT_NIC3_QM1:
7765	case GAUDI_EVENT_NIC4_QM0:
7766	case GAUDI_EVENT_NIC4_QM1:
7767	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7768	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7769		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7770		gaudi_handle_qman_err(hdev, event_type, &event_mask);
7771		hl_fw_unmask_irq(hdev, event_type);
7772		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7773		break;
7774
7775	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7776		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7777		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7778		goto reset_device;
7779
7780	case GAUDI_EVENT_TPC0_BMON_SPMU:
7781	case GAUDI_EVENT_TPC1_BMON_SPMU:
7782	case GAUDI_EVENT_TPC2_BMON_SPMU:
7783	case GAUDI_EVENT_TPC3_BMON_SPMU:
7784	case GAUDI_EVENT_TPC4_BMON_SPMU:
7785	case GAUDI_EVENT_TPC5_BMON_SPMU:
7786	case GAUDI_EVENT_TPC6_BMON_SPMU:
7787	case GAUDI_EVENT_TPC7_BMON_SPMU:
7788	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7789		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7790		hl_fw_unmask_irq(hdev, event_type);
7791		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7792		break;
7793
7794	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7795		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7796		hl_fw_unmask_irq(hdev, event_type);
7797		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7798		break;
7799
7800	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7801		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7802		gaudi_print_sm_sei_info(hdev, event_type,
7803					&eq_entry->sm_sei_data);
7804		rc = hl_state_dump(hdev);
7805		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7806		if (rc)
7807			dev_err(hdev->dev,
7808				"Error during system state dump %d\n", rc);
7809		hl_fw_unmask_irq(hdev, event_type);
7810		break;
7811
7812	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7813		break;
7814
7815	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7816		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7817		hl_fw_unmask_irq(hdev, event_type);
7818		break;
7819
7820	case GAUDI_EVENT_PSOC_GPIO_U16_0:
7821		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7822		dev_err(hdev->dev,
7823			"Received high temp H/W interrupt %d (cause %d)\n",
7824			event_type, cause);
7825		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7826		break;
7827
7828	case GAUDI_EVENT_DEV_RESET_REQ:
7829		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7830		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7831		goto reset_device;
7832
7833	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7834		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7835		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7836		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7837		goto reset_device;
7838
7839	case GAUDI_EVENT_FW_ALIVE_S:
7840		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7841		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7842		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7843		fw_err_info.event_id = event_type;
7844		fw_err_info.event_mask = &event_mask;
7845		hl_handle_fw_err(hdev, &fw_err_info);
7846		goto reset_device;
7847
7848	default:
7849		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7850				event_type);
7851		break;
7852	}
7853
7854	if (event_mask)
7855		hl_notifier_event_send_all(hdev, event_mask);
7856
7857	return;
7858
7859reset_device:
7860	reset_required = true;
7861
7862	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7863		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7864
		/* notify on device unavailable while the reset is triggered by FW */
7866		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7867					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7868	} else if (hdev->hard_reset_on_fw_events) {
7869		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7870		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7871	} else {
7872		reset_required = false;
7873	}
7874
7875	if (reset_required) {
7876		/* escalate general hw errors to critical/fatal error */
7877		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7878			hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7879
7880		hl_device_cond_reset(hdev, flags, event_mask);
7881	} else {
7882		hl_fw_unmask_irq(hdev, event_type);
		/* Notification on the occurred event needs to be sent even though reset is not executed */
7884		if (event_mask)
7885			hl_notifier_event_send_all(hdev, event_mask);
7886	}
7887}
7888
7889static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7890{
7891	struct gaudi_device *gaudi = hdev->asic_specific;
7892
7893	if (aggregate) {
7894		*size = (u32) sizeof(gaudi->events_stat_aggregate);
7895		return gaudi->events_stat_aggregate;
7896	}
7897
7898	*size = (u32) sizeof(gaudi->events_stat);
7899	return gaudi->events_stat;
7900}
7901
7902static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7903{
7904	struct gaudi_device *gaudi = hdev->asic_specific;
7905	u32 status, timeout_usec;
7906	int rc;
7907
7908	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7909		hdev->reset_info.hard_reset_pending)
7910		return 0;
7911
7912	if (hdev->pldm)
7913		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7914	else
7915		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7916
7917	/* L0 & L1 invalidation */
7918	WREG32(mmSTLB_INV_PS, 3);
7919	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7920	WREG32(mmSTLB_INV_PS, 2);
7921
7922	rc = hl_poll_timeout(
7923		hdev,
7924		mmSTLB_INV_PS,
7925		status,
7926		!status,
7927		1000,
7928		timeout_usec);
7929
7930	WREG32(mmSTLB_INV_SET, 0);
7931
7932	return rc;
7933}
7934
7935static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7936						bool is_hard, u32 flags,
7937						u32 asid, u64 va, u64 size)
7938{
7939	/* Treat as invalidate all because there is no range invalidation
7940	 * in Gaudi
7941	 */
7942	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7943}
7944
7945static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7946{
7947	u32 status, timeout_usec;
7948	int rc;
7949
7950	if (hdev->pldm)
7951		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7952	else
7953		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7954
7955	WREG32(MMU_ASID, asid);
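	/* the hop0 physical address is programmed in two parts: bits 43:12 and bits 49:44 */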
7956	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7957	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7958	WREG32(MMU_BUSY, 0x80000000);
7959
7960	rc = hl_poll_timeout(
7961		hdev,
7962		MMU_BUSY,
7963		status,
7964		!(status & 0x80000000),
7965		1000,
7966		timeout_usec);
7967
7968	if (rc) {
7969		dev_err(hdev->dev,
7970			"Timeout during MMU hop0 config of asid %d\n", asid);
7971		return rc;
7972	}
7973
7974	return 0;
7975}
7976
7977static int gaudi_send_heartbeat(struct hl_device *hdev)
7978{
7979	struct gaudi_device *gaudi = hdev->asic_specific;
7980
7981	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7982		return 0;
7983
7984	return hl_fw_send_heartbeat(hdev);
7985}
7986
7987static int gaudi_cpucp_info_get(struct hl_device *hdev)
7988{
7989	struct gaudi_device *gaudi = hdev->asic_specific;
7990	struct asic_fixed_properties *prop = &hdev->asic_prop;
7991	int rc;
7992
7993	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7994		return 0;
7995
7996	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
7997					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
7998					mmCPU_BOOT_ERR1);
7999	if (rc)
8000		return rc;
8001
8002	if (!strlen(prop->cpucp_info.card_name))
8003		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8004				CARD_NAME_MAX_LEN);
8005
8006	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8007
8008	set_default_power_values(hdev);
8009
8010	return 0;
8011}
8012
8013static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8014		struct engines_data *e)
8015{
8016	struct gaudi_device *gaudi = hdev->asic_specific;
8017	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8018	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8019	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8020	unsigned long *mask = (unsigned long *)mask_arr;
8021	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8022	bool is_idle = true, is_eng_idle, is_slave;
8023	u64 offset;
8024	int i, dma_id, port;
8025
8026	if (e)
8027		hl_engine_data_sprintf(e,
8028			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8029			"---  -------  ------------  ----------  -------------\n");
8030
8031	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8032		dma_id = gaudi_dma_assignment[i];
8033		offset = dma_id * DMA_QMAN_OFFSET;
8034
8035		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8036		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8037		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8038		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8039				IS_DMA_IDLE(dma_core_sts0);
8040		is_idle &= is_eng_idle;
8041
8042		if (mask && !is_eng_idle)
8043			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8044		if (e)
8045			hl_engine_data_sprintf(e, fmt, dma_id,
8046				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8047				qm_cgm_sts, dma_core_sts0);
8048	}
8049
8050	if (e)
8051		hl_engine_data_sprintf(e,
8052			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8053			"---  -------  ------------  ----------  ----------\n");
8054
8055	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8056		offset = i * TPC_QMAN_OFFSET;
8057		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8058		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8059		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8060		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8061				IS_TPC_IDLE(tpc_cfg_sts);
8062		is_idle &= is_eng_idle;
8063
8064		if (mask && !is_eng_idle)
8065			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8066		if (e)
8067			hl_engine_data_sprintf(e, fmt, i,
8068				is_eng_idle ? "Y" : "N",
8069				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8070	}
8071
8072	if (e)
8073		hl_engine_data_sprintf(e,
8074			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8075			"---  -------  ------------  ----------  -----------\n");
8076
8077	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8078		offset = i * MME_QMAN_OFFSET;
8079		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8080		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8081
8082		/* MME 1 & 3 are slaves, no need to check their QMANs */
8083		is_slave = i % 2;
8084		if (!is_slave) {
8085			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8086			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8087			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8088		}
8089
8090		is_idle &= is_eng_idle;
8091
8092		if (mask && !is_eng_idle)
8093			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8094		if (e) {
8095			if (!is_slave)
8096				hl_engine_data_sprintf(e, fmt, i,
8097					is_eng_idle ? "Y" : "N",
8098					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8099			else
8100				hl_engine_data_sprintf(e, mme_slave_fmt, i,
8101					is_eng_idle ? "Y" : "N", "-",
8102					"-", mme_arch_sts);
8103		}
8104	}
8105
8106	if (e)
8107		hl_engine_data_sprintf(e,
8108				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8109				"---  -------  ------------  ----------\n");
8110
8111	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8112		offset = i * NIC_MACRO_QMAN_OFFSET;
8113		port = 2 * i;
8114		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8115			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8116			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8117			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8118			is_idle &= is_eng_idle;
8119
8120			if (mask && !is_eng_idle)
8121				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8122			if (e)
8123				hl_engine_data_sprintf(e, nic_fmt, port,
8124						is_eng_idle ? "Y" : "N",
8125						qm_glbl_sts0, qm_cgm_sts);
8126		}
8127
8128		port = 2 * i + 1;
8129		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8130			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8131			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8132			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8133			is_idle &= is_eng_idle;
8134
8135			if (mask && !is_eng_idle)
8136				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8137			if (e)
8138				hl_engine_data_sprintf(e, nic_fmt, port,
8139						is_eng_idle ? "Y" : "N",
8140						qm_glbl_sts0, qm_cgm_sts);
8141		}
8142	}
8143
8144	if (e)
8145		hl_engine_data_sprintf(e, "\n");
8146
8147	return is_idle;
8148}
8149
8150static void gaudi_hw_queues_lock(struct hl_device *hdev)
8151	__acquires(&gaudi->hw_queues_lock)
8152{
8153	struct gaudi_device *gaudi = hdev->asic_specific;
8154
8155	spin_lock(&gaudi->hw_queues_lock);
8156}
8157
8158static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8159	__releases(&gaudi->hw_queues_lock)
8160{
8161	struct gaudi_device *gaudi = hdev->asic_specific;
8162
8163	spin_unlock(&gaudi->hw_queues_lock);
8164}
8165
8166static u32 gaudi_get_pci_id(struct hl_device *hdev)
8167{
8168	return hdev->pdev->device;
8169}
8170
8171static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8172				size_t max_size)
8173{
8174	struct gaudi_device *gaudi = hdev->asic_specific;
8175
8176	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8177		return 0;
8178
8179	return hl_fw_get_eeprom_data(hdev, data, max_size);
8180}
8181
8182static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8183{
8184	struct gaudi_device *gaudi = hdev->asic_specific;
8185
8186	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8187		return 0;
8188
8189	return hl_fw_get_monitor_dump(hdev, data);
8190}
8191
8192/*
 * This function should be used only during initialization and/or after reset,
8194 * when there are no active users.
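 * It loads the TPC kernel address, triggers an icache invalidate and prefetch,
 * then executes the kernel and polls until the TPC reports completion.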
8195 */
8196static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,	u32 tpc_id)
8197{
8198	u64 kernel_timeout;
8199	u32 status, offset;
8200	int rc;
8201
8202	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8203
8204	if (hdev->pldm)
8205		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8206	else
8207		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8208
8209	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8210			lower_32_bits(tpc_kernel));
8211	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8212			upper_32_bits(tpc_kernel));
8213
8214	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8215			lower_32_bits(tpc_kernel));
8216	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8217			upper_32_bits(tpc_kernel));
8218	/* set a valid LUT pointer, content is of no significance */
8219	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8220			lower_32_bits(tpc_kernel));
8221	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8222			upper_32_bits(tpc_kernel));
8223
8224	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8225			lower_32_bits(CFG_BASE +
8226				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8227
8228	WREG32(mmTPC0_CFG_TPC_CMD + offset,
8229			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8230			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the icache prefetch to start */
8232	usleep_range(1000, 1500);
8233
	/* wait until the icache prefetch has finished */
8235	rc = hl_poll_timeout(
8236		hdev,
8237		mmTPC0_CFG_STATUS + offset,
8238		status,
8239		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8240				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8241		1000,
8242		kernel_timeout);
8243
8244	if (rc) {
8245		dev_err(hdev->dev,
8246			"Timeout while waiting for TPC%d icache prefetch\n",
8247			tpc_id);
8248		return -EIO;
8249	}
8250
8251	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8252			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8253
8254	/* wait a bit for the engine to start executing */
8255	usleep_range(1000, 1500);
8256
8257	/* wait until engine has finished executing */
8258	rc = hl_poll_timeout(
8259		hdev,
8260		mmTPC0_CFG_STATUS + offset,
8261		status,
8262		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8263				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8264		1000,
8265		kernel_timeout);
8266
8267	if (rc) {
8268		dev_err(hdev->dev,
8269			"Timeout while waiting for TPC%d vector pipe\n",
8270			tpc_id);
8271		return -EIO;
8272	}
8273
8274	rc = hl_poll_timeout(
8275		hdev,
8276		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8277		status,
8278		(status == 0),
8279		1000,
8280		kernel_timeout);
8281
8282	if (rc) {
8283		dev_err(hdev->dev,
8284			"Timeout while waiting for TPC%d kernel to execute\n",
8285			tpc_id);
8286		return -EIO;
8287	}
8288
8289	return 0;
8290}
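
/*
 * A minimal caller sketch (illustrative only, not part of the driver): run
 * the same TPC kernel binary on every TPC engine during init, assuming
 * "tpc_kernel_dma_addr" holds the device-visible address of the binary and
 * "TPC_NUMBER_OF_ENGINES" is the number of TPC engines:
 *
 *	int tpc_id, rc;
 *
 *	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
 *		rc = gaudi_run_tpc_kernel(hdev, tpc_kernel_dma_addr, tpc_id);
 *		if (rc)
 *			return rc;
 *	}
 */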
8291
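/*
 * The internal CB pool backs the small command buffers used by the
 * collective-wait flow. Each allocation is sized for five MSG_SHORT packets
 * plus one FENCE packet, and the pool granularity (min_alloc_order) is
 * derived from that size: assuming both packet structs are 8 bytes,
 * collective_cb_size is 5 * 8 + 8 = 48 and ilog2(48) = 5, i.e. a 32-byte
 * allocation granule. The pool is backed by host coherent memory which is
 * also reserved and mapped into the context's host VA range via the MMU.
 */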
8292static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8293		struct hl_ctx *ctx)
8294{
8295	struct gaudi_device *gaudi = hdev->asic_specific;
8296	int min_alloc_order, rc, collective_cb_size;
8297
8298	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8299		return 0;
8300
8301	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8302							HOST_SPACE_INTERNAL_CB_SZ,
8303							&hdev->internal_cb_pool_dma_addr,
8304							GFP_KERNEL | __GFP_ZERO);
8305
8306	if (!hdev->internal_cb_pool_virt_addr)
8307		return -ENOMEM;
8308
8309	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8310			sizeof(struct packet_fence);
8311	min_alloc_order = ilog2(collective_cb_size);
8312
8313	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8314	if (!hdev->internal_cb_pool) {
8315		dev_err(hdev->dev,
8316			"Failed to create internal CB pool\n");
8317		rc = -ENOMEM;
8318		goto free_internal_cb_pool;
8319	}
8320
8321	rc = gen_pool_add(hdev->internal_cb_pool,
8322				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8323				HOST_SPACE_INTERNAL_CB_SZ, -1);
8324	if (rc) {
8325		dev_err(hdev->dev,
8326			"Failed to add memory to internal CB pool\n");
8327		rc = -EFAULT;
8328		goto destroy_internal_cb_pool;
8329	}
8330
8331	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8332			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8333			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8334
8335	if (!hdev->internal_cb_va_base) {
8336		rc = -ENOMEM;
8337		goto destroy_internal_cb_pool;
8338	}
8339
8340	mutex_lock(&hdev->mmu_lock);
8341
8342	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8343			hdev->internal_cb_pool_dma_addr,
8344			HOST_SPACE_INTERNAL_CB_SZ);
8345	if (rc)
8346		goto unreserve_internal_cb_pool;
8347
8348	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8349	if (rc)
8350		goto unmap_internal_cb_pool;
8351
8352	mutex_unlock(&hdev->mmu_lock);
8353
8354	return 0;
8355
8356unmap_internal_cb_pool:
8357	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8358			HOST_SPACE_INTERNAL_CB_SZ);
8359unreserve_internal_cb_pool:
8360	mutex_unlock(&hdev->mmu_lock);
8361	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8362			HOST_SPACE_INTERNAL_CB_SZ);
8363destroy_internal_cb_pool:
8364	gen_pool_destroy(hdev->internal_cb_pool);
8365free_internal_cb_pool:
8366	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8367					hdev->internal_cb_pool_dma_addr);
8368
8369	return rc;
8370}
8371
8372static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8373		struct hl_ctx *ctx)
8374{
8375	struct gaudi_device *gaudi = hdev->asic_specific;
8376
8377	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8378		return;
8379
8380	mutex_lock(&hdev->mmu_lock);
8381	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8382			HOST_SPACE_INTERNAL_CB_SZ);
8383	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8384			HOST_SPACE_INTERNAL_CB_SZ);
8385	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8386	mutex_unlock(&hdev->mmu_lock);
8387
8388	gen_pool_destroy(hdev->internal_cb_pool);
8389
8390	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8391					hdev->internal_cb_pool_dma_addr);
8392}
8393
8394static int gaudi_ctx_init(struct hl_ctx *ctx)
8395{
8396	int rc;
8397
8398	if (ctx->asid == HL_KERNEL_ASID_ID)
8399		return 0;
8400
8401	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8402	if (rc)
8403		return rc;
8404
8405	rc = gaudi_restore_user_registers(ctx->hdev);
8406	if (rc)
8407		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8408
8409	return rc;
8410}
8411
8412static void gaudi_ctx_fini(struct hl_ctx *ctx)
8413{
8414	if (ctx->asid == HL_KERNEL_ASID_ID)
8415		return;
8416
8417	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8418}
8419
8420static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8421{
8422	return 0;
8423}
8424
8425static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8426{
8427	return gaudi_cq_assignment[cq_idx];
8428}
8429
8430static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8431{
8432	return sizeof(struct packet_msg_short) +
8433			sizeof(struct packet_msg_prot) * 2;
8434}
8435
8436static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8437{
8438	return sizeof(struct packet_msg_short) * 4 +
8439			sizeof(struct packet_fence) +
8440			sizeof(struct packet_msg_prot) * 2;
8441}
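
/*
 * These sizes must cover what gaudi_gen_signal_cb() and gaudi_gen_wait_cb()
 * below emit: a signal CB is a single MSG_SHORT, while a wait CB consists of
 * three monitor-config MSG_SHORTs, one monitor-arm MSG_SHORT and one FENCE.
 * The additional MSG_PROT packets accounted for here are presumably the
 * completion packets appended at the end of the CB by the common code.
 */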
8442
8443static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8444{
8445	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8446}
8447
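/*
 * Append a signal packet at offset "size" inside the given CB: a single
 * MSG_SHORT that adds 1 to the requested sync object in the W_S sync manager
 * section. The updated CB offset is returned so the caller can keep
 * appending packets after it.
 */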
8448static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8449				u32 size, bool eb)
8450{
8451	struct hl_cb *cb = (struct hl_cb *) data;
8452	struct packet_msg_short *pkt;
8453	u32 value, ctl, pkt_size = sizeof(*pkt);
8454
8455	pkt = cb->kernel_address + size;
8456	memset(pkt, 0, pkt_size);
8457
8458	/* Inc by 1, Mode ADD */
8459	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8460	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8461
8462	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8463	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8464	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8465	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8466	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8467	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8468	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8469
8470	pkt->value = cpu_to_le32(value);
8471	pkt->ctl = cpu_to_le32(ctl);
8472
8473	return size + pkt_size;
8474}
8475
8476static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8477					u16 addr)
8478{
8479	u32 ctl, pkt_size = sizeof(*pkt);
8480
8481	memset(pkt, 0, pkt_size);
8482
8483	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8484	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8485	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8486	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8487	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8488	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* only the last pkt sets MB */
8489
8490	pkt->value = cpu_to_le32(value);
8491	pkt->ctl = cpu_to_le32(ctl);
8492
8493	return pkt_size;
8494}
8495
8496static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8497		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8498		u16 sob_val, u16 mon_id)
8499{
8500	u64 monitor_base;
8501	u32 ctl, value, pkt_size = sizeof(*pkt);
8502	u16 msg_addr_offset;
8503	u8 mask;
8504
8505	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8506		dev_err(hdev->dev,
8507			"sob_base %u (mask %#x) is not valid\n",
8508			sob_base, sob_mask);
8509		return 0;
8510	}
8511
8512	/*
8513	 * monitor_base holds the value programmed into the base0 address
8514	 * registers, so the msg short offsets below are relative to it
8515	 */
8516	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8517
8518	msg_addr_offset =
8519		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8520				monitor_base;
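	/*
	 * For example, for mon_id == 3 the target register is MON_ARM_3, so
	 * msg_addr_offset is (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + 12) -
	 * monitor_base, i.e. the distance of that register from the
	 * programmed base0 address.
	 */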
8521
8522	memset(pkt, 0, pkt_size);
8523
8524	/* Monitor config packet: bind the monitor to a sync object */
8525	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8526	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8527	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8528			0); /* GREATER OR EQUAL */
8529	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8530
8531	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8532	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8533	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8534	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8535	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8536	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8537	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8538
8539	pkt->value = cpu_to_le32(value);
8540	pkt->ctl = cpu_to_le32(ctl);
8541
8542	return pkt_size;
8543}
8544
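/*
 * Build a FENCE packet that waits on fence counter 2 (target value 1,
 * decrement by 1). This pairs with the CP_FENCE2_RDATA registers returned by
 * gaudi_get_fence_addr() below, which the armed monitor writes to in order
 * to release the fence.
 */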
8545static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8546{
8547	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8548
8549	memset(pkt, 0, pkt_size);
8550
8551	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8552	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8553	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8554
8555	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8556	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8557	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8558	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8559
8560	pkt->cfg = cpu_to_le32(cfg);
8561	pkt->ctl = cpu_to_le32(ctl);
8562
8563	return pkt_size;
8564}
8565
8566static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8567{
8568	u32 offset, nic_index;
8569
8570	switch (queue_id) {
8571	case GAUDI_QUEUE_ID_DMA_0_0:
8572		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8573		break;
8574	case GAUDI_QUEUE_ID_DMA_0_1:
8575		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8576		break;
8577	case GAUDI_QUEUE_ID_DMA_0_2:
8578		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8579		break;
8580	case GAUDI_QUEUE_ID_DMA_0_3:
8581		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8582		break;
8583	case GAUDI_QUEUE_ID_DMA_1_0:
8584		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8585		break;
8586	case GAUDI_QUEUE_ID_DMA_1_1:
8587		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8588		break;
8589	case GAUDI_QUEUE_ID_DMA_1_2:
8590		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8591		break;
8592	case GAUDI_QUEUE_ID_DMA_1_3:
8593		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8594		break;
8595	case GAUDI_QUEUE_ID_DMA_5_0:
8596		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8597		break;
8598	case GAUDI_QUEUE_ID_DMA_5_1:
8599		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8600		break;
8601	case GAUDI_QUEUE_ID_DMA_5_2:
8602		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8603		break;
8604	case GAUDI_QUEUE_ID_DMA_5_3:
8605		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8606		break;
8607	case GAUDI_QUEUE_ID_TPC_7_0:
8608		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8609		break;
8610	case GAUDI_QUEUE_ID_TPC_7_1:
8611		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8612		break;
8613	case GAUDI_QUEUE_ID_TPC_7_2:
8614		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8615		break;
8616	case GAUDI_QUEUE_ID_TPC_7_3:
8617		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8618		break;
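	/*
	 * Each NIC macro contains two QMANs, so the fence register is derived
	 * from the NIC index. For example, GAUDI_QUEUE_ID_NIC_3_0 gives
	 * nic_index 3, i.e. macro 1 (nic_index >> 1) and engine 1
	 * (nic_index & 0x1), which selects NIC1_QM1_CP_FENCE2_RDATA_0.
	 */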
8619	case GAUDI_QUEUE_ID_NIC_0_0:
8620	case GAUDI_QUEUE_ID_NIC_1_0:
8621	case GAUDI_QUEUE_ID_NIC_2_0:
8622	case GAUDI_QUEUE_ID_NIC_3_0:
8623	case GAUDI_QUEUE_ID_NIC_4_0:
8624	case GAUDI_QUEUE_ID_NIC_5_0:
8625	case GAUDI_QUEUE_ID_NIC_6_0:
8626	case GAUDI_QUEUE_ID_NIC_7_0:
8627	case GAUDI_QUEUE_ID_NIC_8_0:
8628	case GAUDI_QUEUE_ID_NIC_9_0:
8629		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8630		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8631				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8632				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8633		break;
8634	case GAUDI_QUEUE_ID_NIC_0_1:
8635	case GAUDI_QUEUE_ID_NIC_1_1:
8636	case GAUDI_QUEUE_ID_NIC_2_1:
8637	case GAUDI_QUEUE_ID_NIC_3_1:
8638	case GAUDI_QUEUE_ID_NIC_4_1:
8639	case GAUDI_QUEUE_ID_NIC_5_1:
8640	case GAUDI_QUEUE_ID_NIC_6_1:
8641	case GAUDI_QUEUE_ID_NIC_7_1:
8642	case GAUDI_QUEUE_ID_NIC_8_1:
8643	case GAUDI_QUEUE_ID_NIC_9_1:
8644		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8645		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8646				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8647				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8648		break;
8649	case GAUDI_QUEUE_ID_NIC_0_2:
8650	case GAUDI_QUEUE_ID_NIC_1_2:
8651	case GAUDI_QUEUE_ID_NIC_2_2:
8652	case GAUDI_QUEUE_ID_NIC_3_2:
8653	case GAUDI_QUEUE_ID_NIC_4_2:
8654	case GAUDI_QUEUE_ID_NIC_5_2:
8655	case GAUDI_QUEUE_ID_NIC_6_2:
8656	case GAUDI_QUEUE_ID_NIC_7_2:
8657	case GAUDI_QUEUE_ID_NIC_8_2:
8658	case GAUDI_QUEUE_ID_NIC_9_2:
8659		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8660		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8661				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8662				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8663		break;
8664	case GAUDI_QUEUE_ID_NIC_0_3:
8665	case GAUDI_QUEUE_ID_NIC_1_3:
8666	case GAUDI_QUEUE_ID_NIC_2_3:
8667	case GAUDI_QUEUE_ID_NIC_3_3:
8668	case GAUDI_QUEUE_ID_NIC_4_3:
8669	case GAUDI_QUEUE_ID_NIC_5_3:
8670	case GAUDI_QUEUE_ID_NIC_6_3:
8671	case GAUDI_QUEUE_ID_NIC_7_3:
8672	case GAUDI_QUEUE_ID_NIC_8_3:
8673	case GAUDI_QUEUE_ID_NIC_9_3:
8674		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8675		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8676				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8677				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8678		break;
8679	default:
8680		return -EINVAL;
8681	}
8682
8683	*addr = CFG_BASE + offset;
8684
8685	return 0;
8686}
8687
8688static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8689{
8690	u64 monitor_base;
8691	u32 size = 0;
8692	u16 msg_addr_offset;
8693
8694	/*
8695	 * monitor_base holds the value programmed into the base0 address
8696	 * registers, so the msg short offsets below are relative to it
8697	 */
8698	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8699
8700	/* First monitor config packet: low address of the sync */
8701	msg_addr_offset =
8702		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8703				monitor_base;
8704
8705	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8706					msg_addr_offset);
8707
8708	/* Second monitor config packet: high address of the sync */
8709	msg_addr_offset =
8710		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8711				monitor_base;
8712
8713	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8714					msg_addr_offset);
8715
8716	/*
8717	 * Third monitor config packet: the payload, i.e. what to write when the
8718	 * sync triggers
8719	 */
8720	msg_addr_offset =
8721		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8722				monitor_base;
8723
8724	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8725
8726	return size;
8727}
8728
8729static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8730				struct hl_gen_wait_properties *prop)
8731{
8732	struct hl_cb *cb = (struct hl_cb *) prop->data;
8733	void *buf = cb->kernel_address;
8734	u64 fence_addr = 0;
8735	u32 size = prop->size;
8736
8737	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8738		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8739				prop->q_idx);
8740		return 0;
8741	}
8742
8743	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8744	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8745			prop->sob_mask, prop->sob_val, prop->mon_id);
8746	size += gaudi_add_fence_pkt(buf + size);
8747
8748	return size;
8749}
8750
8751static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8752{
8753	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8754
8755	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8756		hw_sob->sob_id);
8757
8758	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8759			hw_sob->sob_id * 4, 0);
8760
8761	kref_init(&hw_sob->kref);
8762}
8763
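/*
 * Compose a 64-bit device timestamp from the PSOC timestamp counter pair.
 * Note that the high word is sampled before the low word, so a low-word
 * rollover between the two reads is not accounted for here.
 */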
8764static u64 gaudi_get_device_time(struct hl_device *hdev)
8765{
8766	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8767
8768	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8769}
8770
8771static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8772				u32 *block_size, u32 *block_id)
8773{
8774	return -EPERM;
8775}
8776
8777static int gaudi_block_mmap(struct hl_device *hdev,
8778				struct vm_area_struct *vma,
8779				u32 block_id, u32 block_size)
8780{
8781	return -EPERM;
8782}
8783
8784static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8785{
8786	struct cpu_dyn_regs *dyn_regs =
8787			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8788	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8789			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8790			le32_to_cpu(dyn_regs->gic_host_ints_irq);
8791
8792	WREG32(irq_handler_offset,
8793		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8794}
8795
8796static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8797{
8798	return -EINVAL;
8799}
8800
8801static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8802{
8803	switch (pll_idx) {
8804	case HL_GAUDI_CPU_PLL: return CPU_PLL;
8805	case HL_GAUDI_PCI_PLL: return PCI_PLL;
8806	case HL_GAUDI_NIC_PLL: return NIC_PLL;
8807	case HL_GAUDI_DMA_PLL: return DMA_PLL;
8808	case HL_GAUDI_MESH_PLL: return MESH_PLL;
8809	case HL_GAUDI_MME_PLL: return MME_PLL;
8810	case HL_GAUDI_TPC_PLL: return TPC_PLL;
8811	case HL_GAUDI_IF_PLL: return IF_PLL;
8812	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8813	case HL_GAUDI_HBM_PLL: return HBM_PLL;
8814	default: return -EINVAL;
8815	}
8816}
8817
8818static int gaudi_add_sync_to_engine_map_entry(
8819	struct hl_sync_to_engine_map *map, u32 reg_value,
8820	enum hl_sync_engine_type engine_type, u32 engine_id)
8821{
8822	struct hl_sync_to_engine_map_entry *entry;
8823
8824	/* The register value is a partial address of the sync object and is
8825	 * used as its unique identifier. To get it, the lower 32 bits of the
8826	 * CFG base are subtracted from the value.
8827	 */
8828	if (reg_value == 0 || reg_value == 0xffffffff)
8829		return 0;
8830	reg_value -= lower_32_bits(CFG_BASE);
8831
8832	/* create a new hash entry */
8833	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8834	if (!entry)
8835		return -ENOMEM;
8836	entry->engine_type = engine_type;
8837	entry->engine_id = engine_id;
8838	entry->sync_id = reg_value;
8839	hash_add(map->tb, &entry->node, reg_value);
8840
8841	return 0;
8842}
8843
8844static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8845				struct hl_sync_to_engine_map *map)
8846{
8847	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8848	int i, j, rc;
8849	u32 reg_value;
8850
8851	/* Iterate over TPC engines */
8852	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8853
8854		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8855					sds->props[SP_NEXT_TPC] * i);
8856
8857		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8858							ENGINE_TPC, i);
8859		if (rc)
8860			goto free_sync_to_engine_map;
8861	}
8862
8863	/* Iterate over MME engines */
8864	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8865		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8866
8867			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8868						sds->props[SP_NEXT_MME] * i +
8869						j * sizeof(u32));
8870
8871			rc = gaudi_add_sync_to_engine_map_entry(
8872				map, reg_value, ENGINE_MME,
8873				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8874			if (rc)
8875				goto free_sync_to_engine_map;
8876		}
8877	}
8878
8879	/* Iterate over DMA engines */
8880	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8881		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8882					sds->props[SP_DMA_QUEUES_OFFSET] * i);
8883		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8884							ENGINE_DMA, i);
8885		if (rc)
8886			goto free_sync_to_engine_map;
8887	}
8888
8889	return 0;
8890
8891free_sync_to_engine_map:
8892	hl_state_dump_free_sync_to_engine_map(map);
8893
8894	return rc;
8895}
8896
8897static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8898{
8899	return FIELD_GET(
8900		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8901		mon->status);
8902}
8903
8904static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8905{
8906	const size_t max_write = 10;
8907	u32 gid, mask, sob;
8908	int i, offset;
8909
8910	/* Each cleared bit in the mask contributes one monitored sync object,
8911	 * whose ID is (8 * group_id + bit position of the cleared bit).
8912	 */
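	/* For example, with group_id 2 and mask 0b11111100, bits 0 and 1 are
	 * cleared, so sync objects 16 and 17 are being monitored (assuming
	 * MONITOR_MAX_SOBS is 8, as the formula above implies).
	 */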
8913	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8914			mon->arm_data);
8915	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8916			mon->arm_data);
8917
8918	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8919		max_write; mask >>= 1, i++) {
8920		if (!(mask & 1)) {
8921			sob = gid * MONITOR_MAX_SOBS + i;
8922
8923			if (offset > 0)
8924				offset += snprintf(sobs + offset, max_write,
8925							", ");
8926
8927			offset += snprintf(sobs + offset, max_write, "%u", sob);
8928		}
8929	}
8930}
8931
8932static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8933				struct hl_device *hdev,
8934				struct hl_mon_state_dump *mon)
8935{
8936	const char *name;
8937	char scratch_buf1[BIN_REG_STRING_SIZE],
8938		scratch_buf2[BIN_REG_STRING_SIZE];
8939	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8940
8941	name = hl_state_dump_get_monitor_name(hdev, mon);
8942	if (!name)
8943		name = "";
8944
8945	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
8946
8947	return hl_snprintf_resize(
8948		buf, size, offset,
8949		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8950		mon->id, name,
8951		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8952				mon->arm_data),
8953		hl_format_as_binary(
8954			scratch_buf1, sizeof(scratch_buf1),
8955			FIELD_GET(
8956				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8957				mon->arm_data)),
8958		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
8959				mon->arm_data),
8960		mon->wr_data,
8961		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
8962		hl_format_as_binary(
8963			scratch_buf2, sizeof(scratch_buf2),
8964			FIELD_GET(
8965				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
8966				mon->status)),
8967		monitored_sobs);
8968}
8969
8970
8971static int gaudi_print_fences_single_engine(
8972	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
8973	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
8974	size_t *size, size_t *offset)
8975{
8976	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8977	int rc = -ENOMEM, i;
8978	u32 *statuses, *fences;
8979
8980	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
8981			sizeof(*statuses), GFP_KERNEL);
8982	if (!statuses)
8983		goto out;
8984
8985	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
8986				sds->props[SP_ENGINE_NUM_OF_QUEUES],
8987			 sizeof(*fences), GFP_KERNEL);
8988	if (!fences)
8989		goto free_status;
8990
8991	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
8992		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
8993
8994	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
8995				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
8996		fences[i] = RREG32(base_offset + i * sizeof(u32));
8997
8998	/* The actual print */
8999	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9000		u32 fence_id;
9001		u64 fence_cnt, fence_rdata;
9002		const char *engine_name;
9003
9004		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9005			statuses[i]))
9006			continue;
9007
9008		fence_id =
9009			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
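		/* The fence CNT registers form a per-engine array indexed by
		 * [fence_id][queue], and each RDATA register sits at a fixed
		 * offset from its CNT register; both addresses are derived
		 * from that layout below.
		 */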
9010		fence_cnt = base_offset + CFG_BASE +
9011			sizeof(u32) *
9012			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9013		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9014				sds->props[SP_FENCE0_RDATA_OFFSET];
9015		engine_name = hl_sync_engine_to_string(engine_type);
9016
9017		rc = hl_snprintf_resize(
9018			buf, size, offset,
9019			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9020			engine_name, engine_id,
9021			i, fence_id,
9022			fence_cnt, engine_name, engine_id, fence_id, i,
9023			fence_rdata, engine_name, engine_id, fence_id, i,
9024			fences[fence_id],
9025			statuses[i]);
9026		if (rc)
9027			goto free_fences;
9028	}
9029
9030	rc = 0;
9031
9032free_fences:
9033	kfree(fences);
9034free_status:
9035	kfree(statuses);
9036out:
9037	return rc;
9038}
9039
9040
9041static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9042	.monitor_valid = gaudi_monitor_valid,
9043	.print_single_monitor = gaudi_print_single_monitor,
9044	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9045	.print_fences_single_engine = gaudi_print_fences_single_engine,
9046};
9047
9048static void gaudi_state_dump_init(struct hl_device *hdev)
9049{
9050	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9051	int i;
9052
9053	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9054		hash_add(sds->so_id_to_str_tb,
9055			&gaudi_so_id_to_str[i].node,
9056			gaudi_so_id_to_str[i].id);
9057
9058	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9059		hash_add(sds->monitor_id_to_str_tb,
9060			&gaudi_monitor_id_to_str[i].node,
9061			gaudi_monitor_id_to_str[i].id);
9062
9063	sds->props = gaudi_state_dump_specs_props;
9064
9065	sds->sync_namager_names = gaudi_sync_manager_names;
9066
9067	sds->funcs = gaudi_state_dump_funcs;
9068}
9069
9070static u32 *gaudi_get_stream_master_qid_arr(void)
9071{
9072	return gaudi_stream_master;
9073}
9074
9075static int gaudi_set_dram_properties(struct hl_device *hdev)
9076{
9077	return 0;
9078}
9079
9080static int gaudi_set_binning_masks(struct hl_device *hdev)
9081{
9082	return 0;
9083}
9084
9085static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9086{
9087}
9088
9089static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9090{
9091	struct hl_device *hdev = dev_get_drvdata(dev);
9092	struct cpucp_info *cpucp_info;
9093
9094	cpucp_info = &hdev->asic_prop.cpucp_info;
9095
9096	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9097}
9098
9099static DEVICE_ATTR_RO(infineon_ver);
9100
9101static struct attribute *gaudi_vrm_dev_attrs[] = {
9102	&dev_attr_infineon_ver.attr,
9103	NULL,
9104};
9105
9106static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9107					struct attribute_group *dev_vrm_attr_grp)
9108{
9109	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9110	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9111}
9112
9113static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9114{
9115	return 0;
9116}
9117
9118static const struct hl_asic_funcs gaudi_funcs = {
9119	.early_init = gaudi_early_init,
9120	.early_fini = gaudi_early_fini,
9121	.late_init = gaudi_late_init,
9122	.late_fini = gaudi_late_fini,
9123	.sw_init = gaudi_sw_init,
9124	.sw_fini = gaudi_sw_fini,
9125	.hw_init = gaudi_hw_init,
9126	.hw_fini = gaudi_hw_fini,
9127	.halt_engines = gaudi_halt_engines,
9128	.suspend = gaudi_suspend,
9129	.resume = gaudi_resume,
9130	.mmap = gaudi_mmap,
9131	.ring_doorbell = gaudi_ring_doorbell,
9132	.pqe_write = gaudi_pqe_write,
9133	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9134	.asic_dma_free_coherent = gaudi_dma_free_coherent,
9135	.scrub_device_mem = gaudi_scrub_device_mem,
9136	.scrub_device_dram = gaudi_scrub_device_dram,
9137	.get_int_queue_base = gaudi_get_int_queue_base,
9138	.test_queues = gaudi_test_queues,
9139	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9140	.asic_dma_pool_free = gaudi_dma_pool_free,
9141	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9142	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9143	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
9144	.cs_parser = gaudi_cs_parser,
9145	.asic_dma_map_sgtable = hl_dma_map_sgtable,
9146	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9147	.update_eq_ci = gaudi_update_eq_ci,
9148	.context_switch = gaudi_context_switch,
9149	.restore_phase_topology = gaudi_restore_phase_topology,
9150	.debugfs_read_dma = gaudi_debugfs_read_dma,
9151	.add_device_attr = gaudi_add_device_attr,
9152	.handle_eqe = gaudi_handle_eqe,
9153	.get_events_stat = gaudi_get_events_stat,
9154	.read_pte = gaudi_read_pte,
9155	.write_pte = gaudi_write_pte,
9156	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9157	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9158	.mmu_prefetch_cache_range = NULL,
9159	.send_heartbeat = gaudi_send_heartbeat,
9160	.debug_coresight = gaudi_debug_coresight,
9161	.is_device_idle = gaudi_is_device_idle,
9162	.compute_reset_late_init = gaudi_compute_reset_late_init,
9163	.hw_queues_lock = gaudi_hw_queues_lock,
9164	.hw_queues_unlock = gaudi_hw_queues_unlock,
9165	.get_pci_id = gaudi_get_pci_id,
9166	.get_eeprom_data = gaudi_get_eeprom_data,
9167	.get_monitor_dump = gaudi_get_monitor_dump,
9168	.send_cpu_message = gaudi_send_cpu_message,
9169	.pci_bars_map = gaudi_pci_bars_map,
9170	.init_iatu = gaudi_init_iatu,
9171	.rreg = hl_rreg,
9172	.wreg = hl_wreg,
9173	.halt_coresight = gaudi_halt_coresight,
9174	.ctx_init = gaudi_ctx_init,
9175	.ctx_fini = gaudi_ctx_fini,
9176	.pre_schedule_cs = gaudi_pre_schedule_cs,
9177	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9178	.load_firmware_to_device = gaudi_load_firmware_to_device,
9179	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9180	.get_signal_cb_size = gaudi_get_signal_cb_size,
9181	.get_wait_cb_size = gaudi_get_wait_cb_size,
9182	.gen_signal_cb = gaudi_gen_signal_cb,
9183	.gen_wait_cb = gaudi_gen_wait_cb,
9184	.reset_sob = gaudi_reset_sob,
9185	.reset_sob_group = gaudi_reset_sob_group,
9186	.get_device_time = gaudi_get_device_time,
9187	.pb_print_security_errors = NULL,
9188	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
9189	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9190	.get_dec_base_addr = NULL,
9191	.scramble_addr = hl_mmu_scramble_addr,
9192	.descramble_addr = hl_mmu_descramble_addr,
9193	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9194	.get_hw_block_id = gaudi_get_hw_block_id,
9195	.hw_block_mmap = gaudi_block_mmap,
9196	.enable_events_from_fw = gaudi_enable_events_from_fw,
9197	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9198	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9199	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
9200	.init_firmware_loader = gaudi_init_firmware_loader,
9201	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9202	.state_dump_init = gaudi_state_dump_init,
9203	.get_sob_addr = gaudi_get_sob_addr,
9204	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9205	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9206	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
9207	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
9208	.access_dev_mem = hl_access_dev_mem,
9209	.set_dram_bar_base = gaudi_set_hbm_bar_base,
9210	.send_device_activity = gaudi_send_device_activity,
9211	.set_dram_properties = gaudi_set_dram_properties,
9212	.set_binning_masks = gaudi_set_binning_masks,
9213};
9214
9215/**
9216 * gaudi_set_asic_funcs - set GAUDI function pointers
9217 *
9218 * @hdev: pointer to hl_device structure
9219 *
9220 */
9221void gaudi_set_asic_funcs(struct hl_device *hdev)
9222{
9223	hdev->asic_funcs = &gaudi_funcs;
9224}
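
/*
 * From this point on, the driver's common code reaches the ASIC only through
 * the function table above. A minimal sketch of such a call site
 * (illustrative only):
 *
 *	hdev->asic_funcs->hw_queues_lock(hdev);
 *	... ring a doorbell / touch the hardware queues ...
 *	hdev->asic_funcs->hw_queues_unlock(hdev);
 */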
9225