1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/genalloc.h>
21 #include <linux/io-64-nonatomic-lo-hi.h>
22 #include <linux/iommu.h>
23 #include <linux/seq_file.h>
24 
25 /*
26  * Gaudi security scheme:
27  *
28  * 1. Host is protected by:
29  *        - Range registers
30  *        - MMU
31  *
32  * 2. DDR is protected by:
33  *        - Range registers (protect the first 512MB)
34  *
35  * 3. Configuration is protected by:
36  *        - Range registers
37  *        - Protection bits
38  *
39  * MMU is always enabled.
40  *
41  * QMAN DMA channels 0,1,5 (PCI DMA):
42  *     - DMA is not secured.
43  *     - PQ and CQ are secured.
44  *     - CP is secured: The driver needs to parse the CB, but WREG should be
45  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
46  *                      never secured.
47  *
48  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
49  * channel 0 to be secured, execute the DMA and change it back to not secured.
50  * Currently, the driver doesn't use the DMA while there are compute jobs
51  * running.
52  *
53  * The current use cases for the driver to use the DMA are:
54  *     - Clear SRAM on context switch (happens on context switch when device is
55  *       idle)
56  *     - MMU page tables area clear (happens on init)
57  *
58  * QMAN DMA 2-4,6,7, TPC, MME, NIC:
59  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
60  * CQ, CP and the engine are not secured
61  *
62  */
63 
64 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
65 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
66 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
67 
68 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
69 
70 #define GAUDI_RESET_TIMEOUT_MSEC	1000		/* 1000ms */
71 #define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
72 #define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
73 #define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
74 
75 #define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
76 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
77 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
78 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
79 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
80 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
81 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
82 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
83 
84 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
85 
86 #define GAUDI_MAX_STRING_LEN		20
87 
88 #define GAUDI_CB_POOL_CB_CNT		512
89 #define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
90 
91 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
92 
93 #define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
94 
95 #define GAUDI_NUM_OF_QM_ERR_CAUSE	16
96 
97 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
98 
99 #define GAUDI_ARB_WDT_TIMEOUT		0x1000000
100 
101 #define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
102 		BIT(GAUDI_ENGINE_ID_MME_0) |\
103 		BIT(GAUDI_ENGINE_ID_MME_2) |\
104 		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
105 
106 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
107 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
108 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
109 		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
110 		"gaudi cpu eq"
111 };
112 
113 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
114 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
115 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
116 	[GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
117 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
118 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
119 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
120 	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
121 	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
122 };
123 
124 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
125 	[0] = GAUDI_QUEUE_ID_DMA_0_0,
126 	[1] = GAUDI_QUEUE_ID_DMA_0_1,
127 	[2] = GAUDI_QUEUE_ID_DMA_0_2,
128 	[3] = GAUDI_QUEUE_ID_DMA_0_3,
129 	[4] = GAUDI_QUEUE_ID_DMA_1_0,
130 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
131 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
132 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
133 	[8] = GAUDI_QUEUE_ID_DMA_5_0,
134 	[9] = GAUDI_QUEUE_ID_DMA_5_1,
135 	[10] = GAUDI_QUEUE_ID_DMA_5_2,
136 	[11] = GAUDI_QUEUE_ID_DMA_5_3
137 };
138 
139 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
140 	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
141 	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
142 	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
143 	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
144 	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
145 	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
146 	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
147 	[PACKET_FENCE]		= sizeof(struct packet_fence),
148 	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
149 	[PACKET_NOP]		= sizeof(struct packet_nop),
150 	[PACKET_STOP]		= sizeof(struct packet_stop),
151 	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
152 	[PACKET_WAIT]		= sizeof(struct packet_wait),
153 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
154 };
155 
156 static inline bool validate_packet_id(enum packet_id id)
157 {
158 	switch (id) {
159 	case PACKET_WREG_32:
160 	case PACKET_WREG_BULK:
161 	case PACKET_MSG_LONG:
162 	case PACKET_MSG_SHORT:
163 	case PACKET_CP_DMA:
164 	case PACKET_REPEAT:
165 	case PACKET_MSG_PROT:
166 	case PACKET_FENCE:
167 	case PACKET_LIN_DMA:
168 	case PACKET_NOP:
169 	case PACKET_STOP:
170 	case PACKET_ARB_POINT:
171 	case PACKET_WAIT:
172 	case PACKET_LOAD_AND_EXE:
173 		return true;
174 	default:
175 		return false;
176 	}
177 }
178 
179 static const char * const
180 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
181 	"tpc_address_exceed_slm",
182 	"tpc_div_by_0",
183 	"tpc_spu_mac_overflow",
184 	"tpc_spu_addsub_overflow",
185 	"tpc_spu_abs_overflow",
186 	"tpc_spu_fp_dst_nan_inf",
187 	"tpc_spu_fp_dst_denorm",
188 	"tpc_vpu_mac_overflow",
189 	"tpc_vpu_addsub_overflow",
190 	"tpc_vpu_abs_overflow",
191 	"tpc_vpu_fp_dst_nan_inf",
192 	"tpc_vpu_fp_dst_denorm",
193 	"tpc_assertions",
194 	"tpc_illegal_instruction",
195 	"tpc_pc_wrap_around",
196 	"tpc_qm_sw_err",
197 	"tpc_hbw_rresp_err",
198 	"tpc_hbw_bresp_err",
199 	"tpc_lbw_rresp_err",
200 	"tpc_lbw_bresp_err"
201 };
202 
203 static const char * const
204 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
205 	"PQ AXI HBW error",
206 	"CQ AXI HBW error",
207 	"CP AXI HBW error",
208 	"CP error due to undefined OPCODE",
209 	"CP encountered STOP OPCODE",
210 	"CP AXI LBW error",
211 	"CP WRREG32 or WRBULK returned error",
212 	"N/A",
213 	"FENCE 0 inc over max value and clipped",
214 	"FENCE 1 inc over max value and clipped",
215 	"FENCE 2 inc over max value and clipped",
216 	"FENCE 3 inc over max value and clipped",
217 	"FENCE 0 dec under min value and clipped",
218 	"FENCE 1 dec under min value and clipped",
219 	"FENCE 2 dec under min value and clipped",
220 	"FENCE 3 dec under min value and clipped"
221 };
222 
223 static const char * const
224 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
225 	"Choice push while full error",
226 	"Choice Q watchdog error",
227 	"MSG AXI LBW returned with error"
228 };
229 
230 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
231 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
232 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
233 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
234 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
235 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
236 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
237 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
238 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
239 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
240 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
241 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
242 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
243 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
244 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
245 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
246 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
247 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
248 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
249 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
250 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
251 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
252 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
253 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
254 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
255 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
256 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
257 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
258 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
259 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
260 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
261 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
262 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
263 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
264 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
265 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
266 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
267 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
268 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
269 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
270 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
271 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
272 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
273 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
304 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_0 */
305 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_1 */
306 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_2 */
307 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_3 */
308 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_0 */
309 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_1 */
310 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_2 */
311 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_3 */
312 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_0 */
313 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_1 */
314 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_2 */
315 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_3 */
316 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_0 */
317 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_1 */
318 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_2 */
319 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_3 */
320 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_0 */
321 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_1 */
322 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_2 */
323 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_3 */
324 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_0 */
325 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_1 */
326 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_2 */
327 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_3 */
328 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_0 */
329 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_1 */
330 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_2 */
331 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_3 */
332 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_0 */
333 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_1 */
334 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_2 */
335 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_3 */
336 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_0 */
337 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_1 */
338 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_2 */
339 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_3 */
340 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_0 */
341 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_1 */
342 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_2 */
343 	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_3 */
344 };
345 
346 struct ecc_info_extract_params {
347 	u64 block_address;
348 	u32 num_memories;
349 	bool derr;
350 	bool disable_clock_gating;
351 };
352 
353 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
354 								u64 phys_addr);
355 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
356 					struct hl_cs_job *job);
357 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
358 					u32 size, u64 val);
359 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
360 				u32 tpc_id);
361 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
362 static int gaudi_cpucp_info_get(struct hl_device *hdev);
363 static void gaudi_disable_clock_gating(struct hl_device *hdev);
364 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
365 
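/*
 * gaudi_get_fixed_properties() - Set the fixed ASIC properties.
 * @hdev: Pointer to hl_device structure.
 *
 * Allocate the H/W queues properties array and fill hdev->asic_prop with the
 * fixed DRAM, SRAM, MMU and CB pool values of Gaudi.
 *
 * Return: 0 for success, negative value for error.
 */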
366 static int gaudi_get_fixed_properties(struct hl_device *hdev)
367 {
368 	struct asic_fixed_properties *prop = &hdev->asic_prop;
369 	u32 num_sync_stream_queues = 0;
370 	int i;
371 
372 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
373 	prop->hw_queues_props = kcalloc(prop->max_queues,
374 			sizeof(struct hw_queue_properties),
375 			GFP_KERNEL);
376 
377 	if (!prop->hw_queues_props)
378 		return -ENOMEM;
379 
380 	for (i = 0 ; i < prop->max_queues ; i++) {
381 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
382 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
383 			prop->hw_queues_props[i].driver_only = 0;
384 			prop->hw_queues_props[i].requires_kernel_cb = 1;
385 			prop->hw_queues_props[i].supports_sync_stream = 1;
386 			num_sync_stream_queues++;
387 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
388 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
389 			prop->hw_queues_props[i].driver_only = 1;
390 			prop->hw_queues_props[i].requires_kernel_cb = 0;
391 			prop->hw_queues_props[i].supports_sync_stream = 0;
392 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
393 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
394 			prop->hw_queues_props[i].driver_only = 0;
395 			prop->hw_queues_props[i].requires_kernel_cb = 0;
396 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
397 			prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
398 			prop->hw_queues_props[i].driver_only = 0;
399 			prop->hw_queues_props[i].requires_kernel_cb = 0;
400 			prop->hw_queues_props[i].supports_sync_stream = 0;
401 		}
402 	}
403 
404 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
405 	prop->sync_stream_first_sob = 0;
406 	prop->sync_stream_first_mon = 0;
407 	prop->dram_base_address = DRAM_PHYS_BASE;
408 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
409 	prop->dram_end_address = prop->dram_base_address +
410 					prop->dram_size;
411 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
412 
413 	prop->sram_base_address = SRAM_BASE_ADDR;
414 	prop->sram_size = SRAM_SIZE;
415 	prop->sram_end_address = prop->sram_base_address +
416 					prop->sram_size;
417 	prop->sram_user_base_address = prop->sram_base_address +
418 					SRAM_USER_BASE_OFFSET;
419 
420 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
421 	if (hdev->pldm)
422 		prop->mmu_pgt_size = 0x800000; /* 8MB */
423 	else
424 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
425 	prop->mmu_pte_size = HL_PTE_SIZE;
426 	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
427 	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
428 	prop->dram_page_size = PAGE_SIZE_2MB;
429 
430 	prop->pmmu.hop0_shift = HOP0_SHIFT;
431 	prop->pmmu.hop1_shift = HOP1_SHIFT;
432 	prop->pmmu.hop2_shift = HOP2_SHIFT;
433 	prop->pmmu.hop3_shift = HOP3_SHIFT;
434 	prop->pmmu.hop4_shift = HOP4_SHIFT;
435 	prop->pmmu.hop0_mask = HOP0_MASK;
436 	prop->pmmu.hop1_mask = HOP1_MASK;
437 	prop->pmmu.hop2_mask = HOP2_MASK;
438 	prop->pmmu.hop3_mask = HOP3_MASK;
439 	prop->pmmu.hop4_mask = HOP4_MASK;
440 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
441 	prop->pmmu.end_addr =
442 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
443 	prop->pmmu.page_size = PAGE_SIZE_4KB;
444 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
445 
446 	/* PMMU and HPMMU are the same except for the page size */
447 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
448 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
449 
450 	/* shifts and masks are the same in PMMU and DMMU */
451 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
452 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
453 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
454 	prop->dmmu.page_size = PAGE_SIZE_2MB;
455 
456 	prop->cfg_size = CFG_SIZE;
457 	prop->max_asid = MAX_ASID;
458 	prop->num_of_events = GAUDI_EVENT_SIZE;
459 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
460 
461 	prop->max_power_default = MAX_POWER_DEFAULT_PCI;
462 
463 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
464 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
465 
466 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
467 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
468 
469 	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
470 					CARD_NAME_MAX_LEN);
471 
472 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
473 
474 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
475 			num_sync_stream_queues * HL_RSVD_SOBS;
476 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
477 			num_sync_stream_queues * HL_RSVD_MONS;
478 
479 	return 0;
480 }
481 
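/*
 * gaudi_pci_bars_map() - Map the PCI BARs of the device.
 * @hdev: Pointer to hl_device structure.
 *
 * Map the SRAM, CFG and HBM BARs (HBM is mapped write-combined) and set the
 * register base pointer (rmmio) inside the CFG BAR.
 *
 * Return: 0 for success, negative value for error.
 */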
482 static int gaudi_pci_bars_map(struct hl_device *hdev)
483 {
484 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
485 	bool is_wc[3] = {false, false, true};
486 	int rc;
487 
488 	rc = hl_pci_bars_map(hdev, name, is_wc);
489 	if (rc)
490 		return rc;
491 
492 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
493 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
494 
495 	return 0;
496 }
497 
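/*
 * gaudi_set_hbm_bar_base() - Move the HBM BAR to a new device address.
 * @hdev: Pointer to hl_device structure.
 * @addr: HBM address the BAR should point to.
 *
 * Re-program inbound region 2 (BAR 4) so it exposes the given HBM address.
 *
 * Return: the previous BAR base address, or U64_MAX on failure.
 */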
498 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
499 {
500 	struct gaudi_device *gaudi = hdev->asic_specific;
501 	struct hl_inbound_pci_region pci_region;
502 	u64 old_addr = addr;
503 	int rc;
504 
505 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
506 		return old_addr;
507 
508 	/* Inbound Region 2 - Bar 4 - Point to HBM */
509 	pci_region.mode = PCI_BAR_MATCH_MODE;
510 	pci_region.bar = HBM_BAR_ID;
511 	pci_region.addr = addr;
512 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
513 	if (rc)
514 		return U64_MAX;
515 
516 	if (gaudi) {
517 		old_addr = gaudi->hbm_bar_cur_addr;
518 		gaudi->hbm_bar_cur_addr = addr;
519 	}
520 
521 	return old_addr;
522 }
523 
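/*
 * gaudi_init_iatu() - Initialize the iATU inbound/outbound regions.
 * @hdev: Pointer to hl_device structure.
 *
 * Point inbound regions 0-2 at SRAM+CFG, SPI flash and HBM respectively, and
 * configure the outbound region towards host memory.
 *
 * Return: 0 for success, negative value for error.
 */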
524 static int gaudi_init_iatu(struct hl_device *hdev)
525 {
526 	struct hl_inbound_pci_region inbound_region;
527 	struct hl_outbound_pci_region outbound_region;
528 	int rc;
529 
530 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
531 	inbound_region.mode = PCI_BAR_MATCH_MODE;
532 	inbound_region.bar = SRAM_BAR_ID;
533 	inbound_region.addr = SRAM_BASE_ADDR;
534 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
535 	if (rc)
536 		goto done;
537 
538 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
539 	inbound_region.mode = PCI_BAR_MATCH_MODE;
540 	inbound_region.bar = CFG_BAR_ID;
541 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
542 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
543 	if (rc)
544 		goto done;
545 
546 	/* Inbound Region 2 - Bar 4 - Point to HBM */
547 	inbound_region.mode = PCI_BAR_MATCH_MODE;
548 	inbound_region.bar = HBM_BAR_ID;
549 	inbound_region.addr = DRAM_PHYS_BASE;
550 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
551 	if (rc)
552 		goto done;
553 
554 	hdev->asic_funcs->set_dma_mask_from_fw(hdev);
555 
556 	/* Outbound Region 0 - Point to Host */
557 	outbound_region.addr = HOST_PHYS_BASE;
558 	outbound_region.size = HOST_PHYS_SIZE;
559 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
560 
561 done:
562 	return rc;
563 }
564 
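/*
 * gaudi_early_init() - Early initialization of the device.
 * @hdev: Pointer to hl_device structure.
 *
 * Fetch the fixed properties, verify the PCI BAR sizes and initialize the
 * PCI controller.
 *
 * Return: 0 for success, negative value for error.
 */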
565 static int gaudi_early_init(struct hl_device *hdev)
566 {
567 	struct asic_fixed_properties *prop = &hdev->asic_prop;
568 	struct pci_dev *pdev = hdev->pdev;
569 	int rc;
570 
571 	rc = gaudi_get_fixed_properties(hdev);
572 	if (rc) {
573 		dev_err(hdev->dev, "Failed to get fixed properties\n");
574 		return rc;
575 	}
576 
577 	/* Check BAR sizes */
578 	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
579 		dev_err(hdev->dev,
580 			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
581 			SRAM_BAR_ID,
582 			(unsigned long long) pci_resource_len(pdev,
583 							SRAM_BAR_ID),
584 			SRAM_BAR_SIZE);
585 		rc = -ENODEV;
586 		goto free_queue_props;
587 	}
588 
589 	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
590 		dev_err(hdev->dev,
591 			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
592 			CFG_BAR_ID,
593 			(unsigned long long) pci_resource_len(pdev,
594 								CFG_BAR_ID),
595 			CFG_BAR_SIZE);
596 		rc = -ENODEV;
597 		goto free_queue_props;
598 	}
599 
600 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
601 
602 	rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
603 			mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
604 	if (rc)
605 		goto free_queue_props;
606 
607 	/* GAUDI Firmware does not yet support security */
608 	prop->fw_security_disabled = true;
609 	dev_info(hdev->dev, "firmware-level security is disabled\n");
610 
611 	return 0;
612 
613 free_queue_props:
614 	kfree(hdev->asic_prop.hw_queues_props);
615 	return rc;
616 }
617 
618 static int gaudi_early_fini(struct hl_device *hdev)
619 {
620 	kfree(hdev->asic_prop.hw_queues_props);
621 	hl_pci_fini(hdev);
622 
623 	return 0;
624 }
625 
626 /**
627  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
628  *
629  * @hdev: pointer to hl_device structure
630  *
631  */
632 static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
633 {
634 	struct asic_fixed_properties *prop = &hdev->asic_prop;
635 	u32 trace_freq = 0;
636 	u32 pll_clk = 0;
637 	u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
638 	u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
639 	u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
640 	u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
641 	u32 od = RREG32(mmPSOC_CPU_PLL_OD);
642 
643 	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
644 		if (div_sel == DIV_SEL_REF_CLK)
645 			trace_freq = PLL_REF_CLK;
646 		else
647 			trace_freq = PLL_REF_CLK / (div_fctr + 1);
648 	} else if (div_sel == DIV_SEL_PLL_CLK ||
649 					div_sel == DIV_SEL_DIVIDED_PLL) {
650 		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
651 		if (div_sel == DIV_SEL_PLL_CLK)
652 			trace_freq = pll_clk;
653 		else
654 			trace_freq = pll_clk / (div_fctr + 1);
655 	} else {
656 		dev_warn(hdev->dev,
657 			"Received invalid div select value: %d", div_sel);
658 	}
659 
660 	prop->psoc_timestamp_frequency = trace_freq;
661 	prop->psoc_pci_pll_nr = nr;
662 	prop->psoc_pci_pll_nf = nf;
663 	prop->psoc_pci_pll_od = od;
664 	prop->psoc_pci_pll_div_factor = div_fctr;
665 }
666 
667 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
668 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
669 {
670 	struct asic_fixed_properties *prop = &hdev->asic_prop;
671 	struct packet_lin_dma *init_tpc_mem_pkt;
672 	struct hl_cs_job *job;
673 	struct hl_cb *cb;
674 	u64 dst_addr;
675 	u32 cb_size, ctl;
676 	u8 tpc_id;
677 	int rc;
678 
679 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
680 	if (!cb)
681 		return -EFAULT;
682 
683 	init_tpc_mem_pkt = cb->kernel_address;
684 	cb_size = sizeof(*init_tpc_mem_pkt);
685 	memset(init_tpc_mem_pkt, 0, cb_size);
686 
687 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
688 
689 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
690 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
691 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
692 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
693 
694 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
695 
696 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
697 	dst_addr = (prop->sram_user_base_address &
698 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
699 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
700 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
701 
702 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
703 	if (!job) {
704 		dev_err(hdev->dev, "Failed to allocate a new job\n");
705 		rc = -ENOMEM;
706 		goto release_cb;
707 	}
708 
709 	job->id = 0;
710 	job->user_cb = cb;
711 	job->user_cb->cs_cnt++;
712 	job->user_cb_size = cb_size;
713 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
714 	job->patched_cb = job->user_cb;
715 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
716 
717 	hl_debugfs_add_job(hdev, job);
718 
719 	rc = gaudi_send_job_on_qman0(hdev, job);
720 
721 	if (rc)
722 		goto free_job;
723 
724 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
725 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
726 		if (rc)
727 			break;
728 	}
729 
730 free_job:
731 	hl_userptr_delete_list(hdev, &job->userptr_list);
732 	hl_debugfs_remove_job(hdev, job);
733 	kfree(job);
734 	cb->cs_cnt--;
735 
736 release_cb:
737 	hl_cb_put(cb);
738 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
739 
740 	return rc;
741 }
742 
743 /*
744  * gaudi_init_tpc_mem() - Initialize TPC memories.
745  * @hdev: Pointer to hl_device structure.
746  *
747  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
748  *
749  * Return: 0 for success, negative value for error.
750  */
751 static int gaudi_init_tpc_mem(struct hl_device *hdev)
752 {
753 	const struct firmware *fw;
754 	size_t fw_size;
755 	void *cpu_addr;
756 	dma_addr_t dma_handle;
757 	int rc, count = 5;
758 
759 again:
760 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
761 	if (rc == -EINTR && count-- > 0) {
762 		msleep(50);
763 		goto again;
764 	}
765 
766 	if (rc) {
767 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
768 				GAUDI_TPC_FW_FILE);
769 		goto out;
770 	}
771 
772 	fw_size = fw->size;
773 	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
774 			&dma_handle, GFP_KERNEL | __GFP_ZERO);
775 	if (!cpu_addr) {
776 		dev_err(hdev->dev,
777 			"Failed to allocate %zu of dma memory for TPC kernel\n",
778 			fw_size);
779 		rc = -ENOMEM;
780 		goto out;
781 	}
782 
783 	memcpy(cpu_addr, fw->data, fw_size);
784 
785 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
786 
787 	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
788 			dma_handle);
789 
790 out:
791 	release_firmware(fw);
792 	return rc;
793 }
794 
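/*
 * gaudi_late_init() - Late initialization of the device.
 * @hdev: Pointer to hl_device structure.
 *
 * Get the CPU-CP info, enable PCI access from the device CPU, fetch the PSOC
 * frequency, clear the MMU page tables range and initialize the TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */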
795 static int gaudi_late_init(struct hl_device *hdev)
796 {
797 	struct gaudi_device *gaudi = hdev->asic_specific;
798 	int rc;
799 
800 	rc = gaudi->cpucp_info_get(hdev);
801 	if (rc) {
802 		dev_err(hdev->dev, "Failed to get cpucp info\n");
803 		return rc;
804 	}
805 
806 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
807 	if (rc) {
808 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
809 		return rc;
810 	}
811 
812 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
813 
814 	gaudi_fetch_psoc_frequency(hdev);
815 
816 	rc = gaudi_mmu_clear_pgt_range(hdev);
817 	if (rc) {
818 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
819 		goto disable_pci_access;
820 	}
821 
822 	rc = gaudi_init_tpc_mem(hdev);
823 	if (rc) {
824 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
825 		goto disable_pci_access;
826 	}
827 
828 	return 0;
829 
830 disable_pci_access:
831 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
832 
833 	return rc;
834 }
835 
836 static void gaudi_late_fini(struct hl_device *hdev)
837 {
838 	const struct hwmon_channel_info **channel_info_arr;
839 	int i = 0;
840 
841 	if (!hdev->hl_chip_info->info)
842 		return;
843 
844 	channel_info_arr = hdev->hl_chip_info->info;
845 
846 	while (channel_info_arr[i]) {
847 		kfree(channel_info_arr[i]->config);
848 		kfree(channel_info_arr[i]);
849 		i++;
850 	}
851 
852 	kfree(channel_info_arr);
853 
854 	hdev->hl_chip_info->info = NULL;
855 }
856 
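/*
 * gaudi_alloc_cpu_accessible_dma_mem() - Allocate CPU accessible DMA memory.
 * @hdev: Pointer to hl_device structure.
 *
 * Retry the allocation until a range is found whose MSBs (bits 49:39) are
 * identical across the entire range, as required by the device CPU. All other
 * candidate allocations are freed before returning.
 *
 * Return: 0 for success, negative value for error.
 */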
857 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
858 {
859 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
860 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
861 	int i, j, rc = 0;
862 
863 	/*
864 	 * The device CPU works with 40-bit addresses, and bit 39 must be set
865 	 * to '1' when accessing the host.
866 	 * Bits 49:39 of the full host address are saved for a later
867 	 * configuration of the HW to perform extension to 50 bits.
868 	 * Because there is a single HW register that holds the extension bits,
869 	 * these bits must be identical across the entire allocated range.
870 	 */
871 
872 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
873 		virt_addr_arr[i] =
874 			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
875 						HL_CPU_ACCESSIBLE_MEM_SIZE,
876 						&dma_addr_arr[i],
877 						GFP_KERNEL | __GFP_ZERO);
878 		if (!virt_addr_arr[i]) {
879 			rc = -ENOMEM;
880 			goto free_dma_mem_arr;
881 		}
882 
883 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
884 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
885 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
886 			break;
887 	}
888 
889 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
890 		dev_err(hdev->dev,
891 			"MSBs of CPU accessible DMA memory are not identical across the entire range\n");
892 		rc = -EFAULT;
893 		goto free_dma_mem_arr;
894 	}
895 
896 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
897 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
898 	hdev->cpu_pci_msb_addr =
899 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
900 
901 	GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
902 
903 free_dma_mem_arr:
904 	for (j = 0 ; j < i ; j++)
905 		hdev->asic_funcs->asic_dma_free_coherent(hdev,
906 						HL_CPU_ACCESSIBLE_MEM_SIZE,
907 						virt_addr_arr[j],
908 						dma_addr_arr[j]);
909 
910 	return rc;
911 }
912 
913 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
914 {
915 	struct gaudi_device *gaudi = hdev->asic_specific;
916 	struct gaudi_internal_qman_info *q;
917 	u32 i;
918 
919 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
920 		q = &gaudi->internal_qmans[i];
921 		if (!q->pq_kernel_addr)
922 			continue;
923 		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
924 							q->pq_kernel_addr,
925 							q->pq_dma_addr);
926 	}
927 }
928 
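/*
 * gaudi_alloc_internal_qmans_pq_mem() - Allocate PQs for internal queues.
 * @hdev: Pointer to hl_device structure.
 *
 * Allocate a DMA coherent PQ buffer for every internal (HW) queue - HBM DMA,
 * MME and TPC QMANs - using the per-engine PQ sizes.
 *
 * Return: 0 for success, negative value for error.
 */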
929 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
930 {
931 	struct gaudi_device *gaudi = hdev->asic_specific;
932 	struct gaudi_internal_qman_info *q;
933 	int rc, i;
934 
935 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
936 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
937 			continue;
938 
939 		q = &gaudi->internal_qmans[i];
940 
941 		switch (i) {
942 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
943 		case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
944 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
945 			break;
946 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
947 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
948 			break;
949 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
950 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
951 			break;
952 		default:
953 			dev_err(hdev->dev, "Bad internal queue index %d", i);
954 			rc = -EINVAL;
955 			goto free_internal_qmans_pq_mem;
956 		}
957 
958 		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
959 						hdev, q->pq_size,
960 						&q->pq_dma_addr,
961 						GFP_KERNEL | __GFP_ZERO);
962 		if (!q->pq_kernel_addr) {
963 			rc = -ENOMEM;
964 			goto free_internal_qmans_pq_mem;
965 		}
966 	}
967 
968 	return 0;
969 
970 free_internal_qmans_pq_mem:
971 	gaudi_free_internal_qmans_pq_mem(hdev);
972 	return rc;
973 }
974 
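/*
 * gaudi_sw_init() - Software initialization of the device.
 * @hdev: Pointer to hl_device structure.
 *
 * Allocate the gaudi device structure, build the events map, create the DMA
 * pool and the CPU accessible DMA pool, and allocate the internal QMANs PQs.
 *
 * Return: 0 for success, negative value for error.
 */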
975 static int gaudi_sw_init(struct hl_device *hdev)
976 {
977 	struct gaudi_device *gaudi;
978 	u32 i, event_id = 0;
979 	int rc;
980 
981 	/* Allocate device structure */
982 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
983 	if (!gaudi)
984 		return -ENOMEM;
985 
986 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
987 		if (gaudi_irq_map_table[i].valid) {
988 			if (event_id == GAUDI_EVENT_SIZE) {
989 				dev_err(hdev->dev,
990 					"Event array exceeds the limit of %u events\n",
991 					GAUDI_EVENT_SIZE);
992 				rc = -EINVAL;
993 				goto free_gaudi_device;
994 			}
995 
996 			gaudi->events[event_id++] =
997 					gaudi_irq_map_table[i].fc_id;
998 		}
999 	}
1000 
1001 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1002 
1003 	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1004 
1005 	hdev->asic_specific = gaudi;
1006 
1007 	/* Create DMA pool for small allocations */
1008 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1009 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1010 	if (!hdev->dma_pool) {
1011 		dev_err(hdev->dev, "failed to create DMA pool\n");
1012 		rc = -ENOMEM;
1013 		goto free_gaudi_device;
1014 	}
1015 
1016 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1017 	if (rc)
1018 		goto free_dma_pool;
1019 
1020 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1021 	if (!hdev->cpu_accessible_dma_pool) {
1022 		dev_err(hdev->dev,
1023 			"Failed to create CPU accessible DMA pool\n");
1024 		rc = -ENOMEM;
1025 		goto free_cpu_dma_mem;
1026 	}
1027 
1028 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1029 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1030 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1031 	if (rc) {
1032 		dev_err(hdev->dev,
1033 			"Failed to add memory to CPU accessible DMA pool\n");
1034 		rc = -EFAULT;
1035 		goto free_cpu_accessible_dma_pool;
1036 	}
1037 
1038 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1039 	if (rc)
1040 		goto free_cpu_accessible_dma_pool;
1041 
1042 	spin_lock_init(&gaudi->hw_queues_lock);
1043 	mutex_init(&gaudi->clk_gate_mutex);
1044 
1045 	hdev->supports_sync_stream = true;
1046 	hdev->supports_coresight = true;
1047 
1048 	return 0;
1049 
1050 free_cpu_accessible_dma_pool:
1051 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1052 free_cpu_dma_mem:
1053 	GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1054 				hdev->cpu_pci_msb_addr);
1055 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
1056 			HL_CPU_ACCESSIBLE_MEM_SIZE,
1057 			hdev->cpu_accessible_dma_mem,
1058 			hdev->cpu_accessible_dma_address);
1059 free_dma_pool:
1060 	dma_pool_destroy(hdev->dma_pool);
1061 free_gaudi_device:
1062 	kfree(gaudi);
1063 	return rc;
1064 }
1065 
1066 static int gaudi_sw_fini(struct hl_device *hdev)
1067 {
1068 	struct gaudi_device *gaudi = hdev->asic_specific;
1069 
1070 	gaudi_free_internal_qmans_pq_mem(hdev);
1071 
1072 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1073 
1074 	GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1075 					hdev->cpu_pci_msb_addr);
1076 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
1077 			HL_CPU_ACCESSIBLE_MEM_SIZE,
1078 			hdev->cpu_accessible_dma_mem,
1079 			hdev->cpu_accessible_dma_address);
1080 
1081 	dma_pool_destroy(hdev->dma_pool);
1082 
1083 	mutex_destroy(&gaudi->clk_gate_mutex);
1084 
1085 	kfree(gaudi);
1086 
1087 	return 0;
1088 }
1089 
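/*
 * gaudi_irq_handler_single() - Interrupt handler for single MSI mode.
 * @irq: IRQ number.
 * @arg: Pointer to hl_device structure.
 *
 * In single MSI mode one vector serves all completion queues and the event
 * queue, so all of them are polled on every interrupt.
 *
 * Return: IRQ_HANDLED.
 */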
1090 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1091 {
1092 	struct hl_device *hdev = arg;
1093 	int i;
1094 
1095 	if (hdev->disabled)
1096 		return IRQ_HANDLED;
1097 
1098 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1099 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1100 
1101 	hl_irq_handler_eq(irq, &hdev->event_queue);
1102 
1103 	return IRQ_HANDLED;
1104 }
1105 
1106 /*
1107  * For backward compatibility, new MSI interrupts should be set after the
1108  * existing CPU and NIC interrupts.
1109  */
1110 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1111 				bool cpu_eq)
1112 {
1113 	int msi_vec;
1114 
1115 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1116 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1117 				GAUDI_EVENT_QUEUE_MSI_IDX);
1118 
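	/*
	 * CQ interrupts below GAUDI_EVENT_QUEUE_MSI_IDX and the CPU event queue
	 * keep their original vector index; any other interrupt is placed after
	 * the NIC and CPU vectors to keep backward compatibility.
	 */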
1119 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1120 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1121 
1122 	return pci_irq_vector(hdev->pdev, msi_vec);
1123 }
1124 
1125 static int gaudi_enable_msi_single(struct hl_device *hdev)
1126 {
1127 	int rc, irq;
1128 
1129 	dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1130 
1131 	irq = gaudi_pci_irq_vector(hdev, 0, false);
1132 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
1133 			"gaudi single msi", hdev);
1134 	if (rc)
1135 		dev_err(hdev->dev,
1136 			"Failed to request single MSI IRQ\n");
1137 
1138 	return rc;
1139 }
1140 
1141 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1142 {
1143 	int cq_cnt = hdev->asic_prop.completion_queues_count;
1144 	int rc, i, irq_cnt_init, irq;
1145 
1146 	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1147 		irq = gaudi_pci_irq_vector(hdev, i, false);
1148 		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1149 				&hdev->completion_queue[i]);
1150 		if (rc) {
1151 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1152 			goto free_irqs;
1153 		}
1154 	}
1155 
1156 	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1157 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1158 				&hdev->event_queue);
1159 	if (rc) {
1160 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1161 		goto free_irqs;
1162 	}
1163 
1164 	return 0;
1165 
1166 free_irqs:
1167 	for (i = 0 ; i < irq_cnt_init ; i++)
1168 		free_irq(gaudi_pci_irq_vector(hdev, i, false),
1169 				&hdev->completion_queue[i]);
1170 	return rc;
1171 }
1172 
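/*
 * gaudi_enable_msi() - Enable MSI interrupts.
 * @hdev: Pointer to hl_device structure.
 *
 * Allocate MSI vectors and request the IRQs. If the PCI core grants fewer
 * vectors than NUMBER_OF_INTERRUPTS, fall back to single MSI mode where one
 * handler serves all completion queues and the event queue.
 *
 * Return: 0 for success, negative value for error.
 */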
1173 static int gaudi_enable_msi(struct hl_device *hdev)
1174 {
1175 	struct gaudi_device *gaudi = hdev->asic_specific;
1176 	int rc;
1177 
1178 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1179 		return 0;
1180 
1181 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1182 					PCI_IRQ_MSI);
1183 	if (rc < 0) {
1184 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1185 		return rc;
1186 	}
1187 
1188 	if (rc < NUMBER_OF_INTERRUPTS) {
1189 		gaudi->multi_msi_mode = false;
1190 		rc = gaudi_enable_msi_single(hdev);
1191 	} else {
1192 		gaudi->multi_msi_mode = true;
1193 		rc = gaudi_enable_msi_multi(hdev);
1194 	}
1195 
1196 	if (rc)
1197 		goto free_pci_irq_vectors;
1198 
1199 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
1200 
1201 	return 0;
1202 
1203 free_pci_irq_vectors:
1204 	pci_free_irq_vectors(hdev->pdev);
1205 	return rc;
1206 }
1207 
1208 static void gaudi_sync_irqs(struct hl_device *hdev)
1209 {
1210 	struct gaudi_device *gaudi = hdev->asic_specific;
1211 	int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1212 
1213 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1214 		return;
1215 
1216 	/* Wait for all pending IRQs to be finished */
1217 	if (gaudi->multi_msi_mode) {
1218 		for (i = 0 ; i < cq_cnt ; i++)
1219 			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1220 
1221 		synchronize_irq(gaudi_pci_irq_vector(hdev,
1222 						GAUDI_EVENT_QUEUE_MSI_IDX,
1223 						true));
1224 	} else {
1225 		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1226 	}
1227 }
1228 
1229 static void gaudi_disable_msi(struct hl_device *hdev)
1230 {
1231 	struct gaudi_device *gaudi = hdev->asic_specific;
1232 	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1233 
1234 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1235 		return;
1236 
1237 	gaudi_sync_irqs(hdev);
1238 
1239 	if (gaudi->multi_msi_mode) {
1240 		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1241 						true);
1242 		free_irq(irq, &hdev->event_queue);
1243 
1244 		for (i = 0 ; i < cq_cnt ; i++) {
1245 			irq = gaudi_pci_irq_vector(hdev, i, false);
1246 			free_irq(irq, &hdev->completion_queue[i]);
1247 		}
1248 	} else {
1249 		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1250 	}
1251 
1252 	pci_free_irq_vectors(hdev->pdev);
1253 
1254 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1255 }
1256 
1257 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1258 {
1259 	struct gaudi_device *gaudi = hdev->asic_specific;
1260 
1261 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1262 		return;
1263 
1264 	if (!hdev->sram_scrambler_enable)
1265 		return;
1266 
1267 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1268 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1269 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1270 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1271 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1272 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1273 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1274 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1275 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1276 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1277 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1278 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1279 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1280 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1281 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1282 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1283 
1284 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1285 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1286 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1287 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1288 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1289 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1290 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1291 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1292 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1293 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1294 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1295 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1296 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1297 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1298 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1299 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1300 
1301 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1302 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1303 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1304 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1305 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1306 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1307 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1308 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1309 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1310 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1311 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1312 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1313 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1314 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1315 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1316 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1317 
1318 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1319 }
1320 
1321 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1322 {
1323 	struct gaudi_device *gaudi = hdev->asic_specific;
1324 
1325 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1326 		return;
1327 
1328 	if (!hdev->dram_scrambler_enable)
1329 		return;
1330 
1331 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1332 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1333 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1334 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1335 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1336 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1337 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1338 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1339 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1340 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1341 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1342 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1343 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1344 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1345 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1346 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1347 
1348 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1349 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1350 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1351 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1352 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1353 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1354 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1355 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1356 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1357 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1358 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1359 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1360 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1361 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1362 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1363 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1364 
1365 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1366 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1367 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1368 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1369 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1370 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1371 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1372 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1373 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1374 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1375 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1376 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1377 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1378 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1379 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1380 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1381 
1382 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1383 }
1384 
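/*
 * gaudi_init_e2e() - Initialize the end-to-end (E2E) mechanism.
 * @hdev: Pointer to hl_device structure.
 *
 * Program the per-router HBM and PCI E2E read/write sizes of the SIF/NIF
 * routers and DMA interfaces, and enable the E2E mechanism.
 */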
1385 static void gaudi_init_e2e(struct hl_device *hdev)
1386 {
1387 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1388 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1389 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1390 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1391 
1392 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1393 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1394 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1395 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1396 
1397 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1398 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1399 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1400 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1401 
1402 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1403 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1404 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1405 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1406 
1407 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1408 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1409 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1410 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1411 
1412 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1413 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1414 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1415 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1416 
1417 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1418 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1419 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1420 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1421 
1422 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1423 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1424 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1425 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1426 
1427 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1428 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1429 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1430 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1431 
1432 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1433 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1434 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1435 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1436 
1437 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1438 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1439 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1440 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1441 
1442 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1443 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1444 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1445 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1446 
1447 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1448 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1449 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1450 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1451 
1452 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1453 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1454 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1455 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1456 
1457 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1458 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1459 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1460 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1461 
1462 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1463 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1464 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1465 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1466 
1467 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1468 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1469 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1470 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1471 
1472 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1473 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1474 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1475 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1476 
1477 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1478 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1479 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1480 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1481 
1482 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1483 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1484 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1485 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1486 
1487 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1488 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1489 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1490 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1491 
1492 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1493 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1494 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1495 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1496 
1497 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1498 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1499 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1500 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1501 
1502 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1503 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1504 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1505 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1506 
1507 	if (!hdev->dram_scrambler_enable) {
1508 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1509 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1510 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1511 		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1512 
1513 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1514 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1515 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1516 		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1517 
1518 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1519 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1520 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1521 		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1522 
1523 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1524 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1525 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1526 		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1527 
1528 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1529 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1530 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1531 		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1532 
1533 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1534 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1535 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1536 		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1537 
1538 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1539 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1540 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1541 		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1542 
1543 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1544 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1545 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1546 		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1547 
1548 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1549 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1550 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1551 		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1552 
1553 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1554 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1555 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1556 		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1557 
1558 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1559 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1560 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1561 		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1562 
1563 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1564 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1565 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1566 		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1567 
1568 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1569 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1570 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1571 		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1572 
1573 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1574 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1575 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1576 		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1577 
1578 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1579 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1580 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1581 		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1582 
1583 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1584 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1585 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1586 		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1587 
1588 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1589 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1590 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1591 		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1592 
1593 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1594 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1595 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1596 		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1597 
1598 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1599 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1600 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1601 		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1602 
1603 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1604 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1605 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1606 		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1607 
1608 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1609 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1610 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1611 		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1612 
1613 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1614 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1615 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1616 		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1617 
1618 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1619 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1620 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1621 		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1622 
1623 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1624 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1625 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1626 		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1627 	}
1628 
1629 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1630 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1631 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1632 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1633 
1634 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1635 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1636 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1637 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1638 
1639 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1640 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1641 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1642 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1643 
1644 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1645 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1646 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1647 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1648 
1649 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1650 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1651 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1652 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1653 
1654 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1655 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1656 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1657 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1658 
1659 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1660 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1661 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1662 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1663 
1664 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1665 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1666 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1667 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1668 
1669 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1670 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1671 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1672 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1673 
1674 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1675 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1676 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1677 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1678 
1679 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1680 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1681 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1682 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1683 
1684 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1685 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1686 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1687 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1688 
1689 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1690 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1691 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1692 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1693 
1694 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1695 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1696 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1697 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1698 
1699 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1700 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1701 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1702 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1703 
1704 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1705 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1706 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1707 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1708 
1709 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1710 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1711 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1712 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1713 
1714 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1715 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1716 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1717 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1718 
1719 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1720 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1721 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1722 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1723 
1724 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1725 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1726 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1727 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1728 
1729 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1730 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1731 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1732 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1733 
1734 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1735 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1736 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1737 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1738 
1739 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1740 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1741 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1742 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1743 
1744 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1745 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1746 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1747 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1748 }
1749 
1750 static void gaudi_init_hbm_cred(struct hl_device *hdev)
1751 {
1752 	uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1753 
1754 	hbm0_wr = 0x33333333;
1755 	hbm0_rd = 0x77777777;
1756 	hbm1_wr = 0x55555555;
1757 	hbm1_rd = 0xDDDDDDDD;
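	/*
	 * The credit values are nibble-replicated patterns; note that the read
	 * credits (0x7/0xD per nibble) are larger than the write credits
	 * (0x3/0x5 per nibble). The per-field meaning is not documented here,
	 * so the values are kept as part of the golden configuration.
	 */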
1758 
1759 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1760 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1761 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1762 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1763 
1764 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1765 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1766 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1767 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1768 
1769 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1770 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1771 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1772 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1773 
1774 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1775 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1776 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1777 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1778 
1779 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1780 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1781 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1782 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1783 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1784 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1785 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1786 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1787 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1788 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1789 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1790 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1791 
1792 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1793 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1794 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1795 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1796 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1797 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1798 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1799 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1800 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1801 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1802 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1803 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1804 }
1805 
1806 static void gaudi_init_golden_registers(struct hl_device *hdev)
1807 {
1808 	u32 tpc_offset;
1809 	int tpc_id, i;
1810 
1811 	gaudi_init_e2e(hdev);
1812 
1813 	gaudi_init_hbm_cred(hdev);
1814 
1815 	hdev->asic_funcs->disable_clock_gating(hdev);
1816 
1817 	for (tpc_id = 0, tpc_offset = 0;
1818 				tpc_id < TPC_NUMBER_OF_ENGINES;
1819 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1820 		/* Mask all arithmetic interrupts from TPC */
1821 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1822 		/* Set 16 cache lines */
1823 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1824 				ICACHE_FETCH_LINE_NUM, 2);
1825 	}
1826 
1827 	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1828 	for (i = 0 ; i < 128 ; i += 8)
1829 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1830 
1831 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1832 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1833 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1834 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1835 }
1836 
1837 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1838 					int qman_id, dma_addr_t qman_pq_addr)
1839 {
1840 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1841 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1842 	u32 q_off, dma_qm_offset;
1843 	u32 dma_qm_err_cfg;
1844 
1845 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1846 
1847 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
1848 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1849 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
1850 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1851 	so_base_en_lo = lower_32_bits(CFG_BASE +
1852 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1853 	so_base_en_hi = upper_32_bits(CFG_BASE +
1854 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1855 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1856 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1857 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1858 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1859 	so_base_ws_lo = lower_32_bits(CFG_BASE +
1860 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1861 	so_base_ws_hi = upper_32_bits(CFG_BASE +
1862 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1863 
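	/*
	 * Per-stream queue registers (PQ_BASE/PI/CI and the CP registers) are
	 * laid out 4 bytes apart, so stream N's copy of a *_0 register sits at
	 * that register's address plus 4 * N.
	 */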
1864 	q_off = dma_qm_offset + qman_id * 4;
1865 
1866 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1867 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1868 
1869 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1870 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1871 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1872 
1873 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
1874 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
1875 							QMAN_LDMA_SRC_OFFSET);
1876 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
1877 							QMAN_LDMA_DST_OFFSET);
1878 
1879 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1880 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1881 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1882 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1883 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1884 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1885 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1886 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1887 
1888 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1889 
1890 	/* The following configuration is needed only once per QMAN */
1891 	if (qman_id == 0) {
1892 		/* Configure RAZWI IRQ */
1893 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1894 		if (hdev->stop_on_err) {
1895 			dma_qm_err_cfg |=
1896 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1897 		}
1898 
1899 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1900 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1901 			lower_32_bits(CFG_BASE +
1902 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1903 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1904 			upper_32_bits(CFG_BASE +
1905 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1906 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1907 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1908 									dma_id);
1909 
1910 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1911 				QM_ARB_ERR_MSG_EN_MASK);
1912 
1913 		/* Increase ARB WDT to support streams architecture */
1914 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1915 				GAUDI_ARB_WDT_TIMEOUT);
1916 
1917 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1918 				QMAN_EXTERNAL_MAKE_TRUSTED);
1919 
1920 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1921 	}
1922 }
1923 
1924 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1925 {
1926 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1927 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1928 
1929 	/* Set to maximum possible according to physical size */
1930 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1931 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1932 
1933 	/* WA for H/W bug H3-2116 */
1934 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
1935 
1936 	/* The STOP_ON bit implies the operation is not completed in case of RAZWI */
1937 	if (hdev->stop_on_err)
1938 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1939 
1940 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1941 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1942 		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1943 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1944 		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1945 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1946 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1947 	WREG32(mmDMA0_CORE_PROT + dma_offset,
1948 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1949 	/* If the channel is secured, it should be in MMU bypass mode */
1950 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1951 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1952 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1953 }
1954 
1955 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1956 				u32 enable_mask)
1957 {
1958 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1959 
1960 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1961 }
1962 
1963 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1964 {
1965 	struct gaudi_device *gaudi = hdev->asic_specific;
1966 	struct hl_hw_queue *q;
1967 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1968 
1969 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1970 		return;
1971 
1972 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1973 		dma_id = gaudi_dma_assignment[i];
1974 		/*
1975 		 * For queues after the CPU Q we need to add 1 to get the
1976 		 * correct queue index. In addition, we need to skip the CPU
1977 		 * EQ and the NIC IRQs in order to get the correct MSI register.
1978 		 */
1979 		if (dma_id > 1) {
1980 			cpu_skip = 1;
1981 			nic_skip = NIC_NUMBER_OF_ENGINES;
1982 		} else {
1983 			cpu_skip = 0;
1984 			nic_skip = 0;
1985 		}
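		/*
		 * Example: for a PCI DMA channel with dma_id > 1, i.e. one
		 * whose queues are placed after the CPU queue, stream j maps
		 * to kernel queue 4 * dma_id + j + 1, and its MSI vector is
		 * additionally offset by the CPU EQ and the NIC IRQs.
		 */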
1986 
1987 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
1988 			q_idx = 4 * dma_id + j + cpu_skip;
1989 			q = &hdev->kernel_queues[q_idx];
1990 			q->cq_id = cq_id++;
1991 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1992 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
1993 						q->bus_address);
1994 		}
1995 
1996 		gaudi_init_dma_core(hdev, dma_id);
1997 
1998 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1999 	}
2000 
2001 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2002 }
2003 
2004 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2005 					int qman_id, u64 qman_base_addr)
2006 {
2007 	u32 mtr_base_lo, mtr_base_hi;
2008 	u32 so_base_lo, so_base_hi;
2009 	u32 q_off, dma_qm_offset;
2010 	u32 dma_qm_err_cfg;
2011 
2012 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2013 
2014 	mtr_base_lo = lower_32_bits(CFG_BASE +
2015 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2016 	mtr_base_hi = upper_32_bits(CFG_BASE +
2017 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2018 	so_base_lo = lower_32_bits(CFG_BASE +
2019 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2020 	so_base_hi = upper_32_bits(CFG_BASE +
2021 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2022 
2023 	q_off = dma_qm_offset + qman_id * 4;
2024 
2025 	if (qman_id < 4) {
2026 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2027 					lower_32_bits(qman_base_addr));
2028 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2029 					upper_32_bits(qman_base_addr));
2030 
2031 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2032 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2033 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2034 
2035 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2036 							QMAN_CPDMA_SIZE_OFFSET);
2037 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2038 							QMAN_CPDMA_SRC_OFFSET);
2039 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2040 							QMAN_CPDMA_DST_OFFSET);
2041 	} else {
2042 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2043 							QMAN_LDMA_SIZE_OFFSET);
2044 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2045 							QMAN_LDMA_SRC_OFFSET);
2046 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2047 							QMAN_LDMA_DST_OFFSET);
2048 
2049 		/* Configure RAZWI IRQ */
2050 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2051 		if (hdev->stop_on_err) {
2052 			dma_qm_err_cfg |=
2053 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2054 		}
2055 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2056 
2057 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2058 			lower_32_bits(CFG_BASE +
2059 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2060 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2061 			upper_32_bits(CFG_BASE +
2062 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2063 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2064 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2065 									dma_id);
2066 
2067 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2068 				QM_ARB_ERR_MSG_EN_MASK);
2069 
2070 		/* Increase ARB WDT to support streams architecture */
2071 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2072 				GAUDI_ARB_WDT_TIMEOUT);
2073 
2074 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2075 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2076 				QMAN_INTERNAL_MAKE_TRUSTED);
2077 	}
2078 
2079 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2080 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2081 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2082 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2083 }
2084 
2085 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2086 {
2087 	struct gaudi_device *gaudi = hdev->asic_specific;
2088 	struct gaudi_internal_qman_info *q;
2089 	u64 qman_base_addr;
2090 	int i, j, dma_id, internal_q_index;
2091 
2092 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2093 		return;
2094 
2095 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2096 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2097 
2098 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2099 			 /*
2100 			  * Add the CPU queue in order to get the correct queue
2101 			  * number, as all internal queues are placed after it
2102 			  */
2103 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
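			/*
			 * Example: with QMAN_STREAMS == 4, DMA channel 2
			 * stream 1 gets internal_q_index 2 * 4 + 1 + 1 = 10.
			 */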
2104 
2105 			q = &gaudi->internal_qmans[internal_q_index];
2106 			qman_base_addr = (u64) q->pq_dma_addr;
2107 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2108 						qman_base_addr);
2109 		}
2110 
2111 		/* Initializing lower CP for HBM DMA QMAN */
2112 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2113 
2114 		gaudi_init_dma_core(hdev, dma_id);
2115 
2116 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2117 	}
2118 
2119 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2120 }
2121 
2122 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2123 					int qman_id, u64 qman_base_addr)
2124 {
2125 	u32 mtr_base_lo, mtr_base_hi;
2126 	u32 so_base_lo, so_base_hi;
2127 	u32 q_off, mme_id;
2128 	u32 mme_qm_err_cfg;
2129 
2130 	mtr_base_lo = lower_32_bits(CFG_BASE +
2131 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2132 	mtr_base_hi = upper_32_bits(CFG_BASE +
2133 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2134 	so_base_lo = lower_32_bits(CFG_BASE +
2135 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2136 	so_base_hi = upper_32_bits(CFG_BASE +
2137 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2138 
2139 	q_off = mme_offset + qman_id * 4;
2140 
2141 	if (qman_id < 4) {
2142 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2143 					lower_32_bits(qman_base_addr));
2144 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2145 					upper_32_bits(qman_base_addr));
2146 
2147 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2148 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2149 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2150 
2151 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2152 							QMAN_CPDMA_SIZE_OFFSET);
2153 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2154 							QMAN_CPDMA_SRC_OFFSET);
2155 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2156 							QMAN_CPDMA_DST_OFFSET);
2157 	} else {
2158 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2159 							QMAN_LDMA_SIZE_OFFSET);
2160 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2161 							QMAN_LDMA_SRC_OFFSET);
2162 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2163 							QMAN_LDMA_DST_OFFSET);
2164 
2165 		/* Configure RAZWI IRQ */
2166 		mme_id = mme_offset /
2167 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
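		/*
		 * mme_offset here is either 0 (MME0 QM) or the distance from
		 * MME0 to MME2; assuming the MME QM blocks are equally spaced,
		 * dividing by the per-MME stride and then by 2 yields 0 or 1.
		 */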
2168 
2169 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2170 		if (hdev->stop_on_err) {
2171 			mme_qm_err_cfg |=
2172 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2173 		}
2174 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2175 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2176 			lower_32_bits(CFG_BASE +
2177 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2178 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2179 			upper_32_bits(CFG_BASE +
2180 					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2181 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2182 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2183 									mme_id);
2184 
2185 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2186 				QM_ARB_ERR_MSG_EN_MASK);
2187 
2188 		/* Increase ARB WDT to support streams architecture */
2189 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2190 				GAUDI_ARB_WDT_TIMEOUT);
2191 
2192 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2193 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2194 				QMAN_INTERNAL_MAKE_TRUSTED);
2195 	}
2196 
2197 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2198 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2199 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2200 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2201 }
2202 
2203 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2204 {
2205 	struct gaudi_device *gaudi = hdev->asic_specific;
2206 	struct gaudi_internal_qman_info *q;
2207 	u64 qman_base_addr;
2208 	u32 mme_offset;
2209 	int i, internal_q_index;
2210 
2211 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
2212 		return;
2213 
2214 	/*
2215 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2216 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2217 	 */
2218 
2219 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2220 
2221 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2222 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2223 		q = &gaudi->internal_qmans[internal_q_index];
2224 		qman_base_addr = (u64) q->pq_dma_addr;
2225 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2226 					qman_base_addr);
2227 		if (i == 3)
2228 			mme_offset = 0;
2229 	}
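	/*
	 * The i == 3 switch above means the first four streams program the
	 * MME2 QM and the remaining ones program the MME0 QM (offset 0).
	 */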
2230 
2231 	/* Initializing lower CP for MME QMANs */
2232 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2233 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2234 	gaudi_init_mme_qman(hdev, 0, 4, 0);
2235 
2236 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2237 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2238 
2239 	gaudi->hw_cap_initialized |= HW_CAP_MME;
2240 }
2241 
2242 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2243 				int qman_id, u64 qman_base_addr)
2244 {
2245 	u32 mtr_base_lo, mtr_base_hi;
2246 	u32 so_base_lo, so_base_hi;
2247 	u32 q_off, tpc_id;
2248 	u32 tpc_qm_err_cfg;
2249 
2250 	mtr_base_lo = lower_32_bits(CFG_BASE +
2251 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2252 	mtr_base_hi = upper_32_bits(CFG_BASE +
2253 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2254 	so_base_lo = lower_32_bits(CFG_BASE +
2255 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2256 	so_base_hi = upper_32_bits(CFG_BASE +
2257 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2258 
2259 	q_off = tpc_offset + qman_id * 4;
2260 
2261 	if (qman_id < 4) {
2262 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2263 					lower_32_bits(qman_base_addr));
2264 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2265 					upper_32_bits(qman_base_addr));
2266 
2267 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2268 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2269 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2270 
2271 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2272 							QMAN_CPDMA_SIZE_OFFSET);
2273 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2274 							QMAN_CPDMA_SRC_OFFSET);
2275 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2276 							QMAN_CPDMA_DST_OFFSET);
2277 	} else {
2278 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2279 							QMAN_LDMA_SIZE_OFFSET);
2280 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2281 							QMAN_LDMA_SRC_OFFSET);
2282 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2283 							QMAN_LDMA_DST_OFFSET);
2284 
2285 		/* Configure RAZWI IRQ */
2286 		tpc_id = tpc_offset /
2287 				(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2288 
2289 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2290 		if (hdev->stop_on_err) {
2291 			tpc_qm_err_cfg |=
2292 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2293 		}
2294 
2295 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2296 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2297 			lower_32_bits(CFG_BASE +
2298 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2299 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2300 			upper_32_bits(CFG_BASE +
2301 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2302 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2303 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2304 									tpc_id);
2305 
2306 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2307 				QM_ARB_ERR_MSG_EN_MASK);
2308 
2309 		/* Increase ARB WDT to support streams architecture */
2310 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2311 				GAUDI_ARB_WDT_TIMEOUT);
2312 
2313 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2314 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2315 				QMAN_INTERNAL_MAKE_TRUSTED);
2316 	}
2317 
2318 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2319 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2320 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2321 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2322 }
2323 
2324 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2325 {
2326 	struct gaudi_device *gaudi = hdev->asic_specific;
2327 	struct gaudi_internal_qman_info *q;
2328 	u64 qman_base_addr;
2329 	u32 so_base_hi, tpc_offset = 0;
2330 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2331 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2332 	int i, tpc_id, internal_q_index;
2333 
2334 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2335 		return;
2336 
2337 	so_base_hi = upper_32_bits(CFG_BASE +
2338 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2339 
2340 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2341 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
2342 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2343 						tpc_id * QMAN_STREAMS + i;
2344 			q = &gaudi->internal_qmans[internal_q_index];
2345 			qman_base_addr = (u64) q->pq_dma_addr;
2346 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
2347 						qman_base_addr);
2348 
2349 			if (i == 3) {
2350 				/* Initializing lower CP for TPC QMAN */
2351 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2352 
2353 				/* Enable the QMAN and TPC channel */
2354 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2355 						QMAN_TPC_ENABLE);
2356 			}
2357 		}
2358 
2359 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2360 				so_base_hi);
2361 
2362 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2363 
2364 		gaudi->hw_cap_initialized |=
2365 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
2366 	}
2367 }
2368 
2369 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2370 {
2371 	struct gaudi_device *gaudi = hdev->asic_specific;
2372 
2373 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2374 		return;
2375 
2376 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2377 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2378 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2379 }
2380 
2381 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2382 {
2383 	struct gaudi_device *gaudi = hdev->asic_specific;
2384 
2385 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2386 		return;
2387 
2388 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2389 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2390 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2391 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2392 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2393 }
2394 
2395 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2396 {
2397 	struct gaudi_device *gaudi = hdev->asic_specific;
2398 
2399 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2400 		return;
2401 
2402 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
2403 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
2404 }
2405 
2406 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2407 {
2408 	struct gaudi_device *gaudi = hdev->asic_specific;
2409 	u32 tpc_offset = 0;
2410 	int tpc_id;
2411 
2412 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2413 		return;
2414 
2415 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2416 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2417 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2418 	}
2419 }
2420 
2421 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2422 {
2423 	struct gaudi_device *gaudi = hdev->asic_specific;
2424 
2425 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2426 		return;
2427 
2428 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2429 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2430 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2431 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2432 }
2433 
2434 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2435 {
2436 	struct gaudi_device *gaudi = hdev->asic_specific;
2437 
2438 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2439 		return;
2440 
2441 	/* Stop CPs of HBM DMA QMANs */
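	/* 0x1F sets the stop bit for all five CPs: the four stream (upper) CPs plus the lower CP */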
2442 
2443 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2444 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2445 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2446 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2447 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2448 }
2449 
2450 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2451 {
2452 	struct gaudi_device *gaudi = hdev->asic_specific;
2453 
2454 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2455 		return;
2456 
2457 	/* Stop CPs of MME QMANs */
2458 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2459 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2460 }
2461 
2462 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2463 {
2464 	struct gaudi_device *gaudi = hdev->asic_specific;
2465 
2466 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2467 		return;
2468 
2469 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2470 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2471 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2472 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2473 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2474 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2475 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2476 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2477 }
2478 
2479 static void gaudi_pci_dma_stall(struct hl_device *hdev)
2480 {
2481 	struct gaudi_device *gaudi = hdev->asic_specific;
2482 
2483 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2484 		return;
2485 
2486 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2487 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2488 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2489 }
2490 
2491 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2492 {
2493 	struct gaudi_device *gaudi = hdev->asic_specific;
2494 
2495 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2496 		return;
2497 
2498 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2499 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2500 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2501 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2502 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2503 }
2504 
2505 static void gaudi_mme_stall(struct hl_device *hdev)
2506 {
2507 	struct gaudi_device *gaudi = hdev->asic_specific;
2508 
2509 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2510 		return;
2511 
2512 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
2513 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2514 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2515 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2516 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2517 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2518 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2519 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2520 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2521 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2522 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2523 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2524 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2525 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2526 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2527 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2528 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2529 }
2530 
2531 static void gaudi_tpc_stall(struct hl_device *hdev)
2532 {
2533 	struct gaudi_device *gaudi = hdev->asic_specific;
2534 
2535 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2536 		return;
2537 
2538 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2539 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2540 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2541 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2542 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2543 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2544 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2545 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2546 }
2547 
2548 static void gaudi_set_clock_gating(struct hl_device *hdev)
2549 {
2550 	struct gaudi_device *gaudi = hdev->asic_specific;
2551 	u32 qman_offset;
2552 	bool enable;
2553 	int i;
2554 
2555 	/* If we are in the middle of a debug session, don't enable clock
2556 	 * gating as it may interfere
2557 	 */
2558 	if (hdev->in_debug)
2559 		return;
2560 
2561 	for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
2562 		enable = !!(hdev->clock_gating_mask &
2563 				(BIT_ULL(gaudi_dma_assignment[i])));
2564 
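		/*
		 * PCI DMA QMANs gate only the upper CPs
		 * (QMAN_UPPER_CP_CGM_PWR_GATE_EN); presumably the lower CP
		 * must remain ungated for driver use (assumption).
		 */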
2565 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2566 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2567 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2568 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2569 				enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
2570 	}
2571 
2572 	for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
2573 		enable = !!(hdev->clock_gating_mask &
2574 				(BIT_ULL(gaudi_dma_assignment[i])));
2575 
2576 		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2577 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2578 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2579 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2580 				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2581 	}
2582 
2583 	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
2584 	WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2585 	WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2586 
2587 	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
2588 	WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2589 	WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2590 
2591 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2592 		enable = !!(hdev->clock_gating_mask &
2593 				(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
2594 
2595 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2596 				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2597 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2598 				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2599 
2600 		qman_offset += TPC_QMAN_OFFSET;
2601 	}
2602 
2603 	gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2604 }
2605 
2606 static void gaudi_disable_clock_gating(struct hl_device *hdev)
2607 {
2608 	struct gaudi_device *gaudi = hdev->asic_specific;
2609 	u32 qman_offset;
2610 	int i;
2611 
2612 	if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2613 		return;
2614 
2615 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2616 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2617 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2618 
2619 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2620 	}
2621 
2622 	WREG32(mmMME0_QM_CGM_CFG, 0);
2623 	WREG32(mmMME0_QM_CGM_CFG1, 0);
2624 	WREG32(mmMME2_QM_CGM_CFG, 0);
2625 	WREG32(mmMME2_QM_CGM_CFG1, 0);
2626 
2627 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2628 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2629 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2630 
2631 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2632 	}
2633 
2634 	gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2635 }
2636 
2637 static void gaudi_enable_timestamp(struct hl_device *hdev)
2638 {
2639 	/* Disable the timestamp counter */
2640 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2641 
2642 	/* Zero the lower/upper parts of the 64-bit counter */
2643 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2644 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2645 
2646 	/* Enable the counter */
2647 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2648 }
2649 
2650 static void gaudi_disable_timestamp(struct hl_device *hdev)
2651 {
2652 	/* Disable the timestamp counter */
2653 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2654 }
2655 
2656 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2657 {
2658 	u32 wait_timeout_ms;
2659 
2660 	dev_info(hdev->dev,
2661 		"Halting compute engines and disabling interrupts\n");
2662 
2663 	if (hdev->pldm)
2664 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2665 	else
2666 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2667 
2668 
2669 	gaudi_stop_mme_qmans(hdev);
2670 	gaudi_stop_tpc_qmans(hdev);
2671 	gaudi_stop_hbm_dma_qmans(hdev);
2672 	gaudi_stop_pci_dma_qmans(hdev);
2673 
2674 	hdev->asic_funcs->disable_clock_gating(hdev);
2675 
2676 	msleep(wait_timeout_ms);
2677 
2678 	gaudi_pci_dma_stall(hdev);
2679 	gaudi_hbm_dma_stall(hdev);
2680 	gaudi_tpc_stall(hdev);
2681 	gaudi_mme_stall(hdev);
2682 
2683 	msleep(wait_timeout_ms);
2684 
2685 	gaudi_disable_mme_qmans(hdev);
2686 	gaudi_disable_tpc_qmans(hdev);
2687 	gaudi_disable_hbm_dma_qmans(hdev);
2688 	gaudi_disable_pci_dma_qmans(hdev);
2689 
2690 	gaudi_disable_timestamp(hdev);
2691 
2692 	gaudi_disable_msi(hdev);
2693 }
2694 
2695 static int gaudi_mmu_init(struct hl_device *hdev)
2696 {
2697 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2698 	struct gaudi_device *gaudi = hdev->asic_specific;
2699 	u64 hop0_addr;
2700 	int rc, i;
2701 
2702 	if (!hdev->mmu_enable)
2703 		return 0;
2704 
2705 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2706 		return 0;
2707 
2708 	hdev->dram_supports_virtual_memory = false;
2709 
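	/*
	 * Each ASID gets its own hop-0 page table, laid out contiguously in
	 * the page-table area at a stride of one hop table per ASID.
	 */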
2710 	for (i = 0 ; i < prop->max_asid ; i++) {
2711 		hop0_addr = prop->mmu_pgt_addr +
2712 				(i * prop->mmu_hop_table_size);
2713 
2714 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2715 		if (rc) {
2716 			dev_err(hdev->dev,
2717 				"failed to set hop0 addr for asid %d\n", i);
2718 			goto err;
2719 		}
2720 	}
2721 
2722 	/* init MMU cache manage page */
2723 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2724 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2725 
2726 	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
2727 
2728 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
2729 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
2730 
2731 	WREG32(mmSTLB_HOP_CONFIGURATION,
2732 			hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2733 
2734 	/*
2735 	 * The H/W expects the first PI after init to be 1. After wraparound
2736 	 * we'll write 0.
2737 	 */
2738 	gaudi->mmu_cache_inv_pi = 1;
2739 
2740 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
2741 
2742 	return 0;
2743 
2744 err:
2745 	return rc;
2746 }
2747 
2748 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2749 {
2750 	void __iomem *dst;
2751 
2752 	/* HBM scrambler must be initialized before pushing F/W to HBM */
2753 	gaudi_init_scrambler_hbm(hdev);
2754 
2755 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2756 
2757 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2758 }
2759 
2760 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2761 {
2762 	void __iomem *dst;
2763 
2764 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2765 
2766 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2767 }
2768 
2769 static void gaudi_read_device_fw_version(struct hl_device *hdev,
2770 					enum hl_fw_component fwc)
2771 {
2772 	const char *name;
2773 	u32 ver_off;
2774 	char *dest;
2775 
2776 	switch (fwc) {
2777 	case FW_COMP_UBOOT:
2778 		ver_off = RREG32(mmUBOOT_VER_OFFSET);
2779 		dest = hdev->asic_prop.uboot_ver;
2780 		name = "U-Boot";
2781 		break;
2782 	case FW_COMP_PREBOOT:
2783 		ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2784 		dest = hdev->asic_prop.preboot_ver;
2785 		name = "Preboot";
2786 		break;
2787 	default:
2788 		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2789 		return;
2790 	}
2791 
2792 	ver_off &= ~((u32)SRAM_BASE_ADDR);
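	/*
	 * The version register holds an absolute SRAM address; stripping the
	 * SRAM base leaves an offset that can be bounds-checked and used with
	 * the SRAM BAR mapping below.
	 */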
2793 
2794 	if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2795 		memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2796 							VERSION_MAX_LEN);
2797 	} else {
2798 		dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2799 								name, ver_off);
2800 		strcpy(dest, "unavailable");
2801 	}
2802 }
2803 
2804 static int gaudi_init_cpu(struct hl_device *hdev)
2805 {
2806 	struct gaudi_device *gaudi = hdev->asic_specific;
2807 	int rc;
2808 
2809 	if (!hdev->cpu_enable)
2810 		return 0;
2811 
2812 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2813 		return 0;
2814 
2815 	/*
2816 	 * The device CPU works with 40-bit addresses.
2817 	 * This register sets the extension to 50 bits.
2818 	 */
2819 	WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2820 
2821 	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2822 			mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2823 			mmCPU_CMD_STATUS_TO_HOST,
2824 			mmCPU_BOOT_ERR0,
2825 			!hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2826 			GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2827 
2828 	if (rc)
2829 		return rc;
2830 
2831 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
2832 
2833 	return 0;
2834 }
2835 
2836 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2837 {
2838 	struct gaudi_device *gaudi = hdev->asic_specific;
2839 	struct hl_eq *eq;
2840 	u32 status;
2841 	struct hl_hw_queue *cpu_pq =
2842 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2843 	int err;
2844 
2845 	if (!hdev->cpu_queues_enable)
2846 		return 0;
2847 
2848 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2849 		return 0;
2850 
2851 	eq = &hdev->event_queue;
2852 
2853 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2854 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2855 
2856 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2857 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2858 
2859 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2860 			lower_32_bits(hdev->cpu_accessible_dma_address));
2861 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2862 			upper_32_bits(hdev->cpu_accessible_dma_address));
2863 
2864 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2865 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2866 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2867 
2868 	/* Used for EQ CI */
2869 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2870 
2871 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
2872 
2873 	if (gaudi->multi_msi_mode)
2874 		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2875 	else
2876 		WREG32(mmCPU_IF_QUEUE_INIT,
2877 			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2878 
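	/*
	 * Kick the device CPU by raising the PI_UPDATE event through the GIC
	 * so it picks up the queue configuration written above (interpretation
	 * based on the event name).
	 */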
2879 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2880 
2881 	err = hl_poll_timeout(
2882 		hdev,
2883 		mmCPU_IF_QUEUE_INIT,
2884 		status,
2885 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
2886 		1000,
2887 		cpu_timeout);
2888 
2889 	if (err) {
2890 		dev_err(hdev->dev,
2891 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
2892 		return -EIO;
2893 	}
2894 
2895 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2896 	return 0;
2897 }
2898 
2899 static void gaudi_pre_hw_init(struct hl_device *hdev)
2900 {
2901 	/* Perform read from the device to make sure device is up */
2902 	RREG32(mmHW_STATE);
2903 
2904 	/* Set the access through PCI bars (Linux driver only) as
2905 	 * secured
2906 	 */
2907 	WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
2908 			(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2909 			PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2910 
2911 	/* Perform read to flush the waiting writes to ensure
2912 	 * configuration was set in the device
2913 	 */
2914 	RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2915 
2916 	/*
2917 	 * Let's mark in the H/W that we have reached this point. We check
2918 	 * this value in the reset_before_init function to understand whether
2919 	 * we need to reset the chip before doing H/W init. This register is
2920 	 * cleared by the H/W upon H/W reset
2921 	 */
2922 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2923 
2924 	/* Configure the reset registers. Must be done as early as possible
2925 	 * in case we fail during H/W initialization
2926 	 */
2927 	WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2928 					(CFG_RST_H_DMA_MASK |
2929 					CFG_RST_H_MME_MASK |
2930 					CFG_RST_H_SM_MASK |
2931 					CFG_RST_H_TPC_7_MASK));
2932 
2933 	WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2934 
2935 	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2936 					(CFG_RST_H_HBM_MASK |
2937 					CFG_RST_H_TPC_7_MASK |
2938 					CFG_RST_H_NIC_MASK |
2939 					CFG_RST_H_SM_MASK |
2940 					CFG_RST_H_DMA_MASK |
2941 					CFG_RST_H_MME_MASK |
2942 					CFG_RST_H_CPU_MASK |
2943 					CFG_RST_H_MMU_MASK));
2944 
2945 	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2946 					(CFG_RST_L_IF_MASK |
2947 					CFG_RST_L_PSOC_MASK |
2948 					CFG_RST_L_TPC_MASK));
2949 }
2950 
2951 static int gaudi_hw_init(struct hl_device *hdev)
2952 {
2953 	int rc;
2954 
2955 	dev_info(hdev->dev, "Starting initialization of H/W\n");
2956 
2957 	gaudi_pre_hw_init(hdev);
2958 
2959 	gaudi_init_pci_dma_qmans(hdev);
2960 
2961 	gaudi_init_hbm_dma_qmans(hdev);
2962 
2963 	rc = gaudi_init_cpu(hdev);
2964 	if (rc) {
2965 		dev_err(hdev->dev, "failed to initialize CPU\n");
2966 		return rc;
2967 	}
2968 
2969 	/* SRAM scrambler must be initialized after CPU is running from HBM */
2970 	gaudi_init_scrambler_sram(hdev);
2971 
2972 	/* This is here just in case we are working without CPU */
2973 	gaudi_init_scrambler_hbm(hdev);
2974 
2975 	gaudi_init_golden_registers(hdev);
2976 
2977 	rc = gaudi_mmu_init(hdev);
2978 	if (rc)
2979 		return rc;
2980 
2981 	gaudi_init_security(hdev);
2982 
2983 	gaudi_init_mme_qmans(hdev);
2984 
2985 	gaudi_init_tpc_qmans(hdev);
2986 
2987 	hdev->asic_funcs->set_clock_gating(hdev);
2988 
2989 	gaudi_enable_timestamp(hdev);
2990 
2991 	/* MSI must be enabled before CPU queues are initialized */
2992 	rc = gaudi_enable_msi(hdev);
2993 	if (rc)
2994 		goto disable_queues;
2995 
2996 	/* must be called after MSI was enabled */
2997 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2998 	if (rc) {
2999 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3000 			rc);
3001 		goto disable_msi;
3002 	}
3003 
3004 	/* Perform read from the device to flush all configuration */
3005 	RREG32(mmHW_STATE);
3006 
3007 	return 0;
3008 
3009 disable_msi:
3010 	gaudi_disable_msi(hdev);
3011 disable_queues:
3012 	gaudi_disable_mme_qmans(hdev);
3013 	gaudi_disable_pci_dma_qmans(hdev);
3014 
3015 	return rc;
3016 }
3017 
3018 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3019 {
3020 	struct gaudi_device *gaudi = hdev->asic_specific;
3021 	u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
3022 
3023 	if (!hard_reset) {
3024 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3025 		return;
3026 	}
3027 
3028 	if (hdev->pldm) {
3029 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3030 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3031 	} else {
3032 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3033 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3034 	}
3035 
3036 	/* Set device to handle FLR by H/W as we will put the device CPU to
3037 	 * halt mode
3038 	 */
3039 	WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3040 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3041 
3042 	/* We don't know the state of the CPU, so make sure it is stopped by
3043 	 * any means necessary
3044 	 */
3045 	WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3046 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3047 
3048 	msleep(cpu_timeout_ms);
3049 
3050 	/* Tell ASIC not to re-initialize PCIe */
3051 	WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3052 
3053 	boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3054 
3055 	/* H/W bug WA:
3056 	 * rdata[31:0] = strap_read_val;
3057 	 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3058 	 */
3059 	boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3060 			(boot_strap & 0x001FFFFF));
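	/*
	 * The expression above implements the WA formula: bits [30:21] are
	 * shifted up to [31:22] and bit 21 is left clear, i.e.
	 * wdata = {rdata[30:21], 1'b0, rdata[20:0]}.
	 */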
3061 	WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3062 
3063 	/* Restart BTL/BLR upon hard-reset */
3064 	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3065 
3066 	WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3067 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3068 	dev_info(hdev->dev,
3069 		"Issued HARD reset command, going to wait %dms\n",
3070 		reset_timeout_ms);
3071 
3072 	/*
3073 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3074 	 * itself is in reset. Need to wait until the reset is deasserted
3075 	 */
3076 	msleep(reset_timeout_ms);
3077 
3078 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3079 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3080 		dev_err(hdev->dev,
3081 			"Timeout while waiting for device to reset 0x%x\n",
3082 			status);
3083 
3084 	WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3085 
3086 	gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3087 					HW_CAP_HBM | HW_CAP_PCI_DMA |
3088 					HW_CAP_MME | HW_CAP_TPC_MASK |
3089 					HW_CAP_HBM_DMA | HW_CAP_PLL |
3090 					HW_CAP_MMU |
3091 					HW_CAP_SRAM_SCRAMBLER |
3092 					HW_CAP_HBM_SCRAMBLER |
3093 					HW_CAP_CLK_GATE);
3094 
3095 	memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3096 }
3097 
3098 static int gaudi_suspend(struct hl_device *hdev)
3099 {
3100 	int rc;
3101 
3102 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
3103 	if (rc)
3104 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3105 
3106 	return rc;
3107 }
3108 
3109 static int gaudi_resume(struct hl_device *hdev)
3110 {
3111 	return gaudi_init_iatu(hdev);
3112 }
3113 
3114 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3115 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
3116 {
3117 	int rc;
3118 
3119 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3120 			VM_DONTCOPY | VM_NORESERVE;
3121 
3122 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
3123 				(dma_addr - HOST_PHYS_BASE), size);
3124 	if (rc)
3125 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
3126 
3127 	return rc;
3128 }
3129 
3130 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3131 {
3132 	struct gaudi_device *gaudi = hdev->asic_specific;
3133 	u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3134 	int dma_id;
3135 	bool invalid_queue = false;
3136 
3137 	switch (hw_queue_id) {
3138 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3139 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3140 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3141 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3142 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3143 		break;
3144 
3145 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3146 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3147 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3148 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3149 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3150 		break;
3151 
3152 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3153 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3154 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3155 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3156 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3157 		break;
3158 
3159 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3160 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3161 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3162 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3163 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3164 		break;
3165 
3166 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3167 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3168 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3169 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3170 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3171 		break;
3172 
3173 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3174 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3175 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3176 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3177 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3178 		break;
3179 
3180 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3181 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3182 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3183 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3184 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3185 		break;
3186 
3187 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3188 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3189 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3190 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3191 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3192 		break;
3193 
3194 	case GAUDI_QUEUE_ID_CPU_PQ:
3195 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3196 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
3197 		else
3198 			invalid_queue = true;
3199 		break;
3200 
3201 	case GAUDI_QUEUE_ID_MME_0_0:
3202 		db_reg_offset = mmMME2_QM_PQ_PI_0;
3203 		break;
3204 
3205 	case GAUDI_QUEUE_ID_MME_0_1:
3206 		db_reg_offset = mmMME2_QM_PQ_PI_1;
3207 		break;
3208 
3209 	case GAUDI_QUEUE_ID_MME_0_2:
3210 		db_reg_offset = mmMME2_QM_PQ_PI_2;
3211 		break;
3212 
3213 	case GAUDI_QUEUE_ID_MME_0_3:
3214 		db_reg_offset = mmMME2_QM_PQ_PI_3;
3215 		break;
3216 
3217 	case GAUDI_QUEUE_ID_MME_1_0:
3218 		db_reg_offset = mmMME0_QM_PQ_PI_0;
3219 		break;
3220 
3221 	case GAUDI_QUEUE_ID_MME_1_1:
3222 		db_reg_offset = mmMME0_QM_PQ_PI_1;
3223 		break;
3224 
3225 	case GAUDI_QUEUE_ID_MME_1_2:
3226 		db_reg_offset = mmMME0_QM_PQ_PI_2;
3227 		break;
3228 
3229 	case GAUDI_QUEUE_ID_MME_1_3:
3230 		db_reg_offset = mmMME0_QM_PQ_PI_3;
3231 		break;
3232 
3233 	case GAUDI_QUEUE_ID_TPC_0_0:
3234 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
3235 		break;
3236 
3237 	case GAUDI_QUEUE_ID_TPC_0_1:
3238 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
3239 		break;
3240 
3241 	case GAUDI_QUEUE_ID_TPC_0_2:
3242 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
3243 		break;
3244 
3245 	case GAUDI_QUEUE_ID_TPC_0_3:
3246 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
3247 		break;
3248 
3249 	case GAUDI_QUEUE_ID_TPC_1_0:
3250 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
3251 		break;
3252 
3253 	case GAUDI_QUEUE_ID_TPC_1_1:
3254 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
3255 		break;
3256 
3257 	case GAUDI_QUEUE_ID_TPC_1_2:
3258 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
3259 		break;
3260 
3261 	case GAUDI_QUEUE_ID_TPC_1_3:
3262 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
3263 		break;
3264 
3265 	case GAUDI_QUEUE_ID_TPC_2_0:
3266 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
3267 		break;
3268 
3269 	case GAUDI_QUEUE_ID_TPC_2_1:
3270 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
3271 		break;
3272 
3273 	case GAUDI_QUEUE_ID_TPC_2_2:
3274 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
3275 		break;
3276 
3277 	case GAUDI_QUEUE_ID_TPC_2_3:
3278 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
3279 		break;
3280 
3281 	case GAUDI_QUEUE_ID_TPC_3_0:
3282 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
3283 		break;
3284 
3285 	case GAUDI_QUEUE_ID_TPC_3_1:
3286 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
3287 		break;
3288 
3289 	case GAUDI_QUEUE_ID_TPC_3_2:
3290 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
3291 		break;
3292 
3293 	case GAUDI_QUEUE_ID_TPC_3_3:
3294 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
3295 		break;
3296 
3297 	case GAUDI_QUEUE_ID_TPC_4_0:
3298 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
3299 		break;
3300 
3301 	case GAUDI_QUEUE_ID_TPC_4_1:
3302 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
3303 		break;
3304 
3305 	case GAUDI_QUEUE_ID_TPC_4_2:
3306 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
3307 		break;
3308 
3309 	case GAUDI_QUEUE_ID_TPC_4_3:
3310 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
3311 		break;
3312 
3313 	case GAUDI_QUEUE_ID_TPC_5_0:
3314 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
3315 		break;
3316 
3317 	case GAUDI_QUEUE_ID_TPC_5_1:
3318 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
3319 		break;
3320 
3321 	case GAUDI_QUEUE_ID_TPC_5_2:
3322 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
3323 		break;
3324 
3325 	case GAUDI_QUEUE_ID_TPC_5_3:
3326 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
3327 		break;
3328 
3329 	case GAUDI_QUEUE_ID_TPC_6_0:
3330 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
3331 		break;
3332 
3333 	case GAUDI_QUEUE_ID_TPC_6_1:
3334 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
3335 		break;
3336 
3337 	case GAUDI_QUEUE_ID_TPC_6_2:
3338 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
3339 		break;
3340 
3341 	case GAUDI_QUEUE_ID_TPC_6_3:
3342 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
3343 		break;
3344 
3345 	case GAUDI_QUEUE_ID_TPC_7_0:
3346 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
3347 		break;
3348 
3349 	case GAUDI_QUEUE_ID_TPC_7_1:
3350 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
3351 		break;
3352 
3353 	case GAUDI_QUEUE_ID_TPC_7_2:
3354 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
3355 		break;
3356 
3357 	case GAUDI_QUEUE_ID_TPC_7_3:
3358 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
3359 		break;
3360 
3361 	default:
3362 		invalid_queue = true;
3363 	}
3364 
3365 	if (invalid_queue) {
3366 		/* Should never get here */
3367 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3368 			hw_queue_id);
3369 		return;
3370 	}
3371 
3372 	db_value = pi;
3373 
3374 	/* ring the doorbell */
3375 	WREG32(db_reg_offset, db_value);
3376 
3377 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3378 		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3379 				GAUDI_EVENT_PI_UPDATE);
3380 }
3381 
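/*
 * Editor's note (illustrative, not part of the driver): for the DMA queues
 * handled above, the doorbell register is computed as
 *
 *	db_reg_offset = mmDMA0_QM_PQ_PI_0 +
 *			dma_id * DMA_QMAN_OFFSET +
 *			pq_index * 4;
 *
 * where pq_index is (hw_queue_id & 0x3) for DMA 0/1 and
 * ((hw_queue_id - 1) & 0x3) for DMA 2-7. The "- 1" compensates for
 * GAUDI_QUEUE_ID_CPU_PQ sitting between the DMA_1 and DMA_2 queue IDs in the
 * enum (an assumption inferred from the code above), so each engine still
 * sees PQ indices 0..3.
 */
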
3382 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3383 				struct hl_bd *bd)
3384 {
3385 	__le64 *pbd = (__le64 *) bd;
3386 
3387 	/* The QMANs are in host memory, so a simple copy suffices */
3388 	pqe[0] = pbd[0];
3389 	pqe[1] = pbd[1];
3390 }
3391 
3392 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3393 					dma_addr_t *dma_handle, gfp_t flags)
3394 {
3395 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3396 						dma_handle, flags);
3397 
3398 	/* Shift to the device's base physical address of host memory */
3399 	if (kernel_addr)
3400 		*dma_handle += HOST_PHYS_BASE;
3401 
3402 	return kernel_addr;
3403 }
3404 
3405 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3406 		void *cpu_addr, dma_addr_t dma_handle)
3407 {
3408 	/* Cancel the device's base physical address of host memory */
3409 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3410 
3411 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3412 }
3413 
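/*
 * Editor's illustrative usage sketch (not part of the driver): the two
 * wrappers above keep the HOST_PHYS_BASE convention symmetric. The handle
 * returned by the alloc wrapper is a device-side address (host physical
 * address plus HOST_PHYS_BASE), so it must be released through the matching
 * free wrapper, which subtracts the base before calling dma_free_coherent():
 *
 *	dma_addr_t dma_handle;
 *	void *va;
 *
 *	va = gaudi_dma_alloc_coherent(hdev, SZ_4K, &dma_handle, GFP_KERNEL);
 *	if (va)
 *		gaudi_dma_free_coherent(hdev, SZ_4K, va, dma_handle);
 */
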
3414 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3415 				u32 queue_id, dma_addr_t *dma_handle,
3416 				u16 *queue_len)
3417 {
3418 	struct gaudi_device *gaudi = hdev->asic_specific;
3419 	struct gaudi_internal_qman_info *q;
3420 
3421 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3422 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3423 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3424 		return NULL;
3425 	}
3426 
3427 	q = &gaudi->internal_qmans[queue_id];
3428 	*dma_handle = q->pq_dma_addr;
3429 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3430 
3431 	return q->pq_kernel_addr;
3432 }
3433 
3434 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3435 				u16 len, u32 timeout, long *result)
3436 {
3437 	struct gaudi_device *gaudi = hdev->asic_specific;
3438 
3439 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3440 		if (result)
3441 			*result = 0;
3442 		return 0;
3443 	}
3444 
3445 	if (!timeout)
3446 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
3447 
3448 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3449 						timeout, result);
3450 }
3451 
3452 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3453 {
3454 	struct packet_msg_prot *fence_pkt;
3455 	dma_addr_t pkt_dma_addr;
3456 	u32 fence_val, tmp, timeout_usec;
3457 	dma_addr_t fence_dma_addr;
3458 	u32 *fence_ptr;
3459 	int rc;
3460 
3461 	if (hdev->pldm)
3462 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3463 	else
3464 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3465 
3466 	fence_val = GAUDI_QMAN0_FENCE_VAL;
3467 
3468 	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3469 							&fence_dma_addr);
3470 	if (!fence_ptr) {
3471 		dev_err(hdev->dev,
3472 			"Failed to allocate memory for H/W queue %d testing\n",
3473 			hw_queue_id);
3474 		return -ENOMEM;
3475 	}
3476 
3477 	*fence_ptr = 0;
3478 
3479 	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3480 					sizeof(struct packet_msg_prot),
3481 					GFP_KERNEL, &pkt_dma_addr);
3482 	if (!fence_pkt) {
3483 		dev_err(hdev->dev,
3484 			"Failed to allocate packet for H/W queue %d testing\n",
3485 			hw_queue_id);
3486 		rc = -ENOMEM;
3487 		goto free_fence_ptr;
3488 	}
3489 
3490 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
3491 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
3492 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
3493 
3494 	fence_pkt->ctl = cpu_to_le32(tmp);
3495 	fence_pkt->value = cpu_to_le32(fence_val);
3496 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3497 
3498 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3499 					sizeof(struct packet_msg_prot),
3500 					pkt_dma_addr);
3501 	if (rc) {
3502 		dev_err(hdev->dev,
3503 			"Failed to send fence packet to H/W queue %d\n",
3504 			hw_queue_id);
3505 		goto free_pkt;
3506 	}
3507 
3508 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3509 					1000, timeout_usec, true);
3510 
3511 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3512 
3513 	if (rc == -ETIMEDOUT) {
3514 		dev_err(hdev->dev,
3515 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3516 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3517 		rc = -EIO;
3518 	}
3519 
3520 free_pkt:
3521 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3522 					pkt_dma_addr);
3523 free_fence_ptr:
3524 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3525 					fence_dma_addr);
3526 	return rc;
3527 }
3528 
3529 static int gaudi_test_cpu_queue(struct hl_device *hdev)
3530 {
3531 	struct gaudi_device *gaudi = hdev->asic_specific;
3532 
3533 	/*
3534 	 * Check the capability here because send_cpu_message() won't update
3535 	 * the result value when the capability is not set
3536 	 */
3537 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3538 		return 0;
3539 
3540 	return hl_fw_test_cpu_queue(hdev);
3541 }
3542 
3543 static int gaudi_test_queues(struct hl_device *hdev)
3544 {
3545 	int i, rc, ret_val = 0;
3546 
3547 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
3548 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3549 			rc = gaudi_test_queue(hdev, i);
3550 			if (rc)
3551 				ret_val = -EINVAL;
3552 		}
3553 	}
3554 
3555 	rc = gaudi_test_cpu_queue(hdev);
3556 	if (rc)
3557 		ret_val = -EINVAL;
3558 
3559 	return ret_val;
3560 }
3561 
3562 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3563 		gfp_t mem_flags, dma_addr_t *dma_handle)
3564 {
3565 	void *kernel_addr;
3566 
3567 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
3568 		return NULL;
3569 
3570 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3571 
3572 	/* Shift to the device's base physical address of host memory */
3573 	if (kernel_addr)
3574 		*dma_handle += HOST_PHYS_BASE;
3575 
3576 	return kernel_addr;
3577 }
3578 
3579 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3580 			dma_addr_t dma_addr)
3581 {
3582 	/* Cancel the device's base physical address of host memory */
3583 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3584 
3585 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3586 }
3587 
3588 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3589 					size_t size, dma_addr_t *dma_handle)
3590 {
3591 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3592 }
3593 
3594 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3595 						size_t size, void *vaddr)
3596 {
3597 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3598 }
3599 
3600 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3601 			int nents, enum dma_data_direction dir)
3602 {
3603 	struct scatterlist *sg;
3604 	int i;
3605 
3606 	if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3607 		return -ENOMEM;
3608 
3609 	/* Shift to the device's base physical address of host memory */
3610 	for_each_sg(sgl, sg, nents, i)
3611 		sg->dma_address += HOST_PHYS_BASE;
3612 
3613 	return 0;
3614 }
3615 
3616 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3617 			int nents, enum dma_data_direction dir)
3618 {
3619 	struct scatterlist *sg;
3620 	int i;
3621 
3622 	/* Cancel the device's base physical address of host memory */
3623 	for_each_sg(sgl, sg, nents, i)
3624 		sg->dma_address -= HOST_PHYS_BASE;
3625 
3626 	dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3627 }
3628 
3629 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3630 					struct sg_table *sgt)
3631 {
3632 	struct scatterlist *sg, *sg_next_iter;
3633 	u32 count, dma_desc_cnt;
3634 	u64 len, len_next;
3635 	dma_addr_t addr, addr_next;
3636 
3637 	dma_desc_cnt = 0;
3638 
3639 	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3640 
3641 		len = sg_dma_len(sg);
3642 		addr = sg_dma_address(sg);
3643 
3644 		if (len == 0)
3645 			break;
3646 
3647 		while ((count + 1) < sgt->nents) {
3648 			sg_next_iter = sg_next(sg);
3649 			len_next = sg_dma_len(sg_next_iter);
3650 			addr_next = sg_dma_address(sg_next_iter);
3651 
3652 			if (len_next == 0)
3653 				break;
3654 
3655 			if ((addr + len == addr_next) &&
3656 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3657 				len += len_next;
3658 				count++;
3659 				sg = sg_next_iter;
3660 			} else {
3661 				break;
3662 			}
3663 		}
3664 
3665 		dma_desc_cnt++;
3666 	}
3667 
3668 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
3669 }
3670 
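/*
 * Editor's worked example (illustrative, not part of the driver): the loop
 * above merges DMA-contiguous SG entries as long as the combined length stays
 * within DMA_MAX_TRANSFER_SIZE. For instance, with three mapped entries
 * (hypothetical addresses/lengths, and assuming 0x2000 is below
 * DMA_MAX_TRANSFER_SIZE)
 *
 *	dma_address = 0x1000, length = 0x1000
 *	dma_address = 0x2000, length = 0x1000
 *	dma_address = 0x5000, length = 0x1000
 *
 * the first two are contiguous and collapse into one descriptor of length
 * 0x2000, while the third starts a new one, so the function returns
 * 2 * sizeof(struct packet_lin_dma).
 */
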
3671 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3672 				struct hl_cs_parser *parser,
3673 				struct packet_lin_dma *user_dma_pkt,
3674 				u64 addr, enum dma_data_direction dir)
3675 {
3676 	struct hl_userptr *userptr;
3677 	int rc;
3678 
3679 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3680 			parser->job_userptr_list, &userptr))
3681 		goto already_pinned;
3682 
3683 	userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3684 	if (!userptr)
3685 		return -ENOMEM;
3686 
3687 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3688 				userptr);
3689 	if (rc)
3690 		goto free_userptr;
3691 
3692 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
3693 
3694 	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3695 					userptr->sgt->nents, dir);
3696 	if (rc) {
3697 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3698 		goto unpin_memory;
3699 	}
3700 
3701 	userptr->dma_mapped = true;
3702 	userptr->dir = dir;
3703 
3704 already_pinned:
3705 	parser->patched_cb_size +=
3706 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3707 
3708 	return 0;
3709 
3710 unpin_memory:
3711 	list_del(&userptr->job_node);
3712 	hl_unpin_host_memory(hdev, userptr);
3713 free_userptr:
3714 	kfree(userptr);
3715 	return rc;
3716 }
3717 
3718 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3719 				struct hl_cs_parser *parser,
3720 				struct packet_lin_dma *user_dma_pkt,
3721 				bool src_in_host)
3722 {
3723 	enum dma_data_direction dir;
3724 	bool skip_host_mem_pin = false, user_memset;
3725 	u64 addr;
3726 	int rc = 0;
3727 
3728 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3729 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3730 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3731 
3732 	if (src_in_host) {
3733 		if (user_memset)
3734 			skip_host_mem_pin = true;
3735 
3736 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3737 		dir = DMA_TO_DEVICE;
3738 		addr = le64_to_cpu(user_dma_pkt->src_addr);
3739 	} else {
3740 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3741 		dir = DMA_FROM_DEVICE;
3742 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3743 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3744 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3745 	}
3746 
3747 	if (skip_host_mem_pin)
3748 		parser->patched_cb_size += sizeof(*user_dma_pkt);
3749 	else
3750 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3751 						addr, dir);
3752 
3753 	return rc;
3754 }
3755 
3756 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3757 				struct hl_cs_parser *parser,
3758 				struct packet_lin_dma *user_dma_pkt)
3759 {
3760 	bool src_in_host = false;
3761 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3762 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3763 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3764 
3765 	dev_dbg(hdev->dev, "DMA packet details:\n");
3766 	dev_dbg(hdev->dev, "source == 0x%llx\n",
3767 				le64_to_cpu(user_dma_pkt->src_addr));
3768 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3769 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3770 
3771 	/*
3772 	 * Special handling for DMA with size 0. Bypass all validations
3773 	 * because no transactions will be done except for WR_COMP, which
3774 	 * is not a security issue
3775 	 */
3776 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
3777 		parser->patched_cb_size += sizeof(*user_dma_pkt);
3778 		return 0;
3779 	}
3780 
3781 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3782 		src_in_host = true;
3783 
3784 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3785 						src_in_host);
3786 }
3787 
3788 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3789 					struct hl_cs_parser *parser,
3790 					struct packet_load_and_exe *user_pkt)
3791 {
3792 	u32 cfg;
3793 
3794 	cfg = le32_to_cpu(user_pkt->cfg);
3795 
3796 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3797 		dev_err(hdev->dev,
3798 			"User not allowed to use Load and Execute\n");
3799 		return -EPERM;
3800 	}
3801 
3802 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3803 
3804 	return 0;
3805 }
3806 
3807 static int gaudi_validate_cb(struct hl_device *hdev,
3808 			struct hl_cs_parser *parser, bool is_mmu)
3809 {
3810 	u32 cb_parsed_length = 0;
3811 	int rc = 0;
3812 
3813 	parser->patched_cb_size = 0;
3814 
3815 	/* user_cb_size is greater than 0, so the loop always executes */
3816 	while (cb_parsed_length < parser->user_cb_size) {
3817 		enum packet_id pkt_id;
3818 		u16 pkt_size;
3819 		struct gaudi_packet *user_pkt;
3820 
3821 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3822 
3823 		pkt_id = (enum packet_id) (
3824 				(le64_to_cpu(user_pkt->header) &
3825 				PACKET_HEADER_PACKET_ID_MASK) >>
3826 					PACKET_HEADER_PACKET_ID_SHIFT);
3827 
3828 		if (!validate_packet_id(pkt_id)) {
3829 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3830 			rc = -EINVAL;
3831 			break;
3832 		}
3833 
3834 		pkt_size = gaudi_packet_sizes[pkt_id];
3835 		cb_parsed_length += pkt_size;
3836 		if (cb_parsed_length > parser->user_cb_size) {
3837 			dev_err(hdev->dev,
3838 				"packet 0x%x is out of CB boundary\n", pkt_id);
3839 			rc = -EINVAL;
3840 			break;
3841 		}
3842 
3843 		switch (pkt_id) {
3844 		case PACKET_MSG_PROT:
3845 			dev_err(hdev->dev,
3846 				"User not allowed to use MSG_PROT\n");
3847 			rc = -EPERM;
3848 			break;
3849 
3850 		case PACKET_CP_DMA:
3851 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3852 			rc = -EPERM;
3853 			break;
3854 
3855 		case PACKET_STOP:
3856 			dev_err(hdev->dev, "User not allowed to use STOP\n");
3857 			rc = -EPERM;
3858 			break;
3859 
3860 		case PACKET_WREG_BULK:
3861 			dev_err(hdev->dev,
3862 				"User not allowed to use WREG_BULK\n");
3863 			rc = -EPERM;
3864 			break;
3865 
3866 		case PACKET_LOAD_AND_EXE:
3867 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3868 				(struct packet_load_and_exe *) user_pkt);
3869 			break;
3870 
3871 		case PACKET_LIN_DMA:
3872 			parser->contains_dma_pkt = true;
3873 			if (is_mmu)
3874 				parser->patched_cb_size += pkt_size;
3875 			else
3876 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3877 					(struct packet_lin_dma *) user_pkt);
3878 			break;
3879 
3880 		case PACKET_WREG_32:
3881 		case PACKET_MSG_LONG:
3882 		case PACKET_MSG_SHORT:
3883 		case PACKET_REPEAT:
3884 		case PACKET_FENCE:
3885 		case PACKET_NOP:
3886 		case PACKET_ARB_POINT:
3887 			parser->patched_cb_size += pkt_size;
3888 			break;
3889 
3890 		default:
3891 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3892 				pkt_id);
3893 			rc = -EINVAL;
3894 			break;
3895 		}
3896 
3897 		if (rc)
3898 			break;
3899 	}
3900 
3901 	/*
3902 	 * The new CB should have space at the end for two MSG_PROT packets:
3903 	 * 1. A packet that will act as a completion packet
3904 	 * 2. A packet that will generate an MSI-X interrupt
3905 	 */
3906 	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3907 
3908 	return rc;
3909 }
3910 
3911 static int gaudi_patch_dma_packet(struct hl_device *hdev,
3912 				struct hl_cs_parser *parser,
3913 				struct packet_lin_dma *user_dma_pkt,
3914 				struct packet_lin_dma *new_dma_pkt,
3915 				u32 *new_dma_pkt_size)
3916 {
3917 	struct hl_userptr *userptr;
3918 	struct scatterlist *sg, *sg_next_iter;
3919 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3920 	u64 len, len_next;
3921 	dma_addr_t dma_addr, dma_addr_next;
3922 	u64 device_memory_addr, addr;
3923 	enum dma_data_direction dir;
3924 	struct sg_table *sgt;
3925 	bool src_in_host = false;
3926 	bool skip_host_mem_pin = false;
3927 	bool user_memset;
3928 
3929 	ctl = le32_to_cpu(user_dma_pkt->ctl);
3930 
3931 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3932 		src_in_host = true;
3933 
3934 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3935 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3936 
3937 	if (src_in_host) {
3938 		addr = le64_to_cpu(user_dma_pkt->src_addr);
3939 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3940 		dir = DMA_TO_DEVICE;
3941 		if (user_memset)
3942 			skip_host_mem_pin = true;
3943 	} else {
3944 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
3945 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3946 		dir = DMA_FROM_DEVICE;
3947 	}
3948 
3949 	if ((!skip_host_mem_pin) &&
3950 		(!hl_userptr_is_pinned(hdev, addr,
3951 					le32_to_cpu(user_dma_pkt->tsize),
3952 					parser->job_userptr_list, &userptr))) {
3953 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3954 				addr, le32_to_cpu(user_dma_pkt->tsize));
3955 		return -EFAULT;
3956 	}
3957 
3958 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3959 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3960 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
3961 		return 0;
3962 	}
3963 
3964 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3965 
3966 	sgt = userptr->sgt;
3967 	dma_desc_cnt = 0;
3968 
3969 	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3970 		len = sg_dma_len(sg);
3971 		dma_addr = sg_dma_address(sg);
3972 
3973 		if (len == 0)
3974 			break;
3975 
3976 		while ((count + 1) < sgt->nents) {
3977 			sg_next_iter = sg_next(sg);
3978 			len_next = sg_dma_len(sg_next_iter);
3979 			dma_addr_next = sg_dma_address(sg_next_iter);
3980 
3981 			if (len_next == 0)
3982 				break;
3983 
3984 			if ((dma_addr + len == dma_addr_next) &&
3985 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3986 				len += len_next;
3987 				count++;
3988 				sg = sg_next_iter;
3989 			} else {
3990 				break;
3991 			}
3992 		}
3993 
3994 		ctl = le32_to_cpu(user_dma_pkt->ctl);
3995 		if (likely(dma_desc_cnt))
3996 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3997 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3998 		new_dma_pkt->ctl = cpu_to_le32(ctl);
3999 		new_dma_pkt->tsize = cpu_to_le32(len);
4000 
4001 		if (dir == DMA_TO_DEVICE) {
4002 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
4003 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
4004 		} else {
4005 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
4006 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
4007 		}
4008 
4009 		if (!user_memset)
4010 			device_memory_addr += len;
4011 		dma_desc_cnt++;
4012 		new_dma_pkt++;
4013 	}
4014 
4015 	if (!dma_desc_cnt) {
4016 		dev_err(hdev->dev,
4017 			"Error of 0 SG entries when patching DMA packet\n");
4018 		return -EFAULT;
4019 	}
4020 
4021 	/* Fix the last dma packet - wrcomp must be as user set it */
4022 	new_dma_pkt--;
4023 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4024 
4025 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4026 
4027 	return 0;
4028 }
4029 
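/*
 * Editor's note (illustrative, not part of the driver): gaudi_patch_dma_packet()
 * above expands one user LIN_DMA packet into one packet per (merged) SG entry.
 * For a pinned host buffer that maps to three descriptors, the patched CB ends
 * up with three LIN_DMA packets sharing the user's ctl word, except that:
 *
 *	- EB is kept only on the first packet (cleared on the rest),
 *	- WR_COMP is cleared on all of them and then restored on the last
 *	  packet to whatever the user originally requested,
 *	- the device-memory address advances by each chunk's length unless
 *	  the packet is a memset.
 *
 * *new_dma_pkt_size is then 3 * sizeof(struct packet_lin_dma).
 */
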
4030 static int gaudi_patch_cb(struct hl_device *hdev,
4031 				struct hl_cs_parser *parser)
4032 {
4033 	u32 cb_parsed_length = 0;
4034 	u32 cb_patched_cur_length = 0;
4035 	int rc = 0;
4036 
4037 	/* user_cb_size is greater than 0, so the loop always executes */
4038 	while (cb_parsed_length < parser->user_cb_size) {
4039 		enum packet_id pkt_id;
4040 		u16 pkt_size;
4041 		u32 new_pkt_size = 0;
4042 		struct gaudi_packet *user_pkt, *kernel_pkt;
4043 
4044 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
4045 		kernel_pkt = parser->patched_cb->kernel_address +
4046 					cb_patched_cur_length;
4047 
4048 		pkt_id = (enum packet_id) (
4049 				(le64_to_cpu(user_pkt->header) &
4050 				PACKET_HEADER_PACKET_ID_MASK) >>
4051 					PACKET_HEADER_PACKET_ID_SHIFT);
4052 
4053 		if (!validate_packet_id(pkt_id)) {
4054 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4055 			rc = -EINVAL;
4056 			break;
4057 		}
4058 
4059 		pkt_size = gaudi_packet_sizes[pkt_id];
4060 		cb_parsed_length += pkt_size;
4061 		if (cb_parsed_length > parser->user_cb_size) {
4062 			dev_err(hdev->dev,
4063 				"packet 0x%x is out of CB boundary\n", pkt_id);
4064 			rc = -EINVAL;
4065 			break;
4066 		}
4067 
4068 		switch (pkt_id) {
4069 		case PACKET_LIN_DMA:
4070 			rc = gaudi_patch_dma_packet(hdev, parser,
4071 					(struct packet_lin_dma *) user_pkt,
4072 					(struct packet_lin_dma *) kernel_pkt,
4073 					&new_pkt_size);
4074 			cb_patched_cur_length += new_pkt_size;
4075 			break;
4076 
4077 		case PACKET_MSG_PROT:
4078 			dev_err(hdev->dev,
4079 				"User not allowed to use MSG_PROT\n");
4080 			rc = -EPERM;
4081 			break;
4082 
4083 		case PACKET_CP_DMA:
4084 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4085 			rc = -EPERM;
4086 			break;
4087 
4088 		case PACKET_STOP:
4089 			dev_err(hdev->dev, "User not allowed to use STOP\n");
4090 			rc = -EPERM;
4091 			break;
4092 
4093 		case PACKET_WREG_32:
4094 		case PACKET_WREG_BULK:
4095 		case PACKET_MSG_LONG:
4096 		case PACKET_MSG_SHORT:
4097 		case PACKET_REPEAT:
4098 		case PACKET_FENCE:
4099 		case PACKET_NOP:
4100 		case PACKET_ARB_POINT:
4101 		case PACKET_LOAD_AND_EXE:
4102 			memcpy(kernel_pkt, user_pkt, pkt_size);
4103 			cb_patched_cur_length += pkt_size;
4104 			break;
4105 
4106 		default:
4107 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4108 				pkt_id);
4109 			rc = -EINVAL;
4110 			break;
4111 		}
4112 
4113 		if (rc)
4114 			break;
4115 	}
4116 
4117 	return rc;
4118 }
4119 
4120 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4121 		struct hl_cs_parser *parser)
4122 {
4123 	u64 patched_cb_handle;
4124 	u32 patched_cb_size;
4125 	struct hl_cb *user_cb;
4126 	int rc;
4127 
4128 	/*
4129 	 * The new CB should have space at the end for two MSG_PROT packets:
4130 	 * 1. A packet that will act as a completion packet
4131 	 * 2. A packet that will generate an MSI interrupt
4132 	 */
4133 	parser->patched_cb_size = parser->user_cb_size +
4134 			sizeof(struct packet_msg_prot) * 2;
4135 
4136 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4137 				parser->patched_cb_size, false, false,
4138 				&patched_cb_handle);
4139 
4140 	if (rc) {
4141 		dev_err(hdev->dev,
4142 			"Failed to allocate patched CB for DMA CS %d\n",
4143 			rc);
4144 		return rc;
4145 	}
4146 
4147 	patched_cb_handle >>= PAGE_SHIFT;
4148 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4149 				(u32) patched_cb_handle);
4150 	/* hl_cb_get should never fail here so use kernel WARN */
4151 	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4152 			(u32) patched_cb_handle);
4153 	if (!parser->patched_cb) {
4154 		rc = -EFAULT;
4155 		goto out;
4156 	}
4157 
4158 	/*
4159 	 * The check that parser->user_cb_size <= parser->user_cb->size was done
4160 	 * in validate_queue_index().
4161 	 */
4162 	memcpy(parser->patched_cb->kernel_address,
4163 		parser->user_cb->kernel_address,
4164 		parser->user_cb_size);
4165 
4166 	patched_cb_size = parser->patched_cb_size;
4167 
4168 	/* Validate patched CB instead of user CB */
4169 	user_cb = parser->user_cb;
4170 	parser->user_cb = parser->patched_cb;
4171 	rc = gaudi_validate_cb(hdev, parser, true);
4172 	parser->user_cb = user_cb;
4173 
4174 	if (rc) {
4175 		hl_cb_put(parser->patched_cb);
4176 		goto out;
4177 	}
4178 
4179 	if (patched_cb_size != parser->patched_cb_size) {
4180 		dev_err(hdev->dev, "user CB size mismatch\n");
4181 		hl_cb_put(parser->patched_cb);
4182 		rc = -EINVAL;
4183 		goto out;
4184 	}
4185 
4186 out:
4187 	/*
4188 	 * Always call cb destroy here because we still hold one reference
4189 	 * to the CB from the earlier cb_get call. After the job completes,
4190 	 * cb_put will release it, but here we want to remove it from the
4191 	 * idr
4192 	 */
4193 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4194 					patched_cb_handle << PAGE_SHIFT);
4195 
4196 	return rc;
4197 }
4198 
4199 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4200 		struct hl_cs_parser *parser)
4201 {
4202 	u64 patched_cb_handle;
4203 	int rc;
4204 
4205 	rc = gaudi_validate_cb(hdev, parser, false);
4206 
4207 	if (rc)
4208 		goto free_userptr;
4209 
4210 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4211 				parser->patched_cb_size, false, false,
4212 				&patched_cb_handle);
4213 	if (rc) {
4214 		dev_err(hdev->dev,
4215 			"Failed to allocate patched CB for DMA CS %d\n", rc);
4216 		goto free_userptr;
4217 	}
4218 
4219 	patched_cb_handle >>= PAGE_SHIFT;
4220 	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4221 				(u32) patched_cb_handle);
4222 	/* hl_cb_get should never fail here so use kernel WARN */
4223 	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4224 			(u32) patched_cb_handle);
4225 	if (!parser->patched_cb) {
4226 		rc = -EFAULT;
4227 		goto out;
4228 	}
4229 
4230 	rc = gaudi_patch_cb(hdev, parser);
4231 
4232 	if (rc)
4233 		hl_cb_put(parser->patched_cb);
4234 
4235 out:
4236 	/*
4237 	 * Always call cb destroy here because we still hold one reference
4238 	 * to the CB from the earlier cb_get call. After the job completes,
4239 	 * cb_put will release it, but here we want to remove it from the
4240 	 * idr
4241 	 */
4242 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4243 				patched_cb_handle << PAGE_SHIFT);
4244 
4245 free_userptr:
4246 	if (rc)
4247 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
4248 	return rc;
4249 }
4250 
4251 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4252 					struct hl_cs_parser *parser)
4253 {
4254 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4255 
4256 	/* For internal queue jobs just check if CB address is valid */
4257 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4258 					parser->user_cb_size,
4259 					asic_prop->sram_user_base_address,
4260 					asic_prop->sram_end_address))
4261 		return 0;
4262 
4263 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4264 					parser->user_cb_size,
4265 					asic_prop->dram_user_base_address,
4266 					asic_prop->dram_end_address))
4267 		return 0;
4268 
4269 	/* PMMU and HPMMU addresses are equal, check only one of them */
4270 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4271 					parser->user_cb_size,
4272 					asic_prop->pmmu.start_addr,
4273 					asic_prop->pmmu.end_addr))
4274 		return 0;
4275 
4276 	dev_err(hdev->dev,
4277 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4278 		parser->user_cb, parser->user_cb_size);
4279 
4280 	return -EFAULT;
4281 }
4282 
4283 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4284 {
4285 	struct gaudi_device *gaudi = hdev->asic_specific;
4286 
4287 	if (parser->queue_type == QUEUE_TYPE_INT)
4288 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
4289 
4290 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4291 		return gaudi_parse_cb_mmu(hdev, parser);
4292 	else
4293 		return gaudi_parse_cb_no_mmu(hdev, parser);
4294 }
4295 
4296 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4297 					void *kernel_address, u32 len,
4298 					u64 cq_addr, u32 cq_val, u32 msi_vec,
4299 					bool eb)
4300 {
4301 	struct gaudi_device *gaudi = hdev->asic_specific;
4302 	struct packet_msg_prot *cq_pkt;
4303 	u32 tmp;
4304 
4305 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
4306 
4307 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4308 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4309 
4310 	if (eb)
4311 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4312 
4313 	cq_pkt->ctl = cpu_to_le32(tmp);
4314 	cq_pkt->value = cpu_to_le32(cq_val);
4315 	cq_pkt->addr = cpu_to_le64(cq_addr);
4316 
4317 	cq_pkt++;
4318 
4319 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4320 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4321 	cq_pkt->ctl = cpu_to_le32(tmp);
4322 	cq_pkt->value = cpu_to_le32(1);
4323 
4324 	if (!gaudi->multi_msi_mode)
4325 		msi_vec = 0;
4326 
4327 	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4328 }
4329 
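/*
 * Editor's note (illustrative, not part of the driver): the helper above
 * fills the two trailing MSG_PROT packets that every patched CB reserves
 * space for. Layout at the end of the CB:
 *
 *	kernel_address + len - 2 * sizeof(struct packet_msg_prot):
 *		MSG_PROT #1 - writes cq_val to cq_addr (completion)
 *	kernel_address + len - 1 * sizeof(struct packet_msg_prot):
 *		MSG_PROT #2 - writes 1 to CFG_BASE + mmPCIE_MSI_INTR_0 +
 *		msi_vec * 4 (msi_vec is forced to 0 unless multi-MSI mode
 *		is enabled)
 */
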
4330 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4331 {
4332 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4333 }
4334 
4335 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4336 					u32 size, u64 val)
4337 {
4338 	struct packet_lin_dma *lin_dma_pkt;
4339 	struct hl_cs_job *job;
4340 	u32 cb_size, ctl, err_cause;
4341 	struct hl_cb *cb;
4342 	int rc;
4343 
4344 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
4345 	if (!cb)
4346 		return -EFAULT;
4347 
4348 	lin_dma_pkt = cb->kernel_address;
4349 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4350 	cb_size = sizeof(*lin_dma_pkt);
4351 
4352 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
4353 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
4354 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
4355 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4356 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
4357 
4358 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
4359 	lin_dma_pkt->src_addr = cpu_to_le64(val);
4360 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4361 	lin_dma_pkt->tsize = cpu_to_le32(size);
4362 
4363 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4364 	if (!job) {
4365 		dev_err(hdev->dev, "Failed to allocate a new job\n");
4366 		rc = -ENOMEM;
4367 		goto release_cb;
4368 	}
4369 
4370 	/* Verify DMA is OK */
4371 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4372 	if (err_cause && !hdev->init_done) {
4373 		dev_dbg(hdev->dev,
4374 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
4375 			err_cause);
4376 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4377 	}
4378 
4379 	job->id = 0;
4380 	job->user_cb = cb;
4381 	job->user_cb->cs_cnt++;
4382 	job->user_cb_size = cb_size;
4383 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4384 	job->patched_cb = job->user_cb;
4385 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4386 
4387 	hl_debugfs_add_job(hdev, job);
4388 
4389 	rc = gaudi_send_job_on_qman0(hdev, job);
4390 	hl_debugfs_remove_job(hdev, job);
4391 	kfree(job);
4392 	cb->cs_cnt--;
4393 
4394 	/* Verify DMA is OK */
4395 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4396 	if (err_cause) {
4397 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
4398 		rc = -EIO;
4399 		if (!hdev->init_done) {
4400 			dev_dbg(hdev->dev,
4401 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
4402 				err_cause);
4403 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4404 		}
4405 	}
4406 
4407 release_cb:
4408 	hl_cb_put(cb);
4409 	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4410 
4411 	return rc;
4412 }
4413 
4414 static void gaudi_restore_sm_registers(struct hl_device *hdev)
4415 {
4416 	int i;
4417 
4418 	for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4419 		WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4420 		WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4421 		WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4422 	}
4423 
4424 	for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4425 		WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4426 		WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4427 		WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4428 	}
4429 
4430 	i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4431 
4432 	for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4433 		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4434 
4435 	i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4436 
4437 	for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4438 		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4439 }
4440 
4441 static void gaudi_restore_dma_registers(struct hl_device *hdev)
4442 {
4443 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4444 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4445 	int i;
4446 
4447 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4448 		u64 sob_addr = CFG_BASE +
4449 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4450 				(i * sob_delta);
4451 		u32 dma_offset = i * DMA_CORE_OFFSET;
4452 
4453 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4454 				lower_32_bits(sob_addr));
4455 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4456 				upper_32_bits(sob_addr));
4457 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4458 
4459 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4460 		 * modified by the user for SRAM reduction
4461 		 */
4462 		if (i > 1)
4463 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4464 								0x00000001);
4465 	}
4466 }
4467 
4468 static void gaudi_restore_qm_registers(struct hl_device *hdev)
4469 {
4470 	u32 qman_offset;
4471 	int i;
4472 
4473 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4474 		qman_offset = i * DMA_QMAN_OFFSET;
4475 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4476 	}
4477 
4478 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4479 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4480 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4481 	}
4482 
4483 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4484 		qman_offset = i * TPC_QMAN_OFFSET;
4485 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4486 	}
4487 }
4488 
4489 static void gaudi_restore_user_registers(struct hl_device *hdev)
4490 {
4491 	gaudi_restore_sm_registers(hdev);
4492 	gaudi_restore_dma_registers(hdev);
4493 	gaudi_restore_qm_registers(hdev);
4494 }
4495 
4496 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4497 {
4498 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4499 	u64 addr = prop->sram_user_base_address;
4500 	u32 size = hdev->pldm ? 0x10000 :
4501 			(prop->sram_size - SRAM_USER_BASE_OFFSET);
4502 	u64 val = 0x7777777777777777ull;
4503 	int rc;
4504 
4505 	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4506 	if (rc) {
4507 		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4508 		return rc;
4509 	}
4510 
4511 	gaudi_mmu_prepare(hdev, asid);
4512 
4513 	gaudi_restore_user_registers(hdev);
4514 
4515 	return 0;
4516 }
4517 
4518 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4519 {
4520 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4521 	struct gaudi_device *gaudi = hdev->asic_specific;
4522 	u64 addr = prop->mmu_pgt_addr;
4523 	u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4524 
4525 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4526 		return 0;
4527 
4528 	return gaudi_memset_device_memory(hdev, addr, size, 0);
4529 }
4530 
4531 static void gaudi_restore_phase_topology(struct hl_device *hdev)
4532 {
4533 
4534 }
4535 
4536 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4537 {
4538 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4539 	struct gaudi_device *gaudi = hdev->asic_specific;
4540 	u64 hbm_bar_addr;
4541 	int rc = 0;
4542 
4543 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4544 
4545 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4546 				(hdev->clock_gating_mask &
4547 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4548 
4549 			dev_err_ratelimited(hdev->dev,
4550 				"Can't read register - clock gating is enabled!\n");
4551 			rc = -EFAULT;
4552 		} else {
4553 			*val = RREG32(addr - CFG_BASE);
4554 		}
4555 
4556 	} else if ((addr >= SRAM_BASE_ADDR) &&
4557 			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4558 		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4559 				(addr - SRAM_BASE_ADDR));
4560 	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4561 		u64 bar_base_addr = DRAM_PHYS_BASE +
4562 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4563 
4564 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4565 		if (hbm_bar_addr != U64_MAX) {
4566 			*val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4567 						(addr - bar_base_addr));
4568 
4569 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4570 						hbm_bar_addr);
4571 		}
4572 		if (hbm_bar_addr == U64_MAX)
4573 			rc = -EIO;
4574 	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4575 		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4576 	} else {
4577 		rc = -EFAULT;
4578 	}
4579 
4580 	return rc;
4581 }
4582 
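/*
 * Editor's worked example (illustrative, not part of the driver): for the
 * DRAM range in gaudi_debugfs_read32() above, the HBM BAR window is moved to
 * a base aligned down to the PCI BAR size:
 *
 *	bar_base_addr = DRAM_PHYS_BASE +
 *			(addr & ~(prop->dram_pci_bar_size - 0x1ull));
 *
 * Assuming, purely for illustration, DRAM_PHYS_BASE == 0 and a 512MB
 * (0x20000000) DRAM BAR, addr = 0x20010000 gives bar_base_addr = 0x20000000,
 * so the access lands at offset 0x10000 inside pcie_bar[HBM_BAR_ID]; the
 * previous window base is restored afterwards.
 */
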
4583 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4584 {
4585 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4586 	struct gaudi_device *gaudi = hdev->asic_specific;
4587 	u64 hbm_bar_addr;
4588 	int rc = 0;
4589 
4590 	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4591 
4592 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4593 				(hdev->clock_gating_mask &
4594 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4595 
4596 			dev_err_ratelimited(hdev->dev,
4597 				"Can't write register - clock gating is enabled!\n");
4598 			rc = -EFAULT;
4599 		} else {
4600 			WREG32(addr - CFG_BASE, val);
4601 		}
4602 
4603 	} else if ((addr >= SRAM_BASE_ADDR) &&
4604 			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4605 		writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4606 					(addr - SRAM_BASE_ADDR));
4607 	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4608 		u64 bar_base_addr = DRAM_PHYS_BASE +
4609 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4610 
4611 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4612 		if (hbm_bar_addr != U64_MAX) {
4613 			writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4614 						(addr - bar_base_addr));
4615 
4616 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4617 						hbm_bar_addr);
4618 		}
4619 		if (hbm_bar_addr == U64_MAX)
4620 			rc = -EIO;
4621 	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4622 		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4623 	} else {
4624 		rc = -EFAULT;
4625 	}
4626 
4627 	return rc;
4628 }
4629 
4630 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4631 {
4632 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4633 	struct gaudi_device *gaudi = hdev->asic_specific;
4634 	u64 hbm_bar_addr;
4635 	int rc = 0;
4636 
4637 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4638 
4639 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4640 				(hdev->clock_gating_mask &
4641 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4642 
4643 			dev_err_ratelimited(hdev->dev,
4644 				"Can't read register - clock gating is enabled!\n");
4645 			rc = -EFAULT;
4646 		} else {
4647 			u32 val_l = RREG32(addr - CFG_BASE);
4648 			u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4649 
4650 			*val = (((u64) val_h) << 32) | val_l;
4651 		}
4652 
4653 	} else if ((addr >= SRAM_BASE_ADDR) &&
4654 		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4655 		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4656 				(addr - SRAM_BASE_ADDR));
4657 	} else if (addr <=
4658 		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4659 		u64 bar_base_addr = DRAM_PHYS_BASE +
4660 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4661 
4662 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4663 		if (hbm_bar_addr != U64_MAX) {
4664 			*val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4665 						(addr - bar_base_addr));
4666 
4667 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4668 						hbm_bar_addr);
4669 		}
4670 		if (hbm_bar_addr == U64_MAX)
4671 			rc = -EIO;
4672 	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4673 		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4674 	} else {
4675 		rc = -EFAULT;
4676 	}
4677 
4678 	return rc;
4679 }
4680 
4681 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4682 {
4683 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4684 	struct gaudi_device *gaudi = hdev->asic_specific;
4685 	u64 hbm_bar_addr;
4686 	int rc = 0;
4687 
4688 	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4689 
4690 		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4691 				(hdev->clock_gating_mask &
4692 						GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4693 
4694 			dev_err_ratelimited(hdev->dev,
4695 				"Can't write register - clock gating is enabled!\n");
4696 			rc = -EFAULT;
4697 		} else {
4698 			WREG32(addr - CFG_BASE, lower_32_bits(val));
4699 			WREG32(addr + sizeof(u32) - CFG_BASE,
4700 				upper_32_bits(val));
4701 		}
4702 
4703 	} else if ((addr >= SRAM_BASE_ADDR) &&
4704 		   (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4705 		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4706 					(addr - SRAM_BASE_ADDR));
4707 	} else if (addr <=
4708 		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4709 		u64 bar_base_addr = DRAM_PHYS_BASE +
4710 				(addr & ~(prop->dram_pci_bar_size - 0x1ull));
4711 
4712 		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4713 		if (hbm_bar_addr != U64_MAX) {
4714 			writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4715 						(addr - bar_base_addr));
4716 
4717 			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4718 						hbm_bar_addr);
4719 		}
4720 		if (hbm_bar_addr == U64_MAX)
4721 			rc = -EIO;
4722 	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4723 		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4724 	} else {
4725 		rc = -EFAULT;
4726 	}
4727 
4728 	return rc;
4729 }
4730 
4731 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4732 {
4733 	struct gaudi_device *gaudi = hdev->asic_specific;
4734 
4735 	if (hdev->hard_reset_pending)
4736 		return U64_MAX;
4737 
4738 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
4739 			(addr - gaudi->hbm_bar_cur_addr));
4740 }
4741 
4742 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4743 {
4744 	struct gaudi_device *gaudi = hdev->asic_specific;
4745 
4746 	if (hdev->hard_reset_pending)
4747 		return;
4748 
4749 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4750 			(addr - gaudi->hbm_bar_cur_addr));
4751 }
4752 
4753 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4754 {
4755 	/* mask to zero the MMBP and ASID bits */
4756 	WREG32_AND(reg, ~0x7FF);
4757 	WREG32_OR(reg, asid);
4758 }
4759 
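/*
 * Editor's note (illustrative, not part of the driver): gaudi_mmu_prepare_reg()
 * above does a read-modify-write that clears bits [10:0] of the register (the
 * MMBP bit plus the ASID field, per the comment in the function) and then ORs
 * in the new ASID. For example, with asid == 5 the low bits of the register
 * end up as 0x005: MMU bypass is cleared and the engine's transactions are
 * tagged with ASID 5.
 */
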
4760 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4761 {
4762 	struct gaudi_device *gaudi = hdev->asic_specific;
4763 
4764 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4765 		return;
4766 
4767 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4768 		WARN(1, "asid %u is too big\n", asid);
4769 		return;
4770 	}
4771 
4772 	mutex_lock(&gaudi->clk_gate_mutex);
4773 
4774 	hdev->asic_funcs->disable_clock_gating(hdev);
4775 
4776 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4777 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4778 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4779 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4780 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4781 
4782 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4783 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4784 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4785 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4786 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4787 
4788 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4789 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4790 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4791 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4792 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4793 
4794 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4795 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4796 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4797 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4798 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4799 
4800 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4801 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4802 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4803 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4804 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4805 
4806 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4807 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4808 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4809 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4810 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4811 
4812 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4813 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4814 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4815 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4816 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4817 
4818 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4819 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4820 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4821 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4822 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4823 
4824 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4825 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4826 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4827 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4828 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4829 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4830 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4831 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4832 
4833 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4834 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4835 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4836 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4837 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4838 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4839 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4840 
4841 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4842 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4843 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4844 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4845 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4846 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4847 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4848 
4849 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4850 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4851 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4852 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4853 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4854 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4855 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4856 
4857 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4858 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4859 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4860 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4861 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4862 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4863 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4864 
4865 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4866 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4867 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4868 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4869 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4870 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4871 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4872 
4873 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4874 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4875 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4876 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4877 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4878 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4879 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4880 
4881 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4882 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4883 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4884 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4885 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4886 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4887 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4888 
4889 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4890 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4891 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4892 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4893 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4894 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4895 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4896 
4897 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4898 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4899 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4900 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4901 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4902 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4903 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4904 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4905 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4906 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4907 
4908 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4909 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4910 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4911 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4912 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4913 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4914 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4915 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4916 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4917 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4918 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4919 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4920 
4921 	hdev->asic_funcs->set_clock_gating(hdev);
4922 
4923 	mutex_unlock(&gaudi->clk_gate_mutex);
4924 }
4925 
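/*
 * Submit a driver-internal (patched) CB on DMA QMAN0 and wait for its
 * completion: the MSG_PROT packet at the end of the CB writes a known fence
 * value to a small DMA-pool buffer which is polled here. The PCI DMA core
 * protection bit is set before submission and cleared again afterwards,
 * and the job is refused if the device is not idle.
 */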
4926 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4927 		struct hl_cs_job *job)
4928 {
4929 	struct packet_msg_prot *fence_pkt;
4930 	u32 *fence_ptr;
4931 	dma_addr_t fence_dma_addr;
4932 	struct hl_cb *cb;
4933 	u32 tmp, timeout, dma_offset;
4934 	int rc;
4935 
4936 	if (hdev->pldm)
4937 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4938 	else
4939 		timeout = HL_DEVICE_TIMEOUT_USEC;
4940 
4941 	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4942 		dev_err_ratelimited(hdev->dev,
4943 			"Can't send driver job on QMAN0 because the device is not idle\n");
4944 		return -EBUSY;
4945 	}
4946 
4947 	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4948 							&fence_dma_addr);
4949 	if (!fence_ptr) {
4950 		dev_err(hdev->dev,
4951 			"Failed to allocate fence memory for QMAN0\n");
4952 		return -ENOMEM;
4953 	}
4954 
4955 	cb = job->patched_cb;
4956 
4957 	fence_pkt = cb->kernel_address +
4958 			job->job_cb_size - sizeof(struct packet_msg_prot);
4959 
4960 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4961 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4962 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4963 
4964 	fence_pkt->ctl = cpu_to_le32(tmp);
4965 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4966 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4967 
4968 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4969 
4970 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4971 
4972 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4973 					job->job_cb_size, cb->bus_address);
4974 	if (rc) {
4975 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4976 		goto free_fence_ptr;
4977 	}
4978 
4979 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4980 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4981 				timeout, true);
4982 
4983 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4984 
4985 	if (rc == -ETIMEDOUT) {
4986 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4987 		goto free_fence_ptr;
4988 	}
4989 
4990 free_fence_ptr:
4991 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4992 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4993 
4994 	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4995 					fence_dma_addr);
4996 	return rc;
4997 }
4998 
4999 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
5000 {
5001 	if (event_type >= GAUDI_EVENT_SIZE)
5002 		goto event_not_supported;
5003 
5004 	if (!gaudi_irq_map_table[event_type].valid)
5005 		goto event_not_supported;
5006 
5007 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
5008 
5009 	return;
5010 
5011 event_not_supported:
5012 	snprintf(desc, size, "N/A");
5013 }
5014 
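/*
 * Each DMA_IF RAZWI initiator ID is shared by two DMA cores. Disambiguate
 * by checking which of the two cores has the relevant HBW read/write error
 * bit set in its ERR_CAUSE register; if the check is inconclusive, report
 * both candidates.
 */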
5015 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
5016 							u32 x_y, bool is_write)
5017 {
5018 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
5019 
5020 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5021 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5022 
5023 	switch (x_y) {
5024 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5025 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5026 		dma_id[0] = 0;
5027 		dma_id[1] = 2;
5028 		break;
5029 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5030 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5031 		dma_id[0] = 1;
5032 		dma_id[1] = 3;
5033 		break;
5034 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5035 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5036 		dma_id[0] = 4;
5037 		dma_id[1] = 6;
5038 		break;
5039 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5040 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5041 		dma_id[0] = 5;
5042 		dma_id[1] = 7;
5043 		break;
5044 	default:
5045 		goto unknown_initiator;
5046 	}
5047 
5048 	for (i = 0 ; i < 2 ; i++) {
5049 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5050 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5051 	}
5052 
5053 	switch (x_y) {
5054 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5055 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5056 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5057 			return "DMA0";
5058 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5059 			return "DMA2";
5060 		else
5061 			return "DMA0 or DMA2";
5062 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5063 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5064 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5065 			return "DMA1";
5066 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5067 			return "DMA3";
5068 		else
5069 			return "DMA1 or DMA3";
5070 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5071 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5072 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5073 			return "DMA4";
5074 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5075 			return "DMA6";
5076 		else
5077 			return "DMA4 or DMA6";
5078 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5079 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5080 		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5081 			return "DMA5";
5082 		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5083 			return "DMA7";
5084 		else
5085 			return "DMA5 or DMA7";
5086 	}
5087 
5088 unknown_initiator:
5089 	return "unknown initiator";
5090 }
5091 
5092 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5093 							bool is_write)
5094 {
5095 	u32 val, x_y, axi_id;
5096 
5097 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5098 				RREG32(mmMMU_UP_RAZWI_READ_ID);
5099 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5100 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5101 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5102 			RAZWI_INITIATOR_AXI_ID_SHIFT);
5103 
5104 	switch (x_y) {
5105 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5106 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5107 			return "TPC0";
5108 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5109 			return "NIC0";
5110 		break;
5111 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
5112 		return "TPC1";
5113 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5114 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5115 		return "MME0";
5116 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5117 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5118 		return "MME1";
5119 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
5120 		return "TPC2";
5121 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5122 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5123 			return "TPC3";
5124 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5125 			return "PCI";
5126 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5127 			return "CPU";
5128 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5129 			return "PSOC";
5130 		break;
5131 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5132 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5133 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5134 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5135 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5136 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5137 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5138 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5139 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5140 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5141 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5142 			return "TPC4";
5143 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5144 			return "NIC1";
5145 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5146 			return "NIC2";
5147 		break;
5148 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
5149 		return "TPC5";
5150 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5151 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5152 		return "MME2";
5153 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5154 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5155 		return "MME3";
5156 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
5157 		return "TPC6";
5158 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5159 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5160 			return "TPC7";
5161 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5162 			return "NIC4";
5163 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5164 			return "NIC5";
5165 		break;
5166 	default:
5167 		break;
5168 	}
5169 
5170 	dev_err(hdev->dev,
5171 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5172 		val,
5173 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5174 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5175 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5176 			RAZWI_INITIATOR_AXI_ID_MASK);
5177 
5178 	return "unknown initiator";
5179 }
5180 
5181 static void gaudi_print_razwi_info(struct hl_device *hdev)
5182 {
5183 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5184 		dev_err_ratelimited(hdev->dev,
5185 			"RAZWI event caused by illegal write of %s\n",
5186 			gaudi_get_razwi_initiator_name(hdev, true));
5187 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5188 	}
5189 
5190 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5191 		dev_err_ratelimited(hdev->dev,
5192 			"RAZWI event caused by illegal read of %s\n",
5193 			gaudi_get_razwi_initiator_name(hdev, false));
5194 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5195 	}
5196 }
5197 
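/*
 * Report MMU page-fault and access-error captures. Each capture register
 * holds bits 49:32 of the faulting VA plus a valid bit, while the lower
 * 32 bits come from the companion _VA register. Captures are cleared once
 * logged.
 */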
5198 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5199 {
5200 	struct gaudi_device *gaudi = hdev->asic_specific;
5201 	u64 addr;
5202 	u32 val;
5203 
5204 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5205 		return;
5206 
5207 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5208 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5209 		addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5210 		addr <<= 32;
5211 		addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5212 
5213 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5214 					addr);
5215 
5216 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5217 	}
5218 
5219 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5220 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5221 		addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5222 		addr <<= 32;
5223 		addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5224 
5225 		dev_err_ratelimited(hdev->dev,
5226 				"MMU access error on va 0x%llx\n", addr);
5227 
5228 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5229 	}
5230 }
5231 
5232 /*
5233  *  +-------------------+------------------------------------------------------+
5234  *  | Configuration Reg |                     Description                      |
5235  *  |      Address      |                                                      |
5236  *  +-------------------+------------------------------------------------------+
5237  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
5238  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
5239  *  |                   |0xF34 memory wrappers 63:32                           |
5240  *  |                   |0xF38 memory wrappers 95:64                           |
5241  *  |                   |0xF3C memory wrappers 127:96                          |
5242  *  +-------------------+------------------------------------------------------+
5243  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
5244  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
5245  *  |                   |0xF44 memory wrappers 63:32                           |
5246  *  |                   |0xF48 memory wrappers 95:64                           |
5247  *  |                   |0xF4C memory wrappers 127:96                          |
5248  *  +-------------------+------------------------------------------------------+
5249  */
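/*
 * Extraction flow: find the single memory wrapper whose error bit is set,
 * select it through the ECC_MEM_SEL register, read back the captured
 * address and syndrome, and clear the SERR/DERR indication.
 */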
5250 static int gaudi_extract_ecc_info(struct hl_device *hdev,
5251 		struct ecc_info_extract_params *params, u64 *ecc_address,
5252 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
5253 {
5254 	struct gaudi_device *gaudi = hdev->asic_specific;
5255 	u32 i, num_mem_regs, reg, err_bit;
5256 	u64 err_addr, err_word = 0;
5257 	int rc = 0;
5258 
5259 	num_mem_regs = params->num_memories / 32 +
5260 			((params->num_memories % 32) ? 1 : 0);
5261 
5262 	if (params->block_address >= CFG_BASE)
5263 		params->block_address -= CFG_BASE;
5264 
5265 	if (params->derr)
5266 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
5267 	else
5268 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
5269 
5270 	if (params->disable_clock_gating) {
5271 		mutex_lock(&gaudi->clk_gate_mutex);
5272 		hdev->asic_funcs->disable_clock_gating(hdev);
5273 	}
5274 
5275 	/* Set invalid wrapper index */
5276 	*memory_wrapper_idx = 0xFF;
5277 
5278 	/* Iterate through memory wrappers, a single bit must be set */
5279 	for (i = 0 ; i < num_mem_regs ; i++) {
5280 		/* each register covers 32 memory wrappers */
5281 		err_word = RREG32(err_addr + i * 4);
5282 		if (err_word) {
5283 			err_bit = __ffs(err_word);
5284 			*memory_wrapper_idx = err_bit + (32 * i);
5285 			break;
5286 		}
5287 	}
5288 
5289 	if (*memory_wrapper_idx == 0xFF) {
5290 		dev_err(hdev->dev, "ECC error information cannot be found\n");
5291 		rc = -EINVAL;
5292 		goto enable_clk_gate;
5293 	}
5294 
5295 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
5296 			*memory_wrapper_idx);
5297 
5298 	*ecc_address =
5299 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
5300 	*ecc_syndrom =
5301 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
5302 
5303 	/* Clear error indication */
5304 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
5305 	if (params->derr)
5306 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
5307 	else
5308 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
5309 
5310 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
5311 
5312 enable_clk_gate:
5313 	if (params->disable_clock_gating) {
5314 		hdev->asic_funcs->set_clock_gating(hdev);
5315 
5316 		mutex_unlock(&gaudi->clk_gate_mutex);
5317 	}
5318 
5319 	return rc;
5320 }
5321 
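/*
 * Dump and clear the error causes of a single QMAN: iterate over the
 * stream CPs plus the lower CP through their GLBL_STS1 registers (which
 * are write-1-to-clear) and then decode the arbiter error cause register.
 */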
5322 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5323 					  const char *qm_name,
5324 					  u64 glbl_sts_addr,
5325 					  u64 arb_err_addr)
5326 {
5327 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5328 	char reg_desc[32];
5329 
5330 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
5331 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5332 		glbl_sts_clr_val = 0;
5333 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5334 
5335 		if (!glbl_sts_val)
5336 			continue;
5337 
5338 		if (i == QMAN_STREAMS)
5339 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5340 		else
5341 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5342 
5343 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5344 			if (glbl_sts_val & BIT(j)) {
5345 				dev_err_ratelimited(hdev->dev,
5346 						"%s %s. err cause: %s\n",
5347 						qm_name, reg_desc,
5348 						gaudi_qman_error_cause[j]);
5349 				glbl_sts_clr_val |= BIT(j);
5350 			}
5351 		}
5352 
5353 		/* Write 1 clear errors */
5354 		WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5355 	}
5356 
5357 	arb_err_val = RREG32(arb_err_addr);
5358 
5359 	if (!arb_err_val)
5360 		return;
5361 
5362 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5363 		if (arb_err_val & BIT(j)) {
5364 			dev_err_ratelimited(hdev->dev,
5365 					"%s ARB_ERR. err cause: %s\n",
5366 					qm_name,
5367 					gaudi_qman_arb_error_cause[j]);
5368 		}
5369 	}
5370 }
5371 
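/*
 * Handle an ECC event. For blocks whose ECC details are delivered by the
 * firmware, the address, syndrome and wrapper index are taken straight
 * from the EQ entry; for TPC and MME blocks they are extracted from the
 * block's ECC capture registers via gaudi_extract_ecc_info().
 */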
5372 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
5373 		struct hl_eq_ecc_data *ecc_data)
5374 {
5375 	struct ecc_info_extract_params params;
5376 	u64 ecc_address = 0, ecc_syndrom = 0;
5377 	u8 index, memory_wrapper_idx = 0;
5378 	bool extract_info_from_fw;
5379 	int rc;
5380 
5381 	switch (event_type) {
5382 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
5383 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
5384 		extract_info_from_fw = true;
5385 		break;
5386 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5387 		index = event_type - GAUDI_EVENT_TPC0_SERR;
5388 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5389 		params.num_memories = 90;
5390 		params.derr = false;
5391 		params.disable_clock_gating = true;
5392 		extract_info_from_fw = false;
5393 		break;
5394 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5395 		index = event_type - GAUDI_EVENT_TPC0_DERR;
5396 		params.block_address =
5397 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5398 		params.num_memories = 90;
5399 		params.derr = true;
5400 		params.disable_clock_gating = true;
5401 		extract_info_from_fw = false;
5402 		break;
5403 	case GAUDI_EVENT_MME0_ACC_SERR:
5404 	case GAUDI_EVENT_MME1_ACC_SERR:
5405 	case GAUDI_EVENT_MME2_ACC_SERR:
5406 	case GAUDI_EVENT_MME3_ACC_SERR:
5407 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5408 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5409 		params.num_memories = 128;
5410 		params.derr = false;
5411 		params.disable_clock_gating = true;
5412 		extract_info_from_fw = false;
5413 		break;
5414 	case GAUDI_EVENT_MME0_ACC_DERR:
5415 	case GAUDI_EVENT_MME1_ACC_DERR:
5416 	case GAUDI_EVENT_MME2_ACC_DERR:
5417 	case GAUDI_EVENT_MME3_ACC_DERR:
5418 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5419 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5420 		params.num_memories = 128;
5421 		params.derr = true;
5422 		params.disable_clock_gating = true;
5423 		extract_info_from_fw = false;
5424 		break;
5425 	case GAUDI_EVENT_MME0_SBAB_SERR:
5426 	case GAUDI_EVENT_MME1_SBAB_SERR:
5427 	case GAUDI_EVENT_MME2_SBAB_SERR:
5428 	case GAUDI_EVENT_MME3_SBAB_SERR:
5429 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5430 		params.block_address =
5431 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5432 		params.num_memories = 33;
5433 		params.derr = false;
5434 		params.disable_clock_gating = true;
5435 		extract_info_from_fw = false;
5436 		break;
5437 	case GAUDI_EVENT_MME0_SBAB_DERR:
5438 	case GAUDI_EVENT_MME1_SBAB_DERR:
5439 	case GAUDI_EVENT_MME2_SBAB_DERR:
5440 	case GAUDI_EVENT_MME3_SBAB_DERR:
5441 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5442 		params.block_address =
5443 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5444 		params.num_memories = 33;
5445 		params.derr = true;
5446 		params.disable_clock_gating = true;
5447 		extract_info_from_fw = false;
5448 		break;
5449 	default:
5450 		return;
5451 	}
5452 
5453 	if (extract_info_from_fw) {
5454 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
5455 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
5456 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
5457 	} else {
5458 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
5459 				&ecc_syndrom, &memory_wrapper_idx);
5460 		if (rc)
5461 			return;
5462 	}
5463 
5464 	dev_err(hdev->dev,
5465 		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
5466 		ecc_address, ecc_syndrom, memory_wrapper_idx);
5467 }
5468 
5469 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5470 {
5471 	u64 glbl_sts_addr, arb_err_addr;
5472 	u8 index;
5473 	char desc[32];
5474 
5475 	switch (event_type) {
5476 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5477 		index = event_type - GAUDI_EVENT_TPC0_QM;
5478 		glbl_sts_addr =
5479 			mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5480 		arb_err_addr =
5481 			mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5482 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5483 		break;
5484 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5485 		index = event_type - GAUDI_EVENT_MME0_QM;
5486 		glbl_sts_addr =
5487 			mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5488 		arb_err_addr =
5489 			mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5490 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5491 		break;
5492 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5493 		index = event_type - GAUDI_EVENT_DMA0_QM;
5494 		glbl_sts_addr =
5495 			mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5496 		arb_err_addr =
5497 			mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5498 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5499 		break;
5500 	default:
5501 		return;
5502 	}
5503 
5504 	gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5505 }
5506 
5507 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5508 					bool razwi)
5509 {
5510 	char desc[64] = "";
5511 
5512 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
5513 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5514 		event_type, desc);
5515 
5516 	if (razwi) {
5517 		gaudi_print_razwi_info(hdev);
5518 		gaudi_print_mmu_error_info(hdev);
5519 	}
5520 }
5521 
5522 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5523 {
5524 	struct gaudi_device *gaudi = hdev->asic_specific;
5525 
5526 	/* Unmask all IRQs since some could have been received
5527 	 * during the soft reset
5528 	 */
5529 	return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
5530 }
5531 
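/*
 * Scan a single HBM device for pending interrupts. For every channel, the
 * parity/ECC status of both pseudo-channels is decoded and logged together
 * with the first-error address/type and the SEC/DED counters, and the
 * per-channel interrupts are then cleared. The MC SRAM SERR/DERR summary
 * registers are checked as well. Returns non-zero if anything was found.
 */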
5532 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5533 {
5534 	int ch, err = 0;
5535 	u32 base, val, val2;
5536 
5537 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
5538 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5539 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5540 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
5541 		if (val) {
5542 			err = 1;
5543 			dev_err(hdev->dev,
5544 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5545 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5546 				(val >> 2) & 0x1, (val >> 3) & 0x1,
5547 				(val >> 4) & 0x1);
5548 
5549 			val2 = RREG32(base + ch * 0x1000 + 0x060);
5550 			dev_err(hdev->dev,
5551 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5552 				device, ch * 2,
5553 				RREG32(base + ch * 0x1000 + 0x064),
5554 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5555 				(val2 & 0xFF0000) >> 16,
5556 				(val2 & 0xFF000000) >> 24);
5557 		}
5558 
5559 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5560 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
5561 		if (val) {
5562 			err = 1;
5563 			dev_err(hdev->dev,
5564 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5565 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5566 				(val >> 2) & 0x1, (val >> 3) & 0x1,
5567 				(val >> 4) & 0x1);
5568 
5569 			val2 = RREG32(base + ch * 0x1000 + 0x070);
5570 			dev_err(hdev->dev,
5571 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5572 				device, ch * 2 + 1,
5573 				RREG32(base + ch * 0x1000 + 0x074),
5574 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5575 				(val2 & 0xFF0000) >> 16,
5576 				(val2 & 0xFF000000) >> 24);
5577 		}
5578 
5579 		/* Clear interrupts */
5580 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5581 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5582 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5583 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5584 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5585 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5586 	}
5587 
5588 	val  = RREG32(base + 0x8F30);
5589 	val2 = RREG32(base + 0x8F34);
5590 	if (val | val2) {
5591 		err = 1;
5592 		dev_err(hdev->dev,
5593 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5594 			device, val, val2);
5595 	}
5596 	val  = RREG32(base + 0x8F40);
5597 	val2 = RREG32(base + 0x8F44);
5598 	if (val | val2) {
5599 		err = 1;
5600 		dev_err(hdev->dev,
5601 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5602 			device, val, val2);
5603 	}
5604 
5605 	return err;
5606 }
5607 
5608 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5609 {
5610 	switch (hbm_event_type) {
5611 	case GAUDI_EVENT_HBM0_SPI_0:
5612 	case GAUDI_EVENT_HBM0_SPI_1:
5613 		return 0;
5614 	case GAUDI_EVENT_HBM1_SPI_0:
5615 	case GAUDI_EVENT_HBM1_SPI_1:
5616 		return 1;
5617 	case GAUDI_EVENT_HBM2_SPI_0:
5618 	case GAUDI_EVENT_HBM2_SPI_1:
5619 		return 2;
5620 	case GAUDI_EVENT_HBM3_SPI_0:
5621 	case GAUDI_EVENT_HBM3_SPI_1:
5622 		return 3;
5623 	default:
5624 		break;
5625 	}
5626 
5627 	/* Should never happen */
5628 	return 0;
5629 }
5630 
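/*
 * Read, log and clear the TPC_INTR_CAUSE register of the given TPC.
 * Returns true if the cause is a QM error, in which case the caller is
 * expected to reset the device.
 */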
5631 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5632 					char *interrupt_name)
5633 {
5634 	struct gaudi_device *gaudi = hdev->asic_specific;
5635 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5636 	bool soft_reset_required = false;
5637 
5638 	/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
5639 	 * gating, and thus cannot be done in CPU-CP and should be done instead
5640 	 * by the driver.
5641 	 */
5642 
5643 	mutex_lock(&gaudi->clk_gate_mutex);
5644 
5645 	hdev->asic_funcs->disable_clock_gating(hdev);
5646 
5647 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5648 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5649 
5650 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5651 		if (tpc_interrupts_cause & BIT(i)) {
5652 			dev_err_ratelimited(hdev->dev,
5653 					"TPC%d_%s interrupt cause: %s\n",
5654 					tpc_id, interrupt_name,
5655 					gaudi_tpc_interrupts_cause[i]);
5656 			/* If this is QM error, we need to soft-reset */
5657 			if (i == 15)
5658 				soft_reset_required = true;
5659 		}
5660 
5661 	/* Clear interrupts */
5662 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5663 
5664 	hdev->asic_funcs->set_clock_gating(hdev);
5665 
5666 	mutex_unlock(&gaudi->clk_gate_mutex);
5667 
5668 	return soft_reset_required;
5669 }
5670 
5671 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5672 {
5673 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5674 }
5675 
5676 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5677 {
5678 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5679 }
5680 
5681 static void gaudi_print_clk_change_info(struct hl_device *hdev,
5682 					u16 event_type)
5683 {
5684 	switch (event_type) {
5685 	case GAUDI_EVENT_FIX_POWER_ENV_S:
5686 		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
5687 		dev_info_ratelimited(hdev->dev,
5688 			"Clock throttling due to power consumption\n");
5689 		break;
5690 
5691 	case GAUDI_EVENT_FIX_POWER_ENV_E:
5692 		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
5693 		dev_info_ratelimited(hdev->dev,
5694 			"Power envelope is safe, back to optimal clock\n");
5695 		break;
5696 
5697 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5698 		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
5699 		dev_info_ratelimited(hdev->dev,
5700 			"Clock throttling due to overheating\n");
5701 		break;
5702 
5703 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5704 		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
5705 		dev_info_ratelimited(hdev->dev,
5706 			"Thermal envelope is safe, back to optimal clock\n");
5707 		break;
5708 
5709 	default:
5710 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
5711 			event_type);
5712 		break;
5713 	}
5714 }
5715 
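/*
 * Main event-queue handler: decode the event type from the EQ entry header,
 * update the event statistics and dispatch according to the event class
 * (ECC, HBM, TPC, QMAN, clock throttling and so on). Fatal events trigger
 * a hard reset when hard_reset_on_fw_events is set; recoverable events are
 * logged and their IRQ is unmasked again in the firmware.
 */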
5716 static void gaudi_handle_eqe(struct hl_device *hdev,
5717 				struct hl_eq_entry *eq_entry)
5718 {
5719 	struct gaudi_device *gaudi = hdev->asic_specific;
5720 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5721 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5722 			>> EQ_CTL_EVENT_TYPE_SHIFT);
5723 	u8 cause;
5724 	bool reset_required;
5725 
5726 	if (event_type >= GAUDI_EVENT_SIZE) {
5727 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
5728 				event_type, GAUDI_EVENT_SIZE - 1);
5729 		return;
5730 	}
5731 
5732 	gaudi->events_stat[event_type]++;
5733 	gaudi->events_stat_aggregate[event_type]++;
5734 
5735 	switch (event_type) {
5736 	case GAUDI_EVENT_PCIE_CORE_DERR:
5737 	case GAUDI_EVENT_PCIE_IF_DERR:
5738 	case GAUDI_EVENT_PCIE_PHY_DERR:
5739 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5740 	case GAUDI_EVENT_MME0_ACC_DERR:
5741 	case GAUDI_EVENT_MME0_SBAB_DERR:
5742 	case GAUDI_EVENT_MME1_ACC_DERR:
5743 	case GAUDI_EVENT_MME1_SBAB_DERR:
5744 	case GAUDI_EVENT_MME2_ACC_DERR:
5745 	case GAUDI_EVENT_MME2_SBAB_DERR:
5746 	case GAUDI_EVENT_MME3_ACC_DERR:
5747 	case GAUDI_EVENT_MME3_SBAB_DERR:
5748 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5749 		fallthrough;
5750 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
5751 	case GAUDI_EVENT_PSOC_MEM_DERR:
5752 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5753 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5754 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5755 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5756 	case GAUDI_EVENT_MMU_DERR:
5757 		gaudi_print_irq_info(hdev, event_type, true);
5758 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5759 		if (hdev->hard_reset_on_fw_events)
5760 			hl_device_reset(hdev, true, false);
5761 		break;
5762 
5763 	case GAUDI_EVENT_GIC500:
5764 	case GAUDI_EVENT_AXI_ECC:
5765 	case GAUDI_EVENT_L2_RAM_ECC:
5766 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5767 		gaudi_print_irq_info(hdev, event_type, false);
5768 		if (hdev->hard_reset_on_fw_events)
5769 			hl_device_reset(hdev, true, false);
5770 		break;
5771 
5772 	case GAUDI_EVENT_HBM0_SPI_0:
5773 	case GAUDI_EVENT_HBM1_SPI_0:
5774 	case GAUDI_EVENT_HBM2_SPI_0:
5775 	case GAUDI_EVENT_HBM3_SPI_0:
5776 		gaudi_print_irq_info(hdev, event_type, false);
5777 		gaudi_hbm_read_interrupts(hdev,
5778 					  gaudi_hbm_event_to_dev(event_type));
5779 		if (hdev->hard_reset_on_fw_events)
5780 			hl_device_reset(hdev, true, false);
5781 		break;
5782 
5783 	case GAUDI_EVENT_HBM0_SPI_1:
5784 	case GAUDI_EVENT_HBM1_SPI_1:
5785 	case GAUDI_EVENT_HBM2_SPI_1:
5786 	case GAUDI_EVENT_HBM3_SPI_1:
5787 		gaudi_print_irq_info(hdev, event_type, false);
5788 		gaudi_hbm_read_interrupts(hdev,
5789 					  gaudi_hbm_event_to_dev(event_type));
5790 		break;
5791 
5792 	case GAUDI_EVENT_TPC0_DEC:
5793 	case GAUDI_EVENT_TPC1_DEC:
5794 	case GAUDI_EVENT_TPC2_DEC:
5795 	case GAUDI_EVENT_TPC3_DEC:
5796 	case GAUDI_EVENT_TPC4_DEC:
5797 	case GAUDI_EVENT_TPC5_DEC:
5798 	case GAUDI_EVENT_TPC6_DEC:
5799 	case GAUDI_EVENT_TPC7_DEC:
5800 		gaudi_print_irq_info(hdev, event_type, true);
5801 		reset_required = gaudi_tpc_read_interrupts(hdev,
5802 					tpc_dec_event_to_tpc_id(event_type),
5803 					"AXI_SLV_DEC_Error");
5804 		if (reset_required) {
5805 			dev_err(hdev->dev, "hard reset required due to %s\n",
5806 				gaudi_irq_map_table[event_type].name);
5807 
5808 			if (hdev->hard_reset_on_fw_events)
5809 				hl_device_reset(hdev, true, false);
5810 		} else {
5811 			hl_fw_unmask_irq(hdev, event_type);
5812 		}
5813 		break;
5814 
5815 	case GAUDI_EVENT_TPC0_KRN_ERR:
5816 	case GAUDI_EVENT_TPC1_KRN_ERR:
5817 	case GAUDI_EVENT_TPC2_KRN_ERR:
5818 	case GAUDI_EVENT_TPC3_KRN_ERR:
5819 	case GAUDI_EVENT_TPC4_KRN_ERR:
5820 	case GAUDI_EVENT_TPC5_KRN_ERR:
5821 	case GAUDI_EVENT_TPC6_KRN_ERR:
5822 	case GAUDI_EVENT_TPC7_KRN_ERR:
5823 		gaudi_print_irq_info(hdev, event_type, true);
5824 		reset_required = gaudi_tpc_read_interrupts(hdev,
5825 					tpc_krn_event_to_tpc_id(event_type),
5826 					"KRN_ERR");
5827 		if (reset_required) {
5828 			dev_err(hdev->dev, "hard reset required due to %s\n",
5829 				gaudi_irq_map_table[event_type].name);
5830 
5831 			if (hdev->hard_reset_on_fw_events)
5832 				hl_device_reset(hdev, true, false);
5833 		} else {
5834 			hl_fw_unmask_irq(hdev, event_type);
5835 		}
5836 		break;
5837 
5838 	case GAUDI_EVENT_PCIE_CORE_SERR:
5839 	case GAUDI_EVENT_PCIE_IF_SERR:
5840 	case GAUDI_EVENT_PCIE_PHY_SERR:
5841 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5842 	case GAUDI_EVENT_MME0_ACC_SERR:
5843 	case GAUDI_EVENT_MME0_SBAB_SERR:
5844 	case GAUDI_EVENT_MME1_ACC_SERR:
5845 	case GAUDI_EVENT_MME1_SBAB_SERR:
5846 	case GAUDI_EVENT_MME2_ACC_SERR:
5847 	case GAUDI_EVENT_MME2_SBAB_SERR:
5848 	case GAUDI_EVENT_MME3_ACC_SERR:
5849 	case GAUDI_EVENT_MME3_SBAB_SERR:
5850 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5851 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
5852 	case GAUDI_EVENT_PSOC_MEM_SERR:
5853 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5854 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5855 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5856 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5857 		fallthrough;
5858 	case GAUDI_EVENT_MMU_SERR:
5859 		gaudi_print_irq_info(hdev, event_type, true);
5860 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5861 		hl_fw_unmask_irq(hdev, event_type);
5862 		break;
5863 
5864 	case GAUDI_EVENT_PCIE_DEC:
5865 	case GAUDI_EVENT_MME0_WBC_RSP:
5866 	case GAUDI_EVENT_MME0_SBAB0_RSP:
5867 	case GAUDI_EVENT_MME1_WBC_RSP:
5868 	case GAUDI_EVENT_MME1_SBAB0_RSP:
5869 	case GAUDI_EVENT_MME2_WBC_RSP:
5870 	case GAUDI_EVENT_MME2_SBAB0_RSP:
5871 	case GAUDI_EVENT_MME3_WBC_RSP:
5872 	case GAUDI_EVENT_MME3_SBAB0_RSP:
5873 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
5874 	case GAUDI_EVENT_PSOC_AXI_DEC:
5875 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
5876 	case GAUDI_EVENT_MMU_PAGE_FAULT:
5877 	case GAUDI_EVENT_MMU_WR_PERM:
5878 	case GAUDI_EVENT_RAZWI_OR_ADC:
5879 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5880 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5881 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5882 		fallthrough;
5883 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5884 		gaudi_print_irq_info(hdev, event_type, true);
5885 		gaudi_handle_qman_err(hdev, event_type);
5886 		hl_fw_unmask_irq(hdev, event_type);
5887 		break;
5888 
5889 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5890 		gaudi_print_irq_info(hdev, event_type, true);
5891 		if (hdev->hard_reset_on_fw_events)
5892 			hl_device_reset(hdev, true, false);
5893 		break;
5894 
5895 	case GAUDI_EVENT_TPC0_BMON_SPMU:
5896 	case GAUDI_EVENT_TPC1_BMON_SPMU:
5897 	case GAUDI_EVENT_TPC2_BMON_SPMU:
5898 	case GAUDI_EVENT_TPC3_BMON_SPMU:
5899 	case GAUDI_EVENT_TPC4_BMON_SPMU:
5900 	case GAUDI_EVENT_TPC5_BMON_SPMU:
5901 	case GAUDI_EVENT_TPC6_BMON_SPMU:
5902 	case GAUDI_EVENT_TPC7_BMON_SPMU:
5903 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5904 		gaudi_print_irq_info(hdev, event_type, false);
5905 		hl_fw_unmask_irq(hdev, event_type);
5906 		break;
5907 
5908 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5909 		gaudi_print_clk_change_info(hdev, event_type);
5910 		hl_fw_unmask_irq(hdev, event_type);
5911 		break;
5912 
5913 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
5914 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5915 		dev_err(hdev->dev,
5916 			"Received high temp H/W interrupt %d (cause %d)\n",
5917 			event_type, cause);
5918 		break;
5919 
5920 	default:
5921 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5922 				event_type);
5923 		break;
5924 	}
5925 }
5926 
5927 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5928 					u32 *size)
5929 {
5930 	struct gaudi_device *gaudi = hdev->asic_specific;
5931 
5932 	if (aggregate) {
5933 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
5934 		return gaudi->events_stat_aggregate;
5935 	}
5936 
5937 	*size = (u32) sizeof(gaudi->events_stat);
5938 	return gaudi->events_stat;
5939 }
5940 
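/*
 * Invalidate the entire MMU cache (STLB L0 and L1): advance the producer
 * index in STLB_CACHE_INV and poll STLB_INV_PS until the hardware reports
 * completion. A timeout is treated as fatal and escalates to a device
 * reset.
 */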
5941 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5942 					u32 flags)
5943 {
5944 	struct gaudi_device *gaudi = hdev->asic_specific;
5945 	u32 status, timeout_usec;
5946 	int rc;
5947 
5948 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5949 		hdev->hard_reset_pending)
5950 		return 0;
5951 
5952 	if (hdev->pldm)
5953 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5954 	else
5955 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5956 
5957 	mutex_lock(&hdev->mmu_cache_lock);
5958 
5959 	/* L0 & L1 invalidation */
5960 	WREG32(mmSTLB_INV_PS, 3);
5961 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
5962 	WREG32(mmSTLB_INV_PS, 2);
5963 
5964 	rc = hl_poll_timeout(
5965 		hdev,
5966 		mmSTLB_INV_PS,
5967 		status,
5968 		!status,
5969 		1000,
5970 		timeout_usec);
5971 
5972 	WREG32(mmSTLB_INV_SET, 0);
5973 
5974 	mutex_unlock(&hdev->mmu_cache_lock);
5975 
5976 	if (rc) {
5977 		dev_err_ratelimited(hdev->dev,
5978 					"MMU cache invalidation timeout\n");
5979 		hl_device_reset(hdev, true, false);
5980 	}
5981 
5982 	return rc;
5983 }
5984 
5985 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
5986 				bool is_hard, u32 asid, u64 va, u64 size)
5987 {
5988 	struct gaudi_device *gaudi = hdev->asic_specific;
5989 	u32 status, timeout_usec;
5990 	u32 inv_data;
5991 	u32 pi;
5992 	int rc;
5993 
5994 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5995 		hdev->hard_reset_pending)
5996 		return 0;
5997 
5998 	mutex_lock(&hdev->mmu_cache_lock);
5999 
6000 	if (hdev->pldm)
6001 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6002 	else
6003 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6004 
6005 	/*
6006 	 * TODO: currently invalidate entire L0 & L1 as in regular hard
6007 	 * invalidation. Need to apply invalidation of specific cache
6008 	 * lines with mask of ASID & VA & size.
6009 	 * Note that L1 will be flushed entirely in any case.
6010 	 */
6011 
6012 	/* L0 & L1 invalidation */
6013 	inv_data = RREG32(mmSTLB_CACHE_INV);
6014 	/* PI is 8 bit */
6015 	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
6016 	WREG32(mmSTLB_CACHE_INV,
6017 		(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
6018 
6019 	rc = hl_poll_timeout(
6020 		hdev,
6021 		mmSTLB_INV_CONSUMER_INDEX,
6022 		status,
6023 		status == pi,
6024 		1000,
6025 		timeout_usec);
6026 
6027 	mutex_unlock(&hdev->mmu_cache_lock);
6028 
6029 	if (rc) {
6030 		dev_err_ratelimited(hdev->dev,
6031 					"MMU cache invalidation timeout\n");
6032 		hl_device_reset(hdev, true, false);
6033 	}
6034 
6035 	return rc;
6036 }
6037 
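/*
 * Bind an ASID to its hop-0 page table: write the ASID and the split
 * physical address of the hop-0 table, then poll MMU_BUSY until the
 * hardware acknowledges the update.
 */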
6038 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6039 					u32 asid, u64 phys_addr)
6040 {
6041 	u32 status, timeout_usec;
6042 	int rc;
6043 
6044 	if (hdev->pldm)
6045 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6046 	else
6047 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6048 
6049 	WREG32(MMU_ASID, asid);
6050 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6051 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6052 	WREG32(MMU_BUSY, 0x80000000);
6053 
6054 	rc = hl_poll_timeout(
6055 		hdev,
6056 		MMU_BUSY,
6057 		status,
6058 		!(status & 0x80000000),
6059 		1000,
6060 		timeout_usec);
6061 
6062 	if (rc) {
6063 		dev_err(hdev->dev,
6064 			"Timeout during MMU hop0 config of asid %d\n", asid);
6065 		return rc;
6066 	}
6067 
6068 	return 0;
6069 }
6070 
6071 static int gaudi_send_heartbeat(struct hl_device *hdev)
6072 {
6073 	struct gaudi_device *gaudi = hdev->asic_specific;
6074 
6075 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6076 		return 0;
6077 
6078 	return hl_fw_send_heartbeat(hdev);
6079 }
6080 
6081 static int gaudi_cpucp_info_get(struct hl_device *hdev)
6082 {
6083 	struct gaudi_device *gaudi = hdev->asic_specific;
6084 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6085 	int rc;
6086 
6087 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6088 		return 0;
6089 
6090 	rc = hl_fw_cpucp_info_get(hdev);
6091 	if (rc)
6092 		return rc;
6093 
6094 	if (!strlen(prop->cpucp_info.card_name))
6095 		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6096 				CARD_NAME_MAX_LEN);
6097 
6098 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
6099 
6100 	if (hdev->card_type == cpucp_card_type_pci)
6101 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
6102 	else if (hdev->card_type == cpucp_card_type_pmc)
6103 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
6104 
6105 	hdev->max_power = prop->max_power_default;
6106 
6107 	return 0;
6108 }
6109 
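/*
 * Report whether all DMA, TPC and MME engines are idle. When a mask
 * pointer is supplied, a bit is set for every busy engine; when a seq_file
 * is supplied (debugfs), a per-engine status table is printed as well.
 * MME 1 and 3 are slaves, so only their ARCH_STATUS is checked.
 */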
6110 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
6111 					struct seq_file *s)
6112 {
6113 	struct gaudi_device *gaudi = hdev->asic_specific;
6114 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6115 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6116 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6117 	bool is_idle = true, is_eng_idle, is_slave;
6118 	u64 offset;
6119 	int i, dma_id;
6120 
6121 	mutex_lock(&gaudi->clk_gate_mutex);
6122 
6123 	hdev->asic_funcs->disable_clock_gating(hdev);
6124 
6125 	if (s)
6126 		seq_puts(s,
6127 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
6128 			"---  -------  ------------  ----------  -------------\n");
6129 
6130 	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6131 		dma_id = gaudi_dma_assignment[i];
6132 		offset = dma_id * DMA_QMAN_OFFSET;
6133 
6134 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6135 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6136 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6137 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6138 				IS_DMA_IDLE(dma_core_sts0);
6139 		is_idle &= is_eng_idle;
6140 
6141 		if (mask)
6142 			*mask |= ((u64) !is_eng_idle) <<
6143 					(GAUDI_ENGINE_ID_DMA_0 + dma_id);
6144 		if (s)
6145 			seq_printf(s, fmt, dma_id,
6146 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6147 				qm_cgm_sts, dma_core_sts0);
6148 	}
6149 
6150 	if (s)
6151 		seq_puts(s,
6152 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
6153 			"---  -------  ------------  ----------  ----------\n");
6154 
6155 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6156 		offset = i * TPC_QMAN_OFFSET;
6157 		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6158 		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6159 		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6160 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6161 				IS_TPC_IDLE(tpc_cfg_sts);
6162 		is_idle &= is_eng_idle;
6163 
6164 		if (mask)
6165 			*mask |= ((u64) !is_eng_idle) <<
6166 						(GAUDI_ENGINE_ID_TPC_0 + i);
6167 		if (s)
6168 			seq_printf(s, fmt, i,
6169 				is_eng_idle ? "Y" : "N",
6170 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6171 	}
6172 
6173 	if (s)
6174 		seq_puts(s,
6175 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
6176 			"---  -------  ------------  ----------  -----------\n");
6177 
6178 	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6179 		offset = i * MME_QMAN_OFFSET;
6180 		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6181 		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6182 
6183 		/* MME 1 & 3 are slaves, no need to check their QMANs */
6184 		is_slave = i % 2;
6185 		if (!is_slave) {
6186 			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6187 			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6188 			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6189 		}
6190 
6191 		is_idle &= is_eng_idle;
6192 
6193 		if (mask)
6194 			*mask |= ((u64) !is_eng_idle) <<
6195 						(GAUDI_ENGINE_ID_MME_0 + i);
6196 		if (s) {
6197 			if (!is_slave)
6198 				seq_printf(s, fmt, i,
6199 					is_eng_idle ? "Y" : "N",
6200 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6201 			else
6202 				seq_printf(s, mme_slave_fmt, i,
6203 					is_eng_idle ? "Y" : "N", "-",
6204 					"-", mme_arch_sts);
6205 		}
6206 	}
6207 
6208 	if (s)
6209 		seq_puts(s, "\n");
6210 
6211 	hdev->asic_funcs->set_clock_gating(hdev);
6212 
6213 	mutex_unlock(&gaudi->clk_gate_mutex);
6214 
6215 	return is_idle;
6216 }
6217 
6218 static void gaudi_hw_queues_lock(struct hl_device *hdev)
6219 	__acquires(&gaudi->hw_queues_lock)
6220 {
6221 	struct gaudi_device *gaudi = hdev->asic_specific;
6222 
6223 	spin_lock(&gaudi->hw_queues_lock);
6224 }
6225 
6226 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6227 	__releases(&gaudi->hw_queues_lock)
6228 {
6229 	struct gaudi_device *gaudi = hdev->asic_specific;
6230 
6231 	spin_unlock(&gaudi->hw_queues_lock);
6232 }
6233 
6234 static u32 gaudi_get_pci_id(struct hl_device *hdev)
6235 {
6236 	return hdev->pdev->device;
6237 }
6238 
6239 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6240 				size_t max_size)
6241 {
6242 	struct gaudi_device *gaudi = hdev->asic_specific;
6243 
6244 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6245 		return 0;
6246 
6247 	return hl_fw_get_eeprom_data(hdev, data, max_size);
6248 }
6249 
6250 /*
6251  * this function should be used only during initialization and/or after reset,
6252  * when there are no active users.
6253  */
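/*
 * Flow: program the kernel and icache base addresses, trigger an icache
 * invalidate + 64KB prefetch, wait for the vector pipe to empty, then
 * issue EXECUTE and wait for both the vector pipe and the work-queue
 * in-flight counter to drain.
 */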
6254 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6255 				u32 tpc_id)
6256 {
6257 	struct gaudi_device *gaudi = hdev->asic_specific;
6258 	u64 kernel_timeout;
6259 	u32 status, offset;
6260 	int rc;
6261 
6262 	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6263 
6264 	if (hdev->pldm)
6265 		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6266 	else
6267 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6268 
6269 	mutex_lock(&gaudi->clk_gate_mutex);
6270 
6271 	hdev->asic_funcs->disable_clock_gating(hdev);
6272 
6273 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6274 			lower_32_bits(tpc_kernel));
6275 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6276 			upper_32_bits(tpc_kernel));
6277 
6278 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6279 			lower_32_bits(tpc_kernel));
6280 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6281 			upper_32_bits(tpc_kernel));
6282 	/* set a valid LUT pointer, content is of no significance */
6283 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6284 			lower_32_bits(tpc_kernel));
6285 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6286 			upper_32_bits(tpc_kernel));
6287 
6288 	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6289 			lower_32_bits(CFG_BASE +
6290 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6291 
6292 	WREG32(mmTPC0_CFG_TPC_CMD + offset,
6293 			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6294 			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6295 	/* wait a bit for the engine to start executing */
6296 	usleep_range(1000, 1500);
6297 
6298 	/* wait until engine has finished executing */
6299 	rc = hl_poll_timeout(
6300 		hdev,
6301 		mmTPC0_CFG_STATUS + offset,
6302 		status,
6303 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6304 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6305 		1000,
6306 		kernel_timeout);
6307 
6308 	if (rc) {
6309 		dev_err(hdev->dev,
6310 			"Timeout while waiting for TPC%d icache prefetch\n",
6311 			tpc_id);
6312 		hdev->asic_funcs->set_clock_gating(hdev);
6313 		mutex_unlock(&gaudi->clk_gate_mutex);
6314 		return -EIO;
6315 	}
6316 
6317 	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6318 			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6319 
6320 	/* wait a bit for the engine to start executing */
6321 	usleep_range(1000, 1500);
6322 
6323 	/* wait until engine has finished executing */
6324 	rc = hl_poll_timeout(
6325 		hdev,
6326 		mmTPC0_CFG_STATUS + offset,
6327 		status,
6328 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6329 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6330 		1000,
6331 		kernel_timeout);
6332 
6333 	if (rc) {
6334 		dev_err(hdev->dev,
6335 			"Timeout while waiting for TPC%d vector pipe\n",
6336 			tpc_id);
6337 		hdev->asic_funcs->set_clock_gating(hdev);
6338 		mutex_unlock(&gaudi->clk_gate_mutex);
6339 		return -EIO;
6340 	}
6341 
6342 	rc = hl_poll_timeout(
6343 		hdev,
6344 		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6345 		status,
6346 		(status == 0),
6347 		1000,
6348 		kernel_timeout);
6349 
6350 	hdev->asic_funcs->set_clock_gating(hdev);
6351 	mutex_unlock(&gaudi->clk_gate_mutex);
6352 
6353 	if (rc) {
6354 		dev_err(hdev->dev,
6355 			"Timeout while waiting for TPC%d kernel to execute\n",
6356 			tpc_id);
6357 		return -EIO;
6358 	}
6359 
6360 	return 0;
6361 }
6362 
6363 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6364 {
6365 	return RREG32(mmHW_STATE);
6366 }
6367 
6368 static int gaudi_ctx_init(struct hl_ctx *ctx)
6369 {
6370 	return 0;
6371 }
6372 
6373 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6374 {
6375 	return gaudi_cq_assignment[cq_idx];
6376 }
6377 
6378 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6379 {
6380 	return sizeof(struct packet_msg_short) +
6381 			sizeof(struct packet_msg_prot) * 2;
6382 }
6383 
6384 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6385 {
6386 	return sizeof(struct packet_msg_short) * 4 +
6387 			sizeof(struct packet_fence) +
6388 			sizeof(struct packet_msg_prot) * 2;
6389 }
6390 
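/*
 * gaudi_gen_signal_cb() - build a "signal" command buffer.
 *
 * The CB consists of a single MSG_SHORT packet that adds 1 to the given
 * sync object in the W_S sync manager, with the EB/RB/MB barrier bits set.
 * The two MSG_PROT packets accounted for in gaudi_get_signal_cb_size() are
 * presumably appended later by the common submission path, not here.
 */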
6391 static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6392 {
6393 	struct hl_cb *cb = (struct hl_cb *) data;
6394 	struct packet_msg_short *pkt;
6395 	u32 value, ctl;
6396 
6397 	pkt = cb->kernel_address;
6398 	memset(pkt, 0, sizeof(*pkt));
6399 
6400 	/* Inc by 1, Mode ADD */
6401 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
6402 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
6403 
6404 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
6405 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6406 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
6407 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6408 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
6409 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6410 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6411 
6412 	pkt->value = cpu_to_le32(value);
6413 	pkt->ctl = cpu_to_le32(ctl);
6414 }
6415 
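/*
 * Helper: build one MSG_SHORT packet that writes @value to a W_S monitor
 * register located at offset @addr from the monitor base address. Returns
 * the packet size so callers can chain packets back to back.
 */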
6416 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6417 					u16 addr)
6418 {
6419 	u32 ctl, pkt_size = sizeof(*pkt);
6420 
6421 	memset(pkt, 0, pkt_size);
6422 
6423 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6424 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
6425 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6426 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6427 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6428 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* only last cfg pkt sets MB */
6429 
6430 	pkt->value = cpu_to_le32(value);
6431 	pkt->ctl = cpu_to_le32(ctl);
6432 
6433 	return pkt_size;
6434 }
6435 
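/*
 * Helper: build the MSG_SHORT packet that arms a monitor. Sync objects are
 * monitored in groups of 8, so the packet carries the group id (sob_id / 8),
 * a mask in which only the bit of the requested SOB is cleared (the set bits
 * appear to exclude the other SOBs in the group), the target value and the
 * comparison mode (0 == "greater or equal").
 */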
6436 static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6437 					u16 sob_val, u16 addr)
6438 {
6439 	u32 ctl, value, pkt_size = sizeof(*pkt);
6440 	u8 mask = ~(1 << (sob_id & 0x7));
6441 
6442 	memset(pkt, 0, pkt_size);
6443 
6444 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
6445 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
6446 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
6447 			0); /* GREATER OR EQUAL */
6448 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
6449 
6450 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6451 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6452 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6453 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6454 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6455 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6456 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6457 
6458 	pkt->value = cpu_to_le32(value);
6459 	pkt->ctl = cpu_to_le32(ctl);
6460 
6461 	return pkt_size;
6462 }
6463 
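/*
 * Helper: build a FENCE packet that waits for fence counter 2 to reach a
 * target value of 1 and then decrements it by 1. This pairs with the monitor
 * payload programmed in gaudi_gen_wait_cb(), which writes 1 to the queue's
 * CP_FENCE2_RDATA register once the sync object reaches its target value.
 */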
6464 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6465 {
6466 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
6467 
6468 	memset(pkt, 0, pkt_size);
6469 
6470 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
6471 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
6472 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
6473 
6474 	ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
6475 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6476 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6477 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6478 
6479 	pkt->cfg = cpu_to_le32(cfg);
6480 	pkt->ctl = cpu_to_le32(ctl);
6481 
6482 	return pkt_size;
6483 }
6484 
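/*
 * gaudi_gen_wait_cb() - build a "wait" command buffer for the given queue.
 *
 * The CB programs a W_S sync manager monitor and then blocks the queue's CP
 * on it:
 *   1. MSG_SHORT - monitor payload address, low 32 bits (CP_FENCE2_RDATA)
 *   2. MSG_SHORT - monitor payload address, high 32 bits
 *   3. MSG_SHORT - monitor payload data (the value 1)
 *   4. MSG_SHORT - arm the monitor on the SOB group with the target value
 *   5. FENCE     - wait until fence 2 of the CP is written by the monitor
 */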
6485 static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6486 			u16 sob_val, u16 mon_id, u32 q_idx)
6487 {
6488 	struct hl_cb *cb = (struct hl_cb *) data;
6489 	void *buf = cb->kernel_address;
6490 	u64 monitor_base, fence_addr = 0;
6491 	u32 size = 0;
6492 	u16 msg_addr_offset;
6493 
6494 	switch (q_idx) {
6495 	case GAUDI_QUEUE_ID_DMA_0_0:
6496 		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6497 		break;
6498 	case GAUDI_QUEUE_ID_DMA_0_1:
6499 		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6500 		break;
6501 	case GAUDI_QUEUE_ID_DMA_0_2:
6502 		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6503 		break;
6504 	case GAUDI_QUEUE_ID_DMA_0_3:
6505 		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6506 		break;
6507 	case GAUDI_QUEUE_ID_DMA_1_0:
6508 		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6509 		break;
6510 	case GAUDI_QUEUE_ID_DMA_1_1:
6511 		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6512 		break;
6513 	case GAUDI_QUEUE_ID_DMA_1_2:
6514 		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6515 		break;
6516 	case GAUDI_QUEUE_ID_DMA_1_3:
6517 		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6518 		break;
6519 	case GAUDI_QUEUE_ID_DMA_5_0:
6520 		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6521 		break;
6522 	case GAUDI_QUEUE_ID_DMA_5_1:
6523 		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6524 		break;
6525 	case GAUDI_QUEUE_ID_DMA_5_2:
6526 		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6527 		break;
6528 	case GAUDI_QUEUE_ID_DMA_5_3:
6529 		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6530 		break;
6531 	default:
6532 		/* should never get here for a valid queue index */
6533 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6534 				q_idx);
6535 		return;
6536 	}
6537 
6538 	fence_addr += CFG_BASE;
6539 
6540 	/*
6541 	 * monitor_base should match the content of the base0 address
6542 	 * registers, since the msg short offsets below are relative to it
6543 	 */
6544 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6545 
6546 	/* First monitor config packet: low address of the sync */
6547 	msg_addr_offset =
6548 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6549 				monitor_base;
6550 
6551 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6552 					msg_addr_offset);
6553 
6554 	/* Second monitor config packet: high address of the sync */
6555 	msg_addr_offset =
6556 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6557 				monitor_base;
6558 
6559 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6560 					msg_addr_offset);
6561 
6562 	/*
6563 	 * Third monitor config packet: the payload, i.e. what to write when the
6564 	 * sync triggers
6565 	 */
6566 	msg_addr_offset =
6567 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6568 				monitor_base;
6569 
6570 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6571 
6572 	/* Fourth monitor config packet: bind the monitor to a sync object */
6573 	msg_addr_offset =
6574 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6575 				monitor_base;
6576 	size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6577 						msg_addr_offset);
6578 
6579 	/* Fence packet */
6580 	size += gaudi_add_fence_pkt(buf + size);
6581 }
6582 
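/*
 * Reset a HW sync object: clear its value in the W_S sync manager and
 * re-initialize its refcount so it can be reused.
 */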
6583 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6584 {
6585 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6586 
6587 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6588 		hw_sob->sob_id);
6589 
6590 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6591 		0);
6592 
6593 	kref_init(&hw_sob->kref);
6594 }
6595 
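/*
 * The boot firmware leaves HL_POWER9_HOST_MAGIC in a scratch register that
 * survives reset when the host (e.g. a POWER9 machine) can handle full
 * 64-bit DMA addresses; otherwise the driver falls back to a 48-bit mask.
 */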
6596 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6597 {
6598 	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6599 							HL_POWER9_HOST_MAGIC) {
6600 		hdev->power9_64bit_dma_enable = 1;
6601 		hdev->dma_mask = 64;
6602 	} else {
6603 		hdev->power9_64bit_dma_enable = 0;
6604 		hdev->dma_mask = 48;
6605 	}
6606 }
6607 
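/*
 * Compose a 64-bit device timestamp from the upper (CNTCVU) and lower
 * (CNTCVL) halves of the PSOC timestamp counter.
 */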
6608 static u64 gaudi_get_device_time(struct hl_device *hdev)
6609 {
6610 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6611 
6612 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6613 }
6614 
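/* ASIC-specific callbacks registered with the common habanalabs driver core */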
6615 static const struct hl_asic_funcs gaudi_funcs = {
6616 	.early_init = gaudi_early_init,
6617 	.early_fini = gaudi_early_fini,
6618 	.late_init = gaudi_late_init,
6619 	.late_fini = gaudi_late_fini,
6620 	.sw_init = gaudi_sw_init,
6621 	.sw_fini = gaudi_sw_fini,
6622 	.hw_init = gaudi_hw_init,
6623 	.hw_fini = gaudi_hw_fini,
6624 	.halt_engines = gaudi_halt_engines,
6625 	.suspend = gaudi_suspend,
6626 	.resume = gaudi_resume,
6627 	.cb_mmap = gaudi_cb_mmap,
6628 	.ring_doorbell = gaudi_ring_doorbell,
6629 	.pqe_write = gaudi_pqe_write,
6630 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6631 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
6632 	.get_int_queue_base = gaudi_get_int_queue_base,
6633 	.test_queues = gaudi_test_queues,
6634 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6635 	.asic_dma_pool_free = gaudi_dma_pool_free,
6636 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6637 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6638 	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6639 	.cs_parser = gaudi_cs_parser,
6640 	.asic_dma_map_sg = gaudi_dma_map_sg,
6641 	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6642 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6643 	.update_eq_ci = gaudi_update_eq_ci,
6644 	.context_switch = gaudi_context_switch,
6645 	.restore_phase_topology = gaudi_restore_phase_topology,
6646 	.debugfs_read32 = gaudi_debugfs_read32,
6647 	.debugfs_write32 = gaudi_debugfs_write32,
6648 	.debugfs_read64 = gaudi_debugfs_read64,
6649 	.debugfs_write64 = gaudi_debugfs_write64,
6650 	.add_device_attr = gaudi_add_device_attr,
6651 	.handle_eqe = gaudi_handle_eqe,
6652 	.set_pll_profile = gaudi_set_pll_profile,
6653 	.get_events_stat = gaudi_get_events_stat,
6654 	.read_pte = gaudi_read_pte,
6655 	.write_pte = gaudi_write_pte,
6656 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6657 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6658 	.send_heartbeat = gaudi_send_heartbeat,
6659 	.set_clock_gating = gaudi_set_clock_gating,
6660 	.disable_clock_gating = gaudi_disable_clock_gating,
6661 	.debug_coresight = gaudi_debug_coresight,
6662 	.is_device_idle = gaudi_is_device_idle,
6663 	.soft_reset_late_init = gaudi_soft_reset_late_init,
6664 	.hw_queues_lock = gaudi_hw_queues_lock,
6665 	.hw_queues_unlock = gaudi_hw_queues_unlock,
6666 	.get_pci_id = gaudi_get_pci_id,
6667 	.get_eeprom_data = gaudi_get_eeprom_data,
6668 	.send_cpu_message = gaudi_send_cpu_message,
6669 	.get_hw_state = gaudi_get_hw_state,
6670 	.pci_bars_map = gaudi_pci_bars_map,
6671 	.init_iatu = gaudi_init_iatu,
6672 	.rreg = hl_rreg,
6673 	.wreg = hl_wreg,
6674 	.halt_coresight = gaudi_halt_coresight,
6675 	.ctx_init = gaudi_ctx_init,
6676 	.get_clk_rate = gaudi_get_clk_rate,
6677 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6678 	.read_device_fw_version = gaudi_read_device_fw_version,
6679 	.load_firmware_to_device = gaudi_load_firmware_to_device,
6680 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
6681 	.get_signal_cb_size = gaudi_get_signal_cb_size,
6682 	.get_wait_cb_size = gaudi_get_wait_cb_size,
6683 	.gen_signal_cb = gaudi_gen_signal_cb,
6684 	.gen_wait_cb = gaudi_gen_wait_cb,
6685 	.reset_sob = gaudi_reset_sob,
6686 	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6687 	.get_device_time = gaudi_get_device_time
6688 };
6689 
6690 /**
6691  * gaudi_set_asic_funcs - set GAUDI function pointers
6692  *
6693  * @hdev: pointer to hl_device structure
6694  *
6695  */
6696 void gaudi_set_asic_funcs(struct hl_device *hdev)
6697 {
6698 	hdev->asic_funcs = &gaudi_funcs;
6699 }
6700