1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2020 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/genalloc.h>
21 #include <linux/io-64-nonatomic-lo-hi.h>
22 #include <linux/iommu.h>
23 #include <linux/seq_file.h>
24
25 /*
26 * Gaudi security scheme:
27 *
28 * 1. Host is protected by:
29 * - Range registers
30 * - MMU
31 *
32 * 2. DDR is protected by:
33 * - Range registers (protect the first 512MB)
34 *
35 * 3. Configuration is protected by:
36 * - Range registers
37 * - Protection bits
38 *
39 * MMU is always enabled.
40 *
41  * QMAN DMA channels 0,1,5 (PCI DMA):
42 * - DMA is not secured.
43 * - PQ and CQ are secured.
44 * - CP is secured: The driver needs to parse CB but WREG should be allowed
45  * because of TDMA (tensor DMA). Hence, WREG is never
46  * secured.
47 *
48 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
49 * channel 0 to be secured, execute the DMA and change it back to not secured.
50 * Currently, the driver doesn't use the DMA while there are compute jobs
51 * running.
52 *
53 * The current use cases for the driver to use the DMA are:
54 * - Clear SRAM on context switch (happens on context switch when device is
55 * idle)
56 * - MMU page tables area clear (happens on init)
57 *
58 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
59 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
60 * CQ, CP and the engine are not secured
61 *
62 */
63
64 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
65 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
66 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
67
68 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
69
70 #define GAUDI_RESET_TIMEOUT_MSEC 1000 /* 1000ms */
71 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
72 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
73 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
74
75 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
76 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
77 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
78 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
79 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
80 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
81 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
82 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
83
84 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
85
86 #define GAUDI_MAX_STRING_LEN 20
87
88 #define GAUDI_CB_POOL_CB_CNT 512
89 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
90
91 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
92
93 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
94
95 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
96
97 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
98
99 #define GAUDI_ARB_WDT_TIMEOUT 0x1000000
100
101 #define GAUDI_CLK_GATE_DEBUGFS_MASK (\
102 BIT(GAUDI_ENGINE_ID_MME_0) |\
103 BIT(GAUDI_ENGINE_ID_MME_2) |\
104 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
105
106 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
107 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
108 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
109 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
110 "gaudi cpu eq"
111 };
112
113 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
114 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
115 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
116 [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
117 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
118 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
119 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
120 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
121 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
122 };
123
124 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
125 [0] = GAUDI_QUEUE_ID_DMA_0_0,
126 [1] = GAUDI_QUEUE_ID_DMA_0_1,
127 [2] = GAUDI_QUEUE_ID_DMA_0_2,
128 [3] = GAUDI_QUEUE_ID_DMA_0_3,
129 [4] = GAUDI_QUEUE_ID_DMA_1_0,
130 [5] = GAUDI_QUEUE_ID_DMA_1_1,
131 [6] = GAUDI_QUEUE_ID_DMA_1_2,
132 [7] = GAUDI_QUEUE_ID_DMA_1_3,
133 [8] = GAUDI_QUEUE_ID_DMA_5_0,
134 [9] = GAUDI_QUEUE_ID_DMA_5_1,
135 [10] = GAUDI_QUEUE_ID_DMA_5_2,
136 [11] = GAUDI_QUEUE_ID_DMA_5_3
137 };
138
139 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
140 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
141 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
142 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
143 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
144 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
145 [PACKET_REPEAT] = sizeof(struct packet_repeat),
146 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
147 [PACKET_FENCE] = sizeof(struct packet_fence),
148 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
149 [PACKET_NOP] = sizeof(struct packet_nop),
150 [PACKET_STOP] = sizeof(struct packet_stop),
151 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
152 [PACKET_WAIT] = sizeof(struct packet_wait),
153 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
154 };
155
156 static inline bool validate_packet_id(enum packet_id id)
157 {
158 switch (id) {
159 case PACKET_WREG_32:
160 case PACKET_WREG_BULK:
161 case PACKET_MSG_LONG:
162 case PACKET_MSG_SHORT:
163 case PACKET_CP_DMA:
164 case PACKET_REPEAT:
165 case PACKET_MSG_PROT:
166 case PACKET_FENCE:
167 case PACKET_LIN_DMA:
168 case PACKET_NOP:
169 case PACKET_STOP:
170 case PACKET_ARB_POINT:
171 case PACKET_WAIT:
172 case PACKET_LOAD_AND_EXE:
173 return true;
174 default:
175 return false;
176 }
177 }
178
179 static const char * const
180 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
181 "tpc_address_exceed_slm",
182 "tpc_div_by_0",
183 "tpc_spu_mac_overflow",
184 "tpc_spu_addsub_overflow",
185 "tpc_spu_abs_overflow",
186 "tpc_spu_fp_dst_nan_inf",
187 "tpc_spu_fp_dst_denorm",
188 "tpc_vpu_mac_overflow",
189 "tpc_vpu_addsub_overflow",
190 "tpc_vpu_abs_overflow",
191 "tpc_vpu_fp_dst_nan_inf",
192 "tpc_vpu_fp_dst_denorm",
193 "tpc_assertions",
194 "tpc_illegal_instruction",
195 "tpc_pc_wrap_around",
196 "tpc_qm_sw_err",
197 "tpc_hbw_rresp_err",
198 "tpc_hbw_bresp_err",
199 "tpc_lbw_rresp_err",
200 "tpc_lbw_bresp_err"
201 };
202
203 static const char * const
204 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
205 "PQ AXI HBW error",
206 "CQ AXI HBW error",
207 "CP AXI HBW error",
208 "CP error due to undefined OPCODE",
209 "CP encountered STOP OPCODE",
210 "CP AXI LBW error",
211 "CP WRREG32 or WRBULK returned error",
212 "N/A",
213 "FENCE 0 inc over max value and clipped",
214 "FENCE 1 inc over max value and clipped",
215 "FENCE 2 inc over max value and clipped",
216 "FENCE 3 inc over max value and clipped",
217 "FENCE 0 dec under min value and clipped",
218 "FENCE 1 dec under min value and clipped",
219 "FENCE 2 dec under min value and clipped",
220 "FENCE 3 dec under min value and clipped"
221 };
222
223 static const char * const
224 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
225 "Choice push while full error",
226 "Choice Q watchdog error",
227 "MSG AXI LBW returned with error"
228 };
229
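/*
 * Queue type per GAUDI_QUEUE_ID entry. As a rough guide (based on how these
 * types are handled later in this file): EXT queues have their PQ managed by
 * the driver on the host and require kernel CBs, INT queues get their PQ
 * allocated in gaudi_alloc_internal_qmans_pq_mem() and are consumed directly
 * by the engines' QMANs, CPU is the single driver-only queue towards the
 * device CPU, and NA entries (the NIC queues) are not exposed by this driver
 * version.
 */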
230 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
231 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
232 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
233 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
234 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
237 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
239 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
240 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
241 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
242 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
243 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
252 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
253 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
254 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
255 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
304 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_0 */
305 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_1 */
306 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_2 */
307 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_3 */
308 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_0 */
309 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_1 */
310 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_2 */
311 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_3 */
312 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_0 */
313 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_1 */
314 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_2 */
315 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_3 */
316 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_0 */
317 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_1 */
318 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_2 */
319 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_3 */
320 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_0 */
321 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_1 */
322 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_2 */
323 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_3 */
324 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_0 */
325 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_1 */
326 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_2 */
327 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_3 */
328 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_0 */
329 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_1 */
330 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_2 */
331 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_3 */
332 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_0 */
333 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_1 */
334 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_2 */
335 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_3 */
336 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_0 */
337 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_1 */
338 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_2 */
339 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_3 */
340 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_0 */
341 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_1 */
342 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_2 */
343 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */
344 };
345
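/*
 * struct ecc_info_extract_params - parameters for extracting ECC error info.
 * @block_address: base address of the HW block whose ECC memories are read.
 * @num_memories: number of memories in that block.
 * @derr: true to extract double-error (uncorrectable) information, false for
 *        single-error (correctable) information.
 * @disable_clock_gating: whether clock gating must be disabled around the
 *                        register accesses.
 *
 * Note: these field descriptions are inferred from how the structure is used
 * by the ECC event handlers elsewhere in this file.
 */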
346 struct ecc_info_extract_params {
347 u64 block_address;
348 u32 num_memories;
349 bool derr;
350 bool disable_clock_gating;
351 };
352
353 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
354 u64 phys_addr);
355 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
356 struct hl_cs_job *job);
357 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
358 u32 size, u64 val);
359 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
360 u32 tpc_id);
361 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
362 static int gaudi_cpucp_info_get(struct hl_device *hdev);
363 static void gaudi_disable_clock_gating(struct hl_device *hdev);
364 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
365
366 static int gaudi_get_fixed_properties(struct hl_device *hdev)
367 {
368 struct asic_fixed_properties *prop = &hdev->asic_prop;
369 u32 num_sync_stream_queues = 0;
370 int i;
371
372 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
373 prop->hw_queues_props = kcalloc(prop->max_queues,
374 sizeof(struct hw_queue_properties),
375 GFP_KERNEL);
376
377 if (!prop->hw_queues_props)
378 return -ENOMEM;
379
380 for (i = 0 ; i < prop->max_queues ; i++) {
381 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
382 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
383 prop->hw_queues_props[i].driver_only = 0;
384 prop->hw_queues_props[i].requires_kernel_cb = 1;
385 prop->hw_queues_props[i].supports_sync_stream = 1;
386 num_sync_stream_queues++;
387 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
388 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
389 prop->hw_queues_props[i].driver_only = 1;
390 prop->hw_queues_props[i].requires_kernel_cb = 0;
391 prop->hw_queues_props[i].supports_sync_stream = 0;
392 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
393 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
394 prop->hw_queues_props[i].driver_only = 0;
395 prop->hw_queues_props[i].requires_kernel_cb = 0;
396 } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
397 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
398 prop->hw_queues_props[i].driver_only = 0;
399 prop->hw_queues_props[i].requires_kernel_cb = 0;
400 prop->hw_queues_props[i].supports_sync_stream = 0;
401 }
402 }
403
404 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
405 prop->sync_stream_first_sob = 0;
406 prop->sync_stream_first_mon = 0;
407 prop->dram_base_address = DRAM_PHYS_BASE;
408 prop->dram_size = GAUDI_HBM_SIZE_32GB;
409 prop->dram_end_address = prop->dram_base_address +
410 prop->dram_size;
411 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
412
413 prop->sram_base_address = SRAM_BASE_ADDR;
414 prop->sram_size = SRAM_SIZE;
415 prop->sram_end_address = prop->sram_base_address +
416 prop->sram_size;
417 prop->sram_user_base_address = prop->sram_base_address +
418 SRAM_USER_BASE_OFFSET;
419
420 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
421 if (hdev->pldm)
422 prop->mmu_pgt_size = 0x800000; /* 8MB */
423 else
424 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
425 prop->mmu_pte_size = HL_PTE_SIZE;
426 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
427 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
428 prop->dram_page_size = PAGE_SIZE_2MB;
429
430 prop->pmmu.hop0_shift = HOP0_SHIFT;
431 prop->pmmu.hop1_shift = HOP1_SHIFT;
432 prop->pmmu.hop2_shift = HOP2_SHIFT;
433 prop->pmmu.hop3_shift = HOP3_SHIFT;
434 prop->pmmu.hop4_shift = HOP4_SHIFT;
435 prop->pmmu.hop0_mask = HOP0_MASK;
436 prop->pmmu.hop1_mask = HOP1_MASK;
437 prop->pmmu.hop2_mask = HOP2_MASK;
438 prop->pmmu.hop3_mask = HOP3_MASK;
439 prop->pmmu.hop4_mask = HOP4_MASK;
440 prop->pmmu.start_addr = VA_HOST_SPACE_START;
441 prop->pmmu.end_addr =
442 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
443 prop->pmmu.page_size = PAGE_SIZE_4KB;
444 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
445
446 /* PMMU and HPMMU are the same except of page size */
447 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
448 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
449
450 /* shifts and masks are the same in PMMU and DMMU */
451 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
452 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
453 prop->dmmu.end_addr = VA_HOST_SPACE_END;
454 prop->dmmu.page_size = PAGE_SIZE_2MB;
455
456 prop->cfg_size = CFG_SIZE;
457 prop->max_asid = MAX_ASID;
458 prop->num_of_events = GAUDI_EVENT_SIZE;
459 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
460
461 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
462
463 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
464 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
465
466 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
467 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
468
469 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
470 CARD_NAME_MAX_LEN);
471
472 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
473
474 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
475 num_sync_stream_queues * HL_RSVD_SOBS;
476 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
477 num_sync_stream_queues * HL_RSVD_MONS;
478
479 return 0;
480 }
481
482 static int gaudi_pci_bars_map(struct hl_device *hdev)
483 {
484 static const char * const name[] = {"SRAM", "CFG", "HBM"};
485 bool is_wc[3] = {false, false, true};
486 int rc;
487
488 rc = hl_pci_bars_map(hdev, name, is_wc);
489 if (rc)
490 return rc;
491
492 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
493 (CFG_BASE - SPI_FLASH_BASE_ADDR);
494
495 return 0;
496 }
497
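/*
 * Re-program inbound PCI region 2 (the HBM BAR) so it points at @addr.
 * Returns the previous HBM BAR base address, or U64_MAX if setting the
 * inbound region failed.
 */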
498 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
499 {
500 struct gaudi_device *gaudi = hdev->asic_specific;
501 struct hl_inbound_pci_region pci_region;
502 u64 old_addr = addr;
503 int rc;
504
505 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
506 return old_addr;
507
508 /* Inbound Region 2 - Bar 4 - Point to HBM */
509 pci_region.mode = PCI_BAR_MATCH_MODE;
510 pci_region.bar = HBM_BAR_ID;
511 pci_region.addr = addr;
512 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
513 if (rc)
514 return U64_MAX;
515
516 if (gaudi) {
517 old_addr = gaudi->hbm_bar_cur_addr;
518 gaudi->hbm_bar_cur_addr = addr;
519 }
520
521 return old_addr;
522 }
523
524 static int gaudi_init_iatu(struct hl_device *hdev)
525 {
526 struct hl_inbound_pci_region inbound_region;
527 struct hl_outbound_pci_region outbound_region;
528 int rc;
529
530 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
531 inbound_region.mode = PCI_BAR_MATCH_MODE;
532 inbound_region.bar = SRAM_BAR_ID;
533 inbound_region.addr = SRAM_BASE_ADDR;
534 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
535 if (rc)
536 goto done;
537
538 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
539 inbound_region.mode = PCI_BAR_MATCH_MODE;
540 inbound_region.bar = CFG_BAR_ID;
541 inbound_region.addr = SPI_FLASH_BASE_ADDR;
542 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
543 if (rc)
544 goto done;
545
546 /* Inbound Region 2 - Bar 4 - Point to HBM */
547 inbound_region.mode = PCI_BAR_MATCH_MODE;
548 inbound_region.bar = HBM_BAR_ID;
549 inbound_region.addr = DRAM_PHYS_BASE;
550 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
551 if (rc)
552 goto done;
553
554 hdev->asic_funcs->set_dma_mask_from_fw(hdev);
555
556 /* Outbound Region 0 - Point to Host */
557 outbound_region.addr = HOST_PHYS_BASE;
558 outbound_region.size = HOST_PHYS_SIZE;
559 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
560
561 done:
562 return rc;
563 }
564
565 static int gaudi_early_init(struct hl_device *hdev)
566 {
567 struct asic_fixed_properties *prop = &hdev->asic_prop;
568 struct pci_dev *pdev = hdev->pdev;
569 int rc;
570
571 rc = gaudi_get_fixed_properties(hdev);
572 if (rc) {
573 dev_err(hdev->dev, "Failed to get fixed properties\n");
574 return rc;
575 }
576
577 /* Check BAR sizes */
578 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
579 dev_err(hdev->dev,
580 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
581 SRAM_BAR_ID,
582 (unsigned long long) pci_resource_len(pdev,
583 SRAM_BAR_ID),
584 SRAM_BAR_SIZE);
585 rc = -ENODEV;
586 goto free_queue_props;
587 }
588
589 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
590 dev_err(hdev->dev,
591 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
592 CFG_BAR_ID,
593 (unsigned long long) pci_resource_len(pdev,
594 CFG_BAR_ID),
595 CFG_BAR_SIZE);
596 rc = -ENODEV;
597 goto free_queue_props;
598 }
599
600 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
601
602 rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
603 mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
604 if (rc)
605 goto free_queue_props;
606
607 /* GAUDI Firmware does not yet support security */
608 prop->fw_security_disabled = true;
609 dev_info(hdev->dev, "firmware-level security is disabled\n");
610
611 return 0;
612
613 free_queue_props:
614 kfree(hdev->asic_prop.hw_queues_props);
615 return rc;
616 }
617
618 static int gaudi_early_fini(struct hl_device *hdev)
619 {
620 kfree(hdev->asic_prop.hw_queues_props);
621 hl_pci_fini(hdev);
622
623 return 0;
624 }
625
626 /**
627 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
628 *
629 * @hdev: pointer to hl_device structure
630 *
631 */
632 static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
633 {
634 struct asic_fixed_properties *prop = &hdev->asic_prop;
635 u32 trace_freq = 0;
636 u32 pll_clk = 0;
637 u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
638 u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
639 u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
640 u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
641 u32 od = RREG32(mmPSOC_CPU_PLL_OD);
642
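	/*
	 * Example with made-up register values: if PLL_REF_CLK is 50 (MHz),
	 * nf = 31, nr = 1 and od = 0, then pll_clk = 50 * 32 / (2 * 1) = 800.
	 * With div_sel == DIV_SEL_DIVIDED_PLL and div_fctr = 3, the reported
	 * trace_freq would be 800 / 4 = 200.
	 */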
643 if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
644 if (div_sel == DIV_SEL_REF_CLK)
645 trace_freq = PLL_REF_CLK;
646 else
647 trace_freq = PLL_REF_CLK / (div_fctr + 1);
648 } else if (div_sel == DIV_SEL_PLL_CLK ||
649 div_sel == DIV_SEL_DIVIDED_PLL) {
650 pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
651 if (div_sel == DIV_SEL_PLL_CLK)
652 trace_freq = pll_clk;
653 else
654 trace_freq = pll_clk / (div_fctr + 1);
655 } else {
656 dev_warn(hdev->dev,
657 "Received invalid div select value: %d", div_sel);
658 }
659
660 prop->psoc_timestamp_frequency = trace_freq;
661 prop->psoc_pci_pll_nr = nr;
662 prop->psoc_pci_pll_nf = nf;
663 prop->psoc_pci_pll_od = od;
664 prop->psoc_pci_pll_div_factor = div_fctr;
665 }
666
667 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
668 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
669 {
670 struct asic_fixed_properties *prop = &hdev->asic_prop;
671 struct packet_lin_dma *init_tpc_mem_pkt;
672 struct hl_cs_job *job;
673 struct hl_cb *cb;
674 u64 dst_addr;
675 u32 cb_size, ctl;
676 u8 tpc_id;
677 int rc;
678
679 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
680 if (!cb)
681 return -EFAULT;
682
683 init_tpc_mem_pkt = cb->kernel_address;
684 cb_size = sizeof(*init_tpc_mem_pkt);
685 memset(init_tpc_mem_pkt, 0, cb_size);
686
687 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
688
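	/*
	 * Build a single LIN_DMA packet that copies the TPC kernel image from
	 * the host buffer (src_addr) into the SRAM user region (dst_addr).
	 * The RB/MB control bits request the barriers defined by the Gaudi
	 * packet format.
	 */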
689 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
690 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
691 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
692 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
693
694 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
695
696 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
697 dst_addr = (prop->sram_user_base_address &
698 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
699 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
700 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
701
702 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
703 if (!job) {
704 dev_err(hdev->dev, "Failed to allocate a new job\n");
705 rc = -ENOMEM;
706 goto release_cb;
707 }
708
709 job->id = 0;
710 job->user_cb = cb;
711 job->user_cb->cs_cnt++;
712 job->user_cb_size = cb_size;
713 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
714 job->patched_cb = job->user_cb;
715 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
716
717 hl_debugfs_add_job(hdev, job);
718
719 rc = gaudi_send_job_on_qman0(hdev, job);
720
721 if (rc)
722 goto free_job;
723
724 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
725 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
726 if (rc)
727 break;
728 }
729
730 free_job:
731 hl_userptr_delete_list(hdev, &job->userptr_list);
732 hl_debugfs_remove_job(hdev, job);
733 kfree(job);
734 cb->cs_cnt--;
735
736 release_cb:
737 hl_cb_put(cb);
738 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
739
740 return rc;
741 }
742
743 /*
744 * gaudi_init_tpc_mem() - Initialize TPC memories.
745 * @hdev: Pointer to hl_device structure.
746 *
747 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
748 *
749 * Return: 0 for success, negative value for error.
750 */
751 static int gaudi_init_tpc_mem(struct hl_device *hdev)
752 {
753 const struct firmware *fw;
754 size_t fw_size;
755 void *cpu_addr;
756 dma_addr_t dma_handle;
757 int rc, count = 5;
758
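	/* request_firmware() occasionally fails with -EINTR; retry a few times */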
759 again:
760 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
761 if (rc == -EINTR && count-- > 0) {
762 msleep(50);
763 goto again;
764 }
765
766 if (rc) {
767 dev_err(hdev->dev, "Failed to load firmware file %s\n",
768 GAUDI_TPC_FW_FILE);
769 goto out;
770 }
771
772 fw_size = fw->size;
773 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
774 &dma_handle, GFP_KERNEL | __GFP_ZERO);
775 if (!cpu_addr) {
776 dev_err(hdev->dev,
777 "Failed to allocate %zu of dma memory for TPC kernel\n",
778 fw_size);
779 rc = -ENOMEM;
780 goto out;
781 }
782
783 memcpy(cpu_addr, fw->data, fw_size);
784
785 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
786
787 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
788 dma_handle);
789
790 out:
791 release_firmware(fw);
792 return rc;
793 }
794
795 static int gaudi_late_init(struct hl_device *hdev)
796 {
797 struct gaudi_device *gaudi = hdev->asic_specific;
798 int rc;
799
800 rc = gaudi->cpucp_info_get(hdev);
801 if (rc) {
802 dev_err(hdev->dev, "Failed to get cpucp info\n");
803 return rc;
804 }
805
806 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
807 if (rc) {
808 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
809 return rc;
810 }
811
812 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
813
814 gaudi_fetch_psoc_frequency(hdev);
815
816 rc = gaudi_mmu_clear_pgt_range(hdev);
817 if (rc) {
818 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
819 goto disable_pci_access;
820 }
821
822 rc = gaudi_init_tpc_mem(hdev);
823 if (rc) {
824 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
825 goto disable_pci_access;
826 }
827
828 return 0;
829
830 disable_pci_access:
831 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
832
833 return rc;
834 }
835
836 static void gaudi_late_fini(struct hl_device *hdev)
837 {
838 const struct hwmon_channel_info **channel_info_arr;
839 int i = 0;
840
841 if (!hdev->hl_chip_info->info)
842 return;
843
844 channel_info_arr = hdev->hl_chip_info->info;
845
846 while (channel_info_arr[i]) {
847 kfree(channel_info_arr[i]->config);
848 kfree(channel_info_arr[i]);
849 i++;
850 }
851
852 kfree(channel_info_arr);
853
854 hdev->hl_chip_info->info = NULL;
855 }
856
857 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
858 {
859 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
860 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
861 int i, j, rc = 0;
862
863 /*
864 	 * The device CPU works with 40-bit addresses, and bit 39 must be set
865 * to '1' when accessing the host.
866 * Bits 49:39 of the full host address are saved for a later
867 * configuration of the HW to perform extension to 50 bits.
868 * Because there is a single HW register that holds the extension bits,
869 * these bits must be identical in all allocated range.
870 */
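	/*
	 * For example, an allocation that starts just below the 512GB mark
	 * (bit 39) and ends above it would have different values in bits
	 * 49:39 of its start and end addresses, so it is skipped and another
	 * allocation is attempted.
	 */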
871
872 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
873 virt_addr_arr[i] =
874 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
875 HL_CPU_ACCESSIBLE_MEM_SIZE,
876 &dma_addr_arr[i],
877 GFP_KERNEL | __GFP_ZERO);
878 if (!virt_addr_arr[i]) {
879 rc = -ENOMEM;
880 goto free_dma_mem_arr;
881 }
882
883 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
884 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
885 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
886 break;
887 }
888
889 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
890 dev_err(hdev->dev,
891 			"MSB of CPU accessible DMA memory is not identical across the allocated range\n");
892 rc = -EFAULT;
893 goto free_dma_mem_arr;
894 }
895
896 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
897 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
898 hdev->cpu_pci_msb_addr =
899 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
900
901 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
902
903 free_dma_mem_arr:
904 for (j = 0 ; j < i ; j++)
905 hdev->asic_funcs->asic_dma_free_coherent(hdev,
906 HL_CPU_ACCESSIBLE_MEM_SIZE,
907 virt_addr_arr[j],
908 dma_addr_arr[j]);
909
910 return rc;
911 }
912
913 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
914 {
915 struct gaudi_device *gaudi = hdev->asic_specific;
916 struct gaudi_internal_qman_info *q;
917 u32 i;
918
919 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
920 q = &gaudi->internal_qmans[i];
921 if (!q->pq_kernel_addr)
922 continue;
923 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
924 q->pq_kernel_addr,
925 q->pq_dma_addr);
926 }
927 }
928
929 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
930 {
931 struct gaudi_device *gaudi = hdev->asic_specific;
932 struct gaudi_internal_qman_info *q;
933 int rc, i;
934
935 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
936 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
937 continue;
938
939 q = &gaudi->internal_qmans[i];
940
941 switch (i) {
942 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
943 case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
944 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
945 break;
946 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
947 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
948 break;
949 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
950 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
951 break;
952 default:
953 dev_err(hdev->dev, "Bad internal queue index %d", i);
954 rc = -EINVAL;
955 goto free_internal_qmans_pq_mem;
956 }
957
958 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
959 hdev, q->pq_size,
960 &q->pq_dma_addr,
961 GFP_KERNEL | __GFP_ZERO);
962 if (!q->pq_kernel_addr) {
963 rc = -ENOMEM;
964 goto free_internal_qmans_pq_mem;
965 }
966 }
967
968 return 0;
969
970 free_internal_qmans_pq_mem:
971 gaudi_free_internal_qmans_pq_mem(hdev);
972 return rc;
973 }
974
975 static int gaudi_sw_init(struct hl_device *hdev)
976 {
977 struct gaudi_device *gaudi;
978 u32 i, event_id = 0;
979 int rc;
980
981 /* Allocate device structure */
982 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
983 if (!gaudi)
984 return -ENOMEM;
985
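	/* Build a dense events array out of the valid entries in the sparse
	 * gaudi_irq_map_table, keeping their fc_id values.
	 */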
986 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
987 if (gaudi_irq_map_table[i].valid) {
988 if (event_id == GAUDI_EVENT_SIZE) {
989 dev_err(hdev->dev,
990 "Event array exceeds the limit of %u events\n",
991 GAUDI_EVENT_SIZE);
992 rc = -EINVAL;
993 goto free_gaudi_device;
994 }
995
996 gaudi->events[event_id++] =
997 gaudi_irq_map_table[i].fc_id;
998 }
999 }
1000
1001 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1002
1003 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1004
1005 hdev->asic_specific = gaudi;
1006
1007 /* Create DMA pool for small allocations */
1008 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1009 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1010 if (!hdev->dma_pool) {
1011 dev_err(hdev->dev, "failed to create DMA pool\n");
1012 rc = -ENOMEM;
1013 goto free_gaudi_device;
1014 }
1015
1016 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1017 if (rc)
1018 goto free_dma_pool;
1019
1020 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1021 if (!hdev->cpu_accessible_dma_pool) {
1022 dev_err(hdev->dev,
1023 "Failed to create CPU accessible DMA pool\n");
1024 rc = -ENOMEM;
1025 goto free_cpu_dma_mem;
1026 }
1027
1028 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1029 (uintptr_t) hdev->cpu_accessible_dma_mem,
1030 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1031 if (rc) {
1032 dev_err(hdev->dev,
1033 "Failed to add memory to CPU accessible DMA pool\n");
1034 rc = -EFAULT;
1035 goto free_cpu_accessible_dma_pool;
1036 }
1037
1038 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1039 if (rc)
1040 goto free_cpu_accessible_dma_pool;
1041
1042 spin_lock_init(&gaudi->hw_queues_lock);
1043 mutex_init(&gaudi->clk_gate_mutex);
1044
1045 hdev->supports_sync_stream = true;
1046 hdev->supports_coresight = true;
1047
1048 return 0;
1049
1050 free_cpu_accessible_dma_pool:
1051 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1052 free_cpu_dma_mem:
1053 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1054 hdev->cpu_pci_msb_addr);
1055 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1056 HL_CPU_ACCESSIBLE_MEM_SIZE,
1057 hdev->cpu_accessible_dma_mem,
1058 hdev->cpu_accessible_dma_address);
1059 free_dma_pool:
1060 dma_pool_destroy(hdev->dma_pool);
1061 free_gaudi_device:
1062 kfree(gaudi);
1063 return rc;
1064 }
1065
1066 static int gaudi_sw_fini(struct hl_device *hdev)
1067 {
1068 struct gaudi_device *gaudi = hdev->asic_specific;
1069
1070 gaudi_free_internal_qmans_pq_mem(hdev);
1071
1072 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1073
1074 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1075 hdev->cpu_pci_msb_addr);
1076 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1077 HL_CPU_ACCESSIBLE_MEM_SIZE,
1078 hdev->cpu_accessible_dma_mem,
1079 hdev->cpu_accessible_dma_address);
1080
1081 dma_pool_destroy(hdev->dma_pool);
1082
1083 mutex_destroy(&gaudi->clk_gate_mutex);
1084
1085 kfree(gaudi);
1086
1087 return 0;
1088 }
1089
1090 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1091 {
1092 struct hl_device *hdev = arg;
1093 int i;
1094
1095 if (hdev->disabled)
1096 return IRQ_HANDLED;
1097
1098 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1099 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1100
1101 hl_irq_handler_eq(irq, &hdev->event_queue);
1102
1103 return IRQ_HANDLED;
1104 }
1105
1106 /*
1107 * For backward compatibility, new MSI interrupts should be set after the
1108 * existing CPU and NIC interrupts.
1109 */
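/*
 * For example (assuming GAUDI_EVENT_QUEUE_MSI_IDX sits right after the
 * completion-queue vectors): nr values below it map 1:1 to MSI vectors, the
 * CPU event queue always uses that fixed vector, and any newer interrupt is
 * placed after the reserved NIC vectors via nr + NIC_NUMBER_OF_ENGINES + 1.
 */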
1110 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1111 bool cpu_eq)
1112 {
1113 int msi_vec;
1114
1115 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1116 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1117 GAUDI_EVENT_QUEUE_MSI_IDX);
1118
1119 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1120 (nr + NIC_NUMBER_OF_ENGINES + 1);
1121
1122 return pci_irq_vector(hdev->pdev, msi_vec);
1123 }
1124
1125 static int gaudi_enable_msi_single(struct hl_device *hdev)
1126 {
1127 int rc, irq;
1128
1129 dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1130
1131 irq = gaudi_pci_irq_vector(hdev, 0, false);
1132 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1133 "gaudi single msi", hdev);
1134 if (rc)
1135 dev_err(hdev->dev,
1136 "Failed to request single MSI IRQ\n");
1137
1138 return rc;
1139 }
1140
1141 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1142 {
1143 int cq_cnt = hdev->asic_prop.completion_queues_count;
1144 int rc, i, irq_cnt_init, irq;
1145
1146 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1147 irq = gaudi_pci_irq_vector(hdev, i, false);
1148 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1149 &hdev->completion_queue[i]);
1150 if (rc) {
1151 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1152 goto free_irqs;
1153 }
1154 }
1155
1156 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1157 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1158 &hdev->event_queue);
1159 if (rc) {
1160 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1161 goto free_irqs;
1162 }
1163
1164 return 0;
1165
1166 free_irqs:
1167 for (i = 0 ; i < irq_cnt_init ; i++)
1168 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1169 &hdev->completion_queue[i]);
1170 return rc;
1171 }
1172
1173 static int gaudi_enable_msi(struct hl_device *hdev)
1174 {
1175 struct gaudi_device *gaudi = hdev->asic_specific;
1176 int rc;
1177
1178 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1179 return 0;
1180
1181 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1182 PCI_IRQ_MSI);
1183 if (rc < 0) {
1184 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1185 return rc;
1186 }
1187
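	/*
	 * pci_alloc_irq_vectors() returns the number of vectors it actually
	 * allocated; if that is less than what the completion queues + event
	 * queue need, fall back to a single shared MSI.
	 */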
1188 if (rc < NUMBER_OF_INTERRUPTS) {
1189 gaudi->multi_msi_mode = false;
1190 rc = gaudi_enable_msi_single(hdev);
1191 } else {
1192 gaudi->multi_msi_mode = true;
1193 rc = gaudi_enable_msi_multi(hdev);
1194 }
1195
1196 if (rc)
1197 goto free_pci_irq_vectors;
1198
1199 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1200
1201 return 0;
1202
1203 free_pci_irq_vectors:
1204 pci_free_irq_vectors(hdev->pdev);
1205 return rc;
1206 }
1207
1208 static void gaudi_sync_irqs(struct hl_device *hdev)
1209 {
1210 struct gaudi_device *gaudi = hdev->asic_specific;
1211 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1212
1213 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1214 return;
1215
1216 /* Wait for all pending IRQs to be finished */
1217 if (gaudi->multi_msi_mode) {
1218 for (i = 0 ; i < cq_cnt ; i++)
1219 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1220
1221 synchronize_irq(gaudi_pci_irq_vector(hdev,
1222 GAUDI_EVENT_QUEUE_MSI_IDX,
1223 true));
1224 } else {
1225 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1226 }
1227 }
1228
1229 static void gaudi_disable_msi(struct hl_device *hdev)
1230 {
1231 struct gaudi_device *gaudi = hdev->asic_specific;
1232 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1233
1234 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1235 return;
1236
1237 gaudi_sync_irqs(hdev);
1238
1239 if (gaudi->multi_msi_mode) {
1240 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1241 true);
1242 free_irq(irq, &hdev->event_queue);
1243
1244 for (i = 0 ; i < cq_cnt ; i++) {
1245 irq = gaudi_pci_irq_vector(hdev, i, false);
1246 free_irq(irq, &hdev->completion_queue[i]);
1247 }
1248 } else {
1249 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1250 }
1251
1252 pci_free_irq_vectors(hdev->pdev);
1253
1254 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1255 }
1256
1257 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1258 {
1259 struct gaudi_device *gaudi = hdev->asic_specific;
1260
1261 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1262 return;
1263
1264 if (!hdev->sram_scrambler_enable)
1265 return;
1266
1267 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1268 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1269 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1270 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1271 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1272 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1273 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1274 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1275 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1276 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1277 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1278 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1279 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1280 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1281 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1282 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1283
1284 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1285 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1286 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1287 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1288 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1289 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1290 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1291 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1292 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1293 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1294 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1295 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1296 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1297 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1298 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1299 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1300
1301 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1302 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1303 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1304 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1305 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1306 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1307 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1308 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1309 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1310 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1311 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1312 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1313 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1314 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1315 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1316 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1317
1318 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1319 }
1320
1321 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1322 {
1323 struct gaudi_device *gaudi = hdev->asic_specific;
1324
1325 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1326 return;
1327
1328 if (!hdev->dram_scrambler_enable)
1329 return;
1330
1331 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1332 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1333 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1334 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1335 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1336 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1337 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1338 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1339 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1340 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1341 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1342 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1343 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1344 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1345 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1346 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1347
1348 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1349 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1350 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1351 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1352 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1353 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1354 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1355 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1356 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1357 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1358 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1359 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1360 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1361 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1362 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1363 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1364
1365 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1366 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1367 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1368 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1369 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1370 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1371 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1372 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1373 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1374 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1375 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1376 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1377 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1378 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1379 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1380 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1381
1382 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1383 }
1384
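/*
 * Static end-to-end (E2E) credit configuration for the SIF/NIF routers and
 * the DMA_IF down channels. The HBM read/write values below are written
 * shifted right by 3, i.e. the registers presumably take them in units of 8.
 */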
1385 static void gaudi_init_e2e(struct hl_device *hdev)
1386 {
1387 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1388 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1389 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1390 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1391
1392 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1393 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1394 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1395 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1396
1397 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1398 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1399 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1400 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1401
1402 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1403 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1404 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1405 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1406
1407 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1408 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1409 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1410 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1411
1412 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1413 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1414 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1415 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1416
1417 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1418 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1419 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1420 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1421
1422 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1423 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1424 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1425 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1426
1427 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1428 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1429 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1430 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1431
1432 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1433 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1434 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1435 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1436
1437 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1438 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1439 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1440 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1441
1442 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1443 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1444 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1445 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1446
1447 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1448 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1449 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1450 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1451
1452 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1453 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1454 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1455 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1456
1457 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1458 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1459 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1460 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1461
1462 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1463 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1464 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1465 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1466
1467 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1468 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1469 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1470 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1471
1472 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1473 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1474 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1475 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1476
1477 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1478 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1479 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1480 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1481
1482 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1483 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1484 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1485 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1486
1487 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1488 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1489 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1490 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1491
1492 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1493 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1494 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1495 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1496
1497 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1498 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1499 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1500 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1501
1502 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1503 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1504 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1505 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1506
1507 if (!hdev->dram_scrambler_enable) {
1508 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1509 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1510 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1511 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1512
1513 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1514 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1515 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1516 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1517
1518 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1519 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1520 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1521 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1522
1523 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1524 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1525 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1526 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1527
1528 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1529 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1530 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1531 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1532
1533 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1534 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1535 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1536 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1537
1538 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1539 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1540 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1541 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1542
1543 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1544 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1545 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1546 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1547
1548 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1549 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1550 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1551 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1552
1553 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1554 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1555 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1556 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1557
1558 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1559 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1560 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1561 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1562
1563 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1564 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1565 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1566 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1567
1568 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1569 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1570 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1571 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1572
1573 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1574 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1575 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1576 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1577
1578 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1579 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1580 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1581 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1582
1583 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1584 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1585 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1586 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1587
1588 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1589 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1590 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1591 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1592
1593 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1594 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1595 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1596 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1597
1598 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1599 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1600 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1601 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1602
1603 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1604 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1605 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1606 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1607
1608 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1609 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1610 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1611 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1612
1613 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1614 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1615 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1616 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1617
1618 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1619 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1620 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1621 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1622
1623 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1624 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1625 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1626 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1627 }
1628
1629 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1630 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1631 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1632 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1633
1634 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1635 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1636 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1637 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1638
1639 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1640 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1641 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1642 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1643
1644 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1645 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1646 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1647 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1648
1649 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1650 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1651 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1652 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1653
1654 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1655 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1656 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1657 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1658
1659 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1660 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1661 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1662 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1663
1664 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1665 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1666 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1667 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1668
1669 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1670 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1671 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1672 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1673
1674 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1675 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1676 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1677 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1678
1679 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1680 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1681 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1682 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1683
1684 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1685 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1686 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1687 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1688
1689 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1690 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1691 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1692 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1693
1694 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1695 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1696 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1697 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1698
1699 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1700 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1701 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1702 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1703
1704 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1705 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1706 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1707 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1708
1709 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1710 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1711 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1712 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1713
1714 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1715 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1716 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1717 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1718
1719 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1720 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1721 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1722 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1723
1724 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1725 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1726 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1727 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1728
1729 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1730 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1731 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1732 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1733
1734 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1735 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1736 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1737 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1738
1739 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1740 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1741 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1742 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1743
1744 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1745 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1746 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1747 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1748 }
1749
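/*
 * Program the HBM read/write credit counts for all four DMA_IF blocks and
 * enable the read/write credit mechanism in each of them.
 */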
1750 static void gaudi_init_hbm_cred(struct hl_device *hdev)
1751 {
1752 uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1753
1754 hbm0_wr = 0x33333333;
1755 hbm0_rd = 0x77777777;
1756 hbm1_wr = 0x55555555;
1757 hbm1_rd = 0xDDDDDDDD;
1758
1759 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1760 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1761 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1762 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1763
1764 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1765 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1766 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1767 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1768
1769 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1770 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1771 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1772 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1773
1774 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1775 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1776 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1777 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1778
1779 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1780 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1781 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1782 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1783 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1784 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1785 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1786 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1787 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1788 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1789 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1790 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1791
1792 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1793 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1794 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1795 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1796 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1797 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1798 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1799 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1800 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1801 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1802 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1803 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1804 }
1805
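/*
 * One-time "golden" register setup: E2E and HBM credits, TPC interrupt
 * masking and I-cache fetch line count, clearing the first SRAM bytes used
 * by TDMA and setting the MME rollup count. Clock gating is disabled here
 * and re-enabled later via the set_clock_gating ASIC function.
 */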
1806 static void gaudi_init_golden_registers(struct hl_device *hdev)
1807 {
1808 u32 tpc_offset;
1809 int tpc_id, i;
1810
1811 gaudi_init_e2e(hdev);
1812
1813 gaudi_init_hbm_cred(hdev);
1814
1815 hdev->asic_funcs->disable_clock_gating(hdev);
1816
1817 for (tpc_id = 0, tpc_offset = 0;
1818 tpc_id < TPC_NUMBER_OF_ENGINES;
1819 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1820 /* Mask all arithmetic interrupts from TPC */
1821 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1822 /* Set 16 cache lines */
1823 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1824 ICACHE_FETCH_LINE_NUM, 2);
1825 }
1826
1827 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1828 for (i = 0 ; i < 128 ; i += 8)
1829 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1830
1831 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1832 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1833 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1834 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1835 }
1836
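/*
 * Configure one stream of a PCI DMA QMAN: PQ base/size/pointers, LDMA
 * offsets and sync manager message base addresses. The RAZWI error
 * reporting, arbitration watchdog and protection bits are set only once
 * per QMAN (when stream 0 is configured).
 */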
1837 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1838 int qman_id, dma_addr_t qman_pq_addr)
1839 {
1840 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1841 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1842 u32 q_off, dma_qm_offset;
1843 u32 dma_qm_err_cfg;
1844
1845 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1846
1847 mtr_base_en_lo = lower_32_bits(CFG_BASE +
1848 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1849 mtr_base_en_hi = upper_32_bits(CFG_BASE +
1850 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1851 so_base_en_lo = lower_32_bits(CFG_BASE +
1852 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1853 so_base_en_hi = upper_32_bits(CFG_BASE +
1854 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1855 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1856 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1857 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1858 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1859 so_base_ws_lo = lower_32_bits(CFG_BASE +
1860 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1861 so_base_ws_hi = upper_32_bits(CFG_BASE +
1862 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1863
1864 q_off = dma_qm_offset + qman_id * 4;
1865
1866 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1867 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1868
1869 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1870 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1871 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1872
1873 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
1874 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
1875 QMAN_LDMA_SRC_OFFSET);
1876 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
1877 QMAN_LDMA_DST_OFFSET);
1878
1879 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1880 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1881 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1882 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1883 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1884 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1885 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1886 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1887
1888 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1889
1890 /* The following configuration is needed only once per QMAN */
1891 if (qman_id == 0) {
1892 /* Configure RAZWI IRQ */
1893 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1894 if (hdev->stop_on_err) {
1895 dma_qm_err_cfg |=
1896 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1897 }
1898
1899 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1900 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1901 lower_32_bits(CFG_BASE +
1902 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1903 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1904 upper_32_bits(CFG_BASE +
1905 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1906 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1907 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1908 dma_id);
1909
1910 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1911 QM_ARB_ERR_MSG_EN_MASK);
1912
1913 /* Increase ARB WDT to support streams architecture */
1914 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1915 GAUDI_ARB_WDT_TIMEOUT);
1916
1917 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1918 QMAN_EXTERNAL_MAKE_TRUSTED);
1919
1920 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1921 }
1922 }
1923
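/*
 * Configure the DMA core engine: read outstanding limits, error reporting
 * towards the GIC, protection and MMU bypass, and finally enable the core.
 */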
1924 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1925 {
1926 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1927 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1928
1929 /* Set to maximum possible according to physical size */
1930 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1931 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1932
1933 /* WA for H/W bug H3-2116 */
1934 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
1935
1936 /* STOP_ON bit implies no completion to operation in case of RAZWI */
1937 if (hdev->stop_on_err)
1938 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1939
1940 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1941 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1942 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1943 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1944 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1945 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1946 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1947 WREG32(mmDMA0_CORE_PROT + dma_offset,
1948 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1949 /* If the channel is secured, it should be in MMU bypass mode */
1950 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1951 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1952 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1953 }
1954
1955 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1956 u32 enable_mask)
1957 {
1958 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1959
1960 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1961 }
1962
1963 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1964 {
1965 struct gaudi_device *gaudi = hdev->asic_specific;
1966 struct hl_hw_queue *q;
1967 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1968
1969 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1970 return;
1971
1972 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1973 dma_id = gaudi_dma_assignment[i];
1974 /*
1975 * For queues after the CPU Q, we need to add 1 to get the
1976 * correct queue index. In addition, we need to account for the
1977 * CPU EQ and the NIC IRQs in order to get the correct MSI register.
1978 */
1979 if (dma_id > 1) {
1980 cpu_skip = 1;
1981 nic_skip = NIC_NUMBER_OF_ENGINES;
1982 } else {
1983 cpu_skip = 0;
1984 nic_skip = 0;
1985 }
1986
1987 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1988 q_idx = 4 * dma_id + j + cpu_skip;
1989 q = &hdev->kernel_queues[q_idx];
1990 q->cq_id = cq_id++;
1991 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1992 gaudi_init_pci_dma_qman(hdev, dma_id, j,
1993 q->bus_address);
1994 }
1995
1996 gaudi_init_dma_core(hdev, dma_id);
1997
1998 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1999 }
2000
2001 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2002 }
2003
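/*
 * Configure one stream of an HBM DMA QMAN. Streams 0-3 get a PQ and CPDMA
 * offsets; stream 4 (the lower CP) gets LDMA offsets together with the
 * RAZWI error reporting, arbitration watchdog and protection settings.
 */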
2004 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2005 int qman_id, u64 qman_base_addr)
2006 {
2007 u32 mtr_base_lo, mtr_base_hi;
2008 u32 so_base_lo, so_base_hi;
2009 u32 q_off, dma_qm_offset;
2010 u32 dma_qm_err_cfg;
2011
2012 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2013
2014 mtr_base_lo = lower_32_bits(CFG_BASE +
2015 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2016 mtr_base_hi = upper_32_bits(CFG_BASE +
2017 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2018 so_base_lo = lower_32_bits(CFG_BASE +
2019 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2020 so_base_hi = upper_32_bits(CFG_BASE +
2021 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2022
2023 q_off = dma_qm_offset + qman_id * 4;
2024
2025 if (qman_id < 4) {
2026 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2027 lower_32_bits(qman_base_addr));
2028 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2029 upper_32_bits(qman_base_addr));
2030
2031 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2032 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2033 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2034
2035 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2036 QMAN_CPDMA_SIZE_OFFSET);
2037 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2038 QMAN_CPDMA_SRC_OFFSET);
2039 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2040 QMAN_CPDMA_DST_OFFSET);
2041 } else {
2042 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2043 QMAN_LDMA_SIZE_OFFSET);
2044 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2045 QMAN_LDMA_SRC_OFFSET);
2046 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2047 QMAN_LDMA_DST_OFFSET);
2048
2049 /* Configure RAZWI IRQ */
2050 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2051 if (hdev->stop_on_err) {
2052 dma_qm_err_cfg |=
2053 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2054 }
2055 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2056
2057 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2058 lower_32_bits(CFG_BASE +
2059 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2060 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2061 upper_32_bits(CFG_BASE +
2062 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2063 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2064 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2065 dma_id);
2066
2067 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2068 QM_ARB_ERR_MSG_EN_MASK);
2069
2070 /* Increase ARB WDT to support streams architecture */
2071 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2072 GAUDI_ARB_WDT_TIMEOUT);
2073
2074 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2075 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2076 QMAN_INTERNAL_MAKE_TRUSTED);
2077 }
2078
2079 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2080 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2081 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2082 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2083 }
2084
2085 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2086 {
2087 struct gaudi_device *gaudi = hdev->asic_specific;
2088 struct gaudi_internal_qman_info *q;
2089 u64 qman_base_addr;
2090 int i, j, dma_id, internal_q_index;
2091
2092 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2093 return;
2094
2095 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2096 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2097
2098 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2099 /*
2100 * Add the CPU queue in order to get the correct queue
2101 * number, as all internal queues are placed after it
2102 */
2103 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2104
2105 q = &gaudi->internal_qmans[internal_q_index];
2106 qman_base_addr = (u64) q->pq_dma_addr;
2107 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2108 qman_base_addr);
2109 }
2110
2111 /* Initializing lower CP for HBM DMA QMAN */
2112 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2113
2114 gaudi_init_dma_core(hdev, dma_id);
2115
2116 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2117 }
2118
2119 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2120 }
2121
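/* Same per-stream split as the HBM DMA QMANs, applied to an MME QMAN */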
2122 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2123 int qman_id, u64 qman_base_addr)
2124 {
2125 u32 mtr_base_lo, mtr_base_hi;
2126 u32 so_base_lo, so_base_hi;
2127 u32 q_off, mme_id;
2128 u32 mme_qm_err_cfg;
2129
2130 mtr_base_lo = lower_32_bits(CFG_BASE +
2131 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2132 mtr_base_hi = upper_32_bits(CFG_BASE +
2133 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2134 so_base_lo = lower_32_bits(CFG_BASE +
2135 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2136 so_base_hi = upper_32_bits(CFG_BASE +
2137 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2138
2139 q_off = mme_offset + qman_id * 4;
2140
2141 if (qman_id < 4) {
2142 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2143 lower_32_bits(qman_base_addr));
2144 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2145 upper_32_bits(qman_base_addr));
2146
2147 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2148 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2149 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2150
2151 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2152 QMAN_CPDMA_SIZE_OFFSET);
2153 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2154 QMAN_CPDMA_SRC_OFFSET);
2155 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2156 QMAN_CPDMA_DST_OFFSET);
2157 } else {
2158 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2159 QMAN_LDMA_SIZE_OFFSET);
2160 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2161 QMAN_LDMA_SRC_OFFSET);
2162 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2163 QMAN_LDMA_DST_OFFSET);
2164
2165 /* Configure RAZWI IRQ */
2166 mme_id = mme_offset /
2167 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2168
2169 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2170 if (hdev->stop_on_err) {
2171 mme_qm_err_cfg |=
2172 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2173 }
2174 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2175 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2176 lower_32_bits(CFG_BASE +
2177 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2178 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2179 upper_32_bits(CFG_BASE +
2180 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2181 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2182 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2183 mme_id);
2184
2185 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2186 QM_ARB_ERR_MSG_EN_MASK);
2187
2188 /* Increase ARB WDT to support streams architecture */
2189 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2190 GAUDI_ARB_WDT_TIMEOUT);
2191
2192 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2193 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2194 QMAN_INTERNAL_MAKE_TRUSTED);
2195 }
2196
2197 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2198 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2199 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2200 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2201 }
2202
2203 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2204 {
2205 struct gaudi_device *gaudi = hdev->asic_specific;
2206 struct gaudi_internal_qman_info *q;
2207 u64 qman_base_addr;
2208 u32 mme_offset;
2209 int i, internal_q_index;
2210
2211 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2212 return;
2213
2214 /*
2215 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2216 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2217 */
2218
2219 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2220
2221 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2222 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2223 q = &gaudi->internal_qmans[internal_q_index];
2224 qman_base_addr = (u64) q->pq_dma_addr;
2225 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2226 qman_base_addr);
2227 if (i == 3)
2228 mme_offset = 0;
2229 }
2230
2231 /* Initializing lower CP for MME QMANs */
2232 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2233 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2234 gaudi_init_mme_qman(hdev, 0, 4, 0);
2235
2236 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2237 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2238
2239 gaudi->hw_cap_initialized |= HW_CAP_MME;
2240 }
2241
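/* Same per-stream split as the HBM DMA QMANs, applied to a TPC QMAN */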
2242 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2243 int qman_id, u64 qman_base_addr)
2244 {
2245 u32 mtr_base_lo, mtr_base_hi;
2246 u32 so_base_lo, so_base_hi;
2247 u32 q_off, tpc_id;
2248 u32 tpc_qm_err_cfg;
2249
2250 mtr_base_lo = lower_32_bits(CFG_BASE +
2251 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2252 mtr_base_hi = upper_32_bits(CFG_BASE +
2253 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2254 so_base_lo = lower_32_bits(CFG_BASE +
2255 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2256 so_base_hi = upper_32_bits(CFG_BASE +
2257 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2258
2259 q_off = tpc_offset + qman_id * 4;
2260
2261 if (qman_id < 4) {
2262 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2263 lower_32_bits(qman_base_addr));
2264 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2265 upper_32_bits(qman_base_addr));
2266
2267 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2268 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2269 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2270
2271 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2272 QMAN_CPDMA_SIZE_OFFSET);
2273 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2274 QMAN_CPDMA_SRC_OFFSET);
2275 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2276 QMAN_CPDMA_DST_OFFSET);
2277 } else {
2278 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2279 QMAN_LDMA_SIZE_OFFSET);
2280 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2281 QMAN_LDMA_SRC_OFFSET);
2282 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2283 QMAN_LDMA_DST_OFFSET);
2284
2285 /* Configure RAZWI IRQ */
2286 tpc_id = tpc_offset /
2287 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2288
2289 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2290 if (hdev->stop_on_err) {
2291 tpc_qm_err_cfg |=
2292 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2293 }
2294
2295 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2296 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2297 lower_32_bits(CFG_BASE +
2298 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2299 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2300 upper_32_bits(CFG_BASE +
2301 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2302 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2303 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2304 tpc_id);
2305
2306 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2307 QM_ARB_ERR_MSG_EN_MASK);
2308
2309 /* Increase ARB WDT to support streams architecture */
2310 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2311 GAUDI_ARB_WDT_TIMEOUT);
2312
2313 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2314 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2315 QMAN_INTERNAL_MAKE_TRUSTED);
2316 }
2317
2318 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2319 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2320 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2321 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2322 }
2323
2324 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2325 {
2326 struct gaudi_device *gaudi = hdev->asic_specific;
2327 struct gaudi_internal_qman_info *q;
2328 u64 qman_base_addr;
2329 u32 so_base_hi, tpc_offset = 0;
2330 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2331 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2332 int i, tpc_id, internal_q_index;
2333
2334 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2335 return;
2336
2337 so_base_hi = upper_32_bits(CFG_BASE +
2338 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2339
2340 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2341 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2342 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2343 tpc_id * QMAN_STREAMS + i;
2344 q = &gaudi->internal_qmans[internal_q_index];
2345 qman_base_addr = (u64) q->pq_dma_addr;
2346 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2347 qman_base_addr);
2348
2349 if (i == 3) {
2350 /* Initializing lower CP for TPC QMAN */
2351 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2352
2353 /* Enable the QMAN and TPC channel */
2354 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2355 QMAN_TPC_ENABLE);
2356 }
2357 }
2358
2359 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2360 so_base_hi);
2361
2362 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2363
2364 gaudi->hw_cap_initialized |=
2365 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
2366 }
2367 }
2368
2369 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2370 {
2371 struct gaudi_device *gaudi = hdev->asic_specific;
2372
2373 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2374 return;
2375
2376 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2377 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2378 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2379 }
2380
2381 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2382 {
2383 struct gaudi_device *gaudi = hdev->asic_specific;
2384
2385 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2386 return;
2387
2388 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2389 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2390 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2391 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2392 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2393 }
2394
2395 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2396 {
2397 struct gaudi_device *gaudi = hdev->asic_specific;
2398
2399 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2400 return;
2401
2402 WREG32(mmMME2_QM_GLBL_CFG0, 0);
2403 WREG32(mmMME0_QM_GLBL_CFG0, 0);
2404 }
2405
2406 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2407 {
2408 struct gaudi_device *gaudi = hdev->asic_specific;
2409 u32 tpc_offset = 0;
2410 int tpc_id;
2411
2412 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2413 return;
2414
2415 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2416 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2417 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2418 }
2419 }
2420
2421 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2422 {
2423 struct gaudi_device *gaudi = hdev->asic_specific;
2424
2425 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2426 return;
2427
2428 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2429 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2430 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2431 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2432 }
2433
2434 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2435 {
2436 struct gaudi_device *gaudi = hdev->asic_specific;
2437
2438 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2439 return;
2440
2441 /* Stop CPs of HBM DMA QMANs */
2442
2443 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2444 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2445 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2446 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2447 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2448 }
2449
2450 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2451 {
2452 struct gaudi_device *gaudi = hdev->asic_specific;
2453
2454 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2455 return;
2456
2457 /* Stop CPs of MME QMANs */
2458 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2459 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2460 }
2461
2462 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2463 {
2464 struct gaudi_device *gaudi = hdev->asic_specific;
2465
2466 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2467 return;
2468
2469 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2470 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2471 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2472 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2473 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2474 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2475 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2476 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2477 }
2478
2479 static void gaudi_pci_dma_stall(struct hl_device *hdev)
2480 {
2481 struct gaudi_device *gaudi = hdev->asic_specific;
2482
2483 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2484 return;
2485
2486 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2487 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2488 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2489 }
2490
2491 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2492 {
2493 struct gaudi_device *gaudi = hdev->asic_specific;
2494
2495 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2496 return;
2497
2498 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2499 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2500 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2501 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2502 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2503 }
2504
2505 static void gaudi_mme_stall(struct hl_device *hdev)
2506 {
2507 struct gaudi_device *gaudi = hdev->asic_specific;
2508
2509 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2510 return;
2511
2512 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2513 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2514 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2515 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2516 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2517 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2518 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2519 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2520 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2521 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2522 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2523 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2524 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2525 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2526 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2527 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2528 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2529 }
2530
2531 static void gaudi_tpc_stall(struct hl_device *hdev)
2532 {
2533 struct gaudi_device *gaudi = hdev->asic_specific;
2534
2535 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2536 return;
2537
2538 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2539 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2540 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2541 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2542 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2543 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2544 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2545 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2546 }
2547
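/*
 * Enable clock gating per engine, according to the bits that are set in
 * hdev->clock_gating_mask. The PCI DMA QMANs use a different CGM value
 * than the internal (HBM DMA, MME, TPC) QMANs.
 */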
2548 static void gaudi_set_clock_gating(struct hl_device *hdev)
2549 {
2550 struct gaudi_device *gaudi = hdev->asic_specific;
2551 u32 qman_offset;
2552 bool enable;
2553 int i;
2554
2555 /* If a debug session is in progress, don't enable clock gating
2556 * as it may interfere with the debugging
2557 */
2558 if (hdev->in_debug)
2559 return;
2560
2561 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
2562 enable = !!(hdev->clock_gating_mask &
2563 (BIT_ULL(gaudi_dma_assignment[i])));
2564
2565 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2566 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2567 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2568 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2569 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
2570 }
2571
2572 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
2573 enable = !!(hdev->clock_gating_mask &
2574 (BIT_ULL(gaudi_dma_assignment[i])));
2575
2576 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2577 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2578 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2579 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2580 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2581 }
2582
2583 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
2584 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2585 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2586
2587 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
2588 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2589 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2590
2591 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2592 enable = !!(hdev->clock_gating_mask &
2593 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
2594
2595 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2596 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2597 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2598 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2599
2600 qman_offset += TPC_QMAN_OFFSET;
2601 }
2602
2603 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2604 }
2605
2606 static void gaudi_disable_clock_gating(struct hl_device *hdev)
2607 {
2608 struct gaudi_device *gaudi = hdev->asic_specific;
2609 u32 qman_offset;
2610 int i;
2611
2612 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2613 return;
2614
2615 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2616 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2617 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2618
2619 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2620 }
2621
2622 WREG32(mmMME0_QM_CGM_CFG, 0);
2623 WREG32(mmMME0_QM_CGM_CFG1, 0);
2624 WREG32(mmMME2_QM_CGM_CFG, 0);
2625 WREG32(mmMME2_QM_CGM_CFG1, 0);
2626
2627 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2628 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2629 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2630
2631 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2632 }
2633
2634 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2635 }
2636
2637 static void gaudi_enable_timestamp(struct hl_device *hdev)
2638 {
2639 /* Disable the timestamp counter */
2640 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2641
2642 /* Zero the lower/upper parts of the 64-bit counter */
2643 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2644 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2645
2646 /* Enable the counter */
2647 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2648 }
2649
2650 static void gaudi_disable_timestamp(struct hl_device *hdev)
2651 {
2652 /* Disable the timestamp counter */
2653 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2654 }
2655
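/*
 * Halt the compute engines in three ordered steps: stop the QMAN CPs, then
 * stall the engine cores, then disable the QMANs entirely. The timestamp
 * counter and MSI interrupts are disabled at the end.
 */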
2656 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2657 {
2658 u32 wait_timeout_ms;
2659
2660 dev_info(hdev->dev,
2661 "Halting compute engines and disabling interrupts\n");
2662
2663 if (hdev->pldm)
2664 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2665 else
2666 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2667
2668
2669 gaudi_stop_mme_qmans(hdev);
2670 gaudi_stop_tpc_qmans(hdev);
2671 gaudi_stop_hbm_dma_qmans(hdev);
2672 gaudi_stop_pci_dma_qmans(hdev);
2673
2674 hdev->asic_funcs->disable_clock_gating(hdev);
2675
2676 msleep(wait_timeout_ms);
2677
2678 gaudi_pci_dma_stall(hdev);
2679 gaudi_hbm_dma_stall(hdev);
2680 gaudi_tpc_stall(hdev);
2681 gaudi_mme_stall(hdev);
2682
2683 msleep(wait_timeout_ms);
2684
2685 gaudi_disable_mme_qmans(hdev);
2686 gaudi_disable_tpc_qmans(hdev);
2687 gaudi_disable_hbm_dma_qmans(hdev);
2688 gaudi_disable_pci_dma_qmans(hdev);
2689
2690 gaudi_disable_timestamp(hdev);
2691
2692 gaudi_disable_msi(hdev);
2693 }
2694
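/*
 * Initialize the device MMU: program the hop0 address of every ASID,
 * configure and invalidate the MMU cache, and enable the MMU with the
 * relevant hop configuration.
 */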
2695 static int gaudi_mmu_init(struct hl_device *hdev)
2696 {
2697 struct asic_fixed_properties *prop = &hdev->asic_prop;
2698 struct gaudi_device *gaudi = hdev->asic_specific;
2699 u64 hop0_addr;
2700 int rc, i;
2701
2702 if (!hdev->mmu_enable)
2703 return 0;
2704
2705 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2706 return 0;
2707
2708 hdev->dram_supports_virtual_memory = false;
2709
2710 for (i = 0 ; i < prop->max_asid ; i++) {
2711 hop0_addr = prop->mmu_pgt_addr +
2712 (i * prop->mmu_hop_table_size);
2713
2714 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2715 if (rc) {
2716 dev_err(hdev->dev,
2717 "failed to set hop0 addr for asid %d\n", i);
2718 goto err;
2719 }
2720 }
2721
2722 /* init MMU cache manage page */
2723 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2724 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2725
2726 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
2727
2728 WREG32(mmMMU_UP_MMU_ENABLE, 1);
2729 WREG32(mmMMU_UP_SPI_MASK, 0xF);
2730
2731 WREG32(mmSTLB_HOP_CONFIGURATION,
2732 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2733
2734 /*
2735 * The H/W expects the first PI after init to be 1. After wraparound
2736 * we'll write 0.
2737 */
2738 gaudi->mmu_cache_inv_pi = 1;
2739
2740 gaudi->hw_cap_initialized |= HW_CAP_MMU;
2741
2742 return 0;
2743
2744 err:
2745 return rc;
2746 }
2747
2748 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2749 {
2750 void __iomem *dst;
2751
2752 /* HBM scrambler must be initialized before pushing F/W to HBM */
2753 gaudi_init_scrambler_hbm(hdev);
2754
2755 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2756
2757 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2758 }
2759
2760 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2761 {
2762 void __iomem *dst;
2763
2764 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2765
2766 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2767 }
2768
2769 static void gaudi_read_device_fw_version(struct hl_device *hdev,
2770 enum hl_fw_component fwc)
2771 {
2772 const char *name;
2773 u32 ver_off;
2774 char *dest;
2775
2776 switch (fwc) {
2777 case FW_COMP_UBOOT:
2778 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2779 dest = hdev->asic_prop.uboot_ver;
2780 name = "U-Boot";
2781 break;
2782 case FW_COMP_PREBOOT:
2783 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2784 dest = hdev->asic_prop.preboot_ver;
2785 name = "Preboot";
2786 break;
2787 default:
2788 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2789 return;
2790 }
2791
2792 ver_off &= ~((u32)SRAM_BASE_ADDR);
2793
2794 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2795 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2796 VERSION_MAX_LEN);
2797 } else {
2798 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2799 name, ver_off);
2800 strcpy(dest, "unavailable");
2801 }
2802 }
2803
2804 static int gaudi_init_cpu(struct hl_device *hdev)
2805 {
2806 struct gaudi_device *gaudi = hdev->asic_specific;
2807 int rc;
2808
2809 if (!hdev->cpu_enable)
2810 return 0;
2811
2812 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2813 return 0;
2814
2815 /*
2816 * The device CPU works with 40-bit addresses.
2817 * This register sets the extension to 50 bits.
2818 */
2819 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2820
2821 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2822 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2823 mmCPU_CMD_STATUS_TO_HOST,
2824 mmCPU_BOOT_ERR0,
2825 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2826 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2827
2828 if (rc)
2829 return rc;
2830
2831 gaudi->hw_cap_initialized |= HW_CAP_CPU;
2832
2833 return 0;
2834 }
2835
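/*
 * Hand the PQ, EQ and CQ base addresses and sizes to the device CPU,
 * signal it through the GIC and poll until it reports that the queues are
 * ready for the host.
 */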
2836 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2837 {
2838 struct gaudi_device *gaudi = hdev->asic_specific;
2839 struct hl_eq *eq;
2840 u32 status;
2841 struct hl_hw_queue *cpu_pq =
2842 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2843 int err;
2844
2845 if (!hdev->cpu_queues_enable)
2846 return 0;
2847
2848 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2849 return 0;
2850
2851 eq = &hdev->event_queue;
2852
2853 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2854 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2855
2856 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2857 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2858
2859 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2860 lower_32_bits(hdev->cpu_accessible_dma_address));
2861 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2862 upper_32_bits(hdev->cpu_accessible_dma_address));
2863
2864 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2865 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2866 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2867
2868 /* Used for EQ CI */
2869 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2870
2871 WREG32(mmCPU_IF_PF_PQ_PI, 0);
2872
2873 if (gaudi->multi_msi_mode)
2874 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2875 else
2876 WREG32(mmCPU_IF_QUEUE_INIT,
2877 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2878
2879 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2880
2881 err = hl_poll_timeout(
2882 hdev,
2883 mmCPU_IF_QUEUE_INIT,
2884 status,
2885 (status == PQ_INIT_STATUS_READY_FOR_HOST),
2886 1000,
2887 cpu_timeout);
2888
2889 if (err) {
2890 dev_err(hdev->dev,
2891 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
2892 return -EIO;
2893 }
2894
2895 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2896 return 0;
2897 }
2898
2899 static void gaudi_pre_hw_init(struct hl_device *hdev)
2900 {
2901 /* Perform read from the device to make sure device is up */
2902 RREG32(mmHW_STATE);
2903
2904 /* Set the access through PCI bars (Linux driver only) as
2905 * secured
2906 */
2907 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
2908 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2909 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2910
2911 /* Perform a read to flush the waiting writes, in order to ensure the
2912 * configuration was set in the device
2913 */
2914 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2915
2916 /*
2917 * Let's mark in the H/W that we have reached this point. We check
2918 * this value in the reset_before_init function to understand whether
2919 * we need to reset the chip before doing H/W init. This register is
2920 * cleared by the H/W upon H/W reset
2921 */
2922 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2923
2924 /* Configure the reset registers. Must be done as early as possible
2925 * in case we fail during H/W initialization
2926 */
2927 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2928 (CFG_RST_H_DMA_MASK |
2929 CFG_RST_H_MME_MASK |
2930 CFG_RST_H_SM_MASK |
2931 CFG_RST_H_TPC_7_MASK));
2932
2933 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2934
2935 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2936 (CFG_RST_H_HBM_MASK |
2937 CFG_RST_H_TPC_7_MASK |
2938 CFG_RST_H_NIC_MASK |
2939 CFG_RST_H_SM_MASK |
2940 CFG_RST_H_DMA_MASK |
2941 CFG_RST_H_MME_MASK |
2942 CFG_RST_H_CPU_MASK |
2943 CFG_RST_H_MMU_MASK));
2944
2945 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2946 (CFG_RST_L_IF_MASK |
2947 CFG_RST_L_PSOC_MASK |
2948 CFG_RST_L_TPC_MASK));
2949 }
2950
2951 static int gaudi_hw_init(struct hl_device *hdev)
2952 {
2953 int rc;
2954
2955 dev_info(hdev->dev, "Starting initialization of H/W\n");
2956
2957 gaudi_pre_hw_init(hdev);
2958
2959 gaudi_init_pci_dma_qmans(hdev);
2960
2961 gaudi_init_hbm_dma_qmans(hdev);
2962
2963 rc = gaudi_init_cpu(hdev);
2964 if (rc) {
2965 dev_err(hdev->dev, "failed to initialize CPU\n");
2966 return rc;
2967 }
2968
2969 /* SRAM scrambler must be initialized after CPU is running from HBM */
2970 gaudi_init_scrambler_sram(hdev);
2971
2972 /* This is here just in case we are working without the device CPU */
2973 gaudi_init_scrambler_hbm(hdev);
2974
2975 gaudi_init_golden_registers(hdev);
2976
2977 rc = gaudi_mmu_init(hdev);
2978 if (rc)
2979 return rc;
2980
2981 gaudi_init_security(hdev);
2982
2983 gaudi_init_mme_qmans(hdev);
2984
2985 gaudi_init_tpc_qmans(hdev);
2986
2987 hdev->asic_funcs->set_clock_gating(hdev);
2988
2989 gaudi_enable_timestamp(hdev);
2990
2991 /* MSI must be enabled before CPU queues are initialized */
2992 rc = gaudi_enable_msi(hdev);
2993 if (rc)
2994 goto disable_queues;
2995
2996 /* must be called after MSI was enabled */
2997 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2998 if (rc) {
2999 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3000 rc);
3001 goto disable_msi;
3002 }
3003
3004 /* Perform read from the device to flush all configuration */
3005 RREG32(mmHW_STATE);
3006
3007 return 0;
3008
3009 disable_msi:
3010 gaudi_disable_msi(hdev);
3011 disable_queues:
3012 gaudi_disable_mme_qmans(hdev);
3013 gaudi_disable_pci_dma_qmans(hdev);
3014
3015 return rc;
3016 }
3017
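/*
 * Hard-reset flow: put the device CPU in WFE, prevent PCIe re-init on the
 * way up, apply a boot-strap pins workaround, issue the SW_ALL_RST and
 * wait for the reset to complete before clearing the H/W capability bits.
 */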
3018 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3019 {
3020 struct gaudi_device *gaudi = hdev->asic_specific;
3021 u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
3022
3023 if (!hard_reset) {
3024 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3025 return;
3026 }
3027
3028 if (hdev->pldm) {
3029 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3030 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3031 } else {
3032 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3033 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3034 }
3035
3036 /* Set device to handle FLR by H/W as we will put the device CPU to
3037 * halt mode
3038 */
3039 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3040 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3041
3042 /* The state of the CPU is unknown at this point, so make sure it is
3043 * stopped by any means necessary
3044 */
3045 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3046 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3047
3048 msleep(cpu_timeout_ms);
3049
3050 /* Tell ASIC not to re-initialize PCIe */
3051 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3052
3053 boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3054
3055 /* H/W bug WA:
3056 * rdata[31:0] = strap_read_val;
3057 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3058 */
3059 boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3060 (boot_strap & 0x001FFFFF));
3061 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3062
3063 /* Restart BTL/BLR upon hard-reset */
3064 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3065
3066 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3067 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3068 dev_info(hdev->dev,
3069 "Issued HARD reset command, going to wait %dms\n",
3070 reset_timeout_ms);
3071
3072 /*
3073 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3074 * itself is in reset. Need to wait until the reset is deasserted
3075 */
3076 msleep(reset_timeout_ms);
3077
3078 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3079 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3080 dev_err(hdev->dev,
3081 "Timeout while waiting for device to reset 0x%x\n",
3082 status);
3083
3084 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3085
3086 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3087 HW_CAP_HBM | HW_CAP_PCI_DMA |
3088 HW_CAP_MME | HW_CAP_TPC_MASK |
3089 HW_CAP_HBM_DMA | HW_CAP_PLL |
3090 HW_CAP_MMU |
3091 HW_CAP_SRAM_SCRAMBLER |
3092 HW_CAP_HBM_SCRAMBLER |
3093 HW_CAP_CLK_GATE);
3094
3095 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3096 }
3097
3098 static int gaudi_suspend(struct hl_device *hdev)
3099 {
3100 int rc;
3101
3102 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
3103 if (rc)
3104 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3105
3106 return rc;
3107 }
3108
3109 static int gaudi_resume(struct hl_device *hdev)
3110 {
3111 return gaudi_init_iatu(hdev);
3112 }
3113
3114 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3115 void *cpu_addr, dma_addr_t dma_addr, size_t size)
3116 {
3117 int rc;
3118
3119 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3120 VM_DONTCOPY | VM_NORESERVE;
3121
3122 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
3123 (dma_addr - HOST_PHYS_BASE), size);
3124 if (rc)
3125 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
3126
3127 return rc;
3128 }
3129
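/*
 * Translate the H/W queue ID to the matching PQ_PI register, write the new
 * producer index to it and, for the CPU PQ, also notify the device CPU
 * through the GIC.
 */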
3130 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3131 {
3132 struct gaudi_device *gaudi = hdev->asic_specific;
3133 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3134 int dma_id;
3135 bool invalid_queue = false;
3136
3137 switch (hw_queue_id) {
3138 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3139 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3140 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3141 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3142 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3143 break;
3144
3145 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3146 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3147 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3148 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3149 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3150 break;
3151
3152 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3153 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3154 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3155 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3156 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3157 break;
3158
3159 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3160 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3161 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3162 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3163 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3164 break;
3165
3166 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3167 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3168 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3169 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3170 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3171 break;
3172
3173 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3174 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3175 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3176 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3177 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3178 break;
3179
3180 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3181 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3182 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3183 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3184 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3185 break;
3186
3187 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3188 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3189 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3190 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3191 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3192 break;
3193
3194 case GAUDI_QUEUE_ID_CPU_PQ:
3195 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3196 db_reg_offset = mmCPU_IF_PF_PQ_PI;
3197 else
3198 invalid_queue = true;
3199 break;
3200
3201 case GAUDI_QUEUE_ID_MME_0_0:
3202 db_reg_offset = mmMME2_QM_PQ_PI_0;
3203 break;
3204
3205 case GAUDI_QUEUE_ID_MME_0_1:
3206 db_reg_offset = mmMME2_QM_PQ_PI_1;
3207 break;
3208
3209 case GAUDI_QUEUE_ID_MME_0_2:
3210 db_reg_offset = mmMME2_QM_PQ_PI_2;
3211 break;
3212
3213 case GAUDI_QUEUE_ID_MME_0_3:
3214 db_reg_offset = mmMME2_QM_PQ_PI_3;
3215 break;
3216
3217 case GAUDI_QUEUE_ID_MME_1_0:
3218 db_reg_offset = mmMME0_QM_PQ_PI_0;
3219 break;
3220
3221 case GAUDI_QUEUE_ID_MME_1_1:
3222 db_reg_offset = mmMME0_QM_PQ_PI_1;
3223 break;
3224
3225 case GAUDI_QUEUE_ID_MME_1_2:
3226 db_reg_offset = mmMME0_QM_PQ_PI_2;
3227 break;
3228
3229 case GAUDI_QUEUE_ID_MME_1_3:
3230 db_reg_offset = mmMME0_QM_PQ_PI_3;
3231 break;
3232
3233 case GAUDI_QUEUE_ID_TPC_0_0:
3234 db_reg_offset = mmTPC0_QM_PQ_PI_0;
3235 break;
3236
3237 case GAUDI_QUEUE_ID_TPC_0_1:
3238 db_reg_offset = mmTPC0_QM_PQ_PI_1;
3239 break;
3240
3241 case GAUDI_QUEUE_ID_TPC_0_2:
3242 db_reg_offset = mmTPC0_QM_PQ_PI_2;
3243 break;
3244
3245 case GAUDI_QUEUE_ID_TPC_0_3:
3246 db_reg_offset = mmTPC0_QM_PQ_PI_3;
3247 break;
3248
3249 case GAUDI_QUEUE_ID_TPC_1_0:
3250 db_reg_offset = mmTPC1_QM_PQ_PI_0;
3251 break;
3252
3253 case GAUDI_QUEUE_ID_TPC_1_1:
3254 db_reg_offset = mmTPC1_QM_PQ_PI_1;
3255 break;
3256
3257 case GAUDI_QUEUE_ID_TPC_1_2:
3258 db_reg_offset = mmTPC1_QM_PQ_PI_2;
3259 break;
3260
3261 case GAUDI_QUEUE_ID_TPC_1_3:
3262 db_reg_offset = mmTPC1_QM_PQ_PI_3;
3263 break;
3264
3265 case GAUDI_QUEUE_ID_TPC_2_0:
3266 db_reg_offset = mmTPC2_QM_PQ_PI_0;
3267 break;
3268
3269 case GAUDI_QUEUE_ID_TPC_2_1:
3270 db_reg_offset = mmTPC2_QM_PQ_PI_1;
3271 break;
3272
3273 case GAUDI_QUEUE_ID_TPC_2_2:
3274 db_reg_offset = mmTPC2_QM_PQ_PI_2;
3275 break;
3276
3277 case GAUDI_QUEUE_ID_TPC_2_3:
3278 db_reg_offset = mmTPC2_QM_PQ_PI_3;
3279 break;
3280
3281 case GAUDI_QUEUE_ID_TPC_3_0:
3282 db_reg_offset = mmTPC3_QM_PQ_PI_0;
3283 break;
3284
3285 case GAUDI_QUEUE_ID_TPC_3_1:
3286 db_reg_offset = mmTPC3_QM_PQ_PI_1;
3287 break;
3288
3289 case GAUDI_QUEUE_ID_TPC_3_2:
3290 db_reg_offset = mmTPC3_QM_PQ_PI_2;
3291 break;
3292
3293 case GAUDI_QUEUE_ID_TPC_3_3:
3294 db_reg_offset = mmTPC3_QM_PQ_PI_3;
3295 break;
3296
3297 case GAUDI_QUEUE_ID_TPC_4_0:
3298 db_reg_offset = mmTPC4_QM_PQ_PI_0;
3299 break;
3300
3301 case GAUDI_QUEUE_ID_TPC_4_1:
3302 db_reg_offset = mmTPC4_QM_PQ_PI_1;
3303 break;
3304
3305 case GAUDI_QUEUE_ID_TPC_4_2:
3306 db_reg_offset = mmTPC4_QM_PQ_PI_2;
3307 break;
3308
3309 case GAUDI_QUEUE_ID_TPC_4_3:
3310 db_reg_offset = mmTPC4_QM_PQ_PI_3;
3311 break;
3312
3313 case GAUDI_QUEUE_ID_TPC_5_0:
3314 db_reg_offset = mmTPC5_QM_PQ_PI_0;
3315 break;
3316
3317 case GAUDI_QUEUE_ID_TPC_5_1:
3318 db_reg_offset = mmTPC5_QM_PQ_PI_1;
3319 break;
3320
3321 case GAUDI_QUEUE_ID_TPC_5_2:
3322 db_reg_offset = mmTPC5_QM_PQ_PI_2;
3323 break;
3324
3325 case GAUDI_QUEUE_ID_TPC_5_3:
3326 db_reg_offset = mmTPC5_QM_PQ_PI_3;
3327 break;
3328
3329 case GAUDI_QUEUE_ID_TPC_6_0:
3330 db_reg_offset = mmTPC6_QM_PQ_PI_0;
3331 break;
3332
3333 case GAUDI_QUEUE_ID_TPC_6_1:
3334 db_reg_offset = mmTPC6_QM_PQ_PI_1;
3335 break;
3336
3337 case GAUDI_QUEUE_ID_TPC_6_2:
3338 db_reg_offset = mmTPC6_QM_PQ_PI_2;
3339 break;
3340
3341 case GAUDI_QUEUE_ID_TPC_6_3:
3342 db_reg_offset = mmTPC6_QM_PQ_PI_3;
3343 break;
3344
3345 case GAUDI_QUEUE_ID_TPC_7_0:
3346 db_reg_offset = mmTPC7_QM_PQ_PI_0;
3347 break;
3348
3349 case GAUDI_QUEUE_ID_TPC_7_1:
3350 db_reg_offset = mmTPC7_QM_PQ_PI_1;
3351 break;
3352
3353 case GAUDI_QUEUE_ID_TPC_7_2:
3354 db_reg_offset = mmTPC7_QM_PQ_PI_2;
3355 break;
3356
3357 case GAUDI_QUEUE_ID_TPC_7_3:
3358 db_reg_offset = mmTPC7_QM_PQ_PI_3;
3359 break;
3360
3361 default:
3362 invalid_queue = true;
3363 }
3364
3365 if (invalid_queue) {
3366 /* Should never get here */
3367 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3368 hw_queue_id);
3369 return;
3370 }
3371
3372 db_value = pi;
3373
3374 /* ring the doorbell */
3375 WREG32(db_reg_offset, db_value);
3376
3377 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3378 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3379 GAUDI_EVENT_PI_UPDATE);
3380 }
3381
3382 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3383 struct hl_bd *bd)
3384 {
3385 __le64 *pbd = (__le64 *) bd;
3386
3387 /* The QMANs are on the host memory so a simple copy suffices */
3388 pqe[0] = pbd[0];
3389 pqe[1] = pbd[1];
3390 }
3391
3392 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3393 dma_addr_t *dma_handle, gfp_t flags)
3394 {
3395 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3396 dma_handle, flags);
3397
3398 /* Shift to the device's base physical address of host memory */
3399 if (kernel_addr)
3400 *dma_handle += HOST_PHYS_BASE;
3401
3402 return kernel_addr;
3403 }
3404
3405 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3406 void *cpu_addr, dma_addr_t dma_handle)
3407 {
3408 /* Cancel the device's base physical address of host memory */
3409 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3410
3411 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3412 }
3413
3414 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3415 u32 queue_id, dma_addr_t *dma_handle,
3416 u16 *queue_len)
3417 {
3418 struct gaudi_device *gaudi = hdev->asic_specific;
3419 struct gaudi_internal_qman_info *q;
3420
3421 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3422 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3423 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3424 return NULL;
3425 }
3426
3427 q = &gaudi->internal_qmans[queue_id];
3428 *dma_handle = q->pq_dma_addr;
3429 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3430
3431 return q->pq_kernel_addr;
3432 }
3433
3434 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3435 u16 len, u32 timeout, long *result)
3436 {
3437 struct gaudi_device *gaudi = hdev->asic_specific;
3438
3439 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3440 if (result)
3441 *result = 0;
3442 return 0;
3443 }
3444
3445 if (!timeout)
3446 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
3447
3448 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3449 timeout, result);
3450 }
3451
3452 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3453 {
3454 struct packet_msg_prot *fence_pkt;
3455 dma_addr_t pkt_dma_addr;
3456 u32 fence_val, tmp, timeout_usec;
3457 dma_addr_t fence_dma_addr;
3458 u32 *fence_ptr;
3459 int rc;
3460
3461 if (hdev->pldm)
3462 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3463 else
3464 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3465
3466 fence_val = GAUDI_QMAN0_FENCE_VAL;
3467
3468 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3469 &fence_dma_addr);
3470 if (!fence_ptr) {
3471 dev_err(hdev->dev,
3472 "Failed to allocate memory for H/W queue %d testing\n",
3473 hw_queue_id);
3474 return -ENOMEM;
3475 }
3476
3477 *fence_ptr = 0;
3478
3479 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3480 sizeof(struct packet_msg_prot),
3481 GFP_KERNEL, &pkt_dma_addr);
3482 if (!fence_pkt) {
3483 dev_err(hdev->dev,
3484 "Failed to allocate packet for H/W queue %d testing\n",
3485 hw_queue_id);
3486 rc = -ENOMEM;
3487 goto free_fence_ptr;
3488 }
3489
3490 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
3491 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
3492 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
3493
3494 fence_pkt->ctl = cpu_to_le32(tmp);
3495 fence_pkt->value = cpu_to_le32(fence_val);
3496 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3497
3498 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3499 sizeof(struct packet_msg_prot),
3500 pkt_dma_addr);
3501 if (rc) {
3502 dev_err(hdev->dev,
3503 "Failed to send fence packet to H/W queue %d\n",
3504 hw_queue_id);
3505 goto free_pkt;
3506 }
3507
3508 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3509 1000, timeout_usec, true);
3510
3511 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3512
3513 if (rc == -ETIMEDOUT) {
3514 dev_err(hdev->dev,
3515 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3516 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3517 rc = -EIO;
3518 }
3519
3520 free_pkt:
3521 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3522 pkt_dma_addr);
3523 free_fence_ptr:
3524 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3525 fence_dma_addr);
3526 return rc;
3527 }
3528
3529 static int gaudi_test_cpu_queue(struct hl_device *hdev)
3530 {
3531 struct gaudi_device *gaudi = hdev->asic_specific;
3532
3533 /*
3534 * check the capability here as send_cpu_message() won't update the result
3535 * value if the CPU queue capability is not set
3536 */
3537 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3538 return 0;
3539
3540 return hl_fw_test_cpu_queue(hdev);
3541 }
3542
3543 static int gaudi_test_queues(struct hl_device *hdev)
3544 {
3545 int i, rc, ret_val = 0;
3546
3547 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
3548 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3549 rc = gaudi_test_queue(hdev, i);
3550 if (rc)
3551 ret_val = -EINVAL;
3552 }
3553 }
3554
3555 rc = gaudi_test_cpu_queue(hdev);
3556 if (rc)
3557 ret_val = -EINVAL;
3558
3559 return ret_val;
3560 }
3561
3562 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3563 gfp_t mem_flags, dma_addr_t *dma_handle)
3564 {
3565 void *kernel_addr;
3566
3567 if (size > GAUDI_DMA_POOL_BLK_SIZE)
3568 return NULL;
3569
3570 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3571
3572 /* Shift to the device's base physical address of host memory */
3573 if (kernel_addr)
3574 *dma_handle += HOST_PHYS_BASE;
3575
3576 return kernel_addr;
3577 }
3578
3579 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3580 dma_addr_t dma_addr)
3581 {
3582 /* Cancel the device's base physical address of host memory */
3583 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3584
3585 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3586 }
3587
3588 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3589 size_t size, dma_addr_t *dma_handle)
3590 {
3591 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3592 }
3593
3594 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3595 size_t size, void *vaddr)
3596 {
3597 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3598 }
3599
3600 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3601 int nents, enum dma_data_direction dir)
3602 {
3603 struct scatterlist *sg;
3604 int i;
3605
3606 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3607 return -ENOMEM;
3608
3609 /* Shift to the device's base physical address of host memory */
3610 for_each_sg(sgl, sg, nents, i)
3611 sg->dma_address += HOST_PHYS_BASE;
3612
3613 return 0;
3614 }
3615
3616 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3617 int nents, enum dma_data_direction dir)
3618 {
3619 struct scatterlist *sg;
3620 int i;
3621
3622 /* Cancel the device's base physical address of host memory */
3623 for_each_sg(sgl, sg, nents, i)
3624 sg->dma_address -= HOST_PHYS_BASE;
3625
3626 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3627 }
3628
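/*
 * Compute the worst-case size, in bytes, of the LIN_DMA descriptors that
 * will replace a single user DMA packet. Physically contiguous SG entries
 * are merged as long as the combined length fits in DMA_MAX_TRANSFER_SIZE,
 * so one packet_lin_dma is counted per merged chunk.
 */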
3629 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3630 struct sg_table *sgt)
3631 {
3632 struct scatterlist *sg, *sg_next_iter;
3633 u32 count, dma_desc_cnt;
3634 u64 len, len_next;
3635 dma_addr_t addr, addr_next;
3636
3637 dma_desc_cnt = 0;
3638
3639 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3640
3641 len = sg_dma_len(sg);
3642 addr = sg_dma_address(sg);
3643
3644 if (len == 0)
3645 break;
3646
3647 while ((count + 1) < sgt->nents) {
3648 sg_next_iter = sg_next(sg);
3649 len_next = sg_dma_len(sg_next_iter);
3650 addr_next = sg_dma_address(sg_next_iter);
3651
3652 if (len_next == 0)
3653 break;
3654
3655 if ((addr + len == addr_next) &&
3656 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3657 len += len_next;
3658 count++;
3659 sg = sg_next_iter;
3660 } else {
3661 break;
3662 }
3663 }
3664
3665 dma_desc_cnt++;
3666 }
3667
3668 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3669 }
3670
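/*
 * Pin the host buffer referenced by a user DMA packet (unless it is already
 * pinned for this job), DMA-map its SG table and account for the extra
 * descriptors in the parser's patched CB size.
 */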
3671 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3672 struct hl_cs_parser *parser,
3673 struct packet_lin_dma *user_dma_pkt,
3674 u64 addr, enum dma_data_direction dir)
3675 {
3676 struct hl_userptr *userptr;
3677 int rc;
3678
3679 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3680 parser->job_userptr_list, &userptr))
3681 goto already_pinned;
3682
3683 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3684 if (!userptr)
3685 return -ENOMEM;
3686
3687 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3688 userptr);
3689 if (rc)
3690 goto free_userptr;
3691
3692 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3693
3694 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3695 userptr->sgt->nents, dir);
3696 if (rc) {
3697 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3698 goto unpin_memory;
3699 }
3700
3701 userptr->dma_mapped = true;
3702 userptr->dir = dir;
3703
3704 already_pinned:
3705 parser->patched_cb_size +=
3706 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3707
3708 return 0;
3709
3710 unpin_memory:
3711 list_del(&userptr->job_node);
3712 hl_unpin_host_memory(hdev, userptr);
3713 free_userptr:
3714 kfree(userptr);
3715 return rc;
3716 }
3717
3718 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3719 struct hl_cs_parser *parser,
3720 struct packet_lin_dma *user_dma_pkt,
3721 bool src_in_host)
3722 {
3723 enum dma_data_direction dir;
3724 bool skip_host_mem_pin = false, user_memset;
3725 u64 addr;
3726 int rc = 0;
3727
3728 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3729 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3730 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3731
3732 if (src_in_host) {
3733 if (user_memset)
3734 skip_host_mem_pin = true;
3735
3736 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3737 dir = DMA_TO_DEVICE;
3738 addr = le64_to_cpu(user_dma_pkt->src_addr);
3739 } else {
3740 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3741 dir = DMA_FROM_DEVICE;
3742 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3743 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3744 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3745 }
3746
3747 if (skip_host_mem_pin)
3748 parser->patched_cb_size += sizeof(*user_dma_pkt);
3749 else
3750 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3751 addr, dir);
3752
3753 return rc;
3754 }
3755
3756 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3757 struct hl_cs_parser *parser,
3758 struct packet_lin_dma *user_dma_pkt)
3759 {
3760 bool src_in_host = false;
3761 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3762 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3763 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3764
3765 dev_dbg(hdev->dev, "DMA packet details:\n");
3766 dev_dbg(hdev->dev, "source == 0x%llx\n",
3767 le64_to_cpu(user_dma_pkt->src_addr));
3768 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3769 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3770
3771 /*
3772 * Special handling for DMA with size 0. Bypass all validations
3773 * because no transactions will be done except for WR_COMP, which
3774 * is not a security issue
3775 */
3776 if (!le32_to_cpu(user_dma_pkt->tsize)) {
3777 parser->patched_cb_size += sizeof(*user_dma_pkt);
3778 return 0;
3779 }
3780
3781 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3782 src_in_host = true;
3783
3784 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3785 src_in_host);
3786 }
3787
3788 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3789 struct hl_cs_parser *parser,
3790 struct packet_load_and_exe *user_pkt)
3791 {
3792 u32 cfg;
3793
3794 cfg = le32_to_cpu(user_pkt->cfg);
3795
3796 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3797 dev_err(hdev->dev,
3798 "User not allowed to use Load and Execute\n");
3799 return -EPERM;
3800 }
3801
3802 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3803
3804 return 0;
3805 }
3806
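/*
 * First parser pass: walk the user CB packet by packet, reject packets that
 * user space is not allowed to submit, and compute the size of the patched
 * CB, including room for the two MSG_PROT packets appended at the end.
 */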
3807 static int gaudi_validate_cb(struct hl_device *hdev,
3808 struct hl_cs_parser *parser, bool is_mmu)
3809 {
3810 u32 cb_parsed_length = 0;
3811 int rc = 0;
3812
3813 parser->patched_cb_size = 0;
3814
3815 /* user_cb_size is more than 0 so the loop will always be executed */
3816 while (cb_parsed_length < parser->user_cb_size) {
3817 enum packet_id pkt_id;
3818 u16 pkt_size;
3819 struct gaudi_packet *user_pkt;
3820
3821 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3822
3823 pkt_id = (enum packet_id) (
3824 (le64_to_cpu(user_pkt->header) &
3825 PACKET_HEADER_PACKET_ID_MASK) >>
3826 PACKET_HEADER_PACKET_ID_SHIFT);
3827
3828 if (!validate_packet_id(pkt_id)) {
3829 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3830 rc = -EINVAL;
3831 break;
3832 }
3833
3834 pkt_size = gaudi_packet_sizes[pkt_id];
3835 cb_parsed_length += pkt_size;
3836 if (cb_parsed_length > parser->user_cb_size) {
3837 dev_err(hdev->dev,
3838 "packet 0x%x is out of CB boundary\n", pkt_id);
3839 rc = -EINVAL;
3840 break;
3841 }
3842
3843 switch (pkt_id) {
3844 case PACKET_MSG_PROT:
3845 dev_err(hdev->dev,
3846 "User not allowed to use MSG_PROT\n");
3847 rc = -EPERM;
3848 break;
3849
3850 case PACKET_CP_DMA:
3851 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3852 rc = -EPERM;
3853 break;
3854
3855 case PACKET_STOP:
3856 dev_err(hdev->dev, "User not allowed to use STOP\n");
3857 rc = -EPERM;
3858 break;
3859
3860 case PACKET_WREG_BULK:
3861 dev_err(hdev->dev,
3862 "User not allowed to use WREG_BULK\n");
3863 rc = -EPERM;
3864 break;
3865
3866 case PACKET_LOAD_AND_EXE:
3867 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3868 (struct packet_load_and_exe *) user_pkt);
3869 break;
3870
3871 case PACKET_LIN_DMA:
3872 parser->contains_dma_pkt = true;
3873 if (is_mmu)
3874 parser->patched_cb_size += pkt_size;
3875 else
3876 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3877 (struct packet_lin_dma *) user_pkt);
3878 break;
3879
3880 case PACKET_WREG_32:
3881 case PACKET_MSG_LONG:
3882 case PACKET_MSG_SHORT:
3883 case PACKET_REPEAT:
3884 case PACKET_FENCE:
3885 case PACKET_NOP:
3886 case PACKET_ARB_POINT:
3887 parser->patched_cb_size += pkt_size;
3888 break;
3889
3890 default:
3891 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3892 pkt_id);
3893 rc = -EINVAL;
3894 break;
3895 }
3896
3897 if (rc)
3898 break;
3899 }
3900
3901 /*
3902 * The new CB should have space at the end for two MSG_PROT packets:
3903 * 1. A packet that will act as a completion packet
3904 * 2. A packet that will generate an MSI-X interrupt
3905 */
3906 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3907
3908 return rc;
3909 }
3910
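/*
 * Expand a user LIN_DMA packet into one LIN_DMA descriptor per merged SG
 * entry of the pinned host buffer. EB is kept only on the first descriptor,
 * and the user's WR_COMP enable bit is restored only on the last one.
 */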
3911 static int gaudi_patch_dma_packet(struct hl_device *hdev,
3912 struct hl_cs_parser *parser,
3913 struct packet_lin_dma *user_dma_pkt,
3914 struct packet_lin_dma *new_dma_pkt,
3915 u32 *new_dma_pkt_size)
3916 {
3917 struct hl_userptr *userptr;
3918 struct scatterlist *sg, *sg_next_iter;
3919 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3920 u64 len, len_next;
3921 dma_addr_t dma_addr, dma_addr_next;
3922 u64 device_memory_addr, addr;
3923 enum dma_data_direction dir;
3924 struct sg_table *sgt;
3925 bool src_in_host = false;
3926 bool skip_host_mem_pin = false;
3927 bool user_memset;
3928
3929 ctl = le32_to_cpu(user_dma_pkt->ctl);
3930
3931 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3932 src_in_host = true;
3933
3934 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3935 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3936
3937 if (src_in_host) {
3938 addr = le64_to_cpu(user_dma_pkt->src_addr);
3939 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3940 dir = DMA_TO_DEVICE;
3941 if (user_memset)
3942 skip_host_mem_pin = true;
3943 } else {
3944 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3945 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3946 dir = DMA_FROM_DEVICE;
3947 }
3948
3949 if ((!skip_host_mem_pin) &&
3950 (!hl_userptr_is_pinned(hdev, addr,
3951 le32_to_cpu(user_dma_pkt->tsize),
3952 parser->job_userptr_list, &userptr))) {
3953 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3954 addr, le32_to_cpu(user_dma_pkt->tsize));
3955 return -EFAULT;
3956 }
3957
3958 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3959 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3960 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3961 return 0;
3962 }
3963
3964 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3965
3966 sgt = userptr->sgt;
3967 dma_desc_cnt = 0;
3968
3969 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3970 len = sg_dma_len(sg);
3971 dma_addr = sg_dma_address(sg);
3972
3973 if (len == 0)
3974 break;
3975
3976 while ((count + 1) < sgt->nents) {
3977 sg_next_iter = sg_next(sg);
3978 len_next = sg_dma_len(sg_next_iter);
3979 dma_addr_next = sg_dma_address(sg_next_iter);
3980
3981 if (len_next == 0)
3982 break;
3983
3984 if ((dma_addr + len == dma_addr_next) &&
3985 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3986 len += len_next;
3987 count++;
3988 sg = sg_next_iter;
3989 } else {
3990 break;
3991 }
3992 }
3993
3994 ctl = le32_to_cpu(user_dma_pkt->ctl);
3995 if (likely(dma_desc_cnt))
3996 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3997 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3998 new_dma_pkt->ctl = cpu_to_le32(ctl);
3999 new_dma_pkt->tsize = cpu_to_le32(len);
4000
4001 if (dir == DMA_TO_DEVICE) {
4002 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
4003 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
4004 } else {
4005 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
4006 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
4007 }
4008
4009 if (!user_memset)
4010 device_memory_addr += len;
4011 dma_desc_cnt++;
4012 new_dma_pkt++;
4013 }
4014
4015 if (!dma_desc_cnt) {
4016 dev_err(hdev->dev,
4017 "Error of 0 SG entries when patching DMA packet\n");
4018 return -EFAULT;
4019 }
4020
4021 /* Fix the last DMA packet - WR_COMP must be as the user set it */
4022 new_dma_pkt--;
4023 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4024
4025 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4026
4027 return 0;
4028 }
4029
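/*
 * Second parser pass: copy the already-validated user CB into the patched
 * CB, expanding each LIN_DMA packet into its patched descriptors and copying
 * all other allowed packets as-is.
 */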
4030 static int gaudi_patch_cb(struct hl_device *hdev,
4031 struct hl_cs_parser *parser)
4032 {
4033 u32 cb_parsed_length = 0;
4034 u32 cb_patched_cur_length = 0;
4035 int rc = 0;
4036
4037 /* user_cb_size is more than 0 so the loop will always be executed */
4038 while (cb_parsed_length < parser->user_cb_size) {
4039 enum packet_id pkt_id;
4040 u16 pkt_size;
4041 u32 new_pkt_size = 0;
4042 struct gaudi_packet *user_pkt, *kernel_pkt;
4043
4044 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
4045 kernel_pkt = parser->patched_cb->kernel_address +
4046 cb_patched_cur_length;
4047
4048 pkt_id = (enum packet_id) (
4049 (le64_to_cpu(user_pkt->header) &
4050 PACKET_HEADER_PACKET_ID_MASK) >>
4051 PACKET_HEADER_PACKET_ID_SHIFT);
4052
4053 if (!validate_packet_id(pkt_id)) {
4054 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4055 rc = -EINVAL;
4056 break;
4057 }
4058
4059 pkt_size = gaudi_packet_sizes[pkt_id];
4060 cb_parsed_length += pkt_size;
4061 if (cb_parsed_length > parser->user_cb_size) {
4062 dev_err(hdev->dev,
4063 "packet 0x%x is out of CB boundary\n", pkt_id);
4064 rc = -EINVAL;
4065 break;
4066 }
4067
4068 switch (pkt_id) {
4069 case PACKET_LIN_DMA:
4070 rc = gaudi_patch_dma_packet(hdev, parser,
4071 (struct packet_lin_dma *) user_pkt,
4072 (struct packet_lin_dma *) kernel_pkt,
4073 &new_pkt_size);
4074 cb_patched_cur_length += new_pkt_size;
4075 break;
4076
4077 case PACKET_MSG_PROT:
4078 dev_err(hdev->dev,
4079 "User not allowed to use MSG_PROT\n");
4080 rc = -EPERM;
4081 break;
4082
4083 case PACKET_CP_DMA:
4084 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4085 rc = -EPERM;
4086 break;
4087
4088 case PACKET_STOP:
4089 dev_err(hdev->dev, "User not allowed to use STOP\n");
4090 rc = -EPERM;
4091 break;
4092
4093 case PACKET_WREG_32:
4094 case PACKET_WREG_BULK:
4095 case PACKET_MSG_LONG:
4096 case PACKET_MSG_SHORT:
4097 case PACKET_REPEAT:
4098 case PACKET_FENCE:
4099 case PACKET_NOP:
4100 case PACKET_ARB_POINT:
4101 case PACKET_LOAD_AND_EXE:
4102 memcpy(kernel_pkt, user_pkt, pkt_size);
4103 cb_patched_cur_length += pkt_size;
4104 break;
4105
4106 default:
4107 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4108 pkt_id);
4109 rc = -EINVAL;
4110 break;
4111 }
4112
4113 if (rc)
4114 break;
4115 }
4116
4117 return rc;
4118 }
4119
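/*
 * CS parsing when the MMU is enabled: the user CB is copied as-is into a
 * kernel-owned patched CB (addresses go through the MMU so no address
 * patching is needed) and validated, with room reserved for the two
 * MSG_PROT packets at the end.
 */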
4120 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4121 struct hl_cs_parser *parser)
4122 {
4123 u64 patched_cb_handle;
4124 u32 patched_cb_size;
4125 struct hl_cb *user_cb;
4126 int rc;
4127
4128 /*
4129 * The new CB should have space at the end for two MSG_PROT packets:
4130 * 1. A packet that will act as a completion packet
4131 * 2. A packet that will generate an MSI interrupt
4132 */
4133 parser->patched_cb_size = parser->user_cb_size +
4134 sizeof(struct packet_msg_prot) * 2;
4135
4136 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4137 parser->patched_cb_size, false, false,
4138 &patched_cb_handle);
4139
4140 if (rc) {
4141 dev_err(hdev->dev,
4142 "Failed to allocate patched CB for DMA CS %d\n",
4143 rc);
4144 return rc;
4145 }
4146
4147 patched_cb_handle >>= PAGE_SHIFT;
4148 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4149 (u32) patched_cb_handle);
4150 /* hl_cb_get should never fail here so use kernel WARN */
4151 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4152 (u32) patched_cb_handle);
4153 if (!parser->patched_cb) {
4154 rc = -EFAULT;
4155 goto out;
4156 }
4157
4158 /*
4159 * The check that parser->user_cb_size <= parser->user_cb->size was done
4160 * in validate_queue_index().
4161 */
4162 memcpy(parser->patched_cb->kernel_address,
4163 parser->user_cb->kernel_address,
4164 parser->user_cb_size);
4165
4166 patched_cb_size = parser->patched_cb_size;
4167
4168 /* Validate patched CB instead of user CB */
4169 user_cb = parser->user_cb;
4170 parser->user_cb = parser->patched_cb;
4171 rc = gaudi_validate_cb(hdev, parser, true);
4172 parser->user_cb = user_cb;
4173
4174 if (rc) {
4175 hl_cb_put(parser->patched_cb);
4176 goto out;
4177 }
4178
4179 if (patched_cb_size != parser->patched_cb_size) {
4180 dev_err(hdev->dev, "user CB size mismatch\n");
4181 hl_cb_put(parser->patched_cb);
4182 rc = -EINVAL;
4183 goto out;
4184 }
4185
4186 out:
4187 /*
4188 * Always call cb destroy here because we still hold one reference
4189 * to it from the earlier cb_get. After the job is completed,
4190 * cb_put will release it, but here we want to remove it from the
4191 * idr
4192 */
4193 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4194 patched_cb_handle << PAGE_SHIFT);
4195
4196 return rc;
4197 }
4198
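/*
 * CS parsing when the MMU is disabled: validate the user CB (which also pins
 * the referenced host memory), then build a patched CB in which host
 * addresses are replaced with their DMA-mapped counterparts.
 */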
4199 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4200 struct hl_cs_parser *parser)
4201 {
4202 u64 patched_cb_handle;
4203 int rc;
4204
4205 rc = gaudi_validate_cb(hdev, parser, false);
4206
4207 if (rc)
4208 goto free_userptr;
4209
4210 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4211 parser->patched_cb_size, false, false,
4212 &patched_cb_handle);
4213 if (rc) {
4214 dev_err(hdev->dev,
4215 "Failed to allocate patched CB for DMA CS %d\n", rc);
4216 goto free_userptr;
4217 }
4218
4219 patched_cb_handle >>= PAGE_SHIFT;
4220 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4221 (u32) patched_cb_handle);
4222 /* hl_cb_get should never fail here so use kernel WARN */
4223 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4224 (u32) patched_cb_handle);
4225 if (!parser->patched_cb) {
4226 rc = -EFAULT;
4227 goto out;
4228 }
4229
4230 rc = gaudi_patch_cb(hdev, parser);
4231
4232 if (rc)
4233 hl_cb_put(parser->patched_cb);
4234
4235 out:
4236 /*
4237 * Always call cb destroy here because we still hold one reference
4238 * to it from the earlier cb_get. After the job is completed,
4239 * cb_put will release it, but here we want to remove it from the
4240 * idr
4241 */
4242 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4243 patched_cb_handle << PAGE_SHIFT);
4244
4245 free_userptr:
4246 if (rc)
4247 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4248 return rc;
4249 }
4250
4251 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4252 struct hl_cs_parser *parser)
4253 {
4254 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4255
4256 /* For internal queue jobs just check if CB address is valid */
4257 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4258 parser->user_cb_size,
4259 asic_prop->sram_user_base_address,
4260 asic_prop->sram_end_address))
4261 return 0;
4262
4263 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4264 parser->user_cb_size,
4265 asic_prop->dram_user_base_address,
4266 asic_prop->dram_end_address))
4267 return 0;
4268
4269 /* PMMU and HPMMU addresses are equal, check only one of them */
4270 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4271 parser->user_cb_size,
4272 asic_prop->pmmu.start_addr,
4273 asic_prop->pmmu.end_addr))
4274 return 0;
4275
4276 dev_err(hdev->dev,
4277 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4278 parser->user_cb, parser->user_cb_size);
4279
4280 return -EFAULT;
4281 }
4282
4283 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4284 {
4285 struct gaudi_device *gaudi = hdev->asic_specific;
4286
4287 if (parser->queue_type == QUEUE_TYPE_INT)
4288 return gaudi_parse_cb_no_ext_queue(hdev, parser);
4289
4290 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4291 return gaudi_parse_cb_mmu(hdev, parser);
4292 else
4293 return gaudi_parse_cb_no_mmu(hdev, parser);
4294 }
4295
4296 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4297 void *kernel_address, u32 len,
4298 u64 cq_addr, u32 cq_val, u32 msi_vec,
4299 bool eb)
4300 {
4301 struct gaudi_device *gaudi = hdev->asic_specific;
4302 struct packet_msg_prot *cq_pkt;
4303 u32 tmp;
4304
4305 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
4306
4307 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4308 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4309
4310 if (eb)
4311 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4312
4313 cq_pkt->ctl = cpu_to_le32(tmp);
4314 cq_pkt->value = cpu_to_le32(cq_val);
4315 cq_pkt->addr = cpu_to_le64(cq_addr);
4316
4317 cq_pkt++;
4318
4319 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4320 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4321 cq_pkt->ctl = cpu_to_le32(tmp);
4322 cq_pkt->value = cpu_to_le32(1);
4323
4324 if (!gaudi->multi_msi_mode)
4325 msi_vec = 0;
4326
4327 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4328 }
4329
4330 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4331 {
4332 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4333 }
4334
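/*
 * Fill a device memory range with a 64-bit pattern by building a single
 * memset LIN_DMA packet in a kernel CB and sending it as a driver job on
 * the DMA 0 queue. The DMA0 error cause register is checked afterwards to
 * verify the transfer completed without errors.
 */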
4335 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4336 u32 size, u64 val)
4337 {
4338 struct packet_lin_dma *lin_dma_pkt;
4339 struct hl_cs_job *job;
4340 u32 cb_size, ctl, err_cause;
4341 struct hl_cb *cb;
4342 int rc;
4343
4344 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
4345 if (!cb)
4346 return -EFAULT;
4347
4348 lin_dma_pkt = cb->kernel_address;
4349 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4350 cb_size = sizeof(*lin_dma_pkt);
4351
4352 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
4353 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
4354 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
4355 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4356 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
4357
4358 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4359 lin_dma_pkt->src_addr = cpu_to_le64(val);
4360 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4361 lin_dma_pkt->tsize = cpu_to_le32(size);
4362
4363 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4364 if (!job) {
4365 dev_err(hdev->dev, "Failed to allocate a new job\n");
4366 rc = -ENOMEM;
4367 goto release_cb;
4368 }
4369
4370 /* Verify DMA is OK */
4371 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4372 if (err_cause && !hdev->init_done) {
4373 dev_dbg(hdev->dev,
4374 "Clearing DMA0 engine from errors (cause 0x%x)\n",
4375 err_cause);
4376 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4377 }
4378
4379 job->id = 0;
4380 job->user_cb = cb;
4381 job->user_cb->cs_cnt++;
4382 job->user_cb_size = cb_size;
4383 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4384 job->patched_cb = job->user_cb;
4385 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4386
4387 hl_debugfs_add_job(hdev, job);
4388
4389 rc = gaudi_send_job_on_qman0(hdev, job);
4390 hl_debugfs_remove_job(hdev, job);
4391 kfree(job);
4392 cb->cs_cnt--;
4393
4394 /* Verify DMA is OK */
4395 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4396 if (err_cause) {
4397 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
4398 rc = -EIO;
4399 if (!hdev->init_done) {
4400 dev_dbg(hdev->dev,
4401 "Clearing DMA0 engine from errors (cause 0x%x)\n",
4402 err_cause);
4403 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4404 }
4405 }
4406
4407 release_cb:
4408 hl_cb_put(cb);
4409 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4410
4411 return rc;
4412 }
4413
4414 static void gaudi_restore_sm_registers(struct hl_device *hdev)
4415 {
4416 int i;
4417
4418 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4419 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4420 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4421 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4422 }
4423
4424 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4425 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4426 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4427 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4428 }
4429
4430 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4431
4432 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4433 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4434
4435 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4436
4437 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4438 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4439 }
4440
4441 static void gaudi_restore_dma_registers(struct hl_device *hdev)
4442 {
4443 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4444 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4445 int i;
4446
4447 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4448 u64 sob_addr = CFG_BASE +
4449 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4450 (i * sob_delta);
4451 u32 dma_offset = i * DMA_CORE_OFFSET;
4452
4453 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4454 lower_32_bits(sob_addr));
4455 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4456 upper_32_bits(sob_addr));
4457 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4458
4459 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4460 * modified by the user for SRAM reduction
4461 */
4462 if (i > 1)
4463 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4464 0x00000001);
4465 }
4466 }
4467
4468 static void gaudi_restore_qm_registers(struct hl_device *hdev)
4469 {
4470 u32 qman_offset;
4471 int i;
4472
4473 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4474 qman_offset = i * DMA_QMAN_OFFSET;
4475 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4476 }
4477
4478 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4479 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4480 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4481 }
4482
4483 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4484 qman_offset = i * TPC_QMAN_OFFSET;
4485 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4486 }
4487 }
4488
4489 static void gaudi_restore_user_registers(struct hl_device *hdev)
4490 {
4491 gaudi_restore_sm_registers(hdev);
4492 gaudi_restore_dma_registers(hdev);
4493 gaudi_restore_qm_registers(hdev);
4494 }
4495
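/*
 * Context switch: scrub the user SRAM region with a fixed pattern, program
 * the new ASID into all engines and restore the user-configurable SM, DMA
 * and QM registers to their default values.
 */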
4496 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4497 {
4498 struct asic_fixed_properties *prop = &hdev->asic_prop;
4499 u64 addr = prop->sram_user_base_address;
4500 u32 size = hdev->pldm ? 0x10000 :
4501 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4502 u64 val = 0x7777777777777777ull;
4503 int rc;
4504
4505 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4506 if (rc) {
4507 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4508 return rc;
4509 }
4510
4511 gaudi_mmu_prepare(hdev, asid);
4512
4513 gaudi_restore_user_registers(hdev);
4514
4515 return 0;
4516 }
4517
4518 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4519 {
4520 struct asic_fixed_properties *prop = &hdev->asic_prop;
4521 struct gaudi_device *gaudi = hdev->asic_specific;
4522 u64 addr = prop->mmu_pgt_addr;
4523 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4524
4525 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4526 return 0;
4527
4528 return gaudi_memset_device_memory(hdev, addr, size, 0);
4529 }
4530
4531 static void gaudi_restore_phase_topology(struct hl_device *hdev)
4532 {
4533
4534 }
4535
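/*
 * debugfs access helpers: an address is routed to the configuration space,
 * the SRAM BAR, HBM (through the movable HBM BAR window) or, when no IOMMU
 * is present, directly to host physical memory.
 */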
4536 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4537 {
4538 struct asic_fixed_properties *prop = &hdev->asic_prop;
4539 struct gaudi_device *gaudi = hdev->asic_specific;
4540 u64 hbm_bar_addr;
4541 int rc = 0;
4542
4543 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4544
4545 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4546 (hdev->clock_gating_mask &
4547 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4548
4549 dev_err_ratelimited(hdev->dev,
4550 "Can't read register - clock gating is enabled!\n");
4551 rc = -EFAULT;
4552 } else {
4553 *val = RREG32(addr - CFG_BASE);
4554 }
4555
4556 } else if ((addr >= SRAM_BASE_ADDR) &&
4557 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4558 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4559 (addr - SRAM_BASE_ADDR));
4560 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4561 u64 bar_base_addr = DRAM_PHYS_BASE +
4562 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4563
4564 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4565 if (hbm_bar_addr != U64_MAX) {
4566 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4567 (addr - bar_base_addr));
4568
4569 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4570 hbm_bar_addr);
4571 }
4572 if (hbm_bar_addr == U64_MAX)
4573 rc = -EIO;
4574 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4575 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4576 } else {
4577 rc = -EFAULT;
4578 }
4579
4580 return rc;
4581 }
4582
4583 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4584 {
4585 struct asic_fixed_properties *prop = &hdev->asic_prop;
4586 struct gaudi_device *gaudi = hdev->asic_specific;
4587 u64 hbm_bar_addr;
4588 int rc = 0;
4589
4590 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4591
4592 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4593 (hdev->clock_gating_mask &
4594 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4595
4596 dev_err_ratelimited(hdev->dev,
4597 "Can't write register - clock gating is enabled!\n");
4598 rc = -EFAULT;
4599 } else {
4600 WREG32(addr - CFG_BASE, val);
4601 }
4602
4603 } else if ((addr >= SRAM_BASE_ADDR) &&
4604 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4605 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4606 (addr - SRAM_BASE_ADDR));
4607 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4608 u64 bar_base_addr = DRAM_PHYS_BASE +
4609 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4610
4611 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4612 if (hbm_bar_addr != U64_MAX) {
4613 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4614 (addr - bar_base_addr));
4615
4616 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4617 hbm_bar_addr);
4618 }
4619 if (hbm_bar_addr == U64_MAX)
4620 rc = -EIO;
4621 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4622 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4623 } else {
4624 rc = -EFAULT;
4625 }
4626
4627 return rc;
4628 }
4629
4630 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4631 {
4632 struct asic_fixed_properties *prop = &hdev->asic_prop;
4633 struct gaudi_device *gaudi = hdev->asic_specific;
4634 u64 hbm_bar_addr;
4635 int rc = 0;
4636
4637 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4638
4639 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4640 (hdev->clock_gating_mask &
4641 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4642
4643 dev_err_ratelimited(hdev->dev,
4644 "Can't read register - clock gating is enabled!\n");
4645 rc = -EFAULT;
4646 } else {
4647 u32 val_l = RREG32(addr - CFG_BASE);
4648 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4649
4650 *val = (((u64) val_h) << 32) | val_l;
4651 }
4652
4653 } else if ((addr >= SRAM_BASE_ADDR) &&
4654 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4655 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4656 (addr - SRAM_BASE_ADDR));
4657 } else if (addr <=
4658 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4659 u64 bar_base_addr = DRAM_PHYS_BASE +
4660 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4661
4662 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4663 if (hbm_bar_addr != U64_MAX) {
4664 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4665 (addr - bar_base_addr));
4666
4667 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4668 hbm_bar_addr);
4669 }
4670 if (hbm_bar_addr == U64_MAX)
4671 rc = -EIO;
4672 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4673 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4674 } else {
4675 rc = -EFAULT;
4676 }
4677
4678 return rc;
4679 }
4680
4681 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4682 {
4683 struct asic_fixed_properties *prop = &hdev->asic_prop;
4684 struct gaudi_device *gaudi = hdev->asic_specific;
4685 u64 hbm_bar_addr;
4686 int rc = 0;
4687
4688 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4689
4690 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4691 (hdev->clock_gating_mask &
4692 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4693
4694 dev_err_ratelimited(hdev->dev,
4695 "Can't write register - clock gating is enabled!\n");
4696 rc = -EFAULT;
4697 } else {
4698 WREG32(addr - CFG_BASE, lower_32_bits(val));
4699 WREG32(addr + sizeof(u32) - CFG_BASE,
4700 upper_32_bits(val));
4701 }
4702
4703 } else if ((addr >= SRAM_BASE_ADDR) &&
4704 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4705 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4706 (addr - SRAM_BASE_ADDR));
4707 } else if (addr <=
4708 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4709 u64 bar_base_addr = DRAM_PHYS_BASE +
4710 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4711
4712 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4713 if (hbm_bar_addr != U64_MAX) {
4714 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4715 (addr - bar_base_addr));
4716
4717 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4718 hbm_bar_addr);
4719 }
4720 if (hbm_bar_addr == U64_MAX)
4721 rc = -EIO;
4722 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4723 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4724 } else {
4725 rc = -EFAULT;
4726 }
4727
4728 return rc;
4729 }
4730
4731 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4732 {
4733 struct gaudi_device *gaudi = hdev->asic_specific;
4734
4735 if (hdev->hard_reset_pending)
4736 return U64_MAX;
4737
4738 return readq(hdev->pcie_bar[HBM_BAR_ID] +
4739 (addr - gaudi->hbm_bar_cur_addr));
4740 }
4741
4742 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4743 {
4744 struct gaudi_device *gaudi = hdev->asic_specific;
4745
4746 if (hdev->hard_reset_pending)
4747 return;
4748
4749 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4750 (addr - gaudi->hbm_bar_cur_addr));
4751 }
4752
4753 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4754 {
4755 /* mask to zero the MMBP and ASID bits */
4756 WREG32_AND(reg, ~0x7FF);
4757 WREG32_OR(reg, asid);
4758 }
4759
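/*
 * Program the context's ASID into the non-secure properties and
 * ARUSER/AWUSER registers of all DMA, TPC and MME engines. Clock gating is
 * disabled around the register writes.
 */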
4760 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4761 {
4762 struct gaudi_device *gaudi = hdev->asic_specific;
4763
4764 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4765 return;
4766
4767 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4768 WARN(1, "asid %u is too big\n", asid);
4769 return;
4770 }
4771
4772 mutex_lock(&gaudi->clk_gate_mutex);
4773
4774 hdev->asic_funcs->disable_clock_gating(hdev);
4775
4776 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4777 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4778 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4779 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4780 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4781
4782 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4783 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4784 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4785 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4786 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4787
4788 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4789 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4790 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4791 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4792 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4793
4794 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4795 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4796 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4797 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4798 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4799
4800 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4801 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4802 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4803 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4804 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4805
4806 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4807 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4808 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4809 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4810 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4811
4812 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4813 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4814 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4815 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4816 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4817
4818 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4819 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4820 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4821 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4822 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4823
4824 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4825 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4826 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4827 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4828 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4829 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4830 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4831 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4832
4833 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4834 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4835 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4836 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4837 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4838 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4839 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4840
4841 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4842 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4843 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4844 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4845 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4846 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4847 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4848
4849 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4850 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4851 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4852 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4853 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4854 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4855 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4856
4857 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4858 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4859 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4860 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4861 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4862 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4863 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4864
4865 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4866 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4867 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4868 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4869 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4870 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4871 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4872
4873 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4874 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4875 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4876 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4877 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4878 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4879 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4880
4881 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4882 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4883 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4884 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4885 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4886 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4887 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4888
4889 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4890 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4891 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4892 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4893 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4894 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4895 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4896
4897 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4898 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4899 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4900 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4901 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4902 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4903 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4904 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4905 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4906 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4907
4908 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4909 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4910 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4911 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4912 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4913 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4914 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4915 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4916 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4917 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4918 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4919 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4920
4921 hdev->asic_funcs->set_clock_gating(hdev);
4922
4923 mutex_unlock(&gaudi->clk_gate_mutex);
4924 }
4925
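/*
 * Send a driver-internal job on the DMA 0 queue: the job's last MSG_PROT
 * packet writes a fence value to host memory, which is polled here to detect
 * completion. The DMA core protection bit is toggled around the submission.
 */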
4926 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4927 struct hl_cs_job *job)
4928 {
4929 struct packet_msg_prot *fence_pkt;
4930 u32 *fence_ptr;
4931 dma_addr_t fence_dma_addr;
4932 struct hl_cb *cb;
4933 u32 tmp, timeout, dma_offset;
4934 int rc;
4935
4936 if (hdev->pldm)
4937 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4938 else
4939 timeout = HL_DEVICE_TIMEOUT_USEC;
4940
4941 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4942 dev_err_ratelimited(hdev->dev,
4943 "Can't send driver job on QMAN0 because the device is not idle\n");
4944 return -EBUSY;
4945 }
4946
4947 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4948 &fence_dma_addr);
4949 if (!fence_ptr) {
4950 dev_err(hdev->dev,
4951 "Failed to allocate fence memory for QMAN0\n");
4952 return -ENOMEM;
4953 }
4954
4955 cb = job->patched_cb;
4956
4957 fence_pkt = cb->kernel_address +
4958 job->job_cb_size - sizeof(struct packet_msg_prot);
4959
4960 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4961 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4962 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4963
4964 fence_pkt->ctl = cpu_to_le32(tmp);
4965 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4966 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4967
4968 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4969
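	/*
	 * Temporarily set the protection bit on the PCI DMA core so this
	 * driver-owned CB can run on QMAN0; the bit is cleared again at
	 * free_fence_ptr below, whether the job completes or times out.
	 */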
4970 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4971
4972 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4973 job->job_cb_size, cb->bus_address);
4974 if (rc) {
4975 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4976 goto free_fence_ptr;
4977 }
4978
4979 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4980 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4981 timeout, true);
4982
4983 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4984
4985 if (rc == -ETIMEDOUT) {
4986 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4987 goto free_fence_ptr;
4988 }
4989
4990 free_fence_ptr:
4991 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4992 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4993
4994 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4995 fence_dma_addr);
4996 return rc;
4997 }
4998
4999 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
5000 {
5001 if (event_type >= GAUDI_EVENT_SIZE)
5002 goto event_not_supported;
5003
5004 if (!gaudi_irq_map_table[event_type].valid)
5005 goto event_not_supported;
5006
5007 snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
5008
5009 return;
5010
5011 event_not_supported:
5012 snprintf(desc, size, "N/A");
5013 }
5014
5015 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
5016 u32 x_y, bool is_write)
5017 {
5018 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
5019
5020 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5021 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5022
5023 switch (x_y) {
5024 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5025 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5026 dma_id[0] = 0;
5027 dma_id[1] = 2;
5028 break;
5029 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5030 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5031 dma_id[0] = 1;
5032 dma_id[1] = 3;
5033 break;
5034 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5035 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5036 dma_id[0] = 4;
5037 dma_id[1] = 6;
5038 break;
5039 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5040 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5041 dma_id[0] = 5;
5042 dma_id[1] = 7;
5043 break;
5044 default:
5045 goto unknown_initiator;
5046 }
5047
5048 for (i = 0 ; i < 2 ; i++) {
5049 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5050 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5051 }
5052
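	/*
	 * Each DMA_IF initiator is shared by two DMA engines. Use the
	 * per-engine ERR_CAUSE bits read above to identify which engine
	 * actually reported the matching error; if neither or both did,
	 * report the pair.
	 */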
5053 switch (x_y) {
5054 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5055 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5056 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5057 return "DMA0";
5058 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5059 return "DMA2";
5060 else
5061 return "DMA0 or DMA2";
5062 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5063 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5064 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5065 return "DMA1";
5066 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5067 return "DMA3";
5068 else
5069 return "DMA1 or DMA3";
5070 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5071 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5072 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5073 return "DMA4";
5074 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5075 return "DMA6";
5076 else
5077 return "DMA4 or DMA6";
5078 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5079 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5080 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5081 return "DMA5";
5082 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5083 return "DMA7";
5084 else
5085 return "DMA5 or DMA7";
5086 }
5087
5088 unknown_initiator:
5089 return "unknown initiator";
5090 }
5091
5092 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5093 bool is_write)
5094 {
5095 u32 val, x_y, axi_id;
5096
5097 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5098 RREG32(mmMMU_UP_RAZWI_READ_ID);
5099 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5100 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5101 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5102 RAZWI_INITIATOR_AXI_ID_SHIFT);
5103
5104 switch (x_y) {
5105 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5106 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5107 return "TPC0";
5108 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5109 return "NIC0";
5110 break;
5111 case RAZWI_INITIATOR_ID_X_Y_TPC1:
5112 return "TPC1";
5113 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5114 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5115 return "MME0";
5116 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5117 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5118 return "MME1";
5119 case RAZWI_INITIATOR_ID_X_Y_TPC2:
5120 return "TPC2";
5121 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5122 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5123 return "TPC3";
5124 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5125 return "PCI";
5126 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5127 return "CPU";
5128 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5129 return "PSOC";
5130 break;
5131 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5132 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5133 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5134 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5135 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5136 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5137 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5138 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5139 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5140 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5141 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5142 return "TPC4";
5143 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5144 return "NIC1";
5145 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5146 return "NIC2";
5147 break;
5148 case RAZWI_INITIATOR_ID_X_Y_TPC5:
5149 return "TPC5";
5150 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5151 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5152 return "MME2";
5153 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5154 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5155 return "MME3";
5156 case RAZWI_INITIATOR_ID_X_Y_TPC6:
5157 return "TPC6";
5158 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5159 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5160 return "TPC7";
5161 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5162 return "NIC4";
5163 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5164 return "NIC5";
5165 break;
5166 default:
5167 break;
5168 }
5169
5170 dev_err(hdev->dev,
5171 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5172 val,
5173 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5174 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5175 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5176 RAZWI_INITIATOR_AXI_ID_MASK);
5177
5178 return "unknown initiator";
5179 }
5180
5181 static void gaudi_print_razwi_info(struct hl_device *hdev)
5182 {
5183 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5184 dev_err_ratelimited(hdev->dev,
5185 "RAZWI event caused by illegal write of %s\n",
5186 gaudi_get_razwi_initiator_name(hdev, true));
5187 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5188 }
5189
5190 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5191 dev_err_ratelimited(hdev->dev,
5192 "RAZWI event caused by illegal read of %s\n",
5193 gaudi_get_razwi_initiator_name(hdev, false));
5194 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5195 }
5196 }
5197
5198 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5199 {
5200 struct gaudi_device *gaudi = hdev->asic_specific;
5201 u64 addr;
5202 u32 val;
5203
5204 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5205 return;
5206
5207 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5208 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5209 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5210 addr <<= 32;
5211 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5212
5213 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5214 addr);
5215
5216 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5217 }
5218
5219 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5220 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5221 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5222 addr <<= 32;
5223 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5224
5225 dev_err_ratelimited(hdev->dev,
5226 "MMU access error on va 0x%llx\n", addr);
5227
5228 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5229 }
5230 }
5231
5232 /*
5233 * +-------------------+------------------------------------------------------+
5234 * | Configuration Reg | Description |
5235 * | Address | |
5236 * +-------------------+------------------------------------------------------+
5237 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
5238 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
5239 * | |0xF34 memory wrappers 63:32 |
5240 * | |0xF38 memory wrappers 95:64 |
5241 * | |0xF3C memory wrappers 127:96 |
5242 * +-------------------+------------------------------------------------------+
5243 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
5244 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
5245 * | |0xF44 memory wrappers 63:32 |
5246 * | |0xF48 memory wrappers 95:64 |
5247 * | |0xF4C memory wrappers 127:96 |
5248 * +-------------------+------------------------------------------------------+
5249 */
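/*
 * Example (per the extraction loop in gaudi_extract_ecc_info() below):
 * if bit 6 is set in the second error-indication register (offset +4),
 * the reported memory wrapper index is 38 (6 + 32 * 1).
 */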
5250 static int gaudi_extract_ecc_info(struct hl_device *hdev,
5251 struct ecc_info_extract_params *params, u64 *ecc_address,
5252 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
5253 {
5254 struct gaudi_device *gaudi = hdev->asic_specific;
5255 u32 i, num_mem_regs, reg, err_bit;
5256 u64 err_addr, err_word = 0;
5257 int rc = 0;
5258
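	/* Each 32-bit error-indication register covers 32 memory wrappers */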
5259 num_mem_regs = params->num_memories / 32 +
5260 ((params->num_memories % 32) ? 1 : 0);
5261
5262 if (params->block_address >= CFG_BASE)
5263 params->block_address -= CFG_BASE;
5264
5265 if (params->derr)
5266 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
5267 else
5268 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
5269
5270 if (params->disable_clock_gating) {
5271 mutex_lock(&gaudi->clk_gate_mutex);
5272 hdev->asic_funcs->disable_clock_gating(hdev);
5273 }
5274
5275 /* Set invalid wrapper index */
5276 *memory_wrapper_idx = 0xFF;
5277
5278 /* Iterate through memory wrappers, a single bit must be set */
5279 for (i = 0 ; i < num_mem_regs ; i++) {
5280 err_word = RREG32(err_addr + i * 4);
5282 if (err_word) {
5283 err_bit = __ffs(err_word);
5284 *memory_wrapper_idx = err_bit + (32 * i);
5285 break;
5286 }
5287 }
5288
5289 if (*memory_wrapper_idx == 0xFF) {
5290 dev_err(hdev->dev, "ECC error information cannot be found\n");
5291 rc = -EINVAL;
5292 goto enable_clk_gate;
5293 }
5294
5295 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
5296 *memory_wrapper_idx);
5297
5298 *ecc_address =
5299 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
5300 *ecc_syndrom =
5301 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
5302
5303 /* Clear error indication */
5304 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
5305 if (params->derr)
5306 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
5307 else
5308 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
5309
5310 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
5311
5312 enable_clk_gate:
5313 if (params->disable_clock_gating) {
5314 hdev->asic_funcs->set_clock_gating(hdev);
5315
5316 mutex_unlock(&gaudi->clk_gate_mutex);
5317 }
5318
5319 return rc;
5320 }
5321
5322 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5323 const char *qm_name,
5324 u64 glbl_sts_addr,
5325 u64 arb_err_addr)
5326 {
5327 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5328 char reg_desc[32];
5329
5330 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5331 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5332 glbl_sts_clr_val = 0;
5333 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5334
5335 if (!glbl_sts_val)
5336 continue;
5337
5338 if (i == QMAN_STREAMS)
5339 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5340 else
5341 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5342
5343 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5344 if (glbl_sts_val & BIT(j)) {
5345 dev_err_ratelimited(hdev->dev,
5346 "%s %s. err cause: %s\n",
5347 qm_name, reg_desc,
5348 gaudi_qman_error_cause[j]);
5349 glbl_sts_clr_val |= BIT(j);
5350 }
5351 }
5352
5353 /* Write 1 to clear errors */
5354 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5355 }
5356
5357 arb_err_val = RREG32(arb_err_addr);
5358
5359 if (!arb_err_val)
5360 return;
5361
5362 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5363 if (arb_err_val & BIT(j)) {
5364 dev_err_ratelimited(hdev->dev,
5365 "%s ARB_ERR. err cause: %s\n",
5366 qm_name,
5367 gaudi_qman_arb_error_cause[j]);
5368 }
5369 }
5370 }
5371
5372 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
5373 struct hl_eq_ecc_data *ecc_data)
5374 {
5375 struct ecc_info_extract_params params;
5376 u64 ecc_address = 0, ecc_syndrom = 0;
5377 u8 index, memory_wrapper_idx = 0;
5378 bool extract_info_from_fw;
5379 int rc;
5380
5381 switch (event_type) {
5382 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
5383 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
5384 extract_info_from_fw = true;
5385 break;
5386 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5387 index = event_type - GAUDI_EVENT_TPC0_SERR;
5388 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5389 params.num_memories = 90;
5390 params.derr = false;
5391 params.disable_clock_gating = true;
5392 extract_info_from_fw = false;
5393 break;
5394 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5395 index = event_type - GAUDI_EVENT_TPC0_DERR;
5396 params.block_address =
5397 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5398 params.num_memories = 90;
5399 params.derr = true;
5400 params.disable_clock_gating = true;
5401 extract_info_from_fw = false;
5402 break;
5403 case GAUDI_EVENT_MME0_ACC_SERR:
5404 case GAUDI_EVENT_MME1_ACC_SERR:
5405 case GAUDI_EVENT_MME2_ACC_SERR:
5406 case GAUDI_EVENT_MME3_ACC_SERR:
5407 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5408 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5409 params.num_memories = 128;
5410 params.derr = false;
5411 params.disable_clock_gating = true;
5412 extract_info_from_fw = false;
5413 break;
5414 case GAUDI_EVENT_MME0_ACC_DERR:
5415 case GAUDI_EVENT_MME1_ACC_DERR:
5416 case GAUDI_EVENT_MME2_ACC_DERR:
5417 case GAUDI_EVENT_MME3_ACC_DERR:
5418 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5419 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5420 params.num_memories = 128;
5421 params.derr = true;
5422 params.disable_clock_gating = true;
5423 extract_info_from_fw = false;
5424 break;
5425 case GAUDI_EVENT_MME0_SBAB_SERR:
5426 case GAUDI_EVENT_MME1_SBAB_SERR:
5427 case GAUDI_EVENT_MME2_SBAB_SERR:
5428 case GAUDI_EVENT_MME3_SBAB_SERR:
5429 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5430 params.block_address =
5431 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5432 params.num_memories = 33;
5433 params.derr = false;
5434 params.disable_clock_gating = true;
5435 extract_info_from_fw = false;
5436 break;
5437 case GAUDI_EVENT_MME0_SBAB_DERR:
5438 case GAUDI_EVENT_MME1_SBAB_DERR:
5439 case GAUDI_EVENT_MME2_SBAB_DERR:
5440 case GAUDI_EVENT_MME3_SBAB_DERR:
5441 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5442 params.block_address =
5443 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5444 params.num_memories = 33;
5445 params.derr = true;
5446 params.disable_clock_gating = true;
5447 extract_info_from_fw = false;
5448 break;
5449 default:
5450 return;
5451 }
5452
5453 if (extract_info_from_fw) {
5454 ecc_address = le64_to_cpu(ecc_data->ecc_address);
5455 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
5456 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
5457 } else {
5458 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
5459 &ecc_syndrom, &memory_wrapper_idx);
5460 if (rc)
5461 return;
5462 }
5463
5464 dev_err(hdev->dev,
5465 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
5466 ecc_address, ecc_syndrom, memory_wrapper_idx);
5467 }
5468
5469 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5470 {
5471 u64 glbl_sts_addr, arb_err_addr;
5472 u8 index;
5473 char desc[32];
5474
5475 switch (event_type) {
5476 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5477 index = event_type - GAUDI_EVENT_TPC0_QM;
5478 glbl_sts_addr =
5479 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5480 arb_err_addr =
5481 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5482 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5483 break;
5484 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5485 index = event_type - GAUDI_EVENT_MME0_QM;
5486 glbl_sts_addr =
5487 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5488 arb_err_addr =
5489 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5490 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5491 break;
5492 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5493 index = event_type - GAUDI_EVENT_DMA0_QM;
5494 glbl_sts_addr =
5495 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5496 arb_err_addr =
5497 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5498 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5499 break;
5500 default:
5501 return;
5502 }
5503
5504 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5505 }
5506
5507 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5508 bool razwi)
5509 {
5510 char desc[64] = "";
5511
5512 gaudi_get_event_desc(event_type, desc, sizeof(desc));
5513 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5514 event_type, desc);
5515
5516 if (razwi) {
5517 gaudi_print_razwi_info(hdev);
5518 gaudi_print_mmu_error_info(hdev);
5519 }
5520 }
5521
5522 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5523 {
5524 struct gaudi_device *gaudi = hdev->asic_specific;
5525
5526 /* Unmask all IRQs since some could have been received
5527 * during the soft reset
5528 */
5529 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
5530 }
5531
5532 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5533 {
5534 int ch, err = 0;
5535 u32 base, val, val2;
5536
5537 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
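	/*
	 * Each HBM channel occupies a 0x1000 stride in the config space and is
	 * reported here as two ports, ch * 2 and ch * 2 + 1, each with its own
	 * interrupt and ECC status registers.
	 */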
5538 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5539 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5540 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5541 if (val) {
5542 err = 1;
5543 dev_err(hdev->dev,
5544 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5545 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5546 (val >> 2) & 0x1, (val >> 3) & 0x1,
5547 (val >> 4) & 0x1);
5548
5549 val2 = RREG32(base + ch * 0x1000 + 0x060);
5550 dev_err(hdev->dev,
5551 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5552 device, ch * 2,
5553 RREG32(base + ch * 0x1000 + 0x064),
5554 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5555 (val2 & 0xFF0000) >> 16,
5556 (val2 & 0xFF000000) >> 24);
5557 }
5558
5559 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5560 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5561 if (val) {
5562 err = 1;
5563 dev_err(hdev->dev,
5564 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5565 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5566 (val >> 2) & 0x1, (val >> 3) & 0x1,
5567 (val >> 4) & 0x1);
5568
5569 val2 = RREG32(base + ch * 0x1000 + 0x070);
5570 dev_err(hdev->dev,
5571 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5572 device, ch * 2 + 1,
5573 RREG32(base + ch * 0x1000 + 0x074),
5574 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5575 (val2 & 0xFF0000) >> 16,
5576 (val2 & 0xFF000000) >> 24);
5577 }
5578
5579 /* Clear interrupts */
5580 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5581 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5582 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5583 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5584 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5585 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5586 }
5587
5588 val = RREG32(base + 0x8F30);
5589 val2 = RREG32(base + 0x8F34);
5590 if (val | val2) {
5591 err = 1;
5592 dev_err(hdev->dev,
5593 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5594 device, val, val2);
5595 }
5596 val = RREG32(base + 0x8F40);
5597 val2 = RREG32(base + 0x8F44);
5598 if (val | val2) {
5599 err = 1;
5600 dev_err(hdev->dev,
5601 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5602 device, val, val2);
5603 }
5604
5605 return err;
5606 }
5607
5608 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5609 {
5610 switch (hbm_event_type) {
5611 case GAUDI_EVENT_HBM0_SPI_0:
5612 case GAUDI_EVENT_HBM0_SPI_1:
5613 return 0;
5614 case GAUDI_EVENT_HBM1_SPI_0:
5615 case GAUDI_EVENT_HBM1_SPI_1:
5616 return 1;
5617 case GAUDI_EVENT_HBM2_SPI_0:
5618 case GAUDI_EVENT_HBM2_SPI_1:
5619 return 2;
5620 case GAUDI_EVENT_HBM3_SPI_0:
5621 case GAUDI_EVENT_HBM3_SPI_1:
5622 return 3;
5623 default:
5624 break;
5625 }
5626
5627 /* Should never happen */
5628 return 0;
5629 }
5630
5631 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5632 char *interrupt_name)
5633 {
5634 struct gaudi_device *gaudi = hdev->asic_specific;
5635 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5636 bool soft_reset_required = false;
5637
5638 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
5639 * gating, and thus cannot be done in CPU-CP and should be done instead
5640 * by the driver.
5641 */
5642
5643 mutex_lock(&gaudi->clk_gate_mutex);
5644
5645 hdev->asic_funcs->disable_clock_gating(hdev);
5646
5647 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5648 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5649
5650 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5651 if (tpc_interrupts_cause & BIT(i)) {
5652 dev_err_ratelimited(hdev->dev,
5653 "TPC%d_%s interrupt cause: %s\n",
5654 tpc_id, interrupt_name,
5655 gaudi_tpc_interrupts_cause[i]);
5656 /* If this is QM error, we need to soft-reset */
5657 if (i == 15)
5658 soft_reset_required = true;
5659 }
5660
5661 /* Clear interrupts */
5662 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5663
5664 hdev->asic_funcs->set_clock_gating(hdev);
5665
5666 mutex_unlock(&gaudi->clk_gate_mutex);
5667
5668 return soft_reset_required;
5669 }
5670
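/* Assumes the TPC DEC event IDs are spaced two apart per TPC, hence the shift by one */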
5671 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5672 {
5673 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5674 }
5675
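/* Assumes the TPC KRN_ERR event IDs are spaced six apart per TPC, hence the divide by six */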
5676 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5677 {
5678 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5679 }
5680
5681 static void gaudi_print_clk_change_info(struct hl_device *hdev,
5682 u16 event_type)
5683 {
5684 switch (event_type) {
5685 case GAUDI_EVENT_FIX_POWER_ENV_S:
5686 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
5687 dev_info_ratelimited(hdev->dev,
5688 "Clock throttling due to power consumption\n");
5689 break;
5690
5691 case GAUDI_EVENT_FIX_POWER_ENV_E:
5692 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
5693 dev_info_ratelimited(hdev->dev,
5694 "Power envelop is safe, back to optimal clock\n");
5695 break;
5696
5697 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5698 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
5699 dev_info_ratelimited(hdev->dev,
5700 "Clock throttling due to overheating\n");
5701 break;
5702
5703 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5704 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
5705 dev_info_ratelimited(hdev->dev,
5706 "Thermal envelop is safe, back to optimal clock\n");
5707 break;
5708
5709 default:
5710 dev_err(hdev->dev, "Received invalid clock change event %d\n",
5711 event_type);
5712 break;
5713 }
5714 }
5715
5716 static void gaudi_handle_eqe(struct hl_device *hdev,
5717 struct hl_eq_entry *eq_entry)
5718 {
5719 struct gaudi_device *gaudi = hdev->asic_specific;
5720 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5721 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5722 >> EQ_CTL_EVENT_TYPE_SHIFT);
5723 u8 cause;
5724 bool reset_required;
5725
5726 if (event_type >= GAUDI_EVENT_SIZE) {
5727 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
5728 event_type, GAUDI_EVENT_SIZE - 1);
5729 return;
5730 }
5731
5732 gaudi->events_stat[event_type]++;
5733 gaudi->events_stat_aggregate[event_type]++;
5734
5735 switch (event_type) {
5736 case GAUDI_EVENT_PCIE_CORE_DERR:
5737 case GAUDI_EVENT_PCIE_IF_DERR:
5738 case GAUDI_EVENT_PCIE_PHY_DERR:
5739 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5740 case GAUDI_EVENT_MME0_ACC_DERR:
5741 case GAUDI_EVENT_MME0_SBAB_DERR:
5742 case GAUDI_EVENT_MME1_ACC_DERR:
5743 case GAUDI_EVENT_MME1_SBAB_DERR:
5744 case GAUDI_EVENT_MME2_ACC_DERR:
5745 case GAUDI_EVENT_MME2_SBAB_DERR:
5746 case GAUDI_EVENT_MME3_ACC_DERR:
5747 case GAUDI_EVENT_MME3_SBAB_DERR:
5748 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5749 fallthrough;
5750 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5751 case GAUDI_EVENT_PSOC_MEM_DERR:
5752 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5753 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5754 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5755 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5756 case GAUDI_EVENT_MMU_DERR:
5757 gaudi_print_irq_info(hdev, event_type, true);
5758 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5759 if (hdev->hard_reset_on_fw_events)
5760 hl_device_reset(hdev, true, false);
5761 break;
5762
5763 case GAUDI_EVENT_GIC500:
5764 case GAUDI_EVENT_AXI_ECC:
5765 case GAUDI_EVENT_L2_RAM_ECC:
5766 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5767 gaudi_print_irq_info(hdev, event_type, false);
5768 if (hdev->hard_reset_on_fw_events)
5769 hl_device_reset(hdev, true, false);
5770 break;
5771
5772 case GAUDI_EVENT_HBM0_SPI_0:
5773 case GAUDI_EVENT_HBM1_SPI_0:
5774 case GAUDI_EVENT_HBM2_SPI_0:
5775 case GAUDI_EVENT_HBM3_SPI_0:
5776 gaudi_print_irq_info(hdev, event_type, false);
5777 gaudi_hbm_read_interrupts(hdev,
5778 gaudi_hbm_event_to_dev(event_type));
5779 if (hdev->hard_reset_on_fw_events)
5780 hl_device_reset(hdev, true, false);
5781 break;
5782
5783 case GAUDI_EVENT_HBM0_SPI_1:
5784 case GAUDI_EVENT_HBM1_SPI_1:
5785 case GAUDI_EVENT_HBM2_SPI_1:
5786 case GAUDI_EVENT_HBM3_SPI_1:
5787 gaudi_print_irq_info(hdev, event_type, false);
5788 gaudi_hbm_read_interrupts(hdev,
5789 gaudi_hbm_event_to_dev(event_type));
5790 break;
5791
5792 case GAUDI_EVENT_TPC0_DEC:
5793 case GAUDI_EVENT_TPC1_DEC:
5794 case GAUDI_EVENT_TPC2_DEC:
5795 case GAUDI_EVENT_TPC3_DEC:
5796 case GAUDI_EVENT_TPC4_DEC:
5797 case GAUDI_EVENT_TPC5_DEC:
5798 case GAUDI_EVENT_TPC6_DEC:
5799 case GAUDI_EVENT_TPC7_DEC:
5800 gaudi_print_irq_info(hdev, event_type, true);
5801 reset_required = gaudi_tpc_read_interrupts(hdev,
5802 tpc_dec_event_to_tpc_id(event_type),
5803 "AXI_SLV_DEC_Error");
5804 if (reset_required) {
5805 dev_err(hdev->dev, "hard reset required due to %s\n",
5806 gaudi_irq_map_table[event_type].name);
5807
5808 if (hdev->hard_reset_on_fw_events)
5809 hl_device_reset(hdev, true, false);
5810 } else {
5811 hl_fw_unmask_irq(hdev, event_type);
5812 }
5813 break;
5814
5815 case GAUDI_EVENT_TPC0_KRN_ERR:
5816 case GAUDI_EVENT_TPC1_KRN_ERR:
5817 case GAUDI_EVENT_TPC2_KRN_ERR:
5818 case GAUDI_EVENT_TPC3_KRN_ERR:
5819 case GAUDI_EVENT_TPC4_KRN_ERR:
5820 case GAUDI_EVENT_TPC5_KRN_ERR:
5821 case GAUDI_EVENT_TPC6_KRN_ERR:
5822 case GAUDI_EVENT_TPC7_KRN_ERR:
5823 gaudi_print_irq_info(hdev, event_type, true);
5824 reset_required = gaudi_tpc_read_interrupts(hdev,
5825 tpc_krn_event_to_tpc_id(event_type),
5826 "KRN_ERR");
5827 if (reset_required) {
5828 dev_err(hdev->dev, "hard reset required due to %s\n",
5829 gaudi_irq_map_table[event_type].name);
5830
5831 if (hdev->hard_reset_on_fw_events)
5832 hl_device_reset(hdev, true, false);
5833 } else {
5834 hl_fw_unmask_irq(hdev, event_type);
5835 }
5836 break;
5837
5838 case GAUDI_EVENT_PCIE_CORE_SERR:
5839 case GAUDI_EVENT_PCIE_IF_SERR:
5840 case GAUDI_EVENT_PCIE_PHY_SERR:
5841 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5842 case GAUDI_EVENT_MME0_ACC_SERR:
5843 case GAUDI_EVENT_MME0_SBAB_SERR:
5844 case GAUDI_EVENT_MME1_ACC_SERR:
5845 case GAUDI_EVENT_MME1_SBAB_SERR:
5846 case GAUDI_EVENT_MME2_ACC_SERR:
5847 case GAUDI_EVENT_MME2_SBAB_SERR:
5848 case GAUDI_EVENT_MME3_ACC_SERR:
5849 case GAUDI_EVENT_MME3_SBAB_SERR:
5850 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5851 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5852 case GAUDI_EVENT_PSOC_MEM_SERR:
5853 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5854 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5855 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5856 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5857 fallthrough;
5858 case GAUDI_EVENT_MMU_SERR:
5859 gaudi_print_irq_info(hdev, event_type, true);
5860 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5861 hl_fw_unmask_irq(hdev, event_type);
5862 break;
5863
5864 case GAUDI_EVENT_PCIE_DEC:
5865 case GAUDI_EVENT_MME0_WBC_RSP:
5866 case GAUDI_EVENT_MME0_SBAB0_RSP:
5867 case GAUDI_EVENT_MME1_WBC_RSP:
5868 case GAUDI_EVENT_MME1_SBAB0_RSP:
5869 case GAUDI_EVENT_MME2_WBC_RSP:
5870 case GAUDI_EVENT_MME2_SBAB0_RSP:
5871 case GAUDI_EVENT_MME3_WBC_RSP:
5872 case GAUDI_EVENT_MME3_SBAB0_RSP:
5873 case GAUDI_EVENT_CPU_AXI_SPLITTER:
5874 case GAUDI_EVENT_PSOC_AXI_DEC:
5875 case GAUDI_EVENT_PSOC_PRSTN_FALL:
5876 case GAUDI_EVENT_MMU_PAGE_FAULT:
5877 case GAUDI_EVENT_MMU_WR_PERM:
5878 case GAUDI_EVENT_RAZWI_OR_ADC:
5879 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5880 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5881 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5882 fallthrough;
5883 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5884 gaudi_print_irq_info(hdev, event_type, true);
5885 gaudi_handle_qman_err(hdev, event_type);
5886 hl_fw_unmask_irq(hdev, event_type);
5887 break;
5888
5889 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5890 gaudi_print_irq_info(hdev, event_type, true);
5891 if (hdev->hard_reset_on_fw_events)
5892 hl_device_reset(hdev, true, false);
5893 break;
5894
5895 case GAUDI_EVENT_TPC0_BMON_SPMU:
5896 case GAUDI_EVENT_TPC1_BMON_SPMU:
5897 case GAUDI_EVENT_TPC2_BMON_SPMU:
5898 case GAUDI_EVENT_TPC3_BMON_SPMU:
5899 case GAUDI_EVENT_TPC4_BMON_SPMU:
5900 case GAUDI_EVENT_TPC5_BMON_SPMU:
5901 case GAUDI_EVENT_TPC6_BMON_SPMU:
5902 case GAUDI_EVENT_TPC7_BMON_SPMU:
5903 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5904 gaudi_print_irq_info(hdev, event_type, false);
5905 hl_fw_unmask_irq(hdev, event_type);
5906 break;
5907
5908 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5909 gaudi_print_clk_change_info(hdev, event_type);
5910 hl_fw_unmask_irq(hdev, event_type);
5911 break;
5912
5913 case GAUDI_EVENT_PSOC_GPIO_U16_0:
5914 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5915 dev_err(hdev->dev,
5916 "Received high temp H/W interrupt %d (cause %d)\n",
5917 event_type, cause);
5918 break;
5919
5920 default:
5921 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5922 event_type);
5923 break;
5924 }
5925 }
5926
5927 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5928 u32 *size)
5929 {
5930 struct gaudi_device *gaudi = hdev->asic_specific;
5931
5932 if (aggregate) {
5933 *size = (u32) sizeof(gaudi->events_stat_aggregate);
5934 return gaudi->events_stat_aggregate;
5935 }
5936
5937 *size = (u32) sizeof(gaudi->events_stat);
5938 return gaudi->events_stat;
5939 }
5940
5941 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5942 u32 flags)
5943 {
5944 struct gaudi_device *gaudi = hdev->asic_specific;
5945 u32 status, timeout_usec;
5946 int rc;
5947
5948 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5949 hdev->hard_reset_pending)
5950 return 0;
5951
5952 if (hdev->pldm)
5953 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5954 else
5955 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5956
5957 mutex_lock(&hdev->mmu_cache_lock);
5958
5959 /* L0 & L1 invalidation */
5960 WREG32(mmSTLB_INV_PS, 3);
5961 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
5962 WREG32(mmSTLB_INV_PS, 2);
5963
5964 rc = hl_poll_timeout(
5965 hdev,
5966 mmSTLB_INV_PS,
5967 status,
5968 !status,
5969 1000,
5970 timeout_usec);
5971
5972 WREG32(mmSTLB_INV_SET, 0);
5973
5974 mutex_unlock(&hdev->mmu_cache_lock);
5975
5976 if (rc) {
5977 dev_err_ratelimited(hdev->dev,
5978 "MMU cache invalidation timeout\n");
5979 hl_device_reset(hdev, true, false);
5980 }
5981
5982 return rc;
5983 }
5984
5985 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
5986 bool is_hard, u32 asid, u64 va, u64 size)
5987 {
5988 struct gaudi_device *gaudi = hdev->asic_specific;
5989 u32 status, timeout_usec;
5990 u32 inv_data;
5991 u32 pi;
5992 int rc;
5993
5994 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5995 hdev->hard_reset_pending)
5996 return 0;
5997
5998 mutex_lock(&hdev->mmu_cache_lock);
5999
6000 if (hdev->pldm)
6001 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6002 else
6003 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6004
6005 /*
6006 * TODO: currently invalidate entire L0 & L1 as in regular hard
6007 * invalidation. Need to apply invalidation of specific cache
6008 * lines with mask of ASID & VA & size.
6009 * Note that L1 will be flushed entirely in any case.
6010 */
6011
6012 /* L0 & L1 invalidation */
6013 inv_data = RREG32(mmSTLB_CACHE_INV);
6014 /* PI is 8 bit */
6015 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
6016 WREG32(mmSTLB_CACHE_INV,
6017 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
6018
6019 rc = hl_poll_timeout(
6020 hdev,
6021 mmSTLB_INV_CONSUMER_INDEX,
6022 status,
6023 status == pi,
6024 1000,
6025 timeout_usec);
6026
6027 mutex_unlock(&hdev->mmu_cache_lock);
6028
6029 if (rc) {
6030 dev_err_ratelimited(hdev->dev,
6031 "MMU cache invalidation timeout\n");
6032 hl_device_reset(hdev, true, false);
6033 }
6034
6035 return rc;
6036 }
6037
6038 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6039 u32 asid, u64 phys_addr)
6040 {
6041 u32 status, timeout_usec;
6042 int rc;
6043
6044 if (hdev->pldm)
6045 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6046 else
6047 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6048
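	/*
	 * Program the ASID and its hop-0 physical address (split across the
	 * PA43_12 and PA49_44 registers), then set the MMU busy bit and wait
	 * for the HW to clear it as an acknowledgment.
	 */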
6049 WREG32(MMU_ASID, asid);
6050 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6051 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6052 WREG32(MMU_BUSY, 0x80000000);
6053
6054 rc = hl_poll_timeout(
6055 hdev,
6056 MMU_BUSY,
6057 status,
6058 !(status & 0x80000000),
6059 1000,
6060 timeout_usec);
6061
6062 if (rc) {
6063 dev_err(hdev->dev,
6064 "Timeout during MMU hop0 config of asid %d\n", asid);
6065 return rc;
6066 }
6067
6068 return 0;
6069 }
6070
6071 static int gaudi_send_heartbeat(struct hl_device *hdev)
6072 {
6073 struct gaudi_device *gaudi = hdev->asic_specific;
6074
6075 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6076 return 0;
6077
6078 return hl_fw_send_heartbeat(hdev);
6079 }
6080
6081 static int gaudi_cpucp_info_get(struct hl_device *hdev)
6082 {
6083 struct gaudi_device *gaudi = hdev->asic_specific;
6084 struct asic_fixed_properties *prop = &hdev->asic_prop;
6085 int rc;
6086
6087 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6088 return 0;
6089
6090 rc = hl_fw_cpucp_info_get(hdev);
6091 if (rc)
6092 return rc;
6093
6094 if (!strlen(prop->cpucp_info.card_name))
6095 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6096 CARD_NAME_MAX_LEN);
6097
6098 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
6099
6100 if (hdev->card_type == cpucp_card_type_pci)
6101 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
6102 else if (hdev->card_type == cpucp_card_type_pmc)
6103 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
6104
6105 hdev->max_power = prop->max_power_default;
6106
6107 return 0;
6108 }
6109
6110 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
6111 struct seq_file *s)
6112 {
6113 struct gaudi_device *gaudi = hdev->asic_specific;
6114 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6115 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6116 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6117 bool is_idle = true, is_eng_idle, is_slave;
6118 u64 offset;
6119 int i, dma_id;
6120
6121 mutex_lock(&gaudi->clk_gate_mutex);
6122
6123 hdev->asic_funcs->disable_clock_gating(hdev);
6124
6125 if (s)
6126 seq_puts(s,
6127 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
6128 "--- ------- ------------ ---------- -------------\n");
6129
6130 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6131 dma_id = gaudi_dma_assignment[i];
6132 offset = dma_id * DMA_QMAN_OFFSET;
6133
6134 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6135 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6136 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6137 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6138 IS_DMA_IDLE(dma_core_sts0);
6139 is_idle &= is_eng_idle;
6140
6141 if (mask)
6142 *mask |= ((u64) !is_eng_idle) <<
6143 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6144 if (s)
6145 seq_printf(s, fmt, dma_id,
6146 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6147 qm_cgm_sts, dma_core_sts0);
6148 }
6149
6150 if (s)
6151 seq_puts(s,
6152 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
6153 "--- ------- ------------ ---------- ----------\n");
6154
6155 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6156 offset = i * TPC_QMAN_OFFSET;
6157 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6158 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6159 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6160 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6161 IS_TPC_IDLE(tpc_cfg_sts);
6162 is_idle &= is_eng_idle;
6163
6164 if (mask)
6165 *mask |= ((u64) !is_eng_idle) <<
6166 (GAUDI_ENGINE_ID_TPC_0 + i);
6167 if (s)
6168 seq_printf(s, fmt, i,
6169 is_eng_idle ? "Y" : "N",
6170 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6171 }
6172
6173 if (s)
6174 seq_puts(s,
6175 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
6176 "--- ------- ------------ ---------- -----------\n");
6177
6178 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6179 offset = i * MME_QMAN_OFFSET;
6180 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6181 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6182
6183 /* MME 1 & 3 are slaves, no need to check their QMANs */
6184 is_slave = i % 2;
6185 if (!is_slave) {
6186 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6187 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6188 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6189 }
6190
6191 is_idle &= is_eng_idle;
6192
6193 if (mask)
6194 *mask |= ((u64) !is_eng_idle) <<
6195 (GAUDI_ENGINE_ID_MME_0 + i);
6196 if (s) {
6197 if (!is_slave)
6198 seq_printf(s, fmt, i,
6199 is_eng_idle ? "Y" : "N",
6200 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6201 else
6202 seq_printf(s, mme_slave_fmt, i,
6203 is_eng_idle ? "Y" : "N", "-",
6204 "-", mme_arch_sts);
6205 }
6206 }
6207
6208 if (s)
6209 seq_puts(s, "\n");
6210
6211 hdev->asic_funcs->set_clock_gating(hdev);
6212
6213 mutex_unlock(&gaudi->clk_gate_mutex);
6214
6215 return is_idle;
6216 }
6217
6218 static void gaudi_hw_queues_lock(struct hl_device *hdev)
6219 __acquires(&gaudi->hw_queues_lock)
6220 {
6221 struct gaudi_device *gaudi = hdev->asic_specific;
6222
6223 spin_lock(&gaudi->hw_queues_lock);
6224 }
6225
6226 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6227 __releases(&gaudi->hw_queues_lock)
6228 {
6229 struct gaudi_device *gaudi = hdev->asic_specific;
6230
6231 spin_unlock(&gaudi->hw_queues_lock);
6232 }
6233
6234 static u32 gaudi_get_pci_id(struct hl_device *hdev)
6235 {
6236 return hdev->pdev->device;
6237 }
6238
6239 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6240 size_t max_size)
6241 {
6242 struct gaudi_device *gaudi = hdev->asic_specific;
6243
6244 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6245 return 0;
6246
6247 return hl_fw_get_eeprom_data(hdev, data, max_size);
6248 }
6249
6250 /*
6251 * this function should be used only during initialization and/or after reset,
6252 * when there are no active users.
6253 */
6254 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6255 u32 tpc_id)
6256 {
6257 struct gaudi_device *gaudi = hdev->asic_specific;
6258 u64 kernel_timeout;
6259 u32 status, offset;
6260 int rc;
6261
6262 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6263
6264 if (hdev->pldm)
6265 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6266 else
6267 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6268
6269 mutex_lock(&gaudi->clk_gate_mutex);
6270
6271 hdev->asic_funcs->disable_clock_gating(hdev);
6272
6273 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6274 lower_32_bits(tpc_kernel));
6275 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6276 upper_32_bits(tpc_kernel));
6277
6278 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6279 lower_32_bits(tpc_kernel));
6280 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6281 upper_32_bits(tpc_kernel));
6282 /* set a valid LUT pointer, content is of no significance */
6283 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6284 lower_32_bits(tpc_kernel));
6285 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6286 upper_32_bits(tpc_kernel));
6287
6288 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6289 lower_32_bits(CFG_BASE +
6290 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6291
6292 WREG32(mmTPC0_CFG_TPC_CMD + offset,
6293 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6294 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6295 /* wait a bit for the engine to start executing */
6296 usleep_range(1000, 1500);
6297
6298 /* wait until engine has finished executing */
6299 rc = hl_poll_timeout(
6300 hdev,
6301 mmTPC0_CFG_STATUS + offset,
6302 status,
6303 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6304 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6305 1000,
6306 kernel_timeout);
6307
6308 if (rc) {
6309 dev_err(hdev->dev,
6310 "Timeout while waiting for TPC%d icache prefetch\n",
6311 tpc_id);
6312 hdev->asic_funcs->set_clock_gating(hdev);
6313 mutex_unlock(&gaudi->clk_gate_mutex);
6314 return -EIO;
6315 }
6316
6317 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6318 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6319
6320 /* wait a bit for the engine to start executing */
6321 usleep_range(1000, 1500);
6322
6323 /* wait until engine has finished executing */
6324 rc = hl_poll_timeout(
6325 hdev,
6326 mmTPC0_CFG_STATUS + offset,
6327 status,
6328 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6329 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6330 1000,
6331 kernel_timeout);
6332
6333 if (rc) {
6334 dev_err(hdev->dev,
6335 "Timeout while waiting for TPC%d vector pipe\n",
6336 tpc_id);
6337 hdev->asic_funcs->set_clock_gating(hdev);
6338 mutex_unlock(&gaudi->clk_gate_mutex);
6339 return -EIO;
6340 }
6341
6342 rc = hl_poll_timeout(
6343 hdev,
6344 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6345 status,
6346 (status == 0),
6347 1000,
6348 kernel_timeout);
6349
6350 hdev->asic_funcs->set_clock_gating(hdev);
6351 mutex_unlock(&gaudi->clk_gate_mutex);
6352
6353 if (rc) {
6354 dev_err(hdev->dev,
6355 "Timeout while waiting for TPC%d kernel to execute\n",
6356 tpc_id);
6357 return -EIO;
6358 }
6359
6360 return 0;
6361 }
6362
6363 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6364 {
6365 return RREG32(mmHW_STATE);
6366 }
6367
6368 static int gaudi_ctx_init(struct hl_ctx *ctx)
6369 {
6370 return 0;
6371 }
6372
6373 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6374 {
6375 return gaudi_cq_assignment[cq_idx];
6376 }
6377
6378 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6379 {
6380 return sizeof(struct packet_msg_short) +
6381 sizeof(struct packet_msg_prot) * 2;
6382 }
6383
6384 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6385 {
6386 return sizeof(struct packet_msg_short) * 4 +
6387 sizeof(struct packet_fence) +
6388 sizeof(struct packet_msg_prot) * 2;
6389 }
6390
6391 static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6392 {
6393 struct hl_cb *cb = (struct hl_cb *) data;
6394 struct packet_msg_short *pkt;
6395 u32 value, ctl;
6396
6397 pkt = cb->kernel_address;
6398 memset(pkt, 0, sizeof(*pkt));
6399
6400 /* Inc by 1, Mode ADD */
6401 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
6402 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
6403
6404 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
6405 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6406 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
6407 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6408 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
6409 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6410 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6411
6412 pkt->value = cpu_to_le32(value);
6413 pkt->ctl = cpu_to_le32(ctl);
6414 }
6415
6416 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6417 u16 addr)
6418 {
6419 u32 ctl, pkt_size = sizeof(*pkt);
6420
6421 memset(pkt, 0, pkt_size);
6422
6423 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6424 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6425 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6426 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6427 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6428 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
6429
6430 pkt->value = cpu_to_le32(value);
6431 pkt->ctl = cpu_to_le32(ctl);
6432
6433 return pkt_size;
6434 }
6435
6436 static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6437 u16 sob_val, u16 addr)
6438 {
6439 u32 ctl, value, pkt_size = sizeof(*pkt);
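	/*
	 * A monitor watches a group of eight consecutive SOBs; the mask below
	 * is assumed to exclude every SOB in the group except the requested
	 * one from the comparison.
	 */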
6440 u8 mask = ~(1 << (sob_id & 0x7));
6441
6442 memset(pkt, 0, pkt_size);
6443
6444 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
6445 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
6446 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
6447 0); /* GREATER OR EQUAL */
6448 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
6449
6450 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6451 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6452 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6453 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6454 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6455 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6456 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6457
6458 pkt->value = cpu_to_le32(value);
6459 pkt->ctl = cpu_to_le32(ctl);
6460
6461 return pkt_size;
6462 }
6463
6464 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6465 {
6466 u32 ctl, cfg, pkt_size = sizeof(*pkt);
6467
6468 memset(pkt, 0, pkt_size);
6469
6470 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
6471 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
6472 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
6473
6474 ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
6475 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6476 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6477 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6478
6479 pkt->cfg = cpu_to_le32(cfg);
6480 pkt->ctl = cpu_to_le32(ctl);
6481
6482 return pkt_size;
6483 }
6484
6485 static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6486 u16 sob_val, u16 mon_id, u32 q_idx)
6487 {
6488 struct hl_cb *cb = (struct hl_cb *) data;
6489 void *buf = cb->kernel_address;
6490 u64 monitor_base, fence_addr = 0;
6491 u32 size = 0;
6492 u16 msg_addr_offset;
6493
6494 switch (q_idx) {
6495 case GAUDI_QUEUE_ID_DMA_0_0:
6496 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6497 break;
6498 case GAUDI_QUEUE_ID_DMA_0_1:
6499 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6500 break;
6501 case GAUDI_QUEUE_ID_DMA_0_2:
6502 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6503 break;
6504 case GAUDI_QUEUE_ID_DMA_0_3:
6505 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6506 break;
6507 case GAUDI_QUEUE_ID_DMA_1_0:
6508 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6509 break;
6510 case GAUDI_QUEUE_ID_DMA_1_1:
6511 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6512 break;
6513 case GAUDI_QUEUE_ID_DMA_1_2:
6514 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6515 break;
6516 case GAUDI_QUEUE_ID_DMA_1_3:
6517 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6518 break;
6519 case GAUDI_QUEUE_ID_DMA_5_0:
6520 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6521 break;
6522 case GAUDI_QUEUE_ID_DMA_5_1:
6523 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6524 break;
6525 case GAUDI_QUEUE_ID_DMA_5_2:
6526 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6527 break;
6528 case GAUDI_QUEUE_ID_DMA_5_3:
6529 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6530 break;
6531 default:
6532 /* queue index should be valid here */
6533 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6534 q_idx);
6535 return;
6536 }
6537
6538 fence_addr += CFG_BASE;
6539
6540 /*
6541 * monitor_base should be the content of the base0 address registers,
6542 * so it will be added to the msg short offsets
6543 */
6544 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6545
6546 /* First monitor config packet: low address of the sync */
6547 msg_addr_offset =
6548 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6549 monitor_base;
6550
6551 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6552 msg_addr_offset);
6553
6554 /* Second monitor config packet: high address of the sync */
6555 msg_addr_offset =
6556 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6557 monitor_base;
6558
6559 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6560 msg_addr_offset);
6561
6562 /*
6563 * Third monitor config packet: the payload, i.e. what to write when the
6564 * sync triggers
6565 */
6566 msg_addr_offset =
6567 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6568 monitor_base;
6569
6570 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6571
6572 /* Fourth monitor config packet: bind the monitor to a sync object */
6573 msg_addr_offset =
6574 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6575 monitor_base;
6576 size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6577 msg_addr_offset);
6578
6579 /* Fence packet */
6580 size += gaudi_add_fence_pkt(buf + size);
6581 }
6582
6583 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6584 {
6585 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6586
6587 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6588 hw_sob->sob_id);
6589
6590 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6591 0);
6592
6593 kref_init(&hw_sob->kref);
6594 }
6595
6596 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6597 {
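	/*
	 * The boot firmware is assumed to leave HL_POWER9_HOST_MAGIC in this
	 * scratch register when the host (POWER9) supports full 64-bit DMA
	 * addressing; otherwise fall back to a 48-bit DMA mask.
	 */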
6598 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6599 HL_POWER9_HOST_MAGIC) {
6600 hdev->power9_64bit_dma_enable = 1;
6601 hdev->dma_mask = 64;
6602 } else {
6603 hdev->power9_64bit_dma_enable = 0;
6604 hdev->dma_mask = 48;
6605 }
6606 }
6607
6608 static u64 gaudi_get_device_time(struct hl_device *hdev)
6609 {
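	/*
	 * Compose a 64-bit timestamp from the upper (CNTCVU) and lower
	 * (CNTCVL) halves of the PSOC timestamp counter.
	 */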
6610 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6611
6612 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6613 }
6614
6615 static const struct hl_asic_funcs gaudi_funcs = {
6616 .early_init = gaudi_early_init,
6617 .early_fini = gaudi_early_fini,
6618 .late_init = gaudi_late_init,
6619 .late_fini = gaudi_late_fini,
6620 .sw_init = gaudi_sw_init,
6621 .sw_fini = gaudi_sw_fini,
6622 .hw_init = gaudi_hw_init,
6623 .hw_fini = gaudi_hw_fini,
6624 .halt_engines = gaudi_halt_engines,
6625 .suspend = gaudi_suspend,
6626 .resume = gaudi_resume,
6627 .cb_mmap = gaudi_cb_mmap,
6628 .ring_doorbell = gaudi_ring_doorbell,
6629 .pqe_write = gaudi_pqe_write,
6630 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6631 .asic_dma_free_coherent = gaudi_dma_free_coherent,
6632 .get_int_queue_base = gaudi_get_int_queue_base,
6633 .test_queues = gaudi_test_queues,
6634 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6635 .asic_dma_pool_free = gaudi_dma_pool_free,
6636 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6637 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6638 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6639 .cs_parser = gaudi_cs_parser,
6640 .asic_dma_map_sg = gaudi_dma_map_sg,
6641 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6642 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6643 .update_eq_ci = gaudi_update_eq_ci,
6644 .context_switch = gaudi_context_switch,
6645 .restore_phase_topology = gaudi_restore_phase_topology,
6646 .debugfs_read32 = gaudi_debugfs_read32,
6647 .debugfs_write32 = gaudi_debugfs_write32,
6648 .debugfs_read64 = gaudi_debugfs_read64,
6649 .debugfs_write64 = gaudi_debugfs_write64,
6650 .add_device_attr = gaudi_add_device_attr,
6651 .handle_eqe = gaudi_handle_eqe,
6652 .set_pll_profile = gaudi_set_pll_profile,
6653 .get_events_stat = gaudi_get_events_stat,
6654 .read_pte = gaudi_read_pte,
6655 .write_pte = gaudi_write_pte,
6656 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6657 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6658 .send_heartbeat = gaudi_send_heartbeat,
6659 .set_clock_gating = gaudi_set_clock_gating,
6660 .disable_clock_gating = gaudi_disable_clock_gating,
6661 .debug_coresight = gaudi_debug_coresight,
6662 .is_device_idle = gaudi_is_device_idle,
6663 .soft_reset_late_init = gaudi_soft_reset_late_init,
6664 .hw_queues_lock = gaudi_hw_queues_lock,
6665 .hw_queues_unlock = gaudi_hw_queues_unlock,
6666 .get_pci_id = gaudi_get_pci_id,
6667 .get_eeprom_data = gaudi_get_eeprom_data,
6668 .send_cpu_message = gaudi_send_cpu_message,
6669 .get_hw_state = gaudi_get_hw_state,
6670 .pci_bars_map = gaudi_pci_bars_map,
6671 .init_iatu = gaudi_init_iatu,
6672 .rreg = hl_rreg,
6673 .wreg = hl_wreg,
6674 .halt_coresight = gaudi_halt_coresight,
6675 .ctx_init = gaudi_ctx_init,
6676 .get_clk_rate = gaudi_get_clk_rate,
6677 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6678 .read_device_fw_version = gaudi_read_device_fw_version,
6679 .load_firmware_to_device = gaudi_load_firmware_to_device,
6680 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
6681 .get_signal_cb_size = gaudi_get_signal_cb_size,
6682 .get_wait_cb_size = gaudi_get_wait_cb_size,
6683 .gen_signal_cb = gaudi_gen_signal_cb,
6684 .gen_wait_cb = gaudi_gen_wait_cb,
6685 .reset_sob = gaudi_reset_sob,
6686 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6687 .get_device_time = gaudi_get_device_time
6688 };
6689
6690 /**
6691 * gaudi_set_asic_funcs - set GAUDI function pointers
6692 *
6693 * @hdev: pointer to hl_device structure
6694 *
6695 */
6696 void gaudi_set_asic_funcs(struct hl_device *hdev)
6697 {
6698 hdev->asic_funcs = &gaudi_funcs;
6699 }
6700