1// SPDX-License-Identifier: GPL-2.0 2 3/* 4 * Copyright 2016-2020 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 */ 7 8#include "gaudiP.h" 9#include "../include/hw_ip/mmu/mmu_general.h" 10#include "../include/hw_ip/mmu/mmu_v1_1.h" 11#include "../include/gaudi/gaudi_masks.h" 12#include "../include/gaudi/gaudi_fw_if.h" 13#include "../include/gaudi/gaudi_reg_map.h" 14#include "../include/gaudi/gaudi_async_ids_map_extended.h" 15 16#include <linux/module.h> 17#include <linux/pci.h> 18#include <linux/firmware.h> 19#include <linux/hwmon.h> 20#include <linux/genalloc.h> 21#include <linux/io-64-nonatomic-lo-hi.h> 22#include <linux/iommu.h> 23#include <linux/seq_file.h> 24 25/* 26 * Gaudi security scheme: 27 * 28 * 1. Host is protected by: 29 * - Range registers 30 * - MMU 31 * 32 * 2. DDR is protected by: 33 * - Range registers (protect the first 512MB) 34 * 35 * 3. Configuration is protected by: 36 * - Range registers 37 * - Protection bits 38 * 39 * MMU is always enabled. 40 * 41 * QMAN DMA channels 0,1,5 (PCI DMAN): 42 * - DMA is not secured. 43 * - PQ and CQ are secured. 44 * - CP is secured: The driver needs to parse CB but WREG should be allowed 45 * because of TDMA (tensor DMA). Hence, WREG is always not 46 * secured. 47 * 48 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA 49 * channel 0 to be secured, execute the DMA and change it back to not secured. 50 * Currently, the driver doesn't use the DMA while there are compute jobs 51 * running. 52 * 53 * The current use cases for the driver to use the DMA are: 54 * - Clear SRAM on context switch (happens on context switch when device is 55 * idle) 56 * - MMU page tables area clear (happens on init) 57 * 58 * QMAN DMA 2-4,6,7, TPC, MME, NIC: 59 * PQ is secured and is located on the Host (HBM CON TPC3 bug) 60 * CQ, CP and the engine are not secured 61 * 62 */ 63 64#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb" 65#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb" 66#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin" 67 68#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ 69 70#define GAUDI_RESET_TIMEOUT_MSEC 1000 /* 1000ms */ 71#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */ 72#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */ 73#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */ 74 75#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */ 76#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */ 77#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */ 78#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) 79#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) 80#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) 81#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ 82#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ 83 84#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9 85 86#define GAUDI_MAX_STRING_LEN 20 87 88#define GAUDI_CB_POOL_CB_CNT 512 89#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */ 90 91#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3 92 93#define GAUDI_NUM_OF_TPC_INTR_CAUSE 20 94 95#define GAUDI_NUM_OF_QM_ERR_CAUSE 16 96 97#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3 98 99#define GAUDI_ARB_WDT_TIMEOUT 0x1000000 100 101#define GAUDI_CLK_GATE_DEBUGFS_MASK (\ 102 BIT(GAUDI_ENGINE_ID_MME_0) |\ 103 BIT(GAUDI_ENGINE_ID_MME_2) |\ 104 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0)) 105 106static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { 107 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", 108 "gaudi 
cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", 109 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3", 110 "gaudi cpu eq" 111}; 112 113static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = { 114 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0, 115 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1, 116 [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5, 117 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2, 118 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3, 119 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4, 120 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6, 121 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7 122}; 123 124static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = { 125 [0] = GAUDI_QUEUE_ID_DMA_0_0, 126 [1] = GAUDI_QUEUE_ID_DMA_0_1, 127 [2] = GAUDI_QUEUE_ID_DMA_0_2, 128 [3] = GAUDI_QUEUE_ID_DMA_0_3, 129 [4] = GAUDI_QUEUE_ID_DMA_1_0, 130 [5] = GAUDI_QUEUE_ID_DMA_1_1, 131 [6] = GAUDI_QUEUE_ID_DMA_1_2, 132 [7] = GAUDI_QUEUE_ID_DMA_1_3, 133 [8] = GAUDI_QUEUE_ID_DMA_5_0, 134 [9] = GAUDI_QUEUE_ID_DMA_5_1, 135 [10] = GAUDI_QUEUE_ID_DMA_5_2, 136 [11] = GAUDI_QUEUE_ID_DMA_5_3 137}; 138 139static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = { 140 [PACKET_WREG_32] = sizeof(struct packet_wreg32), 141 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk), 142 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long), 143 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short), 144 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma), 145 [PACKET_REPEAT] = sizeof(struct packet_repeat), 146 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot), 147 [PACKET_FENCE] = sizeof(struct packet_fence), 148 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma), 149 [PACKET_NOP] = sizeof(struct packet_nop), 150 [PACKET_STOP] = sizeof(struct packet_stop), 151 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point), 152 [PACKET_WAIT] = sizeof(struct packet_wait), 153 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe) 154}; 155 156static inline bool validate_packet_id(enum packet_id id) 157{ 158 switch (id) { 159 case PACKET_WREG_32: 160 case PACKET_WREG_BULK: 161 case PACKET_MSG_LONG: 162 case PACKET_MSG_SHORT: 163 case PACKET_CP_DMA: 164 case PACKET_REPEAT: 165 case PACKET_MSG_PROT: 166 case PACKET_FENCE: 167 case PACKET_LIN_DMA: 168 case PACKET_NOP: 169 case PACKET_STOP: 170 case PACKET_ARB_POINT: 171 case PACKET_WAIT: 172 case PACKET_LOAD_AND_EXE: 173 return true; 174 default: 175 return false; 176 } 177} 178 179static const char * const 180gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = { 181 "tpc_address_exceed_slm", 182 "tpc_div_by_0", 183 "tpc_spu_mac_overflow", 184 "tpc_spu_addsub_overflow", 185 "tpc_spu_abs_overflow", 186 "tpc_spu_fp_dst_nan_inf", 187 "tpc_spu_fp_dst_denorm", 188 "tpc_vpu_mac_overflow", 189 "tpc_vpu_addsub_overflow", 190 "tpc_vpu_abs_overflow", 191 "tpc_vpu_fp_dst_nan_inf", 192 "tpc_vpu_fp_dst_denorm", 193 "tpc_assertions", 194 "tpc_illegal_instruction", 195 "tpc_pc_wrap_around", 196 "tpc_qm_sw_err", 197 "tpc_hbw_rresp_err", 198 "tpc_hbw_bresp_err", 199 "tpc_lbw_rresp_err", 200 "tpc_lbw_bresp_err" 201}; 202 203static const char * const 204gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = { 205 "PQ AXI HBW error", 206 "CQ AXI HBW error", 207 "CP AXI HBW error", 208 "CP error due to undefined OPCODE", 209 "CP encountered STOP OPCODE", 210 "CP AXI LBW error", 211 "CP WRREG32 or WRBULK returned error", 212 "N/A", 213 "FENCE 0 inc over max value and clipped", 214 "FENCE 1 inc over max value and clipped", 215 "FENCE 2 inc over max value and clipped", 216 "FENCE 3 inc over max value and clipped", 217 "FENCE 0 dec 
under min value and clipped", 218 "FENCE 1 dec under min value and clipped", 219 "FENCE 2 dec under min value and clipped", 220 "FENCE 3 dec under min value and clipped" 221}; 222 223static const char * const 224gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = { 225 "Choice push while full error", 226 "Choice Q watchdog error", 227 "MSG AXI LBW returned with error" 228}; 229 230static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = { 231 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */ 232 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */ 233 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */ 234 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */ 235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */ 236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */ 237 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */ 238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */ 239 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */ 240 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */ 241 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */ 242 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */ 243 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */ 244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */ 245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */ 246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */ 247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */ 248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */ 249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */ 250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */ 251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */ 252 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */ 253 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */ 254 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */ 255 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */ 256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */ 257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */ 258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */ 259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */ 260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */ 261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */ 262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */ 263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */ 264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */ 265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */ 266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */ 267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */ 268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */ 269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */ 270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */ 271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */ 272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */ 273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */ 274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */ 275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */ 276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */ 277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */ 278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */ 279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */ 280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */ 281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */ 282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */ 283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */ 284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */ 285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */ 286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */ 287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */ 288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */ 289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */ 290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */ 291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */ 292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */ 293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */ 294 
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */ 295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */ 296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */ 297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */ 298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */ 299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */ 300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */ 301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */ 302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */ 303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */ 304 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_0 */ 305 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_1 */ 306 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_2 */ 307 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_3 */ 308 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_0 */ 309 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_1 */ 310 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_2 */ 311 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_3 */ 312 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_0 */ 313 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_1 */ 314 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_2 */ 315 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_3 */ 316 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_0 */ 317 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_1 */ 318 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_2 */ 319 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_3 */ 320 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_0 */ 321 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_1 */ 322 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_2 */ 323 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_3 */ 324 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_0 */ 325 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_1 */ 326 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_2 */ 327 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_3 */ 328 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_0 */ 329 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_1 */ 330 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_2 */ 331 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_3 */ 332 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_0 */ 333 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_1 */ 334 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_2 */ 335 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_3 */ 336 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_0 */ 337 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_1 */ 338 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_2 */ 339 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_3 */ 340 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_0 */ 341 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_1 */ 342 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_2 */ 343 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */ 344}; 345 346struct ecc_info_extract_params { 347 u64 block_address; 348 u32 num_memories; 349 bool derr; 350 bool disable_clock_gating; 351}; 352 353static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, 354 u64 phys_addr); 355static int gaudi_send_job_on_qman0(struct hl_device *hdev, 356 struct hl_cs_job *job); 357static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 358 u32 size, u64 val); 359static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, 360 u32 tpc_id); 361static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev); 362static int gaudi_cpucp_info_get(struct hl_device *hdev); 363static void gaudi_disable_clock_gating(struct hl_device *hdev); 364static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid); 365 366static int gaudi_get_fixed_properties(struct hl_device *hdev) 367{ 368 struct asic_fixed_properties *prop = &hdev->asic_prop; 369 u32 num_sync_stream_queues = 0; 370 int i; 371 372 prop->max_queues = GAUDI_QUEUE_ID_SIZE; 373 prop->hw_queues_props = kcalloc(prop->max_queues, 374 sizeof(struct hw_queue_properties), 375 GFP_KERNEL); 376 377 if 
(!prop->hw_queues_props) 378 return -ENOMEM; 379 380 for (i = 0 ; i < prop->max_queues ; i++) { 381 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) { 382 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; 383 prop->hw_queues_props[i].driver_only = 0; 384 prop->hw_queues_props[i].requires_kernel_cb = 1; 385 prop->hw_queues_props[i].supports_sync_stream = 1; 386 num_sync_stream_queues++; 387 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) { 388 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; 389 prop->hw_queues_props[i].driver_only = 1; 390 prop->hw_queues_props[i].requires_kernel_cb = 0; 391 prop->hw_queues_props[i].supports_sync_stream = 0; 392 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) { 393 prop->hw_queues_props[i].type = QUEUE_TYPE_INT; 394 prop->hw_queues_props[i].driver_only = 0; 395 prop->hw_queues_props[i].requires_kernel_cb = 0; 396 } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) { 397 prop->hw_queues_props[i].type = QUEUE_TYPE_NA; 398 prop->hw_queues_props[i].driver_only = 0; 399 prop->hw_queues_props[i].requires_kernel_cb = 0; 400 prop->hw_queues_props[i].supports_sync_stream = 0; 401 } 402 } 403 404 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; 405 prop->sync_stream_first_sob = 0; 406 prop->sync_stream_first_mon = 0; 407 prop->dram_base_address = DRAM_PHYS_BASE; 408 prop->dram_size = GAUDI_HBM_SIZE_32GB; 409 prop->dram_end_address = prop->dram_base_address + 410 prop->dram_size; 411 prop->dram_user_base_address = DRAM_BASE_ADDR_USER; 412 413 prop->sram_base_address = SRAM_BASE_ADDR; 414 prop->sram_size = SRAM_SIZE; 415 prop->sram_end_address = prop->sram_base_address + 416 prop->sram_size; 417 prop->sram_user_base_address = prop->sram_base_address + 418 SRAM_USER_BASE_OFFSET; 419 420 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR; 421 if (hdev->pldm) 422 prop->mmu_pgt_size = 0x800000; /* 8MB */ 423 else 424 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; 425 prop->mmu_pte_size = HL_PTE_SIZE; 426 prop->mmu_hop_table_size = HOP_TABLE_SIZE; 427 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE; 428 prop->dram_page_size = PAGE_SIZE_2MB; 429 430 prop->pmmu.hop0_shift = HOP0_SHIFT; 431 prop->pmmu.hop1_shift = HOP1_SHIFT; 432 prop->pmmu.hop2_shift = HOP2_SHIFT; 433 prop->pmmu.hop3_shift = HOP3_SHIFT; 434 prop->pmmu.hop4_shift = HOP4_SHIFT; 435 prop->pmmu.hop0_mask = HOP0_MASK; 436 prop->pmmu.hop1_mask = HOP1_MASK; 437 prop->pmmu.hop2_mask = HOP2_MASK; 438 prop->pmmu.hop3_mask = HOP3_MASK; 439 prop->pmmu.hop4_mask = HOP4_MASK; 440 prop->pmmu.start_addr = VA_HOST_SPACE_START; 441 prop->pmmu.end_addr = 442 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1; 443 prop->pmmu.page_size = PAGE_SIZE_4KB; 444 prop->pmmu.num_hops = MMU_ARCH_5_HOPS; 445 446 /* PMMU and HPMMU are the same except of page size */ 447 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 448 prop->pmmu_huge.page_size = PAGE_SIZE_2MB; 449 450 /* shifts and masks are the same in PMMU and DMMU */ 451 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu)); 452 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2); 453 prop->dmmu.end_addr = VA_HOST_SPACE_END; 454 prop->dmmu.page_size = PAGE_SIZE_2MB; 455 456 prop->cfg_size = CFG_SIZE; 457 prop->max_asid = MAX_ASID; 458 prop->num_of_events = GAUDI_EVENT_SIZE; 459 prop->tpc_enabled_mask = TPC_ENABLED_MASK; 460 461 prop->max_power_default = MAX_POWER_DEFAULT_PCI; 462 463 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT; 464 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE; 465 466 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; 467 
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; 468 469 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 470 CARD_NAME_MAX_LEN); 471 472 prop->max_pending_cs = GAUDI_MAX_PENDING_CS; 473 474 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] = 475 num_sync_stream_queues * HL_RSVD_SOBS; 476 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] = 477 num_sync_stream_queues * HL_RSVD_MONS; 478 479 return 0; 480} 481 482static int gaudi_pci_bars_map(struct hl_device *hdev) 483{ 484 static const char * const name[] = {"SRAM", "CFG", "HBM"}; 485 bool is_wc[3] = {false, false, true}; 486 int rc; 487 488 rc = hl_pci_bars_map(hdev, name, is_wc); 489 if (rc) 490 return rc; 491 492 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] + 493 (CFG_BASE - SPI_FLASH_BASE_ADDR); 494 495 return 0; 496} 497 498static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr) 499{ 500 struct gaudi_device *gaudi = hdev->asic_specific; 501 struct hl_inbound_pci_region pci_region; 502 u64 old_addr = addr; 503 int rc; 504 505 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr)) 506 return old_addr; 507 508 /* Inbound Region 2 - Bar 4 - Point to HBM */ 509 pci_region.mode = PCI_BAR_MATCH_MODE; 510 pci_region.bar = HBM_BAR_ID; 511 pci_region.addr = addr; 512 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region); 513 if (rc) 514 return U64_MAX; 515 516 if (gaudi) { 517 old_addr = gaudi->hbm_bar_cur_addr; 518 gaudi->hbm_bar_cur_addr = addr; 519 } 520 521 return old_addr; 522} 523 524static int gaudi_init_iatu(struct hl_device *hdev) 525{ 526 struct hl_inbound_pci_region inbound_region; 527 struct hl_outbound_pci_region outbound_region; 528 int rc; 529 530 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */ 531 inbound_region.mode = PCI_BAR_MATCH_MODE; 532 inbound_region.bar = SRAM_BAR_ID; 533 inbound_region.addr = SRAM_BASE_ADDR; 534 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 535 if (rc) 536 goto done; 537 538 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */ 539 inbound_region.mode = PCI_BAR_MATCH_MODE; 540 inbound_region.bar = CFG_BAR_ID; 541 inbound_region.addr = SPI_FLASH_BASE_ADDR; 542 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region); 543 if (rc) 544 goto done; 545 546 /* Inbound Region 2 - Bar 4 - Point to HBM */ 547 inbound_region.mode = PCI_BAR_MATCH_MODE; 548 inbound_region.bar = HBM_BAR_ID; 549 inbound_region.addr = DRAM_PHYS_BASE; 550 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region); 551 if (rc) 552 goto done; 553 554 hdev->asic_funcs->set_dma_mask_from_fw(hdev); 555 556 /* Outbound Region 0 - Point to Host */ 557 outbound_region.addr = HOST_PHYS_BASE; 558 outbound_region.size = HOST_PHYS_SIZE; 559 rc = hl_pci_set_outbound_region(hdev, &outbound_region); 560 561done: 562 return rc; 563} 564 565static int gaudi_early_init(struct hl_device *hdev) 566{ 567 struct asic_fixed_properties *prop = &hdev->asic_prop; 568 struct pci_dev *pdev = hdev->pdev; 569 int rc; 570 571 rc = gaudi_get_fixed_properties(hdev); 572 if (rc) { 573 dev_err(hdev->dev, "Failed to get fixed properties\n"); 574 return rc; 575 } 576 577 /* Check BAR sizes */ 578 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) { 579 dev_err(hdev->dev, 580 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n", 581 SRAM_BAR_ID, 582 (unsigned long long) pci_resource_len(pdev, 583 SRAM_BAR_ID), 584 SRAM_BAR_SIZE); 585 rc = -ENODEV; 586 goto free_queue_props; 587 } 588 589 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) { 590 dev_err(hdev->dev, 591 "Not " HL_NAME "? 
BAR %d size %llu, expecting %llu\n", 592 CFG_BAR_ID, 593 (unsigned long long) pci_resource_len(pdev, 594 CFG_BAR_ID), 595 CFG_BAR_SIZE); 596 rc = -ENODEV; 597 goto free_queue_props; 598 } 599 600 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID); 601 602 rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS, 603 mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC); 604 if (rc) 605 goto free_queue_props; 606 607 /* GAUDI Firmware does not yet support security */ 608 prop->fw_security_disabled = true; 609 dev_info(hdev->dev, "firmware-level security is disabled\n"); 610 611 return 0; 612 613free_queue_props: 614 kfree(hdev->asic_prop.hw_queues_props); 615 return rc; 616} 617 618static int gaudi_early_fini(struct hl_device *hdev) 619{ 620 kfree(hdev->asic_prop.hw_queues_props); 621 hl_pci_fini(hdev); 622 623 return 0; 624} 625 626/** 627 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values 628 * 629 * @hdev: pointer to hl_device structure 630 * 631 */ 632static void gaudi_fetch_psoc_frequency(struct hl_device *hdev) 633{ 634 struct asic_fixed_properties *prop = &hdev->asic_prop; 635 u32 trace_freq = 0; 636 u32 pll_clk = 0; 637 u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2); 638 u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2); 639 u32 nr = RREG32(mmPSOC_CPU_PLL_NR); 640 u32 nf = RREG32(mmPSOC_CPU_PLL_NF); 641 u32 od = RREG32(mmPSOC_CPU_PLL_OD); 642 643 if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) { 644 if (div_sel == DIV_SEL_REF_CLK) 645 trace_freq = PLL_REF_CLK; 646 else 647 trace_freq = PLL_REF_CLK / (div_fctr + 1); 648 } else if (div_sel == DIV_SEL_PLL_CLK || 649 div_sel == DIV_SEL_DIVIDED_PLL) { 650 pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1)); 651 if (div_sel == DIV_SEL_PLL_CLK) 652 trace_freq = pll_clk; 653 else 654 trace_freq = pll_clk / (div_fctr + 1); 655 } else { 656 dev_warn(hdev->dev, 657 "Received invalid div select value: %d", div_sel); 658 } 659 660 prop->psoc_timestamp_frequency = trace_freq; 661 prop->psoc_pci_pll_nr = nr; 662 prop->psoc_pci_pll_nf = nf; 663 prop->psoc_pci_pll_od = od; 664 prop->psoc_pci_pll_div_factor = div_fctr; 665} 666 667static int _gaudi_init_tpc_mem(struct hl_device *hdev, 668 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size) 669{ 670 struct asic_fixed_properties *prop = &hdev->asic_prop; 671 struct packet_lin_dma *init_tpc_mem_pkt; 672 struct hl_cs_job *job; 673 struct hl_cb *cb; 674 u64 dst_addr; 675 u32 cb_size, ctl; 676 u8 tpc_id; 677 int rc; 678 679 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 680 if (!cb) 681 return -EFAULT; 682 683 init_tpc_mem_pkt = cb->kernel_address; 684 cb_size = sizeof(*init_tpc_mem_pkt); 685 memset(init_tpc_mem_pkt, 0, cb_size); 686 687 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size); 688 689 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 690 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 691 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 692 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 693 694 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl); 695 696 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr); 697 dst_addr = (prop->sram_user_base_address & 698 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 699 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 700 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr); 701 702 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 703 if (!job) { 704 dev_err(hdev->dev, "Failed to allocate a new job\n"); 705 rc = -ENOMEM; 706 goto release_cb; 707 } 708 709 job->id = 0; 710 job->user_cb = cb; 711 job->user_cb->cs_cnt++; 712 
job->user_cb_size = cb_size; 713 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 714 job->patched_cb = job->user_cb; 715 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 716 717 hl_debugfs_add_job(hdev, job); 718 719 rc = gaudi_send_job_on_qman0(hdev, job); 720 721 if (rc) 722 goto free_job; 723 724 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 725 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id); 726 if (rc) 727 break; 728 } 729 730free_job: 731 hl_userptr_delete_list(hdev, &job->userptr_list); 732 hl_debugfs_remove_job(hdev, job); 733 kfree(job); 734 cb->cs_cnt--; 735 736release_cb: 737 hl_cb_put(cb); 738 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); 739 740 return rc; 741} 742 743/* 744 * gaudi_init_tpc_mem() - Initialize TPC memories. 745 * @hdev: Pointer to hl_device structure. 746 * 747 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories. 748 * 749 * Return: 0 for success, negative value for error. 750 */ 751static int gaudi_init_tpc_mem(struct hl_device *hdev) 752{ 753 const struct firmware *fw; 754 size_t fw_size; 755 void *cpu_addr; 756 dma_addr_t dma_handle; 757 int rc, count = 5; 758 759again: 760 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev); 761 if (rc == -EINTR && count-- > 0) { 762 msleep(50); 763 goto again; 764 } 765 766 if (rc) { 767 dev_err(hdev->dev, "Failed to load firmware file %s\n", 768 GAUDI_TPC_FW_FILE); 769 goto out; 770 } 771 772 fw_size = fw->size; 773 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size, 774 &dma_handle, GFP_KERNEL | __GFP_ZERO); 775 if (!cpu_addr) { 776 dev_err(hdev->dev, 777 "Failed to allocate %zu of dma memory for TPC kernel\n", 778 fw_size); 779 rc = -ENOMEM; 780 goto out; 781 } 782 783 memcpy(cpu_addr, fw->data, fw_size); 784 785 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size); 786 787 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr, 788 dma_handle); 789 790out: 791 release_firmware(fw); 792 return rc; 793} 794 795static int gaudi_late_init(struct hl_device *hdev) 796{ 797 struct gaudi_device *gaudi = hdev->asic_specific; 798 int rc; 799 800 rc = gaudi->cpucp_info_get(hdev); 801 if (rc) { 802 dev_err(hdev->dev, "Failed to get cpucp info\n"); 803 return rc; 804 } 805 806 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS); 807 if (rc) { 808 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); 809 return rc; 810 } 811 812 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER); 813 814 gaudi_fetch_psoc_frequency(hdev); 815 816 rc = gaudi_mmu_clear_pgt_range(hdev); 817 if (rc) { 818 dev_err(hdev->dev, "Failed to clear MMU page tables range\n"); 819 goto disable_pci_access; 820 } 821 822 rc = gaudi_init_tpc_mem(hdev); 823 if (rc) { 824 dev_err(hdev->dev, "Failed to initialize TPC memories\n"); 825 goto disable_pci_access; 826 } 827 828 return 0; 829 830disable_pci_access: 831 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); 832 833 return rc; 834} 835 836static void gaudi_late_fini(struct hl_device *hdev) 837{ 838 const struct hwmon_channel_info **channel_info_arr; 839 int i = 0; 840 841 if (!hdev->hl_chip_info->info) 842 return; 843 844 channel_info_arr = hdev->hl_chip_info->info; 845 846 while (channel_info_arr[i]) { 847 kfree(channel_info_arr[i]->config); 848 kfree(channel_info_arr[i]); 849 i++; 850 } 851 852 kfree(channel_info_arr); 853 854 hdev->hl_chip_info->info = NULL; 855} 856 857static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) 
858{ 859 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr; 860 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}; 861 int i, j, rc = 0; 862 863 /* 864 * The device CPU works with 40-bits addresses, while bit 39 must be set 865 * to '1' when accessing the host. 866 * Bits 49:39 of the full host address are saved for a later 867 * configuration of the HW to perform extension to 50 bits. 868 * Because there is a single HW register that holds the extension bits, 869 * these bits must be identical in all allocated range. 870 */ 871 872 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 873 virt_addr_arr[i] = 874 hdev->asic_funcs->asic_dma_alloc_coherent(hdev, 875 HL_CPU_ACCESSIBLE_MEM_SIZE, 876 &dma_addr_arr[i], 877 GFP_KERNEL | __GFP_ZERO); 878 if (!virt_addr_arr[i]) { 879 rc = -ENOMEM; 880 goto free_dma_mem_arr; 881 } 882 883 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 884 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) == 885 GAUDI_CPU_PCI_MSB_ADDR(end_addr)) 886 break; 887 } 888 889 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) { 890 dev_err(hdev->dev, 891 "MSB of CPU accessible DMA memory are not identical in all range\n"); 892 rc = -EFAULT; 893 goto free_dma_mem_arr; 894 } 895 896 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 897 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 898 hdev->cpu_pci_msb_addr = 899 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address); 900 901 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address); 902 903free_dma_mem_arr: 904 for (j = 0 ; j < i ; j++) 905 hdev->asic_funcs->asic_dma_free_coherent(hdev, 906 HL_CPU_ACCESSIBLE_MEM_SIZE, 907 virt_addr_arr[j], 908 dma_addr_arr[j]); 909 910 return rc; 911} 912 913static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev) 914{ 915 struct gaudi_device *gaudi = hdev->asic_specific; 916 struct gaudi_internal_qman_info *q; 917 u32 i; 918 919 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 920 q = &gaudi->internal_qmans[i]; 921 if (!q->pq_kernel_addr) 922 continue; 923 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size, 924 q->pq_kernel_addr, 925 q->pq_dma_addr); 926 } 927} 928 929static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev) 930{ 931 struct gaudi_device *gaudi = hdev->asic_specific; 932 struct gaudi_internal_qman_info *q; 933 int rc, i; 934 935 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 936 if (gaudi_queue_type[i] != QUEUE_TYPE_INT) 937 continue; 938 939 q = &gaudi->internal_qmans[i]; 940 941 switch (i) { 942 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3: 943 case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3: 944 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES; 945 break; 946 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3: 947 q->pq_size = MME_QMAN_SIZE_IN_BYTES; 948 break; 949 case GAUDI_QUEUE_ID_TPC_0_0 ... 
GAUDI_QUEUE_ID_TPC_7_3: 950 q->pq_size = TPC_QMAN_SIZE_IN_BYTES; 951 break; 952 default: 953 dev_err(hdev->dev, "Bad internal queue index %d", i); 954 rc = -EINVAL; 955 goto free_internal_qmans_pq_mem; 956 } 957 958 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent( 959 hdev, q->pq_size, 960 &q->pq_dma_addr, 961 GFP_KERNEL | __GFP_ZERO); 962 if (!q->pq_kernel_addr) { 963 rc = -ENOMEM; 964 goto free_internal_qmans_pq_mem; 965 } 966 } 967 968 return 0; 969 970free_internal_qmans_pq_mem: 971 gaudi_free_internal_qmans_pq_mem(hdev); 972 return rc; 973} 974 975static int gaudi_sw_init(struct hl_device *hdev) 976{ 977 struct gaudi_device *gaudi; 978 u32 i, event_id = 0; 979 int rc; 980 981 /* Allocate device structure */ 982 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL); 983 if (!gaudi) 984 return -ENOMEM; 985 986 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) { 987 if (gaudi_irq_map_table[i].valid) { 988 if (event_id == GAUDI_EVENT_SIZE) { 989 dev_err(hdev->dev, 990 "Event array exceeds the limit of %u events\n", 991 GAUDI_EVENT_SIZE); 992 rc = -EINVAL; 993 goto free_gaudi_device; 994 } 995 996 gaudi->events[event_id++] = 997 gaudi_irq_map_table[i].fc_id; 998 } 999 } 1000 1001 gaudi->cpucp_info_get = gaudi_cpucp_info_get; 1002 1003 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ; 1004 1005 hdev->asic_specific = gaudi; 1006 1007 /* Create DMA pool for small allocations */ 1008 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), 1009 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); 1010 if (!hdev->dma_pool) { 1011 dev_err(hdev->dev, "failed to create DMA pool\n"); 1012 rc = -ENOMEM; 1013 goto free_gaudi_device; 1014 } 1015 1016 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev); 1017 if (rc) 1018 goto free_dma_pool; 1019 1020 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 1021 if (!hdev->cpu_accessible_dma_pool) { 1022 dev_err(hdev->dev, 1023 "Failed to create CPU accessible DMA pool\n"); 1024 rc = -ENOMEM; 1025 goto free_cpu_dma_mem; 1026 } 1027 1028 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, 1029 (uintptr_t) hdev->cpu_accessible_dma_mem, 1030 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 1031 if (rc) { 1032 dev_err(hdev->dev, 1033 "Failed to add memory to CPU accessible DMA pool\n"); 1034 rc = -EFAULT; 1035 goto free_cpu_accessible_dma_pool; 1036 } 1037 1038 rc = gaudi_alloc_internal_qmans_pq_mem(hdev); 1039 if (rc) 1040 goto free_cpu_accessible_dma_pool; 1041 1042 spin_lock_init(&gaudi->hw_queues_lock); 1043 mutex_init(&gaudi->clk_gate_mutex); 1044 1045 hdev->supports_sync_stream = true; 1046 hdev->supports_coresight = true; 1047 1048 return 0; 1049 1050free_cpu_accessible_dma_pool: 1051 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1052free_cpu_dma_mem: 1053 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1054 hdev->cpu_pci_msb_addr); 1055 hdev->asic_funcs->asic_dma_free_coherent(hdev, 1056 HL_CPU_ACCESSIBLE_MEM_SIZE, 1057 hdev->cpu_accessible_dma_mem, 1058 hdev->cpu_accessible_dma_address); 1059free_dma_pool: 1060 dma_pool_destroy(hdev->dma_pool); 1061free_gaudi_device: 1062 kfree(gaudi); 1063 return rc; 1064} 1065 1066static int gaudi_sw_fini(struct hl_device *hdev) 1067{ 1068 struct gaudi_device *gaudi = hdev->asic_specific; 1069 1070 gaudi_free_internal_qmans_pq_mem(hdev); 1071 1072 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1073 1074 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1075 hdev->cpu_pci_msb_addr); 1076 hdev->asic_funcs->asic_dma_free_coherent(hdev, 1077 HL_CPU_ACCESSIBLE_MEM_SIZE, 1078 hdev->cpu_accessible_dma_mem, 1079 
hdev->cpu_accessible_dma_address); 1080 1081 dma_pool_destroy(hdev->dma_pool); 1082 1083 mutex_destroy(&gaudi->clk_gate_mutex); 1084 1085 kfree(gaudi); 1086 1087 return 0; 1088} 1089 1090static irqreturn_t gaudi_irq_handler_single(int irq, void *arg) 1091{ 1092 struct hl_device *hdev = arg; 1093 int i; 1094 1095 if (hdev->disabled) 1096 return IRQ_HANDLED; 1097 1098 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1099 hl_irq_handler_cq(irq, &hdev->completion_queue[i]); 1100 1101 hl_irq_handler_eq(irq, &hdev->event_queue); 1102 1103 return IRQ_HANDLED; 1104} 1105 1106/* 1107 * For backward compatibility, new MSI interrupts should be set after the 1108 * existing CPU and NIC interrupts. 1109 */ 1110static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr, 1111 bool cpu_eq) 1112{ 1113 int msi_vec; 1114 1115 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq)) 1116 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n", 1117 GAUDI_EVENT_QUEUE_MSI_IDX); 1118 1119 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr : 1120 (nr + NIC_NUMBER_OF_ENGINES + 1); 1121 1122 return pci_irq_vector(hdev->pdev, msi_vec); 1123} 1124 1125static int gaudi_enable_msi_single(struct hl_device *hdev) 1126{ 1127 int rc, irq; 1128 1129 dev_info(hdev->dev, "Working in single MSI IRQ mode\n"); 1130 1131 irq = gaudi_pci_irq_vector(hdev, 0, false); 1132 rc = request_irq(irq, gaudi_irq_handler_single, 0, 1133 "gaudi single msi", hdev); 1134 if (rc) 1135 dev_err(hdev->dev, 1136 "Failed to request single MSI IRQ\n"); 1137 1138 return rc; 1139} 1140 1141static int gaudi_enable_msi_multi(struct hl_device *hdev) 1142{ 1143 int cq_cnt = hdev->asic_prop.completion_queues_count; 1144 int rc, i, irq_cnt_init, irq; 1145 1146 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) { 1147 irq = gaudi_pci_irq_vector(hdev, i, false); 1148 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i], 1149 &hdev->completion_queue[i]); 1150 if (rc) { 1151 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 1152 goto free_irqs; 1153 } 1154 } 1155 1156 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true); 1157 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt], 1158 &hdev->event_queue); 1159 if (rc) { 1160 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 1161 goto free_irqs; 1162 } 1163 1164 return 0; 1165 1166free_irqs: 1167 for (i = 0 ; i < irq_cnt_init ; i++) 1168 free_irq(gaudi_pci_irq_vector(hdev, i, false), 1169 &hdev->completion_queue[i]); 1170 return rc; 1171} 1172 1173static int gaudi_enable_msi(struct hl_device *hdev) 1174{ 1175 struct gaudi_device *gaudi = hdev->asic_specific; 1176 int rc; 1177 1178 if (gaudi->hw_cap_initialized & HW_CAP_MSI) 1179 return 0; 1180 1181 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES, 1182 PCI_IRQ_MSI); 1183 if (rc < 0) { 1184 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); 1185 return rc; 1186 } 1187 1188 if (rc < NUMBER_OF_INTERRUPTS) { 1189 gaudi->multi_msi_mode = false; 1190 rc = gaudi_enable_msi_single(hdev); 1191 } else { 1192 gaudi->multi_msi_mode = true; 1193 rc = gaudi_enable_msi_multi(hdev); 1194 } 1195 1196 if (rc) 1197 goto free_pci_irq_vectors; 1198 1199 gaudi->hw_cap_initialized |= HW_CAP_MSI; 1200 1201 return 0; 1202 1203free_pci_irq_vectors: 1204 pci_free_irq_vectors(hdev->pdev); 1205 return rc; 1206} 1207 1208static void gaudi_sync_irqs(struct hl_device *hdev) 1209{ 1210 struct gaudi_device *gaudi = hdev->asic_specific; 1211 int i, cq_cnt = hdev->asic_prop.completion_queues_count; 
1212 1213 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 1214 return; 1215 1216 /* Wait for all pending IRQs to be finished */ 1217 if (gaudi->multi_msi_mode) { 1218 for (i = 0 ; i < cq_cnt ; i++) 1219 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false)); 1220 1221 synchronize_irq(gaudi_pci_irq_vector(hdev, 1222 GAUDI_EVENT_QUEUE_MSI_IDX, 1223 true)); 1224 } else { 1225 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); 1226 } 1227} 1228 1229static void gaudi_disable_msi(struct hl_device *hdev) 1230{ 1231 struct gaudi_device *gaudi = hdev->asic_specific; 1232 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count; 1233 1234 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 1235 return; 1236 1237 gaudi_sync_irqs(hdev); 1238 1239 if (gaudi->multi_msi_mode) { 1240 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, 1241 true); 1242 free_irq(irq, &hdev->event_queue); 1243 1244 for (i = 0 ; i < cq_cnt ; i++) { 1245 irq = gaudi_pci_irq_vector(hdev, i, false); 1246 free_irq(irq, &hdev->completion_queue[i]); 1247 } 1248 } else { 1249 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); 1250 } 1251 1252 pci_free_irq_vectors(hdev->pdev); 1253 1254 gaudi->hw_cap_initialized &= ~HW_CAP_MSI; 1255} 1256 1257static void gaudi_init_scrambler_sram(struct hl_device *hdev) 1258{ 1259 struct gaudi_device *gaudi = hdev->asic_specific; 1260 1261 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER) 1262 return; 1263 1264 if (!hdev->sram_scrambler_enable) 1265 return; 1266 1267 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN, 1268 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1269 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN, 1270 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1271 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN, 1272 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1273 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN, 1274 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1275 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN, 1276 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1277 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN, 1278 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1279 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN, 1280 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1281 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN, 1282 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1283 1284 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN, 1285 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1286 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN, 1287 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1288 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN, 1289 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1290 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN, 1291 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1292 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN, 1293 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1294 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN, 1295 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1296 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN, 1297 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1298 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN, 1299 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 1300 1301 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN, 1302 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 1303 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN, 1304 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 1305 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN, 1306 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 1307 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN, 1308 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 1309 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN, 1310 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 1311 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN, 1312 1 << 
DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 1313 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN, 1314 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 1315 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN, 1316 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 1317 1318 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER; 1319} 1320 1321static void gaudi_init_scrambler_hbm(struct hl_device *hdev) 1322{ 1323 struct gaudi_device *gaudi = hdev->asic_specific; 1324 1325 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER) 1326 return; 1327 1328 if (!hdev->dram_scrambler_enable) 1329 return; 1330 1331 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN, 1332 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1333 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN, 1334 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1335 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN, 1336 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1337 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN, 1338 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1339 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN, 1340 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1341 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN, 1342 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1343 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN, 1344 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1345 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN, 1346 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1347 1348 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN, 1349 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1350 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN, 1351 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1352 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN, 1353 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1354 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN, 1355 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1356 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN, 1357 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1358 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN, 1359 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1360 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN, 1361 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1362 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN, 1363 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 1364 1365 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN, 1366 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 1367 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN, 1368 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 1369 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN, 1370 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 1371 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN, 1372 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 1373 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN, 1374 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 1375 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN, 1376 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 1377 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN, 1378 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 1379 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN, 1380 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 1381 1382 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER; 1383} 1384 1385static void gaudi_init_e2e(struct hl_device *hdev) 1386{ 1387 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3); 1388 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3); 1389 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49); 1390 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101); 1391 1392 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 1393 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 1394 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 1395 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 1396 1397 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 1398 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 1399 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 1400 
WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 1401 1402 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 1403 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 1404 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 1405 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 1406 1407 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 1408 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 1409 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 1410 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 1411 1412 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 1413 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 1414 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 1415 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 1416 1417 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 1418 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 1419 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 1420 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 1421 1422 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3); 1423 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3); 1424 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19); 1425 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19); 1426 1427 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3); 1428 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3); 1429 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79); 1430 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163); 1431 1432 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 1433 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 1434 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 1435 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 1436 1437 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 1438 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 1439 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 1440 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 1441 1442 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 1443 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 1444 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 1445 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 1446 1447 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 1448 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 1449 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 1450 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 1451 1452 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 1453 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 1454 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 1455 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 1456 1457 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 1458 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 1459 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 1460 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 1461 1462 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3); 1463 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3); 1464 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79); 1465 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79); 1466 1467 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 1468 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 1469 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 1470 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 1471 1472 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 1473 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 1474 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 1475 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 1476 1477 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 1478 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 1479 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 1480 
WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 1481 1482 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 1483 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 1484 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 1485 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 1486 1487 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 1488 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 1489 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 1490 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 1491 1492 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 1493 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 1494 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 1495 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 1496 1497 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 1498 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 1499 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 1500 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 1501 1502 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 1503 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 1504 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 1505 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 1506 1507 if (!hdev->dram_scrambler_enable) { 1508 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21); 1509 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22); 1510 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F); 1511 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20); 1512 1513 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21); 1514 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22); 1515 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F); 1516 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20); 1517 1518 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21); 1519 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22); 1520 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F); 1521 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20); 1522 1523 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21); 1524 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22); 1525 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F); 1526 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20); 1527 1528 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21); 1529 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22); 1530 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F); 1531 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20); 1532 1533 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21); 1534 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22); 1535 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F); 1536 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20); 1537 1538 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21); 1539 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22); 1540 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F); 1541 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20); 1542 1543 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21); 1544 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22); 1545 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F); 1546 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20); 1547 1548 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21); 1549 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22); 1550 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F); 1551 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20); 1552 1553 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21); 1554 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22); 1555 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F); 1556 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20); 1557 1558 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21); 1559 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22); 1560 
WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F); 1561 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20); 1562 1563 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21); 1564 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22); 1565 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F); 1566 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20); 1567 1568 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21); 1569 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22); 1570 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F); 1571 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20); 1572 1573 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21); 1574 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22); 1575 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F); 1576 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20); 1577 1578 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21); 1579 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22); 1580 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F); 1581 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20); 1582 1583 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21); 1584 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22); 1585 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F); 1586 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20); 1587 1588 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21); 1589 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22); 1590 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F); 1591 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20); 1592 1593 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21); 1594 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22); 1595 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F); 1596 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20); 1597 1598 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21); 1599 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22); 1600 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F); 1601 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20); 1602 1603 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21); 1604 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22); 1605 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F); 1606 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20); 1607 1608 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21); 1609 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22); 1610 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F); 1611 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20); 1612 1613 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21); 1614 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22); 1615 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F); 1616 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20); 1617 1618 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21); 1619 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22); 1620 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F); 1621 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20); 1622 1623 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21); 1624 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22); 1625 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F); 1626 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20); 1627 } 1628 1629 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN, 1630 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1631 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN, 1632 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1633 1634 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN, 1635 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1636 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN, 1637 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1638 1639 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN, 1640 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1641 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN, 1642 1 << 
IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1643 1644 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN, 1645 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1646 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN, 1647 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1648 1649 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN, 1650 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1651 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN, 1652 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1653 1654 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN, 1655 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1656 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN, 1657 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1658 1659 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN, 1660 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1661 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN, 1662 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1663 1664 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN, 1665 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1666 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN, 1667 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1668 1669 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN, 1670 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1671 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN, 1672 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1673 1674 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN, 1675 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1676 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN, 1677 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1678 1679 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN, 1680 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1681 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN, 1682 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1683 1684 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN, 1685 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1686 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN, 1687 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1688 1689 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN, 1690 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1691 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN, 1692 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1693 1694 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN, 1695 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1696 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN, 1697 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1698 1699 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN, 1700 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1701 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN, 1702 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1703 1704 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN, 1705 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 1706 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN, 1707 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 1708 1709 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN, 1710 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 1711 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN, 1712 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 1713 1714 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN, 1715 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 1716 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN, 1717 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 1718 1719 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN, 1720 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 1721 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN, 1722 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 1723 1724 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN, 1725 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 1726 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN, 1727 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 1728 1729 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN, 1730 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 1731 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN, 1732 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 1733 1734 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN, 1735 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 1736 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN, 1737 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 1738 1739 
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
}

static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;

	hbm0_wr = 0x33333333;
	hbm0_rd = 0x77777777;
	hbm1_wr = 0x55555555;
	hbm1_rd = 0xDDDDDDDD;

	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);

	/* Enable read and write credits toward HBM on all DMA_IF interfaces */
	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
}

static void gaudi_init_golden_registers(struct hl_device *hdev)
{
	u32 tpc_offset;
	int tpc_id, i;

	gaudi_init_e2e(hdev);

	gaudi_init_hbm_cred(hdev);

	hdev->asic_funcs->disable_clock_gating(hdev);

	for (tpc_id = 0, tpc_offset = 0;
			tpc_id < TPC_NUMBER_OF_ENGINES;
			tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
		/* Mask all arithmetic interrupts from TPC */
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
		/* Set 16 cache lines */
		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
				ICACHE_FETCH_LINE_NUM, 2);
	}

	/* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
	for (i = 0 ; i < 128 ; i += 8)
		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);

	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1833 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3); 1834 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3); 1835} 1836 1837static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, 1838 int qman_id, dma_addr_t qman_pq_addr) 1839{ 1840 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 1841 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 1842 u32 q_off, dma_qm_offset; 1843 u32 dma_qm_err_cfg; 1844 1845 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 1846 1847 mtr_base_en_lo = lower_32_bits(CFG_BASE + 1848 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 1849 mtr_base_en_hi = upper_32_bits(CFG_BASE + 1850 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 1851 so_base_en_lo = lower_32_bits(CFG_BASE + 1852 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 1853 so_base_en_hi = upper_32_bits(CFG_BASE + 1854 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 1855 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 1856 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 1857 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 1858 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 1859 so_base_ws_lo = lower_32_bits(CFG_BASE + 1860 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 1861 so_base_ws_hi = upper_32_bits(CFG_BASE + 1862 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 1863 1864 q_off = dma_qm_offset + qman_id * 4; 1865 1866 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr)); 1867 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr)); 1868 1869 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH)); 1870 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 1871 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 1872 1873 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET); 1874 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 1875 QMAN_LDMA_SRC_OFFSET); 1876 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 1877 QMAN_LDMA_DST_OFFSET); 1878 1879 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 1880 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 1881 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 1882 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 1883 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 1884 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 1885 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 1886 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 1887 1888 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100); 1889 1890 /* The following configuration is needed only once per QMAN */ 1891 if (qman_id == 0) { 1892 /* Configure RAZWI IRQ */ 1893 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 1894 if (hdev->stop_on_err) { 1895 dma_qm_err_cfg |= 1896 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 1897 } 1898 1899 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 1900 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 1901 lower_32_bits(CFG_BASE + 1902 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); 1903 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 1904 upper_32_bits(CFG_BASE + 1905 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); 1906 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 1907 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 1908 dma_id); 1909 1910 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 1911 QM_ARB_ERR_MSG_EN_MASK); 1912 1913 /* Increase ARB WDT to support streams architecture */ 1914 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, 1915 GAUDI_ARB_WDT_TIMEOUT); 1916 1917 WREG32(mmDMA0_QM_GLBL_PROT + 
dma_qm_offset,
			QMAN_EXTERNAL_MAKE_TRUSTED);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
	}
}

static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
{
	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;

	/* Set to maximum possible according to physical size */
	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);

	/* WA for H/W bug H3-2116 */
	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);

	/* STOP_ON bit implies no completion of the operation in case of RAZWI */
	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
	/* If the channel is secured, it should be in MMU bypass mode */
	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
}

static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
		u32 enable_mask)
{
	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
}

static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;

	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
		return;

	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		/*
		 * For queues that come after the CPU queue, add 1 to get the
		 * correct queue index. In addition, the CPU EQ and the NIC
		 * IRQs must be counted in order to get the correct MSI
		 * register.
1978 */ 1979 if (dma_id > 1) { 1980 cpu_skip = 1; 1981 nic_skip = NIC_NUMBER_OF_ENGINES; 1982 } else { 1983 cpu_skip = 0; 1984 nic_skip = 0; 1985 } 1986 1987 for (j = 0 ; j < QMAN_STREAMS ; j++) { 1988 q_idx = 4 * dma_id + j + cpu_skip; 1989 q = &hdev->kernel_queues[q_idx]; 1990 q->cq_id = cq_id++; 1991 q->msi_vec = nic_skip + cpu_skip + msi_vec++; 1992 gaudi_init_pci_dma_qman(hdev, dma_id, j, 1993 q->bus_address); 1994 } 1995 1996 gaudi_init_dma_core(hdev, dma_id); 1997 1998 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE); 1999 } 2000 2001 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA; 2002} 2003 2004static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, 2005 int qman_id, u64 qman_base_addr) 2006{ 2007 u32 mtr_base_lo, mtr_base_hi; 2008 u32 so_base_lo, so_base_hi; 2009 u32 q_off, dma_qm_offset; 2010 u32 dma_qm_err_cfg; 2011 2012 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2013 2014 mtr_base_lo = lower_32_bits(CFG_BASE + 2015 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2016 mtr_base_hi = upper_32_bits(CFG_BASE + 2017 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2018 so_base_lo = lower_32_bits(CFG_BASE + 2019 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2020 so_base_hi = upper_32_bits(CFG_BASE + 2021 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2022 2023 q_off = dma_qm_offset + qman_id * 4; 2024 2025 if (qman_id < 4) { 2026 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, 2027 lower_32_bits(qman_base_addr)); 2028 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, 2029 upper_32_bits(qman_base_addr)); 2030 2031 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH)); 2032 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2033 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2034 2035 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2036 QMAN_CPDMA_SIZE_OFFSET); 2037 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2038 QMAN_CPDMA_SRC_OFFSET); 2039 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2040 QMAN_CPDMA_DST_OFFSET); 2041 } else { 2042 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2043 QMAN_LDMA_SIZE_OFFSET); 2044 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2045 QMAN_LDMA_SRC_OFFSET); 2046 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2047 QMAN_LDMA_DST_OFFSET); 2048 2049 /* Configure RAZWI IRQ */ 2050 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2051 if (hdev->stop_on_err) { 2052 dma_qm_err_cfg |= 2053 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2054 } 2055 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2056 2057 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2058 lower_32_bits(CFG_BASE + 2059 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); 2060 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2061 upper_32_bits(CFG_BASE + 2062 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); 2063 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2064 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2065 dma_id); 2066 2067 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2068 QM_ARB_ERR_MSG_EN_MASK); 2069 2070 /* Increase ARB WDT to support streams architecture */ 2071 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, 2072 GAUDI_ARB_WDT_TIMEOUT); 2073 2074 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2075 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2076 QMAN_INTERNAL_MAKE_TRUSTED); 2077 } 2078 2079 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); 2080 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); 2081 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); 2082 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, 
so_base_hi); 2083} 2084 2085static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev) 2086{ 2087 struct gaudi_device *gaudi = hdev->asic_specific; 2088 struct gaudi_internal_qman_info *q; 2089 u64 qman_base_addr; 2090 int i, j, dma_id, internal_q_index; 2091 2092 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA) 2093 return; 2094 2095 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { 2096 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i]; 2097 2098 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2099 /* 2100 * Add the CPU queue in order to get the correct queue 2101 * number as all internal queue are placed after it 2102 */ 2103 internal_q_index = dma_id * QMAN_STREAMS + j + 1; 2104 2105 q = &gaudi->internal_qmans[internal_q_index]; 2106 qman_base_addr = (u64) q->pq_dma_addr; 2107 gaudi_init_hbm_dma_qman(hdev, dma_id, j, 2108 qman_base_addr); 2109 } 2110 2111 /* Initializing lower CP for HBM DMA QMAN */ 2112 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0); 2113 2114 gaudi_init_dma_core(hdev, dma_id); 2115 2116 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE); 2117 } 2118 2119 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA; 2120} 2121 2122static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset, 2123 int qman_id, u64 qman_base_addr) 2124{ 2125 u32 mtr_base_lo, mtr_base_hi; 2126 u32 so_base_lo, so_base_hi; 2127 u32 q_off, mme_id; 2128 u32 mme_qm_err_cfg; 2129 2130 mtr_base_lo = lower_32_bits(CFG_BASE + 2131 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2132 mtr_base_hi = upper_32_bits(CFG_BASE + 2133 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2134 so_base_lo = lower_32_bits(CFG_BASE + 2135 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2136 so_base_hi = upper_32_bits(CFG_BASE + 2137 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2138 2139 q_off = mme_offset + qman_id * 4; 2140 2141 if (qman_id < 4) { 2142 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off, 2143 lower_32_bits(qman_base_addr)); 2144 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off, 2145 upper_32_bits(qman_base_addr)); 2146 2147 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH)); 2148 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0); 2149 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0); 2150 2151 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2152 QMAN_CPDMA_SIZE_OFFSET); 2153 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2154 QMAN_CPDMA_SRC_OFFSET); 2155 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2156 QMAN_CPDMA_DST_OFFSET); 2157 } else { 2158 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2159 QMAN_LDMA_SIZE_OFFSET); 2160 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2161 QMAN_LDMA_SRC_OFFSET); 2162 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2163 QMAN_LDMA_DST_OFFSET); 2164 2165 /* Configure RAZWI IRQ */ 2166 mme_id = mme_offset / 2167 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2; 2168 2169 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2170 if (hdev->stop_on_err) { 2171 mme_qm_err_cfg |= 2172 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2173 } 2174 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg); 2175 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset, 2176 lower_32_bits(CFG_BASE + 2177 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); 2178 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset, 2179 upper_32_bits(CFG_BASE + 2180 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); 2181 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset, 2182 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id + 2183 mme_id); 2184 2185 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset, 2186 QM_ARB_ERR_MSG_EN_MASK); 2187 2188 /* Increase ARB WDT to support 
streams architecture */ 2189 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, 2190 GAUDI_ARB_WDT_TIMEOUT); 2191 2192 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0); 2193 WREG32(mmMME0_QM_GLBL_PROT + mme_offset, 2194 QMAN_INTERNAL_MAKE_TRUSTED); 2195 } 2196 2197 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); 2198 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); 2199 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); 2200 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); 2201} 2202 2203static void gaudi_init_mme_qmans(struct hl_device *hdev) 2204{ 2205 struct gaudi_device *gaudi = hdev->asic_specific; 2206 struct gaudi_internal_qman_info *q; 2207 u64 qman_base_addr; 2208 u32 mme_offset; 2209 int i, internal_q_index; 2210 2211 if (gaudi->hw_cap_initialized & HW_CAP_MME) 2212 return; 2213 2214 /* 2215 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE) 2216 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE) 2217 */ 2218 2219 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 2220 2221 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) { 2222 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i; 2223 q = &gaudi->internal_qmans[internal_q_index]; 2224 qman_base_addr = (u64) q->pq_dma_addr; 2225 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3), 2226 qman_base_addr); 2227 if (i == 3) 2228 mme_offset = 0; 2229 } 2230 2231 /* Initializing lower CP for MME QMANs */ 2232 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 2233 gaudi_init_mme_qman(hdev, mme_offset, 4, 0); 2234 gaudi_init_mme_qman(hdev, 0, 4, 0); 2235 2236 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE); 2237 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE); 2238 2239 gaudi->hw_cap_initialized |= HW_CAP_MME; 2240} 2241 2242static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset, 2243 int qman_id, u64 qman_base_addr) 2244{ 2245 u32 mtr_base_lo, mtr_base_hi; 2246 u32 so_base_lo, so_base_hi; 2247 u32 q_off, tpc_id; 2248 u32 tpc_qm_err_cfg; 2249 2250 mtr_base_lo = lower_32_bits(CFG_BASE + 2251 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2252 mtr_base_hi = upper_32_bits(CFG_BASE + 2253 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2254 so_base_lo = lower_32_bits(CFG_BASE + 2255 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2256 so_base_hi = upper_32_bits(CFG_BASE + 2257 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2258 2259 q_off = tpc_offset + qman_id * 4; 2260 2261 if (qman_id < 4) { 2262 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off, 2263 lower_32_bits(qman_base_addr)); 2264 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off, 2265 upper_32_bits(qman_base_addr)); 2266 2267 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH)); 2268 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0); 2269 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0); 2270 2271 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2272 QMAN_CPDMA_SIZE_OFFSET); 2273 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2274 QMAN_CPDMA_SRC_OFFSET); 2275 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2276 QMAN_CPDMA_DST_OFFSET); 2277 } else { 2278 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2279 QMAN_LDMA_SIZE_OFFSET); 2280 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2281 QMAN_LDMA_SRC_OFFSET); 2282 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2283 QMAN_LDMA_DST_OFFSET); 2284 2285 /* Configure RAZWI IRQ */ 2286 tpc_id = tpc_offset / 2287 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0); 2288 2289 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2290 if (hdev->stop_on_err) { 2291 tpc_qm_err_cfg |= 2292 
TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2293 } 2294 2295 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg); 2296 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset, 2297 lower_32_bits(CFG_BASE + 2298 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); 2299 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset, 2300 upper_32_bits(CFG_BASE + 2301 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); 2302 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset, 2303 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id + 2304 tpc_id); 2305 2306 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset, 2307 QM_ARB_ERR_MSG_EN_MASK); 2308 2309 /* Increase ARB WDT to support streams architecture */ 2310 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, 2311 GAUDI_ARB_WDT_TIMEOUT); 2312 2313 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0); 2314 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset, 2315 QMAN_INTERNAL_MAKE_TRUSTED); 2316 } 2317 2318 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); 2319 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); 2320 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); 2321 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); 2322} 2323 2324static void gaudi_init_tpc_qmans(struct hl_device *hdev) 2325{ 2326 struct gaudi_device *gaudi = hdev->asic_specific; 2327 struct gaudi_internal_qman_info *q; 2328 u64 qman_base_addr; 2329 u32 so_base_hi, tpc_offset = 0; 2330 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH - 2331 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH; 2332 int i, tpc_id, internal_q_index; 2333 2334 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK) 2335 return; 2336 2337 so_base_hi = upper_32_bits(CFG_BASE + 2338 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2339 2340 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 2341 for (i = 0 ; i < QMAN_STREAMS ; i++) { 2342 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 + 2343 tpc_id * QMAN_STREAMS + i; 2344 q = &gaudi->internal_qmans[internal_q_index]; 2345 qman_base_addr = (u64) q->pq_dma_addr; 2346 gaudi_init_tpc_qman(hdev, tpc_offset, i, 2347 qman_base_addr); 2348 2349 if (i == 3) { 2350 /* Initializing lower CP for TPC QMAN */ 2351 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0); 2352 2353 /* Enable the QMAN and TPC channel */ 2354 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 2355 QMAN_TPC_ENABLE); 2356 } 2357 } 2358 2359 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta, 2360 so_base_hi); 2361 2362 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 2363 2364 gaudi->hw_cap_initialized |= 2365 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id); 2366 } 2367} 2368 2369static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev) 2370{ 2371 struct gaudi_device *gaudi = hdev->asic_specific; 2372 2373 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 2374 return; 2375 2376 WREG32(mmDMA0_QM_GLBL_CFG0, 0); 2377 WREG32(mmDMA1_QM_GLBL_CFG0, 0); 2378 WREG32(mmDMA5_QM_GLBL_CFG0, 0); 2379} 2380 2381static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev) 2382{ 2383 struct gaudi_device *gaudi = hdev->asic_specific; 2384 2385 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 2386 return; 2387 2388 WREG32(mmDMA2_QM_GLBL_CFG0, 0); 2389 WREG32(mmDMA3_QM_GLBL_CFG0, 0); 2390 WREG32(mmDMA4_QM_GLBL_CFG0, 0); 2391 WREG32(mmDMA6_QM_GLBL_CFG0, 0); 2392 WREG32(mmDMA7_QM_GLBL_CFG0, 0); 2393} 2394 2395static void gaudi_disable_mme_qmans(struct hl_device *hdev) 2396{ 2397 struct gaudi_device *gaudi = hdev->asic_specific; 2398 2399 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 2400 return; 2401 2402 WREG32(mmMME2_QM_GLBL_CFG0, 0); 2403 
WREG32(mmMME0_QM_GLBL_CFG0, 0); 2404} 2405 2406static void gaudi_disable_tpc_qmans(struct hl_device *hdev) 2407{ 2408 struct gaudi_device *gaudi = hdev->asic_specific; 2409 u32 tpc_offset = 0; 2410 int tpc_id; 2411 2412 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 2413 return; 2414 2415 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 2416 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0); 2417 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 2418 } 2419} 2420 2421static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev) 2422{ 2423 struct gaudi_device *gaudi = hdev->asic_specific; 2424 2425 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 2426 return; 2427 2428 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */ 2429 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2430 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2431 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2432} 2433 2434static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev) 2435{ 2436 struct gaudi_device *gaudi = hdev->asic_specific; 2437 2438 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 2439 return; 2440 2441 /* Stop CPs of HBM DMA QMANs */ 2442 2443 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2444 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2445 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2446 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2447 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2448} 2449 2450static void gaudi_stop_mme_qmans(struct hl_device *hdev) 2451{ 2452 struct gaudi_device *gaudi = hdev->asic_specific; 2453 2454 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 2455 return; 2456 2457 /* Stop CPs of MME QMANs */ 2458 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2459 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2460} 2461 2462static void gaudi_stop_tpc_qmans(struct hl_device *hdev) 2463{ 2464 struct gaudi_device *gaudi = hdev->asic_specific; 2465 2466 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 2467 return; 2468 2469 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2470 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2471 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2472 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2473 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2474 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2475 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2476 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 2477} 2478 2479static void gaudi_pci_dma_stall(struct hl_device *hdev) 2480{ 2481 struct gaudi_device *gaudi = hdev->asic_specific; 2482 2483 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 2484 return; 2485 2486 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 2487 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 2488 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 2489} 2490 2491static void gaudi_hbm_dma_stall(struct hl_device *hdev) 2492{ 2493 struct gaudi_device *gaudi = hdev->asic_specific; 2494 2495 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 2496 return; 2497 2498 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 2499 WREG32(mmDMA3_CORE_CFG_1, 1 << 
DMA0_CORE_CFG_1_HALT_SHIFT); 2500 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 2501 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 2502 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 2503} 2504 2505static void gaudi_mme_stall(struct hl_device *hdev) 2506{ 2507 struct gaudi_device *gaudi = hdev->asic_specific; 2508 2509 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 2510 return; 2511 2512 /* WA for H3-1800 bug: do ACC and SBAB writes twice */ 2513 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 2514 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 2515 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 2516 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 2517 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 2518 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 2519 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 2520 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 2521 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 2522 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 2523 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 2524 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 2525 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 2526 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 2527 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 2528 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 2529} 2530 2531static void gaudi_tpc_stall(struct hl_device *hdev) 2532{ 2533 struct gaudi_device *gaudi = hdev->asic_specific; 2534 2535 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 2536 return; 2537 2538 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 2539 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 2540 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 2541 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 2542 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 2543 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 2544 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 2545 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 2546} 2547 2548static void gaudi_set_clock_gating(struct hl_device *hdev) 2549{ 2550 struct gaudi_device *gaudi = hdev->asic_specific; 2551 u32 qman_offset; 2552 bool enable; 2553 int i; 2554 2555 /* In case we are during debug session, don't enable the clock gate 2556 * as it may interfere 2557 */ 2558 if (hdev->in_debug) 2559 return; 2560 2561 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) { 2562 enable = !!(hdev->clock_gating_mask & 2563 (BIT_ULL(gaudi_dma_assignment[i]))); 2564 2565 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; 2566 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 2567 enable ? QMAN_CGM1_PWR_GATE_EN : 0); 2568 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 2569 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0); 2570 } 2571 2572 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) { 2573 enable = !!(hdev->clock_gating_mask & 2574 (BIT_ULL(gaudi_dma_assignment[i]))); 2575 2576 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; 2577 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 2578 enable ? QMAN_CGM1_PWR_GATE_EN : 0); 2579 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 2580 enable ? 
QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); 2581 } 2582 2583 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))); 2584 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0); 2585 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); 2586 2587 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))); 2588 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0); 2589 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); 2590 2591 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 2592 enable = !!(hdev->clock_gating_mask & 2593 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i))); 2594 2595 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 2596 enable ? QMAN_CGM1_PWR_GATE_EN : 0); 2597 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 2598 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0); 2599 2600 qman_offset += TPC_QMAN_OFFSET; 2601 } 2602 2603 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE; 2604} 2605 2606static void gaudi_disable_clock_gating(struct hl_device *hdev) 2607{ 2608 struct gaudi_device *gaudi = hdev->asic_specific; 2609 u32 qman_offset; 2610 int i; 2611 2612 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)) 2613 return; 2614 2615 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 2616 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0); 2617 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0); 2618 2619 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG); 2620 } 2621 2622 WREG32(mmMME0_QM_CGM_CFG, 0); 2623 WREG32(mmMME0_QM_CGM_CFG1, 0); 2624 WREG32(mmMME2_QM_CGM_CFG, 0); 2625 WREG32(mmMME2_QM_CGM_CFG1, 0); 2626 2627 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 2628 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0); 2629 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0); 2630 2631 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); 2632 } 2633 2634 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE); 2635} 2636 2637static void gaudi_enable_timestamp(struct hl_device *hdev) 2638{ 2639 /* Disable the timestamp counter */ 2640 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 2641 2642 /* Zero the lower/upper parts of the 64-bit counter */ 2643 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); 2644 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); 2645 2646 /* Enable the counter */ 2647 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); 2648} 2649 2650static void gaudi_disable_timestamp(struct hl_device *hdev) 2651{ 2652 /* Disable the timestamp counter */ 2653 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 2654} 2655 2656static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset) 2657{ 2658 u32 wait_timeout_ms; 2659 2660 dev_info(hdev->dev, 2661 "Halting compute engines and disabling interrupts\n"); 2662 2663 if (hdev->pldm) 2664 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 2665 else 2666 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC; 2667 2668 2669 gaudi_stop_mme_qmans(hdev); 2670 gaudi_stop_tpc_qmans(hdev); 2671 gaudi_stop_hbm_dma_qmans(hdev); 2672 gaudi_stop_pci_dma_qmans(hdev); 2673 2674 hdev->asic_funcs->disable_clock_gating(hdev); 2675 2676 msleep(wait_timeout_ms); 2677 2678 gaudi_pci_dma_stall(hdev); 2679 gaudi_hbm_dma_stall(hdev); 2680 gaudi_tpc_stall(hdev); 2681 gaudi_mme_stall(hdev); 2682 2683 msleep(wait_timeout_ms); 2684 2685 gaudi_disable_mme_qmans(hdev); 2686 gaudi_disable_tpc_qmans(hdev); 2687 gaudi_disable_hbm_dma_qmans(hdev); 2688 gaudi_disable_pci_dma_qmans(hdev); 2689 2690 gaudi_disable_timestamp(hdev); 2691 2692 gaudi_disable_msi(hdev); 2693} 2694 2695static int gaudi_mmu_init(struct hl_device *hdev) 
2696{ 2697 struct asic_fixed_properties *prop = &hdev->asic_prop; 2698 struct gaudi_device *gaudi = hdev->asic_specific; 2699 u64 hop0_addr; 2700 int rc, i; 2701 2702 if (!hdev->mmu_enable) 2703 return 0; 2704 2705 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 2706 return 0; 2707 2708 hdev->dram_supports_virtual_memory = false; 2709 2710 for (i = 0 ; i < prop->max_asid ; i++) { 2711 hop0_addr = prop->mmu_pgt_addr + 2712 (i * prop->mmu_hop_table_size); 2713 2714 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); 2715 if (rc) { 2716 dev_err(hdev->dev, 2717 "failed to set hop0 addr for asid %d\n", i); 2718 goto err; 2719 } 2720 } 2721 2722 /* init MMU cache manage page */ 2723 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8); 2724 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40); 2725 2726 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0); 2727 2728 WREG32(mmMMU_UP_MMU_ENABLE, 1); 2729 WREG32(mmMMU_UP_SPI_MASK, 0xF); 2730 2731 WREG32(mmSTLB_HOP_CONFIGURATION, 2732 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440); 2733 2734 /* 2735 * The H/W expects the first PI after init to be 1. After wraparound 2736 * we'll write 0. 2737 */ 2738 gaudi->mmu_cache_inv_pi = 1; 2739 2740 gaudi->hw_cap_initialized |= HW_CAP_MMU; 2741 2742 return 0; 2743 2744err: 2745 return rc; 2746} 2747 2748static int gaudi_load_firmware_to_device(struct hl_device *hdev) 2749{ 2750 void __iomem *dst; 2751 2752 /* HBM scrambler must be initialized before pushing F/W to HBM */ 2753 gaudi_init_scrambler_hbm(hdev); 2754 2755 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET; 2756 2757 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst); 2758} 2759 2760static int gaudi_load_boot_fit_to_device(struct hl_device *hdev) 2761{ 2762 void __iomem *dst; 2763 2764 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET; 2765 2766 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst); 2767} 2768 2769static void gaudi_read_device_fw_version(struct hl_device *hdev, 2770 enum hl_fw_component fwc) 2771{ 2772 const char *name; 2773 u32 ver_off; 2774 char *dest; 2775 2776 switch (fwc) { 2777 case FW_COMP_UBOOT: 2778 ver_off = RREG32(mmUBOOT_VER_OFFSET); 2779 dest = hdev->asic_prop.uboot_ver; 2780 name = "U-Boot"; 2781 break; 2782 case FW_COMP_PREBOOT: 2783 ver_off = RREG32(mmPREBOOT_VER_OFFSET); 2784 dest = hdev->asic_prop.preboot_ver; 2785 name = "Preboot"; 2786 break; 2787 default: 2788 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc); 2789 return; 2790 } 2791 2792 ver_off &= ~((u32)SRAM_BASE_ADDR); 2793 2794 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) { 2795 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off, 2796 VERSION_MAX_LEN); 2797 } else { 2798 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n", 2799 name, ver_off); 2800 strcpy(dest, "unavailable"); 2801 } 2802} 2803 2804static int gaudi_init_cpu(struct hl_device *hdev) 2805{ 2806 struct gaudi_device *gaudi = hdev->asic_specific; 2807 int rc; 2808 2809 if (!hdev->cpu_enable) 2810 return 0; 2811 2812 if (gaudi->hw_cap_initialized & HW_CAP_CPU) 2813 return 0; 2814 2815 /* 2816 * The device CPU works with 40 bits addresses. 2817 * This register sets the extension to 50 bits. 
2818 */ 2819 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr); 2820 2821 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS, 2822 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, 2823 mmCPU_CMD_STATUS_TO_HOST, 2824 mmCPU_BOOT_ERR0, 2825 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC, 2826 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC); 2827 2828 if (rc) 2829 return rc; 2830 2831 gaudi->hw_cap_initialized |= HW_CAP_CPU; 2832 2833 return 0; 2834} 2835 2836static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 2837{ 2838 struct gaudi_device *gaudi = hdev->asic_specific; 2839 struct hl_eq *eq; 2840 u32 status; 2841 struct hl_hw_queue *cpu_pq = 2842 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 2843 int err; 2844 2845 if (!hdev->cpu_queues_enable) 2846 return 0; 2847 2848 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 2849 return 0; 2850 2851 eq = &hdev->event_queue; 2852 2853 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 2854 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 2855 2856 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 2857 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 2858 2859 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, 2860 lower_32_bits(hdev->cpu_accessible_dma_address)); 2861 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, 2862 upper_32_bits(hdev->cpu_accessible_dma_address)); 2863 2864 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 2865 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 2866 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 2867 2868 /* Used for EQ CI */ 2869 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 2870 2871 WREG32(mmCPU_IF_PF_PQ_PI, 0); 2872 2873 if (gaudi->multi_msi_mode) 2874 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP); 2875 else 2876 WREG32(mmCPU_IF_QUEUE_INIT, 2877 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); 2878 2879 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE); 2880 2881 err = hl_poll_timeout( 2882 hdev, 2883 mmCPU_IF_QUEUE_INIT, 2884 status, 2885 (status == PQ_INIT_STATUS_READY_FOR_HOST), 2886 1000, 2887 cpu_timeout); 2888 2889 if (err) { 2890 dev_err(hdev->dev, 2891 "Failed to communicate with Device CPU (CPU-CP timeout)\n"); 2892 return -EIO; 2893 } 2894 2895 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; 2896 return 0; 2897} 2898 2899static void gaudi_pre_hw_init(struct hl_device *hdev) 2900{ 2901 /* Perform read from the device to make sure device is up */ 2902 RREG32(mmHW_STATE); 2903 2904 /* Set the access through PCI bars (Linux driver only) as 2905 * secured 2906 */ 2907 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, 2908 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | 2909 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); 2910 2911 /* Perform read to flush the waiting writes to ensure 2912 * configuration was set in the device 2913 */ 2914 RREG32(mmPCIE_WRAP_LBW_PROT_OVR); 2915 2916 /* 2917 * Let's mark in the H/W that we have reached this point. We check 2918 * this value in the reset_before_init function to understand whether 2919 * we need to reset the chip before doing H/W init. This register is 2920 * cleared by the H/W upon H/W reset 2921 */ 2922 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 2923 2924 /* Configure the reset registers. 
Must be done as early as possible 2925 * in case we fail during H/W initialization 2926 */ 2927 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H, 2928 (CFG_RST_H_DMA_MASK | 2929 CFG_RST_H_MME_MASK | 2930 CFG_RST_H_SM_MASK | 2931 CFG_RST_H_TPC_7_MASK)); 2932 2933 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK); 2934 2935 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H, 2936 (CFG_RST_H_HBM_MASK | 2937 CFG_RST_H_TPC_7_MASK | 2938 CFG_RST_H_NIC_MASK | 2939 CFG_RST_H_SM_MASK | 2940 CFG_RST_H_DMA_MASK | 2941 CFG_RST_H_MME_MASK | 2942 CFG_RST_H_CPU_MASK | 2943 CFG_RST_H_MMU_MASK)); 2944 2945 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L, 2946 (CFG_RST_L_IF_MASK | 2947 CFG_RST_L_PSOC_MASK | 2948 CFG_RST_L_TPC_MASK)); 2949} 2950 2951static int gaudi_hw_init(struct hl_device *hdev) 2952{ 2953 int rc; 2954 2955 dev_info(hdev->dev, "Starting initialization of H/W\n"); 2956 2957 gaudi_pre_hw_init(hdev); 2958 2959 gaudi_init_pci_dma_qmans(hdev); 2960 2961 gaudi_init_hbm_dma_qmans(hdev); 2962 2963 rc = gaudi_init_cpu(hdev); 2964 if (rc) { 2965 dev_err(hdev->dev, "failed to initialize CPU\n"); 2966 return rc; 2967 } 2968 2969 /* SRAM scrambler must be initialized after CPU is running from HBM */ 2970 gaudi_init_scrambler_sram(hdev); 2971 2972 /* This is here just in case we are working without CPU */ 2973 gaudi_init_scrambler_hbm(hdev); 2974 2975 gaudi_init_golden_registers(hdev); 2976 2977 rc = gaudi_mmu_init(hdev); 2978 if (rc) 2979 return rc; 2980 2981 gaudi_init_security(hdev); 2982 2983 gaudi_init_mme_qmans(hdev); 2984 2985 gaudi_init_tpc_qmans(hdev); 2986 2987 hdev->asic_funcs->set_clock_gating(hdev); 2988 2989 gaudi_enable_timestamp(hdev); 2990 2991 /* MSI must be enabled before CPU queues are initialized */ 2992 rc = gaudi_enable_msi(hdev); 2993 if (rc) 2994 goto disable_queues; 2995 2996 /* must be called after MSI was enabled */ 2997 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC); 2998 if (rc) { 2999 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", 3000 rc); 3001 goto disable_msi; 3002 } 3003 3004 /* Perform read from the device to flush all configuration */ 3005 RREG32(mmHW_STATE); 3006 3007 return 0; 3008 3009disable_msi: 3010 gaudi_disable_msi(hdev); 3011disable_queues: 3012 gaudi_disable_mme_qmans(hdev); 3013 gaudi_disable_pci_dma_qmans(hdev); 3014 3015 return rc; 3016} 3017 3018static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) 3019{ 3020 struct gaudi_device *gaudi = hdev->asic_specific; 3021 u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0; 3022 3023 if (!hard_reset) { 3024 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n"); 3025 return; 3026 } 3027 3028 if (hdev->pldm) { 3029 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC; 3030 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 3031 } else { 3032 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC; 3033 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC; 3034 } 3035 3036 /* Set device to handle FLR by H/W as we will put the device CPU to 3037 * halt mode 3038 */ 3039 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | 3040 PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 3041 3042 /* I don't know what is the state of the CPU so make sure it is 3043 * stopped in any means necessary 3044 */ 3045 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE); 3046 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE); 3047 3048 msleep(cpu_timeout_ms); 3049 3050 /* Tell ASIC not to re-initialize PCIe */ 3051 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC); 3052 3053 boot_strap = 
RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS); 3054 3055 /* H/W bug WA: 3056 * rdata[31:0] = strap_read_val; 3057 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0] 3058 */ 3059 boot_strap = (((boot_strap & 0x7FE00000) << 1) | 3060 (boot_strap & 0x001FFFFF)); 3061 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2); 3062 3063 /* Restart BTL/BLR upon hard-reset */ 3064 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1); 3065 3066 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 3067 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); 3068 dev_info(hdev->dev, 3069 "Issued HARD reset command, going to wait %dms\n", 3070 reset_timeout_ms); 3071 3072 /* 3073 * After hard reset, we can't poll the BTM_FSM register because the PSOC 3074 * itself is in reset. Need to wait until the reset is deasserted 3075 */ 3076 msleep(reset_timeout_ms); 3077 3078 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); 3079 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) 3080 dev_err(hdev->dev, 3081 "Timeout while waiting for device to reset 0x%x\n", 3082 status); 3083 3084 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap); 3085 3086 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | 3087 HW_CAP_HBM | HW_CAP_PCI_DMA | 3088 HW_CAP_MME | HW_CAP_TPC_MASK | 3089 HW_CAP_HBM_DMA | HW_CAP_PLL | 3090 HW_CAP_MMU | 3091 HW_CAP_SRAM_SCRAMBLER | 3092 HW_CAP_HBM_SCRAMBLER | 3093 HW_CAP_CLK_GATE); 3094 3095 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); 3096} 3097 3098static int gaudi_suspend(struct hl_device *hdev) 3099{ 3100 int rc; 3101 3102 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); 3103 if (rc) 3104 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 3105 3106 return rc; 3107} 3108 3109static int gaudi_resume(struct hl_device *hdev) 3110{ 3111 return gaudi_init_iatu(hdev); 3112} 3113 3114static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 3115 void *cpu_addr, dma_addr_t dma_addr, size_t size) 3116{ 3117 int rc; 3118 3119 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 3120 VM_DONTCOPY | VM_NORESERVE; 3121 3122 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, 3123 (dma_addr - HOST_PHYS_BASE), size); 3124 if (rc) 3125 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 3126 3127 return rc; 3128} 3129 3130static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 3131{ 3132 struct gaudi_device *gaudi = hdev->asic_specific; 3133 u32 db_reg_offset, db_value, dma_qm_offset, q_off; 3134 int dma_id; 3135 bool invalid_queue = false; 3136 3137 switch (hw_queue_id) { 3138 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3: 3139 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 3140 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 3141 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 3142 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 3143 break; 3144 3145 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3: 3146 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 3147 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 3148 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 3149 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 3150 break; 3151 3152 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3: 3153 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1]; 3154 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 3155 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 3156 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 3157 break; 3158 3159 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3: 3160 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2]; 3161 dma_qm_offset = dma_id * 
DMA_QMAN_OFFSET; 3162 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 3163 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 3164 break; 3165 3166 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3: 3167 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3]; 3168 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 3169 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 3170 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 3171 break; 3172 3173 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3: 3174 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3]; 3175 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 3176 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 3177 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 3178 break; 3179 3180 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3: 3181 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4]; 3182 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 3183 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 3184 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 3185 break; 3186 3187 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3: 3188 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5]; 3189 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 3190 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 3191 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 3192 break; 3193 3194 case GAUDI_QUEUE_ID_CPU_PQ: 3195 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 3196 db_reg_offset = mmCPU_IF_PF_PQ_PI; 3197 else 3198 invalid_queue = true; 3199 break; 3200 3201 case GAUDI_QUEUE_ID_MME_0_0: 3202 db_reg_offset = mmMME2_QM_PQ_PI_0; 3203 break; 3204 3205 case GAUDI_QUEUE_ID_MME_0_1: 3206 db_reg_offset = mmMME2_QM_PQ_PI_1; 3207 break; 3208 3209 case GAUDI_QUEUE_ID_MME_0_2: 3210 db_reg_offset = mmMME2_QM_PQ_PI_2; 3211 break; 3212 3213 case GAUDI_QUEUE_ID_MME_0_3: 3214 db_reg_offset = mmMME2_QM_PQ_PI_3; 3215 break; 3216 3217 case GAUDI_QUEUE_ID_MME_1_0: 3218 db_reg_offset = mmMME0_QM_PQ_PI_0; 3219 break; 3220 3221 case GAUDI_QUEUE_ID_MME_1_1: 3222 db_reg_offset = mmMME0_QM_PQ_PI_1; 3223 break; 3224 3225 case GAUDI_QUEUE_ID_MME_1_2: 3226 db_reg_offset = mmMME0_QM_PQ_PI_2; 3227 break; 3228 3229 case GAUDI_QUEUE_ID_MME_1_3: 3230 db_reg_offset = mmMME0_QM_PQ_PI_3; 3231 break; 3232 3233 case GAUDI_QUEUE_ID_TPC_0_0: 3234 db_reg_offset = mmTPC0_QM_PQ_PI_0; 3235 break; 3236 3237 case GAUDI_QUEUE_ID_TPC_0_1: 3238 db_reg_offset = mmTPC0_QM_PQ_PI_1; 3239 break; 3240 3241 case GAUDI_QUEUE_ID_TPC_0_2: 3242 db_reg_offset = mmTPC0_QM_PQ_PI_2; 3243 break; 3244 3245 case GAUDI_QUEUE_ID_TPC_0_3: 3246 db_reg_offset = mmTPC0_QM_PQ_PI_3; 3247 break; 3248 3249 case GAUDI_QUEUE_ID_TPC_1_0: 3250 db_reg_offset = mmTPC1_QM_PQ_PI_0; 3251 break; 3252 3253 case GAUDI_QUEUE_ID_TPC_1_1: 3254 db_reg_offset = mmTPC1_QM_PQ_PI_1; 3255 break; 3256 3257 case GAUDI_QUEUE_ID_TPC_1_2: 3258 db_reg_offset = mmTPC1_QM_PQ_PI_2; 3259 break; 3260 3261 case GAUDI_QUEUE_ID_TPC_1_3: 3262 db_reg_offset = mmTPC1_QM_PQ_PI_3; 3263 break; 3264 3265 case GAUDI_QUEUE_ID_TPC_2_0: 3266 db_reg_offset = mmTPC2_QM_PQ_PI_0; 3267 break; 3268 3269 case GAUDI_QUEUE_ID_TPC_2_1: 3270 db_reg_offset = mmTPC2_QM_PQ_PI_1; 3271 break; 3272 3273 case GAUDI_QUEUE_ID_TPC_2_2: 3274 db_reg_offset = mmTPC2_QM_PQ_PI_2; 3275 break; 3276 3277 case GAUDI_QUEUE_ID_TPC_2_3: 3278 db_reg_offset = mmTPC2_QM_PQ_PI_3; 3279 break; 3280 3281 case GAUDI_QUEUE_ID_TPC_3_0: 3282 db_reg_offset = mmTPC3_QM_PQ_PI_0; 3283 break; 3284 3285 case GAUDI_QUEUE_ID_TPC_3_1: 3286 db_reg_offset = mmTPC3_QM_PQ_PI_1; 3287 break; 3288 3289 case GAUDI_QUEUE_ID_TPC_3_2: 3290 db_reg_offset = mmTPC3_QM_PQ_PI_2; 3291 break; 3292 3293 
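	/*
	 * Note: the TPC doorbell registers follow a regular pattern, where
	 * stream S of TPC N maps to mmTPC<N>_QM_PQ_PI_<S>.
	 */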
case GAUDI_QUEUE_ID_TPC_3_3: 3294 db_reg_offset = mmTPC3_QM_PQ_PI_3; 3295 break; 3296 3297 case GAUDI_QUEUE_ID_TPC_4_0: 3298 db_reg_offset = mmTPC4_QM_PQ_PI_0; 3299 break; 3300 3301 case GAUDI_QUEUE_ID_TPC_4_1: 3302 db_reg_offset = mmTPC4_QM_PQ_PI_1; 3303 break; 3304 3305 case GAUDI_QUEUE_ID_TPC_4_2: 3306 db_reg_offset = mmTPC4_QM_PQ_PI_2; 3307 break; 3308 3309 case GAUDI_QUEUE_ID_TPC_4_3: 3310 db_reg_offset = mmTPC4_QM_PQ_PI_3; 3311 break; 3312 3313 case GAUDI_QUEUE_ID_TPC_5_0: 3314 db_reg_offset = mmTPC5_QM_PQ_PI_0; 3315 break; 3316 3317 case GAUDI_QUEUE_ID_TPC_5_1: 3318 db_reg_offset = mmTPC5_QM_PQ_PI_1; 3319 break; 3320 3321 case GAUDI_QUEUE_ID_TPC_5_2: 3322 db_reg_offset = mmTPC5_QM_PQ_PI_2; 3323 break; 3324 3325 case GAUDI_QUEUE_ID_TPC_5_3: 3326 db_reg_offset = mmTPC5_QM_PQ_PI_3; 3327 break; 3328 3329 case GAUDI_QUEUE_ID_TPC_6_0: 3330 db_reg_offset = mmTPC6_QM_PQ_PI_0; 3331 break; 3332 3333 case GAUDI_QUEUE_ID_TPC_6_1: 3334 db_reg_offset = mmTPC6_QM_PQ_PI_1; 3335 break; 3336 3337 case GAUDI_QUEUE_ID_TPC_6_2: 3338 db_reg_offset = mmTPC6_QM_PQ_PI_2; 3339 break; 3340 3341 case GAUDI_QUEUE_ID_TPC_6_3: 3342 db_reg_offset = mmTPC6_QM_PQ_PI_3; 3343 break; 3344 3345 case GAUDI_QUEUE_ID_TPC_7_0: 3346 db_reg_offset = mmTPC7_QM_PQ_PI_0; 3347 break; 3348 3349 case GAUDI_QUEUE_ID_TPC_7_1: 3350 db_reg_offset = mmTPC7_QM_PQ_PI_1; 3351 break; 3352 3353 case GAUDI_QUEUE_ID_TPC_7_2: 3354 db_reg_offset = mmTPC7_QM_PQ_PI_2; 3355 break; 3356 3357 case GAUDI_QUEUE_ID_TPC_7_3: 3358 db_reg_offset = mmTPC7_QM_PQ_PI_3; 3359 break; 3360 3361 default: 3362 invalid_queue = true; 3363 } 3364 3365 if (invalid_queue) { 3366 /* Should never get here */ 3367 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n", 3368 hw_queue_id); 3369 return; 3370 } 3371 3372 db_value = pi; 3373 3374 /* ring the doorbell */ 3375 WREG32(db_reg_offset, db_value); 3376 3377 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) 3378 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, 3379 GAUDI_EVENT_PI_UPDATE); 3380} 3381 3382static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, 3383 struct hl_bd *bd) 3384{ 3385 __le64 *pbd = (__le64 *) bd; 3386 3387 /* The QMANs are on the host memory so a simple copy suffice */ 3388 pqe[0] = pbd[0]; 3389 pqe[1] = pbd[1]; 3390} 3391 3392static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size, 3393 dma_addr_t *dma_handle, gfp_t flags) 3394{ 3395 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size, 3396 dma_handle, flags); 3397 3398 /* Shift to the device's base physical address of host memory */ 3399 if (kernel_addr) 3400 *dma_handle += HOST_PHYS_BASE; 3401 3402 return kernel_addr; 3403} 3404 3405static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size, 3406 void *cpu_addr, dma_addr_t dma_handle) 3407{ 3408 /* Cancel the device's base physical address of host memory */ 3409 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE; 3410 3411 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); 3412} 3413 3414static void *gaudi_get_int_queue_base(struct hl_device *hdev, 3415 u32 queue_id, dma_addr_t *dma_handle, 3416 u16 *queue_len) 3417{ 3418 struct gaudi_device *gaudi = hdev->asic_specific; 3419 struct gaudi_internal_qman_info *q; 3420 3421 if (queue_id >= GAUDI_QUEUE_ID_SIZE || 3422 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) { 3423 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id); 3424 return NULL; 3425 } 3426 3427 q = &gaudi->internal_qmans[queue_id]; 3428 *dma_handle = q->pq_dma_addr; 3429 *queue_len = q->pq_size / 
QMAN_PQ_ENTRY_SIZE; 3430 3431 return q->pq_kernel_addr; 3432} 3433 3434static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, 3435 u16 len, u32 timeout, long *result) 3436{ 3437 struct gaudi_device *gaudi = hdev->asic_specific; 3438 3439 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) { 3440 if (result) 3441 *result = 0; 3442 return 0; 3443 } 3444 3445 if (!timeout) 3446 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; 3447 3448 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, 3449 timeout, result); 3450} 3451 3452static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) 3453{ 3454 struct packet_msg_prot *fence_pkt; 3455 dma_addr_t pkt_dma_addr; 3456 u32 fence_val, tmp, timeout_usec; 3457 dma_addr_t fence_dma_addr; 3458 u32 *fence_ptr; 3459 int rc; 3460 3461 if (hdev->pldm) 3462 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC; 3463 else 3464 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC; 3465 3466 fence_val = GAUDI_QMAN0_FENCE_VAL; 3467 3468 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, 3469 &fence_dma_addr); 3470 if (!fence_ptr) { 3471 dev_err(hdev->dev, 3472 "Failed to allocate memory for H/W queue %d testing\n", 3473 hw_queue_id); 3474 return -ENOMEM; 3475 } 3476 3477 *fence_ptr = 0; 3478 3479 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 3480 sizeof(struct packet_msg_prot), 3481 GFP_KERNEL, &pkt_dma_addr); 3482 if (!fence_pkt) { 3483 dev_err(hdev->dev, 3484 "Failed to allocate packet for H/W queue %d testing\n", 3485 hw_queue_id); 3486 rc = -ENOMEM; 3487 goto free_fence_ptr; 3488 } 3489 3490 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 3491 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 3492 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 3493 3494 fence_pkt->ctl = cpu_to_le32(tmp); 3495 fence_pkt->value = cpu_to_le32(fence_val); 3496 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 3497 3498 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, 3499 sizeof(struct packet_msg_prot), 3500 pkt_dma_addr); 3501 if (rc) { 3502 dev_err(hdev->dev, 3503 "Failed to send fence packet to H/W queue %d\n", 3504 hw_queue_id); 3505 goto free_pkt; 3506 } 3507 3508 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), 3509 1000, timeout_usec, true); 3510 3511 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); 3512 3513 if (rc == -ETIMEDOUT) { 3514 dev_err(hdev->dev, 3515 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", 3516 hw_queue_id, (unsigned long long) fence_dma_addr, tmp); 3517 rc = -EIO; 3518 } 3519 3520free_pkt: 3521 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt, 3522 pkt_dma_addr); 3523free_fence_ptr: 3524 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr, 3525 fence_dma_addr); 3526 return rc; 3527} 3528 3529static int gaudi_test_cpu_queue(struct hl_device *hdev) 3530{ 3531 struct gaudi_device *gaudi = hdev->asic_specific; 3532 3533 /* 3534 * check capability here as send_cpu_message() won't update the result 3535 * value if no capability 3536 */ 3537 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 3538 return 0; 3539 3540 return hl_fw_test_cpu_queue(hdev); 3541} 3542 3543static int gaudi_test_queues(struct hl_device *hdev) 3544{ 3545 int i, rc, ret_val = 0; 3546 3547 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) { 3548 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) { 3549 rc = gaudi_test_queue(hdev, i); 3550 if (rc) 3551 ret_val = -EINVAL; 3552 } 3553 } 3554 3555 rc = gaudi_test_cpu_queue(hdev); 3556 if (rc) 3557 ret_val = -EINVAL; 3558 3559 
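	/* A failure in any single queue test fails the overall test */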
return ret_val; 3560} 3561 3562static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size, 3563 gfp_t mem_flags, dma_addr_t *dma_handle) 3564{ 3565 void *kernel_addr; 3566 3567 if (size > GAUDI_DMA_POOL_BLK_SIZE) 3568 return NULL; 3569 3570 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 3571 3572 /* Shift to the device's base physical address of host memory */ 3573 if (kernel_addr) 3574 *dma_handle += HOST_PHYS_BASE; 3575 3576 return kernel_addr; 3577} 3578 3579static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr, 3580 dma_addr_t dma_addr) 3581{ 3582 /* Cancel the device's base physical address of host memory */ 3583 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; 3584 3585 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); 3586} 3587 3588static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, 3589 size_t size, dma_addr_t *dma_handle) 3590{ 3591 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 3592} 3593 3594static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, 3595 size_t size, void *vaddr) 3596{ 3597 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 3598} 3599 3600static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl, 3601 int nents, enum dma_data_direction dir) 3602{ 3603 struct scatterlist *sg; 3604 int i; 3605 3606 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir)) 3607 return -ENOMEM; 3608 3609 /* Shift to the device's base physical address of host memory */ 3610 for_each_sg(sgl, sg, nents, i) 3611 sg->dma_address += HOST_PHYS_BASE; 3612 3613 return 0; 3614} 3615 3616static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl, 3617 int nents, enum dma_data_direction dir) 3618{ 3619 struct scatterlist *sg; 3620 int i; 3621 3622 /* Cancel the device's base physical address of host memory */ 3623 for_each_sg(sgl, sg, nents, i) 3624 sg->dma_address -= HOST_PHYS_BASE; 3625 3626 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir); 3627} 3628 3629static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, 3630 struct sg_table *sgt) 3631{ 3632 struct scatterlist *sg, *sg_next_iter; 3633 u32 count, dma_desc_cnt; 3634 u64 len, len_next; 3635 dma_addr_t addr, addr_next; 3636 3637 dma_desc_cnt = 0; 3638 3639 for_each_sg(sgt->sgl, sg, sgt->nents, count) { 3640 3641 len = sg_dma_len(sg); 3642 addr = sg_dma_address(sg); 3643 3644 if (len == 0) 3645 break; 3646 3647 while ((count + 1) < sgt->nents) { 3648 sg_next_iter = sg_next(sg); 3649 len_next = sg_dma_len(sg_next_iter); 3650 addr_next = sg_dma_address(sg_next_iter); 3651 3652 if (len_next == 0) 3653 break; 3654 3655 if ((addr + len == addr_next) && 3656 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 3657 len += len_next; 3658 count++; 3659 sg = sg_next_iter; 3660 } else { 3661 break; 3662 } 3663 } 3664 3665 dma_desc_cnt++; 3666 } 3667 3668 return dma_desc_cnt * sizeof(struct packet_lin_dma); 3669} 3670 3671static int gaudi_pin_memory_before_cs(struct hl_device *hdev, 3672 struct hl_cs_parser *parser, 3673 struct packet_lin_dma *user_dma_pkt, 3674 u64 addr, enum dma_data_direction dir) 3675{ 3676 struct hl_userptr *userptr; 3677 int rc; 3678 3679 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 3680 parser->job_userptr_list, &userptr)) 3681 goto already_pinned; 3682 3683 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC); 3684 if (!userptr) 3685 return -ENOMEM; 3686 3687 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 3688 userptr); 3689 if (rc) 3690 
goto free_userptr; 3691 3692 list_add_tail(&userptr->job_node, parser->job_userptr_list); 3693 3694 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl, 3695 userptr->sgt->nents, dir); 3696 if (rc) { 3697 dev_err(hdev->dev, "failed to map sgt with DMA region\n"); 3698 goto unpin_memory; 3699 } 3700 3701 userptr->dma_mapped = true; 3702 userptr->dir = dir; 3703 3704already_pinned: 3705 parser->patched_cb_size += 3706 gaudi_get_dma_desc_list_size(hdev, userptr->sgt); 3707 3708 return 0; 3709 3710unpin_memory: 3711 list_del(&userptr->job_node); 3712 hl_unpin_host_memory(hdev, userptr); 3713free_userptr: 3714 kfree(userptr); 3715 return rc; 3716} 3717 3718static int gaudi_validate_dma_pkt_host(struct hl_device *hdev, 3719 struct hl_cs_parser *parser, 3720 struct packet_lin_dma *user_dma_pkt, 3721 bool src_in_host) 3722{ 3723 enum dma_data_direction dir; 3724 bool skip_host_mem_pin = false, user_memset; 3725 u64 addr; 3726 int rc = 0; 3727 3728 user_memset = (le32_to_cpu(user_dma_pkt->ctl) & 3729 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 3730 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 3731 3732 if (src_in_host) { 3733 if (user_memset) 3734 skip_host_mem_pin = true; 3735 3736 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n"); 3737 dir = DMA_TO_DEVICE; 3738 addr = le64_to_cpu(user_dma_pkt->src_addr); 3739 } else { 3740 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n"); 3741 dir = DMA_FROM_DEVICE; 3742 addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 3743 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 3744 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 3745 } 3746 3747 if (skip_host_mem_pin) 3748 parser->patched_cb_size += sizeof(*user_dma_pkt); 3749 else 3750 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt, 3751 addr, dir); 3752 3753 return rc; 3754} 3755 3756static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev, 3757 struct hl_cs_parser *parser, 3758 struct packet_lin_dma *user_dma_pkt) 3759{ 3760 bool src_in_host = false; 3761 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 3762 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 3763 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 3764 3765 dev_dbg(hdev->dev, "DMA packet details:\n"); 3766 dev_dbg(hdev->dev, "source == 0x%llx\n", 3767 le64_to_cpu(user_dma_pkt->src_addr)); 3768 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr); 3769 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); 3770 3771 /* 3772 * Special handling for DMA with size 0. 
Bypass all validations 3773 * because no transactions will be done except for WR_COMP, which 3774 * is not a security issue 3775 */ 3776 if (!le32_to_cpu(user_dma_pkt->tsize)) { 3777 parser->patched_cb_size += sizeof(*user_dma_pkt); 3778 return 0; 3779 } 3780 3781 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 3782 src_in_host = true; 3783 3784 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt, 3785 src_in_host); 3786} 3787 3788static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev, 3789 struct hl_cs_parser *parser, 3790 struct packet_load_and_exe *user_pkt) 3791{ 3792 u32 cfg; 3793 3794 cfg = le32_to_cpu(user_pkt->cfg); 3795 3796 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) { 3797 dev_err(hdev->dev, 3798 "User not allowed to use Load and Execute\n"); 3799 return -EPERM; 3800 } 3801 3802 parser->patched_cb_size += sizeof(struct packet_load_and_exe); 3803 3804 return 0; 3805} 3806 3807static int gaudi_validate_cb(struct hl_device *hdev, 3808 struct hl_cs_parser *parser, bool is_mmu) 3809{ 3810 u32 cb_parsed_length = 0; 3811 int rc = 0; 3812 3813 parser->patched_cb_size = 0; 3814 3815 /* cb_user_size is more than 0 so loop will always be executed */ 3816 while (cb_parsed_length < parser->user_cb_size) { 3817 enum packet_id pkt_id; 3818 u16 pkt_size; 3819 struct gaudi_packet *user_pkt; 3820 3821 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 3822 3823 pkt_id = (enum packet_id) ( 3824 (le64_to_cpu(user_pkt->header) & 3825 PACKET_HEADER_PACKET_ID_MASK) >> 3826 PACKET_HEADER_PACKET_ID_SHIFT); 3827 3828 if (!validate_packet_id(pkt_id)) { 3829 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 3830 rc = -EINVAL; 3831 break; 3832 } 3833 3834 pkt_size = gaudi_packet_sizes[pkt_id]; 3835 cb_parsed_length += pkt_size; 3836 if (cb_parsed_length > parser->user_cb_size) { 3837 dev_err(hdev->dev, 3838 "packet 0x%x is out of CB boundary\n", pkt_id); 3839 rc = -EINVAL; 3840 break; 3841 } 3842 3843 switch (pkt_id) { 3844 case PACKET_MSG_PROT: 3845 dev_err(hdev->dev, 3846 "User not allowed to use MSG_PROT\n"); 3847 rc = -EPERM; 3848 break; 3849 3850 case PACKET_CP_DMA: 3851 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 3852 rc = -EPERM; 3853 break; 3854 3855 case PACKET_STOP: 3856 dev_err(hdev->dev, "User not allowed to use STOP\n"); 3857 rc = -EPERM; 3858 break; 3859 3860 case PACKET_WREG_BULK: 3861 dev_err(hdev->dev, 3862 "User not allowed to use WREG_BULK\n"); 3863 rc = -EPERM; 3864 break; 3865 3866 case PACKET_LOAD_AND_EXE: 3867 rc = gaudi_validate_load_and_exe_pkt(hdev, parser, 3868 (struct packet_load_and_exe *) user_pkt); 3869 break; 3870 3871 case PACKET_LIN_DMA: 3872 parser->contains_dma_pkt = true; 3873 if (is_mmu) 3874 parser->patched_cb_size += pkt_size; 3875 else 3876 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser, 3877 (struct packet_lin_dma *) user_pkt); 3878 break; 3879 3880 case PACKET_WREG_32: 3881 case PACKET_MSG_LONG: 3882 case PACKET_MSG_SHORT: 3883 case PACKET_REPEAT: 3884 case PACKET_FENCE: 3885 case PACKET_NOP: 3886 case PACKET_ARB_POINT: 3887 parser->patched_cb_size += pkt_size; 3888 break; 3889 3890 default: 3891 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 3892 pkt_id); 3893 rc = -EINVAL; 3894 break; 3895 } 3896 3897 if (rc) 3898 break; 3899 } 3900 3901 /* 3902 * The new CB should have space at the end for two MSG_PROT packets: 3903 * 1. A packet that will act as a completion packet 3904 * 2. 
A packet that will generate MSI-X interrupt 3905 */ 3906 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2; 3907 3908 return rc; 3909} 3910 3911static int gaudi_patch_dma_packet(struct hl_device *hdev, 3912 struct hl_cs_parser *parser, 3913 struct packet_lin_dma *user_dma_pkt, 3914 struct packet_lin_dma *new_dma_pkt, 3915 u32 *new_dma_pkt_size) 3916{ 3917 struct hl_userptr *userptr; 3918 struct scatterlist *sg, *sg_next_iter; 3919 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl; 3920 u64 len, len_next; 3921 dma_addr_t dma_addr, dma_addr_next; 3922 u64 device_memory_addr, addr; 3923 enum dma_data_direction dir; 3924 struct sg_table *sgt; 3925 bool src_in_host = false; 3926 bool skip_host_mem_pin = false; 3927 bool user_memset; 3928 3929 ctl = le32_to_cpu(user_dma_pkt->ctl); 3930 3931 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 3932 src_in_host = true; 3933 3934 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 3935 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 3936 3937 if (src_in_host) { 3938 addr = le64_to_cpu(user_dma_pkt->src_addr); 3939 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); 3940 dir = DMA_TO_DEVICE; 3941 if (user_memset) 3942 skip_host_mem_pin = true; 3943 } else { 3944 addr = le64_to_cpu(user_dma_pkt->dst_addr); 3945 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); 3946 dir = DMA_FROM_DEVICE; 3947 } 3948 3949 if ((!skip_host_mem_pin) && 3950 (!hl_userptr_is_pinned(hdev, addr, 3951 le32_to_cpu(user_dma_pkt->tsize), 3952 parser->job_userptr_list, &userptr))) { 3953 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", 3954 addr, user_dma_pkt->tsize); 3955 return -EFAULT; 3956 } 3957 3958 if ((user_memset) && (dir == DMA_TO_DEVICE)) { 3959 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt)); 3960 *new_dma_pkt_size = sizeof(*user_dma_pkt); 3961 return 0; 3962 } 3963 3964 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 3965 3966 sgt = userptr->sgt; 3967 dma_desc_cnt = 0; 3968 3969 for_each_sg(sgt->sgl, sg, sgt->nents, count) { 3970 len = sg_dma_len(sg); 3971 dma_addr = sg_dma_address(sg); 3972 3973 if (len == 0) 3974 break; 3975 3976 while ((count + 1) < sgt->nents) { 3977 sg_next_iter = sg_next(sg); 3978 len_next = sg_dma_len(sg_next_iter); 3979 dma_addr_next = sg_dma_address(sg_next_iter); 3980 3981 if (len_next == 0) 3982 break; 3983 3984 if ((dma_addr + len == dma_addr_next) && 3985 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 3986 len += len_next; 3987 count++; 3988 sg = sg_next_iter; 3989 } else { 3990 break; 3991 } 3992 } 3993 3994 ctl = le32_to_cpu(user_dma_pkt->ctl); 3995 if (likely(dma_desc_cnt)) 3996 ctl &= ~GAUDI_PKT_CTL_EB_MASK; 3997 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 3998 new_dma_pkt->ctl = cpu_to_le32(ctl); 3999 new_dma_pkt->tsize = cpu_to_le32(len); 4000 4001 if (dir == DMA_TO_DEVICE) { 4002 new_dma_pkt->src_addr = cpu_to_le64(dma_addr); 4003 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr); 4004 } else { 4005 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr); 4006 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr); 4007 } 4008 4009 if (!user_memset) 4010 device_memory_addr += len; 4011 dma_desc_cnt++; 4012 new_dma_pkt++; 4013 } 4014 4015 if (!dma_desc_cnt) { 4016 dev_err(hdev->dev, 4017 "Error of 0 SG entries when patching DMA packet\n"); 4018 return -EFAULT; 4019 } 4020 4021 /* Fix the last dma packet - wrcomp must be as user set it */ 4022 new_dma_pkt--; 4023 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask); 4024 4025 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct 
packet_lin_dma); 4026 4027 return 0; 4028} 4029 4030static int gaudi_patch_cb(struct hl_device *hdev, 4031 struct hl_cs_parser *parser) 4032{ 4033 u32 cb_parsed_length = 0; 4034 u32 cb_patched_cur_length = 0; 4035 int rc = 0; 4036 4037 /* cb_user_size is more than 0 so loop will always be executed */ 4038 while (cb_parsed_length < parser->user_cb_size) { 4039 enum packet_id pkt_id; 4040 u16 pkt_size; 4041 u32 new_pkt_size = 0; 4042 struct gaudi_packet *user_pkt, *kernel_pkt; 4043 4044 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 4045 kernel_pkt = parser->patched_cb->kernel_address + 4046 cb_patched_cur_length; 4047 4048 pkt_id = (enum packet_id) ( 4049 (le64_to_cpu(user_pkt->header) & 4050 PACKET_HEADER_PACKET_ID_MASK) >> 4051 PACKET_HEADER_PACKET_ID_SHIFT); 4052 4053 if (!validate_packet_id(pkt_id)) { 4054 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 4055 rc = -EINVAL; 4056 break; 4057 } 4058 4059 pkt_size = gaudi_packet_sizes[pkt_id]; 4060 cb_parsed_length += pkt_size; 4061 if (cb_parsed_length > parser->user_cb_size) { 4062 dev_err(hdev->dev, 4063 "packet 0x%x is out of CB boundary\n", pkt_id); 4064 rc = -EINVAL; 4065 break; 4066 } 4067 4068 switch (pkt_id) { 4069 case PACKET_LIN_DMA: 4070 rc = gaudi_patch_dma_packet(hdev, parser, 4071 (struct packet_lin_dma *) user_pkt, 4072 (struct packet_lin_dma *) kernel_pkt, 4073 &new_pkt_size); 4074 cb_patched_cur_length += new_pkt_size; 4075 break; 4076 4077 case PACKET_MSG_PROT: 4078 dev_err(hdev->dev, 4079 "User not allowed to use MSG_PROT\n"); 4080 rc = -EPERM; 4081 break; 4082 4083 case PACKET_CP_DMA: 4084 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 4085 rc = -EPERM; 4086 break; 4087 4088 case PACKET_STOP: 4089 dev_err(hdev->dev, "User not allowed to use STOP\n"); 4090 rc = -EPERM; 4091 break; 4092 4093 case PACKET_WREG_32: 4094 case PACKET_WREG_BULK: 4095 case PACKET_MSG_LONG: 4096 case PACKET_MSG_SHORT: 4097 case PACKET_REPEAT: 4098 case PACKET_FENCE: 4099 case PACKET_NOP: 4100 case PACKET_ARB_POINT: 4101 case PACKET_LOAD_AND_EXE: 4102 memcpy(kernel_pkt, user_pkt, pkt_size); 4103 cb_patched_cur_length += pkt_size; 4104 break; 4105 4106 default: 4107 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 4108 pkt_id); 4109 rc = -EINVAL; 4110 break; 4111 } 4112 4113 if (rc) 4114 break; 4115 } 4116 4117 return rc; 4118} 4119 4120static int gaudi_parse_cb_mmu(struct hl_device *hdev, 4121 struct hl_cs_parser *parser) 4122{ 4123 u64 patched_cb_handle; 4124 u32 patched_cb_size; 4125 struct hl_cb *user_cb; 4126 int rc; 4127 4128 /* 4129 * The new CB should have space at the end for two MSG_PROT pkt: 4130 * 1. A packet that will act as a completion packet 4131 * 2. 
A packet that will generate MSI interrupt 4132 */ 4133 parser->patched_cb_size = parser->user_cb_size + 4134 sizeof(struct packet_msg_prot) * 2; 4135 4136 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, 4137 parser->patched_cb_size, false, false, 4138 &patched_cb_handle); 4139 4140 if (rc) { 4141 dev_err(hdev->dev, 4142 "Failed to allocate patched CB for DMA CS %d\n", 4143 rc); 4144 return rc; 4145 } 4146 4147 patched_cb_handle >>= PAGE_SHIFT; 4148 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, 4149 (u32) patched_cb_handle); 4150 /* hl_cb_get should never fail here so use kernel WARN */ 4151 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n", 4152 (u32) patched_cb_handle); 4153 if (!parser->patched_cb) { 4154 rc = -EFAULT; 4155 goto out; 4156 } 4157 4158 /* 4159 * The check that parser->user_cb_size <= parser->user_cb->size was done 4160 * in validate_queue_index(). 4161 */ 4162 memcpy(parser->patched_cb->kernel_address, 4163 parser->user_cb->kernel_address, 4164 parser->user_cb_size); 4165 4166 patched_cb_size = parser->patched_cb_size; 4167 4168 /* Validate patched CB instead of user CB */ 4169 user_cb = parser->user_cb; 4170 parser->user_cb = parser->patched_cb; 4171 rc = gaudi_validate_cb(hdev, parser, true); 4172 parser->user_cb = user_cb; 4173 4174 if (rc) { 4175 hl_cb_put(parser->patched_cb); 4176 goto out; 4177 } 4178 4179 if (patched_cb_size != parser->patched_cb_size) { 4180 dev_err(hdev->dev, "user CB size mismatch\n"); 4181 hl_cb_put(parser->patched_cb); 4182 rc = -EINVAL; 4183 goto out; 4184 } 4185 4186out: 4187 /* 4188 * Always call cb destroy here because we still have 1 reference 4189 * to it by calling cb_get earlier. After the job will be completed, 4190 * cb_put will release it, but here we want to remove it from the 4191 * idr 4192 */ 4193 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, 4194 patched_cb_handle << PAGE_SHIFT); 4195 4196 return rc; 4197} 4198 4199static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, 4200 struct hl_cs_parser *parser) 4201{ 4202 u64 patched_cb_handle; 4203 int rc; 4204 4205 rc = gaudi_validate_cb(hdev, parser, false); 4206 4207 if (rc) 4208 goto free_userptr; 4209 4210 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, 4211 parser->patched_cb_size, false, false, 4212 &patched_cb_handle); 4213 if (rc) { 4214 dev_err(hdev->dev, 4215 "Failed to allocate patched CB for DMA CS %d\n", rc); 4216 goto free_userptr; 4217 } 4218 4219 patched_cb_handle >>= PAGE_SHIFT; 4220 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, 4221 (u32) patched_cb_handle); 4222 /* hl_cb_get should never fail here so use kernel WARN */ 4223 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n", 4224 (u32) patched_cb_handle); 4225 if (!parser->patched_cb) { 4226 rc = -EFAULT; 4227 goto out; 4228 } 4229 4230 rc = gaudi_patch_cb(hdev, parser); 4231 4232 if (rc) 4233 hl_cb_put(parser->patched_cb); 4234 4235out: 4236 /* 4237 * Always call cb destroy here because we still have 1 reference 4238 * to it by calling cb_get earlier. 
After the job will be completed, 4239 * cb_put will release it, but here we want to remove it from the 4240 * idr 4241 */ 4242 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, 4243 patched_cb_handle << PAGE_SHIFT); 4244 4245free_userptr: 4246 if (rc) 4247 hl_userptr_delete_list(hdev, parser->job_userptr_list); 4248 return rc; 4249} 4250 4251static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev, 4252 struct hl_cs_parser *parser) 4253{ 4254 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 4255 4256 /* For internal queue jobs just check if CB address is valid */ 4257 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 4258 parser->user_cb_size, 4259 asic_prop->sram_user_base_address, 4260 asic_prop->sram_end_address)) 4261 return 0; 4262 4263 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 4264 parser->user_cb_size, 4265 asic_prop->dram_user_base_address, 4266 asic_prop->dram_end_address)) 4267 return 0; 4268 4269 /* PMMU and HPMMU addresses are equal, check only one of them */ 4270 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 4271 parser->user_cb_size, 4272 asic_prop->pmmu.start_addr, 4273 asic_prop->pmmu.end_addr)) 4274 return 0; 4275 4276 dev_err(hdev->dev, 4277 "CB address 0x%px + 0x%x for internal QMAN is not valid\n", 4278 parser->user_cb, parser->user_cb_size); 4279 4280 return -EFAULT; 4281} 4282 4283static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 4284{ 4285 struct gaudi_device *gaudi = hdev->asic_specific; 4286 4287 if (parser->queue_type == QUEUE_TYPE_INT) 4288 return gaudi_parse_cb_no_ext_queue(hdev, parser); 4289 4290 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 4291 return gaudi_parse_cb_mmu(hdev, parser); 4292 else 4293 return gaudi_parse_cb_no_mmu(hdev, parser); 4294} 4295 4296static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, 4297 void *kernel_address, u32 len, 4298 u64 cq_addr, u32 cq_val, u32 msi_vec, 4299 bool eb) 4300{ 4301 struct gaudi_device *gaudi = hdev->asic_specific; 4302 struct packet_msg_prot *cq_pkt; 4303 u32 tmp; 4304 4305 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); 4306 4307 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 4308 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4309 4310 if (eb) 4311 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 4312 4313 cq_pkt->ctl = cpu_to_le32(tmp); 4314 cq_pkt->value = cpu_to_le32(cq_val); 4315 cq_pkt->addr = cpu_to_le64(cq_addr); 4316 4317 cq_pkt++; 4318 4319 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 4320 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4321 cq_pkt->ctl = cpu_to_le32(tmp); 4322 cq_pkt->value = cpu_to_le32(1); 4323 4324 if (!gaudi->multi_msi_mode) 4325 msi_vec = 0; 4326 4327 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4); 4328} 4329 4330static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val) 4331{ 4332 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 4333} 4334 4335static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 4336 u32 size, u64 val) 4337{ 4338 struct packet_lin_dma *lin_dma_pkt; 4339 struct hl_cs_job *job; 4340 u32 cb_size, ctl, err_cause; 4341 struct hl_cb *cb; 4342 int rc; 4343 4344 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 4345 if (!cb) 4346 return -EFAULT; 4347 4348 lin_dma_pkt = cb->kernel_address; 4349 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); 4350 cb_size = sizeof(*lin_dma_pkt); 4351 4352 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 4353 ctl |= 
FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 4354 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 4355 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4356 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 4357 4358 lin_dma_pkt->ctl = cpu_to_le32(ctl); 4359 lin_dma_pkt->src_addr = cpu_to_le64(val); 4360 lin_dma_pkt->dst_addr |= cpu_to_le64(addr); 4361 lin_dma_pkt->tsize = cpu_to_le32(size); 4362 4363 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 4364 if (!job) { 4365 dev_err(hdev->dev, "Failed to allocate a new job\n"); 4366 rc = -ENOMEM; 4367 goto release_cb; 4368 } 4369 4370 /* Verify DMA is OK */ 4371 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 4372 if (err_cause && !hdev->init_done) { 4373 dev_dbg(hdev->dev, 4374 "Clearing DMA0 engine from errors (cause 0x%x)\n", 4375 err_cause); 4376 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 4377 } 4378 4379 job->id = 0; 4380 job->user_cb = cb; 4381 job->user_cb->cs_cnt++; 4382 job->user_cb_size = cb_size; 4383 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 4384 job->patched_cb = job->user_cb; 4385 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 4386 4387 hl_debugfs_add_job(hdev, job); 4388 4389 rc = gaudi_send_job_on_qman0(hdev, job); 4390 hl_debugfs_remove_job(hdev, job); 4391 kfree(job); 4392 cb->cs_cnt--; 4393 4394 /* Verify DMA is OK */ 4395 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 4396 if (err_cause) { 4397 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 4398 rc = -EIO; 4399 if (!hdev->init_done) { 4400 dev_dbg(hdev->dev, 4401 "Clearing DMA0 engine from errors (cause 0x%x)\n", 4402 err_cause); 4403 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 4404 } 4405 } 4406 4407release_cb: 4408 hl_cb_put(cb); 4409 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); 4410 4411 return rc; 4412} 4413 4414static void gaudi_restore_sm_registers(struct hl_device *hdev) 4415{ 4416 int i; 4417 4418 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) { 4419 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0); 4420 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0); 4421 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0); 4422 } 4423 4424 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) { 4425 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0); 4426 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0); 4427 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0); 4428 } 4429 4430 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4; 4431 4432 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) 4433 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0); 4434 4435 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4; 4436 4437 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) 4438 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0); 4439} 4440 4441static void gaudi_restore_dma_registers(struct hl_device *hdev) 4442{ 4443 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 - 4444 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 4445 int i; 4446 4447 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 4448 u64 sob_addr = CFG_BASE + 4449 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + 4450 (i * sob_delta); 4451 u32 dma_offset = i * DMA_CORE_OFFSET; 4452 4453 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset, 4454 lower_32_bits(sob_addr)); 4455 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset, 4456 upper_32_bits(sob_addr)); 4457 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001); 4458 4459 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be 4460 * modified by the user for SRAM reduction 4461 */ 
4462 if (i > 1) 4463 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset, 4464 0x00000001); 4465 } 4466} 4467 4468static void gaudi_restore_qm_registers(struct hl_device *hdev) 4469{ 4470 u32 qman_offset; 4471 int i; 4472 4473 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 4474 qman_offset = i * DMA_QMAN_OFFSET; 4475 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0); 4476 } 4477 4478 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) { 4479 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE); 4480 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0); 4481 } 4482 4483 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 4484 qman_offset = i * TPC_QMAN_OFFSET; 4485 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0); 4486 } 4487} 4488 4489static void gaudi_restore_user_registers(struct hl_device *hdev) 4490{ 4491 gaudi_restore_sm_registers(hdev); 4492 gaudi_restore_dma_registers(hdev); 4493 gaudi_restore_qm_registers(hdev); 4494} 4495 4496static int gaudi_context_switch(struct hl_device *hdev, u32 asid) 4497{ 4498 struct asic_fixed_properties *prop = &hdev->asic_prop; 4499 u64 addr = prop->sram_user_base_address; 4500 u32 size = hdev->pldm ? 0x10000 : 4501 (prop->sram_size - SRAM_USER_BASE_OFFSET); 4502 u64 val = 0x7777777777777777ull; 4503 int rc; 4504 4505 rc = gaudi_memset_device_memory(hdev, addr, size, val); 4506 if (rc) { 4507 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n"); 4508 return rc; 4509 } 4510 4511 gaudi_mmu_prepare(hdev, asid); 4512 4513 gaudi_restore_user_registers(hdev); 4514 4515 return 0; 4516} 4517 4518static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev) 4519{ 4520 struct asic_fixed_properties *prop = &hdev->asic_prop; 4521 struct gaudi_device *gaudi = hdev->asic_specific; 4522 u64 addr = prop->mmu_pgt_addr; 4523 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE; 4524 4525 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 4526 return 0; 4527 4528 return gaudi_memset_device_memory(hdev, addr, size, 0); 4529} 4530 4531static void gaudi_restore_phase_topology(struct hl_device *hdev) 4532{ 4533 4534} 4535 4536static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) 4537{ 4538 struct asic_fixed_properties *prop = &hdev->asic_prop; 4539 struct gaudi_device *gaudi = hdev->asic_specific; 4540 u64 hbm_bar_addr; 4541 int rc = 0; 4542 4543 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { 4544 4545 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && 4546 (hdev->clock_gating_mask & 4547 GAUDI_CLK_GATE_DEBUGFS_MASK)) { 4548 4549 dev_err_ratelimited(hdev->dev, 4550 "Can't read register - clock gating is enabled!\n"); 4551 rc = -EFAULT; 4552 } else { 4553 *val = RREG32(addr - CFG_BASE); 4554 } 4555 4556 } else if ((addr >= SRAM_BASE_ADDR) && 4557 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { 4558 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + 4559 (addr - SRAM_BASE_ADDR)); 4560 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { 4561 u64 bar_base_addr = DRAM_PHYS_BASE + 4562 (addr & ~(prop->dram_pci_bar_size - 0x1ull)); 4563 4564 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); 4565 if (hbm_bar_addr != U64_MAX) { 4566 *val = readl(hdev->pcie_bar[HBM_BAR_ID] + 4567 (addr - bar_base_addr)); 4568 4569 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, 4570 hbm_bar_addr); 4571 } 4572 if (hbm_bar_addr == U64_MAX) 4573 rc = -EIO; 4574 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { 4575 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE); 4576 } else { 4577 rc = -EFAULT; 4578 } 4579 4580 return rc; 4581} 4582 4583static int 
gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) 4584{ 4585 struct asic_fixed_properties *prop = &hdev->asic_prop; 4586 struct gaudi_device *gaudi = hdev->asic_specific; 4587 u64 hbm_bar_addr; 4588 int rc = 0; 4589 4590 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { 4591 4592 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && 4593 (hdev->clock_gating_mask & 4594 GAUDI_CLK_GATE_DEBUGFS_MASK)) { 4595 4596 dev_err_ratelimited(hdev->dev, 4597 "Can't write register - clock gating is enabled!\n"); 4598 rc = -EFAULT; 4599 } else { 4600 WREG32(addr - CFG_BASE, val); 4601 } 4602 4603 } else if ((addr >= SRAM_BASE_ADDR) && 4604 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { 4605 writel(val, hdev->pcie_bar[SRAM_BAR_ID] + 4606 (addr - SRAM_BASE_ADDR)); 4607 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { 4608 u64 bar_base_addr = DRAM_PHYS_BASE + 4609 (addr & ~(prop->dram_pci_bar_size - 0x1ull)); 4610 4611 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); 4612 if (hbm_bar_addr != U64_MAX) { 4613 writel(val, hdev->pcie_bar[HBM_BAR_ID] + 4614 (addr - bar_base_addr)); 4615 4616 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, 4617 hbm_bar_addr); 4618 } 4619 if (hbm_bar_addr == U64_MAX) 4620 rc = -EIO; 4621 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { 4622 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; 4623 } else { 4624 rc = -EFAULT; 4625 } 4626 4627 return rc; 4628} 4629 4630static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) 4631{ 4632 struct asic_fixed_properties *prop = &hdev->asic_prop; 4633 struct gaudi_device *gaudi = hdev->asic_specific; 4634 u64 hbm_bar_addr; 4635 int rc = 0; 4636 4637 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { 4638 4639 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && 4640 (hdev->clock_gating_mask & 4641 GAUDI_CLK_GATE_DEBUGFS_MASK)) { 4642 4643 dev_err_ratelimited(hdev->dev, 4644 "Can't read register - clock gating is enabled!\n"); 4645 rc = -EFAULT; 4646 } else { 4647 u32 val_l = RREG32(addr - CFG_BASE); 4648 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE); 4649 4650 *val = (((u64) val_h) << 32) | val_l; 4651 } 4652 4653 } else if ((addr >= SRAM_BASE_ADDR) && 4654 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { 4655 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + 4656 (addr - SRAM_BASE_ADDR)); 4657 } else if (addr <= 4658 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { 4659 u64 bar_base_addr = DRAM_PHYS_BASE + 4660 (addr & ~(prop->dram_pci_bar_size - 0x1ull)); 4661 4662 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); 4663 if (hbm_bar_addr != U64_MAX) { 4664 *val = readq(hdev->pcie_bar[HBM_BAR_ID] + 4665 (addr - bar_base_addr)); 4666 4667 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, 4668 hbm_bar_addr); 4669 } 4670 if (hbm_bar_addr == U64_MAX) 4671 rc = -EIO; 4672 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { 4673 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE); 4674 } else { 4675 rc = -EFAULT; 4676 } 4677 4678 return rc; 4679} 4680 4681static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) 4682{ 4683 struct asic_fixed_properties *prop = &hdev->asic_prop; 4684 struct gaudi_device *gaudi = hdev->asic_specific; 4685 u64 hbm_bar_addr; 4686 int rc = 0; 4687 4688 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { 4689 4690 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && 4691 (hdev->clock_gating_mask & 4692 GAUDI_CLK_GATE_DEBUGFS_MASK)) { 4693 4694 
dev_err_ratelimited(hdev->dev, 4695 "Can't write register - clock gating is enabled!\n"); 4696 rc = -EFAULT; 4697 } else { 4698 WREG32(addr - CFG_BASE, lower_32_bits(val)); 4699 WREG32(addr + sizeof(u32) - CFG_BASE, 4700 upper_32_bits(val)); 4701 } 4702 4703 } else if ((addr >= SRAM_BASE_ADDR) && 4704 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { 4705 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + 4706 (addr - SRAM_BASE_ADDR)); 4707 } else if (addr <= 4708 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { 4709 u64 bar_base_addr = DRAM_PHYS_BASE + 4710 (addr & ~(prop->dram_pci_bar_size - 0x1ull)); 4711 4712 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); 4713 if (hbm_bar_addr != U64_MAX) { 4714 writeq(val, hdev->pcie_bar[HBM_BAR_ID] + 4715 (addr - bar_base_addr)); 4716 4717 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, 4718 hbm_bar_addr); 4719 } 4720 if (hbm_bar_addr == U64_MAX) 4721 rc = -EIO; 4722 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { 4723 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; 4724 } else { 4725 rc = -EFAULT; 4726 } 4727 4728 return rc; 4729} 4730 4731static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) 4732{ 4733 struct gaudi_device *gaudi = hdev->asic_specific; 4734 4735 if (hdev->hard_reset_pending) 4736 return U64_MAX; 4737 4738 return readq(hdev->pcie_bar[HBM_BAR_ID] + 4739 (addr - gaudi->hbm_bar_cur_addr)); 4740} 4741 4742static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) 4743{ 4744 struct gaudi_device *gaudi = hdev->asic_specific; 4745 4746 if (hdev->hard_reset_pending) 4747 return; 4748 4749 writeq(val, hdev->pcie_bar[HBM_BAR_ID] + 4750 (addr - gaudi->hbm_bar_cur_addr)); 4751} 4752 4753void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) 4754{ 4755 /* mask to zero the MMBP and ASID bits */ 4756 WREG32_AND(reg, ~0x7FF); 4757 WREG32_OR(reg, asid); 4758} 4759 4760static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) 4761{ 4762 struct gaudi_device *gaudi = hdev->asic_specific; 4763 4764 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 4765 return; 4766 4767 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) { 4768 WARN(1, "asid %u is too big\n", asid); 4769 return; 4770 } 4771 4772 mutex_lock(&gaudi->clk_gate_mutex); 4773 4774 hdev->asic_funcs->disable_clock_gating(hdev); 4775 4776 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); 4777 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); 4778 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); 4779 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid); 4780 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid); 4781 4782 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid); 4783 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid); 4784 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid); 4785 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid); 4786 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid); 4787 4788 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid); 4789 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid); 4790 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid); 4791 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid); 4792 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid); 4793 4794 gaudi_mmu_prepare_reg(hdev, 
mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid); 4795 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid); 4796 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid); 4797 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid); 4798 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid); 4799 4800 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid); 4801 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid); 4802 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid); 4803 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid); 4804 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid); 4805 4806 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid); 4807 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid); 4808 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid); 4809 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid); 4810 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid); 4811 4812 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid); 4813 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid); 4814 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid); 4815 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid); 4816 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid); 4817 4818 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid); 4819 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid); 4820 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid); 4821 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid); 4822 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid); 4823 4824 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid); 4825 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid); 4826 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid); 4827 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid); 4828 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid); 4829 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid); 4830 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid); 4831 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid); 4832 4833 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid); 4834 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid); 4835 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid); 4836 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid); 4837 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid); 4838 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid); 4839 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid); 4840 4841 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid); 4842 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid); 4843 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid); 4844 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid); 4845 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid); 4846 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid); 4847 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid); 4848 4849 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid); 4850 gaudi_mmu_prepare_reg(hdev, 
mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid); 4851 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid); 4852 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid); 4853 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid); 4854 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid); 4855 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid); 4856 4857 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid); 4858 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid); 4859 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid); 4860 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid); 4861 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid); 4862 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid); 4863 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid); 4864 4865 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid); 4866 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid); 4867 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid); 4868 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid); 4869 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid); 4870 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid); 4871 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid); 4872 4873 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid); 4874 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid); 4875 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid); 4876 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid); 4877 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid); 4878 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid); 4879 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid); 4880 4881 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid); 4882 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid); 4883 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid); 4884 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid); 4885 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid); 4886 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid); 4887 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid); 4888 4889 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid); 4890 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid); 4891 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid); 4892 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid); 4893 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid); 4894 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid); 4895 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid); 4896 4897 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid); 4898 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid); 4899 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid); 4900 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid); 4901 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid); 4902 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid); 4903 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid); 4904 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid); 4905 gaudi_mmu_prepare_reg(hdev, 
mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid); 4906 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid); 4907 4908 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid); 4909 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid); 4910 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid); 4911 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid); 4912 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid); 4913 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid); 4914 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid); 4915 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid); 4916 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid); 4917 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid); 4918 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); 4919 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); 4920 4921 hdev->asic_funcs->set_clock_gating(hdev); 4922 4923 mutex_unlock(&gaudi->clk_gate_mutex); 4924} 4925 4926static int gaudi_send_job_on_qman0(struct hl_device *hdev, 4927 struct hl_cs_job *job) 4928{ 4929 struct packet_msg_prot *fence_pkt; 4930 u32 *fence_ptr; 4931 dma_addr_t fence_dma_addr; 4932 struct hl_cb *cb; 4933 u32 tmp, timeout, dma_offset; 4934 int rc; 4935 4936 if (hdev->pldm) 4937 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC; 4938 else 4939 timeout = HL_DEVICE_TIMEOUT_USEC; 4940 4941 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) { 4942 dev_err_ratelimited(hdev->dev, 4943 "Can't send driver job on QMAN0 because the device is not idle\n"); 4944 return -EBUSY; 4945 } 4946 4947 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, 4948 &fence_dma_addr); 4949 if (!fence_ptr) { 4950 dev_err(hdev->dev, 4951 "Failed to allocate fence memory for QMAN0\n"); 4952 return -ENOMEM; 4953 } 4954 4955 cb = job->patched_cb; 4956 4957 fence_pkt = cb->kernel_address + 4958 job->job_cb_size - sizeof(struct packet_msg_prot); 4959 4960 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 4961 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 4962 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4963 4964 fence_pkt->ctl = cpu_to_le32(tmp); 4965 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL); 4966 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 4967 4968 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET; 4969 4970 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); 4971 4972 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0, 4973 job->job_cb_size, cb->bus_address); 4974 if (rc) { 4975 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc); 4976 goto free_fence_ptr; 4977 } 4978 4979 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, 4980 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000, 4981 timeout, true); 4982 4983 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0); 4984 4985 if (rc == -ETIMEDOUT) { 4986 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp); 4987 goto free_fence_ptr; 4988 } 4989 4990free_fence_ptr: 4991 WREG32_AND(mmDMA0_CORE_PROT + dma_offset, 4992 ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); 4993 4994 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr, 4995 fence_dma_addr); 4996 return rc; 4997} 4998 4999static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size) 5000{ 5001 if (event_type >= GAUDI_EVENT_SIZE) 5002 goto event_not_supported; 5003 5004 if (!gaudi_irq_map_table[event_type].valid) 5005 goto event_not_supported; 5006 5007 snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name); 5008 5009 return; 5010 5011event_not_supported: 5012 snprintf(desc, size, 
"N/A"); 5013} 5014 5015static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, 5016 u32 x_y, bool is_write) 5017{ 5018 u32 dma_id[2], dma_offset, err_cause[2], mask, i; 5019 5020 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK : 5021 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK; 5022 5023 switch (x_y) { 5024 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 5025 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 5026 dma_id[0] = 0; 5027 dma_id[1] = 2; 5028 break; 5029 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 5030 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 5031 dma_id[0] = 1; 5032 dma_id[1] = 3; 5033 break; 5034 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 5035 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 5036 dma_id[0] = 4; 5037 dma_id[1] = 6; 5038 break; 5039 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 5040 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 5041 dma_id[0] = 5; 5042 dma_id[1] = 7; 5043 break; 5044 default: 5045 goto unknown_initiator; 5046 } 5047 5048 for (i = 0 ; i < 2 ; i++) { 5049 dma_offset = dma_id[i] * DMA_CORE_OFFSET; 5050 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5051 } 5052 5053 switch (x_y) { 5054 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 5055 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 5056 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) 5057 return "DMA0"; 5058 else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) 5059 return "DMA2"; 5060 else 5061 return "DMA0 or DMA2"; 5062 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 5063 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 5064 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) 5065 return "DMA1"; 5066 else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) 5067 return "DMA3"; 5068 else 5069 return "DMA1 or DMA3"; 5070 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 5071 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 5072 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) 5073 return "DMA4"; 5074 else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) 5075 return "DMA6"; 5076 else 5077 return "DMA4 or DMA6"; 5078 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 5079 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 5080 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) 5081 return "DMA5"; 5082 else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) 5083 return "DMA7"; 5084 else 5085 return "DMA5 or DMA7"; 5086 } 5087 5088unknown_initiator: 5089 return "unknown initiator"; 5090} 5091 5092static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, 5093 bool is_write) 5094{ 5095 u32 val, x_y, axi_id; 5096 5097 val = is_write ? 
RREG32(mmMMU_UP_RAZWI_WRITE_ID) : 5098 RREG32(mmMMU_UP_RAZWI_READ_ID); 5099 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) | 5100 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT)); 5101 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK << 5102 RAZWI_INITIATOR_AXI_ID_SHIFT); 5103 5104 switch (x_y) { 5105 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0: 5106 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) 5107 return "TPC0"; 5108 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) 5109 return "NIC0"; 5110 break; 5111 case RAZWI_INITIATOR_ID_X_Y_TPC1: 5112 return "TPC1"; 5113 case RAZWI_INITIATOR_ID_X_Y_MME0_0: 5114 case RAZWI_INITIATOR_ID_X_Y_MME0_1: 5115 return "MME0"; 5116 case RAZWI_INITIATOR_ID_X_Y_MME1_0: 5117 case RAZWI_INITIATOR_ID_X_Y_MME1_1: 5118 return "MME1"; 5119 case RAZWI_INITIATOR_ID_X_Y_TPC2: 5120 return "TPC2"; 5121 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC: 5122 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) 5123 return "TPC3"; 5124 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI)) 5125 return "PCI"; 5126 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU)) 5127 return "CPU"; 5128 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC)) 5129 return "PSOC"; 5130 break; 5131 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 5132 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 5133 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 5134 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 5135 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 5136 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 5137 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 5138 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 5139 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write); 5140 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2: 5141 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) 5142 return "TPC4"; 5143 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) 5144 return "NIC1"; 5145 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) 5146 return "NIC2"; 5147 break; 5148 case RAZWI_INITIATOR_ID_X_Y_TPC5: 5149 return "TPC5"; 5150 case RAZWI_INITIATOR_ID_X_Y_MME2_0: 5151 case RAZWI_INITIATOR_ID_X_Y_MME2_1: 5152 return "MME2"; 5153 case RAZWI_INITIATOR_ID_X_Y_MME3_0: 5154 case RAZWI_INITIATOR_ID_X_Y_MME3_1: 5155 return "MME3"; 5156 case RAZWI_INITIATOR_ID_X_Y_TPC6: 5157 return "TPC6"; 5158 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5: 5159 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) 5160 return "TPC7"; 5161 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) 5162 return "NIC4"; 5163 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) 5164 return "NIC5"; 5165 break; 5166 default: 5167 break; 5168 } 5169 5170 dev_err(hdev->dev, 5171 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n", 5172 val, 5173 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK, 5174 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK, 5175 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) & 5176 RAZWI_INITIATOR_AXI_ID_MASK); 5177 5178 return "unknown initiator"; 5179} 5180 5181static void gaudi_print_razwi_info(struct hl_device *hdev) 5182{ 5183 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { 5184 dev_err_ratelimited(hdev->dev, 5185 "RAZWI event caused by illegal write of %s\n", 5186 gaudi_get_razwi_initiator_name(hdev, true)); 5187 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0); 5188 } 5189 5190 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { 5191 dev_err_ratelimited(hdev->dev, 5192 "RAZWI event caused by illegal read of %s\n", 5193 gaudi_get_razwi_initiator_name(hdev, false)); 5194 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); 5195 } 5196} 5197 5198static void 
gaudi_print_mmu_error_info(struct hl_device *hdev) 5199{ 5200 struct gaudi_device *gaudi = hdev->asic_specific; 5201 u64 addr; 5202 u32 val; 5203 5204 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 5205 return; 5206 5207 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE); 5208 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) { 5209 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; 5210 addr <<= 32; 5211 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); 5212 5213 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", 5214 addr); 5215 5216 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); 5217 } 5218 5219 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE); 5220 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) { 5221 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; 5222 addr <<= 32; 5223 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); 5224 5225 dev_err_ratelimited(hdev->dev, 5226 "MMU access error on va 0x%llx\n", addr); 5227 5228 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); 5229 } 5230} 5231 5232/* 5233 * +-------------------+------------------------------------------------------+ 5234 * | Configuration Reg | Description | 5235 * | Address | | 5236 * +-------------------+------------------------------------------------------+ 5237 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)| 5238 * | |0xF30 memory wrappers 31:0 (MSB to LSB) | 5239 * | |0xF34 memory wrappers 63:32 | 5240 * | |0xF38 memory wrappers 95:64 | 5241 * | |0xF3C memory wrappers 127:96 | 5242 * +-------------------+------------------------------------------------------+ 5243 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)| 5244 * | |0xF40 memory wrappers 31:0 (MSB to LSB) | 5245 * | |0xF44 memory wrappers 63:32 | 5246 * | |0xF48 memory wrappers 95:64 | 5247 * | |0xF4C memory wrappers 127:96 | 5248 * +-------------------+------------------------------------------------------+ 5249 */ 5250static int gaudi_extract_ecc_info(struct hl_device *hdev, 5251 struct ecc_info_extract_params *params, u64 *ecc_address, 5252 u64 *ecc_syndrom, u8 *memory_wrapper_idx) 5253{ 5254 struct gaudi_device *gaudi = hdev->asic_specific; 5255 u32 i, num_mem_regs, reg, err_bit; 5256 u64 err_addr, err_word = 0; 5257 int rc = 0; 5258 5259 num_mem_regs = params->num_memories / 32 + 5260 ((params->num_memories % 32) ? 
1 : 0); 5261 5262 if (params->block_address >= CFG_BASE) 5263 params->block_address -= CFG_BASE; 5264 5265 if (params->derr) 5266 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET; 5267 else 5268 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; 5269 5270 if (params->disable_clock_gating) { 5271 mutex_lock(&gaudi->clk_gate_mutex); 5272 hdev->asic_funcs->disable_clock_gating(hdev); 5273 } 5274 5275 /* Set invalid wrapper index */ 5276 *memory_wrapper_idx = 0xFF; 5277 5278 /* Iterate through memory wrappers, a single bit must be set */ 5279 for (i = 0 ; i < num_mem_regs ; i++) { 5280 /* indication registers are consecutive, 4 bytes apart */ 5281 err_word = RREG32(err_addr + i * 4); 5282 if (err_word) { 5283 err_bit = __ffs(err_word); 5284 *memory_wrapper_idx = err_bit + (32 * i); 5285 break; 5286 } 5287 } 5288 5289 if (*memory_wrapper_idx == 0xFF) { 5290 dev_err(hdev->dev, "ECC error information cannot be found\n"); 5291 rc = -EINVAL; 5292 goto enable_clk_gate; 5293 } 5294 5295 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, 5296 *memory_wrapper_idx); 5297 5298 *ecc_address = 5299 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET); 5300 *ecc_syndrom = 5301 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET); 5302 5303 /* Clear error indication */ 5304 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET); 5305 if (params->derr) 5306 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1); 5307 else 5308 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1); 5309 5310 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); 5311 5312enable_clk_gate: 5313 if (params->disable_clock_gating) { 5314 hdev->asic_funcs->set_clock_gating(hdev); 5315 5316 mutex_unlock(&gaudi->clk_gate_mutex); 5317 } 5318 5319 return rc; 5320} 5321 5322static void gaudi_handle_qman_err_generic(struct hl_device *hdev, 5323 const char *qm_name, 5324 u64 glbl_sts_addr, 5325 u64 arb_err_addr) 5326{ 5327 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val; 5328 char reg_desc[32]; 5329 5330 /* Iterate through all stream GLBL_STS1 registers + Lower CP */ 5331 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 5332 glbl_sts_clr_val = 0; 5333 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 5334 5335 if (!glbl_sts_val) 5336 continue; 5337 5338 if (i == QMAN_STREAMS) 5339 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 5340 else 5341 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 5342 5343 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) { 5344 if (glbl_sts_val & BIT(j)) { 5345 dev_err_ratelimited(hdev->dev, 5346 "%s %s. err cause: %s\n", 5347 qm_name, reg_desc, 5348 gaudi_qman_error_cause[j]); 5349 glbl_sts_clr_val |= BIT(j); 5350 } 5351 } 5352 5353 /* Write 1 to clear errors */ 5354 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); 5355 } 5356 5357 arb_err_val = RREG32(arb_err_addr); 5358 5359 if (!arb_err_val) 5360 return; 5361 5362 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 5363 if (arb_err_val & BIT(j)) { 5364 dev_err_ratelimited(hdev->dev, 5365 "%s ARB_ERR. err cause: %s\n", 5366 qm_name, 5367 gaudi_qman_arb_error_cause[j]); 5368 } 5369 } 5370} 5371 5372static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, 5373 struct hl_eq_ecc_data *ecc_data) 5374{ 5375 struct ecc_info_extract_params params; 5376 u64 ecc_address = 0, ecc_syndrom = 0; 5377 u8 index, memory_wrapper_idx = 0; 5378 bool extract_info_from_fw; 5379 int rc; 5380 5381 switch (event_type) { 5382 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR: 5383 case GAUDI_EVENT_DMA0_SERR_ECC ... 
static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
					const char *qm_name,
					u64 glbl_sts_addr,
					u64 arb_err_addr)
{
	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
	char reg_desc[32];

	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_clr_val = 0;
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		if (i == QMAN_STREAMS)
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
		else
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);

		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
			if (glbl_sts_val & BIT(j)) {
				dev_err_ratelimited(hdev->dev,
						"%s %s. err cause: %s\n",
						qm_name, reg_desc,
						gaudi_qman_error_cause[j]);
				glbl_sts_clr_val |= BIT(j);
			}
		}

		/* Write 1 to clear errors */
		WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
	}

	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		return;

	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			dev_err_ratelimited(hdev->dev,
					"%s ARB_ERR. err cause: %s\n",
					qm_name,
					gaudi_qman_arb_error_cause[j]);
		}
	}
}
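/*
 * Decode an ECC event: for events that the firmware already parsed, take the
 * failure information from the event queue entry; otherwise extract it
 * directly from the relevant TPC/MME block via gaudi_extract_ecc_info().
 */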
static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	struct ecc_info_extract_params params;
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 index, memory_wrapper_idx = 0;
	bool extract_info_from_fw;
	int rc;

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
		extract_info_from_fw = true;
		break;
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
		index = event_type - GAUDI_EVENT_TPC0_SERR;
		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
		index = event_type - GAUDI_EVENT_TPC0_DERR;
		params.block_address =
			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	default:
		return;
	}

	if (extract_info_from_fw) {
		ecc_address = le64_to_cpu(ecc_data->ecc_address);
		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
	} else {
		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
				&ecc_syndrom, &memory_wrapper_idx);
		if (rc)
			return;
	}

	dev_err(hdev->dev,
		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
		ecc_address, ecc_syndrom, memory_wrapper_idx);
}
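/*
 * Translate a QMAN error event to the base addresses of the relevant
 * GLBL_STS1/ARB_ERR_CAUSE registers and report the error causes.
 */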
static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
{
	u64 glbl_sts_addr, arb_err_addr;
	u8 index;
	char desc[32];

	switch (event_type) {
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		index = event_type - GAUDI_EVENT_TPC0_QM;
		glbl_sts_addr =
			mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
		arb_err_addr =
			mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
		break;
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
		index = event_type - GAUDI_EVENT_MME0_QM;
		glbl_sts_addr =
			mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
		arb_err_addr =
			mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
		break;
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		index = event_type - GAUDI_EVENT_DMA0_QM;
		glbl_sts_addr =
			mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
		arb_err_addr =
			mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
		break;
	default:
		return;
	}

	gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
}

static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
					bool razwi)
{
	char desc[64] = "";

	gaudi_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (razwi) {
		gaudi_print_razwi_info(hdev);
		gaudi_print_mmu_error_info(hdev);
	}
}

static int gaudi_soft_reset_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
	return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
}
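/*
 * Dump the per-channel HBM interrupt and ECC status registers of a single
 * HBM device and clear the interrupts. Returns non-zero if any error was
 * reported.
 */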
static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
{
	int ch, err = 0;
	u32 base, val, val2;

	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			err = 1;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x060);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
				device, ch * 2,
				RREG32(base + ch * 0x1000 + 0x064),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			err = 1;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x070);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
				device, ch * 2 + 1,
				RREG32(base + ch * 0x1000 + 0x074),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		/* Clear interrupts */
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
	}

	val = RREG32(base + 0x8F30);
	val2 = RREG32(base + 0x8F34);
	if (val | val2) {
		err = 1;
		dev_err(hdev->dev,
			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
			device, val, val2);
	}
	val = RREG32(base + 0x8F40);
	val2 = RREG32(base + 0x8F44);
	if (val | val2) {
		err = 1;
		dev_err(hdev->dev,
			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
			device, val, val2);
	}

	return err;
}

static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	switch (hbm_event_type) {
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM0_SPI_1:
		return 0;
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_1:
		return 1;
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_1:
		return 2;
	case GAUDI_EVENT_HBM3_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_1:
		return 3;
	default:
		break;
	}

	/* Should never happen */
	return 0;
}
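/*
 * Read and clear the TPC interrupt cause register. Returns true when the
 * cause is a QMAN error, which requires a soft reset.
 */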
static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
					char *interrupt_name)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
	bool soft_reset_required = false;

	/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
	 * gating, and thus cannot be done in CPU-CP and should be done instead
	 * by the driver.
	 */

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;

	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
		if (tpc_interrupts_cause & BIT(i)) {
			dev_err_ratelimited(hdev->dev,
					"TPC%d_%s interrupt cause: %s\n",
					tpc_id, interrupt_name,
					gaudi_tpc_interrupts_cause[i]);
			/* If this is a QM error, we need to soft-reset */
			if (i == 15)
				soft_reset_required = true;
		}

	/* Clear interrupts */
	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	return soft_reset_required;
}

static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
}

static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
}

static void gaudi_print_clk_change_info(struct hl_device *hdev,
					u16 event_type)
{
	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}
}
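/*
 * Main event queue handler. Dispatches each firmware event to the relevant
 * handler above, and either unmasks the interrupt in the firmware or
 * triggers a hard reset, depending on the event severity.
 */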
static void gaudi_handle_eqe(struct hl_device *hdev,
				struct hl_eq_entry *eq_entry)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
			>> EQ_CTL_EVENT_TYPE_SHIFT);
	u8 cause;
	bool reset_required;

	if (event_type >= GAUDI_EVENT_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
				event_type, GAUDI_EVENT_SIZE - 1);
		return;
	}

	gaudi->events_stat[event_type]++;
	gaudi->events_stat_aggregate[event_type]++;

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_DERR:
	case GAUDI_EVENT_PCIE_IF_DERR:
	case GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
		fallthrough;
	case GAUDI_EVENT_CPU_IF_ECC_DERR:
	case GAUDI_EVENT_PSOC_MEM_DERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
	case GAUDI_EVENT_MMU_DERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_GIC500:
	case GAUDI_EVENT_AXI_ECC:
	case GAUDI_EVENT_L2_RAM_ECC:
	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
		gaudi_print_irq_info(hdev, event_type, false);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_0:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type));
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_HBM0_SPI_1:
	case GAUDI_EVENT_HBM1_SPI_1:
	case GAUDI_EVENT_HBM2_SPI_1:
	case GAUDI_EVENT_HBM3_SPI_1:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type));
		break;

	case GAUDI_EVENT_TPC0_DEC:
	case GAUDI_EVENT_TPC1_DEC:
	case GAUDI_EVENT_TPC2_DEC:
	case GAUDI_EVENT_TPC3_DEC:
	case GAUDI_EVENT_TPC4_DEC:
	case GAUDI_EVENT_TPC5_DEC:
	case GAUDI_EVENT_TPC6_DEC:
	case GAUDI_EVENT_TPC7_DEC:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_dec_event_to_tpc_id(event_type),
					"AXI_SLV_DEC_Error");
		if (reset_required) {
			dev_err(hdev->dev, "hard reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			if (hdev->hard_reset_on_fw_events)
				hl_device_reset(hdev, true, false);
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;

	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		if (reset_required) {
			dev_err(hdev->dev, "hard reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			if (hdev->hard_reset_on_fw_events)
				hl_device_reset(hdev, true, false);
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;

	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_qman_err(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		gaudi_print_clk_change_info(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		break;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}
}
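/* Return the driver's event counters (current or aggregate) and their size */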
static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
					u32 *size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(gaudi->events_stat_aggregate);
		return gaudi->events_stat_aggregate;
	}

	*size = (u32) sizeof(gaudi->events_stat);
	return gaudi->events_stat;
}

static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}
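/*
 * Range invalidation currently performs a full L0/L1 invalidation by bumping
 * the STLB producer index and waiting for the consumer index to catch up;
 * see the TODO below.
 */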
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
				bool is_hard, u32 asid, u64 va, u64 size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	u32 inv_data;
	u32 pi;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	mutex_lock(&hdev->mmu_cache_lock);

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/*
	 * TODO: currently invalidate entire L0 & L1 as in regular hard
	 * invalidation. Need to apply invalidation of specific cache
	 * lines with mask of ASID & VA & size.
	 * Note that L1 will be flushed entirely in any case.
	 */

	/* L0 & L1 invalidation */
	inv_data = RREG32(mmSTLB_CACHE_INV);
	/* PI is 8 bit */
	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
	WREG32(mmSTLB_CACHE_INV,
		(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_CONSUMER_INDEX,
		status,
		status == pi,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
					u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}

static int gaudi_send_heartbeat(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}

static int gaudi_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_info_get(hdev);
	if (rc)
		return rc;

	if (!strlen(prop->cpucp_info.card_name))
		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);

	if (hdev->card_type == cpucp_card_type_pci)
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
	else if (hdev->card_type == cpucp_card_type_pmc)
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

	hdev->max_power = prop->max_power_default;

	return 0;
}
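/*
 * Check whether all DMA, TPC and MME engines are idle. When a seq_file is
 * provided (debugfs), also print a per-engine status table; when a mask is
 * provided, set a bit for every busy engine.
 */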
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
					struct seq_file *s)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	if (s)
		seq_puts(s,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GAUDI_ENGINE_ID_DMA_0 + dma_id);
		if (s)
			seq_printf(s, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GAUDI_ENGINE_ID_TPC_0 + i);
		if (s)
			seq_printf(s, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GAUDI_ENGINE_ID_MME_0 + i);
		if (s) {
			if (!is_slave)
				seq_printf(s, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				seq_printf(s, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (s)
		seq_puts(s, "\n");

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	return is_idle;
}

static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}

static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}

static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}

static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}

/*
 * this function should be used only during initialization and/or after reset,
 * when there are no active users.
 */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 kernel_timeout;
	u32 status, offset;
	int rc;

	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
		lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
		1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	hdev->asic_funcs->set_clock_gating(hdev);
	mutex_unlock(&gaudi->clk_gate_mutex);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_ctx_init(struct hl_ctx *ctx)
{
	return 0;
}

static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}
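/*
 * CB sizes for the signal/wait flows: the MSG_SHORT/FENCE packets built by
 * the helpers below, plus two MSG_PROT packets per CB.
 */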
static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}

static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl;

	pkt = cb->kernel_address;
	memset(pkt, 0, sizeof(*pkt));

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);
}

static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
					u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
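/*
 * Build the MSG_SHORT packet that arms a sync manager monitor: it selects
 * the SOB group and mask for sob_id and sets the "greater or equal to
 * sob_val" condition.
 */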
static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
					u16 sob_val, u16 addr)
{
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u8 mask = ~(1 << (sob_id & 0x7));

	memset(pkt, 0, pkt_size);

	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
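/*
 * Build a wait CB for the given queue: four MSG_SHORT packets configure a
 * sync manager monitor (payload address, payload data and arming), followed
 * by a FENCE packet on the queue's CP_FENCE2 counter.
 */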
static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
			u16 sob_val, u16 mon_id, u32 q_idx)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	void *buf = cb->kernel_address;
	u64 monitor_base, fence_addr = 0;
	u32 size = 0;
	u16 msg_addr_offset;

	switch (q_idx) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	default:
		/* queue index should be valid here */
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				q_idx);
		return;
	}

	fence_addr += CFG_BASE;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	/* Fourth monitor config packet: bind the monitor to a sync object */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;
	size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
						msg_addr_offset);

	/* Fence packet */
	size += gaudi_add_fence_pkt(buf + size);
}

static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
		0);

	kref_init(&hw_sob->kref);
}

static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
			HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}
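/* ASIC-specific operations exposed to the common habanalabs driver core */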
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.cb_mmap = gaudi_cb_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = gaudi_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.get_hw_state = gaudi_get_hw_state,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.get_clk_rate = gaudi_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.read_device_fw_version = gaudi_read_device_fw_version,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}