1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24#include <linux/types.h> 25#include <linux/kernel.h> 26#include <linux/log2.h> 27#include <linux/sched.h> 28#include <linux/slab.h> 29#include <linux/mutex.h> 30#include <linux/device.h> 31 32#include "kfd_pm4_headers.h" 33#include "kfd_pm4_headers_diq.h" 34#include "kfd_kernel_queue.h" 35#include "kfd_priv.h" 36#include "kfd_pm4_opcodes.h" 37#include "cik_regs.h" 38#include "kfd_dbgmgr.h" 39#include "kfd_dbgdev.h" 40#include "kfd_device_queue_manager.h" 41 42static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) 43{ 44 dev->kfd2kgd->address_watch_disable(dev->kgd); 45} 46 47static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, 48 u32 pasid, uint64_t vmid0_address, 49 uint32_t *packet_buff, size_t size_in_bytes) 50{ 51 struct pm4__release_mem *rm_packet; 52 struct pm4__indirect_buffer_pasid *ib_packet; 53 struct kfd_mem_obj *mem_obj; 54 size_t pq_packets_size_in_bytes; 55 union ULARGE_INTEGER *largep; 56 union ULARGE_INTEGER addr; 57 struct kernel_queue *kq; 58 uint64_t *rm_state; 59 unsigned int *ib_packet_buff; 60 int status; 61 62 if (WARN_ON(!size_in_bytes)) 63 return -EINVAL; 64 65 kq = dbgdev->kq; 66 67 pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) + 68 sizeof(struct pm4__indirect_buffer_pasid); 69 70 /* 71 * We acquire a buffer from DIQ 72 * The receive packet buff will be sitting on the Indirect Buffer 73 * and in the PQ we put the IB packet + sync packet(s). 74 */ 75 status = kq_acquire_packet_buffer(kq, 76 pq_packets_size_in_bytes / sizeof(uint32_t), 77 &ib_packet_buff); 78 if (status) { 79 pr_err("kq_acquire_packet_buffer failed\n"); 80 return status; 81 } 82 83 memset(ib_packet_buff, 0, pq_packets_size_in_bytes); 84 85 ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); 86 87 ib_packet->header.count = 3; 88 ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; 89 ib_packet->header.type = PM4_TYPE_3; 90 91 largep = (union ULARGE_INTEGER *) &vmid0_address; 92 93 ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; 94 ib_packet->bitfields3.ib_base_hi = largep->u.high_part; 95 96 ib_packet->control = (1 << 23) | (1 << 31) | 97 ((size_in_bytes / 4) & 0xfffff); 98 99 ib_packet->bitfields5.pasid = pasid; 100 101 /* 102 * for now we use release mem for GPU-CPU synchronization 103 * Consider WaitRegMem + WriteData as a better alternative 104 * we get a GART allocations ( gpu/cpu mapping), 105 * for the sync variable, and wait until: 106 * (a) Sync with HW 107 * (b) Sync var is written by CP to mem. 108 */ 109 rm_packet = (struct pm4__release_mem *) (ib_packet_buff + 110 (sizeof(struct pm4__indirect_buffer_pasid) / 111 sizeof(unsigned int))); 112 113 status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), 114 &mem_obj); 115 116 if (status) { 117 pr_err("Failed to allocate GART memory\n"); 118 kq_rollback_packet(kq); 119 return status; 120 } 121 122 rm_state = (uint64_t *) mem_obj->cpu_ptr; 123 124 *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; 125 126 rm_packet->header.opcode = IT_RELEASE_MEM; 127 rm_packet->header.type = PM4_TYPE_3; 128 rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2; 129 130 rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; 131 rm_packet->bitfields2.event_index = 132 event_index___release_mem__end_of_pipe; 133 134 rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; 135 rm_packet->bitfields2.atc = 0; 136 rm_packet->bitfields2.tc_wb_action_ena = 1; 137 138 addr.quad_part = mem_obj->gpu_addr; 139 140 rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; 141 rm_packet->address_hi = addr.u.high_part; 142 143 rm_packet->bitfields3.data_sel = 144 data_sel___release_mem__send_64_bit_data; 145 146 rm_packet->bitfields3.int_sel = 147 int_sel___release_mem__send_data_after_write_confirm; 148 149 rm_packet->bitfields3.dst_sel = 150 dst_sel___release_mem__memory_controller; 151 152 rm_packet->data_lo = QUEUESTATE__ACTIVE; 153 154 kq_submit_packet(kq); 155 156 /* Wait till CP writes sync code: */ 157 status = amdkfd_fence_wait_timeout( 158 rm_state, 159 QUEUESTATE__ACTIVE, 1500); 160 161 kfd_gtt_sa_free(dbgdev->dev, mem_obj); 162 163 return status; 164} 165 166static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) 167{ 168 /* 169 * no action is needed in this case, 170 * just make sure diq will not be used 171 */ 172 173 dbgdev->kq = NULL; 174 175 return 0; 176} 177 178static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) 179{ 180 struct queue_properties properties; 181 unsigned int qid; 182 struct kernel_queue *kq = NULL; 183 int status; 184 185 properties.type = KFD_QUEUE_TYPE_DIQ; 186 187 status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, 188 &properties, &qid, NULL); 189 190 if (status) { 191 pr_err("Failed to create DIQ\n"); 192 return status; 193 } 194 195 pr_debug("DIQ Created with queue id: %d\n", qid); 196 197 kq = pqm_get_kernel_queue(dbgdev->pqm, qid); 198 199 if (!kq) { 200 pr_err("Error getting DIQ\n"); 201 pqm_destroy_queue(dbgdev->pqm, qid); 202 return -EFAULT; 203 } 204 205 dbgdev->kq = kq; 206 207 return status; 208} 209 210static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) 211{ 212 /* disable watch address */ 213 dbgdev_address_watch_disable_nodiq(dbgdev->dev); 214 return 0; 215} 216 217static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) 218{ 219 /* todo - disable address watch */ 220 int status; 221 222 status = pqm_destroy_queue(dbgdev->pqm, 223 dbgdev->kq->queue->properties.queue_id); 224 dbgdev->kq = NULL; 225 226 return status; 227} 228 229static void dbgdev_address_watch_set_registers( 230 const struct dbg_address_watch_info *adw_info, 231 union TCP_WATCH_ADDR_H_BITS *addrHi, 232 union TCP_WATCH_ADDR_L_BITS *addrLo, 233 union TCP_WATCH_CNTL_BITS *cntl, 234 unsigned int index, unsigned int vmid) 235{ 236 union ULARGE_INTEGER addr; 237 238 addr.quad_part = 0; 239 addrHi->u32All = 0; 240 addrLo->u32All = 0; 241 cntl->u32All = 0; 242 243 if (adw_info->watch_mask) 244 cntl->bitfields.mask = 245 (uint32_t) (adw_info->watch_mask[index] & 246 ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); 247 else 248 cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; 249 250 addr.quad_part = (unsigned long long) adw_info->watch_address[index]; 251 252 addrHi->bitfields.addr = addr.u.high_part & 253 ADDRESS_WATCH_REG_ADDHIGH_MASK; 254 addrLo->bitfields.addr = 255 (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); 256 257 cntl->bitfields.mode = adw_info->watch_mode[index]; 258 cntl->bitfields.vmid = (uint32_t) vmid; 259 /* for now assume it is an ATC address */ 260 cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; 261 262 pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); 263 pr_debug("\t\t%20s %08x\n", "set reg add high :", 264 addrHi->bitfields.addr); 265 pr_debug("\t\t%20s %08x\n", "set reg add low :", 266 addrLo->bitfields.addr); 267} 268 269static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, 270 struct dbg_address_watch_info *adw_info) 271{ 272 union TCP_WATCH_ADDR_H_BITS addrHi; 273 union TCP_WATCH_ADDR_L_BITS addrLo; 274 union TCP_WATCH_CNTL_BITS cntl; 275 struct kfd_process_device *pdd; 276 unsigned int i; 277 278 /* taking the vmid for that process on the safe way using pdd */ 279 pdd = kfd_get_process_device_data(dbgdev->dev, 280 adw_info->process); 281 if (!pdd) { 282 pr_err("Failed to get pdd for wave control no DIQ\n"); 283 return -EFAULT; 284 } 285 286 addrHi.u32All = 0; 287 addrLo.u32All = 0; 288 cntl.u32All = 0; 289 290 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || 291 (adw_info->num_watch_points == 0)) { 292 pr_err("num_watch_points is invalid\n"); 293 return -EINVAL; 294 } 295 296 if (!adw_info->watch_mode || !adw_info->watch_address) { 297 pr_err("adw_info fields are not valid\n"); 298 return -EINVAL; 299 } 300 301 for (i = 0; i < adw_info->num_watch_points; i++) { 302 dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, 303 &cntl, i, pdd->qpd.vmid); 304 305 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 306 pr_debug("\t\t%20s %08x\n", "register index :", i); 307 pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid); 308 pr_debug("\t\t%20s %08x\n", "Address Low is :", 309 addrLo.bitfields.addr); 310 pr_debug("\t\t%20s %08x\n", "Address high is :", 311 addrHi.bitfields.addr); 312 pr_debug("\t\t%20s %08x\n", "Address high is :", 313 addrHi.bitfields.addr); 314 pr_debug("\t\t%20s %08x\n", "Control Mask is :", 315 cntl.bitfields.mask); 316 pr_debug("\t\t%20s %08x\n", "Control Mode is :", 317 cntl.bitfields.mode); 318 pr_debug("\t\t%20s %08x\n", "Control Vmid is :", 319 cntl.bitfields.vmid); 320 pr_debug("\t\t%20s %08x\n", "Control atc is :", 321 cntl.bitfields.atc); 322 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 323 324 pdd->dev->kfd2kgd->address_watch_execute( 325 dbgdev->dev->kgd, 326 i, 327 cntl.u32All, 328 addrHi.u32All, 329 addrLo.u32All); 330 } 331 332 return 0; 333} 334 335static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, 336 struct dbg_address_watch_info *adw_info) 337{ 338 struct pm4__set_config_reg *packets_vec; 339 union TCP_WATCH_ADDR_H_BITS addrHi; 340 union TCP_WATCH_ADDR_L_BITS addrLo; 341 union TCP_WATCH_CNTL_BITS cntl; 342 struct kfd_mem_obj *mem_obj; 343 unsigned int aw_reg_add_dword; 344 uint32_t *packet_buff_uint; 345 unsigned int i; 346 int status; 347 size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; 348 /* we do not control the vmid in DIQ mode, just a place holder */ 349 unsigned int vmid = 0; 350 351 addrHi.u32All = 0; 352 addrLo.u32All = 0; 353 cntl.u32All = 0; 354 355 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || 356 (adw_info->num_watch_points == 0)) { 357 pr_err("num_watch_points is invalid\n"); 358 return -EINVAL; 359 } 360 361 if (!adw_info->watch_mode || !adw_info->watch_address) { 362 pr_err("adw_info fields are not valid\n"); 363 return -EINVAL; 364 } 365 366 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); 367 368 if (status) { 369 pr_err("Failed to allocate GART memory\n"); 370 return status; 371 } 372 373 packet_buff_uint = mem_obj->cpu_ptr; 374 375 memset(packet_buff_uint, 0, ib_size); 376 377 packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); 378 379 packets_vec[0].header.count = 1; 380 packets_vec[0].header.opcode = IT_SET_CONFIG_REG; 381 packets_vec[0].header.type = PM4_TYPE_3; 382 packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; 383 packets_vec[0].bitfields2.insert_vmid = 1; 384 packets_vec[1].ordinal1 = packets_vec[0].ordinal1; 385 packets_vec[1].bitfields2.insert_vmid = 0; 386 packets_vec[2].ordinal1 = packets_vec[0].ordinal1; 387 packets_vec[2].bitfields2.insert_vmid = 0; 388 packets_vec[3].ordinal1 = packets_vec[0].ordinal1; 389 packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; 390 packets_vec[3].bitfields2.insert_vmid = 1; 391 392 for (i = 0; i < adw_info->num_watch_points; i++) { 393 dbgdev_address_watch_set_registers(adw_info, 394 &addrHi, 395 &addrLo, 396 &cntl, 397 i, 398 vmid); 399 400 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 401 pr_debug("\t\t%20s %08x\n", "register index :", i); 402 pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); 403 pr_debug("\t\t%20s %p\n", "Add ptr is :", 404 adw_info->watch_address); 405 pr_debug("\t\t%20s %08llx\n", "Add is :", 406 adw_info->watch_address[i]); 407 pr_debug("\t\t%20s %08x\n", "Address Low is :", 408 addrLo.bitfields.addr); 409 pr_debug("\t\t%20s %08x\n", "Address high is :", 410 addrHi.bitfields.addr); 411 pr_debug("\t\t%20s %08x\n", "Control Mask is :", 412 cntl.bitfields.mask); 413 pr_debug("\t\t%20s %08x\n", "Control Mode is :", 414 cntl.bitfields.mode); 415 pr_debug("\t\t%20s %08x\n", "Control Vmid is :", 416 cntl.bitfields.vmid); 417 pr_debug("\t\t%20s %08x\n", "Control atc is :", 418 cntl.bitfields.atc); 419 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); 420 421 aw_reg_add_dword = 422 dbgdev->dev->kfd2kgd->address_watch_get_offset( 423 dbgdev->dev->kgd, 424 i, 425 ADDRESS_WATCH_REG_CNTL); 426 427 packets_vec[0].bitfields2.reg_offset = 428 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 429 430 packets_vec[0].reg_data[0] = cntl.u32All; 431 432 aw_reg_add_dword = 433 dbgdev->dev->kfd2kgd->address_watch_get_offset( 434 dbgdev->dev->kgd, 435 i, 436 ADDRESS_WATCH_REG_ADDR_HI); 437 438 packets_vec[1].bitfields2.reg_offset = 439 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 440 packets_vec[1].reg_data[0] = addrHi.u32All; 441 442 aw_reg_add_dword = 443 dbgdev->dev->kfd2kgd->address_watch_get_offset( 444 dbgdev->dev->kgd, 445 i, 446 ADDRESS_WATCH_REG_ADDR_LO); 447 448 packets_vec[2].bitfields2.reg_offset = 449 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 450 packets_vec[2].reg_data[0] = addrLo.u32All; 451 452 /* enable watch flag if address is not zero*/ 453 if (adw_info->watch_address[i] > 0) 454 cntl.bitfields.valid = 1; 455 else 456 cntl.bitfields.valid = 0; 457 458 aw_reg_add_dword = 459 dbgdev->dev->kfd2kgd->address_watch_get_offset( 460 dbgdev->dev->kgd, 461 i, 462 ADDRESS_WATCH_REG_CNTL); 463 464 packets_vec[3].bitfields2.reg_offset = 465 aw_reg_add_dword - AMD_CONFIG_REG_BASE; 466 packets_vec[3].reg_data[0] = cntl.u32All; 467 468 status = dbgdev_diq_submit_ib( 469 dbgdev, 470 adw_info->process->pasid, 471 mem_obj->gpu_addr, 472 packet_buff_uint, 473 ib_size); 474 475 if (status) { 476 pr_err("Failed to submit IB to DIQ\n"); 477 break; 478 } 479 } 480 481 kfd_gtt_sa_free(dbgdev->dev, mem_obj); 482 return status; 483} 484 485static int dbgdev_wave_control_set_registers( 486 struct dbg_wave_control_info *wac_info, 487 union SQ_CMD_BITS *in_reg_sq_cmd, 488 union GRBM_GFX_INDEX_BITS *in_reg_gfx_index) 489{ 490 int status = 0; 491 union SQ_CMD_BITS reg_sq_cmd; 492 union GRBM_GFX_INDEX_BITS reg_gfx_index; 493 struct HsaDbgWaveMsgAMDGen2 *pMsg; 494 495 reg_sq_cmd.u32All = 0; 496 reg_gfx_index.u32All = 0; 497 pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; 498 499 switch (wac_info->mode) { 500 /* Send command to single wave */ 501 case HSA_DBG_WAVEMODE_SINGLE: 502 /* 503 * Limit access to the process waves only, 504 * by setting vmid check 505 */ 506 reg_sq_cmd.bits.check_vmid = 1; 507 reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; 508 reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId; 509 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE; 510 511 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; 512 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; 513 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; 514 515 break; 516 517 /* Send command to all waves with matching VMID */ 518 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: 519 520 reg_gfx_index.bits.sh_broadcast_writes = 1; 521 reg_gfx_index.bits.se_broadcast_writes = 1; 522 reg_gfx_index.bits.instance_broadcast_writes = 1; 523 524 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 525 526 break; 527 528 /* Send command to all CU waves with matching VMID */ 529 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: 530 531 reg_sq_cmd.bits.check_vmid = 1; 532 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 533 534 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; 535 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; 536 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; 537 538 break; 539 540 default: 541 return -EINVAL; 542 } 543 544 switch (wac_info->operand) { 545 case HSA_DBG_WAVEOP_HALT: 546 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT; 547 break; 548 549 case HSA_DBG_WAVEOP_RESUME: 550 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME; 551 break; 552 553 case HSA_DBG_WAVEOP_KILL: 554 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; 555 break; 556 557 case HSA_DBG_WAVEOP_DEBUG: 558 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG; 559 break; 560 561 case HSA_DBG_WAVEOP_TRAP: 562 if (wac_info->trapId < MAX_TRAPID) { 563 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP; 564 reg_sq_cmd.bits.trap_id = wac_info->trapId; 565 } else { 566 status = -EINVAL; 567 } 568 break; 569 570 default: 571 status = -EINVAL; 572 break; 573 } 574 575 if (status == 0) { 576 *in_reg_sq_cmd = reg_sq_cmd; 577 *in_reg_gfx_index = reg_gfx_index; 578 } 579 580 return status; 581} 582 583static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, 584 struct dbg_wave_control_info *wac_info) 585{ 586 587 int status; 588 union SQ_CMD_BITS reg_sq_cmd; 589 union GRBM_GFX_INDEX_BITS reg_gfx_index; 590 struct kfd_mem_obj *mem_obj; 591 uint32_t *packet_buff_uint; 592 struct pm4__set_config_reg *packets_vec; 593 size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; 594 595 reg_sq_cmd.u32All = 0; 596 597 status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, 598 ®_gfx_index); 599 if (status) { 600 pr_err("Failed to set wave control registers\n"); 601 return status; 602 } 603 604 /* we do not control the VMID in DIQ, so reset it to a known value */ 605 reg_sq_cmd.bits.vm_id = 0; 606 607 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 608 609 pr_debug("\t\t mode is: %u\n", wac_info->mode); 610 pr_debug("\t\t operand is: %u\n", wac_info->operand); 611 pr_debug("\t\t trap id is: %u\n", wac_info->trapId); 612 pr_debug("\t\t msg value is: %u\n", 613 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); 614 pr_debug("\t\t vmid is: N/A\n"); 615 616 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); 617 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); 618 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); 619 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); 620 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); 621 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); 622 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); 623 624 pr_debug("\t\t ibw is : %u\n", 625 reg_gfx_index.bitfields.instance_broadcast_writes); 626 pr_debug("\t\t ii is : %u\n", 627 reg_gfx_index.bitfields.instance_index); 628 pr_debug("\t\t sebw is : %u\n", 629 reg_gfx_index.bitfields.se_broadcast_writes); 630 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); 631 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); 632 pr_debug("\t\t sbw is : %u\n", 633 reg_gfx_index.bitfields.sh_broadcast_writes); 634 635 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 636 637 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); 638 639 if (status != 0) { 640 pr_err("Failed to allocate GART memory\n"); 641 return status; 642 } 643 644 packet_buff_uint = mem_obj->cpu_ptr; 645 646 memset(packet_buff_uint, 0, ib_size); 647 648 packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; 649 packets_vec[0].header.count = 1; 650 packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; 651 packets_vec[0].header.type = PM4_TYPE_3; 652 packets_vec[0].bitfields2.reg_offset = 653 GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; 654 655 packets_vec[0].bitfields2.insert_vmid = 0; 656 packets_vec[0].reg_data[0] = reg_gfx_index.u32All; 657 658 packets_vec[1].header.count = 1; 659 packets_vec[1].header.opcode = IT_SET_CONFIG_REG; 660 packets_vec[1].header.type = PM4_TYPE_3; 661 packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE; 662 663 packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; 664 packets_vec[1].bitfields2.insert_vmid = 1; 665 packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; 666 667 /* Restore the GRBM_GFX_INDEX register */ 668 669 reg_gfx_index.u32All = 0; 670 reg_gfx_index.bits.sh_broadcast_writes = 1; 671 reg_gfx_index.bits.instance_broadcast_writes = 1; 672 reg_gfx_index.bits.se_broadcast_writes = 1; 673 674 675 packets_vec[2].ordinal1 = packets_vec[0].ordinal1; 676 packets_vec[2].bitfields2.reg_offset = 677 GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; 678 679 packets_vec[2].bitfields2.insert_vmid = 0; 680 packets_vec[2].reg_data[0] = reg_gfx_index.u32All; 681 682 status = dbgdev_diq_submit_ib( 683 dbgdev, 684 wac_info->process->pasid, 685 mem_obj->gpu_addr, 686 packet_buff_uint, 687 ib_size); 688 689 if (status) 690 pr_err("Failed to submit IB to DIQ\n"); 691 692 kfd_gtt_sa_free(dbgdev->dev, mem_obj); 693 694 return status; 695} 696 697static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, 698 struct dbg_wave_control_info *wac_info) 699{ 700 int status; 701 union SQ_CMD_BITS reg_sq_cmd; 702 union GRBM_GFX_INDEX_BITS reg_gfx_index; 703 struct kfd_process_device *pdd; 704 705 reg_sq_cmd.u32All = 0; 706 707 /* taking the VMID for that process on the safe way using PDD */ 708 pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process); 709 710 if (!pdd) { 711 pr_err("Failed to get pdd for wave control no DIQ\n"); 712 return -EFAULT; 713 } 714 status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, 715 ®_gfx_index); 716 if (status) { 717 pr_err("Failed to set wave control registers\n"); 718 return status; 719 } 720 721 /* for non DIQ we need to patch the VMID: */ 722 723 reg_sq_cmd.bits.vm_id = pdd->qpd.vmid; 724 725 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 726 727 pr_debug("\t\t mode is: %u\n", wac_info->mode); 728 pr_debug("\t\t operand is: %u\n", wac_info->operand); 729 pr_debug("\t\t trap id is: %u\n", wac_info->trapId); 730 pr_debug("\t\t msg value is: %u\n", 731 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); 732 pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid); 733 734 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); 735 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); 736 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); 737 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); 738 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); 739 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); 740 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); 741 742 pr_debug("\t\t ibw is : %u\n", 743 reg_gfx_index.bitfields.instance_broadcast_writes); 744 pr_debug("\t\t ii is : %u\n", 745 reg_gfx_index.bitfields.instance_index); 746 pr_debug("\t\t sebw is : %u\n", 747 reg_gfx_index.bitfields.se_broadcast_writes); 748 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); 749 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); 750 pr_debug("\t\t sbw is : %u\n", 751 reg_gfx_index.bitfields.sh_broadcast_writes); 752 753 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); 754 755 return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd, 756 reg_gfx_index.u32All, 757 reg_sq_cmd.u32All); 758} 759 760int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) 761{ 762 int status = 0; 763 unsigned int vmid; 764 uint16_t queried_pasid; 765 union SQ_CMD_BITS reg_sq_cmd; 766 union GRBM_GFX_INDEX_BITS reg_gfx_index; 767 struct kfd_process_device *pdd; 768 struct dbg_wave_control_info wac_info; 769 int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; 770 int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; 771 772 reg_sq_cmd.u32All = 0; 773 status = 0; 774 775 wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; 776 wac_info.operand = HSA_DBG_WAVEOP_KILL; 777 778 pr_debug("Killing all process wavefronts\n"); 779 780 /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. 781 * ATC_VMID15_PASID_MAPPING 782 * to check which VMID the current process is mapped to. 783 */ 784 785 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { 786 status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info 787 (dev->kgd, vmid, &queried_pasid); 788 789 if (status && queried_pasid == p->pasid) { 790 pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", 791 vmid, p->pasid); 792 break; 793 } 794 } 795 796 if (vmid > last_vmid_to_scan) { 797 pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid); 798 return -EFAULT; 799 } 800 801 /* taking the VMID for that process on the safe way using PDD */ 802 pdd = kfd_get_process_device_data(dev, p); 803 if (!pdd) 804 return -EFAULT; 805 806 status = dbgdev_wave_control_set_registers(&wac_info, ®_sq_cmd, 807 ®_gfx_index); 808 if (status != 0) 809 return -EINVAL; 810 811 /* for non DIQ we need to patch the VMID: */ 812 reg_sq_cmd.bits.vm_id = vmid; 813 814 dev->kfd2kgd->wave_control_execute(dev->kgd, 815 reg_gfx_index.u32All, 816 reg_sq_cmd.u32All); 817 818 return 0; 819} 820 821void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, 822 enum DBGDEV_TYPE type) 823{ 824 pdbgdev->dev = pdev; 825 pdbgdev->kq = NULL; 826 pdbgdev->type = type; 827 pdbgdev->pqm = NULL; 828 829 switch (type) { 830 case DBGDEV_TYPE_NODIQ: 831 pdbgdev->dbgdev_register = dbgdev_register_nodiq; 832 pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq; 833 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq; 834 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq; 835 break; 836 case DBGDEV_TYPE_DIQ: 837 default: 838 pdbgdev->dbgdev_register = dbgdev_register_diq; 839 pdbgdev->dbgdev_unregister = dbgdev_unregister_diq; 840 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq; 841 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq; 842 break; 843 } 844 845} 846