/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "si_pipe.h"
#include "si_build_pm4.h"
#include "si_compute.h"

#include "ac_rgp.h"
#include "ac_sqtt.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_from_mesa.h"

static void
si_emit_spi_config_cntl(struct si_context *sctx,
                        struct radeon_cmdbuf *cs, bool enable);

static bool
si_thread_trace_init_bo(struct si_context *sctx)
{
   unsigned max_se = sctx->screen->info.max_se;
   struct radeon_winsys *ws = sctx->ws;
   uint64_t size;

   /* The buffer size and address need to be aligned in HW regs. Align the
    * size as early as possible so that we do all the allocation & addressing
    * correctly. */
   sctx->thread_trace->buffer_size = align64(sctx->thread_trace->buffer_size,
                                             1u << SQTT_BUFFER_ALIGN_SHIFT);

   /* Compute total size of the thread trace BO for all SEs. */
   size = align64(sizeof(struct ac_thread_trace_info) * max_se,
                  1 << SQTT_BUFFER_ALIGN_SHIFT);
   size += sctx->thread_trace->buffer_size * (uint64_t)max_se;

   sctx->thread_trace->bo =
      ws->buffer_create(ws, size, 4096,
                        RADEON_DOMAIN_VRAM,
                        RADEON_FLAG_NO_INTERPROCESS_SHARING |
                        RADEON_FLAG_GTT_WC |
                        RADEON_FLAG_NO_SUBALLOC);
   if (!sctx->thread_trace->bo)
      return false;

   return true;
}

static bool
si_se_is_disabled(struct si_context *sctx, unsigned se)
{
   /* No active CU on the SE means it is disabled. */
   return sctx->screen->info.cu_mask[se][0] == 0;
}

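/* For reference, the BO allocated above is expected to be laid out roughly
 * as follows (a sketch, assuming the offsets returned by the ac_sqtt helpers
 * ac_thread_trace_get_info_va() and ac_thread_trace_get_data_va() follow the
 * size computation in si_thread_trace_init_bo()):
 *
 *    va + 0    : struct ac_thread_trace_info[max_se]
 *                (padded to 1 << SQTT_BUFFER_ALIGN_SHIFT)
 *    va + ...  : SE0 trace data, buffer_size bytes
 *    va + ...  : SE1 trace data, buffer_size bytes
 *    ...
 *
 * The hardware only sees addresses and sizes shifted right by
 * SQTT_BUFFER_ALIGN_SHIFT, hence the alignment above.
 */
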
static void
si_emit_thread_trace_start(struct si_context *sctx,
                           struct radeon_cmdbuf *cs,
                           uint32_t queue_family_index)
{
   struct si_screen *sscreen = sctx->screen;
   uint32_t shifted_size = sctx->thread_trace->buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
   unsigned max_se = sscreen->info.max_se;

   radeon_begin(cs);

   for (unsigned se = 0; se < max_se; se++) {
      uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
      uint64_t data_va = ac_thread_trace_get_data_va(&sctx->screen->info, sctx->thread_trace, va, se);
      uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;

      if (si_se_is_disabled(sctx, se))
         continue;

      /* Target SEx and SH0. */
      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
                             S_030800_SE_INDEX(se) |
                             S_030800_SH_INDEX(0) |
                             S_030800_INSTANCE_BROADCAST_WRITES(1));

      /* Select the first active CUs. */
      int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);

      if (sctx->gfx_level >= GFX10) {
         /* Order seems important for the following 2 registers. */
         radeon_set_privileged_config_reg(R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
                                          S_008D04_SIZE(shifted_size) |
                                          S_008D04_BASE_HI(shifted_va >> 32));

         radeon_set_privileged_config_reg(R_008D00_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);

         int wgp = first_active_cu / 2;
         radeon_set_privileged_config_reg(R_008D14_SQ_THREAD_TRACE_MASK,
                                          S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */
                                          S_008D14_SA_SEL(0) |
                                          S_008D14_WGP_SEL(wgp) |
                                          S_008D14_SIMD_SEL(0));

         radeon_set_privileged_config_reg(R_008D18_SQ_THREAD_TRACE_TOKEN_MASK,
                                          S_008D18_REG_INCLUDE(V_008D18_REG_INCLUDE_SQDEC |
                                                               V_008D18_REG_INCLUDE_SHDEC |
                                                               V_008D18_REG_INCLUDE_GFXUDEC |
                                                               V_008D18_REG_INCLUDE_CONTEXT |
                                                               V_008D18_REG_INCLUDE_COMP |
                                                               V_008D18_REG_INCLUDE_CONFIG) |
                                          S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));

         /* Should be emitted last (it enables thread traces). */
         radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL,
                                          S_008D1C_MODE(1) |
                                          S_008D1C_HIWATER(5) |
                                          S_008D1C_UTIL_TIMER(1) |
                                          S_008D1C_RT_FREQ(2) | /* 4096 clk */
                                          S_008D1C_DRAW_EVENT_EN(1) |
                                          S_008D1C_REG_STALL_EN(1) |
                                          S_008D1C_SPI_STALL_EN(1) |
                                          S_008D1C_SQ_STALL_EN(1) |
                                          S_008D1C_REG_DROP_ON_STALL(0) |
                                          S_008D1C_LOWATER_OFFSET(
                                             sctx->gfx_level >= GFX10_3 ? 4 : 0) |
                                          S_008D1C_AUTO_FLUSH_MODE(sctx->screen->info.has_sqtt_auto_flush_mode_bug));
      } else {
         /* Order seems important for the following 4 registers. */
         radeon_set_uconfig_reg(R_030CDC_SQ_THREAD_TRACE_BASE2,
                                S_030CDC_ADDR_HI(shifted_va >> 32));

         radeon_set_uconfig_reg(R_030CC0_SQ_THREAD_TRACE_BASE, shifted_va);

         radeon_set_uconfig_reg(R_030CC4_SQ_THREAD_TRACE_SIZE,
                                S_030CC4_SIZE(shifted_size));

         radeon_set_uconfig_reg(R_030CD4_SQ_THREAD_TRACE_CTRL,
                                S_030CD4_RESET_BUFFER(1));

         uint32_t thread_trace_mask = S_030CC8_CU_SEL(first_active_cu) |
                                      S_030CC8_SH_SEL(0) |
                                      S_030CC8_SIMD_EN(0xf) |
                                      S_030CC8_VM_ID_MASK(0) |
                                      S_030CC8_REG_STALL_EN(1) |
                                      S_030CC8_SPI_STALL_EN(1) |
                                      S_030CC8_SQ_STALL_EN(1);

         radeon_set_uconfig_reg(R_030CC8_SQ_THREAD_TRACE_MASK,
                                thread_trace_mask);

         /* Trace all tokens and registers. */
         radeon_set_uconfig_reg(R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK,
                                S_030CCC_TOKEN_MASK(0xbfff) |
                                S_030CCC_REG_MASK(0xff) |
                                S_030CCC_REG_DROP_ON_STALL(0));

         /* Enable SQTT perf counters for all CUs. */
         radeon_set_uconfig_reg(R_030CD0_SQ_THREAD_TRACE_PERF_MASK,
                                S_030CD0_SH0_MASK(0xffff) |
                                S_030CD0_SH1_MASK(0xffff));

         radeon_set_uconfig_reg(R_030CE0_SQ_THREAD_TRACE_TOKEN_MASK2, 0xffffffff);

         radeon_set_uconfig_reg(R_030CEC_SQ_THREAD_TRACE_HIWATER,
                                S_030CEC_HIWATER(4));

         if (sctx->gfx_level == GFX9) {
            /* Reset thread trace status errors. */
            radeon_set_uconfig_reg(R_030CE8_SQ_THREAD_TRACE_STATUS,
                                   S_030CE8_UTC_ERROR(0));
         }

         /* Enable the thread trace mode. */
         uint32_t thread_trace_mode =
            S_030CD8_MASK_PS(1) |
            S_030CD8_MASK_VS(1) |
            S_030CD8_MASK_GS(1) |
            S_030CD8_MASK_ES(1) |
            S_030CD8_MASK_HS(1) |
            S_030CD8_MASK_LS(1) |
            S_030CD8_MASK_CS(1) |
            S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
            S_030CD8_MODE(1);

         if (sctx->gfx_level == GFX9) {
            /* Count SQTT traffic in TCC perf counters. */
            thread_trace_mode |= S_030CD8_TC_PERF_EN(1);
         }

         radeon_set_uconfig_reg(R_030CD8_SQ_THREAD_TRACE_MODE,
                                thread_trace_mode);
      }
   }

   /* Restore global broadcasting. */
   radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
                          S_030800_SE_BROADCAST_WRITES(1) |
                          S_030800_SH_BROADCAST_WRITES(1) |
                          S_030800_INSTANCE_BROADCAST_WRITES(1));

   /* Start the thread trace with a different event based on the queue. */
   if (queue_family_index == AMD_IP_COMPUTE) {
      radeon_set_sh_reg(R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
                        S_00B878_THREAD_TRACE_ENABLE(1));
   } else {
      radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_START) | EVENT_INDEX(0));
   }
   radeon_end();
}

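/* A note on the helpers used above: the SQ_THREAD_TRACE_* registers are
 * privileged on GFX10+, so radeon_set_privileged_config_reg() presumably
 * writes them through a COPY_DATA packet with the perfctr destination rather
 * than a plain SET_UCONFIG_REG (an assumption based on how the helper is
 * used here; see si_build_pm4.h for the actual encoding). On GFX9 the
 * equivalent registers are regular uconfig ones.
 */
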
static const uint32_t gfx9_thread_trace_info_regs[] =
{
   R_030CE4_SQ_THREAD_TRACE_WPTR,
   R_030CE8_SQ_THREAD_TRACE_STATUS,
   R_030CF0_SQ_THREAD_TRACE_CNTR,
};

static const uint32_t gfx10_thread_trace_info_regs[] =
{
   R_008D10_SQ_THREAD_TRACE_WPTR,
   R_008D20_SQ_THREAD_TRACE_STATUS,
   R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR,
};

static void
si_copy_thread_trace_info_regs(struct si_context *sctx,
                               struct radeon_cmdbuf *cs,
                               unsigned se_index)
{
   const uint32_t *thread_trace_info_regs = NULL;

   switch (sctx->gfx_level) {
   case GFX10_3:
   case GFX10:
      thread_trace_info_regs = gfx10_thread_trace_info_regs;
      break;
   case GFX9:
      thread_trace_info_regs = gfx9_thread_trace_info_regs;
      break;
   default:
      unreachable("Unsupported gfx_level");
   }

   /* Get the VA where the info struct is stored for this SE. */
   uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
   uint64_t info_va = ac_thread_trace_get_info_va(va, se_index);

   radeon_begin(cs);

   /* Copy back the info struct one DWORD at a time. */
   for (unsigned i = 0; i < 3; i++) {
      radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
      radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
                  COPY_DATA_DST_SEL(COPY_DATA_TC_L2) |
                  COPY_DATA_WR_CONFIRM);
      radeon_emit(thread_trace_info_regs[i] >> 2);
      radeon_emit(0); /* unused */
      radeon_emit((info_va + i * 4));
      radeon_emit((info_va + i * 4) >> 32);
   }
   radeon_end();
}

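/* The three DWORDs copied above are expected to land in the leading fields
 * of struct ac_thread_trace_info for that SE: the write pointer, the status
 * register, and the (dropped-token) counter, in that order (a sketch,
 * assuming the field layout in ac_sqtt.h matches the register arrays above).
 */
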
static void
si_emit_thread_trace_stop(struct si_context *sctx,
                          struct radeon_cmdbuf *cs,
                          uint32_t queue_family_index)
{
   unsigned max_se = sctx->screen->info.max_se;

   radeon_begin(cs);

   /* Stop the thread trace with a different event based on the queue. */
   if (queue_family_index == AMD_IP_COMPUTE) {
      radeon_set_sh_reg(R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
                        S_00B878_THREAD_TRACE_ENABLE(0));
   } else {
      radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_STOP) | EVENT_INDEX(0));
   }

   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
   radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
   radeon_end();

   if (sctx->screen->info.has_sqtt_rb_harvest_bug) {
      /* Some chips with disabled RBs should wait for idle because FINISH_DONE doesn't work. */
      sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
                     SI_CONTEXT_FLUSH_AND_INV_DB |
                     SI_CONTEXT_CS_PARTIAL_FLUSH;
      sctx->emit_cache_flush(sctx, cs);
   }

   for (unsigned se = 0; se < max_se; se++) {
      if (si_se_is_disabled(sctx, se))
         continue;

      radeon_begin(cs);

      /* Target SEi and SH0. */
      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
                             S_030800_SE_INDEX(se) |
                             S_030800_SH_INDEX(0) |
                             S_030800_INSTANCE_BROADCAST_WRITES(1));

      if (sctx->gfx_level >= GFX10) {
         if (!sctx->screen->info.has_sqtt_rb_harvest_bug) {
            /* Make sure to wait for the trace buffer. */
            radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
            radeon_emit(WAIT_REG_MEM_NOT_EQUAL); /* wait until FINISH_DONE != 0 */
            radeon_emit(R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
            radeon_emit(0);
            radeon_emit(0); /* reference value */
            radeon_emit(~C_008D20_FINISH_DONE); /* mask */
            radeon_emit(4); /* poll interval */
         }

         /* Disable the thread trace mode. */
         radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL,
                                          S_008D1C_MODE(0));

         /* Wait for thread trace completion. */
         radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
         radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until BUSY == 0 */
         radeon_emit(R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
         radeon_emit(0);
         radeon_emit(0); /* reference value */
         radeon_emit(~C_008D20_BUSY); /* mask */
         radeon_emit(4); /* poll interval */
      } else {
         /* Disable the thread trace mode. */
         radeon_set_uconfig_reg(R_030CD8_SQ_THREAD_TRACE_MODE,
                                S_030CD8_MODE(0));

         /* Wait for thread trace completion. */
         radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
         radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until BUSY == 0 */
         radeon_emit(R_030CE8_SQ_THREAD_TRACE_STATUS >> 2); /* register */
         radeon_emit(0);
         radeon_emit(0); /* reference value */
         radeon_emit(~C_030CE8_BUSY); /* mask */
         radeon_emit(4); /* poll interval */
      }
      radeon_end();

      si_copy_thread_trace_info_regs(sctx, cs, se);
   }

   /* Restore global broadcasting. */
   radeon_begin_again(cs);
   radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
                          S_030800_SE_BROADCAST_WRITES(1) |
                          S_030800_SH_BROADCAST_WRITES(1) |
                          S_030800_INSTANCE_BROADCAST_WRITES(1));
   radeon_end();
}

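/* For reference, each WAIT_REG_MEM above carries six DWORDs after the
 * header: the compare function, the register offset in DWORDs, an unused
 * high word (only meaningful for memory polls), the reference value, the
 * AND mask applied before the comparison, and the poll interval. This
 * matches the emission order used here; see the PM4 packet definitions for
 * the authoritative encoding.
 */
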
static void
si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf *cs)
{
   struct radeon_winsys *ws = sctx->ws;

   radeon_begin(cs);

   switch (family) {
   case AMD_IP_GFX:
      radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
      radeon_emit(CC0_UPDATE_LOAD_ENABLES(1));
      radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1));
      break;
   case AMD_IP_COMPUTE:
      radeon_emit(PKT3(PKT3_NOP, 0, 0));
      radeon_emit(0);
      break;
   }
   radeon_end();

   ws->cs_add_buffer(cs,
                     sctx->thread_trace->bo,
                     RADEON_USAGE_READWRITE,
                     RADEON_DOMAIN_VRAM);
   ws->cs_add_buffer(cs,
                     sctx->spm_trace.bo,
                     RADEON_USAGE_READWRITE,
                     RADEON_DOMAIN_VRAM);

   si_cp_dma_wait_for_idle(sctx, cs);

   /* Make sure to wait-for-idle before starting SQTT. */
   sctx->flags |=
      SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
      SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
      SI_CONTEXT_INV_L2 | SI_CONTEXT_PFP_SYNC_ME;
   sctx->emit_cache_flush(sctx, cs);

   si_inhibit_clockgating(sctx, cs, true);

   /* Enable SQG events that collect thread trace data. */
   si_emit_spi_config_cntl(sctx, cs, true);

   si_pc_emit_spm_reset(cs);

   si_pc_emit_shaders(cs, 0x7f);

   si_emit_spm_setup(sctx, cs);

   si_emit_thread_trace_start(sctx, cs, family);

   si_pc_emit_spm_start(cs);
}

static void
si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *cs)
{
   struct radeon_winsys *ws = sctx->ws;

   radeon_begin(cs);

   switch (family) {
   case AMD_IP_GFX:
      radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
      radeon_emit(CC0_UPDATE_LOAD_ENABLES(1));
      radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1));
      break;
   case AMD_IP_COMPUTE:
      radeon_emit(PKT3(PKT3_NOP, 0, 0));
      radeon_emit(0);
      break;
   }
   radeon_end();

   ws->cs_add_buffer(cs,
                     sctx->thread_trace->bo,
                     RADEON_USAGE_READWRITE,
                     RADEON_DOMAIN_VRAM);

   ws->cs_add_buffer(cs,
                     sctx->spm_trace.bo,
                     RADEON_USAGE_READWRITE,
                     RADEON_DOMAIN_VRAM);

   si_cp_dma_wait_for_idle(sctx, cs);

   si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters,
                       sctx->screen->info.never_send_perfcounter_stop);

   /* Make sure to wait-for-idle before stopping SQTT. */
   sctx->flags |=
      SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
      SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
      SI_CONTEXT_INV_L2 | SI_CONTEXT_PFP_SYNC_ME;
   sctx->emit_cache_flush(sctx, cs);

   si_emit_thread_trace_stop(sctx, cs, family);

   si_pc_emit_spm_reset(cs);

   /* Restore previous state by disabling SQG events. */
   si_emit_spi_config_cntl(sctx, cs, false);

   si_inhibit_clockgating(sctx, cs, false);
}

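/* To summarize, a capture is bracketed by the two helpers above:
 *
 *   start CS: CONTEXT_CONTROL -> wait for idle + flush caches ->
 *             inhibit clockgating -> enable SQG events -> SPM reset/setup ->
 *             SQTT start -> SPM start
 *   stop CS:  wait for idle -> SPM stop -> flush caches -> SQTT stop
 *             (including the per-SE info readback) -> SPM reset ->
 *             restore SQG events and clockgating
 */
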
static void
si_thread_trace_init_cs(struct si_context *sctx)
{
   struct radeon_winsys *ws = sctx->ws;

   /* Thread trace start CS (only handles AMD_IP_GFX). */
   sctx->thread_trace->start_cs[AMD_IP_GFX] = CALLOC_STRUCT(radeon_cmdbuf);
   if (!ws->cs_create(sctx->thread_trace->start_cs[AMD_IP_GFX],
                      sctx->ctx, AMD_IP_GFX, NULL, NULL, 0)) {
      free(sctx->thread_trace->start_cs[AMD_IP_GFX]);
      sctx->thread_trace->start_cs[AMD_IP_GFX] = NULL;
      return;
   }

   si_thread_trace_start(sctx, AMD_IP_GFX, sctx->thread_trace->start_cs[AMD_IP_GFX]);

   /* Thread trace stop CS. */
   sctx->thread_trace->stop_cs[AMD_IP_GFX] = CALLOC_STRUCT(radeon_cmdbuf);
   if (!ws->cs_create(sctx->thread_trace->stop_cs[AMD_IP_GFX],
                      sctx->ctx, AMD_IP_GFX, NULL, NULL, 0)) {
      free(sctx->thread_trace->start_cs[AMD_IP_GFX]);
      sctx->thread_trace->start_cs[AMD_IP_GFX] = NULL;
      free(sctx->thread_trace->stop_cs[AMD_IP_GFX]);
      sctx->thread_trace->stop_cs[AMD_IP_GFX] = NULL;
      return;
   }

   si_thread_trace_stop(sctx, AMD_IP_GFX, sctx->thread_trace->stop_cs[AMD_IP_GFX]);
}

static void
si_begin_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
{
   struct radeon_cmdbuf *cs = sctx->thread_trace->start_cs[AMD_IP_GFX];
   sctx->ws->cs_flush(cs, 0, NULL);
}

static void
si_end_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
{
   struct radeon_cmdbuf *cs = sctx->thread_trace->stop_cs[AMD_IP_GFX];
   sctx->ws->cs_flush(cs, 0, &sctx->last_sqtt_fence);
}

static bool
si_get_thread_trace(struct si_context *sctx,
                    struct ac_thread_trace *thread_trace)
{
   unsigned max_se = sctx->screen->info.max_se;

   memset(thread_trace, 0, sizeof(*thread_trace));
   thread_trace->num_traces = max_se;

   sctx->thread_trace->ptr = sctx->ws->buffer_map(sctx->ws, sctx->thread_trace->bo,
                                                  NULL,
                                                  PIPE_MAP_READ);

   if (!sctx->thread_trace->ptr)
      return false;

   void *thread_trace_ptr = sctx->thread_trace->ptr;

   for (unsigned se = 0; se < max_se; se++) {
      uint64_t info_offset = ac_thread_trace_get_info_offset(se);
      uint64_t data_offset = ac_thread_trace_get_data_offset(&sctx->screen->info, sctx->thread_trace, se);
      void *info_ptr = thread_trace_ptr + info_offset;
      void *data_ptr = thread_trace_ptr + data_offset;
      struct ac_thread_trace_info *info =
         (struct ac_thread_trace_info *)info_ptr;

      struct ac_thread_trace_se thread_trace_se = {0};

      if (!ac_is_thread_trace_complete(&sctx->screen->info, sctx->thread_trace, info)) {
         uint32_t expected_size =
            ac_get_expected_buffer_size(&sctx->screen->info, info);
         uint32_t available_size = (info->cur_offset * 32) / 1024;

         fprintf(stderr, "Failed to get the thread trace "
                         "because the buffer is too small. The "
                         "hardware needs %d KB but the "
                         "buffer size is %d KB.\n",
                 expected_size, available_size);
         fprintf(stderr, "Please update the buffer size with "
                         "AMD_THREAD_TRACE_BUFFER_SIZE=<size_in_kbytes>\n");
         return false;
      }

      thread_trace_se.data_ptr = data_ptr;
      thread_trace_se.info = *info;
      thread_trace_se.shader_engine = se;

      int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);

      /* For GFX10+, compute_unit really means WGP. */
      thread_trace_se.compute_unit =
         sctx->screen->info.gfx_level >= GFX10 ? (first_active_cu / 2) : first_active_cu;

      thread_trace->traces[se] = thread_trace_se;
   }

   thread_trace->data = sctx->thread_trace;
   return true;
}

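/* A note on the units in the error path above: the "* 32 / 1024" conversion
 * assumes the hardware write pointer (info->cur_offset) advances in 32-byte
 * units, so available_size comes out in KB, the same unit that
 * AMD_THREAD_TRACE_BUFFER_SIZE uses.
 */
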
bool
si_init_thread_trace(struct si_context *sctx)
{
   static bool warn_once = true;
   if (warn_once) {
      fprintf(stderr, "*************************************************\n");
      fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
      fprintf(stderr, "*************************************************\n");
      warn_once = false;
   }

   sctx->thread_trace = CALLOC_STRUCT(ac_thread_trace_data);

   if (sctx->gfx_level < GFX8) {
      fprintf(stderr, "GPU hardware not supported: refer to "
                      "the RGP documentation for the list of "
                      "supported GPUs!\n");
      return false;
   }

   if (sctx->gfx_level > GFX10_3) {
      fprintf(stderr, "radeonsi: Thread trace is not supported "
                      "for this GPU!\n");
      return false;
   }

   /* Default buffer size: 32 MB per SE (the env var value is in KB). */
   sctx->thread_trace->buffer_size = debug_get_num_option("AMD_THREAD_TRACE_BUFFER_SIZE", 32 * 1024) * 1024;
   sctx->thread_trace->start_frame = 10;

   const char *trigger = getenv("AMD_THREAD_TRACE_TRIGGER");
   if (trigger) {
      sctx->thread_trace->start_frame = atoi(trigger);
      if (sctx->thread_trace->start_frame <= 0) {
         /* This isn't a frame number, must be a file. */
         sctx->thread_trace->trigger_file = strdup(trigger);
         sctx->thread_trace->start_frame = -1;
      }
   }

   if (!si_thread_trace_init_bo(sctx))
      return false;

   list_inithead(&sctx->thread_trace->rgp_pso_correlation.record);
   simple_mtx_init(&sctx->thread_trace->rgp_pso_correlation.lock, mtx_plain);

   list_inithead(&sctx->thread_trace->rgp_loader_events.record);
   simple_mtx_init(&sctx->thread_trace->rgp_loader_events.lock, mtx_plain);

   list_inithead(&sctx->thread_trace->rgp_code_object.record);
   simple_mtx_init(&sctx->thread_trace->rgp_code_object.lock, mtx_plain);

   if (sctx->gfx_level >= GFX10) {
      /* Limit SPM counters to GFX10+ for now. */
      ASSERTED bool r = si_spm_init(sctx);
      assert(r);
   }

   si_thread_trace_init_cs(sctx);

   sctx->sqtt_next_event = EventInvalid;

   return true;
}

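/* Example usage (a sketch based on the option parsing above; glxgears is
 * just a stand-in for any GL application):
 *
 *   # Capture frame 20 with a 64 MB buffer per SE (the value is in KB):
 *   AMD_THREAD_TRACE_BUFFER_SIZE=65536 AMD_THREAD_TRACE_TRIGGER=20 glxgears
 *
 *   # Capture one frame each time the trigger file is (re)created:
 *   AMD_THREAD_TRACE_TRIGGER=/tmp/sqtt_trigger glxgears &
 *   touch /tmp/sqtt_trigger
 */
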
void
si_destroy_thread_trace(struct si_context *sctx)
{
   struct si_screen *sscreen = sctx->screen;
   struct pb_buffer *bo = sctx->thread_trace->bo;
   radeon_bo_reference(sctx->screen->ws, &bo, NULL);

   if (sctx->thread_trace->trigger_file)
      free(sctx->thread_trace->trigger_file);

   sscreen->ws->cs_destroy(sctx->thread_trace->start_cs[AMD_IP_GFX]);
   sscreen->ws->cs_destroy(sctx->thread_trace->stop_cs[AMD_IP_GFX]);

   struct rgp_pso_correlation *pso_correlation = &sctx->thread_trace->rgp_pso_correlation;
   struct rgp_loader_events *loader_events = &sctx->thread_trace->rgp_loader_events;
   struct rgp_code_object *code_object = &sctx->thread_trace->rgp_code_object;
   list_for_each_entry_safe(struct rgp_pso_correlation_record, record,
                            &pso_correlation->record, list) {
      list_del(&record->list);
      free(record);
   }
   simple_mtx_destroy(&sctx->thread_trace->rgp_pso_correlation.lock);

   list_for_each_entry_safe(struct rgp_loader_events_record, record,
                            &loader_events->record, list) {
      list_del(&record->list);
      free(record);
   }
   simple_mtx_destroy(&sctx->thread_trace->rgp_loader_events.lock);

   list_for_each_entry_safe(struct rgp_code_object_record, record,
                            &code_object->record, list) {
      uint32_t mask = record->shader_stages_mask;
      int i;

      /* Free the disassembly. */
      while (mask) {
         i = u_bit_scan(&mask);
         free(record->shader_data[i].code);
      }
      list_del(&record->list);
      free(record);
   }
   simple_mtx_destroy(&sctx->thread_trace->rgp_code_object.lock);

   free(sctx->thread_trace);
   sctx->thread_trace = NULL;

   if (sctx->gfx_level >= GFX10)
      si_spm_finish(sctx);
}

static uint64_t num_frames = 0;

void
si_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
{
   /* Should we enable SQTT yet? */
   if (!sctx->thread_trace_enabled) {
      bool frame_trigger = num_frames == sctx->thread_trace->start_frame;
      bool file_trigger = false;
      if (sctx->thread_trace->trigger_file &&
          access(sctx->thread_trace->trigger_file, W_OK) == 0) {
         if (unlink(sctx->thread_trace->trigger_file) == 0) {
            file_trigger = true;
         } else {
            /* Do not enable tracing if we cannot remove the file,
             * because by then we'll trace every frame.
             */
            fprintf(stderr, "radeonsi: could not remove thread trace trigger file, ignoring\n");
         }
      }

      if (frame_trigger || file_trigger) {
         /* Wait for the last submission. */
         sctx->ws->fence_wait(sctx->ws, sctx->last_gfx_fence, PIPE_TIMEOUT_INFINITE);

         /* Start SQTT. */
         si_begin_thread_trace(sctx, rcs);

         sctx->thread_trace_enabled = true;
         sctx->thread_trace->start_frame = -1;

         /* Force shader update to make sure si_sqtt_describe_pipeline_bind is called
          * for the current "pipeline".
          */
         sctx->do_update_shaders = true;
      }
   } else {
      struct ac_thread_trace thread_trace = {0};

      /* Stop SQTT. */
      si_end_thread_trace(sctx, rcs);
      sctx->thread_trace_enabled = false;
      sctx->thread_trace->start_frame = -1;
      assert(sctx->last_sqtt_fence);

      /* Wait for SQTT to finish and read back the BO. */
      if (sctx->ws->fence_wait(sctx->ws, sctx->last_sqtt_fence, PIPE_TIMEOUT_INFINITE) &&
          si_get_thread_trace(sctx, &thread_trace)) {
         /* Map the SPM counter buffer. */
         if (sctx->gfx_level >= GFX10)
            sctx->spm_trace.ptr = sctx->ws->buffer_map(sctx->ws, sctx->spm_trace.bo,
                                                       NULL, PIPE_MAP_READ | RADEON_MAP_TEMPORARY);

         ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, &sctx->spm_trace);

         if (sctx->spm_trace.ptr)
            sctx->ws->buffer_unmap(sctx->ws, sctx->spm_trace.bo);
      } else {
         fprintf(stderr, "Failed to read the trace\n");
      }
   }

   num_frames++;
}

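/* All RGP markers below go through the SQ_THREAD_TRACE_USERDATA registers:
 * writes to those registers are recorded into the trace stream itself. Only
 * two consecutive USERDATA DWORDs are written per packet (starting at
 * USERDATA_2), so larger marker structs are split into 2-DWORD chunks by
 * si_emit_thread_trace_userdata() below.
 */
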
static void
si_emit_thread_trace_userdata(struct si_context *sctx,
                              struct radeon_cmdbuf *cs,
                              const void *data, uint32_t num_dwords)
{
   const uint32_t *dwords = (uint32_t *)data;

   radeon_begin(cs);

   while (num_dwords > 0) {
      uint32_t count = MIN2(num_dwords, 2);

      /* Without the perfctr bit the CP might not always pass the
       * write on correctly. */
      radeon_set_uconfig_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count, sctx->gfx_level >= GFX10);

      radeon_emit_array(dwords, count);

      dwords += count;
      num_dwords -= count;
   }
   radeon_end();
}

static void
si_emit_spi_config_cntl(struct si_context *sctx,
                        struct radeon_cmdbuf *cs, bool enable)
{
   radeon_begin(cs);

   if (sctx->gfx_level >= GFX9) {
      uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) |
                                 S_031100_EXP_PRIORITY_ORDER(3) |
                                 S_031100_ENABLE_SQG_TOP_EVENTS(enable) |
                                 S_031100_ENABLE_SQG_BOP_EVENTS(enable);

      if (sctx->gfx_level >= GFX10)
         spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3);

      radeon_set_uconfig_reg(R_031100_SPI_CONFIG_CNTL, spi_config_cntl);
   } else {
      /* SPI_CONFIG_CNTL is a protected register on GFX6-GFX8. */
      radeon_set_privileged_config_reg(R_009100_SPI_CONFIG_CNTL,
                                       S_009100_ENABLE_SQG_TOP_EVENTS(enable) |
                                       S_009100_ENABLE_SQG_BOP_EVENTS(enable));
   }
   radeon_end();
}

static uint32_t num_events = 0;

void
si_sqtt_write_event_marker(struct si_context *sctx, struct radeon_cmdbuf *rcs,
                           enum rgp_sqtt_marker_event_type api_type,
                           uint32_t vertex_offset_user_data,
                           uint32_t instance_offset_user_data,
                           uint32_t draw_index_user_data)
{
   struct rgp_sqtt_marker_event marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
   marker.api_type = api_type == EventInvalid ? EventCmdDraw : api_type;
   marker.cmd_id = num_events++;
   marker.cb_id = 0;

   if (vertex_offset_user_data == UINT_MAX ||
       instance_offset_user_data == UINT_MAX) {
      vertex_offset_user_data = 0;
      instance_offset_user_data = 0;
   }

   if (draw_index_user_data == UINT_MAX)
      draw_index_user_data = vertex_offset_user_data;

   marker.vertex_offset_reg_idx = vertex_offset_user_data;
   marker.instance_offset_reg_idx = instance_offset_user_data;
   marker.draw_index_reg_idx = draw_index_user_data;

   si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);

   sctx->sqtt_next_event = EventInvalid;
}

void
si_write_event_with_dims_marker(struct si_context *sctx, struct radeon_cmdbuf *rcs,
                                enum rgp_sqtt_marker_event_type api_type,
                                uint32_t x, uint32_t y, uint32_t z)
{
   struct rgp_sqtt_marker_event_with_dims marker = {0};

   marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
   marker.event.api_type = api_type;
   marker.event.cmd_id = num_events++;
   marker.event.cb_id = 0;
   marker.event.has_thread_dims = 1;

   marker.thread_x = x;
   marker.thread_y = y;
   marker.thread_z = z;

   si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
   sctx->sqtt_next_event = EventInvalid;
}

void
si_sqtt_describe_barrier_start(struct si_context *sctx, struct radeon_cmdbuf *rcs)
{
   struct rgp_sqtt_marker_barrier_start marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
   marker.cb_id = 0;
   marker.dword02 = 0xC0000000 + 10; /* RGP_BARRIER_INTERNAL_BASE */

   si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
}

void
si_sqtt_describe_barrier_end(struct si_context *sctx, struct radeon_cmdbuf *rcs,
                             unsigned flags)
{
   struct rgp_sqtt_marker_barrier_end marker = {0};

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
   marker.cb_id = 0;

   if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH)
      marker.vs_partial_flush = true;
   if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH)
      marker.ps_partial_flush = true;
   if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH)
      marker.cs_partial_flush = true;

   if (flags & SI_CONTEXT_PFP_SYNC_ME)
      marker.pfp_sync_me = true;

   if (flags & SI_CONTEXT_INV_VCACHE)
      marker.inval_tcp = true;
   if (flags & SI_CONTEXT_INV_ICACHE)
      marker.inval_sqI = true;
   if (flags & SI_CONTEXT_INV_SCACHE)
      marker.inval_sqK = true;
   if (flags & SI_CONTEXT_INV_L2)
      marker.inval_tcc = true;

   if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
      marker.inval_cb = true;
      marker.flush_cb = true;
   }
   if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
      marker.inval_db = true;
      marker.flush_db = true;
   }

   si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
}

void
si_write_user_event(struct si_context *sctx, struct radeon_cmdbuf *rcs,
                    enum rgp_sqtt_marker_user_event_type type,
                    const char *str, int len)
{
   if (type == UserEventPop) {
      assert(str == NULL);
      struct rgp_sqtt_marker_user_event marker = {0};
      marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
      marker.data_type = type;

      si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
   } else {
      assert(str != NULL);
      struct rgp_sqtt_marker_user_event_with_length marker = {0};
      marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
      marker.user_event.data_type = type;
      len = MIN2(1024, len);
      marker.length = align(len, 4);

      uint8_t *buffer = alloca(sizeof(marker) + marker.length);
      memcpy(buffer, &marker, sizeof(marker));
      memcpy(buffer + sizeof(marker), str, len);
      buffer[sizeof(marker) + len - 1] = '\0';

      si_emit_thread_trace_userdata(sctx, rcs, buffer, sizeof(marker) / 4 + marker.length / 4);
   }
}

bool
si_sqtt_pipeline_is_registered(struct ac_thread_trace_data *thread_trace_data,
                               uint64_t pipeline_hash)
{
   simple_mtx_lock(&thread_trace_data->rgp_pso_correlation.lock);
   list_for_each_entry_safe(struct rgp_pso_correlation_record, record,
                            &thread_trace_data->rgp_pso_correlation.record, list) {
      if (record->pipeline_hash[0] == pipeline_hash) {
         simple_mtx_unlock(&thread_trace_data->rgp_pso_correlation.lock);
         return true;
      }
   }
   simple_mtx_unlock(&thread_trace_data->rgp_pso_correlation.lock);

   return false;
}

static enum rgp_hardware_stages
si_sqtt_pipe_to_rgp_shader_stage(union si_shader_key *key, enum pipe_shader_type stage)
{
   switch (stage) {
   case PIPE_SHADER_VERTEX:
      if (key->ge.as_ls)
         return RGP_HW_STAGE_LS;
      else if (key->ge.as_es)
         return RGP_HW_STAGE_ES;
      else if (key->ge.as_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case PIPE_SHADER_TESS_CTRL:
      return RGP_HW_STAGE_HS;
   case PIPE_SHADER_TESS_EVAL:
      if (key->ge.as_es)
         return RGP_HW_STAGE_ES;
      else if (key->ge.as_ngg)
         return RGP_HW_STAGE_GS;
      else
         return RGP_HW_STAGE_VS;
   case PIPE_SHADER_GEOMETRY:
      return RGP_HW_STAGE_GS;
   case PIPE_SHADER_FRAGMENT:
      return RGP_HW_STAGE_PS;
   case PIPE_SHADER_COMPUTE:
      return RGP_HW_STAGE_CS;
   default:
      unreachable("invalid mesa shader stage");
   }
}

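/* For example, with NGG enabled (as_ngg set) both PIPE_SHADER_VERTEX and
 * PIPE_SHADER_TESS_EVAL map to RGP_HW_STAGE_GS above, since the hardware
 * runs them as part of the merged geometry (GS) stage.
 */
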
static bool
si_sqtt_add_code_object(struct si_context *sctx,
                        uint64_t pipeline_hash,
                        bool is_compute)
{
   struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
   struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
   struct rgp_code_object_record *record;

   record = malloc(sizeof(struct rgp_code_object_record));
   if (!record)
      return false;

   record->shader_stages_mask = 0;
   record->num_shaders_combined = 0;
   record->pipeline_hash[0] = pipeline_hash;
   record->pipeline_hash[1] = pipeline_hash;

   for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++) {
      struct si_shader *shader;
      enum rgp_hardware_stages hw_stage;

      if (is_compute) {
         if (i != PIPE_SHADER_COMPUTE)
            continue;
         shader = &sctx->cs_shader_state.program->shader;
         hw_stage = RGP_HW_STAGE_CS;
      } else if (i != PIPE_SHADER_COMPUTE) {
         if (!sctx->shaders[i].cso || !sctx->shaders[i].current)
            continue;
         shader = sctx->shaders[i].current;
         hw_stage = si_sqtt_pipe_to_rgp_shader_stage(&shader->key, i);
      } else {
         continue;
      }

      uint8_t *code = malloc(shader->binary.uploaded_code_size);
      if (!code) {
         free(record);
         return false;
      }
      memcpy(code, shader->binary.uploaded_code, shader->binary.uploaded_code_size);

      uint64_t va = shader->bo->gpu_address;
      unsigned gl_shader_stage = tgsi_processor_to_shader_stage(i);
      record->shader_data[gl_shader_stage].hash[0] = _mesa_hash_data(code, shader->binary.uploaded_code_size);
      record->shader_data[gl_shader_stage].hash[1] = record->shader_data[gl_shader_stage].hash[0];
      record->shader_data[gl_shader_stage].code_size = shader->binary.uploaded_code_size;
      record->shader_data[gl_shader_stage].code = code;
      record->shader_data[gl_shader_stage].vgpr_count = shader->config.num_vgprs;
      record->shader_data[gl_shader_stage].sgpr_count = shader->config.num_sgprs;
      record->shader_data[gl_shader_stage].base_address = va & 0xffffffffffff;
      record->shader_data[gl_shader_stage].elf_symbol_offset = 0;
      record->shader_data[gl_shader_stage].hw_stage = hw_stage;
      record->shader_data[gl_shader_stage].is_combined = false;
      record->shader_data[gl_shader_stage].scratch_memory_size = shader->config.scratch_bytes_per_wave;
      record->shader_data[gl_shader_stage].wavefront_size = shader->wave_size;

      record->shader_stages_mask |= 1 << gl_shader_stage;
      record->num_shaders_combined++;
   }

   simple_mtx_lock(&code_object->lock);
   list_addtail(&record->list, &code_object->record);
   code_object->record_count++;
   simple_mtx_unlock(&code_object->lock);

   return true;
}

bool
si_sqtt_register_pipeline(struct si_context *sctx, uint64_t pipeline_hash, uint64_t base_address, bool is_compute)
{
   struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;

   assert(!si_sqtt_pipeline_is_registered(thread_trace_data, pipeline_hash));

   bool result = ac_sqtt_add_pso_correlation(thread_trace_data, pipeline_hash);
   if (!result)
      return false;

   result = ac_sqtt_add_code_object_loader_event(thread_trace_data, pipeline_hash, base_address);
   if (!result)
      return false;

   return si_sqtt_add_code_object(sctx, pipeline_hash, is_compute);
}

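/* Registering a pipeline therefore produces three RGP records: a PSO
 * correlation entry, a code object loader event (tying the pipeline hash to
 * its GPU base address), and the code object itself with the per-stage
 * binaries captured by si_sqtt_add_code_object() above.
 */
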
void
si_sqtt_describe_pipeline_bind(struct si_context *sctx,
                               uint64_t pipeline_hash,
                               int bind_point)
{
   struct rgp_sqtt_marker_pipeline_bind marker = {0};
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;

   if (likely(!sctx->thread_trace_enabled)) {
      return;
   }

   marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
   marker.cb_id = 0;
   marker.bind_point = bind_point;
   marker.api_pso_hash[0] = pipeline_hash;
   marker.api_pso_hash[1] = pipeline_hash >> 32;

   si_emit_thread_trace_userdata(sctx, cs, &marker, sizeof(marker) / 4);
}