Lines Matching refs:sctx

37 si_emit_spi_config_cntl(struct si_context* sctx,
41 si_thread_trace_init_bo(struct si_context *sctx)
43 unsigned max_se = sctx->screen->info.max_se;
44 struct radeon_winsys *ws = sctx->ws;
50 sctx->thread_trace->buffer_size = align64(sctx->thread_trace->buffer_size,
56 size += sctx->thread_trace->buffer_size * (uint64_t)max_se;
58 sctx->thread_trace->bo =
64 if (!sctx->thread_trace->bo)
71 si_se_is_disabled(struct si_context* sctx, unsigned se)
74 return sctx->screen->info.cu_mask[se][0] == 0;
79 si_emit_thread_trace_start(struct si_context* sctx,
83 struct si_screen *sscreen = sctx->screen;
84 uint32_t shifted_size = sctx->thread_trace->buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
90 uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
91 uint64_t data_va = ac_thread_trace_get_data_va(&sctx->screen->info, sctx->thread_trace, va, se);
94 if (si_se_is_disabled(sctx, se))
104 int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);
106 if (sctx->gfx_level >= GFX10) {
142 sctx->gfx_level >= GFX10_3 ? 4 : 0) |
143 S_008D1C_AUTO_FLUSH_MODE(sctx->screen->info.has_sqtt_auto_flush_mode_bug));
184 if (sctx->gfx_level == GFX9) {
202 if (sctx->gfx_level == GFX9) {
244 si_copy_thread_trace_info_regs(struct si_context* sctx,
250 switch (sctx->gfx_level) {
263 uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
285 si_emit_thread_trace_stop(struct si_context *sctx,
289 unsigned max_se = sctx->screen->info.max_se;
306 if (sctx->screen->info.has_sqtt_rb_harvest_bug) {
308 sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
311 sctx->emit_cache_flush(sctx, cs);
315 if (si_se_is_disabled(sctx, se))
326 if (sctx->gfx_level >= GFX10) {
327 if (!sctx->screen->info.has_sqtt_rb_harvest_bug) {
366 si_copy_thread_trace_info_regs(sctx, cs, se);
379 si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf *cs)
381 struct radeon_winsys *ws = sctx->ws;
399 sctx->thread_trace->bo,
403 sctx->spm_trace.bo,
407 si_cp_dma_wait_for_idle(sctx, cs);
410 sctx->flags |=
414 sctx->emit_cache_flush(sctx, cs);
416 si_inhibit_clockgating(sctx, cs, true);
419 si_emit_spi_config_cntl(sctx, cs, true);
425 si_emit_spm_setup(sctx, cs);
427 si_emit_thread_trace_start(sctx, cs, family);
433 si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *cs)
435 struct radeon_winsys *ws = sctx->ws;
453 sctx->thread_trace->bo,
458 sctx->spm_trace.bo,
462 si_cp_dma_wait_for_idle(sctx, cs);
464 si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters,
465 sctx->screen->info.never_send_perfcounter_stop);
468 sctx->flags |=
472 sctx->emit_cache_flush(sctx, cs);
474 si_emit_thread_trace_stop(sctx, cs, family);
479 si_emit_spi_config_cntl(sctx, cs, false);
481 si_inhibit_clockgating(sctx, cs, false);
486 si_thread_trace_init_cs(struct si_context *sctx)
488 struct radeon_winsys *ws = sctx->ws;
491 sctx->thread_trace->start_cs[AMD_IP_GFX] = CALLOC_STRUCT(radeon_cmdbuf);
492 if (!ws->cs_create(sctx->thread_trace->start_cs[AMD_IP_GFX],
493 sctx->ctx, AMD_IP_GFX, NULL, NULL, 0)) {
494 free(sctx->thread_trace->start_cs[AMD_IP_GFX]);
495 sctx->thread_trace->start_cs[AMD_IP_GFX] = NULL;
499 si_thread_trace_start(sctx, AMD_IP_GFX, sctx->thread_trace->start_cs[AMD_IP_GFX]);
502 sctx->thread_trace->stop_cs[AMD_IP_GFX] = CALLOC_STRUCT(radeon_cmdbuf);
503 if (!ws->cs_create(sctx->thread_trace->stop_cs[AMD_IP_GFX],
504 sctx->ctx, AMD_IP_GFX, NULL, NULL, 0)) {
505 free(sctx->thread_trace->start_cs[AMD_IP_GFX]);
506 sctx->thread_trace->start_cs[AMD_IP_GFX] = NULL;
507 free(sctx->thread_trace->stop_cs[AMD_IP_GFX]);
508 sctx->thread_trace->stop_cs[AMD_IP_GFX] = NULL;
512 si_thread_trace_stop(sctx, AMD_IP_GFX, sctx->thread_trace->stop_cs[AMD_IP_GFX]);
516 si_begin_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
518 struct radeon_cmdbuf *cs = sctx->thread_trace->start_cs[AMD_IP_GFX];
519 sctx->ws->cs_flush(cs, 0, NULL);
523 si_end_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
525 struct radeon_cmdbuf *cs = sctx->thread_trace->stop_cs[AMD_IP_GFX];
526 sctx->ws->cs_flush(cs, 0, &sctx->last_sqtt_fence);
530 si_get_thread_trace(struct si_context *sctx,
533 unsigned max_se = sctx->screen->info.max_se;
538 sctx->thread_trace->ptr = sctx->ws->buffer_map(sctx->ws, sctx->thread_trace->bo,
542 if (!sctx->thread_trace->ptr)
545 void *thread_trace_ptr = sctx->thread_trace->ptr;
549 uint64_t data_offset = ac_thread_trace_get_data_offset(&sctx->screen->info, sctx->thread_trace, se);
557 if (!ac_is_thread_trace_complete(&sctx->screen->info, sctx->thread_trace, info)) {
559 ac_get_expected_buffer_size(&sctx->screen->info, info);
576 int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);
580 sctx->screen->info.gfx_level >= GFX10 ? (first_active_cu / 2) : first_active_cu;
585 thread_trace->data = sctx->thread_trace;
591 si_init_thread_trace(struct si_context *sctx)
601 sctx->thread_trace = CALLOC_STRUCT(ac_thread_trace_data);
603 if (sctx->gfx_level < GFX8) {
610 if (sctx->gfx_level > GFX10_3) {
617 sctx->thread_trace->buffer_size = debug_get_num_option("AMD_THREAD_TRACE_BUFFER_SIZE", 32 * 1024) * 1024;
618 sctx->thread_trace->start_frame = 10;
622 sctx->thread_trace->start_frame = atoi(trigger);
623 if (sctx->thread_trace->start_frame <= 0) {
625 sctx->thread_trace->trigger_file = strdup(trigger);
626 sctx->thread_trace->start_frame = -1;
630 if (!si_thread_trace_init_bo(sctx))
633 list_inithead(&sctx->thread_trace->rgp_pso_correlation.record);
634 simple_mtx_init(&sctx->thread_trace->rgp_pso_correlation.lock, mtx_plain);
636 list_inithead(&sctx->thread_trace->rgp_loader_events.record);
637 simple_mtx_init(&sctx->thread_trace->rgp_loader_events.lock, mtx_plain);
639 list_inithead(&sctx->thread_trace->rgp_code_object.record);
640 simple_mtx_init(&sctx->thread_trace->rgp_code_object.lock, mtx_plain);
642 if (sctx->gfx_level >= GFX10) {
644 ASSERTED bool r = si_spm_init(sctx);
648 si_thread_trace_init_cs(sctx);
650 sctx->sqtt_next_event = EventInvalid;
656 si_destroy_thread_trace(struct si_context *sctx)
658 struct si_screen *sscreen = sctx->screen;
659 struct pb_buffer *bo = sctx->thread_trace->bo;
660 radeon_bo_reference(sctx->screen->ws, &bo, NULL);
662 if (sctx->thread_trace->trigger_file)
663 free(sctx->thread_trace->trigger_file);
665 sscreen->ws->cs_destroy(sctx->thread_trace->start_cs[AMD_IP_GFX]);
666 sscreen->ws->cs_destroy(sctx->thread_trace->stop_cs[AMD_IP_GFX]);
668 struct rgp_pso_correlation *pso_correlation = &sctx->thread_trace->rgp_pso_correlation;
669 struct rgp_loader_events *loader_events = &sctx->thread_trace->rgp_loader_events;
670 struct rgp_code_object *code_object = &sctx->thread_trace->rgp_code_object;
676 simple_mtx_destroy(&sctx->thread_trace->rgp_pso_correlation.lock);
683 simple_mtx_destroy(&sctx->thread_trace->rgp_loader_events.lock);
698 simple_mtx_destroy(&sctx->thread_trace->rgp_code_object.lock);
700 free(sctx->thread_trace);
701 sctx->thread_trace = NULL;
703 if (sctx->gfx_level >= GFX10)
704 si_spm_finish(sctx);
710 si_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
713 if (!sctx->thread_trace_enabled) {
714 bool frame_trigger = num_frames == sctx->thread_trace->start_frame;
716 if (sctx->thread_trace->trigger_file &&
717 access(sctx->thread_trace->trigger_file, W_OK) == 0) {
718 if (unlink(sctx->thread_trace->trigger_file) == 0) {
730 sctx->ws->fence_wait(sctx->ws, sctx->last_gfx_fence, PIPE_TIMEOUT_INFINITE);
733 si_begin_thread_trace(sctx, rcs);
735 sctx->thread_trace_enabled = true;
736 sctx->thread_trace->start_frame = -1;
741 sctx->do_update_shaders = true;
747 si_end_thread_trace(sctx, rcs);
748 sctx->thread_trace_enabled = false;
749 sctx->thread_trace->start_frame = -1;
750 assert (sctx->last_sqtt_fence);
753 if (sctx->ws->fence_wait(sctx->ws, sctx->last_sqtt_fence, PIPE_TIMEOUT_INFINITE) &&
754 si_get_thread_trace(sctx, &thread_trace)) {
756 if (sctx->gfx_level >= GFX10)
757 sctx->spm_trace.ptr = sctx->ws->buffer_map(sctx->ws, sctx->spm_trace.bo,
760 ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, &sctx->spm_trace);
762 if (sctx->spm_trace.ptr)
763 sctx->ws->buffer_unmap(sctx->ws, sctx->spm_trace.bo);
774 si_emit_thread_trace_userdata(struct si_context* sctx,
787 radeon_set_uconfig_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count, sctx->gfx_level >= GFX10);
798 si_emit_spi_config_cntl(struct si_context* sctx,
803 if (sctx->gfx_level >= GFX9) {
809 if (sctx->gfx_level >= GFX10)
824 si_sqtt_write_event_marker(struct si_context* sctx, struct radeon_cmdbuf *rcs,
850 si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
852 sctx->sqtt_next_event = EventInvalid;
856 si_write_event_with_dims_marker(struct si_context* sctx, struct radeon_cmdbuf *rcs,
872 si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
873 sctx->sqtt_next_event = EventInvalid;
877 si_sqtt_describe_barrier_start(struct si_context* sctx, struct radeon_cmdbuf *rcs)
885 si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
889 si_sqtt_describe_barrier_end(struct si_context* sctx, struct radeon_cmdbuf *rcs,
925 si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
929 si_write_user_event(struct si_context* sctx, struct radeon_cmdbuf *rcs,
939 si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
953 si_emit_thread_trace_userdata(sctx, rcs, buffer, sizeof(marker) / 4 + marker.length / 4);
1012 si_sqtt_add_code_object(struct si_context* sctx,
1016 struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
1036 shader = &sctx->cs_shader_state.program->shader;
1039 if (!sctx->shaders[i].cso || !sctx->shaders[i].current)
1041 shader = sctx->shaders[i].current;
1082 si_sqtt_register_pipeline(struct si_context* sctx, uint64_t pipeline_hash, uint64_t base_address, bool is_compute)
1084 struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
1096 return si_sqtt_add_code_object(sctx, pipeline_hash, is_compute);
1100 si_sqtt_describe_pipeline_bind(struct si_context* sctx,
1105 struct radeon_cmdbuf *cs = &sctx->gfx_cs;
1107 if (likely(!sctx->thread_trace_enabled)) {
1117 si_emit_thread_trace_userdata(sctx, cs, &marker, sizeof(marker) / 4);