Lines Matching refs:sctx

232 struct si_context *sctx = (struct si_context *)ctx;
260 sel->compiler_ctx_state.debug = sctx->debug;
261 sel->compiler_ctx_state.is_debug_context = sctx->is_debug;
264 si_schedule_initial_compile(sctx, MESA_SHADER_COMPUTE, &sel->ready, &sel->compiler_ctx_state,
281 bool ok = si_shader_binary_upload(sctx->screen, &program->shader, 0);
282 si_shader_dump(sctx->screen, &program->shader, &sctx->debug, stderr, true);
297 struct si_context *sctx = (struct si_context *)ctx;
301 sctx->cs_shader_state.program = program;
309 si_set_active_descriptors(sctx,
312 si_set_active_descriptors(sctx, SI_DESCS_FIRST_COMPUTE + SI_SHADER_DESCS_SAMPLERS_AND_IMAGES,
315 sctx->compute_shaderbuf_sgprs_dirty = true;
316 sctx->compute_image_sgprs_dirty = true;
318 if (unlikely((sctx->screen->debug_flags & DBG(SQTT)) && sctx->thread_trace)) {
325 struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
327 si_sqtt_register_pipeline(sctx, pipeline_code_hash, base_address, true);
330 si_sqtt_describe_pipeline_bind(sctx, pipeline_code_hash, 1);
338 struct si_context *sctx = (struct si_context *)ctx;
339 struct si_compute *program = sctx->cs_shader_state.program;
374 void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf *cs)
376 const struct radeon_info *info = &sctx->screen->info;
380 S_00B834_DATA(sctx->screen->info.address32_hi >> 8));
388 if (sctx->gfx_level == GFX6) {
397 radeon_set_config_reg(R_00950C_TA_CS_BC_BASE_ADDR, sctx->border_color_buffer->gpu_address >> 8);
400 if (sctx->gfx_level >= GFX7) {
407 if (cs != &sctx->gfx_cs || !sctx->screen->info.has_graphics) {
414 if (sctx->border_color_buffer) {
415 uint64_t bc_va = sctx->border_color_buffer->gpu_address;
426 if (sctx->gfx_level >= GFX9 && sctx->gfx_level < GFX11 &&
427 (cs != &sctx->gfx_cs || !sctx->screen->info.has_graphics)) {
429 sctx->gfx_level >= GFX10 ? 0x20 : 0);
440 if (sctx->gfx_level >= GFX10) {
449 if (sctx->gfx_level < GFX11)
453 if (sctx->gfx_level >= GFX11) {
466 static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_shader *shader)
470 scratch_needed = sctx->max_seen_compute_scratch_bytes_per_wave * sctx->screen->info.max_scratch_waves;
471 if (sctx->compute_scratch_buffer)
472 scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0;
475 si_resource_reference(&sctx->compute_scratch_buffer, NULL);
477 sctx->compute_scratch_buffer =
478 si_aligned_buffer_create(&sctx->screen->b,
482 scratch_needed, sctx->screen->info.pte_fragment_size);
484 if (!sctx->compute_scratch_buffer)
488 if (sctx->compute_scratch_buffer != shader->scratch_bo && scratch_needed) {
489 if (sctx->gfx_level < GFX11) {
490 uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;
492 if (!si_shader_binary_upload(sctx->screen, shader, scratch_va))
495 si_resource_reference(&shader->scratch_bo, sctx->compute_scratch_buffer);
501 static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute *program,
505 struct radeon_cmdbuf *cs = &sctx->gfx_cs;
512 if (sctx->cs_shader_state.emitted_program == program && sctx->cs_shader_state.offset == offset)
529 if (sctx->gfx_level <= GFX6) {
543 ac_get_scratch_tmpring_size(&sctx->screen->info,
545 &sctx->max_seen_compute_scratch_bytes_per_wave, &tmpring_size);
547 if (!si_setup_compute_scratch_buffer(sctx, shader))
551 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, shader->scratch_bo,
562 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, shader->bo,
568 if (sctx->gfx_level >= GFX11) {
573 if (sctx->gfx_level >= GFX11 && shader->scratch_bo) {
575 radeon_emit(sctx->compute_scratch_buffer->gpu_address >> 8);
576 radeon_emit(sctx->compute_scratch_buffer->gpu_address >> 40);
584 COMPUTE_DBG(sctx->screen,
592 sctx->cs_shader_state.emitted_program = program;
593 sctx->cs_shader_state.offset = offset;
599 static void setup_scratch_rsrc_user_sgprs(struct si_context *sctx,
602 struct radeon_cmdbuf *cs = &sctx->gfx_cs;
603 uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;
611 if (sctx->gfx_level >= GFX11)
620 if (sctx->gfx_level >= GFX9) {
625 if (sctx->gfx_level < GFX8) {
641 static void si_setup_user_sgprs_co_v2(struct si_context *sctx, const amd_kernel_code_t *code_object,
644 struct si_compute *program = sctx->cs_shader_state.program;
645 struct radeon_cmdbuf *cs = &sctx->gfx_cs;
656 setup_scratch_rsrc_user_sgprs(sctx, code_object, user_sgpr);
685 u_upload_data(sctx->b.const_uploader, 0, sizeof(dispatch), 256, &dispatch, &dispatch_offset,
692 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, dispatch_buf,
723 static bool si_upload_compute_input(struct si_context *sctx, const amd_kernel_code_t *code_object,
726 struct si_compute *program = sctx->cs_shader_state.program;
733 u_upload_alloc(sctx->b.const_uploader, 0, program->input_size,
734 sctx->screen->info.tcc_cache_line_size, &kernel_args_offset,
746 COMPUTE_DBG(sctx->screen, "input %u : %u\n", i, kernel_args[i]);
749 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, input_buffer,
752 si_setup_user_sgprs_co_v2(sctx, code_object, info, kernel_args_va);
757 static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_grid_info *info)
759 struct si_compute *program = sctx->cs_shader_state.program;
761 struct radeon_cmdbuf *cs = &sctx->gfx_cs;
775 si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_REG, NULL, (grid_size_reg >> 2) + i,
795 radeon_emit_array(sctx->cs_user_data, sel->info.base.cs.user_data_components_amd);
800 static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_grid_info *info)
802 struct si_screen *sscreen = sctx->screen;
803 struct radeon_cmdbuf *cs = &sctx->gfx_cs;
804 bool render_cond_bit = sctx->render_cond_enabled;
807 DIV_ROUND_UP(threads_per_threadgroup, sctx->cs_shader_state.program->shader.wave_size);
810 if (sctx->gfx_level >= GFX10 && waves_per_threadgroup == 1)
813 if (unlikely(sctx->thread_trace_enabled)) {
814 si_write_event_with_dims_marker(sctx, &sctx->gfx_cs,
823 sctx->cs_max_waves_per_sh, threadgroups_per_cu));
828 S_00B800_ORDER_MODE(sctx->gfx_level >= GFX7) |
829 S_00B800_CS_W32_EN(sctx->cs_shader_state.program->shader.wave_size == 32);
861 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(info->indirect),
880 if (unlikely(sctx->thread_trace_enabled && sctx->gfx_level >= GFX9)) {
887 static bool si_check_needs_implicit_sync(struct si_context *sctx)
901 struct si_shader_info *info = &sctx->cs_shader_state.program->sel.info;
902 struct si_samplers *samplers = &sctx->samplers[PIPE_SHADER_COMPUTE];
910 if (sctx->ws->cs_is_buffer_referenced(&sctx->gfx_cs, res->buf,
915 struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE];
923 if (sctx->ws->cs_is_buffer_referenced(&sctx->gfx_cs, res->buf,
932 struct si_context *sctx = (struct si_context *)ctx;
933 struct si_screen *sscreen = sctx->screen;
934 struct si_compute *program = sctx->cs_shader_state.program;
941 sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
946 si_check_dirty_buffers_textures(sctx);
948 if (sctx->has_graphics) {
949 if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
950 si_update_fb_dirtiness_after_rendering(sctx);
951 sctx->last_num_draw_calls = sctx->num_draw_calls;
953 if (sctx->force_cb_shader_coherent || si_check_needs_implicit_sync(sctx))
954 si_make_CB_shader_coherent(sctx, 0,
955 sctx->framebuffer.CB_has_shader_readable_metadata,
956 sctx->framebuffer.all_DCC_pipe_aligned);
959 si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
963 si_context_add_resource_size(sctx, &program->shader.bo->b.b);
967 si_context_add_resource_size(sctx, info->indirect);
970 if (sctx->gfx_level <= GFX8 && si_resource(info->indirect)->TC_L2_dirty) {
971 sctx->flags |= SI_CONTEXT_WB_L2;
976 si_need_gfx_cs_space(sctx, 0);
979 if (unlikely(radeon_uses_secure_bos(sctx->ws))) {
980 bool secure = si_compute_resources_check_encrypted(sctx);
981 if (secure != sctx->ws->cs_is_secure(&sctx->gfx_cs)) {
982 si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW |
988 if (sctx->bo_list_add_all_compute_resources)
989 si_compute_resources_add_all_to_bo_list(sctx);
991 if (!sctx->cs_shader_state.initialized) {
992 si_emit_initial_compute_regs(sctx, &sctx->gfx_cs);
994 sctx->cs_shader_state.emitted_program = NULL;
995 sctx->cs_shader_state.initialized = true;
1000 if (!si_switch_compute_shader(sctx, program, &program->shader, code_object, info->pc, &prefetch))
1003 si_upload_compute_shader_descriptors(sctx);
1004 si_emit_compute_shader_pointers(sctx);
1007 unlikely(!si_upload_compute_input(sctx, code_object, info)))
1016 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, buffer,
1021 if (sctx->flags)
1022 sctx->emit_cache_flush(sctx, &sctx->gfx_cs);
1024 if (sctx->has_graphics && si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond)) {
1025 sctx->atoms.s.render_cond.emit(sctx);
1026 si_set_atom_dirty(sctx, &sctx->atoms.s.render_cond, false);
1030 if (sctx->gfx_level >= GFX7 && prefetch)
1031 si_cp_dma_prefetch(sctx, &program->shader.bo->b.b, 0, program->shader.bo->b.b.width0);
1034 si_setup_nir_user_data(sctx, info);
1036 si_emit_dispatch_packets(sctx, info);
1038 if (unlikely(sctx->current_saved_cs)) {
1039 si_trace_emit(sctx);
1040 si_log_compute_state(sctx, sctx->log);
1044 unsigned display_dcc_store_mask = sctx->images[PIPE_SHADER_COMPUTE].display_dcc_store_mask &
1048 sctx->images[PIPE_SHADER_COMPUTE].views[u_bit_scan(&display_dcc_store_mask)].resource;
1050 si_mark_display_dcc_dirty(sctx, tex);
1055 sctx->compute_is_busy = true;
1056 sctx->num_compute_calls++;
1059 sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
1083 struct si_context *sctx = (struct si_context *)ctx;
1088 if (program == sctx->cs_shader_state.program)
1089 sctx->cs_shader_state.program = NULL;
1091 if (program == sctx->cs_shader_state.emitted_program)
1092 sctx->cs_shader_state.emitted_program = NULL;
1102 void si_init_compute_functions(struct si_context *sctx)
1104 sctx->b.create_compute_state = si_create_compute_state;
1105 sctx->b.delete_compute_state = si_delete_compute_state;
1106 sctx->b.bind_compute_state = si_bind_compute_state;
1107 sctx->b.set_compute_resources = si_set_compute_resources;
1108 sctx->b.set_global_binding = si_set_global_binding;
1109 sctx->b.launch_grid = si_launch_grid;