1/* 2 * Copyright 2013 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25/* Resource binding slots and sampler states (each described with 8 or 26 * 4 dwords) are stored in lists in memory which is accessed by shaders 27 * using scalar load instructions. 28 * 29 * This file is responsible for managing such lists. It keeps a copy of all 30 * descriptors in CPU memory and re-uploads a whole list if some slots have 31 * been changed. 32 * 33 * This code is also responsible for updating shader pointers to those lists. 34 * 35 * Note that CP DMA can't be used for updating the lists, because a GPU hang 36 * could leave the list in a mid-IB state and the next IB would get wrong 37 * descriptors and the whole context would be unusable at that point. 38 * (Note: The register shadowing can't be used due to the same reason) 39 * 40 * Also, uploading descriptors to newly allocated memory doesn't require 41 * a KCACHE flush. 42 * 43 * 44 * Possible scenarios for one 16 dword image+sampler slot: 45 * 46 * | Image | w/ FMASK | Buffer | NULL 47 * [ 0: 3] Image[0:3] | Image[0:3] | Null[0:3] | Null[0:3] 48 * [ 4: 7] Image[4:7] | Image[4:7] | Buffer[0:3] | 0 49 * [ 8:11] Null[0:3] | Fmask[0:3] | Null[0:3] | Null[0:3] 50 * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3] 51 * 52 * FMASK implies MSAA, therefore no sampler state. 53 * Sampler states are never unbound except when FMASK is bound. 54 */ 55 56#include "si_pipe.h" 57#include "si_compute.h" 58#include "si_build_pm4.h" 59#include "sid.h" 60#include "util/format/u_format.h" 61#include "util/hash_table.h" 62#include "util/u_idalloc.h" 63#include "util/u_memory.h" 64#include "util/u_upload_mgr.h" 65 66/* NULL image and buffer descriptor for textures (alpha = 1) and images 67 * (alpha = 0). 68 * 69 * For images, all fields must be zero except for the swizzle, which 70 * supports arbitrary combinations of 0s and 1s. The texture type must be 71 * any valid type (e.g. 1D). If the texture type isn't set, the hw hangs. 72 * 73 * For buffers, all fields must be zero. If they are not, the hw hangs. 74 * 75 * This is the only reason why the buffer descriptor must be in words [4:7]. 76 */ 77static uint32_t null_texture_descriptor[8] = { 78 0, 0, 0, S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) | S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D) 79 /* the rest must contain zeros, which is also used by the buffer 80 * descriptor */ 81}; 82 83static uint32_t null_image_descriptor[8] = { 84 0, 0, 0, S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D) 85 /* the rest must contain zeros, which is also used by the buffer 86 * descriptor */ 87}; 88 89static uint64_t si_desc_extract_buffer_address(const uint32_t *desc) 90{ 91 uint64_t va = desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32); 92 93 /* Sign-extend the 48-bit address. */ 94 va <<= 16; 95 va = (int64_t)va >> 16; 96 return va; 97} 98 99static void si_init_descriptor_list(uint32_t *desc_list, unsigned element_dw_size, 100 unsigned num_elements, const uint32_t *null_descriptor) 101{ 102 int i; 103 104 /* Initialize the array to NULL descriptors if the element size is 8. */ 105 if (null_descriptor) { 106 assert(element_dw_size % 8 == 0); 107 for (i = 0; i < num_elements * element_dw_size / 8; i++) 108 memcpy(desc_list + i * 8, null_descriptor, 8 * 4); 109 } 110} 111 112static void si_init_descriptors(struct si_descriptors *desc, short shader_userdata_rel_index, 113 unsigned element_dw_size, unsigned num_elements) 114{ 115 desc->list = CALLOC(num_elements, element_dw_size * 4); 116 desc->element_dw_size = element_dw_size; 117 desc->num_elements = num_elements; 118 desc->shader_userdata_offset = shader_userdata_rel_index * 4; 119 desc->slot_index_to_bind_directly = -1; 120} 121 122static void si_release_descriptors(struct si_descriptors *desc) 123{ 124 si_resource_reference(&desc->buffer, NULL); 125 FREE(desc->list); 126} 127 128static bool si_upload_descriptors(struct si_context *sctx, struct si_descriptors *desc) 129{ 130 unsigned slot_size = desc->element_dw_size * 4; 131 unsigned first_slot_offset = desc->first_active_slot * slot_size; 132 unsigned upload_size = desc->num_active_slots * slot_size; 133 134 /* Skip the upload if no shader is using the descriptors. dirty_mask 135 * will stay dirty and the descriptors will be uploaded when there is 136 * a shader using them. 137 */ 138 if (!upload_size) 139 return true; 140 141 /* If there is just one active descriptor, bind it directly. */ 142 if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly && 143 desc->num_active_slots == 1) { 144 uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly * desc->element_dw_size]; 145 146 /* The buffer is already in the buffer list. */ 147 si_resource_reference(&desc->buffer, NULL); 148 desc->gpu_list = NULL; 149 desc->gpu_address = si_desc_extract_buffer_address(descriptor); 150 return true; 151 } 152 153 uint32_t *ptr; 154 unsigned buffer_offset; 155 u_upload_alloc(sctx->b.const_uploader, first_slot_offset, upload_size, 156 si_optimal_tcc_alignment(sctx, upload_size), &buffer_offset, 157 (struct pipe_resource **)&desc->buffer, (void **)&ptr); 158 if (!desc->buffer) { 159 desc->gpu_address = 0; 160 return false; /* skip the draw call */ 161 } 162 163 util_memcpy_cpu_to_le32(ptr, (char *)desc->list + first_slot_offset, upload_size); 164 desc->gpu_list = ptr - first_slot_offset / 4; 165 166 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, desc->buffer, 167 RADEON_USAGE_READ | RADEON_PRIO_DESCRIPTORS); 168 169 /* The shader pointer should point to slot 0. */ 170 buffer_offset -= first_slot_offset; 171 desc->gpu_address = desc->buffer->gpu_address + buffer_offset; 172 173 assert(desc->buffer->flags & RADEON_FLAG_32BIT); 174 assert((desc->buffer->gpu_address >> 32) == sctx->screen->info.address32_hi); 175 assert((desc->gpu_address >> 32) == sctx->screen->info.address32_hi); 176 return true; 177} 178 179static void 180si_add_descriptors_to_bo_list(struct si_context *sctx, struct si_descriptors *desc) 181{ 182 if (!desc->buffer) 183 return; 184 185 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, desc->buffer, 186 RADEON_USAGE_READ | RADEON_PRIO_DESCRIPTORS); 187} 188 189/* SAMPLER VIEWS */ 190 191static inline unsigned si_get_sampler_view_priority(struct si_resource *res) 192{ 193 if (res->b.b.target == PIPE_BUFFER) 194 return RADEON_PRIO_SAMPLER_BUFFER; 195 196 if (res->b.b.nr_samples > 1) 197 return RADEON_PRIO_SAMPLER_TEXTURE_MSAA; 198 199 return RADEON_PRIO_SAMPLER_TEXTURE; 200} 201 202static struct si_descriptors *si_sampler_and_image_descriptors(struct si_context *sctx, 203 unsigned shader) 204{ 205 return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)]; 206} 207 208static void si_release_sampler_views(struct si_samplers *samplers) 209{ 210 int i; 211 212 for (i = 0; i < ARRAY_SIZE(samplers->views); i++) { 213 pipe_sampler_view_reference(&samplers->views[i], NULL); 214 } 215} 216 217static void si_sampler_view_add_buffer(struct si_context *sctx, struct pipe_resource *resource, 218 unsigned usage, bool is_stencil_sampler, 219 bool check_mem) 220{ 221 struct si_texture *tex = (struct si_texture *)resource; 222 unsigned priority; 223 224 if (!resource) 225 return; 226 227 /* Use the flushed depth texture if direct sampling is unsupported. */ 228 if (resource->target != PIPE_BUFFER && tex->is_depth && 229 !si_can_sample_zs(tex, is_stencil_sampler)) 230 tex = tex->flushed_depth_texture; 231 232 priority = si_get_sampler_view_priority(&tex->buffer); 233 radeon_add_to_gfx_buffer_list_check_mem(sctx, &tex->buffer, usage | priority, check_mem); 234} 235 236static void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_samplers *samplers) 237{ 238 unsigned mask = samplers->enabled_mask; 239 240 /* Add buffers to the CS. */ 241 while (mask) { 242 int i = u_bit_scan(&mask); 243 struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i]; 244 245 si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ, 246 sview->is_stencil_sampler, false); 247 } 248} 249 250static bool si_sampler_views_check_encrypted(struct si_context *sctx, struct si_samplers *samplers, 251 unsigned samplers_declared) 252{ 253 unsigned mask = samplers->enabled_mask & samplers_declared; 254 255 /* Verify if a samplers uses an encrypted resource */ 256 while (mask) { 257 int i = u_bit_scan(&mask); 258 struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i]; 259 260 struct si_resource *res = si_resource(sview->base.texture); 261 if (res->flags & RADEON_FLAG_ENCRYPTED) 262 return true; 263 } 264 return false; 265} 266 267/* Set buffer descriptor fields that can be changed by reallocations. */ 268static void si_set_buf_desc_address(struct si_resource *buf, uint64_t offset, uint32_t *state) 269{ 270 uint64_t va = buf->gpu_address + offset; 271 272 state[0] = va; 273 state[1] &= C_008F04_BASE_ADDRESS_HI; 274 state[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32); 275} 276 277/* Set texture descriptor fields that can be changed by reallocations. 278 * 279 * \param tex texture 280 * \param base_level_info information of the level of BASE_ADDRESS 281 * \param base_level the level of BASE_ADDRESS 282 * \param first_level pipe_sampler_view.u.tex.first_level 283 * \param block_width util_format_get_blockwidth() 284 * \param is_stencil select between separate Z & Stencil 285 * \param state descriptor to update 286 */ 287void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture *tex, 288 const struct legacy_surf_level *base_level_info, 289 unsigned base_level, unsigned first_level, unsigned block_width, 290 /* restrict decreases overhead of si_set_sampler_view_desc ~8x. */ 291 bool is_stencil, uint16_t access, uint32_t * restrict state) 292{ 293 uint64_t va, meta_va = 0; 294 295 if (tex->is_depth && !si_can_sample_zs(tex, is_stencil)) { 296 tex = tex->flushed_depth_texture; 297 is_stencil = false; 298 } 299 300 va = tex->buffer.gpu_address; 301 302 if (sscreen->info.gfx_level >= GFX9) { 303 /* Only stencil_offset needs to be added here. */ 304 if (is_stencil) 305 va += tex->surface.u.gfx9.zs.stencil_offset; 306 else 307 va += tex->surface.u.gfx9.surf_offset; 308 } else { 309 va += (uint64_t)base_level_info->offset_256B * 256; 310 } 311 312 state[0] = va >> 8; 313 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); 314 315 /* Only macrotiled modes can set tile swizzle. 316 * GFX9 doesn't use (legacy) base_level_info. 317 */ 318 if (sscreen->info.gfx_level >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D) 319 state[0] |= tex->surface.tile_swizzle; 320 321 if (sscreen->info.gfx_level >= GFX8) { 322 if (!(access & SI_IMAGE_ACCESS_DCC_OFF) && vi_dcc_enabled(tex, first_level)) { 323 meta_va = tex->buffer.gpu_address + tex->surface.meta_offset; 324 325 if (sscreen->info.gfx_level == GFX8) { 326 meta_va += tex->surface.u.legacy.color.dcc_level[base_level].dcc_offset; 327 assert(base_level_info->mode == RADEON_SURF_MODE_2D); 328 } 329 330 unsigned dcc_tile_swizzle = tex->surface.tile_swizzle << 8; 331 dcc_tile_swizzle &= (1 << tex->surface.meta_alignment_log2) - 1; 332 meta_va |= dcc_tile_swizzle; 333 } else if (vi_tc_compat_htile_enabled(tex, first_level, 334 is_stencil ? PIPE_MASK_S : PIPE_MASK_Z)) { 335 meta_va = tex->buffer.gpu_address + tex->surface.meta_offset; 336 } 337 338 if (meta_va) 339 state[6] |= S_008F28_COMPRESSION_EN(1); 340 } 341 342 if (sscreen->info.gfx_level >= GFX8 && sscreen->info.gfx_level <= GFX9) 343 state[7] = meta_va >> 8; 344 345 if (sscreen->info.gfx_level >= GFX10) { 346 if (is_stencil) { 347 state[3] |= S_00A00C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode); 348 } else { 349 state[3] |= S_00A00C_SW_MODE(tex->surface.u.gfx9.swizzle_mode); 350 } 351 352 if (meta_va) { 353 struct gfx9_surf_meta_flags meta = { 354 .rb_aligned = 1, 355 .pipe_aligned = 1, 356 }; 357 358 if (!tex->is_depth && tex->surface.meta_offset) 359 meta = tex->surface.u.gfx9.color.dcc; 360 361 state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) | 362 S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8) | 363 /* DCC image stores require the following settings: 364 * - INDEPENDENT_64B_BLOCKS = 0 365 * - INDEPENDENT_128B_BLOCKS = 1 366 * - MAX_COMPRESSED_BLOCK_SIZE = 128B 367 * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used) 368 * 369 * The same limitations apply to SDMA compressed stores because 370 * SDMA uses the same DCC codec. 371 */ 372 S_00A018_WRITE_COMPRESS_ENABLE(ac_surface_supports_dcc_image_stores(sscreen->info.gfx_level, &tex->surface) && 373 (access & SI_IMAGE_ACCESS_ALLOW_DCC_STORE)); 374 375 /* TC-compatible MSAA HTILE requires ITERATE_256. */ 376 if (tex->is_depth && tex->buffer.b.b.nr_samples >= 2) 377 state[6] |= S_00A018_ITERATE_256(1); 378 } 379 380 state[7] = meta_va >> 16; 381 } else if (sscreen->info.gfx_level == GFX9) { 382 if (is_stencil) { 383 state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode); 384 state[4] |= S_008F20_PITCH(tex->surface.u.gfx9.zs.stencil_epitch); 385 } else { 386 uint16_t epitch = tex->surface.u.gfx9.epitch; 387 if (tex->buffer.b.b.format == PIPE_FORMAT_R8G8_R8B8_UNORM && 388 block_width == 1) { 389 /* epitch is patched in ac_surface for sdma/vcn blocks to get 390 * a value expressed in elements unit. 391 * But here the texture is used with block_width == 1 so we 392 * need epitch in pixel units. 393 */ 394 epitch = (epitch + 1) / tex->surface.blk_w - 1; 395 } 396 state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.swizzle_mode); 397 state[4] |= S_008F20_PITCH(epitch); 398 } 399 400 state[5] &= 401 C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED; 402 if (meta_va) { 403 struct gfx9_surf_meta_flags meta = { 404 .rb_aligned = 1, 405 .pipe_aligned = 1, 406 }; 407 408 if (!tex->is_depth && tex->surface.meta_offset) 409 meta = tex->surface.u.gfx9.color.dcc; 410 411 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) | 412 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) | 413 S_008F24_META_RB_ALIGNED(meta.rb_aligned); 414 } 415 } else { 416 /* GFX6-GFX8 */ 417 unsigned pitch = base_level_info->nblk_x * block_width; 418 unsigned index = si_tile_mode_index(tex, base_level, is_stencil); 419 420 state[3] |= S_008F1C_TILING_INDEX(index); 421 state[4] |= S_008F20_PITCH(pitch - 1); 422 } 423 424 if (tex->swap_rgb_to_bgr) { 425 unsigned swizzle_x = G_008F1C_DST_SEL_X(state[3]); 426 unsigned swizzle_z = G_008F1C_DST_SEL_Z(state[3]); 427 428 state[3] &= C_008F1C_DST_SEL_X; 429 state[3] |= S_008F1C_DST_SEL_X(swizzle_z); 430 state[3] &= C_008F1C_DST_SEL_Z; 431 state[3] |= S_008F1C_DST_SEL_Z(swizzle_x); 432 } 433} 434 435static void si_set_sampler_state_desc(struct si_sampler_state *sstate, 436 struct si_sampler_view *sview, struct si_texture *tex, 437 uint32_t *desc) 438{ 439 if (tex && tex->upgraded_depth && sview && !sview->is_stencil_sampler) 440 memcpy(desc, sstate->upgraded_depth_val, 4 * 4); 441 else 442 memcpy(desc, sstate->val, 4 * 4); 443} 444 445static void si_set_sampler_view_desc(struct si_context *sctx, struct si_sampler_view *sview, 446 struct si_sampler_state *sstate, 447 /* restrict decreases overhead of si_set_sampler_view_desc ~8x. */ 448 uint32_t * restrict desc) 449{ 450 struct pipe_sampler_view *view = &sview->base; 451 struct si_texture *tex = (struct si_texture *)view->texture; 452 453 assert(tex); /* views with texture == NULL aren't supported */ 454 455 if (tex->buffer.b.b.target == PIPE_BUFFER) { 456 memcpy(desc, sview->state, 8 * 4); 457 memcpy(desc + 8, null_texture_descriptor, 4 * 4); /* Disable FMASK. */ 458 si_set_buf_desc_address(&tex->buffer, sview->base.u.buf.offset, desc + 4); 459 return; 460 } 461 462 if (unlikely(sview->dcc_incompatible)) { 463 if (vi_dcc_enabled(tex, view->u.tex.first_level)) 464 if (!si_texture_disable_dcc(sctx, tex)) 465 si_decompress_dcc(sctx, tex); 466 467 sview->dcc_incompatible = false; 468 } 469 470 bool is_separate_stencil = tex->db_compatible && sview->is_stencil_sampler; 471 472 memcpy(desc, sview->state, 8 * 4); 473 si_set_mutable_tex_desc_fields(sctx->screen, tex, sview->base_level_info, 0, 474 sview->base.u.tex.first_level, sview->block_width, 475 is_separate_stencil, 0, desc); 476 477 if (tex->surface.fmask_size) { 478 memcpy(desc + 8, sview->fmask_state, 8 * 4); 479 } else { 480 /* Disable FMASK and bind sampler state in [12:15]. */ 481 memcpy(desc + 8, null_texture_descriptor, 4 * 4); 482 483 if (sstate) 484 si_set_sampler_state_desc(sstate, sview, tex, desc + 12); 485 } 486} 487 488static bool color_needs_decompression(struct si_texture *tex) 489{ 490 if (tex->is_depth) 491 return false; 492 493 return tex->surface.fmask_size || 494 (tex->dirty_level_mask && (tex->cmask_buffer || tex->surface.meta_offset)); 495} 496 497static bool depth_needs_decompression(struct si_texture *tex, bool is_stencil) 498{ 499 /* If the depth/stencil texture is TC-compatible, no decompression 500 * will be done. The decompression function will only flush DB caches 501 * to make it coherent with shaders. That's necessary because the driver 502 * doesn't flush DB caches in any other case. 503 */ 504 return tex->db_compatible && (tex->dirty_level_mask || (is_stencil && tex->stencil_dirty_level_mask)); 505} 506 507static void si_reset_sampler_view_slot(struct si_samplers *samplers, unsigned slot, 508 uint32_t * restrict desc) 509{ 510 pipe_sampler_view_reference(&samplers->views[slot], NULL); 511 memcpy(desc, null_texture_descriptor, 8 * 4); 512 /* Only clear the lower dwords of FMASK. */ 513 memcpy(desc + 8, null_texture_descriptor, 4 * 4); 514 /* Re-set the sampler state if we are transitioning from FMASK. */ 515 if (samplers->sampler_states[slot]) 516 si_set_sampler_state_desc(samplers->sampler_states[slot], NULL, NULL, desc + 12); 517} 518 519static void si_set_sampler_views(struct si_context *sctx, unsigned shader, 520 unsigned start_slot, unsigned count, 521 unsigned unbind_num_trailing_slots, 522 bool take_ownership, struct pipe_sampler_view **views, 523 bool disallow_early_out) 524{ 525 struct si_samplers *samplers = &sctx->samplers[shader]; 526 struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader); 527 uint32_t unbound_mask = 0; 528 529 if (views) { 530 for (unsigned i = 0; i < count; i++) { 531 unsigned slot = start_slot + i; 532 struct si_sampler_view *sview = (struct si_sampler_view *)views[i]; 533 unsigned desc_slot = si_get_sampler_slot(slot); 534 /* restrict decreases overhead of si_set_sampler_view_desc ~8x. */ 535 uint32_t *restrict desc = descs->list + desc_slot * 16; 536 537 if (samplers->views[slot] == &sview->base && !disallow_early_out) { 538 if (take_ownership) { 539 struct pipe_sampler_view *view = views[i]; 540 pipe_sampler_view_reference(&view, NULL); 541 } 542 continue; 543 } 544 545 if (sview) { 546 struct si_texture *tex = (struct si_texture *)sview->base.texture; 547 548 si_set_sampler_view_desc(sctx, sview, samplers->sampler_states[slot], desc); 549 550 if (tex->buffer.b.b.target == PIPE_BUFFER) { 551 tex->buffer.bind_history |= SI_BIND_SAMPLER_BUFFER(shader); 552 samplers->needs_depth_decompress_mask &= ~(1u << slot); 553 samplers->needs_color_decompress_mask &= ~(1u << slot); 554 } else { 555 if (tex->is_depth) { 556 samplers->has_depth_tex_mask |= 1u << slot; 557 samplers->needs_color_decompress_mask &= ~(1u << slot); 558 559 if (depth_needs_decompression(tex, sview->is_stencil_sampler)) { 560 samplers->needs_depth_decompress_mask |= 1u << slot; 561 } else { 562 samplers->needs_depth_decompress_mask &= ~(1u << slot); 563 } 564 } else { 565 samplers->has_depth_tex_mask &= ~(1u << slot); 566 samplers->needs_depth_decompress_mask &= ~(1u << slot); 567 568 if (color_needs_decompression(tex)) { 569 samplers->needs_color_decompress_mask |= 1u << slot; 570 } else { 571 samplers->needs_color_decompress_mask &= ~(1u << slot); 572 } 573 } 574 575 if (vi_dcc_enabled(tex, sview->base.u.tex.first_level) && 576 p_atomic_read(&tex->framebuffers_bound)) 577 sctx->need_check_render_feedback = true; 578 } 579 580 if (take_ownership) { 581 pipe_sampler_view_reference(&samplers->views[slot], NULL); 582 samplers->views[slot] = &sview->base; 583 } else { 584 pipe_sampler_view_reference(&samplers->views[slot], &sview->base); 585 } 586 samplers->enabled_mask |= 1u << slot; 587 588 /* Since this can flush, it must be done after enabled_mask is 589 * updated. */ 590 si_sampler_view_add_buffer(sctx, &tex->buffer.b.b, RADEON_USAGE_READ, 591 sview->is_stencil_sampler, true); 592 } else { 593 si_reset_sampler_view_slot(samplers, slot, desc); 594 unbound_mask |= 1u << slot; 595 } 596 } 597 } else { 598 unbind_num_trailing_slots += count; 599 count = 0; 600 } 601 602 for (unsigned i = 0; i < unbind_num_trailing_slots; i++) { 603 unsigned slot = start_slot + count + i; 604 unsigned desc_slot = si_get_sampler_slot(slot); 605 uint32_t * restrict desc = descs->list + desc_slot * 16; 606 607 if (samplers->views[slot]) 608 si_reset_sampler_view_slot(samplers, slot, desc); 609 } 610 611 unbound_mask |= BITFIELD_RANGE(start_slot + count, unbind_num_trailing_slots); 612 samplers->enabled_mask &= ~unbound_mask; 613 samplers->has_depth_tex_mask &= ~unbound_mask; 614 samplers->needs_depth_decompress_mask &= ~unbound_mask; 615 samplers->needs_color_decompress_mask &= ~unbound_mask; 616 617 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 618} 619 620static void si_update_shader_needs_decompress_mask(struct si_context *sctx, unsigned shader) 621{ 622 struct si_samplers *samplers = &sctx->samplers[shader]; 623 unsigned shader_bit = 1 << shader; 624 625 if (samplers->needs_depth_decompress_mask || samplers->needs_color_decompress_mask || 626 sctx->images[shader].needs_color_decompress_mask) 627 sctx->shader_needs_decompress_mask |= shader_bit; 628 else 629 sctx->shader_needs_decompress_mask &= ~shader_bit; 630 631 if (samplers->has_depth_tex_mask) 632 sctx->shader_has_depth_tex |= shader_bit; 633 else 634 sctx->shader_has_depth_tex &= ~shader_bit; 635} 636 637static void si_pipe_set_sampler_views(struct pipe_context *ctx, enum pipe_shader_type shader, 638 unsigned start, unsigned count, 639 unsigned unbind_num_trailing_slots, 640 bool take_ownership, struct pipe_sampler_view **views) 641{ 642 struct si_context *sctx = (struct si_context *)ctx; 643 644 if ((!count && !unbind_num_trailing_slots) || shader >= SI_NUM_SHADERS) 645 return; 646 647 si_set_sampler_views(sctx, shader, start, count, unbind_num_trailing_slots, 648 take_ownership, views, false); 649 si_update_shader_needs_decompress_mask(sctx, shader); 650} 651 652static void si_samplers_update_needs_color_decompress_mask(struct si_samplers *samplers) 653{ 654 unsigned mask = samplers->enabled_mask; 655 656 while (mask) { 657 int i = u_bit_scan(&mask); 658 struct pipe_resource *res = samplers->views[i]->texture; 659 660 if (res && res->target != PIPE_BUFFER) { 661 struct si_texture *tex = (struct si_texture *)res; 662 663 if (color_needs_decompression(tex)) { 664 samplers->needs_color_decompress_mask |= 1u << i; 665 } else { 666 samplers->needs_color_decompress_mask &= ~(1u << i); 667 } 668 } 669 } 670} 671 672/* IMAGE VIEWS */ 673 674static void si_release_image_views(struct si_images *images) 675{ 676 unsigned i; 677 678 for (i = 0; i < SI_NUM_IMAGES; ++i) { 679 struct pipe_image_view *view = &images->views[i]; 680 681 pipe_resource_reference(&view->resource, NULL); 682 } 683} 684 685static void si_image_views_begin_new_cs(struct si_context *sctx, struct si_images *images) 686{ 687 uint mask = images->enabled_mask; 688 689 /* Add buffers to the CS. */ 690 while (mask) { 691 int i = u_bit_scan(&mask); 692 struct pipe_image_view *view = &images->views[i]; 693 694 assert(view->resource); 695 696 si_sampler_view_add_buffer(sctx, view->resource, RADEON_USAGE_READWRITE, false, false); 697 } 698} 699 700static bool si_image_views_check_encrypted(struct si_context *sctx, struct si_images *images, 701 unsigned images_declared) 702{ 703 uint mask = images->enabled_mask & images_declared; 704 705 while (mask) { 706 int i = u_bit_scan(&mask); 707 struct pipe_image_view *view = &images->views[i]; 708 709 assert(view->resource); 710 711 struct si_texture *tex = (struct si_texture *)view->resource; 712 if (tex->buffer.flags & RADEON_FLAG_ENCRYPTED) 713 return true; 714 } 715 return false; 716} 717 718static void si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot) 719{ 720 struct si_images *images = &ctx->images[shader]; 721 722 if (images->enabled_mask & (1u << slot)) { 723 struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader); 724 unsigned desc_slot = si_get_image_slot(slot); 725 726 pipe_resource_reference(&images->views[slot].resource, NULL); 727 images->needs_color_decompress_mask &= ~(1 << slot); 728 729 memcpy(descs->list + desc_slot * 8, null_image_descriptor, 8 * 4); 730 images->enabled_mask &= ~(1u << slot); 731 images->display_dcc_store_mask &= ~(1u << slot); 732 ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 733 } 734} 735 736static void si_mark_image_range_valid(const struct pipe_image_view *view) 737{ 738 struct si_resource *res = si_resource(view->resource); 739 740 if (res->b.b.target != PIPE_BUFFER) 741 return; 742 743 util_range_add(&res->b.b, &res->valid_buffer_range, view->u.buf.offset, 744 view->u.buf.offset + view->u.buf.size); 745} 746 747static void si_set_shader_image_desc(struct si_context *ctx, const struct pipe_image_view *view, 748 bool skip_decompress, uint32_t *desc, uint32_t *fmask_desc) 749{ 750 struct si_screen *screen = ctx->screen; 751 struct si_resource *res; 752 753 res = si_resource(view->resource); 754 755 if (res->b.b.target == PIPE_BUFFER) { 756 if (view->access & PIPE_IMAGE_ACCESS_WRITE) 757 si_mark_image_range_valid(view); 758 uint32_t elements = si_clamp_texture_texel_count(screen->max_texel_buffer_elements, 759 view->format, view->u.buf.size); 760 761 si_make_buffer_descriptor(screen, res, view->format, view->u.buf.offset, elements, 762 desc); 763 si_set_buf_desc_address(res, view->u.buf.offset, desc + 4); 764 } else { 765 static const unsigned char swizzle[4] = {0, 1, 2, 3}; 766 struct si_texture *tex = (struct si_texture *)res; 767 unsigned level = view->u.tex.level; 768 bool uses_dcc = vi_dcc_enabled(tex, level); 769 unsigned access = view->access; 770 771 if (uses_dcc && screen->always_allow_dcc_stores) 772 access |= SI_IMAGE_ACCESS_ALLOW_DCC_STORE; 773 774 assert(!tex->is_depth); 775 assert(fmask_desc || tex->surface.fmask_offset == 0); 776 777 if (uses_dcc && !skip_decompress && 778 !(access & SI_IMAGE_ACCESS_DCC_OFF) && 779 ((!(access & SI_IMAGE_ACCESS_ALLOW_DCC_STORE) && (access & PIPE_IMAGE_ACCESS_WRITE)) || 780 !vi_dcc_formats_compatible(screen, res->b.b.format, view->format))) { 781 /* If DCC can't be disabled, at least decompress it. 782 * The decompression is relatively cheap if the surface 783 * has been decompressed already. 784 */ 785 if (!si_texture_disable_dcc(ctx, tex)) 786 si_decompress_dcc(ctx, tex); 787 } 788 789 unsigned width = res->b.b.width0; 790 unsigned height = res->b.b.height0; 791 unsigned depth = res->b.b.depth0; 792 unsigned hw_level = level; 793 794 if (ctx->gfx_level <= GFX8) { 795 /* Always force the base level to the selected level. 796 * 797 * This is required for 3D textures, where otherwise 798 * selecting a single slice for non-layered bindings 799 * fails. It doesn't hurt the other targets. 800 */ 801 width = u_minify(width, level); 802 height = u_minify(height, level); 803 depth = u_minify(depth, level); 804 hw_level = 0; 805 } 806 807 if (access & SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT) { 808 if (ctx->gfx_level >= GFX9) { 809 /* Since the aligned width and height are derived from the width and height 810 * by the hw, set them directly as the width and height, so that UINT formats 811 * get exactly the same layout as BCn formats. 812 */ 813 width = tex->surface.u.gfx9.base_mip_width; 814 height = tex->surface.u.gfx9.base_mip_height; 815 } else { 816 width = util_format_get_nblocksx(tex->buffer.b.b.format, width); 817 height = util_format_get_nblocksy(tex->buffer.b.b.format, height); 818 } 819 } 820 821 screen->make_texture_descriptor( 822 screen, tex, false, res->b.b.target, view->format, swizzle, hw_level, hw_level, 823 view->u.tex.first_layer, view->u.tex.last_layer, width, height, depth, desc, fmask_desc); 824 si_set_mutable_tex_desc_fields(screen, tex, &tex->surface.u.legacy.level[level], level, level, 825 util_format_get_blockwidth(view->format), 826 false, access, desc); 827 } 828} 829 830static void si_set_shader_image(struct si_context *ctx, unsigned shader, unsigned slot, 831 const struct pipe_image_view *view, bool skip_decompress) 832{ 833 struct si_images *images = &ctx->images[shader]; 834 struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader); 835 struct si_resource *res; 836 837 if (!view || !view->resource) { 838 si_disable_shader_image(ctx, shader, slot); 839 return; 840 } 841 842 res = si_resource(view->resource); 843 844 si_set_shader_image_desc(ctx, view, skip_decompress, descs->list + si_get_image_slot(slot) * 8, 845 descs->list + si_get_image_slot(slot + SI_NUM_IMAGES) * 8); 846 847 if (&images->views[slot] != view) 848 util_copy_image_view(&images->views[slot], view); 849 850 if (res->b.b.target == PIPE_BUFFER) { 851 images->needs_color_decompress_mask &= ~(1 << slot); 852 images->display_dcc_store_mask &= ~(1u << slot); 853 res->bind_history |= SI_BIND_IMAGE_BUFFER(shader); 854 } else { 855 struct si_texture *tex = (struct si_texture *)res; 856 unsigned level = view->u.tex.level; 857 858 if (color_needs_decompression(tex)) { 859 images->needs_color_decompress_mask |= 1 << slot; 860 } else { 861 images->needs_color_decompress_mask &= ~(1 << slot); 862 } 863 864 if (tex->surface.display_dcc_offset && view->access & PIPE_IMAGE_ACCESS_WRITE) { 865 images->display_dcc_store_mask |= 1u << slot; 866 867 /* Set displayable_dcc_dirty for non-compute stages conservatively (before draw calls). */ 868 if (shader != PIPE_SHADER_COMPUTE) 869 tex->displayable_dcc_dirty = true; 870 } else { 871 images->display_dcc_store_mask &= ~(1u << slot); 872 } 873 874 if (vi_dcc_enabled(tex, level) && p_atomic_read(&tex->framebuffers_bound)) 875 ctx->need_check_render_feedback = true; 876 } 877 878 images->enabled_mask |= 1u << slot; 879 ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 880 881 /* Since this can flush, it must be done after enabled_mask is updated. */ 882 si_sampler_view_add_buffer( 883 ctx, &res->b.b, 884 (view->access & PIPE_IMAGE_ACCESS_WRITE) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, false, 885 true); 886} 887 888static void si_set_shader_images(struct pipe_context *pipe, enum pipe_shader_type shader, 889 unsigned start_slot, unsigned count, 890 unsigned unbind_num_trailing_slots, 891 const struct pipe_image_view *views) 892{ 893 struct si_context *ctx = (struct si_context *)pipe; 894 unsigned i, slot; 895 896 assert(shader < SI_NUM_SHADERS); 897 898 if (!count && !unbind_num_trailing_slots) 899 return; 900 901 assert(start_slot + count + unbind_num_trailing_slots <= SI_NUM_IMAGES); 902 903 if (views) { 904 for (i = 0, slot = start_slot; i < count; ++i, ++slot) 905 si_set_shader_image(ctx, shader, slot, &views[i], false); 906 } else { 907 for (i = 0, slot = start_slot; i < count; ++i, ++slot) 908 si_set_shader_image(ctx, shader, slot, NULL, false); 909 } 910 911 for (i = 0; i < unbind_num_trailing_slots; ++i, ++slot) 912 si_set_shader_image(ctx, shader, slot, NULL, false); 913 914 if (shader == PIPE_SHADER_COMPUTE && 915 ctx->cs_shader_state.program && 916 start_slot < ctx->cs_shader_state.program->sel.cs_num_images_in_user_sgprs) 917 ctx->compute_image_sgprs_dirty = true; 918 919 si_update_shader_needs_decompress_mask(ctx, shader); 920} 921 922static void si_images_update_needs_color_decompress_mask(struct si_images *images) 923{ 924 unsigned mask = images->enabled_mask; 925 926 while (mask) { 927 int i = u_bit_scan(&mask); 928 struct pipe_resource *res = images->views[i].resource; 929 930 if (res && res->target != PIPE_BUFFER) { 931 struct si_texture *tex = (struct si_texture *)res; 932 933 if (color_needs_decompression(tex)) { 934 images->needs_color_decompress_mask |= 1 << i; 935 } else { 936 images->needs_color_decompress_mask &= ~(1 << i); 937 } 938 } 939 } 940} 941 942void si_update_ps_colorbuf0_slot(struct si_context *sctx) 943{ 944 struct si_buffer_resources *buffers = &sctx->internal_bindings; 945 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL]; 946 unsigned slot = SI_PS_IMAGE_COLORBUF0; 947 struct pipe_surface *surf = NULL; 948 949 /* si_texture_disable_dcc can get us here again. */ 950 if (sctx->in_update_ps_colorbuf0_slot || sctx->blitter_running) { 951 assert(!sctx->ps_uses_fbfetch || sctx->framebuffer.state.cbufs[0]); 952 return; 953 } 954 sctx->in_update_ps_colorbuf0_slot = true; 955 956 /* See whether FBFETCH is used and color buffer 0 is set. */ 957 if (sctx->shader.ps.cso && sctx->shader.ps.cso->info.base.fs.uses_fbfetch_output && 958 sctx->framebuffer.state.nr_cbufs && sctx->framebuffer.state.cbufs[0]) 959 surf = sctx->framebuffer.state.cbufs[0]; 960 961 /* Return if FBFETCH transitions from disabled to disabled. */ 962 if (!buffers->buffers[slot] && !surf) { 963 assert(!sctx->ps_uses_fbfetch); 964 sctx->in_update_ps_colorbuf0_slot = false; 965 return; 966 } 967 968 sctx->ps_uses_fbfetch = surf != NULL; 969 si_update_ps_iter_samples(sctx); 970 971 if (surf) { 972 struct si_texture *tex = (struct si_texture *)surf->texture; 973 struct pipe_image_view view = {0}; 974 975 assert(tex); 976 assert(!tex->is_depth); 977 978 /* Disable DCC, because the texture is used as both a sampler 979 * and color buffer. 980 */ 981 si_texture_disable_dcc(sctx, tex); 982 983 if (tex->buffer.b.b.nr_samples <= 1 && tex->cmask_buffer) { 984 /* Disable CMASK. */ 985 assert(tex->cmask_buffer != &tex->buffer); 986 si_eliminate_fast_color_clear(sctx, tex, NULL); 987 si_texture_discard_cmask(sctx->screen, tex); 988 } 989 990 view.resource = surf->texture; 991 view.format = surf->format; 992 view.access = PIPE_IMAGE_ACCESS_READ; 993 view.u.tex.first_layer = surf->u.tex.first_layer; 994 view.u.tex.last_layer = surf->u.tex.last_layer; 995 view.u.tex.level = surf->u.tex.level; 996 997 /* Set the descriptor. */ 998 uint32_t *desc = descs->list + slot * 4; 999 memset(desc, 0, 16 * 4); 1000 si_set_shader_image_desc(sctx, &view, true, desc, desc + 8); 1001 1002 pipe_resource_reference(&buffers->buffers[slot], &tex->buffer.b.b); 1003 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, 1004 RADEON_USAGE_READ | RADEON_PRIO_SHADER_RW_IMAGE); 1005 buffers->enabled_mask |= 1llu << slot; 1006 } else { 1007 /* Clear the descriptor. */ 1008 memset(descs->list + slot * 4, 0, 8 * 4); 1009 pipe_resource_reference(&buffers->buffers[slot], NULL); 1010 buffers->enabled_mask &= ~(1llu << slot); 1011 } 1012 1013 sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL; 1014 sctx->in_update_ps_colorbuf0_slot = false; 1015} 1016 1017/* SAMPLER STATES */ 1018 1019static void si_bind_sampler_states(struct pipe_context *ctx, enum pipe_shader_type shader, 1020 unsigned start, unsigned count, void **states) 1021{ 1022 struct si_context *sctx = (struct si_context *)ctx; 1023 struct si_samplers *samplers = &sctx->samplers[shader]; 1024 struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader); 1025 struct si_sampler_state **sstates = (struct si_sampler_state **)states; 1026 int i; 1027 1028 if (!count || shader >= SI_NUM_SHADERS || !sstates) 1029 return; 1030 1031 for (i = 0; i < count; i++) { 1032 unsigned slot = start + i; 1033 unsigned desc_slot = si_get_sampler_slot(slot); 1034 1035 if (!sstates[i] || sstates[i] == samplers->sampler_states[slot]) 1036 continue; 1037 1038#ifndef NDEBUG 1039 assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC); 1040#endif 1041 samplers->sampler_states[slot] = sstates[i]; 1042 1043 /* If FMASK is bound, don't overwrite it. 1044 * The sampler state will be set after FMASK is unbound. 1045 */ 1046 struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[slot]; 1047 1048 struct si_texture *tex = NULL; 1049 1050 if (sview && sview->base.texture && sview->base.texture->target != PIPE_BUFFER) 1051 tex = (struct si_texture *)sview->base.texture; 1052 1053 if (tex && tex->surface.fmask_size) 1054 continue; 1055 1056 si_set_sampler_state_desc(sstates[i], sview, tex, desc->list + desc_slot * 16 + 12); 1057 1058 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 1059 } 1060} 1061 1062/* BUFFER RESOURCES */ 1063 1064static void si_init_buffer_resources(struct si_context *sctx, 1065 struct si_buffer_resources *buffers, 1066 struct si_descriptors *descs, unsigned num_buffers, 1067 short shader_userdata_rel_index, 1068 unsigned priority, 1069 unsigned priority_constbuf) 1070{ 1071 buffers->priority = priority; 1072 buffers->priority_constbuf = priority_constbuf; 1073 buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource *)); 1074 buffers->offsets = CALLOC(num_buffers, sizeof(buffers->offsets[0])); 1075 1076 si_init_descriptors(descs, shader_userdata_rel_index, 4, num_buffers); 1077 1078 /* Initialize buffer descriptors, so that we don't have to do it at bind time. */ 1079 for (unsigned i = 0; i < num_buffers; i++) { 1080 uint32_t *desc = descs->list + i * 4; 1081 1082 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 1083 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); 1084 1085 if (sctx->gfx_level >= GFX11) { 1086 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | 1087 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); 1088 } else if (sctx->gfx_level >= GFX10) { 1089 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | 1090 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); 1091 } else { 1092 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 1093 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 1094 } 1095 } 1096} 1097 1098static void si_release_buffer_resources(struct si_buffer_resources *buffers, 1099 struct si_descriptors *descs) 1100{ 1101 int i; 1102 1103 for (i = 0; i < descs->num_elements; i++) { 1104 pipe_resource_reference(&buffers->buffers[i], NULL); 1105 } 1106 1107 FREE(buffers->buffers); 1108 FREE(buffers->offsets); 1109} 1110 1111static void si_buffer_resources_begin_new_cs(struct si_context *sctx, 1112 struct si_buffer_resources *buffers) 1113{ 1114 uint64_t mask = buffers->enabled_mask; 1115 1116 /* Add buffers to the CS. */ 1117 while (mask) { 1118 int i = u_bit_scan64(&mask); 1119 1120 radeon_add_to_buffer_list( 1121 sctx, &sctx->gfx_cs, si_resource(buffers->buffers[i]), 1122 (buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ) | 1123 (i < SI_NUM_SHADER_BUFFERS ? buffers->priority : buffers->priority_constbuf)); 1124 } 1125} 1126 1127static bool si_buffer_resources_check_encrypted(struct si_context *sctx, 1128 struct si_buffer_resources *buffers) 1129{ 1130 uint64_t mask = buffers->enabled_mask; 1131 1132 while (mask) { 1133 int i = u_bit_scan64(&mask); 1134 1135 if (si_resource(buffers->buffers[i])->flags & RADEON_FLAG_ENCRYPTED) 1136 return true; 1137 } 1138 1139 return false; 1140} 1141 1142static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers, 1143 struct si_descriptors *descs, unsigned idx, 1144 struct pipe_resource **buf, unsigned *offset, 1145 unsigned *size) 1146{ 1147 pipe_resource_reference(buf, buffers->buffers[idx]); 1148 if (*buf) { 1149 struct si_resource *res = si_resource(*buf); 1150 const uint32_t *desc = descs->list + idx * 4; 1151 uint64_t va; 1152 1153 *size = desc[2]; 1154 1155 assert(G_008F04_STRIDE(desc[1]) == 0); 1156 va = si_desc_extract_buffer_address(desc); 1157 1158 assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size); 1159 *offset = va - res->gpu_address; 1160 } 1161} 1162 1163/* VERTEX BUFFERS */ 1164 1165static void si_vertex_buffers_begin_new_cs(struct si_context *sctx) 1166{ 1167 int count = sctx->num_vertex_elements; 1168 int i; 1169 1170 for (i = 0; i < count; i++) { 1171 int vb = sctx->vertex_elements->vertex_buffer_index[i]; 1172 1173 if (vb >= ARRAY_SIZE(sctx->vertex_buffer)) 1174 continue; 1175 if (!sctx->vertex_buffer[vb].buffer.resource) 1176 continue; 1177 1178 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, 1179 si_resource(sctx->vertex_buffer[vb].buffer.resource), 1180 RADEON_USAGE_READ | RADEON_PRIO_VERTEX_BUFFER); 1181 } 1182 1183 if (!sctx->vb_descriptors_buffer) 1184 return; 1185 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->vb_descriptors_buffer, 1186 RADEON_USAGE_READ | RADEON_PRIO_DESCRIPTORS); 1187} 1188 1189/* CONSTANT BUFFERS */ 1190 1191static struct si_descriptors *si_const_and_shader_buffer_descriptors(struct si_context *sctx, 1192 unsigned shader) 1193{ 1194 return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)]; 1195} 1196 1197static void si_upload_const_buffer(struct si_context *sctx, struct si_resource **buf, 1198 const uint8_t *ptr, unsigned size, uint32_t *const_offset) 1199{ 1200 void *tmp; 1201 1202 u_upload_alloc(sctx->b.const_uploader, 0, size, si_optimal_tcc_alignment(sctx, size), 1203 const_offset, (struct pipe_resource **)buf, &tmp); 1204 if (*buf) 1205 util_memcpy_cpu_to_le32(tmp, ptr, size); 1206} 1207 1208static void si_set_constant_buffer(struct si_context *sctx, struct si_buffer_resources *buffers, 1209 unsigned descriptors_idx, uint slot, bool take_ownership, 1210 const struct pipe_constant_buffer *input) 1211{ 1212 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; 1213 assert(slot < descs->num_elements); 1214 pipe_resource_reference(&buffers->buffers[slot], NULL); 1215 1216 /* GFX7 cannot unbind a constant buffer (S_BUFFER_LOAD is buggy 1217 * with a NULL buffer). We need to use a dummy buffer instead. */ 1218 if (sctx->gfx_level == GFX7 && (!input || (!input->buffer && !input->user_buffer))) 1219 input = &sctx->null_const_buf; 1220 1221 if (input && (input->buffer || input->user_buffer)) { 1222 struct pipe_resource *buffer = NULL; 1223 uint64_t va; 1224 unsigned buffer_offset; 1225 1226 /* Upload the user buffer if needed. */ 1227 if (input->user_buffer) { 1228 si_upload_const_buffer(sctx, (struct si_resource **)&buffer, input->user_buffer, 1229 input->buffer_size, &buffer_offset); 1230 if (!buffer) { 1231 /* Just unbind on failure. */ 1232 si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, false, NULL); 1233 return; 1234 } 1235 } else { 1236 if (take_ownership) { 1237 buffer = input->buffer; 1238 } else { 1239 pipe_resource_reference(&buffer, input->buffer); 1240 } 1241 buffer_offset = input->buffer_offset; 1242 } 1243 1244 va = si_resource(buffer)->gpu_address + buffer_offset; 1245 1246 /* Set the descriptor. */ 1247 uint32_t *desc = descs->list + slot * 4; 1248 desc[0] = va; 1249 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(0); 1250 desc[2] = input->buffer_size; 1251 1252 buffers->buffers[slot] = buffer; 1253 buffers->offsets[slot] = buffer_offset; 1254 radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), 1255 RADEON_USAGE_READ | buffers->priority_constbuf, true); 1256 buffers->enabled_mask |= 1llu << slot; 1257 } else { 1258 /* Clear the descriptor. Only 3 dwords are cleared. The 4th dword is immutable. */ 1259 memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 3); 1260 buffers->enabled_mask &= ~(1llu << slot); 1261 } 1262 1263 sctx->descriptors_dirty |= 1u << descriptors_idx; 1264} 1265 1266void si_get_inline_uniform_state(union si_shader_key *key, enum pipe_shader_type shader, 1267 bool *inline_uniforms, uint32_t **inlined_values) 1268{ 1269 if (shader == PIPE_SHADER_FRAGMENT) { 1270 *inline_uniforms = key->ps.opt.inline_uniforms; 1271 *inlined_values = key->ps.opt.inlined_uniform_values; 1272 } else { 1273 *inline_uniforms = key->ge.opt.inline_uniforms; 1274 *inlined_values = key->ge.opt.inlined_uniform_values; 1275 } 1276} 1277 1278void si_invalidate_inlinable_uniforms(struct si_context *sctx, enum pipe_shader_type shader) 1279{ 1280 if (shader == PIPE_SHADER_COMPUTE) 1281 return; 1282 1283 bool inline_uniforms; 1284 uint32_t *inlined_values; 1285 si_get_inline_uniform_state(&sctx->shaders[shader].key, shader, &inline_uniforms, &inlined_values); 1286 1287 if (inline_uniforms) { 1288 if (shader == PIPE_SHADER_FRAGMENT) 1289 sctx->shaders[shader].key.ps.opt.inline_uniforms = false; 1290 else 1291 sctx->shaders[shader].key.ge.opt.inline_uniforms = false; 1292 1293 memset(inlined_values, 0, MAX_INLINABLE_UNIFORMS * 4); 1294 sctx->do_update_shaders = true; 1295 } 1296} 1297 1298static void si_pipe_set_constant_buffer(struct pipe_context *ctx, enum pipe_shader_type shader, 1299 uint slot, bool take_ownership, 1300 const struct pipe_constant_buffer *input) 1301{ 1302 struct si_context *sctx = (struct si_context *)ctx; 1303 1304 if (shader >= SI_NUM_SHADERS) 1305 return; 1306 1307 if (input) { 1308 if (input->buffer) { 1309 if (slot == 0 && 1310 !(si_resource(input->buffer)->flags & RADEON_FLAG_32BIT)) { 1311 assert(!"constant buffer 0 must have a 32-bit VM address, use const_uploader"); 1312 return; 1313 } 1314 si_resource(input->buffer)->bind_history |= SI_BIND_CONSTANT_BUFFER(shader); 1315 } 1316 1317 if (slot == 0) 1318 si_invalidate_inlinable_uniforms(sctx, shader); 1319 } 1320 1321 slot = si_get_constbuf_slot(slot); 1322 si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader], 1323 si_const_and_shader_buffer_descriptors_idx(shader), slot, 1324 take_ownership, input); 1325} 1326 1327static void si_set_inlinable_constants(struct pipe_context *ctx, 1328 enum pipe_shader_type shader, 1329 uint num_values, uint32_t *values) 1330{ 1331 struct si_context *sctx = (struct si_context *)ctx; 1332 1333 if (shader == PIPE_SHADER_COMPUTE) 1334 return; 1335 1336 bool inline_uniforms; 1337 uint32_t *inlined_values; 1338 si_get_inline_uniform_state(&sctx->shaders[shader].key, shader, &inline_uniforms, &inlined_values); 1339 1340 if (!inline_uniforms) { 1341 /* It's the first time we set the constants. Always update shaders. */ 1342 if (shader == PIPE_SHADER_FRAGMENT) 1343 sctx->shaders[shader].key.ps.opt.inline_uniforms = true; 1344 else 1345 sctx->shaders[shader].key.ge.opt.inline_uniforms = true; 1346 1347 memcpy(inlined_values, values, num_values * 4); 1348 sctx->do_update_shaders = true; 1349 return; 1350 } 1351 1352 /* We have already set inlinable constants for this shader. Update the shader only if 1353 * the constants are being changed so as not to update shaders needlessly. 1354 */ 1355 if (memcmp(inlined_values, values, num_values * 4)) { 1356 memcpy(inlined_values, values, num_values * 4); 1357 sctx->do_update_shaders = true; 1358 } 1359} 1360 1361void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, uint slot, 1362 struct pipe_constant_buffer *cbuf) 1363{ 1364 cbuf->user_buffer = NULL; 1365 si_get_buffer_from_descriptors( 1366 &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors(sctx, shader), 1367 si_get_constbuf_slot(slot), &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size); 1368} 1369 1370/* SHADER BUFFERS */ 1371 1372static void si_set_shader_buffer(struct si_context *sctx, struct si_buffer_resources *buffers, 1373 unsigned descriptors_idx, uint slot, 1374 const struct pipe_shader_buffer *sbuffer, bool writable, 1375 unsigned priority) 1376{ 1377 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; 1378 uint32_t *desc = descs->list + slot * 4; 1379 1380 if (!sbuffer || !sbuffer->buffer) { 1381 pipe_resource_reference(&buffers->buffers[slot], NULL); 1382 /* Clear the descriptor. Only 3 dwords are cleared. The 4th dword is immutable. */ 1383 memset(desc, 0, sizeof(uint32_t) * 3); 1384 buffers->enabled_mask &= ~(1llu << slot); 1385 buffers->writable_mask &= ~(1llu << slot); 1386 sctx->descriptors_dirty |= 1u << descriptors_idx; 1387 return; 1388 } 1389 1390 struct si_resource *buf = si_resource(sbuffer->buffer); 1391 uint64_t va = buf->gpu_address + sbuffer->buffer_offset; 1392 1393 desc[0] = va; 1394 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(0); 1395 desc[2] = sbuffer->buffer_size; 1396 1397 pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); 1398 buffers->offsets[slot] = sbuffer->buffer_offset; 1399 radeon_add_to_gfx_buffer_list_check_mem( 1400 sctx, buf, (writable ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ) | priority, true); 1401 if (writable) 1402 buffers->writable_mask |= 1llu << slot; 1403 else 1404 buffers->writable_mask &= ~(1llu << slot); 1405 1406 buffers->enabled_mask |= 1llu << slot; 1407 sctx->descriptors_dirty |= 1lu << descriptors_idx; 1408 1409 util_range_add(&buf->b.b, &buf->valid_buffer_range, sbuffer->buffer_offset, 1410 sbuffer->buffer_offset + sbuffer->buffer_size); 1411} 1412 1413void si_set_shader_buffers(struct pipe_context *ctx, enum pipe_shader_type shader, 1414 unsigned start_slot, unsigned count, 1415 const struct pipe_shader_buffer *sbuffers, 1416 unsigned writable_bitmask, bool internal_blit) 1417{ 1418 struct si_context *sctx = (struct si_context *)ctx; 1419 struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader]; 1420 unsigned descriptors_idx = si_const_and_shader_buffer_descriptors_idx(shader); 1421 unsigned i; 1422 1423 assert(start_slot + count <= SI_NUM_SHADER_BUFFERS); 1424 1425 if (shader == PIPE_SHADER_COMPUTE && 1426 sctx->cs_shader_state.program && 1427 start_slot < sctx->cs_shader_state.program->sel.cs_num_shaderbufs_in_user_sgprs) 1428 sctx->compute_shaderbuf_sgprs_dirty = true; 1429 1430 for (i = 0; i < count; ++i) { 1431 const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL; 1432 unsigned slot = si_get_shaderbuf_slot(start_slot + i); 1433 1434 /* Don't track bind history for internal blits, such as clear_buffer and copy_buffer 1435 * to prevent unnecessary synchronization before compute blits later. 1436 */ 1437 if (!internal_blit && sbuffer && sbuffer->buffer) 1438 si_resource(sbuffer->buffer)->bind_history |= SI_BIND_SHADER_BUFFER(shader); 1439 1440 si_set_shader_buffer(sctx, buffers, descriptors_idx, slot, sbuffer, 1441 !!(writable_bitmask & (1u << i)), buffers->priority); 1442 } 1443} 1444 1445static void si_pipe_set_shader_buffers(struct pipe_context *ctx, enum pipe_shader_type shader, 1446 unsigned start_slot, unsigned count, 1447 const struct pipe_shader_buffer *sbuffers, 1448 unsigned writable_bitmask) 1449{ 1450 si_set_shader_buffers(ctx, shader, start_slot, count, sbuffers, writable_bitmask, false); 1451} 1452 1453void si_get_shader_buffers(struct si_context *sctx, enum pipe_shader_type shader, uint start_slot, 1454 uint count, struct pipe_shader_buffer *sbuf) 1455{ 1456 struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader]; 1457 struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader); 1458 1459 for (unsigned i = 0; i < count; ++i) { 1460 si_get_buffer_from_descriptors(buffers, descs, si_get_shaderbuf_slot(start_slot + i), 1461 &sbuf[i].buffer, &sbuf[i].buffer_offset, &sbuf[i].buffer_size); 1462 } 1463} 1464 1465/* RING BUFFERS */ 1466 1467void si_set_internal_const_buffer(struct si_context *sctx, uint slot, 1468 const struct pipe_constant_buffer *input) 1469{ 1470 si_set_constant_buffer(sctx, &sctx->internal_bindings, SI_DESCS_INTERNAL, slot, false, input); 1471} 1472 1473void si_set_internal_shader_buffer(struct si_context *sctx, uint slot, 1474 const struct pipe_shader_buffer *sbuffer) 1475{ 1476 si_set_shader_buffer(sctx, &sctx->internal_bindings, SI_DESCS_INTERNAL, slot, sbuffer, true, 1477 RADEON_PRIO_SHADER_RW_BUFFER); 1478} 1479 1480void si_set_ring_buffer(struct si_context *sctx, uint slot, struct pipe_resource *buffer, 1481 unsigned stride, unsigned num_records, bool add_tid, bool swizzle, 1482 unsigned element_size, unsigned index_stride, uint64_t offset) 1483{ 1484 struct si_buffer_resources *buffers = &sctx->internal_bindings; 1485 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL]; 1486 1487 /* The stride field in the resource descriptor has 14 bits */ 1488 assert(stride < (1 << 14)); 1489 1490 assert(slot < descs->num_elements); 1491 pipe_resource_reference(&buffers->buffers[slot], NULL); 1492 1493 if (buffer) { 1494 uint64_t va; 1495 1496 va = si_resource(buffer)->gpu_address + offset; 1497 1498 switch (element_size) { 1499 default: 1500 assert(!"Unsupported ring buffer element size"); 1501 case 0: 1502 case 2: 1503 element_size = 0; 1504 break; 1505 case 4: 1506 element_size = 1; 1507 break; 1508 case 8: 1509 element_size = 2; 1510 break; 1511 case 16: 1512 element_size = 3; 1513 break; 1514 } 1515 1516 switch (index_stride) { 1517 default: 1518 assert(!"Unsupported ring buffer index stride"); 1519 case 0: 1520 case 8: 1521 index_stride = 0; 1522 break; 1523 case 16: 1524 index_stride = 1; 1525 break; 1526 case 32: 1527 index_stride = 2; 1528 break; 1529 case 64: 1530 index_stride = 3; 1531 break; 1532 } 1533 1534 if (sctx->gfx_level >= GFX8 && stride) 1535 num_records *= stride; 1536 1537 /* Set the descriptor. */ 1538 uint32_t *desc = descs->list + slot * 4; 1539 desc[0] = va; 1540 desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); 1541 desc[2] = num_records; 1542 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 1543 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 1544 S_008F0C_INDEX_STRIDE(index_stride) | S_008F0C_ADD_TID_ENABLE(add_tid); 1545 1546 if (sctx->gfx_level >= GFX11) { 1547 assert(!swizzle || element_size == 1 || element_size == 3); /* 4 or 16 bytes */ 1548 desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX11(swizzle ? element_size : 0); 1549 } else if (sctx->gfx_level >= GFX9) { 1550 assert(!swizzle || element_size == 1); /* only 4 bytes on GFX9 */ 1551 desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(swizzle); 1552 } else { 1553 desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(swizzle); 1554 desc[3] |= S_008F0C_ELEMENT_SIZE(element_size); 1555 } 1556 1557 if (sctx->gfx_level >= GFX11) { 1558 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | 1559 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); 1560 } else if (sctx->gfx_level >= GFX10) { 1561 desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | 1562 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); 1563 } else { 1564 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 1565 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 1566 } 1567 1568 pipe_resource_reference(&buffers->buffers[slot], buffer); 1569 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), 1570 RADEON_USAGE_READWRITE | buffers->priority); 1571 buffers->enabled_mask |= 1llu << slot; 1572 } else { 1573 /* Clear the descriptor. */ 1574 memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 4); 1575 buffers->enabled_mask &= ~(1llu << slot); 1576 } 1577 1578 sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL; 1579} 1580 1581/* INTERNAL CONST BUFFERS */ 1582 1583static void si_set_polygon_stipple(struct pipe_context *ctx, const struct pipe_poly_stipple *state) 1584{ 1585 struct si_context *sctx = (struct si_context *)ctx; 1586 struct pipe_constant_buffer cb = {}; 1587 unsigned stipple[32]; 1588 int i; 1589 1590 for (i = 0; i < 32; i++) 1591 stipple[i] = util_bitreverse(state->stipple[i]); 1592 1593 cb.user_buffer = stipple; 1594 cb.buffer_size = sizeof(stipple); 1595 1596 si_set_internal_const_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb); 1597} 1598 1599/* TEXTURE METADATA ENABLE/DISABLE */ 1600 1601static void si_resident_handles_update_needs_color_decompress(struct si_context *sctx) 1602{ 1603 util_dynarray_clear(&sctx->resident_tex_needs_color_decompress); 1604 util_dynarray_clear(&sctx->resident_img_needs_color_decompress); 1605 1606 util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { 1607 struct pipe_resource *res = (*tex_handle)->view->texture; 1608 struct si_texture *tex; 1609 1610 if (!res || res->target == PIPE_BUFFER) 1611 continue; 1612 1613 tex = (struct si_texture *)res; 1614 if (!color_needs_decompression(tex)) 1615 continue; 1616 1617 util_dynarray_append(&sctx->resident_tex_needs_color_decompress, struct si_texture_handle *, 1618 *tex_handle); 1619 } 1620 1621 util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) { 1622 struct pipe_image_view *view = &(*img_handle)->view; 1623 struct pipe_resource *res = view->resource; 1624 struct si_texture *tex; 1625 1626 if (!res || res->target == PIPE_BUFFER) 1627 continue; 1628 1629 tex = (struct si_texture *)res; 1630 if (!color_needs_decompression(tex)) 1631 continue; 1632 1633 util_dynarray_append(&sctx->resident_img_needs_color_decompress, struct si_image_handle *, 1634 *img_handle); 1635 } 1636} 1637 1638/* CMASK can be enabled (for fast clear) and disabled (for texture export) 1639 * while the texture is bound, possibly by a different context. In that case, 1640 * call this function to update needs_*_decompress_masks. 1641 */ 1642void si_update_needs_color_decompress_masks(struct si_context *sctx) 1643{ 1644 for (int i = 0; i < SI_NUM_SHADERS; ++i) { 1645 si_samplers_update_needs_color_decompress_mask(&sctx->samplers[i]); 1646 si_images_update_needs_color_decompress_mask(&sctx->images[i]); 1647 si_update_shader_needs_decompress_mask(sctx, i); 1648 } 1649 1650 si_resident_handles_update_needs_color_decompress(sctx); 1651} 1652 1653/* BUFFER DISCARD/INVALIDATION */ 1654 1655/* Reset descriptors of buffer resources after \p buf has been invalidated. 1656 * If buf == NULL, reset all descriptors. 1657 */ 1658static bool si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, 1659 unsigned descriptors_idx, uint64_t slot_mask, 1660 struct pipe_resource *buf, unsigned priority) 1661{ 1662 struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; 1663 bool noop = true; 1664 uint64_t mask = buffers->enabled_mask & slot_mask; 1665 1666 while (mask) { 1667 unsigned i = u_bit_scan64(&mask); 1668 struct pipe_resource *buffer = buffers->buffers[i]; 1669 1670 if (buffer && (!buf || buffer == buf)) { 1671 si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], descs->list + i * 4); 1672 sctx->descriptors_dirty |= 1u << descriptors_idx; 1673 1674 radeon_add_to_gfx_buffer_list_check_mem( 1675 sctx, si_resource(buffer), 1676 (buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ) | 1677 priority, true); 1678 noop = false; 1679 } 1680 } 1681 return !noop; 1682} 1683 1684/* Update all buffer bindings where the buffer is bound, including 1685 * all resource descriptors. This is invalidate_buffer without 1686 * the invalidation. 1687 * 1688 * If buf == NULL, update all buffer bindings. 1689 */ 1690void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) 1691{ 1692 struct si_resource *buffer = si_resource(buf); 1693 unsigned i; 1694 unsigned num_elems = sctx->num_vertex_elements; 1695 1696 /* We changed the buffer, now we need to bind it where the old one 1697 * was bound. This consists of 2 things: 1698 * 1) Updating the resource descriptor and dirtying it. 1699 * 2) Adding a relocation to the CS, so that it's usable. 1700 */ 1701 1702 /* Vertex buffers. */ 1703 if (!buffer) { 1704 sctx->vertex_buffers_dirty = num_elems > 0; 1705 } else if (buffer->bind_history & SI_BIND_VERTEX_BUFFER) { 1706 for (i = 0; i < num_elems; i++) { 1707 int vb = sctx->vertex_elements->vertex_buffer_index[i]; 1708 1709 if (vb >= ARRAY_SIZE(sctx->vertex_buffer)) 1710 continue; 1711 if (!sctx->vertex_buffer[vb].buffer.resource) 1712 continue; 1713 1714 if (sctx->vertex_buffer[vb].buffer.resource == buf) { 1715 sctx->vertex_buffers_dirty = num_elems > 0; 1716 break; 1717 } 1718 } 1719 } 1720 1721 /* Streamout buffers. (other internal buffers can't be invalidated) */ 1722 if (!buffer || buffer->bind_history & SI_BIND_STREAMOUT_BUFFER) { 1723 for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) { 1724 struct si_buffer_resources *buffers = &sctx->internal_bindings; 1725 struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL]; 1726 struct pipe_resource *buffer = buffers->buffers[i]; 1727 1728 if (!buffer || (buf && buffer != buf)) 1729 continue; 1730 1731 si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], descs->list + i * 4); 1732 sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL; 1733 1734 radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_WRITE | 1735 RADEON_PRIO_SHADER_RW_BUFFER, true); 1736 1737 /* Update the streamout state. */ 1738 if (sctx->streamout.begin_emitted) 1739 si_emit_streamout_end(sctx); 1740 sctx->streamout.append_bitmask = sctx->streamout.enabled_mask; 1741 si_streamout_buffers_dirty(sctx); 1742 } 1743 } 1744 1745 /* Constant and shader buffers. */ 1746 if (!buffer || buffer->bind_history & SI_BIND_CONSTANT_BUFFER_ALL) { 1747 unsigned mask = buffer ? (buffer->bind_history & SI_BIND_CONSTANT_BUFFER_ALL) >> 1748 SI_BIND_CONSTANT_BUFFER_SHIFT : BITFIELD_MASK(SI_NUM_SHADERS); 1749 u_foreach_bit(shader, mask) { 1750 si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], 1751 si_const_and_shader_buffer_descriptors_idx(shader), 1752 u_bit_consecutive64(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS), 1753 buf, sctx->const_and_shader_buffers[shader].priority_constbuf); 1754 } 1755 } 1756 1757 if (!buffer || buffer->bind_history & SI_BIND_SHADER_BUFFER_ALL) { 1758 unsigned mask = buffer ? (buffer->bind_history & SI_BIND_SHADER_BUFFER_ALL) >> 1759 SI_BIND_SHADER_BUFFER_SHIFT : BITFIELD_MASK(SI_NUM_SHADERS); 1760 u_foreach_bit(shader, mask) { 1761 if (si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], 1762 si_const_and_shader_buffer_descriptors_idx(shader), 1763 u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS), buf, 1764 sctx->const_and_shader_buffers[shader].priority) && 1765 shader == PIPE_SHADER_COMPUTE) { 1766 sctx->compute_shaderbuf_sgprs_dirty = true; 1767 } 1768 } 1769 } 1770 1771 if (!buffer || buffer->bind_history & SI_BIND_SAMPLER_BUFFER_ALL) { 1772 unsigned mask = buffer ? (buffer->bind_history & SI_BIND_SAMPLER_BUFFER_ALL) >> 1773 SI_BIND_SAMPLER_BUFFER_SHIFT : BITFIELD_MASK(SI_NUM_SHADERS); 1774 /* Texture buffers - update bindings. */ 1775 u_foreach_bit(shader, mask) { 1776 struct si_samplers *samplers = &sctx->samplers[shader]; 1777 struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader); 1778 unsigned mask = samplers->enabled_mask; 1779 1780 while (mask) { 1781 unsigned i = u_bit_scan(&mask); 1782 struct pipe_resource *buffer = samplers->views[i]->texture; 1783 1784 if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) { 1785 unsigned desc_slot = si_get_sampler_slot(i); 1786 1787 si_set_buf_desc_address(si_resource(buffer), samplers->views[i]->u.buf.offset, 1788 descs->list + desc_slot * 16 + 4); 1789 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 1790 1791 radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ | 1792 RADEON_PRIO_SAMPLER_BUFFER, true); 1793 } 1794 } 1795 } 1796 } 1797 1798 /* Shader images */ 1799 if (!buffer || buffer->bind_history & SI_BIND_IMAGE_BUFFER_ALL) { 1800 unsigned mask = buffer ? (buffer->bind_history & SI_BIND_IMAGE_BUFFER_SHIFT) >> 1801 SI_BIND_IMAGE_BUFFER_SHIFT : BITFIELD_MASK(SI_NUM_SHADERS); 1802 u_foreach_bit(shader, mask) { 1803 struct si_images *images = &sctx->images[shader]; 1804 struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader); 1805 unsigned mask = images->enabled_mask; 1806 1807 while (mask) { 1808 unsigned i = u_bit_scan(&mask); 1809 struct pipe_resource *buffer = images->views[i].resource; 1810 1811 if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) { 1812 unsigned desc_slot = si_get_image_slot(i); 1813 1814 if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE) 1815 si_mark_image_range_valid(&images->views[i]); 1816 1817 si_set_buf_desc_address(si_resource(buffer), images->views[i].u.buf.offset, 1818 descs->list + desc_slot * 8 + 4); 1819 sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 1820 1821 radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), 1822 RADEON_USAGE_READWRITE | 1823 RADEON_PRIO_SAMPLER_BUFFER, true); 1824 1825 if (shader == PIPE_SHADER_COMPUTE) 1826 sctx->compute_image_sgprs_dirty = true; 1827 } 1828 } 1829 } 1830 } 1831 1832 /* Bindless texture handles */ 1833 if (!buffer || buffer->texture_handle_allocated) { 1834 struct si_descriptors *descs = &sctx->bindless_descriptors; 1835 1836 util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { 1837 struct pipe_sampler_view *view = (*tex_handle)->view; 1838 unsigned desc_slot = (*tex_handle)->desc_slot; 1839 struct pipe_resource *buffer = view->texture; 1840 1841 if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) { 1842 si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset, 1843 descs->list + desc_slot * 16 + 4); 1844 1845 (*tex_handle)->desc_dirty = true; 1846 sctx->bindless_descriptors_dirty = true; 1847 1848 radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ | 1849 RADEON_PRIO_SAMPLER_BUFFER, true); 1850 } 1851 } 1852 } 1853 1854 /* Bindless image handles */ 1855 if (!buffer || buffer->image_handle_allocated) { 1856 struct si_descriptors *descs = &sctx->bindless_descriptors; 1857 1858 util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) { 1859 struct pipe_image_view *view = &(*img_handle)->view; 1860 unsigned desc_slot = (*img_handle)->desc_slot; 1861 struct pipe_resource *buffer = view->resource; 1862 1863 if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) { 1864 if (view->access & PIPE_IMAGE_ACCESS_WRITE) 1865 si_mark_image_range_valid(view); 1866 1867 si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset, 1868 descs->list + desc_slot * 16 + 4); 1869 1870 (*img_handle)->desc_dirty = true; 1871 sctx->bindless_descriptors_dirty = true; 1872 1873 radeon_add_to_gfx_buffer_list_check_mem( 1874 sctx, si_resource(buffer), RADEON_USAGE_READWRITE | RADEON_PRIO_SAMPLER_BUFFER, true); 1875 } 1876 } 1877 } 1878 1879 if (buffer) { 1880 /* Do the same for other contexts. They will invoke this function 1881 * with buffer == NULL. 1882 */ 1883 unsigned new_counter = p_atomic_inc_return(&sctx->screen->dirty_buf_counter); 1884 1885 /* Skip the update for the current context, because we have already updated 1886 * the buffer bindings. 1887 */ 1888 if (new_counter == sctx->last_dirty_buf_counter + 1) 1889 sctx->last_dirty_buf_counter = new_counter; 1890 } 1891} 1892 1893static void si_upload_bindless_descriptor(struct si_context *sctx, unsigned desc_slot, 1894 unsigned num_dwords) 1895{ 1896 struct si_descriptors *desc = &sctx->bindless_descriptors; 1897 unsigned desc_slot_offset = desc_slot * 16; 1898 uint32_t *data; 1899 uint64_t va; 1900 1901 data = desc->list + desc_slot_offset; 1902 va = desc->gpu_address + desc_slot_offset * 4; 1903 1904 si_cp_write_data(sctx, desc->buffer, va - desc->buffer->gpu_address, num_dwords * 4, V_370_TC_L2, 1905 V_370_ME, data); 1906} 1907 1908static void si_upload_bindless_descriptors(struct si_context *sctx) 1909{ 1910 if (!sctx->bindless_descriptors_dirty) 1911 return; 1912 1913 /* Wait for graphics/compute to be idle before updating the resident 1914 * descriptors directly in memory, in case the GPU is using them. 1915 */ 1916 sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH; 1917 sctx->emit_cache_flush(sctx, &sctx->gfx_cs); 1918 1919 util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { 1920 unsigned desc_slot = (*tex_handle)->desc_slot; 1921 1922 if (!(*tex_handle)->desc_dirty) 1923 continue; 1924 1925 si_upload_bindless_descriptor(sctx, desc_slot, 16); 1926 (*tex_handle)->desc_dirty = false; 1927 } 1928 1929 util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) { 1930 unsigned desc_slot = (*img_handle)->desc_slot; 1931 1932 if (!(*img_handle)->desc_dirty) 1933 continue; 1934 1935 si_upload_bindless_descriptor(sctx, desc_slot, 8); 1936 (*img_handle)->desc_dirty = false; 1937 } 1938 1939 /* Invalidate scalar L0 because the cache doesn't know that L2 changed. */ 1940 sctx->flags |= SI_CONTEXT_INV_SCACHE; 1941 sctx->bindless_descriptors_dirty = false; 1942} 1943 1944/* Update mutable image descriptor fields of all resident textures. */ 1945static void si_update_bindless_texture_descriptor(struct si_context *sctx, 1946 struct si_texture_handle *tex_handle) 1947{ 1948 struct si_sampler_view *sview = (struct si_sampler_view *)tex_handle->view; 1949 struct si_descriptors *desc = &sctx->bindless_descriptors; 1950 unsigned desc_slot_offset = tex_handle->desc_slot * 16; 1951 uint32_t desc_list[16]; 1952 1953 if (sview->base.texture->target == PIPE_BUFFER) 1954 return; 1955 1956 memcpy(desc_list, desc->list + desc_slot_offset, sizeof(desc_list)); 1957 si_set_sampler_view_desc(sctx, sview, &tex_handle->sstate, desc->list + desc_slot_offset); 1958 1959 if (memcmp(desc_list, desc->list + desc_slot_offset, sizeof(desc_list))) { 1960 tex_handle->desc_dirty = true; 1961 sctx->bindless_descriptors_dirty = true; 1962 } 1963} 1964 1965static void si_update_bindless_image_descriptor(struct si_context *sctx, 1966 struct si_image_handle *img_handle) 1967{ 1968 struct si_descriptors *desc = &sctx->bindless_descriptors; 1969 unsigned desc_slot_offset = img_handle->desc_slot * 16; 1970 struct pipe_image_view *view = &img_handle->view; 1971 struct pipe_resource *res = view->resource; 1972 uint32_t image_desc[16]; 1973 unsigned desc_size = (res->nr_samples >= 2 ? 16 : 8) * 4; 1974 1975 if (res->target == PIPE_BUFFER) 1976 return; 1977 1978 memcpy(image_desc, desc->list + desc_slot_offset, desc_size); 1979 si_set_shader_image_desc(sctx, view, true, desc->list + desc_slot_offset, 1980 desc->list + desc_slot_offset + 8); 1981 1982 if (memcmp(image_desc, desc->list + desc_slot_offset, desc_size)) { 1983 img_handle->desc_dirty = true; 1984 sctx->bindless_descriptors_dirty = true; 1985 } 1986} 1987 1988static void si_update_all_resident_texture_descriptors(struct si_context *sctx) 1989{ 1990 util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { 1991 si_update_bindless_texture_descriptor(sctx, *tex_handle); 1992 } 1993 1994 util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) { 1995 si_update_bindless_image_descriptor(sctx, *img_handle); 1996 } 1997 1998 si_upload_bindless_descriptors(sctx); 1999} 2000 2001/* Update mutable image descriptor fields of all bound textures. */ 2002void si_update_all_texture_descriptors(struct si_context *sctx) 2003{ 2004 unsigned shader; 2005 2006 for (shader = 0; shader < SI_NUM_SHADERS; shader++) { 2007 struct si_samplers *samplers = &sctx->samplers[shader]; 2008 struct si_images *images = &sctx->images[shader]; 2009 unsigned mask; 2010 2011 /* Images. */ 2012 mask = images->enabled_mask; 2013 while (mask) { 2014 unsigned i = u_bit_scan(&mask); 2015 struct pipe_image_view *view = &images->views[i]; 2016 2017 if (!view->resource || view->resource->target == PIPE_BUFFER) 2018 continue; 2019 2020 si_set_shader_image(sctx, shader, i, view, true); 2021 } 2022 2023 /* Sampler views. */ 2024 mask = samplers->enabled_mask; 2025 while (mask) { 2026 unsigned i = u_bit_scan(&mask); 2027 struct pipe_sampler_view *view = samplers->views[i]; 2028 2029 if (!view || !view->texture || view->texture->target == PIPE_BUFFER) 2030 continue; 2031 2032 si_set_sampler_views(sctx, shader, i, 1, 0, false, &samplers->views[i], true); 2033 } 2034 2035 si_update_shader_needs_decompress_mask(sctx, shader); 2036 } 2037 2038 si_update_all_resident_texture_descriptors(sctx); 2039 si_update_ps_colorbuf0_slot(sctx); 2040} 2041 2042/* SHADER USER DATA */ 2043 2044static void si_mark_shader_pointers_dirty(struct si_context *sctx, unsigned shader) 2045{ 2046 sctx->shader_pointers_dirty |= 2047 u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS, SI_NUM_SHADER_DESCS); 2048 2049 if (shader == PIPE_SHADER_VERTEX) { 2050 unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sctx->screen); 2051 2052 sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL && 2053 sctx->num_vertex_elements > 2054 num_vbos_in_user_sgprs; 2055 sctx->vertex_buffer_user_sgprs_dirty = 2056 sctx->num_vertex_elements > 0 && num_vbos_in_user_sgprs; 2057 } 2058 2059 si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers); 2060} 2061 2062void si_shader_pointers_mark_dirty(struct si_context *sctx) 2063{ 2064 unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sctx->screen); 2065 2066 sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS); 2067 sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL && 2068 sctx->num_vertex_elements > 2069 num_vbos_in_user_sgprs; 2070 sctx->vertex_buffer_user_sgprs_dirty = 2071 sctx->num_vertex_elements > 0 && num_vbos_in_user_sgprs; 2072 si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers); 2073 sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL; 2074 sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL; 2075 sctx->compute_shaderbuf_sgprs_dirty = true; 2076 sctx->compute_image_sgprs_dirty = true; 2077 if (sctx->gfx_level >= GFX11) 2078 sctx->gs_attribute_ring_pointer_dirty = true; 2079} 2080 2081/* Set a base register address for user data constants in the given shader. 2082 * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*. 2083 */ 2084static void si_set_user_data_base(struct si_context *sctx, unsigned shader, uint32_t new_base) 2085{ 2086 uint32_t *base = &sctx->shader_pointers.sh_base[shader]; 2087 2088 if (*base != new_base) { 2089 *base = new_base; 2090 2091 if (new_base) 2092 si_mark_shader_pointers_dirty(sctx, shader); 2093 2094 /* Any change in enabled shader stages requires re-emitting 2095 * the VS state SGPR, because it contains the clamp_vertex_color 2096 * state, which can be done in VS, TES, and GS. 2097 */ 2098 sctx->last_vs_state = ~0; 2099 sctx->last_gs_state = ~0; 2100 } 2101} 2102 2103/* This must be called when these are changed between enabled and disabled 2104 * - geometry shader 2105 * - tessellation evaluation shader 2106 * - NGG 2107 */ 2108void si_shader_change_notify(struct si_context *sctx) 2109{ 2110 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, 2111 si_get_user_data_base(sctx->gfx_level, 2112 sctx->shader.tes.cso ? TESS_ON : TESS_OFF, 2113 sctx->shader.gs.cso ? GS_ON : GS_OFF, 2114 sctx->ngg ? NGG_ON : NGG_OFF, 2115 PIPE_SHADER_VERTEX)); 2116 2117 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 2118 si_get_user_data_base(sctx->gfx_level, 2119 sctx->shader.tes.cso ? TESS_ON : TESS_OFF, 2120 sctx->shader.gs.cso ? GS_ON : GS_OFF, 2121 sctx->ngg ? NGG_ON : NGG_OFF, 2122 PIPE_SHADER_TESS_EVAL)); 2123 2124 /* Update as_* flags in shader keys. Ignore disabled shader stages. 2125 * as_ls = VS before TCS 2126 * as_es = VS before GS or TES before GS 2127 * as_ngg = NGG enabled for the last geometry stage. 2128 * If GS sets as_ngg, the previous stage must set as_ngg too. 2129 */ 2130 if (sctx->shader.tes.cso) { 2131 sctx->shader.vs.key.ge.as_ls = 1; 2132 sctx->shader.vs.key.ge.as_es = 0; 2133 sctx->shader.vs.key.ge.as_ngg = 0; 2134 2135 if (sctx->shader.gs.cso) { 2136 sctx->shader.tes.key.ge.as_es = 1; 2137 sctx->shader.tes.key.ge.as_ngg = sctx->ngg; 2138 sctx->shader.gs.key.ge.as_ngg = sctx->ngg; 2139 } else { 2140 sctx->shader.tes.key.ge.as_es = 0; 2141 sctx->shader.tes.key.ge.as_ngg = sctx->ngg; 2142 } 2143 } else if (sctx->shader.gs.cso) { 2144 sctx->shader.vs.key.ge.as_ls = 0; 2145 sctx->shader.vs.key.ge.as_es = 1; 2146 sctx->shader.vs.key.ge.as_ngg = sctx->ngg; 2147 sctx->shader.gs.key.ge.as_ngg = sctx->ngg; 2148 } else { 2149 sctx->shader.vs.key.ge.as_ls = 0; 2150 sctx->shader.vs.key.ge.as_es = 0; 2151 sctx->shader.vs.key.ge.as_ngg = sctx->ngg; 2152 } 2153} 2154 2155#define si_emit_consecutive_shader_pointers(sctx, pointer_mask, sh_base) do { \ 2156 unsigned sh_reg_base = (sh_base); \ 2157 if (sh_reg_base) { \ 2158 unsigned mask = sctx->shader_pointers_dirty & (pointer_mask); \ 2159 \ 2160 while (mask) { \ 2161 int start, count; \ 2162 u_bit_scan_consecutive_range(&mask, &start, &count); \ 2163 \ 2164 struct si_descriptors *descs = &sctx->descriptors[start]; \ 2165 unsigned sh_offset = sh_reg_base + descs->shader_userdata_offset; \ 2166 \ 2167 radeon_set_sh_reg_seq(sh_offset, count); \ 2168 for (int i = 0; i < count; i++) \ 2169 radeon_emit_32bit_pointer(sctx->screen, descs[i].gpu_address); \ 2170 } \ 2171 } \ 2172} while (0) 2173 2174static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_descriptors *descs) 2175{ 2176 radeon_begin(&sctx->gfx_cs); 2177 2178 if (sctx->gfx_level >= GFX11) { 2179 radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0); 2180 radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0); 2181 radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0); 2182 radeon_end(); 2183 return; 2184 } else if (sctx->gfx_level >= GFX10) { 2185 radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0); 2186 /* HW VS stage only used in non-NGG mode. */ 2187 radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0); 2188 radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0); 2189 radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0); 2190 radeon_end(); 2191 return; 2192 } else if (sctx->gfx_level == GFX9 && sctx->shadowed_regs) { 2193 /* We can't use the COMMON registers with register shadowing. */ 2194 radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0); 2195 radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0); 2196 radeon_emit_one_32bit_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0); 2197 radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_LS_0); 2198 radeon_end(); 2199 return; 2200 } else if (sctx->gfx_level == GFX9) { 2201 /* Broadcast it to all shader stages. */ 2202 radeon_emit_one_32bit_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_COMMON_0); 2203 radeon_end(); 2204 return; 2205 } 2206 2207 radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0); 2208 radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0); 2209 radeon_emit_one_32bit_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0); 2210 radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0); 2211 radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0); 2212 radeon_emit_one_32bit_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_LS_0); 2213 radeon_end(); 2214} 2215 2216void si_emit_graphics_shader_pointers(struct si_context *sctx) 2217{ 2218 uint32_t *sh_base = sctx->shader_pointers.sh_base; 2219 2220 if (sctx->shader_pointers_dirty & (1 << SI_DESCS_INTERNAL)) { 2221 si_emit_global_shader_pointers(sctx, &sctx->descriptors[SI_DESCS_INTERNAL]); 2222 } 2223 2224 radeon_begin(&sctx->gfx_cs); 2225 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX), 2226 sh_base[PIPE_SHADER_VERTEX]); 2227 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL), 2228 sh_base[PIPE_SHADER_TESS_EVAL]); 2229 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT), 2230 sh_base[PIPE_SHADER_FRAGMENT]); 2231 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), 2232 sh_base[PIPE_SHADER_TESS_CTRL]); 2233 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), 2234 sh_base[PIPE_SHADER_GEOMETRY]); 2235 2236 if (sctx->gs_attribute_ring_pointer_dirty) { 2237 assert(sctx->gfx_level >= GFX11); 2238 radeon_set_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 + GFX9_SGPR_ATTRIBUTE_RING_ADDR * 4, 2239 sctx->screen->attribute_ring->gpu_address); 2240 sctx->gs_attribute_ring_pointer_dirty = false; 2241 } 2242 radeon_end(); 2243 2244 sctx->shader_pointers_dirty &= ~u_bit_consecutive(SI_DESCS_INTERNAL, SI_DESCS_FIRST_COMPUTE); 2245 2246 if (sctx->graphics_bindless_pointer_dirty) { 2247 si_emit_global_shader_pointers(sctx, &sctx->bindless_descriptors); 2248 sctx->graphics_bindless_pointer_dirty = false; 2249 } 2250} 2251 2252void si_emit_compute_shader_pointers(struct si_context *sctx) 2253{ 2254 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 2255 struct si_shader_selector *shader = &sctx->cs_shader_state.program->sel; 2256 unsigned base = R_00B900_COMPUTE_USER_DATA_0; 2257 2258 radeon_begin(cs); 2259 si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE), 2260 R_00B900_COMPUTE_USER_DATA_0); 2261 sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE); 2262 2263 if (sctx->compute_bindless_pointer_dirty) { 2264 radeon_emit_one_32bit_pointer(sctx, &sctx->bindless_descriptors, base); 2265 sctx->compute_bindless_pointer_dirty = false; 2266 } 2267 2268 /* Set shader buffer descriptors in user SGPRs. */ 2269 unsigned num_shaderbufs = shader->cs_num_shaderbufs_in_user_sgprs; 2270 if (num_shaderbufs && sctx->compute_shaderbuf_sgprs_dirty) { 2271 struct si_descriptors *desc = si_const_and_shader_buffer_descriptors(sctx, PIPE_SHADER_COMPUTE); 2272 2273 radeon_set_sh_reg_seq(R_00B900_COMPUTE_USER_DATA_0 + 2274 shader->cs_shaderbufs_sgpr_index * 4, 2275 num_shaderbufs * 4); 2276 2277 for (unsigned i = 0; i < num_shaderbufs; i++) 2278 radeon_emit_array(&desc->list[si_get_shaderbuf_slot(i) * 4], 4); 2279 2280 sctx->compute_shaderbuf_sgprs_dirty = false; 2281 } 2282 2283 /* Set image descriptors in user SGPRs. */ 2284 unsigned num_images = shader->cs_num_images_in_user_sgprs; 2285 if (num_images && sctx->compute_image_sgprs_dirty) { 2286 struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, PIPE_SHADER_COMPUTE); 2287 2288 radeon_set_sh_reg_seq(R_00B900_COMPUTE_USER_DATA_0 + 2289 shader->cs_images_sgpr_index * 4, 2290 shader->cs_images_num_sgprs); 2291 2292 for (unsigned i = 0; i < num_images; i++) { 2293 unsigned desc_offset = si_get_image_slot(i) * 8; 2294 unsigned num_sgprs = 8; 2295 2296 /* Image buffers are in desc[4..7]. */ 2297 if (BITSET_TEST(shader->info.base.image_buffers, i)) { 2298 desc_offset += 4; 2299 num_sgprs = 4; 2300 } 2301 2302 radeon_emit_array(&desc->list[desc_offset], num_sgprs); 2303 } 2304 2305 sctx->compute_image_sgprs_dirty = false; 2306 } 2307 radeon_end(); 2308} 2309 2310/* BINDLESS */ 2311 2312static void si_init_bindless_descriptors(struct si_context *sctx, struct si_descriptors *desc, 2313 short shader_userdata_rel_index, unsigned num_elements) 2314{ 2315 ASSERTED unsigned desc_slot; 2316 2317 si_init_descriptors(desc, shader_userdata_rel_index, 16, num_elements); 2318 sctx->bindless_descriptors.num_active_slots = num_elements; 2319 2320 /* The first bindless descriptor is stored at slot 1, because 0 is not 2321 * considered to be a valid handle. 2322 */ 2323 sctx->num_bindless_descriptors = 1; 2324 2325 /* Track which bindless slots are used (or not). */ 2326 util_idalloc_init(&sctx->bindless_used_slots, num_elements); 2327 2328 /* Reserve slot 0 because it's an invalid handle for bindless. */ 2329 desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots); 2330 assert(desc_slot == 0); 2331} 2332 2333static void si_release_bindless_descriptors(struct si_context *sctx) 2334{ 2335 si_release_descriptors(&sctx->bindless_descriptors); 2336 util_idalloc_fini(&sctx->bindless_used_slots); 2337} 2338 2339static unsigned si_get_first_free_bindless_slot(struct si_context *sctx) 2340{ 2341 struct si_descriptors *desc = &sctx->bindless_descriptors; 2342 unsigned desc_slot; 2343 2344 desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots); 2345 if (desc_slot >= desc->num_elements) { 2346 /* The array of bindless descriptors is full, resize it. */ 2347 unsigned slot_size = desc->element_dw_size * 4; 2348 unsigned new_num_elements = desc->num_elements * 2; 2349 2350 desc->list = 2351 REALLOC(desc->list, desc->num_elements * slot_size, new_num_elements * slot_size); 2352 desc->num_elements = new_num_elements; 2353 desc->num_active_slots = new_num_elements; 2354 } 2355 2356 assert(desc_slot); 2357 return desc_slot; 2358} 2359 2360static unsigned si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list, 2361 unsigned size) 2362{ 2363 struct si_descriptors *desc = &sctx->bindless_descriptors; 2364 unsigned desc_slot, desc_slot_offset; 2365 2366 /* Find a free slot. */ 2367 desc_slot = si_get_first_free_bindless_slot(sctx); 2368 2369 /* For simplicity, sampler and image bindless descriptors use fixed 2370 * 16-dword slots for now. Image descriptors only need 8-dword but this 2371 * doesn't really matter because no real apps use image handles. 2372 */ 2373 desc_slot_offset = desc_slot * 16; 2374 2375 /* Copy the descriptor into the array. */ 2376 memcpy(desc->list + desc_slot_offset, desc_list, size); 2377 2378 /* Re-upload the whole array of bindless descriptors into a new buffer. 2379 */ 2380 if (!si_upload_descriptors(sctx, desc)) 2381 return 0; 2382 2383 /* Make sure to re-emit the shader pointers for all stages. */ 2384 sctx->graphics_bindless_pointer_dirty = true; 2385 sctx->compute_bindless_pointer_dirty = true; 2386 si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers); 2387 2388 return desc_slot; 2389} 2390 2391static void si_update_bindless_buffer_descriptor(struct si_context *sctx, unsigned desc_slot, 2392 struct pipe_resource *resource, uint64_t offset, 2393 bool *desc_dirty) 2394{ 2395 struct si_descriptors *desc = &sctx->bindless_descriptors; 2396 struct si_resource *buf = si_resource(resource); 2397 unsigned desc_slot_offset = desc_slot * 16; 2398 uint32_t *desc_list = desc->list + desc_slot_offset + 4; 2399 uint64_t old_desc_va; 2400 2401 assert(resource->target == PIPE_BUFFER); 2402 2403 /* Retrieve the old buffer addr from the descriptor. */ 2404 old_desc_va = si_desc_extract_buffer_address(desc_list); 2405 2406 if (old_desc_va != buf->gpu_address + offset) { 2407 /* The buffer has been invalidated when the handle wasn't 2408 * resident, update the descriptor and the dirty flag. 2409 */ 2410 si_set_buf_desc_address(buf, offset, &desc_list[0]); 2411 2412 *desc_dirty = true; 2413 } 2414} 2415 2416static uint64_t si_create_texture_handle(struct pipe_context *ctx, struct pipe_sampler_view *view, 2417 const struct pipe_sampler_state *state) 2418{ 2419 struct si_sampler_view *sview = (struct si_sampler_view *)view; 2420 struct si_context *sctx = (struct si_context *)ctx; 2421 struct si_texture_handle *tex_handle; 2422 struct si_sampler_state *sstate; 2423 uint32_t desc_list[16]; 2424 uint64_t handle; 2425 2426 tex_handle = CALLOC_STRUCT(si_texture_handle); 2427 if (!tex_handle) 2428 return 0; 2429 2430 memset(desc_list, 0, sizeof(desc_list)); 2431 si_init_descriptor_list(&desc_list[0], 16, 1, null_texture_descriptor); 2432 2433 sstate = ctx->create_sampler_state(ctx, state); 2434 if (!sstate) { 2435 FREE(tex_handle); 2436 return 0; 2437 } 2438 2439 si_set_sampler_view_desc(sctx, sview, sstate, &desc_list[0]); 2440 memcpy(&tex_handle->sstate, sstate, sizeof(*sstate)); 2441 ctx->delete_sampler_state(ctx, sstate); 2442 2443 tex_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, sizeof(desc_list)); 2444 if (!tex_handle->desc_slot) { 2445 FREE(tex_handle); 2446 return 0; 2447 } 2448 2449 handle = tex_handle->desc_slot; 2450 2451 if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)(uintptr_t)handle, tex_handle)) { 2452 FREE(tex_handle); 2453 return 0; 2454 } 2455 2456 pipe_sampler_view_reference(&tex_handle->view, view); 2457 2458 si_resource(sview->base.texture)->texture_handle_allocated = true; 2459 2460 return handle; 2461} 2462 2463static void si_delete_texture_handle(struct pipe_context *ctx, uint64_t handle) 2464{ 2465 struct si_context *sctx = (struct si_context *)ctx; 2466 struct si_texture_handle *tex_handle; 2467 struct hash_entry *entry; 2468 2469 entry = _mesa_hash_table_search(sctx->tex_handles, (void *)(uintptr_t)handle); 2470 if (!entry) 2471 return; 2472 2473 tex_handle = (struct si_texture_handle *)entry->data; 2474 2475 /* Allow this descriptor slot to be re-used. */ 2476 util_idalloc_free(&sctx->bindless_used_slots, tex_handle->desc_slot); 2477 2478 pipe_sampler_view_reference(&tex_handle->view, NULL); 2479 _mesa_hash_table_remove(sctx->tex_handles, entry); 2480 FREE(tex_handle); 2481} 2482 2483static void si_make_texture_handle_resident(struct pipe_context *ctx, uint64_t handle, 2484 bool resident) 2485{ 2486 struct si_context *sctx = (struct si_context *)ctx; 2487 struct si_texture_handle *tex_handle; 2488 struct si_sampler_view *sview; 2489 struct hash_entry *entry; 2490 2491 entry = _mesa_hash_table_search(sctx->tex_handles, (void *)(uintptr_t)handle); 2492 if (!entry) 2493 return; 2494 2495 tex_handle = (struct si_texture_handle *)entry->data; 2496 sview = (struct si_sampler_view *)tex_handle->view; 2497 2498 if (resident) { 2499 if (sview->base.texture->target != PIPE_BUFFER) { 2500 struct si_texture *tex = (struct si_texture *)sview->base.texture; 2501 2502 if (depth_needs_decompression(tex, sview->is_stencil_sampler)) { 2503 util_dynarray_append(&sctx->resident_tex_needs_depth_decompress, 2504 struct si_texture_handle *, tex_handle); 2505 } 2506 2507 if (color_needs_decompression(tex)) { 2508 util_dynarray_append(&sctx->resident_tex_needs_color_decompress, 2509 struct si_texture_handle *, tex_handle); 2510 } 2511 2512 if (vi_dcc_enabled(tex, sview->base.u.tex.first_level) && 2513 p_atomic_read(&tex->framebuffers_bound)) 2514 sctx->need_check_render_feedback = true; 2515 2516 si_update_bindless_texture_descriptor(sctx, tex_handle); 2517 } else { 2518 si_update_bindless_buffer_descriptor(sctx, tex_handle->desc_slot, sview->base.texture, 2519 sview->base.u.buf.offset, &tex_handle->desc_dirty); 2520 } 2521 2522 /* Re-upload the descriptor if it has been updated while it 2523 * wasn't resident. 2524 */ 2525 if (tex_handle->desc_dirty) 2526 sctx->bindless_descriptors_dirty = true; 2527 2528 /* Add the texture handle to the per-context list. */ 2529 util_dynarray_append(&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle); 2530 2531 /* Add the buffers to the current CS in case si_begin_new_cs() 2532 * is not going to be called. 2533 */ 2534 si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ, 2535 sview->is_stencil_sampler, false); 2536 } else { 2537 /* Remove the texture handle from the per-context list. */ 2538 util_dynarray_delete_unordered(&sctx->resident_tex_handles, struct si_texture_handle *, 2539 tex_handle); 2540 2541 if (sview->base.texture->target != PIPE_BUFFER) { 2542 util_dynarray_delete_unordered(&sctx->resident_tex_needs_depth_decompress, 2543 struct si_texture_handle *, tex_handle); 2544 2545 util_dynarray_delete_unordered(&sctx->resident_tex_needs_color_decompress, 2546 struct si_texture_handle *, tex_handle); 2547 } 2548 } 2549} 2550 2551static uint64_t si_create_image_handle(struct pipe_context *ctx, const struct pipe_image_view *view) 2552{ 2553 struct si_context *sctx = (struct si_context *)ctx; 2554 struct si_image_handle *img_handle; 2555 uint32_t desc_list[16]; 2556 uint64_t handle; 2557 2558 if (!view || !view->resource) 2559 return 0; 2560 2561 img_handle = CALLOC_STRUCT(si_image_handle); 2562 if (!img_handle) 2563 return 0; 2564 2565 memset(desc_list, 0, sizeof(desc_list)); 2566 si_init_descriptor_list(&desc_list[0], 8, 2, null_image_descriptor); 2567 2568 si_set_shader_image_desc(sctx, view, false, &desc_list[0], &desc_list[8]); 2569 2570 img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, sizeof(desc_list)); 2571 if (!img_handle->desc_slot) { 2572 FREE(img_handle); 2573 return 0; 2574 } 2575 2576 handle = img_handle->desc_slot; 2577 2578 if (!_mesa_hash_table_insert(sctx->img_handles, (void *)(uintptr_t)handle, img_handle)) { 2579 FREE(img_handle); 2580 return 0; 2581 } 2582 2583 util_copy_image_view(&img_handle->view, view); 2584 2585 si_resource(view->resource)->image_handle_allocated = true; 2586 2587 return handle; 2588} 2589 2590static void si_delete_image_handle(struct pipe_context *ctx, uint64_t handle) 2591{ 2592 struct si_context *sctx = (struct si_context *)ctx; 2593 struct si_image_handle *img_handle; 2594 struct hash_entry *entry; 2595 2596 entry = _mesa_hash_table_search(sctx->img_handles, (void *)(uintptr_t)handle); 2597 if (!entry) 2598 return; 2599 2600 img_handle = (struct si_image_handle *)entry->data; 2601 2602 util_copy_image_view(&img_handle->view, NULL); 2603 _mesa_hash_table_remove(sctx->img_handles, entry); 2604 FREE(img_handle); 2605} 2606 2607static void si_make_image_handle_resident(struct pipe_context *ctx, uint64_t handle, 2608 unsigned access, bool resident) 2609{ 2610 struct si_context *sctx = (struct si_context *)ctx; 2611 struct si_image_handle *img_handle; 2612 struct pipe_image_view *view; 2613 struct si_resource *res; 2614 struct hash_entry *entry; 2615 2616 entry = _mesa_hash_table_search(sctx->img_handles, (void *)(uintptr_t)handle); 2617 if (!entry) 2618 return; 2619 2620 img_handle = (struct si_image_handle *)entry->data; 2621 view = &img_handle->view; 2622 res = si_resource(view->resource); 2623 2624 if (resident) { 2625 if (res->b.b.target != PIPE_BUFFER) { 2626 struct si_texture *tex = (struct si_texture *)res; 2627 unsigned level = view->u.tex.level; 2628 2629 if (color_needs_decompression(tex)) { 2630 util_dynarray_append(&sctx->resident_img_needs_color_decompress, 2631 struct si_image_handle *, img_handle); 2632 } 2633 2634 if (vi_dcc_enabled(tex, level) && p_atomic_read(&tex->framebuffers_bound)) 2635 sctx->need_check_render_feedback = true; 2636 2637 si_update_bindless_image_descriptor(sctx, img_handle); 2638 } else { 2639 si_update_bindless_buffer_descriptor(sctx, img_handle->desc_slot, view->resource, 2640 view->u.buf.offset, &img_handle->desc_dirty); 2641 } 2642 2643 /* Re-upload the descriptor if it has been updated while it 2644 * wasn't resident. 2645 */ 2646 if (img_handle->desc_dirty) 2647 sctx->bindless_descriptors_dirty = true; 2648 2649 /* Add the image handle to the per-context list. */ 2650 util_dynarray_append(&sctx->resident_img_handles, struct si_image_handle *, img_handle); 2651 2652 /* Add the buffers to the current CS in case si_begin_new_cs() 2653 * is not going to be called. 2654 */ 2655 si_sampler_view_add_buffer( 2656 sctx, view->resource, 2657 (access & PIPE_IMAGE_ACCESS_WRITE) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, false, 2658 false); 2659 } else { 2660 /* Remove the image handle from the per-context list. */ 2661 util_dynarray_delete_unordered(&sctx->resident_img_handles, struct si_image_handle *, 2662 img_handle); 2663 2664 if (res->b.b.target != PIPE_BUFFER) { 2665 util_dynarray_delete_unordered(&sctx->resident_img_needs_color_decompress, 2666 struct si_image_handle *, img_handle); 2667 } 2668 } 2669} 2670 2671static void si_resident_buffers_add_all_to_bo_list(struct si_context *sctx) 2672{ 2673 unsigned num_resident_tex_handles, num_resident_img_handles; 2674 2675 num_resident_tex_handles = sctx->resident_tex_handles.size / sizeof(struct si_texture_handle *); 2676 num_resident_img_handles = sctx->resident_img_handles.size / sizeof(struct si_image_handle *); 2677 2678 /* Add all resident texture handles. */ 2679 util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { 2680 struct si_sampler_view *sview = (struct si_sampler_view *)(*tex_handle)->view; 2681 2682 si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ, 2683 sview->is_stencil_sampler, false); 2684 } 2685 2686 /* Add all resident image handles. */ 2687 util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) { 2688 struct pipe_image_view *view = &(*img_handle)->view; 2689 2690 si_sampler_view_add_buffer(sctx, view->resource, RADEON_USAGE_READWRITE, false, false); 2691 } 2692 2693 sctx->num_resident_handles += num_resident_tex_handles + num_resident_img_handles; 2694 assert(sctx->bo_list_add_all_resident_resources); 2695 sctx->bo_list_add_all_resident_resources = false; 2696} 2697 2698/* INIT/DEINIT/UPLOAD */ 2699 2700void si_init_all_descriptors(struct si_context *sctx) 2701{ 2702 int i; 2703 unsigned first_shader = sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE; 2704 unsigned hs_sgpr0, gs_sgpr0; 2705 2706 if (sctx->gfx_level >= GFX11) { 2707 hs_sgpr0 = R_00B420_SPI_SHADER_PGM_LO_HS; 2708 gs_sgpr0 = R_00B220_SPI_SHADER_PGM_LO_GS; 2709 } else { 2710 hs_sgpr0 = R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS; 2711 gs_sgpr0 = R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS; 2712 } 2713 2714 for (i = first_shader; i < SI_NUM_SHADERS; i++) { 2715 bool is_2nd = 2716 sctx->gfx_level >= GFX9 && (i == PIPE_SHADER_TESS_CTRL || i == PIPE_SHADER_GEOMETRY); 2717 unsigned num_sampler_slots = SI_NUM_IMAGE_SLOTS / 2 + SI_NUM_SAMPLERS; 2718 unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS; 2719 int rel_dw_offset; 2720 struct si_descriptors *desc; 2721 2722 if (is_2nd) { 2723 if (i == PIPE_SHADER_TESS_CTRL) { 2724 rel_dw_offset = 2725 (hs_sgpr0 - R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4; 2726 } else if (sctx->gfx_level >= GFX10) { /* PIPE_SHADER_GEOMETRY */ 2727 rel_dw_offset = 2728 (gs_sgpr0 - R_00B230_SPI_SHADER_USER_DATA_GS_0) / 4; 2729 } else { 2730 rel_dw_offset = 2731 (gs_sgpr0 - R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4; 2732 } 2733 } else { 2734 rel_dw_offset = SI_SGPR_CONST_AND_SHADER_BUFFERS; 2735 } 2736 desc = si_const_and_shader_buffer_descriptors(sctx, i); 2737 si_init_buffer_resources(sctx, &sctx->const_and_shader_buffers[i], desc, num_buffer_slots, 2738 rel_dw_offset, RADEON_PRIO_SHADER_RW_BUFFER, 2739 RADEON_PRIO_CONST_BUFFER); 2740 desc->slot_index_to_bind_directly = si_get_constbuf_slot(0); 2741 2742 if (is_2nd) { 2743 if (i == PIPE_SHADER_TESS_CTRL) { 2744 rel_dw_offset = 2745 (hs_sgpr0 + 4 - R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4; 2746 } else if (sctx->gfx_level >= GFX10) { /* PIPE_SHADER_GEOMETRY */ 2747 rel_dw_offset = 2748 (gs_sgpr0 + 4 - R_00B230_SPI_SHADER_USER_DATA_GS_0) / 4; 2749 } else { 2750 rel_dw_offset = 2751 (gs_sgpr0 + 4 - R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4; 2752 } 2753 } else { 2754 rel_dw_offset = SI_SGPR_SAMPLERS_AND_IMAGES; 2755 } 2756 2757 desc = si_sampler_and_image_descriptors(sctx, i); 2758 si_init_descriptors(desc, rel_dw_offset, 16, num_sampler_slots); 2759 2760 int j; 2761 for (j = 0; j < SI_NUM_IMAGE_SLOTS; j++) 2762 memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4); 2763 for (; j < SI_NUM_IMAGE_SLOTS + SI_NUM_SAMPLERS * 2; j++) 2764 memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4); 2765 } 2766 2767 si_init_buffer_resources(sctx, &sctx->internal_bindings, &sctx->descriptors[SI_DESCS_INTERNAL], 2768 SI_NUM_INTERNAL_BINDINGS, SI_SGPR_INTERNAL_BINDINGS, 2769 /* The second priority is used by 2770 * const buffers in RW buffer slots. */ 2771 RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER); 2772 sctx->descriptors[SI_DESCS_INTERNAL].num_active_slots = SI_NUM_INTERNAL_BINDINGS; 2773 2774 /* Initialize an array of 1024 bindless descriptors, when the limit is 2775 * reached, just make it larger and re-upload the whole array. 2776 */ 2777 si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors, 2778 SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, 1024); 2779 2780 sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS); 2781 2782 /* Set pipe_context functions. */ 2783 sctx->b.bind_sampler_states = si_bind_sampler_states; 2784 sctx->b.set_shader_images = si_set_shader_images; 2785 sctx->b.set_constant_buffer = si_pipe_set_constant_buffer; 2786 sctx->b.set_inlinable_constants = si_set_inlinable_constants; 2787 sctx->b.set_shader_buffers = si_pipe_set_shader_buffers; 2788 sctx->b.set_sampler_views = si_pipe_set_sampler_views; 2789 sctx->b.create_texture_handle = si_create_texture_handle; 2790 sctx->b.delete_texture_handle = si_delete_texture_handle; 2791 sctx->b.make_texture_handle_resident = si_make_texture_handle_resident; 2792 sctx->b.create_image_handle = si_create_image_handle; 2793 sctx->b.delete_image_handle = si_delete_image_handle; 2794 sctx->b.make_image_handle_resident = si_make_image_handle_resident; 2795 2796 if (!sctx->has_graphics) 2797 return; 2798 2799 sctx->b.set_polygon_stipple = si_set_polygon_stipple; 2800 2801 /* Shader user data. */ 2802 sctx->atoms.s.shader_pointers.emit = si_emit_graphics_shader_pointers; 2803 2804 /* Set default and immutable mappings. */ 2805 si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, 2806 si_get_user_data_base(sctx->gfx_level, TESS_OFF, GS_OFF, 2807 sctx->ngg, PIPE_SHADER_VERTEX)); 2808 si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, 2809 si_get_user_data_base(sctx->gfx_level, TESS_OFF, GS_OFF, 2810 NGG_OFF, PIPE_SHADER_TESS_CTRL)); 2811 si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, 2812 si_get_user_data_base(sctx->gfx_level, TESS_OFF, GS_OFF, 2813 NGG_OFF, PIPE_SHADER_GEOMETRY)); 2814 si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0); 2815 2816 si_set_ring_buffer(sctx, SI_GS_ATTRIBUTE_RING, &sctx->screen->attribute_ring->b.b, 2817 0, ~0u, false, true, 16, 32, 0); 2818} 2819 2820static bool si_upload_shader_descriptors(struct si_context *sctx, unsigned mask) 2821{ 2822 unsigned dirty = sctx->descriptors_dirty & mask; 2823 2824 if (dirty) { 2825 unsigned iter_mask = dirty; 2826 2827 do { 2828 if (!si_upload_descriptors(sctx, &sctx->descriptors[u_bit_scan(&iter_mask)])) 2829 return false; 2830 } while (iter_mask); 2831 2832 sctx->descriptors_dirty &= ~dirty; 2833 sctx->shader_pointers_dirty |= dirty; 2834 si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers); 2835 } 2836 2837 si_upload_bindless_descriptors(sctx); 2838 return true; 2839} 2840 2841bool si_upload_graphics_shader_descriptors(struct si_context *sctx) 2842{ 2843 const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE); 2844 return si_upload_shader_descriptors(sctx, mask); 2845} 2846 2847bool si_upload_compute_shader_descriptors(struct si_context *sctx) 2848{ 2849 /* This does not update internal bindings as that is not needed for compute shaders 2850 * and the input buffer is using the same SGPR's anyway. 2851 */ 2852 const unsigned mask = 2853 u_bit_consecutive(SI_DESCS_FIRST_COMPUTE, SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE); 2854 return si_upload_shader_descriptors(sctx, mask); 2855} 2856 2857void si_release_all_descriptors(struct si_context *sctx) 2858{ 2859 int i; 2860 2861 for (i = 0; i < SI_NUM_SHADERS; i++) { 2862 si_release_buffer_resources(&sctx->const_and_shader_buffers[i], 2863 si_const_and_shader_buffer_descriptors(sctx, i)); 2864 si_release_sampler_views(&sctx->samplers[i]); 2865 si_release_image_views(&sctx->images[i]); 2866 } 2867 si_release_buffer_resources(&sctx->internal_bindings, &sctx->descriptors[SI_DESCS_INTERNAL]); 2868 for (i = 0; i < SI_NUM_VERTEX_BUFFERS; i++) 2869 pipe_vertex_buffer_unreference(&sctx->vertex_buffer[i]); 2870 2871 for (i = 0; i < SI_NUM_DESCS; ++i) 2872 si_release_descriptors(&sctx->descriptors[i]); 2873 2874 si_resource_reference(&sctx->vb_descriptors_buffer, NULL); 2875 sctx->vb_descriptors_gpu_list = NULL; /* points into a mapped buffer */ 2876 2877 si_release_bindless_descriptors(sctx); 2878} 2879 2880bool si_gfx_resources_check_encrypted(struct si_context *sctx) 2881{ 2882 bool use_encrypted_bo = false; 2883 2884 for (unsigned i = 0; i < SI_NUM_GRAPHICS_SHADERS && !use_encrypted_bo; i++) { 2885 struct si_shader_ctx_state *current_shader = &sctx->shaders[i]; 2886 if (!current_shader->cso) 2887 continue; 2888 2889 use_encrypted_bo |= 2890 si_buffer_resources_check_encrypted(sctx, &sctx->const_and_shader_buffers[i]); 2891 use_encrypted_bo |= 2892 si_sampler_views_check_encrypted(sctx, &sctx->samplers[i], 2893 current_shader->cso->info.base.textures_used[0]); 2894 use_encrypted_bo |= si_image_views_check_encrypted(sctx, &sctx->images[i], 2895 u_bit_consecutive(0, current_shader->cso->info.base.num_images)); 2896 } 2897 use_encrypted_bo |= si_buffer_resources_check_encrypted(sctx, &sctx->internal_bindings); 2898 2899 struct si_state_blend *blend = sctx->queued.named.blend; 2900 for (int i = 0; i < sctx->framebuffer.state.nr_cbufs && !use_encrypted_bo; i++) { 2901 struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i]; 2902 if (surf && surf->texture) { 2903 struct si_texture *tex = (struct si_texture *)surf->texture; 2904 if (!(tex->buffer.flags & RADEON_FLAG_ENCRYPTED)) 2905 continue; 2906 2907 /* Are we reading from this framebuffer */ 2908 if (((blend->blend_enable_4bit >> (4 * i)) & 0xf) || 2909 vi_dcc_enabled(tex, 0)) { 2910 use_encrypted_bo = true; 2911 } 2912 } 2913 } 2914 2915 if (sctx->framebuffer.state.zsbuf) { 2916 struct si_texture* zs = (struct si_texture *)sctx->framebuffer.state.zsbuf->texture; 2917 if (zs && 2918 (zs->buffer.flags & RADEON_FLAG_ENCRYPTED)) { 2919 /* TODO: This isn't needed if depth.func is PIPE_FUNC_NEVER or PIPE_FUNC_ALWAYS */ 2920 use_encrypted_bo = true; 2921 } 2922 } 2923 2924#ifndef NDEBUG 2925 if (use_encrypted_bo) { 2926 /* Verify that color buffers are encrypted */ 2927 for (int i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 2928 struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i]; 2929 if (!surf) 2930 continue; 2931 struct si_texture *tex = (struct si_texture *)surf->texture; 2932 assert(!surf->texture || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)); 2933 } 2934 /* Verify that depth/stencil buffer is encrypted */ 2935 if (sctx->framebuffer.state.zsbuf) { 2936 struct pipe_surface *surf = sctx->framebuffer.state.zsbuf; 2937 struct si_texture *tex = (struct si_texture *)surf->texture; 2938 assert(!surf->texture || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)); 2939 } 2940 } 2941#endif 2942 2943 return use_encrypted_bo; 2944} 2945 2946void si_gfx_resources_add_all_to_bo_list(struct si_context *sctx) 2947{ 2948 for (unsigned i = 0; i < SI_NUM_GRAPHICS_SHADERS; i++) { 2949 si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]); 2950 si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i]); 2951 si_image_views_begin_new_cs(sctx, &sctx->images[i]); 2952 } 2953 si_buffer_resources_begin_new_cs(sctx, &sctx->internal_bindings); 2954 si_vertex_buffers_begin_new_cs(sctx); 2955 2956 if (sctx->bo_list_add_all_resident_resources) 2957 si_resident_buffers_add_all_to_bo_list(sctx); 2958 2959 assert(sctx->bo_list_add_all_gfx_resources); 2960 sctx->bo_list_add_all_gfx_resources = false; 2961} 2962 2963bool si_compute_resources_check_encrypted(struct si_context *sctx) 2964{ 2965 unsigned sh = PIPE_SHADER_COMPUTE; 2966 2967 struct si_shader_info* info = &sctx->cs_shader_state.program->sel.info; 2968 2969 /* TODO: we should assert that either use_encrypted_bo is false, 2970 * or all writable buffers are encrypted. 2971 */ 2972 return si_buffer_resources_check_encrypted(sctx, &sctx->const_and_shader_buffers[sh]) || 2973 si_sampler_views_check_encrypted(sctx, &sctx->samplers[sh], info->base.textures_used[0]) || 2974 si_image_views_check_encrypted(sctx, &sctx->images[sh], u_bit_consecutive(0, info->base.num_images)) || 2975 si_buffer_resources_check_encrypted(sctx, &sctx->internal_bindings); 2976} 2977 2978void si_compute_resources_add_all_to_bo_list(struct si_context *sctx) 2979{ 2980 unsigned sh = PIPE_SHADER_COMPUTE; 2981 2982 si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[sh]); 2983 si_sampler_views_begin_new_cs(sctx, &sctx->samplers[sh]); 2984 si_image_views_begin_new_cs(sctx, &sctx->images[sh]); 2985 si_buffer_resources_begin_new_cs(sctx, &sctx->internal_bindings); 2986 2987 if (sctx->bo_list_add_all_resident_resources) 2988 si_resident_buffers_add_all_to_bo_list(sctx); 2989 2990 assert(sctx->bo_list_add_all_compute_resources); 2991 sctx->bo_list_add_all_compute_resources = false; 2992} 2993 2994void si_add_all_descriptors_to_bo_list(struct si_context *sctx) 2995{ 2996 for (unsigned i = 0; i < SI_NUM_DESCS; ++i) 2997 si_add_descriptors_to_bo_list(sctx, &sctx->descriptors[i]); 2998 si_add_descriptors_to_bo_list(sctx, &sctx->bindless_descriptors); 2999 3000 sctx->bo_list_add_all_resident_resources = true; 3001 sctx->bo_list_add_all_gfx_resources = true; 3002 sctx->bo_list_add_all_compute_resources = true; 3003} 3004 3005void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx, uint64_t new_active_mask) 3006{ 3007 struct si_descriptors *desc = &sctx->descriptors[desc_idx]; 3008 3009 /* Ignore no-op updates and updates that disable all slots. */ 3010 if (!new_active_mask || 3011 new_active_mask == u_bit_consecutive64(desc->first_active_slot, desc->num_active_slots)) 3012 return; 3013 3014 int first, count; 3015 u_bit_scan_consecutive_range64(&new_active_mask, &first, &count); 3016 assert(new_active_mask == 0); 3017 3018 /* Upload/dump descriptors if slots are being enabled. */ 3019 if (first < desc->first_active_slot || 3020 first + count > desc->first_active_slot + desc->num_active_slots) 3021 sctx->descriptors_dirty |= 1u << desc_idx; 3022 3023 desc->first_active_slot = first; 3024 desc->num_active_slots = count; 3025} 3026 3027void si_set_active_descriptors_for_shader(struct si_context *sctx, struct si_shader_selector *sel) 3028{ 3029 if (!sel) 3030 return; 3031 3032 si_set_active_descriptors(sctx, sel->const_and_shader_buf_descriptors_index, 3033 sel->active_const_and_shader_buffers); 3034 si_set_active_descriptors(sctx, sel->sampler_and_images_descriptors_index, 3035 sel->active_samplers_and_images); 3036} 3037