/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/* Resource binding slots and sampler states (each described with 8 or
 * 4 dwords) are stored in lists in memory which is accessed by shaders
 * using scalar load instructions.
 *
 * This file is responsible for managing such lists. It keeps a copy of all
 * descriptors in CPU memory and re-uploads a whole list if some slots have
 * been changed.
 *
 * This code is also responsible for updating shader pointers to those lists.
 *
 * Note that CP DMA can't be used for updating the lists, because a GPU hang
 * could leave the list in a mid-IB state and the next IB would get wrong
 * descriptors and the whole context would be unusable at that point.
 * (Note: The register shadowing can't be used due to the same reason)
 *
 * Also, uploading descriptors to newly allocated memory doesn't require
 * a KCACHE flush.
 *
 *
 * Possible scenarios for one 16 dword image+sampler slot:
 *
 *       | Image        | w/ FMASK   | Buffer       | NULL
 * [ 0: 3] Image[0:3]   | Image[0:3] | Null[0:3]    | Null[0:3]
 * [ 4: 7] Image[4:7]   | Image[4:7] | Buffer[0:3]  | 0
 * [ 8:11] Null[0:3]    | Fmask[0:3] | Null[0:3]    | Null[0:3]
 * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3]
 *
 * FMASK implies MSAA, therefore no sampler state.
 * Sampler states are never unbound except when FMASK is bound.
 */

#include "si_pipe.h"
#include "si_compute.h"
#include "si_build_pm4.h"
#include "sid.h"
#include "util/format/u_format.h"
#include "util/hash_table.h"
#include "util/u_idalloc.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"

/* NULL image and buffer descriptor for textures (alpha = 1) and images
 * (alpha = 0).
 *
 * For images, all fields must be zero except for the swizzle, which
 * supports arbitrary combinations of 0s and 1s. The texture type must be
 * any valid type (e.g. 1D). If the texture type isn't set, the hw hangs.
 *
 * For buffers, all fields must be zero. If they are not, the hw hangs.
 *
 * This is the only reason why the buffer descriptor must be in words [4:7].
76bf215546Sopenharmony_ci */ 77bf215546Sopenharmony_cistatic uint32_t null_texture_descriptor[8] = { 78bf215546Sopenharmony_ci 0, 0, 0, S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) | S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D) 79bf215546Sopenharmony_ci /* the rest must contain zeros, which is also used by the buffer 80bf215546Sopenharmony_ci * descriptor */ 81bf215546Sopenharmony_ci}; 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_cistatic uint32_t null_image_descriptor[8] = { 84bf215546Sopenharmony_ci 0, 0, 0, S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D) 85bf215546Sopenharmony_ci /* the rest must contain zeros, which is also used by the buffer 86bf215546Sopenharmony_ci * descriptor */ 87bf215546Sopenharmony_ci}; 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_cistatic uint64_t si_desc_extract_buffer_address(const uint32_t *desc) 90bf215546Sopenharmony_ci{ 91bf215546Sopenharmony_ci uint64_t va = desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32); 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci /* Sign-extend the 48-bit address. */ 94bf215546Sopenharmony_ci va <<= 16; 95bf215546Sopenharmony_ci va = (int64_t)va >> 16; 96bf215546Sopenharmony_ci return va; 97bf215546Sopenharmony_ci} 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_cistatic void si_init_descriptor_list(uint32_t *desc_list, unsigned element_dw_size, 100bf215546Sopenharmony_ci unsigned num_elements, const uint32_t *null_descriptor) 101bf215546Sopenharmony_ci{ 102bf215546Sopenharmony_ci int i; 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci /* Initialize the array to NULL descriptors if the element size is 8. 
*/ 105bf215546Sopenharmony_ci if (null_descriptor) { 106bf215546Sopenharmony_ci assert(element_dw_size % 8 == 0); 107bf215546Sopenharmony_ci for (i = 0; i < num_elements * element_dw_size / 8; i++) 108bf215546Sopenharmony_ci memcpy(desc_list + i * 8, null_descriptor, 8 * 4); 109bf215546Sopenharmony_ci } 110bf215546Sopenharmony_ci} 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_cistatic void si_init_descriptors(struct si_descriptors *desc, short shader_userdata_rel_index, 113bf215546Sopenharmony_ci unsigned element_dw_size, unsigned num_elements) 114bf215546Sopenharmony_ci{ 115bf215546Sopenharmony_ci desc->list = CALLOC(num_elements, element_dw_size * 4); 116bf215546Sopenharmony_ci desc->element_dw_size = element_dw_size; 117bf215546Sopenharmony_ci desc->num_elements = num_elements; 118bf215546Sopenharmony_ci desc->shader_userdata_offset = shader_userdata_rel_index * 4; 119bf215546Sopenharmony_ci desc->slot_index_to_bind_directly = -1; 120bf215546Sopenharmony_ci} 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_cistatic void si_release_descriptors(struct si_descriptors *desc) 123bf215546Sopenharmony_ci{ 124bf215546Sopenharmony_ci si_resource_reference(&desc->buffer, NULL); 125bf215546Sopenharmony_ci FREE(desc->list); 126bf215546Sopenharmony_ci} 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_cistatic bool si_upload_descriptors(struct si_context *sctx, struct si_descriptors *desc) 129bf215546Sopenharmony_ci{ 130bf215546Sopenharmony_ci unsigned slot_size = desc->element_dw_size * 4; 131bf215546Sopenharmony_ci unsigned first_slot_offset = desc->first_active_slot * slot_size; 132bf215546Sopenharmony_ci unsigned upload_size = desc->num_active_slots * slot_size; 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci /* Skip the upload if no shader is using the descriptors. dirty_mask 135bf215546Sopenharmony_ci * will stay dirty and the descriptors will be uploaded when there is 136bf215546Sopenharmony_ci * a shader using them. 
137bf215546Sopenharmony_ci */ 138bf215546Sopenharmony_ci if (!upload_size) 139bf215546Sopenharmony_ci return true; 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci /* If there is just one active descriptor, bind it directly. */ 142bf215546Sopenharmony_ci if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly && 143bf215546Sopenharmony_ci desc->num_active_slots == 1) { 144bf215546Sopenharmony_ci uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly * desc->element_dw_size]; 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci /* The buffer is already in the buffer list. */ 147bf215546Sopenharmony_ci si_resource_reference(&desc->buffer, NULL); 148bf215546Sopenharmony_ci desc->gpu_list = NULL; 149bf215546Sopenharmony_ci desc->gpu_address = si_desc_extract_buffer_address(descriptor); 150bf215546Sopenharmony_ci return true; 151bf215546Sopenharmony_ci } 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci uint32_t *ptr; 154bf215546Sopenharmony_ci unsigned buffer_offset; 155bf215546Sopenharmony_ci u_upload_alloc(sctx->b.const_uploader, first_slot_offset, upload_size, 156bf215546Sopenharmony_ci si_optimal_tcc_alignment(sctx, upload_size), &buffer_offset, 157bf215546Sopenharmony_ci (struct pipe_resource **)&desc->buffer, (void **)&ptr); 158bf215546Sopenharmony_ci if (!desc->buffer) { 159bf215546Sopenharmony_ci desc->gpu_address = 0; 160bf215546Sopenharmony_ci return false; /* skip the draw call */ 161bf215546Sopenharmony_ci } 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci util_memcpy_cpu_to_le32(ptr, (char *)desc->list + first_slot_offset, upload_size); 164bf215546Sopenharmony_ci desc->gpu_list = ptr - first_slot_offset / 4; 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, desc->buffer, 167bf215546Sopenharmony_ci RADEON_USAGE_READ | RADEON_PRIO_DESCRIPTORS); 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci /* The shader pointer should point to slot 0. 
*/ 170bf215546Sopenharmony_ci buffer_offset -= first_slot_offset; 171bf215546Sopenharmony_ci desc->gpu_address = desc->buffer->gpu_address + buffer_offset; 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci assert(desc->buffer->flags & RADEON_FLAG_32BIT); 174bf215546Sopenharmony_ci assert((desc->buffer->gpu_address >> 32) == sctx->screen->info.address32_hi); 175bf215546Sopenharmony_ci assert((desc->gpu_address >> 32) == sctx->screen->info.address32_hi); 176bf215546Sopenharmony_ci return true; 177bf215546Sopenharmony_ci} 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_cistatic void 180bf215546Sopenharmony_cisi_add_descriptors_to_bo_list(struct si_context *sctx, struct si_descriptors *desc) 181bf215546Sopenharmony_ci{ 182bf215546Sopenharmony_ci if (!desc->buffer) 183bf215546Sopenharmony_ci return; 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, desc->buffer, 186bf215546Sopenharmony_ci RADEON_USAGE_READ | RADEON_PRIO_DESCRIPTORS); 187bf215546Sopenharmony_ci} 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci/* SAMPLER VIEWS */ 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_cistatic inline unsigned si_get_sampler_view_priority(struct si_resource *res) 192bf215546Sopenharmony_ci{ 193bf215546Sopenharmony_ci if (res->b.b.target == PIPE_BUFFER) 194bf215546Sopenharmony_ci return RADEON_PRIO_SAMPLER_BUFFER; 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci if (res->b.b.nr_samples > 1) 197bf215546Sopenharmony_ci return RADEON_PRIO_SAMPLER_TEXTURE_MSAA; 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci return RADEON_PRIO_SAMPLER_TEXTURE; 200bf215546Sopenharmony_ci} 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_cistatic struct si_descriptors *si_sampler_and_image_descriptors(struct si_context *sctx, 203bf215546Sopenharmony_ci unsigned shader) 204bf215546Sopenharmony_ci{ 205bf215546Sopenharmony_ci return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)]; 206bf215546Sopenharmony_ci} 
207bf215546Sopenharmony_ci 208bf215546Sopenharmony_cistatic void si_release_sampler_views(struct si_samplers *samplers) 209bf215546Sopenharmony_ci{ 210bf215546Sopenharmony_ci int i; 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(samplers->views); i++) { 213bf215546Sopenharmony_ci pipe_sampler_view_reference(&samplers->views[i], NULL); 214bf215546Sopenharmony_ci } 215bf215546Sopenharmony_ci} 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_cistatic void si_sampler_view_add_buffer(struct si_context *sctx, struct pipe_resource *resource, 218bf215546Sopenharmony_ci unsigned usage, bool is_stencil_sampler, 219bf215546Sopenharmony_ci bool check_mem) 220bf215546Sopenharmony_ci{ 221bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)resource; 222bf215546Sopenharmony_ci unsigned priority; 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci if (!resource) 225bf215546Sopenharmony_ci return; 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci /* Use the flushed depth texture if direct sampling is unsupported. */ 228bf215546Sopenharmony_ci if (resource->target != PIPE_BUFFER && tex->is_depth && 229bf215546Sopenharmony_ci !si_can_sample_zs(tex, is_stencil_sampler)) 230bf215546Sopenharmony_ci tex = tex->flushed_depth_texture; 231bf215546Sopenharmony_ci 232bf215546Sopenharmony_ci priority = si_get_sampler_view_priority(&tex->buffer); 233bf215546Sopenharmony_ci radeon_add_to_gfx_buffer_list_check_mem(sctx, &tex->buffer, usage | priority, check_mem); 234bf215546Sopenharmony_ci} 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_cistatic void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_samplers *samplers) 237bf215546Sopenharmony_ci{ 238bf215546Sopenharmony_ci unsigned mask = samplers->enabled_mask; 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci /* Add buffers to the CS. 
*/ 241bf215546Sopenharmony_ci while (mask) { 242bf215546Sopenharmony_ci int i = u_bit_scan(&mask); 243bf215546Sopenharmony_ci struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i]; 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ, 246bf215546Sopenharmony_ci sview->is_stencil_sampler, false); 247bf215546Sopenharmony_ci } 248bf215546Sopenharmony_ci} 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_cistatic bool si_sampler_views_check_encrypted(struct si_context *sctx, struct si_samplers *samplers, 251bf215546Sopenharmony_ci unsigned samplers_declared) 252bf215546Sopenharmony_ci{ 253bf215546Sopenharmony_ci unsigned mask = samplers->enabled_mask & samplers_declared; 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci /* Verify if a samplers uses an encrypted resource */ 256bf215546Sopenharmony_ci while (mask) { 257bf215546Sopenharmony_ci int i = u_bit_scan(&mask); 258bf215546Sopenharmony_ci struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i]; 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci struct si_resource *res = si_resource(sview->base.texture); 261bf215546Sopenharmony_ci if (res->flags & RADEON_FLAG_ENCRYPTED) 262bf215546Sopenharmony_ci return true; 263bf215546Sopenharmony_ci } 264bf215546Sopenharmony_ci return false; 265bf215546Sopenharmony_ci} 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci/* Set buffer descriptor fields that can be changed by reallocations. 
*/ 268bf215546Sopenharmony_cistatic void si_set_buf_desc_address(struct si_resource *buf, uint64_t offset, uint32_t *state) 269bf215546Sopenharmony_ci{ 270bf215546Sopenharmony_ci uint64_t va = buf->gpu_address + offset; 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_ci state[0] = va; 273bf215546Sopenharmony_ci state[1] &= C_008F04_BASE_ADDRESS_HI; 274bf215546Sopenharmony_ci state[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32); 275bf215546Sopenharmony_ci} 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci/* Set texture descriptor fields that can be changed by reallocations. 278bf215546Sopenharmony_ci * 279bf215546Sopenharmony_ci * \param tex texture 280bf215546Sopenharmony_ci * \param base_level_info information of the level of BASE_ADDRESS 281bf215546Sopenharmony_ci * \param base_level the level of BASE_ADDRESS 282bf215546Sopenharmony_ci * \param first_level pipe_sampler_view.u.tex.first_level 283bf215546Sopenharmony_ci * \param block_width util_format_get_blockwidth() 284bf215546Sopenharmony_ci * \param is_stencil select between separate Z & Stencil 285bf215546Sopenharmony_ci * \param state descriptor to update 286bf215546Sopenharmony_ci */ 287bf215546Sopenharmony_civoid si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture *tex, 288bf215546Sopenharmony_ci const struct legacy_surf_level *base_level_info, 289bf215546Sopenharmony_ci unsigned base_level, unsigned first_level, unsigned block_width, 290bf215546Sopenharmony_ci /* restrict decreases overhead of si_set_sampler_view_desc ~8x. 
*/ 291bf215546Sopenharmony_ci bool is_stencil, uint16_t access, uint32_t * restrict state) 292bf215546Sopenharmony_ci{ 293bf215546Sopenharmony_ci uint64_t va, meta_va = 0; 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci if (tex->is_depth && !si_can_sample_zs(tex, is_stencil)) { 296bf215546Sopenharmony_ci tex = tex->flushed_depth_texture; 297bf215546Sopenharmony_ci is_stencil = false; 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci va = tex->buffer.gpu_address; 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci if (sscreen->info.gfx_level >= GFX9) { 303bf215546Sopenharmony_ci /* Only stencil_offset needs to be added here. */ 304bf215546Sopenharmony_ci if (is_stencil) 305bf215546Sopenharmony_ci va += tex->surface.u.gfx9.zs.stencil_offset; 306bf215546Sopenharmony_ci else 307bf215546Sopenharmony_ci va += tex->surface.u.gfx9.surf_offset; 308bf215546Sopenharmony_ci } else { 309bf215546Sopenharmony_ci va += (uint64_t)base_level_info->offset_256B * 256; 310bf215546Sopenharmony_ci } 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci state[0] = va >> 8; 313bf215546Sopenharmony_ci state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci /* Only macrotiled modes can set tile swizzle. 316bf215546Sopenharmony_ci * GFX9 doesn't use (legacy) base_level_info. 
317bf215546Sopenharmony_ci */ 318bf215546Sopenharmony_ci if (sscreen->info.gfx_level >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D) 319bf215546Sopenharmony_ci state[0] |= tex->surface.tile_swizzle; 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci if (sscreen->info.gfx_level >= GFX8) { 322bf215546Sopenharmony_ci if (!(access & SI_IMAGE_ACCESS_DCC_OFF) && vi_dcc_enabled(tex, first_level)) { 323bf215546Sopenharmony_ci meta_va = tex->buffer.gpu_address + tex->surface.meta_offset; 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_ci if (sscreen->info.gfx_level == GFX8) { 326bf215546Sopenharmony_ci meta_va += tex->surface.u.legacy.color.dcc_level[base_level].dcc_offset; 327bf215546Sopenharmony_ci assert(base_level_info->mode == RADEON_SURF_MODE_2D); 328bf215546Sopenharmony_ci } 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci unsigned dcc_tile_swizzle = tex->surface.tile_swizzle << 8; 331bf215546Sopenharmony_ci dcc_tile_swizzle &= (1 << tex->surface.meta_alignment_log2) - 1; 332bf215546Sopenharmony_ci meta_va |= dcc_tile_swizzle; 333bf215546Sopenharmony_ci } else if (vi_tc_compat_htile_enabled(tex, first_level, 334bf215546Sopenharmony_ci is_stencil ? 
PIPE_MASK_S : PIPE_MASK_Z)) { 335bf215546Sopenharmony_ci meta_va = tex->buffer.gpu_address + tex->surface.meta_offset; 336bf215546Sopenharmony_ci } 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci if (meta_va) 339bf215546Sopenharmony_ci state[6] |= S_008F28_COMPRESSION_EN(1); 340bf215546Sopenharmony_ci } 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci if (sscreen->info.gfx_level >= GFX8 && sscreen->info.gfx_level <= GFX9) 343bf215546Sopenharmony_ci state[7] = meta_va >> 8; 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci if (sscreen->info.gfx_level >= GFX10) { 346bf215546Sopenharmony_ci if (is_stencil) { 347bf215546Sopenharmony_ci state[3] |= S_00A00C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode); 348bf215546Sopenharmony_ci } else { 349bf215546Sopenharmony_ci state[3] |= S_00A00C_SW_MODE(tex->surface.u.gfx9.swizzle_mode); 350bf215546Sopenharmony_ci } 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_ci if (meta_va) { 353bf215546Sopenharmony_ci struct gfx9_surf_meta_flags meta = { 354bf215546Sopenharmony_ci .rb_aligned = 1, 355bf215546Sopenharmony_ci .pipe_aligned = 1, 356bf215546Sopenharmony_ci }; 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_ci if (!tex->is_depth && tex->surface.meta_offset) 359bf215546Sopenharmony_ci meta = tex->surface.u.gfx9.color.dcc; 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) | 362bf215546Sopenharmony_ci S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8) | 363bf215546Sopenharmony_ci /* DCC image stores require the following settings: 364bf215546Sopenharmony_ci * - INDEPENDENT_64B_BLOCKS = 0 365bf215546Sopenharmony_ci * - INDEPENDENT_128B_BLOCKS = 1 366bf215546Sopenharmony_ci * - MAX_COMPRESSED_BLOCK_SIZE = 128B 367bf215546Sopenharmony_ci * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used) 368bf215546Sopenharmony_ci * 369bf215546Sopenharmony_ci * The same limitations apply to SDMA compressed stores because 370bf215546Sopenharmony_ci * SDMA uses the same 
DCC codec. 371bf215546Sopenharmony_ci */ 372bf215546Sopenharmony_ci S_00A018_WRITE_COMPRESS_ENABLE(ac_surface_supports_dcc_image_stores(sscreen->info.gfx_level, &tex->surface) && 373bf215546Sopenharmony_ci (access & SI_IMAGE_ACCESS_ALLOW_DCC_STORE)); 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci /* TC-compatible MSAA HTILE requires ITERATE_256. */ 376bf215546Sopenharmony_ci if (tex->is_depth && tex->buffer.b.b.nr_samples >= 2) 377bf215546Sopenharmony_ci state[6] |= S_00A018_ITERATE_256(1); 378bf215546Sopenharmony_ci } 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci state[7] = meta_va >> 16; 381bf215546Sopenharmony_ci } else if (sscreen->info.gfx_level == GFX9) { 382bf215546Sopenharmony_ci if (is_stencil) { 383bf215546Sopenharmony_ci state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode); 384bf215546Sopenharmony_ci state[4] |= S_008F20_PITCH(tex->surface.u.gfx9.zs.stencil_epitch); 385bf215546Sopenharmony_ci } else { 386bf215546Sopenharmony_ci uint16_t epitch = tex->surface.u.gfx9.epitch; 387bf215546Sopenharmony_ci if (tex->buffer.b.b.format == PIPE_FORMAT_R8G8_R8B8_UNORM && 388bf215546Sopenharmony_ci block_width == 1) { 389bf215546Sopenharmony_ci /* epitch is patched in ac_surface for sdma/vcn blocks to get 390bf215546Sopenharmony_ci * a value expressed in elements unit. 391bf215546Sopenharmony_ci * But here the texture is used with block_width == 1 so we 392bf215546Sopenharmony_ci * need epitch in pixel units. 
393bf215546Sopenharmony_ci */ 394bf215546Sopenharmony_ci epitch = (epitch + 1) / tex->surface.blk_w - 1; 395bf215546Sopenharmony_ci } 396bf215546Sopenharmony_ci state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.swizzle_mode); 397bf215546Sopenharmony_ci state[4] |= S_008F20_PITCH(epitch); 398bf215546Sopenharmony_ci } 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_ci state[5] &= 401bf215546Sopenharmony_ci C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED; 402bf215546Sopenharmony_ci if (meta_va) { 403bf215546Sopenharmony_ci struct gfx9_surf_meta_flags meta = { 404bf215546Sopenharmony_ci .rb_aligned = 1, 405bf215546Sopenharmony_ci .pipe_aligned = 1, 406bf215546Sopenharmony_ci }; 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci if (!tex->is_depth && tex->surface.meta_offset) 409bf215546Sopenharmony_ci meta = tex->surface.u.gfx9.color.dcc; 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) | 412bf215546Sopenharmony_ci S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) | 413bf215546Sopenharmony_ci S_008F24_META_RB_ALIGNED(meta.rb_aligned); 414bf215546Sopenharmony_ci } 415bf215546Sopenharmony_ci } else { 416bf215546Sopenharmony_ci /* GFX6-GFX8 */ 417bf215546Sopenharmony_ci unsigned pitch = base_level_info->nblk_x * block_width; 418bf215546Sopenharmony_ci unsigned index = si_tile_mode_index(tex, base_level, is_stencil); 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci state[3] |= S_008F1C_TILING_INDEX(index); 421bf215546Sopenharmony_ci state[4] |= S_008F20_PITCH(pitch - 1); 422bf215546Sopenharmony_ci } 423bf215546Sopenharmony_ci 424bf215546Sopenharmony_ci if (tex->swap_rgb_to_bgr) { 425bf215546Sopenharmony_ci unsigned swizzle_x = G_008F1C_DST_SEL_X(state[3]); 426bf215546Sopenharmony_ci unsigned swizzle_z = G_008F1C_DST_SEL_Z(state[3]); 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci state[3] &= C_008F1C_DST_SEL_X; 429bf215546Sopenharmony_ci state[3] |= 
S_008F1C_DST_SEL_X(swizzle_z); 430bf215546Sopenharmony_ci state[3] &= C_008F1C_DST_SEL_Z; 431bf215546Sopenharmony_ci state[3] |= S_008F1C_DST_SEL_Z(swizzle_x); 432bf215546Sopenharmony_ci } 433bf215546Sopenharmony_ci} 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_cistatic void si_set_sampler_state_desc(struct si_sampler_state *sstate, 436bf215546Sopenharmony_ci struct si_sampler_view *sview, struct si_texture *tex, 437bf215546Sopenharmony_ci uint32_t *desc) 438bf215546Sopenharmony_ci{ 439bf215546Sopenharmony_ci if (tex && tex->upgraded_depth && sview && !sview->is_stencil_sampler) 440bf215546Sopenharmony_ci memcpy(desc, sstate->upgraded_depth_val, 4 * 4); 441bf215546Sopenharmony_ci else 442bf215546Sopenharmony_ci memcpy(desc, sstate->val, 4 * 4); 443bf215546Sopenharmony_ci} 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_cistatic void si_set_sampler_view_desc(struct si_context *sctx, struct si_sampler_view *sview, 446bf215546Sopenharmony_ci struct si_sampler_state *sstate, 447bf215546Sopenharmony_ci /* restrict decreases overhead of si_set_sampler_view_desc ~8x. */ 448bf215546Sopenharmony_ci uint32_t * restrict desc) 449bf215546Sopenharmony_ci{ 450bf215546Sopenharmony_ci struct pipe_sampler_view *view = &sview->base; 451bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)view->texture; 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_ci assert(tex); /* views with texture == NULL aren't supported */ 454bf215546Sopenharmony_ci 455bf215546Sopenharmony_ci if (tex->buffer.b.b.target == PIPE_BUFFER) { 456bf215546Sopenharmony_ci memcpy(desc, sview->state, 8 * 4); 457bf215546Sopenharmony_ci memcpy(desc + 8, null_texture_descriptor, 4 * 4); /* Disable FMASK. 
*/ 458bf215546Sopenharmony_ci si_set_buf_desc_address(&tex->buffer, sview->base.u.buf.offset, desc + 4); 459bf215546Sopenharmony_ci return; 460bf215546Sopenharmony_ci } 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci if (unlikely(sview->dcc_incompatible)) { 463bf215546Sopenharmony_ci if (vi_dcc_enabled(tex, view->u.tex.first_level)) 464bf215546Sopenharmony_ci if (!si_texture_disable_dcc(sctx, tex)) 465bf215546Sopenharmony_ci si_decompress_dcc(sctx, tex); 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci sview->dcc_incompatible = false; 468bf215546Sopenharmony_ci } 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_ci bool is_separate_stencil = tex->db_compatible && sview->is_stencil_sampler; 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_ci memcpy(desc, sview->state, 8 * 4); 473bf215546Sopenharmony_ci si_set_mutable_tex_desc_fields(sctx->screen, tex, sview->base_level_info, 0, 474bf215546Sopenharmony_ci sview->base.u.tex.first_level, sview->block_width, 475bf215546Sopenharmony_ci is_separate_stencil, 0, desc); 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_ci if (tex->surface.fmask_size) { 478bf215546Sopenharmony_ci memcpy(desc + 8, sview->fmask_state, 8 * 4); 479bf215546Sopenharmony_ci } else { 480bf215546Sopenharmony_ci /* Disable FMASK and bind sampler state in [12:15]. 
*/ 481bf215546Sopenharmony_ci memcpy(desc + 8, null_texture_descriptor, 4 * 4); 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_ci if (sstate) 484bf215546Sopenharmony_ci si_set_sampler_state_desc(sstate, sview, tex, desc + 12); 485bf215546Sopenharmony_ci } 486bf215546Sopenharmony_ci} 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_cistatic bool color_needs_decompression(struct si_texture *tex) 489bf215546Sopenharmony_ci{ 490bf215546Sopenharmony_ci if (tex->is_depth) 491bf215546Sopenharmony_ci return false; 492bf215546Sopenharmony_ci 493bf215546Sopenharmony_ci return tex->surface.fmask_size || 494bf215546Sopenharmony_ci (tex->dirty_level_mask && (tex->cmask_buffer || tex->surface.meta_offset)); 495bf215546Sopenharmony_ci} 496bf215546Sopenharmony_ci 497bf215546Sopenharmony_cistatic bool depth_needs_decompression(struct si_texture *tex, bool is_stencil) 498bf215546Sopenharmony_ci{ 499bf215546Sopenharmony_ci /* If the depth/stencil texture is TC-compatible, no decompression 500bf215546Sopenharmony_ci * will be done. The decompression function will only flush DB caches 501bf215546Sopenharmony_ci * to make it coherent with shaders. That's necessary because the driver 502bf215546Sopenharmony_ci * doesn't flush DB caches in any other case. 503bf215546Sopenharmony_ci */ 504bf215546Sopenharmony_ci return tex->db_compatible && (tex->dirty_level_mask || (is_stencil && tex->stencil_dirty_level_mask)); 505bf215546Sopenharmony_ci} 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_cistatic void si_reset_sampler_view_slot(struct si_samplers *samplers, unsigned slot, 508bf215546Sopenharmony_ci uint32_t * restrict desc) 509bf215546Sopenharmony_ci{ 510bf215546Sopenharmony_ci pipe_sampler_view_reference(&samplers->views[slot], NULL); 511bf215546Sopenharmony_ci memcpy(desc, null_texture_descriptor, 8 * 4); 512bf215546Sopenharmony_ci /* Only clear the lower dwords of FMASK. 
*/ 513bf215546Sopenharmony_ci memcpy(desc + 8, null_texture_descriptor, 4 * 4); 514bf215546Sopenharmony_ci /* Re-set the sampler state if we are transitioning from FMASK. */ 515bf215546Sopenharmony_ci if (samplers->sampler_states[slot]) 516bf215546Sopenharmony_ci si_set_sampler_state_desc(samplers->sampler_states[slot], NULL, NULL, desc + 12); 517bf215546Sopenharmony_ci} 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_cistatic void si_set_sampler_views(struct si_context *sctx, unsigned shader, 520bf215546Sopenharmony_ci unsigned start_slot, unsigned count, 521bf215546Sopenharmony_ci unsigned unbind_num_trailing_slots, 522bf215546Sopenharmony_ci bool take_ownership, struct pipe_sampler_view **views, 523bf215546Sopenharmony_ci bool disallow_early_out) 524bf215546Sopenharmony_ci{ 525bf215546Sopenharmony_ci struct si_samplers *samplers = &sctx->samplers[shader]; 526bf215546Sopenharmony_ci struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader); 527bf215546Sopenharmony_ci uint32_t unbound_mask = 0; 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_ci if (views) { 530bf215546Sopenharmony_ci for (unsigned i = 0; i < count; i++) { 531bf215546Sopenharmony_ci unsigned slot = start_slot + i; 532bf215546Sopenharmony_ci struct si_sampler_view *sview = (struct si_sampler_view *)views[i]; 533bf215546Sopenharmony_ci unsigned desc_slot = si_get_sampler_slot(slot); 534bf215546Sopenharmony_ci /* restrict decreases overhead of si_set_sampler_view_desc ~8x. 
*/ 535bf215546Sopenharmony_ci uint32_t *restrict desc = descs->list + desc_slot * 16; 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci if (samplers->views[slot] == &sview->base && !disallow_early_out) { 538bf215546Sopenharmony_ci if (take_ownership) { 539bf215546Sopenharmony_ci struct pipe_sampler_view *view = views[i]; 540bf215546Sopenharmony_ci pipe_sampler_view_reference(&view, NULL); 541bf215546Sopenharmony_ci } 542bf215546Sopenharmony_ci continue; 543bf215546Sopenharmony_ci } 544bf215546Sopenharmony_ci 545bf215546Sopenharmony_ci if (sview) { 546bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)sview->base.texture; 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci si_set_sampler_view_desc(sctx, sview, samplers->sampler_states[slot], desc); 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_ci if (tex->buffer.b.b.target == PIPE_BUFFER) { 551bf215546Sopenharmony_ci tex->buffer.bind_history |= SI_BIND_SAMPLER_BUFFER(shader); 552bf215546Sopenharmony_ci samplers->needs_depth_decompress_mask &= ~(1u << slot); 553bf215546Sopenharmony_ci samplers->needs_color_decompress_mask &= ~(1u << slot); 554bf215546Sopenharmony_ci } else { 555bf215546Sopenharmony_ci if (tex->is_depth) { 556bf215546Sopenharmony_ci samplers->has_depth_tex_mask |= 1u << slot; 557bf215546Sopenharmony_ci samplers->needs_color_decompress_mask &= ~(1u << slot); 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci if (depth_needs_decompression(tex, sview->is_stencil_sampler)) { 560bf215546Sopenharmony_ci samplers->needs_depth_decompress_mask |= 1u << slot; 561bf215546Sopenharmony_ci } else { 562bf215546Sopenharmony_ci samplers->needs_depth_decompress_mask &= ~(1u << slot); 563bf215546Sopenharmony_ci } 564bf215546Sopenharmony_ci } else { 565bf215546Sopenharmony_ci samplers->has_depth_tex_mask &= ~(1u << slot); 566bf215546Sopenharmony_ci samplers->needs_depth_decompress_mask &= ~(1u << slot); 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_ci if 
(color_needs_decompression(tex)) { 569bf215546Sopenharmony_ci samplers->needs_color_decompress_mask |= 1u << slot; 570bf215546Sopenharmony_ci } else { 571bf215546Sopenharmony_ci samplers->needs_color_decompress_mask &= ~(1u << slot); 572bf215546Sopenharmony_ci } 573bf215546Sopenharmony_ci } 574bf215546Sopenharmony_ci 575bf215546Sopenharmony_ci if (vi_dcc_enabled(tex, sview->base.u.tex.first_level) && 576bf215546Sopenharmony_ci p_atomic_read(&tex->framebuffers_bound)) 577bf215546Sopenharmony_ci sctx->need_check_render_feedback = true; 578bf215546Sopenharmony_ci } 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci if (take_ownership) { 581bf215546Sopenharmony_ci pipe_sampler_view_reference(&samplers->views[slot], NULL); 582bf215546Sopenharmony_ci samplers->views[slot] = &sview->base; 583bf215546Sopenharmony_ci } else { 584bf215546Sopenharmony_ci pipe_sampler_view_reference(&samplers->views[slot], &sview->base); 585bf215546Sopenharmony_ci } 586bf215546Sopenharmony_ci samplers->enabled_mask |= 1u << slot; 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_ci /* Since this can flush, it must be done after enabled_mask is 589bf215546Sopenharmony_ci * updated. 
*/ 590bf215546Sopenharmony_ci si_sampler_view_add_buffer(sctx, &tex->buffer.b.b, RADEON_USAGE_READ, 591bf215546Sopenharmony_ci sview->is_stencil_sampler, true); 592bf215546Sopenharmony_ci } else { 593bf215546Sopenharmony_ci si_reset_sampler_view_slot(samplers, slot, desc); 594bf215546Sopenharmony_ci unbound_mask |= 1u << slot; 595bf215546Sopenharmony_ci } 596bf215546Sopenharmony_ci } 597bf215546Sopenharmony_ci } else { 598bf215546Sopenharmony_ci unbind_num_trailing_slots += count; 599bf215546Sopenharmony_ci count = 0; 600bf215546Sopenharmony_ci } 601bf215546Sopenharmony_ci 602bf215546Sopenharmony_ci for (unsigned i = 0; i < unbind_num_trailing_slots; i++) { 603bf215546Sopenharmony_ci unsigned slot = start_slot + count + i; 604bf215546Sopenharmony_ci unsigned desc_slot = si_get_sampler_slot(slot); 605bf215546Sopenharmony_ci uint32_t * restrict desc = descs->list + desc_slot * 16; 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci if (samplers->views[slot]) 608bf215546Sopenharmony_ci si_reset_sampler_view_slot(samplers, slot, desc); 609bf215546Sopenharmony_ci } 610bf215546Sopenharmony_ci 611bf215546Sopenharmony_ci unbound_mask |= BITFIELD_RANGE(start_slot + count, unbind_num_trailing_slots); 612bf215546Sopenharmony_ci samplers->enabled_mask &= ~unbound_mask; 613bf215546Sopenharmony_ci samplers->has_depth_tex_mask &= ~unbound_mask; 614bf215546Sopenharmony_ci samplers->needs_depth_decompress_mask &= ~unbound_mask; 615bf215546Sopenharmony_ci samplers->needs_color_decompress_mask &= ~unbound_mask; 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 618bf215546Sopenharmony_ci} 619bf215546Sopenharmony_ci 620bf215546Sopenharmony_cistatic void si_update_shader_needs_decompress_mask(struct si_context *sctx, unsigned shader) 621bf215546Sopenharmony_ci{ 622bf215546Sopenharmony_ci struct si_samplers *samplers = &sctx->samplers[shader]; 623bf215546Sopenharmony_ci unsigned shader_bit = 1 << shader; 
624bf215546Sopenharmony_ci 625bf215546Sopenharmony_ci if (samplers->needs_depth_decompress_mask || samplers->needs_color_decompress_mask || 626bf215546Sopenharmony_ci sctx->images[shader].needs_color_decompress_mask) 627bf215546Sopenharmony_ci sctx->shader_needs_decompress_mask |= shader_bit; 628bf215546Sopenharmony_ci else 629bf215546Sopenharmony_ci sctx->shader_needs_decompress_mask &= ~shader_bit; 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_ci if (samplers->has_depth_tex_mask) 632bf215546Sopenharmony_ci sctx->shader_has_depth_tex |= shader_bit; 633bf215546Sopenharmony_ci else 634bf215546Sopenharmony_ci sctx->shader_has_depth_tex &= ~shader_bit; 635bf215546Sopenharmony_ci} 636bf215546Sopenharmony_ci 637bf215546Sopenharmony_cistatic void si_pipe_set_sampler_views(struct pipe_context *ctx, enum pipe_shader_type shader, 638bf215546Sopenharmony_ci unsigned start, unsigned count, 639bf215546Sopenharmony_ci unsigned unbind_num_trailing_slots, 640bf215546Sopenharmony_ci bool take_ownership, struct pipe_sampler_view **views) 641bf215546Sopenharmony_ci{ 642bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 643bf215546Sopenharmony_ci 644bf215546Sopenharmony_ci if ((!count && !unbind_num_trailing_slots) || shader >= SI_NUM_SHADERS) 645bf215546Sopenharmony_ci return; 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_ci si_set_sampler_views(sctx, shader, start, count, unbind_num_trailing_slots, 648bf215546Sopenharmony_ci take_ownership, views, false); 649bf215546Sopenharmony_ci si_update_shader_needs_decompress_mask(sctx, shader); 650bf215546Sopenharmony_ci} 651bf215546Sopenharmony_ci 652bf215546Sopenharmony_cistatic void si_samplers_update_needs_color_decompress_mask(struct si_samplers *samplers) 653bf215546Sopenharmony_ci{ 654bf215546Sopenharmony_ci unsigned mask = samplers->enabled_mask; 655bf215546Sopenharmony_ci 656bf215546Sopenharmony_ci while (mask) { 657bf215546Sopenharmony_ci int i = u_bit_scan(&mask); 658bf215546Sopenharmony_ci 
struct pipe_resource *res = samplers->views[i]->texture; 659bf215546Sopenharmony_ci 660bf215546Sopenharmony_ci if (res && res->target != PIPE_BUFFER) { 661bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)res; 662bf215546Sopenharmony_ci 663bf215546Sopenharmony_ci if (color_needs_decompression(tex)) { 664bf215546Sopenharmony_ci samplers->needs_color_decompress_mask |= 1u << i; 665bf215546Sopenharmony_ci } else { 666bf215546Sopenharmony_ci samplers->needs_color_decompress_mask &= ~(1u << i); 667bf215546Sopenharmony_ci } 668bf215546Sopenharmony_ci } 669bf215546Sopenharmony_ci } 670bf215546Sopenharmony_ci} 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci/* IMAGE VIEWS */ 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_cistatic void si_release_image_views(struct si_images *images) 675bf215546Sopenharmony_ci{ 676bf215546Sopenharmony_ci unsigned i; 677bf215546Sopenharmony_ci 678bf215546Sopenharmony_ci for (i = 0; i < SI_NUM_IMAGES; ++i) { 679bf215546Sopenharmony_ci struct pipe_image_view *view = &images->views[i]; 680bf215546Sopenharmony_ci 681bf215546Sopenharmony_ci pipe_resource_reference(&view->resource, NULL); 682bf215546Sopenharmony_ci } 683bf215546Sopenharmony_ci} 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_cistatic void si_image_views_begin_new_cs(struct si_context *sctx, struct si_images *images) 686bf215546Sopenharmony_ci{ 687bf215546Sopenharmony_ci uint mask = images->enabled_mask; 688bf215546Sopenharmony_ci 689bf215546Sopenharmony_ci /* Add buffers to the CS. 
*/ 690bf215546Sopenharmony_ci while (mask) { 691bf215546Sopenharmony_ci int i = u_bit_scan(&mask); 692bf215546Sopenharmony_ci struct pipe_image_view *view = &images->views[i]; 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_ci assert(view->resource); 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_ci si_sampler_view_add_buffer(sctx, view->resource, RADEON_USAGE_READWRITE, false, false); 697bf215546Sopenharmony_ci } 698bf215546Sopenharmony_ci} 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_cistatic bool si_image_views_check_encrypted(struct si_context *sctx, struct si_images *images, 701bf215546Sopenharmony_ci unsigned images_declared) 702bf215546Sopenharmony_ci{ 703bf215546Sopenharmony_ci uint mask = images->enabled_mask & images_declared; 704bf215546Sopenharmony_ci 705bf215546Sopenharmony_ci while (mask) { 706bf215546Sopenharmony_ci int i = u_bit_scan(&mask); 707bf215546Sopenharmony_ci struct pipe_image_view *view = &images->views[i]; 708bf215546Sopenharmony_ci 709bf215546Sopenharmony_ci assert(view->resource); 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)view->resource; 712bf215546Sopenharmony_ci if (tex->buffer.flags & RADEON_FLAG_ENCRYPTED) 713bf215546Sopenharmony_ci return true; 714bf215546Sopenharmony_ci } 715bf215546Sopenharmony_ci return false; 716bf215546Sopenharmony_ci} 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_cistatic void si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot) 719bf215546Sopenharmony_ci{ 720bf215546Sopenharmony_ci struct si_images *images = &ctx->images[shader]; 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_ci if (images->enabled_mask & (1u << slot)) { 723bf215546Sopenharmony_ci struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader); 724bf215546Sopenharmony_ci unsigned desc_slot = si_get_image_slot(slot); 725bf215546Sopenharmony_ci 726bf215546Sopenharmony_ci pipe_resource_reference(&images->views[slot].resource, 
NULL); 727bf215546Sopenharmony_ci images->needs_color_decompress_mask &= ~(1 << slot); 728bf215546Sopenharmony_ci 729bf215546Sopenharmony_ci memcpy(descs->list + desc_slot * 8, null_image_descriptor, 8 * 4); 730bf215546Sopenharmony_ci images->enabled_mask &= ~(1u << slot); 731bf215546Sopenharmony_ci images->display_dcc_store_mask &= ~(1u << slot); 732bf215546Sopenharmony_ci ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 733bf215546Sopenharmony_ci } 734bf215546Sopenharmony_ci} 735bf215546Sopenharmony_ci 736bf215546Sopenharmony_cistatic void si_mark_image_range_valid(const struct pipe_image_view *view) 737bf215546Sopenharmony_ci{ 738bf215546Sopenharmony_ci struct si_resource *res = si_resource(view->resource); 739bf215546Sopenharmony_ci 740bf215546Sopenharmony_ci if (res->b.b.target != PIPE_BUFFER) 741bf215546Sopenharmony_ci return; 742bf215546Sopenharmony_ci 743bf215546Sopenharmony_ci util_range_add(&res->b.b, &res->valid_buffer_range, view->u.buf.offset, 744bf215546Sopenharmony_ci view->u.buf.offset + view->u.buf.size); 745bf215546Sopenharmony_ci} 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_cistatic void si_set_shader_image_desc(struct si_context *ctx, const struct pipe_image_view *view, 748bf215546Sopenharmony_ci bool skip_decompress, uint32_t *desc, uint32_t *fmask_desc) 749bf215546Sopenharmony_ci{ 750bf215546Sopenharmony_ci struct si_screen *screen = ctx->screen; 751bf215546Sopenharmony_ci struct si_resource *res; 752bf215546Sopenharmony_ci 753bf215546Sopenharmony_ci res = si_resource(view->resource); 754bf215546Sopenharmony_ci 755bf215546Sopenharmony_ci if (res->b.b.target == PIPE_BUFFER) { 756bf215546Sopenharmony_ci if (view->access & PIPE_IMAGE_ACCESS_WRITE) 757bf215546Sopenharmony_ci si_mark_image_range_valid(view); 758bf215546Sopenharmony_ci uint32_t elements = si_clamp_texture_texel_count(screen->max_texel_buffer_elements, 759bf215546Sopenharmony_ci view->format, view->u.buf.size); 760bf215546Sopenharmony_ci 
761bf215546Sopenharmony_ci si_make_buffer_descriptor(screen, res, view->format, view->u.buf.offset, elements, 762bf215546Sopenharmony_ci desc); 763bf215546Sopenharmony_ci si_set_buf_desc_address(res, view->u.buf.offset, desc + 4); 764bf215546Sopenharmony_ci } else { 765bf215546Sopenharmony_ci static const unsigned char swizzle[4] = {0, 1, 2, 3}; 766bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)res; 767bf215546Sopenharmony_ci unsigned level = view->u.tex.level; 768bf215546Sopenharmony_ci bool uses_dcc = vi_dcc_enabled(tex, level); 769bf215546Sopenharmony_ci unsigned access = view->access; 770bf215546Sopenharmony_ci 771bf215546Sopenharmony_ci if (uses_dcc && screen->always_allow_dcc_stores) 772bf215546Sopenharmony_ci access |= SI_IMAGE_ACCESS_ALLOW_DCC_STORE; 773bf215546Sopenharmony_ci 774bf215546Sopenharmony_ci assert(!tex->is_depth); 775bf215546Sopenharmony_ci assert(fmask_desc || tex->surface.fmask_offset == 0); 776bf215546Sopenharmony_ci 777bf215546Sopenharmony_ci if (uses_dcc && !skip_decompress && 778bf215546Sopenharmony_ci !(access & SI_IMAGE_ACCESS_DCC_OFF) && 779bf215546Sopenharmony_ci ((!(access & SI_IMAGE_ACCESS_ALLOW_DCC_STORE) && (access & PIPE_IMAGE_ACCESS_WRITE)) || 780bf215546Sopenharmony_ci !vi_dcc_formats_compatible(screen, res->b.b.format, view->format))) { 781bf215546Sopenharmony_ci /* If DCC can't be disabled, at least decompress it. 782bf215546Sopenharmony_ci * The decompression is relatively cheap if the surface 783bf215546Sopenharmony_ci * has been decompressed already. 
784bf215546Sopenharmony_ci */ 785bf215546Sopenharmony_ci if (!si_texture_disable_dcc(ctx, tex)) 786bf215546Sopenharmony_ci si_decompress_dcc(ctx, tex); 787bf215546Sopenharmony_ci } 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci unsigned width = res->b.b.width0; 790bf215546Sopenharmony_ci unsigned height = res->b.b.height0; 791bf215546Sopenharmony_ci unsigned depth = res->b.b.depth0; 792bf215546Sopenharmony_ci unsigned hw_level = level; 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci if (ctx->gfx_level <= GFX8) { 795bf215546Sopenharmony_ci /* Always force the base level to the selected level. 796bf215546Sopenharmony_ci * 797bf215546Sopenharmony_ci * This is required for 3D textures, where otherwise 798bf215546Sopenharmony_ci * selecting a single slice for non-layered bindings 799bf215546Sopenharmony_ci * fails. It doesn't hurt the other targets. 800bf215546Sopenharmony_ci */ 801bf215546Sopenharmony_ci width = u_minify(width, level); 802bf215546Sopenharmony_ci height = u_minify(height, level); 803bf215546Sopenharmony_ci depth = u_minify(depth, level); 804bf215546Sopenharmony_ci hw_level = 0; 805bf215546Sopenharmony_ci } 806bf215546Sopenharmony_ci 807bf215546Sopenharmony_ci if (access & SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT) { 808bf215546Sopenharmony_ci if (ctx->gfx_level >= GFX9) { 809bf215546Sopenharmony_ci /* Since the aligned width and height are derived from the width and height 810bf215546Sopenharmony_ci * by the hw, set them directly as the width and height, so that UINT formats 811bf215546Sopenharmony_ci * get exactly the same layout as BCn formats. 
812bf215546Sopenharmony_ci */ 813bf215546Sopenharmony_ci width = tex->surface.u.gfx9.base_mip_width; 814bf215546Sopenharmony_ci height = tex->surface.u.gfx9.base_mip_height; 815bf215546Sopenharmony_ci } else { 816bf215546Sopenharmony_ci width = util_format_get_nblocksx(tex->buffer.b.b.format, width); 817bf215546Sopenharmony_ci height = util_format_get_nblocksy(tex->buffer.b.b.format, height); 818bf215546Sopenharmony_ci } 819bf215546Sopenharmony_ci } 820bf215546Sopenharmony_ci 821bf215546Sopenharmony_ci screen->make_texture_descriptor( 822bf215546Sopenharmony_ci screen, tex, false, res->b.b.target, view->format, swizzle, hw_level, hw_level, 823bf215546Sopenharmony_ci view->u.tex.first_layer, view->u.tex.last_layer, width, height, depth, desc, fmask_desc); 824bf215546Sopenharmony_ci si_set_mutable_tex_desc_fields(screen, tex, &tex->surface.u.legacy.level[level], level, level, 825bf215546Sopenharmony_ci util_format_get_blockwidth(view->format), 826bf215546Sopenharmony_ci false, access, desc); 827bf215546Sopenharmony_ci } 828bf215546Sopenharmony_ci} 829bf215546Sopenharmony_ci 830bf215546Sopenharmony_cistatic void si_set_shader_image(struct si_context *ctx, unsigned shader, unsigned slot, 831bf215546Sopenharmony_ci const struct pipe_image_view *view, bool skip_decompress) 832bf215546Sopenharmony_ci{ 833bf215546Sopenharmony_ci struct si_images *images = &ctx->images[shader]; 834bf215546Sopenharmony_ci struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader); 835bf215546Sopenharmony_ci struct si_resource *res; 836bf215546Sopenharmony_ci 837bf215546Sopenharmony_ci if (!view || !view->resource) { 838bf215546Sopenharmony_ci si_disable_shader_image(ctx, shader, slot); 839bf215546Sopenharmony_ci return; 840bf215546Sopenharmony_ci } 841bf215546Sopenharmony_ci 842bf215546Sopenharmony_ci res = si_resource(view->resource); 843bf215546Sopenharmony_ci 844bf215546Sopenharmony_ci si_set_shader_image_desc(ctx, view, skip_decompress, descs->list + 
si_get_image_slot(slot) * 8, 845bf215546Sopenharmony_ci descs->list + si_get_image_slot(slot + SI_NUM_IMAGES) * 8); 846bf215546Sopenharmony_ci 847bf215546Sopenharmony_ci if (&images->views[slot] != view) 848bf215546Sopenharmony_ci util_copy_image_view(&images->views[slot], view); 849bf215546Sopenharmony_ci 850bf215546Sopenharmony_ci if (res->b.b.target == PIPE_BUFFER) { 851bf215546Sopenharmony_ci images->needs_color_decompress_mask &= ~(1 << slot); 852bf215546Sopenharmony_ci images->display_dcc_store_mask &= ~(1u << slot); 853bf215546Sopenharmony_ci res->bind_history |= SI_BIND_IMAGE_BUFFER(shader); 854bf215546Sopenharmony_ci } else { 855bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)res; 856bf215546Sopenharmony_ci unsigned level = view->u.tex.level; 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci if (color_needs_decompression(tex)) { 859bf215546Sopenharmony_ci images->needs_color_decompress_mask |= 1 << slot; 860bf215546Sopenharmony_ci } else { 861bf215546Sopenharmony_ci images->needs_color_decompress_mask &= ~(1 << slot); 862bf215546Sopenharmony_ci } 863bf215546Sopenharmony_ci 864bf215546Sopenharmony_ci if (tex->surface.display_dcc_offset && view->access & PIPE_IMAGE_ACCESS_WRITE) { 865bf215546Sopenharmony_ci images->display_dcc_store_mask |= 1u << slot; 866bf215546Sopenharmony_ci 867bf215546Sopenharmony_ci /* Set displayable_dcc_dirty for non-compute stages conservatively (before draw calls). 
*/ 868bf215546Sopenharmony_ci if (shader != PIPE_SHADER_COMPUTE) 869bf215546Sopenharmony_ci tex->displayable_dcc_dirty = true; 870bf215546Sopenharmony_ci } else { 871bf215546Sopenharmony_ci images->display_dcc_store_mask &= ~(1u << slot); 872bf215546Sopenharmony_ci } 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_ci if (vi_dcc_enabled(tex, level) && p_atomic_read(&tex->framebuffers_bound)) 875bf215546Sopenharmony_ci ctx->need_check_render_feedback = true; 876bf215546Sopenharmony_ci } 877bf215546Sopenharmony_ci 878bf215546Sopenharmony_ci images->enabled_mask |= 1u << slot; 879bf215546Sopenharmony_ci ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 880bf215546Sopenharmony_ci 881bf215546Sopenharmony_ci /* Since this can flush, it must be done after enabled_mask is updated. */ 882bf215546Sopenharmony_ci si_sampler_view_add_buffer( 883bf215546Sopenharmony_ci ctx, &res->b.b, 884bf215546Sopenharmony_ci (view->access & PIPE_IMAGE_ACCESS_WRITE) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, false, 885bf215546Sopenharmony_ci true); 886bf215546Sopenharmony_ci} 887bf215546Sopenharmony_ci 888bf215546Sopenharmony_cistatic void si_set_shader_images(struct pipe_context *pipe, enum pipe_shader_type shader, 889bf215546Sopenharmony_ci unsigned start_slot, unsigned count, 890bf215546Sopenharmony_ci unsigned unbind_num_trailing_slots, 891bf215546Sopenharmony_ci const struct pipe_image_view *views) 892bf215546Sopenharmony_ci{ 893bf215546Sopenharmony_ci struct si_context *ctx = (struct si_context *)pipe; 894bf215546Sopenharmony_ci unsigned i, slot; 895bf215546Sopenharmony_ci 896bf215546Sopenharmony_ci assert(shader < SI_NUM_SHADERS); 897bf215546Sopenharmony_ci 898bf215546Sopenharmony_ci if (!count && !unbind_num_trailing_slots) 899bf215546Sopenharmony_ci return; 900bf215546Sopenharmony_ci 901bf215546Sopenharmony_ci assert(start_slot + count + unbind_num_trailing_slots <= SI_NUM_IMAGES); 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_ci if (views) { 
904bf215546Sopenharmony_ci for (i = 0, slot = start_slot; i < count; ++i, ++slot) 905bf215546Sopenharmony_ci si_set_shader_image(ctx, shader, slot, &views[i], false); 906bf215546Sopenharmony_ci } else { 907bf215546Sopenharmony_ci for (i = 0, slot = start_slot; i < count; ++i, ++slot) 908bf215546Sopenharmony_ci si_set_shader_image(ctx, shader, slot, NULL, false); 909bf215546Sopenharmony_ci } 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_ci for (i = 0; i < unbind_num_trailing_slots; ++i, ++slot) 912bf215546Sopenharmony_ci si_set_shader_image(ctx, shader, slot, NULL, false); 913bf215546Sopenharmony_ci 914bf215546Sopenharmony_ci if (shader == PIPE_SHADER_COMPUTE && 915bf215546Sopenharmony_ci ctx->cs_shader_state.program && 916bf215546Sopenharmony_ci start_slot < ctx->cs_shader_state.program->sel.cs_num_images_in_user_sgprs) 917bf215546Sopenharmony_ci ctx->compute_image_sgprs_dirty = true; 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_ci si_update_shader_needs_decompress_mask(ctx, shader); 920bf215546Sopenharmony_ci} 921bf215546Sopenharmony_ci 922bf215546Sopenharmony_cistatic void si_images_update_needs_color_decompress_mask(struct si_images *images) 923bf215546Sopenharmony_ci{ 924bf215546Sopenharmony_ci unsigned mask = images->enabled_mask; 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_ci while (mask) { 927bf215546Sopenharmony_ci int i = u_bit_scan(&mask); 928bf215546Sopenharmony_ci struct pipe_resource *res = images->views[i].resource; 929bf215546Sopenharmony_ci 930bf215546Sopenharmony_ci if (res && res->target != PIPE_BUFFER) { 931bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)res; 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_ci if (color_needs_decompression(tex)) { 934bf215546Sopenharmony_ci images->needs_color_decompress_mask |= 1 << i; 935bf215546Sopenharmony_ci } else { 936bf215546Sopenharmony_ci images->needs_color_decompress_mask &= ~(1 << i); 937bf215546Sopenharmony_ci } 938bf215546Sopenharmony_ci } 
939bf215546Sopenharmony_ci } 940bf215546Sopenharmony_ci} 941bf215546Sopenharmony_ci 942bf215546Sopenharmony_civoid si_update_ps_colorbuf0_slot(struct si_context *sctx) 943bf215546Sopenharmony_ci{ 944bf215546Sopenharmony_ci struct si_buffer_resources *buffers = &sctx->internal_bindings; 945bf215546Sopenharmony_ci struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL]; 946bf215546Sopenharmony_ci unsigned slot = SI_PS_IMAGE_COLORBUF0; 947bf215546Sopenharmony_ci struct pipe_surface *surf = NULL; 948bf215546Sopenharmony_ci 949bf215546Sopenharmony_ci /* si_texture_disable_dcc can get us here again. */ 950bf215546Sopenharmony_ci if (sctx->in_update_ps_colorbuf0_slot || sctx->blitter_running) { 951bf215546Sopenharmony_ci assert(!sctx->ps_uses_fbfetch || sctx->framebuffer.state.cbufs[0]); 952bf215546Sopenharmony_ci return; 953bf215546Sopenharmony_ci } 954bf215546Sopenharmony_ci sctx->in_update_ps_colorbuf0_slot = true; 955bf215546Sopenharmony_ci 956bf215546Sopenharmony_ci /* See whether FBFETCH is used and color buffer 0 is set. */ 957bf215546Sopenharmony_ci if (sctx->shader.ps.cso && sctx->shader.ps.cso->info.base.fs.uses_fbfetch_output && 958bf215546Sopenharmony_ci sctx->framebuffer.state.nr_cbufs && sctx->framebuffer.state.cbufs[0]) 959bf215546Sopenharmony_ci surf = sctx->framebuffer.state.cbufs[0]; 960bf215546Sopenharmony_ci 961bf215546Sopenharmony_ci /* Return if FBFETCH transitions from disabled to disabled. 
*/ 962bf215546Sopenharmony_ci if (!buffers->buffers[slot] && !surf) { 963bf215546Sopenharmony_ci assert(!sctx->ps_uses_fbfetch); 964bf215546Sopenharmony_ci sctx->in_update_ps_colorbuf0_slot = false; 965bf215546Sopenharmony_ci return; 966bf215546Sopenharmony_ci } 967bf215546Sopenharmony_ci 968bf215546Sopenharmony_ci sctx->ps_uses_fbfetch = surf != NULL; 969bf215546Sopenharmony_ci si_update_ps_iter_samples(sctx); 970bf215546Sopenharmony_ci 971bf215546Sopenharmony_ci if (surf) { 972bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)surf->texture; 973bf215546Sopenharmony_ci struct pipe_image_view view = {0}; 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci assert(tex); 976bf215546Sopenharmony_ci assert(!tex->is_depth); 977bf215546Sopenharmony_ci 978bf215546Sopenharmony_ci /* Disable DCC, because the texture is used as both a sampler 979bf215546Sopenharmony_ci * and color buffer. 980bf215546Sopenharmony_ci */ 981bf215546Sopenharmony_ci si_texture_disable_dcc(sctx, tex); 982bf215546Sopenharmony_ci 983bf215546Sopenharmony_ci if (tex->buffer.b.b.nr_samples <= 1 && tex->cmask_buffer) { 984bf215546Sopenharmony_ci /* Disable CMASK. */ 985bf215546Sopenharmony_ci assert(tex->cmask_buffer != &tex->buffer); 986bf215546Sopenharmony_ci si_eliminate_fast_color_clear(sctx, tex, NULL); 987bf215546Sopenharmony_ci si_texture_discard_cmask(sctx->screen, tex); 988bf215546Sopenharmony_ci } 989bf215546Sopenharmony_ci 990bf215546Sopenharmony_ci view.resource = surf->texture; 991bf215546Sopenharmony_ci view.format = surf->format; 992bf215546Sopenharmony_ci view.access = PIPE_IMAGE_ACCESS_READ; 993bf215546Sopenharmony_ci view.u.tex.first_layer = surf->u.tex.first_layer; 994bf215546Sopenharmony_ci view.u.tex.last_layer = surf->u.tex.last_layer; 995bf215546Sopenharmony_ci view.u.tex.level = surf->u.tex.level; 996bf215546Sopenharmony_ci 997bf215546Sopenharmony_ci /* Set the descriptor. 
*/ 998bf215546Sopenharmony_ci uint32_t *desc = descs->list + slot * 4; 999bf215546Sopenharmony_ci memset(desc, 0, 16 * 4); 1000bf215546Sopenharmony_ci si_set_shader_image_desc(sctx, &view, true, desc, desc + 8); 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_ci pipe_resource_reference(&buffers->buffers[slot], &tex->buffer.b.b); 1003bf215546Sopenharmony_ci radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, 1004bf215546Sopenharmony_ci RADEON_USAGE_READ | RADEON_PRIO_SHADER_RW_IMAGE); 1005bf215546Sopenharmony_ci buffers->enabled_mask |= 1llu << slot; 1006bf215546Sopenharmony_ci } else { 1007bf215546Sopenharmony_ci /* Clear the descriptor. */ 1008bf215546Sopenharmony_ci memset(descs->list + slot * 4, 0, 8 * 4); 1009bf215546Sopenharmony_ci pipe_resource_reference(&buffers->buffers[slot], NULL); 1010bf215546Sopenharmony_ci buffers->enabled_mask &= ~(1llu << slot); 1011bf215546Sopenharmony_ci } 1012bf215546Sopenharmony_ci 1013bf215546Sopenharmony_ci sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL; 1014bf215546Sopenharmony_ci sctx->in_update_ps_colorbuf0_slot = false; 1015bf215546Sopenharmony_ci} 1016bf215546Sopenharmony_ci 1017bf215546Sopenharmony_ci/* SAMPLER STATES */ 1018bf215546Sopenharmony_ci 1019bf215546Sopenharmony_cistatic void si_bind_sampler_states(struct pipe_context *ctx, enum pipe_shader_type shader, 1020bf215546Sopenharmony_ci unsigned start, unsigned count, void **states) 1021bf215546Sopenharmony_ci{ 1022bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 1023bf215546Sopenharmony_ci struct si_samplers *samplers = &sctx->samplers[shader]; 1024bf215546Sopenharmony_ci struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader); 1025bf215546Sopenharmony_ci struct si_sampler_state **sstates = (struct si_sampler_state **)states; 1026bf215546Sopenharmony_ci int i; 1027bf215546Sopenharmony_ci 1028bf215546Sopenharmony_ci if (!count || shader >= SI_NUM_SHADERS || !sstates) 1029bf215546Sopenharmony_ci return; 
1030bf215546Sopenharmony_ci 1031bf215546Sopenharmony_ci for (i = 0; i < count; i++) { 1032bf215546Sopenharmony_ci unsigned slot = start + i; 1033bf215546Sopenharmony_ci unsigned desc_slot = si_get_sampler_slot(slot); 1034bf215546Sopenharmony_ci 1035bf215546Sopenharmony_ci if (!sstates[i] || sstates[i] == samplers->sampler_states[slot]) 1036bf215546Sopenharmony_ci continue; 1037bf215546Sopenharmony_ci 1038bf215546Sopenharmony_ci#ifndef NDEBUG 1039bf215546Sopenharmony_ci assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC); 1040bf215546Sopenharmony_ci#endif 1041bf215546Sopenharmony_ci samplers->sampler_states[slot] = sstates[i]; 1042bf215546Sopenharmony_ci 1043bf215546Sopenharmony_ci /* If FMASK is bound, don't overwrite it. 1044bf215546Sopenharmony_ci * The sampler state will be set after FMASK is unbound. 1045bf215546Sopenharmony_ci */ 1046bf215546Sopenharmony_ci struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[slot]; 1047bf215546Sopenharmony_ci 1048bf215546Sopenharmony_ci struct si_texture *tex = NULL; 1049bf215546Sopenharmony_ci 1050bf215546Sopenharmony_ci if (sview && sview->base.texture && sview->base.texture->target != PIPE_BUFFER) 1051bf215546Sopenharmony_ci tex = (struct si_texture *)sview->base.texture; 1052bf215546Sopenharmony_ci 1053bf215546Sopenharmony_ci if (tex && tex->surface.fmask_size) 1054bf215546Sopenharmony_ci continue; 1055bf215546Sopenharmony_ci 1056bf215546Sopenharmony_ci si_set_sampler_state_desc(sstates[i], sview, tex, desc->list + desc_slot * 16 + 12); 1057bf215546Sopenharmony_ci 1058bf215546Sopenharmony_ci sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 1059bf215546Sopenharmony_ci } 1060bf215546Sopenharmony_ci} 1061bf215546Sopenharmony_ci 1062bf215546Sopenharmony_ci/* BUFFER RESOURCES */ 1063bf215546Sopenharmony_ci 1064bf215546Sopenharmony_cistatic void si_init_buffer_resources(struct si_context *sctx, 1065bf215546Sopenharmony_ci struct si_buffer_resources *buffers, 
1066bf215546Sopenharmony_ci struct si_descriptors *descs, unsigned num_buffers, 1067bf215546Sopenharmony_ci short shader_userdata_rel_index, 1068bf215546Sopenharmony_ci unsigned priority, 1069bf215546Sopenharmony_ci unsigned priority_constbuf) 1070bf215546Sopenharmony_ci{ 1071bf215546Sopenharmony_ci buffers->priority = priority; 1072bf215546Sopenharmony_ci buffers->priority_constbuf = priority_constbuf; 1073bf215546Sopenharmony_ci buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource *)); 1074bf215546Sopenharmony_ci buffers->offsets = CALLOC(num_buffers, sizeof(buffers->offsets[0])); 1075bf215546Sopenharmony_ci 1076bf215546Sopenharmony_ci si_init_descriptors(descs, shader_userdata_rel_index, 4, num_buffers); 1077bf215546Sopenharmony_ci 1078bf215546Sopenharmony_ci /* Initialize buffer descriptors, so that we don't have to do it at bind time. */ 1079bf215546Sopenharmony_ci for (unsigned i = 0; i < num_buffers; i++) { 1080bf215546Sopenharmony_ci uint32_t *desc = descs->list + i * 4; 1081bf215546Sopenharmony_ci 1082bf215546Sopenharmony_ci desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 1083bf215546Sopenharmony_ci S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); 1084bf215546Sopenharmony_ci 1085bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) { 1086bf215546Sopenharmony_ci desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | 1087bf215546Sopenharmony_ci S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); 1088bf215546Sopenharmony_ci } else if (sctx->gfx_level >= GFX10) { 1089bf215546Sopenharmony_ci desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | 1090bf215546Sopenharmony_ci S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); 1091bf215546Sopenharmony_ci } else { 1092bf215546Sopenharmony_ci desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 1093bf215546Sopenharmony_ci S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 
1094bf215546Sopenharmony_ci } 1095bf215546Sopenharmony_ci } 1096bf215546Sopenharmony_ci} 1097bf215546Sopenharmony_ci 1098bf215546Sopenharmony_cistatic void si_release_buffer_resources(struct si_buffer_resources *buffers, 1099bf215546Sopenharmony_ci struct si_descriptors *descs) 1100bf215546Sopenharmony_ci{ 1101bf215546Sopenharmony_ci int i; 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci for (i = 0; i < descs->num_elements; i++) { 1104bf215546Sopenharmony_ci pipe_resource_reference(&buffers->buffers[i], NULL); 1105bf215546Sopenharmony_ci } 1106bf215546Sopenharmony_ci 1107bf215546Sopenharmony_ci FREE(buffers->buffers); 1108bf215546Sopenharmony_ci FREE(buffers->offsets); 1109bf215546Sopenharmony_ci} 1110bf215546Sopenharmony_ci 1111bf215546Sopenharmony_cistatic void si_buffer_resources_begin_new_cs(struct si_context *sctx, 1112bf215546Sopenharmony_ci struct si_buffer_resources *buffers) 1113bf215546Sopenharmony_ci{ 1114bf215546Sopenharmony_ci uint64_t mask = buffers->enabled_mask; 1115bf215546Sopenharmony_ci 1116bf215546Sopenharmony_ci /* Add buffers to the CS. */ 1117bf215546Sopenharmony_ci while (mask) { 1118bf215546Sopenharmony_ci int i = u_bit_scan64(&mask); 1119bf215546Sopenharmony_ci 1120bf215546Sopenharmony_ci radeon_add_to_buffer_list( 1121bf215546Sopenharmony_ci sctx, &sctx->gfx_cs, si_resource(buffers->buffers[i]), 1122bf215546Sopenharmony_ci (buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ) | 1123bf215546Sopenharmony_ci (i < SI_NUM_SHADER_BUFFERS ? 
buffers->priority : buffers->priority_constbuf)); 1124bf215546Sopenharmony_ci } 1125bf215546Sopenharmony_ci} 1126bf215546Sopenharmony_ci 1127bf215546Sopenharmony_cistatic bool si_buffer_resources_check_encrypted(struct si_context *sctx, 1128bf215546Sopenharmony_ci struct si_buffer_resources *buffers) 1129bf215546Sopenharmony_ci{ 1130bf215546Sopenharmony_ci uint64_t mask = buffers->enabled_mask; 1131bf215546Sopenharmony_ci 1132bf215546Sopenharmony_ci while (mask) { 1133bf215546Sopenharmony_ci int i = u_bit_scan64(&mask); 1134bf215546Sopenharmony_ci 1135bf215546Sopenharmony_ci if (si_resource(buffers->buffers[i])->flags & RADEON_FLAG_ENCRYPTED) 1136bf215546Sopenharmony_ci return true; 1137bf215546Sopenharmony_ci } 1138bf215546Sopenharmony_ci 1139bf215546Sopenharmony_ci return false; 1140bf215546Sopenharmony_ci} 1141bf215546Sopenharmony_ci 1142bf215546Sopenharmony_cistatic void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers, 1143bf215546Sopenharmony_ci struct si_descriptors *descs, unsigned idx, 1144bf215546Sopenharmony_ci struct pipe_resource **buf, unsigned *offset, 1145bf215546Sopenharmony_ci unsigned *size) 1146bf215546Sopenharmony_ci{ 1147bf215546Sopenharmony_ci pipe_resource_reference(buf, buffers->buffers[idx]); 1148bf215546Sopenharmony_ci if (*buf) { 1149bf215546Sopenharmony_ci struct si_resource *res = si_resource(*buf); 1150bf215546Sopenharmony_ci const uint32_t *desc = descs->list + idx * 4; 1151bf215546Sopenharmony_ci uint64_t va; 1152bf215546Sopenharmony_ci 1153bf215546Sopenharmony_ci *size = desc[2]; 1154bf215546Sopenharmony_ci 1155bf215546Sopenharmony_ci assert(G_008F04_STRIDE(desc[1]) == 0); 1156bf215546Sopenharmony_ci va = si_desc_extract_buffer_address(desc); 1157bf215546Sopenharmony_ci 1158bf215546Sopenharmony_ci assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size); 1159bf215546Sopenharmony_ci *offset = va - res->gpu_address; 1160bf215546Sopenharmony_ci } 1161bf215546Sopenharmony_ci} 
1162bf215546Sopenharmony_ci 1163bf215546Sopenharmony_ci/* VERTEX BUFFERS */ 1164bf215546Sopenharmony_ci 1165bf215546Sopenharmony_cistatic void si_vertex_buffers_begin_new_cs(struct si_context *sctx) 1166bf215546Sopenharmony_ci{ 1167bf215546Sopenharmony_ci int count = sctx->num_vertex_elements; 1168bf215546Sopenharmony_ci int i; 1169bf215546Sopenharmony_ci 1170bf215546Sopenharmony_ci for (i = 0; i < count; i++) { 1171bf215546Sopenharmony_ci int vb = sctx->vertex_elements->vertex_buffer_index[i]; 1172bf215546Sopenharmony_ci 1173bf215546Sopenharmony_ci if (vb >= ARRAY_SIZE(sctx->vertex_buffer)) 1174bf215546Sopenharmony_ci continue; 1175bf215546Sopenharmony_ci if (!sctx->vertex_buffer[vb].buffer.resource) 1176bf215546Sopenharmony_ci continue; 1177bf215546Sopenharmony_ci 1178bf215546Sopenharmony_ci radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, 1179bf215546Sopenharmony_ci si_resource(sctx->vertex_buffer[vb].buffer.resource), 1180bf215546Sopenharmony_ci RADEON_USAGE_READ | RADEON_PRIO_VERTEX_BUFFER); 1181bf215546Sopenharmony_ci } 1182bf215546Sopenharmony_ci 1183bf215546Sopenharmony_ci if (!sctx->vb_descriptors_buffer) 1184bf215546Sopenharmony_ci return; 1185bf215546Sopenharmony_ci radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->vb_descriptors_buffer, 1186bf215546Sopenharmony_ci RADEON_USAGE_READ | RADEON_PRIO_DESCRIPTORS); 1187bf215546Sopenharmony_ci} 1188bf215546Sopenharmony_ci 1189bf215546Sopenharmony_ci/* CONSTANT BUFFERS */ 1190bf215546Sopenharmony_ci 1191bf215546Sopenharmony_cistatic struct si_descriptors *si_const_and_shader_buffer_descriptors(struct si_context *sctx, 1192bf215546Sopenharmony_ci unsigned shader) 1193bf215546Sopenharmony_ci{ 1194bf215546Sopenharmony_ci return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)]; 1195bf215546Sopenharmony_ci} 1196bf215546Sopenharmony_ci 1197bf215546Sopenharmony_cistatic void si_upload_const_buffer(struct si_context *sctx, struct si_resource **buf, 1198bf215546Sopenharmony_ci const uint8_t *ptr, 
unsigned size, uint32_t *const_offset) 1199bf215546Sopenharmony_ci{ 1200bf215546Sopenharmony_ci void *tmp; 1201bf215546Sopenharmony_ci 1202bf215546Sopenharmony_ci u_upload_alloc(sctx->b.const_uploader, 0, size, si_optimal_tcc_alignment(sctx, size), 1203bf215546Sopenharmony_ci const_offset, (struct pipe_resource **)buf, &tmp); 1204bf215546Sopenharmony_ci if (*buf) 1205bf215546Sopenharmony_ci util_memcpy_cpu_to_le32(tmp, ptr, size); 1206bf215546Sopenharmony_ci} 1207bf215546Sopenharmony_ci 1208bf215546Sopenharmony_cistatic void si_set_constant_buffer(struct si_context *sctx, struct si_buffer_resources *buffers, 1209bf215546Sopenharmony_ci unsigned descriptors_idx, uint slot, bool take_ownership, 1210bf215546Sopenharmony_ci const struct pipe_constant_buffer *input) 1211bf215546Sopenharmony_ci{ 1212bf215546Sopenharmony_ci struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; 1213bf215546Sopenharmony_ci assert(slot < descs->num_elements); 1214bf215546Sopenharmony_ci pipe_resource_reference(&buffers->buffers[slot], NULL); 1215bf215546Sopenharmony_ci 1216bf215546Sopenharmony_ci /* GFX7 cannot unbind a constant buffer (S_BUFFER_LOAD is buggy 1217bf215546Sopenharmony_ci * with a NULL buffer). We need to use a dummy buffer instead. */ 1218bf215546Sopenharmony_ci if (sctx->gfx_level == GFX7 && (!input || (!input->buffer && !input->user_buffer))) 1219bf215546Sopenharmony_ci input = &sctx->null_const_buf; 1220bf215546Sopenharmony_ci 1221bf215546Sopenharmony_ci if (input && (input->buffer || input->user_buffer)) { 1222bf215546Sopenharmony_ci struct pipe_resource *buffer = NULL; 1223bf215546Sopenharmony_ci uint64_t va; 1224bf215546Sopenharmony_ci unsigned buffer_offset; 1225bf215546Sopenharmony_ci 1226bf215546Sopenharmony_ci /* Upload the user buffer if needed. 
*/ 1227bf215546Sopenharmony_ci if (input->user_buffer) { 1228bf215546Sopenharmony_ci si_upload_const_buffer(sctx, (struct si_resource **)&buffer, input->user_buffer, 1229bf215546Sopenharmony_ci input->buffer_size, &buffer_offset); 1230bf215546Sopenharmony_ci if (!buffer) { 1231bf215546Sopenharmony_ci /* Just unbind on failure. */ 1232bf215546Sopenharmony_ci si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, false, NULL); 1233bf215546Sopenharmony_ci return; 1234bf215546Sopenharmony_ci } 1235bf215546Sopenharmony_ci } else { 1236bf215546Sopenharmony_ci if (take_ownership) { 1237bf215546Sopenharmony_ci buffer = input->buffer; 1238bf215546Sopenharmony_ci } else { 1239bf215546Sopenharmony_ci pipe_resource_reference(&buffer, input->buffer); 1240bf215546Sopenharmony_ci } 1241bf215546Sopenharmony_ci buffer_offset = input->buffer_offset; 1242bf215546Sopenharmony_ci } 1243bf215546Sopenharmony_ci 1244bf215546Sopenharmony_ci va = si_resource(buffer)->gpu_address + buffer_offset; 1245bf215546Sopenharmony_ci 1246bf215546Sopenharmony_ci /* Set the descriptor. */ 1247bf215546Sopenharmony_ci uint32_t *desc = descs->list + slot * 4; 1248bf215546Sopenharmony_ci desc[0] = va; 1249bf215546Sopenharmony_ci desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(0); 1250bf215546Sopenharmony_ci desc[2] = input->buffer_size; 1251bf215546Sopenharmony_ci 1252bf215546Sopenharmony_ci buffers->buffers[slot] = buffer; 1253bf215546Sopenharmony_ci buffers->offsets[slot] = buffer_offset; 1254bf215546Sopenharmony_ci radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), 1255bf215546Sopenharmony_ci RADEON_USAGE_READ | buffers->priority_constbuf, true); 1256bf215546Sopenharmony_ci buffers->enabled_mask |= 1llu << slot; 1257bf215546Sopenharmony_ci } else { 1258bf215546Sopenharmony_ci /* Clear the descriptor. Only 3 dwords are cleared. The 4th dword is immutable. 
*/ 1259bf215546Sopenharmony_ci memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 3); 1260bf215546Sopenharmony_ci buffers->enabled_mask &= ~(1llu << slot); 1261bf215546Sopenharmony_ci } 1262bf215546Sopenharmony_ci 1263bf215546Sopenharmony_ci sctx->descriptors_dirty |= 1u << descriptors_idx; 1264bf215546Sopenharmony_ci} 1265bf215546Sopenharmony_ci 1266bf215546Sopenharmony_civoid si_get_inline_uniform_state(union si_shader_key *key, enum pipe_shader_type shader, 1267bf215546Sopenharmony_ci bool *inline_uniforms, uint32_t **inlined_values) 1268bf215546Sopenharmony_ci{ 1269bf215546Sopenharmony_ci if (shader == PIPE_SHADER_FRAGMENT) { 1270bf215546Sopenharmony_ci *inline_uniforms = key->ps.opt.inline_uniforms; 1271bf215546Sopenharmony_ci *inlined_values = key->ps.opt.inlined_uniform_values; 1272bf215546Sopenharmony_ci } else { 1273bf215546Sopenharmony_ci *inline_uniforms = key->ge.opt.inline_uniforms; 1274bf215546Sopenharmony_ci *inlined_values = key->ge.opt.inlined_uniform_values; 1275bf215546Sopenharmony_ci } 1276bf215546Sopenharmony_ci} 1277bf215546Sopenharmony_ci 1278bf215546Sopenharmony_civoid si_invalidate_inlinable_uniforms(struct si_context *sctx, enum pipe_shader_type shader) 1279bf215546Sopenharmony_ci{ 1280bf215546Sopenharmony_ci if (shader == PIPE_SHADER_COMPUTE) 1281bf215546Sopenharmony_ci return; 1282bf215546Sopenharmony_ci 1283bf215546Sopenharmony_ci bool inline_uniforms; 1284bf215546Sopenharmony_ci uint32_t *inlined_values; 1285bf215546Sopenharmony_ci si_get_inline_uniform_state(&sctx->shaders[shader].key, shader, &inline_uniforms, &inlined_values); 1286bf215546Sopenharmony_ci 1287bf215546Sopenharmony_ci if (inline_uniforms) { 1288bf215546Sopenharmony_ci if (shader == PIPE_SHADER_FRAGMENT) 1289bf215546Sopenharmony_ci sctx->shaders[shader].key.ps.opt.inline_uniforms = false; 1290bf215546Sopenharmony_ci else 1291bf215546Sopenharmony_ci sctx->shaders[shader].key.ge.opt.inline_uniforms = false; 1292bf215546Sopenharmony_ci 1293bf215546Sopenharmony_ci 
memset(inlined_values, 0, MAX_INLINABLE_UNIFORMS * 4); 1294bf215546Sopenharmony_ci sctx->do_update_shaders = true; 1295bf215546Sopenharmony_ci } 1296bf215546Sopenharmony_ci} 1297bf215546Sopenharmony_ci 1298bf215546Sopenharmony_cistatic void si_pipe_set_constant_buffer(struct pipe_context *ctx, enum pipe_shader_type shader, 1299bf215546Sopenharmony_ci uint slot, bool take_ownership, 1300bf215546Sopenharmony_ci const struct pipe_constant_buffer *input) 1301bf215546Sopenharmony_ci{ 1302bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 1303bf215546Sopenharmony_ci 1304bf215546Sopenharmony_ci if (shader >= SI_NUM_SHADERS) 1305bf215546Sopenharmony_ci return; 1306bf215546Sopenharmony_ci 1307bf215546Sopenharmony_ci if (input) { 1308bf215546Sopenharmony_ci if (input->buffer) { 1309bf215546Sopenharmony_ci if (slot == 0 && 1310bf215546Sopenharmony_ci !(si_resource(input->buffer)->flags & RADEON_FLAG_32BIT)) { 1311bf215546Sopenharmony_ci assert(!"constant buffer 0 must have a 32-bit VM address, use const_uploader"); 1312bf215546Sopenharmony_ci return; 1313bf215546Sopenharmony_ci } 1314bf215546Sopenharmony_ci si_resource(input->buffer)->bind_history |= SI_BIND_CONSTANT_BUFFER(shader); 1315bf215546Sopenharmony_ci } 1316bf215546Sopenharmony_ci 1317bf215546Sopenharmony_ci if (slot == 0) 1318bf215546Sopenharmony_ci si_invalidate_inlinable_uniforms(sctx, shader); 1319bf215546Sopenharmony_ci } 1320bf215546Sopenharmony_ci 1321bf215546Sopenharmony_ci slot = si_get_constbuf_slot(slot); 1322bf215546Sopenharmony_ci si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader], 1323bf215546Sopenharmony_ci si_const_and_shader_buffer_descriptors_idx(shader), slot, 1324bf215546Sopenharmony_ci take_ownership, input); 1325bf215546Sopenharmony_ci} 1326bf215546Sopenharmony_ci 1327bf215546Sopenharmony_cistatic void si_set_inlinable_constants(struct pipe_context *ctx, 1328bf215546Sopenharmony_ci enum pipe_shader_type shader, 1329bf215546Sopenharmony_ci uint 
num_values, uint32_t *values) 1330bf215546Sopenharmony_ci{ 1331bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 1332bf215546Sopenharmony_ci 1333bf215546Sopenharmony_ci if (shader == PIPE_SHADER_COMPUTE) 1334bf215546Sopenharmony_ci return; 1335bf215546Sopenharmony_ci 1336bf215546Sopenharmony_ci bool inline_uniforms; 1337bf215546Sopenharmony_ci uint32_t *inlined_values; 1338bf215546Sopenharmony_ci si_get_inline_uniform_state(&sctx->shaders[shader].key, shader, &inline_uniforms, &inlined_values); 1339bf215546Sopenharmony_ci 1340bf215546Sopenharmony_ci if (!inline_uniforms) { 1341bf215546Sopenharmony_ci /* It's the first time we set the constants. Always update shaders. */ 1342bf215546Sopenharmony_ci if (shader == PIPE_SHADER_FRAGMENT) 1343bf215546Sopenharmony_ci sctx->shaders[shader].key.ps.opt.inline_uniforms = true; 1344bf215546Sopenharmony_ci else 1345bf215546Sopenharmony_ci sctx->shaders[shader].key.ge.opt.inline_uniforms = true; 1346bf215546Sopenharmony_ci 1347bf215546Sopenharmony_ci memcpy(inlined_values, values, num_values * 4); 1348bf215546Sopenharmony_ci sctx->do_update_shaders = true; 1349bf215546Sopenharmony_ci return; 1350bf215546Sopenharmony_ci } 1351bf215546Sopenharmony_ci 1352bf215546Sopenharmony_ci /* We have already set inlinable constants for this shader. Update the shader only if 1353bf215546Sopenharmony_ci * the constants are being changed so as not to update shaders needlessly. 
1354bf215546Sopenharmony_ci */ 1355bf215546Sopenharmony_ci if (memcmp(inlined_values, values, num_values * 4)) { 1356bf215546Sopenharmony_ci memcpy(inlined_values, values, num_values * 4); 1357bf215546Sopenharmony_ci sctx->do_update_shaders = true; 1358bf215546Sopenharmony_ci } 1359bf215546Sopenharmony_ci} 1360bf215546Sopenharmony_ci 1361bf215546Sopenharmony_civoid si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, uint slot, 1362bf215546Sopenharmony_ci struct pipe_constant_buffer *cbuf) 1363bf215546Sopenharmony_ci{ 1364bf215546Sopenharmony_ci cbuf->user_buffer = NULL; 1365bf215546Sopenharmony_ci si_get_buffer_from_descriptors( 1366bf215546Sopenharmony_ci &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors(sctx, shader), 1367bf215546Sopenharmony_ci si_get_constbuf_slot(slot), &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size); 1368bf215546Sopenharmony_ci} 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_ci/* SHADER BUFFERS */ 1371bf215546Sopenharmony_ci 1372bf215546Sopenharmony_cistatic void si_set_shader_buffer(struct si_context *sctx, struct si_buffer_resources *buffers, 1373bf215546Sopenharmony_ci unsigned descriptors_idx, uint slot, 1374bf215546Sopenharmony_ci const struct pipe_shader_buffer *sbuffer, bool writable, 1375bf215546Sopenharmony_ci unsigned priority) 1376bf215546Sopenharmony_ci{ 1377bf215546Sopenharmony_ci struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; 1378bf215546Sopenharmony_ci uint32_t *desc = descs->list + slot * 4; 1379bf215546Sopenharmony_ci 1380bf215546Sopenharmony_ci if (!sbuffer || !sbuffer->buffer) { 1381bf215546Sopenharmony_ci pipe_resource_reference(&buffers->buffers[slot], NULL); 1382bf215546Sopenharmony_ci /* Clear the descriptor. Only 3 dwords are cleared. The 4th dword is immutable. 
*/ 1383bf215546Sopenharmony_ci memset(desc, 0, sizeof(uint32_t) * 3); 1384bf215546Sopenharmony_ci buffers->enabled_mask &= ~(1llu << slot); 1385bf215546Sopenharmony_ci buffers->writable_mask &= ~(1llu << slot); 1386bf215546Sopenharmony_ci sctx->descriptors_dirty |= 1u << descriptors_idx; 1387bf215546Sopenharmony_ci return; 1388bf215546Sopenharmony_ci } 1389bf215546Sopenharmony_ci 1390bf215546Sopenharmony_ci struct si_resource *buf = si_resource(sbuffer->buffer); 1391bf215546Sopenharmony_ci uint64_t va = buf->gpu_address + sbuffer->buffer_offset; 1392bf215546Sopenharmony_ci 1393bf215546Sopenharmony_ci desc[0] = va; 1394bf215546Sopenharmony_ci desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(0); 1395bf215546Sopenharmony_ci desc[2] = sbuffer->buffer_size; 1396bf215546Sopenharmony_ci 1397bf215546Sopenharmony_ci pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); 1398bf215546Sopenharmony_ci buffers->offsets[slot] = sbuffer->buffer_offset; 1399bf215546Sopenharmony_ci radeon_add_to_gfx_buffer_list_check_mem( 1400bf215546Sopenharmony_ci sctx, buf, (writable ? 
RADEON_USAGE_READWRITE : RADEON_USAGE_READ) | priority, true); 1401bf215546Sopenharmony_ci if (writable) 1402bf215546Sopenharmony_ci buffers->writable_mask |= 1llu << slot; 1403bf215546Sopenharmony_ci else 1404bf215546Sopenharmony_ci buffers->writable_mask &= ~(1llu << slot); 1405bf215546Sopenharmony_ci 1406bf215546Sopenharmony_ci buffers->enabled_mask |= 1llu << slot; 1407bf215546Sopenharmony_ci sctx->descriptors_dirty |= 1lu << descriptors_idx; 1408bf215546Sopenharmony_ci 1409bf215546Sopenharmony_ci util_range_add(&buf->b.b, &buf->valid_buffer_range, sbuffer->buffer_offset, 1410bf215546Sopenharmony_ci sbuffer->buffer_offset + sbuffer->buffer_size); 1411bf215546Sopenharmony_ci} 1412bf215546Sopenharmony_ci 1413bf215546Sopenharmony_civoid si_set_shader_buffers(struct pipe_context *ctx, enum pipe_shader_type shader, 1414bf215546Sopenharmony_ci unsigned start_slot, unsigned count, 1415bf215546Sopenharmony_ci const struct pipe_shader_buffer *sbuffers, 1416bf215546Sopenharmony_ci unsigned writable_bitmask, bool internal_blit) 1417bf215546Sopenharmony_ci{ 1418bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 1419bf215546Sopenharmony_ci struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader]; 1420bf215546Sopenharmony_ci unsigned descriptors_idx = si_const_and_shader_buffer_descriptors_idx(shader); 1421bf215546Sopenharmony_ci unsigned i; 1422bf215546Sopenharmony_ci 1423bf215546Sopenharmony_ci assert(start_slot + count <= SI_NUM_SHADER_BUFFERS); 1424bf215546Sopenharmony_ci 1425bf215546Sopenharmony_ci if (shader == PIPE_SHADER_COMPUTE && 1426bf215546Sopenharmony_ci sctx->cs_shader_state.program && 1427bf215546Sopenharmony_ci start_slot < sctx->cs_shader_state.program->sel.cs_num_shaderbufs_in_user_sgprs) 1428bf215546Sopenharmony_ci sctx->compute_shaderbuf_sgprs_dirty = true; 1429bf215546Sopenharmony_ci 1430bf215546Sopenharmony_ci for (i = 0; i < count; ++i) { 1431bf215546Sopenharmony_ci const struct pipe_shader_buffer *sbuffer 
= sbuffers ? &sbuffers[i] : NULL; 1432bf215546Sopenharmony_ci unsigned slot = si_get_shaderbuf_slot(start_slot + i); 1433bf215546Sopenharmony_ci 1434bf215546Sopenharmony_ci /* Don't track bind history for internal blits, such as clear_buffer and copy_buffer 1435bf215546Sopenharmony_ci * to prevent unnecessary synchronization before compute blits later. 1436bf215546Sopenharmony_ci */ 1437bf215546Sopenharmony_ci if (!internal_blit && sbuffer && sbuffer->buffer) 1438bf215546Sopenharmony_ci si_resource(sbuffer->buffer)->bind_history |= SI_BIND_SHADER_BUFFER(shader); 1439bf215546Sopenharmony_ci 1440bf215546Sopenharmony_ci si_set_shader_buffer(sctx, buffers, descriptors_idx, slot, sbuffer, 1441bf215546Sopenharmony_ci !!(writable_bitmask & (1u << i)), buffers->priority); 1442bf215546Sopenharmony_ci } 1443bf215546Sopenharmony_ci} 1444bf215546Sopenharmony_ci 1445bf215546Sopenharmony_cistatic void si_pipe_set_shader_buffers(struct pipe_context *ctx, enum pipe_shader_type shader, 1446bf215546Sopenharmony_ci unsigned start_slot, unsigned count, 1447bf215546Sopenharmony_ci const struct pipe_shader_buffer *sbuffers, 1448bf215546Sopenharmony_ci unsigned writable_bitmask) 1449bf215546Sopenharmony_ci{ 1450bf215546Sopenharmony_ci si_set_shader_buffers(ctx, shader, start_slot, count, sbuffers, writable_bitmask, false); 1451bf215546Sopenharmony_ci} 1452bf215546Sopenharmony_ci 1453bf215546Sopenharmony_civoid si_get_shader_buffers(struct si_context *sctx, enum pipe_shader_type shader, uint start_slot, 1454bf215546Sopenharmony_ci uint count, struct pipe_shader_buffer *sbuf) 1455bf215546Sopenharmony_ci{ 1456bf215546Sopenharmony_ci struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader]; 1457bf215546Sopenharmony_ci struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader); 1458bf215546Sopenharmony_ci 1459bf215546Sopenharmony_ci for (unsigned i = 0; i < count; ++i) { 1460bf215546Sopenharmony_ci si_get_buffer_from_descriptors(buffers, descs, 
si_get_shaderbuf_slot(start_slot + i), 1461bf215546Sopenharmony_ci &sbuf[i].buffer, &sbuf[i].buffer_offset, &sbuf[i].buffer_size); 1462bf215546Sopenharmony_ci } 1463bf215546Sopenharmony_ci} 1464bf215546Sopenharmony_ci 1465bf215546Sopenharmony_ci/* RING BUFFERS */ 1466bf215546Sopenharmony_ci 1467bf215546Sopenharmony_civoid si_set_internal_const_buffer(struct si_context *sctx, uint slot, 1468bf215546Sopenharmony_ci const struct pipe_constant_buffer *input) 1469bf215546Sopenharmony_ci{ 1470bf215546Sopenharmony_ci si_set_constant_buffer(sctx, &sctx->internal_bindings, SI_DESCS_INTERNAL, slot, false, input); 1471bf215546Sopenharmony_ci} 1472bf215546Sopenharmony_ci 1473bf215546Sopenharmony_civoid si_set_internal_shader_buffer(struct si_context *sctx, uint slot, 1474bf215546Sopenharmony_ci const struct pipe_shader_buffer *sbuffer) 1475bf215546Sopenharmony_ci{ 1476bf215546Sopenharmony_ci si_set_shader_buffer(sctx, &sctx->internal_bindings, SI_DESCS_INTERNAL, slot, sbuffer, true, 1477bf215546Sopenharmony_ci RADEON_PRIO_SHADER_RW_BUFFER); 1478bf215546Sopenharmony_ci} 1479bf215546Sopenharmony_ci 1480bf215546Sopenharmony_civoid si_set_ring_buffer(struct si_context *sctx, uint slot, struct pipe_resource *buffer, 1481bf215546Sopenharmony_ci unsigned stride, unsigned num_records, bool add_tid, bool swizzle, 1482bf215546Sopenharmony_ci unsigned element_size, unsigned index_stride, uint64_t offset) 1483bf215546Sopenharmony_ci{ 1484bf215546Sopenharmony_ci struct si_buffer_resources *buffers = &sctx->internal_bindings; 1485bf215546Sopenharmony_ci struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL]; 1486bf215546Sopenharmony_ci 1487bf215546Sopenharmony_ci /* The stride field in the resource descriptor has 14 bits */ 1488bf215546Sopenharmony_ci assert(stride < (1 << 14)); 1489bf215546Sopenharmony_ci 1490bf215546Sopenharmony_ci assert(slot < descs->num_elements); 1491bf215546Sopenharmony_ci pipe_resource_reference(&buffers->buffers[slot], NULL); 
1492bf215546Sopenharmony_ci 1493bf215546Sopenharmony_ci if (buffer) { 1494bf215546Sopenharmony_ci uint64_t va; 1495bf215546Sopenharmony_ci 1496bf215546Sopenharmony_ci va = si_resource(buffer)->gpu_address + offset; 1497bf215546Sopenharmony_ci 1498bf215546Sopenharmony_ci switch (element_size) { 1499bf215546Sopenharmony_ci default: 1500bf215546Sopenharmony_ci assert(!"Unsupported ring buffer element size"); 1501bf215546Sopenharmony_ci case 0: 1502bf215546Sopenharmony_ci case 2: 1503bf215546Sopenharmony_ci element_size = 0; 1504bf215546Sopenharmony_ci break; 1505bf215546Sopenharmony_ci case 4: 1506bf215546Sopenharmony_ci element_size = 1; 1507bf215546Sopenharmony_ci break; 1508bf215546Sopenharmony_ci case 8: 1509bf215546Sopenharmony_ci element_size = 2; 1510bf215546Sopenharmony_ci break; 1511bf215546Sopenharmony_ci case 16: 1512bf215546Sopenharmony_ci element_size = 3; 1513bf215546Sopenharmony_ci break; 1514bf215546Sopenharmony_ci } 1515bf215546Sopenharmony_ci 1516bf215546Sopenharmony_ci switch (index_stride) { 1517bf215546Sopenharmony_ci default: 1518bf215546Sopenharmony_ci assert(!"Unsupported ring buffer index stride"); 1519bf215546Sopenharmony_ci case 0: 1520bf215546Sopenharmony_ci case 8: 1521bf215546Sopenharmony_ci index_stride = 0; 1522bf215546Sopenharmony_ci break; 1523bf215546Sopenharmony_ci case 16: 1524bf215546Sopenharmony_ci index_stride = 1; 1525bf215546Sopenharmony_ci break; 1526bf215546Sopenharmony_ci case 32: 1527bf215546Sopenharmony_ci index_stride = 2; 1528bf215546Sopenharmony_ci break; 1529bf215546Sopenharmony_ci case 64: 1530bf215546Sopenharmony_ci index_stride = 3; 1531bf215546Sopenharmony_ci break; 1532bf215546Sopenharmony_ci } 1533bf215546Sopenharmony_ci 1534bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX8 && stride) 1535bf215546Sopenharmony_ci num_records *= stride; 1536bf215546Sopenharmony_ci 1537bf215546Sopenharmony_ci /* Set the descriptor. 
*/ 1538bf215546Sopenharmony_ci uint32_t *desc = descs->list + slot * 4; 1539bf215546Sopenharmony_ci desc[0] = va; 1540bf215546Sopenharmony_ci desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); 1541bf215546Sopenharmony_ci desc[2] = num_records; 1542bf215546Sopenharmony_ci desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 1543bf215546Sopenharmony_ci S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 1544bf215546Sopenharmony_ci S_008F0C_INDEX_STRIDE(index_stride) | S_008F0C_ADD_TID_ENABLE(add_tid); 1545bf215546Sopenharmony_ci 1546bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) { 1547bf215546Sopenharmony_ci assert(!swizzle || element_size == 1 || element_size == 3); /* 4 or 16 bytes */ 1548bf215546Sopenharmony_ci desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX11(swizzle ? element_size : 0); 1549bf215546Sopenharmony_ci } else if (sctx->gfx_level >= GFX9) { 1550bf215546Sopenharmony_ci assert(!swizzle || element_size == 1); /* only 4 bytes on GFX9 */ 1551bf215546Sopenharmony_ci desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(swizzle); 1552bf215546Sopenharmony_ci } else { 1553bf215546Sopenharmony_ci desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(swizzle); 1554bf215546Sopenharmony_ci desc[3] |= S_008F0C_ELEMENT_SIZE(element_size); 1555bf215546Sopenharmony_ci } 1556bf215546Sopenharmony_ci 1557bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) { 1558bf215546Sopenharmony_ci desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | 1559bf215546Sopenharmony_ci S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); 1560bf215546Sopenharmony_ci } else if (sctx->gfx_level >= GFX10) { 1561bf215546Sopenharmony_ci desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | 1562bf215546Sopenharmony_ci S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); 1563bf215546Sopenharmony_ci } else { 1564bf215546Sopenharmony_ci desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | 
1565bf215546Sopenharmony_ci S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 1566bf215546Sopenharmony_ci } 1567bf215546Sopenharmony_ci 1568bf215546Sopenharmony_ci pipe_resource_reference(&buffers->buffers[slot], buffer); 1569bf215546Sopenharmony_ci radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), 1570bf215546Sopenharmony_ci RADEON_USAGE_READWRITE | buffers->priority); 1571bf215546Sopenharmony_ci buffers->enabled_mask |= 1llu << slot; 1572bf215546Sopenharmony_ci } else { 1573bf215546Sopenharmony_ci /* Clear the descriptor. */ 1574bf215546Sopenharmony_ci memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 4); 1575bf215546Sopenharmony_ci buffers->enabled_mask &= ~(1llu << slot); 1576bf215546Sopenharmony_ci } 1577bf215546Sopenharmony_ci 1578bf215546Sopenharmony_ci sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL; 1579bf215546Sopenharmony_ci} 1580bf215546Sopenharmony_ci 1581bf215546Sopenharmony_ci/* INTERNAL CONST BUFFERS */ 1582bf215546Sopenharmony_ci 1583bf215546Sopenharmony_cistatic void si_set_polygon_stipple(struct pipe_context *ctx, const struct pipe_poly_stipple *state) 1584bf215546Sopenharmony_ci{ 1585bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 1586bf215546Sopenharmony_ci struct pipe_constant_buffer cb = {}; 1587bf215546Sopenharmony_ci unsigned stipple[32]; 1588bf215546Sopenharmony_ci int i; 1589bf215546Sopenharmony_ci 1590bf215546Sopenharmony_ci for (i = 0; i < 32; i++) 1591bf215546Sopenharmony_ci stipple[i] = util_bitreverse(state->stipple[i]); 1592bf215546Sopenharmony_ci 1593bf215546Sopenharmony_ci cb.user_buffer = stipple; 1594bf215546Sopenharmony_ci cb.buffer_size = sizeof(stipple); 1595bf215546Sopenharmony_ci 1596bf215546Sopenharmony_ci si_set_internal_const_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb); 1597bf215546Sopenharmony_ci} 1598bf215546Sopenharmony_ci 1599bf215546Sopenharmony_ci/* TEXTURE METADATA ENABLE/DISABLE */ 1600bf215546Sopenharmony_ci 1601bf215546Sopenharmony_cistatic void 
si_resident_handles_update_needs_color_decompress(struct si_context *sctx) 1602bf215546Sopenharmony_ci{ 1603bf215546Sopenharmony_ci util_dynarray_clear(&sctx->resident_tex_needs_color_decompress); 1604bf215546Sopenharmony_ci util_dynarray_clear(&sctx->resident_img_needs_color_decompress); 1605bf215546Sopenharmony_ci 1606bf215546Sopenharmony_ci util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { 1607bf215546Sopenharmony_ci struct pipe_resource *res = (*tex_handle)->view->texture; 1608bf215546Sopenharmony_ci struct si_texture *tex; 1609bf215546Sopenharmony_ci 1610bf215546Sopenharmony_ci if (!res || res->target == PIPE_BUFFER) 1611bf215546Sopenharmony_ci continue; 1612bf215546Sopenharmony_ci 1613bf215546Sopenharmony_ci tex = (struct si_texture *)res; 1614bf215546Sopenharmony_ci if (!color_needs_decompression(tex)) 1615bf215546Sopenharmony_ci continue; 1616bf215546Sopenharmony_ci 1617bf215546Sopenharmony_ci util_dynarray_append(&sctx->resident_tex_needs_color_decompress, struct si_texture_handle *, 1618bf215546Sopenharmony_ci *tex_handle); 1619bf215546Sopenharmony_ci } 1620bf215546Sopenharmony_ci 1621bf215546Sopenharmony_ci util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) { 1622bf215546Sopenharmony_ci struct pipe_image_view *view = &(*img_handle)->view; 1623bf215546Sopenharmony_ci struct pipe_resource *res = view->resource; 1624bf215546Sopenharmony_ci struct si_texture *tex; 1625bf215546Sopenharmony_ci 1626bf215546Sopenharmony_ci if (!res || res->target == PIPE_BUFFER) 1627bf215546Sopenharmony_ci continue; 1628bf215546Sopenharmony_ci 1629bf215546Sopenharmony_ci tex = (struct si_texture *)res; 1630bf215546Sopenharmony_ci if (!color_needs_decompression(tex)) 1631bf215546Sopenharmony_ci continue; 1632bf215546Sopenharmony_ci 1633bf215546Sopenharmony_ci util_dynarray_append(&sctx->resident_img_needs_color_decompress, struct si_image_handle *, 1634bf215546Sopenharmony_ci *img_handle); 
1635bf215546Sopenharmony_ci } 1636bf215546Sopenharmony_ci} 1637bf215546Sopenharmony_ci 1638bf215546Sopenharmony_ci/* CMASK can be enabled (for fast clear) and disabled (for texture export) 1639bf215546Sopenharmony_ci * while the texture is bound, possibly by a different context. In that case, 1640bf215546Sopenharmony_ci * call this function to update needs_*_decompress_masks. 1641bf215546Sopenharmony_ci */ 1642bf215546Sopenharmony_civoid si_update_needs_color_decompress_masks(struct si_context *sctx) 1643bf215546Sopenharmony_ci{ 1644bf215546Sopenharmony_ci for (int i = 0; i < SI_NUM_SHADERS; ++i) { 1645bf215546Sopenharmony_ci si_samplers_update_needs_color_decompress_mask(&sctx->samplers[i]); 1646bf215546Sopenharmony_ci si_images_update_needs_color_decompress_mask(&sctx->images[i]); 1647bf215546Sopenharmony_ci si_update_shader_needs_decompress_mask(sctx, i); 1648bf215546Sopenharmony_ci } 1649bf215546Sopenharmony_ci 1650bf215546Sopenharmony_ci si_resident_handles_update_needs_color_decompress(sctx); 1651bf215546Sopenharmony_ci} 1652bf215546Sopenharmony_ci 1653bf215546Sopenharmony_ci/* BUFFER DISCARD/INVALIDATION */ 1654bf215546Sopenharmony_ci 1655bf215546Sopenharmony_ci/* Reset descriptors of buffer resources after \p buf has been invalidated. 1656bf215546Sopenharmony_ci * If buf == NULL, reset all descriptors. 
1657bf215546Sopenharmony_ci */ 1658bf215546Sopenharmony_cistatic bool si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, 1659bf215546Sopenharmony_ci unsigned descriptors_idx, uint64_t slot_mask, 1660bf215546Sopenharmony_ci struct pipe_resource *buf, unsigned priority) 1661bf215546Sopenharmony_ci{ 1662bf215546Sopenharmony_ci struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; 1663bf215546Sopenharmony_ci bool noop = true; 1664bf215546Sopenharmony_ci uint64_t mask = buffers->enabled_mask & slot_mask; 1665bf215546Sopenharmony_ci 1666bf215546Sopenharmony_ci while (mask) { 1667bf215546Sopenharmony_ci unsigned i = u_bit_scan64(&mask); 1668bf215546Sopenharmony_ci struct pipe_resource *buffer = buffers->buffers[i]; 1669bf215546Sopenharmony_ci 1670bf215546Sopenharmony_ci if (buffer && (!buf || buffer == buf)) { 1671bf215546Sopenharmony_ci si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], descs->list + i * 4); 1672bf215546Sopenharmony_ci sctx->descriptors_dirty |= 1u << descriptors_idx; 1673bf215546Sopenharmony_ci 1674bf215546Sopenharmony_ci radeon_add_to_gfx_buffer_list_check_mem( 1675bf215546Sopenharmony_ci sctx, si_resource(buffer), 1676bf215546Sopenharmony_ci (buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ) | 1677bf215546Sopenharmony_ci priority, true); 1678bf215546Sopenharmony_ci noop = false; 1679bf215546Sopenharmony_ci } 1680bf215546Sopenharmony_ci } 1681bf215546Sopenharmony_ci return !noop; 1682bf215546Sopenharmony_ci} 1683bf215546Sopenharmony_ci 1684bf215546Sopenharmony_ci/* Update all buffer bindings where the buffer is bound, including 1685bf215546Sopenharmony_ci * all resource descriptors. This is invalidate_buffer without 1686bf215546Sopenharmony_ci * the invalidation. 1687bf215546Sopenharmony_ci * 1688bf215546Sopenharmony_ci * If buf == NULL, update all buffer bindings. 
1689bf215546Sopenharmony_ci */ 1690bf215546Sopenharmony_civoid si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) 1691bf215546Sopenharmony_ci{ 1692bf215546Sopenharmony_ci struct si_resource *buffer = si_resource(buf); 1693bf215546Sopenharmony_ci unsigned i; 1694bf215546Sopenharmony_ci unsigned num_elems = sctx->num_vertex_elements; 1695bf215546Sopenharmony_ci 1696bf215546Sopenharmony_ci /* We changed the buffer, now we need to bind it where the old one 1697bf215546Sopenharmony_ci * was bound. This consists of 2 things: 1698bf215546Sopenharmony_ci * 1) Updating the resource descriptor and dirtying it. 1699bf215546Sopenharmony_ci * 2) Adding a relocation to the CS, so that it's usable. 1700bf215546Sopenharmony_ci */ 1701bf215546Sopenharmony_ci 1702bf215546Sopenharmony_ci /* Vertex buffers. */ 1703bf215546Sopenharmony_ci if (!buffer) { 1704bf215546Sopenharmony_ci sctx->vertex_buffers_dirty = num_elems > 0; 1705bf215546Sopenharmony_ci } else if (buffer->bind_history & SI_BIND_VERTEX_BUFFER) { 1706bf215546Sopenharmony_ci for (i = 0; i < num_elems; i++) { 1707bf215546Sopenharmony_ci int vb = sctx->vertex_elements->vertex_buffer_index[i]; 1708bf215546Sopenharmony_ci 1709bf215546Sopenharmony_ci if (vb >= ARRAY_SIZE(sctx->vertex_buffer)) 1710bf215546Sopenharmony_ci continue; 1711bf215546Sopenharmony_ci if (!sctx->vertex_buffer[vb].buffer.resource) 1712bf215546Sopenharmony_ci continue; 1713bf215546Sopenharmony_ci 1714bf215546Sopenharmony_ci if (sctx->vertex_buffer[vb].buffer.resource == buf) { 1715bf215546Sopenharmony_ci sctx->vertex_buffers_dirty = num_elems > 0; 1716bf215546Sopenharmony_ci break; 1717bf215546Sopenharmony_ci } 1718bf215546Sopenharmony_ci } 1719bf215546Sopenharmony_ci } 1720bf215546Sopenharmony_ci 1721bf215546Sopenharmony_ci /* Streamout buffers. 
(other internal buffers can't be invalidated) */ 1722bf215546Sopenharmony_ci if (!buffer || buffer->bind_history & SI_BIND_STREAMOUT_BUFFER) { 1723bf215546Sopenharmony_ci for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) { 1724bf215546Sopenharmony_ci struct si_buffer_resources *buffers = &sctx->internal_bindings; 1725bf215546Sopenharmony_ci struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL]; 1726bf215546Sopenharmony_ci struct pipe_resource *buffer = buffers->buffers[i]; 1727bf215546Sopenharmony_ci 1728bf215546Sopenharmony_ci if (!buffer || (buf && buffer != buf)) 1729bf215546Sopenharmony_ci continue; 1730bf215546Sopenharmony_ci 1731bf215546Sopenharmony_ci si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], descs->list + i * 4); 1732bf215546Sopenharmony_ci sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL; 1733bf215546Sopenharmony_ci 1734bf215546Sopenharmony_ci radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_WRITE | 1735bf215546Sopenharmony_ci RADEON_PRIO_SHADER_RW_BUFFER, true); 1736bf215546Sopenharmony_ci 1737bf215546Sopenharmony_ci /* Update the streamout state. */ 1738bf215546Sopenharmony_ci if (sctx->streamout.begin_emitted) 1739bf215546Sopenharmony_ci si_emit_streamout_end(sctx); 1740bf215546Sopenharmony_ci sctx->streamout.append_bitmask = sctx->streamout.enabled_mask; 1741bf215546Sopenharmony_ci si_streamout_buffers_dirty(sctx); 1742bf215546Sopenharmony_ci } 1743bf215546Sopenharmony_ci } 1744bf215546Sopenharmony_ci 1745bf215546Sopenharmony_ci /* Constant and shader buffers. */ 1746bf215546Sopenharmony_ci if (!buffer || buffer->bind_history & SI_BIND_CONSTANT_BUFFER_ALL) { 1747bf215546Sopenharmony_ci unsigned mask = buffer ? 
(buffer->bind_history & SI_BIND_CONSTANT_BUFFER_ALL) >> 1748bf215546Sopenharmony_ci SI_BIND_CONSTANT_BUFFER_SHIFT : BITFIELD_MASK(SI_NUM_SHADERS); 1749bf215546Sopenharmony_ci u_foreach_bit(shader, mask) { 1750bf215546Sopenharmony_ci si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], 1751bf215546Sopenharmony_ci si_const_and_shader_buffer_descriptors_idx(shader), 1752bf215546Sopenharmony_ci u_bit_consecutive64(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS), 1753bf215546Sopenharmony_ci buf, sctx->const_and_shader_buffers[shader].priority_constbuf); 1754bf215546Sopenharmony_ci } 1755bf215546Sopenharmony_ci } 1756bf215546Sopenharmony_ci 1757bf215546Sopenharmony_ci if (!buffer || buffer->bind_history & SI_BIND_SHADER_BUFFER_ALL) { 1758bf215546Sopenharmony_ci unsigned mask = buffer ? (buffer->bind_history & SI_BIND_SHADER_BUFFER_ALL) >> 1759bf215546Sopenharmony_ci SI_BIND_SHADER_BUFFER_SHIFT : BITFIELD_MASK(SI_NUM_SHADERS); 1760bf215546Sopenharmony_ci u_foreach_bit(shader, mask) { 1761bf215546Sopenharmony_ci if (si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], 1762bf215546Sopenharmony_ci si_const_and_shader_buffer_descriptors_idx(shader), 1763bf215546Sopenharmony_ci u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS), buf, 1764bf215546Sopenharmony_ci sctx->const_and_shader_buffers[shader].priority) && 1765bf215546Sopenharmony_ci shader == PIPE_SHADER_COMPUTE) { 1766bf215546Sopenharmony_ci sctx->compute_shaderbuf_sgprs_dirty = true; 1767bf215546Sopenharmony_ci } 1768bf215546Sopenharmony_ci } 1769bf215546Sopenharmony_ci } 1770bf215546Sopenharmony_ci 1771bf215546Sopenharmony_ci if (!buffer || buffer->bind_history & SI_BIND_SAMPLER_BUFFER_ALL) { 1772bf215546Sopenharmony_ci unsigned mask = buffer ? (buffer->bind_history & SI_BIND_SAMPLER_BUFFER_ALL) >> 1773bf215546Sopenharmony_ci SI_BIND_SAMPLER_BUFFER_SHIFT : BITFIELD_MASK(SI_NUM_SHADERS); 1774bf215546Sopenharmony_ci /* Texture buffers - update bindings. 
*/ 1775bf215546Sopenharmony_ci u_foreach_bit(shader, mask) { 1776bf215546Sopenharmony_ci struct si_samplers *samplers = &sctx->samplers[shader]; 1777bf215546Sopenharmony_ci struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader); 1778bf215546Sopenharmony_ci unsigned mask = samplers->enabled_mask; 1779bf215546Sopenharmony_ci 1780bf215546Sopenharmony_ci while (mask) { 1781bf215546Sopenharmony_ci unsigned i = u_bit_scan(&mask); 1782bf215546Sopenharmony_ci struct pipe_resource *buffer = samplers->views[i]->texture; 1783bf215546Sopenharmony_ci 1784bf215546Sopenharmony_ci if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) { 1785bf215546Sopenharmony_ci unsigned desc_slot = si_get_sampler_slot(i); 1786bf215546Sopenharmony_ci 1787bf215546Sopenharmony_ci si_set_buf_desc_address(si_resource(buffer), samplers->views[i]->u.buf.offset, 1788bf215546Sopenharmony_ci descs->list + desc_slot * 16 + 4); 1789bf215546Sopenharmony_ci sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 1790bf215546Sopenharmony_ci 1791bf215546Sopenharmony_ci radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ | 1792bf215546Sopenharmony_ci RADEON_PRIO_SAMPLER_BUFFER, true); 1793bf215546Sopenharmony_ci } 1794bf215546Sopenharmony_ci } 1795bf215546Sopenharmony_ci } 1796bf215546Sopenharmony_ci } 1797bf215546Sopenharmony_ci 1798bf215546Sopenharmony_ci /* Shader images */ 1799bf215546Sopenharmony_ci if (!buffer || buffer->bind_history & SI_BIND_IMAGE_BUFFER_ALL) { 1800bf215546Sopenharmony_ci unsigned mask = buffer ? 
(buffer->bind_history & SI_BIND_IMAGE_BUFFER_SHIFT) >> 1801bf215546Sopenharmony_ci SI_BIND_IMAGE_BUFFER_SHIFT : BITFIELD_MASK(SI_NUM_SHADERS); 1802bf215546Sopenharmony_ci u_foreach_bit(shader, mask) { 1803bf215546Sopenharmony_ci struct si_images *images = &sctx->images[shader]; 1804bf215546Sopenharmony_ci struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader); 1805bf215546Sopenharmony_ci unsigned mask = images->enabled_mask; 1806bf215546Sopenharmony_ci 1807bf215546Sopenharmony_ci while (mask) { 1808bf215546Sopenharmony_ci unsigned i = u_bit_scan(&mask); 1809bf215546Sopenharmony_ci struct pipe_resource *buffer = images->views[i].resource; 1810bf215546Sopenharmony_ci 1811bf215546Sopenharmony_ci if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) { 1812bf215546Sopenharmony_ci unsigned desc_slot = si_get_image_slot(i); 1813bf215546Sopenharmony_ci 1814bf215546Sopenharmony_ci if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE) 1815bf215546Sopenharmony_ci si_mark_image_range_valid(&images->views[i]); 1816bf215546Sopenharmony_ci 1817bf215546Sopenharmony_ci si_set_buf_desc_address(si_resource(buffer), images->views[i].u.buf.offset, 1818bf215546Sopenharmony_ci descs->list + desc_slot * 8 + 4); 1819bf215546Sopenharmony_ci sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); 1820bf215546Sopenharmony_ci 1821bf215546Sopenharmony_ci radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), 1822bf215546Sopenharmony_ci RADEON_USAGE_READWRITE | 1823bf215546Sopenharmony_ci RADEON_PRIO_SAMPLER_BUFFER, true); 1824bf215546Sopenharmony_ci 1825bf215546Sopenharmony_ci if (shader == PIPE_SHADER_COMPUTE) 1826bf215546Sopenharmony_ci sctx->compute_image_sgprs_dirty = true; 1827bf215546Sopenharmony_ci } 1828bf215546Sopenharmony_ci } 1829bf215546Sopenharmony_ci } 1830bf215546Sopenharmony_ci } 1831bf215546Sopenharmony_ci 1832bf215546Sopenharmony_ci /* Bindless texture handles */ 1833bf215546Sopenharmony_ci if 
(!buffer || buffer->texture_handle_allocated) { 1834bf215546Sopenharmony_ci struct si_descriptors *descs = &sctx->bindless_descriptors; 1835bf215546Sopenharmony_ci 1836bf215546Sopenharmony_ci util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { 1837bf215546Sopenharmony_ci struct pipe_sampler_view *view = (*tex_handle)->view; 1838bf215546Sopenharmony_ci unsigned desc_slot = (*tex_handle)->desc_slot; 1839bf215546Sopenharmony_ci struct pipe_resource *buffer = view->texture; 1840bf215546Sopenharmony_ci 1841bf215546Sopenharmony_ci if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) { 1842bf215546Sopenharmony_ci si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset, 1843bf215546Sopenharmony_ci descs->list + desc_slot * 16 + 4); 1844bf215546Sopenharmony_ci 1845bf215546Sopenharmony_ci (*tex_handle)->desc_dirty = true; 1846bf215546Sopenharmony_ci sctx->bindless_descriptors_dirty = true; 1847bf215546Sopenharmony_ci 1848bf215546Sopenharmony_ci radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ | 1849bf215546Sopenharmony_ci RADEON_PRIO_SAMPLER_BUFFER, true); 1850bf215546Sopenharmony_ci } 1851bf215546Sopenharmony_ci } 1852bf215546Sopenharmony_ci } 1853bf215546Sopenharmony_ci 1854bf215546Sopenharmony_ci /* Bindless image handles */ 1855bf215546Sopenharmony_ci if (!buffer || buffer->image_handle_allocated) { 1856bf215546Sopenharmony_ci struct si_descriptors *descs = &sctx->bindless_descriptors; 1857bf215546Sopenharmony_ci 1858bf215546Sopenharmony_ci util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) { 1859bf215546Sopenharmony_ci struct pipe_image_view *view = &(*img_handle)->view; 1860bf215546Sopenharmony_ci unsigned desc_slot = (*img_handle)->desc_slot; 1861bf215546Sopenharmony_ci struct pipe_resource *buffer = view->resource; 1862bf215546Sopenharmony_ci 1863bf215546Sopenharmony_ci if (buffer && buffer->target == PIPE_BUFFER && (!buf || 
buffer == buf)) { 1864bf215546Sopenharmony_ci if (view->access & PIPE_IMAGE_ACCESS_WRITE) 1865bf215546Sopenharmony_ci si_mark_image_range_valid(view); 1866bf215546Sopenharmony_ci 1867bf215546Sopenharmony_ci si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset, 1868bf215546Sopenharmony_ci descs->list + desc_slot * 16 + 4); 1869bf215546Sopenharmony_ci 1870bf215546Sopenharmony_ci (*img_handle)->desc_dirty = true; 1871bf215546Sopenharmony_ci sctx->bindless_descriptors_dirty = true; 1872bf215546Sopenharmony_ci 1873bf215546Sopenharmony_ci radeon_add_to_gfx_buffer_list_check_mem( 1874bf215546Sopenharmony_ci sctx, si_resource(buffer), RADEON_USAGE_READWRITE | RADEON_PRIO_SAMPLER_BUFFER, true); 1875bf215546Sopenharmony_ci } 1876bf215546Sopenharmony_ci } 1877bf215546Sopenharmony_ci } 1878bf215546Sopenharmony_ci 1879bf215546Sopenharmony_ci if (buffer) { 1880bf215546Sopenharmony_ci /* Do the same for other contexts. They will invoke this function 1881bf215546Sopenharmony_ci * with buffer == NULL. 1882bf215546Sopenharmony_ci */ 1883bf215546Sopenharmony_ci unsigned new_counter = p_atomic_inc_return(&sctx->screen->dirty_buf_counter); 1884bf215546Sopenharmony_ci 1885bf215546Sopenharmony_ci /* Skip the update for the current context, because we have already updated 1886bf215546Sopenharmony_ci * the buffer bindings. 
1887bf215546Sopenharmony_ci */ 1888bf215546Sopenharmony_ci if (new_counter == sctx->last_dirty_buf_counter + 1) 1889bf215546Sopenharmony_ci sctx->last_dirty_buf_counter = new_counter; 1890bf215546Sopenharmony_ci } 1891bf215546Sopenharmony_ci} 1892bf215546Sopenharmony_ci 1893bf215546Sopenharmony_cistatic void si_upload_bindless_descriptor(struct si_context *sctx, unsigned desc_slot, 1894bf215546Sopenharmony_ci unsigned num_dwords) 1895bf215546Sopenharmony_ci{ 1896bf215546Sopenharmony_ci struct si_descriptors *desc = &sctx->bindless_descriptors; 1897bf215546Sopenharmony_ci unsigned desc_slot_offset = desc_slot * 16; 1898bf215546Sopenharmony_ci uint32_t *data; 1899bf215546Sopenharmony_ci uint64_t va; 1900bf215546Sopenharmony_ci 1901bf215546Sopenharmony_ci data = desc->list + desc_slot_offset; 1902bf215546Sopenharmony_ci va = desc->gpu_address + desc_slot_offset * 4; 1903bf215546Sopenharmony_ci 1904bf215546Sopenharmony_ci si_cp_write_data(sctx, desc->buffer, va - desc->buffer->gpu_address, num_dwords * 4, V_370_TC_L2, 1905bf215546Sopenharmony_ci V_370_ME, data); 1906bf215546Sopenharmony_ci} 1907bf215546Sopenharmony_ci 1908bf215546Sopenharmony_cistatic void si_upload_bindless_descriptors(struct si_context *sctx) 1909bf215546Sopenharmony_ci{ 1910bf215546Sopenharmony_ci if (!sctx->bindless_descriptors_dirty) 1911bf215546Sopenharmony_ci return; 1912bf215546Sopenharmony_ci 1913bf215546Sopenharmony_ci /* Wait for graphics/compute to be idle before updating the resident 1914bf215546Sopenharmony_ci * descriptors directly in memory, in case the GPU is using them. 
1915bf215546Sopenharmony_ci */ 1916bf215546Sopenharmony_ci sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH; 1917bf215546Sopenharmony_ci sctx->emit_cache_flush(sctx, &sctx->gfx_cs); 1918bf215546Sopenharmony_ci 1919bf215546Sopenharmony_ci util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { 1920bf215546Sopenharmony_ci unsigned desc_slot = (*tex_handle)->desc_slot; 1921bf215546Sopenharmony_ci 1922bf215546Sopenharmony_ci if (!(*tex_handle)->desc_dirty) 1923bf215546Sopenharmony_ci continue; 1924bf215546Sopenharmony_ci 1925bf215546Sopenharmony_ci si_upload_bindless_descriptor(sctx, desc_slot, 16); 1926bf215546Sopenharmony_ci (*tex_handle)->desc_dirty = false; 1927bf215546Sopenharmony_ci } 1928bf215546Sopenharmony_ci 1929bf215546Sopenharmony_ci util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) { 1930bf215546Sopenharmony_ci unsigned desc_slot = (*img_handle)->desc_slot; 1931bf215546Sopenharmony_ci 1932bf215546Sopenharmony_ci if (!(*img_handle)->desc_dirty) 1933bf215546Sopenharmony_ci continue; 1934bf215546Sopenharmony_ci 1935bf215546Sopenharmony_ci si_upload_bindless_descriptor(sctx, desc_slot, 8); 1936bf215546Sopenharmony_ci (*img_handle)->desc_dirty = false; 1937bf215546Sopenharmony_ci } 1938bf215546Sopenharmony_ci 1939bf215546Sopenharmony_ci /* Invalidate scalar L0 because the cache doesn't know that L2 changed. */ 1940bf215546Sopenharmony_ci sctx->flags |= SI_CONTEXT_INV_SCACHE; 1941bf215546Sopenharmony_ci sctx->bindless_descriptors_dirty = false; 1942bf215546Sopenharmony_ci} 1943bf215546Sopenharmony_ci 1944bf215546Sopenharmony_ci/* Update mutable image descriptor fields of all resident textures. 
*/ 1945bf215546Sopenharmony_cistatic void si_update_bindless_texture_descriptor(struct si_context *sctx, 1946bf215546Sopenharmony_ci struct si_texture_handle *tex_handle) 1947bf215546Sopenharmony_ci{ 1948bf215546Sopenharmony_ci struct si_sampler_view *sview = (struct si_sampler_view *)tex_handle->view; 1949bf215546Sopenharmony_ci struct si_descriptors *desc = &sctx->bindless_descriptors; 1950bf215546Sopenharmony_ci unsigned desc_slot_offset = tex_handle->desc_slot * 16; 1951bf215546Sopenharmony_ci uint32_t desc_list[16]; 1952bf215546Sopenharmony_ci 1953bf215546Sopenharmony_ci if (sview->base.texture->target == PIPE_BUFFER) 1954bf215546Sopenharmony_ci return; 1955bf215546Sopenharmony_ci 1956bf215546Sopenharmony_ci memcpy(desc_list, desc->list + desc_slot_offset, sizeof(desc_list)); 1957bf215546Sopenharmony_ci si_set_sampler_view_desc(sctx, sview, &tex_handle->sstate, desc->list + desc_slot_offset); 1958bf215546Sopenharmony_ci 1959bf215546Sopenharmony_ci if (memcmp(desc_list, desc->list + desc_slot_offset, sizeof(desc_list))) { 1960bf215546Sopenharmony_ci tex_handle->desc_dirty = true; 1961bf215546Sopenharmony_ci sctx->bindless_descriptors_dirty = true; 1962bf215546Sopenharmony_ci } 1963bf215546Sopenharmony_ci} 1964bf215546Sopenharmony_ci 1965bf215546Sopenharmony_cistatic void si_update_bindless_image_descriptor(struct si_context *sctx, 1966bf215546Sopenharmony_ci struct si_image_handle *img_handle) 1967bf215546Sopenharmony_ci{ 1968bf215546Sopenharmony_ci struct si_descriptors *desc = &sctx->bindless_descriptors; 1969bf215546Sopenharmony_ci unsigned desc_slot_offset = img_handle->desc_slot * 16; 1970bf215546Sopenharmony_ci struct pipe_image_view *view = &img_handle->view; 1971bf215546Sopenharmony_ci struct pipe_resource *res = view->resource; 1972bf215546Sopenharmony_ci uint32_t image_desc[16]; 1973bf215546Sopenharmony_ci unsigned desc_size = (res->nr_samples >= 2 ? 
16 : 8) * 4; 1974bf215546Sopenharmony_ci 1975bf215546Sopenharmony_ci if (res->target == PIPE_BUFFER) 1976bf215546Sopenharmony_ci return; 1977bf215546Sopenharmony_ci 1978bf215546Sopenharmony_ci memcpy(image_desc, desc->list + desc_slot_offset, desc_size); 1979bf215546Sopenharmony_ci si_set_shader_image_desc(sctx, view, true, desc->list + desc_slot_offset, 1980bf215546Sopenharmony_ci desc->list + desc_slot_offset + 8); 1981bf215546Sopenharmony_ci 1982bf215546Sopenharmony_ci if (memcmp(image_desc, desc->list + desc_slot_offset, desc_size)) { 1983bf215546Sopenharmony_ci img_handle->desc_dirty = true; 1984bf215546Sopenharmony_ci sctx->bindless_descriptors_dirty = true; 1985bf215546Sopenharmony_ci } 1986bf215546Sopenharmony_ci} 1987bf215546Sopenharmony_ci 1988bf215546Sopenharmony_cistatic void si_update_all_resident_texture_descriptors(struct si_context *sctx) 1989bf215546Sopenharmony_ci{ 1990bf215546Sopenharmony_ci util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { 1991bf215546Sopenharmony_ci si_update_bindless_texture_descriptor(sctx, *tex_handle); 1992bf215546Sopenharmony_ci } 1993bf215546Sopenharmony_ci 1994bf215546Sopenharmony_ci util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) { 1995bf215546Sopenharmony_ci si_update_bindless_image_descriptor(sctx, *img_handle); 1996bf215546Sopenharmony_ci } 1997bf215546Sopenharmony_ci 1998bf215546Sopenharmony_ci si_upload_bindless_descriptors(sctx); 1999bf215546Sopenharmony_ci} 2000bf215546Sopenharmony_ci 2001bf215546Sopenharmony_ci/* Update mutable image descriptor fields of all bound textures. 
*/ 2002bf215546Sopenharmony_civoid si_update_all_texture_descriptors(struct si_context *sctx) 2003bf215546Sopenharmony_ci{ 2004bf215546Sopenharmony_ci unsigned shader; 2005bf215546Sopenharmony_ci 2006bf215546Sopenharmony_ci for (shader = 0; shader < SI_NUM_SHADERS; shader++) { 2007bf215546Sopenharmony_ci struct si_samplers *samplers = &sctx->samplers[shader]; 2008bf215546Sopenharmony_ci struct si_images *images = &sctx->images[shader]; 2009bf215546Sopenharmony_ci unsigned mask; 2010bf215546Sopenharmony_ci 2011bf215546Sopenharmony_ci /* Images. */ 2012bf215546Sopenharmony_ci mask = images->enabled_mask; 2013bf215546Sopenharmony_ci while (mask) { 2014bf215546Sopenharmony_ci unsigned i = u_bit_scan(&mask); 2015bf215546Sopenharmony_ci struct pipe_image_view *view = &images->views[i]; 2016bf215546Sopenharmony_ci 2017bf215546Sopenharmony_ci if (!view->resource || view->resource->target == PIPE_BUFFER) 2018bf215546Sopenharmony_ci continue; 2019bf215546Sopenharmony_ci 2020bf215546Sopenharmony_ci si_set_shader_image(sctx, shader, i, view, true); 2021bf215546Sopenharmony_ci } 2022bf215546Sopenharmony_ci 2023bf215546Sopenharmony_ci /* Sampler views. 
*/ 2024bf215546Sopenharmony_ci mask = samplers->enabled_mask; 2025bf215546Sopenharmony_ci while (mask) { 2026bf215546Sopenharmony_ci unsigned i = u_bit_scan(&mask); 2027bf215546Sopenharmony_ci struct pipe_sampler_view *view = samplers->views[i]; 2028bf215546Sopenharmony_ci 2029bf215546Sopenharmony_ci if (!view || !view->texture || view->texture->target == PIPE_BUFFER) 2030bf215546Sopenharmony_ci continue; 2031bf215546Sopenharmony_ci 2032bf215546Sopenharmony_ci si_set_sampler_views(sctx, shader, i, 1, 0, false, &samplers->views[i], true); 2033bf215546Sopenharmony_ci } 2034bf215546Sopenharmony_ci 2035bf215546Sopenharmony_ci si_update_shader_needs_decompress_mask(sctx, shader); 2036bf215546Sopenharmony_ci } 2037bf215546Sopenharmony_ci 2038bf215546Sopenharmony_ci si_update_all_resident_texture_descriptors(sctx); 2039bf215546Sopenharmony_ci si_update_ps_colorbuf0_slot(sctx); 2040bf215546Sopenharmony_ci} 2041bf215546Sopenharmony_ci 2042bf215546Sopenharmony_ci/* SHADER USER DATA */ 2043bf215546Sopenharmony_ci 2044bf215546Sopenharmony_cistatic void si_mark_shader_pointers_dirty(struct si_context *sctx, unsigned shader) 2045bf215546Sopenharmony_ci{ 2046bf215546Sopenharmony_ci sctx->shader_pointers_dirty |= 2047bf215546Sopenharmony_ci u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS, SI_NUM_SHADER_DESCS); 2048bf215546Sopenharmony_ci 2049bf215546Sopenharmony_ci if (shader == PIPE_SHADER_VERTEX) { 2050bf215546Sopenharmony_ci unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sctx->screen); 2051bf215546Sopenharmony_ci 2052bf215546Sopenharmony_ci sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL && 2053bf215546Sopenharmony_ci sctx->num_vertex_elements > 2054bf215546Sopenharmony_ci num_vbos_in_user_sgprs; 2055bf215546Sopenharmony_ci sctx->vertex_buffer_user_sgprs_dirty = 2056bf215546Sopenharmony_ci sctx->num_vertex_elements > 0 && num_vbos_in_user_sgprs; 2057bf215546Sopenharmony_ci } 2058bf215546Sopenharmony_ci 
2059bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers); 2060bf215546Sopenharmony_ci} 2061bf215546Sopenharmony_ci 2062bf215546Sopenharmony_civoid si_shader_pointers_mark_dirty(struct si_context *sctx) 2063bf215546Sopenharmony_ci{ 2064bf215546Sopenharmony_ci unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sctx->screen); 2065bf215546Sopenharmony_ci 2066bf215546Sopenharmony_ci sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS); 2067bf215546Sopenharmony_ci sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL && 2068bf215546Sopenharmony_ci sctx->num_vertex_elements > 2069bf215546Sopenharmony_ci num_vbos_in_user_sgprs; 2070bf215546Sopenharmony_ci sctx->vertex_buffer_user_sgprs_dirty = 2071bf215546Sopenharmony_ci sctx->num_vertex_elements > 0 && num_vbos_in_user_sgprs; 2072bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers); 2073bf215546Sopenharmony_ci sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL; 2074bf215546Sopenharmony_ci sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL; 2075bf215546Sopenharmony_ci sctx->compute_shaderbuf_sgprs_dirty = true; 2076bf215546Sopenharmony_ci sctx->compute_image_sgprs_dirty = true; 2077bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) 2078bf215546Sopenharmony_ci sctx->gs_attribute_ring_pointer_dirty = true; 2079bf215546Sopenharmony_ci} 2080bf215546Sopenharmony_ci 2081bf215546Sopenharmony_ci/* Set a base register address for user data constants in the given shader. 2082bf215546Sopenharmony_ci * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*. 
 *
 * Nothing happens when \p new_base equals the stored base. On a change, the
 * shader's pointers are marked dirty only if the new base is non-zero, and
 * last_vs_state / last_gs_state are always invalidated.
 */
static void si_set_user_data_base(struct si_context *sctx, unsigned shader, uint32_t new_base)
{
   uint32_t *base = &sctx->shader_pointers.sh_base[shader];

   if (*base != new_base) {
      *base = new_base;

      if (new_base)
         si_mark_shader_pointers_dirty(sctx, shader);

      /* Any change in enabled shader stages requires re-emitting
       * the VS state SGPR, because it contains the clamp_vertex_color
       * state, which can be done in VS, TES, and GS.
       */
      sctx->last_vs_state = ~0;
      sctx->last_gs_state = ~0;
   }
}

/* This must be called when these are changed between enabled and disabled
 * - geometry shader
 * - tessellation evaluation shader
 * - NGG
 *
 * It recomputes the user data base of the VS and TES slots from gfx_level,
 * the presence of TES/GS shaders and the NGG flag, then rewrites the
 * as_ls / as_es / as_ngg bits of the VS, TES and GS shader keys.
 */
void si_shader_change_notify(struct si_context *sctx)
{
   si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
                         si_get_user_data_base(sctx->gfx_level,
                                               sctx->shader.tes.cso ? TESS_ON : TESS_OFF,
                                               sctx->shader.gs.cso ? GS_ON : GS_OFF,
                                               sctx->ngg ? NGG_ON : NGG_OFF,
                                               PIPE_SHADER_VERTEX));

   si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
                         si_get_user_data_base(sctx->gfx_level,
                                               sctx->shader.tes.cso ? TESS_ON : TESS_OFF,
                                               sctx->shader.gs.cso ? GS_ON : GS_OFF,
                                               sctx->ngg ? NGG_ON : NGG_OFF,
                                               PIPE_SHADER_TESS_EVAL));

   /* Update as_* flags in shader keys. Ignore disabled shader stages.
    *   as_ls = VS before TCS
    *   as_es = VS before GS or TES before GS
    *   as_ngg = NGG enabled for the last geometry stage.
    *            If GS sets as_ngg, the previous stage must set as_ngg too.
    */
   if (sctx->shader.tes.cso) {
      /* Tessellation enabled: VS is LS; TES is ES only when a GS follows. */
      sctx->shader.vs.key.ge.as_ls = 1;
      sctx->shader.vs.key.ge.as_es = 0;
      sctx->shader.vs.key.ge.as_ngg = 0;

      if (sctx->shader.gs.cso) {
         sctx->shader.tes.key.ge.as_es = 1;
         sctx->shader.tes.key.ge.as_ngg = sctx->ngg;
         sctx->shader.gs.key.ge.as_ngg = sctx->ngg;
      } else {
         sctx->shader.tes.key.ge.as_es = 0;
         sctx->shader.tes.key.ge.as_ngg = sctx->ngg;
      }
   } else if (sctx->shader.gs.cso) {
      /* GS without tessellation: VS is ES. */
      sctx->shader.vs.key.ge.as_ls = 0;
      sctx->shader.vs.key.ge.as_es = 1;
      sctx->shader.vs.key.ge.as_ngg = sctx->ngg;
      sctx->shader.gs.key.ge.as_ngg = sctx->ngg;
   } else {
      /* VS only. */
      sctx->shader.vs.key.ge.as_ls = 0;
      sctx->shader.vs.key.ge.as_es = 0;
      sctx->shader.vs.key.ge.as_ngg = sctx->ngg;
   }
}

/* Emit the GPU addresses of the dirty descriptor lists selected by
 * pointer_mask. Nothing is emitted when sh_base is 0. For every run of
 * consecutive dirty bits, `count` consecutive SH registers are written,
 * starting at sh_base + shader_userdata_offset of the first list in the run.
 *
 * The macro does not clear shader_pointers_dirty; the callers in this file do
 * that afterwards. In this file it is only expanded between radeon_begin()
 * and radeon_end(). Note that `sctx` is used unparenthesized and more than
 * once, so pass a plain identifier.
 */
#define si_emit_consecutive_shader_pointers(sctx, pointer_mask, sh_base) do { \
   unsigned sh_reg_base = (sh_base); \
   if (sh_reg_base) { \
      unsigned mask = sctx->shader_pointers_dirty & (pointer_mask); \
      \
      while (mask) { \
         int start, count; \
         u_bit_scan_consecutive_range(&mask, &start, &count); \
         \
         struct si_descriptors *descs = &sctx->descriptors[start]; \
         unsigned sh_offset = sh_reg_base + descs->shader_userdata_offset; \
         \
         radeon_set_sh_reg_seq(sh_offset, count); \
         for (int i = 0; i < count; i++) \
            radeon_emit_32bit_pointer(sctx->screen, descs[i].gpu_address); \
      } \
   } \
} while (0)

/* Emit the pointer to \p descs into the *_USER_DATA_*_0 register of a set of
 * hardware stages that depends on gfx_level (and, on GFX9, on whether
 * shadowed_regs is set). Each path closes its own radeon_begin() with
 * radeon_end().
 */
static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_descriptors *descs)
{
   radeon_begin(&sctx->gfx_cs);

   if (sctx->gfx_level >= GFX11) {
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
      radeon_end();
      return;
   } else if (sctx->gfx_level >= GFX10) {
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
      /* HW VS stage only used in non-NGG mode. */
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
      radeon_end();
      return;
   } else if (sctx->gfx_level == GFX9 && sctx->shadowed_regs) {
      /* We can't use the COMMON registers with register shadowing. */
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_LS_0);
      radeon_end();
      return;
   } else if (sctx->gfx_level == GFX9) {
      /* Broadcast it to all shader stages. */
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
      radeon_end();
      return;
   }

   /* Remaining gfx levels (none of the branches above matched). */
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_LS_0);
   radeon_end();
}

/* Emit all dirty descriptor-list pointers of the graphics stages, plus the GS
 * attribute ring pointer and the bindless list pointer when their dirty flags
 * are set, and clear the corresponding dirty state.
 */
void si_emit_graphics_shader_pointers(struct si_context *sctx)
{
   uint32_t *sh_base = sctx->shader_pointers.sh_base;

   /* The internal list goes through the global path, which opens and closes
    * its own radeon_begin()/radeon_end() pair.
    */
   if (sctx->shader_pointers_dirty & (1 << SI_DESCS_INTERNAL)) {
      si_emit_global_shader_pointers(sctx, &sctx->descriptors[SI_DESCS_INTERNAL]);
   }

   radeon_begin(&sctx->gfx_cs);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
                                       sh_base[PIPE_SHADER_VERTEX]);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL),
                                       sh_base[PIPE_SHADER_TESS_EVAL]);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
                                       sh_base[PIPE_SHADER_FRAGMENT]);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
                                       sh_base[PIPE_SHADER_TESS_CTRL]);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
                                       sh_base[PIPE_SHADER_GEOMETRY]);

   if (sctx->gs_attribute_ring_pointer_dirty) {
      assert(sctx->gfx_level >= GFX11);
      radeon_set_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 + GFX9_SGPR_ATTRIBUTE_RING_ADDR * 4,
                        sctx->screen->attribute_ring->gpu_address);
      sctx->gs_attribute_ring_pointer_dirty = false;
   }
   radeon_end();

   /* Clear SI_DESCS_FIRST_COMPUTE dirty bits starting at SI_DESCS_INTERNAL. */
   sctx->shader_pointers_dirty &= ~u_bit_consecutive(SI_DESCS_INTERNAL, SI_DESCS_FIRST_COMPUTE);

   if (sctx->graphics_bindless_pointer_dirty) {
      si_emit_global_shader_pointers(sctx, &sctx->bindless_descriptors);
      sctx->graphics_bindless_pointer_dirty = false;
   }
}

void si_emit_compute_shader_pointers(struct si_context *sctx)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
   struct si_shader_selector *shader = &sctx->cs_shader_state.program->sel;
   unsigned base = R_00B900_COMPUTE_USER_DATA_0;

   radeon_begin(cs);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
                                       R_00B900_COMPUTE_USER_DATA_0);
   sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE);

   if (sctx->compute_bindless_pointer_dirty) {
      radeon_emit_one_32bit_pointer(sctx, &sctx->bindless_descriptors, base);
      sctx->compute_bindless_pointer_dirty = false;
   }

   /* Set shader buffer descriptors in user SGPRs. */
   unsigned num_shaderbufs = shader->cs_num_shaderbufs_in_user_sgprs;
   if (num_shaderbufs && sctx->compute_shaderbuf_sgprs_dirty) {
      struct si_descriptors *desc = si_const_and_shader_buffer_descriptors(sctx, PIPE_SHADER_COMPUTE);

      radeon_set_sh_reg_seq(R_00B900_COMPUTE_USER_DATA_0 +
                            shader->cs_shaderbufs_sgpr_index * 4,
                            num_shaderbufs * 4);

      for (unsigned i = 0; i < num_shaderbufs; i++)
         radeon_emit_array(&desc->list[si_get_shaderbuf_slot(i) * 4], 4);

      sctx->compute_shaderbuf_sgprs_dirty = false;
   }

   /* Set image descriptors in user SGPRs.
*/ 2284bf215546Sopenharmony_ci unsigned num_images = shader->cs_num_images_in_user_sgprs; 2285bf215546Sopenharmony_ci if (num_images && sctx->compute_image_sgprs_dirty) { 2286bf215546Sopenharmony_ci struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, PIPE_SHADER_COMPUTE); 2287bf215546Sopenharmony_ci 2288bf215546Sopenharmony_ci radeon_set_sh_reg_seq(R_00B900_COMPUTE_USER_DATA_0 + 2289bf215546Sopenharmony_ci shader->cs_images_sgpr_index * 4, 2290bf215546Sopenharmony_ci shader->cs_images_num_sgprs); 2291bf215546Sopenharmony_ci 2292bf215546Sopenharmony_ci for (unsigned i = 0; i < num_images; i++) { 2293bf215546Sopenharmony_ci unsigned desc_offset = si_get_image_slot(i) * 8; 2294bf215546Sopenharmony_ci unsigned num_sgprs = 8; 2295bf215546Sopenharmony_ci 2296bf215546Sopenharmony_ci /* Image buffers are in desc[4..7]. */ 2297bf215546Sopenharmony_ci if (BITSET_TEST(shader->info.base.image_buffers, i)) { 2298bf215546Sopenharmony_ci desc_offset += 4; 2299bf215546Sopenharmony_ci num_sgprs = 4; 2300bf215546Sopenharmony_ci } 2301bf215546Sopenharmony_ci 2302bf215546Sopenharmony_ci radeon_emit_array(&desc->list[desc_offset], num_sgprs); 2303bf215546Sopenharmony_ci } 2304bf215546Sopenharmony_ci 2305bf215546Sopenharmony_ci sctx->compute_image_sgprs_dirty = false; 2306bf215546Sopenharmony_ci } 2307bf215546Sopenharmony_ci radeon_end(); 2308bf215546Sopenharmony_ci} 2309bf215546Sopenharmony_ci 2310bf215546Sopenharmony_ci/* BINDLESS */ 2311bf215546Sopenharmony_ci 2312bf215546Sopenharmony_cistatic void si_init_bindless_descriptors(struct si_context *sctx, struct si_descriptors *desc, 2313bf215546Sopenharmony_ci short shader_userdata_rel_index, unsigned num_elements) 2314bf215546Sopenharmony_ci{ 2315bf215546Sopenharmony_ci ASSERTED unsigned desc_slot; 2316bf215546Sopenharmony_ci 2317bf215546Sopenharmony_ci si_init_descriptors(desc, shader_userdata_rel_index, 16, num_elements); 2318bf215546Sopenharmony_ci sctx->bindless_descriptors.num_active_slots = num_elements; 
2319bf215546Sopenharmony_ci 2320bf215546Sopenharmony_ci /* The first bindless descriptor is stored at slot 1, because 0 is not 2321bf215546Sopenharmony_ci * considered to be a valid handle. 2322bf215546Sopenharmony_ci */ 2323bf215546Sopenharmony_ci sctx->num_bindless_descriptors = 1; 2324bf215546Sopenharmony_ci 2325bf215546Sopenharmony_ci /* Track which bindless slots are used (or not). */ 2326bf215546Sopenharmony_ci util_idalloc_init(&sctx->bindless_used_slots, num_elements); 2327bf215546Sopenharmony_ci 2328bf215546Sopenharmony_ci /* Reserve slot 0 because it's an invalid handle for bindless. */ 2329bf215546Sopenharmony_ci desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots); 2330bf215546Sopenharmony_ci assert(desc_slot == 0); 2331bf215546Sopenharmony_ci} 2332bf215546Sopenharmony_ci 2333bf215546Sopenharmony_cistatic void si_release_bindless_descriptors(struct si_context *sctx) 2334bf215546Sopenharmony_ci{ 2335bf215546Sopenharmony_ci si_release_descriptors(&sctx->bindless_descriptors); 2336bf215546Sopenharmony_ci util_idalloc_fini(&sctx->bindless_used_slots); 2337bf215546Sopenharmony_ci} 2338bf215546Sopenharmony_ci 2339bf215546Sopenharmony_cistatic unsigned si_get_first_free_bindless_slot(struct si_context *sctx) 2340bf215546Sopenharmony_ci{ 2341bf215546Sopenharmony_ci struct si_descriptors *desc = &sctx->bindless_descriptors; 2342bf215546Sopenharmony_ci unsigned desc_slot; 2343bf215546Sopenharmony_ci 2344bf215546Sopenharmony_ci desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots); 2345bf215546Sopenharmony_ci if (desc_slot >= desc->num_elements) { 2346bf215546Sopenharmony_ci /* The array of bindless descriptors is full, resize it. 
*/ 2347bf215546Sopenharmony_ci unsigned slot_size = desc->element_dw_size * 4; 2348bf215546Sopenharmony_ci unsigned new_num_elements = desc->num_elements * 2; 2349bf215546Sopenharmony_ci 2350bf215546Sopenharmony_ci desc->list = 2351bf215546Sopenharmony_ci REALLOC(desc->list, desc->num_elements * slot_size, new_num_elements * slot_size); 2352bf215546Sopenharmony_ci desc->num_elements = new_num_elements; 2353bf215546Sopenharmony_ci desc->num_active_slots = new_num_elements; 2354bf215546Sopenharmony_ci } 2355bf215546Sopenharmony_ci 2356bf215546Sopenharmony_ci assert(desc_slot); 2357bf215546Sopenharmony_ci return desc_slot; 2358bf215546Sopenharmony_ci} 2359bf215546Sopenharmony_ci 2360bf215546Sopenharmony_cistatic unsigned si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list, 2361bf215546Sopenharmony_ci unsigned size) 2362bf215546Sopenharmony_ci{ 2363bf215546Sopenharmony_ci struct si_descriptors *desc = &sctx->bindless_descriptors; 2364bf215546Sopenharmony_ci unsigned desc_slot, desc_slot_offset; 2365bf215546Sopenharmony_ci 2366bf215546Sopenharmony_ci /* Find a free slot. */ 2367bf215546Sopenharmony_ci desc_slot = si_get_first_free_bindless_slot(sctx); 2368bf215546Sopenharmony_ci 2369bf215546Sopenharmony_ci /* For simplicity, sampler and image bindless descriptors use fixed 2370bf215546Sopenharmony_ci * 16-dword slots for now. Image descriptors only need 8-dword but this 2371bf215546Sopenharmony_ci * doesn't really matter because no real apps use image handles. 2372bf215546Sopenharmony_ci */ 2373bf215546Sopenharmony_ci desc_slot_offset = desc_slot * 16; 2374bf215546Sopenharmony_ci 2375bf215546Sopenharmony_ci /* Copy the descriptor into the array. */ 2376bf215546Sopenharmony_ci memcpy(desc->list + desc_slot_offset, desc_list, size); 2377bf215546Sopenharmony_ci 2378bf215546Sopenharmony_ci /* Re-upload the whole array of bindless descriptors into a new buffer. 
2379bf215546Sopenharmony_ci */ 2380bf215546Sopenharmony_ci if (!si_upload_descriptors(sctx, desc)) 2381bf215546Sopenharmony_ci return 0; 2382bf215546Sopenharmony_ci 2383bf215546Sopenharmony_ci /* Make sure to re-emit the shader pointers for all stages. */ 2384bf215546Sopenharmony_ci sctx->graphics_bindless_pointer_dirty = true; 2385bf215546Sopenharmony_ci sctx->compute_bindless_pointer_dirty = true; 2386bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers); 2387bf215546Sopenharmony_ci 2388bf215546Sopenharmony_ci return desc_slot; 2389bf215546Sopenharmony_ci} 2390bf215546Sopenharmony_ci 2391bf215546Sopenharmony_cistatic void si_update_bindless_buffer_descriptor(struct si_context *sctx, unsigned desc_slot, 2392bf215546Sopenharmony_ci struct pipe_resource *resource, uint64_t offset, 2393bf215546Sopenharmony_ci bool *desc_dirty) 2394bf215546Sopenharmony_ci{ 2395bf215546Sopenharmony_ci struct si_descriptors *desc = &sctx->bindless_descriptors; 2396bf215546Sopenharmony_ci struct si_resource *buf = si_resource(resource); 2397bf215546Sopenharmony_ci unsigned desc_slot_offset = desc_slot * 16; 2398bf215546Sopenharmony_ci uint32_t *desc_list = desc->list + desc_slot_offset + 4; 2399bf215546Sopenharmony_ci uint64_t old_desc_va; 2400bf215546Sopenharmony_ci 2401bf215546Sopenharmony_ci assert(resource->target == PIPE_BUFFER); 2402bf215546Sopenharmony_ci 2403bf215546Sopenharmony_ci /* Retrieve the old buffer addr from the descriptor. */ 2404bf215546Sopenharmony_ci old_desc_va = si_desc_extract_buffer_address(desc_list); 2405bf215546Sopenharmony_ci 2406bf215546Sopenharmony_ci if (old_desc_va != buf->gpu_address + offset) { 2407bf215546Sopenharmony_ci /* The buffer has been invalidated when the handle wasn't 2408bf215546Sopenharmony_ci * resident, update the descriptor and the dirty flag. 
2409bf215546Sopenharmony_ci */ 2410bf215546Sopenharmony_ci si_set_buf_desc_address(buf, offset, &desc_list[0]); 2411bf215546Sopenharmony_ci 2412bf215546Sopenharmony_ci *desc_dirty = true; 2413bf215546Sopenharmony_ci } 2414bf215546Sopenharmony_ci} 2415bf215546Sopenharmony_ci 2416bf215546Sopenharmony_cistatic uint64_t si_create_texture_handle(struct pipe_context *ctx, struct pipe_sampler_view *view, 2417bf215546Sopenharmony_ci const struct pipe_sampler_state *state) 2418bf215546Sopenharmony_ci{ 2419bf215546Sopenharmony_ci struct si_sampler_view *sview = (struct si_sampler_view *)view; 2420bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 2421bf215546Sopenharmony_ci struct si_texture_handle *tex_handle; 2422bf215546Sopenharmony_ci struct si_sampler_state *sstate; 2423bf215546Sopenharmony_ci uint32_t desc_list[16]; 2424bf215546Sopenharmony_ci uint64_t handle; 2425bf215546Sopenharmony_ci 2426bf215546Sopenharmony_ci tex_handle = CALLOC_STRUCT(si_texture_handle); 2427bf215546Sopenharmony_ci if (!tex_handle) 2428bf215546Sopenharmony_ci return 0; 2429bf215546Sopenharmony_ci 2430bf215546Sopenharmony_ci memset(desc_list, 0, sizeof(desc_list)); 2431bf215546Sopenharmony_ci si_init_descriptor_list(&desc_list[0], 16, 1, null_texture_descriptor); 2432bf215546Sopenharmony_ci 2433bf215546Sopenharmony_ci sstate = ctx->create_sampler_state(ctx, state); 2434bf215546Sopenharmony_ci if (!sstate) { 2435bf215546Sopenharmony_ci FREE(tex_handle); 2436bf215546Sopenharmony_ci return 0; 2437bf215546Sopenharmony_ci } 2438bf215546Sopenharmony_ci 2439bf215546Sopenharmony_ci si_set_sampler_view_desc(sctx, sview, sstate, &desc_list[0]); 2440bf215546Sopenharmony_ci memcpy(&tex_handle->sstate, sstate, sizeof(*sstate)); 2441bf215546Sopenharmony_ci ctx->delete_sampler_state(ctx, sstate); 2442bf215546Sopenharmony_ci 2443bf215546Sopenharmony_ci tex_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, sizeof(desc_list)); 2444bf215546Sopenharmony_ci if 
(!tex_handle->desc_slot) { 2445bf215546Sopenharmony_ci FREE(tex_handle); 2446bf215546Sopenharmony_ci return 0; 2447bf215546Sopenharmony_ci } 2448bf215546Sopenharmony_ci 2449bf215546Sopenharmony_ci handle = tex_handle->desc_slot; 2450bf215546Sopenharmony_ci 2451bf215546Sopenharmony_ci if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)(uintptr_t)handle, tex_handle)) { 2452bf215546Sopenharmony_ci FREE(tex_handle); 2453bf215546Sopenharmony_ci return 0; 2454bf215546Sopenharmony_ci } 2455bf215546Sopenharmony_ci 2456bf215546Sopenharmony_ci pipe_sampler_view_reference(&tex_handle->view, view); 2457bf215546Sopenharmony_ci 2458bf215546Sopenharmony_ci si_resource(sview->base.texture)->texture_handle_allocated = true; 2459bf215546Sopenharmony_ci 2460bf215546Sopenharmony_ci return handle; 2461bf215546Sopenharmony_ci} 2462bf215546Sopenharmony_ci 2463bf215546Sopenharmony_cistatic void si_delete_texture_handle(struct pipe_context *ctx, uint64_t handle) 2464bf215546Sopenharmony_ci{ 2465bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 2466bf215546Sopenharmony_ci struct si_texture_handle *tex_handle; 2467bf215546Sopenharmony_ci struct hash_entry *entry; 2468bf215546Sopenharmony_ci 2469bf215546Sopenharmony_ci entry = _mesa_hash_table_search(sctx->tex_handles, (void *)(uintptr_t)handle); 2470bf215546Sopenharmony_ci if (!entry) 2471bf215546Sopenharmony_ci return; 2472bf215546Sopenharmony_ci 2473bf215546Sopenharmony_ci tex_handle = (struct si_texture_handle *)entry->data; 2474bf215546Sopenharmony_ci 2475bf215546Sopenharmony_ci /* Allow this descriptor slot to be re-used. 
*/ 2476bf215546Sopenharmony_ci util_idalloc_free(&sctx->bindless_used_slots, tex_handle->desc_slot); 2477bf215546Sopenharmony_ci 2478bf215546Sopenharmony_ci pipe_sampler_view_reference(&tex_handle->view, NULL); 2479bf215546Sopenharmony_ci _mesa_hash_table_remove(sctx->tex_handles, entry); 2480bf215546Sopenharmony_ci FREE(tex_handle); 2481bf215546Sopenharmony_ci} 2482bf215546Sopenharmony_ci 2483bf215546Sopenharmony_cistatic void si_make_texture_handle_resident(struct pipe_context *ctx, uint64_t handle, 2484bf215546Sopenharmony_ci bool resident) 2485bf215546Sopenharmony_ci{ 2486bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 2487bf215546Sopenharmony_ci struct si_texture_handle *tex_handle; 2488bf215546Sopenharmony_ci struct si_sampler_view *sview; 2489bf215546Sopenharmony_ci struct hash_entry *entry; 2490bf215546Sopenharmony_ci 2491bf215546Sopenharmony_ci entry = _mesa_hash_table_search(sctx->tex_handles, (void *)(uintptr_t)handle); 2492bf215546Sopenharmony_ci if (!entry) 2493bf215546Sopenharmony_ci return; 2494bf215546Sopenharmony_ci 2495bf215546Sopenharmony_ci tex_handle = (struct si_texture_handle *)entry->data; 2496bf215546Sopenharmony_ci sview = (struct si_sampler_view *)tex_handle->view; 2497bf215546Sopenharmony_ci 2498bf215546Sopenharmony_ci if (resident) { 2499bf215546Sopenharmony_ci if (sview->base.texture->target != PIPE_BUFFER) { 2500bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)sview->base.texture; 2501bf215546Sopenharmony_ci 2502bf215546Sopenharmony_ci if (depth_needs_decompression(tex, sview->is_stencil_sampler)) { 2503bf215546Sopenharmony_ci util_dynarray_append(&sctx->resident_tex_needs_depth_decompress, 2504bf215546Sopenharmony_ci struct si_texture_handle *, tex_handle); 2505bf215546Sopenharmony_ci } 2506bf215546Sopenharmony_ci 2507bf215546Sopenharmony_ci if (color_needs_decompression(tex)) { 2508bf215546Sopenharmony_ci util_dynarray_append(&sctx->resident_tex_needs_color_decompress, 
2509bf215546Sopenharmony_ci struct si_texture_handle *, tex_handle); 2510bf215546Sopenharmony_ci } 2511bf215546Sopenharmony_ci 2512bf215546Sopenharmony_ci if (vi_dcc_enabled(tex, sview->base.u.tex.first_level) && 2513bf215546Sopenharmony_ci p_atomic_read(&tex->framebuffers_bound)) 2514bf215546Sopenharmony_ci sctx->need_check_render_feedback = true; 2515bf215546Sopenharmony_ci 2516bf215546Sopenharmony_ci si_update_bindless_texture_descriptor(sctx, tex_handle); 2517bf215546Sopenharmony_ci } else { 2518bf215546Sopenharmony_ci si_update_bindless_buffer_descriptor(sctx, tex_handle->desc_slot, sview->base.texture, 2519bf215546Sopenharmony_ci sview->base.u.buf.offset, &tex_handle->desc_dirty); 2520bf215546Sopenharmony_ci } 2521bf215546Sopenharmony_ci 2522bf215546Sopenharmony_ci /* Re-upload the descriptor if it has been updated while it 2523bf215546Sopenharmony_ci * wasn't resident. 2524bf215546Sopenharmony_ci */ 2525bf215546Sopenharmony_ci if (tex_handle->desc_dirty) 2526bf215546Sopenharmony_ci sctx->bindless_descriptors_dirty = true; 2527bf215546Sopenharmony_ci 2528bf215546Sopenharmony_ci /* Add the texture handle to the per-context list. */ 2529bf215546Sopenharmony_ci util_dynarray_append(&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle); 2530bf215546Sopenharmony_ci 2531bf215546Sopenharmony_ci /* Add the buffers to the current CS in case si_begin_new_cs() 2532bf215546Sopenharmony_ci * is not going to be called. 2533bf215546Sopenharmony_ci */ 2534bf215546Sopenharmony_ci si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ, 2535bf215546Sopenharmony_ci sview->is_stencil_sampler, false); 2536bf215546Sopenharmony_ci } else { 2537bf215546Sopenharmony_ci /* Remove the texture handle from the per-context list. 
*/ 2538bf215546Sopenharmony_ci util_dynarray_delete_unordered(&sctx->resident_tex_handles, struct si_texture_handle *, 2539bf215546Sopenharmony_ci tex_handle); 2540bf215546Sopenharmony_ci 2541bf215546Sopenharmony_ci if (sview->base.texture->target != PIPE_BUFFER) { 2542bf215546Sopenharmony_ci util_dynarray_delete_unordered(&sctx->resident_tex_needs_depth_decompress, 2543bf215546Sopenharmony_ci struct si_texture_handle *, tex_handle); 2544bf215546Sopenharmony_ci 2545bf215546Sopenharmony_ci util_dynarray_delete_unordered(&sctx->resident_tex_needs_color_decompress, 2546bf215546Sopenharmony_ci struct si_texture_handle *, tex_handle); 2547bf215546Sopenharmony_ci } 2548bf215546Sopenharmony_ci } 2549bf215546Sopenharmony_ci} 2550bf215546Sopenharmony_ci 2551bf215546Sopenharmony_cistatic uint64_t si_create_image_handle(struct pipe_context *ctx, const struct pipe_image_view *view) 2552bf215546Sopenharmony_ci{ 2553bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 2554bf215546Sopenharmony_ci struct si_image_handle *img_handle; 2555bf215546Sopenharmony_ci uint32_t desc_list[16]; 2556bf215546Sopenharmony_ci uint64_t handle; 2557bf215546Sopenharmony_ci 2558bf215546Sopenharmony_ci if (!view || !view->resource) 2559bf215546Sopenharmony_ci return 0; 2560bf215546Sopenharmony_ci 2561bf215546Sopenharmony_ci img_handle = CALLOC_STRUCT(si_image_handle); 2562bf215546Sopenharmony_ci if (!img_handle) 2563bf215546Sopenharmony_ci return 0; 2564bf215546Sopenharmony_ci 2565bf215546Sopenharmony_ci memset(desc_list, 0, sizeof(desc_list)); 2566bf215546Sopenharmony_ci si_init_descriptor_list(&desc_list[0], 8, 2, null_image_descriptor); 2567bf215546Sopenharmony_ci 2568bf215546Sopenharmony_ci si_set_shader_image_desc(sctx, view, false, &desc_list[0], &desc_list[8]); 2569bf215546Sopenharmony_ci 2570bf215546Sopenharmony_ci img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, sizeof(desc_list)); 2571bf215546Sopenharmony_ci if (!img_handle->desc_slot) { 
2572bf215546Sopenharmony_ci FREE(img_handle); 2573bf215546Sopenharmony_ci return 0; 2574bf215546Sopenharmony_ci } 2575bf215546Sopenharmony_ci 2576bf215546Sopenharmony_ci handle = img_handle->desc_slot; 2577bf215546Sopenharmony_ci 2578bf215546Sopenharmony_ci if (!_mesa_hash_table_insert(sctx->img_handles, (void *)(uintptr_t)handle, img_handle)) { 2579bf215546Sopenharmony_ci FREE(img_handle); 2580bf215546Sopenharmony_ci return 0; 2581bf215546Sopenharmony_ci } 2582bf215546Sopenharmony_ci 2583bf215546Sopenharmony_ci util_copy_image_view(&img_handle->view, view); 2584bf215546Sopenharmony_ci 2585bf215546Sopenharmony_ci si_resource(view->resource)->image_handle_allocated = true; 2586bf215546Sopenharmony_ci 2587bf215546Sopenharmony_ci return handle; 2588bf215546Sopenharmony_ci} 2589bf215546Sopenharmony_ci 2590bf215546Sopenharmony_cistatic void si_delete_image_handle(struct pipe_context *ctx, uint64_t handle) 2591bf215546Sopenharmony_ci{ 2592bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 2593bf215546Sopenharmony_ci struct si_image_handle *img_handle; 2594bf215546Sopenharmony_ci struct hash_entry *entry; 2595bf215546Sopenharmony_ci 2596bf215546Sopenharmony_ci entry = _mesa_hash_table_search(sctx->img_handles, (void *)(uintptr_t)handle); 2597bf215546Sopenharmony_ci if (!entry) 2598bf215546Sopenharmony_ci return; 2599bf215546Sopenharmony_ci 2600bf215546Sopenharmony_ci img_handle = (struct si_image_handle *)entry->data; 2601bf215546Sopenharmony_ci 2602bf215546Sopenharmony_ci util_copy_image_view(&img_handle->view, NULL); 2603bf215546Sopenharmony_ci _mesa_hash_table_remove(sctx->img_handles, entry); 2604bf215546Sopenharmony_ci FREE(img_handle); 2605bf215546Sopenharmony_ci} 2606bf215546Sopenharmony_ci 2607bf215546Sopenharmony_cistatic void si_make_image_handle_resident(struct pipe_context *ctx, uint64_t handle, 2608bf215546Sopenharmony_ci unsigned access, bool resident) 2609bf215546Sopenharmony_ci{ 2610bf215546Sopenharmony_ci struct si_context *sctx = 
(struct si_context *)ctx; 2611bf215546Sopenharmony_ci struct si_image_handle *img_handle; 2612bf215546Sopenharmony_ci struct pipe_image_view *view; 2613bf215546Sopenharmony_ci struct si_resource *res; 2614bf215546Sopenharmony_ci struct hash_entry *entry; 2615bf215546Sopenharmony_ci 2616bf215546Sopenharmony_ci entry = _mesa_hash_table_search(sctx->img_handles, (void *)(uintptr_t)handle); 2617bf215546Sopenharmony_ci if (!entry) 2618bf215546Sopenharmony_ci return; 2619bf215546Sopenharmony_ci 2620bf215546Sopenharmony_ci img_handle = (struct si_image_handle *)entry->data; 2621bf215546Sopenharmony_ci view = &img_handle->view; 2622bf215546Sopenharmony_ci res = si_resource(view->resource); 2623bf215546Sopenharmony_ci 2624bf215546Sopenharmony_ci if (resident) { 2625bf215546Sopenharmony_ci if (res->b.b.target != PIPE_BUFFER) { 2626bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)res; 2627bf215546Sopenharmony_ci unsigned level = view->u.tex.level; 2628bf215546Sopenharmony_ci 2629bf215546Sopenharmony_ci if (color_needs_decompression(tex)) { 2630bf215546Sopenharmony_ci util_dynarray_append(&sctx->resident_img_needs_color_decompress, 2631bf215546Sopenharmony_ci struct si_image_handle *, img_handle); 2632bf215546Sopenharmony_ci } 2633bf215546Sopenharmony_ci 2634bf215546Sopenharmony_ci if (vi_dcc_enabled(tex, level) && p_atomic_read(&tex->framebuffers_bound)) 2635bf215546Sopenharmony_ci sctx->need_check_render_feedback = true; 2636bf215546Sopenharmony_ci 2637bf215546Sopenharmony_ci si_update_bindless_image_descriptor(sctx, img_handle); 2638bf215546Sopenharmony_ci } else { 2639bf215546Sopenharmony_ci si_update_bindless_buffer_descriptor(sctx, img_handle->desc_slot, view->resource, 2640bf215546Sopenharmony_ci view->u.buf.offset, &img_handle->desc_dirty); 2641bf215546Sopenharmony_ci } 2642bf215546Sopenharmony_ci 2643bf215546Sopenharmony_ci /* Re-upload the descriptor if it has been updated while it 2644bf215546Sopenharmony_ci * wasn't resident. 
2645bf215546Sopenharmony_ci */ 2646bf215546Sopenharmony_ci if (img_handle->desc_dirty) 2647bf215546Sopenharmony_ci sctx->bindless_descriptors_dirty = true; 2648bf215546Sopenharmony_ci 2649bf215546Sopenharmony_ci /* Add the image handle to the per-context list. */ 2650bf215546Sopenharmony_ci util_dynarray_append(&sctx->resident_img_handles, struct si_image_handle *, img_handle); 2651bf215546Sopenharmony_ci 2652bf215546Sopenharmony_ci /* Add the buffers to the current CS in case si_begin_new_cs() 2653bf215546Sopenharmony_ci * is not going to be called. 2654bf215546Sopenharmony_ci */ 2655bf215546Sopenharmony_ci si_sampler_view_add_buffer( 2656bf215546Sopenharmony_ci sctx, view->resource, 2657bf215546Sopenharmony_ci (access & PIPE_IMAGE_ACCESS_WRITE) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, false, 2658bf215546Sopenharmony_ci false); 2659bf215546Sopenharmony_ci } else { 2660bf215546Sopenharmony_ci /* Remove the image handle from the per-context list. */ 2661bf215546Sopenharmony_ci util_dynarray_delete_unordered(&sctx->resident_img_handles, struct si_image_handle *, 2662bf215546Sopenharmony_ci img_handle); 2663bf215546Sopenharmony_ci 2664bf215546Sopenharmony_ci if (res->b.b.target != PIPE_BUFFER) { 2665bf215546Sopenharmony_ci util_dynarray_delete_unordered(&sctx->resident_img_needs_color_decompress, 2666bf215546Sopenharmony_ci struct si_image_handle *, img_handle); 2667bf215546Sopenharmony_ci } 2668bf215546Sopenharmony_ci } 2669bf215546Sopenharmony_ci} 2670bf215546Sopenharmony_ci 2671bf215546Sopenharmony_cistatic void si_resident_buffers_add_all_to_bo_list(struct si_context *sctx) 2672bf215546Sopenharmony_ci{ 2673bf215546Sopenharmony_ci unsigned num_resident_tex_handles, num_resident_img_handles; 2674bf215546Sopenharmony_ci 2675bf215546Sopenharmony_ci num_resident_tex_handles = sctx->resident_tex_handles.size / sizeof(struct si_texture_handle *); 2676bf215546Sopenharmony_ci num_resident_img_handles = sctx->resident_img_handles.size / sizeof(struct si_image_handle 
*); 2677bf215546Sopenharmony_ci 2678bf215546Sopenharmony_ci /* Add all resident texture handles. */ 2679bf215546Sopenharmony_ci util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { 2680bf215546Sopenharmony_ci struct si_sampler_view *sview = (struct si_sampler_view *)(*tex_handle)->view; 2681bf215546Sopenharmony_ci 2682bf215546Sopenharmony_ci si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ, 2683bf215546Sopenharmony_ci sview->is_stencil_sampler, false); 2684bf215546Sopenharmony_ci } 2685bf215546Sopenharmony_ci 2686bf215546Sopenharmony_ci /* Add all resident image handles. */ 2687bf215546Sopenharmony_ci util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) { 2688bf215546Sopenharmony_ci struct pipe_image_view *view = &(*img_handle)->view; 2689bf215546Sopenharmony_ci 2690bf215546Sopenharmony_ci si_sampler_view_add_buffer(sctx, view->resource, RADEON_USAGE_READWRITE, false, false); 2691bf215546Sopenharmony_ci } 2692bf215546Sopenharmony_ci 2693bf215546Sopenharmony_ci sctx->num_resident_handles += num_resident_tex_handles + num_resident_img_handles; 2694bf215546Sopenharmony_ci assert(sctx->bo_list_add_all_resident_resources); 2695bf215546Sopenharmony_ci sctx->bo_list_add_all_resident_resources = false; 2696bf215546Sopenharmony_ci} 2697bf215546Sopenharmony_ci 2698bf215546Sopenharmony_ci/* INIT/DEINIT/UPLOAD */ 2699bf215546Sopenharmony_ci 2700bf215546Sopenharmony_civoid si_init_all_descriptors(struct si_context *sctx) 2701bf215546Sopenharmony_ci{ 2702bf215546Sopenharmony_ci int i; 2703bf215546Sopenharmony_ci unsigned first_shader = sctx->has_graphics ? 
0 : PIPE_SHADER_COMPUTE; 2704bf215546Sopenharmony_ci unsigned hs_sgpr0, gs_sgpr0; 2705bf215546Sopenharmony_ci 2706bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) { 2707bf215546Sopenharmony_ci hs_sgpr0 = R_00B420_SPI_SHADER_PGM_LO_HS; 2708bf215546Sopenharmony_ci gs_sgpr0 = R_00B220_SPI_SHADER_PGM_LO_GS; 2709bf215546Sopenharmony_ci } else { 2710bf215546Sopenharmony_ci hs_sgpr0 = R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS; 2711bf215546Sopenharmony_ci gs_sgpr0 = R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS; 2712bf215546Sopenharmony_ci } 2713bf215546Sopenharmony_ci 2714bf215546Sopenharmony_ci for (i = first_shader; i < SI_NUM_SHADERS; i++) { 2715bf215546Sopenharmony_ci bool is_2nd = 2716bf215546Sopenharmony_ci sctx->gfx_level >= GFX9 && (i == PIPE_SHADER_TESS_CTRL || i == PIPE_SHADER_GEOMETRY); 2717bf215546Sopenharmony_ci unsigned num_sampler_slots = SI_NUM_IMAGE_SLOTS / 2 + SI_NUM_SAMPLERS; 2718bf215546Sopenharmony_ci unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS; 2719bf215546Sopenharmony_ci int rel_dw_offset; 2720bf215546Sopenharmony_ci struct si_descriptors *desc; 2721bf215546Sopenharmony_ci 2722bf215546Sopenharmony_ci if (is_2nd) { 2723bf215546Sopenharmony_ci if (i == PIPE_SHADER_TESS_CTRL) { 2724bf215546Sopenharmony_ci rel_dw_offset = 2725bf215546Sopenharmony_ci (hs_sgpr0 - R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4; 2726bf215546Sopenharmony_ci } else if (sctx->gfx_level >= GFX10) { /* PIPE_SHADER_GEOMETRY */ 2727bf215546Sopenharmony_ci rel_dw_offset = 2728bf215546Sopenharmony_ci (gs_sgpr0 - R_00B230_SPI_SHADER_USER_DATA_GS_0) / 4; 2729bf215546Sopenharmony_ci } else { 2730bf215546Sopenharmony_ci rel_dw_offset = 2731bf215546Sopenharmony_ci (gs_sgpr0 - R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4; 2732bf215546Sopenharmony_ci } 2733bf215546Sopenharmony_ci } else { 2734bf215546Sopenharmony_ci rel_dw_offset = SI_SGPR_CONST_AND_SHADER_BUFFERS; 2735bf215546Sopenharmony_ci } 2736bf215546Sopenharmony_ci desc = si_const_and_shader_buffer_descriptors(sctx, 
i); 2737bf215546Sopenharmony_ci si_init_buffer_resources(sctx, &sctx->const_and_shader_buffers[i], desc, num_buffer_slots, 2738bf215546Sopenharmony_ci rel_dw_offset, RADEON_PRIO_SHADER_RW_BUFFER, 2739bf215546Sopenharmony_ci RADEON_PRIO_CONST_BUFFER); 2740bf215546Sopenharmony_ci desc->slot_index_to_bind_directly = si_get_constbuf_slot(0); 2741bf215546Sopenharmony_ci 2742bf215546Sopenharmony_ci if (is_2nd) { 2743bf215546Sopenharmony_ci if (i == PIPE_SHADER_TESS_CTRL) { 2744bf215546Sopenharmony_ci rel_dw_offset = 2745bf215546Sopenharmony_ci (hs_sgpr0 + 4 - R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4; 2746bf215546Sopenharmony_ci } else if (sctx->gfx_level >= GFX10) { /* PIPE_SHADER_GEOMETRY */ 2747bf215546Sopenharmony_ci rel_dw_offset = 2748bf215546Sopenharmony_ci (gs_sgpr0 + 4 - R_00B230_SPI_SHADER_USER_DATA_GS_0) / 4; 2749bf215546Sopenharmony_ci } else { 2750bf215546Sopenharmony_ci rel_dw_offset = 2751bf215546Sopenharmony_ci (gs_sgpr0 + 4 - R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4; 2752bf215546Sopenharmony_ci } 2753bf215546Sopenharmony_ci } else { 2754bf215546Sopenharmony_ci rel_dw_offset = SI_SGPR_SAMPLERS_AND_IMAGES; 2755bf215546Sopenharmony_ci } 2756bf215546Sopenharmony_ci 2757bf215546Sopenharmony_ci desc = si_sampler_and_image_descriptors(sctx, i); 2758bf215546Sopenharmony_ci si_init_descriptors(desc, rel_dw_offset, 16, num_sampler_slots); 2759bf215546Sopenharmony_ci 2760bf215546Sopenharmony_ci int j; 2761bf215546Sopenharmony_ci for (j = 0; j < SI_NUM_IMAGE_SLOTS; j++) 2762bf215546Sopenharmony_ci memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4); 2763bf215546Sopenharmony_ci for (; j < SI_NUM_IMAGE_SLOTS + SI_NUM_SAMPLERS * 2; j++) 2764bf215546Sopenharmony_ci memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4); 2765bf215546Sopenharmony_ci } 2766bf215546Sopenharmony_ci 2767bf215546Sopenharmony_ci si_init_buffer_resources(sctx, &sctx->internal_bindings, &sctx->descriptors[SI_DESCS_INTERNAL], 2768bf215546Sopenharmony_ci SI_NUM_INTERNAL_BINDINGS, 
SI_SGPR_INTERNAL_BINDINGS, 2769bf215546Sopenharmony_ci /* The second priority is used by 2770bf215546Sopenharmony_ci * const buffers in RW buffer slots. */ 2771bf215546Sopenharmony_ci RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER); 2772bf215546Sopenharmony_ci sctx->descriptors[SI_DESCS_INTERNAL].num_active_slots = SI_NUM_INTERNAL_BINDINGS; 2773bf215546Sopenharmony_ci 2774bf215546Sopenharmony_ci /* Initialize an array of 1024 bindless descriptors, when the limit is 2775bf215546Sopenharmony_ci * reached, just make it larger and re-upload the whole array. 2776bf215546Sopenharmony_ci */ 2777bf215546Sopenharmony_ci si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors, 2778bf215546Sopenharmony_ci SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, 1024); 2779bf215546Sopenharmony_ci 2780bf215546Sopenharmony_ci sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS); 2781bf215546Sopenharmony_ci 2782bf215546Sopenharmony_ci /* Set pipe_context functions. */ 2783bf215546Sopenharmony_ci sctx->b.bind_sampler_states = si_bind_sampler_states; 2784bf215546Sopenharmony_ci sctx->b.set_shader_images = si_set_shader_images; 2785bf215546Sopenharmony_ci sctx->b.set_constant_buffer = si_pipe_set_constant_buffer; 2786bf215546Sopenharmony_ci sctx->b.set_inlinable_constants = si_set_inlinable_constants; 2787bf215546Sopenharmony_ci sctx->b.set_shader_buffers = si_pipe_set_shader_buffers; 2788bf215546Sopenharmony_ci sctx->b.set_sampler_views = si_pipe_set_sampler_views; 2789bf215546Sopenharmony_ci sctx->b.create_texture_handle = si_create_texture_handle; 2790bf215546Sopenharmony_ci sctx->b.delete_texture_handle = si_delete_texture_handle; 2791bf215546Sopenharmony_ci sctx->b.make_texture_handle_resident = si_make_texture_handle_resident; 2792bf215546Sopenharmony_ci sctx->b.create_image_handle = si_create_image_handle; 2793bf215546Sopenharmony_ci sctx->b.delete_image_handle = si_delete_image_handle; 2794bf215546Sopenharmony_ci sctx->b.make_image_handle_resident = 
si_make_image_handle_resident; 2795bf215546Sopenharmony_ci 2796bf215546Sopenharmony_ci if (!sctx->has_graphics) 2797bf215546Sopenharmony_ci return; 2798bf215546Sopenharmony_ci 2799bf215546Sopenharmony_ci sctx->b.set_polygon_stipple = si_set_polygon_stipple; 2800bf215546Sopenharmony_ci 2801bf215546Sopenharmony_ci /* Shader user data. */ 2802bf215546Sopenharmony_ci sctx->atoms.s.shader_pointers.emit = si_emit_graphics_shader_pointers; 2803bf215546Sopenharmony_ci 2804bf215546Sopenharmony_ci /* Set default and immutable mappings. */ 2805bf215546Sopenharmony_ci si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, 2806bf215546Sopenharmony_ci si_get_user_data_base(sctx->gfx_level, TESS_OFF, GS_OFF, 2807bf215546Sopenharmony_ci sctx->ngg, PIPE_SHADER_VERTEX)); 2808bf215546Sopenharmony_ci si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, 2809bf215546Sopenharmony_ci si_get_user_data_base(sctx->gfx_level, TESS_OFF, GS_OFF, 2810bf215546Sopenharmony_ci NGG_OFF, PIPE_SHADER_TESS_CTRL)); 2811bf215546Sopenharmony_ci si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, 2812bf215546Sopenharmony_ci si_get_user_data_base(sctx->gfx_level, TESS_OFF, GS_OFF, 2813bf215546Sopenharmony_ci NGG_OFF, PIPE_SHADER_GEOMETRY)); 2814bf215546Sopenharmony_ci si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0); 2815bf215546Sopenharmony_ci 2816bf215546Sopenharmony_ci si_set_ring_buffer(sctx, SI_GS_ATTRIBUTE_RING, &sctx->screen->attribute_ring->b.b, 2817bf215546Sopenharmony_ci 0, ~0u, false, true, 16, 32, 0); 2818bf215546Sopenharmony_ci} 2819bf215546Sopenharmony_ci 2820bf215546Sopenharmony_cistatic bool si_upload_shader_descriptors(struct si_context *sctx, unsigned mask) 2821bf215546Sopenharmony_ci{ 2822bf215546Sopenharmony_ci unsigned dirty = sctx->descriptors_dirty & mask; 2823bf215546Sopenharmony_ci 2824bf215546Sopenharmony_ci if (dirty) { 2825bf215546Sopenharmony_ci unsigned iter_mask = dirty; 2826bf215546Sopenharmony_ci 2827bf215546Sopenharmony_ci do { 
2828bf215546Sopenharmony_ci if (!si_upload_descriptors(sctx, &sctx->descriptors[u_bit_scan(&iter_mask)])) 2829bf215546Sopenharmony_ci return false; 2830bf215546Sopenharmony_ci } while (iter_mask); 2831bf215546Sopenharmony_ci 2832bf215546Sopenharmony_ci sctx->descriptors_dirty &= ~dirty; 2833bf215546Sopenharmony_ci sctx->shader_pointers_dirty |= dirty; 2834bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers); 2835bf215546Sopenharmony_ci } 2836bf215546Sopenharmony_ci 2837bf215546Sopenharmony_ci si_upload_bindless_descriptors(sctx); 2838bf215546Sopenharmony_ci return true; 2839bf215546Sopenharmony_ci} 2840bf215546Sopenharmony_ci 2841bf215546Sopenharmony_cibool si_upload_graphics_shader_descriptors(struct si_context *sctx) 2842bf215546Sopenharmony_ci{ 2843bf215546Sopenharmony_ci const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE); 2844bf215546Sopenharmony_ci return si_upload_shader_descriptors(sctx, mask); 2845bf215546Sopenharmony_ci} 2846bf215546Sopenharmony_ci 2847bf215546Sopenharmony_cibool si_upload_compute_shader_descriptors(struct si_context *sctx) 2848bf215546Sopenharmony_ci{ 2849bf215546Sopenharmony_ci /* This does not update internal bindings as that is not needed for compute shaders 2850bf215546Sopenharmony_ci * and the input buffer is using the same SGPR's anyway. 
2851bf215546Sopenharmony_ci */ 2852bf215546Sopenharmony_ci const unsigned mask = 2853bf215546Sopenharmony_ci u_bit_consecutive(SI_DESCS_FIRST_COMPUTE, SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE); 2854bf215546Sopenharmony_ci return si_upload_shader_descriptors(sctx, mask); 2855bf215546Sopenharmony_ci} 2856bf215546Sopenharmony_ci 2857bf215546Sopenharmony_civoid si_release_all_descriptors(struct si_context *sctx) 2858bf215546Sopenharmony_ci{ 2859bf215546Sopenharmony_ci int i; 2860bf215546Sopenharmony_ci 2861bf215546Sopenharmony_ci for (i = 0; i < SI_NUM_SHADERS; i++) { 2862bf215546Sopenharmony_ci si_release_buffer_resources(&sctx->const_and_shader_buffers[i], 2863bf215546Sopenharmony_ci si_const_and_shader_buffer_descriptors(sctx, i)); 2864bf215546Sopenharmony_ci si_release_sampler_views(&sctx->samplers[i]); 2865bf215546Sopenharmony_ci si_release_image_views(&sctx->images[i]); 2866bf215546Sopenharmony_ci } 2867bf215546Sopenharmony_ci si_release_buffer_resources(&sctx->internal_bindings, &sctx->descriptors[SI_DESCS_INTERNAL]); 2868bf215546Sopenharmony_ci for (i = 0; i < SI_NUM_VERTEX_BUFFERS; i++) 2869bf215546Sopenharmony_ci pipe_vertex_buffer_unreference(&sctx->vertex_buffer[i]); 2870bf215546Sopenharmony_ci 2871bf215546Sopenharmony_ci for (i = 0; i < SI_NUM_DESCS; ++i) 2872bf215546Sopenharmony_ci si_release_descriptors(&sctx->descriptors[i]); 2873bf215546Sopenharmony_ci 2874bf215546Sopenharmony_ci si_resource_reference(&sctx->vb_descriptors_buffer, NULL); 2875bf215546Sopenharmony_ci sctx->vb_descriptors_gpu_list = NULL; /* points into a mapped buffer */ 2876bf215546Sopenharmony_ci 2877bf215546Sopenharmony_ci si_release_bindless_descriptors(sctx); 2878bf215546Sopenharmony_ci} 2879bf215546Sopenharmony_ci 2880bf215546Sopenharmony_cibool si_gfx_resources_check_encrypted(struct si_context *sctx) 2881bf215546Sopenharmony_ci{ 2882bf215546Sopenharmony_ci bool use_encrypted_bo = false; 2883bf215546Sopenharmony_ci 2884bf215546Sopenharmony_ci for (unsigned i = 0; i < 
SI_NUM_GRAPHICS_SHADERS && !use_encrypted_bo; i++) { 2885bf215546Sopenharmony_ci struct si_shader_ctx_state *current_shader = &sctx->shaders[i]; 2886bf215546Sopenharmony_ci if (!current_shader->cso) 2887bf215546Sopenharmony_ci continue; 2888bf215546Sopenharmony_ci 2889bf215546Sopenharmony_ci use_encrypted_bo |= 2890bf215546Sopenharmony_ci si_buffer_resources_check_encrypted(sctx, &sctx->const_and_shader_buffers[i]); 2891bf215546Sopenharmony_ci use_encrypted_bo |= 2892bf215546Sopenharmony_ci si_sampler_views_check_encrypted(sctx, &sctx->samplers[i], 2893bf215546Sopenharmony_ci current_shader->cso->info.base.textures_used[0]); 2894bf215546Sopenharmony_ci use_encrypted_bo |= si_image_views_check_encrypted(sctx, &sctx->images[i], 2895bf215546Sopenharmony_ci u_bit_consecutive(0, current_shader->cso->info.base.num_images)); 2896bf215546Sopenharmony_ci } 2897bf215546Sopenharmony_ci use_encrypted_bo |= si_buffer_resources_check_encrypted(sctx, &sctx->internal_bindings); 2898bf215546Sopenharmony_ci 2899bf215546Sopenharmony_ci struct si_state_blend *blend = sctx->queued.named.blend; 2900bf215546Sopenharmony_ci for (int i = 0; i < sctx->framebuffer.state.nr_cbufs && !use_encrypted_bo; i++) { 2901bf215546Sopenharmony_ci struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i]; 2902bf215546Sopenharmony_ci if (surf && surf->texture) { 2903bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)surf->texture; 2904bf215546Sopenharmony_ci if (!(tex->buffer.flags & RADEON_FLAG_ENCRYPTED)) 2905bf215546Sopenharmony_ci continue; 2906bf215546Sopenharmony_ci 2907bf215546Sopenharmony_ci /* Are we reading from this framebuffer */ 2908bf215546Sopenharmony_ci if (((blend->blend_enable_4bit >> (4 * i)) & 0xf) || 2909bf215546Sopenharmony_ci vi_dcc_enabled(tex, 0)) { 2910bf215546Sopenharmony_ci use_encrypted_bo = true; 2911bf215546Sopenharmony_ci } 2912bf215546Sopenharmony_ci } 2913bf215546Sopenharmony_ci } 2914bf215546Sopenharmony_ci 2915bf215546Sopenharmony_ci if 
(sctx->framebuffer.state.zsbuf) { 2916bf215546Sopenharmony_ci struct si_texture* zs = (struct si_texture *)sctx->framebuffer.state.zsbuf->texture; 2917bf215546Sopenharmony_ci if (zs && 2918bf215546Sopenharmony_ci (zs->buffer.flags & RADEON_FLAG_ENCRYPTED)) { 2919bf215546Sopenharmony_ci /* TODO: This isn't needed if depth.func is PIPE_FUNC_NEVER or PIPE_FUNC_ALWAYS */ 2920bf215546Sopenharmony_ci use_encrypted_bo = true; 2921bf215546Sopenharmony_ci } 2922bf215546Sopenharmony_ci } 2923bf215546Sopenharmony_ci 2924bf215546Sopenharmony_ci#ifndef NDEBUG 2925bf215546Sopenharmony_ci if (use_encrypted_bo) { 2926bf215546Sopenharmony_ci /* Verify that color buffers are encrypted */ 2927bf215546Sopenharmony_ci for (int i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 2928bf215546Sopenharmony_ci struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i]; 2929bf215546Sopenharmony_ci if (!surf) 2930bf215546Sopenharmony_ci continue; 2931bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)surf->texture; 2932bf215546Sopenharmony_ci assert(!surf->texture || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)); 2933bf215546Sopenharmony_ci } 2934bf215546Sopenharmony_ci /* Verify that depth/stencil buffer is encrypted */ 2935bf215546Sopenharmony_ci if (sctx->framebuffer.state.zsbuf) { 2936bf215546Sopenharmony_ci struct pipe_surface *surf = sctx->framebuffer.state.zsbuf; 2937bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)surf->texture; 2938bf215546Sopenharmony_ci assert(!surf->texture || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)); 2939bf215546Sopenharmony_ci } 2940bf215546Sopenharmony_ci } 2941bf215546Sopenharmony_ci#endif 2942bf215546Sopenharmony_ci 2943bf215546Sopenharmony_ci return use_encrypted_bo; 2944bf215546Sopenharmony_ci} 2945bf215546Sopenharmony_ci 2946bf215546Sopenharmony_civoid si_gfx_resources_add_all_to_bo_list(struct si_context *sctx) 2947bf215546Sopenharmony_ci{ 2948bf215546Sopenharmony_ci for (unsigned i = 0; i < 
SI_NUM_GRAPHICS_SHADERS; i++) { 2949bf215546Sopenharmony_ci si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]); 2950bf215546Sopenharmony_ci si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i]); 2951bf215546Sopenharmony_ci si_image_views_begin_new_cs(sctx, &sctx->images[i]); 2952bf215546Sopenharmony_ci } 2953bf215546Sopenharmony_ci si_buffer_resources_begin_new_cs(sctx, &sctx->internal_bindings); 2954bf215546Sopenharmony_ci si_vertex_buffers_begin_new_cs(sctx); 2955bf215546Sopenharmony_ci 2956bf215546Sopenharmony_ci if (sctx->bo_list_add_all_resident_resources) 2957bf215546Sopenharmony_ci si_resident_buffers_add_all_to_bo_list(sctx); 2958bf215546Sopenharmony_ci 2959bf215546Sopenharmony_ci assert(sctx->bo_list_add_all_gfx_resources); 2960bf215546Sopenharmony_ci sctx->bo_list_add_all_gfx_resources = false; 2961bf215546Sopenharmony_ci} 2962bf215546Sopenharmony_ci 2963bf215546Sopenharmony_cibool si_compute_resources_check_encrypted(struct si_context *sctx) 2964bf215546Sopenharmony_ci{ 2965bf215546Sopenharmony_ci unsigned sh = PIPE_SHADER_COMPUTE; 2966bf215546Sopenharmony_ci 2967bf215546Sopenharmony_ci struct si_shader_info* info = &sctx->cs_shader_state.program->sel.info; 2968bf215546Sopenharmony_ci 2969bf215546Sopenharmony_ci /* TODO: we should assert that either use_encrypted_bo is false, 2970bf215546Sopenharmony_ci * or all writable buffers are encrypted. 
2971bf215546Sopenharmony_ci */ 2972bf215546Sopenharmony_ci return si_buffer_resources_check_encrypted(sctx, &sctx->const_and_shader_buffers[sh]) || 2973bf215546Sopenharmony_ci si_sampler_views_check_encrypted(sctx, &sctx->samplers[sh], info->base.textures_used[0]) || 2974bf215546Sopenharmony_ci si_image_views_check_encrypted(sctx, &sctx->images[sh], u_bit_consecutive(0, info->base.num_images)) || 2975bf215546Sopenharmony_ci si_buffer_resources_check_encrypted(sctx, &sctx->internal_bindings); 2976bf215546Sopenharmony_ci} 2977bf215546Sopenharmony_ci 2978bf215546Sopenharmony_civoid si_compute_resources_add_all_to_bo_list(struct si_context *sctx) 2979bf215546Sopenharmony_ci{ 2980bf215546Sopenharmony_ci unsigned sh = PIPE_SHADER_COMPUTE; 2981bf215546Sopenharmony_ci 2982bf215546Sopenharmony_ci si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[sh]); 2983bf215546Sopenharmony_ci si_sampler_views_begin_new_cs(sctx, &sctx->samplers[sh]); 2984bf215546Sopenharmony_ci si_image_views_begin_new_cs(sctx, &sctx->images[sh]); 2985bf215546Sopenharmony_ci si_buffer_resources_begin_new_cs(sctx, &sctx->internal_bindings); 2986bf215546Sopenharmony_ci 2987bf215546Sopenharmony_ci if (sctx->bo_list_add_all_resident_resources) 2988bf215546Sopenharmony_ci si_resident_buffers_add_all_to_bo_list(sctx); 2989bf215546Sopenharmony_ci 2990bf215546Sopenharmony_ci assert(sctx->bo_list_add_all_compute_resources); 2991bf215546Sopenharmony_ci sctx->bo_list_add_all_compute_resources = false; 2992bf215546Sopenharmony_ci} 2993bf215546Sopenharmony_ci 2994bf215546Sopenharmony_civoid si_add_all_descriptors_to_bo_list(struct si_context *sctx) 2995bf215546Sopenharmony_ci{ 2996bf215546Sopenharmony_ci for (unsigned i = 0; i < SI_NUM_DESCS; ++i) 2997bf215546Sopenharmony_ci si_add_descriptors_to_bo_list(sctx, &sctx->descriptors[i]); 2998bf215546Sopenharmony_ci si_add_descriptors_to_bo_list(sctx, &sctx->bindless_descriptors); 2999bf215546Sopenharmony_ci 3000bf215546Sopenharmony_ci 
sctx->bo_list_add_all_resident_resources = true; 3001bf215546Sopenharmony_ci sctx->bo_list_add_all_gfx_resources = true; 3002bf215546Sopenharmony_ci sctx->bo_list_add_all_compute_resources = true; 3003bf215546Sopenharmony_ci} 3004bf215546Sopenharmony_ci 3005bf215546Sopenharmony_civoid si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx, uint64_t new_active_mask) 3006bf215546Sopenharmony_ci{ 3007bf215546Sopenharmony_ci struct si_descriptors *desc = &sctx->descriptors[desc_idx]; 3008bf215546Sopenharmony_ci 3009bf215546Sopenharmony_ci /* Ignore no-op updates and updates that disable all slots. */ 3010bf215546Sopenharmony_ci if (!new_active_mask || 3011bf215546Sopenharmony_ci new_active_mask == u_bit_consecutive64(desc->first_active_slot, desc->num_active_slots)) 3012bf215546Sopenharmony_ci return; 3013bf215546Sopenharmony_ci 3014bf215546Sopenharmony_ci int first, count; 3015bf215546Sopenharmony_ci u_bit_scan_consecutive_range64(&new_active_mask, &first, &count); 3016bf215546Sopenharmony_ci assert(new_active_mask == 0); 3017bf215546Sopenharmony_ci 3018bf215546Sopenharmony_ci /* Upload/dump descriptors if slots are being enabled. 
*/ 3019bf215546Sopenharmony_ci if (first < desc->first_active_slot || 3020bf215546Sopenharmony_ci first + count > desc->first_active_slot + desc->num_active_slots) 3021bf215546Sopenharmony_ci sctx->descriptors_dirty |= 1u << desc_idx; 3022bf215546Sopenharmony_ci 3023bf215546Sopenharmony_ci desc->first_active_slot = first; 3024bf215546Sopenharmony_ci desc->num_active_slots = count; 3025bf215546Sopenharmony_ci} 3026bf215546Sopenharmony_ci 3027bf215546Sopenharmony_civoid si_set_active_descriptors_for_shader(struct si_context *sctx, struct si_shader_selector *sel) 3028bf215546Sopenharmony_ci{ 3029bf215546Sopenharmony_ci if (!sel) 3030bf215546Sopenharmony_ci return; 3031bf215546Sopenharmony_ci 3032bf215546Sopenharmony_ci si_set_active_descriptors(sctx, sel->const_and_shader_buf_descriptors_index, 3033bf215546Sopenharmony_ci sel->active_const_and_shader_buffers); 3034bf215546Sopenharmony_ci si_set_active_descriptors(sctx, sel->sampler_and_images_descriptors_index, 3035bf215546Sopenharmony_ci sel->active_samplers_and_images); 3036bf215546Sopenharmony_ci} 3037