1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Google 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#define AC_SURFACE_INCLUDE_NIR 25bf215546Sopenharmony_ci#include "ac_surface.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "radv_meta.h" 28bf215546Sopenharmony_ci#include "radv_private.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_cistatic nir_shader * 31bf215546Sopenharmony_cibuild_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *surf) 32bf215546Sopenharmony_ci{ 33bf215546Sopenharmony_ci enum glsl_sampler_dim dim = GLSL_SAMPLER_DIM_BUF; 34bf215546Sopenharmony_ci const struct glsl_type *buf_type = glsl_image_type(dim, false, GLSL_TYPE_UINT); 35bf215546Sopenharmony_ci nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "dcc_retile_compute"); 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci b.shader->info.workgroup_size[0] = 8; 38bf215546Sopenharmony_ci b.shader->info.workgroup_size[1] = 8; 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci nir_ssa_def *src_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8); 41bf215546Sopenharmony_ci nir_ssa_def *src_dcc_pitch = nir_channels(&b, src_dcc_size, 1); 42bf215546Sopenharmony_ci nir_ssa_def *src_dcc_height = nir_channels(&b, src_dcc_size, 2); 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci nir_ssa_def *dst_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8); 45bf215546Sopenharmony_ci nir_ssa_def *dst_dcc_pitch = nir_channels(&b, dst_dcc_size, 1); 46bf215546Sopenharmony_ci nir_ssa_def *dst_dcc_height = nir_channels(&b, dst_dcc_size, 2); 47bf215546Sopenharmony_ci nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_in"); 48bf215546Sopenharmony_ci input_dcc->data.descriptor_set = 0; 49bf215546Sopenharmony_ci input_dcc->data.binding = 0; 50bf215546Sopenharmony_ci nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_out"); 51bf215546Sopenharmony_ci output_dcc->data.descriptor_set = 0; 52bf215546Sopenharmony_ci output_dcc->data.binding = 1; 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_ci nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa; 55bf215546Sopenharmony_ci nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa; 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci nir_ssa_def *coord = get_global_ids(&b, 2); 58bf215546Sopenharmony_ci nir_ssa_def *zero = nir_imm_int(&b, 0); 59bf215546Sopenharmony_ci coord = nir_imul( 60bf215546Sopenharmony_ci &b, coord, 61bf215546Sopenharmony_ci nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height)); 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci nir_ssa_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->rad_info, surf->bpe, 64bf215546Sopenharmony_ci &surf->u.gfx9.color.dcc_equation, src_dcc_pitch, 65bf215546Sopenharmony_ci src_dcc_height, zero, nir_channel(&b, coord, 0), 66bf215546Sopenharmony_ci nir_channel(&b, coord, 1), zero, zero, zero); 67bf215546Sopenharmony_ci nir_ssa_def *dst = ac_nir_dcc_addr_from_coord( 68bf215546Sopenharmony_ci &b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, 69bf215546Sopenharmony_ci dst_dcc_pitch, dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), 70bf215546Sopenharmony_ci zero, zero, zero); 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci nir_ssa_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, 73bf215546Sopenharmony_ci nir_vec4(&b, src, src, src, src), 74bf215546Sopenharmony_ci nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), 75bf215546Sopenharmony_ci .image_dim = dim); 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci nir_image_deref_store(&b, output_dcc_ref, nir_vec4(&b, dst, dst, dst, dst), 78bf215546Sopenharmony_ci nir_ssa_undef(&b, 1, 32), dcc_val, nir_imm_int(&b, 0), .image_dim = dim); 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci return b.shader; 81bf215546Sopenharmony_ci} 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_civoid 84bf215546Sopenharmony_ciradv_device_finish_meta_dcc_retile_state(struct radv_device *device) 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci struct radv_meta_state *state = &device->meta_state; 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(state->dcc_retile.pipeline); i++) { 89bf215546Sopenharmony_ci radv_DestroyPipeline(radv_device_to_handle(device), state->dcc_retile.pipeline[i], 90bf215546Sopenharmony_ci &state->alloc); 91bf215546Sopenharmony_ci } 92bf215546Sopenharmony_ci radv_DestroyPipelineLayout(radv_device_to_handle(device), state->dcc_retile.p_layout, 93bf215546Sopenharmony_ci &state->alloc); 94bf215546Sopenharmony_ci device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), 95bf215546Sopenharmony_ci state->dcc_retile.ds_layout, &state->alloc); 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci /* Reset for next finish. */ 98bf215546Sopenharmony_ci memset(&state->dcc_retile, 0, sizeof(state->dcc_retile)); 99bf215546Sopenharmony_ci} 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci/* 102bf215546Sopenharmony_ci * This take a surface, but the only things used are: 103bf215546Sopenharmony_ci * - BPE 104bf215546Sopenharmony_ci * - DCC equations 105bf215546Sopenharmony_ci * - DCC block size 106bf215546Sopenharmony_ci * 107bf215546Sopenharmony_ci * BPE is always 4 at the moment and the rest is derived from the tilemode. 108bf215546Sopenharmony_ci */ 109bf215546Sopenharmony_cistatic VkResult 110bf215546Sopenharmony_ciradv_device_init_meta_dcc_retile_state(struct radv_device *device, struct radeon_surf *surf) 111bf215546Sopenharmony_ci{ 112bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 113bf215546Sopenharmony_ci nir_shader *cs = build_dcc_retile_compute_shader(device, surf); 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci VkDescriptorSetLayoutCreateInfo ds_create_info = { 116bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 117bf215546Sopenharmony_ci .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 118bf215546Sopenharmony_ci .bindingCount = 2, 119bf215546Sopenharmony_ci .pBindings = (VkDescriptorSetLayoutBinding[]){ 120bf215546Sopenharmony_ci {.binding = 0, 121bf215546Sopenharmony_ci .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 122bf215546Sopenharmony_ci .descriptorCount = 1, 123bf215546Sopenharmony_ci .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 124bf215546Sopenharmony_ci .pImmutableSamplers = NULL}, 125bf215546Sopenharmony_ci {.binding = 1, 126bf215546Sopenharmony_ci .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 127bf215546Sopenharmony_ci .descriptorCount = 1, 128bf215546Sopenharmony_ci .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 129bf215546Sopenharmony_ci .pImmutableSamplers = NULL}, 130bf215546Sopenharmony_ci }}; 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 133bf215546Sopenharmony_ci &device->meta_state.alloc, 134bf215546Sopenharmony_ci &device->meta_state.dcc_retile.ds_layout); 135bf215546Sopenharmony_ci if (result != VK_SUCCESS) 136bf215546Sopenharmony_ci goto cleanup; 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci VkPipelineLayoutCreateInfo pl_create_info = { 139bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 140bf215546Sopenharmony_ci .setLayoutCount = 1, 141bf215546Sopenharmony_ci .pSetLayouts = &device->meta_state.dcc_retile.ds_layout, 142bf215546Sopenharmony_ci .pushConstantRangeCount = 1, 143bf215546Sopenharmony_ci .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, 144bf215546Sopenharmony_ci }; 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci result = 147bf215546Sopenharmony_ci radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 148bf215546Sopenharmony_ci &device->meta_state.alloc, &device->meta_state.dcc_retile.p_layout); 149bf215546Sopenharmony_ci if (result != VK_SUCCESS) 150bf215546Sopenharmony_ci goto cleanup; 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci /* compute shader */ 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 155bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 156bf215546Sopenharmony_ci .stage = VK_SHADER_STAGE_COMPUTE_BIT, 157bf215546Sopenharmony_ci .module = vk_shader_module_handle_from_nir(cs), 158bf215546Sopenharmony_ci .pName = "main", 159bf215546Sopenharmony_ci .pSpecializationInfo = NULL, 160bf215546Sopenharmony_ci }; 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci VkComputePipelineCreateInfo vk_pipeline_info = { 163bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 164bf215546Sopenharmony_ci .stage = pipeline_shader_stage, 165bf215546Sopenharmony_ci .flags = 0, 166bf215546Sopenharmony_ci .layout = device->meta_state.dcc_retile.p_layout, 167bf215546Sopenharmony_ci }; 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci result = radv_CreateComputePipelines( 170bf215546Sopenharmony_ci radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 171bf215546Sopenharmony_ci &vk_pipeline_info, NULL, &device->meta_state.dcc_retile.pipeline[surf->u.gfx9.swizzle_mode]); 172bf215546Sopenharmony_ci if (result != VK_SUCCESS) 173bf215546Sopenharmony_ci goto cleanup; 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_cicleanup: 176bf215546Sopenharmony_ci ralloc_free(cs); 177bf215546Sopenharmony_ci return result; 178bf215546Sopenharmony_ci} 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_civoid 181bf215546Sopenharmony_ciradv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image) 182bf215546Sopenharmony_ci{ 183bf215546Sopenharmony_ci struct radv_meta_saved_state saved_state; 184bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 185bf215546Sopenharmony_ci struct radv_buffer buffer; 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci assert(image->vk.image_type == VK_IMAGE_TYPE_2D); 188bf215546Sopenharmony_ci assert(image->info.array_size == 1 && image->info.levels == 1); 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT, image) | 193bf215546Sopenharmony_ci radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci unsigned swizzle_mode = image->planes[0].surface.u.gfx9.swizzle_mode; 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci /* Compile pipelines if not already done so. */ 198bf215546Sopenharmony_ci if (!cmd_buffer->device->meta_state.dcc_retile.pipeline[swizzle_mode]) { 199bf215546Sopenharmony_ci VkResult ret = 200bf215546Sopenharmony_ci radv_device_init_meta_dcc_retile_state(cmd_buffer->device, &image->planes[0].surface); 201bf215546Sopenharmony_ci if (ret != VK_SUCCESS) { 202bf215546Sopenharmony_ci cmd_buffer->record_result = ret; 203bf215546Sopenharmony_ci return; 204bf215546Sopenharmony_ci } 205bf215546Sopenharmony_ci } 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci radv_meta_save( 208bf215546Sopenharmony_ci &saved_state, cmd_buffer, 209bf215546Sopenharmony_ci RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS); 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_ci radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 212bf215546Sopenharmony_ci device->meta_state.dcc_retile.pipeline[swizzle_mode]); 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci radv_buffer_init(&buffer, device, image->bindings[0].bo, image->size, image->bindings[0].offset); 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci struct radv_buffer_view views[2]; 217bf215546Sopenharmony_ci VkBufferView view_handles[2]; 218bf215546Sopenharmony_ci radv_buffer_view_init(views, cmd_buffer->device, 219bf215546Sopenharmony_ci &(VkBufferViewCreateInfo){ 220bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, 221bf215546Sopenharmony_ci .buffer = radv_buffer_to_handle(&buffer), 222bf215546Sopenharmony_ci .offset = image->planes[0].surface.meta_offset, 223bf215546Sopenharmony_ci .range = image->planes[0].surface.meta_size, 224bf215546Sopenharmony_ci .format = VK_FORMAT_R8_UINT, 225bf215546Sopenharmony_ci }); 226bf215546Sopenharmony_ci radv_buffer_view_init(views + 1, cmd_buffer->device, 227bf215546Sopenharmony_ci &(VkBufferViewCreateInfo){ 228bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, 229bf215546Sopenharmony_ci .buffer = radv_buffer_to_handle(&buffer), 230bf215546Sopenharmony_ci .offset = image->planes[0].surface.display_dcc_offset, 231bf215546Sopenharmony_ci .range = image->planes[0].surface.u.gfx9.color.display_dcc_size, 232bf215546Sopenharmony_ci .format = VK_FORMAT_R8_UINT, 233bf215546Sopenharmony_ci }); 234bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; ++i) 235bf215546Sopenharmony_ci view_handles[i] = radv_buffer_view_to_handle(&views[i]); 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, 238bf215546Sopenharmony_ci device->meta_state.dcc_retile.p_layout, 0, /* set */ 239bf215546Sopenharmony_ci 2, /* descriptorWriteCount */ 240bf215546Sopenharmony_ci (VkWriteDescriptorSet[]){ 241bf215546Sopenharmony_ci { 242bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 243bf215546Sopenharmony_ci .dstBinding = 0, 244bf215546Sopenharmony_ci .dstArrayElement = 0, 245bf215546Sopenharmony_ci .descriptorCount = 1, 246bf215546Sopenharmony_ci .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 247bf215546Sopenharmony_ci .pTexelBufferView = &view_handles[0], 248bf215546Sopenharmony_ci }, 249bf215546Sopenharmony_ci { 250bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 251bf215546Sopenharmony_ci .dstBinding = 1, 252bf215546Sopenharmony_ci .dstArrayElement = 0, 253bf215546Sopenharmony_ci .descriptorCount = 1, 254bf215546Sopenharmony_ci .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 255bf215546Sopenharmony_ci .pTexelBufferView = &view_handles[1], 256bf215546Sopenharmony_ci }, 257bf215546Sopenharmony_ci }); 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci unsigned width = DIV_ROUND_UP(image->info.width, vk_format_get_blockwidth(image->vk.format)); 260bf215546Sopenharmony_ci unsigned height = DIV_ROUND_UP(image->info.height, vk_format_get_blockheight(image->vk.format)); 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width); 263bf215546Sopenharmony_ci unsigned dcc_height = 264bf215546Sopenharmony_ci DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height); 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci uint32_t constants[] = { 267bf215546Sopenharmony_ci image->planes[0].surface.u.gfx9.color.dcc_pitch_max + 1, 268bf215546Sopenharmony_ci image->planes[0].surface.u.gfx9.color.dcc_height, 269bf215546Sopenharmony_ci image->planes[0].surface.u.gfx9.color.display_dcc_pitch_max + 1, 270bf215546Sopenharmony_ci image->planes[0].surface.u.gfx9.color.display_dcc_height, 271bf215546Sopenharmony_ci }; 272bf215546Sopenharmony_ci radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 273bf215546Sopenharmony_ci device->meta_state.dcc_retile.p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, 274bf215546Sopenharmony_ci constants); 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_ci radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, 1); 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci radv_buffer_view_finish(views); 279bf215546Sopenharmony_ci radv_buffer_view_finish(views + 1); 280bf215546Sopenharmony_ci radv_buffer_finish(&buffer); 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ci radv_meta_restore(&saved_state, cmd_buffer); 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | 285bf215546Sopenharmony_ci radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); 286bf215546Sopenharmony_ci} 287