1/* 2 * Copyright © 2021 Google 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#define AC_SURFACE_INCLUDE_NIR 25#include "ac_surface.h" 26 27#include "radv_meta.h" 28#include "radv_private.h" 29 30static nir_shader * 31build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *surf) 32{ 33 enum glsl_sampler_dim dim = GLSL_SAMPLER_DIM_BUF; 34 const struct glsl_type *buf_type = glsl_image_type(dim, false, GLSL_TYPE_UINT); 35 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "dcc_retile_compute"); 36 37 b.shader->info.workgroup_size[0] = 8; 38 b.shader->info.workgroup_size[1] = 8; 39 40 nir_ssa_def *src_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8); 41 nir_ssa_def *src_dcc_pitch = nir_channels(&b, src_dcc_size, 1); 42 nir_ssa_def *src_dcc_height = nir_channels(&b, src_dcc_size, 2); 43 44 nir_ssa_def *dst_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8); 45 nir_ssa_def *dst_dcc_pitch = nir_channels(&b, dst_dcc_size, 1); 46 nir_ssa_def *dst_dcc_height = nir_channels(&b, dst_dcc_size, 2); 47 nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_in"); 48 input_dcc->data.descriptor_set = 0; 49 input_dcc->data.binding = 0; 50 nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_out"); 51 output_dcc->data.descriptor_set = 0; 52 output_dcc->data.binding = 1; 53 54 nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa; 55 nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa; 56 57 nir_ssa_def *coord = get_global_ids(&b, 2); 58 nir_ssa_def *zero = nir_imm_int(&b, 0); 59 coord = nir_imul( 60 &b, coord, 61 nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height)); 62 63 nir_ssa_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->rad_info, surf->bpe, 64 &surf->u.gfx9.color.dcc_equation, src_dcc_pitch, 65 src_dcc_height, zero, nir_channel(&b, coord, 0), 66 nir_channel(&b, coord, 1), zero, zero, zero); 67 nir_ssa_def *dst = ac_nir_dcc_addr_from_coord( 68 &b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, 69 dst_dcc_pitch, dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), 70 zero, zero, zero); 71 72 nir_ssa_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, 73 nir_vec4(&b, src, src, src, src), 74 nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), 75 .image_dim = dim); 76 77 nir_image_deref_store(&b, output_dcc_ref, nir_vec4(&b, dst, dst, dst, dst), 78 nir_ssa_undef(&b, 1, 32), dcc_val, nir_imm_int(&b, 0), .image_dim = dim); 79 80 return b.shader; 81} 82 83void 84radv_device_finish_meta_dcc_retile_state(struct radv_device *device) 85{ 86 struct radv_meta_state *state = &device->meta_state; 87 88 for (unsigned i = 0; i < ARRAY_SIZE(state->dcc_retile.pipeline); i++) { 89 radv_DestroyPipeline(radv_device_to_handle(device), state->dcc_retile.pipeline[i], 90 &state->alloc); 91 } 92 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->dcc_retile.p_layout, 93 &state->alloc); 94 device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), 95 state->dcc_retile.ds_layout, &state->alloc); 96 97 /* Reset for next finish. */ 98 memset(&state->dcc_retile, 0, sizeof(state->dcc_retile)); 99} 100 101/* 102 * This take a surface, but the only things used are: 103 * - BPE 104 * - DCC equations 105 * - DCC block size 106 * 107 * BPE is always 4 at the moment and the rest is derived from the tilemode. 108 */ 109static VkResult 110radv_device_init_meta_dcc_retile_state(struct radv_device *device, struct radeon_surf *surf) 111{ 112 VkResult result = VK_SUCCESS; 113 nir_shader *cs = build_dcc_retile_compute_shader(device, surf); 114 115 VkDescriptorSetLayoutCreateInfo ds_create_info = { 116 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 117 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 118 .bindingCount = 2, 119 .pBindings = (VkDescriptorSetLayoutBinding[]){ 120 {.binding = 0, 121 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 122 .descriptorCount = 1, 123 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 124 .pImmutableSamplers = NULL}, 125 {.binding = 1, 126 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 127 .descriptorCount = 1, 128 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 129 .pImmutableSamplers = NULL}, 130 }}; 131 132 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 133 &device->meta_state.alloc, 134 &device->meta_state.dcc_retile.ds_layout); 135 if (result != VK_SUCCESS) 136 goto cleanup; 137 138 VkPipelineLayoutCreateInfo pl_create_info = { 139 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 140 .setLayoutCount = 1, 141 .pSetLayouts = &device->meta_state.dcc_retile.ds_layout, 142 .pushConstantRangeCount = 1, 143 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, 144 }; 145 146 result = 147 radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 148 &device->meta_state.alloc, &device->meta_state.dcc_retile.p_layout); 149 if (result != VK_SUCCESS) 150 goto cleanup; 151 152 /* compute shader */ 153 154 VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 155 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 156 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 157 .module = vk_shader_module_handle_from_nir(cs), 158 .pName = "main", 159 .pSpecializationInfo = NULL, 160 }; 161 162 VkComputePipelineCreateInfo vk_pipeline_info = { 163 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 164 .stage = pipeline_shader_stage, 165 .flags = 0, 166 .layout = device->meta_state.dcc_retile.p_layout, 167 }; 168 169 result = radv_CreateComputePipelines( 170 radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 171 &vk_pipeline_info, NULL, &device->meta_state.dcc_retile.pipeline[surf->u.gfx9.swizzle_mode]); 172 if (result != VK_SUCCESS) 173 goto cleanup; 174 175cleanup: 176 ralloc_free(cs); 177 return result; 178} 179 180void 181radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image) 182{ 183 struct radv_meta_saved_state saved_state; 184 struct radv_device *device = cmd_buffer->device; 185 struct radv_buffer buffer; 186 187 assert(image->vk.image_type == VK_IMAGE_TYPE_2D); 188 assert(image->info.array_size == 1 && image->info.levels == 1); 189 190 struct radv_cmd_state *state = &cmd_buffer->state; 191 192 state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT, image) | 193 radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); 194 195 unsigned swizzle_mode = image->planes[0].surface.u.gfx9.swizzle_mode; 196 197 /* Compile pipelines if not already done so. */ 198 if (!cmd_buffer->device->meta_state.dcc_retile.pipeline[swizzle_mode]) { 199 VkResult ret = 200 radv_device_init_meta_dcc_retile_state(cmd_buffer->device, &image->planes[0].surface); 201 if (ret != VK_SUCCESS) { 202 cmd_buffer->record_result = ret; 203 return; 204 } 205 } 206 207 radv_meta_save( 208 &saved_state, cmd_buffer, 209 RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS); 210 211 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 212 device->meta_state.dcc_retile.pipeline[swizzle_mode]); 213 214 radv_buffer_init(&buffer, device, image->bindings[0].bo, image->size, image->bindings[0].offset); 215 216 struct radv_buffer_view views[2]; 217 VkBufferView view_handles[2]; 218 radv_buffer_view_init(views, cmd_buffer->device, 219 &(VkBufferViewCreateInfo){ 220 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, 221 .buffer = radv_buffer_to_handle(&buffer), 222 .offset = image->planes[0].surface.meta_offset, 223 .range = image->planes[0].surface.meta_size, 224 .format = VK_FORMAT_R8_UINT, 225 }); 226 radv_buffer_view_init(views + 1, cmd_buffer->device, 227 &(VkBufferViewCreateInfo){ 228 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, 229 .buffer = radv_buffer_to_handle(&buffer), 230 .offset = image->planes[0].surface.display_dcc_offset, 231 .range = image->planes[0].surface.u.gfx9.color.display_dcc_size, 232 .format = VK_FORMAT_R8_UINT, 233 }); 234 for (unsigned i = 0; i < 2; ++i) 235 view_handles[i] = radv_buffer_view_to_handle(&views[i]); 236 237 radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, 238 device->meta_state.dcc_retile.p_layout, 0, /* set */ 239 2, /* descriptorWriteCount */ 240 (VkWriteDescriptorSet[]){ 241 { 242 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 243 .dstBinding = 0, 244 .dstArrayElement = 0, 245 .descriptorCount = 1, 246 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 247 .pTexelBufferView = &view_handles[0], 248 }, 249 { 250 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 251 .dstBinding = 1, 252 .dstArrayElement = 0, 253 .descriptorCount = 1, 254 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 255 .pTexelBufferView = &view_handles[1], 256 }, 257 }); 258 259 unsigned width = DIV_ROUND_UP(image->info.width, vk_format_get_blockwidth(image->vk.format)); 260 unsigned height = DIV_ROUND_UP(image->info.height, vk_format_get_blockheight(image->vk.format)); 261 262 unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width); 263 unsigned dcc_height = 264 DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height); 265 266 uint32_t constants[] = { 267 image->planes[0].surface.u.gfx9.color.dcc_pitch_max + 1, 268 image->planes[0].surface.u.gfx9.color.dcc_height, 269 image->planes[0].surface.u.gfx9.color.display_dcc_pitch_max + 1, 270 image->planes[0].surface.u.gfx9.color.display_dcc_height, 271 }; 272 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 273 device->meta_state.dcc_retile.p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, 274 constants); 275 276 radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, 1); 277 278 radv_buffer_view_finish(views); 279 radv_buffer_view_finish(views + 1); 280 radv_buffer_finish(&buffer); 281 282 radv_meta_restore(&saved_state, cmd_buffer); 283 284 state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | 285 radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image); 286} 287