1/* 2 * Copyright © 2016 Dave Airlie 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <assert.h> 25#include <stdbool.h> 26 27#include "nir/nir_builder.h" 28#include "radv_meta.h" 29#include "radv_private.h" 30#include "sid.h" 31#include "vk_format.h" 32 33static nir_ssa_def * 34radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_ssa_def *input) 35{ 36 unsigned i; 37 38 nir_ssa_def *cmp[3]; 39 for (i = 0; i < 3; i++) 40 cmp[i] = nir_flt(b, nir_channel(b, input, i), nir_imm_int(b, 0x3b4d2e1c)); 41 42 nir_ssa_def *ltvals[3]; 43 for (i = 0; i < 3; i++) 44 ltvals[i] = nir_fmul(b, nir_channel(b, input, i), nir_imm_float(b, 12.92)); 45 46 nir_ssa_def *gtvals[3]; 47 48 for (i = 0; i < 3; i++) { 49 gtvals[i] = nir_fpow(b, nir_channel(b, input, i), nir_imm_float(b, 1.0 / 2.4)); 50 gtvals[i] = nir_fmul(b, gtvals[i], nir_imm_float(b, 1.055)); 51 gtvals[i] = nir_fsub(b, gtvals[i], nir_imm_float(b, 0.055)); 52 } 53 54 nir_ssa_def *comp[4]; 55 for (i = 0; i < 3; i++) 56 comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]); 57 comp[3] = nir_channels(b, input, 1 << 3); 58 return nir_vec(b, comp, 4); 59} 60 61static nir_shader * 62build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples) 63{ 64 enum glsl_base_type img_base_type = is_integer ? GLSL_TYPE_UINT : GLSL_TYPE_FLOAT; 65 const struct glsl_type *sampler_type = 66 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type); 67 const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, img_base_type); 68 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs-%d-%s", samples, 69 is_integer ? "int" : (is_srgb ? "srgb" : "float")); 70 b.shader->info.workgroup_size[0] = 8; 71 b.shader->info.workgroup_size[1] = 8; 72 73 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); 74 input_img->data.descriptor_set = 0; 75 input_img->data.binding = 0; 76 77 nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img"); 78 output_img->data.descriptor_set = 0; 79 output_img->data.binding = 1; 80 81 nir_ssa_def *global_id = get_global_ids(&b, 2); 82 83 nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8); 84 nir_ssa_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16); 85 86 nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset); 87 nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset); 88 89 nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color"); 90 91 radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img, color, src_coord); 92 93 nir_ssa_def *outval = nir_load_var(&b, color); 94 if (is_srgb) 95 outval = radv_meta_build_resolve_srgb_conversion(&b, outval); 96 97 nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), 98 nir_channel(&b, dst_coord, 1), 99 nir_ssa_undef(&b, 1, 32), 100 nir_ssa_undef(&b, 1, 32)); 101 102 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, 103 nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), 104 .image_dim = GLSL_SAMPLER_DIM_2D); 105 return b.shader; 106} 107 108enum { 109 DEPTH_RESOLVE, 110 STENCIL_RESOLVE, 111}; 112 113static const char * 114get_resolve_mode_str(VkResolveModeFlagBits resolve_mode) 115{ 116 switch (resolve_mode) { 117 case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: 118 return "zero"; 119 case VK_RESOLVE_MODE_AVERAGE_BIT: 120 return "average"; 121 case VK_RESOLVE_MODE_MIN_BIT: 122 return "min"; 123 case VK_RESOLVE_MODE_MAX_BIT: 124 return "max"; 125 default: 126 unreachable("invalid resolve mode"); 127 } 128} 129 130static nir_shader * 131build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, int index, 132 VkResolveModeFlagBits resolve_mode) 133{ 134 enum glsl_base_type img_base_type = index == DEPTH_RESOLVE ? GLSL_TYPE_FLOAT : GLSL_TYPE_UINT; 135 const struct glsl_type *sampler_type = 136 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, img_base_type); 137 const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, img_base_type); 138 139 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs_%s-%s-%d", 140 index == DEPTH_RESOLVE ? "depth" : "stencil", 141 get_resolve_mode_str(resolve_mode), samples); 142 b.shader->info.workgroup_size[0] = 8; 143 b.shader->info.workgroup_size[1] = 8; 144 145 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex"); 146 input_img->data.descriptor_set = 0; 147 input_img->data.binding = 0; 148 149 nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img"); 150 output_img->data.descriptor_set = 0; 151 output_img->data.binding = 1; 152 153 nir_ssa_def *img_coord = get_global_ids(&b, 3); 154 155 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa; 156 157 nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32; 158 159 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); 160 tex->sampler_dim = GLSL_SAMPLER_DIM_MS; 161 tex->op = nir_texop_txf_ms; 162 tex->src[0].src_type = nir_tex_src_coord; 163 tex->src[0].src = nir_src_for_ssa(img_coord); 164 tex->src[1].src_type = nir_tex_src_ms_index; 165 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 166 tex->src[2].src_type = nir_tex_src_texture_deref; 167 tex->src[2].src = nir_src_for_ssa(input_img_deref); 168 tex->dest_type = type; 169 tex->is_array = true; 170 tex->coord_components = 3; 171 172 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); 173 nir_builder_instr_insert(&b, &tex->instr); 174 175 nir_ssa_def *outval = &tex->dest.ssa; 176 177 if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) { 178 for (int i = 1; i < samples; i++) { 179 nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3); 180 tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS; 181 tex_add->op = nir_texop_txf_ms; 182 tex_add->src[0].src_type = nir_tex_src_coord; 183 tex_add->src[0].src = nir_src_for_ssa(img_coord); 184 tex_add->src[1].src_type = nir_tex_src_ms_index; 185 tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i)); 186 tex_add->src[2].src_type = nir_tex_src_texture_deref; 187 tex_add->src[2].src = nir_src_for_ssa(input_img_deref); 188 tex_add->dest_type = type; 189 tex_add->is_array = true; 190 tex_add->coord_components = 3; 191 192 nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex"); 193 nir_builder_instr_insert(&b, &tex_add->instr); 194 195 switch (resolve_mode) { 196 case VK_RESOLVE_MODE_AVERAGE_BIT: 197 assert(index == DEPTH_RESOLVE); 198 outval = nir_fadd(&b, outval, &tex_add->dest.ssa); 199 break; 200 case VK_RESOLVE_MODE_MIN_BIT: 201 if (index == DEPTH_RESOLVE) 202 outval = nir_fmin(&b, outval, &tex_add->dest.ssa); 203 else 204 outval = nir_umin(&b, outval, &tex_add->dest.ssa); 205 break; 206 case VK_RESOLVE_MODE_MAX_BIT: 207 if (index == DEPTH_RESOLVE) 208 outval = nir_fmax(&b, outval, &tex_add->dest.ssa); 209 else 210 outval = nir_umax(&b, outval, &tex_add->dest.ssa); 211 break; 212 default: 213 unreachable("invalid resolve mode"); 214 } 215 } 216 217 if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT) 218 outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples)); 219 } 220 221 nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1), 222 nir_channel(&b, img_coord, 2), nir_ssa_undef(&b, 1, 32)); 223 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, 224 nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), 225 .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true); 226 return b.shader; 227} 228 229static VkResult 230create_layout(struct radv_device *device) 231{ 232 VkResult result; 233 /* 234 * two descriptors one for the image being sampled 235 * one for the buffer being written. 236 */ 237 VkDescriptorSetLayoutCreateInfo ds_create_info = { 238 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 239 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 240 .bindingCount = 2, 241 .pBindings = (VkDescriptorSetLayoutBinding[]){ 242 {.binding = 0, 243 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 244 .descriptorCount = 1, 245 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 246 .pImmutableSamplers = NULL}, 247 {.binding = 1, 248 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 249 .descriptorCount = 1, 250 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 251 .pImmutableSamplers = NULL}, 252 }}; 253 254 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 255 &device->meta_state.alloc, 256 &device->meta_state.resolve_compute.ds_layout); 257 if (result != VK_SUCCESS) 258 goto fail; 259 260 VkPipelineLayoutCreateInfo pl_create_info = { 261 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 262 .setLayoutCount = 1, 263 .pSetLayouts = &device->meta_state.resolve_compute.ds_layout, 264 .pushConstantRangeCount = 1, 265 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16}, 266 }; 267 268 result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 269 &device->meta_state.alloc, 270 &device->meta_state.resolve_compute.p_layout); 271 if (result != VK_SUCCESS) 272 goto fail; 273 return VK_SUCCESS; 274fail: 275 return result; 276} 277 278static VkResult 279create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb, 280 VkPipeline *pipeline) 281{ 282 VkResult result; 283 284 mtx_lock(&device->meta_state.mtx); 285 if (*pipeline) { 286 mtx_unlock(&device->meta_state.mtx); 287 return VK_SUCCESS; 288 } 289 290 nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples); 291 292 /* compute shader */ 293 294 VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 295 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 296 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 297 .module = vk_shader_module_handle_from_nir(cs), 298 .pName = "main", 299 .pSpecializationInfo = NULL, 300 }; 301 302 VkComputePipelineCreateInfo vk_pipeline_info = { 303 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 304 .stage = pipeline_shader_stage, 305 .flags = 0, 306 .layout = device->meta_state.resolve_compute.p_layout, 307 }; 308 309 result = radv_CreateComputePipelines(radv_device_to_handle(device), 310 radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 311 &vk_pipeline_info, NULL, pipeline); 312 if (result != VK_SUCCESS) 313 goto fail; 314 315 ralloc_free(cs); 316 mtx_unlock(&device->meta_state.mtx); 317 return VK_SUCCESS; 318fail: 319 ralloc_free(cs); 320 mtx_unlock(&device->meta_state.mtx); 321 return result; 322} 323 324static VkResult 325create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, int index, 326 VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline) 327{ 328 VkResult result; 329 330 mtx_lock(&device->meta_state.mtx); 331 if (*pipeline) { 332 mtx_unlock(&device->meta_state.mtx); 333 return VK_SUCCESS; 334 } 335 336 nir_shader *cs = 337 build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode); 338 339 /* compute shader */ 340 VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 341 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 342 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 343 .module = vk_shader_module_handle_from_nir(cs), 344 .pName = "main", 345 .pSpecializationInfo = NULL, 346 }; 347 348 VkComputePipelineCreateInfo vk_pipeline_info = { 349 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 350 .stage = pipeline_shader_stage, 351 .flags = 0, 352 .layout = device->meta_state.resolve_compute.p_layout, 353 }; 354 355 result = radv_CreateComputePipelines(radv_device_to_handle(device), 356 radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 357 &vk_pipeline_info, NULL, pipeline); 358 if (result != VK_SUCCESS) 359 goto fail; 360 361 ralloc_free(cs); 362 mtx_unlock(&device->meta_state.mtx); 363 return VK_SUCCESS; 364fail: 365 ralloc_free(cs); 366 mtx_unlock(&device->meta_state.mtx); 367 return result; 368} 369 370VkResult 371radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand) 372{ 373 struct radv_meta_state *state = &device->meta_state; 374 VkResult res; 375 376 res = create_layout(device); 377 if (res != VK_SUCCESS) 378 return res; 379 380 if (on_demand) 381 return VK_SUCCESS; 382 383 for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { 384 uint32_t samples = 1 << i; 385 386 res = create_resolve_pipeline(device, samples, false, false, 387 &state->resolve_compute.rc[i].pipeline); 388 if (res != VK_SUCCESS) 389 return res; 390 391 res = create_resolve_pipeline(device, samples, true, false, 392 &state->resolve_compute.rc[i].i_pipeline); 393 if (res != VK_SUCCESS) 394 return res; 395 396 res = create_resolve_pipeline(device, samples, false, true, 397 &state->resolve_compute.rc[i].srgb_pipeline); 398 if (res != VK_SUCCESS) 399 return res; 400 401 res = create_depth_stencil_resolve_pipeline( 402 device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT, 403 &state->resolve_compute.depth[i].average_pipeline); 404 if (res != VK_SUCCESS) 405 return res; 406 407 res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, 408 VK_RESOLVE_MODE_MAX_BIT, 409 &state->resolve_compute.depth[i].max_pipeline); 410 if (res != VK_SUCCESS) 411 return res; 412 413 res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, 414 VK_RESOLVE_MODE_MIN_BIT, 415 &state->resolve_compute.depth[i].min_pipeline); 416 if (res != VK_SUCCESS) 417 return res; 418 419 res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, 420 VK_RESOLVE_MODE_MAX_BIT, 421 &state->resolve_compute.stencil[i].max_pipeline); 422 if (res != VK_SUCCESS) 423 return res; 424 425 res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, 426 VK_RESOLVE_MODE_MIN_BIT, 427 &state->resolve_compute.stencil[i].min_pipeline); 428 if (res != VK_SUCCESS) 429 return res; 430 } 431 432 res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE, 433 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, 434 &state->resolve_compute.depth_zero_pipeline); 435 if (res != VK_SUCCESS) 436 return res; 437 438 return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE, 439 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, 440 &state->resolve_compute.stencil_zero_pipeline); 441} 442 443void 444radv_device_finish_meta_resolve_compute_state(struct radv_device *device) 445{ 446 struct radv_meta_state *state = &device->meta_state; 447 for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { 448 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline, 449 &state->alloc); 450 451 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline, 452 &state->alloc); 453 454 radv_DestroyPipeline(radv_device_to_handle(device), 455 state->resolve_compute.rc[i].srgb_pipeline, &state->alloc); 456 457 radv_DestroyPipeline(radv_device_to_handle(device), 458 state->resolve_compute.depth[i].average_pipeline, &state->alloc); 459 460 radv_DestroyPipeline(radv_device_to_handle(device), 461 state->resolve_compute.depth[i].max_pipeline, &state->alloc); 462 463 radv_DestroyPipeline(radv_device_to_handle(device), 464 state->resolve_compute.depth[i].min_pipeline, &state->alloc); 465 466 radv_DestroyPipeline(radv_device_to_handle(device), 467 state->resolve_compute.stencil[i].max_pipeline, &state->alloc); 468 469 radv_DestroyPipeline(radv_device_to_handle(device), 470 state->resolve_compute.stencil[i].min_pipeline, &state->alloc); 471 } 472 473 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline, 474 &state->alloc); 475 476 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline, 477 &state->alloc); 478 479 device->vk.dispatch_table.DestroyDescriptorSetLayout( 480 radv_device_to_handle(device), state->resolve_compute.ds_layout, &state->alloc); 481 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout, 482 &state->alloc); 483} 484 485static VkPipeline * 486radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview) 487{ 488 struct radv_device *device = cmd_buffer->device; 489 struct radv_meta_state *state = &device->meta_state; 490 uint32_t samples = src_iview->image->info.samples; 491 uint32_t samples_log2 = ffs(samples) - 1; 492 VkPipeline *pipeline; 493 494 if (vk_format_is_int(src_iview->vk.format)) 495 pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline; 496 else if (vk_format_is_srgb(src_iview->vk.format)) 497 pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline; 498 else 499 pipeline = &state->resolve_compute.rc[samples_log2].pipeline; 500 501 if (!*pipeline) { 502 VkResult ret; 503 504 ret = create_resolve_pipeline(device, samples, vk_format_is_int(src_iview->vk.format), 505 vk_format_is_srgb(src_iview->vk.format), pipeline); 506 if (ret != VK_SUCCESS) { 507 cmd_buffer->record_result = ret; 508 return NULL; 509 } 510 } 511 512 return pipeline; 513} 514 515static void 516emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, 517 struct radv_image_view *dest_iview, const VkOffset2D *src_offset, 518 const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent) 519{ 520 struct radv_device *device = cmd_buffer->device; 521 VkPipeline *pipeline; 522 523 radv_meta_push_descriptor_set( 524 cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout, 525 0, /* set */ 526 2, /* descriptorWriteCount */ 527 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 528 .dstBinding = 0, 529 .dstArrayElement = 0, 530 .descriptorCount = 1, 531 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 532 .pImageInfo = 533 (VkDescriptorImageInfo[]){ 534 {.sampler = VK_NULL_HANDLE, 535 .imageView = radv_image_view_to_handle(src_iview), 536 .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, 537 }}, 538 {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 539 .dstBinding = 1, 540 .dstArrayElement = 0, 541 .descriptorCount = 1, 542 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 543 .pImageInfo = (VkDescriptorImageInfo[]){ 544 { 545 .sampler = VK_NULL_HANDLE, 546 .imageView = radv_image_view_to_handle(dest_iview), 547 .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 548 }, 549 }}}); 550 551 pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview); 552 553 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 554 *pipeline); 555 556 unsigned push_constants[4] = { 557 src_offset->x, 558 src_offset->y, 559 dest_offset->x, 560 dest_offset->y, 561 }; 562 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 563 device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 564 0, 16, push_constants); 565 radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1); 566} 567 568static void 569emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, 570 struct radv_image_view *dest_iview, const VkExtent3D *resolve_extent, 571 VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode) 572{ 573 struct radv_device *device = cmd_buffer->device; 574 const uint32_t samples = src_iview->image->info.samples; 575 const uint32_t samples_log2 = ffs(samples) - 1; 576 VkPipeline *pipeline; 577 578 radv_meta_push_descriptor_set( 579 cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout, 580 0, /* set */ 581 2, /* descriptorWriteCount */ 582 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 583 .dstBinding = 0, 584 .dstArrayElement = 0, 585 .descriptorCount = 1, 586 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 587 .pImageInfo = 588 (VkDescriptorImageInfo[]){ 589 {.sampler = VK_NULL_HANDLE, 590 .imageView = radv_image_view_to_handle(src_iview), 591 .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, 592 }}, 593 {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 594 .dstBinding = 1, 595 .dstArrayElement = 0, 596 .descriptorCount = 1, 597 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 598 .pImageInfo = (VkDescriptorImageInfo[]){ 599 { 600 .sampler = VK_NULL_HANDLE, 601 .imageView = radv_image_view_to_handle(dest_iview), 602 .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 603 }, 604 }}}); 605 606 switch (resolve_mode) { 607 case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: 608 if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) 609 pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline; 610 else 611 pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline; 612 break; 613 case VK_RESOLVE_MODE_AVERAGE_BIT: 614 assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT); 615 pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline; 616 break; 617 case VK_RESOLVE_MODE_MIN_BIT: 618 if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) 619 pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline; 620 else 621 pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline; 622 break; 623 case VK_RESOLVE_MODE_MAX_BIT: 624 if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) 625 pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline; 626 else 627 pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline; 628 break; 629 default: 630 unreachable("invalid resolve mode"); 631 } 632 633 if (!*pipeline) { 634 int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE; 635 VkResult ret; 636 637 ret = create_depth_stencil_resolve_pipeline(device, samples, index, resolve_mode, pipeline); 638 if (ret != VK_SUCCESS) { 639 cmd_buffer->record_result = ret; 640 return; 641 } 642 } 643 644 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 645 *pipeline); 646 647 radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 648 resolve_extent->depth); 649} 650 651void 652radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, 653 VkFormat src_format, VkImageLayout src_image_layout, 654 struct radv_image *dest_image, VkFormat dest_format, 655 VkImageLayout dest_image_layout, const VkImageResolve2 *region) 656{ 657 struct radv_meta_saved_state saved_state; 658 659 radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region); 660 661 /* For partial resolves, DCC should be decompressed before resolving 662 * because the metadata is re-initialized to the uncompressed after. 663 */ 664 uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->qf, 665 cmd_buffer->qf); 666 667 if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) && 668 radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel, 669 dest_image_layout, false, queue_mask) && 670 (region->dstOffset.x || region->dstOffset.y || region->dstOffset.z || 671 region->extent.width != dest_image->info.width || 672 region->extent.height != dest_image->info.height || 673 region->extent.depth != dest_image->info.depth)) { 674 radv_decompress_dcc(cmd_buffer, dest_image, 675 &(VkImageSubresourceRange){ 676 .aspectMask = region->dstSubresource.aspectMask, 677 .baseMipLevel = region->dstSubresource.mipLevel, 678 .levelCount = 1, 679 .baseArrayLayer = region->dstSubresource.baseArrayLayer, 680 .layerCount = region->dstSubresource.layerCount, 681 }); 682 } 683 684 radv_meta_save( 685 &saved_state, cmd_buffer, 686 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS); 687 688 assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); 689 assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); 690 assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount); 691 692 const uint32_t src_base_layer = 693 radv_meta_get_iview_layer(src_image, ®ion->srcSubresource, ®ion->srcOffset); 694 695 const uint32_t dest_base_layer = 696 radv_meta_get_iview_layer(dest_image, ®ion->dstSubresource, ®ion->dstOffset); 697 698 const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent); 699 const struct VkOffset3D srcOffset = vk_image_sanitize_offset(&src_image->vk, region->srcOffset); 700 const struct VkOffset3D dstOffset = vk_image_sanitize_offset(&dest_image->vk, region->dstOffset); 701 702 for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) { 703 704 struct radv_image_view src_iview; 705 radv_image_view_init(&src_iview, cmd_buffer->device, 706 &(VkImageViewCreateInfo){ 707 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 708 .image = radv_image_to_handle(src_image), 709 .viewType = radv_meta_get_view_type(src_image), 710 .format = src_format, 711 .subresourceRange = 712 { 713 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 714 .baseMipLevel = region->srcSubresource.mipLevel, 715 .levelCount = 1, 716 .baseArrayLayer = src_base_layer + layer, 717 .layerCount = 1, 718 }, 719 }, 720 0, NULL); 721 722 struct radv_image_view dest_iview; 723 radv_image_view_init(&dest_iview, cmd_buffer->device, 724 &(VkImageViewCreateInfo){ 725 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 726 .image = radv_image_to_handle(dest_image), 727 .viewType = radv_meta_get_view_type(dest_image), 728 .format = vk_to_non_srgb_format(dest_format), 729 .subresourceRange = 730 { 731 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 732 .baseMipLevel = region->dstSubresource.mipLevel, 733 .levelCount = 1, 734 .baseArrayLayer = dest_base_layer + layer, 735 .layerCount = 1, 736 }, 737 }, 738 0, NULL); 739 740 emit_resolve(cmd_buffer, &src_iview, &dest_iview, &(VkOffset2D){srcOffset.x, srcOffset.y}, 741 &(VkOffset2D){dstOffset.x, dstOffset.y}, 742 &(VkExtent2D){extent.width, extent.height}); 743 744 radv_image_view_finish(&src_iview); 745 radv_image_view_finish(&dest_iview); 746 } 747 748 radv_meta_restore(&saved_state, cmd_buffer); 749 750 if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) && 751 radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel, 752 dest_image_layout, false, queue_mask)) { 753 754 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE; 755 756 VkImageSubresourceRange range = { 757 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 758 .baseMipLevel = region->dstSubresource.mipLevel, 759 .levelCount = 1, 760 .baseArrayLayer = dest_base_layer, 761 .layerCount = region->dstSubresource.layerCount, 762 }; 763 764 cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dest_image, &range, 0xffffffff); 765 } 766} 767 768/** 769 * Emit any needed resolves for the current subpass. 770 */ 771void 772radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer) 773{ 774 struct vk_framebuffer *fb = cmd_buffer->state.framebuffer; 775 const struct radv_subpass *subpass = cmd_buffer->state.subpass; 776 struct radv_subpass_barrier barrier; 777 uint32_t layer_count = fb->layers; 778 779 if (subpass->view_mask) 780 layer_count = util_last_bit(subpass->view_mask); 781 782 /* Resolves happen before the end-of-subpass barriers get executed, so 783 * we have to make the attachment shader-readable. 784 */ 785 barrier.src_stage_mask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; 786 barrier.src_access_mask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT; 787 barrier.dst_access_mask = VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT; 788 radv_emit_subpass_barrier(cmd_buffer, &barrier); 789 790 for (uint32_t i = 0; i < subpass->color_count; ++i) { 791 struct radv_subpass_attachment src_att = subpass->color_attachments[i]; 792 struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i]; 793 794 if (dst_att.attachment == VK_ATTACHMENT_UNUSED) 795 continue; 796 797 struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview; 798 struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview; 799 800 VkImageResolve2 region = { 801 .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2, 802 .extent = (VkExtent3D){fb->width, fb->height, 1}, 803 .srcSubresource = 804 (VkImageSubresourceLayers){ 805 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 806 .mipLevel = src_iview->vk.base_mip_level, 807 .baseArrayLayer = src_iview->vk.base_array_layer, 808 .layerCount = layer_count, 809 }, 810 .dstSubresource = 811 (VkImageSubresourceLayers){ 812 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 813 .mipLevel = dst_iview->vk.base_mip_level, 814 .baseArrayLayer = dst_iview->vk.base_array_layer, 815 .layerCount = layer_count, 816 }, 817 .srcOffset = (VkOffset3D){0, 0, 0}, 818 .dstOffset = (VkOffset3D){0, 0, 0}, 819 }; 820 821 radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk.format, 822 src_att.layout, dst_iview->image, dst_iview->vk.format, 823 dst_att.layout, ®ion); 824 } 825 826 cmd_buffer->state.flush_bits |= 827 RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | 828 radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL); 829} 830 831void 832radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer, 833 VkImageAspectFlags aspects, 834 VkResolveModeFlagBits resolve_mode) 835{ 836 struct vk_framebuffer *fb = cmd_buffer->state.framebuffer; 837 const struct radv_subpass *subpass = cmd_buffer->state.subpass; 838 struct radv_meta_saved_state saved_state; 839 uint32_t layer_count = fb->layers; 840 841 if (subpass->view_mask) 842 layer_count = util_last_bit(subpass->view_mask); 843 844 /* Resolves happen before the end-of-subpass barriers get executed, so 845 * we have to make the attachment shader-readable. 846 */ 847 cmd_buffer->state.flush_bits |= 848 radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) | 849 radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT, NULL) | 850 radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL); 851 852 struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment; 853 struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview; 854 struct radv_image *src_image = src_iview->image; 855 856 VkImageResolve2 region = {0}; 857 region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2; 858 region.srcSubresource.aspectMask = aspects; 859 region.srcSubresource.mipLevel = 0; 860 region.srcSubresource.baseArrayLayer = src_iview->vk.base_array_layer; 861 region.srcSubresource.layerCount = layer_count; 862 863 radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, ®ion); 864 865 radv_meta_save(&saved_state, cmd_buffer, 866 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS); 867 868 struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment; 869 struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dest_att.attachment].iview; 870 struct radv_image *dst_image = dst_iview->image; 871 872 struct radv_image_view tsrc_iview; 873 radv_image_view_init(&tsrc_iview, cmd_buffer->device, 874 &(VkImageViewCreateInfo){ 875 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 876 .image = radv_image_to_handle(src_image), 877 .viewType = radv_meta_get_view_type(src_image), 878 .format = src_iview->vk.format, 879 .subresourceRange = 880 { 881 .aspectMask = aspects, 882 .baseMipLevel = src_iview->vk.base_mip_level, 883 .levelCount = 1, 884 .baseArrayLayer = src_iview->vk.base_array_layer, 885 .layerCount = layer_count, 886 }, 887 }, 888 0, NULL); 889 890 struct radv_image_view tdst_iview; 891 radv_image_view_init(&tdst_iview, cmd_buffer->device, 892 &(VkImageViewCreateInfo){ 893 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 894 .image = radv_image_to_handle(dst_image), 895 .viewType = radv_meta_get_view_type(dst_image), 896 .format = dst_iview->vk.format, 897 .subresourceRange = 898 { 899 .aspectMask = aspects, 900 .baseMipLevel = dst_iview->vk.base_mip_level, 901 .levelCount = 1, 902 .baseArrayLayer = dst_iview->vk.base_array_layer, 903 .layerCount = layer_count, 904 }, 905 }, 906 0, NULL); 907 908 emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview, 909 &(VkExtent3D){fb->width, fb->height, layer_count}, aspects, 910 resolve_mode); 911 912 cmd_buffer->state.flush_bits |= 913 RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | 914 radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL); 915 916 VkImageLayout layout = cmd_buffer->state.attachments[dest_att.attachment].current_layout; 917 uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, 918 cmd_buffer->qf); 919 920 if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, layout, false, queue_mask)) { 921 VkImageSubresourceRange range = {0}; 922 range.aspectMask = aspects; 923 range.baseMipLevel = dst_iview->vk.base_mip_level; 924 range.levelCount = 1; 925 range.baseArrayLayer = dst_iview->vk.base_array_layer; 926 range.layerCount = layer_count; 927 928 uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, dst_image); 929 930 cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value); 931 } 932 933 radv_image_view_finish(&tsrc_iview); 934 radv_image_view_finish(&tdst_iview); 935 936 radv_meta_restore(&saved_state, cmd_buffer); 937} 938