1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdbool.h>
26
27#include "radv_meta.h"
28#include "radv_private.h"
29#include "sid.h"
30
31enum radv_color_op {
32   FAST_CLEAR_ELIMINATE,
33   FMASK_DECOMPRESS,
34   DCC_DECOMPRESS,
35};
36
37static nir_shader *
38build_dcc_decompress_compute_shader(struct radv_device *dev)
39{
40   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
41
42   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "dcc_decompress_compute");
43
44   /* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
45   b.shader->info.workgroup_size[0] = 16;
46   b.shader->info.workgroup_size[1] = 16;
47   nir_variable *input_img = nir_variable_create(b.shader, nir_var_image, img_type, "in_img");
48   input_img->data.descriptor_set = 0;
49   input_img->data.binding = 0;
50
51   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
52   output_img->data.descriptor_set = 0;
53   output_img->data.binding = 1;
54
55   nir_ssa_def *global_id = get_global_ids(&b, 2);
56   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0),
57                                         nir_channel(&b, global_id, 1),
58                                         nir_ssa_undef(&b, 1, 32),
59                                         nir_ssa_undef(&b, 1, 32));
60
61   nir_ssa_def *data = nir_image_deref_load(
62      &b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
63      nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
64
65   /* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid
66    * creating a vmcnt(0) because it expects the L1 cache to keep memory
67    * operations in-order for the same workgroup. The vmcnt(0) seems
68    * necessary however. */
69   nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE,
70                      .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
71
72   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
73                         nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0),
74                         .image_dim = GLSL_SAMPLER_DIM_2D);
75   return b.shader;
76}
77
78static VkResult
79create_dcc_compress_compute(struct radv_device *device)
80{
81   VkResult result = VK_SUCCESS;
82   nir_shader *cs = build_dcc_decompress_compute_shader(device);
83
84   VkDescriptorSetLayoutCreateInfo ds_create_info = {
85      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
86      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
87      .bindingCount = 2,
88      .pBindings = (VkDescriptorSetLayoutBinding[]){
89         {.binding = 0,
90          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
91          .descriptorCount = 1,
92          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
93          .pImmutableSamplers = NULL},
94         {.binding = 1,
95          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
96          .descriptorCount = 1,
97          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
98          .pImmutableSamplers = NULL},
99      }};
100
101   result = radv_CreateDescriptorSetLayout(
102      radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
103      &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
104   if (result != VK_SUCCESS)
105      goto cleanup;
106
107   VkPipelineLayoutCreateInfo pl_create_info = {
108      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
109      .setLayoutCount = 1,
110      .pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
111      .pushConstantRangeCount = 0,
112      .pPushConstantRanges = NULL,
113   };
114
115   result = radv_CreatePipelineLayout(
116      radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
117      &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
118   if (result != VK_SUCCESS)
119      goto cleanup;
120
121   /* compute shader */
122
123   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
124      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
125      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
126      .module = vk_shader_module_handle_from_nir(cs),
127      .pName = "main",
128      .pSpecializationInfo = NULL,
129   };
130
131   VkComputePipelineCreateInfo vk_pipeline_info = {
132      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
133      .stage = pipeline_shader_stage,
134      .flags = 0,
135      .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
136   };
137
138   result = radv_CreateComputePipelines(
139      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
140      &vk_pipeline_info, NULL,
141      &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
142   if (result != VK_SUCCESS)
143      goto cleanup;
144
145cleanup:
146   ralloc_free(cs);
147   return result;
148}
149
150static VkResult
151create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
152{
153   VkPipelineLayoutCreateInfo pl_create_info = {
154      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
155      .setLayoutCount = 0,
156      .pSetLayouts = NULL,
157      .pushConstantRangeCount = 0,
158      .pPushConstantRanges = NULL,
159   };
160
161   return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
162                                    &device->meta_state.alloc, layout);
163}
164
165static VkResult
166create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout)
167{
168   VkResult result;
169   VkDevice device_h = radv_device_to_handle(device);
170
171   nir_shader *fs_module = radv_meta_build_nir_fs_noop(device);
172
173   if (!fs_module) {
174      /* XXX: Need more accurate error */
175      result = VK_ERROR_OUT_OF_HOST_MEMORY;
176      goto cleanup;
177   }
178
179   const VkPipelineShaderStageCreateInfo stages[2] = {
180      {
181         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
182         .stage = VK_SHADER_STAGE_VERTEX_BIT,
183         .module = vs_module_h,
184         .pName = "main",
185      },
186      {
187         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
188         .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
189         .module = vk_shader_module_handle_from_nir(fs_module),
190         .pName = "main",
191      },
192   };
193
194   const VkPipelineVertexInputStateCreateInfo vi_state = {
195      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
196      .vertexBindingDescriptionCount = 0,
197      .vertexAttributeDescriptionCount = 0,
198   };
199
200   const VkPipelineInputAssemblyStateCreateInfo ia_state = {
201      .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
202      .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
203      .primitiveRestartEnable = false,
204   };
205
206   const VkPipelineColorBlendStateCreateInfo blend_state = {
207      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
208      .logicOpEnable = false,
209      .attachmentCount = 1,
210      .pAttachments = (VkPipelineColorBlendAttachmentState[]){
211         {
212            .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
213                              VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
214         },
215      }};
216   const VkPipelineRasterizationStateCreateInfo rs_state = {
217      .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
218      .depthClampEnable = false,
219      .rasterizerDiscardEnable = false,
220      .polygonMode = VK_POLYGON_MODE_FILL,
221      .cullMode = VK_CULL_MODE_NONE,
222      .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
223   };
224
225   const VkFormat color_format = VK_FORMAT_R8_UNORM;
226   const VkPipelineRenderingCreateInfo rendering_create_info = {
227      .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
228      .colorAttachmentCount = 1,
229      .pColorAttachmentFormats = &color_format,
230   };
231
232   result = radv_graphics_pipeline_create(
233      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
234      &(VkGraphicsPipelineCreateInfo){
235         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
236         .pNext = &rendering_create_info,
237         .stageCount = 2,
238         .pStages = stages,
239
240         .pVertexInputState = &vi_state,
241         .pInputAssemblyState = &ia_state,
242
243         .pViewportState =
244            &(VkPipelineViewportStateCreateInfo){
245               .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
246               .viewportCount = 1,
247               .scissorCount = 1,
248            },
249         .pRasterizationState = &rs_state,
250         .pMultisampleState =
251            &(VkPipelineMultisampleStateCreateInfo){
252               .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
253               .rasterizationSamples = 1,
254               .sampleShadingEnable = false,
255               .pSampleMask = NULL,
256               .alphaToCoverageEnable = false,
257               .alphaToOneEnable = false,
258            },
259         .pColorBlendState = &blend_state,
260         .pDynamicState =
261            &(VkPipelineDynamicStateCreateInfo){
262               .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
263               .dynamicStateCount = 2,
264               .pDynamicStates =
265                  (VkDynamicState[]){
266                     VK_DYNAMIC_STATE_VIEWPORT,
267                     VK_DYNAMIC_STATE_SCISSOR,
268                  },
269            },
270         .layout = layout,
271         .renderPass = VK_NULL_HANDLE,
272         .subpass = 0,
273      },
274      &(struct radv_graphics_pipeline_create_info){
275         .use_rectlist = true,
276         .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
277      },
278      &device->meta_state.alloc, &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
279   if (result != VK_SUCCESS)
280      goto cleanup;
281
282   result = radv_graphics_pipeline_create(
283      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
284      &(VkGraphicsPipelineCreateInfo){
285         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
286         .pNext = &rendering_create_info,
287         .stageCount = 2,
288         .pStages = stages,
289
290         .pVertexInputState = &vi_state,
291         .pInputAssemblyState = &ia_state,
292
293         .pViewportState =
294            &(VkPipelineViewportStateCreateInfo){
295               .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
296               .viewportCount = 1,
297               .scissorCount = 1,
298            },
299         .pRasterizationState = &rs_state,
300         .pMultisampleState =
301            &(VkPipelineMultisampleStateCreateInfo){
302               .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
303               .rasterizationSamples = 1,
304               .sampleShadingEnable = false,
305               .pSampleMask = NULL,
306               .alphaToCoverageEnable = false,
307               .alphaToOneEnable = false,
308            },
309         .pColorBlendState = &blend_state,
310         .pDynamicState =
311            &(VkPipelineDynamicStateCreateInfo){
312               .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
313               .dynamicStateCount = 2,
314               .pDynamicStates =
315                  (VkDynamicState[]){
316                     VK_DYNAMIC_STATE_VIEWPORT,
317                     VK_DYNAMIC_STATE_SCISSOR,
318                  },
319            },
320         .layout = layout,
321         .renderPass = VK_NULL_HANDLE,
322         .subpass = 0,
323      },
324      &(struct radv_graphics_pipeline_create_info){
325         .use_rectlist = true,
326         .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
327      },
328      &device->meta_state.alloc, &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
329   if (result != VK_SUCCESS)
330      goto cleanup;
331
332   result = radv_graphics_pipeline_create(
333      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
334      &(VkGraphicsPipelineCreateInfo){
335         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
336         .pNext = &rendering_create_info,
337         .stageCount = 2,
338         .pStages = stages,
339
340         .pVertexInputState = &vi_state,
341         .pInputAssemblyState = &ia_state,
342
343         .pViewportState =
344            &(VkPipelineViewportStateCreateInfo){
345               .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
346               .viewportCount = 1,
347               .scissorCount = 1,
348            },
349         .pRasterizationState = &rs_state,
350         .pMultisampleState =
351            &(VkPipelineMultisampleStateCreateInfo){
352               .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
353               .rasterizationSamples = 1,
354               .sampleShadingEnable = false,
355               .pSampleMask = NULL,
356               .alphaToCoverageEnable = false,
357               .alphaToOneEnable = false,
358            },
359         .pColorBlendState = &blend_state,
360         .pDynamicState =
361            &(VkPipelineDynamicStateCreateInfo){
362               .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
363               .dynamicStateCount = 2,
364               .pDynamicStates =
365                  (VkDynamicState[]){
366                     VK_DYNAMIC_STATE_VIEWPORT,
367                     VK_DYNAMIC_STATE_SCISSOR,
368                  },
369            },
370         .layout = layout,
371         .renderPass = VK_NULL_HANDLE,
372         .subpass = 0,
373      },
374      &(struct radv_graphics_pipeline_create_info){
375         .use_rectlist = true,
376         .custom_blend_mode = device->physical_device->rad_info.gfx_level >= GFX11
377                                 ? V_028808_CB_DCC_DECOMPRESS_GFX11
378                                 : V_028808_CB_DCC_DECOMPRESS_GFX8,
379      },
380      &device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
381   if (result != VK_SUCCESS)
382      goto cleanup;
383
384cleanup:
385   ralloc_free(fs_module);
386   return result;
387}
388
389void
390radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
391{
392   struct radv_meta_state *state = &device->meta_state;
393
394   radv_DestroyPipeline(radv_device_to_handle(device),
395                        state->fast_clear_flush.dcc_decompress_pipeline, &state->alloc);
396   radv_DestroyPipeline(radv_device_to_handle(device),
397                        state->fast_clear_flush.fmask_decompress_pipeline, &state->alloc);
398   radv_DestroyPipeline(radv_device_to_handle(device),
399                        state->fast_clear_flush.cmask_eliminate_pipeline, &state->alloc);
400   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout,
401                              &state->alloc);
402
403   radv_DestroyPipeline(radv_device_to_handle(device),
404                        state->fast_clear_flush.dcc_decompress_compute_pipeline, &state->alloc);
405   radv_DestroyPipelineLayout(radv_device_to_handle(device),
406                              state->fast_clear_flush.dcc_decompress_compute_p_layout,
407                              &state->alloc);
408   device->vk.dispatch_table.DestroyDescriptorSetLayout(
409      radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_compute_ds_layout,
410      &state->alloc);
411}
412
413static VkResult
414radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device)
415{
416   VkResult res = VK_SUCCESS;
417
418   mtx_lock(&device->meta_state.mtx);
419   if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
420      mtx_unlock(&device->meta_state.mtx);
421      return VK_SUCCESS;
422   }
423
424   nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices(device);
425   if (!vs_module) {
426      /* XXX: Need more accurate error */
427      res = VK_ERROR_OUT_OF_HOST_MEMORY;
428      goto cleanup;
429   }
430
431   res = create_pipeline_layout(device, &device->meta_state.fast_clear_flush.p_layout);
432   if (res != VK_SUCCESS)
433      goto cleanup;
434
435   VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
436   res = create_pipeline(device, vs_module_h, device->meta_state.fast_clear_flush.p_layout);
437   if (res != VK_SUCCESS)
438      goto cleanup;
439
440   res = create_dcc_compress_compute(device);
441   if (res != VK_SUCCESS)
442      goto cleanup;
443
444cleanup:
445   ralloc_free(vs_module);
446   mtx_unlock(&device->meta_state.mtx);
447
448   return res;
449}
450
451VkResult
452radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand)
453{
454   if (on_demand)
455      return VK_SUCCESS;
456
457   return radv_device_init_meta_fast_clear_flush_state_internal(device);
458}
459
460static void
461radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,
462                                           struct radv_image *image, uint64_t pred_offset,
463                                           bool value)
464{
465   uint64_t va = 0;
466
467   if (value) {
468      va = radv_buffer_get_va(image->bindings[0].bo) + image->bindings[0].offset;
469      va += pred_offset;
470   }
471
472   si_emit_set_predication_state(cmd_buffer, true, PREDICATION_OP_BOOL64, va);
473}
474
475static void
476radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
477                               const VkImageSubresourceRange *range, int level, int layer,
478                               bool flush_cb)
479{
480   struct radv_device *device = cmd_buffer->device;
481   struct radv_image_view iview;
482   uint32_t width, height;
483
484   width = radv_minify(image->info.width, range->baseMipLevel + level);
485   height = radv_minify(image->info.height, range->baseMipLevel + level);
486
487   radv_image_view_init(&iview, device,
488                        &(VkImageViewCreateInfo){
489                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
490                           .image = radv_image_to_handle(image),
491                           .viewType = radv_meta_get_view_type(image),
492                           .format = image->vk.format,
493                           .subresourceRange =
494                              {
495                                 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
496                                 .baseMipLevel = range->baseMipLevel + level,
497                                 .levelCount = 1,
498                                 .baseArrayLayer = range->baseArrayLayer + layer,
499                                 .layerCount = 1,
500                              },
501                        },
502                        0, NULL);
503
504   const VkRenderingAttachmentInfo color_att = {
505      .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
506      .imageView = radv_image_view_to_handle(&iview),
507      .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
508      .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
509      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
510   };
511
512   const VkRenderingInfo rendering_info = {
513      .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
514      .renderArea = {
515         .offset = { 0, 0 },
516         .extent = { width, height }
517      },
518      .layerCount = 1,
519      .colorAttachmentCount = 1,
520      .pColorAttachments = &color_att,
521   };
522
523   radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
524
525   if (flush_cb)
526      cmd_buffer->state.flush_bits |=
527         radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
528
529   radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
530
531   if (flush_cb)
532      cmd_buffer->state.flush_bits |=
533         radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
534
535   radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
536
537   radv_image_view_finish(&iview);
538}
539
540static void
541radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
542                         const VkImageSubresourceRange *subresourceRange, enum radv_color_op op)
543{
544   struct radv_device *device = cmd_buffer->device;
545   struct radv_meta_saved_state saved_state;
546   bool old_predicating = false;
547   bool flush_cb = false;
548   uint64_t pred_offset;
549   VkPipeline *pipeline;
550
551   switch (op) {
552   case FAST_CLEAR_ELIMINATE:
553      pipeline = &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
554      pred_offset = image->fce_pred_offset;
555      break;
556   case FMASK_DECOMPRESS:
557      pipeline = &device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
558      pred_offset = 0; /* FMASK_DECOMPRESS is never predicated. */
559
560      /* Flushing CB is required before and after FMASK_DECOMPRESS. */
561      flush_cb = true;
562      break;
563   case DCC_DECOMPRESS:
564      pipeline = &device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
565      pred_offset = image->dcc_pred_offset;
566
567      /* Flushing CB is required before and after DCC_DECOMPRESS. */
568      flush_cb = true;
569      break;
570   default:
571      unreachable("Invalid color op");
572   }
573
574   if (radv_dcc_enabled(image, subresourceRange->baseMipLevel) &&
575       (image->info.array_size != radv_get_layerCount(image, subresourceRange) ||
576        subresourceRange->baseArrayLayer != 0)) {
577      /* Only use predication if the image has DCC with mipmaps or
578       * if the range of layers covers the whole image because the
579       * predication is based on mip level.
580       */
581      pred_offset = 0;
582   }
583
584   if (!*pipeline) {
585      VkResult ret;
586
587      ret = radv_device_init_meta_fast_clear_flush_state_internal(device);
588      if (ret != VK_SUCCESS) {
589         cmd_buffer->record_result = ret;
590         return;
591      }
592   }
593
594   radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_PASS);
595
596   if (pred_offset) {
597      pred_offset += 8 * subresourceRange->baseMipLevel;
598
599      old_predicating = cmd_buffer->state.predicating;
600
601      radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);
602      cmd_buffer->state.predicating = true;
603   }
604
605   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
606                        *pipeline);
607
608   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
609      uint32_t width, height;
610
611      /* Do not decompress levels without DCC. */
612      if (op == DCC_DECOMPRESS && !radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
613         continue;
614
615      width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
616      height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
617
618      radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
619                          &(VkViewport){.x = 0,
620                                        .y = 0,
621                                        .width = width,
622                                        .height = height,
623                                        .minDepth = 0.0f,
624                                        .maxDepth = 1.0f});
625
626      radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
627                         &(VkRect2D){
628                            .offset = {0, 0},
629                            .extent = {width, height},
630                         });
631
632      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
633         radv_process_color_image_layer(cmd_buffer, image, subresourceRange, l, s, flush_cb);
634      }
635   }
636
637   cmd_buffer->state.flush_bits |=
638      RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
639
640   if (pred_offset) {
641      pred_offset += 8 * subresourceRange->baseMipLevel;
642
643      cmd_buffer->state.predicating = old_predicating;
644
645      radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);
646
647      if (cmd_buffer->state.predication_type != -1) {
648         /* Restore previous conditional rendering user state. */
649         si_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type,
650                                       cmd_buffer->state.predication_op,
651                                       cmd_buffer->state.predication_va);
652      }
653   }
654
655   radv_meta_restore(&saved_state, cmd_buffer);
656
657   /* Clear the image's fast-clear eliminate predicate because FMASK_DECOMPRESS and DCC_DECOMPRESS
658    * also perform a fast-clear eliminate.
659    */
660   radv_update_fce_metadata(cmd_buffer, image, subresourceRange, false);
661
662   /* Mark the image as being decompressed. */
663   if (op == DCC_DECOMPRESS)
664      radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
665}
666
667static void
668radv_fast_clear_eliminate(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
669                          const VkImageSubresourceRange *subresourceRange)
670{
671   struct radv_barrier_data barrier = {0};
672
673   barrier.layout_transitions.fast_clear_eliminate = 1;
674   radv_describe_layout_transition(cmd_buffer, &barrier);
675
676   radv_process_color_image(cmd_buffer, image, subresourceRange, FAST_CLEAR_ELIMINATE);
677}
678
679static void
680radv_fmask_decompress(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
681                      const VkImageSubresourceRange *subresourceRange)
682{
683   struct radv_barrier_data barrier = {0};
684
685   barrier.layout_transitions.fmask_decompress = 1;
686   radv_describe_layout_transition(cmd_buffer, &barrier);
687
688   radv_process_color_image(cmd_buffer, image, subresourceRange, FMASK_DECOMPRESS);
689}
690
691void
692radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
693                                    const VkImageSubresourceRange *subresourceRange)
694{
695   if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
696      if (radv_image_has_dcc(image) && radv_image_has_cmask(image)) {
697         /* MSAA images with DCC and CMASK might have been fast-cleared and might require a FCE but
698          * FMASK_DECOMPRESS can't eliminate DCC fast clears.
699          */
700         radv_fast_clear_eliminate(cmd_buffer, image, subresourceRange);
701      }
702
703      radv_fmask_decompress(cmd_buffer, image, subresourceRange);
704   } else {
705      /* Skip fast clear eliminate for images that support comp-to-single fast clears. */
706      if (image->support_comp_to_single)
707         return;
708
709      radv_fast_clear_eliminate(cmd_buffer, image, subresourceRange);
710   }
711}
712
713static void
714radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
715                            const VkImageSubresourceRange *subresourceRange)
716{
717   struct radv_meta_saved_state saved_state;
718   struct radv_image_view load_iview = {0};
719   struct radv_image_view store_iview = {0};
720   struct radv_device *device = cmd_buffer->device;
721
722   cmd_buffer->state.flush_bits |=
723      radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
724
725   if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
726      VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
727      if (ret != VK_SUCCESS) {
728         cmd_buffer->record_result = ret;
729         return;
730      }
731   }
732
733   radv_meta_save(&saved_state, cmd_buffer,
734                  RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
735
736   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
737                        device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
738
739   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {
740      uint32_t width, height;
741
742      /* Do not decompress levels without DCC. */
743      if (!radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
744         continue;
745
746      width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
747      height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
748
749      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
750         radv_image_view_init(
751            &load_iview, cmd_buffer->device,
752            &(VkImageViewCreateInfo){
753               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
754               .image = radv_image_to_handle(image),
755               .viewType = VK_IMAGE_VIEW_TYPE_2D,
756               .format = image->vk.format,
757               .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
758                                    .baseMipLevel = subresourceRange->baseMipLevel + l,
759                                    .levelCount = 1,
760                                    .baseArrayLayer = subresourceRange->baseArrayLayer + s,
761                                    .layerCount = 1},
762            },
763            0, &(struct radv_image_view_extra_create_info){.enable_compression = true});
764         radv_image_view_init(
765            &store_iview, cmd_buffer->device,
766            &(VkImageViewCreateInfo){
767               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
768               .image = radv_image_to_handle(image),
769               .viewType = VK_IMAGE_VIEW_TYPE_2D,
770               .format = image->vk.format,
771               .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
772                                    .baseMipLevel = subresourceRange->baseMipLevel + l,
773                                    .levelCount = 1,
774                                    .baseArrayLayer = subresourceRange->baseArrayLayer + s,
775                                    .layerCount = 1},
776            },
777            0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
778
779         radv_meta_push_descriptor_set(
780            cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
781            device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, 0, /* set */
782            2, /* descriptorWriteCount */
783            (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
784                                      .dstBinding = 0,
785                                      .dstArrayElement = 0,
786                                      .descriptorCount = 1,
787                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
788                                      .pImageInfo =
789                                         (VkDescriptorImageInfo[]){
790                                            {
791                                               .sampler = VK_NULL_HANDLE,
792                                               .imageView = radv_image_view_to_handle(&load_iview),
793                                               .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
794                                            },
795                                         }},
796                                     {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
797                                      .dstBinding = 1,
798                                      .dstArrayElement = 0,
799                                      .descriptorCount = 1,
800                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
801                                      .pImageInfo = (VkDescriptorImageInfo[]){
802                                         {
803                                            .sampler = VK_NULL_HANDLE,
804                                            .imageView = radv_image_view_to_handle(&store_iview),
805                                            .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
806                                         },
807                                      }}});
808
809         radv_unaligned_dispatch(cmd_buffer, width, height, 1);
810
811         radv_image_view_finish(&load_iview);
812         radv_image_view_finish(&store_iview);
813      }
814   }
815
816   /* Mark this image as actually being decompressed. */
817   radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
818
819   radv_meta_restore(&saved_state, cmd_buffer);
820
821   cmd_buffer->state.flush_bits |=
822      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
823      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
824
825   /* Initialize the DCC metadata as "fully expanded". */
826   cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff);
827}
828
829void
830radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
831                    const VkImageSubresourceRange *subresourceRange)
832{
833   struct radv_barrier_data barrier = {0};
834
835   barrier.layout_transitions.dcc_decompress = 1;
836   radv_describe_layout_transition(cmd_buffer, &barrier);
837
838   if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
839      radv_process_color_image(cmd_buffer, image, subresourceRange, DCC_DECOMPRESS);
840   else
841      radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
842}
843