1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdbool.h>
26
27#include "radv_meta.h"
28#include "radv_private.h"
29#include "sid.h"
30
31enum radv_depth_op {
32   DEPTH_DECOMPRESS,
33   DEPTH_RESUMMARIZE,
34};
35
36static nir_shader *
37build_expand_depth_stencil_compute_shader(struct radv_device *dev)
38{
39   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
40
41   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "expand_depth_stencil_compute");
42
43   /* We need at least 8/8/1 to cover an entire HTILE block in a single workgroup. */
44   b.shader->info.workgroup_size[0] = 8;
45   b.shader->info.workgroup_size[1] = 8;
46   nir_variable *input_img = nir_variable_create(b.shader, nir_var_image, img_type, "in_img");
47   input_img->data.descriptor_set = 0;
48   input_img->data.binding = 0;
49
50   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
51   output_img->data.descriptor_set = 0;
52   output_img->data.binding = 1;
53
54   nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
55   nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
56   nir_ssa_def *block_size =
57      nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
58                    b.shader->info.workgroup_size[2], 0);
59
60   nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
61
62   nir_ssa_def *data = nir_image_deref_load(
63      &b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id, nir_ssa_undef(&b, 1, 32),
64      nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
65
66   /* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid
67    * creating a vmcnt(0) because it expects the L1 cache to keep memory
68    * operations in-order for the same workgroup. The vmcnt(0) seems
69    * necessary however. */
70   nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE,
71                      .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
72
73   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
74                         nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0),
75                         .image_dim = GLSL_SAMPLER_DIM_2D);
76   return b.shader;
77}
78
79static VkResult
80create_expand_depth_stencil_compute(struct radv_device *device)
81{
82   VkResult result = VK_SUCCESS;
83   nir_shader *cs = build_expand_depth_stencil_compute_shader(device);
84
85   VkDescriptorSetLayoutCreateInfo ds_create_info = {
86      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
87      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
88      .bindingCount = 2,
89      .pBindings = (VkDescriptorSetLayoutBinding[]){
90         {.binding = 0,
91          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
92          .descriptorCount = 1,
93          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
94          .pImmutableSamplers = NULL},
95         {.binding = 1,
96          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
97          .descriptorCount = 1,
98          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
99          .pImmutableSamplers = NULL},
100      }};
101
102   result = radv_CreateDescriptorSetLayout(
103      radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
104      &device->meta_state.expand_depth_stencil_compute_ds_layout);
105   if (result != VK_SUCCESS)
106      goto cleanup;
107
108   VkPipelineLayoutCreateInfo pl_create_info = {
109      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
110      .setLayoutCount = 1,
111      .pSetLayouts = &device->meta_state.expand_depth_stencil_compute_ds_layout,
112      .pushConstantRangeCount = 0,
113      .pPushConstantRanges = NULL,
114   };
115
116   result = radv_CreatePipelineLayout(
117      radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
118      &device->meta_state.expand_depth_stencil_compute_p_layout);
119   if (result != VK_SUCCESS)
120      goto cleanup;
121
122   /* compute shader */
123
124   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
125      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
126      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
127      .module = vk_shader_module_handle_from_nir(cs),
128      .pName = "main",
129      .pSpecializationInfo = NULL,
130   };
131
132   VkComputePipelineCreateInfo vk_pipeline_info = {
133      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
134      .stage = pipeline_shader_stage,
135      .flags = 0,
136      .layout = device->meta_state.expand_depth_stencil_compute_p_layout,
137   };
138
139   result = radv_CreateComputePipelines(
140      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
141      &vk_pipeline_info, NULL,
142      &device->meta_state.expand_depth_stencil_compute_pipeline);
143   if (result != VK_SUCCESS)
144      goto cleanup;
145
146cleanup:
147   ralloc_free(cs);
148   return result;
149}
150
151static VkResult
152create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
153{
154   VkPipelineLayoutCreateInfo pl_create_info = {
155      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
156      .setLayoutCount = 0,
157      .pSetLayouts = NULL,
158      .pushConstantRangeCount = 0,
159      .pPushConstantRanges = NULL,
160   };
161
162   return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
163                                    &device->meta_state.alloc, layout);
164}
165
166static VkResult
167create_pipeline(struct radv_device *device, uint32_t samples, VkPipelineLayout layout,
168                enum radv_depth_op op, VkPipeline *pipeline)
169{
170   VkResult result;
171   VkDevice device_h = radv_device_to_handle(device);
172
173   mtx_lock(&device->meta_state.mtx);
174   if (*pipeline) {
175      mtx_unlock(&device->meta_state.mtx);
176      return VK_SUCCESS;
177   }
178
179   nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices(device);
180   nir_shader *fs_module = radv_meta_build_nir_fs_noop(device);
181
182   if (!vs_module || !fs_module) {
183      /* XXX: Need more accurate error */
184      result = VK_ERROR_OUT_OF_HOST_MEMORY;
185      goto cleanup;
186   }
187
188   const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
189      .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
190      .sampleLocationsEnable = false,
191   };
192
193   const VkPipelineRenderingCreateInfo rendering_create_info = {
194      .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
195      .depthAttachmentFormat = VK_FORMAT_D32_SFLOAT_S8_UINT,
196      .stencilAttachmentFormat = VK_FORMAT_D32_SFLOAT_S8_UINT,
197   };
198
199   const VkGraphicsPipelineCreateInfo pipeline_create_info = {
200      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
201      .pNext = &rendering_create_info,
202      .stageCount = 2,
203      .pStages =
204         (VkPipelineShaderStageCreateInfo[]){
205            {
206               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
207               .stage = VK_SHADER_STAGE_VERTEX_BIT,
208               .module = vk_shader_module_handle_from_nir(vs_module),
209               .pName = "main",
210            },
211            {
212               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
213               .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
214               .module = vk_shader_module_handle_from_nir(fs_module),
215               .pName = "main",
216            },
217         },
218      .pVertexInputState =
219         &(VkPipelineVertexInputStateCreateInfo){
220            .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
221            .vertexBindingDescriptionCount = 0,
222            .vertexAttributeDescriptionCount = 0,
223         },
224      .pInputAssemblyState =
225         &(VkPipelineInputAssemblyStateCreateInfo){
226            .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
227            .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
228            .primitiveRestartEnable = false,
229         },
230      .pViewportState =
231         &(VkPipelineViewportStateCreateInfo){
232            .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
233            .viewportCount = 1,
234            .scissorCount = 1,
235         },
236      .pRasterizationState =
237         &(VkPipelineRasterizationStateCreateInfo){
238            .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
239            .depthClampEnable = false,
240            .rasterizerDiscardEnable = false,
241            .polygonMode = VK_POLYGON_MODE_FILL,
242            .cullMode = VK_CULL_MODE_NONE,
243            .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
244         },
245      .pMultisampleState =
246         &(VkPipelineMultisampleStateCreateInfo){
247            .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
248            .pNext = &sample_locs_create_info,
249            .rasterizationSamples = samples,
250            .sampleShadingEnable = false,
251            .pSampleMask = NULL,
252            .alphaToCoverageEnable = false,
253            .alphaToOneEnable = false,
254         },
255      .pColorBlendState =
256         &(VkPipelineColorBlendStateCreateInfo){
257            .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
258            .logicOpEnable = false,
259            .attachmentCount = 0,
260            .pAttachments = NULL,
261         },
262      .pDepthStencilState =
263         &(VkPipelineDepthStencilStateCreateInfo){
264            .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
265            .depthTestEnable = false,
266            .depthWriteEnable = false,
267            .depthBoundsTestEnable = false,
268            .stencilTestEnable = false,
269         },
270      .pDynamicState =
271         &(VkPipelineDynamicStateCreateInfo){
272            .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
273            .dynamicStateCount = 3,
274            .pDynamicStates =
275               (VkDynamicState[]){
276                  VK_DYNAMIC_STATE_VIEWPORT,
277                  VK_DYNAMIC_STATE_SCISSOR,
278                  VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
279               },
280         },
281      .layout = layout,
282      .renderPass = VK_NULL_HANDLE,
283      .subpass = 0,
284   };
285
286   struct radv_graphics_pipeline_create_info extra = {
287      .use_rectlist = true,
288      .depth_compress_disable = true,
289      .stencil_compress_disable = true,
290      .resummarize_enable = op == DEPTH_RESUMMARIZE,
291   };
292
293   result = radv_graphics_pipeline_create(
294      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache), &pipeline_create_info,
295      &extra, &device->meta_state.alloc, pipeline);
296
297cleanup:
298   ralloc_free(fs_module);
299   ralloc_free(vs_module);
300   mtx_unlock(&device->meta_state.mtx);
301   return result;
302}
303
304void
305radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
306{
307   struct radv_meta_state *state = &device->meta_state;
308
309   for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
310      radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp[i].p_layout,
311                                 &state->alloc);
312
313      radv_DestroyPipeline(radv_device_to_handle(device),
314                           state->depth_decomp[i].decompress_pipeline, &state->alloc);
315      radv_DestroyPipeline(radv_device_to_handle(device),
316                           state->depth_decomp[i].resummarize_pipeline, &state->alloc);
317   }
318
319   radv_DestroyPipeline(radv_device_to_handle(device),
320                        state->expand_depth_stencil_compute_pipeline, &state->alloc);
321   radv_DestroyPipelineLayout(radv_device_to_handle(device),
322                              state->expand_depth_stencil_compute_p_layout, &state->alloc);
323   device->vk.dispatch_table.DestroyDescriptorSetLayout(
324      radv_device_to_handle(device), state->expand_depth_stencil_compute_ds_layout, &state->alloc);
325}
326
327VkResult
328radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
329{
330   struct radv_meta_state *state = &device->meta_state;
331   VkResult res = VK_SUCCESS;
332
333   for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
334      uint32_t samples = 1 << i;
335
336      res = create_pipeline_layout(device, &state->depth_decomp[i].p_layout);
337      if (res != VK_SUCCESS)
338         return res;
339
340      if (on_demand)
341         continue;
342
343      res = create_pipeline(device, samples, state->depth_decomp[i].p_layout, DEPTH_DECOMPRESS,
344                            &state->depth_decomp[i].decompress_pipeline);
345      if (res != VK_SUCCESS)
346         return res;
347
348      res = create_pipeline(device, samples, state->depth_decomp[i].p_layout, DEPTH_RESUMMARIZE,
349                            &state->depth_decomp[i].resummarize_pipeline);
350      if (res != VK_SUCCESS)
351         return res;
352   }
353
354   return create_expand_depth_stencil_compute(device);
355}
356
357static VkPipeline *
358radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
359                        const VkImageSubresourceRange *subresourceRange, enum radv_depth_op op)
360{
361   struct radv_meta_state *state = &cmd_buffer->device->meta_state;
362   uint32_t samples = image->info.samples;
363   uint32_t samples_log2 = ffs(samples) - 1;
364   VkPipeline *pipeline;
365
366   if (!state->depth_decomp[samples_log2].decompress_pipeline) {
367      VkResult ret;
368
369      ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].p_layout,
370                            DEPTH_DECOMPRESS, &state->depth_decomp[samples_log2].decompress_pipeline);
371      if (ret != VK_SUCCESS) {
372         cmd_buffer->record_result = ret;
373         return NULL;
374      }
375
376      ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].p_layout,
377                            DEPTH_RESUMMARIZE, &state->depth_decomp[samples_log2].resummarize_pipeline);
378      if (ret != VK_SUCCESS) {
379         cmd_buffer->record_result = ret;
380         return NULL;
381      }
382   }
383
384   switch (op) {
385   case DEPTH_DECOMPRESS:
386      pipeline = &state->depth_decomp[samples_log2].decompress_pipeline;
387      break;
388   case DEPTH_RESUMMARIZE:
389      pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
390      break;
391   default:
392      unreachable("unknown operation");
393   }
394
395   return pipeline;
396}
397
398static void
399radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
400                               const VkImageSubresourceRange *range, int level, int layer)
401{
402   struct radv_device *device = cmd_buffer->device;
403   struct radv_image_view iview;
404   uint32_t width, height;
405
406   width = radv_minify(image->info.width, range->baseMipLevel + level);
407   height = radv_minify(image->info.height, range->baseMipLevel + level);
408
409   radv_image_view_init(&iview, device,
410                        &(VkImageViewCreateInfo){
411                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
412                           .image = radv_image_to_handle(image),
413                           .viewType = radv_meta_get_view_type(image),
414                           .format = image->vk.format,
415                           .subresourceRange =
416                              {
417                                 .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
418                                 .baseMipLevel = range->baseMipLevel + level,
419                                 .levelCount = 1,
420                                 .baseArrayLayer = range->baseArrayLayer + layer,
421                                 .layerCount = 1,
422                              },
423                        },
424                        0, NULL);
425
426   const VkRenderingAttachmentInfo depth_att = {
427      .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
428      .imageView = radv_image_view_to_handle(&iview),
429      .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
430      .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
431      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
432   };
433
434   const VkRenderingAttachmentInfo stencil_att = {
435      .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
436      .imageView = radv_image_view_to_handle(&iview),
437      .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
438      .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
439      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
440   };
441
442   const VkRenderingInfo rendering_info = {
443      .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
444      .renderArea = {
445         .offset = { 0, 0 },
446         .extent = { width, height }
447      },
448      .layerCount = 1,
449      .pDepthAttachment = &depth_att,
450      .pStencilAttachment = &stencil_att,
451   };
452
453   radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
454
455   radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
456
457   radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
458
459   radv_image_view_finish(&iview);
460}
461
462static void
463radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
464                           const VkImageSubresourceRange *subresourceRange,
465                           struct radv_sample_locations_state *sample_locs, enum radv_depth_op op)
466{
467   struct radv_meta_saved_state saved_state;
468   VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
469   VkPipeline *pipeline;
470
471   radv_meta_save(
472      &saved_state, cmd_buffer,
473      RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_SAMPLE_LOCATIONS | RADV_META_SAVE_PASS);
474
475   pipeline = radv_get_depth_pipeline(cmd_buffer, image, subresourceRange, op);
476
477   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
478                        *pipeline);
479
480   if (sample_locs) {
481      assert(image->vk.create_flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
482
483      /* Set the sample locations specified during explicit or
484       * automatic layout transitions, otherwise the depth decompress
485       * pass uses the default HW locations.
486       */
487      radv_CmdSetSampleLocationsEXT(cmd_buffer_h,
488                                    &(VkSampleLocationsInfoEXT){
489                                       .sampleLocationsPerPixel = sample_locs->per_pixel,
490                                       .sampleLocationGridSize = sample_locs->grid_size,
491                                       .sampleLocationsCount = sample_locs->count,
492                                       .pSampleLocations = sample_locs->locations,
493                                    });
494   }
495
496   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
497
498      /* Do not decompress levels without HTILE. */
499      if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
500         continue;
501
502      uint32_t width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
503      uint32_t height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
504
505      radv_CmdSetViewport(cmd_buffer_h, 0, 1,
506                          &(VkViewport){.x = 0,
507                                        .y = 0,
508                                        .width = width,
509                                        .height = height,
510                                        .minDepth = 0.0f,
511                                        .maxDepth = 1.0f});
512
513      radv_CmdSetScissor(cmd_buffer_h, 0, 1,
514                         &(VkRect2D){
515                            .offset = {0, 0},
516                            .extent = {width, height},
517                         });
518
519      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
520         radv_process_depth_image_layer(cmd_buffer, image, subresourceRange, l, s);
521      }
522   }
523
524   radv_meta_restore(&saved_state, cmd_buffer);
525}
526
527static void
528radv_expand_depth_stencil_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
529                                  const VkImageSubresourceRange *subresourceRange)
530{
531   struct radv_meta_saved_state saved_state;
532   struct radv_image_view load_iview = {0};
533   struct radv_image_view store_iview = {0};
534   struct radv_device *device = cmd_buffer->device;
535
536   assert(radv_image_is_tc_compat_htile(image));
537
538   cmd_buffer->state.flush_bits |=
539      radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
540
541   radv_meta_save(&saved_state, cmd_buffer,
542                  RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
543
544   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
545                        device->meta_state.expand_depth_stencil_compute_pipeline);
546
547   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {
548      uint32_t width, height;
549
550      /* Do not decompress levels without HTILE. */
551      if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
552         continue;
553
554      width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
555      height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
556
557      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
558         radv_image_view_init(
559            &load_iview, cmd_buffer->device,
560            &(VkImageViewCreateInfo){
561               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
562               .image = radv_image_to_handle(image),
563               .viewType = VK_IMAGE_VIEW_TYPE_2D,
564               .format = image->vk.format,
565               .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
566                                    .baseMipLevel = subresourceRange->baseMipLevel + l,
567                                    .levelCount = 1,
568                                    .baseArrayLayer = subresourceRange->baseArrayLayer + s,
569                                    .layerCount = 1},
570            },
571            0, &(struct radv_image_view_extra_create_info){.enable_compression = true});
572         radv_image_view_init(
573            &store_iview, cmd_buffer->device,
574            &(VkImageViewCreateInfo){
575               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
576               .image = radv_image_to_handle(image),
577               .viewType = VK_IMAGE_VIEW_TYPE_2D,
578               .format = image->vk.format,
579               .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
580                                    .baseMipLevel = subresourceRange->baseMipLevel + l,
581                                    .levelCount = 1,
582                                    .baseArrayLayer = subresourceRange->baseArrayLayer + s,
583                                    .layerCount = 1},
584            },
585            0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
586
587         radv_meta_push_descriptor_set(
588            cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
589            device->meta_state.expand_depth_stencil_compute_p_layout, 0, /* set */
590            2, /* descriptorWriteCount */
591            (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
592                                      .dstBinding = 0,
593                                      .dstArrayElement = 0,
594                                      .descriptorCount = 1,
595                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
596                                      .pImageInfo =
597                                         (VkDescriptorImageInfo[]){
598                                            {
599                                               .sampler = VK_NULL_HANDLE,
600                                               .imageView = radv_image_view_to_handle(&load_iview),
601                                               .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
602                                            },
603                                         }},
604                                     {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
605                                      .dstBinding = 1,
606                                      .dstArrayElement = 0,
607                                      .descriptorCount = 1,
608                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
609                                      .pImageInfo = (VkDescriptorImageInfo[]){
610                                         {
611                                            .sampler = VK_NULL_HANDLE,
612                                            .imageView = radv_image_view_to_handle(&store_iview),
613                                            .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
614                                         },
615                                      }}});
616
617         radv_unaligned_dispatch(cmd_buffer, width, height, 1);
618
619         radv_image_view_finish(&load_iview);
620         radv_image_view_finish(&store_iview);
621      }
622   }
623
624   radv_meta_restore(&saved_state, cmd_buffer);
625
626   cmd_buffer->state.flush_bits |=
627      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
628      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
629
630   /* Initialize the HTILE metadata as "fully expanded". */
631   uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image);
632
633   cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, image, subresourceRange, htile_value);
634}
635
636void
637radv_expand_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
638                          const VkImageSubresourceRange *subresourceRange,
639                          struct radv_sample_locations_state *sample_locs)
640{
641   struct radv_barrier_data barrier = {0};
642
643   barrier.layout_transitions.depth_stencil_expand = 1;
644   radv_describe_layout_transition(cmd_buffer, &barrier);
645
646   if (cmd_buffer->qf == RADV_QUEUE_GENERAL) {
647      radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_DECOMPRESS);
648   } else {
649      radv_expand_depth_stencil_compute(cmd_buffer, image, subresourceRange);
650   }
651}
652
653void
654radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
655                               const VkImageSubresourceRange *subresourceRange,
656                               struct radv_sample_locations_state *sample_locs)
657{
658   struct radv_barrier_data barrier = {0};
659
660   barrier.layout_transitions.depth_stencil_resummarize = 1;
661   radv_describe_layout_transition(cmd_buffer, &barrier);
662
663   assert(cmd_buffer->qf == RADV_QUEUE_GENERAL);
664   radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_RESUMMARIZE);
665}
666