1/*
2 * Copyright © 2016 Dave Airlie
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdbool.h>
26
27#include "nir/nir_builder.h"
28#include "radv_meta.h"
29#include "radv_private.h"
30#include "sid.h"
31#include "vk_format.h"
32
33static nir_ssa_def *
34radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_ssa_def *input)
35{
36   unsigned i;
37
38   nir_ssa_def *cmp[3];
39   for (i = 0; i < 3; i++)
40      cmp[i] = nir_flt(b, nir_channel(b, input, i), nir_imm_int(b, 0x3b4d2e1c));
41
42   nir_ssa_def *ltvals[3];
43   for (i = 0; i < 3; i++)
44      ltvals[i] = nir_fmul(b, nir_channel(b, input, i), nir_imm_float(b, 12.92));
45
46   nir_ssa_def *gtvals[3];
47
48   for (i = 0; i < 3; i++) {
49      gtvals[i] = nir_fpow(b, nir_channel(b, input, i), nir_imm_float(b, 1.0 / 2.4));
50      gtvals[i] = nir_fmul(b, gtvals[i], nir_imm_float(b, 1.055));
51      gtvals[i] = nir_fsub(b, gtvals[i], nir_imm_float(b, 0.055));
52   }
53
54   nir_ssa_def *comp[4];
55   for (i = 0; i < 3; i++)
56      comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
57   comp[3] = nir_channels(b, input, 1 << 3);
58   return nir_vec(b, comp, 4);
59}
60
/* Build the compute shader used for color MSAA resolves.
 *
 * The shader samples every sample of a 2D MS source texture (set 0,
 * binding 0), averages/combines them via the shared resolve core, and
 * stores the result into a 2D storage image (set 0, binding 1).  Source
 * and destination offsets come in via push constants.
 */
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
{
   /* Integer formats must not be averaged as floats. */
   enum glsl_base_type img_base_type = is_integer ? GLSL_TYPE_UINT : GLSL_TYPE_FLOAT;
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, img_base_type);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs-%d-%s", samples,
                                         is_integer ? "int" : (is_srgb ? "srgb" : "float"));
   /* One workgroup covers an 8x8 pixel tile. */
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_ssa_def *global_id = get_global_ids(&b, 2);

   /* Push constants: ivec2 src_offset at byte 0, ivec2 dst_offset at byte 8
    * (matches the 16-byte push-constant range declared in create_layout()). */
   nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
   nir_ssa_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);

   nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
   nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);

   nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

   /* Shared helper emits the per-sample fetch/accumulate loop into 'color'. */
   radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img, color, src_coord);

   nir_ssa_def *outval = nir_load_var(&b, color);
   if (is_srgb)
      outval = radv_meta_build_resolve_srgb_conversion(&b, outval);

   /* Only xy are meaningful for a 2D image store; zw are undef. */
   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0),
                                         nir_channel(&b, dst_coord, 1),
                                         nir_ssa_undef(&b, 1, 32),
                                         nir_ssa_undef(&b, 1, 32));

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
                         .image_dim = GLSL_SAMPLER_DIM_2D);
   return b.shader;
}
107
/* Selects which aspect a depth/stencil resolve shader/pipeline handles. */
enum {
   DEPTH_RESOLVE,
   STENCIL_RESOLVE,
};
112
113static const char *
114get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
115{
116   switch (resolve_mode) {
117   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
118      return "zero";
119   case VK_RESOLVE_MODE_AVERAGE_BIT:
120      return "average";
121   case VK_RESOLVE_MODE_MIN_BIT:
122      return "min";
123   case VK_RESOLVE_MODE_MAX_BIT:
124      return "max";
125   default:
126      unreachable("invalid resolve mode");
127   }
128}
129
130static nir_shader *
131build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, int index,
132                                           VkResolveModeFlagBits resolve_mode)
133{
134   enum glsl_base_type img_base_type = index == DEPTH_RESOLVE ? GLSL_TYPE_FLOAT : GLSL_TYPE_UINT;
135   const struct glsl_type *sampler_type =
136      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, img_base_type);
137   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, img_base_type);
138
139   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs_%s-%s-%d",
140                                         index == DEPTH_RESOLVE ? "depth" : "stencil",
141                                         get_resolve_mode_str(resolve_mode), samples);
142   b.shader->info.workgroup_size[0] = 8;
143   b.shader->info.workgroup_size[1] = 8;
144
145   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
146   input_img->data.descriptor_set = 0;
147   input_img->data.binding = 0;
148
149   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
150   output_img->data.descriptor_set = 0;
151   output_img->data.binding = 1;
152
153   nir_ssa_def *img_coord = get_global_ids(&b, 3);
154
155   nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
156
157   nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;
158
159   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
160   tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
161   tex->op = nir_texop_txf_ms;
162   tex->src[0].src_type = nir_tex_src_coord;
163   tex->src[0].src = nir_src_for_ssa(img_coord);
164   tex->src[1].src_type = nir_tex_src_ms_index;
165   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
166   tex->src[2].src_type = nir_tex_src_texture_deref;
167   tex->src[2].src = nir_src_for_ssa(input_img_deref);
168   tex->dest_type = type;
169   tex->is_array = true;
170   tex->coord_components = 3;
171
172   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
173   nir_builder_instr_insert(&b, &tex->instr);
174
175   nir_ssa_def *outval = &tex->dest.ssa;
176
177   if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
178      for (int i = 1; i < samples; i++) {
179         nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
180         tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
181         tex_add->op = nir_texop_txf_ms;
182         tex_add->src[0].src_type = nir_tex_src_coord;
183         tex_add->src[0].src = nir_src_for_ssa(img_coord);
184         tex_add->src[1].src_type = nir_tex_src_ms_index;
185         tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
186         tex_add->src[2].src_type = nir_tex_src_texture_deref;
187         tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
188         tex_add->dest_type = type;
189         tex_add->is_array = true;
190         tex_add->coord_components = 3;
191
192         nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
193         nir_builder_instr_insert(&b, &tex_add->instr);
194
195         switch (resolve_mode) {
196         case VK_RESOLVE_MODE_AVERAGE_BIT:
197            assert(index == DEPTH_RESOLVE);
198            outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
199            break;
200         case VK_RESOLVE_MODE_MIN_BIT:
201            if (index == DEPTH_RESOLVE)
202               outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
203            else
204               outval = nir_umin(&b, outval, &tex_add->dest.ssa);
205            break;
206         case VK_RESOLVE_MODE_MAX_BIT:
207            if (index == DEPTH_RESOLVE)
208               outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
209            else
210               outval = nir_umax(&b, outval, &tex_add->dest.ssa);
211            break;
212         default:
213            unreachable("invalid resolve mode");
214         }
215      }
216
217      if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT)
218         outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
219   }
220
221   nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
222                                 nir_channel(&b, img_coord, 2), nir_ssa_undef(&b, 1, 32));
223   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
224                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
225                         .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
226   return b.shader;
227}
228
229static VkResult
230create_layout(struct radv_device *device)
231{
232   VkResult result;
233   /*
234    * two descriptors one for the image being sampled
235    * one for the buffer being written.
236    */
237   VkDescriptorSetLayoutCreateInfo ds_create_info = {
238      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
239      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
240      .bindingCount = 2,
241      .pBindings = (VkDescriptorSetLayoutBinding[]){
242         {.binding = 0,
243          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
244          .descriptorCount = 1,
245          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
246          .pImmutableSamplers = NULL},
247         {.binding = 1,
248          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
249          .descriptorCount = 1,
250          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
251          .pImmutableSamplers = NULL},
252      }};
253
254   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
255                                           &device->meta_state.alloc,
256                                           &device->meta_state.resolve_compute.ds_layout);
257   if (result != VK_SUCCESS)
258      goto fail;
259
260   VkPipelineLayoutCreateInfo pl_create_info = {
261      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
262      .setLayoutCount = 1,
263      .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
264      .pushConstantRangeCount = 1,
265      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
266   };
267
268   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
269                                      &device->meta_state.alloc,
270                                      &device->meta_state.resolve_compute.p_layout);
271   if (result != VK_SUCCESS)
272      goto fail;
273   return VK_SUCCESS;
274fail:
275   return result;
276}
277
278static VkResult
279create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb,
280                        VkPipeline *pipeline)
281{
282   VkResult result;
283
284   mtx_lock(&device->meta_state.mtx);
285   if (*pipeline) {
286      mtx_unlock(&device->meta_state.mtx);
287      return VK_SUCCESS;
288   }
289
290   nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);
291
292   /* compute shader */
293
294   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
295      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
296      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
297      .module = vk_shader_module_handle_from_nir(cs),
298      .pName = "main",
299      .pSpecializationInfo = NULL,
300   };
301
302   VkComputePipelineCreateInfo vk_pipeline_info = {
303      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
304      .stage = pipeline_shader_stage,
305      .flags = 0,
306      .layout = device->meta_state.resolve_compute.p_layout,
307   };
308
309   result = radv_CreateComputePipelines(radv_device_to_handle(device),
310                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
311                                        &vk_pipeline_info, NULL, pipeline);
312   if (result != VK_SUCCESS)
313      goto fail;
314
315   ralloc_free(cs);
316   mtx_unlock(&device->meta_state.mtx);
317   return VK_SUCCESS;
318fail:
319   ralloc_free(cs);
320   mtx_unlock(&device->meta_state.mtx);
321   return result;
322}
323
324static VkResult
325create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, int index,
326                                      VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline)
327{
328   VkResult result;
329
330   mtx_lock(&device->meta_state.mtx);
331   if (*pipeline) {
332      mtx_unlock(&device->meta_state.mtx);
333      return VK_SUCCESS;
334   }
335
336   nir_shader *cs =
337      build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);
338
339   /* compute shader */
340   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
341      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
342      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
343      .module = vk_shader_module_handle_from_nir(cs),
344      .pName = "main",
345      .pSpecializationInfo = NULL,
346   };
347
348   VkComputePipelineCreateInfo vk_pipeline_info = {
349      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
350      .stage = pipeline_shader_stage,
351      .flags = 0,
352      .layout = device->meta_state.resolve_compute.p_layout,
353   };
354
355   result = radv_CreateComputePipelines(radv_device_to_handle(device),
356                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
357                                        &vk_pipeline_info, NULL, pipeline);
358   if (result != VK_SUCCESS)
359      goto fail;
360
361   ralloc_free(cs);
362   mtx_unlock(&device->meta_state.mtx);
363   return VK_SUCCESS;
364fail:
365   ralloc_free(cs);
366   mtx_unlock(&device->meta_state.mtx);
367   return result;
368}
369
/* Initialize the compute-resolve meta state: the shared layouts and, unless
 * 'on_demand' is set, the full matrix of pipelines (color
 * float/int/srgb per sample count, depth/stencil average/min/max per sample
 * count, plus the sample-count-independent SAMPLE_ZERO pipelines).
 * With 'on_demand' the pipelines are created lazily at first use.
 */
VkResult
radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult res;

   res = create_layout(device);
   if (res != VK_SUCCESS)
      return res;

   if (on_demand)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      uint32_t samples = 1 << i;

      /* Color: float, integer and sRGB variants. */
      res = create_resolve_pipeline(device, samples, false, false,
                                    &state->resolve_compute.rc[i].pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_resolve_pipeline(device, samples, true, false,
                                    &state->resolve_compute.rc[i].i_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_resolve_pipeline(device, samples, false, true,
                                    &state->resolve_compute.rc[i].srgb_pipeline);
      if (res != VK_SUCCESS)
         return res;

      /* Depth: average/max/min.  (Average is depth-only.) */
      res = create_depth_stencil_resolve_pipeline(
         device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT,
         &state->resolve_compute.depth[i].average_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
                                                  VK_RESOLVE_MODE_MAX_BIT,
                                                  &state->resolve_compute.depth[i].max_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
                                                  VK_RESOLVE_MODE_MIN_BIT,
                                                  &state->resolve_compute.depth[i].min_pipeline);
      if (res != VK_SUCCESS)
         return res;

      /* Stencil: max/min only. */
      res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
                                                  VK_RESOLVE_MODE_MAX_BIT,
                                                  &state->resolve_compute.stencil[i].max_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
                                                  VK_RESOLVE_MODE_MIN_BIT,
                                                  &state->resolve_compute.stencil[i].min_pipeline);
      if (res != VK_SUCCESS)
         return res;
   }

   /* SAMPLE_ZERO only reads sample 0, so a single pipeline (samples = 0)
    * serves every sample count. */
   res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
                                               VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
                                               &state->resolve_compute.depth_zero_pipeline);
   if (res != VK_SUCCESS)
      return res;

   return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
                                                VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
                                                &state->resolve_compute.stencil_zero_pipeline);
}
442
443void
444radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
445{
446   struct radv_meta_state *state = &device->meta_state;
447   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
448      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline,
449                           &state->alloc);
450
451      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline,
452                           &state->alloc);
453
454      radv_DestroyPipeline(radv_device_to_handle(device),
455                           state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);
456
457      radv_DestroyPipeline(radv_device_to_handle(device),
458                           state->resolve_compute.depth[i].average_pipeline, &state->alloc);
459
460      radv_DestroyPipeline(radv_device_to_handle(device),
461                           state->resolve_compute.depth[i].max_pipeline, &state->alloc);
462
463      radv_DestroyPipeline(radv_device_to_handle(device),
464                           state->resolve_compute.depth[i].min_pipeline, &state->alloc);
465
466      radv_DestroyPipeline(radv_device_to_handle(device),
467                           state->resolve_compute.stencil[i].max_pipeline, &state->alloc);
468
469      radv_DestroyPipeline(radv_device_to_handle(device),
470                           state->resolve_compute.stencil[i].min_pipeline, &state->alloc);
471   }
472
473   radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline,
474                        &state->alloc);
475
476   radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline,
477                        &state->alloc);
478
479   device->vk.dispatch_table.DestroyDescriptorSetLayout(
480      radv_device_to_handle(device), state->resolve_compute.ds_layout, &state->alloc);
481   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout,
482                              &state->alloc);
483}
484
485static VkPipeline *
486radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview)
487{
488   struct radv_device *device = cmd_buffer->device;
489   struct radv_meta_state *state = &device->meta_state;
490   uint32_t samples = src_iview->image->info.samples;
491   uint32_t samples_log2 = ffs(samples) - 1;
492   VkPipeline *pipeline;
493
494   if (vk_format_is_int(src_iview->vk.format))
495      pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
496   else if (vk_format_is_srgb(src_iview->vk.format))
497      pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
498   else
499      pipeline = &state->resolve_compute.rc[samples_log2].pipeline;
500
501   if (!*pipeline) {
502      VkResult ret;
503
504      ret = create_resolve_pipeline(device, samples, vk_format_is_int(src_iview->vk.format),
505                                    vk_format_is_srgb(src_iview->vk.format), pipeline);
506      if (ret != VK_SUCCESS) {
507         cmd_buffer->record_result = ret;
508         return NULL;
509      }
510   }
511
512   return pipeline;
513}
514
515static void
516emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
517             struct radv_image_view *dest_iview, const VkOffset2D *src_offset,
518             const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent)
519{
520   struct radv_device *device = cmd_buffer->device;
521   VkPipeline *pipeline;
522
523   radv_meta_push_descriptor_set(
524      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
525      0, /* set */
526      2, /* descriptorWriteCount */
527      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
528                                .dstBinding = 0,
529                                .dstArrayElement = 0,
530                                .descriptorCount = 1,
531                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
532                                .pImageInfo =
533                                   (VkDescriptorImageInfo[]){
534                                      {.sampler = VK_NULL_HANDLE,
535                                       .imageView = radv_image_view_to_handle(src_iview),
536                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
537                                   }},
538                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
539                                .dstBinding = 1,
540                                .dstArrayElement = 0,
541                                .descriptorCount = 1,
542                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
543                                .pImageInfo = (VkDescriptorImageInfo[]){
544                                   {
545                                      .sampler = VK_NULL_HANDLE,
546                                      .imageView = radv_image_view_to_handle(dest_iview),
547                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
548                                   },
549                                }}});
550
551   pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);
552
553   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
554                        *pipeline);
555
556   unsigned push_constants[4] = {
557      src_offset->x,
558      src_offset->y,
559      dest_offset->x,
560      dest_offset->y,
561   };
562   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
563                         device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
564                         0, 16, push_constants);
565   radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
566}
567
/* Record a depth/stencil resolve: push the src/dst image descriptors, pick
 * (creating lazily if needed) the pipeline matching the aspect and resolve
 * mode, and dispatch one thread per destination texel across all layers.
 */
static void
emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
                           struct radv_image_view *dest_iview, const VkExtent3D *resolve_extent,
                           VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode)
{
   struct radv_device *device = cmd_buffer->device;
   const uint32_t samples = src_iview->image->info.samples;
   const uint32_t samples_log2 = ffs(samples) - 1;
   VkPipeline *pipeline;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
      0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 0,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                .pImageInfo =
                                   (VkDescriptorImageInfo[]){
                                      {.sampler = VK_NULL_HANDLE,
                                       .imageView = radv_image_view_to_handle(src_iview),
                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                   }},
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                .pImageInfo = (VkDescriptorImageInfo[]){
                                   {
                                      .sampler = VK_NULL_HANDLE,
                                      .imageView = radv_image_view_to_handle(dest_iview),
                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                   },
                                }}});

   /* Select the pipeline slot for this mode/aspect combination.
    * SAMPLE_ZERO uses a single sample-count-independent pipeline. */
   switch (resolve_mode) {
   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
      break;
   case VK_RESOLVE_MODE_AVERAGE_BIT:
      /* Averaging only makes sense for float depth values. */
      assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
      pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
      break;
   case VK_RESOLVE_MODE_MIN_BIT:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
      break;
   case VK_RESOLVE_MODE_MAX_BIT:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
      break;
   default:
      unreachable("invalid resolve mode");
   }

   /* Lazily create the pipeline (on-demand init path). */
   if (!*pipeline) {
      int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
      VkResult ret;

      ret = create_depth_stencil_resolve_pipeline(device, samples, index, resolve_mode, pipeline);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return;
      }
   }

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        *pipeline);

   radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height,
                           resolve_extent->depth);
}
650
651void
652radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
653                                VkFormat src_format, VkImageLayout src_image_layout,
654                                struct radv_image *dest_image, VkFormat dest_format,
655                                VkImageLayout dest_image_layout, const VkImageResolve2 *region)
656{
657   struct radv_meta_saved_state saved_state;
658
659   radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);
660
661   /* For partial resolves, DCC should be decompressed before resolving
662    * because the metadata is re-initialized to the uncompressed after.
663    */
664   uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->qf,
665                                                      cmd_buffer->qf);
666
667   if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
668       radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,
669                                  dest_image_layout, false, queue_mask) &&
670       (region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
671        region->extent.width != dest_image->info.width ||
672        region->extent.height != dest_image->info.height ||
673        region->extent.depth != dest_image->info.depth)) {
674      radv_decompress_dcc(cmd_buffer, dest_image,
675                          &(VkImageSubresourceRange){
676                             .aspectMask = region->dstSubresource.aspectMask,
677                             .baseMipLevel = region->dstSubresource.mipLevel,
678                             .levelCount = 1,
679                             .baseArrayLayer = region->dstSubresource.baseArrayLayer,
680                             .layerCount = region->dstSubresource.layerCount,
681                          });
682   }
683
684   radv_meta_save(
685      &saved_state, cmd_buffer,
686      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
687
688   assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
689   assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
690   assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
691
692   const uint32_t src_base_layer =
693      radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
694
695   const uint32_t dest_base_layer =
696      radv_meta_get_iview_layer(dest_image, &region->dstSubresource, &region->dstOffset);
697
698   const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent);
699   const struct VkOffset3D srcOffset = vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
700   const struct VkOffset3D dstOffset = vk_image_sanitize_offset(&dest_image->vk, region->dstOffset);
701
702   for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {
703
704      struct radv_image_view src_iview;
705      radv_image_view_init(&src_iview, cmd_buffer->device,
706                           &(VkImageViewCreateInfo){
707                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
708                              .image = radv_image_to_handle(src_image),
709                              .viewType = radv_meta_get_view_type(src_image),
710                              .format = src_format,
711                              .subresourceRange =
712                                 {
713                                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
714                                    .baseMipLevel = region->srcSubresource.mipLevel,
715                                    .levelCount = 1,
716                                    .baseArrayLayer = src_base_layer + layer,
717                                    .layerCount = 1,
718                                 },
719                           },
720                           0, NULL);
721
722      struct radv_image_view dest_iview;
723      radv_image_view_init(&dest_iview, cmd_buffer->device,
724                           &(VkImageViewCreateInfo){
725                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
726                              .image = radv_image_to_handle(dest_image),
727                              .viewType = radv_meta_get_view_type(dest_image),
728                              .format = vk_to_non_srgb_format(dest_format),
729                              .subresourceRange =
730                                 {
731                                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
732                                    .baseMipLevel = region->dstSubresource.mipLevel,
733                                    .levelCount = 1,
734                                    .baseArrayLayer = dest_base_layer + layer,
735                                    .layerCount = 1,
736                                 },
737                           },
738                           0, NULL);
739
740      emit_resolve(cmd_buffer, &src_iview, &dest_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
741                   &(VkOffset2D){dstOffset.x, dstOffset.y},
742                   &(VkExtent2D){extent.width, extent.height});
743
744      radv_image_view_finish(&src_iview);
745      radv_image_view_finish(&dest_iview);
746   }
747
748   radv_meta_restore(&saved_state, cmd_buffer);
749
750   if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
751       radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,
752                                  dest_image_layout, false, queue_mask)) {
753
754      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;
755
756      VkImageSubresourceRange range = {
757         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
758         .baseMipLevel = region->dstSubresource.mipLevel,
759         .levelCount = 1,
760         .baseArrayLayer = dest_base_layer,
761         .layerCount = region->dstSubresource.layerCount,
762      };
763
764      cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dest_image, &range, 0xffffffff);
765   }
766}
767
768/**
769 * Emit any needed resolves for the current subpass.
770 */
771void
772radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
773{
774   struct vk_framebuffer *fb = cmd_buffer->state.framebuffer;
775   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
776   struct radv_subpass_barrier barrier;
777   uint32_t layer_count = fb->layers;
778
779   if (subpass->view_mask)
780      layer_count = util_last_bit(subpass->view_mask);
781
782   /* Resolves happen before the end-of-subpass barriers get executed, so
783    * we have to make the attachment shader-readable.
784    */
785   barrier.src_stage_mask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
786   barrier.src_access_mask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
787   barrier.dst_access_mask = VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT;
788   radv_emit_subpass_barrier(cmd_buffer, &barrier);
789
790   for (uint32_t i = 0; i < subpass->color_count; ++i) {
791      struct radv_subpass_attachment src_att = subpass->color_attachments[i];
792      struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];
793
794      if (dst_att.attachment == VK_ATTACHMENT_UNUSED)
795         continue;
796
797      struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
798      struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;
799
800      VkImageResolve2 region = {
801         .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2,
802         .extent = (VkExtent3D){fb->width, fb->height, 1},
803         .srcSubresource =
804            (VkImageSubresourceLayers){
805               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
806               .mipLevel = src_iview->vk.base_mip_level,
807               .baseArrayLayer = src_iview->vk.base_array_layer,
808               .layerCount = layer_count,
809            },
810         .dstSubresource =
811            (VkImageSubresourceLayers){
812               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
813               .mipLevel = dst_iview->vk.base_mip_level,
814               .baseArrayLayer = dst_iview->vk.base_array_layer,
815               .layerCount = layer_count,
816            },
817         .srcOffset = (VkOffset3D){0, 0, 0},
818         .dstOffset = (VkOffset3D){0, 0, 0},
819      };
820
821      radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk.format,
822                                      src_att.layout, dst_iview->image, dst_iview->vk.format,
823                                      dst_att.layout, &region);
824   }
825
826   cmd_buffer->state.flush_bits |=
827      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
828      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
829}
830
/**
 * Resolve the current subpass' depth/stencil attachment with the compute
 * resolve path.
 *
 * \param aspects       which aspects (depth and/or stencil) to resolve.
 * \param resolve_mode  the VkResolveModeFlagBits operation to apply.
 */
void
radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
                                      VkImageAspectFlags aspects,
                                      VkResolveModeFlagBits resolve_mode)
{
   struct vk_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
   struct radv_meta_saved_state saved_state;
   uint32_t layer_count = fb->layers;

   /* With multiview, resolve every layer covered by the view mask instead
    * of the framebuffer layer count.
    */
   if (subpass->view_mask)
      layer_count = util_last_bit(subpass->view_mask);

   /* Resolves happen before the end-of-subpass barriers get executed, so
    * we have to make the attachment shader-readable.
    */
   cmd_buffer->state.flush_bits |=
      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT, NULL) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);

   struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
   struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
   struct radv_image *src_image = src_iview->image;

   /* NOTE(review): only srcSubresource is filled in; presumably
    * radv_decompress_resolve_src consumes just those fields and ignores the
    * zeroed offsets/extent — confirm against its implementation.
    */
   VkImageResolve2 region = {0};
   region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2;
   region.srcSubresource.aspectMask = aspects;
   region.srcSubresource.mipLevel = 0;
   region.srcSubresource.baseArrayLayer = src_iview->vk.base_array_layer;
   region.srcSubresource.layerCount = layer_count;

   /* Ensure the multisampled source is readable before sampling it. */
   radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);

   /* Save the compute state we are about to clobber; restored at the end. */
   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);

   struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
   struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
   struct radv_image *dst_image = dst_iview->image;

   /* Build temporary views restricted to the requested aspects and a single
    * mip level; destroyed with radv_image_view_finish() below.
    */
   struct radv_image_view tsrc_iview;
   radv_image_view_init(&tsrc_iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(src_image),
                           .viewType = radv_meta_get_view_type(src_image),
                           .format = src_iview->vk.format,
                           .subresourceRange =
                              {
                                 .aspectMask = aspects,
                                 .baseMipLevel = src_iview->vk.base_mip_level,
                                 .levelCount = 1,
                                 .baseArrayLayer = src_iview->vk.base_array_layer,
                                 .layerCount = layer_count,
                              },
                        },
                        0, NULL);

   struct radv_image_view tdst_iview;
   radv_image_view_init(&tdst_iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(dst_image),
                           .viewType = radv_meta_get_view_type(dst_image),
                           .format = dst_iview->vk.format,
                           .subresourceRange =
                              {
                                 .aspectMask = aspects,
                                 .baseMipLevel = dst_iview->vk.base_mip_level,
                                 .levelCount = 1,
                                 .baseArrayLayer = dst_iview->vk.base_array_layer,
                                 .layerCount = layer_count,
                              },
                        },
                        0, NULL);

   /* Dispatch the compute resolve over the full framebuffer area, covering
    * layer_count layers via the extent's depth component.
    */
   emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
                              &(VkExtent3D){fb->width, fb->height, layer_count}, aspects,
                              resolve_mode);

   /* Make the compute shader writes visible to subsequent consumers. */
   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);

   VkImageLayout layout = cmd_buffer->state.attachments[dest_att.attachment].current_layout;
   uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf,
                                                      cmd_buffer->qf);

   /* If the destination is HTILE-compressed in its current layout, reset
    * its HTILE metadata to the initial value over the resolved range —
    * presumably because the compute path bypassed HTILE; confirm.
    */
   if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, layout, false, queue_mask)) {
      VkImageSubresourceRange range = {0};
      range.aspectMask = aspects;
      range.baseMipLevel = dst_iview->vk.base_mip_level;
      range.levelCount = 1;
      range.baseArrayLayer = dst_iview->vk.base_array_layer;
      range.layerCount = layer_count;

      uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, dst_image);

      cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);
   }

   radv_image_view_finish(&tsrc_iview);
   radv_image_view_finish(&tdst_iview);

   radv_meta_restore(&saved_state, cmd_buffer);
}
938