1 /*
2  * Copyright © 2020 Raspberry Pi Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26 
27 #include "compiler/nir/nir_builder.h"
28 #include "util/u_pack_color.h"
29 
30 static void
get_hw_clear_color(struct v3dv_device *device, const VkClearColorValue *color, VkFormat fb_format, VkFormat image_format, uint32_t internal_type, uint32_t internal_bpp, uint32_t *hw_color)31 get_hw_clear_color(struct v3dv_device *device,
32                    const VkClearColorValue *color,
33                    VkFormat fb_format,
34                    VkFormat image_format,
35                    uint32_t internal_type,
36                    uint32_t internal_bpp,
37                    uint32_t *hw_color)
38 {
39    const uint32_t internal_size = 4 << internal_bpp;
40 
41    /* If the image format doesn't match the framebuffer format, then we are
42     * trying to clear an unsupported tlb format using a compatible
43     * format for the framebuffer. In this case, we want to make sure that
44     * we pack the clear value according to the original format semantics,
45     * not the compatible format.
46     */
47    if (fb_format == image_format) {
48       v3dv_X(device, get_hw_clear_color)(color, internal_type, internal_size,
49                                          hw_color);
50    } else {
51       union util_color uc;
52       enum pipe_format pipe_image_format =
53          vk_format_to_pipe_format(image_format);
54       util_pack_color(color->float32, pipe_image_format, &uc);
55       memcpy(hw_color, uc.ui, internal_size);
56    }
57 }
58 
59 /* Returns true if the implementation is able to handle the case, false
60  * otherwise.
61 */
62 static bool
clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_image *image, const VkClearValue *clear_value, const VkImageSubresourceRange *range)63 clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
64                 struct v3dv_image *image,
65                 const VkClearValue *clear_value,
66                 const VkImageSubresourceRange *range)
67 {
68    const VkOffset3D origin = { 0, 0, 0 };
69    VkFormat fb_format;
70    if (!v3dv_meta_can_use_tlb(image, &origin, &fb_format))
71       return false;
72 
73    uint32_t internal_type, internal_bpp;
74    v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
75       (fb_format, range->aspectMask,
76        &internal_type, &internal_bpp);
77 
78    union v3dv_clear_value hw_clear_value = { 0 };
79    if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
80       get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format,
81                          image->vk.format, internal_type, internal_bpp,
82                          &hw_clear_value.color[0]);
83    } else {
84       assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
85              (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
86       hw_clear_value.z = clear_value->depthStencil.depth;
87       hw_clear_value.s = clear_value->depthStencil.stencil;
88    }
89 
90    uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
91    uint32_t min_level = range->baseMipLevel;
92    uint32_t max_level = range->baseMipLevel + level_count;
93 
94    /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
95     * Instead, we need to consider the full depth dimension of the image, which
96     * goes from 0 up to the level's depth extent.
97     */
98    uint32_t min_layer;
99    uint32_t max_layer;
100    if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
101       min_layer = range->baseArrayLayer;
102       max_layer = range->baseArrayLayer +
103                   vk_image_subresource_layer_count(&image->vk, range);
104    } else {
105       min_layer = 0;
106       max_layer = 0;
107    }
108 
109    for (uint32_t level = min_level; level < max_level; level++) {
110       if (image->vk.image_type == VK_IMAGE_TYPE_3D)
111          max_layer = u_minify(image->vk.extent.depth, level);
112 
113       uint32_t width = u_minify(image->vk.extent.width, level);
114       uint32_t height = u_minify(image->vk.extent.height, level);
115 
116       struct v3dv_job *job =
117          v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
118 
119       if (!job)
120          return true;
121 
122       v3dv_job_start_frame(job, width, height, max_layer, false,
123                            1, internal_bpp,
124                            image->vk.samples > VK_SAMPLE_COUNT_1_BIT);
125 
126       struct v3dv_meta_framebuffer framebuffer;
127       v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
128                                                  internal_type,
129                                                  &job->frame_tiling);
130 
131       v3dv_X(job->device, job_emit_binning_flush)(job);
132 
133       /* If this triggers it is an application bug: the spec requires
134        * that any aspects to clear are present in the image.
135        */
136       assert(range->aspectMask & image->vk.aspects);
137 
138       v3dv_X(job->device, meta_emit_clear_image_rcl)
139          (job, image, &framebuffer, &hw_clear_value,
140           range->aspectMask, min_layer, max_layer, level);
141 
142       v3dv_cmd_buffer_finish_job(cmd_buffer);
143    }
144 
145    return true;
146 }
147 
148 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage _image, VkImageLayout imageLayout, const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)149 v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
150                         VkImage _image,
151                         VkImageLayout imageLayout,
152                         const VkClearColorValue *pColor,
153                         uint32_t rangeCount,
154                         const VkImageSubresourceRange *pRanges)
155 {
156    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
157    V3DV_FROM_HANDLE(v3dv_image, image, _image);
158 
159    const VkClearValue clear_value = {
160       .color = *pColor,
161    };
162 
163    cmd_buffer->state.is_transfer = true;
164 
165    for (uint32_t i = 0; i < rangeCount; i++) {
166       if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
167          continue;
168       unreachable("Unsupported color clear.");
169    }
170 
171    cmd_buffer->state.is_transfer = false;
172 }
173 
174 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage _image, VkImageLayout imageLayout, const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)175 v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
176                                VkImage _image,
177                                VkImageLayout imageLayout,
178                                const VkClearDepthStencilValue *pDepthStencil,
179                                uint32_t rangeCount,
180                                const VkImageSubresourceRange *pRanges)
181 {
182    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
183    V3DV_FROM_HANDLE(v3dv_image, image, _image);
184 
185    const VkClearValue clear_value = {
186       .depthStencil = *pDepthStencil,
187    };
188 
189    cmd_buffer->state.is_transfer = true;
190 
191    for (uint32_t i = 0; i < rangeCount; i++) {
192       if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
193          continue;
194       unreachable("Unsupported depth/stencil clear.");
195    }
196 
197    cmd_buffer->state.is_transfer = false;
198 }
199 
200 static void
destroy_color_clear_pipeline(VkDevice _device, uint64_t pipeline, VkAllocationCallbacks *alloc)201 destroy_color_clear_pipeline(VkDevice _device,
202                              uint64_t pipeline,
203                              VkAllocationCallbacks *alloc)
204 {
205    struct v3dv_meta_color_clear_pipeline *p =
206       (struct v3dv_meta_color_clear_pipeline *) (uintptr_t) pipeline;
207    v3dv_DestroyPipeline(_device, p->pipeline, alloc);
208    if (p->cached)
209       v3dv_DestroyRenderPass(_device, p->pass, alloc);
210    vk_free(alloc, p);
211 }
212 
213 static void
destroy_depth_clear_pipeline(VkDevice _device, struct v3dv_meta_depth_clear_pipeline *p, VkAllocationCallbacks *alloc)214 destroy_depth_clear_pipeline(VkDevice _device,
215                              struct v3dv_meta_depth_clear_pipeline *p,
216                              VkAllocationCallbacks *alloc)
217 {
218    v3dv_DestroyPipeline(_device, p->pipeline, alloc);
219    vk_free(alloc, p);
220 }
221 
222 static VkResult
create_color_clear_pipeline_layout(struct v3dv_device *device, VkPipelineLayout *pipeline_layout)223 create_color_clear_pipeline_layout(struct v3dv_device *device,
224                                    VkPipelineLayout *pipeline_layout)
225 {
226    /* FIXME: this is abusing a bit the API, since not all of our clear
227     * pipelines have a geometry shader. We could create 2 different pipeline
228     * layouts, but this works for us for now.
229     */
230    VkPushConstantRange ranges[2] = {
231       { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
232       { VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4 },
233    };
234 
235    VkPipelineLayoutCreateInfo info = {
236       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
237       .setLayoutCount = 0,
238       .pushConstantRangeCount = 2,
239       .pPushConstantRanges = ranges,
240    };
241 
242    return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
243                                     &info, &device->vk.alloc, pipeline_layout);
244 }
245 
246 static VkResult
create_depth_clear_pipeline_layout(struct v3dv_device *device, VkPipelineLayout *pipeline_layout)247 create_depth_clear_pipeline_layout(struct v3dv_device *device,
248                                    VkPipelineLayout *pipeline_layout)
249 {
250    /* FIXME: this is abusing a bit the API, since not all of our clear
251     * pipelines have a geometry shader. We could create 2 different pipeline
252     * layouts, but this works for us for now.
253     */
254    VkPushConstantRange ranges[2] = {
255       { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
256       { VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4 },
257    };
258 
259    VkPipelineLayoutCreateInfo info = {
260       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
261       .setLayoutCount = 0,
262       .pushConstantRangeCount = 2,
263       .pPushConstantRanges = ranges
264    };
265 
266    return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
267                                     &info, &device->vk.alloc, pipeline_layout);
268 }
269 
270 void
v3dv_meta_clear_init(struct v3dv_device *device)271 v3dv_meta_clear_init(struct v3dv_device *device)
272 {
273    device->meta.color_clear.cache =
274       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
275 
276    create_color_clear_pipeline_layout(device,
277                                       &device->meta.color_clear.p_layout);
278 
279    device->meta.depth_clear.cache =
280       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
281 
282    create_depth_clear_pipeline_layout(device,
283                                       &device->meta.depth_clear.p_layout);
284 }
285 
286 void
v3dv_meta_clear_finish(struct v3dv_device *device)287 v3dv_meta_clear_finish(struct v3dv_device *device)
288 {
289    VkDevice _device = v3dv_device_to_handle(device);
290 
291    hash_table_foreach(device->meta.color_clear.cache, entry) {
292       struct v3dv_meta_color_clear_pipeline *item = entry->data;
293       destroy_color_clear_pipeline(_device, (uintptr_t)item, &device->vk.alloc);
294    }
295    _mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);
296 
297    if (device->meta.color_clear.p_layout) {
298       v3dv_DestroyPipelineLayout(_device, device->meta.color_clear.p_layout,
299                                  &device->vk.alloc);
300    }
301 
302    hash_table_foreach(device->meta.depth_clear.cache, entry) {
303       struct v3dv_meta_depth_clear_pipeline *item = entry->data;
304       destroy_depth_clear_pipeline(_device, item, &device->vk.alloc);
305    }
306    _mesa_hash_table_destroy(device->meta.depth_clear.cache, NULL);
307 
308    if (device->meta.depth_clear.p_layout) {
309       v3dv_DestroyPipelineLayout(_device, device->meta.depth_clear.p_layout,
310                                  &device->vk.alloc);
311    }
312 }
313 
314 static nir_shader *
get_clear_rect_vsnull315 get_clear_rect_vs()
316 {
317    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
318    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
319                                                   "meta clear vs");
320 
321    const struct glsl_type *vec4 = glsl_vec4_type();
322    nir_variable *vs_out_pos =
323       nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
324    vs_out_pos->data.location = VARYING_SLOT_POS;
325 
326    nir_ssa_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
327    nir_store_var(&b, vs_out_pos, pos, 0xf);
328 
329    return b.shader;
330 }
331 
332 static nir_shader *
get_clear_rect_gs(uint32_t push_constant_layer_base)333 get_clear_rect_gs(uint32_t push_constant_layer_base)
334 {
335    /* FIXME: this creates a geometry shader that takes the index of a single
336     * layer to clear from push constants, so we need to emit a draw call for
337     * each layer that we want to clear. We could actually do better and have it
338     * take a range of layers and then emit one triangle per layer to clear,
339     * however, if we were to do this we would need to be careful not to exceed
340     * the maximum number of output vertices allowed in a geometry shader.
341     */
342    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
343    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
344                                                   "meta clear gs");
345    nir_shader *nir = b.shader;
346    nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
347    nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
348                                (1ull << VARYING_SLOT_LAYER);
349    nir->info.gs.input_primitive = SHADER_PRIM_TRIANGLES;
350    nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
351    nir->info.gs.vertices_in = 3;
352    nir->info.gs.vertices_out = 3;
353    nir->info.gs.invocations = 1;
354    nir->info.gs.active_stream_mask = 0x1;
355 
356    /* in vec4 gl_Position[3] */
357    nir_variable *gs_in_pos =
358       nir_variable_create(b.shader, nir_var_shader_in,
359                           glsl_array_type(glsl_vec4_type(), 3, 0),
360                           "in_gl_Position");
361    gs_in_pos->data.location = VARYING_SLOT_POS;
362 
363    /* out vec4 gl_Position */
364    nir_variable *gs_out_pos =
365       nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
366                           "out_gl_Position");
367    gs_out_pos->data.location = VARYING_SLOT_POS;
368 
369    /* out float gl_Layer */
370    nir_variable *gs_out_layer =
371       nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
372                           "out_gl_Layer");
373    gs_out_layer->data.location = VARYING_SLOT_LAYER;
374 
375    /* Emit output triangle */
376    for (uint32_t i = 0; i < 3; i++) {
377       /* gl_Position from shader input */
378       nir_deref_instr *in_pos_i =
379          nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
380       nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
381 
382       /* gl_Layer from push constants */
383       nir_ssa_def *layer =
384          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
385                                 .base = push_constant_layer_base, .range = 4);
386       nir_store_var(&b, gs_out_layer, layer, 0x1);
387 
388       nir_emit_vertex(&b, 0);
389    }
390 
391    nir_end_primitive(&b, 0);
392 
393    return nir;
394 }
395 
396 static nir_shader *
get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)397 get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)
398 {
399    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
400    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
401                                                   "meta clear fs");
402 
403    enum pipe_format pformat = vk_format_to_pipe_format(format);
404    const struct glsl_type *fs_out_type =
405       util_format_is_float(pformat) ? glsl_vec4_type() : glsl_uvec4_type();
406 
407    nir_variable *fs_out_color =
408       nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
409    fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
410 
411    nir_ssa_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
412    nir_store_var(&b, fs_out_color, color_load, 0xf);
413 
414    return b.shader;
415 }
416 
417 static nir_shader *
get_depth_clear_rect_fsnull418 get_depth_clear_rect_fs()
419 {
420    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
421    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
422                                                   "meta depth clear fs");
423 
424    nir_variable *fs_out_depth =
425       nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
426                           "out_depth");
427    fs_out_depth->data.location = FRAG_RESULT_DEPTH;
428 
429    nir_ssa_def *depth_load =
430       nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
431 
432    nir_store_var(&b, fs_out_depth, depth_load, 0x1);
433 
434    return b.shader;
435 }
436 
437 static VkResult
create_pipeline(struct v3dv_device *device, struct v3dv_render_pass *pass, uint32_t subpass_idx, uint32_t samples, struct nir_shader *vs_nir, struct nir_shader *gs_nir, struct nir_shader *fs_nir, const VkPipelineVertexInputStateCreateInfo *vi_state, const VkPipelineDepthStencilStateCreateInfo *ds_state, const VkPipelineColorBlendStateCreateInfo *cb_state, const VkPipelineLayout layout, VkPipeline *pipeline)438 create_pipeline(struct v3dv_device *device,
439                 struct v3dv_render_pass *pass,
440                 uint32_t subpass_idx,
441                 uint32_t samples,
442                 struct nir_shader *vs_nir,
443                 struct nir_shader *gs_nir,
444                 struct nir_shader *fs_nir,
445                 const VkPipelineVertexInputStateCreateInfo *vi_state,
446                 const VkPipelineDepthStencilStateCreateInfo *ds_state,
447                 const VkPipelineColorBlendStateCreateInfo *cb_state,
448                 const VkPipelineLayout layout,
449                 VkPipeline *pipeline)
450 {
451    VkPipelineShaderStageCreateInfo stages[3] = { 0 };
452    struct vk_shader_module vs_m = vk_shader_module_from_nir(vs_nir);
453    struct vk_shader_module gs_m;
454    struct vk_shader_module fs_m;
455 
456    uint32_t stage_count = 0;
457    stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
458    stages[stage_count].stage = VK_SHADER_STAGE_VERTEX_BIT;
459    stages[stage_count].module = vk_shader_module_to_handle(&vs_m);
460    stages[stage_count].pName = "main";
461    stage_count++;
462 
463    if (gs_nir) {
464       gs_m = vk_shader_module_from_nir(gs_nir);
465       stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
466       stages[stage_count].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
467       stages[stage_count].module = vk_shader_module_to_handle(&gs_m);
468       stages[stage_count].pName = "main";
469       stage_count++;
470    }
471 
472    if (fs_nir) {
473       fs_m = vk_shader_module_from_nir(fs_nir);
474       stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
475       stages[stage_count].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
476       stages[stage_count].module = vk_shader_module_to_handle(&fs_m);
477       stages[stage_count].pName = "main";
478       stage_count++;
479    }
480 
481    VkGraphicsPipelineCreateInfo info = {
482       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
483 
484       .stageCount = stage_count,
485       .pStages = stages,
486 
487       .pVertexInputState = vi_state,
488 
489       .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
490          .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
491          .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
492          .primitiveRestartEnable = false,
493       },
494 
495       .pViewportState = &(VkPipelineViewportStateCreateInfo) {
496          .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
497          .viewportCount = 1,
498          .scissorCount = 1,
499       },
500 
501       .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
502          .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
503          .rasterizerDiscardEnable = false,
504          .polygonMode = VK_POLYGON_MODE_FILL,
505          .cullMode = VK_CULL_MODE_NONE,
506          .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
507          .depthBiasEnable = false,
508       },
509 
510       .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
511          .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
512          .rasterizationSamples = samples,
513          .sampleShadingEnable = false,
514          .pSampleMask = NULL,
515          .alphaToCoverageEnable = false,
516          .alphaToOneEnable = false,
517       },
518 
519       .pDepthStencilState = ds_state,
520 
521       .pColorBlendState = cb_state,
522 
523       /* The meta clear pipeline declares all state as dynamic.
524        * As a consequence, vkCmdBindPipeline writes no dynamic state
525        * to the cmd buffer. Therefore, at the end of the meta clear,
526        * we need only restore dynamic state that was vkCmdSet.
527        */
528       .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
529          .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
530          .dynamicStateCount = 6,
531          .pDynamicStates = (VkDynamicState[]) {
532             VK_DYNAMIC_STATE_VIEWPORT,
533             VK_DYNAMIC_STATE_SCISSOR,
534             VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
535             VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
536             VK_DYNAMIC_STATE_STENCIL_REFERENCE,
537             VK_DYNAMIC_STATE_BLEND_CONSTANTS,
538             VK_DYNAMIC_STATE_DEPTH_BIAS,
539             VK_DYNAMIC_STATE_LINE_WIDTH,
540          },
541       },
542 
543       .flags = 0,
544       .layout = layout,
545       .renderPass = v3dv_render_pass_to_handle(pass),
546       .subpass = subpass_idx,
547    };
548 
549    VkResult result =
550       v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
551                                    VK_NULL_HANDLE,
552                                    1, &info,
553                                    &device->vk.alloc,
554                                    pipeline);
555 
556    ralloc_free(vs_nir);
557    ralloc_free(gs_nir);
558    ralloc_free(fs_nir);
559 
560    return result;
561 }
562 
563 static VkResult
create_color_clear_pipeline(struct v3dv_device *device, struct v3dv_render_pass *pass, uint32_t subpass_idx, uint32_t rt_idx, VkFormat format, uint32_t samples, uint32_t components, bool is_layered, VkPipelineLayout pipeline_layout, VkPipeline *pipeline)564 create_color_clear_pipeline(struct v3dv_device *device,
565                             struct v3dv_render_pass *pass,
566                             uint32_t subpass_idx,
567                             uint32_t rt_idx,
568                             VkFormat format,
569                             uint32_t samples,
570                             uint32_t components,
571                             bool is_layered,
572                             VkPipelineLayout pipeline_layout,
573                             VkPipeline *pipeline)
574 {
575    nir_shader *vs_nir = get_clear_rect_vs();
576    nir_shader *fs_nir = get_color_clear_rect_fs(rt_idx, format);
577    nir_shader *gs_nir = is_layered ? get_clear_rect_gs(16) : NULL;
578 
579    const VkPipelineVertexInputStateCreateInfo vi_state = {
580       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
581       .vertexBindingDescriptionCount = 0,
582       .vertexAttributeDescriptionCount = 0,
583    };
584 
585    const VkPipelineDepthStencilStateCreateInfo ds_state = {
586       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
587       .depthTestEnable = false,
588       .depthWriteEnable = false,
589       .depthBoundsTestEnable = false,
590       .stencilTestEnable = false,
591    };
592 
593    assert(subpass_idx < pass->subpass_count);
594    const uint32_t color_count = pass->subpasses[subpass_idx].color_count;
595    assert(rt_idx < color_count);
596 
597    VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS];
598    for (uint32_t i = 0; i < color_count; i++) {
599       blend_att_state[i] = (VkPipelineColorBlendAttachmentState) {
600          .blendEnable = false,
601          .colorWriteMask = i == rt_idx ? components : 0,
602       };
603    }
604 
605    const VkPipelineColorBlendStateCreateInfo cb_state = {
606       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
607       .logicOpEnable = false,
608       .attachmentCount = color_count,
609       .pAttachments = blend_att_state
610    };
611 
612    return create_pipeline(device,
613                           pass, subpass_idx,
614                           samples,
615                           vs_nir, gs_nir, fs_nir,
616                           &vi_state,
617                           &ds_state,
618                           &cb_state,
619                           pipeline_layout,
620                           pipeline);
621 }
622 
623 static VkResult
create_depth_clear_pipeline(struct v3dv_device *device, VkImageAspectFlags aspects, struct v3dv_render_pass *pass, uint32_t subpass_idx, uint32_t samples, bool is_layered, VkPipelineLayout pipeline_layout, VkPipeline *pipeline)624 create_depth_clear_pipeline(struct v3dv_device *device,
625                             VkImageAspectFlags aspects,
626                             struct v3dv_render_pass *pass,
627                             uint32_t subpass_idx,
628                             uint32_t samples,
629                             bool is_layered,
630                             VkPipelineLayout pipeline_layout,
631                             VkPipeline *pipeline)
632 {
633    const bool has_depth = aspects & VK_IMAGE_ASPECT_DEPTH_BIT;
634    const bool has_stencil = aspects & VK_IMAGE_ASPECT_STENCIL_BIT;
635    assert(has_depth || has_stencil);
636 
637    nir_shader *vs_nir = get_clear_rect_vs();
638    nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs() : NULL;
639    nir_shader *gs_nir = is_layered ? get_clear_rect_gs(4) : NULL;
640 
641    const VkPipelineVertexInputStateCreateInfo vi_state = {
642       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
643       .vertexBindingDescriptionCount = 0,
644       .vertexAttributeDescriptionCount = 0,
645    };
646 
647    const VkPipelineDepthStencilStateCreateInfo ds_state = {
648       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
649       .depthTestEnable = has_depth,
650       .depthWriteEnable = has_depth,
651       .depthCompareOp = VK_COMPARE_OP_ALWAYS,
652       .depthBoundsTestEnable = false,
653       .stencilTestEnable = has_stencil,
654       .front = {
655          .passOp = VK_STENCIL_OP_REPLACE,
656          .compareOp = VK_COMPARE_OP_ALWAYS,
657          /* compareMask, writeMask and reference are dynamic state */
658       },
659       .back = { 0 },
660    };
661 
662    assert(subpass_idx < pass->subpass_count);
663    VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS] = { 0 };
664    const VkPipelineColorBlendStateCreateInfo cb_state = {
665       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
666       .logicOpEnable = false,
667       .attachmentCount = pass->subpasses[subpass_idx].color_count,
668       .pAttachments = blend_att_state,
669    };
670 
671    return create_pipeline(device,
672                           pass, subpass_idx,
673                           samples,
674                           vs_nir, gs_nir, fs_nir,
675                           &vi_state,
676                           &ds_state,
677                           &cb_state,
678                           pipeline_layout,
679                           pipeline);
680 }
681 
682 static VkResult
create_color_clear_render_pass(struct v3dv_device *device, uint32_t rt_idx, VkFormat format, uint32_t samples, VkRenderPass *pass)683 create_color_clear_render_pass(struct v3dv_device *device,
684                                uint32_t rt_idx,
685                                VkFormat format,
686                                uint32_t samples,
687                                VkRenderPass *pass)
688 {
689    VkAttachmentDescription2 att = {
690       .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
691       .format = format,
692       .samples = samples,
693       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
694       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
695       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
696       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
697    };
698 
699    VkAttachmentReference2 att_ref = {
700       .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
701       .attachment = rt_idx,
702       .layout = VK_IMAGE_LAYOUT_GENERAL,
703    };
704 
705    VkSubpassDescription2 subpass = {
706       .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
707       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
708       .inputAttachmentCount = 0,
709       .colorAttachmentCount = 1,
710       .pColorAttachments = &att_ref,
711       .pResolveAttachments = NULL,
712       .pDepthStencilAttachment = NULL,
713       .preserveAttachmentCount = 0,
714       .pPreserveAttachments = NULL,
715    };
716 
717    VkRenderPassCreateInfo2 info = {
718       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
719       .attachmentCount = 1,
720       .pAttachments = &att,
721       .subpassCount = 1,
722       .pSubpasses = &subpass,
723       .dependencyCount = 0,
724       .pDependencies = NULL,
725    };
726 
727    return v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
728                                  &info, &device->vk.alloc, pass);
729 }
730 
731 static inline uint64_t
get_color_clear_pipeline_cache_key(uint32_t rt_idx, VkFormat format, uint32_t samples, uint32_t components, bool is_layered)732 get_color_clear_pipeline_cache_key(uint32_t rt_idx,
733                                    VkFormat format,
734                                    uint32_t samples,
735                                    uint32_t components,
736                                    bool is_layered)
737 {
738    assert(rt_idx < V3D_MAX_DRAW_BUFFERS);
739 
740    uint64_t key = 0;
741    uint32_t bit_offset = 0;
742 
743    key |= rt_idx;
744    bit_offset += 2;
745 
746    key |= ((uint64_t) format) << bit_offset;
747    bit_offset += 32;
748 
749    key |= ((uint64_t) samples) << bit_offset;
750    bit_offset += 4;
751 
752    key |= ((uint64_t) components) << bit_offset;
753    bit_offset += 4;
754 
755    key |= (is_layered ? 1ull : 0ull) << bit_offset;
756    bit_offset += 1;
757 
758    assert(bit_offset <= 64);
759    return key;
760 }
761 
762 static inline uint64_t
get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects, VkFormat format, uint32_t samples, bool is_layered)763 get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
764                                    VkFormat format,
765                                    uint32_t samples,
766                                    bool is_layered)
767 {
768    uint64_t key = 0;
769    uint32_t bit_offset = 0;
770 
771    key |= format;
772    bit_offset += 32;
773 
774    key |= ((uint64_t) samples) << bit_offset;
775    bit_offset += 4;
776 
777    const bool has_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 1 : 0;
778    key |= ((uint64_t) has_depth) << bit_offset;
779    bit_offset++;
780 
781    const bool has_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
782    key |= ((uint64_t) has_stencil) << bit_offset;
783    bit_offset++;;
784 
785    key |= (is_layered ? 1ull : 0ull) << bit_offset;
786    bit_offset += 1;
787 
788    assert(bit_offset <= 64);
789    return key;
790 }
791 
792 static VkResult
get_color_clear_pipeline(struct v3dv_device *device, struct v3dv_render_pass *pass, uint32_t subpass_idx, uint32_t rt_idx, uint32_t attachment_idx, VkFormat format, uint32_t samples, uint32_t components, bool is_layered, struct v3dv_meta_color_clear_pipeline **pipeline)793 get_color_clear_pipeline(struct v3dv_device *device,
794                          struct v3dv_render_pass *pass,
795                          uint32_t subpass_idx,
796                          uint32_t rt_idx,
797                          uint32_t attachment_idx,
798                          VkFormat format,
799                          uint32_t samples,
800                          uint32_t components,
801                          bool is_layered,
802                          struct v3dv_meta_color_clear_pipeline **pipeline)
803 {
804    assert(vk_format_is_color(format));
805 
806    VkResult result = VK_SUCCESS;
807 
808    /* If pass != NULL it means that we are emitting the clear as a draw call
809     * in the current pass bound by the application. In that case, we can't
810     * cache the pipeline, since it will be referencing that pass and the
811     * application could be destroying it at any point. Hopefully, the perf
812     * impact is not too big since we still have the device pipeline cache
813     * around and we won't end up re-compiling the clear shader.
814     *
815     * FIXME: alternatively, we could refcount (or maybe clone) the render pass
816     * provided by the application and include it in the pipeline key setup
817     * to make caching safe in this scenario, however, based on tests with
818     * vkQuake3, the fact that we are not caching here doesn't seem to have
819     * any significant impact in performance, so it might not be worth it.
820     */
821    const bool can_cache_pipeline = (pass == NULL);
822 
823    uint64_t key;
824    if (can_cache_pipeline) {
825       key = get_color_clear_pipeline_cache_key(rt_idx, format, samples,
826                                                components, is_layered);
827       mtx_lock(&device->meta.mtx);
828       struct hash_entry *entry =
829          _mesa_hash_table_search(device->meta.color_clear.cache, &key);
830       if (entry) {
831          mtx_unlock(&device->meta.mtx);
832          *pipeline = entry->data;
833          return VK_SUCCESS;
834       }
835    }
836 
837    *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
838                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
839 
840    if (*pipeline == NULL) {
841       result = VK_ERROR_OUT_OF_HOST_MEMORY;
842       goto fail;
843    }
844 
845    if (!pass) {
846       result = create_color_clear_render_pass(device,
847                                               rt_idx,
848                                               format,
849                                               samples,
850                                               &(*pipeline)->pass);
851       if (result != VK_SUCCESS)
852          goto fail;
853 
854       pass = v3dv_render_pass_from_handle((*pipeline)->pass);
855    } else {
856       (*pipeline)->pass = v3dv_render_pass_to_handle(pass);
857    }
858 
859    result = create_color_clear_pipeline(device,
860                                         pass,
861                                         subpass_idx,
862                                         rt_idx,
863                                         format,
864                                         samples,
865                                         components,
866                                         is_layered,
867                                         device->meta.color_clear.p_layout,
868                                         &(*pipeline)->pipeline);
869    if (result != VK_SUCCESS)
870       goto fail;
871 
872    if (can_cache_pipeline) {
873       (*pipeline)->key = key;
874       (*pipeline)->cached = true;
875       _mesa_hash_table_insert(device->meta.color_clear.cache,
876                               &(*pipeline)->key, *pipeline);
877 
878       mtx_unlock(&device->meta.mtx);
879    }
880 
881    return VK_SUCCESS;
882 
883 fail:
884    if (can_cache_pipeline)
885       mtx_unlock(&device->meta.mtx);
886 
887    VkDevice _device = v3dv_device_to_handle(device);
888    if (*pipeline) {
889       if ((*pipeline)->cached)
890          v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
891       if ((*pipeline)->pipeline)
892          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
893       vk_free(&device->vk.alloc, *pipeline);
894       *pipeline = NULL;
895    }
896 
897    return result;
898 }
899 
900 static VkResult
get_depth_clear_pipeline(struct v3dv_device *device, VkImageAspectFlags aspects, struct v3dv_render_pass *pass, uint32_t subpass_idx, uint32_t attachment_idx, bool is_layered, struct v3dv_meta_depth_clear_pipeline **pipeline)901 get_depth_clear_pipeline(struct v3dv_device *device,
902                          VkImageAspectFlags aspects,
903                          struct v3dv_render_pass *pass,
904                          uint32_t subpass_idx,
905                          uint32_t attachment_idx,
906                          bool is_layered,
907                          struct v3dv_meta_depth_clear_pipeline **pipeline)
908 {
909    assert(subpass_idx < pass->subpass_count);
910    assert(attachment_idx != VK_ATTACHMENT_UNUSED);
911    assert(attachment_idx < pass->attachment_count);
912 
913    VkResult result = VK_SUCCESS;
914 
915    const uint32_t samples = pass->attachments[attachment_idx].desc.samples;
916    const VkFormat format = pass->attachments[attachment_idx].desc.format;
917    assert(vk_format_is_depth_or_stencil(format));
918 
919    const uint64_t key =
920       get_depth_clear_pipeline_cache_key(aspects, format, samples, is_layered);
921    mtx_lock(&device->meta.mtx);
922    struct hash_entry *entry =
923       _mesa_hash_table_search(device->meta.depth_clear.cache, &key);
924    if (entry) {
925       mtx_unlock(&device->meta.mtx);
926       *pipeline = entry->data;
927       return VK_SUCCESS;
928    }
929 
930    *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
931                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
932 
933    if (*pipeline == NULL) {
934       result = VK_ERROR_OUT_OF_HOST_MEMORY;
935       goto fail;
936    }
937 
938    result = create_depth_clear_pipeline(device,
939                                         aspects,
940                                         pass,
941                                         subpass_idx,
942                                         samples,
943                                         is_layered,
944                                         device->meta.depth_clear.p_layout,
945                                         &(*pipeline)->pipeline);
946    if (result != VK_SUCCESS)
947       goto fail;
948 
949    (*pipeline)->key = key;
950    _mesa_hash_table_insert(device->meta.depth_clear.cache,
951                            &(*pipeline)->key, *pipeline);
952 
953    mtx_unlock(&device->meta.mtx);
954    return VK_SUCCESS;
955 
956 fail:
957    mtx_unlock(&device->meta.mtx);
958 
959    VkDevice _device = v3dv_device_to_handle(device);
960    if (*pipeline) {
961       if ((*pipeline)->pipeline)
962          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
963       vk_free(&device->vk.alloc, *pipeline);
964       *pipeline = NULL;
965    }
966 
967    return result;
968 }
969 
970 /* Emits a scissored quad in the clear color */
971 static void
emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_render_pass *pass, struct v3dv_subpass *subpass, uint32_t rt_idx, const VkClearColorValue *clear_color, bool is_layered, bool all_rects_same_layers, uint32_t rect_count, const VkClearRect *rects)972 emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
973                                struct v3dv_render_pass *pass,
974                                struct v3dv_subpass *subpass,
975                                uint32_t rt_idx,
976                                const VkClearColorValue *clear_color,
977                                bool is_layered,
978                                bool all_rects_same_layers,
979                                uint32_t rect_count,
980                                const VkClearRect *rects)
981 {
982    /* Skip if attachment is unused in the current subpass */
983    assert(rt_idx < subpass->color_count);
984    const uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
985    if (attachment_idx == VK_ATTACHMENT_UNUSED)
986       return;
987 
988    /* Obtain a pipeline for this clear */
989    assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
990    const VkFormat format =
991       cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
992    const VkFormat samples =
993       cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;
994    const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
995                                VK_COLOR_COMPONENT_G_BIT |
996                                VK_COLOR_COMPONENT_B_BIT |
997                                VK_COLOR_COMPONENT_A_BIT;
998    struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
999    VkResult result = get_color_clear_pipeline(cmd_buffer->device,
1000                                               pass,
1001                                               cmd_buffer->state.subpass_idx,
1002                                               rt_idx,
1003                                               attachment_idx,
1004                                               format,
1005                                               samples,
1006                                               components,
1007                                               is_layered,
1008                                               &pipeline);
1009    if (result != VK_SUCCESS) {
1010       if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1011          v3dv_flag_oom(cmd_buffer, NULL);
1012       return;
1013    }
1014    assert(pipeline && pipeline->pipeline);
1015 
1016    /* Emit clear rects */
1017    v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1018 
1019    VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1020    v3dv_CmdPushConstants(cmd_buffer_handle,
1021                          cmd_buffer->device->meta.depth_clear.p_layout,
1022                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
1023                          clear_color->float32);
1024 
1025    v3dv_CmdBindPipeline(cmd_buffer_handle,
1026                         VK_PIPELINE_BIND_POINT_GRAPHICS,
1027                         pipeline->pipeline);
1028 
1029    uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
1030 
1031    for (uint32_t i = 0; i < rect_count; i++) {
1032       const VkViewport viewport = {
1033          .x = rects[i].rect.offset.x,
1034          .y = rects[i].rect.offset.y,
1035          .width = rects[i].rect.extent.width,
1036          .height = rects[i].rect.extent.height,
1037          .minDepth = 0.0f,
1038          .maxDepth = 1.0f
1039       };
1040       v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1041       v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1042 
1043       if (is_layered) {
1044          for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1045               layer_offset++) {
1046             uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1047             v3dv_CmdPushConstants(cmd_buffer_handle,
1048                                   cmd_buffer->device->meta.depth_clear.p_layout,
1049                                   VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4, &layer);
1050             v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1051          }
1052       } else {
1053          assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1054          v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1055       }
1056    }
1057 
1058    /* Subpass pipelines can't be cached because they include a reference to the
1059     * render pass currently bound by the application, which means that we need
1060     * to destroy them manually here.
1061     */
1062    assert(!pipeline->cached);
1063    v3dv_cmd_buffer_add_private_obj(
1064       cmd_buffer, (uintptr_t)pipeline,
1065       (v3dv_cmd_buffer_private_obj_destroy_cb) destroy_color_clear_pipeline);
1066 
1067    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
1068 }
1069 
1070 /* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth
1071  * and the stencil aspect by using stencil testing.
1072  */
1073 static void
emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_render_pass *pass, struct v3dv_subpass *subpass, VkImageAspectFlags aspects, const VkClearDepthStencilValue *clear_ds, bool is_layered, bool all_rects_same_layers, uint32_t rect_count, const VkClearRect *rects)1074 emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1075                             struct v3dv_render_pass *pass,
1076                             struct v3dv_subpass *subpass,
1077                             VkImageAspectFlags aspects,
1078                             const VkClearDepthStencilValue *clear_ds,
1079                             bool is_layered,
1080                             bool all_rects_same_layers,
1081                             uint32_t rect_count,
1082                             const VkClearRect *rects)
1083 {
1084    /* Skip if attachment is unused in the current subpass */
1085    const uint32_t attachment_idx = subpass->ds_attachment.attachment;
1086    if (attachment_idx == VK_ATTACHMENT_UNUSED)
1087       return;
1088 
1089    /* Obtain a pipeline for this clear */
1090    assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
1091    struct v3dv_meta_depth_clear_pipeline *pipeline = NULL;
1092    VkResult result = get_depth_clear_pipeline(cmd_buffer->device,
1093                                               aspects,
1094                                               pass,
1095                                               cmd_buffer->state.subpass_idx,
1096                                               attachment_idx,
1097                                               is_layered,
1098                                               &pipeline);
1099    if (result != VK_SUCCESS) {
1100       if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1101          v3dv_flag_oom(cmd_buffer, NULL);
1102       return;
1103    }
1104    assert(pipeline && pipeline->pipeline);
1105 
1106    /* Emit clear rects */
1107    v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1108 
1109    VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1110    v3dv_CmdPushConstants(cmd_buffer_handle,
1111                          cmd_buffer->device->meta.depth_clear.p_layout,
1112                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
1113                          &clear_ds->depth);
1114 
1115    v3dv_CmdBindPipeline(cmd_buffer_handle,
1116                         VK_PIPELINE_BIND_POINT_GRAPHICS,
1117                         pipeline->pipeline);
1118 
1119    uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
1120    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1121       v3dv_CmdSetStencilReference(cmd_buffer_handle,
1122                                   VK_STENCIL_FACE_FRONT_AND_BACK,
1123                                   clear_ds->stencil);
1124       v3dv_CmdSetStencilWriteMask(cmd_buffer_handle,
1125                                   VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1126       v3dv_CmdSetStencilCompareMask(cmd_buffer_handle,
1127                                     VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1128       dynamic_states |= VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK |
1129                         VK_DYNAMIC_STATE_STENCIL_WRITE_MASK |
1130                         VK_DYNAMIC_STATE_STENCIL_REFERENCE;
1131    }
1132 
1133    for (uint32_t i = 0; i < rect_count; i++) {
1134       const VkViewport viewport = {
1135          .x = rects[i].rect.offset.x,
1136          .y = rects[i].rect.offset.y,
1137          .width = rects[i].rect.extent.width,
1138          .height = rects[i].rect.extent.height,
1139          .minDepth = 0.0f,
1140          .maxDepth = 1.0f
1141       };
1142       v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1143       v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1144       if (is_layered) {
1145          for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1146               layer_offset++) {
1147             uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1148             v3dv_CmdPushConstants(cmd_buffer_handle,
1149                                   cmd_buffer->device->meta.depth_clear.p_layout,
1150                                   VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4, &layer);
1151             v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1152          }
1153       } else {
1154          assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1155          v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1156       }
1157    }
1158 
1159    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
1160 }
1161 
1162 static void
gather_layering_info(uint32_t rect_count, const VkClearRect *rects, bool *is_layered, bool *all_rects_same_layers)1163 gather_layering_info(uint32_t rect_count, const VkClearRect *rects,
1164                      bool *is_layered, bool *all_rects_same_layers)
1165 {
1166    *all_rects_same_layers = true;
1167 
1168    uint32_t min_layer = rects[0].baseArrayLayer;
1169    uint32_t max_layer = rects[0].baseArrayLayer + rects[0].layerCount - 1;
1170    for (uint32_t i = 1; i < rect_count; i++) {
1171       if (rects[i].baseArrayLayer != rects[i - 1].baseArrayLayer ||
1172           rects[i].layerCount != rects[i - 1].layerCount) {
1173          *all_rects_same_layers = false;
1174          min_layer = MIN2(min_layer, rects[i].baseArrayLayer);
1175          max_layer = MAX2(max_layer, rects[i].baseArrayLayer +
1176                                      rects[i].layerCount - 1);
1177       }
1178    }
1179 
1180    *is_layered = !(min_layer == 0 && max_layer == 0);
1181 }
1182 
1183 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments, uint32_t rectCount, const VkClearRect *pRects)1184 v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
1185                          uint32_t attachmentCount,
1186                          const VkClearAttachment *pAttachments,
1187                          uint32_t rectCount,
1188                          const VkClearRect *pRects)
1189 {
1190    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1191 
1192    /* We can only clear attachments in the current subpass */
1193    assert(attachmentCount <= 5); /* 4 color + D/S */
1194 
1195    struct v3dv_render_pass *pass = cmd_buffer->state.pass;
1196 
1197    assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
1198    struct v3dv_subpass *subpass =
1199       &cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];
1200 
1201    /* Emit a clear rect inside the current job for this subpass. For layered
1202     * framebuffers, we use a geometry shader to redirect clears to the
1203     * appropriate layers.
1204     */
1205    bool is_layered, all_rects_same_layers;
1206    gather_layering_info(rectCount, pRects, &is_layered, &all_rects_same_layers);
1207    for (uint32_t i = 0; i < attachmentCount; i++) {
1208       if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1209          emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
1210                                         pAttachments[i].colorAttachment,
1211                                         &pAttachments[i].clearValue.color,
1212                                         is_layered, all_rects_same_layers,
1213                                         rectCount, pRects);
1214       } else {
1215          emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
1216                                      pAttachments[i].aspectMask,
1217                                      &pAttachments[i].clearValue.depthStencil,
1218                                      is_layered, all_rects_same_layers,
1219                                      rectCount, pRects);
1220       }
1221    }
1222 }
1223