1 /*
2 * Copyright © 2020 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26
27 #include "compiler/nir/nir_builder.h"
28 #include "util/u_pack_color.h"
29
30 static void
get_hw_clear_color(struct v3dv_device *device, const VkClearColorValue *color, VkFormat fb_format, VkFormat image_format, uint32_t internal_type, uint32_t internal_bpp, uint32_t *hw_color)31 get_hw_clear_color(struct v3dv_device *device,
32 const VkClearColorValue *color,
33 VkFormat fb_format,
34 VkFormat image_format,
35 uint32_t internal_type,
36 uint32_t internal_bpp,
37 uint32_t *hw_color)
38 {
39 const uint32_t internal_size = 4 << internal_bpp;
40
41 /* If the image format doesn't match the framebuffer format, then we are
42 * trying to clear an unsupported tlb format using a compatible
43 * format for the framebuffer. In this case, we want to make sure that
44 * we pack the clear value according to the original format semantics,
45 * not the compatible format.
46 */
47 if (fb_format == image_format) {
48 v3dv_X(device, get_hw_clear_color)(color, internal_type, internal_size,
49 hw_color);
50 } else {
51 union util_color uc;
52 enum pipe_format pipe_image_format =
53 vk_format_to_pipe_format(image_format);
54 util_pack_color(color->float32, pipe_image_format, &uc);
55 memcpy(hw_color, uc.ui, internal_size);
56 }
57 }
58
59 /* Returns true if the implementation is able to handle the case, false
60 * otherwise.
61 */
static bool
clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                struct v3dv_image *image,
                const VkClearValue *clear_value,
                const VkImageSubresourceRange *range)
{
   /* Check that the TLB can write this image at mip level 0 / offset (0,0,0)
    * and get a (possibly compatible) framebuffer format for it.
    */
   const VkOffset3D origin = { 0, 0, 0 };
   VkFormat fb_format;
   if (!v3dv_meta_can_use_tlb(image, &origin, &fb_format))
      return false;

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, range->aspectMask,
       &internal_type, &internal_bpp);

   /* Translate the API clear value into the packed HW representation:
    * color values are packed per the image format, depth/stencil are
    * stored as-is in their dedicated fields.
    */
   union v3dv_clear_value hw_clear_value = { 0 };
   if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
      get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format,
                         image->vk.format, internal_type, internal_bpp,
                         &hw_clear_value.color[0]);
   } else {
      assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
             (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
      hw_clear_value.z = clear_value->depthStencil.depth;
      hw_clear_value.s = clear_value->depthStencil.stencil;
   }

   /* Resolve the (possibly VK_REMAINING_*) subresource range to explicit
    * [min, max) mip level and layer intervals.
    */
   uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
   uint32_t min_level = range->baseMipLevel;
   uint32_t max_level = range->baseMipLevel + level_count;

   /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
    * Instead, we need to consider the full depth dimension of the image, which
    * goes from 0 up to the level's depth extent.
    */
   uint32_t min_layer;
   uint32_t max_layer;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
      min_layer = range->baseArrayLayer;
      max_layer = range->baseArrayLayer +
                  vk_image_subresource_layer_count(&image->vk, range);
   } else {
      min_layer = 0;
      max_layer = 0;
   }

   /* One GPU CL job per mip level: each level has its own dimensions and,
    * for 3D images, its own (minified) depth extent.
    */
   for (uint32_t level = min_level; level < max_level; level++) {
      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         max_layer = u_minify(image->vk.extent.depth, level);

      uint32_t width = u_minify(image->vk.extent.width, level);
      uint32_t height = u_minify(image->vk.extent.height, level);

      struct v3dv_job *job =
         v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);

      /* NOTE(review): a NULL job presumably means an error was already
       * recorded on the command buffer, so we report the clear as handled
       * instead of falling back — TODO confirm against
       * v3dv_cmd_buffer_start_job.
       */
      if (!job)
         return true;

      v3dv_job_start_frame(job, width, height, max_layer, false,
                           1, internal_bpp,
                           image->vk.samples > VK_SAMPLE_COUNT_1_BIT);

      struct v3dv_meta_framebuffer framebuffer;
      v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                                 internal_type,
                                                 &job->frame_tiling);

      v3dv_X(job->device, job_emit_binning_flush)(job);

      /* If this triggers it is an application bug: the spec requires
       * that any aspects to clear are present in the image.
       */
      assert(range->aspectMask & image->vk.aspects);

      v3dv_X(job->device, meta_emit_clear_image_rcl)
         (job, image, &framebuffer, &hw_clear_value,
          range->aspectMask, min_layer, max_layer, level);

      v3dv_cmd_buffer_finish_job(cmd_buffer);
   }

   return true;
}
147
148 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage _image, VkImageLayout imageLayout, const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)149 v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
150 VkImage _image,
151 VkImageLayout imageLayout,
152 const VkClearColorValue *pColor,
153 uint32_t rangeCount,
154 const VkImageSubresourceRange *pRanges)
155 {
156 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
157 V3DV_FROM_HANDLE(v3dv_image, image, _image);
158
159 const VkClearValue clear_value = {
160 .color = *pColor,
161 };
162
163 cmd_buffer->state.is_transfer = true;
164
165 for (uint32_t i = 0; i < rangeCount; i++) {
166 if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
167 continue;
168 unreachable("Unsupported color clear.");
169 }
170
171 cmd_buffer->state.is_transfer = false;
172 }
173
174 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage _image, VkImageLayout imageLayout, const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)175 v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
176 VkImage _image,
177 VkImageLayout imageLayout,
178 const VkClearDepthStencilValue *pDepthStencil,
179 uint32_t rangeCount,
180 const VkImageSubresourceRange *pRanges)
181 {
182 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
183 V3DV_FROM_HANDLE(v3dv_image, image, _image);
184
185 const VkClearValue clear_value = {
186 .depthStencil = *pDepthStencil,
187 };
188
189 cmd_buffer->state.is_transfer = true;
190
191 for (uint32_t i = 0; i < rangeCount; i++) {
192 if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
193 continue;
194 unreachable("Unsupported depth/stencil clear.");
195 }
196
197 cmd_buffer->state.is_transfer = false;
198 }
199
200 static void
destroy_color_clear_pipeline(VkDevice _device, uint64_t pipeline, VkAllocationCallbacks *alloc)201 destroy_color_clear_pipeline(VkDevice _device,
202 uint64_t pipeline,
203 VkAllocationCallbacks *alloc)
204 {
205 struct v3dv_meta_color_clear_pipeline *p =
206 (struct v3dv_meta_color_clear_pipeline *) (uintptr_t) pipeline;
207 v3dv_DestroyPipeline(_device, p->pipeline, alloc);
208 if (p->cached)
209 v3dv_DestroyRenderPass(_device, p->pass, alloc);
210 vk_free(alloc, p);
211 }
212
213 static void
destroy_depth_clear_pipeline(VkDevice _device, struct v3dv_meta_depth_clear_pipeline *p, VkAllocationCallbacks *alloc)214 destroy_depth_clear_pipeline(VkDevice _device,
215 struct v3dv_meta_depth_clear_pipeline *p,
216 VkAllocationCallbacks *alloc)
217 {
218 v3dv_DestroyPipeline(_device, p->pipeline, alloc);
219 vk_free(alloc, p);
220 }
221
222 static VkResult
create_color_clear_pipeline_layout(struct v3dv_device *device, VkPipelineLayout *pipeline_layout)223 create_color_clear_pipeline_layout(struct v3dv_device *device,
224 VkPipelineLayout *pipeline_layout)
225 {
226 /* FIXME: this is abusing a bit the API, since not all of our clear
227 * pipelines have a geometry shader. We could create 2 different pipeline
228 * layouts, but this works for us for now.
229 */
230 VkPushConstantRange ranges[2] = {
231 { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
232 { VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4 },
233 };
234
235 VkPipelineLayoutCreateInfo info = {
236 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
237 .setLayoutCount = 0,
238 .pushConstantRangeCount = 2,
239 .pPushConstantRanges = ranges,
240 };
241
242 return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
243 &info, &device->vk.alloc, pipeline_layout);
244 }
245
246 static VkResult
create_depth_clear_pipeline_layout(struct v3dv_device *device, VkPipelineLayout *pipeline_layout)247 create_depth_clear_pipeline_layout(struct v3dv_device *device,
248 VkPipelineLayout *pipeline_layout)
249 {
250 /* FIXME: this is abusing a bit the API, since not all of our clear
251 * pipelines have a geometry shader. We could create 2 different pipeline
252 * layouts, but this works for us for now.
253 */
254 VkPushConstantRange ranges[2] = {
255 { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
256 { VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4 },
257 };
258
259 VkPipelineLayoutCreateInfo info = {
260 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
261 .setLayoutCount = 0,
262 .pushConstantRangeCount = 2,
263 .pPushConstantRanges = ranges
264 };
265
266 return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
267 &info, &device->vk.alloc, pipeline_layout);
268 }
269
270 void
v3dv_meta_clear_init(struct v3dv_device *device)271 v3dv_meta_clear_init(struct v3dv_device *device)
272 {
273 device->meta.color_clear.cache =
274 _mesa_hash_table_create(NULL, u64_hash, u64_compare);
275
276 create_color_clear_pipeline_layout(device,
277 &device->meta.color_clear.p_layout);
278
279 device->meta.depth_clear.cache =
280 _mesa_hash_table_create(NULL, u64_hash, u64_compare);
281
282 create_depth_clear_pipeline_layout(device,
283 &device->meta.depth_clear.p_layout);
284 }
285
286 void
v3dv_meta_clear_finish(struct v3dv_device *device)287 v3dv_meta_clear_finish(struct v3dv_device *device)
288 {
289 VkDevice _device = v3dv_device_to_handle(device);
290
291 hash_table_foreach(device->meta.color_clear.cache, entry) {
292 struct v3dv_meta_color_clear_pipeline *item = entry->data;
293 destroy_color_clear_pipeline(_device, (uintptr_t)item, &device->vk.alloc);
294 }
295 _mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);
296
297 if (device->meta.color_clear.p_layout) {
298 v3dv_DestroyPipelineLayout(_device, device->meta.color_clear.p_layout,
299 &device->vk.alloc);
300 }
301
302 hash_table_foreach(device->meta.depth_clear.cache, entry) {
303 struct v3dv_meta_depth_clear_pipeline *item = entry->data;
304 destroy_depth_clear_pipeline(_device, item, &device->vk.alloc);
305 }
306 _mesa_hash_table_destroy(device->meta.depth_clear.cache, NULL);
307
308 if (device->meta.depth_clear.p_layout) {
309 v3dv_DestroyPipelineLayout(_device, device->meta.depth_clear.p_layout,
310 &device->vk.alloc);
311 }
312 }
313
314 static nir_shader *
get_clear_rect_vsnull315 get_clear_rect_vs()
316 {
317 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
318 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
319 "meta clear vs");
320
321 const struct glsl_type *vec4 = glsl_vec4_type();
322 nir_variable *vs_out_pos =
323 nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
324 vs_out_pos->data.location = VARYING_SLOT_POS;
325
326 nir_ssa_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
327 nir_store_var(&b, vs_out_pos, pos, 0xf);
328
329 return b.shader;
330 }
331
/* Builds the geometry shader used for layered clears: it forwards the
 * input triangle unchanged and writes gl_Layer for every vertex with a
 * layer index read from push constants at offset
 * 'push_constant_layer_base'.
 */
static nir_shader *
get_clear_rect_gs(uint32_t push_constant_layer_base)
{
   /* FIXME: this creates a geometry shader that takes the index of a single
    * layer to clear from push constants, so we need to emit a draw call for
    * each layer that we want to clear. We could actually do better and have it
    * take a range of layers and then emit one triangle per layer to clear,
    * however, if we were to do this we would need to be careful not to exceed
    * the maximum number of output vertices allowed in a geometry shader.
    */
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
                                                  "meta clear gs");
   nir_shader *nir = b.shader;
   /* Shader info: one triangle in, one triangle-strip (3 vertices) out,
    * writing position and layer.
    */
   nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
   nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
                               (1ull << VARYING_SLOT_LAYER);
   nir->info.gs.input_primitive = SHADER_PRIM_TRIANGLES;
   nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
   nir->info.gs.vertices_in = 3;
   nir->info.gs.vertices_out = 3;
   nir->info.gs.invocations = 1;
   nir->info.gs.active_stream_mask = 0x1;

   /* in vec4 gl_Position[3] */
   nir_variable *gs_in_pos =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_array_type(glsl_vec4_type(), 3, 0),
                          "in_gl_Position");
   gs_in_pos->data.location = VARYING_SLOT_POS;

   /* out vec4 gl_Position */
   nir_variable *gs_out_pos =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
                          "out_gl_Position");
   gs_out_pos->data.location = VARYING_SLOT_POS;

   /* out float gl_Layer */
   nir_variable *gs_out_layer =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
                          "out_gl_Layer");
   gs_out_layer->data.location = VARYING_SLOT_LAYER;

   /* Emit output triangle */
   for (uint32_t i = 0; i < 3; i++) {
      /* gl_Position from shader input */
      nir_deref_instr *in_pos_i =
         nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
      nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);

      /* gl_Layer from push constants */
      nir_ssa_def *layer =
         nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                                .base = push_constant_layer_base, .range = 4);
      nir_store_var(&b, gs_out_layer, layer, 0x1);

      nir_emit_vertex(&b, 0);
   }

   nir_end_primitive(&b, 0);

   return nir;
}
395
396 static nir_shader *
get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)397 get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)
398 {
399 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
400 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
401 "meta clear fs");
402
403 enum pipe_format pformat = vk_format_to_pipe_format(format);
404 const struct glsl_type *fs_out_type =
405 util_format_is_float(pformat) ? glsl_vec4_type() : glsl_uvec4_type();
406
407 nir_variable *fs_out_color =
408 nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
409 fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
410
411 nir_ssa_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
412 nir_store_var(&b, fs_out_color, color_load, 0xf);
413
414 return b.shader;
415 }
416
417 static nir_shader *
get_depth_clear_rect_fsnull418 get_depth_clear_rect_fs()
419 {
420 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
421 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
422 "meta depth clear fs");
423
424 nir_variable *fs_out_depth =
425 nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
426 "out_depth");
427 fs_out_depth->data.location = FRAG_RESULT_DEPTH;
428
429 nir_ssa_def *depth_load =
430 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
431
432 nir_store_var(&b, fs_out_depth, depth_load, 0x1);
433
434 return b.shader;
435 }
436
437 static VkResult
create_pipeline(struct v3dv_device *device, struct v3dv_render_pass *pass, uint32_t subpass_idx, uint32_t samples, struct nir_shader *vs_nir, struct nir_shader *gs_nir, struct nir_shader *fs_nir, const VkPipelineVertexInputStateCreateInfo *vi_state, const VkPipelineDepthStencilStateCreateInfo *ds_state, const VkPipelineColorBlendStateCreateInfo *cb_state, const VkPipelineLayout layout, VkPipeline *pipeline)438 create_pipeline(struct v3dv_device *device,
439 struct v3dv_render_pass *pass,
440 uint32_t subpass_idx,
441 uint32_t samples,
442 struct nir_shader *vs_nir,
443 struct nir_shader *gs_nir,
444 struct nir_shader *fs_nir,
445 const VkPipelineVertexInputStateCreateInfo *vi_state,
446 const VkPipelineDepthStencilStateCreateInfo *ds_state,
447 const VkPipelineColorBlendStateCreateInfo *cb_state,
448 const VkPipelineLayout layout,
449 VkPipeline *pipeline)
450 {
451 VkPipelineShaderStageCreateInfo stages[3] = { 0 };
452 struct vk_shader_module vs_m = vk_shader_module_from_nir(vs_nir);
453 struct vk_shader_module gs_m;
454 struct vk_shader_module fs_m;
455
456 uint32_t stage_count = 0;
457 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
458 stages[stage_count].stage = VK_SHADER_STAGE_VERTEX_BIT;
459 stages[stage_count].module = vk_shader_module_to_handle(&vs_m);
460 stages[stage_count].pName = "main";
461 stage_count++;
462
463 if (gs_nir) {
464 gs_m = vk_shader_module_from_nir(gs_nir);
465 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
466 stages[stage_count].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
467 stages[stage_count].module = vk_shader_module_to_handle(&gs_m);
468 stages[stage_count].pName = "main";
469 stage_count++;
470 }
471
472 if (fs_nir) {
473 fs_m = vk_shader_module_from_nir(fs_nir);
474 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
475 stages[stage_count].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
476 stages[stage_count].module = vk_shader_module_to_handle(&fs_m);
477 stages[stage_count].pName = "main";
478 stage_count++;
479 }
480
481 VkGraphicsPipelineCreateInfo info = {
482 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
483
484 .stageCount = stage_count,
485 .pStages = stages,
486
487 .pVertexInputState = vi_state,
488
489 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
490 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
491 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
492 .primitiveRestartEnable = false,
493 },
494
495 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
496 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
497 .viewportCount = 1,
498 .scissorCount = 1,
499 },
500
501 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
502 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
503 .rasterizerDiscardEnable = false,
504 .polygonMode = VK_POLYGON_MODE_FILL,
505 .cullMode = VK_CULL_MODE_NONE,
506 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
507 .depthBiasEnable = false,
508 },
509
510 .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
511 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
512 .rasterizationSamples = samples,
513 .sampleShadingEnable = false,
514 .pSampleMask = NULL,
515 .alphaToCoverageEnable = false,
516 .alphaToOneEnable = false,
517 },
518
519 .pDepthStencilState = ds_state,
520
521 .pColorBlendState = cb_state,
522
523 /* The meta clear pipeline declares all state as dynamic.
524 * As a consequence, vkCmdBindPipeline writes no dynamic state
525 * to the cmd buffer. Therefore, at the end of the meta clear,
526 * we need only restore dynamic state that was vkCmdSet.
527 */
528 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
529 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
530 .dynamicStateCount = 6,
531 .pDynamicStates = (VkDynamicState[]) {
532 VK_DYNAMIC_STATE_VIEWPORT,
533 VK_DYNAMIC_STATE_SCISSOR,
534 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
535 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
536 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
537 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
538 VK_DYNAMIC_STATE_DEPTH_BIAS,
539 VK_DYNAMIC_STATE_LINE_WIDTH,
540 },
541 },
542
543 .flags = 0,
544 .layout = layout,
545 .renderPass = v3dv_render_pass_to_handle(pass),
546 .subpass = subpass_idx,
547 };
548
549 VkResult result =
550 v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
551 VK_NULL_HANDLE,
552 1, &info,
553 &device->vk.alloc,
554 pipeline);
555
556 ralloc_free(vs_nir);
557 ralloc_free(gs_nir);
558 ralloc_free(fs_nir);
559
560 return result;
561 }
562
/* Creates a color clear pipeline for render target 'rt_idx' of the given
 * subpass, writing only the requested 'components'. For layered clears a
 * geometry shader reads the layer index from push constants at offset 16,
 * matching the geometry push-constant range of the color clear pipeline
 * layout.
 */
static VkResult
create_color_clear_pipeline(struct v3dv_device *device,
                            struct v3dv_render_pass *pass,
                            uint32_t subpass_idx,
                            uint32_t rt_idx,
                            VkFormat format,
                            uint32_t samples,
                            uint32_t components,
                            bool is_layered,
                            VkPipelineLayout pipeline_layout,
                            VkPipeline *pipeline)
{
   nir_shader *vs_nir = get_clear_rect_vs();
   nir_shader *fs_nir = get_color_clear_rect_fs(rt_idx, format);
   nir_shader *gs_nir = is_layered ? get_clear_rect_gs(16) : NULL;

   /* No vertex inputs: the VS generates the rect itself. */
   const VkPipelineVertexInputStateCreateInfo vi_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      .vertexBindingDescriptionCount = 0,
      .vertexAttributeDescriptionCount = 0,
   };

   /* Color clears never touch depth/stencil. */
   const VkPipelineDepthStencilStateCreateInfo ds_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
      .depthTestEnable = false,
      .depthWriteEnable = false,
      .depthBoundsTestEnable = false,
      .stencilTestEnable = false,
   };

   assert(subpass_idx < pass->subpass_count);
   const uint32_t color_count = pass->subpasses[subpass_idx].color_count;
   assert(rt_idx < color_count);

   /* Mask out writes to every attachment except the one being cleared. */
   VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS];
   for (uint32_t i = 0; i < color_count; i++) {
      blend_att_state[i] = (VkPipelineColorBlendAttachmentState) {
         .blendEnable = false,
         .colorWriteMask = i == rt_idx ? components : 0,
      };
   }

   const VkPipelineColorBlendStateCreateInfo cb_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
      .logicOpEnable = false,
      .attachmentCount = color_count,
      .pAttachments = blend_att_state
   };

   return create_pipeline(device,
                          pass, subpass_idx,
                          samples,
                          vs_nir, gs_nir, fs_nir,
                          &vi_state,
                          &ds_state,
                          &cb_state,
                          pipeline_layout,
                          pipeline);
}
622
/* Creates a depth and/or stencil clear pipeline for the given subpass.
 * Depth is written from the fragment shader (push constant offset 0);
 * stencil is cleared through the stencil test with REPLACE, whose
 * reference value is dynamic state. For layered clears a geometry shader
 * reads the layer index from push constants at offset 4, matching the
 * geometry push-constant range of the depth clear pipeline layout.
 */
static VkResult
create_depth_clear_pipeline(struct v3dv_device *device,
                            VkImageAspectFlags aspects,
                            struct v3dv_render_pass *pass,
                            uint32_t subpass_idx,
                            uint32_t samples,
                            bool is_layered,
                            VkPipelineLayout pipeline_layout,
                            VkPipeline *pipeline)
{
   const bool has_depth = aspects & VK_IMAGE_ASPECT_DEPTH_BIT;
   const bool has_stencil = aspects & VK_IMAGE_ASPECT_STENCIL_BIT;
   assert(has_depth || has_stencil);

   /* No fragment shader needed for stencil-only clears. */
   nir_shader *vs_nir = get_clear_rect_vs();
   nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs() : NULL;
   nir_shader *gs_nir = is_layered ? get_clear_rect_gs(4) : NULL;

   /* No vertex inputs: the VS generates the rect itself. */
   const VkPipelineVertexInputStateCreateInfo vi_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      .vertexBindingDescriptionCount = 0,
      .vertexAttributeDescriptionCount = 0,
   };

   const VkPipelineDepthStencilStateCreateInfo ds_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
      .depthTestEnable = has_depth,
      .depthWriteEnable = has_depth,
      .depthCompareOp = VK_COMPARE_OP_ALWAYS,
      .depthBoundsTestEnable = false,
      .stencilTestEnable = has_stencil,
      .front = {
         .passOp = VK_STENCIL_OP_REPLACE,
         .compareOp = VK_COMPARE_OP_ALWAYS,
         /* compareMask, writeMask and reference are dynamic state */
      },
      .back = { 0 },
   };

   assert(subpass_idx < pass->subpass_count);
   /* Zeroed attachment states: no color writes from this pipeline. */
   VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS] = { 0 };
   const VkPipelineColorBlendStateCreateInfo cb_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
      .logicOpEnable = false,
      .attachmentCount = pass->subpasses[subpass_idx].color_count,
      .pAttachments = blend_att_state,
   };

   return create_pipeline(device,
                          pass, subpass_idx,
                          samples,
                          vs_nir, gs_nir, fs_nir,
                          &vi_state,
                          &ds_state,
                          &cb_state,
                          pipeline_layout,
                          pipeline);
}
681
682 static VkResult
create_color_clear_render_pass(struct v3dv_device *device, uint32_t rt_idx, VkFormat format, uint32_t samples, VkRenderPass *pass)683 create_color_clear_render_pass(struct v3dv_device *device,
684 uint32_t rt_idx,
685 VkFormat format,
686 uint32_t samples,
687 VkRenderPass *pass)
688 {
689 VkAttachmentDescription2 att = {
690 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
691 .format = format,
692 .samples = samples,
693 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
694 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
695 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
696 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
697 };
698
699 VkAttachmentReference2 att_ref = {
700 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
701 .attachment = rt_idx,
702 .layout = VK_IMAGE_LAYOUT_GENERAL,
703 };
704
705 VkSubpassDescription2 subpass = {
706 .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
707 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
708 .inputAttachmentCount = 0,
709 .colorAttachmentCount = 1,
710 .pColorAttachments = &att_ref,
711 .pResolveAttachments = NULL,
712 .pDepthStencilAttachment = NULL,
713 .preserveAttachmentCount = 0,
714 .pPreserveAttachments = NULL,
715 };
716
717 VkRenderPassCreateInfo2 info = {
718 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
719 .attachmentCount = 1,
720 .pAttachments = &att,
721 .subpassCount = 1,
722 .pSubpasses = &subpass,
723 .dependencyCount = 0,
724 .pDependencies = NULL,
725 };
726
727 return v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
728 &info, &device->vk.alloc, pass);
729 }
730
731 static inline uint64_t
get_color_clear_pipeline_cache_key(uint32_t rt_idx, VkFormat format, uint32_t samples, uint32_t components, bool is_layered)732 get_color_clear_pipeline_cache_key(uint32_t rt_idx,
733 VkFormat format,
734 uint32_t samples,
735 uint32_t components,
736 bool is_layered)
737 {
738 assert(rt_idx < V3D_MAX_DRAW_BUFFERS);
739
740 uint64_t key = 0;
741 uint32_t bit_offset = 0;
742
743 key |= rt_idx;
744 bit_offset += 2;
745
746 key |= ((uint64_t) format) << bit_offset;
747 bit_offset += 32;
748
749 key |= ((uint64_t) samples) << bit_offset;
750 bit_offset += 4;
751
752 key |= ((uint64_t) components) << bit_offset;
753 bit_offset += 4;
754
755 key |= (is_layered ? 1ull : 0ull) << bit_offset;
756 bit_offset += 1;
757
758 assert(bit_offset <= 64);
759 return key;
760 }
761
762 static inline uint64_t
get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects, VkFormat format, uint32_t samples, bool is_layered)763 get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
764 VkFormat format,
765 uint32_t samples,
766 bool is_layered)
767 {
768 uint64_t key = 0;
769 uint32_t bit_offset = 0;
770
771 key |= format;
772 bit_offset += 32;
773
774 key |= ((uint64_t) samples) << bit_offset;
775 bit_offset += 4;
776
777 const bool has_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 1 : 0;
778 key |= ((uint64_t) has_depth) << bit_offset;
779 bit_offset++;
780
781 const bool has_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
782 key |= ((uint64_t) has_stencil) << bit_offset;
783 bit_offset++;;
784
785 key |= (is_layered ? 1ull : 0ull) << bit_offset;
786 bit_offset += 1;
787
788 assert(bit_offset <= 64);
789 return key;
790 }
791
/* Returns (in *pipeline) a color clear pipeline for the given state,
 * creating it on demand. When 'pass' is NULL the pipeline is backed by a
 * private render pass and cached in device->meta.color_clear.cache under
 * the device meta mutex; otherwise a one-off, uncached pipeline is
 * created against the caller's render pass.
 */
static VkResult
get_color_clear_pipeline(struct v3dv_device *device,
                         struct v3dv_render_pass *pass,
                         uint32_t subpass_idx,
                         uint32_t rt_idx,
                         uint32_t attachment_idx,
                         VkFormat format,
                         uint32_t samples,
                         uint32_t components,
                         bool is_layered,
                         struct v3dv_meta_color_clear_pipeline **pipeline)
{
   assert(vk_format_is_color(format));

   VkResult result = VK_SUCCESS;

   /* If pass != NULL it means that we are emitting the clear as a draw call
    * in the current pass bound by the application. In that case, we can't
    * cache the pipeline, since it will be referencing that pass and the
    * application could be destroying it at any point. Hopefully, the perf
    * impact is not too big since we still have the device pipeline cache
    * around and we won't end up re-compiling the clear shader.
    *
    * FIXME: alternatively, we could refcount (or maybe clone) the render pass
    * provided by the application and include it in the pipeline key setup
    * to make caching safe in this scenario, however, based on tests with
    * vkQuake3, the fact that we are not caching here doesn't seem to have
    * any significant impact in performance, so it might not be worth it.
    */
   const bool can_cache_pipeline = (pass == NULL);

   /* Fast path: return a previously cached pipeline. The meta mutex stays
    * held from here until the new pipeline is inserted (or we fail), so
    * concurrent callers can't race to create the same entry.
    */
   uint64_t key;
   if (can_cache_pipeline) {
      key = get_color_clear_pipeline_cache_key(rt_idx, format, samples,
                                               components, is_layered);
      mtx_lock(&device->meta.mtx);
      struct hash_entry *entry =
         _mesa_hash_table_search(device->meta.color_clear.cache, &key);
      if (entry) {
         mtx_unlock(&device->meta.mtx);
         *pipeline = entry->data;
         return VK_SUCCESS;
      }
   }

   *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (*pipeline == NULL) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   /* Without an application render pass, create a private one that the
    * cached entry will own (destroyed in destroy_color_clear_pipeline).
    */
   if (!pass) {
      result = create_color_clear_render_pass(device,
                                              rt_idx,
                                              format,
                                              samples,
                                              &(*pipeline)->pass);
      if (result != VK_SUCCESS)
         goto fail;

      pass = v3dv_render_pass_from_handle((*pipeline)->pass);
   } else {
      (*pipeline)->pass = v3dv_render_pass_to_handle(pass);
   }

   result = create_color_clear_pipeline(device,
                                        pass,
                                        subpass_idx,
                                        rt_idx,
                                        format,
                                        samples,
                                        components,
                                        is_layered,
                                        device->meta.color_clear.p_layout,
                                        &(*pipeline)->pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   if (can_cache_pipeline) {
      /* The key is stored in the entry itself so the hash table can keep
       * pointing at stable memory.
       */
      (*pipeline)->key = key;
      (*pipeline)->cached = true;
      _mesa_hash_table_insert(device->meta.color_clear.cache,
                              &(*pipeline)->key, *pipeline);

      mtx_unlock(&device->meta.mtx);
   }

   return VK_SUCCESS;

fail:
   if (can_cache_pipeline)
      mtx_unlock(&device->meta.mtx);

   VkDevice _device = v3dv_device_to_handle(device);
   if (*pipeline) {
      /* NOTE(review): 'cached' is only set to true on the success path, so
       * a private render pass created above is not destroyed here if
       * pipeline creation fails — looks like a leak; verify intent.
       */
      if ((*pipeline)->cached)
         v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
      if ((*pipeline)->pipeline)
         v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
      vk_free(&device->vk.alloc, *pipeline);
      *pipeline = NULL;
   }

   return result;
}
899
/* Looks up (or creates and caches) a pipeline to clear the given depth/stencil
 * aspects of the subpass D/S attachment via a scissored draw.
 *
 * Unlike color clears, depth-clear pipelines can always be cached: the key is
 * derived only from (aspects, attachment format, sample count, layering), not
 * from the application's render pass object.
 *
 * On success, *pipeline points at a cached entry owned by the device cache
 * (the caller must not destroy it). On failure, *pipeline is NULL and the
 * error code (e.g. VK_ERROR_OUT_OF_HOST_MEMORY) is returned.
 */
static VkResult
get_depth_clear_pipeline(struct v3dv_device *device,
                         VkImageAspectFlags aspects,
                         struct v3dv_render_pass *pass,
                         uint32_t subpass_idx,
                         uint32_t attachment_idx,
                         bool is_layered,
                         struct v3dv_meta_depth_clear_pipeline **pipeline)
{
   assert(subpass_idx < pass->subpass_count);
   assert(attachment_idx != VK_ATTACHMENT_UNUSED);
   assert(attachment_idx < pass->attachment_count);

   VkResult result = VK_SUCCESS;

   const uint32_t samples = pass->attachments[attachment_idx].desc.samples;
   const VkFormat format = pass->attachments[attachment_idx].desc.format;
   assert(vk_format_is_depth_or_stencil(format));

   /* Fast path: return an already-cached pipeline for this key. The meta
    * mutex guards both the lookup and the insert below so concurrent command
    * buffer recording can't race on the cache.
    */
   const uint64_t key =
      get_depth_clear_pipeline_cache_key(aspects, format, samples, is_layered);
   mtx_lock(&device->meta.mtx);
   struct hash_entry *entry =
      _mesa_hash_table_search(device->meta.depth_clear.cache, &key);
   if (entry) {
      mtx_unlock(&device->meta.mtx);
      *pipeline = entry->data;
      return VK_SUCCESS;
   }

   /* Not cached: allocate and build a new pipeline while still holding the
    * lock, so only one thread creates the entry for a given key.
    */
   *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (*pipeline == NULL) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   result = create_depth_clear_pipeline(device,
                                        aspects,
                                        pass,
                                        subpass_idx,
                                        samples,
                                        is_layered,
                                        device->meta.depth_clear.p_layout,
                                        &(*pipeline)->pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   /* The hash table stores a pointer to the key embedded in the entry, so
    * the key must live as long as the entry itself.
    */
   (*pipeline)->key = key;
   _mesa_hash_table_insert(device->meta.depth_clear.cache,
                           &(*pipeline)->key, *pipeline);

   mtx_unlock(&device->meta.mtx);
   return VK_SUCCESS;

fail:
   mtx_unlock(&device->meta.mtx);

   /* Undo partial construction; *pipeline may hold a zeroed struct with or
    * without a created VkPipeline at this point.
    */
   VkDevice _device = v3dv_device_to_handle(device);
   if (*pipeline) {
      if ((*pipeline)->pipeline)
         v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
      vk_free(&device->vk.alloc, *pipeline);
      *pipeline = NULL;
   }

   return result;
}
969
970 /* Emits a scissored quad in the clear color */
971 static void
emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_render_pass *pass, struct v3dv_subpass *subpass, uint32_t rt_idx, const VkClearColorValue *clear_color, bool is_layered, bool all_rects_same_layers, uint32_t rect_count, const VkClearRect *rects)972 emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
973 struct v3dv_render_pass *pass,
974 struct v3dv_subpass *subpass,
975 uint32_t rt_idx,
976 const VkClearColorValue *clear_color,
977 bool is_layered,
978 bool all_rects_same_layers,
979 uint32_t rect_count,
980 const VkClearRect *rects)
981 {
982 /* Skip if attachment is unused in the current subpass */
983 assert(rt_idx < subpass->color_count);
984 const uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
985 if (attachment_idx == VK_ATTACHMENT_UNUSED)
986 return;
987
988 /* Obtain a pipeline for this clear */
989 assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
990 const VkFormat format =
991 cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
992 const VkFormat samples =
993 cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;
994 const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
995 VK_COLOR_COMPONENT_G_BIT |
996 VK_COLOR_COMPONENT_B_BIT |
997 VK_COLOR_COMPONENT_A_BIT;
998 struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
999 VkResult result = get_color_clear_pipeline(cmd_buffer->device,
1000 pass,
1001 cmd_buffer->state.subpass_idx,
1002 rt_idx,
1003 attachment_idx,
1004 format,
1005 samples,
1006 components,
1007 is_layered,
1008 &pipeline);
1009 if (result != VK_SUCCESS) {
1010 if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1011 v3dv_flag_oom(cmd_buffer, NULL);
1012 return;
1013 }
1014 assert(pipeline && pipeline->pipeline);
1015
1016 /* Emit clear rects */
1017 v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1018
1019 VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1020 v3dv_CmdPushConstants(cmd_buffer_handle,
1021 cmd_buffer->device->meta.depth_clear.p_layout,
1022 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
1023 clear_color->float32);
1024
1025 v3dv_CmdBindPipeline(cmd_buffer_handle,
1026 VK_PIPELINE_BIND_POINT_GRAPHICS,
1027 pipeline->pipeline);
1028
1029 uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
1030
1031 for (uint32_t i = 0; i < rect_count; i++) {
1032 const VkViewport viewport = {
1033 .x = rects[i].rect.offset.x,
1034 .y = rects[i].rect.offset.y,
1035 .width = rects[i].rect.extent.width,
1036 .height = rects[i].rect.extent.height,
1037 .minDepth = 0.0f,
1038 .maxDepth = 1.0f
1039 };
1040 v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1041 v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1042
1043 if (is_layered) {
1044 for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1045 layer_offset++) {
1046 uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1047 v3dv_CmdPushConstants(cmd_buffer_handle,
1048 cmd_buffer->device->meta.depth_clear.p_layout,
1049 VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4, &layer);
1050 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1051 }
1052 } else {
1053 assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1054 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1055 }
1056 }
1057
1058 /* Subpass pipelines can't be cached because they include a reference to the
1059 * render pass currently bound by the application, which means that we need
1060 * to destroy them manually here.
1061 */
1062 assert(!pipeline->cached);
1063 v3dv_cmd_buffer_add_private_obj(
1064 cmd_buffer, (uintptr_t)pipeline,
1065 (v3dv_cmd_buffer_private_obj_destroy_cb) destroy_color_clear_pipeline);
1066
1067 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
1068 }
1069
1070 /* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth
1071 * and the stencil aspect by using stencil testing.
1072 */
1073 static void
emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_render_pass *pass, struct v3dv_subpass *subpass, VkImageAspectFlags aspects, const VkClearDepthStencilValue *clear_ds, bool is_layered, bool all_rects_same_layers, uint32_t rect_count, const VkClearRect *rects)1074 emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1075 struct v3dv_render_pass *pass,
1076 struct v3dv_subpass *subpass,
1077 VkImageAspectFlags aspects,
1078 const VkClearDepthStencilValue *clear_ds,
1079 bool is_layered,
1080 bool all_rects_same_layers,
1081 uint32_t rect_count,
1082 const VkClearRect *rects)
1083 {
1084 /* Skip if attachment is unused in the current subpass */
1085 const uint32_t attachment_idx = subpass->ds_attachment.attachment;
1086 if (attachment_idx == VK_ATTACHMENT_UNUSED)
1087 return;
1088
1089 /* Obtain a pipeline for this clear */
1090 assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
1091 struct v3dv_meta_depth_clear_pipeline *pipeline = NULL;
1092 VkResult result = get_depth_clear_pipeline(cmd_buffer->device,
1093 aspects,
1094 pass,
1095 cmd_buffer->state.subpass_idx,
1096 attachment_idx,
1097 is_layered,
1098 &pipeline);
1099 if (result != VK_SUCCESS) {
1100 if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1101 v3dv_flag_oom(cmd_buffer, NULL);
1102 return;
1103 }
1104 assert(pipeline && pipeline->pipeline);
1105
1106 /* Emit clear rects */
1107 v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1108
1109 VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1110 v3dv_CmdPushConstants(cmd_buffer_handle,
1111 cmd_buffer->device->meta.depth_clear.p_layout,
1112 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
1113 &clear_ds->depth);
1114
1115 v3dv_CmdBindPipeline(cmd_buffer_handle,
1116 VK_PIPELINE_BIND_POINT_GRAPHICS,
1117 pipeline->pipeline);
1118
1119 uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
1120 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1121 v3dv_CmdSetStencilReference(cmd_buffer_handle,
1122 VK_STENCIL_FACE_FRONT_AND_BACK,
1123 clear_ds->stencil);
1124 v3dv_CmdSetStencilWriteMask(cmd_buffer_handle,
1125 VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1126 v3dv_CmdSetStencilCompareMask(cmd_buffer_handle,
1127 VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1128 dynamic_states |= VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK |
1129 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK |
1130 VK_DYNAMIC_STATE_STENCIL_REFERENCE;
1131 }
1132
1133 for (uint32_t i = 0; i < rect_count; i++) {
1134 const VkViewport viewport = {
1135 .x = rects[i].rect.offset.x,
1136 .y = rects[i].rect.offset.y,
1137 .width = rects[i].rect.extent.width,
1138 .height = rects[i].rect.extent.height,
1139 .minDepth = 0.0f,
1140 .maxDepth = 1.0f
1141 };
1142 v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1143 v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1144 if (is_layered) {
1145 for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1146 layer_offset++) {
1147 uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1148 v3dv_CmdPushConstants(cmd_buffer_handle,
1149 cmd_buffer->device->meta.depth_clear.p_layout,
1150 VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4, &layer);
1151 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1152 }
1153 } else {
1154 assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1155 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1156 }
1157 }
1158
1159 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
1160 }
1161
1162 static void
gather_layering_info(uint32_t rect_count, const VkClearRect *rects, bool *is_layered, bool *all_rects_same_layers)1163 gather_layering_info(uint32_t rect_count, const VkClearRect *rects,
1164 bool *is_layered, bool *all_rects_same_layers)
1165 {
1166 *all_rects_same_layers = true;
1167
1168 uint32_t min_layer = rects[0].baseArrayLayer;
1169 uint32_t max_layer = rects[0].baseArrayLayer + rects[0].layerCount - 1;
1170 for (uint32_t i = 1; i < rect_count; i++) {
1171 if (rects[i].baseArrayLayer != rects[i - 1].baseArrayLayer ||
1172 rects[i].layerCount != rects[i - 1].layerCount) {
1173 *all_rects_same_layers = false;
1174 min_layer = MIN2(min_layer, rects[i].baseArrayLayer);
1175 max_layer = MAX2(max_layer, rects[i].baseArrayLayer +
1176 rects[i].layerCount - 1);
1177 }
1178 }
1179
1180 *is_layered = !(min_layer == 0 && max_layer == 0);
1181 }
1182
1183 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments, uint32_t rectCount, const VkClearRect *pRects)1184 v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
1185 uint32_t attachmentCount,
1186 const VkClearAttachment *pAttachments,
1187 uint32_t rectCount,
1188 const VkClearRect *pRects)
1189 {
1190 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1191
1192 /* We can only clear attachments in the current subpass */
1193 assert(attachmentCount <= 5); /* 4 color + D/S */
1194
1195 struct v3dv_render_pass *pass = cmd_buffer->state.pass;
1196
1197 assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
1198 struct v3dv_subpass *subpass =
1199 &cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];
1200
1201 /* Emit a clear rect inside the current job for this subpass. For layered
1202 * framebuffers, we use a geometry shader to redirect clears to the
1203 * appropriate layers.
1204 */
1205 bool is_layered, all_rects_same_layers;
1206 gather_layering_info(rectCount, pRects, &is_layered, &all_rects_same_layers);
1207 for (uint32_t i = 0; i < attachmentCount; i++) {
1208 if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1209 emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
1210 pAttachments[i].colorAttachment,
1211 &pAttachments[i].clearValue.color,
1212 is_layered, all_rects_same_layers,
1213 rectCount, pRects);
1214 } else {
1215 emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
1216 pAttachments[i].aspectMask,
1217 &pAttachments[i].clearValue.depthStencil,
1218 is_layered, all_rects_same_layers,
1219 rectCount, pRects);
1220 }
1221 }
1222 }
1223