1/*
2 * Copyright © 2021 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24#include "nir/nir_builder.h"
25#include "pan_blitter.h"
26#include "pan_encoder.h"
27#include "pan_shader.h"
28
29#include "panvk_private.h"
30#include "panvk_vX_meta.h"
31
32#include "vk_format.h"
33
34static mali_ptr
35panvk_meta_clear_color_attachment_shader(struct panfrost_device *pdev,
36                                         struct pan_pool *bin_pool,
37                                         enum glsl_base_type base_type,
38                                         struct pan_shader_info *shader_info)
39{
40   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
41                                     GENX(pan_shader_get_compiler_options)(),
42                                     "panvk_meta_clear_attachment(base_type=%d)",
43                                     base_type);
44
45   const struct glsl_type *out_type = glsl_vector_type(base_type, 4);
46   nir_variable *out =
47      nir_variable_create(b.shader, nir_var_shader_out, out_type, "out");
48   out->data.location = FRAG_RESULT_DATA0;
49
50   nir_ssa_def *clear_values = nir_load_push_constant(&b, 4, 32,
51                                                      nir_imm_int(&b, 0),
52                                                     .range = ~0);
53   nir_store_var(&b, out, clear_values, 0xff);
54
55   struct panfrost_compile_inputs inputs = {
56      .gpu_id = pdev->gpu_id,
57      .is_blit = true,
58      .no_ubo_to_push = true,
59   };
60
61   struct util_dynarray binary;
62
63   util_dynarray_init(&binary, NULL);
64   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
65
66   shader_info->push.count = 4;
67
68   mali_ptr shader =
69      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);
70
71   util_dynarray_fini(&binary);
72   ralloc_free(b.shader);
73
74   return shader;
75}
76
77static mali_ptr
78panvk_meta_clear_color_attachment_emit_rsd(struct panfrost_device *pdev,
79                                           struct pan_pool *desc_pool,
80                                           enum pipe_format format,
81                                           unsigned rt,
82                                           struct pan_shader_info *shader_info,
83                                           mali_ptr shader)
84{
85   struct panfrost_ptr rsd_ptr =
86      pan_pool_alloc_desc_aggregate(desc_pool,
87                                    PAN_DESC(RENDERER_STATE),
88                                    PAN_DESC_ARRAY(rt + 1, BLEND));
89
90   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
91      pan_shader_prepare_rsd(shader_info, shader, &cfg);
92
93      cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
94      cfg.multisample_misc.sample_mask = UINT16_MAX;
95      cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
96      cfg.properties.allow_forward_pixel_to_be_killed = true;
97      cfg.properties.allow_forward_pixel_to_kill = true;
98      cfg.properties.zs_update_operation = MALI_PIXEL_KILL_WEAK_EARLY;
99      cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_WEAK_EARLY;
100   }
101
102   void *bd = rsd_ptr.cpu + pan_size(RENDERER_STATE);
103
104   pan_pack(bd, BLEND, cfg) {
105      cfg.round_to_fb_precision = true;
106      cfg.load_destination = false;
107      cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
108      cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
109      cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
110      cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
111      cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
112      cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
113      cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
114      cfg.equation.color_mask = 0xf;
115      cfg.internal.fixed_function.num_comps = 4;
116      cfg.internal.fixed_function.rt = rt;
117      cfg.internal.fixed_function.conversion.memory_format =
118         panfrost_format_to_bifrost_blend(pdev, format, false);
119      cfg.internal.fixed_function.conversion.register_format =
120         shader_info->bifrost.blend[0].format;
121   }
122
123   return rsd_ptr.gpu;
124}
125
126static mali_ptr
127panvk_meta_clear_zs_attachment_emit_rsd(struct panfrost_device *pdev,
128                                        struct pan_pool *desc_pool,
129                                        VkImageAspectFlags mask,
130                                        VkClearDepthStencilValue value)
131{
132   struct panfrost_ptr rsd_ptr = pan_pool_alloc_desc(desc_pool, RENDERER_STATE);
133
134   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
135      cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
136      cfg.multisample_misc.sample_mask = UINT16_MAX;
137
138      if (mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
139         cfg.multisample_misc.depth_write_mask = true;
140         cfg.multisample_misc.depth_function = MALI_FUNC_NOT_EQUAL;
141
142         if (value.depth != 0.0) {
143            cfg.stencil_mask_misc.front_facing_depth_bias = true;
144            cfg.stencil_mask_misc.back_facing_depth_bias = true;
145            cfg.depth_units = INFINITY;
146            cfg.depth_bias_clamp = value.depth;
147         }
148      }
149
150      if (mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
151         cfg.stencil_mask_misc.stencil_enable = true;
152         cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
153         cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
154
155         cfg.stencil_front.compare_function =
156            (mask & VK_IMAGE_ASPECT_DEPTH_BIT) ?
157            MALI_FUNC_ALWAYS : MALI_FUNC_NOT_EQUAL;
158
159         cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_KEEP;
160         cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
161         cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
162         cfg.stencil_front.reference_value = value.stencil;
163         cfg.stencil_front.mask = 0xFF;
164         cfg.stencil_back = cfg.stencil_front;
165      }
166
167      cfg.properties.allow_forward_pixel_to_be_killed = true;
168      cfg.properties.zs_update_operation = MALI_PIXEL_KILL_WEAK_EARLY;
169      cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_WEAK_EARLY;
170   }
171
172   return rsd_ptr.gpu;
173}
174
175static void
176panvk_meta_clear_attachment_emit_dcd(struct pan_pool *pool,
177                                     mali_ptr coords, mali_ptr push_constants,
178                                     mali_ptr vpd, mali_ptr tsd, mali_ptr rsd,
179                                     void *out)
180{
181   pan_pack(out, DRAW, cfg) {
182      cfg.thread_storage = tsd;
183      cfg.state = rsd;
184      cfg.push_uniforms = push_constants;
185      cfg.position = coords;
186      cfg.viewport = vpd;
187   }
188}
189
190static struct panfrost_ptr
191panvk_meta_clear_attachment_emit_tiler_job(struct pan_pool *desc_pool,
192                                           struct pan_scoreboard *scoreboard,
193                                           mali_ptr coords,
194                                           mali_ptr push_constants,
195                                           mali_ptr vpd, mali_ptr rsd,
196                                           mali_ptr tsd, mali_ptr tiler)
197{
198   struct panfrost_ptr job =
199      pan_pool_alloc_desc(desc_pool, TILER_JOB);
200
201   panvk_meta_clear_attachment_emit_dcd(desc_pool,
202                                        coords,
203                                        push_constants,
204                                        vpd, tsd, rsd,
205                                        pan_section_ptr(job.cpu, TILER_JOB, DRAW));
206
207   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
208      cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
209      cfg.index_count = 4;
210      cfg.job_task_split = 6;
211   }
212
213   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
214      cfg.constant = 1.0f;
215   }
216
217   void *invoc = pan_section_ptr(job.cpu,
218                                 TILER_JOB,
219                                 INVOCATION);
220   panfrost_pack_work_groups_compute(invoc, 1, 4,
221                                     1, 1, 1, 1, true, false);
222
223   pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg);
224   pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
225      cfg.address = tiler;
226   }
227
228   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
229                    false, false, 0, 0, &job, false);
230   return job;
231}
232
233static enum glsl_base_type
234panvk_meta_get_format_type(enum pipe_format format)
235{
236   const struct util_format_description *desc = util_format_description(format);
237   int i;
238
239   i = util_format_get_first_non_void_channel(format);
240   assert(i >= 0);
241
242   if (desc->channel[i].normalized)
243      return GLSL_TYPE_FLOAT;
244
245   switch(desc->channel[i].type) {
246
247   case UTIL_FORMAT_TYPE_UNSIGNED:
248      return GLSL_TYPE_UINT;
249
250   case UTIL_FORMAT_TYPE_SIGNED:
251      return GLSL_TYPE_INT;
252
253   case UTIL_FORMAT_TYPE_FLOAT:
254      return GLSL_TYPE_FLOAT;
255
256   default:
257      unreachable("Unhandled format");
258      return GLSL_TYPE_FLOAT;
259   }
260}
261
262static void
263panvk_meta_clear_attachment(struct panvk_cmd_buffer *cmdbuf,
264                            unsigned attachment, unsigned rt,
265                            VkImageAspectFlags mask,
266                            const VkClearValue *clear_value,
267                            const VkClearRect *clear_rect)
268{
269   struct panvk_physical_device *dev = cmdbuf->device->physical_device;
270   struct panfrost_device *pdev = &dev->pdev;
271   struct panvk_meta *meta = &cmdbuf->device->physical_device->meta;
272   struct panvk_batch *batch = cmdbuf->state.batch;
273   const struct panvk_render_pass *pass = cmdbuf->state.pass;
274   const struct panvk_render_pass_attachment *att = &pass->attachments[attachment];
275   unsigned minx = MAX2(clear_rect->rect.offset.x, 0);
276   unsigned miny = MAX2(clear_rect->rect.offset.y, 0);
277   unsigned maxx = MAX2(clear_rect->rect.offset.x + clear_rect->rect.extent.width - 1, 0);
278   unsigned maxy = MAX2(clear_rect->rect.offset.y + clear_rect->rect.extent.height - 1, 0);
279
280   panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
281   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
282   panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
283
284   mali_ptr vpd =
285      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
286                                         minx, miny, maxx, maxy);
287
288   float rect[] = {
289      minx, miny, 0.0, 1.0,
290      maxx + 1, miny, 0.0, 1.0,
291      minx, maxy + 1, 0.0, 1.0,
292      maxx + 1, maxy + 1, 0.0, 1.0,
293   };
294   mali_ptr coordinates = pan_pool_upload_aligned(&cmdbuf->desc_pool.base,
295                                                  rect, sizeof(rect), 64);
296
297   enum glsl_base_type base_type = panvk_meta_get_format_type(att->format);
298
299   mali_ptr tiler = batch->tiler.descs.gpu;
300   mali_ptr tsd = batch->tls.gpu;
301
302   mali_ptr pushconsts = 0, rsd = 0;
303
304   if (mask & VK_IMAGE_ASPECT_COLOR_BIT) {
305      mali_ptr shader = meta->clear_attachment.color[base_type].shader;
306      struct pan_shader_info *shader_info = &meta->clear_attachment.color[base_type].shader_info;
307
308      pushconsts = pan_pool_upload_aligned(&cmdbuf->desc_pool.base,
309                              clear_value, sizeof(*clear_value), 16);
310
311      rsd = panvk_meta_clear_color_attachment_emit_rsd(pdev,
312                                                       &cmdbuf->desc_pool.base,
313                                                       att->format, rt,
314                                                       shader_info,
315                                                       shader);
316   } else {
317      rsd = panvk_meta_clear_zs_attachment_emit_rsd(pdev,
318                                                    &cmdbuf->desc_pool.base,
319                                                    mask,
320                                                    clear_value->depthStencil);
321   }
322
323   struct panfrost_ptr job;
324
325   job = panvk_meta_clear_attachment_emit_tiler_job(&cmdbuf->desc_pool.base,
326                                                    &batch->scoreboard,
327                                                    coordinates, pushconsts,
328                                                    vpd, rsd, tsd, tiler);
329
330   util_dynarray_append(&batch->jobs, void *, job.cpu);
331}
332
333static void
334panvk_meta_clear_color_img(struct panvk_cmd_buffer *cmdbuf,
335                           struct panvk_image *img,
336                           const VkClearColorValue *color,
337                           const VkImageSubresourceRange *range)
338{
339   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
340   struct pan_image_view view = {
341      .format = img->pimage.layout.format,
342      .dim = MALI_TEXTURE_DIMENSION_2D,
343      .image = &img->pimage,
344      .nr_samples = img->pimage.layout.nr_samples,
345      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
346   };
347
348   cmdbuf->state.fb.crc_valid[0] = false;
349   *fbinfo = (struct pan_fb_info){
350      .nr_samples = img->pimage.layout.nr_samples,
351      .rt_count = 1,
352      .rts[0].view = &view,
353      .rts[0].clear = true,
354      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
355   };
356
357   uint32_t clearval[4];
358   pan_pack_color(clearval, (union pipe_color_union *)color,
359                  img->pimage.layout.format, false);
360   memcpy(fbinfo->rts[0].clear_value, clearval, sizeof(fbinfo->rts[0].clear_value));
361
362   unsigned level_count = vk_image_subresource_level_count(&img->vk, range);
363   unsigned layer_count = vk_image_subresource_layer_count(&img->vk, range);
364
365   for (unsigned level = range->baseMipLevel;
366        level < range->baseMipLevel + level_count; level++) {
367      view.first_level = view.last_level = level;
368      fbinfo->width = u_minify(img->pimage.layout.width, level);
369      fbinfo->height = u_minify(img->pimage.layout.height, level);
370      fbinfo->extent.maxx = fbinfo->width - 1;
371      fbinfo->extent.maxy = fbinfo->height - 1;
372
373      for (unsigned layer = range->baseArrayLayer;
374           layer < range->baseArrayLayer + layer_count; layer++) {
375         view.first_layer = view.last_layer = layer;
376         panvk_cmd_open_batch(cmdbuf);
377         panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
378         panvk_per_arch(cmd_close_batch)(cmdbuf);
379      }
380   }
381}
382
383void
384panvk_per_arch(CmdClearColorImage)(VkCommandBuffer commandBuffer,
385                                   VkImage image,
386                                   VkImageLayout imageLayout,
387                                   const VkClearColorValue *pColor,
388                                   uint32_t rangeCount,
389                                   const VkImageSubresourceRange *pRanges)
390{
391   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
392   VK_FROM_HANDLE(panvk_image, img, image);
393
394   panvk_per_arch(cmd_close_batch)(cmdbuf);
395
396   for (unsigned i = 0; i < rangeCount; i++)
397      panvk_meta_clear_color_img(cmdbuf, img, pColor, &pRanges[i]);
398}
399
400static void
401panvk_meta_clear_zs_img(struct panvk_cmd_buffer *cmdbuf,
402                        struct panvk_image *img,
403                        const VkClearDepthStencilValue *value,
404                        const VkImageSubresourceRange *range)
405{
406   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
407   struct pan_image_view view = {
408      .format = img->pimage.layout.format,
409      .dim = MALI_TEXTURE_DIMENSION_2D,
410      .image = &img->pimage,
411      .nr_samples = img->pimage.layout.nr_samples,
412      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
413   };
414
415   cmdbuf->state.fb.crc_valid[0] = false;
416   *fbinfo = (struct pan_fb_info){
417      .nr_samples = img->pimage.layout.nr_samples,
418      .rt_count = 1,
419      .zs.clear_value.depth = value->depth,
420      .zs.clear_value.stencil = value->stencil,
421      .zs.clear.z = range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT,
422      .zs.clear.s = range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT
423   };
424
425   const struct util_format_description *fdesc =
426      util_format_description(view.format);
427
428   if (util_format_has_depth(fdesc)) {
429      fbinfo->zs.view.zs = &view;
430      if (util_format_has_stencil(fdesc)) {
431         fbinfo->zs.preload.z = !fbinfo->zs.clear.z;
432         fbinfo->zs.preload.s = !fbinfo->zs.clear.s;
433      }
434   } else {
435      fbinfo->zs.view.s = &view;
436   }
437
438   unsigned level_count = vk_image_subresource_level_count(&img->vk, range);
439   unsigned layer_count = vk_image_subresource_layer_count(&img->vk, range);
440
441   for (unsigned level = range->baseMipLevel;
442        level < range->baseMipLevel + level_count; level++) {
443      view.first_level = view.last_level = level;
444      fbinfo->width = u_minify(img->pimage.layout.width, level);
445      fbinfo->height = u_minify(img->pimage.layout.height, level);
446      fbinfo->extent.maxx = fbinfo->width - 1;
447      fbinfo->extent.maxy = fbinfo->height - 1;
448
449      for (unsigned layer = range->baseArrayLayer;
450           layer < range->baseArrayLayer + layer_count; layer++) {
451         view.first_layer = view.last_layer = layer;
452         panvk_cmd_open_batch(cmdbuf);
453         panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
454         panvk_per_arch(cmd_close_batch)(cmdbuf);
455      }
456   }
457}
458
459void
460panvk_per_arch(CmdClearDepthStencilImage)(VkCommandBuffer commandBuffer,
461                                          VkImage image,
462                                          VkImageLayout imageLayout,
463                                          const VkClearDepthStencilValue *pDepthStencil,
464                                          uint32_t rangeCount,
465                                          const VkImageSubresourceRange *pRanges)
466{
467   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
468   VK_FROM_HANDLE(panvk_image, img, image);
469
470   panvk_per_arch(cmd_close_batch)(cmdbuf);
471
472   for (unsigned i = 0; i < rangeCount; i++)
473      panvk_meta_clear_zs_img(cmdbuf, img, pDepthStencil, &pRanges[i]);
474}
475
476void
477panvk_per_arch(CmdClearAttachments)(VkCommandBuffer commandBuffer,
478                                    uint32_t attachmentCount,
479                                    const VkClearAttachment *pAttachments,
480                                    uint32_t rectCount,
481                                    const VkClearRect *pRects)
482{
483   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
484   const struct panvk_subpass *subpass = cmdbuf->state.subpass;
485
486   for (unsigned i = 0; i < attachmentCount; i++) {
487      for (unsigned j = 0; j < rectCount; j++) {
488
489         uint32_t attachment, rt = 0;
490         if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
491            rt = pAttachments[i].colorAttachment;
492            attachment = subpass->color_attachments[rt].idx;
493         } else {
494            attachment = subpass->zs_attachment.idx;
495         }
496
497         if (attachment == VK_ATTACHMENT_UNUSED)
498               continue;
499
500         panvk_meta_clear_attachment(cmdbuf, attachment, rt,
501                                     pAttachments[i].aspectMask,
502                                     &pAttachments[i].clearValue,
503                                     &pRects[j]);
504      }
505   }
506}
507
508static void
509panvk_meta_clear_attachment_init(struct panvk_physical_device *dev)
510{
511   dev->meta.clear_attachment.color[GLSL_TYPE_UINT].shader =
512      panvk_meta_clear_color_attachment_shader(
513            &dev->pdev,
514            &dev->meta.bin_pool.base,
515            GLSL_TYPE_UINT,
516            &dev->meta.clear_attachment.color[GLSL_TYPE_UINT].shader_info);
517
518   dev->meta.clear_attachment.color[GLSL_TYPE_INT].shader =
519      panvk_meta_clear_color_attachment_shader(
520            &dev->pdev,
521            &dev->meta.bin_pool.base,
522            GLSL_TYPE_INT,
523            &dev->meta.clear_attachment.color[GLSL_TYPE_INT].shader_info);
524
525   dev->meta.clear_attachment.color[GLSL_TYPE_FLOAT].shader =
526      panvk_meta_clear_color_attachment_shader(
527            &dev->pdev,
528            &dev->meta.bin_pool.base,
529            GLSL_TYPE_FLOAT,
530            &dev->meta.clear_attachment.color[GLSL_TYPE_FLOAT].shader_info);
531}
532
533void
534panvk_per_arch(meta_clear_init)(struct panvk_physical_device *dev)
535{
536   panvk_meta_clear_attachment_init(dev);
537}
538