/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_cmd_buffer.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "genxml/gen_macros.h"

#include "panvk_cs.h"
#include "panvk_private.h"

#include "pan_blitter.h"
#include "pan_cs.h"
#include "pan_encoder.h"

#include "util/rounding.h"
#include "util/u_pack_color.h"
#include "vk_format.h"

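/* When PANVK_DEBUG_DUMP is set, we need to be able to read BO contents back
 * from the CPU to dump them, so strip PAN_BO_INVISIBLE to force a
 * CPU-visible mapping.
 */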
static uint32_t
panvk_debug_adjust_bo_flags(const struct panvk_device *device,
                            uint32_t bo_flags)
{
   uint32_t debug_flags =
      device->physical_device->instance->debug_flags;

   if (debug_flags & PANVK_DEBUG_DUMP)
      bo_flags &= ~PAN_BO_INVISIBLE;

   return bo_flags;
}

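/* Allocate and emit the FRAGMENT job consuming the tile list produced by
 * the batch's vertex/tiler jobs. Called at batch-close time, once the
 * framebuffer descriptor is final.
 */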
static void
panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr job_ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, FRAGMENT_JOB);

   GENX(pan_emit_fragment_job)(fbinfo, batch->fb.desc.gpu, job_ptr.cpu);
   batch->fragment_job = job_ptr.gpu;
   util_dynarray_append(&batch->jobs, void *, job_ptr.cpu);
}

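/* Close the batch currently being recorded: content-less batches are
 * dropped (or turned into NULL jobs if they carry event operations), and
 * for anything else we allocate the TLS/WLS memory, emit the framebuffer
 * preload and TLS descriptors, and queue the final fragment job.
 */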
void
panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (!batch)
      return;

   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;

   bool clear = fbinfo->zs.clear.z || fbinfo->zs.clear.s;
   for (unsigned i = 0; i < fbinfo->rt_count; i++)
      clear |= fbinfo->rts[i].clear;

   if (!clear && !batch->scoreboard.first_job) {
      if (util_dynarray_num_elements(&batch->event_ops,
                                     struct panvk_event_op) == 0) {
         /* Content-less batch, let's drop it */
         vk_free(&cmdbuf->pool->vk.alloc, batch);
      } else {
         /* Batch has no jobs but is needed for synchronization, let's add a
          * NULL job so the SUBMIT ioctl doesn't choke on it.
          */
         struct panfrost_ptr ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base,
                                                       JOB_HEADER);
         util_dynarray_append(&batch->jobs, void *, ptr.cpu);
         panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                          MALI_JOB_TYPE_NULL, false, false, 0, 0,
                          &ptr, false);
         list_addtail(&batch->node, &cmdbuf->batches);
      }
      cmdbuf->state.batch = NULL;
      return;
   }

   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;

   list_addtail(&batch->node, &cmdbuf->batches);

   if (batch->scoreboard.first_tiler) {
      struct panfrost_ptr preload_jobs[2];
      unsigned num_preload_jobs =
         GENX(pan_preload_fb)(&cmdbuf->desc_pool.base, &batch->scoreboard,
                              &cmdbuf->state.fb.info, batch->tls.gpu,
                              batch->tiler.descs.gpu, preload_jobs);
      for (unsigned i = 0; i < num_preload_jobs; i++)
         util_dynarray_append(&batch->jobs, void *, preload_jobs[i].cpu);
   }

   if (batch->tlsinfo.tls.size) {
      unsigned size = panfrost_get_total_stack_size(batch->tlsinfo.tls.size,
                                                    pdev->thread_tls_alloc,
                                                    pdev->core_id_range);
      batch->tlsinfo.tls.ptr =
         pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, size, 4096).gpu;
   }

   if (batch->tlsinfo.wls.size) {
      assert(batch->wls_total_size);
      batch->tlsinfo.wls.ptr =
         pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, batch->wls_total_size,
                                4096).gpu;
   }

   if (batch->tls.cpu)
      GENX(pan_emit_tls)(&batch->tlsinfo, batch->tls.cpu);

   if (batch->fb.desc.cpu) {
      batch->fb.desc.gpu |=
         GENX(pan_emit_fbd)(pdev, &cmdbuf->state.fb.info, &batch->tlsinfo,
                            &batch->tiler.ctx, batch->fb.desc.cpu);

      panvk_cmd_prepare_fragment_job(cmdbuf);
   }

   cmdbuf->state.batch = NULL;
}

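/* Subpass boundaries map to batch boundaries: the batch of the previous
 * subpass is closed and a new one is opened for the next subpass.
 */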
void
panvk_per_arch(CmdNextSubpass2)(VkCommandBuffer commandBuffer,
                                const VkSubpassBeginInfo *pSubpassBeginInfo,
                                const VkSubpassEndInfo *pSubpassEndInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   cmdbuf->state.subpass++;
   panvk_cmd_fb_info_set_subpass(cmdbuf);
   panvk_cmd_open_batch(cmdbuf);
}

void
panvk_per_arch(CmdNextSubpass)(VkCommandBuffer cmd, VkSubpassContents contents)
{
   VkSubpassBeginInfo binfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
      .contents = contents
   };
   VkSubpassEndInfo einfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
   };

   panvk_per_arch(CmdNextSubpass2)(cmd, &binfo, &einfo);
}

void
panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (batch->fb.desc.gpu)
      return;

   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s;
   unsigned tags = MALI_FBD_TAG_IS_MFBD;

   batch->fb.info = cmdbuf->state.framebuffer;
   batch->fb.desc =
      pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
                                    PAN_DESC(FRAMEBUFFER),
                                    PAN_DESC_ARRAY(has_zs_ext ? 1 : 0,
                                                   ZS_CRC_EXTENSION),
                                    PAN_DESC_ARRAY(MAX2(fbinfo->rt_count, 1),
                                                   RENDER_TARGET));

   /* Tag the pointer */
   batch->fb.desc.gpu |= tags;

   memset(&cmdbuf->state.fb.info.bifrost.pre_post.dcds, 0,
          sizeof(cmdbuf->state.fb.info.bifrost.pre_post.dcds));
}

void
panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   assert(batch);
   if (!batch->tls.gpu) {
      batch->tls =
         pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
   }
}

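/* Refresh the draw-related sysvals, and invalidate the GPU copy (by zeroing
 * sysvals_ptr) whenever one of them changes so the next
 * panvk_cmd_prepare_sysvals() call re-uploads them.
 */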
static void
panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf,
                               struct panvk_cmd_bind_point_state *bind_point_state,
                               struct panvk_draw_info *draw)
{
   struct panvk_sysvals *sysvals = &bind_point_state->desc_state.sysvals;

   unsigned base_vertex = draw->index_size ? draw->vertex_offset : 0;
   if (sysvals->first_vertex != draw->offset_start ||
       sysvals->base_vertex != base_vertex ||
       sysvals->base_instance != draw->first_instance) {
      sysvals->first_vertex = draw->offset_start;
      sysvals->base_vertex = base_vertex;
      sysvals->base_instance = draw->first_instance;
      bind_point_state->desc_state.sysvals_ptr = 0;
   }

   if (cmdbuf->state.dirty & PANVK_DYNAMIC_BLEND_CONSTANTS) {
      memcpy(&sysvals->blend_constants, cmdbuf->state.blend.constants,
             sizeof(cmdbuf->state.blend.constants));
      bind_point_state->desc_state.sysvals_ptr = 0;
   }

   if (cmdbuf->state.dirty & PANVK_DYNAMIC_VIEWPORT) {
      panvk_sysval_upload_viewport_scale(&cmdbuf->state.viewport,
                                         &sysvals->viewport_scale);
      panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport,
                                          &sysvals->viewport_offset);
      bind_point_state->desc_state.sysvals_ptr = 0;
   }
}

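/* Upload the CPU copy of the sysvals to GPU-visible memory. This is a no-op
 * if the sysvals haven't been invalidated since the last upload.
 */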
static void
panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;

   if (desc_state->sysvals_ptr)
      return;

   struct panfrost_ptr sysvals =
      pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
                             sizeof(desc_state->sysvals), 16);
   memcpy(sysvals.cpu, &desc_state->sysvals, sizeof(desc_state->sysvals));
   desc_state->sysvals_ptr = sysvals.gpu;
}

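/* Push constants are copied to GPU-visible memory and exposed to shaders
 * through a uniform buffer (see panvk_cmd_prepare_ubos() below).
 */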
static void
panvk_cmd_prepare_push_constants(struct panvk_cmd_buffer *cmdbuf,
                                 struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (!pipeline->layout->push_constants.size || desc_state->push_constants)
      return;

   struct panfrost_ptr push_constants =
      pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
                             ALIGN_POT(pipeline->layout->push_constants.size, 16),
                             16);

   memcpy(push_constants.cpu, cmdbuf->push_constants,
          pipeline->layout->push_constants.size);
   desc_state->push_constants = push_constants.gpu;
}

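/* Emit the UBO descriptor table. Sysvals and push constants are passed as
 * regular UBOs, which is why they get uploaded here, right before
 * emit_ubos() fills out the table.
 */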
static void
panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf,
                       struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (!pipeline->num_ubos || desc_state->ubos)
      return;

   panvk_cmd_prepare_sysvals(cmdbuf, bind_point_state);
   panvk_cmd_prepare_push_constants(cmdbuf, bind_point_state);

   struct panfrost_ptr ubos =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                pipeline->num_ubos,
                                UNIFORM_BUFFER);

   panvk_per_arch(emit_ubos)(pipeline, desc_state, ubos.cpu);

   desc_state->ubos = ubos.gpu;
}

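/* Build the texture descriptor table by concatenating the texture
 * descriptors of all bound descriptor sets, in set order.
 */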
static void
panvk_cmd_prepare_textures(struct panvk_cmd_buffer *cmdbuf,
                           struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_textures = pipeline->layout->num_textures;

   if (!num_textures || desc_state->textures)
      return;

   struct panfrost_ptr textures =
      pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
                             num_textures * pan_size(TEXTURE),
                             pan_size(TEXTURE));

   void *texture = textures.cpu;

   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
      if (!desc_state->sets[i])
         continue;

      memcpy(texture,
             desc_state->sets[i]->textures,
             desc_state->sets[i]->layout->num_textures *
             pan_size(TEXTURE));

      texture += desc_state->sets[i]->layout->num_textures *
                 pan_size(TEXTURE);
   }

   desc_state->textures = textures.gpu;
}

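/* Build the sampler descriptor table. Entry zero is reserved for a dummy
 * sampler (used by shaders that access a texture without a sampler bound),
 * and the samplers of the bound descriptor sets follow, in set order.
 */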
static void
panvk_cmd_prepare_samplers(struct panvk_cmd_buffer *cmdbuf,
                           struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_samplers = pipeline->layout->num_samplers;

   if (!num_samplers || desc_state->samplers)
      return;

   struct panfrost_ptr samplers =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                num_samplers,
                                SAMPLER);

   void *sampler = samplers.cpu;

   /* Prepare the dummy sampler */
   pan_pack(sampler, SAMPLER, cfg) {
      cfg.seamless_cube_map = false;
      cfg.magnify_nearest = true;
      cfg.minify_nearest = true;
      cfg.normalized_coordinates = false;
   }

   sampler += pan_size(SAMPLER);

   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
      if (!desc_state->sets[i])
         continue;

      memcpy(sampler,
             desc_state->sets[i]->samplers,
             desc_state->sets[i]->layout->num_samplers *
             pan_size(SAMPLER));

      sampler += desc_state->sets[i]->layout->num_samplers *
                 pan_size(SAMPLER);
   }

   desc_state->samplers = samplers.gpu;
}

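/* Get the fragment shader renderer state descriptor. Pipelines without
 * dynamic state impacting the RSD use a pre-packed descriptor; for the
 * others, we merge the dynamic fields into the pipeline's RSD/blend
 * templates and cache the result in cmdbuf->state.fs_rsd until the dynamic
 * state changes again.
 */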
static void
panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   if (!pipeline->fs.dynamic_rsd) {
      draw->fs_rsd = pipeline->rsds[MESA_SHADER_FRAGMENT];
      return;
   }

   if (!cmdbuf->state.fs_rsd) {
      struct panfrost_ptr rsd =
         pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
                                       PAN_DESC(RENDERER_STATE),
                                       PAN_DESC_ARRAY(pipeline->blend.state.rt_count,
                                                      BLEND));

      struct mali_renderer_state_packed rsd_dyn;
      struct mali_renderer_state_packed *rsd_templ =
         (struct mali_renderer_state_packed *)&pipeline->fs.rsd_template;

      STATIC_ASSERT(sizeof(pipeline->fs.rsd_template) >= sizeof(*rsd_templ));

      panvk_per_arch(emit_dyn_fs_rsd)(pipeline, &cmdbuf->state, &rsd_dyn);
      pan_merge(rsd_dyn, (*rsd_templ), RENDERER_STATE);
      memcpy(rsd.cpu, &rsd_dyn, sizeof(rsd_dyn));

      void *bd = rsd.cpu + pan_size(RENDERER_STATE);
      for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) {
         if (pipeline->blend.constant[i].index != (uint8_t)~0) {
            struct mali_blend_packed bd_dyn;
            struct mali_blend_packed *bd_templ =
               (struct mali_blend_packed *)&pipeline->blend.bd_template[i];

            STATIC_ASSERT(sizeof(pipeline->blend.bd_template[0]) >=
                          sizeof(*bd_templ));
            panvk_per_arch(emit_blend_constant)(cmdbuf->device, pipeline, i,
                                                cmdbuf->state.blend.constants,
                                                &bd_dyn);
            pan_merge(bd_dyn, (*bd_templ), BLEND);
            memcpy(bd, &bd_dyn, sizeof(bd_dyn));
         }
         bd += pan_size(BLEND);
      }

      cmdbuf->state.fs_rsd = rsd.gpu;
   }

   draw->fs_rsd = cmdbuf->state.fs_rsd;
}

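/* Allocate the tiler context and heap descriptors for the current batch.
 * The descriptors are first packed in the CPU-side batch->tiler.templ copy,
 * then copied to the GPU-visible allocation.
 */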
void
panvk_per_arch(cmd_get_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
                                      unsigned width, unsigned height)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (batch->tiler.descs.cpu)
      return;

   batch->tiler.descs =
      pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
                                    PAN_DESC(TILER_CONTEXT),
                                    PAN_DESC(TILER_HEAP));
   STATIC_ASSERT(sizeof(batch->tiler.templ) >=
                 pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));

   struct panfrost_ptr desc = {
      .gpu = batch->tiler.descs.gpu,
      .cpu = batch->tiler.templ,
   };

   panvk_per_arch(emit_tiler_context)(cmdbuf->device, width, height, &desc);
   memcpy(batch->tiler.descs.cpu, batch->tiler.templ,
          pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));
   batch->tiler.ctx.bifrost = batch->tiler.descs.gpu;
}

void
panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;

   panvk_per_arch(cmd_get_tiler_context)(cmdbuf,
                                         fbinfo->width,
                                         fbinfo->height);
}

static void
panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf,
                                 struct panvk_draw_info *draw)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
   draw->tiler_ctx = &batch->tiler.ctx;
}

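/* Allocate the varying buffers for this draw, and emit the attribute buffer
 * and attribute descriptors the shaders will use to access varyings.
 */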
static void
panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_varyings_info *varyings = &cmdbuf->state.varyings;

   panvk_varyings_alloc(varyings, &cmdbuf->varying_pool.base,
                        draw->padded_vertex_count * draw->instance_count);

   unsigned buf_count = panvk_varyings_buf_count(varyings);
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                buf_count + 1,
                                ATTRIBUTE_BUFFER);

   panvk_per_arch(emit_varying_bufs)(varyings, bufs.cpu);

   /* We need an empty entry to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   if (BITSET_TEST(varyings->active, VARYING_SLOT_POS)) {
      draw->position = varyings->buf[varyings->varying[VARYING_SLOT_POS].buf].address +
                       varyings->varying[VARYING_SLOT_POS].offset;
   }

   if (pipeline->ia.writes_point_size) {
      draw->psiz = varyings->buf[varyings->varying[VARYING_SLOT_PSIZ].buf].address +
                   varyings->varying[VARYING_SLOT_PSIZ].offset;
   } else if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
              pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
              pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
      draw->line_width = pipeline->dynamic_state_mask & PANVK_DYNAMIC_LINE_WIDTH ?
                         cmdbuf->state.rast.line_width : pipeline->rast.line_width;
   } else {
      draw->line_width = 1.0f;
   }
   draw->varying_bufs = bufs.gpu;

   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
      if (!varyings->stage[s].count)
         continue;

      struct panfrost_ptr attribs =
         pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                   varyings->stage[s].count,
                                   ATTRIBUTE);

      panvk_per_arch(emit_varyings)(cmdbuf->device, varyings, s, attribs.cpu);
      draw->stages[s].varyings = attribs.gpu;
   }
}

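/* Images are exposed to shaders through the attribute pipeline: each image
 * occupies two attribute buffer slots and one attribute descriptor. Copy
 * the pre-packed image attribute buffer descriptors of each bound set and
 * emit the matching attribute descriptors.
 */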
static void
panvk_fill_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_cmd_bind_point_state *bind_point_state,
                          void *attrib_bufs, void *attribs,
                          unsigned first_buf)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   for (unsigned s = 0; s < pipeline->layout->num_sets; s++) {
      const struct panvk_descriptor_set *set = desc_state->sets[s];

      if (!set)
         continue;

      const struct panvk_descriptor_set_layout *layout = set->layout;
      unsigned img_idx = pipeline->layout->sets[s].img_offset;
      unsigned offset = img_idx * pan_size(ATTRIBUTE_BUFFER) * 2;
      unsigned size = layout->num_imgs * pan_size(ATTRIBUTE_BUFFER) * 2;

      memcpy(attrib_bufs + offset, desc_state->sets[s]->img_attrib_bufs, size);

      offset = img_idx * pan_size(ATTRIBUTE);
      for (unsigned i = 0; i < layout->num_imgs; i++) {
         pan_pack(attribs + offset, ATTRIBUTE, cfg) {
            cfg.buffer_index = first_buf + (img_idx + i) * 2;
            cfg.format = desc_state->sets[s]->img_fmts[i];
         }
         offset += pan_size(ATTRIBUTE);
      }
   }
}

static void
panvk_prepare_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                             struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (desc_state->non_vs_attribs || !pipeline->img_access_mask)
      return;

   unsigned attrib_count = pipeline->layout->num_imgs;
   unsigned attrib_buf_count = pipeline->layout->num_imgs * 2;
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                attrib_buf_count + 1,
                                ATTRIBUTE_BUFFER);
   struct panfrost_ptr attribs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, attrib_count,
                                ATTRIBUTE);

   panvk_fill_non_vs_attribs(cmdbuf, bind_point_state, bufs.cpu, attribs.cpu, 0);

   desc_state->non_vs_attrib_bufs = bufs.gpu;
   desc_state->non_vs_attribs = attribs.gpu;
}

static void
panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_imgs =
      pipeline->img_access_mask & BITFIELD_BIT(MESA_SHADER_VERTEX) ?
      pipeline->layout->num_imgs : 0;
   unsigned attrib_count = pipeline->attribs.attrib_count + num_imgs;

   if (desc_state->vs_attribs || !attrib_count)
      return;

   if (!pipeline->attribs.buf_count) {
      panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
      desc_state->vs_attrib_bufs = desc_state->non_vs_attrib_bufs;
      desc_state->vs_attribs = desc_state->non_vs_attribs;
      return;
   }

   unsigned attrib_buf_count = pipeline->attribs.buf_count * 2;
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                attrib_buf_count + 1,
                                ATTRIBUTE_BUFFER);
   struct panfrost_ptr attribs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, attrib_count,
                                ATTRIBUTE);

   panvk_per_arch(emit_attrib_bufs)(&pipeline->attribs,
                                    cmdbuf->state.vb.bufs,
                                    cmdbuf->state.vb.count,
                                    draw, bufs.cpu);
   panvk_per_arch(emit_attribs)(cmdbuf->device, draw, &pipeline->attribs,
                                cmdbuf->state.vb.bufs, cmdbuf->state.vb.count,
                                attribs.cpu);

   if (attrib_count > pipeline->attribs.buf_count) {
      unsigned bufs_offset =
         pipeline->attribs.buf_count * pan_size(ATTRIBUTE_BUFFER) * 2;
      unsigned attribs_offset =
         pipeline->attribs.buf_count * pan_size(ATTRIBUTE);

      panvk_fill_non_vs_attribs(cmdbuf, bind_point_state,
                                bufs.cpu + bufs_offset,
                                attribs.cpu + attribs_offset,
                                pipeline->attribs.buf_count * 2);
   }

   /* A NULL entry is needed to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * attrib_buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   desc_state->vs_attrib_bufs = bufs.gpu;
   desc_state->vs_attribs = attribs.gpu;
}

static void
panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   for (unsigned i = 0; i < ARRAY_SIZE(draw->stages); i++) {
      if (i == MESA_SHADER_VERTEX) {
         panvk_draw_prepare_vs_attribs(cmdbuf, draw);
         draw->stages[i].attributes = desc_state->vs_attribs;
         draw->stages[i].attribute_bufs = desc_state->vs_attrib_bufs;
      } else if (pipeline->img_access_mask & BITFIELD_BIT(i)) {
         panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
         draw->stages[i].attributes = desc_state->non_vs_attribs;
         draw->stages[i].attribute_bufs = desc_state->non_vs_attrib_bufs;
      }
   }
}

static void
panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   if (pipeline->vpd) {
      draw->viewport = pipeline->vpd;
   } else if (cmdbuf->state.vpd) {
      draw->viewport = cmdbuf->state.vpd;
   } else {
      struct panfrost_ptr vp =
         pan_pool_alloc_desc(&cmdbuf->desc_pool.base, VIEWPORT);

      const VkViewport *viewport =
         pipeline->dynamic_state_mask & PANVK_DYNAMIC_VIEWPORT ?
         &cmdbuf->state.viewport : &pipeline->viewport;
      const VkRect2D *scissor =
         pipeline->dynamic_state_mask & PANVK_DYNAMIC_SCISSOR ?
         &cmdbuf->state.scissor : &pipeline->scissor;

      panvk_per_arch(emit_viewport)(viewport, scissor, vp.cpu);
      draw->viewport = cmdbuf->state.vpd = vp.gpu;
   }
}

static void
panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.vertex = ptr;
   panvk_per_arch(emit_vertex_job)(pipeline, draw, ptr.cpu);
}

static void
panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf,
                             struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_JOB);

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.tiler = ptr;
   panvk_per_arch(emit_tiler_job)(pipeline, draw, ptr.cpu);
}

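/* Core draw logic: make sure all GPU-visible state (FB/TLS descriptors,
 * sysvals, UBOs, textures, samplers, RSD, varyings, attributes, viewport,
 * tiler context) is up-to-date, then emit and schedule the vertex and tiler
 * jobs for this draw.
 */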
static void
panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf,
               struct panvk_draw_info *draw)
{
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   /* There are only 16 bits for the job ID in the job descriptor, so make
    * sure all three jobs (two on Bifrost) emitted for this draw end up in
    * the same batch.
    */
   if (batch->scoreboard.job_index >= (UINT16_MAX - 3)) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      batch = panvk_cmd_open_batch(cmdbuf);
   }

   if (pipeline->rast.enable)
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);

   panvk_cmd_prepare_draw_sysvals(cmdbuf, bind_point_state, draw);
   panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
   panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
   panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);

   /* TODO: indexed draws */
   struct panvk_descriptor_state *desc_state =
      panvk_cmd_get_desc_state(cmdbuf, GRAPHICS);

   draw->tls = batch->tls.gpu;
   draw->fb = batch->fb.desc.gpu;
   draw->ubos = desc_state->ubos;
   draw->textures = desc_state->textures;
   draw->samplers = desc_state->samplers;

   STATIC_ASSERT(sizeof(draw->invocation) >=
                 sizeof(struct mali_invocation_packed));
   panfrost_pack_work_groups_compute((struct mali_invocation_packed *)&draw->invocation,
                                     1, draw->vertex_range, draw->instance_count,
                                     1, 1, 1, true, false);

   panvk_draw_prepare_fs_rsd(cmdbuf, draw);
   panvk_draw_prepare_varyings(cmdbuf, draw);
   panvk_draw_prepare_attributes(cmdbuf, draw);
   panvk_draw_prepare_viewport(cmdbuf, draw);
   panvk_draw_prepare_tiler_context(cmdbuf, draw);
   panvk_draw_prepare_vertex_job(cmdbuf, draw);
   panvk_draw_prepare_tiler_job(cmdbuf, draw);
   batch->tlsinfo.tls.size = MAX2(pipeline->tls_size, batch->tlsinfo.tls.size);
   assert(!pipeline->wls_size);

   unsigned vjob_id =
      panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                       MALI_JOB_TYPE_VERTEX, false, false, 0, 0,
                       &draw->jobs.vertex, false);

   if (pipeline->rast.enable) {
      panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                       MALI_JOB_TYPE_TILER, false, false, vjob_id, 0,
                       &draw->jobs.tiler, false);
   }

   /* Clear the dirty flags all at once */
   desc_state->dirty = cmdbuf->state.dirty = 0;
}

void
panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
                        uint32_t vertexCount,
                        uint32_t instanceCount,
                        uint32_t firstVertex,
                        uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   if (instanceCount == 0 || vertexCount == 0)
      return;

   struct panvk_draw_info draw = {
      .first_vertex = firstVertex,
      .vertex_count = vertexCount,
      .vertex_range = vertexCount,
      .first_instance = firstInstance,
      .instance_count = instanceCount,
      .padded_vertex_count = instanceCount > 1 ?
                             panfrost_padded_vertex_count(vertexCount) :
                             vertexCount,
      .offset_start = firstVertex,
   };

   panvk_cmd_draw(cmdbuf, &draw);
}

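/* The hardware needs to know the vertex range covered by a draw upfront
 * (it's used to size the varying buffers, among other things), so for
 * indexed draws we have to crawl the index buffer from the CPU to compute
 * the min/max indices. This is both slow and invalid in Vulkan (the index
 * buffer might be written by the GPU), hence the warning below.
 */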
static void
panvk_index_minmax_search(struct panvk_cmd_buffer *cmdbuf,
                          uint32_t start, uint32_t count,
                          bool restart,
                          uint32_t *min, uint32_t *max)
{
   assert(cmdbuf->state.ib.buffer);
   assert(cmdbuf->state.ib.buffer->bo);
   assert(cmdbuf->state.ib.buffer->bo->ptr.cpu);

   void *ptr = cmdbuf->state.ib.buffer->bo->ptr.cpu +
               cmdbuf->state.ib.buffer->bo_offset +
               cmdbuf->state.ib.offset;

   fprintf(stderr, "WARNING: Crawling index buffers from the CPU isn't valid in Vulkan\n");

   *max = 0;

   /* TODO: Use panfrost_minmax_cache */
   /* TODO: Read a full cacheline of data at a time to mitigate the uncached
    * mapping slowness.
    */
   switch (cmdbuf->state.ib.index_size) {
#define MINMAX_SEARCH_CASE(sz) \
   case sz: { \
      uint ## sz ## _t *indices = ptr; \
      *min = UINT ## sz ## _MAX; \
      for (uint32_t i = 0; i < count; i++) { \
         if (restart && indices[i + start] == UINT ## sz ## _MAX) \
            continue; \
         *min = MIN2(indices[i + start], *min); \
         *max = MAX2(indices[i + start], *max); \
      } \
      break; \
   }
   MINMAX_SEARCH_CASE(32)
   MINMAX_SEARCH_CASE(16)
   MINMAX_SEARCH_CASE(8)
#undef MINMAX_SEARCH_CASE
   default:
      unreachable("Invalid index size");
   }
}

void
panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer,
                               uint32_t indexCount,
                               uint32_t instanceCount,
                               uint32_t firstIndex,
                               int32_t vertexOffset,
                               uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   uint32_t min_vertex, max_vertex;

   if (instanceCount == 0 || indexCount == 0)
      return;

   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   bool primitive_restart = pipeline->ia.primitive_restart;

   panvk_index_minmax_search(cmdbuf, firstIndex, indexCount, primitive_restart,
                             &min_vertex, &max_vertex);

   unsigned vertex_range = max_vertex - min_vertex + 1;
   struct panvk_draw_info draw = {
      .index_size = cmdbuf->state.ib.index_size,
      .first_index = firstIndex,
      .index_count = indexCount,
      .vertex_offset = vertexOffset,
      .first_instance = firstInstance,
      .instance_count = instanceCount,
      .vertex_range = vertex_range,
      .vertex_count = indexCount + abs(vertexOffset),
      .padded_vertex_count = instanceCount > 1 ?
                             panfrost_padded_vertex_count(vertex_range) :
                             vertex_range,
      .offset_start = min_vertex + vertexOffset,
      .indices = panvk_buffer_gpu_ptr(cmdbuf->state.ib.buffer,
                                      cmdbuf->state.ib.offset) +
                 (firstIndex * (cmdbuf->state.ib.index_size / 8)),
   };

   panvk_cmd_draw(cmdbuf, &draw);
}

VkResult
panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VkResult ret =
      cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY ?
      cmdbuf->vk.cmd_queue.error : cmdbuf->record_result;

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   cmdbuf->status = ret == VK_SUCCESS ?
                    PANVK_CMD_BUFFER_STATUS_EXECUTABLE :
                    PANVK_CMD_BUFFER_STATUS_INVALID;
   return ret;
}

void
panvk_per_arch(CmdEndRenderPass2)(VkCommandBuffer commandBuffer,
                                  const VkSubpassEndInfo *pSubpassEndInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   vk_free(&cmdbuf->pool->vk.alloc, cmdbuf->state.clear);
   cmdbuf->state.batch = NULL;
   cmdbuf->state.pass = NULL;
   cmdbuf->state.subpass = NULL;
   cmdbuf->state.framebuffer = NULL;
   cmdbuf->state.clear = NULL;
}

void
panvk_per_arch(CmdEndRenderPass)(VkCommandBuffer cmd)
{
   VkSubpassEndInfo einfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
   };

   panvk_per_arch(CmdEndRenderPass2)(cmd, &einfo);
}

void
panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
                                    const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   /* Caches are flushed/invalidated at batch boundaries for now, so there's
    * nothing to do for memory barriers as long as we implement barriers by
    * starting a new batch.
    * FIXME: We could probably do better with a CacheFlush job that has the
    * barrier flag set to true.
    */
   if (cmdbuf->state.batch) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      panvk_cmd_open_batch(cmdbuf);
   }
}

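/* Events are implemented at batch granularity: set/reset/wait operations
 * are attached to a batch's event_ops array and processed around the
 * submission of that batch, so recording one might force us to split the
 * current batch.
 */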
static void
panvk_add_set_event_operation(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_event *event,
                              enum panvk_event_op_type type)
{
   struct panvk_event_op op = {
      .type = type,
      .event = event,
   };

   if (cmdbuf->state.batch == NULL) {
      /* No open batch, let's create a new one so this operation happens in
       * the right order.
       */
      panvk_cmd_open_batch(cmdbuf);
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op,
                           op);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   } else {
      /* Let's close the current batch so the operation executes before any
       * future commands.
       */
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op,
                           op);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      panvk_cmd_open_batch(cmdbuf);
   }
}

static void
panvk_add_wait_event_operation(struct panvk_cmd_buffer *cmdbuf,
                               struct panvk_event *event)
{
   struct panvk_event_op op = {
      .type = PANVK_EVENT_OP_WAIT,
      .event = event,
   };

   if (cmdbuf->state.batch == NULL) {
      /* No open batch, let's create a new one and have it wait for this
       * event.
       */
      panvk_cmd_open_batch(cmdbuf);
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op,
                           op);
   } else {
      /* Let's close the current batch so any future commands wait on the
       * event signal operation.
       */
      if (cmdbuf->state.batch->fragment_job ||
          cmdbuf->state.batch->scoreboard.first_job) {
         panvk_per_arch(cmd_close_batch)(cmdbuf);
         panvk_cmd_preload_fb_after_batch_split(cmdbuf);
         panvk_cmd_open_batch(cmdbuf);
      }
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op,
                           op);
   }
}

void
panvk_per_arch(CmdSetEvent2)(VkCommandBuffer commandBuffer,
                             VkEvent _event,
                             const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_event, event, _event);

   /* vkCmdSetEvent cannot be called inside a render pass */
   assert(cmdbuf->state.pass == NULL);

   panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_SET);
}

void
panvk_per_arch(CmdResetEvent2)(VkCommandBuffer commandBuffer,
                               VkEvent _event,
                               VkPipelineStageFlags2 stageMask)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_event, event, _event);

   /* vkCmdResetEvent cannot be called inside a render pass */
   assert(cmdbuf->state.pass == NULL);

   panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_RESET);
}

void
panvk_per_arch(CmdWaitEvents2)(VkCommandBuffer commandBuffer,
                               uint32_t eventCount,
                               const VkEvent *pEvents,
                               const VkDependencyInfo *pDependencyInfos)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   assert(eventCount > 0);

   for (uint32_t i = 0; i < eventCount; i++) {
      VK_FROM_HANDLE(panvk_event, event, pEvents[i]);
      panvk_add_wait_event_operation(cmdbuf, event);
   }
}

static VkResult
panvk_reset_cmdbuf(struct panvk_cmd_buffer *cmdbuf)
{
   vk_command_buffer_reset(&cmdbuf->vk);

   cmdbuf->record_result = VK_SUCCESS;

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
      util_dynarray_fini(&batch->event_ops);

      vk_free(&cmdbuf->pool->vk.alloc, batch);
   }

   panvk_pool_reset(&cmdbuf->desc_pool);
   panvk_pool_reset(&cmdbuf->tls_pool);
   panvk_pool_reset(&cmdbuf->varying_pool);
   cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL;

   for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
      memset(&cmdbuf->bind_points[i].desc_state.sets, 0,
             sizeof(cmdbuf->bind_points[0].desc_state.sets));

   return cmdbuf->record_result;
}

static void
panvk_destroy_cmdbuf(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_device *device = cmdbuf->device;

   list_del(&cmdbuf->pool_link);

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
      util_dynarray_fini(&batch->event_ops);

      vk_free(&cmdbuf->pool->vk.alloc, batch);
   }

   panvk_pool_cleanup(&cmdbuf->desc_pool);
   panvk_pool_cleanup(&cmdbuf->tls_pool);
   panvk_pool_cleanup(&cmdbuf->varying_pool);
   vk_command_buffer_finish(&cmdbuf->vk);
   vk_free(&device->vk.alloc, cmdbuf);
}

static VkResult
panvk_create_cmdbuf(struct panvk_device *device,
                    struct panvk_cmd_pool *pool,
                    VkCommandBufferLevel level,
                    struct panvk_cmd_buffer **cmdbuf_out)
{
   struct panvk_cmd_buffer *cmdbuf;

   cmdbuf = vk_zalloc(&device->vk.alloc, sizeof(*cmdbuf),
                      8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmdbuf)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result = vk_command_buffer_init(&cmdbuf->vk, &pool->vk, level);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, cmdbuf);
      return result;
   }

   cmdbuf->device = device;
   cmdbuf->pool = pool;

   if (pool) {
      list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers);
      cmdbuf->queue_family_index = pool->vk.queue_family_index;
   } else {
      /* Init the pool_link so we can safely call list_del when we destroy
       * the command buffer.
       */
      list_inithead(&cmdbuf->pool_link);
      cmdbuf->queue_family_index = PANVK_QUEUE_GENERAL;
   }

   panvk_pool_init(&cmdbuf->desc_pool, &device->physical_device->pdev,
                   pool ? &pool->desc_bo_pool : NULL, 0, 64 * 1024,
                   "Command buffer descriptor pool", true);
   panvk_pool_init(&cmdbuf->tls_pool, &device->physical_device->pdev,
                   pool ? &pool->tls_bo_pool : NULL,
                   panvk_debug_adjust_bo_flags(device, PAN_BO_INVISIBLE),
                   64 * 1024, "TLS pool", false);
   panvk_pool_init(&cmdbuf->varying_pool, &device->physical_device->pdev,
                   pool ? &pool->varying_bo_pool : NULL,
                   panvk_debug_adjust_bo_flags(device, PAN_BO_INVISIBLE),
                   64 * 1024, "Varyings pool", false);
   list_inithead(&cmdbuf->batches);
   cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL;
   *cmdbuf_out = cmdbuf;
   return VK_SUCCESS;
}

VkResult
panvk_per_arch(AllocateCommandBuffers)(VkDevice _device,
                                       const VkCommandBufferAllocateInfo *pAllocateInfo,
                                       VkCommandBuffer *pCommandBuffers)
{
   VK_FROM_HANDLE(panvk_device, device, _device);
   VK_FROM_HANDLE(panvk_cmd_pool, pool, pAllocateInfo->commandPool);

   VkResult result = VK_SUCCESS;
   unsigned i;

   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
      struct panvk_cmd_buffer *cmdbuf = NULL;

      if (!list_is_empty(&pool->free_cmd_buffers)) {
         cmdbuf = list_first_entry(
            &pool->free_cmd_buffers, struct panvk_cmd_buffer, pool_link);

         list_del(&cmdbuf->pool_link);
         list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers);

         vk_command_buffer_finish(&cmdbuf->vk);
         result = vk_command_buffer_init(&cmdbuf->vk, &pool->vk,
                                         pAllocateInfo->level);
      } else {
         result = panvk_create_cmdbuf(device, pool, pAllocateInfo->level,
                                      &cmdbuf);
      }

      if (result != VK_SUCCESS)
         goto err_free_cmd_bufs;

      pCommandBuffers[i] = panvk_cmd_buffer_to_handle(cmdbuf);
   }

   return VK_SUCCESS;

err_free_cmd_bufs:
   panvk_per_arch(FreeCommandBuffers)(_device, pAllocateInfo->commandPool, i,
                                      pCommandBuffers);
   for (unsigned j = 0; j < i; j++)
      pCommandBuffers[j] = VK_NULL_HANDLE;

   return result;
}

void
panvk_per_arch(FreeCommandBuffers)(VkDevice device,
                                   VkCommandPool commandPool,
                                   uint32_t commandBufferCount,
                                   const VkCommandBuffer *pCommandBuffers)
{
   for (uint32_t i = 0; i < commandBufferCount; i++) {
      VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, pCommandBuffers[i]);

      if (cmdbuf) {
         if (cmdbuf->pool) {
            list_del(&cmdbuf->pool_link);
            panvk_reset_cmdbuf(cmdbuf);
            list_addtail(&cmdbuf->pool_link,
                         &cmdbuf->pool->free_cmd_buffers);
         } else {
            panvk_destroy_cmdbuf(cmdbuf);
         }
      }
   }
}

VkResult
panvk_per_arch(ResetCommandBuffer)(VkCommandBuffer commandBuffer,
                                   VkCommandBufferResetFlags flags)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   return panvk_reset_cmdbuf(cmdbuf);
}

VkResult
panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
                                   const VkCommandBufferBeginInfo *pBeginInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VkResult result = VK_SUCCESS;

   if (cmdbuf->status != PANVK_CMD_BUFFER_STATUS_INITIAL) {
      /* If the command buffer has already been reset with
       * vkResetCommandBuffer, there's no need to do it again.
       */
      result = panvk_reset_cmdbuf(cmdbuf);
      if (result != VK_SUCCESS)
         return result;
   }

   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));

   cmdbuf->status = PANVK_CMD_BUFFER_STATUS_RECORDING;

   return VK_SUCCESS;
}

void
panvk_per_arch(DestroyCommandPool)(VkDevice _device,
                                   VkCommandPool commandPool,
                                   const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(panvk_device, device, _device);
   VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);

   list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
                            &pool->active_cmd_buffers, pool_link)
      panvk_destroy_cmdbuf(cmdbuf);

   list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
                            &pool->free_cmd_buffers, pool_link)
      panvk_destroy_cmdbuf(cmdbuf);

   panvk_bo_pool_cleanup(&pool->desc_bo_pool);
   panvk_bo_pool_cleanup(&pool->varying_bo_pool);
   panvk_bo_pool_cleanup(&pool->tls_bo_pool);

   vk_command_pool_finish(&pool->vk);
   vk_free2(&device->vk.alloc, pAllocator, pool);
}

VkResult
panvk_per_arch(ResetCommandPool)(VkDevice device,
                                 VkCommandPool commandPool,
                                 VkCommandPoolResetFlags flags)
{
   VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);
   VkResult result;

   list_for_each_entry(struct panvk_cmd_buffer, cmdbuf,
                       &pool->active_cmd_buffers, pool_link) {
      result = panvk_reset_cmdbuf(cmdbuf);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

void
panvk_per_arch(TrimCommandPool)(VkDevice device,
                                VkCommandPool commandPool,
                                VkCommandPoolTrimFlags flags)
{
   VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);

   if (!pool)
      return;

   list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
                            &pool->free_cmd_buffers, pool_link)
      panvk_destroy_cmdbuf(cmdbuf);
}

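/* Compute dispatches get a batch of their own: close the batch currently
 * being recorded, open a new one, record the compute job with all its
 * descriptors, and close it right away.
 */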
void
panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer,
                            uint32_t x,
                            uint32_t y,
                            uint32_t z)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   const struct panfrost_device *pdev =
      &cmdbuf->device->physical_device->pdev;
   struct panvk_dispatch_info dispatch = {
      .wg_count = { x, y, z },
   };

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, COMPUTE);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   struct panfrost_ptr job =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);

   struct panvk_sysvals *sysvals = &desc_state->sysvals;
   sysvals->num_work_groups.u32[0] = x;
   sysvals->num_work_groups.u32[1] = y;
   sysvals->num_work_groups.u32[2] = z;
   sysvals->local_group_size.u32[0] = pipeline->cs.local_size.x;
   sysvals->local_group_size.u32[1] = pipeline->cs.local_size.y;
   sysvals->local_group_size.u32[2] = pipeline->cs.local_size.z;
   desc_state->sysvals_ptr = 0;

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
   dispatch.tsd = batch->tls.gpu;

   panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
   dispatch.attributes = desc_state->non_vs_attribs;
   dispatch.attribute_bufs = desc_state->non_vs_attrib_bufs;

   panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
   dispatch.ubos = desc_state->ubos;

   panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
   dispatch.textures = desc_state->textures;

   panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);
   dispatch.samplers = desc_state->samplers;

   panvk_per_arch(emit_compute_job)(pipeline, &dispatch, job.cpu);
   panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                    MALI_JOB_TYPE_COMPUTE, false, false, 0, 0,
                    &job, false);

   batch->tlsinfo.tls.size = pipeline->tls_size;
   batch->tlsinfo.wls.size = pipeline->wls_size;
   if (batch->tlsinfo.wls.size) {
      batch->wls_total_size =
         pan_wls_mem_size(pdev, &dispatch.wg_count, batch->tlsinfo.wls.size);
   }

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   desc_state->dirty = 0;
}