#include "zink_compiler.h"
#include "zink_context.h"
#include "zink_program.h"
#include "zink_query.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "zink_state.h"
#include "zink_surface.h"
#include "zink_inlines.h"

#include "tgsi/tgsi_from_mesa.h"
#include "util/hash_table.h"
#include "util/u_debug.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"
#include "util/u_prim_restart.h"


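/* emit barriers synchronizing xfb counter buffer access: counter writes from a
 * previous pause happen at the xfb stage, and if the counter is valid it will
 * also be read at the draw-indirect stage on resume (see the spec quote below)
 */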
static void
zink_emit_xfb_counter_barrier(struct zink_context *ctx)
{
   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
      if (!t)
         continue;
      struct zink_resource *res = zink_resource(t->counter_buffer);
      VkAccessFlags access = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
      VkPipelineStageFlags stage = VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
      if (t->counter_buffer_valid) {
         /* Between the pause and resume there needs to be a memory barrier for the counter buffers
          * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
          * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT
          * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
          * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT.
          *
          * - from VK_EXT_transform_feedback spec
          */
         access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT;
         stage |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
      }
      zink_resource_buffer_barrier(ctx, res, access, stage);
      res->obj->unordered_read = false;
   }
}

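/* bind all current stream output targets as xfb buffers on the cmdbuf; slots
 * without a target get the dummy xfb buffer so the binding array stays contiguous
 */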
static void
zink_emit_stream_output_targets(struct pipe_context *pctx)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_batch *batch = &ctx->batch;
   VkBuffer buffers[PIPE_MAX_SO_OUTPUTS] = {0};
   VkDeviceSize buffer_offsets[PIPE_MAX_SO_OUTPUTS] = {0};
   VkDeviceSize buffer_sizes[PIPE_MAX_SO_OUTPUTS] = {0};

   for (unsigned i = 0; i < ctx->num_so_targets; i++) {
      struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
      if (!t) {
         /* no need to reference this or anything */
         buffers[i] = zink_resource(ctx->dummy_xfb_buffer)->obj->buffer;
         buffer_offsets[i] = 0;
         buffer_sizes[i] = sizeof(uint8_t);
         continue;
      }
      struct zink_resource *res = zink_resource(t->base.buffer);
      if (!res->so_valid)
         /* resource has been rebound */
         t->counter_buffer_valid = false;
      buffers[i] = res->obj->buffer;
      zink_batch_reference_resource_rw(batch, res, true);
      buffer_offsets[i] = t->base.buffer_offset;
      buffer_sizes[i] = t->base.buffer_size;
      res->so_valid = true;
      util_range_add(t->base.buffer, &res->valid_buffer_range, t->base.buffer_offset,
                     t->base.buffer_offset + t->base.buffer_size);
   }

   VKCTX(CmdBindTransformFeedbackBuffersEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets,
                                                 buffers, buffer_offsets,
                                                 buffer_sizes);
   ctx->dirty_so_targets = false;
}

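/* emit a read barrier for a buffer with the given access/stage and mark it as
 * no longer safe for unordered reads
 */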
ALWAYS_INLINE static void
check_buffer_barrier(struct zink_context *ctx, struct pipe_resource *pres, VkAccessFlags flags, VkPipelineStageFlags pipeline)
{
   struct zink_resource *res = zink_resource(pres);
   zink_resource_buffer_barrier(ctx, res, flags, pipeline);
   res->obj->unordered_read = false;
}

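/* synchronize every buffer the upcoming draw reads through fixed-function: the
 * index buffer, the indirect parameter buffer, and the indirect draw-count buffer
 */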
ALWAYS_INLINE static void
barrier_draw_buffers(struct zink_context *ctx, const struct pipe_draw_info *dinfo,
                     const struct pipe_draw_indirect_info *dindirect, struct pipe_resource *index_buffer)
{
   if (index_buffer)
      check_buffer_barrier(ctx, index_buffer, VK_ACCESS_INDEX_READ_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
   if (dindirect && dindirect->buffer) {
      check_buffer_barrier(ctx, dindirect->buffer,
                           VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
      if (dindirect->indirect_draw_count)
         check_buffer_barrier(ctx, dindirect->indirect_draw_count,
                              VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
   }
}

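/* bind a vertex buffer for each binding in the vertex-elements state, substituting
 * the dummy vertex buffer for unset slots; the bind command used is selected at
 * compile time from the dynamic-state level
 */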
template <zink_dynamic_state DYNAMIC_STATE>
static void
zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx)
{
   VkBuffer buffers[PIPE_MAX_ATTRIBS];
   VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS];
   VkDeviceSize buffer_strides[PIPE_MAX_ATTRIBS];
   struct zink_vertex_elements_state *elems = ctx->element_state;
   struct zink_screen *screen = zink_screen(ctx->base.screen);

   if (!elems->hw_state.num_bindings)
      return;

   for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
      struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ctx->element_state->binding_map[i];
      assert(vb);
      if (vb->buffer.resource) {
         struct zink_resource *res = zink_resource(vb->buffer.resource);
         assert(res->obj->buffer);
         buffers[i] = res->obj->buffer;
         buffer_offsets[i] = vb->buffer_offset;
         buffer_strides[i] = vb->stride;
         if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT)
            elems->hw_state.dynbindings[i].stride = vb->stride;
      } else {
         buffers[i] = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
         buffer_offsets[i] = 0;
         buffer_strides[i] = 0;
         if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT)
            elems->hw_state.dynbindings[i].stride = 0;
      }
   }

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT)
      VKCTX(CmdBindVertexBuffers2EXT)(batch->state->cmdbuf, 0,
                                          elems->hw_state.num_bindings,
                                          buffers, buffer_offsets, NULL, buffer_strides);
   else
      VKSCR(CmdBindVertexBuffers)(batch->state->cmdbuf, 0,
                             elems->hw_state.num_bindings,
                             buffers, buffer_offsets);

   if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT)
      VKCTX(CmdSetVertexInputEXT)(batch->state->cmdbuf,
                                      elems->hw_state.num_bindings, elems->hw_state.dynbindings,
                                      elems->hw_state.num_attribs, elems->hw_state.dynattribs);

   ctx->vertex_buffers_dirty = false;
}

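/* bind the single interleaved buffer of a pipe_vertex_state draw along with the
 * dynamic vertex input state selected by partial_velem_mask
 */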
static void
zink_bind_vertex_state(struct zink_batch *batch, struct zink_context *ctx,
                       struct pipe_vertex_state *vstate, uint32_t partial_velem_mask)
{
   if (!vstate->input.vbuffer.buffer.resource)
      return;

   const struct zink_vertex_elements_hw_state *hw_state = zink_vertex_state_mask(vstate, partial_velem_mask, true);
   assert(hw_state);

   struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource);
   zink_batch_resource_usage_set(&ctx->batch, res, false);
   VkDeviceSize offset = vstate->input.vbuffer.buffer_offset;
   VKCTX(CmdBindVertexBuffers)(batch->state->cmdbuf, 0,
                               hw_state->num_bindings,
                               &res->obj->buffer, &offset);

   VKCTX(CmdSetVertexInputEXT)(batch->state->cmdbuf,
                               hw_state->num_bindings, hw_state->dynbindings,
                               hw_state->num_attribs, hw_state->dynattribs);
}

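/* look up (or create) the gfx program for the currently-bound shaders and fold its
 * variant hash into the pipeline state; any pending last-vertex-stage key change is
 * first flushed into the real shader key
 */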
static void
update_gfx_program(struct zink_context *ctx)
{
   if (ctx->last_vertex_stage_dirty) {
      enum pipe_shader_type pstage = pipe_shader_type_from_mesa(ctx->last_vertex_stage->nir->info.stage);
      ctx->dirty_shader_stages |= BITFIELD_BIT(pstage);
      memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base,
             &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base,
             sizeof(struct zink_vs_key_base));
      ctx->last_vertex_stage_dirty = false;
   }
   unsigned bits = BITFIELD_MASK(PIPE_SHADER_COMPUTE);
   if (ctx->gfx_dirty) {
      struct zink_gfx_program *prog = NULL;

      struct hash_table *ht = &ctx->program_cache[ctx->shader_stages >> 2];
      const uint32_t hash = ctx->gfx_hash;
      struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages);
      if (entry) {
         prog = (struct zink_gfx_program*)entry->data;
         u_foreach_bit(stage, prog->stages_present & ~ctx->dirty_shader_stages)
            ctx->gfx_pipeline_state.modules[stage] = prog->modules[stage]->shader;
         /* ensure variants are always updated if keys have changed since last use */
         ctx->dirty_shader_stages |= prog->stages_present;
      } else {
         ctx->dirty_shader_stages |= bits;
         prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch);
         _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog);
      }
      zink_update_gfx_program(ctx, prog);
      if (prog && prog != ctx->curr_program)
         zink_batch_reference_program(&ctx->batch, &prog->base);
      if (ctx->curr_program)
         ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->curr_program = prog;
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      ctx->gfx_dirty = false;
   } else if (ctx->dirty_shader_stages & bits) {
      /* remove old hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
      zink_update_gfx_program(ctx, ctx->curr_program);
      /* apply new hash */
      ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash;
   }
   ctx->dirty_shader_stages &= ~bits;
}

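/* upload the current draw id via push constants for shaders that read gl_DrawID */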
ALWAYS_INLINE static void
update_drawid(struct zink_context *ctx, unsigned draw_id)
{
   VKCTX(CmdPushConstants)(ctx->batch.state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_VERTEX_BIT,
                      offsetof(struct zink_gfx_push_constant, draw_id), sizeof(unsigned),
                      &draw_id);
}

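/* indexed draw path for index data that has been rewritten into a new buffer,
 * which is why firstIndex is always 0 here; with need_index_buffer_unref never
 * set in this file, this appears to be kept only for index-rewrite fallbacks
 */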
ALWAYS_INLINE static void
draw_indexed_need_index_buffer_unref(struct zink_context *ctx,
             const struct pipe_draw_info *dinfo,
             const struct pipe_draw_start_count_bias *draws,
             unsigned num_draws,
             unsigned draw_id,
             bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDrawIndexed)(cmdbuf,
            draws[i].count, dinfo->instance_count,
            0, draws[i].index_bias, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      for (unsigned i = 0; i < num_draws; i++)
         VKCTX(CmdDrawIndexed)(cmdbuf,
            draws[i].count, dinfo->instance_count,
            0, draws[i].index_bias, dinfo->start_instance);
   }
}

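/* emit indexed draws, batching them into one command with VK_EXT_multi_draw when
 * available; pipe_draw_start_count_bias is layout-compatible with
 * VkMultiDrawIndexedInfoEXT, which is what makes the direct cast legal
 */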
template <zink_multidraw HAS_MULTIDRAW>
ALWAYS_INLINE static void
draw_indexed(struct zink_context *ctx,
             const struct pipe_draw_info *dinfo,
             const struct pipe_draw_start_count_bias *draws,
             unsigned num_draws,
             unsigned draw_id,
             bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDrawIndexed)(cmdbuf,
            draws[i].count, dinfo->instance_count,
            draws[i].start, draws[i].index_bias, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      if (HAS_MULTIDRAW) {
         VKCTX(CmdDrawMultiIndexedEXT)(cmdbuf, num_draws, (const VkMultiDrawIndexedInfoEXT*)draws,
                                       dinfo->instance_count,
                                       dinfo->start_instance, sizeof(struct pipe_draw_start_count_bias),
                                       dinfo->index_bias_varies ? NULL : &draws[0].index_bias);
      } else {
         for (unsigned i = 0; i < num_draws; i++)
            VKCTX(CmdDrawIndexed)(cmdbuf,
               draws[i].count, dinfo->instance_count,
               draws[i].start, draws[i].index_bias, dinfo->start_instance);
      }
   }
}

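/* non-indexed counterpart of draw_indexed() with the same multi-draw batching */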
template <zink_multidraw HAS_MULTIDRAW>
ALWAYS_INLINE static void
draw(struct zink_context *ctx,
     const struct pipe_draw_info *dinfo,
     const struct pipe_draw_start_count_bias *draws,
     unsigned num_draws,
     unsigned draw_id,
     bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->batch.state->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDraw)(cmdbuf, draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      if (HAS_MULTIDRAW)
         VKCTX(CmdDrawMultiEXT)(cmdbuf, num_draws, (const VkMultiDrawInfoEXT*)draws,
                                dinfo->instance_count, dinfo->start_instance,
                                sizeof(struct pipe_draw_start_count_bias));
      else {
         for (unsigned i = 0; i < num_draws; i++)
            VKCTX(CmdDraw)(cmdbuf, draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance);
      }
   }
}

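/* flush the deferred barrier set for every resource bound to the upcoming
 * draw/dispatch; resources with both write binds and other binds are re-queued
 * so they barrier again on their next use
 */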
static void
update_barriers(struct zink_context *ctx, bool is_compute,
                struct pipe_resource *index, struct pipe_resource *indirect, struct pipe_resource *indirect_draw_count)
{
   if (!ctx->need_barriers[is_compute]->entries)
      return;
   struct set *need_barriers = ctx->need_barriers[is_compute];
   ctx->barrier_set_idx[is_compute] = !ctx->barrier_set_idx[is_compute];
   ctx->need_barriers[is_compute] = &ctx->update_barriers[is_compute][ctx->barrier_set_idx[is_compute]];
   set_foreach(need_barriers, he) {
      struct zink_resource *res = (struct zink_resource *)he->key;
      if (res->bind_count[is_compute]) {
         VkPipelineStageFlagBits pipeline = is_compute ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT : res->gfx_barrier;
         if (res->base.b.target == PIPE_BUFFER)
            zink_resource_buffer_barrier(ctx, res, res->barrier_access[is_compute], pipeline);
         else {
            VkImageLayout layout = zink_descriptor_util_image_layout_eval(ctx, res, is_compute);
            if (layout != res->layout)
               zink_resource_image_barrier(ctx, res, layout, res->barrier_access[is_compute], pipeline);
         }
         if (zink_resource_access_is_write(res->barrier_access[is_compute]))
            res->obj->unordered_read = res->obj->unordered_write = false;
         else
            res->obj->unordered_read = false;
         /* always barrier on draw if this resource has either multiple image write binds or
          * image write binds and image read binds
          */
         if (res->write_bind_count[is_compute] && res->bind_count[is_compute] > 1)
            _mesa_set_add_pre_hashed(ctx->need_barriers[is_compute], he->hash, res);
      }
      _mesa_set_remove(need_barriers, he);
      if (!need_barriers->entries)
         break;
   }
}

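/* update the gfx program, then (re)bind the pipeline if it changed or the
 * cmdbuf changed; returns whether the pipeline changed
 */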
template <bool BATCH_CHANGED>
static bool
update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum pipe_prim_type mode)
{
   VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline;
   update_gfx_program(ctx);
   VkPipeline pipeline = zink_get_gfx_pipeline(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
   bool pipeline_changed = prev_pipeline != pipeline;
   if (BATCH_CHANGED || pipeline_changed)
      VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
   return pipeline_changed;
}

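/* cpu-side fallback for drivers without VK_EXT_conditional_rendering: read back
 * the query result and skip the draw when the condition fails; returns whether
 * the caller should proceed with the draw
 */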
static bool
hack_conditional_render(struct pipe_context *pctx,
                        const struct pipe_draw_info *dinfo,
                        unsigned drawid_offset,
                        const struct pipe_draw_indirect_info *dindirect,
                        const struct pipe_draw_start_count_bias *draws,
                        unsigned num_draws)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_batch_state *bs = ctx->batch.state;
   static bool warned;
   if (!warned) {
      fprintf(stderr, "ZINK: warning, this is cpu-based conditional rendering, say bye-bye to fps\n");
      warned = true;
   }
   if (!zink_check_conditional_render(ctx))
      return false;
   if (bs != ctx->batch.state) {
      bool prev = ctx->render_condition_active;
      ctx->render_condition_active = false;
      zink_select_draw_vbo(ctx);
      pctx->draw_vbo(pctx, dinfo, drawid_offset, dindirect, draws, num_draws);
      ctx->render_condition_active = prev;
      return false;
   }
   return true;
}

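/* the main draw entrypoint, specialized at compile time on multidraw support,
 * dynamic-state level, whether the cmdbuf changed since the last draw, and
 * whether this is a pipe_vertex_state draw
 */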
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED, bool DRAW_STATE>
void
zink_draw(struct pipe_context *pctx,
          const struct pipe_draw_info *dinfo,
          unsigned drawid_offset,
          const struct pipe_draw_indirect_info *dindirect,
          const struct pipe_draw_start_count_bias *draws,
          unsigned num_draws,
          struct pipe_vertex_state *vstate,
          uint32_t partial_velem_mask)
{
   if (!dindirect && (!draws[0].count || !dinfo->instance_count))
      return;

   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_rasterizer_state *rast_state = ctx->rast_state;
   struct zink_depth_stencil_alpha_state *dsa_state = ctx->dsa_state;
   struct zink_batch *batch = &ctx->batch;
   struct zink_so_target *so_target =
      dindirect && dindirect->count_from_stream_output ?
         zink_so_target(dindirect->count_from_stream_output) : NULL;
   VkBuffer counter_buffers[PIPE_MAX_SO_OUTPUTS];
   VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_OUTPUTS];
   bool need_index_buffer_unref = false;
   bool mode_changed = ctx->gfx_pipeline_state.gfx_prim_mode != dinfo->mode;
   bool reads_drawid = ctx->shader_reads_drawid;
   bool reads_basevertex = ctx->shader_reads_basevertex;
   unsigned work_count = ctx->batch.work_count;
   enum pipe_prim_type mode = (enum pipe_prim_type)dinfo->mode;

   if (unlikely(!screen->info.have_EXT_conditional_rendering)) {
      if (!hack_conditional_render(pctx, dinfo, drawid_offset, dindirect, draws, num_draws))
         return;
   }

   if (ctx->memory_barrier)
      zink_flush_memory_barrier(ctx, false);

   if (unlikely(ctx->buffer_rebind_counter < screen->buffer_rebind_counter)) {
      ctx->buffer_rebind_counter = screen->buffer_rebind_counter;
      zink_rebind_all_buffers(ctx);
   }

   if (unlikely(ctx->image_rebind_counter < screen->image_rebind_counter)) {
      ctx->image_rebind_counter = screen->image_rebind_counter;
      zink_rebind_all_images(ctx);
   }

   unsigned index_offset = 0;
   unsigned index_size = dinfo->index_size;
   struct pipe_resource *index_buffer = NULL;
   if (index_size > 0) {
      if (dinfo->has_user_indices) {
         if (!util_upload_index_buffer(pctx, dinfo, &draws[0], &index_buffer, &index_offset, 4)) {
            debug_printf("util_upload_index_buffer() failed\n");
            return;
         }
         /* this will have extra refs from tc */
         if (screen->threaded)
            zink_batch_reference_resource_move(batch, zink_resource(index_buffer));
         else
            zink_batch_reference_resource(batch, zink_resource(index_buffer));
      } else {
         index_buffer = dinfo->index.resource;
         zink_batch_reference_resource_rw(batch, zink_resource(index_buffer), false);
      }
      assert(index_size <= 4 && index_size != 3);
      assert(index_size != 1 || screen->info.have_EXT_index_type_uint8);
   }

   bool have_streamout = !!ctx->num_so_targets;
   if (have_streamout) {
      zink_emit_xfb_counter_barrier(ctx);
      if (ctx->dirty_so_targets) {
         /* have to loop here and again below: barriers must be emitted outside of
          * a renderpass, but the xfb buffers can't be bound until the renderpass
          * is active, so binding them here would recursively break the renderpass
          */
         for (unsigned i = 0; i < ctx->num_so_targets; i++) {
            struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
            if (t) {
               struct zink_resource *res = zink_resource(t->base.buffer);
               zink_resource_buffer_barrier(ctx, res,
                                            VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT);
               res->obj->unordered_read = res->obj->unordered_write = false;
            }
         }
      }
   }

   barrier_draw_buffers(ctx, dinfo, dindirect, index_buffer);
   /* this may re-emit draw buffer barriers, but such synchronization is harmless */
   update_barriers(ctx, false, index_buffer, dindirect ? dindirect->buffer : NULL, dindirect ? dindirect->indirect_draw_count : NULL);

   /* ensure synchronization between doing streamout with counter buffer
    * and using counter buffer for indirect draw
    */
   if (so_target && so_target->counter_buffer_valid) {
      struct zink_resource *res = zink_resource(so_target->counter_buffer);
      zink_resource_buffer_barrier(ctx, res,
                                   VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT,
                                   VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
      res->obj->unordered_read = false;
   }

   zink_query_update_gs_states(ctx, dinfo->was_line_loop);

   if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
      zink_batch_no_rp(ctx);
      VkMemoryBarrier mb;
      mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
      mb.pNext = NULL;
      mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
      mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
      VKSCR(CmdPipelineBarrier)(ctx->batch.state->cmdbuf,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                0, 1, &mb, 0, NULL, 0, NULL);
   }

   zink_batch_rp(ctx);
   /* check dead swapchain */
   if (unlikely(!ctx->batch.in_rp))
      return;

   if (BATCH_CHANGED)
      zink_update_descriptor_refs(ctx, false);

   /* these must be after renderpass start to avoid issues with recursion */
   bool drawid_broken = false;
   if (reads_drawid && (!dindirect || !dindirect->buffer))
      drawid_broken = (drawid_offset != 0 ||
                      (!HAS_MULTIDRAW && num_draws > 1) ||
                      (HAS_MULTIDRAW && num_draws > 1 && !dinfo->increment_draw_id));
   if (drawid_broken != zink_get_last_vertex_key(ctx)->push_drawid)
      zink_set_last_vertex_key(ctx)->push_drawid = drawid_broken;
   if (mode_changed) {
      bool points_changed = false;
      if (mode == PIPE_PRIM_POINTS) {
         ctx->gfx_pipeline_state.has_points++;
         points_changed = true;
      } else if (ctx->gfx_pipeline_state.gfx_prim_mode == PIPE_PRIM_POINTS) {
         ctx->gfx_pipeline_state.has_points--;
         points_changed = true;
      }
      if (points_changed && ctx->rast_state->base.point_quad_rasterization)
         zink_set_fs_point_coord_key(ctx);
   }
   ctx->gfx_pipeline_state.gfx_prim_mode = mode;

   if (index_size) {
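      /* index_size can only be 1, 2, or 4 here, so index_size >> 1 yields 0/1/2 below */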
      const VkIndexType index_type[3] = {
         VK_INDEX_TYPE_UINT8_EXT,
         VK_INDEX_TYPE_UINT16,
         VK_INDEX_TYPE_UINT32,
      };
      struct zink_resource *res = zink_resource(index_buffer);
      VKCTX(CmdBindIndexBuffer)(batch->state->cmdbuf, res->obj->buffer, index_offset, index_type[index_size >> 1]);
   }
   if (DYNAMIC_STATE < ZINK_DYNAMIC_STATE2) {
      if (ctx->gfx_pipeline_state.dyn_state2.primitive_restart != dinfo->primitive_restart)
         ctx->gfx_pipeline_state.dirty = true;
      ctx->gfx_pipeline_state.dyn_state2.primitive_restart = dinfo->primitive_restart;
   }

   if (have_streamout && ctx->dirty_so_targets)
      zink_emit_stream_output_targets(pctx);

   bool pipeline_changed = update_gfx_pipeline<BATCH_CHANGED>(ctx, batch->state, mode);

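   /* gallium stores viewports as translate (center) + scale (half-extents);
    * convert to the corner + full-extent form VkViewport expects
    */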
   if (BATCH_CHANGED || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) {
      VkViewport viewports[PIPE_MAX_VIEWPORTS];
      for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
         VkViewport viewport = {
            ctx->vp_state.viewport_states[i].translate[0] - ctx->vp_state.viewport_states[i].scale[0],
            ctx->vp_state.viewport_states[i].translate[1] - ctx->vp_state.viewport_states[i].scale[1],
            MAX2(ctx->vp_state.viewport_states[i].scale[0] * 2, 1),
            ctx->vp_state.viewport_states[i].scale[1] * 2,
            CLAMP(ctx->rast_state->base.clip_halfz ?
                  ctx->vp_state.viewport_states[i].translate[2] :
                  ctx->vp_state.viewport_states[i].translate[2] - ctx->vp_state.viewport_states[i].scale[2],
                  0, 1),
            CLAMP(ctx->vp_state.viewport_states[i].translate[2] + ctx->vp_state.viewport_states[i].scale[2],
                  0, 1)
         };
         if (!ctx->rast_state->base.half_pixel_center) {
             /* magic constant value from dxvk */
             float cf = 0.5f - (1.0f / 128.0f);
             viewport.x += cf;
             if (viewport.height < 0)
                viewport.y += cf;
             else
                viewport.y -= cf;
         }
         viewports[i] = viewport;
      }
      if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE)
         VKCTX(CmdSetViewportWithCountEXT)(batch->state->cmdbuf, ctx->vp_state.num_viewports, viewports);
      else
         VKCTX(CmdSetViewport)(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, viewports);
   }
   if (BATCH_CHANGED || ctx->scissor_changed || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) {
      VkRect2D scissors[PIPE_MAX_VIEWPORTS];
      if (ctx->rast_state->base.scissor) {
         for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
            scissors[i].offset.x = ctx->vp_state.scissor_states[i].minx;
            scissors[i].offset.y = ctx->vp_state.scissor_states[i].miny;
            scissors[i].extent.width = ctx->vp_state.scissor_states[i].maxx - ctx->vp_state.scissor_states[i].minx;
            scissors[i].extent.height = ctx->vp_state.scissor_states[i].maxy - ctx->vp_state.scissor_states[i].miny;
         }
      } else {
         for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) {
            scissors[i].offset.x = 0;
            scissors[i].offset.y = 0;
            scissors[i].extent.width = ctx->fb_state.width;
            scissors[i].extent.height = ctx->fb_state.height;
         }
      }
      if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE)
         VKCTX(CmdSetScissorWithCountEXT)(batch->state->cmdbuf, ctx->vp_state.num_viewports, scissors);
      else
         VKCTX(CmdSetScissor)(batch->state->cmdbuf, 0, ctx->vp_state.num_viewports, scissors);
   }
   ctx->vp_state_changed = false;
   ctx->scissor_changed = false;

   if (BATCH_CHANGED || ctx->stencil_ref_changed) {
      VKCTX(CmdSetStencilReference)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT,
                               ctx->stencil_ref.ref_value[0]);
      VKCTX(CmdSetStencilReference)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT,
                               ctx->stencil_ref.ref_value[1]);
      ctx->stencil_ref_changed = false;
   }

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || ctx->dsa_state_changed)) {
      VKCTX(CmdSetDepthBoundsTestEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_bounds_test);
      if (dsa_state->hw_state.depth_bounds_test)
         VKCTX(CmdSetDepthBounds)(batch->state->cmdbuf,
                             dsa_state->hw_state.min_depth_bounds,
                             dsa_state->hw_state.max_depth_bounds);
      VKCTX(CmdSetDepthTestEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_test);
      if (dsa_state->hw_state.depth_test)
         VKCTX(CmdSetDepthCompareOpEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_compare_op);
      VKCTX(CmdSetDepthWriteEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.depth_write);
      VKCTX(CmdSetStencilTestEnableEXT)(batch->state->cmdbuf, dsa_state->hw_state.stencil_test);
      if (dsa_state->hw_state.stencil_test) {
         VKCTX(CmdSetStencilOpEXT)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT,
                                       dsa_state->hw_state.stencil_front.failOp,
                                       dsa_state->hw_state.stencil_front.passOp,
                                       dsa_state->hw_state.stencil_front.depthFailOp,
                                       dsa_state->hw_state.stencil_front.compareOp);
         VKCTX(CmdSetStencilOpEXT)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT,
                                       dsa_state->hw_state.stencil_back.failOp,
                                       dsa_state->hw_state.stencil_back.passOp,
                                       dsa_state->hw_state.stencil_back.depthFailOp,
                                       dsa_state->hw_state.stencil_back.compareOp);
         if (dsa_state->base.stencil[1].enabled) {
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.writeMask);
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.writeMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.compareMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.compareMask);
         } else {
            VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask);
            VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask);
         }
      } else {
         VKCTX(CmdSetStencilWriteMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask);
         VKCTX(CmdSetStencilCompareMask)(batch->state->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask);
      }
   }
   ctx->dsa_state_changed = false;

   bool rast_state_changed = ctx->rast_state_changed;
   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || rast_state_changed)) {
      VKCTX(CmdSetFrontFaceEXT)(batch->state->cmdbuf, (VkFrontFace)ctx->gfx_pipeline_state.dyn_state1.front_face);
      VKCTX(CmdSetCullModeEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.dyn_state1.cull_mode);
   }
   if ((BATCH_CHANGED || rast_state_changed) &&
       screen->info.have_EXT_line_rasterization && rast_state->base.line_stipple_enable)
      VKCTX(CmdSetLineStippleEXT)(batch->state->cmdbuf, rast_state->base.line_stipple_factor, rast_state->base.line_stipple_pattern);

   if (BATCH_CHANGED || ctx->rast_state_changed) {
      enum pipe_prim_type reduced_prim = ctx->last_vertex_stage->reduced_prim;
      if (reduced_prim == PIPE_PRIM_MAX)
         reduced_prim = u_reduced_prim(mode);

      bool depth_bias = false;
      switch (reduced_prim) {
      case PIPE_PRIM_POINTS:
         depth_bias = rast_state->offset_point;
         break;

      case PIPE_PRIM_LINES:
         depth_bias = rast_state->offset_line;
         break;

      case PIPE_PRIM_TRIANGLES:
         depth_bias = rast_state->offset_tri;
         break;

      default:
         unreachable("unexpected reduced prim");
      }

      VKCTX(CmdSetLineWidth)(batch->state->cmdbuf, rast_state->line_width);
      if (depth_bias) {
         if (rast_state->base.offset_units_unscaled) {
            VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units * ctx->depth_bias_scale_factor, rast_state->offset_clamp, rast_state->offset_scale);
         } else {
            VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale);
         }
      } else {
         VKCTX(CmdSetDepthBias)(batch->state->cmdbuf, 0.0f, 0.0f, 0.0f);
      }
   }
   ctx->rast_state_changed = false;

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) {
      if (ctx->sample_locations_changed) {
         VkSampleLocationsInfoEXT loc;
         zink_init_vk_sample_locations(ctx, &loc);
         VKCTX(CmdSetSampleLocationsEXT)(batch->state->cmdbuf, &loc);
      }
      ctx->sample_locations_changed = false;
   }

   if ((BATCH_CHANGED || ctx->blend_state_changed) &&
       ctx->gfx_pipeline_state.blend_state->need_blend_constants) {
      VKCTX(CmdSetBlendConstants)(batch->state->cmdbuf, ctx->blend_constants);
   }
   ctx->blend_state_changed = false;

   if (DRAW_STATE)
      zink_bind_vertex_state(batch, ctx, vstate, partial_velem_mask);
   else if (BATCH_CHANGED || ctx->vertex_buffers_dirty) {
      if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || ctx->gfx_pipeline_state.uses_dynamic_stride)
         zink_bind_vertex_buffers<DYNAMIC_STATE>(batch, ctx);
      else
         zink_bind_vertex_buffers<ZINK_NO_DYNAMIC_STATE>(batch, ctx);
   }

   if (BATCH_CHANGED) {
      ctx->pipeline_changed[0] = false;
      zink_select_draw_vbo(ctx);
   }

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || mode_changed)) {
      VKCTX(CmdSetPrimitiveTopologyEXT)(batch->state->cmdbuf, zink_primitive_topology(mode));
   }

   if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->primitive_restart != dinfo->primitive_restart)) {
      VKCTX(CmdSetPrimitiveRestartEnableEXT)(batch->state->cmdbuf, dinfo->primitive_restart);
      ctx->primitive_restart = dinfo->primitive_restart;
   }

   if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->rasterizer_discard_changed)) {
      VKCTX(CmdSetRasterizerDiscardEnableEXT)(batch->state->cmdbuf, ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard);
      ctx->rasterizer_discard_changed = false;
   }

   if (zink_program_has_descriptors(&ctx->curr_program->base))
      screen->descriptors_update(ctx, false);

   if (ctx->di.any_bindless_dirty &&
       /* some apps (d3dretrace) call MakeTextureHandleResidentARB randomly */
       zink_program_has_descriptors(&ctx->curr_program->base) &&
       ctx->curr_program->base.dd->bindless)
      zink_descriptors_update_bindless(ctx);

   if (reads_basevertex) {
      unsigned draw_mode_is_indexed = index_size > 0;
      VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_VERTEX_BIT,
                         offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed), sizeof(unsigned),
                         &draw_mode_is_indexed);
   }
   if (ctx->curr_program->shaders[PIPE_SHADER_TESS_CTRL] && ctx->curr_program->shaders[PIPE_SHADER_TESS_CTRL]->is_generated) {
      VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
                         offsetof(struct zink_gfx_push_constant, default_inner_level), sizeof(float) * 6,
                         &ctx->tess_levels[0]);
   }

   if (have_streamout) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         counter_buffers[i] = VK_NULL_HANDLE;
         if (t) {
            struct zink_resource *res = zink_resource(t->counter_buffer);
            t->stride = ctx->last_vertex_stage->sinfo.so_info.stride[i] * sizeof(uint32_t);
            zink_batch_reference_resource_rw(batch, res, true);
            res->obj->unordered_read = res->obj->unordered_write = false;
            if (t->counter_buffer_valid) {
               counter_buffers[i] = res->obj->buffer;
               counter_buffer_offsets[i] = t->counter_buffer_offset;
            }
         }
      }
      VKCTX(CmdBeginTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }

   bool needs_drawid = reads_drawid && zink_get_last_vertex_key(ctx)->push_drawid;
   work_count += num_draws;
   if (index_size > 0) {
      if (dindirect && dindirect->buffer) {
         assert(num_draws == 1);
         if (needs_drawid)
            update_drawid(ctx, drawid_offset);
         struct zink_resource *indirect = zink_resource(dindirect->buffer);
         zink_batch_reference_resource_rw(batch, indirect, false);
         if (dindirect->indirect_draw_count) {
             struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count);
             zink_batch_reference_resource_rw(batch, indirect_draw_count, false);
             VKCTX(CmdDrawIndexedIndirectCount)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset,
                                                indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset,
                                                dindirect->draw_count, dindirect->stride);
         } else
            VKCTX(CmdDrawIndexedIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
      } else {
         if (need_index_buffer_unref)
            draw_indexed_need_index_buffer_unref(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
         else
            draw_indexed<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
      }
   } else {
      if (so_target && screen->info.tf_props.transformFeedbackDraw) {
         /* GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_api attempts a bogus xfb
          * draw using a streamout target that has no data;
          * to avoid hanging the gpu, reject any such draws
          */
         if (so_target->counter_buffer_valid) {
            if (needs_drawid)
               update_drawid(ctx, drawid_offset);
            zink_batch_reference_resource_rw(batch, zink_resource(so_target->base.buffer), false);
            zink_batch_reference_resource_rw(batch, zink_resource(so_target->counter_buffer), true);
            VKCTX(CmdDrawIndirectByteCountEXT)(batch->state->cmdbuf, dinfo->instance_count, dinfo->start_instance,
                                          zink_resource(so_target->counter_buffer)->obj->buffer, so_target->counter_buffer_offset, 0,
                                          MIN2(so_target->stride, screen->info.tf_props.maxTransformFeedbackBufferDataStride));
         }
      } else if (dindirect && dindirect->buffer) {
         assert(num_draws == 1);
         if (needs_drawid)
            update_drawid(ctx, drawid_offset);
         struct zink_resource *indirect = zink_resource(dindirect->buffer);
         zink_batch_reference_resource_rw(batch, indirect, false);
         if (dindirect->indirect_draw_count) {
             struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count);
             zink_batch_reference_resource_rw(batch, indirect_draw_count, false);
             VKCTX(CmdDrawIndirectCount)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset,
                                           indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset,
                                           dindirect->draw_count, dindirect->stride);
         } else
            VKCTX(CmdDrawIndirect)(batch->state->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
      } else {
         draw<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
      }
   }

   if (have_streamout) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         if (t) {
            counter_buffers[i] = zink_resource(t->counter_buffer)->obj->buffer;
            counter_buffer_offsets[i] = t->counter_buffer_offset;
            t->counter_buffer_valid = true;
         }
      }
      VKCTX(CmdEndTransformFeedbackEXT)(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }
   batch->has_work = true;
   batch->last_was_compute = false;
   ctx->batch.work_count = work_count;
   /* flush if there's >30k draws */
   if (unlikely(work_count >= 30000) || ctx->oom_flush)
      pctx->flush(pctx, NULL, 0);
}

template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static void
zink_draw_vbo(struct pipe_context *pctx,
              const struct pipe_draw_info *info,
              unsigned drawid_offset,
              const struct pipe_draw_indirect_info *indirect,
              const struct pipe_draw_start_count_bias *draws,
              unsigned num_draws)
{
   zink_draw<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED, false>(pctx, info, drawid_offset, indirect, draws, num_draws, NULL, 0);
}

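/* draw entrypoint for cached vertex-state (display list style) draws: synthesize
 * a pipe_draw_info and temporarily swap in the vertex state's element state
 */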
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static void
zink_draw_vertex_state(struct pipe_context *pctx,
                       struct pipe_vertex_state *vstate,
                       uint32_t partial_velem_mask,
                       struct pipe_draw_vertex_state_info info,
                       const struct pipe_draw_start_count_bias *draws,
                       unsigned num_draws)
{
   struct pipe_draw_info dinfo = {};

   dinfo.mode = info.mode;
   dinfo.index_size = 4;
   dinfo.instance_count = 1;
   dinfo.index.resource = vstate->input.indexbuf;
   struct zink_context *ctx = zink_context(pctx);
   struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource);
   zink_resource_buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
                                VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
   res->obj->unordered_read = false;
   struct zink_vertex_elements_hw_state *hw_state = ctx->gfx_pipeline_state.element_state;
   ctx->gfx_pipeline_state.element_state = &((struct zink_vertex_state*)vstate)->velems.hw_state;

   zink_draw<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED, true>(pctx, &dinfo, 0, NULL, draws, num_draws, vstate, partial_velem_mask);
   ctx->gfx_pipeline_state.element_state = hw_state;

   if (info.take_vertex_state_ownership)
      pipe_vertex_state_reference(&vstate, NULL);
}

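/* compute dispatch entrypoint, specialized on whether the cmdbuf changed; mirrors
 * the draw path: barriers, descriptors, pipeline bind, then dispatch
 */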
template <bool BATCH_CHANGED>
static void
zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_batch *batch = &ctx->batch;

   if (ctx->render_condition_active)
      zink_start_conditional_render(ctx);

   if (info->indirect) {
      /*
         VK_ACCESS_INDIRECT_COMMAND_READ_BIT specifies read access to indirect command data read as
         part of an indirect build, trace, drawing or dispatching command. Such access occurs in the
         VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT pipeline stage.

         - Chapter 7. Synchronization and Cache Control
       */
      check_buffer_barrier(ctx, info->indirect, VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
   }

   update_barriers(ctx, true, NULL, info->indirect, NULL);
   if (ctx->memory_barrier)
      zink_flush_memory_barrier(ctx, true);

   if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
      zink_batch_no_rp(ctx);
      VkMemoryBarrier mb;
      mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
      mb.pNext = NULL;
      mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
      mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
      VKSCR(CmdPipelineBarrier)(ctx->batch.state->cmdbuf,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                0, 1, &mb, 0, NULL, 0, NULL);
   }

   if (zink_program_has_descriptors(&ctx->curr_compute->base))
      screen->descriptors_update(ctx, true);
   if (ctx->di.any_bindless_dirty && ctx->curr_compute->base.dd->bindless)
      zink_descriptors_update_bindless(ctx);

   zink_program_update_compute_pipeline_state(ctx, ctx->curr_compute, info->block);
   VkPipeline prev_pipeline = ctx->compute_pipeline_state.pipeline;

   if (BATCH_CHANGED) {
      zink_update_descriptor_refs(ctx, true);
      zink_batch_reference_program(&ctx->batch, &ctx->curr_compute->base);
   }
   if (ctx->dirty_shader_stages & BITFIELD_BIT(PIPE_SHADER_COMPUTE)) {
      /* update inlinable constants */
      zink_update_compute_program(ctx);
      ctx->dirty_shader_stages &= ~BITFIELD_BIT(PIPE_SHADER_COMPUTE);
   }

   VkPipeline pipeline = zink_get_compute_pipeline(screen, ctx->curr_compute,
                                               &ctx->compute_pipeline_state);

   if (prev_pipeline != pipeline || BATCH_CHANGED)
      VKCTX(CmdBindPipeline)(batch->state->cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   if (BATCH_CHANGED) {
      ctx->pipeline_changed[1] = false;
      zink_select_launch_grid(ctx);
   }

   if (BITSET_TEST(ctx->compute_stage->nir->info.system_values_read, SYSTEM_VALUE_WORK_DIM))
      VKCTX(CmdPushConstants)(batch->state->cmdbuf, ctx->curr_compute->base.layout, VK_SHADER_STAGE_COMPUTE_BIT,
                         offsetof(struct zink_cs_push_constant, work_dim), sizeof(uint32_t),
                         &info->work_dim);

   batch->work_count++;
   zink_batch_no_rp(ctx);
   if (info->indirect) {
      VKCTX(CmdDispatchIndirect)(batch->state->cmdbuf, zink_resource(info->indirect)->obj->buffer, info->indirect_offset);
      zink_batch_reference_resource_rw(batch, zink_resource(info->indirect), false);
   } else
      VKCTX(CmdDispatch)(batch->state->cmdbuf, info->grid[0], info->grid[1], info->grid[2]);
   batch->has_work = true;
   batch->last_was_compute = true;
   /* flush if there's >30k computes */
   if (unlikely(ctx->batch.work_count >= 30000) || ctx->oom_flush)
      pctx->flush(pctx, NULL, 0);
}

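/* the templates below instantiate one specialized entrypoint per
 * (multidraw, dynamic-state, batch-changed) combination and store them in
 * lookup arrays indexed by those same parameters
 */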
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static void
init_batch_changed_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][4][2], pipe_draw_vertex_state_func draw_state_array[2][4][2])
{
   draw_vbo_array[HAS_MULTIDRAW][DYNAMIC_STATE][BATCH_CHANGED] = zink_draw_vbo<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED>;
   draw_state_array[HAS_MULTIDRAW][DYNAMIC_STATE][BATCH_CHANGED] = zink_draw_vertex_state<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED>;
}

template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE>
static void
init_dynamic_state_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][4][2], pipe_draw_vertex_state_func draw_state_array[2][4][2])
{
   init_batch_changed_functions<HAS_MULTIDRAW, DYNAMIC_STATE, false>(ctx, draw_vbo_array, draw_state_array);
   init_batch_changed_functions<HAS_MULTIDRAW, DYNAMIC_STATE, true>(ctx, draw_vbo_array, draw_state_array);
}

template <zink_multidraw HAS_MULTIDRAW>
static void
init_multidraw_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][4][2], pipe_draw_vertex_state_func draw_state_array[2][4][2])
{
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_NO_DYNAMIC_STATE>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE2>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_VERTEX_INPUT>(ctx, draw_vbo_array, draw_state_array);
}

static void
init_all_draw_functions(struct zink_context *ctx, pipe_draw_vbo_func draw_vbo_array[2][4][2], pipe_draw_vertex_state_func draw_state_array[2][4][2])
{
   init_multidraw_functions<ZINK_NO_MULTIDRAW>(ctx, draw_vbo_array, draw_state_array);
   init_multidraw_functions<ZINK_MULTIDRAW>(ctx, draw_vbo_array, draw_state_array);
}

template <bool BATCH_CHANGED>
static void
init_grid_batch_changed_functions(struct zink_context *ctx)
{
   ctx->launch_grid[BATCH_CHANGED] = zink_launch_grid<BATCH_CHANGED>;
}

static void
init_all_grid_functions(struct zink_context *ctx)
{
   init_grid_batch_changed_functions<false>(ctx);
   init_grid_batch_changed_functions<true>(ctx);
}

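/* placeholder entrypoints used while no shader is bound; reaching one of these
 * means the state tracker issued a draw/dispatch without the required shader
 */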
static void
zink_invalid_draw_vbo(struct pipe_context *pipe,
                      const struct pipe_draw_info *dinfo,
                      unsigned drawid_offset,
                      const struct pipe_draw_indirect_info *dindirect,
                      const struct pipe_draw_start_count_bias *draws,
                      unsigned num_draws)
{
   unreachable("vertex shader not bound");
}

static void
zink_invalid_draw_vertex_state(struct pipe_context *pipe,
                               struct pipe_vertex_state *vstate,
                               uint32_t partial_velem_mask,
                               struct pipe_draw_vertex_state_info info,
                               const struct pipe_draw_start_count_bias *draws,
                               unsigned num_draws)
{
   unreachable("vertex shader not bound");
}

static void
zink_invalid_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
{
   unreachable("compute shader not bound");
}

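/* STAGE_MASK here is ctx->shader_stages >> 2, i.e. a bitmask of the optional
 * stages: bit 0 = geometry, bit 1 = tess ctrl, bit 2 = tess eval
 * (vertex and fragment are always present)
 */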
template <unsigned STAGE_MASK>
static uint32_t
hash_gfx_program(const void *key)
{
   const struct zink_shader **shaders = (const struct zink_shader**)key;
   uint32_t base_hash = shaders[PIPE_SHADER_VERTEX]->hash ^ shaders[PIPE_SHADER_FRAGMENT]->hash;
   if (STAGE_MASK == 0) //VS+FS
      return base_hash;
   if (STAGE_MASK == 1) //VS+GS+FS
      return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash;
   /*VS+TCS+FS isn't a thing */
   /*VS+TCS+GS+FS isn't a thing */
   if (STAGE_MASK == 4) //VS+TES+FS
      return base_hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;
   if (STAGE_MASK == 5) //VS+TES+GS+FS
      return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;
   if (STAGE_MASK == 6) //VS+TCS+TES+FS
      return base_hash ^ shaders[PIPE_SHADER_TESS_CTRL]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;

   /* all stages */
   return base_hash ^ shaders[PIPE_SHADER_GEOMETRY]->hash ^ shaders[PIPE_SHADER_TESS_CTRL]->hash ^ shaders[PIPE_SHADER_TESS_EVAL]->hash;
}

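/* equality counterpart of hash_gfx_program: compare only the shader slots that
 * can be populated for this stage mask (VS/FS occupy slots 0-1, GS slot 2,
 * TCS/TES slots 3-4)
 */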
template <unsigned STAGE_MASK>
static bool
equals_gfx_program(const void *a, const void *b)
{
   const void **sa = (const void**)a;
   const void **sb = (const void**)b;
   if (STAGE_MASK == 0) //VS+FS
      return !memcmp(a, b, sizeof(void*) * 2);
   if (STAGE_MASK == 1) //VS+GS+FS
      return !memcmp(a, b, sizeof(void*) * 3);
   /*VS+TCS+FS isn't a thing */
   /*VS+TCS+GS+FS isn't a thing */
   if (STAGE_MASK == 4) //VS+TES+FS
      return sa[PIPE_SHADER_TESS_EVAL] == sb[PIPE_SHADER_TESS_EVAL] && !memcmp(a, b, sizeof(void*) * 2);
   if (STAGE_MASK == 5) //VS+TES+GS+FS
      return sa[PIPE_SHADER_TESS_EVAL] == sb[PIPE_SHADER_TESS_EVAL] && !memcmp(a, b, sizeof(void*) * 3);
   if (STAGE_MASK == 6) //VS+TCS+TES+FS
      return !memcmp(&sa[PIPE_SHADER_TESS_CTRL], &sb[PIPE_SHADER_TESS_CTRL], sizeof(void*) * 2) &&
             !memcmp(a, b, sizeof(void*) * 2);

   /* all stages */
   return !memcmp(a, b, sizeof(void*) * ZINK_SHADER_COUNT);
}

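/* called from the C side at context creation: pick the specialized draw
 * entrypoints matching this screen's feature set and seed the per-stage-mask
 * program caches
 */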
extern "C"
void
zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen)
{
   pipe_draw_vbo_func draw_vbo_array[2][4] //multidraw, zink_dynamic_state
                                    [2];   //batch changed
   pipe_draw_vertex_state_func draw_state_array[2][4] //multidraw, zink_dynamic_state
                                               [2];   //batch changed
   zink_dynamic_state dynamic;
   if (screen->info.have_EXT_extended_dynamic_state) {
      if (screen->info.have_EXT_extended_dynamic_state2) {
         if (screen->info.have_EXT_vertex_input_dynamic_state)
            dynamic = ZINK_DYNAMIC_VERTEX_INPUT;
         else
            dynamic = ZINK_DYNAMIC_STATE2;
      } else {
         dynamic = ZINK_DYNAMIC_STATE;
      }
   } else {
      dynamic = ZINK_NO_DYNAMIC_STATE;
   }
   init_all_draw_functions(ctx, draw_vbo_array, draw_state_array);
   memcpy(ctx->draw_vbo, &draw_vbo_array[screen->info.have_EXT_multi_draw]
                                        [dynamic],
                                        sizeof(ctx->draw_vbo));
   memcpy(ctx->draw_state, &draw_state_array[screen->info.have_EXT_multi_draw]
                                          [dynamic],
                                          sizeof(ctx->draw_state));

   /* Bind a fake draw_vbo, so that draw_vbo isn't NULL, which would skip
    * initialization of callbacks in upper layers (such as u_threaded_context).
    */
   ctx->base.draw_vbo = zink_invalid_draw_vbo;
   ctx->base.draw_vertex_state = zink_invalid_draw_vertex_state;

   _mesa_hash_table_init(&ctx->program_cache[0], ctx, hash_gfx_program<0>, equals_gfx_program<0>);
   _mesa_hash_table_init(&ctx->program_cache[1], ctx, hash_gfx_program<1>, equals_gfx_program<1>);
   _mesa_hash_table_init(&ctx->program_cache[2], ctx, hash_gfx_program<2>, equals_gfx_program<2>);
   _mesa_hash_table_init(&ctx->program_cache[3], ctx, hash_gfx_program<3>, equals_gfx_program<3>);
   _mesa_hash_table_init(&ctx->program_cache[4], ctx, hash_gfx_program<4>, equals_gfx_program<4>);
   _mesa_hash_table_init(&ctx->program_cache[5], ctx, hash_gfx_program<5>, equals_gfx_program<5>);
   _mesa_hash_table_init(&ctx->program_cache[6], ctx, hash_gfx_program<6>, equals_gfx_program<6>);
   _mesa_hash_table_init(&ctx->program_cache[7], ctx, hash_gfx_program<7>, equals_gfx_program<7>);
}

void
zink_init_grid_functions(struct zink_context *ctx)
{
   init_all_grid_functions(ctx);
   /* Bind a fake launch_grid, so that launch_grid isn't NULL, which would skip
    * initialization of callbacks in upper layers (such as u_threaded_context).
    */
   ctx->base.launch_grid = zink_invalid_launch_grid;
}