1/*
2 * Copyright © 2021 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "v3dv_private.h"
25#include "broadcom/common/v3d_macros.h"
26#include "broadcom/cle/v3dx_pack.h"
27#include "broadcom/compiler/v3d_compiler.h"
28
29static uint8_t
30blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
31{
32   switch (factor) {
33   case VK_BLEND_FACTOR_ZERO:
34   case VK_BLEND_FACTOR_ONE:
35   case VK_BLEND_FACTOR_SRC_COLOR:
36   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
37   case VK_BLEND_FACTOR_DST_COLOR:
38   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
39   case VK_BLEND_FACTOR_SRC_ALPHA:
40   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
41   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
42      return factor;
43   case VK_BLEND_FACTOR_CONSTANT_COLOR:
44   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
45   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
46   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
47      *needs_constants = true;
48      return factor;
49   case VK_BLEND_FACTOR_DST_ALPHA:
50      return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
51                             V3D_BLEND_FACTOR_DST_ALPHA;
52   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
53      return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
54                             V3D_BLEND_FACTOR_INV_DST_ALPHA;
55   case VK_BLEND_FACTOR_SRC1_COLOR:
56   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
57   case VK_BLEND_FACTOR_SRC1_ALPHA:
58   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
59      assert(!"Invalid blend factor: dual source blending not supported.");
60   default:
61      assert(!"Unknown blend factor.");
62   }
63
64   /* Should be handled by the switch, added to avoid a "end of non-void
65    * function" error
66    */
67   unreachable("Unknown blend factor.");
68}
69
70static void
71pack_blend(struct v3dv_pipeline *pipeline,
72           const VkPipelineColorBlendStateCreateInfo *cb_info)
73{
74   /* By default, we are not enabling blending and all color channel writes are
75    * enabled. Color write enables are independent of whether blending is
76    * enabled or not.
77    *
78    * Vulkan specifies color write masks so that bits set correspond to
79    * enabled channels. Our hardware does it the other way around.
80    */
81   pipeline->blend.enables = 0;
82   pipeline->blend.color_write_masks = 0; /* All channels enabled */
83
84   if (!cb_info)
85      return;
86
87   assert(pipeline->subpass);
88   if (pipeline->subpass->color_count == 0)
89      return;
90
91   assert(pipeline->subpass->color_count == cb_info->attachmentCount);
92
93   pipeline->blend.needs_color_constants = false;
94   uint32_t color_write_masks = 0;
95   for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) {
96      const VkPipelineColorBlendAttachmentState *b_state =
97         &cb_info->pAttachments[i];
98
99      uint32_t attachment_idx =
100         pipeline->subpass->color_attachments[i].attachment;
101      if (attachment_idx == VK_ATTACHMENT_UNUSED)
102         continue;
103
104      color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);
105
106      if (!b_state->blendEnable)
107         continue;
108
109      VkAttachmentDescription2 *desc =
110         &pipeline->pass->attachments[attachment_idx].desc;
111      const struct v3dv_format *format = v3dX(get_format)(desc->format);
112      bool dst_alpha_one = (format->swizzle[3] == PIPE_SWIZZLE_1);
113
114      uint8_t rt_mask = 1 << i;
115      pipeline->blend.enables |= rt_mask;
116
117      v3dvx_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
118         config.render_target_mask = rt_mask;
119
120         config.color_blend_mode = b_state->colorBlendOp;
121         config.color_blend_dst_factor =
122            blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
123                         &pipeline->blend.needs_color_constants);
124         config.color_blend_src_factor =
125            blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
126                         &pipeline->blend.needs_color_constants);
127
128         config.alpha_blend_mode = b_state->alphaBlendOp;
129         config.alpha_blend_dst_factor =
130            blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
131                         &pipeline->blend.needs_color_constants);
132         config.alpha_blend_src_factor =
133            blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
134                         &pipeline->blend.needs_color_constants);
135      }
136   }
137
138   pipeline->blend.color_write_masks = color_write_masks;
139}
140
141/* This requires that pack_blend() had been called before so we can set
142 * the overall blend enable bit in the CFG_BITS packet.
143 */
144static void
145pack_cfg_bits(struct v3dv_pipeline *pipeline,
146              const VkPipelineDepthStencilStateCreateInfo *ds_info,
147              const VkPipelineRasterizationStateCreateInfo *rs_info,
148              const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
149              const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info,
150              const VkPipelineMultisampleStateCreateInfo *ms_info)
151{
152   assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));
153
154   pipeline->msaa =
155      ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
156
157   v3dvx_pack(pipeline->cfg_bits, CFG_BITS, config) {
158      config.enable_forward_facing_primitive =
159         rs_info ? !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT) : false;
160
161      config.enable_reverse_facing_primitive =
162         rs_info ? !(rs_info->cullMode & VK_CULL_MODE_BACK_BIT) : false;
163
164      /* Seems like the hardware is backwards regarding this setting... */
165      config.clockwise_primitives =
166         rs_info ? rs_info->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE : false;
167
168      config.enable_depth_offset = rs_info ? rs_info->depthBiasEnable: false;
169
170      /* This is required to pass line rasterization tests in CTS while
171       * exposing, at least, a minimum of 4-bits of subpixel precision
172       * (the minimum requirement).
173       */
174      if (ls_info &&
175          ls_info->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)
176         config.line_rasterization = V3D_LINE_RASTERIZATION_DIAMOND_EXIT;
177      else
178         config.line_rasterization = V3D_LINE_RASTERIZATION_PERP_END_CAPS;
179
180      if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) {
181         config.direct3d_wireframe_triangles_mode = true;
182         config.direct3d_point_fill_mode =
183            rs_info->polygonMode == VK_POLYGON_MODE_POINT;
184      }
185
186      /* diamond-exit rasterization does not suport oversample */
187      config.rasterizer_oversample_mode =
188         (config.line_rasterization == V3D_LINE_RASTERIZATION_PERP_END_CAPS &&
189          pipeline->msaa) ? 1 : 0;
190
191      /* From the Vulkan spec:
192       *
193       *   "Provoking Vertex:
194       *
195       *       The vertex in a primitive from which flat shaded attribute
196       *       values are taken. This is generally the “first” vertex in the
197       *       primitive, and depends on the primitive topology."
198       *
199       * First vertex is the Direct3D style for provoking vertex. OpenGL uses
200       * the last vertex by default.
201       */
202      if (pv_info) {
203         config.direct3d_provoking_vertex =
204            pv_info->provokingVertexMode ==
205               VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT;
206      } else {
207         config.direct3d_provoking_vertex = true;
208      }
209
210      config.blend_enable = pipeline->blend.enables != 0;
211
212      /* Disable depth/stencil if we don't have a D/S attachment */
213      bool has_ds_attachment =
214         pipeline->subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED;
215
216      if (ds_info && ds_info->depthTestEnable && has_ds_attachment) {
217         config.z_updates_enable = ds_info->depthWriteEnable;
218         config.depth_test_function = ds_info->depthCompareOp;
219      } else {
220         config.depth_test_function = VK_COMPARE_OP_ALWAYS;
221      }
222
223      /* EZ state will be updated at draw time based on bound pipeline state */
224      config.early_z_updates_enable = false;
225      config.early_z_enable = false;
226
227      config.stencil_enable =
228         ds_info ? ds_info->stencilTestEnable && has_ds_attachment: false;
229
230      pipeline->z_updates_enable = config.z_updates_enable;
231   };
232}
233
234static uint32_t
235translate_stencil_op(enum pipe_stencil_op op)
236{
237   switch (op) {
238   case VK_STENCIL_OP_KEEP:
239      return V3D_STENCIL_OP_KEEP;
240   case VK_STENCIL_OP_ZERO:
241      return V3D_STENCIL_OP_ZERO;
242   case VK_STENCIL_OP_REPLACE:
243      return V3D_STENCIL_OP_REPLACE;
244   case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
245      return V3D_STENCIL_OP_INCR;
246   case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
247      return V3D_STENCIL_OP_DECR;
248   case VK_STENCIL_OP_INVERT:
249      return V3D_STENCIL_OP_INVERT;
250   case VK_STENCIL_OP_INCREMENT_AND_WRAP:
251      return V3D_STENCIL_OP_INCWRAP;
252   case VK_STENCIL_OP_DECREMENT_AND_WRAP:
253      return V3D_STENCIL_OP_DECWRAP;
254   default:
255      unreachable("bad stencil op");
256   }
257}
258
259static void
260pack_single_stencil_cfg(struct v3dv_pipeline *pipeline,
261                        uint8_t *stencil_cfg,
262                        bool is_front,
263                        bool is_back,
264                        const VkStencilOpState *stencil_state)
265{
266   /* From the Vulkan spec:
267    *
268    *   "Reference is an integer reference value that is used in the unsigned
269    *    stencil comparison. The reference value used by stencil comparison
270    *    must be within the range [0,2^s-1] , where s is the number of bits in
271    *    the stencil framebuffer attachment, otherwise the reference value is
272    *    considered undefined."
273    *
274    * In our case, 's' is always 8, so we clamp to that to prevent our packing
275    * functions to assert in debug mode if they see larger values.
276    *
277    * If we have dynamic state we need to make sure we set the corresponding
278    * state bits to 0, since cl_emit_with_prepacked ORs the new value with
279    * the old.
280    */
281   const uint8_t write_mask =
282      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK ?
283         0 : stencil_state->writeMask & 0xff;
284
285   const uint8_t compare_mask =
286      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
287         0 : stencil_state->compareMask & 0xff;
288
289   const uint8_t reference =
290      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
291         0 : stencil_state->reference & 0xff;
292
293   v3dvx_pack(stencil_cfg, STENCIL_CFG, config) {
294      config.front_config = is_front;
295      config.back_config = is_back;
296      config.stencil_write_mask = write_mask;
297      config.stencil_test_mask = compare_mask;
298      config.stencil_test_function = stencil_state->compareOp;
299      config.stencil_pass_op = translate_stencil_op(stencil_state->passOp);
300      config.depth_test_fail_op = translate_stencil_op(stencil_state->depthFailOp);
301      config.stencil_test_fail_op = translate_stencil_op(stencil_state->failOp);
302      config.stencil_ref_value = reference;
303   }
304}
305
306static void
307pack_stencil_cfg(struct v3dv_pipeline *pipeline,
308                 const VkPipelineDepthStencilStateCreateInfo *ds_info)
309{
310   assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG));
311
312   if (!ds_info || !ds_info->stencilTestEnable)
313      return;
314
315   if (pipeline->subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
316      return;
317
318   const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
319                                           V3DV_DYNAMIC_STENCIL_WRITE_MASK |
320                                           V3DV_DYNAMIC_STENCIL_REFERENCE;
321
322
323   /* If front != back or we have dynamic stencil state we can't emit a single
324    * packet for both faces.
325    */
326   bool needs_front_and_back = false;
327   if ((pipeline->dynamic_state.mask & dynamic_stencil_states) ||
328       memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front)))
329      needs_front_and_back = true;
330
331   /* If the front and back configurations are the same we can emit both with
332    * a single packet.
333    */
334   pipeline->emit_stencil_cfg[0] = true;
335   if (!needs_front_and_back) {
336      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
337                              true, true, &ds_info->front);
338   } else {
339      pipeline->emit_stencil_cfg[1] = true;
340      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
341                              true, false, &ds_info->front);
342      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
343                              false, true, &ds_info->back);
344   }
345}
346
/* Prepacks the blend, CFG_BITS and stencil configuration state of the
 * pipeline from the given Vulkan create infos.
 *
 * The infos are forwarded as-is to the helpers below; cb_info, ds_info,
 * rs_info, pv_info, ls_info and ms_info are all checked for NULL there
 * before being used.
 */
void
v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
                          const VkPipelineColorBlendStateCreateInfo *cb_info,
                          const VkPipelineDepthStencilStateCreateInfo *ds_info,
                          const VkPipelineRasterizationStateCreateInfo *rs_info,
                          const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
                          const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info,
                          const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   /* Order matters: pack_cfg_bits() reads pipeline->blend.enables, which is
    * computed by pack_blend().
    */
   pack_blend(pipeline, cb_info);
   pack_cfg_bits(pipeline, ds_info, rs_info, pv_info, ls_info, ms_info);
   pack_stencil_cfg(pipeline, ds_info);
}
360
361static void
362pack_shader_state_record(struct v3dv_pipeline *pipeline)
363{
364   assert(sizeof(pipeline->shader_state_record) ==
365          cl_packet_length(GL_SHADER_STATE_RECORD));
366
367   struct v3d_fs_prog_data *prog_data_fs =
368      pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
369
370   struct v3d_vs_prog_data *prog_data_vs =
371      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
372
373   struct v3d_vs_prog_data *prog_data_vs_bin =
374      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;
375
376
377   /* Note: we are not packing addresses, as we need the job (see
378    * cl_pack_emit_reloc). Additionally uniforms can't be filled up at this
379    * point as they depend on dynamic info that can be set after create the
380    * pipeline (like viewport), . Would need to be filled later, so we are
381    * doing a partial prepacking.
382    */
383   v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
384      shader.enable_clipping = true;
385
386      if (!pipeline->has_gs) {
387         shader.point_size_in_shaded_vertex_data =
388            pipeline->topology == PIPE_PRIM_POINTS;
389      } else {
390         struct v3d_gs_prog_data *prog_data_gs =
391            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]->prog_data.gs;
392         shader.point_size_in_shaded_vertex_data = prog_data_gs->writes_psiz;
393      }
394
395      /* Must be set if the shader modifies Z, discards, or modifies
396       * the sample mask.  For any of these cases, the fragment
397       * shader needs to write the Z value (even just discards).
398       */
399      shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
400
401      /* Set if the EZ test must be disabled (due to shader side
402       * effects and the early_z flag not being present in the
403       * shader).
404       */
405      shader.turn_off_early_z_test = prog_data_fs->disable_ez;
406
407      shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
408         prog_data_fs->uses_center_w;
409
410      /* The description for gl_SampleID states that if a fragment shader reads
411       * it, then we should automatically activate per-sample shading. However,
412       * the Vulkan spec also states that if a framebuffer has no attachments:
413       *
414       *    "The subpass continues to use the width, height, and layers of the
415       *     framebuffer to define the dimensions of the rendering area, and the
416       *     rasterizationSamples from each pipeline’s
417       *     VkPipelineMultisampleStateCreateInfo to define the number of
418       *     samples used in rasterization multisample rasterization."
419       *
420       * So in this scenario, if the pipeline doesn't enable multiple samples
421       * but the fragment shader accesses gl_SampleID we would be requested
422       * to do per-sample shading in single sample rasterization mode, which
423       * is pointless, so just disable it in that case.
424       */
425      shader.enable_sample_rate_shading =
426         pipeline->sample_rate_shading ||
427         (pipeline->msaa && prog_data_fs->force_per_sample_msaa);
428
429      shader.any_shader_reads_hardware_written_primitive_id = false;
430
431      shader.do_scoreboard_wait_on_first_thread_switch =
432         prog_data_fs->lock_scoreboard_on_first_thrsw;
433      shader.disable_implicit_point_line_varyings =
434         !prog_data_fs->uses_implicit_point_line_varyings;
435
436      shader.number_of_varyings_in_fragment_shader =
437         prog_data_fs->num_inputs;
438
439      shader.coordinate_shader_propagate_nans = true;
440      shader.vertex_shader_propagate_nans = true;
441      shader.fragment_shader_propagate_nans = true;
442
443      /* Note: see previous note about adresses */
444      /* shader.coordinate_shader_code_address */
445      /* shader.vertex_shader_code_address */
446      /* shader.fragment_shader_code_address */
447
448      /* FIXME: Use combined input/output size flag in the common case (also
449       * on v3d, see v3dx_draw).
450       */
451      shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
452         prog_data_vs_bin->separate_segments;
453      shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
454         prog_data_vs->separate_segments;
455
456      shader.coordinate_shader_input_vpm_segment_size =
457         prog_data_vs_bin->separate_segments ?
458         prog_data_vs_bin->vpm_input_size : 1;
459      shader.vertex_shader_input_vpm_segment_size =
460         prog_data_vs->separate_segments ?
461         prog_data_vs->vpm_input_size : 1;
462
463      shader.coordinate_shader_output_vpm_segment_size =
464         prog_data_vs_bin->vpm_output_size;
465      shader.vertex_shader_output_vpm_segment_size =
466         prog_data_vs->vpm_output_size;
467
468      /* Note: see previous note about adresses */
469      /* shader.coordinate_shader_uniforms_address */
470      /* shader.vertex_shader_uniforms_address */
471      /* shader.fragment_shader_uniforms_address */
472
473      shader.min_coord_shader_input_segments_required_in_play =
474         pipeline->vpm_cfg_bin.As;
475      shader.min_vertex_shader_input_segments_required_in_play =
476         pipeline->vpm_cfg.As;
477
478      shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
479         pipeline->vpm_cfg_bin.Ve;
480      shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
481         pipeline->vpm_cfg.Ve;
482
483      shader.coordinate_shader_4_way_threadable =
484         prog_data_vs_bin->base.threads == 4;
485      shader.vertex_shader_4_way_threadable =
486         prog_data_vs->base.threads == 4;
487      shader.fragment_shader_4_way_threadable =
488         prog_data_fs->base.threads == 4;
489
490      shader.coordinate_shader_start_in_final_thread_section =
491         prog_data_vs_bin->base.single_seg;
492      shader.vertex_shader_start_in_final_thread_section =
493         prog_data_vs->base.single_seg;
494      shader.fragment_shader_start_in_final_thread_section =
495         prog_data_fs->base.single_seg;
496
497      shader.vertex_id_read_by_coordinate_shader =
498         prog_data_vs_bin->uses_vid;
499      shader.base_instance_id_read_by_coordinate_shader =
500         prog_data_vs_bin->uses_biid;
501      shader.instance_id_read_by_coordinate_shader =
502         prog_data_vs_bin->uses_iid;
503      shader.vertex_id_read_by_vertex_shader =
504         prog_data_vs->uses_vid;
505      shader.base_instance_id_read_by_vertex_shader =
506         prog_data_vs->uses_biid;
507      shader.instance_id_read_by_vertex_shader =
508         prog_data_vs->uses_iid;
509
510      /* Note: see previous note about adresses */
511      /* shader.address_of_default_attribute_values */
512   }
513}
514
515static void
516pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
517{
518   assert(sizeof(pipeline->vcm_cache_size) ==
519          cl_packet_length(VCM_CACHE_SIZE));
520
521   v3dvx_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
522      vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
523      vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
524   }
525}
526
527/* As defined on the GL_SHADER_STATE_ATTRIBUTE_RECORD */
528static uint8_t
529get_attr_type(const struct util_format_description *desc)
530{
531   uint32_t r_size = desc->channel[0].size;
532   uint8_t attr_type = ATTRIBUTE_FLOAT;
533
534   switch (desc->channel[0].type) {
535   case UTIL_FORMAT_TYPE_FLOAT:
536      if (r_size == 32) {
537         attr_type = ATTRIBUTE_FLOAT;
538      } else {
539         assert(r_size == 16);
540         attr_type = ATTRIBUTE_HALF_FLOAT;
541      }
542      break;
543
544   case UTIL_FORMAT_TYPE_SIGNED:
545   case UTIL_FORMAT_TYPE_UNSIGNED:
546      switch (r_size) {
547      case 32:
548         attr_type = ATTRIBUTE_INT;
549         break;
550      case 16:
551         attr_type = ATTRIBUTE_SHORT;
552         break;
553      case 10:
554         attr_type = ATTRIBUTE_INT2_10_10_10;
555         break;
556      case 8:
557         attr_type = ATTRIBUTE_BYTE;
558         break;
559      default:
560         fprintf(stderr,
561                 "format %s unsupported\n",
562                 desc->name);
563         attr_type = ATTRIBUTE_BYTE;
564         abort();
565      }
566      break;
567
568   default:
569      fprintf(stderr,
570              "format %s unsupported\n",
571              desc->name);
572      abort();
573   }
574
575   return attr_type;
576}
577
578static void
579pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
580                                   uint32_t index,
581                                   const VkVertexInputAttributeDescription *vi_desc)
582{
583   const uint32_t packet_length =
584      cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
585
586   const struct util_format_description *desc =
587      vk_format_description(vi_desc->format);
588
589   uint32_t binding = vi_desc->binding;
590
591   v3dvx_pack(&pipeline->vertex_attrs[index * packet_length],
592             GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
593
594      /* vec_size == 0 means 4 */
595      attr.vec_size = desc->nr_channels & 3;
596      attr.signed_int_type = (desc->channel[0].type ==
597                              UTIL_FORMAT_TYPE_SIGNED);
598      attr.normalized_int_type = desc->channel[0].normalized;
599      attr.read_as_int_uint = desc->channel[0].pure_integer;
600
601      attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor,
602                                   0xffff);
603      attr.stride = pipeline->vb[binding].stride;
604      attr.type = get_attr_type(desc);
605   }
606}
607
608void
609v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
610                                  const VkPipelineVertexInputStateCreateInfo *vi_info,
611                                  const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info)
612{
613   pack_shader_state_record(pipeline);
614   pack_vcm_cache_size(pipeline);
615
616   pipeline->vb_count = vi_info->vertexBindingDescriptionCount;
617   for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
618      const VkVertexInputBindingDescription *desc =
619         &vi_info->pVertexBindingDescriptions[i];
620
621      pipeline->vb[desc->binding].stride = desc->stride;
622      pipeline->vb[desc->binding].instance_divisor = desc->inputRate;
623   }
624
625   if (vd_info) {
626      for (uint32_t i = 0; i < vd_info->vertexBindingDivisorCount; i++) {
627         const VkVertexInputBindingDivisorDescriptionEXT *desc =
628            &vd_info->pVertexBindingDivisors[i];
629
630         pipeline->vb[desc->binding].instance_divisor = desc->divisor;
631      }
632   }
633
634   pipeline->va_count = 0;
635   struct v3d_vs_prog_data *prog_data_vs =
636      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
637
638   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
639      const VkVertexInputAttributeDescription *desc =
640         &vi_info->pVertexAttributeDescriptions[i];
641      uint32_t location = desc->location + VERT_ATTRIB_GENERIC0;
642
643      /* We use a custom driver_location_map instead of
644       * nir_find_variable_with_location because if we were able to get the
645       * shader variant from the cache, we would not have the nir shader
646       * available.
647       */
648      uint32_t driver_location =
649         prog_data_vs->driver_location_map[location];
650
651      if (driver_location != -1) {
652         assert(driver_location < MAX_VERTEX_ATTRIBS);
653         pipeline->va[driver_location].offset = desc->offset;
654         pipeline->va[driver_location].binding = desc->binding;
655         pipeline->va[driver_location].vk_format = desc->format;
656
657         pack_shader_state_attribute_record(pipeline, driver_location, desc);
658
659         pipeline->va_count++;
660      }
661   }
662}
663