/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Based in part on v3d driver which is:
 *
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"

/* The only version-specific structure that we need is
 * TMU_CONFIG_PARAMETER_1. This didn't seem to change significantly from
 * previous V3D versions and we don't expect that to change, so for now let's
 * just hardcode the V3D version here.
 */
#define V3D_VERSION 41
#include "broadcom/common/v3d_macros.h"
#include "broadcom/cle/v3dx_pack.h"

/* Our Vulkan resource indices represent indices in descriptor maps which
 * include all shader stages, so we need to size the arrays below
 * accordingly. For now we only support a maximum of 3 stages: VS, GS, FS.
 */
#define MAX_STAGES 3
#define MAX_TOTAL_TEXTURE_SAMPLERS (V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
struct texture_bo_list {
   struct v3dv_bo *tex[MAX_TOTAL_TEXTURE_SAMPLERS];
};

/* This tracks state BOs for both textures and samplers, so we
 * multiply by 2.
 */
#define MAX_TOTAL_STATES (2 * V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
struct state_bo_list {
   uint32_t count;
   struct v3dv_bo *states[MAX_TOTAL_STATES];
};

#define MAX_TOTAL_UNIFORM_BUFFERS ((MAX_UNIFORM_BUFFERS + \
                                    MAX_INLINE_UNIFORM_BUFFERS) * MAX_STAGES)
#define MAX_TOTAL_STORAGE_BUFFERS (MAX_STORAGE_BUFFERS * MAX_STAGES)
struct buffer_bo_list {
   struct v3dv_bo *ubo[MAX_TOTAL_UNIFORM_BUFFERS];
   struct v3dv_bo *ssbo[MAX_TOTAL_STORAGE_BUFFERS];
};

static bool
state_bo_in_list(struct state_bo_list *list, struct v3dv_bo *bo)
{
   for (int i = 0; i < list->count; i++) {
      if (list->states[i] == bo)
         return true;
   }
   return false;
}

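/* Destroy callback used with the command buffer's private object machinery
 * to free a retired push constants BO when the command buffer is destroyed.
 */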
static void
push_constants_bo_free(VkDevice _device,
                       uint64_t bo_ptr,
                       VkAllocationCallbacks *alloc)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   v3dv_bo_free(device, (struct v3dv_bo *)(uintptr_t) bo_ptr);
}

/*
 * This method checks whether the UBO used for push constants needs to be
 * updated.
 *
 * The push constants UBO is only used for push constants that are accessed
 * with a non-constant index.
 */
static void
check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_pipeline *pipeline)
{
   if (!(cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO) ||
       pipeline->layout->push_constant_size == 0)
      return;

   if (cmd_buffer->push_constants_resource.bo == NULL) {
      cmd_buffer->push_constants_resource.bo =
         v3dv_bo_alloc(cmd_buffer->device, 4096, "push constants", true);

      if (!cmd_buffer->push_constants_resource.bo) {
         fprintf(stderr, "Failed to allocate memory for push constants\n");
         abort();
      }

      v3dv_job_add_bo(cmd_buffer->state.job,
                      cmd_buffer->push_constants_resource.bo);

      bool ok = v3dv_bo_map(cmd_buffer->device,
                            cmd_buffer->push_constants_resource.bo,
                            cmd_buffer->push_constants_resource.bo->size);
      if (!ok) {
         fprintf(stderr, "failed to map push constants buffer\n");
         abort();
      }
   } else {
      if (cmd_buffer->push_constants_resource.offset +
          cmd_buffer->state.push_constants_size <=
          cmd_buffer->push_constants_resource.bo->size) {
         cmd_buffer->push_constants_resource.offset +=
            cmd_buffer->state.push_constants_size;
      } else {
         /* We ran out of space so we'll have to allocate a new buffer, but we
          * need to ensure the old one is preserved until the end of the
          * command buffer's lifetime and that it is eventually freed. We use
          * the private object machinery in the command buffer for this.
          */
         v3dv_cmd_buffer_add_private_obj(
            cmd_buffer, (uintptr_t) cmd_buffer->push_constants_resource.bo,
            (v3dv_cmd_buffer_private_obj_destroy_cb) push_constants_bo_free);

         /* Now call this function again so we allocate a new BO */
         cmd_buffer->push_constants_resource.bo = NULL;
         check_push_constants_ubo(cmd_buffer, pipeline);
         return;
      }
   }

   assert(cmd_buffer->state.push_constants_size <= MAX_PUSH_CONSTANTS_SIZE);
   memcpy(cmd_buffer->push_constants_resource.bo->map +
          cmd_buffer->push_constants_resource.offset,
          cmd_buffer->state.push_constants_data,
          cmd_buffer->state.push_constants_size);

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO;
}

/** V3D 4.x TMU configuration parameter 0 (texture) */
static void
write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct texture_bo_list *tex_bos,
             struct state_bo_list *state_bos)
{
   uint32_t texture_idx = v3d_unit_data_get_unit(data);

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   /* We need to ensure that the texture bo is added to the job */
   struct v3dv_bo *texture_bo =
      v3dv_descriptor_map_get_texture_bo(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout, texture_idx);
   assert(texture_bo);
   assert(texture_idx < V3D_MAX_TEXTURE_SAMPLERS);
   tex_bos->tex[texture_idx] = texture_bo;

   struct v3dv_cl_reloc state_reloc =
      v3dv_descriptor_map_get_texture_shader_state(cmd_buffer->device, descriptor_state,
                                                   &pipeline->shared_data->maps[stage]->texture_map,
                                                   pipeline->layout,
                                                   texture_idx);

   cl_aligned_u32(uniforms, state_reloc.bo->offset +
                            state_reloc.offset +
                            v3d_unit_data_get_offset(data));

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = state_reloc.bo;
   }
}

/** V3D 4.x TMU configuration parameter 1 (sampler) */
static void
write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct state_bo_list *state_bos)
{
   uint32_t sampler_idx = v3d_unit_data_get_unit(data);
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   assert(sampler_idx != V3DV_NO_SAMPLER_16BIT_IDX &&
          sampler_idx != V3DV_NO_SAMPLER_32BIT_IDX);

   struct v3dv_cl_reloc sampler_state_reloc =
      v3dv_descriptor_map_get_sampler_state(cmd_buffer->device, descriptor_state,
                                            &pipeline->shared_data->maps[stage]->sampler_map,
                                            pipeline->layout, sampler_idx);

   const struct v3dv_sampler *sampler =
      v3dv_descriptor_map_get_sampler(descriptor_state,
                                      &pipeline->shared_data->maps[stage]->sampler_map,
                                      pipeline->layout, sampler_idx);
   assert(sampler);

   /* Set unnormalized coordinates flag from sampler object */
   uint32_t p1_packed = v3d_unit_data_get_offset(data);
   if (sampler->unnormalized_coordinates) {
      struct V3DX(TMU_CONFIG_PARAMETER_1) p1_unpacked;
      V3DX(TMU_CONFIG_PARAMETER_1_unpack)((uint8_t *)&p1_packed, &p1_unpacked);
      p1_unpacked.unnormalized_coordinates = true;
      V3DX(TMU_CONFIG_PARAMETER_1_pack)(NULL, (uint8_t *)&p1_packed,
                                        &p1_unpacked);
   }

   cl_aligned_u32(uniforms, sampler_state_reloc.bo->offset +
                            sampler_state_reloc.offset +
                            p1_packed);

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, sampler_state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = sampler_state_reloc.bo;
   }
}

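/* Emits the address (or size) uniform for a UBO or SSBO access and records
 * the backing BO in buffer_bos so it can be added to the job afterwards.
 * UBO index 0 refers to the push constants UBO, which is handled separately.
 */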
static void
write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                        struct v3dv_pipeline *pipeline,
                        enum broadcom_shader_stage stage,
                        struct v3dv_cl_out **uniforms,
                        enum quniform_contents content,
                        uint32_t data,
                        struct buffer_bo_list *buffer_bos)
{
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor_map *map =
      content == QUNIFORM_UBO_ADDR || content == QUNIFORM_GET_UBO_SIZE ?
      &pipeline->shared_data->maps[stage]->ubo_map :
      &pipeline->shared_data->maps[stage]->ssbo_map;

   uint32_t offset =
      content == QUNIFORM_UBO_ADDR ?
      v3d_unit_data_get_offset(data) :
      0;

   uint32_t dynamic_offset = 0;

   /* For ubos, index is shifted, as 0 is reserved for push constants
    * and 1..MAX_INLINE_UNIFORM_BUFFERS are reserved for inline uniform
    * buffers.
    */
   uint32_t index = v3d_unit_data_get_unit(data);
   if (content == QUNIFORM_UBO_ADDR && index == 0) {
      /* Ensure the push constants UBO is created and updated. This also
       * adds the BO to the job so we don't need to track it in buffer_bos.
       */
      check_push_constants_ubo(cmd_buffer, pipeline);

      struct v3dv_cl_reloc *resource =
         &cmd_buffer->push_constants_resource;
      assert(resource->bo);

      cl_aligned_u32(uniforms, resource->bo->offset +
                               resource->offset +
                               offset + dynamic_offset);
   } else {
      if (content == QUNIFORM_UBO_ADDR) {
         /* We reserve index 0 for push constants and artificially increase our
          * indices by one for that reason, fix that now before accessing the
          * descriptor map.
          */
         assert(index > 0);
         index--;
      } else {
         index = data;
      }

      struct v3dv_descriptor *descriptor =
         v3dv_descriptor_map_get_descriptor(descriptor_state, map,
                                            pipeline->layout,
                                            index, &dynamic_offset);

      assert(descriptor);

      if (content == QUNIFORM_GET_SSBO_SIZE ||
          content == QUNIFORM_GET_UBO_SIZE) {
         cl_aligned_u32(uniforms, descriptor->range);
      } else {
         /* Inline uniform buffers store their contents in pool memory instead
          * of an external buffer.
          */
         struct v3dv_bo *bo;
         uint32_t addr;
         if (descriptor->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
            assert(dynamic_offset == 0);
            struct v3dv_cl_reloc reloc =
               v3dv_descriptor_map_get_descriptor_bo(cmd_buffer->device,
                                                     descriptor_state, map,
                                                     pipeline->layout, index,
                                                     NULL);
            bo = reloc.bo;
            addr = reloc.bo->offset + reloc.offset + offset;
         } else {
            assert(descriptor->buffer);
            assert(descriptor->buffer->mem);
            assert(descriptor->buffer->mem->bo);

            bo = descriptor->buffer->mem->bo;
            addr = bo->offset +
                   descriptor->buffer->mem_offset +
                   descriptor->offset +
                   offset + dynamic_offset;
         }

         cl_aligned_u32(uniforms, addr);

         if (content == QUNIFORM_UBO_ADDR) {
            assert(index < MAX_TOTAL_UNIFORM_BUFFERS);
            buffer_bos->ubo[index] = bo;
         } else {
            assert(index < MAX_TOTAL_STORAGE_BUFFERS);
            buffer_bos->ssbo[index] = bo;
         }
      }
   }
}

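/* Inline uniform buffer contents live in the descriptor pool BO, so we can
 * read the value directly from the (mapped) descriptor memory and copy it
 * into the uniform stream.
 */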
static void
write_inline_uniform(struct v3dv_cl_out **uniforms,
                     uint32_t index,
                     uint32_t offset,
                     struct v3dv_cmd_buffer *cmd_buffer,
                     struct v3dv_pipeline *pipeline,
                     enum broadcom_shader_stage stage)
{
   assert(index < MAX_INLINE_UNIFORM_BUFFERS);

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor_map *map =
      &pipeline->shared_data->maps[stage]->ubo_map;

   struct v3dv_cl_reloc reloc =
      v3dv_descriptor_map_get_descriptor_bo(cmd_buffer->device,
                                            descriptor_state, map,
                                            pipeline->layout, index,
                                            NULL);

   /* Offset comes in 32-bit units */
   uint32_t *addr = reloc.bo->map + reloc.offset + 4 * offset;
   cl_aligned_u32(uniforms, *addr);
}

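/* Returns the size field requested by 'contents' (width, height, depth,
 * array size, level count or sample count) for an image view.
 */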
static uint32_t
get_texture_size_from_image_view(struct v3dv_image_view *image_view,
                                 enum quniform_contents contents,
                                 uint32_t data)
{
   switch (contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      /* We don't u_minify the values, as we are using the image_view
       * extents
       */
      return image_view->vk.extent.width;
   case QUNIFORM_IMAGE_HEIGHT:
   case QUNIFORM_TEXTURE_HEIGHT:
      return image_view->vk.extent.height;
   case QUNIFORM_IMAGE_DEPTH:
   case QUNIFORM_TEXTURE_DEPTH:
      return image_view->vk.extent.depth;
   case QUNIFORM_IMAGE_ARRAY_SIZE:
   case QUNIFORM_TEXTURE_ARRAY_SIZE:
      if (image_view->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
         return image_view->vk.layer_count;
      } else {
         assert(image_view->vk.layer_count % 6 == 0);
         return image_view->vk.layer_count / 6;
      }
   case QUNIFORM_TEXTURE_LEVELS:
      return image_view->vk.level_count;
   case QUNIFORM_TEXTURE_SAMPLES:
      assert(image_view->vk.image);
      return image_view->vk.image->samples;
   default:
      unreachable("Bad texture size field");
   }
}


static uint32_t
get_texture_size_from_buffer_view(struct v3dv_buffer_view *buffer_view,
                                  enum quniform_contents contents,
                                  uint32_t data)
{
   switch (contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      return buffer_view->num_elements;
   /* Only size can be queried for texel buffers */
   default:
      unreachable("Bad texture size field for texel buffers");
   }
}

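/* Resolves the descriptor behind a texture size query and returns the
 * requested dimension from its image view or buffer view.
 */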
static uint32_t
get_texture_size(struct v3dv_cmd_buffer *cmd_buffer,
                 struct v3dv_pipeline *pipeline,
                 enum broadcom_shader_stage stage,
                 enum quniform_contents contents,
                 uint32_t data)
{
   uint32_t texture_idx = data;

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor *descriptor =
      v3dv_descriptor_map_get_descriptor(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout,
                                         texture_idx, NULL);

   assert(descriptor);

   switch (descriptor->type) {
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      return get_texture_size_from_image_view(descriptor->image_view,
                                              contents, data);
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      return get_texture_size_from_buffer_view(descriptor->buffer_view,
                                               contents, data);
   default:
      unreachable("Wrong descriptor for getting texture size");
   }
}

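/* Walks the uniform list for a shader variant and writes its uniform stream
 * into the job's indirect CL, returning its address. Any BOs referenced by
 * the uniforms (textures, samplers, UBOs, SSBOs, etc.) are added to the job.
 * For compute shaders, if 'wg_count_offsets' is not NULL, it is filled with
 * pointers to the workgroup count uniforms so callers can rewrite them later
 * (e.g. for indirect dispatch).
 */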
struct v3dv_cl_reloc
v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_pipeline *pipeline,
                               struct v3dv_shader_variant *variant,
                               uint32_t **wg_count_offsets)
{
   struct v3d_uniform_list *uinfo =
      &variant->prog_data.base->uniforms;
   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);
   assert(job->cmd_buffer == cmd_buffer);

   struct texture_bo_list tex_bos = { 0 };
   struct state_bo_list state_bos = { 0 };
   struct buffer_bo_list buffer_bos = { 0 };

   /* The hardware always pre-fetches the next uniform (also when there
    * aren't any), so we always allocate space for an extra slot. This
    * fixes MMU exceptions reported since Linux kernel 5.4 when the
    * uniforms fill up the tail bytes of a page in the indirect
    * BO. In that scenario, when the hardware pre-fetches after reading
    * the last uniform it will read beyond the end of the page and trigger
    * the MMU exception.
    */
   v3dv_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);

   struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);

   struct v3dv_cl_out *uniforms = cl_start(&job->indirect);

   for (int i = 0; i < uinfo->count; i++) {
      uint32_t data = uinfo->data[i];

      switch (uinfo->contents[i]) {
      case QUNIFORM_CONSTANT:
         cl_aligned_u32(&uniforms, data);
         break;

      case QUNIFORM_UNIFORM:
         cl_aligned_u32(&uniforms, cmd_buffer->state.push_constants_data[data]);
         break;

      case QUNIFORM_INLINE_UBO_0:
      case QUNIFORM_INLINE_UBO_1:
      case QUNIFORM_INLINE_UBO_2:
      case QUNIFORM_INLINE_UBO_3:
         write_inline_uniform(&uniforms,
                              uinfo->contents[i] - QUNIFORM_INLINE_UBO_0, data,
                              cmd_buffer, pipeline, variant->stage);
         break;

      case QUNIFORM_VIEWPORT_X_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Y_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Z_OFFSET:
         cl_aligned_f(&uniforms, dynamic->viewport.translate[0][2]);
         break;

      case QUNIFORM_VIEWPORT_Z_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][2]);
         break;

      case QUNIFORM_SSBO_OFFSET:
      case QUNIFORM_UBO_ADDR:
      case QUNIFORM_GET_SSBO_SIZE:
      case QUNIFORM_GET_UBO_SIZE:
         write_ubo_ssbo_uniforms(cmd_buffer, pipeline, variant->stage, &uniforms,
                                 uinfo->contents[i], data, &buffer_bos);
         break;

      case QUNIFORM_IMAGE_TMU_CONFIG_P0:
      case QUNIFORM_TMU_CONFIG_P0:
         write_tmu_p0(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &tex_bos, &state_bos);
         break;

      case QUNIFORM_TMU_CONFIG_P1:
         write_tmu_p1(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &state_bos);
         break;

      case QUNIFORM_IMAGE_WIDTH:
      case QUNIFORM_IMAGE_HEIGHT:
      case QUNIFORM_IMAGE_DEPTH:
      case QUNIFORM_IMAGE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_WIDTH:
      case QUNIFORM_TEXTURE_HEIGHT:
      case QUNIFORM_TEXTURE_DEPTH:
      case QUNIFORM_TEXTURE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_LEVELS:
      case QUNIFORM_TEXTURE_SAMPLES:
         cl_aligned_u32(&uniforms,
                        get_texture_size(cmd_buffer,
                                         pipeline,
                                         variant->stage,
                                         uinfo->contents[i],
                                         data));
         break;

      /* We generate this from geometry shaders to cap the generated gl_Layer
       * to be within the number of layers of the framebuffer so we prevent
       * the binner from trying to access tile state memory out of bounds (for
       * layers that don't exist).
       *
       * Unfortunately, for secondary command buffers we may not know the
       * number of layers in the framebuffer at this stage. Since we are
       * only using this to sanitize the shader and it should not have any
       * impact on correct shaders that emit valid values for gl_Layer,
       * we just work around it by using the largest number of layers we
       * support.
       *
       * FIXME: we could do better than this by recording in the job that
       * the value at this uniform offset is not correct, and patch it when
       * we execute the secondary command buffer into a primary, since we do
       * have the correct number of layers at that point, but again, since this
       * is only for sanitizing the shader and it only affects the specific
       * case of secondary command buffers without framebuffer info available,
       * it might not be worth the trouble.
       *
       * With multiview the number of layers is dictated by the view mask
       * and not by the framebuffer layers. We do set the job's frame tiling
       * information correctly from the view mask in that case, however,
       * secondary command buffers may not have valid frame tiling data,
       * so when multiview is enabled, we always set the number of layers
       * from the subpass view mask.
       */
      case QUNIFORM_FB_LAYERS: {
         const struct v3dv_cmd_buffer_state *state = &job->cmd_buffer->state;
         const uint32_t view_mask =
            state->pass->subpasses[state->subpass_idx].view_mask;

         uint32_t num_layers;
         if (view_mask != 0) {
            num_layers = util_last_bit(view_mask);
         } else if (job->frame_tiling.layers != 0) {
            num_layers = job->frame_tiling.layers;
         } else if (cmd_buffer->state.framebuffer) {
            num_layers = cmd_buffer->state.framebuffer->layers;
         } else {
            assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
            num_layers = 2048;
#if DEBUG
            fprintf(stderr, "Skipping gl_LayerID shader sanity check for "
                            "secondary command buffer\n");
#endif
         }
         cl_aligned_u32(&uniforms, num_layers);
         break;
      }

      case QUNIFORM_VIEW_INDEX:
         cl_aligned_u32(&uniforms, job->cmd_buffer->state.view_index);
         break;

      case QUNIFORM_NUM_WORK_GROUPS:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.wg_count[data] > 0);
         if (wg_count_offsets)
            wg_count_offsets[data] = (uint32_t *) uniforms;
         cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
         break;

      case QUNIFORM_WORK_GROUP_BASE:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         cl_aligned_u32(&uniforms, job->csd.wg_base[data]);
         break;

      case QUNIFORM_SHARED_OFFSET:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.shared_memory);
         cl_aligned_u32(&uniforms, job->csd.shared_memory->offset);
         break;

      case QUNIFORM_SPILL_OFFSET:
         assert(pipeline->spill.bo);
         cl_aligned_u32(&uniforms, pipeline->spill.bo->offset);
         break;

      case QUNIFORM_SPILL_SIZE_PER_THREAD:
         assert(pipeline->spill.size_per_thread > 0);
         cl_aligned_u32(&uniforms, pipeline->spill.size_per_thread);
         break;

      default:
         unreachable("unsupported quniform_contents uniform type\n");
      }
   }

   cl_end(&job->indirect, uniforms);

   for (int i = 0; i < MAX_TOTAL_TEXTURE_SAMPLERS; i++) {
      if (tex_bos.tex[i])
         v3dv_job_add_bo(job, tex_bos.tex[i]);
   }

   for (int i = 0; i < state_bos.count; i++)
      v3dv_job_add_bo(job, state_bos.states[i]);

   for (int i = 0; i < MAX_TOTAL_UNIFORM_BUFFERS; i++) {
      if (buffer_bos.ubo[i])
         v3dv_job_add_bo(job, buffer_bos.ubo[i]);
   }

   for (int i = 0; i < MAX_TOTAL_STORAGE_BUFFERS; i++) {
      if (buffer_bos.ssbo[i])
         v3dv_job_add_bo(job, buffer_bos.ssbo[i]);
   }

   if (job->csd.shared_memory)
      v3dv_job_add_bo(job, job->csd.shared_memory);

   if (pipeline->spill.bo)
      v3dv_job_add_bo(job, pipeline->spill.bo);

   return uniform_stream;
}

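/* Convenience wrapper around v3dv_write_uniforms_wg_offsets() for callers
 * that don't need to track the workgroup count uniform offsets.
 */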
struct v3dv_cl_reloc
v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                    struct v3dv_pipeline *pipeline,
                    struct v3dv_shader_variant *variant)
{
   return v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline, variant, NULL);
}

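/* Illustrative usage (not part of this file): when emitting draw or dispatch
 * state, a caller would typically write the uniform stream for each active
 * variant and pack the returned indirect-CL address into the corresponding
 * shader record. A minimal sketch, where the variable names are illustrative:
 *
 *    struct v3dv_cl_reloc fs_uniforms =
 *       v3dv_write_uniforms(cmd_buffer, pipeline, fs_variant);
 *    struct v3dv_cl_reloc vs_uniforms =
 *       v3dv_write_uniforms(cmd_buffer, pipeline, vs_variant);
 *    // ...pack fs_uniforms/vs_uniforms into the shader state record...
 */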