1/*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * based in part on anv driver which is:
5 * Copyright © 2015 Intel Corporation
6 *
7 * based in part on radv driver which is:
8 * Copyright © 2016 Red Hat.
9 * Copyright © 2016 Bas Nieuwenhuizen
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a
12 * copy of this software and associated documentation files (the "Software"),
13 * to deal in the Software without restriction, including without limitation
14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 * and/or sell copies of the Software, and to permit persons to whom the
16 * Software is furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice (including the next
19 * paragraph) shall be included in all copies or substantial portions of the
20 * Software.
21 *
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 * IN THE SOFTWARE.
29 */
30#ifndef V3DV_PRIVATE_H
31#define V3DV_PRIVATE_H
32
33#include <stdio.h>
34#include <string.h>
35#include <vulkan/vulkan.h>
36#include <vulkan/vk_icd.h>
37#include <vk_enum_to_str.h>
38
39#include "vk_device.h"
40#include "vk_format.h"
41#include "vk_instance.h"
42#include "vk_image.h"
43#include "vk_log.h"
44#include "vk_physical_device.h"
45#include "vk_shader_module.h"
46#include "vk_sync.h"
47#include "vk_sync_timeline.h"
48#include "vk_util.h"
49
50#include "vk_command_buffer.h"
51#include "vk_command_pool.h"
52#include "vk_queue.h"
53
54#include <xf86drm.h>
55
56#ifdef HAVE_VALGRIND
57#include <valgrind.h>
58#include <memcheck.h>
59#define VG(x) x
60#else
61#define VG(x) ((void)0)
62#endif
63
64#include "v3dv_limits.h"
65
66#include "common/v3d_device_info.h"
67#include "common/v3d_limits.h"
68#include "common/v3d_tiling.h"
69#include "common/v3d_util.h"
70
71#include "compiler/shader_enums.h"
72#include "compiler/spirv/nir_spirv.h"
73
74#include "compiler/v3d_compiler.h"
75
76#include "vk_debug_report.h"
77#include "util/set.h"
78#include "util/hash_table.h"
79#include "util/sparse_array.h"
80#include "util/xmlconfig.h"
81#include "u_atomic.h"
82
83#include "v3dv_entrypoints.h"
84#include "v3dv_bo.h"
85
86#include "drm-uapi/v3d_drm.h"
87
88#include "vk_alloc.h"
89#include "simulator/v3d_simulator.h"
90
91#include "v3dv_cl.h"
92
93#include "wsi_common.h"
94
95/* A non-fatal assert.  Useful for debugging. */
96#ifdef DEBUG
97#define v3dv_assert(x) ({ \
98   if (unlikely(!(x))) \
99      fprintf(stderr, "%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
100})
101#else
102#define v3dv_assert(x)
103#endif
104
105#define perf_debug(...) do {                       \
106   if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF))       \
107      fprintf(stderr, __VA_ARGS__);                \
108} while (0)
109
110struct v3dv_instance;
111
112#ifdef USE_V3D_SIMULATOR
113#define using_v3d_simulator true
114#else
115#define using_v3d_simulator false
116#endif
117
118struct v3d_simulator_file;
119
120/* Minimum required by the Vulkan 1.1 spec */
121#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)
122
123struct v3dv_physical_device {
124   struct vk_physical_device vk;
125
126   char *name;
127   int32_t render_fd;
128   int32_t display_fd;
129   int32_t master_fd;
130
131   /* We need these because it is not clear how to detect
132    * valid devids in a portable way
133     */
134   bool has_primary;
135   bool has_render;
136
137   dev_t primary_devid;
138   dev_t render_devid;
139
140#if using_v3d_simulator
141   uint32_t device_id;
142#endif
143
144   uint8_t driver_build_sha1[20];
145   uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
146   uint8_t device_uuid[VK_UUID_SIZE];
147   uint8_t driver_uuid[VK_UUID_SIZE];
148
149   struct vk_sync_type drm_syncobj_type;
150   struct vk_sync_timeline_type sync_timeline_type;
151   const struct vk_sync_type *sync_types[3];
152
153   struct disk_cache *disk_cache;
154
155   mtx_t mutex;
156
157   struct wsi_device wsi_device;
158
159   VkPhysicalDeviceMemoryProperties memory;
160
161   struct v3d_device_info devinfo;
162
163   struct v3d_simulator_file *sim_file;
164
165   const struct v3d_compiler *compiler;
166   uint32_t next_program_id;
167
168   /* This array holds all our 'struct v3dv_bo' allocations. We use this
169    * so we can add a refcount to our BOs and check if a particular BO
170    * was already allocated in this device using its GEM handle. This is
171    * necessary to properly manage BO imports, because the kernel doesn't
172    * refcount the underlying BO memory.
173    *
174    * Specifically, when self-importing (i.e. importing a BO into the same
175    * device that created it), the kernel will give us the same BO handle
176    * for both BOs and we must only free it once when  both references are
177    * freed. Otherwise, if we are not self-importing, we get two differnt BO
178    * handles, and we want to free each one individually.
179    *
180    * The BOs in this map all have a refcnt with the referece counter and
181    * only self-imported BOs will ever have a refcnt > 1.
182    */
183   struct util_sparse_array bo_map;
184
185   struct {
186      bool merge_jobs;
187   } options;
188
189   struct {
190      bool multisync;
191      bool perfmon;
192   } caps;
193};
194
195VkResult v3dv_physical_device_acquire_display(struct v3dv_instance *instance,
196                                              struct v3dv_physical_device *pdevice,
197                                              VkIcdSurfaceBase *surface);
198
199static inline struct v3dv_bo *
200v3dv_device_lookup_bo(struct v3dv_physical_device *device, uint32_t handle)
201{
202   return (struct v3dv_bo *) util_sparse_array_get(&device->bo_map, handle);
203}
204
205VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
206void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
207struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
208                                                     uint32_t index);
209
210void v3dv_meta_clear_init(struct v3dv_device *device);
211void v3dv_meta_clear_finish(struct v3dv_device *device);
212
213void v3dv_meta_blit_init(struct v3dv_device *device);
214void v3dv_meta_blit_finish(struct v3dv_device *device);
215
216void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
217void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);
218
219bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
220                           const VkOffset3D *offset,
221                           VkFormat *compat_format);
222
223struct v3dv_instance {
224   struct vk_instance vk;
225
226   int physicalDeviceCount;
227   struct v3dv_physical_device physicalDevice;
228
229   bool pipeline_cache_enabled;
230   bool default_pipeline_cache_enabled;
231};
232
233/* FIXME: In addition to tracking the last job submitted by GPU queue (cl, csd,
234 * tfu), we still need a syncobj to track the last overall job submitted
235 * (V3DV_QUEUE_ANY) for the case we don't support multisync. Someday we can
236 * start expecting multisync to be present and drop the legacy implementation
237 * together with this V3DV_QUEUE_ANY tracker.
238 */
239enum v3dv_queue_type {
240   V3DV_QUEUE_CL = 0,
241   V3DV_QUEUE_CSD,
242   V3DV_QUEUE_TFU,
243   V3DV_QUEUE_ANY,
244   V3DV_QUEUE_COUNT,
245};
246
247/* For each GPU queue, we use a syncobj to track the last job submitted. We
248 * set the flag `first` to determine when we are starting a new cmd buffer
249 * batch and therefore a job submitted to a given queue will be the first in a
250 * cmd buf batch.
251 */
252struct v3dv_last_job_sync {
253   /* If the job is the first submitted to a GPU queue in a cmd buffer batch.
254    *
255    * We use V3DV_QUEUE_{CL,CSD,TFU} both with and without multisync.
256    */
257   bool first[V3DV_QUEUE_COUNT];
258   /* Array of syncobj to track the last job submitted to a GPU queue.
259    *
260    * With multisync we use V3DV_QUEUE_{CL,CSD,TFU} to track syncobjs for each
261    * queue, but without multisync we only track the last job submitted to any
262    * queue in V3DV_QUEUE_ANY.
263    */
264   uint32_t syncs[V3DV_QUEUE_COUNT];
265};
266
267struct v3dv_queue {
268   struct vk_queue vk;
269
270   struct v3dv_device *device;
271
272   struct v3dv_last_job_sync last_job_syncs;
273
274   struct v3dv_job *noop_job;
275
276   /* The last active perfmon ID to prevent mixing of counter results when a
277    * job is submitted with a different perfmon id.
278    */
279   uint32_t last_perfmon_id;
280};
281
282VkResult v3dv_queue_driver_submit(struct vk_queue *vk_queue,
283                                  struct vk_queue_submit *submit);
284
285#define V3DV_META_BLIT_CACHE_KEY_SIZE              (4 * sizeof(uint32_t))
286#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
287                                                    sizeof(VkComponentMapping))
288
289struct v3dv_meta_color_clear_pipeline {
290   VkPipeline pipeline;
291   VkRenderPass pass;
292   bool cached;
293   uint64_t key;
294};
295
296struct v3dv_meta_depth_clear_pipeline {
297   VkPipeline pipeline;
298   uint64_t key;
299};
300
301struct v3dv_meta_blit_pipeline {
302   VkPipeline pipeline;
303   VkRenderPass pass;
304   VkRenderPass pass_no_load;
305   uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
306};
307
308struct v3dv_meta_texel_buffer_copy_pipeline {
309   VkPipeline pipeline;
310   VkRenderPass pass;
311   VkRenderPass pass_no_load;
312   uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
313};
314
315struct v3dv_pipeline_key {
316   bool robust_buffer_access;
317   uint8_t topology;
318   uint8_t logicop_func;
319   bool msaa;
320   bool sample_coverage;
321   bool sample_alpha_to_coverage;
322   bool sample_alpha_to_one;
323   uint8_t cbufs;
324   struct {
325      enum pipe_format format;
326      uint8_t swizzle[4];
327   } color_fmt[V3D_MAX_DRAW_BUFFERS];
328   uint8_t f32_color_rb;
329   uint32_t va_swap_rb_mask;
330   bool has_multiview;
331};
332
333struct v3dv_pipeline_cache_stats {
334   uint32_t miss;
335   uint32_t hit;
336   uint32_t count;
337   uint32_t on_disk_hit;
338};
339
340/* Equivalent to gl_shader_stage, but including the coordinate shaders
341 *
342 * FIXME: perhaps move to common
343 */
344enum broadcom_shader_stage {
345   BROADCOM_SHADER_VERTEX,
346   BROADCOM_SHADER_VERTEX_BIN,
347   BROADCOM_SHADER_GEOMETRY,
348   BROADCOM_SHADER_GEOMETRY_BIN,
349   BROADCOM_SHADER_FRAGMENT,
350   BROADCOM_SHADER_COMPUTE,
351};
352
353#define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)
354
355/* Assumes that coordinate shaders will be custom-handled by the caller */
356static inline enum broadcom_shader_stage
357gl_shader_stage_to_broadcom(gl_shader_stage stage)
358{
359   switch (stage) {
360   case MESA_SHADER_VERTEX:
361      return BROADCOM_SHADER_VERTEX;
362   case MESA_SHADER_GEOMETRY:
363      return BROADCOM_SHADER_GEOMETRY;
364   case MESA_SHADER_FRAGMENT:
365      return BROADCOM_SHADER_FRAGMENT;
366   case MESA_SHADER_COMPUTE:
367      return BROADCOM_SHADER_COMPUTE;
368   default:
369      unreachable("Unknown gl shader stage");
370   }
371}
372
373static inline gl_shader_stage
374broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
375{
376   switch (stage) {
377   case BROADCOM_SHADER_VERTEX:
378   case BROADCOM_SHADER_VERTEX_BIN:
379      return MESA_SHADER_VERTEX;
380   case BROADCOM_SHADER_GEOMETRY:
381   case BROADCOM_SHADER_GEOMETRY_BIN:
382      return MESA_SHADER_GEOMETRY;
383   case BROADCOM_SHADER_FRAGMENT:
384      return MESA_SHADER_FRAGMENT;
385   case BROADCOM_SHADER_COMPUTE:
386      return MESA_SHADER_COMPUTE;
387   default:
388      unreachable("Unknown broadcom shader stage");
389   }
390}
391
392static inline bool
393broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
394{
395   switch (stage) {
396   case BROADCOM_SHADER_VERTEX_BIN:
397   case BROADCOM_SHADER_GEOMETRY_BIN:
398      return true;
399   default:
400      return false;
401   }
402}
403
404static inline bool
405broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
406{
407   switch (stage) {
408   case BROADCOM_SHADER_VERTEX:
409   case BROADCOM_SHADER_GEOMETRY:
410      return true;
411   default:
412      return false;
413   }
414}
415
416static inline enum broadcom_shader_stage
417broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
418{
419   switch (stage) {
420   case BROADCOM_SHADER_VERTEX:
421      return BROADCOM_SHADER_VERTEX_BIN;
422   case BROADCOM_SHADER_GEOMETRY:
423      return BROADCOM_SHADER_GEOMETRY_BIN;
424   default:
425      unreachable("Invalid shader stage");
426   }
427}
428
429static inline const char *
430broadcom_shader_stage_name(enum broadcom_shader_stage stage)
431{
432   switch(stage) {
433   case BROADCOM_SHADER_VERTEX_BIN:
434      return "MESA_SHADER_VERTEX_BIN";
435   case BROADCOM_SHADER_GEOMETRY_BIN:
436      return "MESA_SHADER_GEOMETRY_BIN";
437   default:
438      return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
439   }
440}
441
442struct v3dv_pipeline_cache {
443   struct vk_object_base base;
444
445   struct v3dv_device *device;
446   mtx_t mutex;
447
448   struct hash_table *nir_cache;
449   struct v3dv_pipeline_cache_stats nir_stats;
450
451   struct hash_table *cache;
452   struct v3dv_pipeline_cache_stats stats;
453
454   /* For VK_EXT_pipeline_creation_cache_control. */
455   bool externally_synchronized;
456};
457
458struct v3dv_device {
459   struct vk_device vk;
460
461   struct v3dv_instance *instance;
462   struct v3dv_physical_device *pdevice;
463
464   struct v3d_device_info devinfo;
465   struct v3dv_queue queue;
466
467   /* Guards query->maybe_available and value for timestamps */
468   mtx_t query_mutex;
469
470   /* Signaled whenever a query is ended */
471   cnd_t query_ended;
472
473   /* Resources used for meta operations */
474   struct {
475      mtx_t mtx;
476      struct {
477         VkPipelineLayout p_layout;
478         struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
479      } color_clear;
480      struct {
481         VkPipelineLayout p_layout;
482         struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
483      } depth_clear;
484      struct {
485         VkDescriptorSetLayout ds_layout;
486         VkPipelineLayout p_layout;
487         struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
488      } blit;
489      struct {
490         VkDescriptorSetLayout ds_layout;
491         VkPipelineLayout p_layout;
492         struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
493      } texel_buffer_copy;
494   } meta;
495
496   struct v3dv_bo_cache {
497      /** List of struct v3d_bo freed, by age. */
498      struct list_head time_list;
499      /** List of struct v3d_bo freed, per size, by age. */
500      struct list_head *size_list;
501      uint32_t size_list_size;
502
503      mtx_t lock;
504
505      uint32_t cache_size;
506      uint32_t cache_count;
507      uint32_t max_cache_size;
508   } bo_cache;
509
510   uint32_t bo_size;
511   uint32_t bo_count;
512
513   struct v3dv_pipeline_cache default_pipeline_cache;
514
515   /* GL_SHADER_STATE_RECORD needs to speficy default attribute values. The
516    * following covers the most common case, that is all attributes format
517    * being float being float, allowing us to reuse the same BO for all
518    * pipelines matching this requirement. Pipelines that need integer
519    * attributes will create their own BO.
520    */
521   struct v3dv_bo *default_attribute_float;
522   VkPhysicalDeviceFeatures features;
523
524   void *device_address_mem_ctx;
525   struct util_dynarray device_address_bo_list; /* Array of struct v3dv_bo * */
526
527#ifdef ANDROID
528   const void *gralloc;
529   enum {
530      V3DV_GRALLOC_UNKNOWN,
531      V3DV_GRALLOC_CROS,
532      V3DV_GRALLOC_OTHER,
533   } gralloc_type;
534#endif
535};
536
537struct v3dv_device_memory {
538   struct vk_object_base base;
539
540   struct v3dv_bo *bo;
541   const VkMemoryType *type;
542   bool is_for_wsi;
543   bool is_for_device_address;
544};
545
546#define V3D_OUTPUT_IMAGE_FORMAT_NO 255
547#define TEXTURE_DATA_FORMAT_NO     255
548
549struct v3dv_format {
550   bool supported;
551
552   /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
553   uint8_t rt_type;
554
555   /* One of V3D33_TEXTURE_DATA_FORMAT_*. */
556   uint8_t tex_type;
557
558   /* Swizzle to apply to the RGBA shader output for storing to the tile
559    * buffer, to the RGBA tile buffer to produce shader input (for
560    * blending), and for turning the rgba8888 texture sampler return
561    * value into shader rgba values.
562    */
563   uint8_t swizzle[4];
564
565   /* Whether the return value is 16F/I/UI or 32F/I/UI. */
566   uint8_t return_size;
567
568   /* If the format supports (linear) filtering when texturing. */
569   bool supports_filtering;
570};
571
572struct v3d_resource_slice {
573   uint32_t offset;
574   uint32_t stride;
575   uint32_t padded_height;
576   /* Size of a single pane of the slice.  For 3D textures, there will be
577    * a number of panes equal to the minified, power-of-two-aligned
578    * depth.
579    */
580   uint32_t size;
581   uint8_t ub_pad;
582   enum v3d_tiling_mode tiling;
583   uint32_t padded_height_of_output_image_in_uif_blocks;
584};
585
586bool v3dv_format_swizzle_needs_rb_swap(const uint8_t *swizzle);
587bool v3dv_format_swizzle_needs_reverse(const uint8_t *swizzle);
588
589struct v3dv_image {
590   struct vk_image vk;
591
592   const struct v3dv_format *format;
593   uint32_t cpp;
594   bool tiled;
595
596   struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
597   uint64_t size; /* Total size in bytes */
598   uint32_t cube_map_stride;
599
600   struct v3dv_device_memory *mem;
601   VkDeviceSize mem_offset;
602   uint32_t alignment;
603
604#ifdef ANDROID
605   /* Image is backed by VK_ANDROID_native_buffer, */
606   bool is_native_buffer_memory;
607#endif
608};
609
610VkImageViewType v3dv_image_type_to_view_type(VkImageType type);
611
612/* Pre-generating packets needs to consider changes in packet sizes across hw
613 * versions. Keep things simple and allocate enough space for any supported
614 * version. We ensure the size is large enough through static asserts.
615 */
616#define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
617#define V3DV_SAMPLER_STATE_LENGTH 24
618#define V3DV_BLEND_CFG_LENGTH 5
619#define V3DV_CFG_BITS_LENGTH 4
620#define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
621#define V3DV_VCM_CACHE_SIZE_LENGTH 2
622#define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
623#define V3DV_STENCIL_CFG_LENGTH 6
624
625struct v3dv_image_view {
626   struct vk_image_view vk;
627
628   const struct v3dv_format *format;
629   bool swap_rb;
630   bool channel_reverse;
631   uint32_t internal_bpp;
632   uint32_t internal_type;
633   uint32_t offset;
634
635   /* Precomputed (composed from createinfo->components and formar swizzle)
636    * swizzles to pass in to the shader key.
637    *
638    * This could be also included on the descriptor bo, but the shader state
639    * packet doesn't need it on a bo, so we can just avoid a memory copy
640    */
641   uint8_t swizzle[4];
642
643   /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
644    * during UpdateDescriptorSets.
645    *
646    * Empirical tests show that cube arrays need a different shader state
647    * depending on whether they are used with a sampler or not, so for these
648    * we generate two states and select the one to use based on the descriptor
649    * type.
650    */
651   uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
652};
653
654VkResult v3dv_create_image_view(struct v3dv_device *device,
655                                const VkImageViewCreateInfo *pCreateInfo,
656                                VkImageView *pView);
657
658uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);
659
660struct v3dv_buffer {
661   struct vk_object_base base;
662
663   VkDeviceSize size;
664   VkBufferUsageFlags usage;
665   uint32_t alignment;
666
667   struct v3dv_device_memory *mem;
668   VkDeviceSize mem_offset;
669};
670
671struct v3dv_buffer_view {
672   struct vk_object_base base;
673
674   struct v3dv_buffer *buffer;
675
676   VkFormat vk_format;
677   const struct v3dv_format *format;
678   uint32_t internal_bpp;
679   uint32_t internal_type;
680
681   uint32_t offset;
682   uint32_t size;
683   uint32_t num_elements;
684
685   /* Prepacked TEXTURE_SHADER_STATE. */
686   uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
687};
688
689struct v3dv_subpass_attachment {
690   uint32_t attachment;
691   VkImageLayout layout;
692};
693
694struct v3dv_subpass {
695   uint32_t input_count;
696   struct v3dv_subpass_attachment *input_attachments;
697
698   uint32_t color_count;
699   struct v3dv_subpass_attachment *color_attachments;
700   struct v3dv_subpass_attachment *resolve_attachments;
701
702   struct v3dv_subpass_attachment ds_attachment;
703   struct v3dv_subpass_attachment ds_resolve_attachment;
704   bool resolve_depth, resolve_stencil;
705
706   /* If we need to emit the clear of the depth/stencil attachment using a
707    * a draw call instead of using the TLB (GFXH-1461).
708    */
709   bool do_depth_clear_with_draw;
710   bool do_stencil_clear_with_draw;
711
712   /* Multiview */
713   uint32_t view_mask;
714};
715
716struct v3dv_render_pass_attachment {
717   VkAttachmentDescription2 desc;
718
719   uint32_t first_subpass;
720   uint32_t last_subpass;
721
722   /* When multiview is enabled, we no longer care about when a particular
723    * attachment is first or last used in a render pass, since not all views
724    * in the attachment will meet that criteria. Instead, we need to track
725    * each individual view (layer) in each attachment and emit our stores,
726    * loads and clears accordingly.
727    */
728   struct {
729      uint32_t first_subpass;
730      uint32_t last_subpass;
731   } views[MAX_MULTIVIEW_VIEW_COUNT];
732
733   /* If this is a multisampled attachment that is going to be resolved,
734    * whether we may be able to use the TLB hardware resolve based on the
735    * attachment format.
736    */
737   bool try_tlb_resolve;
738};
739
740struct v3dv_render_pass {
741   struct vk_object_base base;
742
743   bool multiview_enabled;
744
745   uint32_t attachment_count;
746   struct v3dv_render_pass_attachment *attachments;
747
748   uint32_t subpass_count;
749   struct v3dv_subpass *subpasses;
750
751   struct v3dv_subpass_attachment *subpass_attachments;
752};
753
754struct v3dv_framebuffer {
755   struct vk_object_base base;
756
757   uint32_t width;
758   uint32_t height;
759   uint32_t layers;
760
761   /* Typically, edge tiles in the framebuffer have padding depending on the
762    * underlying tiling layout. One consequnce of this is that when the
763    * framebuffer dimensions are not aligned to tile boundaries, tile stores
764    * would still write full tiles on the edges and write to the padded area.
765    * If the framebuffer is aliasing a smaller region of a larger image, then
766    * we need to be careful with this though, as we won't have padding on the
767    * edge tiles (which typically means that we need to load the tile buffer
768    * before we store).
769    */
770   bool has_edge_padding;
771
772   uint32_t attachment_count;
773   uint32_t color_attachment_count;
774
775   /* Notice that elements in 'attachments' will be NULL if the framebuffer
776    * was created imageless. The driver is expected to access attachment info
777    * from the command buffer state instead.
778    */
779   struct v3dv_image_view *attachments[0];
780};
781
782struct v3dv_frame_tiling {
783   uint32_t width;
784   uint32_t height;
785   uint32_t layers;
786   uint32_t render_target_count;
787   uint32_t internal_bpp;
788   bool     msaa;
789   bool     double_buffer;
790   uint32_t tile_width;
791   uint32_t tile_height;
792   uint32_t draw_tiles_x;
793   uint32_t draw_tiles_y;
794   uint32_t supertile_width;
795   uint32_t supertile_height;
796   uint32_t frame_width_in_supertiles;
797   uint32_t frame_height_in_supertiles;
798};
799
800bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
801                                       const VkRect2D *area,
802                                       struct v3dv_framebuffer *fb,
803                                       struct v3dv_render_pass *pass,
804                                       uint32_t subpass_idx);
805
806/* Checks if we need to emit 2 initial tile clears for double buffer mode.
807 * This happens when we render at least 2 tiles, because in this mode each
808 * tile uses a different half of the tile buffer memory so we can have 2 tiles
809 * in flight (one being stored to memory and the next being rendered). In this
810 * scenario, if we emit a single initial tile clear we would only clear the
811 * first half of the tile buffer.
812 */
813static inline bool
814v3dv_do_double_initial_tile_clear(const struct v3dv_frame_tiling *tiling)
815{
816   return tiling->double_buffer &&
817          (tiling->draw_tiles_x > 1 || tiling->draw_tiles_y > 1 ||
818           tiling->layers > 1);
819}
820
821enum v3dv_cmd_buffer_status {
822   V3DV_CMD_BUFFER_STATUS_NEW           = 0,
823   V3DV_CMD_BUFFER_STATUS_INITIALIZED   = 1,
824   V3DV_CMD_BUFFER_STATUS_RECORDING     = 2,
825   V3DV_CMD_BUFFER_STATUS_EXECUTABLE    = 3
826};
827
828union v3dv_clear_value {
829   uint32_t color[4];
830   struct {
831      float z;
832      uint8_t s;
833   };
834};
835
836struct v3dv_cmd_buffer_attachment_state {
837   /* The original clear value as provided by the Vulkan API */
838   VkClearValue vk_clear_value;
839
840   /* The hardware clear value */
841   union v3dv_clear_value clear_value;
842
843   /* The underlying image view (from the framebuffer or, if imageless
844    * framebuffer is used, from VkRenderPassAttachmentBeginInfo.
845    */
846   struct v3dv_image_view *image_view;
847
848   /* If this is a multisampled attachment with a resolve operation. */
849   bool has_resolve;
850
851   /* If this is a multisampled attachment with a resolve operation,
852    * whether we can use the TLB for the resolve.
853    */
854   bool use_tlb_resolve;
855};
856
857struct v3dv_viewport_state {
858   uint32_t count;
859   VkViewport viewports[MAX_VIEWPORTS];
860   float translate[MAX_VIEWPORTS][3];
861   float scale[MAX_VIEWPORTS][3];
862};
863
864struct v3dv_scissor_state {
865   uint32_t count;
866   VkRect2D scissors[MAX_SCISSORS];
867};
868
869/* Mostly a v3dv mapping of VkDynamicState, used to track which data as
870 * defined as dynamic
871 */
872enum v3dv_dynamic_state_bits {
873   V3DV_DYNAMIC_VIEWPORT                  = 1 << 0,
874   V3DV_DYNAMIC_SCISSOR                   = 1 << 1,
875   V3DV_DYNAMIC_STENCIL_COMPARE_MASK      = 1 << 2,
876   V3DV_DYNAMIC_STENCIL_WRITE_MASK        = 1 << 3,
877   V3DV_DYNAMIC_STENCIL_REFERENCE         = 1 << 4,
878   V3DV_DYNAMIC_BLEND_CONSTANTS           = 1 << 5,
879   V3DV_DYNAMIC_DEPTH_BIAS                = 1 << 6,
880   V3DV_DYNAMIC_LINE_WIDTH                = 1 << 7,
881   V3DV_DYNAMIC_COLOR_WRITE_ENABLE        = 1 << 8,
882   V3DV_DYNAMIC_ALL                       = (1 << 9) - 1,
883};
884
885/* Flags for dirty pipeline state.
886 */
887enum v3dv_cmd_dirty_bits {
888   V3DV_CMD_DIRTY_VIEWPORT                  = 1 << 0,
889   V3DV_CMD_DIRTY_SCISSOR                   = 1 << 1,
890   V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK      = 1 << 2,
891   V3DV_CMD_DIRTY_STENCIL_WRITE_MASK        = 1 << 3,
892   V3DV_CMD_DIRTY_STENCIL_REFERENCE         = 1 << 4,
893   V3DV_CMD_DIRTY_PIPELINE                  = 1 << 5,
894   V3DV_CMD_DIRTY_COMPUTE_PIPELINE          = 1 << 6,
895   V3DV_CMD_DIRTY_VERTEX_BUFFER             = 1 << 7,
896   V3DV_CMD_DIRTY_INDEX_BUFFER              = 1 << 8,
897   V3DV_CMD_DIRTY_DESCRIPTOR_SETS           = 1 << 9,
898   V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS   = 1 << 10,
899   V3DV_CMD_DIRTY_PUSH_CONSTANTS            = 1 << 11,
900   V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO        = 1 << 12,
901   V3DV_CMD_DIRTY_BLEND_CONSTANTS           = 1 << 13,
902   V3DV_CMD_DIRTY_OCCLUSION_QUERY           = 1 << 14,
903   V3DV_CMD_DIRTY_DEPTH_BIAS                = 1 << 15,
904   V3DV_CMD_DIRTY_LINE_WIDTH                = 1 << 16,
905   V3DV_CMD_DIRTY_VIEW_INDEX                = 1 << 17,
906   V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE        = 1 << 18,
907};
908
909struct v3dv_dynamic_state {
910   /**
911    * Bitmask of (1 << VK_DYNAMIC_STATE_*).
912    * Defines the set of saved dynamic state.
913    */
914   uint32_t mask;
915
916   struct v3dv_viewport_state viewport;
917
918   struct v3dv_scissor_state scissor;
919
920   struct {
921      uint32_t front;
922      uint32_t back;
923   } stencil_compare_mask;
924
925   struct {
926      uint32_t front;
927      uint32_t back;
928   } stencil_write_mask;
929
930   struct {
931      uint32_t front;
932      uint32_t back;
933   } stencil_reference;
934
935   float blend_constants[4];
936
937   struct {
938      float constant_factor;
939      float depth_bias_clamp;
940      float slope_factor;
941   } depth_bias;
942
943   float line_width;
944
945   uint32_t color_write_enable;
946};
947
948void v3dv_viewport_compute_xform(const VkViewport *viewport,
949                                 float scale[3],
950                                 float translate[3]);
951
952enum v3dv_ez_state {
953   V3D_EZ_UNDECIDED = 0,
954   V3D_EZ_GT_GE,
955   V3D_EZ_LT_LE,
956   V3D_EZ_DISABLED,
957};
958
959enum v3dv_job_type {
960   V3DV_JOB_TYPE_GPU_CL = 0,
961   V3DV_JOB_TYPE_GPU_CL_SECONDARY,
962   V3DV_JOB_TYPE_GPU_TFU,
963   V3DV_JOB_TYPE_GPU_CSD,
964   V3DV_JOB_TYPE_CPU_RESET_QUERIES,
965   V3DV_JOB_TYPE_CPU_END_QUERY,
966   V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
967   V3DV_JOB_TYPE_CPU_SET_EVENT,
968   V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
969   V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
970   V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
971   V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
972};
973
974struct v3dv_reset_query_cpu_job_info {
975   struct v3dv_query_pool *pool;
976   uint32_t first;
977   uint32_t count;
978};
979
980struct v3dv_end_query_cpu_job_info {
981   struct v3dv_query_pool *pool;
982   uint32_t query;
983
984   /* This is one unless multiview is used */
985   uint32_t count;
986};
987
988struct v3dv_copy_query_results_cpu_job_info {
989   struct v3dv_query_pool *pool;
990   uint32_t first;
991   uint32_t count;
992   struct v3dv_buffer *dst;
993   uint32_t offset;
994   uint32_t stride;
995   VkQueryResultFlags flags;
996};
997
998struct v3dv_submit_sync_info {
999   /* List of syncs to wait before running a job */
1000   uint32_t wait_count;
1001   struct vk_sync_wait *waits;
1002
1003   /* List of syncs to signal when all jobs complete */
1004   uint32_t signal_count;
1005   struct vk_sync_signal *signals;
1006};
1007
1008struct v3dv_event_set_cpu_job_info {
1009   struct v3dv_event *event;
1010   int state;
1011};
1012
1013struct v3dv_event_wait_cpu_job_info {
1014   /* List of events to wait on */
1015   uint32_t event_count;
1016   struct v3dv_event **events;
1017};
1018
1019struct v3dv_copy_buffer_to_image_cpu_job_info {
1020   struct v3dv_image *image;
1021   struct v3dv_buffer *buffer;
1022   uint32_t buffer_offset;
1023   uint32_t buffer_stride;
1024   uint32_t buffer_layer_stride;
1025   VkOffset3D image_offset;
1026   VkExtent3D image_extent;
1027   uint32_t mip_level;
1028   uint32_t base_layer;
1029   uint32_t layer_count;
1030};
1031
1032struct v3dv_csd_indirect_cpu_job_info {
1033   struct v3dv_buffer *buffer;
1034   uint32_t offset;
1035   struct v3dv_job *csd_job;
1036   uint32_t wg_size;
1037   uint32_t *wg_uniform_offsets[3];
1038   bool needs_wg_uniform_rewrite;
1039};
1040
1041struct v3dv_timestamp_query_cpu_job_info {
1042   struct v3dv_query_pool *pool;
1043   uint32_t query;
1044
1045   /* This is one unless multiview is used */
1046   uint32_t count;
1047};
1048
1049/* Number of perfmons required to handle all supported performance counters */
1050#define V3DV_MAX_PERFMONS DIV_ROUND_UP(V3D_PERFCNT_NUM, \
1051                                       DRM_V3D_MAX_PERF_COUNTERS)
1052
1053struct v3dv_perf_query {
1054   uint32_t kperfmon_ids[V3DV_MAX_PERFMONS];
1055
1056   /* A DRM syncobj to wait on the GPU jobs for which we are collecting
1057    * performance data.
1058    */
1059   struct vk_sync *last_job_sync;
1060};
1061
1062struct v3dv_job {
1063   struct list_head list_link;
1064
1065   /* We only create job clones when executing secondary command buffers into
1066    * primaries. These clones don't make deep copies of the original object
1067    * so we want to flag them to avoid freeing resources they don't own.
1068    */
1069   bool is_clone;
1070
1071   /* If the job executes on the transfer stage of the pipeline */
1072   bool is_transfer;
1073
1074   /* VK_KHR_buffer_device_address allows shaders to use pointers that can
1075    * dereference memory in any buffer that has been flagged with
1076    * VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR. These buffers may not
1077    * be bound via descriptor sets, so we need to make sure that a job that
1078    * uses this functionality includes all these buffers in its kernel
1079    * submission.
1080    */
1081   bool uses_buffer_device_address;
1082
1083   enum v3dv_job_type type;
1084
1085   struct v3dv_device *device;
1086
1087   struct v3dv_cmd_buffer *cmd_buffer;
1088
1089   struct v3dv_cl bcl;
1090   struct v3dv_cl rcl;
1091   struct v3dv_cl indirect;
1092
1093   /* Set of all BOs referenced by the job. This will be used for making
1094    * the list of BOs that the kernel will need to have paged in to
1095    * execute our job.
1096    */
1097   struct set *bos;
1098   uint32_t bo_count;
1099   uint64_t bo_handle_mask;
1100
1101   struct v3dv_bo *tile_alloc;
1102   struct v3dv_bo *tile_state;
1103
1104   bool tmu_dirty_rcl;
1105
1106   uint32_t first_subpass;
1107
1108   /* When the current subpass is split into multiple jobs, this flag is set
1109    * to true for any jobs after the first in the same subpass.
1110    */
1111   bool is_subpass_continue;
1112
1113   /* If this job is the last job emitted for a subpass. */
1114   bool is_subpass_finish;
1115
1116   struct v3dv_frame_tiling frame_tiling;
1117
1118   enum v3dv_ez_state ez_state;
1119   enum v3dv_ez_state first_ez_state;
1120
1121   /* If we have already decided if we need to disable Early Z/S completely
1122    * for this job.
1123    */
1124   bool decided_global_ez_enable;
1125
1126   /* If the job emitted any draw calls with Early Z/S enabled */
1127   bool has_ez_draws;
1128
1129   /* If this job has been configured to use early Z/S clear */
1130   bool early_zs_clear;
1131
1132   /* Number of draw calls recorded into the job */
1133   uint32_t draw_count;
1134
1135   /* A flag indicating whether we want to flush every draw separately. This
1136    * can be used for debugging, or for cases where special circumstances
1137    * require this behavior.
1138    */
1139   bool always_flush;
1140
1141   /* A mask of V3DV_BARRIER_* indicating the source(s) of the barrier. We
1142    * can use this to select the hw queues where we need to serialize the job.
1143    */
1144   uint8_t serialize;
1145
1146   /* If this is a CL job, whether we should sync before binning */
1147   bool needs_bcl_sync;
1148
1149   /* Job specs for CPU jobs */
1150   union {
1151      struct v3dv_reset_query_cpu_job_info          query_reset;
1152      struct v3dv_end_query_cpu_job_info            query_end;
1153      struct v3dv_copy_query_results_cpu_job_info   query_copy_results;
1154      struct v3dv_event_set_cpu_job_info            event_set;
1155      struct v3dv_event_wait_cpu_job_info           event_wait;
1156      struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
1157      struct v3dv_csd_indirect_cpu_job_info         csd_indirect;
1158      struct v3dv_timestamp_query_cpu_job_info      query_timestamp;
1159   } cpu;
1160
1161   /* Job specs for TFU jobs */
1162   struct drm_v3d_submit_tfu tfu;
1163
1164   /* Job specs for CSD jobs */
1165   struct {
1166      struct v3dv_bo *shared_memory;
1167      uint32_t wg_count[3];
1168      uint32_t wg_base[3];
1169      struct drm_v3d_submit_csd submit;
1170   } csd;
1171
1172   /* Perfmons with last job sync for CSD and CL jobs */
1173   struct v3dv_perf_query *perf;
1174};
1175
1176void v3dv_job_init(struct v3dv_job *job,
1177                   enum v3dv_job_type type,
1178                   struct v3dv_device *device,
1179                   struct v3dv_cmd_buffer *cmd_buffer,
1180                   int32_t subpass_idx);
1181void v3dv_job_destroy(struct v3dv_job *job);
1182
1183void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
1184void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);
1185
1186void v3dv_job_start_frame(struct v3dv_job *job,
1187                          uint32_t width,
1188                          uint32_t height,
1189                          uint32_t layers,
1190                          bool allocate_tile_state_for_all_layers,
1191                          uint32_t render_target_count,
1192                          uint8_t max_internal_bpp,
1193                          bool msaa);
1194
1195bool v3dv_job_type_is_gpu(struct v3dv_job *job);
1196
1197struct v3dv_job *
1198v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
1199                             struct v3dv_cmd_buffer *cmd_buffer);
1200
1201struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
1202                                                enum v3dv_job_type type,
1203                                                struct v3dv_cmd_buffer *cmd_buffer,
1204                                                uint32_t subpass_idx);
1205
1206void
1207v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
1208                                   uint32_t slot_size,
1209                                   uint32_t used_count,
1210                                   uint32_t *alloc_count,
1211                                   void **ptr);
1212
1213void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer,
1214                                   bool indexed, bool indirect);
1215
1216/* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
1217 * cmd_buffer specific header?
1218 */
1219struct v3dv_draw_info {
1220   uint32_t vertex_count;
1221   uint32_t instance_count;
1222   uint32_t first_vertex;
1223   uint32_t first_instance;
1224};
1225
1226struct v3dv_vertex_binding {
1227   struct v3dv_buffer *buffer;
1228   VkDeviceSize offset;
1229};
1230
1231struct v3dv_descriptor_state {
1232   struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
1233   uint32_t valid;
1234   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
1235};
1236
1237struct v3dv_cmd_pipeline_state {
1238   struct v3dv_pipeline *pipeline;
1239
1240   struct v3dv_descriptor_state descriptor_state;
1241};
1242
1243enum {
1244   V3DV_BARRIER_GRAPHICS_BIT = (1 << 0),
1245   V3DV_BARRIER_COMPUTE_BIT  = (1 << 1),
1246   V3DV_BARRIER_TRANSFER_BIT = (1 << 2),
1247};
1248#define V3DV_BARRIER_ALL (V3DV_BARRIER_GRAPHICS_BIT | \
1249                          V3DV_BARRIER_TRANSFER_BIT | \
1250                          V3DV_BARRIER_COMPUTE_BIT);
1251
1252struct v3dv_barrier_state {
1253   /* Mask of V3DV_BARRIER_* indicating where we consume a barrier. */
1254   uint8_t dst_mask;
1255
1256   /* For each possible consumer of a barrier, a mask of V3DV_BARRIER_*
1257    * indicating the sources of the dependency.
1258    */
1259   uint8_t src_mask_graphics;
1260   uint8_t src_mask_transfer;
1261   uint8_t src_mask_compute;
1262
1263   /* For graphics barriers, access masks involved. Used to decide if we need
1264    * to execute a binning or render barrier.
1265    */
1266   VkAccessFlags bcl_buffer_access;
1267   VkAccessFlags bcl_image_access;
1268};
1269
1270struct v3dv_cmd_buffer_state {
1271   struct v3dv_render_pass *pass;
1272   struct v3dv_framebuffer *framebuffer;
1273   VkRect2D render_area;
1274
1275   /* Current job being recorded */
1276   struct v3dv_job *job;
1277
1278   uint32_t subpass_idx;
1279
1280   struct v3dv_cmd_pipeline_state gfx;
1281   struct v3dv_cmd_pipeline_state compute;
1282
1283   struct v3dv_dynamic_state dynamic;
1284
1285   uint32_t dirty;
1286   VkShaderStageFlagBits dirty_descriptor_stages;
1287   VkShaderStageFlagBits dirty_push_constants_stages;
1288
1289   /* Current clip window. We use this to check whether we have an active
1290    * scissor, since in that case we can't use TLB clears and need to fallback
1291    * to drawing rects.
1292    */
1293   VkRect2D clip_window;
1294
1295   /* Whether our render area is aligned to tile boundaries. If this is false
1296    * then we have tiles that are only partially covered by the render area,
1297    * and therefore, we need to be careful with our loads and stores so we don't
1298    * modify pixels for the tile area that is not covered by the render area.
1299    * This means, for example, that we can't use the TLB to clear, since that
1300    * always clears full tiles.
1301    */
1302   bool tile_aligned_render_area;
1303
1304   /* FIXME: we have just one client-side BO for the push constants,
1305    * independently of the stageFlags in vkCmdPushConstants, and the
1306    * pipelineBindPoint in vkCmdBindPipeline. We could probably do more stage
1307    * tunning in the future if it makes sense.
1308    */
1309   uint32_t push_constants_size;
1310   uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
1311
1312   uint32_t attachment_alloc_count;
1313   struct v3dv_cmd_buffer_attachment_state *attachments;
1314
1315   struct v3dv_vertex_binding vertex_bindings[MAX_VBS];
1316
1317   struct {
1318      VkBuffer buffer;
1319      VkDeviceSize offset;
1320      uint8_t index_size;
1321   } index_buffer;
1322
1323   /* Current uniforms */
1324   struct {
1325      struct v3dv_cl_reloc vs_bin;
1326      struct v3dv_cl_reloc vs;
1327      struct v3dv_cl_reloc gs_bin;
1328      struct v3dv_cl_reloc gs;
1329      struct v3dv_cl_reloc fs;
1330   } uniforms;
1331
1332   /* Current view index for multiview rendering */
1333   uint32_t view_index;
1334
1335   /* Used to flag OOM conditions during command buffer recording */
1336   bool oom;
1337
1338   /* If we are currently recording job(s) for a transfer operation */
1339   bool is_transfer;
1340
1341   /* Barrier state tracking */
1342   struct v3dv_barrier_state barrier;
1343
1344   /* Secondary command buffer state */
1345   struct {
1346      bool occlusion_query_enable;
1347   } inheritance;
1348
1349   /* Command buffer state saved during a meta operation */
1350   struct {
1351      uint32_t subpass_idx;
1352      VkRenderPass pass;
1353      VkFramebuffer framebuffer;
1354
1355      uint32_t attachment_alloc_count;
1356      uint32_t attachment_count;
1357      struct v3dv_cmd_buffer_attachment_state *attachments;
1358
1359      bool tile_aligned_render_area;
1360      VkRect2D render_area;
1361
1362      struct v3dv_dynamic_state dynamic;
1363
1364      struct v3dv_cmd_pipeline_state gfx;
1365      bool has_descriptor_state;
1366
1367      uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
1368      uint32_t push_constants_size;
1369   } meta;
1370
1371   /* Command buffer state for queries */
1372   struct {
1373      /* A list of vkCmdQueryEnd commands recorded in the command buffer during
1374       * a render pass. We queue these here and then schedule the corresponding
1375       * CPU jobs for them at the time we finish the GPU job in which they have
1376       * been recorded.
1377       */
1378      struct {
1379         uint32_t used_count;
1380         uint32_t alloc_count;
1381         struct v3dv_end_query_cpu_job_info *states;
1382      } end;
1383
1384      struct {
1385         /* This BO is not NULL if we have an active occlusion query, that is,
1386          * we have called vkCmdBeginQuery but not vkCmdEndQuery.
1387          */
1388         struct v3dv_bo *bo;
1389         uint32_t offset;
1390
1391         /* This pointer is not NULL if we have an active performance query */
1392         struct v3dv_perf_query *perf;
1393      } active_query;
1394   } query;
1395};
1396
1397/* The following struct represents the info from a descriptor that we store on
1398 * the host memory. They are mostly links to other existing vulkan objects,
1399 * like the image_view in order to access to swizzle info, or the buffer used
1400 * for a UBO/SSBO, for example.
1401 *
1402 * FIXME: revisit if makes sense to just move everything that would be needed
1403 * from a descriptor to the bo.
1404 */
1405struct v3dv_descriptor {
1406   VkDescriptorType type;
1407
1408   union {
1409      struct {
1410         struct v3dv_image_view *image_view;
1411         struct v3dv_sampler *sampler;
1412      };
1413
1414      struct {
1415         struct v3dv_buffer *buffer;
1416         size_t offset;
1417         size_t range;
1418      };
1419
1420      struct v3dv_buffer_view *buffer_view;
1421   };
1422};
1423
1424struct v3dv_query {
1425   bool maybe_available;
1426   union {
1427      /* Used by GPU queries (occlusion) */
1428      struct {
1429         struct v3dv_bo *bo;
1430         uint32_t offset;
1431      };
1432      /* Used by CPU queries (timestamp) */
1433      uint64_t value;
1434
1435      /* Used by performance queries */
1436      struct v3dv_perf_query perf;
1437   };
1438};
1439
1440struct v3dv_query_pool {
1441   struct vk_object_base base;
1442
1443   struct v3dv_bo *bo; /* Only used with GPU queries (occlusion) */
1444
1445   /* Only used with performance queries */
1446   struct {
1447      uint32_t ncounters;
1448      uint8_t counters[V3D_PERFCNT_NUM];
1449
1450      /* V3D has a limit on the number of counters we can track in a
1451       * single performance monitor, so if too many counters are requested
1452       * we need to create multiple monitors to record all of them. This
1453       * field represents the number of monitors required for the number
1454       * of counters requested.
1455       */
1456      uint8_t nperfmons;
1457   } perfmon;
1458
1459   VkQueryType query_type;
1460   uint32_t query_count;
1461   struct v3dv_query *queries;
1462};
1463
1464VkResult v3dv_get_query_pool_results(struct v3dv_device *device,
1465                                     struct v3dv_query_pool *pool,
1466                                     uint32_t first,
1467                                     uint32_t count,
1468                                     void *data,
1469                                     VkDeviceSize stride,
1470                                     VkQueryResultFlags flags);
1471
1472void v3dv_reset_query_pools(struct v3dv_device *device,
1473                            struct v3dv_query_pool *query_pool,
1474                            uint32_t first,
1475                            uint32_t last);
1476
1477typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
1478                                                       uint64_t pobj,
1479                                                       VkAllocationCallbacks *alloc);
1480struct v3dv_cmd_buffer_private_obj {
1481   struct list_head list_link;
1482   uint64_t obj;
1483   v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
1484};
1485
1486struct v3dv_cmd_buffer {
1487   struct vk_command_buffer vk;
1488
1489   struct v3dv_device *device;
1490
1491   /* Used at submit time to link command buffers in the submission that have
1492    * spawned wait threads, so we can then wait on all of them to complete
1493    * before we process any signal sempahores or fences.
1494    */
1495   struct list_head list_link;
1496
1497   VkCommandBufferUsageFlags usage_flags;
1498
1499   enum v3dv_cmd_buffer_status status;
1500
1501   struct v3dv_cmd_buffer_state state;
1502
1503   /* Buffer where we upload push constant data to resolve indirect indexing */
1504   struct v3dv_cl_reloc push_constants_resource;
1505
1506   /* Collection of Vulkan objects created internally by the driver (typically
1507    * during recording of meta operations) that are part of the command buffer
1508    * and should be destroyed with it.
1509    */
1510   struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */
1511
1512   /* Per-command buffer resources for meta operations. */
1513   struct {
1514      struct {
1515         /* The current descriptor pool for blit sources */
1516         VkDescriptorPool dspool;
1517      } blit;
1518      struct {
1519         /* The current descriptor pool for texel buffer copy sources */
1520         VkDescriptorPool dspool;
1521      } texel_buffer_copy;
1522   } meta;
1523
1524   /* List of jobs in the command buffer. For primary command buffers it
1525    * represents the jobs we want to submit to the GPU. For secondary command
1526    * buffers it represents jobs that will be merged into a primary command
1527    * buffer via vkCmdExecuteCommands.
1528    */
1529   struct list_head jobs;
1530};
1531
1532struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
1533                                           int32_t subpass_idx,
1534                                           enum v3dv_job_type type);
1535void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);
1536
1537struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
1538                                               uint32_t subpass_idx);
1539struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
1540                                                uint32_t subpass_idx);
1541
1542void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);
1543
1544void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
1545                                     bool push_descriptor_state);
1546void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
1547                                    uint32_t dirty_dynamic_state,
1548                                    bool needs_subpass_resume);
1549
1550void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
1551                                   struct v3dv_query_pool *pool,
1552                                   uint32_t first,
1553                                   uint32_t count);
1554
1555void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
1556                                 struct v3dv_query_pool *pool,
1557                                 uint32_t query,
1558                                 VkQueryControlFlags flags);
1559
1560void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
1561                               struct v3dv_query_pool *pool,
1562                               uint32_t query);
1563
1564void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
1565                                        struct v3dv_query_pool *pool,
1566                                        uint32_t first,
1567                                        uint32_t count,
1568                                        struct v3dv_buffer *dst,
1569                                        uint32_t offset,
1570                                        uint32_t stride,
1571                                        VkQueryResultFlags flags);
1572
1573void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
1574                                 struct drm_v3d_submit_tfu *tfu);
1575
1576void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info,
1577                                              const uint32_t *wg_counts);
1578
1579void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
1580                                     uint64_t obj,
1581                                     v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);
1582
1583void v3dv_cmd_buffer_merge_barrier_state(struct v3dv_barrier_state *dst,
1584                                         struct v3dv_barrier_state *src);
1585
1586struct v3dv_event {
1587   struct vk_object_base base;
1588   int state;
1589};
1590
1591struct v3dv_shader_variant {
1592   enum broadcom_shader_stage stage;
1593
1594   union {
1595      struct v3d_prog_data *base;
1596      struct v3d_vs_prog_data *vs;
1597      struct v3d_gs_prog_data *gs;
1598      struct v3d_fs_prog_data *fs;
1599      struct v3d_compute_prog_data *cs;
1600   } prog_data;
1601
1602   /* We explicitly save the prog_data_size as it would make easier to
1603    * serialize
1604    */
1605   uint32_t prog_data_size;
1606
1607   /* The assembly for this variant will be uploaded to a BO shared with all
1608    * other shader stages in that pipeline. This is the offset in that BO.
1609    */
1610   uint32_t assembly_offset;
1611
1612   /* Note: it is really likely that qpu_insts would be NULL, as it will be
1613    * used only temporarily, to upload it to the shared bo, as we compile the
1614    * different stages individually.
1615    */
1616   uint64_t *qpu_insts;
1617   uint32_t qpu_insts_size;
1618};
1619
1620/*
1621 * Per-stage info for each stage, useful so shader_module_compile_to_nir and
1622 * other methods doesn't have so many parameters.
1623 *
1624 * FIXME: for the case of the coordinate shader and the vertex shader, module,
1625 * entrypoint, spec_info and nir are the same. There are also info only
1626 * relevant to some stages. But seemed too much a hassle to create a new
1627 * struct only to handle that. Revisit if such kind of info starts to grow.
1628 */
1629struct v3dv_pipeline_stage {
1630   struct v3dv_pipeline *pipeline;
1631
1632   enum broadcom_shader_stage stage;
1633
1634   const struct vk_shader_module *module;
1635   const char *entrypoint;
1636   const VkSpecializationInfo *spec_info;
1637
1638   nir_shader *nir;
1639
1640   /* The following is the combined hash of module+entrypoint+spec_info+nir */
1641   unsigned char shader_sha1[20];
1642
1643   /** A name for this program, so you can track it in shader-db output. */
1644   uint32_t program_id;
1645
1646   VkPipelineCreationFeedback feedback;
1647};
1648
1649/* We are using the descriptor pool entry for two things:
1650 * * Track the allocated sets, so we can properly free it if needed
1651 * * Track the suballocated pool bo regions, so if some descriptor set is
1652 *   freed, the gap could be reallocated later.
1653 *
1654 * Those only make sense if the pool was not created with the flag
1655 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT
1656 */
1657struct v3dv_descriptor_pool_entry
1658{
1659   struct v3dv_descriptor_set *set;
1660   /* Offset and size of the subregion allocated for this entry from the
1661    * pool->bo
1662    */
1663   uint32_t offset;
1664   uint32_t size;
1665};
1666
1667struct v3dv_descriptor_pool {
1668   struct vk_object_base base;
1669
1670   /* A list with all descriptor sets allocated from the pool. */
1671   struct list_head set_list;
1672
1673   /* If this descriptor pool has been allocated for the driver for internal
1674    * use, typically to implement meta operations.
1675    */
1676   bool is_driver_internal;
1677
1678   struct v3dv_bo *bo;
1679   /* Current offset at the descriptor bo. 0 means that we didn't use it for
1680    * any descriptor. If the descriptor bo is NULL, current offset is
1681    * meaningless
1682    */
1683   uint32_t current_offset;
1684
1685   /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set the
1686    * descriptor sets are handled as a whole as pool memory and handled by the
1687    * following pointers. If set, they are not used, and individually
1688    * descriptor sets are allocated/freed.
1689    */
1690   uint8_t *host_memory_base;
1691   uint8_t *host_memory_ptr;
1692   uint8_t *host_memory_end;
1693
1694   uint32_t entry_count;
1695   uint32_t max_entry_count;
1696   struct v3dv_descriptor_pool_entry entries[0];
1697};
1698
1699struct v3dv_descriptor_set {
1700   struct vk_object_base base;
1701
1702   /* List link into the list of all sets allocated from the pool */
1703   struct list_head pool_link;
1704
1705   struct v3dv_descriptor_pool *pool;
1706
1707   struct v3dv_descriptor_set_layout *layout;
1708
1709   /* Offset relative to the descriptor pool bo for this set */
1710   uint32_t base_offset;
1711
1712   /* The descriptors below can be indexed (set/binding) using the set_layout
1713    */
1714   struct v3dv_descriptor descriptors[0];
1715};
1716
1717struct v3dv_descriptor_set_binding_layout {
1718   VkDescriptorType type;
1719
1720   /* Number of array elements in this binding */
1721   uint32_t array_size;
1722
1723   /* Index into the flattend descriptor set */
1724   uint32_t descriptor_index;
1725
1726   uint32_t dynamic_offset_count;
1727   uint32_t dynamic_offset_index;
1728
1729   /* Offset into the descriptor set where this descriptor lives (final offset
1730    * on the descriptor bo need to take into account set->base_offset)
1731    */
1732   uint32_t descriptor_offset;
1733
1734   /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
1735    * if there are no immutable samplers.
1736    */
1737   uint32_t immutable_samplers_offset;
1738};
1739
1740struct v3dv_descriptor_set_layout {
1741   struct vk_object_base base;
1742
1743   VkDescriptorSetLayoutCreateFlags flags;
1744
1745   /* Number of bindings in this descriptor set */
1746   uint32_t binding_count;
1747
1748   /* Total bo size needed for this descriptor set
1749    */
1750   uint32_t bo_size;
1751
1752   /* Shader stages affected by this descriptor set */
1753   uint16_t shader_stages;
1754
1755   /* Number of descriptors in this descriptor set */
1756   uint32_t descriptor_count;
1757
1758   /* Number of dynamic offsets used by this descriptor set */
1759   uint16_t dynamic_offset_count;
1760
1761   /* Descriptor set layouts can be destroyed even if they are still being
1762    * used.
1763    */
1764   uint32_t ref_cnt;
1765
1766   /* Bindings in this descriptor set */
1767   struct v3dv_descriptor_set_binding_layout binding[0];
1768};
1769
1770void
1771v3dv_descriptor_set_layout_destroy(struct v3dv_device *device,
1772                                   struct v3dv_descriptor_set_layout *set_layout);
1773
1774static inline void
1775v3dv_descriptor_set_layout_ref(struct v3dv_descriptor_set_layout *set_layout)
1776{
1777   assert(set_layout && set_layout->ref_cnt >= 1);
1778   p_atomic_inc(&set_layout->ref_cnt);
1779}
1780
1781static inline void
1782v3dv_descriptor_set_layout_unref(struct v3dv_device *device,
1783                                 struct v3dv_descriptor_set_layout *set_layout)
1784{
1785   assert(set_layout && set_layout->ref_cnt >= 1);
1786   if (p_atomic_dec_zero(&set_layout->ref_cnt))
1787      v3dv_descriptor_set_layout_destroy(device, set_layout);
1788}
1789
1790struct v3dv_pipeline_layout {
1791   struct vk_object_base base;
1792
1793   struct {
1794      struct v3dv_descriptor_set_layout *layout;
1795      uint32_t dynamic_offset_start;
1796   } set[MAX_SETS];
1797
1798   uint32_t num_sets;
1799
1800   /* Shader stages that are declared to use descriptors from this layout */
1801   uint32_t shader_stages;
1802
1803   uint32_t dynamic_offset_count;
1804   uint32_t push_constant_size;
1805
1806   unsigned char sha1[20];
1807};
1808
1809/*
1810 * We are using descriptor maps for ubo/ssbo and texture/samplers, so we need
1811 * it to be big enough to include the max value for all of them.
1812 *
1813 * FIXME: one alternative would be to allocate the map as big as you need for
1814 * each descriptor type. That would means more individual allocations.
1815 */
1816#define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS,                         \
1817                                 MAX_UNIFORM_BUFFERS + MAX_INLINE_UNIFORM_BUFFERS, \
1818                                 MAX_STORAGE_BUFFERS)
1819
1820
1821struct v3dv_descriptor_map {
1822   /* TODO: avoid fixed size array/justify the size */
1823   unsigned num_desc; /* Number of descriptors  */
1824   int set[DESCRIPTOR_MAP_SIZE];
1825   int binding[DESCRIPTOR_MAP_SIZE];
1826   int array_index[DESCRIPTOR_MAP_SIZE];
1827   int array_size[DESCRIPTOR_MAP_SIZE];
1828   bool used[DESCRIPTOR_MAP_SIZE];
1829
1830   /* NOTE: the following is only for sampler, but this is the easier place to
1831    * put it.
1832    */
1833   uint8_t return_size[DESCRIPTOR_MAP_SIZE];
1834};
1835
1836struct v3dv_sampler {
1837   struct vk_object_base base;
1838
1839   bool compare_enable;
1840   bool unnormalized_coordinates;
1841   bool clamp_to_transparent_black_border;
1842
1843   /* Prepacked SAMPLER_STATE, that is referenced as part of the tmu
1844    * configuration. If needed it will be copied to the descriptor info during
1845    * UpdateDescriptorSets
1846    */
1847   uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
1848};
1849
1850struct v3dv_descriptor_template_entry {
1851   /* The type of descriptor in this entry */
1852   VkDescriptorType type;
1853
1854   /* Binding in the descriptor set */
1855   uint32_t binding;
1856
1857   /* Offset at which to write into the descriptor set binding */
1858   uint32_t array_element;
1859
1860   /* Number of elements to write into the descriptor set binding */
1861   uint32_t array_count;
1862
1863   /* Offset into the user provided data */
1864   size_t offset;
1865
1866   /* Stride between elements into the user provided data */
1867   size_t stride;
1868};
1869
1870struct v3dv_descriptor_update_template {
1871   struct vk_object_base base;
1872
1873   VkPipelineBindPoint bind_point;
1874
1875   /* The descriptor set this template corresponds to. This value is only
1876    * valid if the template was created with the templateType
1877    * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
1878    */
1879   uint8_t set;
1880
1881   /* Number of entries in this template */
1882   uint32_t entry_count;
1883
1884   /* Entries of the template */
1885   struct v3dv_descriptor_template_entry entries[0];
1886};
1887
1888
1889/* We keep two special values for the sampler idx that represents exactly when a
1890 * sampler is not needed/provided. The main use is that even if we don't have
1891 * sampler, we still need to do the output unpacking (through
1892 * nir_lower_tex). The easier way to do this is to add those special "no
1893 * sampler" in the sampler_map, and then use the proper unpacking for that
1894 * case.
1895 *
1896 * We have one when we want a 16bit output size, and other when we want a
1897 * 32bit output size. We use the info coming from the RelaxedPrecision
1898 * decoration to decide between one and the other.
1899 */
1900#define V3DV_NO_SAMPLER_16BIT_IDX 0
1901#define V3DV_NO_SAMPLER_32BIT_IDX 1
1902
1903/*
1904 * Following two methods are using on the combined to/from texture/sampler
1905 * indices maps at v3dv_pipeline.
1906 */
1907static inline uint32_t
1908v3dv_pipeline_combined_index_key_create(uint32_t texture_index,
1909                                        uint32_t sampler_index)
1910{
1911   return texture_index << 24 | sampler_index;
1912}
1913
1914static inline void
1915v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,
1916                                        uint32_t *texture_index,
1917                                        uint32_t *sampler_index)
1918{
1919   uint32_t texture = combined_index_key >> 24;
1920   uint32_t sampler = combined_index_key & 0xffffff;
1921
1922   if (texture_index)
1923      *texture_index = texture;
1924
1925   if (sampler_index)
1926      *sampler_index = sampler;
1927}
1928
1929struct v3dv_descriptor_maps {
1930   struct v3dv_descriptor_map ubo_map;
1931   struct v3dv_descriptor_map ssbo_map;
1932   struct v3dv_descriptor_map sampler_map;
1933   struct v3dv_descriptor_map texture_map;
1934};
1935
1936/* The structure represents data shared between different objects, like the
1937 * pipeline and the pipeline cache, so we ref count it to know when it should
1938 * be freed.
1939 */
1940struct v3dv_pipeline_shared_data {
1941   uint32_t ref_cnt;
1942
1943   unsigned char sha1_key[20];
1944
1945   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
1946   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];
1947
1948   struct v3dv_bo *assembly_bo;
1949};
1950
1951struct v3dv_pipeline_executable_data {
1952   enum broadcom_shader_stage stage;
1953   char *nir_str;
1954   char *qpu_str;
1955};
1956
1957struct v3dv_pipeline {
1958   struct vk_object_base base;
1959
1960   struct v3dv_device *device;
1961
1962   VkShaderStageFlags active_stages;
1963
1964   struct v3dv_render_pass *pass;
1965   struct v3dv_subpass *subpass;
1966
1967   /* Note: We can't use just a MESA_SHADER_STAGES array because we also need
1968    * to track binning shaders. Note these will be freed once the pipeline
1969    * has been compiled.
1970    */
1971   struct v3dv_pipeline_stage *vs;
1972   struct v3dv_pipeline_stage *vs_bin;
1973   struct v3dv_pipeline_stage *gs;
1974   struct v3dv_pipeline_stage *gs_bin;
1975   struct v3dv_pipeline_stage *fs;
1976   struct v3dv_pipeline_stage *cs;
1977
1978   /* Flags for whether optional pipeline stages are present, for convenience */
1979   bool has_gs;
1980
1981   /* Whether any stage in this pipeline uses VK_KHR_buffer_device_address */
1982   bool uses_buffer_device_address;
1983
1984   /* Spilling memory requirements */
1985   struct {
1986      struct v3dv_bo *bo;
1987      uint32_t size_per_thread;
1988   } spill;
1989
1990   struct v3dv_dynamic_state dynamic_state;
1991
1992   struct v3dv_pipeline_layout *layout;
1993
1994   /* Whether this pipeline enables depth writes */
1995   bool z_updates_enable;
1996
1997   enum v3dv_ez_state ez_state;
1998
1999   /* If ez_state is V3D_EZ_DISABLED, if the reason for disabling is that the
2000    * pipeline selects an incompatible depth test function.
2001    */
2002   bool incompatible_ez_test;
2003
2004   bool msaa;
2005   bool sample_rate_shading;
2006   uint32_t sample_mask;
2007
2008   bool primitive_restart;
2009
2010   /* Accessed by binding. So vb[binding]->stride is the stride of the vertex
2011    * array with such binding
2012    */
2013   struct v3dv_pipeline_vertex_binding {
2014      uint32_t stride;
2015      uint32_t instance_divisor;
2016   } vb[MAX_VBS];
2017   uint32_t vb_count;
2018
2019   /* Note that a lot of info from VkVertexInputAttributeDescription is
2020    * already prepacked, so here we are only storing those that need recheck
2021    * later. The array must be indexed by driver location, since that is the
2022    * order in which we need to emit the attributes.
2023    */
2024   struct v3dv_pipeline_vertex_attrib {
2025      uint32_t binding;
2026      uint32_t offset;
2027      VkFormat vk_format;
2028   } va[MAX_VERTEX_ATTRIBS];
2029   uint32_t va_count;
2030
2031   enum pipe_prim_type topology;
2032
2033   struct v3dv_pipeline_shared_data *shared_data;
2034
2035   /* It is the combined stages sha1, layout sha1, plus the pipeline key sha1. */
2036   unsigned char sha1[20];
2037
2038   /* In general we can reuse v3dv_device->default_attribute_float, so note
2039    * that the following can be NULL.
2040    *
2041    * FIXME: the content of this BO will be small, so it could be improved to
2042    * be uploaded to a common BO. But as in most cases it will be NULL, it is
2043    * not a priority.
2044    */
2045   struct v3dv_bo *default_attribute_values;
2046
2047   struct vpm_config vpm_cfg;
2048   struct vpm_config vpm_cfg_bin;
2049
2050   /* If the pipeline should emit any of the stencil configuration packets */
2051   bool emit_stencil_cfg[2];
2052
2053   /* Blend state */
2054   struct {
2055      /* Per-RT bit mask with blend enables */
2056      uint8_t enables;
2057      /* Per-RT prepacked blend config packets */
2058      uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
2059      /* Flag indicating whether the blend factors in use require
2060       * color constants.
2061       */
2062      bool needs_color_constants;
2063      /* Mask with enabled color channels for each RT (4 bits per RT) */
2064      uint32_t color_write_masks;
2065   } blend;
2066
2067   /* Depth bias */
2068   struct {
2069      bool enabled;
2070      bool is_z16;
2071   } depth_bias;
2072
2073   struct {
2074      void *mem_ctx;
2075      bool has_data;
2076      struct util_dynarray data; /* Array of v3dv_pipeline_executable_data */
2077   } executables;
2078
2079   /* Packets prepacked during pipeline creation
2080    */
2081   uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
2082   uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
2083   uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
2084   uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
2085                        MAX_VERTEX_ATTRIBS];
2086   uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
2087};
2088
2089static inline VkPipelineBindPoint
2090v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
2091{
2092   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
2093          !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
2094   return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
2095      VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
2096}
2097
2098static inline struct v3dv_descriptor_state*
2099v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
2100                                     struct v3dv_pipeline *pipeline)
2101{
2102   if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
2103      return &cmd_buffer->state.compute.descriptor_state;
2104   else
2105      return &cmd_buffer->state.gfx.descriptor_state;
2106}
2107
2108const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);
2109
2110uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
2111uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);
2112
2113#define v3dv_debug_ignored_stype(sType) \
2114   mesa_logd("%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))
2115
2116const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f);
2117uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
2118const struct v3dv_format *
2119v3dv_get_compatible_tfu_format(struct v3dv_device *device,
2120                               uint32_t bpp, VkFormat *out_vk_format);
2121bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
2122                                          VkFormat vk_format,
2123                                          VkFormatFeatureFlags2 features);
2124
2125struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
2126                                         struct v3dv_pipeline *pipeline,
2127                                         struct v3dv_shader_variant *variant);
2128
2129struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
2130                                                    struct v3dv_pipeline *pipeline,
2131                                                    struct v3dv_shader_variant *variant,
2132                                                    uint32_t **wg_count_offsets);
2133
2134struct v3dv_shader_variant *
2135v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
2136                        struct v3dv_pipeline_cache *cache,
2137                        struct v3d_key *key,
2138                        size_t key_size,
2139                        const VkAllocationCallbacks *pAllocator,
2140                        VkResult *out_vk_result);
2141
2142struct v3dv_shader_variant *
2143v3dv_shader_variant_create(struct v3dv_device *device,
2144                           enum broadcom_shader_stage stage,
2145                           struct v3d_prog_data *prog_data,
2146                           uint32_t prog_data_size,
2147                           uint32_t assembly_offset,
2148                           uint64_t *qpu_insts,
2149                           uint32_t qpu_insts_size,
2150                           VkResult *out_vk_result);
2151
2152void
2153v3dv_shader_variant_destroy(struct v3dv_device *device,
2154                            struct v3dv_shader_variant *variant);
2155
2156static inline void
2157v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
2158{
2159   assert(shared_data && shared_data->ref_cnt >= 1);
2160   p_atomic_inc(&shared_data->ref_cnt);
2161}
2162
2163void
2164v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
2165                                  struct v3dv_pipeline_shared_data *shared_data);
2166
2167static inline void
2168v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
2169                                struct v3dv_pipeline_shared_data *shared_data)
2170{
2171   assert(shared_data && shared_data->ref_cnt >= 1);
2172   if (p_atomic_dec_zero(&shared_data->ref_cnt))
2173      v3dv_pipeline_shared_data_destroy(device, shared_data);
2174}
2175
2176struct v3dv_descriptor *
2177v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
2178                                   struct v3dv_descriptor_map *map,
2179                                   struct v3dv_pipeline_layout *pipeline_layout,
2180                                   uint32_t index,
2181                                   uint32_t *dynamic_offset);
2182
2183struct v3dv_cl_reloc
2184v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device,
2185                                      struct v3dv_descriptor_state *descriptor_state,
2186                                      struct v3dv_descriptor_map *map,
2187                                      struct v3dv_pipeline_layout *pipeline_layout,
2188                                      uint32_t index,
2189                                      VkDescriptorType *out_type);
2190
2191const struct v3dv_sampler *
2192v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
2193                                struct v3dv_descriptor_map *map,
2194                                struct v3dv_pipeline_layout *pipeline_layout,
2195                                uint32_t index);
2196
2197struct v3dv_cl_reloc
2198v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
2199                                      struct v3dv_descriptor_state *descriptor_state,
2200                                      struct v3dv_descriptor_map *map,
2201                                      struct v3dv_pipeline_layout *pipeline_layout,
2202                                      uint32_t index);
2203
2204struct v3dv_cl_reloc
2205v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
2206                                             struct v3dv_descriptor_state *descriptor_state,
2207                                             struct v3dv_descriptor_map *map,
2208                                             struct v3dv_pipeline_layout *pipeline_layout,
2209                                             uint32_t index);
2210
2211struct v3dv_bo*
2212v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
2213                                   struct v3dv_descriptor_map *map,
2214                                   struct v3dv_pipeline_layout *pipeline_layout,
2215                                   uint32_t index);
2216
2217static inline const struct v3dv_sampler *
2218v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
2219                        const struct v3dv_descriptor_set_binding_layout *binding)
2220{
2221   assert(binding->immutable_samplers_offset);
2222   return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
2223}
2224
2225void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
2226                              struct v3dv_device *device,
2227                              VkPipelineCacheCreateFlags,
2228                              bool cache_enabled);
2229
2230void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);
2231
2232void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
2233                                    struct v3dv_pipeline_cache *cache,
2234                                    nir_shader *nir,
2235                                    unsigned char sha1_key[20]);
2236
2237nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
2238                                               struct v3dv_pipeline_cache *cache,
2239                                               const nir_shader_compiler_options *nir_options,
2240                                               unsigned char sha1_key[20]);
2241
2242struct v3dv_pipeline_shared_data *
2243v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
2244                                        unsigned char sha1_key[20],
2245                                        bool *cache_hit);
2246
2247void
2248v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
2249                                    struct v3dv_pipeline_cache *cache);
2250
2251struct v3dv_bo *
2252v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
2253                                              struct v3dv_pipeline *pipeline);
2254
2255#define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle)			\
2256   VK_FROM_HANDLE(__v3dv_type, __name, __handle)
2257
2258VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
2259                       VK_OBJECT_TYPE_COMMAND_BUFFER)
2260VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
2261VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
2262                       VK_OBJECT_TYPE_INSTANCE)
2263VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
2264                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
2265VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
2266
2267VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
2268                               VK_OBJECT_TYPE_BUFFER)
2269VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
2270                               VK_OBJECT_TYPE_BUFFER_VIEW)
2271VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, base, VkDeviceMemory,
2272                               VK_OBJECT_TYPE_DEVICE_MEMORY)
2273VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
2274                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
2275VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
2276                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
2277VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
2278                               VkDescriptorSetLayout,
2279                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
2280VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_update_template, base,
2281                               VkDescriptorUpdateTemplate,
2282                               VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
2283VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
2284VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
2285                               VK_OBJECT_TYPE_FRAMEBUFFER)
2286VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
2287                               VK_OBJECT_TYPE_IMAGE)
2288VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
2289                               VK_OBJECT_TYPE_IMAGE_VIEW)
2290VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
2291                               VK_OBJECT_TYPE_PIPELINE)
2292VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
2293                               VK_OBJECT_TYPE_PIPELINE_CACHE)
2294VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
2295                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
2296VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
2297                               VK_OBJECT_TYPE_QUERY_POOL)
2298VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
2299                               VK_OBJECT_TYPE_RENDER_PASS)
2300VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
2301                               VK_OBJECT_TYPE_SAMPLER)
2302
2303static inline int
2304v3dv_ioctl(int fd, unsigned long request, void *arg)
2305{
2306   if (using_v3d_simulator)
2307      return v3d_simulator_ioctl(fd, request, arg);
2308   else
2309      return drmIoctl(fd, request, arg);
2310}
2311
2312/* Flags OOM conditions in command buffer state.
2313 *
2314 * Note: notice that no-op jobs don't have a command buffer reference.
2315 */
2316static inline void
2317v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
2318{
2319   if (cmd_buffer) {
2320      cmd_buffer->state.oom = true;
2321   } else {
2322      assert(job);
2323      if (job->cmd_buffer)
2324         job->cmd_buffer->state.oom = true;
2325   }
2326}
2327
2328#define v3dv_return_if_oom(_cmd_buffer, _job) do {                  \
2329   const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer;        \
2330   if (__cmd_buffer && __cmd_buffer->state.oom)                     \
2331      return;                                                       \
2332   const struct v3dv_job *__job = _job;                             \
2333   if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom)  \
2334      return;                                                       \
2335} while(0)                                                          \
2336
2337static inline uint32_t
2338u64_hash(const void *key)
2339{
2340   return _mesa_hash_data(key, sizeof(uint64_t));
2341}
2342
2343static inline bool
2344u64_compare(const void *key1, const void *key2)
2345{
2346   return memcmp(key1, key2, sizeof(uint64_t)) == 0;
2347}
2348
2349/* Helper to call hw ver speficic functions */
2350#define v3dv_X(device, thing) ({                      \
2351   __typeof(&v3d42_##thing) v3d_X_thing;              \
2352   switch (device->devinfo.ver) {                     \
2353   case 42:                                           \
2354      v3d_X_thing = &v3d42_##thing;                   \
2355      break;                                          \
2356   default:                                           \
2357      unreachable("Unsupported hardware generation"); \
2358   }                                                  \
2359   v3d_X_thing;                                       \
2360})
2361
2362
2363/* v3d_macros from common requires v3dX and V3DX definitions. Below we need to
2364 * define v3dX for each version supported, because when we compile code that
2365 * is not version-specific, all version-specific macros need to be already
2366 * defined.
2367 */
2368#ifdef v3dX
2369#  include "v3dvx_private.h"
2370#else
2371#  define v3dX(x) v3d42_##x
2372#  include "v3dvx_private.h"
2373#  undef v3dX
2374#endif
2375
2376#ifdef ANDROID
2377VkResult
2378v3dv_gralloc_info(struct v3dv_device *device,
2379                  const VkNativeBufferANDROID *gralloc_info,
2380                  int *out_dmabuf,
2381                  int *out_stride,
2382                  int *out_size,
2383                  uint64_t *out_modifier);
2384
2385VkResult
2386v3dv_import_native_buffer_fd(VkDevice device_h,
2387                             int dma_buf,
2388                             const VkAllocationCallbacks *alloc,
2389                             VkImage image_h);
2390#endif /* ANDROID */
2391
2392#endif /* V3DV_PRIVATE_H */
2393