1/*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28#ifndef RADV_PRIVATE_H
29#define RADV_PRIVATE_H
30
31#include <assert.h>
32#include <stdbool.h>
33#include <stdint.h>
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#ifdef HAVE_VALGRIND
38#include <memcheck.h>
39#include <valgrind.h>
40#define VG(x) x
41#else
42#define VG(x) ((void)0)
43#endif
44
45#include "c11/threads.h"
46#ifndef _WIN32
47#include <amdgpu.h>
48#include <xf86drm.h>
49#endif
50#include "compiler/shader_enums.h"
51#include "util/bitscan.h"
52#include "util/list.h"
53#include "util/macros.h"
54#include "util/rwlock.h"
55#include "util/xmlconfig.h"
56#include "vk_alloc.h"
57#include "vk_buffer.h"
58#include "vk_command_buffer.h"
59#include "vk_command_pool.h"
60#include "vk_debug_report.h"
61#include "vk_device.h"
62#include "vk_format.h"
63#include "vk_instance.h"
64#include "vk_log.h"
65#include "vk_physical_device.h"
66#include "vk_shader_module.h"
67#include "vk_queue.h"
68#include "vk_util.h"
69#include "vk_image.h"
70#include "vk_framebuffer.h"
71
72#include "ac_binary.h"
73#include "ac_gpu_info.h"
74#include "ac_shader_util.h"
75#include "ac_spm.h"
76#include "ac_sqtt.h"
77#include "ac_surface.h"
78#include "radv_constants.h"
79#include "radv_descriptor_set.h"
80#include "radv_radeon_winsys.h"
81#include "radv_shader.h"
82#include "radv_shader_args.h"
83#include "sid.h"
84
85#include "radix_sort/radix_sort_vk_devaddr.h"
86
87/* Pre-declarations needed for WSI entrypoints */
88struct wl_surface;
89struct wl_display;
90typedef struct xcb_connection_t xcb_connection_t;
91typedef uint32_t xcb_visualid_t;
92typedef uint32_t xcb_window_t;
93
94#include <vulkan/vk_android_native_buffer.h>
95#include <vulkan/vk_icd.h>
96#include <vulkan/vulkan.h>
97#include <vulkan/vulkan_android.h>
98
99#include "radv_entrypoints.h"
100
101#include "wsi_common.h"
102
103#ifdef __cplusplus
104extern "C"
105{
106#endif
107
108/* Helper to determine if we should compile
109 * any of the Android AHB support.
110 *
111 * To actually enable the ext we also need
112 * the necessary kernel support.
113 */
114#if defined(ANDROID) && ANDROID_API_LEVEL >= 26
115#define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 1
116#include <vndk/hardware_buffer.h>
117#else
118#define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 0
119#endif
120
121#ifdef _WIN32
122#define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 0
123#else
124#define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 1
125#endif
126
127#ifdef _WIN32
128#define radv_printflike(a, b)
129#else
130#define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
131#endif
132
/**
 * Round \p v up to the next multiple of \p a.
 *
 * \p a must be a non-zero power of two (checked by assert). The arithmetic is
 * plain 32-bit unsigned, so the addition wraps modulo 2^32 when \p v lies
 * within a - 1 of UINT32_MAX.
 */
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   const uint32_t low_bits = a - 1;

   /* A power of two shares no bit with its predecessor. */
   assert(a != 0 && (a & low_bits) == 0);
   return (v + low_bits) & ~low_bits;
}
139
/**
 * Round \p v up to the next multiple of \p a, where \p a does not have to be
 * a power of two.
 *
 * \p a must be non-zero: it is used as a divisor. The sibling align_u32() and
 * align_u64() helpers assert the same precondition.
 */
static inline uint32_t
align_u32_npot(uint32_t v, uint32_t a)
{
   assert(a != 0);
   return (v + a - 1) / a * a;
}
145
/**
 * 64-bit variant of align_u32(): round \p v up to the next multiple of \p a.
 *
 * \p a must be a non-zero power of two (checked by assert). The addition
 * wraps modulo 2^64 when \p v lies within a - 1 of UINT64_MAX.
 */
static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
   const uint64_t low_bits = a - 1;

   /* A power of two shares no bit with its predecessor. */
   assert(a != 0 && (a & low_bits) == 0);
   return (v + low_bits) & ~low_bits;
}
152
/**
 * Signed variant of align_u32(): round \p v up (towards positive infinity) to
 * the next multiple of \p a.
 *
 * \p a must be a positive power of two. The precondition is checked with
 * `a > 0` rather than by negating \p a, because negating INT32_MIN is a
 * signed overflow and would make the check itself undefined.
 */
static inline int32_t
align_i32(int32_t v, int32_t a)
{
   assert(a > 0 && (a & (a - 1)) == 0);
   return (v + a - 1) & ~(a - 1);
}
159
/**
 * Return true when \p n is a multiple of \p a.
 *
 * Alignment must be a power of 2. Zero is rejected by the assert as well: it
 * is not a power of two, and with a == 0 the mask below would be all ones, so
 * only n == 0 would ever be reported as aligned.
 */
static inline bool
radv_is_aligned(uintmax_t n, uintmax_t a)
{
   assert(a != 0 && (a & (a - 1)) == 0);
   return (n & (a - 1)) == 0;
}
167
/**
 * Divide \p v by \p a, rounding the quotient up.
 *
 * Despite the name, this returns the number of \p a-sized units needed to
 * cover \p v, not \p v rounded to a multiple of \p a. \p a is used as a
 * divisor and therefore must not be zero.
 */
static inline uint32_t
round_up_u32(uint32_t v, uint32_t a)
{
   const uint32_t biased = v + (a - 1);

   return biased / a;
}
173
/**
 * 64-bit variant of round_up_u32(): divide \p v by \p a, rounding the
 * quotient up.
 *
 * The result is a count of \p a-sized units, not a value aligned to \p a.
 * \p a is used as a divisor and therefore must not be zero.
 */
static inline uint64_t
round_up_u64(uint64_t v, uint64_t a)
{
   const uint64_t biased = v + (a - 1);

   return biased / a;
}
179
/**
 * Shrink the size \p n by \p levels halvings.
 *
 * A size of zero stays zero; any other size is shifted right by \p levels and
 * never drops below 1.
 */
static inline uint32_t
radv_minify(uint32_t n, uint32_t levels)
{
   if (unlikely(n == 0))
      return 0;

   const uint32_t shifted = n >> levels;

   return MAX2(shifted, 1);
}
/**
 * Clamp \p f to the range [\p min, \p max].
 *
 * The caller must pass min < max (checked by assert). A value that compares
 * neither above \p max nor below \p min is returned unchanged.
 */
static inline float
radv_clamp_f(float f, float min, float max)
{
   assert(min < max);

   /* The upper bound is tested first, exactly as callers have always seen. */
   return f > max ? max : (f < min ? min : f);
}
200
/**
 * Clear the bits of \p clear_mask in \p *inout_mask.
 *
 * Returns true when at least one of those bits was set beforehand (and has
 * now been cleared). Returns false, leaving \p *inout_mask untouched, when
 * none of them was set.
 */
static inline bool
radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
{
   const uint32_t overlap = *inout_mask & clear_mask;

   if (!overlap)
      return false;

   *inout_mask &= ~clear_mask;
   return true;
}
211
/**
 * Convert \p value to signed fixed point with \p frac_bits fractional bits.
 *
 * The result is value * 2^frac_bits, truncated towards zero by the
 * float-to-int conversion. The scale factor is built with an unsigned shift:
 * `1 << 31` on a signed int overflows, which is undefined behaviour.
 *
 * \p frac_bits must be below 32, and the scaled value must fit in an int.
 */
static inline int
radv_float_to_sfixed(float value, unsigned frac_bits)
{
   assert(frac_bits < 32);
   return (int)(value * (float)(1u << frac_bits));
}
217
/**
 * Convert \p value to unsigned fixed point with \p frac_bits fractional bits.
 *
 * The result is value * 2^frac_bits, truncated by the float-to-unsigned
 * conversion. The scale factor is built with an unsigned shift: `1 << 31` on
 * a signed int overflows, which is undefined behaviour.
 *
 * \p frac_bits must be below 32, and the scaled value must be non-negative
 * and fit in an unsigned int.
 */
static inline unsigned int
radv_float_to_ufixed(float value, unsigned frac_bits)
{
   assert(frac_bits < 32);
   return (unsigned int)(value * (float)(1u << frac_bits));
}
223
224/* Whenever we generate an error, pass it through this function. Useful for
225 * debugging, where we can break on it. Only call at error site, not when
226 * propagating errors. Might be useful to plug in a stack trace here.
227 */
228
229struct radv_image_view;
230struct radv_instance;
231
/* A non-fatal assert.  Useful for debugging.
 *
 * With NDEBUG defined the macro expands to an empty statement and `x` is not
 * evaluated at all, so `x` must not carry side effects the caller relies on.
 * Otherwise a false `x` prints the file, line and expression text to stderr
 * and execution simply continues.
 */
#ifdef NDEBUG
#define radv_assert(x)                                                                             \
   do {                                                                                            \
   } while (0)
#else
#define radv_assert(x)                                                                             \
   do {                                                                                            \
      if (unlikely(!(x)))                                                                          \
         fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x);                            \
   } while (0)
#endif
244
245int radv_get_instance_entrypoint_index(const char *name);
246int radv_get_device_entrypoint_index(const char *name);
247int radv_get_physical_device_entrypoint_index(const char *name);
248
249const char *radv_get_instance_entry_name(int index);
250const char *radv_get_physical_device_entry_name(int index);
251const char *radv_get_device_entry_name(int index);
252
/* queue types
 *
 * The values below RADV_MAX_QUEUE_FAMILIES are usable as array indices (see
 * the arrays sized by RADV_MAX_QUEUE_FAMILIES in this header).
 * RADV_QUEUE_FOREIGN shares the numeric value of RADV_MAX_QUEUE_FAMILIES and
 * RADV_QUEUE_IGNORED follows it, so neither may be used as such an index;
 * they are what vk_queue_to_radv() returns for the special Vulkan queue
 * family indices.
 */
enum radv_queue_family {
   RADV_QUEUE_GENERAL,
   RADV_QUEUE_COMPUTE,
   RADV_QUEUE_TRANSFER,
   RADV_MAX_QUEUE_FAMILIES,
   RADV_QUEUE_FOREIGN = RADV_MAX_QUEUE_FAMILIES,
   RADV_QUEUE_IGNORED,
};
262
263struct radv_perfcounter_desc;
264
/* Per-physical-device driver state.
 *
 * Embeds the common vk_physical_device as its first member and adds the
 * winsys handle, GPU info, identification strings/UUIDs, feature toggles,
 * memory type tables and performance counter descriptions.
 */
struct radv_physical_device {
   struct vk_physical_device vk;

   /* Link in radv_instance::physical_devices */
   struct list_head link;

   /* Instance this device belongs to. */
   struct radv_instance *instance;

   struct radeon_winsys *ws;
   struct radeon_info rad_info;
   char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
   char marketing_name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
   uint8_t driver_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];
   uint8_t cache_uuid[VK_UUID_SIZE];

   /* NOTE(review): presumably file descriptors of the DRM render and primary
    * nodes - confirm against the device creation code. */
   int local_fd;
   int master_fd;
   struct wsi_device wsi_device;

   bool out_of_order_rast_allowed;

   /* Whether DCC should be enabled for MSAA textures. */
   bool dcc_msaa_allowed;

   /* Whether to enable NGG. */
   bool use_ngg;

   /* Whether to enable NGG culling. */
   bool use_ngg_culling;

   /* Whether to enable NGG streamout. */
   bool use_ngg_streamout;

   /* Number of threads per wave. */
   uint8_t ps_wave_size;
   uint8_t cs_wave_size;
   uint8_t ge_wave_size;
   uint8_t rt_wave_size;

   /* Whether to use the LLVM compiler backend */
   bool use_llvm;

   /* Whether to emulate ETC2 image support on HW without support. */
   bool emulate_etc2;

   /* This is the driver's on-disk cache used as a fallback as opposed to
    * the pipeline cache defined by apps.
    */
   struct disk_cache *disk_cache;

   /* Memory properties reported to the application, with the winsys domain
    * and flags stored per memory type index in the two parallel arrays. */
   VkPhysicalDeviceMemoryProperties memory_properties;
   enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES];
   enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES];
   unsigned heaps;

   /* Bitmask of memory types that use the 32-bit address space. */
   uint32_t memory_types_32bit;

   /* DRM node information; not compiled in on Windows. */
#ifndef _WIN32
   int available_nodes;
   drmPciBusInfo bus_info;

   dev_t primary_devid;
   dev_t render_devid;
#endif

   /* NIR compiler options, one entry per shader stage. */
   nir_shader_compiler_options nir_options[MESA_VULKAN_SHADER_STAGES];

   /* Lookup table read by vk_queue_to_radv(): Vulkan queue family index ->
    * RADV queue family. */
   enum radv_queue_family vk_queue_to_radv[RADV_MAX_QUEUE_FAMILIES];
   uint32_t num_queues;

   uint32_t gs_table_depth;

   struct ac_hs_info hs;
   struct ac_task_info task_info;

   /* Performance counters. */
   struct ac_perfcounters ac_perfcounters;

   /* Array of `num_perfcounters` descriptors. */
   uint32_t num_perfcounters;
   struct radv_perfcounter_desc *perfcounters;
};
348
/* Driver instance state.
 *
 * Embeds the common vk_instance as its first member and adds the debug and
 * perftest flag masks, the list of physical devices, the driconf option
 * caches and the per-application workaround toggles.
 */
struct radv_instance {
   struct vk_instance vk;

   VkAllocationCallbacks alloc;

   uint64_t debug_flags;
   uint64_t perftest_flags;

   /* List of radv_physical_device objects, linked through
    * radv_physical_device::link. */
   bool physical_devices_enumerated;
   struct list_head physical_devices;

   struct driOptionCache dri_options;
   struct driOptionCache available_dri_options;

   /**
    * Workarounds for game bugs.
    */
   bool enable_mrt_output_nan_fixup;
   bool disable_tc_compat_htile_in_general;
   bool disable_shrink_image_store;
   bool absolute_depth_bias;
   bool disable_aniso_single_level;
   bool zero_vram;
   bool disable_sinking_load_input_fs;
   bool flush_before_query_copy;
};
375
376VkResult radv_init_wsi(struct radv_physical_device *physical_device);
377void radv_finish_wsi(struct radv_physical_device *physical_device);
378
379struct cache_entry;
380
/* Pipeline cache object: a table of cache_entry pointers guarded by a mutex.
 *
 * NOTE(review): total_size / table_size / kernel_count presumably track the
 * serialized size, the table capacity and the number of stored entries -
 * confirm against the cache implementation.
 */
struct radv_pipeline_cache {
   struct vk_object_base base;
   struct radv_device *device;
   mtx_t mutex;
   VkPipelineCacheCreateFlags flags;

   uint32_t total_size;
   uint32_t table_size;
   uint32_t kernel_count;
   struct cache_entry **hash_table;
   bool modified;

   VkAllocationCallbacks alloc;
};
395
396struct radv_shader_binary;
397struct radv_shader;
398struct radv_pipeline_shader_stack_size;
399
400void radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device);
401void radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
402bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size);
403
404bool radv_create_shaders_from_pipeline_cache(
405   struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
406   struct radv_pipeline *pipeline, struct radv_pipeline_shader_stack_size **stack_sizes,
407   uint32_t *num_stack_sizes, bool *found_in_application_cache);
408
409void radv_pipeline_cache_insert_shaders(
410   struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
411   struct radv_pipeline *pipeline, struct radv_shader_binary *const *binaries,
412   const struct radv_pipeline_shader_stack_size *stack_sizes, uint32_t num_stack_sizes);
413
414VkResult radv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline,
415                             struct radv_shader_binary **binaries,
416                             struct radv_shader_binary *gs_copy_binary);
417
/* Depth/stencil layout classes used by the meta blit code.
 * radv_meta_blit_ds_to_type() maps VK_IMAGE_LAYOUT_GENERAL to TILE_DISABLE
 * and every other layout to TILE_ENABLE. RADV_BLIT_DS_LAYOUT_COUNT is the
 * number of classes.
 */
enum radv_blit_ds_layout {
   RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
   RADV_BLIT_DS_LAYOUT_TILE_DISABLE,
   RADV_BLIT_DS_LAYOUT_COUNT,
};
423
424static inline enum radv_blit_ds_layout
425radv_meta_blit_ds_to_type(VkImageLayout layout)
426{
427   return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE
428                                              : RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
429}
430
431static inline VkImageLayout
432radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)
433{
434   return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
435                                                       : VK_IMAGE_LAYOUT_GENERAL;
436}
437
/* Destination layout classes used by meta operations.
 * radv_meta_dst_layout_from_layout() maps VK_IMAGE_LAYOUT_GENERAL to GENERAL
 * and every other layout to OPTIMAL. RADV_META_DST_LAYOUT_COUNT is the number
 * of classes.
 */
enum radv_meta_dst_layout {
   RADV_META_DST_LAYOUT_GENERAL,
   RADV_META_DST_LAYOUT_OPTIMAL,
   RADV_META_DST_LAYOUT_COUNT,
};
443
444static inline enum radv_meta_dst_layout
445radv_meta_dst_layout_from_layout(VkImageLayout layout)
446{
447   return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL
448                                              : RADV_META_DST_LAYOUT_OPTIMAL;
449}
450
451static inline VkImageLayout
452radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)
453{
454   return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
455                                                 : VK_IMAGE_LAYOUT_GENERAL;
456}
457
/* State of the driver-internal ("meta") operations.
 *
 * Apart from the allocator, the pipeline cache and the mutex, every member is
 * a Vulkan handle (pipeline, pipeline layout or descriptor set layout) or an
 * array of such handles, grouped in one anonymous struct per operation.
 * Arrays sized by MAX_SAMPLES_LOG2 are indexed by log2 of the sample count,
 * arrays sized by NUM_META_FS_KEYS by a fragment shader key.
 *
 * NOTE(review): the short group names (itob, btoi, itoi, cleari) presumably
 * abbreviate image-to-buffer, buffer-to-image, image-to-image and clear-image
 * - confirm against the meta implementation files.
 */
struct radv_meta_state {
   VkAllocationCallbacks alloc;

   struct radv_pipeline_cache cache;

   /*
    * For on-demand pipeline creation, makes sure that
    * only one thread tries to build a pipeline at the same time.
    */
   mtx_t mtx;

   /**
    * Use array element `i` for images with `2^i` samples.
    */
   struct {
      VkPipeline color_pipelines[NUM_META_FS_KEYS];
   } color_clear[MAX_SAMPLES_LOG2][MAX_RTS];

   struct {
      VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];

      VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
   } ds_clear[MAX_SAMPLES_LOG2];

   VkPipelineLayout clear_color_p_layout;
   VkPipelineLayout clear_depth_p_layout;
   VkPipelineLayout clear_depth_unrestricted_p_layout;

   /* Optimized compute fast HTILE clear for stencil or depth only. */
   VkPipeline clear_htile_mask_pipeline;
   VkPipelineLayout clear_htile_mask_p_layout;
   VkDescriptorSetLayout clear_htile_mask_ds_layout;

   /* Copy VRS into HTILE. */
   VkPipeline copy_vrs_htile_pipeline;
   VkPipelineLayout copy_vrs_htile_p_layout;
   VkDescriptorSetLayout copy_vrs_htile_ds_layout;

   /* Clear DCC with comp-to-single. */
   VkPipeline clear_dcc_comp_to_single_pipeline[2]; /* 0: 1x, 1: 2x/4x/8x */
   VkPipelineLayout clear_dcc_comp_to_single_p_layout;
   VkDescriptorSetLayout clear_dcc_comp_to_single_ds_layout;

   struct {
      /** Pipeline that blits from a 1D image. */
      VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];

      /** Pipeline that blits from a 2D image. */
      VkPipeline pipeline_2d_src[NUM_META_FS_KEYS];

      /** Pipeline that blits from a 3D image. */
      VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];

      VkPipeline depth_only_1d_pipeline;
      VkPipeline depth_only_2d_pipeline;
      VkPipeline depth_only_3d_pipeline;

      VkPipeline stencil_only_1d_pipeline;
      VkPipeline stencil_only_2d_pipeline;
      VkPipeline stencil_only_3d_pipeline;
      VkPipelineLayout pipeline_layout;
      VkDescriptorSetLayout ds_layout;
   } blit;

   /* NOTE(review): the meaning of the 5 variants per sample count is not
    * visible in this header - confirm against the blit2d implementation. */
   struct {
      VkPipelineLayout p_layouts[5];
      VkDescriptorSetLayout ds_layouts[5];
      VkPipeline pipelines[5][NUM_META_FS_KEYS];

      VkPipeline depth_only_pipeline[5];

      VkPipeline stencil_only_pipeline[5];
   } blit2d[MAX_SAMPLES_LOG2];

   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
      VkPipeline pipeline_3d;
   } itob;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
      VkPipeline pipeline_3d;
   } btoi;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } btoi_r32g32b32;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
      VkPipeline pipeline_3d;
   } itoi;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } itoi_r32g32b32;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
      VkPipeline pipeline_3d;
   } cleari;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } cleari_r32g32b32;
   struct {
      VkPipelineLayout p_layout;
      VkDescriptorSetLayout ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
   } fmask_copy;

   struct {
      VkPipelineLayout p_layout;
      VkPipeline pipeline[NUM_META_FS_KEYS];
   } resolve;

   /* Compute-shader resolves: colour (rc), depth and stencil variants. */
   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      struct {
         VkPipeline pipeline;
         VkPipeline i_pipeline;
         VkPipeline srgb_pipeline;
      } rc[MAX_SAMPLES_LOG2];

      VkPipeline depth_zero_pipeline;
      struct {
         VkPipeline average_pipeline;
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } depth[MAX_SAMPLES_LOG2];

      VkPipeline stencil_zero_pipeline;
      struct {
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } stencil[MAX_SAMPLES_LOG2];
   } resolve_compute;

   /* Fragment-shader resolves; same layout as resolve_compute except that the
    * colour pipelines are keyed by NUM_META_FS_KEYS. */
   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;

      struct {
         VkPipeline pipeline[NUM_META_FS_KEYS];
      } rc[MAX_SAMPLES_LOG2];

      VkPipeline depth_zero_pipeline;
      struct {
         VkPipeline average_pipeline;
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } depth[MAX_SAMPLES_LOG2];

      VkPipeline stencil_zero_pipeline;
      struct {
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } stencil[MAX_SAMPLES_LOG2];
   } resolve_fragment;

   struct {
      VkPipelineLayout p_layout;
      VkPipeline decompress_pipeline;
      VkPipeline resummarize_pipeline;
   } depth_decomp[MAX_SAMPLES_LOG2];

   VkDescriptorSetLayout expand_depth_stencil_compute_ds_layout;
   VkPipelineLayout expand_depth_stencil_compute_p_layout;
   VkPipeline expand_depth_stencil_compute_pipeline;

   struct {
      VkPipelineLayout p_layout;
      VkPipeline cmask_eliminate_pipeline;
      VkPipeline fmask_decompress_pipeline;
      VkPipeline dcc_decompress_pipeline;

      VkDescriptorSetLayout dcc_decompress_compute_ds_layout;
      VkPipelineLayout dcc_decompress_compute_p_layout;
      VkPipeline dcc_decompress_compute_pipeline;
   } fast_clear_flush;

   struct {
      VkPipelineLayout fill_p_layout;
      VkPipelineLayout copy_p_layout;
      VkPipeline fill_pipeline;
      VkPipeline copy_pipeline;
   } buffer;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline occlusion_query_pipeline;
      VkPipeline pipeline_statistics_query_pipeline;
      VkPipeline tfb_query_pipeline;
      VkPipeline timestamp_query_pipeline;
      VkPipeline pg_query_pipeline;
   } query;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
   } fmask_expand;

   /* NOTE(review): the key used to index the 32 pipelines is not visible in
    * this header - confirm against the DCC retile implementation. */
   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline[32];
   } dcc_retile;

   /* Acceleration structure build pipelines plus the radix sort instance and
    * its per-sort parameters. */
   struct {
      VkPipelineLayout leaf_p_layout;
      VkPipeline leaf_pipeline;
      VkPipelineLayout morton_p_layout;
      VkPipeline morton_pipeline;
      VkPipelineLayout internal_p_layout;
      VkPipeline internal_pipeline;
      VkPipelineLayout copy_p_layout;
      VkPipeline copy_pipeline;

      struct radix_sort_vk *radix_sort;
      struct radix_sort_vk_sort_devaddr_info radix_sort_info;
   } accel_struct_build;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline;
   } etc_decode;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline;
   } dgc_prepare;
};
707
708#define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1)
709
710struct radv_deferred_queue_submission;
711
712static inline enum radv_queue_family
713vk_queue_to_radv(const struct radv_physical_device *phys_dev, int queue_family_index)
714{
715   if (queue_family_index == VK_QUEUE_FAMILY_EXTERNAL ||
716       queue_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT)
717      return RADV_QUEUE_FOREIGN;
718   if (queue_family_index == VK_QUEUE_FAMILY_IGNORED)
719      return RADV_QUEUE_IGNORED;
720
721   assert(queue_family_index < RADV_MAX_QUEUE_FAMILIES);
722   return phys_dev->vk_queue_to_radv[queue_family_index];
723}
724
725enum amd_ip_type radv_queue_family_to_ring(struct radv_physical_device *physical_device,
726                                         enum radv_queue_family f);
727
/* Sizes and presence flags of the per-queue rings and scratch buffers.
 * Stored in radv_queue_state::ring_info.
 *
 * NOTE(review): the units of the size fields (bytes vs dwords) are not
 * visible in this header - confirm against the queue code.
 */
struct radv_queue_ring_info {
   uint32_t scratch_size_per_wave;
   uint32_t scratch_waves;
   uint32_t compute_scratch_size_per_wave;
   uint32_t compute_scratch_waves;
   uint32_t esgs_ring_size;
   uint32_t gsvs_ring_size;
   bool tess_rings;
   bool task_rings;
   bool mesh_scratch_ring;
   bool gds;
   bool gds_oa;
   bool sample_positions;
};
742
/* State kept per queue: its queue family, the ring info, the winsys buffer
 * objects held for it and three preamble command streams.
 */
struct radv_queue_state {
   enum radv_queue_family qf;
   struct radv_queue_ring_info ring_info;

   /* NOTE(review): each BO presumably backs the like-named entry of
    * ring_info - confirm against the queue code. */
   struct radeon_winsys_bo *scratch_bo;
   struct radeon_winsys_bo *descriptor_bo;
   struct radeon_winsys_bo *compute_scratch_bo;
   struct radeon_winsys_bo *esgs_ring_bo;
   struct radeon_winsys_bo *gsvs_ring_bo;
   struct radeon_winsys_bo *tess_rings_bo;
   struct radeon_winsys_bo *task_rings_bo;
   struct radeon_winsys_bo *mesh_scratch_ring_bo;
   struct radeon_winsys_bo *gds_bo;
   struct radeon_winsys_bo *gds_oa_bo;

   struct radeon_cmdbuf *initial_preamble_cs;
   struct radeon_cmdbuf *initial_full_flush_preamble_cs;
   struct radeon_cmdbuf *continue_preamble_cs;
};
762
/* Driver queue object. Embeds the common vk_queue as its first member and
 * adds the owning device, the winsys context, the context priority and the
 * queue state.
 */
struct radv_queue {
   struct vk_queue vk;
   struct radv_device *device;
   struct radeon_winsys_ctx *hw_ctx;
   enum radeon_ctx_priority priority;
   struct radv_queue_state state;
   /* Second queue state, held by pointer.
    * NOTE(review): presumably for an internal async-compute (ACE) queue and
    * NULL when unused - confirm against the queue code. */
   struct radv_queue_state *ace_internal_state;
};
771
/* Number of border color slots and the byte size of a buffer holding one
 * VkClearColorValue per slot. */
#define RADV_BORDER_COLOR_COUNT       4096
#define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT)

/* Device-wide border color table: a per-slot occupancy flag, the buffer
 * object, a pointer to the colors, and the mutex guarding them.
 */
struct radv_device_border_color_data {
   bool used[RADV_BORDER_COLOR_COUNT];

   struct radeon_winsys_bo *bo;
   VkClearColorValue *colors_gpu_ptr;

   /* Mutex is required to guarantee vkCreateSampler thread safety
    * given that we are writing to a buffer and checking color occupation */
   mtx_t mutex;
};
785
/* Shading rates for the RADV_FORCE_VRS setting; stored in
 * radv_device::force_vrs. RADV_FORCE_VRS_1x1 has the value 0.
 */
enum radv_force_vrs {
   RADV_FORCE_VRS_1x1 = 0,
   RADV_FORCE_VRS_2x2,
   RADV_FORCE_VRS_2x1,
   RADV_FORCE_VRS_1x2,
};
792
/* Notifier state stored in radv_device::notifier (the RADV_FORCE_VRS member
 * group).
 *
 * NOTE(review): fd and watch look like a file-notification descriptor and
 * watch id serviced by `thread` until `quit` is set - confirm against the
 * implementation.
 */
struct radv_notifier {
   int fd;
   int watch;
   bool quit;
   thrd_t thread;
};
799
/* Logical device state.
 *
 * Embeds the common vk_device as its first member and adds the winsys
 * contexts, meta state, queues, shader memory arenas, feature toggles,
 * tracing/profiling state and miscellaneous per-device resources.
 */
struct radv_device {
   struct vk_device vk;

   struct radv_instance *instance;
   struct radeon_winsys *ws;

   /* One winsys context slot per context priority (see RADV_NUM_HW_CTX). */
   struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
   struct radv_meta_state meta_state;

   /* Queue arrays and their lengths, both indexed by queue family. */
   struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
   int queue_count[RADV_MAX_QUEUE_FAMILIES];

   bool pbb_allowed;
   uint32_t scratch_waves;
   uint32_t dispatch_initiator;
   uint32_t dispatch_initiator_task;

   /* MSAA sample locations.
    * The first index is the sample index.
    * The second index is the coordinate: X, Y. */
   float sample_locations_1x[1][2];
   float sample_locations_2x[2][2];
   float sample_locations_4x[4][2];
   float sample_locations_8x[8][2];

   /* GFX7 and later */
   uint32_t gfx_init_size_dw;
   struct radeon_winsys_bo *gfx_init;

   struct radeon_winsys_bo *trace_bo;
   uint32_t *trace_id_ptr;

   /* Whether to keep shader debug info, for debugging. */
   bool keep_shader_info;

   struct radv_physical_device *physical_device;

   /* Backup in-memory cache to be used if the app doesn't provide one */
   struct radv_pipeline_cache *mem_cache;

   /*
    * Use different counters so MSAA MRTs get consecutive surface indices,
    * even if MASK is allocated in between.
    */
   uint32_t image_mrt_offset_counter;
   uint32_t fmask_mrt_offset_counter;

   /* Shader memory arenas and their free lists. */
   struct list_head shader_arenas;
   unsigned shader_arena_shift;
   uint8_t shader_free_list_mask;
   struct list_head shader_free_lists[RADV_SHADER_ALLOC_NUM_FREE_LISTS];
   struct list_head shader_block_obj_pool;
   mtx_t shader_arena_mutex;

   /* For detecting VM faults reported by dmesg. */
   uint64_t dmesg_timestamp;

   /* Whether the app has enabled the robustBufferAccess/robustBufferAccess2 features. */
   bool robust_buffer_access;
   bool robust_buffer_access2;

   /* Whether to inline the compute dispatch size in user sgprs. */
   bool load_grid_size_from_user_sgpr;

   /* Whether the driver uses a global BO list. */
   bool use_global_bo_list;

   /* Whether attachment VRS is enabled. */
   bool attachment_vrs_enabled;

   /* Whether shader image 32-bit float atomics are enabled. */
   bool image_float32_atomics;

   /* Whether 2D views of 3D images are enabled. */
   bool image_2d_view_of_3d;

   /* Whether primitives generated query features are enabled. */
   bool primitives_generated_query;

   /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
   int force_aniso;

   struct radv_device_border_color_data border_color_data;

   /* Thread trace. */
   struct ac_thread_trace_data thread_trace;

   /* SPM. */
   struct ac_spm_trace_data spm_trace;

   /* Trap handler. */
   struct radv_trap_handler_shader *trap_handler_shader;
   struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
   uint32_t *tma_ptr;

   /* Overallocation. */
   bool overallocation_disallowed;
   uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
   mtx_t overallocation_mutex;

   /* RADV_FORCE_VRS. */
   struct radv_notifier notifier;
   enum radv_force_vrs force_vrs;

   /* Depth image for VRS when not bound by the app. */
   struct {
      struct radv_image *image;
      struct radv_buffer *buffer; /* HTILE */
      struct radv_device_memory *mem;
   } vrs;

   /* Hash table of vertex shader prologs and its reader/writer lock. */
   struct u_rwlock vs_prologs_lock;
   struct hash_table *vs_prologs;

   /* Prime blit sdma queue */
   struct radv_queue *private_sdma_queue;

   /* NOTE(review): 816 is a magic count whose derivation is not visible in
    * this header - confirm against the code that fills the array. */
   struct radv_shader_part *simple_vs_prologs[MAX_VERTEX_ATTRIBS];
   struct radv_shader_part *instance_rate_vs_prologs[816];

   simple_mtx_t trace_mtx;

   /* Whether per-vertex VRS is forced. */
   bool force_vrs_enabled;

   /* Whether shaders created through application entrypoints are considered internal. */
   bool app_shaders_internal;

   simple_mtx_t pstate_mtx;
   unsigned pstate_cnt;

   /* BO to contain some performance counter helpers:
    * - A lock for profiling cmdbuffers.
    * - a temporary fence for the end query synchronization.
    * - the pass to use for profiling. (as an array of bools)
    */
   struct radeon_winsys_bo *perf_counter_bo;

   /* Interleaved lock/unlock commandbuffers for perfcounter passes. */
   struct radeon_cmdbuf **perf_counter_lock_cs;

   bool uses_device_generated_commands;
};
943
944bool radv_device_acquire_performance_counters(struct radv_device *device);
945void radv_device_release_performance_counters(struct radv_device *device);
946
/* Device memory object: the winsys buffer object plus allocation
 * bookkeeping (heap index, size, mapping and user pointer).
 */
struct radv_device_memory {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   /* for dedicated allocations */
   struct radv_image *image;
   struct radv_buffer *buffer;
   uint32_t heap_index;
   uint64_t alloc_size;
   void *map;
   void *user_ptr;

   /* Only present when Android hardware buffer support is compiled in. */
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
   struct AHardwareBuffer *android_hardware_buffer;
#endif
};
962
963void radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device,
964                             struct radeon_winsys_bo *bo);
965void radv_device_memory_finish(struct radv_device_memory *mem);
966
/* A (va, size) pair; the element type of
 * radv_descriptor_set_header::dynamic_descriptors. */
struct radv_descriptor_range {
   uint64_t va;
   uint32_t size;
};
971
/* Common leading part of a descriptor set; it is the first member of both
 * radv_descriptor_set and radv_push_descriptor_set.
 */
struct radv_descriptor_set_header {
   struct vk_object_base base;
   struct radv_descriptor_set_layout *layout;
   uint32_t size;
   uint32_t buffer_count;

   /* Buffer object, address and mapping of the descriptor data.
    * NOTE(review): presumably va is the GPU address and mapped_ptr the CPU
    * mapping of the same memory - confirm. */
   struct radeon_winsys_bo *bo;
   uint64_t va;
   uint32_t *mapped_ptr;
   struct radv_descriptor_range *dynamic_descriptors;
};
983
/* Descriptor set: the common header followed by a flexible array of buffer
 * object pointers.
 * NOTE(review): the array length is presumably header.buffer_count - confirm.
 */
struct radv_descriptor_set {
   struct radv_descriptor_set_header header;

   struct radeon_winsys_bo *descriptors[];
};
989
/* Push descriptor set: the common header plus a capacity value.
 * NOTE(review): the unit of `capacity` is not visible here - confirm.
 */
struct radv_push_descriptor_set {
   struct radv_descriptor_set_header set;
   uint32_t capacity;
};
994
/* One entry of radv_descriptor_pool::entries: an (offset, size) pair together
 * with the descriptor set it belongs to.
 */
struct radv_descriptor_pool_entry {
   uint32_t offset;
   uint32_t size;
   struct radv_descriptor_set *set;
};
1000
1001struct radv_descriptor_pool {
1002   struct vk_object_base base;
1003   struct radeon_winsys_bo *bo;
1004   uint8_t *host_bo;
1005   uint8_t *mapped_ptr;
1006   uint64_t current_offset;
1007   uint64_t size;
1008
1009   uint8_t *host_memory_base;
1010   uint8_t *host_memory_ptr;
1011   uint8_t *host_memory_end;
1012
1013   uint32_t entry_count;
1014   uint32_t max_entry_count;
1015   struct radv_descriptor_pool_entry entries[0];
1016};
1017
/* One entry of a descriptor update template: which descriptors to write,
 * where they go in the set and where the source data is read from.
 */
struct radv_descriptor_update_template_entry {
   VkDescriptorType descriptor_type;

   /* The number of descriptors to update */
   uint32_t descriptor_count;

   /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */
   uint32_t dst_offset;

   /* In dwords. Not valid/used for dynamic descriptors */
   uint32_t dst_stride;

   uint32_t buffer_offset;

   /* Only valid for combined image samplers and samplers */
   uint8_t has_sampler;
   uint8_t sampler_offset;

   /* In bytes */
   size_t src_offset;
   size_t src_stride;

   /* For push descriptors */
   const uint32_t *immutable_samplers;
};
1043
1044struct radv_descriptor_update_template {
1045   struct vk_object_base base;
1046   uint32_t entry_count;
1047   VkPipelineBindPoint bind_point;
1048   struct radv_descriptor_update_template_entry entry[0];
1049};
1050
/* Driver buffer object: wraps the common vk_buffer and records the backing
 * winsys BO and the offset within it. */
struct radv_buffer {
   struct vk_buffer vk;

   /* Set when bound */
   struct radeon_winsys_bo *bo;
   VkDeviceSize offset;
};
1058
/* Init/finish pair for radv_buffer objects.
 * NOTE(review): presumably used for driver-internal buffers that wrap an
 * existing BO at the given offset/size — confirm against the definitions. */
void radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device,
                      struct radeon_winsys_bo *bo, uint64_t size, uint64_t offset);
void radv_buffer_finish(struct radv_buffer *buffer);
1062
/* One bit per piece of dynamic state tracked by the driver.
 *
 * Bit positions 0..29 are mirrored one-to-one by the
 * RADV_CMD_DIRTY_DYNAMIC_* values of enum radv_cmd_dirty_bits, so a new
 * state must be added to both enums and RADV_DYNAMIC_ALL (the mask of bits
 * 0..29) must be widened accordingly. */
enum radv_dynamic_state_bits {
   RADV_DYNAMIC_VIEWPORT = 1ull << 0,
   RADV_DYNAMIC_SCISSOR = 1ull << 1,
   RADV_DYNAMIC_LINE_WIDTH = 1ull << 2,
   RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3,
   RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
   RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
   RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
   RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
   RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
   RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
   RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
   RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11,
   RADV_DYNAMIC_CULL_MODE = 1ull << 12,
   RADV_DYNAMIC_FRONT_FACE = 1ull << 13,
   RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
   RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
   RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
   RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
   RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
   RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
   RADV_DYNAMIC_STENCIL_OP = 1ull << 20,
   RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
   RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
   RADV_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
   RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
   RADV_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
   RADV_DYNAMIC_LOGIC_OP = 1ull << 26,
   RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
   RADV_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
   RADV_DYNAMIC_VERTEX_INPUT = 1ull << 29,
   /* Mask of every bit defined above (bits 0..29). */
   RADV_DYNAMIC_ALL = (1ull << 30) - 1,
};
1096
/* Dirty-state bits for a command buffer.
 *
 * Bits 0..29 mirror enum radv_dynamic_state_bits; bits 30..34 cover
 * non-dynamic state. The values above bit 31 do not fit in an int, so this
 * enum relies on the compiler accepting enumerators wider than int (a
 * common extension before C23). Variables holding these flags should be
 * 64-bit, as radv_cmd_state::dirty is. */
enum radv_cmd_dirty_bits {
   /* Keep the dynamic state dirty bits in sync with
    * enum radv_dynamic_state_bits */
   RADV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1ull << 0,
   RADV_CMD_DIRTY_DYNAMIC_SCISSOR = 1ull << 1,
   RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1ull << 2,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1ull << 3,
   RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
   RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
   RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
   RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1ull << 11,
   RADV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1ull << 12,
   RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1ull << 13,
   RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1ull << 20,
   RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
   RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
   RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
   RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
   RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP = 1ull << 26,
   RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
   RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
   RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT = 1ull << 29,
   /* Mask of all dynamic-state dirty bits (bits 0..29). */
   RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 30) - 1,
   /* Non-dynamic state starts right after the dynamic bits. */
   RADV_CMD_DIRTY_PIPELINE = 1ull << 30,
   RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 31,
   RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 32,
   RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 33,
   RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 34,
};
1137
/* Cache flush/invalidate and engine synchronization requests. Individual
 * flags occupy bits 0..16; the two trailing enumerators are convenience
 * combinations of those flags. */
enum radv_cmd_flush_bits {
   /* Instruction cache. */
   RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
   /* Scalar L1 cache. */
   RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
   /* Vector L1 cache. */
   RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
   /* L2 cache + L2 metadata cache writeback & invalidate.
    * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
   RADV_CMD_FLAG_INV_L2 = 1 << 3,
   /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
    * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
    * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
   RADV_CMD_FLAG_WB_L2 = 1 << 4,
   /* Invalidate the metadata cache. To be used when the DCC/HTILE metadata
    * changed and we want to read an image from shaders. */
   RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5,
   /* Framebuffer caches */
   RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6,
   RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7,
   RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 8,
   RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 9,
   /* Engine synchronization. */
   RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10,
   RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11,
   RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12,
   RADV_CMD_FLAG_VGT_FLUSH = 1 << 13,
   /* Pipeline query controls. */
   RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14,
   RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15,
   RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16,

   /* All four framebuffer cache flags (CB, CB metadata, DB, DB metadata). */
   RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER =
      (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
       RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META),

   /* Instruction/scalar/vector cache invalidation, L2 invalidate and
    * writeback, plus a CS partial flush. */
   RADV_CMD_FLUSH_ALL_COMPUTE =
      (RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
       RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH),
};
1178
/* Bit flags describing NGG culling settings; radv_nggc_none (0) means no
 * flag is set.
 * NOTE(review): presumably the value cached in
 * radv_cmd_state::last_nggc_settings — confirm. */
enum radv_nggc_settings {
   radv_nggc_none = 0,
   radv_nggc_front_face = 1 << 0,
   radv_nggc_back_face = 1 << 1,
   radv_nggc_face_is_ccw = 1 << 2,
   radv_nggc_small_primitives = 1 << 3,
};
1186
/* Offset, size and stride of one vertex buffer binding; element type of
 * radv_cmd_buffer::vertex_bindings. The bound buffer itself is kept in the
 * parallel array radv_cmd_buffer::vertex_binding_buffers. */
struct radv_vertex_binding {
   VkDeviceSize offset;
   VkDeviceSize size;
   VkDeviceSize stride;
};
1192
/* One streamout buffer binding (buffer plus offset/size range); element
 * type of radv_cmd_buffer::streamout_bindings. */
struct radv_streamout_binding {
   struct radv_buffer *buffer;
   VkDeviceSize offset;
   VkDeviceSize size;
};
1198
/* Streamout tracking state, stored in radv_cmd_state::streamout. */
struct radv_streamout_state {
   /* Mask of bound streamout buffers. */
   uint8_t enabled_mask;

   /* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
   uint32_t hw_enabled_mask;

   /* State of VGT_STRMOUT_(CONFIG|EN) */
   bool streamout_enabled;
};
1209
/* Viewport state: up to MAX_VIEWPORTS viewports plus a per-viewport
 * scale/translate transform.
 * NOTE(review): xform[i] is presumably what radv_get_viewport_xform()
 * computes for viewports[i] — confirm. */
struct radv_viewport_state {
   uint32_t count;
   VkViewport viewports[MAX_VIEWPORTS];
   struct {
      float scale[3];
      float translate[3];
   } xform[MAX_VIEWPORTS];
};
1218
/* Scissor state: a count plus storage for up to MAX_SCISSORS rectangles. */
struct radv_scissor_state {
   uint32_t count;
   VkRect2D scissors[MAX_SCISSORS];
};
1223
/* Discard rectangle state: a count plus storage for up to
 * MAX_DISCARD_RECTANGLES rectangles. */
struct radv_discard_rectangle_state {
   uint32_t count;
   VkRect2D rectangles[MAX_DISCARD_RECTANGLES];
};
1228
/* Custom sample locations: samples per pixel, grid size and up to
 * MAX_SAMPLE_LOCATIONS locations. Embedded in radv_dynamic_state,
 * radv_attachment_state and radv_subpass_sample_locs_state. */
struct radv_sample_locations_state {
   VkSampleCountFlagBits per_pixel;
   VkExtent2D grid_size;
   uint32_t count;
   VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
};
1235
/* Snapshot of all dynamic state values tracked by the driver. Stored in
 * radv_cmd_state::dynamic; the defaults live in default_dynamic_state. */
struct radv_dynamic_state {
   /**
    * Bitmask of (1ull << VK_DYNAMIC_STATE_*).
    * Defines the set of saved dynamic state.
    *
    * NOTE(review): extension VK_DYNAMIC_STATE_* values are far larger than
    * 63, so this presumably holds RADV_DYNAMIC_* bits
    * (enum radv_dynamic_state_bits) instead — confirm and fix the comment.
    */
   uint64_t mask;

   struct radv_viewport_state viewport;

   struct radv_scissor_state scissor;

   float line_width;

   struct {
      float bias;
      float clamp;
      float slope;
   } depth_bias;

   float blend_constants[4];

   struct {
      float min;
      float max;
   } depth_bounds;

   /* Stencil masks, ops and reference values are kept per face. */
   struct {
      uint32_t front;
      uint32_t back;
   } stencil_compare_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_write_mask;

   struct {
      struct {
         VkStencilOp fail_op;
         VkStencilOp pass_op;
         VkStencilOp depth_fail_op;
         VkCompareOp compare_op;
      } front;

      struct {
         VkStencilOp fail_op;
         VkStencilOp pass_op;
         VkStencilOp depth_fail_op;
         VkCompareOp compare_op;
      } back;
   } stencil_op;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_reference;

   struct radv_discard_rectangle_state discard_rectangle;

   struct radv_sample_locations_state sample_location;

   struct {
      uint32_t factor;
      uint16_t pattern;
   } line_stipple;

   VkCullModeFlags cull_mode;
   VkFrontFace front_face;
   /* NOTE(review): stored as plain unsigned rather than a Vk enum; may hold
    * a hardware-encoded value — confirm. */
   unsigned primitive_topology;

   bool depth_test_enable;
   bool depth_write_enable;
   VkCompareOp depth_compare_op;
   bool depth_bounds_test_enable;
   bool stencil_test_enable;

   struct {
      VkExtent2D size;
      VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
   } fragment_shading_rate;

   bool depth_bias_enable;
   bool primitive_restart_enable;
   bool rasterizer_discard_enable;

   /* NOTE(review): plain unsigned; encoding (Vk vs. hardware) not visible
    * here — confirm. */
   unsigned logic_op;

   uint32_t color_write_enable;
};
1325
/* Read-only dynamic state instance defined in another translation unit.
 * NOTE(review): presumably the initial values applied before any dynamic
 * state is set — confirm. */
extern const struct radv_dynamic_state default_dynamic_state;

/* Name lookup for a debug option id. */
const char *radv_get_debug_option_name(int id);

/* Name lookup for a perftest option id. */
const char *radv_get_perftest_option_name(int id);

/* Integer option lookup by name with a caller-supplied fallback value.
 * NOTE(review): presumably reads an environment variable — confirm. */
int radv_get_int_debug_option(const char *name, int default_value);
1333
/* Values for the CB_* registers of one color attachment. Filled in by
 * radv_initialise_color_surface() and stored in radv_attachment_state::cb. */
struct radv_color_buffer_info {
   uint64_t cb_color_base;
   uint64_t cb_color_cmask;
   uint64_t cb_color_fmask;
   uint64_t cb_dcc_base;
   uint32_t cb_color_slice;
   uint32_t cb_color_view;
   uint32_t cb_color_info;
   uint32_t cb_color_attrib;
   uint32_t cb_color_attrib2; /* GFX9 and later */
   uint32_t cb_color_attrib3; /* GFX10 and later */
   uint32_t cb_dcc_control;
   uint32_t cb_color_cmask_slice;
   uint32_t cb_color_fmask_slice;
   /* The same 32-bit slot is interpreted differently per generation. */
   union {
      uint32_t cb_color_pitch; // GFX6-GFX8
      uint32_t cb_mrt_epitch;  // GFX9+
   };
};
1353
/* Values for the DB_* registers of a depth/stencil attachment. Filled in by
 * radv_initialise_ds_surface() / radv_initialise_vrs_surface() and stored in
 * radv_attachment_state::ds. */
struct radv_ds_buffer_info {
   uint64_t db_z_read_base;
   uint64_t db_stencil_read_base;
   uint64_t db_z_write_base;
   uint64_t db_stencil_write_base;
   uint64_t db_htile_data_base;
   uint32_t db_depth_info;
   uint32_t db_z_info;
   uint32_t db_stencil_info;
   uint32_t db_depth_view;
   uint32_t db_depth_size;
   uint32_t db_depth_slice;
   uint32_t db_htile_surface;
   uint32_t pa_su_poly_offset_db_fmt_cntl;
   uint32_t db_z_info2;       /* GFX9 only */
   uint32_t db_stencil_info2; /* GFX9 only */
};
1371
/* Surface-state initialisers: each fills the caller-provided
 * radv_color_buffer_info / radv_ds_buffer_info (`cb` / `ds`) from the given
 * image view, or from an image plus HTILE buffer in the VRS case. */
void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
                                   struct radv_image_view *iview);
void radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
                                struct radv_image_view *iview);
void radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
                                 struct radv_ds_buffer_info *ds);
1378
/**
 * Attachment state when recording a renderpass instance.
 *
 * The clear value is valid only if there exists a pending clear.
 *
 * Instances are referenced through radv_cmd_state::attachments.
 */
struct radv_attachment_state {
   VkImageAspectFlags pending_clear_aspects;
   uint32_t cleared_views;
   VkClearValue clear_value;
   VkImageLayout current_layout;
   VkImageLayout current_stencil_layout;
   bool current_in_render_loop;
   struct radv_sample_locations_state sample_location;

   /* Register state for the attachment: color (cb) or depth/stencil (ds).
    * NOTE(review): which member is valid presumably follows the format of
    * iview — confirm. */
   union {
      struct radv_color_buffer_info cb;
      struct radv_ds_buffer_info ds;
   };
   struct radv_image_view *iview;
};
1399
/* Descriptor binding state for one pipeline bind point; radv_cmd_buffer
 * keeps MAX_BIND_POINTS of these (see radv_get_descriptors_state()). */
struct radv_descriptor_state {
   struct radv_descriptor_set *sets[MAX_SETS];
   /* NOTE(review): dirty/valid are presumably bitmasks indexed by set
    * number — confirm. */
   uint32_t dirty;
   uint32_t valid;
   struct radv_push_descriptor_set push_set;
   bool push_dirty;
   /* Four 32-bit words per dynamic buffer. */
   uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
};
1408
/* Sample locations tied to a subpass index; radv_cmd_state keeps an array
 * of these (subpass_sample_locs / num_subpass_sample_locs). */
struct radv_subpass_sample_locs_state {
   uint32_t subpass_idx;
   struct radv_sample_locations_state sample_location;
};
1413
/* Flush/invalidate flags occupying bits 0..15. Used for
 * radv_cmd_state::sqtt_flush_bits and reported through the sqtt_flush_bits
 * out-parameter of si_cs_emit_cache_flush().
 * NOTE(review): presumably matches the flag layout expected by the RGP
 * profiler — confirm before renumbering. */
enum rgp_flush_bits {
   RGP_FLUSH_WAIT_ON_EOP_TS = 0x1,
   RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2,
   RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4,
   RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8,
   RGP_FLUSH_PFP_SYNC_ME = 0x10,
   RGP_FLUSH_SYNC_CP_DMA = 0x20,
   RGP_FLUSH_INVAL_VMEM_L0 = 0x40,
   RGP_FLUSH_INVAL_ICACHE = 0x80,
   RGP_FLUSH_INVAL_SMEM_L0 = 0x100,
   RGP_FLUSH_FLUSH_L2 = 0x200,
   RGP_FLUSH_INVAL_L2 = 0x400,
   RGP_FLUSH_FLUSH_CB = 0x800,
   RGP_FLUSH_INVAL_CB = 0x1000,
   RGP_FLUSH_FLUSH_DB = 0x2000,
   RGP_FLUSH_INVAL_DB = 0x4000,
   RGP_FLUSH_INVAL_L1 = 0x8000,
};
1432
/* Mutable recording state of a command buffer (radv_cmd_buffer::state):
 * bound pipelines, dynamic state, render pass state, and cached "last
 * emitted" values.
 * NOTE(review): the last_* fields are presumably used to skip redundant
 * register writes — confirm. */
struct radv_cmd_state {
   /* Vertex descriptors */
   uint64_t vb_va;

   bool predicating;
   /* 64-bit because enum radv_cmd_dirty_bits defines flags above bit 31. */
   uint64_t dirty;

   uint32_t prefetch_L2_mask;

   struct radv_graphics_pipeline *graphics_pipeline;
   struct radv_graphics_pipeline *emitted_graphics_pipeline;
   struct radv_compute_pipeline *compute_pipeline;
   struct radv_compute_pipeline *emitted_compute_pipeline;
   struct radv_compute_pipeline *rt_pipeline; /* emitted = emitted_compute_pipeline */
   struct vk_framebuffer *framebuffer;
   struct radv_render_pass *pass;
   const struct radv_subpass *subpass;
   struct radv_dynamic_state dynamic;
   struct radv_vs_input_state dynamic_vs_input;
   struct radv_attachment_state *attachments;
   struct radv_streamout_state streamout;
   VkRect2D render_area;

   /* Array of per-subpass sample locations and its length. */
   uint32_t num_subpass_sample_locs;
   struct radv_subpass_sample_locs_state *subpass_sample_locs;

   /* Index buffer */
   struct radv_buffer *index_buffer;
   uint64_t index_offset;
   uint32_t index_type;
   uint32_t max_index_count;
   uint64_t index_va;
   int32_t last_index_type;

   int32_t last_primitive_reset_en;
   uint32_t last_primitive_reset_index;
   enum radv_cmd_flush_bits flush_bits;
   unsigned active_occlusion_queries;
   bool perfect_occlusion_queries_enabled;
   unsigned active_pipeline_queries;
   unsigned active_pipeline_gds_queries;
   bool prims_gen_query_enabled;
   uint32_t trace_id;
   uint32_t last_ia_multi_vgt_param;

   uint32_t last_num_instances;
   uint32_t last_first_instance;
   uint32_t last_vertex_offset;
   uint32_t last_drawid;
   uint32_t last_subpass_color_count;

   uint32_t last_sx_ps_downconvert;
   uint32_t last_sx_blend_opt_epsilon;
   uint32_t last_sx_blend_opt_control;

   /* Whether CP DMA is busy/idle. */
   bool dma_is_busy;

   /* Whether any images that are not L2 coherent are dirty from the CB. */
   bool rb_noncoherent_dirty;

   /* Conditional rendering info. */
   uint8_t predication_op; /* 32-bit or 64-bit predicate value */
   int predication_type;   /* -1: disabled, 0: normal, 1: inverted */
   uint64_t predication_va;

   /* Inheritance info. */
   VkQueryPipelineStatisticFlags inherited_pipeline_statistics;

   bool context_roll_without_scissor_emitted;

   /* SQTT related state. */
   uint32_t current_event_type;
   uint32_t num_events;
   uint32_t num_layout_transitions;
   bool pending_sqtt_barrier_end;
   enum rgp_flush_bits sqtt_flush_bits;

   /* NGG culling state. */
   uint32_t last_nggc_settings;
   int8_t last_nggc_settings_sgpr_idx;
   bool last_nggc_skip;

   /* Mesh shading state. */
   bool mesh_shading;

   uint8_t cb_mip[MAX_RTS];

   /* Whether DRAW_{INDEX}_INDIRECT_MULTI is emitted. */
   bool uses_draw_indirect_multi;

   uint32_t rt_stack_size;

   /* Vertex shader prolog tracking and vertex buffer alignment masks. */
   struct radv_shader_part *emitted_vs_prolog;
   uint32_t *emitted_vs_prolog_key;
   uint32_t emitted_vs_prolog_key_hash;
   uint32_t vbo_misaligned_mask;
   uint32_t vbo_misaligned_mask_invalid;
   uint32_t vbo_bound_mask;

   /* Whether the cmdbuffer owns the current render pass rather than the app. */
   bool own_render_pass;

   /* Per-vertex VRS state. */
   uint32_t last_vrs_rates;
   int8_t last_vrs_rates_sgpr_idx;

   /* Whether to suspend streamout for internal driver operations. */
   bool suspend_streamout;

   /* Whether this commandbuffer uses performance counters. */
   bool uses_perf_counters;
};
1546
/* Command pool: the common vk_command_pool plus two lists of command
 * buffers.
 * NOTE(review): command buffers are presumably linked through
 * radv_cmd_buffer::pool_link — confirm. */
struct radv_cmd_pool {
   struct vk_command_pool vk;
   struct list_head cmd_buffers;
   struct list_head free_cmd_buffers;
};
1552
/* Upload buffer state of a command buffer (radv_cmd_buffer::upload): a BO,
 * a CPU pointer, the current offset and the total size.
 * NOTE(review): `list` presumably chains older upload buffers that are
 * still referenced — confirm. */
struct radv_cmd_buffer_upload {
   uint8_t *map;
   unsigned offset;
   uint64_t size;
   struct radeon_winsys_bo *upload_bo;
   struct list_head list;
};
1560
/* Lifecycle status of a command buffer (radv_cmd_buffer::status).
 * RADV_CMD_BUFFER_STATUS_INVALID is the zero value, so a zero-initialised
 * command buffer reads as INVALID. */
enum radv_cmd_buffer_status {
   RADV_CMD_BUFFER_STATUS_INVALID,
   RADV_CMD_BUFFER_STATUS_INITIAL,
   RADV_CMD_BUFFER_STATUS_RECORDING,
   RADV_CMD_BUFFER_STATUS_EXECUTABLE,
   RADV_CMD_BUFFER_STATUS_PENDING,
};
1568
/* Cached per-VkFormat vertex format properties.
 * NOTE(review): presumably the element type stored in
 * radv_cmd_buffer::cached_vertex_formats — confirm. */
struct dynamic_vertex_format_cache {
   VkFormat format;
   uint8_t hw_fmt;
   /* Alignment requirement minus one, as the name says, so it can be used
    * directly as a mask. */
   uint8_t fmt_align_req_minus_1;
   uint8_t fmt_size;
   bool post_shuffle;
   bool alpha_adjust_lo;
   bool alpha_adjust_hi;
};
1578
/* Driver command buffer: wraps the common vk_command_buffer and holds the
 * command stream, recording state, bindings, upload buffer and the
 * resources the recorded commands need. */
struct radv_cmd_buffer {
   struct vk_command_buffer vk;

   struct radv_device *device;

   struct radv_cmd_pool *pool;
   struct list_head pool_link;

   struct util_dynarray cached_vertex_formats;
   VkCommandBufferUsageFlags usage_flags;
   enum radv_cmd_buffer_status status;
   struct radeon_cmdbuf *cs;
   struct radv_cmd_state state;
   /* Vertex buffer bindings: buffers and their offset/size/stride are kept
    * in two parallel arrays of MAX_VBS entries. */
   struct radv_buffer *vertex_binding_buffers[MAX_VBS];
   struct radv_vertex_binding vertex_bindings[MAX_VBS];
   uint32_t used_vertex_bindings;
   struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
   enum radv_queue_family qf;

   uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
   VkShaderStageFlags push_constant_stages;
   struct radv_descriptor_set_header meta_push_descriptors;

   /* One descriptor state per bind point; see radv_get_descriptors_state(). */
   struct radv_descriptor_state descriptors[MAX_BIND_POINTS];

   struct radv_cmd_buffer_upload upload;

   /* Resources required by the recorded commands.
    * NOTE(review): presumably consumed at submit time to size scratch and
    * ring buffers — confirm. */
   uint32_t scratch_size_per_wave_needed;
   uint32_t scratch_waves_wanted;
   uint32_t compute_scratch_size_per_wave_needed;
   uint32_t compute_scratch_waves_wanted;
   uint32_t esgs_ring_size_needed;
   uint32_t gsvs_ring_size_needed;
   bool tess_rings_needed;
   bool task_rings_needed;
   bool mesh_scratch_ring_needed;
   bool gds_needed;    /* for GFX10 streamout and NGG GS queries */
   bool gds_oa_needed; /* for GFX10 streamout */
   bool sample_positions_needed;

   VkResult record_result;

   uint64_t gfx9_fence_va;
   uint32_t gfx9_fence_idx;
   uint64_t gfx9_eop_bug_va;

   uint64_t mec_inv_pred_va;  /* For inverted predication when using MEC. */
   bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */

   struct {
      /**
       * Internal command stream that is used when some graphics work
       * also requires a submission to the compute queue.
       */
      struct radeon_cmdbuf *cs;

      /** Flush bits for the internal cmdbuf. */
      enum radv_cmd_flush_bits flush_bits;

      /**
       * For synchronization between the ACE and GFX cmdbuf.
       * The value of this semaphore is incremented whenever we
       * encounter a barrier that affects ACE. At sync points,
       * GFX writes the value to its address, and ACE waits until
       * it detects that the value has been written.
       */
      struct {
         uint64_t va;                    /* Virtual address of the semaphore. */
         uint32_t gfx2ace_value;         /* Current value on GFX. */
         uint32_t emitted_gfx2ace_value; /* Emitted value on GFX. */
      } sem;
   } ace_internal;

   /**
    * Whether a query pool has been reset and we have to flush caches.
    */
   bool pending_reset_query;

   /**
    * Bitmask of pending active query flushes.
    */
   enum radv_cmd_flush_bits active_query_flush_bits;
};
1662
/* Forward declarations for types that the prototypes below only use through
 * pointers. */
struct radv_image;
struct radv_image_view;

/* NOTE(review): presumably reports whether the command buffer targets the
 * compute micro-engine (MEC) — confirm. */
bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);

/* Streamout enable query and emission. */
bool radv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer);
void radv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer);
1670
/* Command-stream emission helpers (si_ and cik_ prefixed). Those taking a
 * radeon_cmdbuf write to that stream directly; those taking a
 * radv_cmd_buffer operate on the command buffer's state. */
void si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs);
void si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs);

void cik_create_gfx_config(struct radv_device *device);

void si_write_scissors(struct radeon_cmdbuf *cs, int first, int count, const VkRect2D *scissors,
                       const VkViewport *viewports, unsigned rast_prim, float line_width);

uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
                                   bool indirect_draw, bool count_from_stream_output,
                                   uint32_t draw_vertex_count, unsigned topology,
                                   bool prim_restart_enable);
void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, bool is_mec,
                                unsigned event, unsigned event_flags, unsigned dst_sel,
                                unsigned data_sel, uint64_t va, uint32_t new_fence,
                                uint64_t gfx9_eop_bug_va);

void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref,
                      uint32_t mask);
/* flush_bits selects the operations to emit; sqtt_flush_bits is an
 * out-parameter of enum rgp_flush_bits flags. */
void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
                            uint32_t *fence_ptr, uint64_t va, bool is_mec,
                            enum radv_cmd_flush_bits flush_bits,
                            enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va);
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible,
                                   unsigned pred_op, uint64_t va);
/* CP DMA copy, prefetch, clear and wait helpers. */
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va,
                           uint64_t size);
void si_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
                           unsigned size, bool predicating);
void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size);
void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size,
                            unsigned value);
void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
1705
/* Miscellaneous command-buffer register helpers. */
void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer, bool enable_occlusion_queries);
uint32_t radv_get_pa_su_sc_mode_cntl(const struct radv_cmd_buffer *cmd_buffer);
uint32_t radv_get_vgt_index_size(uint32_t type);

/* Vertex shader prolog helpers. radv_hash_vs_prolog/radv_cmp_vs_prolog take
 * untyped keys.
 * NOTE(review): presumably hash-table callbacks — confirm. */
unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);
uint32_t radv_hash_vs_prolog(const void *key_);
bool radv_cmp_vs_prolog(const void *a_, const void *b_);

/* Upload-buffer allocation and subpass handling. The bool-returning upload
 * functions report through out_offset (and ptr).
 * NOTE(review): presumably false means allocation failure — confirm. */
bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
                                  unsigned *out_offset, void **ptr);
void radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
                                 const struct radv_subpass *subpass);
void radv_cmd_buffer_restore_subpass(struct radv_cmd_buffer *cmd_buffer,
                                     const struct radv_subpass *subpass);
bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size,
                                 const void *data, unsigned *out_offset);
void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer,
                                   const struct radv_graphics_pipeline *pipeline,
                                   bool full_null_descriptors, void *vb_ptr);
void radv_write_scissors(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs);

/* Subpass clear and resolve entry points; the _cs / _fs suffixes name the
 * compute and fragment shader variants. */
void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
                                           VkImageAspectFlags aspects,
                                           VkResolveModeFlagBits resolve_mode);
void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
                                           VkImageAspectFlags aspects,
                                           VkResolveModeFlagBits resolve_mode);
/* MSAA sample location defaults and device-level MSAA/VRS initialisation. */
void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples);
unsigned radv_get_default_max_sample_dist(int log_samples);
void radv_device_init_msaa(struct radv_device *device);
VkResult radv_device_init_vrs_state(struct radv_device *device);
1741
/* Image metadata updates recorded into a command buffer (clear values, FCE
 * and DCC predicates). */
void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
                                   const struct radv_image_view *iview,
                                   VkClearDepthStencilValue ds_clear_value,
                                   VkImageAspectFlags aspects);

/* color_values carries exactly two 32-bit words. */
void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
                                      const struct radv_image_view *iview, int cb_idx,
                                      uint32_t color_values[2]);

bool radv_image_use_dcc_image_stores(const struct radv_device *device,
                                     const struct radv_image *image);
bool radv_image_use_dcc_predication(const struct radv_device *device,
                                    const struct radv_image *image);

void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                              const VkImageSubresourceRange *range, bool value);

void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                              const VkImageSubresourceRange *range, bool value);
/* Translate source/destination access masks into the flush bits to apply. */
enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
                                               VkAccessFlags2 src_flags,
                                               const struct radv_image *image);
enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
                                               VkAccessFlags2 dst_flags,
                                               const struct radv_image *image);
/* Buffer fill and copy helpers.
 * NOTE(review): the uint32_t returned by radv_fill_buffer is presumably a
 * set of flush bits the caller must apply — confirm. */
uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
                          struct radeon_winsys_bo *bo, uint64_t va, uint64_t size, uint32_t value);
void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
                      struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
                      uint64_t size);

void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
/* Device memory export (result written to *pFD) and release. */
bool radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD);
void radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                      struct radv_device_memory *mem);
1777
1778static inline void
1779radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset, unsigned pointer_count,
1780                              bool use_32bit_pointers)
1781{
1782   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
1783   radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
1784}
1785
1786static inline void
1787radv_emit_shader_pointer_body(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
1788                              bool use_32bit_pointers)
1789{
1790   radeon_emit(cs, va);
1791
1792   if (use_32bit_pointers) {
1793      assert(va == 0 || (va >> 32) == device->physical_device->rad_info.address32_hi);
1794   } else {
1795      radeon_emit(cs, va >> 32);
1796   }
1797}
1798
/**
 * Emit a complete SET_SH_REG packet that writes one shader pointer to
 * sh_offset. A global pointer is emitted as a full 64-bit address; any
 * other pointer is emitted in the 32-bit form.
 */
static inline void
radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset,
                         uint64_t va, bool global)
{
   const bool narrow_pointer = !global;

   /* Header for exactly one pointer, then its payload. */
   radv_emit_shader_pointer_head(cs, sh_offset, 1, narrow_pointer);
   radv_emit_shader_pointer_body(device, cs, va, narrow_pointer);
}
1808
1809static inline struct radv_descriptor_state *
1810radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
1811{
1812   switch (bind_point) {
1813   case VK_PIPELINE_BIND_POINT_GRAPHICS:
1814   case VK_PIPELINE_BIND_POINT_COMPUTE:
1815      return &cmd_buffer->descriptors[bind_point];
1816   case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
1817      return &cmd_buffer->descriptors[2];
1818   default:
1819      unreachable("Unhandled bind point");
1820   }
1821}
1822
/* Writes a three-component scale and translate for the given viewport into
 * the caller-provided arrays. */
void
radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3]);

/*
 * Takes x,y,z as exact numbers of invocations, instead of blocks.
 *
 * Limitations: Can't call normal dispatch functions without binding or rebinding
 *              the compute pipeline.
 */
void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y,
                             uint32_t z);

/* Dispatch whose parameters come from memory at `va` inside `bo`.
 * NOTE(review): the expected layout at va is not visible here — confirm. */
void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo,
                            uint64_t va);
1837
/* Event object: a winsys BO plus a CPU pointer to 64-bit data.
 * NOTE(review): map presumably points at the CPU mapping of bo holding the
 * event status — confirm. */
struct radv_event {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   uint64_t *map;
};
1843
/* Shader hash flag bits. The bit positions are not contiguous: bits 0, 5-7
 * and 9-12 are not defined here.
 * NOTE(review): presumably the values returned by radv_get_hash_flags() and
 * passed as `flags` to radv_hash_shaders()/radv_hash_rt_shaders(); since
 * they feed a hash, renumbering would presumably change cache keys —
 * confirm. */
#define RADV_HASH_SHADER_CS_WAVE32         (1 << 1)
#define RADV_HASH_SHADER_PS_WAVE32         (1 << 2)
#define RADV_HASH_SHADER_GE_WAVE32         (1 << 3)
#define RADV_HASH_SHADER_LLVM              (1 << 4)
#define RADV_HASH_SHADER_KEEP_STATISTICS   (1 << 8)
#define RADV_HASH_SHADER_USE_NGG_CULLING   (1 << 13)
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14)
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15)
#define RADV_HASH_SHADER_EMULATE_RT            (1 << 16)
#define RADV_HASH_SHADER_SPLIT_FMA             (1 << 17)
#define RADV_HASH_SHADER_RT_WAVE64             (1 << 18)
1855
1856struct radv_pipeline_key;
1857
1858void radv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo,
1859                              struct radv_pipeline_stage *out_stage, gl_shader_stage stage);
1860
1861void radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages,
1862                       const struct radv_pipeline_layout *layout,
1863                       const struct radv_pipeline_key *key, uint32_t flags);
1864
1865void radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
1866                          uint32_t flags);
1867
1868uint32_t radv_get_hash_flags(const struct radv_device *device, bool stats);
1869
1870bool radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo);
1871
1872bool radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines);
1873
1874bool radv_emulate_rt(const struct radv_physical_device *pdevice);
1875
1876enum {
1877   RADV_RT_STAGE_BITS = (VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
1878                         VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR |
1879                         VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR)
1880};
1881
/* Mask with one bit set for each of the MESA_VULKAN_SHADER_STAGES stages. */
#define RADV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)

/* Loop header that visits every bit set in (stage_bits & RADV_STAGE_MASK),
 * lowest bit first. `stage` is declared by the macro and holds the index of
 * the current bit; the hidden __tmp holds the bits still to visit and the
 * loop ends once it reaches zero.
 */
#define radv_foreach_stage(stage, stage_bits)                                                      \
   for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits)&RADV_STAGE_MASK);            \
        stage = ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage)))
1887
1888extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS];
1889unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format);
1890
/* Multisample state: five 32-bit fields whose names follow hardware register
 * naming (presumably precomputed register values -- TODO confirm) plus the
 * sample count.
 */
struct radv_multisample_state {
   uint32_t db_eqaa;
   uint32_t pa_sc_mode_cntl_0;
   uint32_t pa_sc_mode_cntl_1;
   uint32_t pa_sc_aa_config;
   uint32_t pa_sc_aa_mask[2];
   unsigned num_samples;
};
1899
/* VRS state: a single 32-bit value named after pa_cl_vrs_cntl. */
struct radv_vrs_state {
   uint32_t pa_cl_vrs_cntl;
};
1903
/* Pair of 8-bit values, `min` and `incr`. NOTE(review): presumably the minimum
 * vertex count of a primitive type and the per-primitive increment -- confirm.
 */
struct radv_prim_vertex_count {
   uint8_t min;
   uint8_t incr;
};
1908
/* Helper values for IA_MULTI_VGT_PARAM: a 32-bit base value, the primitive
 * group size and three boolean switches.
 */
struct radv_ia_multi_vgt_param_helpers {
   uint32_t base;
   bool partial_es_wave;
   uint8_t primgroup_size;
   bool ia_switch_on_eoi;
   bool partial_vs_wave;
};
1916
/* Binning state: a single 32-bit value named after pa_sc_binner_cntl_0. */
struct radv_binning_state {
   uint32_t pa_sc_binner_cntl_0;
};
1920
/* NOTE(review): presumably the GS-per-ES value programmed into the hardware -- confirm at use sites. */
#define SI_GS_PER_ES 128
1922
/* Discriminator stored in radv_pipeline::type; the downcast helpers assert on it. */
enum radv_pipeline_type {
   RADV_PIPELINE_GRAPHICS,
   /* Compute pipeline (incl raytracing pipeline) */
   RADV_PIPELINE_COMPUTE,
   /* Pipeline library. This can't actually run and merely is a partial pipeline. */
   RADV_PIPELINE_LIBRARY
};
1930
/* Handle of one pipeline group: two 32-bit values. NOTE(review): what each of
 * the two slots identifies is not visible here.
 */
struct radv_pipeline_group_handle {
   uint32_t handles[2];
};
1934
/* Stack sizes of a shader, split into a recursive and a non-recursive part. */
struct radv_pipeline_shader_stack_size {
   uint32_t recursive_size;
   /* anyhit + intersection */
   uint32_t non_recursive_size;
};
1940
/* Reference-counted slab holding a pointer to a shader arena block. */
struct radv_pipeline_slab {
   uint32_t ref_count;

   union radv_shader_arena_block *alloc;
};
1946
/* Destroy a pipeline slab. NOTE(review): whether this honours ref_count or
 * frees unconditionally is not visible from this header.
 */
void radv_pipeline_slab_destroy(struct radv_device *device, struct radv_pipeline_slab *slab);
1948
1949struct radv_vertex_input_info {
1950   uint32_t instance_rate_inputs;
1951   uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
1952   uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
1953   uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS];
1954   uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
1955   uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
1956   uint8_t vertex_binding_align[MAX_VBS];
1957   enum radv_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
1958   uint32_t vertex_post_shuffle;
1959   uint32_t binding_stride[MAX_VBS];
1960   uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS];
1961   uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];
1962   uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS];
1963};
1964
/* Input assembly state: the topology stored in 8 bits plus the primitive
 * restart switch.
 */
struct radv_input_assembly_info {
   uint8_t primitive_topology; /* VkPrimitiveTopology */
   bool primitive_restart_enable;
};
1969
1970struct radv_tessellation_info {
1971   uint8_t patch_control_points;
1972   VkTessellationDomainOrigin domain_origin;
1973};
1974
1975struct radv_viewport_info {
1976   bool negative_one_to_one;
1977   uint8_t viewport_count;
1978   uint8_t scissor_count;
1979   VkRect2D scissors[MAX_SCISSORS];
1980   VkViewport viewports[MAX_VIEWPORTS];
1981};
1982
1983struct radv_rasterization_info {
1984   bool discard_enable;
1985   VkFrontFace front_face;
1986   VkCullModeFlags cull_mode;
1987   uint8_t polygon_mode; /* VkPolygonMode */
1988   bool depth_bias_enable;
1989   bool depth_clamp_enable;
1990   float line_width;
1991   float depth_bias_constant_factor;
1992   float depth_bias_clamp;
1993   float depth_bias_slope_factor;
1994   VkConservativeRasterizationModeEXT conservative_mode;
1995   bool provoking_vtx_last;
1996   bool stippled_line_enable;
1997   VkLineRasterizationModeEXT line_raster_mode;
1998   uint32_t line_stipple_factor;
1999   uint16_t line_stipple_pattern;
2000   bool depth_clip_disable;
2001   VkRasterizationOrderAMD order;
2002};
2003
2004struct radv_discard_rectangle_info {
2005   VkDiscardRectangleModeEXT mode;
2006   VkRect2D rects[MAX_DISCARD_RECTANGLES];
2007   uint8_t count;
2008};
2009
2010struct radv_multisample_info {
2011   bool sample_shading_enable;
2012   bool alpha_to_coverage_enable;
2013   bool sample_locs_enable;
2014   VkSampleCountFlagBits raster_samples;
2015   float min_sample_shading;
2016   uint16_t sample_mask;
2017   uint8_t sample_locs_count;
2018   VkSampleCountFlagBits sample_locs_per_pixel;
2019   VkExtent2D sample_locs_grid_size;
2020   VkSampleLocationEXT sample_locs[MAX_SAMPLE_LOCATIONS];
2021};
2022
2023struct radv_stencil_op_info {
2024   VkStencilOp fail_op;
2025   VkStencilOp pass_op;
2026   VkStencilOp depth_fail_op;
2027   VkCompareOp compare_op;
2028   uint8_t compare_mask;
2029   uint8_t write_mask;
2030   uint8_t reference;
2031};
2032
2033struct radv_depth_stencil_info {
2034   bool stencil_test_enable;
2035   bool depth_test_enable;
2036   bool depth_write_enable;
2037   bool depth_bounds_test_enable;
2038   struct {
2039      float min;
2040      float max;
2041   } depth_bounds;
2042   struct radv_stencil_op_info front;
2043   struct radv_stencil_op_info back;
2044   VkCompareOp depth_compare_op;
2045};
2046
2047struct radv_rendering_info {
2048   uint32_t view_mask;
2049   uint32_t color_att_count;
2050   VkFormat color_att_formats[MAX_RTS];
2051   VkFormat depth_att_format;
2052   VkFormat stencil_att_format;
2053};
2054
2055struct radv_color_blend_info {
2056   bool logic_op_enable;
2057   uint8_t att_count;
2058   uint16_t logic_op;
2059   uint32_t color_write_enable;
2060   float blend_constants[4];
2061   struct {
2062      uint8_t color_write_mask;
2063      bool blend_enable;
2064      uint16_t color_blend_op;
2065      uint16_t alpha_blend_op;
2066      uint16_t src_color_blend_factor;
2067      uint16_t dst_color_blend_factor;
2068      uint16_t src_alpha_blend_factor;
2069      uint16_t dst_alpha_blend_factor;
2070   } att[MAX_RTS];
2071};
2072
2073struct radv_fragment_shading_rate_info {
2074   VkExtent2D size;
2075   VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
2076};
2077
2078struct radv_graphics_pipeline_info {
2079   struct radv_vertex_input_info vi;
2080   struct radv_input_assembly_info ia;
2081
2082   struct radv_tessellation_info ts;
2083   struct radv_viewport_info vp;
2084   struct radv_rasterization_info rs;
2085   struct radv_discard_rectangle_info dr;
2086
2087   struct radv_multisample_info ms;
2088   struct radv_depth_stencil_info ds;
2089   struct radv_rendering_info ri;
2090   struct radv_color_blend_info cb;
2091
2092   struct radv_fragment_shading_rate_info fsr;
2093
2094   /* VK_AMD_mixed_attachment_samples */
2095   uint8_t color_att_samples;
2096   uint8_t ds_att_samples;
2097};
2098
/* How depth values are clamped; the numeric values are assigned explicitly. */
enum radv_depth_clamp_mode {
   RADV_DEPTH_CLAMP_MODE_VIEWPORT = 0,    /* Clamp to the viewport min/max depth bounds */
   RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE = 1, /* Clamp between 0.0f and 1.0f */
   RADV_DEPTH_CLAMP_MODE_DISABLED = 2,    /* Disable depth clamping */
};
2104
2105struct radv_pipeline {
2106   struct vk_object_base base;
2107   enum radv_pipeline_type type;
2108
2109   struct radv_device *device;
2110
2111   struct radv_pipeline_slab *slab;
2112   struct radeon_winsys_bo *slab_bo;
2113
2114   bool need_indirect_descriptor_sets;
2115   struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
2116   struct radv_shader *gs_copy_shader;
2117
2118   struct radeon_cmdbuf cs;
2119   uint32_t ctx_cs_hash;
2120   struct radeon_cmdbuf ctx_cs;
2121
2122   uint32_t user_data_0[MESA_VULKAN_SHADER_STAGES];
2123
2124   unsigned max_waves;
2125   unsigned scratch_bytes_per_wave;
2126
2127   /* Unique pipeline hash identifier. */
2128   uint64_t pipeline_hash;
2129
2130   /* Pipeline layout info. */
2131   uint32_t push_constant_size;
2132   uint32_t dynamic_offset_count;
2133};
2134
2135struct radv_graphics_pipeline {
2136   struct radv_pipeline base;
2137
2138   VkShaderStageFlags active_stages;
2139
2140   struct radv_dynamic_state dynamic_state;
2141
2142   uint64_t dynamic_states;
2143   struct radv_multisample_state ms;
2144   struct radv_binning_state binning;
2145   struct radv_vrs_state vrs;
2146   uint32_t spi_baryc_cntl;
2147   unsigned esgs_ring_size;
2148   unsigned gsvs_ring_size;
2149   uint32_t vtx_base_sgpr;
2150   struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
2151   uint8_t vtx_emit_num;
2152   uint64_t needed_dynamic_state;
2153   unsigned tess_patch_control_points;
2154   unsigned pa_su_sc_mode_cntl;
2155   unsigned db_depth_control;
2156   unsigned pa_cl_clip_cntl;
2157   unsigned cb_color_control;
2158   uint32_t binding_stride[MAX_VBS];
2159   uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS];
2160   uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];
2161   uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS];
2162   uint8_t last_vertex_attrib_bit;
2163   uint8_t next_vertex_stage : 8;
2164   uint32_t vb_desc_usage_mask;
2165   uint32_t vb_desc_alloc_size;
2166
2167   /* Last pre-PS API stage */
2168   gl_shader_stage last_vgt_api_stage;
2169
2170   /* Used for rbplus */
2171   uint32_t col_format;
2172   uint32_t cb_target_mask;
2173
2174   bool disable_out_of_order_rast_for_occlusion;
2175   bool uses_drawid;
2176   bool uses_baseinstance;
2177   bool uses_dynamic_stride;
2178   bool uses_conservative_overestimate;
2179   bool negative_one_to_one;
2180   enum radv_depth_clamp_mode depth_clamp_mode;
2181   bool use_per_attribute_vb_descs;
2182   bool can_use_simple_input;
2183   bool uses_user_sample_locations;
2184
2185   /* Whether the pipeline forces per-vertex VRS (GFX10.3+). */
2186   bool force_vrs_per_vertex;
2187
2188   /* Whether the pipeline uses NGG (GFX10+). */
2189   bool is_ngg;
2190   bool has_ngg_culling;
2191
2192   /* Not NULL if graphics pipeline uses streamout. */
2193   struct radv_shader *streamout_shader;
2194
2195   unsigned rast_prim;
2196   float line_width;
2197};
2198
2199struct radv_compute_pipeline {
2200   struct radv_pipeline base;
2201
2202   bool cs_regalloc_hang_bug;
2203
2204   /* Raytracing */
2205   struct radv_pipeline_group_handle *rt_group_handles;
2206   struct radv_pipeline_shader_stack_size *rt_stack_sizes;
2207   bool dynamic_stack_size;
2208   uint32_t group_count;
2209};
2210
2211struct radv_library_pipeline {
2212   struct radv_pipeline base;
2213
2214   unsigned stage_count;
2215   VkPipelineShaderStageCreateInfo *stages;
2216   unsigned group_count;
2217   VkRayTracingShaderGroupCreateInfoKHR *groups;
2218   VkPipelineShaderStageModuleIdentifierCreateInfoEXT *identifiers;
2219   struct {
2220      uint8_t sha1[SHA1_DIGEST_LENGTH];
2221   } *hashes;
2222};
2223
2224#define RADV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)            \
2225   static inline struct radv_##pipe_type##_pipeline *                \
2226   radv_pipeline_to_##pipe_type(struct radv_pipeline *pipeline)      \
2227   {                                                                 \
2228      assert(pipeline->type == pipe_enum);                           \
2229      return (struct radv_##pipe_type##_pipeline *) pipeline;        \
2230   }
2231
2232RADV_DECL_PIPELINE_DOWNCAST(graphics, RADV_PIPELINE_GRAPHICS)
2233RADV_DECL_PIPELINE_DOWNCAST(compute, RADV_PIPELINE_COMPUTE)
2234RADV_DECL_PIPELINE_DOWNCAST(library, RADV_PIPELINE_LIBRARY)
2235
2236struct radv_pipeline_stage {
2237   gl_shader_stage stage;
2238
2239   struct {
2240      const struct vk_object_base *object;
2241      const char *data;
2242      uint32_t size;
2243      unsigned char sha1[20];
2244   } spirv;
2245
2246   const char *entrypoint;
2247   const VkSpecializationInfo *spec_info;
2248
2249   unsigned char shader_sha1[20];
2250
2251   nir_shader *nir;
2252   nir_shader *internal_nir; /* meta shaders */
2253
2254   struct radv_shader_info info;
2255   struct radv_shader_args args;
2256
2257   VkPipelineCreationFeedback feedback;
2258};
2259
2260static inline bool
2261radv_pipeline_has_stage(const struct radv_graphics_pipeline *pipeline, gl_shader_stage stage)
2262{
2263   return pipeline->base.shaders[stage];
2264}
2265
2266bool radv_pipeline_has_ngg_passthrough(const struct radv_graphics_pipeline *pipeline);
2267
2268bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline);
2269
2270struct radv_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
2271                                                 gl_shader_stage stage, int idx);
2272
2273struct radv_shader *radv_get_shader(const struct radv_pipeline *pipeline, gl_shader_stage stage);
2274
2275void radv_pipeline_emit_hw_cs(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
2276                              const struct radv_shader *shader);
2277
2278void radv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice,
2279                                      struct radeon_cmdbuf *cs, const struct radv_shader *shader);
2280
/* Extra, driver-internal creation parameters; passed as the `extra` argument
 * of radv_graphics_pipeline_create().
 */
struct radv_graphics_pipeline_create_info {
   bool use_rectlist;
   bool db_depth_clear;
   bool db_stencil_clear;
   bool depth_compress_disable;
   bool stencil_compress_disable;
   bool resummarize_enable;
   uint32_t custom_blend_mode;
};
2290
2291void radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline,
2292                        enum radv_pipeline_type type);
2293
2294VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache,
2295                                       const VkGraphicsPipelineCreateInfo *pCreateInfo,
2296                                       const struct radv_graphics_pipeline_create_info *extra,
2297                                       const VkAllocationCallbacks *alloc, VkPipeline *pPipeline);
2298
2299VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
2300                                      const VkComputePipelineCreateInfo *pCreateInfo,
2301                                      const VkAllocationCallbacks *pAllocator,
2302                                      const uint8_t *custom_hash,
2303                                      struct radv_pipeline_shader_stack_size *rt_stack_sizes,
2304                                      uint32_t rt_group_count, VkPipeline *pPipeline);
2305
2306void radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
2307                           const VkAllocationCallbacks *allocator);
2308
/* Binning limits; the allowed range of each field is noted next to it. */
struct radv_binning_settings {
   unsigned context_states_per_bin;    /* allowed range: [1, 6] */
   unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
   unsigned fpovs_per_batch;           /* allowed range: [0, 255], 0 = unlimited */
};
2314
/* Return the binning settings for a physical device (returned by value). */
struct radv_binning_settings radv_get_binning_settings(const struct radv_physical_device *pdev);
2316
2317struct vk_format_description;
2318uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc,
2319                                          int first_non_void);
2320uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc,
2321                                         int first_non_void);
2322bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
2323void radv_translate_vertex_format(const struct radv_physical_device *pdevice, VkFormat format,
2324                                  const struct util_format_description *desc, unsigned *dfmt,
2325                                  unsigned *nfmt, bool *post_shuffle,
2326                                  enum radv_vs_input_alpha_adjust *alpha_adjust);
2327uint32_t radv_translate_colorformat(VkFormat format);
2328uint32_t radv_translate_color_numformat(VkFormat format, const struct util_format_description *desc,
2329                                        int first_non_void);
2330uint32_t radv_colorformat_endian_swap(uint32_t colorformat);
2331unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap);
2332uint32_t radv_translate_dbformat(VkFormat format);
2333uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc,
2334                                       int first_non_void);
2335uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc,
2336                                      int first_non_void);
2337bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2],
2338                                  VkClearColorValue *value);
2339bool radv_is_storage_image_format_supported(struct radv_physical_device *physical_device,
2340                                            VkFormat format);
2341bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice,
2342                                          VkFormat format, bool *blendable);
2343bool radv_dcc_formats_compatible(enum amd_gfx_level gfx_level, VkFormat format1, VkFormat format2,
2344                                 bool *sign_reinterpret);
2345bool radv_is_atomic_format_supported(VkFormat format);
2346bool radv_device_supports_etc(struct radv_physical_device *physical_device);
2347
2348static const VkImageUsageFlags RADV_IMAGE_USAGE_WRITE_BITS =
2349   VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
2350   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
2351
2352struct radv_image_plane {
2353   VkFormat format;
2354   struct radeon_surf surface;
2355};
2356
2357struct radv_image_binding {
2358   /* Set when bound */
2359   struct radeon_winsys_bo *bo;
2360   VkDeviceSize offset;
2361};
2362
2363struct radv_image {
2364   struct vk_image vk;
2365
2366   struct ac_surf_info info;
2367
2368   VkDeviceSize size;
2369   uint32_t alignment;
2370
2371   unsigned queue_family_mask;
2372   bool exclusive;
2373   bool shareable;
2374   bool l2_coherent;
2375   bool dcc_sign_reinterpret;
2376   bool support_comp_to_single;
2377
2378   struct radv_image_binding bindings[3];
2379   bool tc_compatible_cmask;
2380
2381   uint64_t clear_value_offset;
2382   uint64_t fce_pred_offset;
2383   uint64_t dcc_pred_offset;
2384
2385   /*
2386    * Metadata for the TC-compat zrange workaround. If the 32-bit value
2387    * stored at this offset is UINT_MAX, the driver will emit
2388    * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
2389    * SET_CONTEXT_REG packet.
2390    */
2391   uint64_t tc_compat_zrange_offset;
2392
2393   /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
2394   VkDeviceMemory owned_memory;
2395
2396   unsigned plane_count;
2397   bool disjoint;
2398   struct radv_image_plane planes[0];
2399};
2400
2401/* Whether the image has a htile  that is known consistent with the contents of
2402 * the image and is allowed to be in compressed form.
2403 *
2404 * If this is false reads that don't use the htile should be able to return
2405 * correct results.
2406 */
2407bool radv_layout_is_htile_compressed(const struct radv_device *device,
2408                                     const struct radv_image *image, VkImageLayout layout,
2409                                     bool in_render_loop, unsigned queue_mask);
2410
2411bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
2412                                unsigned level, VkImageLayout layout, bool in_render_loop,
2413                                unsigned queue_mask);
2414
2415bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
2416                                unsigned level, VkImageLayout layout, bool in_render_loop,
2417                                unsigned queue_mask);
2418
2419bool radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
2420                                  VkImageLayout layout, unsigned queue_mask);
2421
2422/**
2423 * Return whether the image has CMASK metadata for color surfaces.
2424 */
2425static inline bool
2426radv_image_has_cmask(const struct radv_image *image)
2427{
2428   return image->planes[0].surface.cmask_offset;
2429}
2430
2431/**
2432 * Return whether the image has FMASK metadata for color surfaces.
2433 */
2434static inline bool
2435radv_image_has_fmask(const struct radv_image *image)
2436{
2437   return image->planes[0].surface.fmask_offset;
2438}
2439
2440/**
2441 * Return whether the image has DCC metadata for color surfaces.
2442 */
2443static inline bool
2444radv_image_has_dcc(const struct radv_image *image)
2445{
2446   return !(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
2447          image->planes[0].surface.meta_offset;
2448}
2449
2450/**
2451 * Return whether the image is TC-compatible CMASK.
2452 */
2453static inline bool
2454radv_image_is_tc_compat_cmask(const struct radv_image *image)
2455{
2456   return radv_image_has_fmask(image) && image->tc_compatible_cmask;
2457}
2458
2459/**
2460 * Return whether DCC metadata is enabled for a level.
2461 */
2462static inline bool
2463radv_dcc_enabled(const struct radv_image *image, unsigned level)
2464{
2465   return radv_image_has_dcc(image) && level < image->planes[0].surface.num_meta_levels;
2466}
2467
/**
 * Return whether the image has CB metadata, i.e. any of CMASK, FMASK or DCC.
 */
static inline bool
radv_image_has_CB_metadata(const struct radv_image *image)
{
   if (radv_image_has_cmask(image))
      return true;
   if (radv_image_has_fmask(image))
      return true;

   return radv_image_has_dcc(image);
}
2476
2477/**
2478 * Return whether the image has HTILE metadata for depth surfaces.
2479 */
2480static inline bool
2481radv_image_has_htile(const struct radv_image *image)
2482{
2483   return image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER &&
2484          image->planes[0].surface.meta_size;
2485}
2486
2487/**
2488 * Return whether the image has VRS HTILE metadata for depth surfaces
2489 */
2490static inline bool
2491radv_image_has_vrs_htile(const struct radv_device *device, const struct radv_image *image)
2492{
2493   /* Any depth buffer can potentially use VRS. */
2494   return device->attachment_vrs_enabled && radv_image_has_htile(image) &&
2495          (image->vk.usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
2496}
2497
2498/**
2499 * Return whether HTILE metadata is enabled for a level.
2500 */
2501static inline bool
2502radv_htile_enabled(const struct radv_image *image, unsigned level)
2503{
2504   return radv_image_has_htile(image) && level < image->planes[0].surface.num_meta_levels;
2505}
2506
2507/**
2508 * Return whether the image is TC-compatible HTILE.
2509 */
2510static inline bool
2511radv_image_is_tc_compat_htile(const struct radv_image *image)
2512{
2513   return radv_image_has_htile(image) &&
2514          (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
2515}
2516
2517/**
2518 * Return whether the entire HTILE buffer can be used for depth in order to
2519 * improve HiZ Z-Range precision.
2520 */
2521static inline bool
2522radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image)
2523{
2524   if (device->physical_device->rad_info.gfx_level >= GFX9) {
2525      return !vk_format_has_stencil(image->vk.format) && !radv_image_has_vrs_htile(device, image);
2526   } else {
2527      /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
2528       * the TC-compat ZRANGE issue even if no stencil is used.
2529       */
2530      return !vk_format_has_stencil(image->vk.format) && !radv_image_is_tc_compat_htile(image);
2531   }
2532}
2533
2534static inline bool
2535radv_image_has_clear_value(const struct radv_image *image)
2536{
2537   return image->clear_value_offset != 0;
2538}
2539
2540static inline uint64_t
2541radv_image_get_fast_clear_va(const struct radv_image *image, uint32_t base_level)
2542{
2543   assert(radv_image_has_clear_value(image));
2544
2545   uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2546   va += image->bindings[0].offset + image->clear_value_offset + base_level * 8;
2547   return va;
2548}
2549
2550static inline uint64_t
2551radv_image_get_fce_pred_va(const struct radv_image *image, uint32_t base_level)
2552{
2553   assert(image->fce_pred_offset != 0);
2554
2555   uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2556   va += image->bindings[0].offset + image->fce_pred_offset + base_level * 8;
2557   return va;
2558}
2559
2560static inline uint64_t
2561radv_image_get_dcc_pred_va(const struct radv_image *image, uint32_t base_level)
2562{
2563   assert(image->dcc_pred_offset != 0);
2564
2565   uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2566   va += image->bindings[0].offset + image->dcc_pred_offset + base_level * 8;
2567   return va;
2568}
2569
2570static inline uint64_t
2571radv_get_tc_compat_zrange_va(const struct radv_image *image, uint32_t base_level)
2572{
2573   assert(image->tc_compat_zrange_offset != 0);
2574
2575   uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2576   va += image->bindings[0].offset + image->tc_compat_zrange_offset + base_level * 4;
2577   return va;
2578}
2579
2580static inline uint64_t
2581radv_get_ds_clear_value_va(const struct radv_image *image, uint32_t base_level)
2582{
2583   assert(radv_image_has_clear_value(image));
2584
2585   uint64_t va = radv_buffer_get_va(image->bindings[0].bo);
2586   va += image->bindings[0].offset + image->clear_value_offset + base_level * 8;
2587   return va;
2588}
2589
/**
 * Return the 32-bit value used to initialize HTILE for the image.
 *
 * One of three constants is returned, depending on whether the stencil part
 * of HTILE is disabled and whether the image uses VRS HTILE.
 */
static inline uint32_t
radv_get_htile_initial_value(const struct radv_device *device, const struct radv_image *image)
{
   if (radv_image_tile_stencil_disabled(device, image)) {
      /* Z only (no stencil):
       *
       * |31     18|17      4|3     0|
       * +---------+---------+-------+
       * |  Max Z  |  Min Z  | ZMask |
       */
      return 0xfffc000f;
   }

   /* Z and stencil:
    *
    * |31       12|11 10|9    8|7   6|5   4|3     0|
    * +-----------+-----+------+-----+-----+-------+
    * |  Z Range  |     | SMem | SR1 | SR0 | ZMask |
    *
    * SR0/SR1 contains the stencil test results. Initializing
    * SR0/SR1 to 0x3 means the stencil test result is unknown.
    *
    * Z, stencil and 4 bit VRS encoding:
    * |31       12|11        10|9    8|7          6|5   4|3     0|
    * +-----------+------------+------+------------+-----+-------+
    * |  Z Range  | VRS y-rate | SMem | VRS x-rate | SR0 | ZMask |
    */
   if (radv_image_has_vrs_htile(device, image)) {
      /* Initialize the VRS x-rate value at 0, so the hw interprets it as 1 sample. */
      return 0xfffff33f;
   }

   return 0xfffff3ff;
}
2628
2629static inline bool
2630radv_image_get_iterate256(struct radv_device *device, struct radv_image *image)
2631{
2632   /* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */
2633   return device->physical_device->rad_info.gfx_level >= GFX10 &&
2634          (image->vk.usage &
2635           (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
2636          radv_image_is_tc_compat_htile(image) && image->info.samples > 1;
2637}
2638
/* Return a queue family mask for the image. NOTE(review): how `family` and
 * `queue_family` differ is not visible from this header.
 */
unsigned radv_image_queue_family_mask(const struct radv_image *image,
                                      enum radv_queue_family family,
                                      enum radv_queue_family queue_family);
2642
2643static inline uint32_t
2644radv_get_layerCount(const struct radv_image *image, const VkImageSubresourceRange *range)
2645{
2646   return range->layerCount == VK_REMAINING_ARRAY_LAYERS
2647             ? image->info.array_size - range->baseArrayLayer
2648             : range->layerCount;
2649}
2650
2651static inline uint32_t
2652radv_get_levelCount(const struct radv_image *image, const VkImageSubresourceRange *range)
2653{
2654   return range->levelCount == VK_REMAINING_MIP_LEVELS ? image->info.levels - range->baseMipLevel
2655                                                       : range->levelCount;
2656}
2657
/* Return whether the image can be rendered to on this device. */
bool radv_image_is_renderable(struct radv_device *device, struct radv_image *image);

/* Forward declaration; only used through pointers in this header. */
struct radeon_bo_metadata;
/* Fill *metadata from the image. */
void radv_init_metadata(struct radv_device *device, struct radv_image *image,
                        struct radeon_bo_metadata *metadata);

/* Override the offset and stride of the image. NOTE(review): units and which
 * plane(s) are affected are not visible from this header.
 */
void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
                                       uint64_t offset, uint32_t stride);
2666
/* Image descriptor storage, 3 x 8 dwords in total, with two overlapping views:
 *  - plane0_descriptor followed by fmask_descriptor (8 dwords each), or
 *  - three per-plane descriptors of 8 dwords each.
 * plane0_descriptor overlaps plane_descriptors[0] and fmask_descriptor
 * overlaps plane_descriptors[1].
 */
union radv_descriptor {
   struct {
      uint32_t plane0_descriptor[8];
      uint32_t fmask_descriptor[8];
   };
   struct {
      uint32_t plane_descriptors[3][8];
   };
};
2676
2677struct radv_image_view {
2678   struct vk_image_view vk;
2679   struct radv_image *image; /**< VkImageViewCreateInfo::image */
2680
2681   unsigned plane_id;
2682   VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */
2683
2684   /* Whether the image iview supports fast clear. */
2685   bool support_fast_clear;
2686
2687   bool disable_dcc_mrt;
2688
2689   union radv_descriptor descriptor;
2690
2691   /* Descriptor for use as a storage image as opposed to a sampled image.
2692    * This has a few differences for cube maps (e.g. type).
2693    */
2694   union radv_descriptor storage_descriptor;
2695};
2696
2697struct radv_image_create_info {
2698   const VkImageCreateInfo *vk_info;
2699   bool scanout;
2700   bool no_metadata_planes;
2701   bool prime_blit_src;
2702   const struct radeon_bo_metadata *bo_metadata;
2703};
2704
/* Image creation entry points. Note that radv_image_create_layout() takes the
 * create info by value while radv_image_create() takes it by const pointer.
 */
VkResult
radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
                         const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
                         struct radv_image *image);

VkResult radv_image_create(VkDevice _device, const struct radv_image_create_info *info,
                           const VkAllocationCallbacks *alloc, VkImage *pImage);

/* NOTE(review): sign_reinterpret is a non-const bool pointer and looks like
 * an output parameter -- confirm against the definition.
 */
bool radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
                                     VkFormat format, VkImageCreateFlags flags,
                                     bool *sign_reinterpret);

bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format);
2718
/* Android-specific entry points, as indicated by the *ANDROID Vulkan types in
 * their signatures (gralloc native buffers and AHardwareBuffer import/export).
 * Only prototypes are visible in this header section.
 */
VkResult radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
                                 const VkNativeBufferANDROID *gralloc_info,
                                 const VkAllocationCallbacks *alloc, VkImage *out_image_h);
uint64_t radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
                                      const VkImageUsageFlags vk_usage);
VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
                                unsigned priority,
                                const VkImportAndroidHardwareBufferInfoANDROID *info);
VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
                                unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo);

/* NOTE(review): `next` is presumably a Vulkan pNext chain and default_format
 * the fallback result -- confirm against the definition.
 */
VkFormat radv_select_android_external_format(const void *next, VkFormat default_format);

bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage);
2733
/* Driver-internal options passed to radv_image_view_init() alongside the
 * regular VkImageViewCreateInfo.
 */
struct radv_image_view_extra_create_info {
   bool disable_compression;
   bool enable_compression;
   bool disable_dcc_mrt;
   bool from_client; /**< Set only if this came from vkCreateImage */
};
2740
/* Image view lifetime: radv_image_view_init() fills a caller-provided
 * radv_image_view and radv_image_view_finish() is its counterpart.
 * NOTE(review): extra_create_info is presumably optional (NULL allowed) --
 * confirm against the definition.
 */
void radv_image_view_init(struct radv_image_view *view, struct radv_device *device,
                          const VkImageViewCreateInfo *pCreateInfo,
                          VkImageCreateFlags img_create_flags,
                          const struct radv_image_view_extra_create_info *extra_create_info);
void radv_image_view_finish(struct radv_image_view *iview);

/* Returns a VkFormat for the given aspect mask of an image (definition not
 * visible here).
 */
VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask);
2748
/* Plain-data description of a sampler Y'CbCr conversion. Embedded in
 * radv_sampler_ycbcr_conversion, where it is hashed for the descriptor set
 * layout.
 */
struct radv_sampler_ycbcr_conversion_state {
   VkFormat format;
   VkSamplerYcbcrModelConversion ycbcr_model;
   VkSamplerYcbcrRange ycbcr_range;
   VkComponentMapping components;
   /* NOTE(review): presumably [0] = x offset, [1] = y offset -- confirm. */
   VkChromaLocation chroma_offsets[2];
   VkFilter chroma_filter;
};
2757
/* Driver object backing VkSamplerYcbcrConversion (see the handle cast
 * definitions at the end of this header).
 */
struct radv_sampler_ycbcr_conversion {
   struct vk_object_base base;
   /* The state is hashed for the descriptor set layout. */
   struct radv_sampler_ycbcr_conversion_state state;
};
2763
/* Driver object backing VkBufferView (see the handle cast definitions at the
 * end of this header).
 */
struct radv_buffer_view {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   VkFormat vk_format;
   uint64_t range; /**< VkBufferViewCreateInfo::range */
   /* NOTE(review): presumably the four descriptor dwords for this view --
    * confirm against radv_buffer_view_init().
    */
   uint32_t state[4];
};
/* Buffer view lifetime: init fills a caller-provided radv_buffer_view from
 * the Vulkan create info; finish is its counterpart.
 */
void radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
                           const VkBufferViewCreateInfo *pCreateInfo);
void radv_buffer_view_finish(struct radv_buffer_view *view);
2774
2775static inline bool
2776radv_image_extent_compare(const struct radv_image *image, const VkExtent3D *extent)
2777{
2778   if (extent->width != image->info.width || extent->height != image->info.height ||
2779       extent->depth != image->info.depth)
2780      return false;
2781   return true;
2782}
2783
/* Driver object backing VkSampler (see the handle cast definitions at the end
 * of this header).
 */
struct radv_sampler {
   struct vk_object_base base;
   /* NOTE(review): presumably the four sampler descriptor dwords -- confirm. */
   uint32_t state[4];
   /* NOTE(review): presumably NULL when no Y'CbCr conversion is attached --
    * confirm at the creation site.
    */
   struct radv_sampler_ycbcr_conversion *ycbcr_sampler;
   uint32_t border_color_slot;
};
2790
/* Source/destination stage and access masks describing a barrier around a
 * subpass. Consumed by radv_emit_subpass_barrier() and stored as
 * radv_subpass::start_barrier and radv_render_pass::end_barrier.
 */
struct radv_subpass_barrier {
   VkPipelineStageFlags2 src_stage_mask;
   VkPipelineStageFlags2 dst_stage_mask;
   VkAccessFlags2 src_access_mask;
   VkAccessFlags2 dst_access_mask;
};
2797
/* Emits the given subpass barrier on a command buffer (definition not visible
 * in this header section).
 */
void radv_emit_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
                               const struct radv_subpass_barrier *barrier);
2800
/* Reference from a subpass to one render pass attachment, together with the
 * image layouts used for it.
 */
struct radv_subpass_attachment {
   /* NOTE(review): presumably an index into radv_render_pass::attachments,
    * possibly VK_ATTACHMENT_UNUSED -- confirm.
    */
   uint32_t attachment;
   VkImageLayout layout;
   VkImageLayout stencil_layout;
   bool in_render_loop;
};
2807
/* Driver description of one render pass subpass; radv_render_pass stores an
 * array of these in its trailing subpasses[] member.
 */
struct radv_subpass {
   uint32_t attachment_count;
   struct radv_subpass_attachment *attachments;

   /* NOTE(review): input_count / color_count presumably give the lengths of
    * input_attachments / color_attachments -- confirm at the creation site.
    */
   uint32_t input_count;
   uint32_t color_count;
   struct radv_subpass_attachment *input_attachments;
   struct radv_subpass_attachment *color_attachments;
   struct radv_subpass_attachment *resolve_attachments;
   struct radv_subpass_attachment *depth_stencil_attachment;
   struct radv_subpass_attachment *ds_resolve_attachment;
   struct radv_subpass_attachment *vrs_attachment;
   VkResolveModeFlagBits depth_resolve_mode;
   VkResolveModeFlagBits stencil_resolve_mode;

   /** Subpass has at least one color resolve attachment */
   bool has_color_resolve;

   struct radv_subpass_barrier start_barrier;

   uint32_t view_mask;

   VkSampleCountFlagBits color_sample_count;
   VkSampleCountFlagBits depth_sample_count;
   VkSampleCountFlagBits max_sample_count;

   /* Whether the subpass has ingoing/outgoing external dependencies. */
   bool has_ingoing_dep;
   bool has_outgoing_dep;
};
2838
/* Returns a subpass id for the command buffer; NOTE(review): presumably the
 * index of the currently active subpass -- confirm against the definition.
 */
uint32_t radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer);
2840
/* Per-attachment description stored in a render pass: format, sample count,
 * load operations and the initial/final layouts (with separate stencil
 * variants).
 */
struct radv_render_pass_attachment {
   VkFormat format;
   uint32_t samples;
   VkAttachmentLoadOp load_op;
   VkAttachmentLoadOp stencil_load_op;
   VkImageLayout initial_layout;
   VkImageLayout final_layout;
   VkImageLayout stencil_initial_layout;
   VkImageLayout stencil_final_layout;

   /* The subpass id in which the attachment will be used first/last. */
   uint32_t first_subpass_idx;
   uint32_t last_subpass_idx;
};
2855
/* Driver object backing VkRenderPass (see the handle cast definitions at the
 * end of this header).
 */
struct radv_render_pass {
   struct vk_object_base base;
   uint32_t attachment_count;
   uint32_t subpass_count;
   struct radv_subpass_attachment *subpass_attachments;
   struct radv_render_pass_attachment *attachments;
   struct radv_subpass_barrier end_barrier;
   /* Trailing zero-length array. NOTE(review): presumably subpass_count
    * entries are allocated together with the struct -- confirm at the
    * allocation site.
    */
   struct radv_subpass subpasses[0];
};
2865
/* Init/finish pair for the device's "meta" state (definitions not visible in
 * this header section).
 */
VkResult radv_device_init_meta(struct radv_device *device);
void radv_device_finish_meta(struct radv_device *device);
2868
/* Driver object backing VkQueryPool (see the handle cast definitions at the
 * end of this header). Also used as the base of radv_pc_query_pool.
 */
struct radv_query_pool {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   uint32_t stride;
   uint32_t availability_offset;
   uint64_t size;
   /* NOTE(review): presumably a CPU mapping of `bo` -- confirm. */
   char *ptr;
   VkQueryType type;
   uint32_t pipeline_stats_mask;
   bool uses_gds; /* For NGG GS on GFX10+ */
};
2880
/* Opaque here; only pointers to it are used in this header. */
struct radv_perfcounter_impl;

/* Query pool specialization for performance counters. The generic
 * radv_query_pool is embedded as the first member `b`.
 */
struct radv_pc_query_pool {
   struct radv_query_pool b;

   /* NOTE(review): presumably num_pc_regs entries -- confirm. */
   uint32_t *pc_regs;
   unsigned num_pc_regs;

   unsigned num_passes;

   /* NOTE(review): presumably the length of `counters` -- confirm. */
   unsigned num_counters;
   struct radv_perfcounter_impl *counters;
};
2894
/* Performance-counter query pool API: init/deinit of the pool, begin/end of
 * a query at GPU address `va`, and result extraction from `data` into `out`.
 * Definitions are not visible in this header section.
 */
void radv_pc_deinit_query_pool(struct radv_pc_query_pool *pool);
VkResult radv_pc_init_query_pool(struct radv_physical_device *pdevice,
                                 const VkQueryPoolCreateInfo *pCreateInfo,
                                 struct radv_pc_query_pool *pool);
void radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool,
                         uint64_t va);
void radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool,
                       uint64_t va);
void radv_pc_get_results(const struct radv_pc_query_pool *pc_pool, const uint64_t *data, void *out);
2904
/* Queue helpers (definitions not visible here).
 * NOTE(review): the bool result of radv_queue_internal_submit() presumably
 * signals success -- confirm against the definition.
 */
bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs);

/* NOTE(review): the int return convention (0 vs. VkResult value) is not
 * visible here -- confirm against the definition.
 */
int radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
                    const VkDeviceQueueCreateInfo *create_info,
                    const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority);
2910
/* Descriptor set binding/update helpers that operate on a command buffer.
 * Definitions are not visible in this header section.
 */
void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
                             struct radv_descriptor_set *set, unsigned idx);

/* Parameters mirror vkUpdateDescriptorSets (write/copy counts and arrays).
 * NOTE(review): overrideSet presumably replaces the destination set named in
 * the writes -- confirm against the definition.
 */
void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
                                     VkDescriptorSet overrideSet, uint32_t descriptorWriteCount,
                                     const VkWriteDescriptorSet *pDescriptorWrites,
                                     uint32_t descriptorCopyCount,
                                     const VkCopyDescriptorSet *pDescriptorCopies);

void radv_cmd_update_descriptor_set_with_template(struct radv_device *device,
                                                  struct radv_cmd_buffer *cmd_buffer,
                                                  struct radv_descriptor_set *set,
                                                  VkDescriptorUpdateTemplate descriptorUpdateTemplate,
                                                  const void *pData);

void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
                                   VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout,
                                   uint32_t set, uint32_t descriptorWriteCount,
                                   const VkWriteDescriptorSet *pDescriptorWrites);
2930
/* Initialization helpers for image metadata over a subresource range.
 * NOTE(review): the uint32_t return value is presumably a set of flush bits
 * -- confirm against the definitions.
 */
uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                       const VkImageSubresourceRange *range, uint32_t value);

uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                         const VkImageSubresourceRange *range);
2936
/* radv_nir_to_llvm.c */
/* Forward declarations: only pointers to these types are needed below. */
struct radv_shader_args;
struct radv_nir_compiler_options;
struct radv_shader_info;

/* Compiles `shader_count` NIR shaders; `binary` is a pointer-to-pointer and
 * is presumably where the resulting shader binary is returned -- confirm
 * against the definition.
 */
void llvm_compile_shader(const struct radv_nir_compiler_options *options,
                         const struct radv_shader_info *info, unsigned shader_count,
                         struct nir_shader *const *shaders, struct radv_shader_binary **binary,
                         const struct radv_shader_args *args);

/* radv_shader_info.h */
struct radv_shader_info;

/* Shader-info gathering: `info` is the non-const output of the pass, and
 * radv_nir_shader_info_init() prepares it.
 */
void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
                               const struct radv_pipeline_layout *layout,
                               const struct radv_pipeline_key *pipeline_key,
                               struct radv_shader_info *info);

void radv_nir_shader_info_init(struct radv_shader_info *info);
2956
/* Thread trace API: device-level init/finish, per-queue begin/end/get, and
 * user-data emission into a command buffer.
 * NOTE(review): the bool results presumably signal success -- confirm
 * against the definitions.
 */
bool radv_thread_trace_init(struct radv_device *device);
void radv_thread_trace_finish(struct radv_device *device);
bool radv_begin_thread_trace(struct radv_queue *queue);
bool radv_end_thread_trace(struct radv_queue *queue);
bool radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_trace);
/* `data` holds `num_dwords` 32-bit words (per the parameter name). */
void radv_emit_thread_trace_userdata(struct radv_cmd_buffer *cmd_buffer, const void *data,
                                     uint32_t num_dwords);
bool radv_is_instruction_timing_enabled(void);
2965
/* Command-stream emit helpers taking an on/off flag (definitions not visible
 * in this header section).
 */
void radv_emit_inhibit_clockgating(struct radv_device *device, struct radeon_cmdbuf *cs,
                                   bool inhibit);
void radv_emit_spi_config_cntl(struct radv_device *device, struct radeon_cmdbuf *cs, bool enable);

/* NOTE(review): presumably returns false when the copy could not be done via
 * SDMA -- confirm against the definition.
 */
bool radv_sdma_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                          struct radv_buffer *buffer, const VkBufferImageCopy2 *region);
2972
/* radv_sqtt_layer_.c */
/* Set of layout-transition flags passed to radv_describe_layout_transition().
 * The eight one-bit flags plus eight reserved bits occupy 16 bits in total,
 * and `all` gives access to them as a single uint16_t.
 */
struct radv_barrier_data {
   union {
      struct {
         uint16_t depth_stencil_expand : 1;
         uint16_t htile_hiz_range_expand : 1;
         uint16_t depth_stencil_resummarize : 1;
         uint16_t dcc_decompress : 1;
         uint16_t fmask_decompress : 1;
         uint16_t fast_clear_eliminate : 1;
         uint16_t fmask_color_expand : 1;
         uint16_t init_mask_ram : 1;
         uint16_t reserved : 8;
      };
      uint16_t all;
   } layout_transitions;
};
2990
/**
 * Value for the reason field of an RGP barrier start marker originating from
 * the Vulkan client (does not include PAL-defined values). (Table 15)
 *
 * Passed to radv_describe_barrier_start().
 */
enum rgp_barrier_reason {
   RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF,

   /* External app-generated barrier reasons, i.e. API synchronization
    * commands. Range of valid values: [0x00000001 ... 0x7FFFFFFF].
    */
   RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001,
   RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC = 0x00000002,
   RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS = 0x00000003,

   /* Internal barrier reasons, i.e. implicit synchronization inserted by
    * the Vulkan driver. Range of valid values: [0xC0000000 ... 0xFFFFFFFE].
    */
   RGP_BARRIER_INTERNAL_BASE = 0xC0000000,
   RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 0,
   RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 1,
   RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL = RGP_BARRIER_INTERNAL_BASE + 2,
   RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3
};
3014
/* "Describe" hooks taking a command buffer; most come in begin/end (or
 * start/end) pairs. NOTE(review): presumably these emit SQTT/RGP markers
 * (see the radv_sqtt_layer_.c note and rgp_barrier_reason above) -- confirm
 * against the definitions.
 */
void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z);
void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
                                           VkImageAspectFlagBits aspects);
void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
                                 enum rgp_barrier_reason reason);
void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
                                     const struct radv_barrier_data *barrier);
3030
/* Driver object backing VkIndirectCommandsLayoutNV (see the handle cast
 * definitions at the end of this header).
 */
struct radv_indirect_command_layout {
   struct vk_object_base base;

   uint32_t input_stride;
   uint32_t token_count;

   bool indexed;
   bool binds_index_buffer;
   bool binds_state;
   uint16_t draw_params_offset;
   uint16_t index_buffer_offset;

   uint16_t state_offset;

   /* NOTE(review): presumably bit i set means vbo_offsets[i] is valid --
    * confirm at the creation site.
    */
   uint32_t bind_vbo_mask;
   uint32_t vbo_offsets[MAX_VBS];

   /* One offset slot per 32-bit push constant word (array is sized
    * MAX_PUSH_CONSTANTS_SIZE / 4). NOTE(review): the mask presumably selects
    * the valid slots -- confirm.
    */
   uint64_t push_constant_mask;
   uint32_t push_constant_offsets[MAX_PUSH_CONSTANTS_SIZE / 4];

   uint32_t ibo_type_32;
   uint32_t ibo_type_8;

   /* Trailing zero-length array. NOTE(review): presumably token_count
    * entries are allocated together with the struct -- confirm.
    */
   VkIndirectCommandsLayoutTokenNV tokens[0];
};
3056
/* Device-generated-commands helpers driven by VkGeneratedCommandsInfoNV
 * (definitions not visible in this header section).
 */
uint32_t radv_get_indirect_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info);

void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer,
                      const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo);

/* NOTE(review): time unit and clock source are not visible here -- confirm
 * against the definition.
 */
uint64_t radv_get_current_time(void);
3063
3064static inline uint32_t
3065si_conv_gl_prim_to_vertices(enum shader_prim gl_prim)
3066{
3067   switch (gl_prim) {
3068   case SHADER_PRIM_POINTS:
3069      return 1;
3070   case SHADER_PRIM_LINES:
3071   case SHADER_PRIM_LINE_STRIP:
3072      return 2;
3073   case SHADER_PRIM_TRIANGLES:
3074   case SHADER_PRIM_TRIANGLE_STRIP:
3075      return 3;
3076   case SHADER_PRIM_LINES_ADJACENCY:
3077      return 4;
3078   case SHADER_PRIM_TRIANGLES_ADJACENCY:
3079      return 6;
3080   case SHADER_PRIM_QUADS:
3081      return V_028A6C_TRISTRIP;
3082   default:
3083      assert(0);
3084      return 0;
3085   }
3086}
3087
3088static inline uint32_t
3089si_conv_prim_to_gs_out(uint32_t topology)
3090{
3091   switch (topology) {
3092   case V_008958_DI_PT_POINTLIST:
3093   case V_008958_DI_PT_PATCH:
3094      return V_028A6C_POINTLIST;
3095   case V_008958_DI_PT_LINELIST:
3096   case V_008958_DI_PT_LINESTRIP:
3097   case V_008958_DI_PT_LINELIST_ADJ:
3098   case V_008958_DI_PT_LINESTRIP_ADJ:
3099      return V_028A6C_LINESTRIP;
3100   case V_008958_DI_PT_TRILIST:
3101   case V_008958_DI_PT_TRISTRIP:
3102   case V_008958_DI_PT_TRIFAN:
3103   case V_008958_DI_PT_TRILIST_ADJ:
3104   case V_008958_DI_PT_TRISTRIP_ADJ:
3105      return V_028A6C_TRISTRIP;
3106   default:
3107      assert(0);
3108      return 0;
3109   }
3110}
3111
3112static inline uint32_t
3113si_translate_prim(unsigned topology)
3114{
3115   switch (topology) {
3116   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
3117      return V_008958_DI_PT_POINTLIST;
3118   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
3119      return V_008958_DI_PT_LINELIST;
3120   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
3121      return V_008958_DI_PT_LINESTRIP;
3122   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
3123      return V_008958_DI_PT_TRILIST;
3124   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
3125      return V_008958_DI_PT_TRISTRIP;
3126   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
3127      return V_008958_DI_PT_TRIFAN;
3128   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
3129      return V_008958_DI_PT_LINELIST_ADJ;
3130   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
3131      return V_008958_DI_PT_LINESTRIP_ADJ;
3132   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
3133      return V_008958_DI_PT_TRILIST_ADJ;
3134   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
3135      return V_008958_DI_PT_TRISTRIP_ADJ;
3136   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
3137      return V_008958_DI_PT_PATCH;
3138   default:
3139      unreachable("unhandled primitive type");
3140   }
3141}
3142
3143static inline bool
3144radv_prim_is_points_or_lines(unsigned topology)
3145{
3146   switch (topology) {
3147   case V_008958_DI_PT_POINTLIST:
3148   case V_008958_DI_PT_LINELIST:
3149   case V_008958_DI_PT_LINESTRIP:
3150   case V_008958_DI_PT_LINELIST_ADJ:
3151   case V_008958_DI_PT_LINESTRIP_ADJ:
3152      return true;
3153   default:
3154      return false;
3155   }
3156}
3157
3158static inline bool
3159radv_rast_prim_is_point(unsigned rast_prim)
3160{
3161   return rast_prim == V_028A6C_POINTLIST;
3162}
3163
3164static inline bool
3165radv_rast_prim_is_line(unsigned rast_prim)
3166{
3167   return rast_prim == V_028A6C_LINESTRIP;
3168}
3169
/** True when the rasterized primitive is either a point or a line. */
static inline bool
radv_rast_prim_is_points_or_lines(unsigned rast_prim)
{
   if (radv_rast_prim_is_point(rast_prim))
      return true;

   return radv_rast_prim_is_line(rast_prim);
}
3175
3176static inline uint32_t
3177si_translate_stencil_op(enum VkStencilOp op)
3178{
3179   switch (op) {
3180   case VK_STENCIL_OP_KEEP:
3181      return V_02842C_STENCIL_KEEP;
3182   case VK_STENCIL_OP_ZERO:
3183      return V_02842C_STENCIL_ZERO;
3184   case VK_STENCIL_OP_REPLACE:
3185      return V_02842C_STENCIL_REPLACE_TEST;
3186   case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
3187      return V_02842C_STENCIL_ADD_CLAMP;
3188   case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
3189      return V_02842C_STENCIL_SUB_CLAMP;
3190   case VK_STENCIL_OP_INVERT:
3191      return V_02842C_STENCIL_INVERT;
3192   case VK_STENCIL_OP_INCREMENT_AND_WRAP:
3193      return V_02842C_STENCIL_ADD_WRAP;
3194   case VK_STENCIL_OP_DECREMENT_AND_WRAP:
3195      return V_02842C_STENCIL_SUB_WRAP;
3196   default:
3197      return 0;
3198   }
3199}
3200
3201static inline uint32_t
3202si_translate_blend_logic_op(VkLogicOp op)
3203{
3204   switch (op) {
3205   case VK_LOGIC_OP_CLEAR:
3206      return V_028808_ROP3_CLEAR;
3207   case VK_LOGIC_OP_AND:
3208      return V_028808_ROP3_AND;
3209   case VK_LOGIC_OP_AND_REVERSE:
3210      return V_028808_ROP3_AND_REVERSE;
3211   case VK_LOGIC_OP_COPY:
3212      return V_028808_ROP3_COPY;
3213   case VK_LOGIC_OP_AND_INVERTED:
3214      return V_028808_ROP3_AND_INVERTED;
3215   case VK_LOGIC_OP_NO_OP:
3216      return V_028808_ROP3_NO_OP;
3217   case VK_LOGIC_OP_XOR:
3218      return V_028808_ROP3_XOR;
3219   case VK_LOGIC_OP_OR:
3220      return V_028808_ROP3_OR;
3221   case VK_LOGIC_OP_NOR:
3222      return V_028808_ROP3_NOR;
3223   case VK_LOGIC_OP_EQUIVALENT:
3224      return V_028808_ROP3_EQUIVALENT;
3225   case VK_LOGIC_OP_INVERT:
3226      return V_028808_ROP3_INVERT;
3227   case VK_LOGIC_OP_OR_REVERSE:
3228      return V_028808_ROP3_OR_REVERSE;
3229   case VK_LOGIC_OP_COPY_INVERTED:
3230      return V_028808_ROP3_COPY_INVERTED;
3231   case VK_LOGIC_OP_OR_INVERTED:
3232      return V_028808_ROP3_OR_INVERTED;
3233   case VK_LOGIC_OP_NAND:
3234      return V_028808_ROP3_NAND;
3235   case VK_LOGIC_OP_SET:
3236      return V_028808_ROP3_SET;
3237   default:
3238      unreachable("Unhandled logic op");
3239   }
3240}
3241
3242/*
3243 * Queue helper to get ring.
3244 * placed here as it needs queue + device structs.
3245 */
3246static inline enum amd_ip_type
3247radv_queue_ring(struct radv_queue *queue)
3248{
3249   return radv_queue_family_to_ring(queue->device->physical_device, queue->state.qf);
3250}
3251
3252/**
3253 * Helper used for debugging compiler issues by enabling/disabling LLVM for a
3254 * specific shader stage (developers only).
3255 */
3256static inline bool
3257radv_use_llvm_for_stage(struct radv_device *device, UNUSED gl_shader_stage stage)
3258{
3259   return device->physical_device->use_llvm;
3260}
3261
3262static inline bool
3263radv_has_shader_buffer_float_minmax(const struct radv_physical_device *pdevice)
3264{
3265   return (pdevice->rad_info.gfx_level <= GFX7 && !pdevice->use_llvm) ||
3266          pdevice->rad_info.gfx_level >= GFX10;
3267}
3268
/* Driver object backing VkAccelerationStructureKHR (see the handle cast
 * definitions at the end of this header).
 */
struct radv_acceleration_structure {
   struct vk_object_base base;

   struct radeon_winsys_bo *bo;
   /* Added to the BO's address by radv_accel_struct_get_va(). */
   uint64_t mem_offset;
   uint64_t size;
};
3276
3277static inline uint64_t
3278radv_accel_struct_get_va(const struct radv_acceleration_structure *accel)
3279{
3280   return radv_buffer_get_va(accel->bo) + accel->mem_offset;
3281}
3282
/* radv_perfcounter.c */
/* Command-stream emit helpers for performance counters / SPM. The `family`
 * argument of the start/stop helpers is presumably a queue family index --
 * confirm against the definitions.
 */
void radv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders);
void radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs);
void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs,
                                     int family);
void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs,
                                    int family);

/* radv_spm.c */
/* SPM init/finish pair plus setup emission into a command stream. */
bool radv_spm_init(struct radv_device *device);
void radv_spm_finish(struct radv_device *device);
void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs);
3295
/* Driver-prefixed alias that forwards its three arguments unchanged to
 * VK_FROM_HANDLE.
 */
#define RADV_FROM_HANDLE(__radv_type, __name, __handle) \
   VK_FROM_HANDLE(__radv_type, __name, __handle)
3298
3299VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer,
3300                       VK_OBJECT_TYPE_COMMAND_BUFFER)
3301VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
3302VK_DEFINE_HANDLE_CASTS(radv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
3303VK_DEFINE_HANDLE_CASTS(radv_physical_device, vk.base, VkPhysicalDevice,
3304                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
3305VK_DEFINE_HANDLE_CASTS(radv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
3306VK_DEFINE_NONDISP_HANDLE_CASTS(radv_acceleration_structure, base,
3307                               VkAccelerationStructureKHR,
3308                               VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
3309VK_DEFINE_NONDISP_HANDLE_CASTS(radv_cmd_pool, vk.base, VkCommandPool,
3310                               VK_OBJECT_TYPE_COMMAND_POOL)
3311VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, vk.base, VkBuffer, VK_OBJECT_TYPE_BUFFER)
3312VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, base, VkBufferView,
3313                               VK_OBJECT_TYPE_BUFFER_VIEW)
3314VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool,
3315                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
3316VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, header.base, VkDescriptorSet,
3317                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
3318VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, vk.base, VkDescriptorSetLayout,
3319                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
3320VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, base,
3321                               VkDescriptorUpdateTemplate,
3322                               VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
3323VK_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, base, VkDeviceMemory,
3324                               VK_OBJECT_TYPE_DEVICE_MEMORY)
3325VK_DEFINE_NONDISP_HANDLE_CASTS(radv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
3326VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
3327VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, vk.base, VkImageView,
3328                               VK_OBJECT_TYPE_IMAGE_VIEW);
3329VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_command_layout, base, VkIndirectCommandsLayoutNV,
3330                               VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV)
3331VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_cache, base, VkPipelineCache,
3332                               VK_OBJECT_TYPE_PIPELINE_CACHE)
3333VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline,
3334                               VK_OBJECT_TYPE_PIPELINE)
3335VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, base, VkPipelineLayout,
3336                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
3337VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, base, VkQueryPool,
3338                               VK_OBJECT_TYPE_QUERY_POOL)
3339VK_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, base, VkRenderPass,
3340                               VK_OBJECT_TYPE_RENDER_PASS)
3341VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, base, VkSampler,
3342                               VK_OBJECT_TYPE_SAMPLER)
3343VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler_ycbcr_conversion, base,
3344                               VkSamplerYcbcrConversion,
3345                               VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
3346
3347#ifdef __cplusplus
3348}
3349#endif
3350
3351#endif /* RADV_PRIVATE_H */
3352