/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef ANV_PRIVATE_H
#define ANV_PRIVATE_H

#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <pthread.h>
#include <assert.h>
#include <stdint.h>
#include "drm-uapi/i915_drm.h"
#include "drm-uapi/drm_fourcc.h"

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#ifndef NDEBUG
#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
#endif
#else
#define VG(x) ((void)0)
#endif

#include "common/intel_clflush.h"
#include "common/intel_decoder.h"
#include "common/intel_gem.h"
#include "common/intel_l3_config.h"
#include "common/intel_measure.h"
#include "common/intel_sample_positions.h"
#include "dev/intel_device_info.h"
#include "blorp/blorp.h"
#include "compiler/brw_compiler.h"
#include "compiler/brw_rt.h"
#include "ds/intel_driver_ds.h"
#include "util/bitset.h"
#include "util/bitscan.h"
#include "util/macros.h"
#include "util/hash_table.h"
#include "util/list.h"
#include "util/perf/u_trace.h"
#include "util/sparse_array.h"
#include "util/u_atomic.h"
#include "util/u_vector.h"
#include "util/u_math.h"
#include "util/vma.h"
#include "util/xmlconfig.h"
#include "vk_alloc.h"
#include "vk_buffer.h"
#include "vk_command_buffer.h"
#include "vk_command_pool.h"
#include "vk_debug_report.h"
#include "vk_device.h"
#include "vk_drm_syncobj.h"
#include "vk_enum_defines.h"
#include "vk_framebuffer.h"
#include "vk_graphics_state.h"
#include "vk_image.h"
#include "vk_instance.h"
#include "vk_pipeline_cache.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_sync.h"
#include "vk_sync_timeline.h"
#include "vk_util.h"
#include "vk_queue.h"
#include "vk_log.h"

/* Pre-declarations needed for WSI entrypoints */
struct wl_surface;
struct wl_display;
typedef struct xcb_connection_t xcb_connection_t;
typedef uint32_t xcb_visualid_t;
typedef uint32_t xcb_window_t;

struct anv_batch;
struct anv_buffer;
struct anv_buffer_view;
struct anv_image_view;
struct anv_acceleration_structure;
struct anv_instance;

struct intel_aux_map_context;
struct intel_perf_config;
struct intel_perf_counter_pass;
struct intel_perf_query_result;

#include <vulkan/vulkan.h>
#include <vulkan/vk_icd.h>

#include "anv_android.h"
#include "anv_entrypoints.h"
#include "isl/isl.h"

#include "dev/intel_debug.h"
#undef MESA_LOG_TAG
#define MESA_LOG_TAG "MESA-INTEL"
#include "util/log.h"
#include "wsi_common.h"

#define NSEC_PER_SEC 1000000000ull

/* anv Virtual Memory Layout
 * =========================
 *
 * When the anv driver is determining the virtual graphics addresses of memory
 * objects itself using the softpin mechanism, the following memory ranges
 * will be used.
 *
 * Three special considerations to notice:
 *
 * (1) the dynamic state pool is located within the same 4 GiB as the low
 * heap. This is to work around a VF cache issue described in a comment in
 * anv_physical_device_init_heaps.
 *
 * (2) the binding table pool is located at lower addresses than the surface
 * state pool, within a 4 GiB range. This allows surface state base addresses
 * to cover both binding tables (16 bit offsets) and surface states (32 bit
 * offsets).
 *
 * (3) the last 4 GiB of the address space is withheld from the high
 * heap. Various hardware units will read past the end of an object for
 * various reasons. This healthy margin prevents reads from wrapping around
 * 48-bit addresses.
 */
#define GENERAL_STATE_POOL_MIN_ADDRESS     0x000000200000ULL /* 2 MiB */
#define GENERAL_STATE_POOL_MAX_ADDRESS     0x00003fffffffULL
#define LOW_HEAP_MIN_ADDRESS               0x000040000000ULL /* 1 GiB */
#define LOW_HEAP_MAX_ADDRESS               0x00007fffffffULL
#define DYNAMIC_STATE_POOL_MIN_ADDRESS     0x0000c0000000ULL /* 3 GiB */
#define DYNAMIC_STATE_POOL_MAX_ADDRESS     0x0000ffffffffULL
#define BINDING_TABLE_POOL_MIN_ADDRESS     0x000100000000ULL /* 4 GiB */
#define BINDING_TABLE_POOL_MAX_ADDRESS     0x00013fffffffULL
#define SURFACE_STATE_POOL_MIN_ADDRESS     0x000140000000ULL /* 5 GiB */
#define SURFACE_STATE_POOL_MAX_ADDRESS     0x00017fffffffULL
#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS    0x0001c0000000ULL /* 7 GiB */
#define CLIENT_VISIBLE_HEAP_MAX_ADDRESS    0x0002bfffffffULL
#define HIGH_HEAP_MIN_ADDRESS              0x0002c0000000ULL /* 11 GiB */

#define GENERAL_STATE_POOL_SIZE     \
   (GENERAL_STATE_POOL_MAX_ADDRESS - GENERAL_STATE_POOL_MIN_ADDRESS + 1)
#define LOW_HEAP_SIZE               \
   (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
#define DYNAMIC_STATE_POOL_SIZE     \
   (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
#define BINDING_TABLE_POOL_SIZE     \
   (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
#define BINDING_TABLE_POOL_BLOCK_SIZE (65536)
#define SURFACE_STATE_POOL_SIZE     \
   (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
#define INSTRUCTION_STATE_POOL_SIZE \
   (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
#define CLIENT_VISIBLE_HEAP_SIZE               \
   (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1)
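
/* Illustrative sketch (not part of the driver): the layout notes above can
 * be restated as compile-time checks.  STATIC_ASSERT here is assumed to be
 * the helper from util/macros.h; the conditions only re-express points (1)
 * and (2) of the comment.
 */
#if 0
STATIC_ASSERT(DYNAMIC_STATE_POOL_MAX_ADDRESS < (1ull << 32) &&
              LOW_HEAP_MAX_ADDRESS < (1ull << 32));            /* note (1) */
STATIC_ASSERT(BINDING_TABLE_POOL_MIN_ADDRESS < SURFACE_STATE_POOL_MIN_ADDRESS &&
              SURFACE_STATE_POOL_MAX_ADDRESS -
              BINDING_TABLE_POOL_MIN_ADDRESS < (1ull << 32));  /* note (2) */
#endif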

/* Allowing different clear colors requires us to perform a depth resolve at
 * the end of certain render passes. This is because while slow clears store
 * the clear color in the HiZ buffer, fast clears (without a resolve) don't.
 * See the PRMs for examples describing when additional resolves would be
 * necessary. To enable fast clears without requiring extra resolves, we set
 * the clear value to a globally-defined one. We could allow different values
 * if the user doesn't expect coherent data during or after a render pass
 * (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS)
 * don't seem to exist yet. In almost all Vulkan applications tested thus far,
 * 1.0f seems to be the only value used. The only application that doesn't set
 * this value does so through the usage of a seemingly uninitialized clear
 * value.
 */
#define ANV_HZ_FC_VAL 1.0f

/* 3DSTATE_VERTEX_BUFFER supports 33 VBs, we use 2 for base & drawid SGVs */
#define MAX_VBS         (33 - 2)

/* 3DSTATE_VERTEX_ELEMENTS supports up to 34 VEs, but our backend compiler
 * only supports the push model of VS inputs, and we only have 128 GRFs,
 * minus the g0 and g1 payload, which gives us a maximum of 31 VEs.  Plus,
 * we use two of them for SGVs.
 */
#define MAX_VES         (31 - 2)

#define MAX_XFB_BUFFERS  4
#define MAX_XFB_STREAMS  4
#define MAX_SETS        32
#define MAX_RTS          8
#define MAX_VIEWPORTS   16
#define MAX_SCISSORS    16
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_DYNAMIC_BUFFERS 16
#define MAX_IMAGES 64
#define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
#define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
/* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
 * use 64 here to avoid cache issues. This could most likely bring it back to
 * 32 if we had different virtual addresses for the different views on a given
 * GEM object.
 */
#define ANV_UBO_ALIGNMENT 64
#define ANV_SSBO_ALIGNMENT 4
#define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
#define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
#define MAX_SAMPLE_LOCATIONS 16

/* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
 *
 *    "The surface state model is used when a Binding Table Index (specified
 *    in the message descriptor) of less than 240 is specified. In this model,
 *    the Binding Table Index is used to index into the binding table, and the
 *    binding table entry contains a pointer to the SURFACE_STATE."
 *
 * Binding table values of 240 and above are used for various things in the
 * hardware such as stateless, stateless with incoherent cache, SLM, and
 * bindless.
 */
#define MAX_BINDING_TABLE_SIZE 240

/* The kernel relocation API has a limitation of a 32-bit delta value
 * applied to the address before it is written which, in spite of it being
 * unsigned, is treated as signed.  Because of the way that this maps to
 * the Vulkan API, we cannot handle an offset into a buffer that does not
 * fit into a signed 32 bits.  The only mechanism we have for dealing with
 * this at the moment is to limit all VkDeviceMemory objects to a maximum
 * of 2GB each.  The Vulkan spec allows us to do this:
 *
 *    "Some platforms may have a limit on the maximum size of a single
 *    allocation. For example, certain systems may fail to create
 *    allocations with a size greater than or equal to 4GB. Such a limit is
 *    implementation-dependent, and if such a failure occurs then the error
 *    VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned."
 */
#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 31)

#define ANV_SVGS_VB_INDEX    MAX_VBS
#define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)

/* We reserve this MI ALU register for the purpose of handling predication.
 * Other code which uses the MI ALU should leave it alone.
 */
#define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */

/* We reserve this MI ALU register to pass around an offset computed from
 * VkPerformanceQuerySubmitInfoKHR::counterPassIndex (VK_KHR_performance_query).
 * Other code which uses the MI ALU should leave it alone.
 */
#define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */

#define ANV_GRAPHICS_SHADER_STAGE_COUNT (MESA_SHADER_MESH + 1)

/* For gfx12 we set the streamout buffers using 4 separate commands
 * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
 * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
 * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
 * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
 * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
 * 3DSTATE_SO_BUFFER_INDEX_0.
 */
#define SO_BUFFER_INDEX_0_CMD 0x60
#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))

static inline uint32_t
align_down_npot_u32(uint32_t v, uint32_t a)
{
   return v - (v % a);
}

static inline uint32_t
align_down_u32(uint32_t v, uint32_t a)
{
   assert(a != 0 && a == (a & -a));
   return v & ~(a - 1);
}

static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   assert(a != 0 && a == (a & -a));
   return align_down_u32(v + a - 1, a);
}

static inline uint64_t
align_down_u64(uint64_t v, uint64_t a)
{
   assert(a != 0 && a == (a & -a));
   return v & ~(a - 1);
}

static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
   return align_down_u64(v + a - 1, a);
}

static inline int32_t
align_i32(int32_t v, int32_t a)
{
   assert(a != 0 && a == (a & -a));
   return (v + a - 1) & ~(a - 1);
}
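
/* Usage sketch (illustrative only) for the alignment helpers above, using the
 * 64-byte ANV_UBO_ALIGNMENT defined earlier:
 *
 *    align_u32(100, ANV_UBO_ALIGNMENT)      == 128
 *    align_down_u32(100, ANV_UBO_ALIGNMENT) == 64
 *    align_down_npot_u32(100, 48)           == 96   (non-power-of-two step)
 */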

/** Alignment must be a power of 2. */
static inline bool
anv_is_aligned(uintmax_t n, uintmax_t a)
{
   assert(a == (a & -a));
   return (n & (a - 1)) == 0;
}

static inline uint32_t
anv_minify(uint32_t n, uint32_t levels)
{
   if (unlikely(n == 0))
      return 0;
   else
      return MAX2(n >> levels, 1);
}

static inline float
anv_clamp_f(float f, float min, float max)
{
   assert(min < max);

   if (f > max)
      return max;
   else if (f < min)
      return min;
   else
      return f;
}

static inline bool
anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
{
   if (*inout_mask & clear_mask) {
      *inout_mask &= ~clear_mask;
      return true;
   } else {
      return false;
   }
}

static inline union isl_color_value
vk_to_isl_color(VkClearColorValue color)
{
   return (union isl_color_value) {
      .u32 = {
         color.uint32[0],
         color.uint32[1],
         color.uint32[2],
         color.uint32[3],
      },
   };
}

static inline union isl_color_value
vk_to_isl_color_with_format(VkClearColorValue color, enum isl_format format)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(format);
   union isl_color_value isl_color = { .u32 = {0, } };

#define COPY_COLOR_CHANNEL(c, i) \
   if (fmtl->channels.c.bits) \
      isl_color.u32[i] = color.uint32[i]

   COPY_COLOR_CHANNEL(r, 0);
   COPY_COLOR_CHANNEL(g, 1);
   COPY_COLOR_CHANNEL(b, 2);
   COPY_COLOR_CHANNEL(a, 3);

#undef COPY_COLOR_CHANNEL

   return isl_color;
}

static inline void *anv_unpack_ptr(uintptr_t ptr, int bits, int *flags)
{
   uintptr_t mask = (1ull << bits) - 1;
   *flags = ptr & mask;
   return (void *) (ptr & ~mask);
}

static inline uintptr_t anv_pack_ptr(void *ptr, int bits, int flags)
{
   uintptr_t value = (uintptr_t) ptr;
   uintptr_t mask = (1ull << bits) - 1;
   return value | (mask & flags);
}
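
/* Illustrative sketch: anv_pack_ptr()/anv_unpack_ptr() implement simple
 * pointer tagging.  Assuming the pointer is aligned to at least (1 << bits),
 * the low bits carry flags and the original pointer is recovered on unpack.
 */
#if 0
   uintptr_t packed = anv_pack_ptr(ptr, 2, 0x3);   /* ptr must be 4-byte aligned */
   int flags;
   void *orig = anv_unpack_ptr(packed, 2, &flags); /* orig == ptr, flags == 0x3 */
#endif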

/**
 * Warn on ignored extension structs.
 *
 * The Vulkan spec requires us to ignore unsupported or unknown structs in
 * a pNext chain.  In debug mode, emitting warnings for ignored structs may
 * help us discover structs that we should not have ignored.
 *
 *
 * From the Vulkan 1.0.38 spec:
 *
 *    Any component of the implementation (the loader, any enabled layers,
 *    and drivers) must skip over, without processing (other than reading the
 *    sType and pNext members) any chained structures with sType values not
 *    defined by extensions supported by that component.
 */
#define anv_debug_ignored_stype(sType) \
   mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))

void __anv_perf_warn(struct anv_device *device,
                     const struct vk_object_base *object,
                     const char *file, int line, const char *format, ...)
   anv_printflike(5, 6);

/**
 * Print a FINISHME message, including its source location.
 */
#define anv_finishme(format, ...) \
   do { \
      static bool reported = false; \
      if (!reported) { \
         mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
                    ##__VA_ARGS__); \
         reported = true; \
      } \
   } while (0)

/**
 * Print a perf warning message.  Set INTEL_DEBUG=perf to see these.
 */
#define anv_perf_warn(objects_macro, format, ...)   \
   do { \
      static bool reported = false; \
      if (!reported && INTEL_DEBUG(DEBUG_PERF)) { \
         __vk_log(VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT,      \
                  VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,      \
                  objects_macro, __FILE__, __LINE__,                    \
                  format, ## __VA_ARGS__);                              \
         reported = true; \
      } \
   } while (0)

/* A non-fatal assert.  Useful for debugging. */
#ifdef DEBUG
#define anv_assert(x) ({ \
   if (unlikely(!(x))) \
      mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
})
#else
#define anv_assert(x)
#endif

struct anv_bo {
   const char *name;

   uint32_t gem_handle;

   uint32_t refcount;

   /* Index into the current validation list.  This is used by the
    * validation list building algorithm to track which buffers are already
    * in the validation list so that we can ensure uniqueness.
    */
   uint32_t exec_obj_index;

   /* Index for use with util_sparse_array_free_list */
   uint32_t free_index;

   /* Last known offset.  This value is provided by the kernel when we
    * execbuf and is used as the presumed offset for the next bunch of
    * relocations.
    */
   uint64_t offset;

   /** Size of the buffer not including implicit aux */
   uint64_t size;

   /* Map for internally mapped BOs.
    *
    * If ANV_BO_ALLOC_MAPPED is set in flags, this is the map for the whole
    * BO. If ANV_BO_WRAPPER is set in flags, map points to the wrapped BO.
    */
   void *map;

   /** Size of the implicit CCS range at the end of the buffer
    *
    * On Gfx12, CCS data is always a direct 1/256 scale-down.  A single 64K
    * page of main surface data maps to a 256B chunk of CCS data and that
    * mapping is provided on TGL-LP by the AUX table which maps virtual memory
    * addresses in the main surface to virtual memory addresses for CCS data.
    *
    * Because we can't change these maps around easily and because Vulkan
    * allows two VkImages to be bound to overlapping memory regions (as long
    * as the app is careful), it's not feasible to make this mapping part of
    * the image.  (On Gfx11 and earlier, the mapping was provided via
    * RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.)
    * Instead, we attach the CCS data directly to the buffer object and setup
    * the AUX table mapping at BO creation time.
    *
    * This field is for internal tracking use by the BO allocator only and
    * should not be touched by other parts of the code.  If something wants to
    * know if a BO has implicit CCS data, it should instead look at the
    * has_implicit_ccs boolean below.
    *
    * This data is not included in maps of this buffer.
    */
   uint32_t _ccs_size;

   /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
   uint32_t flags;

   /** True if this BO may be shared with other processes */
   bool is_external:1;

   /** True if this BO is a wrapper
    *
    * When set to true, none of the fields in this BO are meaningful except
    * for anv_bo::is_wrapper and anv_bo::map which points to the actual BO.
    * See also anv_bo_unwrap().  Wrapper BOs are not allowed when use_softpin
    * is set in the physical device.
    */
   bool is_wrapper:1;

   /** See also ANV_BO_ALLOC_FIXED_ADDRESS */
   bool has_fixed_address:1;

   /** True if this BO wraps a host pointer */
   bool from_host_ptr:1;

   /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */
   bool has_client_visible_address:1;

   /** True if this BO has implicit CCS data attached to it */
   bool has_implicit_ccs:1;
};

static inline struct anv_bo *
anv_bo_ref(struct anv_bo *bo)
{
   p_atomic_inc(&bo->refcount);
   return bo;
}

static inline struct anv_bo *
anv_bo_unwrap(struct anv_bo *bo)
{
   while (bo->is_wrapper)
      bo = bo->map;
   return bo;
}

static inline bool
anv_bo_is_pinned(struct anv_bo *bo)
{
#if defined(GFX_VERx10) && GFX_VERx10 >= 90
   /* Sky Lake and later always uses softpin */
   assert(bo->flags & EXEC_OBJECT_PINNED);
   return true;
#elif defined(GFX_VERx10) && GFX_VERx10 < 80
   /* Haswell and earlier never use softpin */
   assert(!(bo->flags & EXEC_OBJECT_PINNED));
   assert(!bo->has_fixed_address);
   return false;
#else
   /* If we don't have a GFX_VERx10 #define, we need to look at the BO.  Also,
    * for GFX version 8, we need to look at the BO because Broadwell softpins
    * but Cherryview doesn't.
    */
   assert((bo->flags & EXEC_OBJECT_PINNED) || !bo->has_fixed_address);
   return (bo->flags & EXEC_OBJECT_PINNED) != 0;
#endif
}

struct anv_address {
   struct anv_bo *bo;
   int64_t offset;
};

#define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })

static inline struct anv_address
anv_address_from_u64(uint64_t addr_u64)
{
   assert(addr_u64 == intel_canonical_address(addr_u64));
   return (struct anv_address) {
      .bo = NULL,
      .offset = addr_u64,
   };
}

static inline bool
anv_address_is_null(struct anv_address addr)
{
   return addr.bo == NULL && addr.offset == 0;
}

static inline uint64_t
anv_address_physical(struct anv_address addr)
{
   if (addr.bo && anv_bo_is_pinned(addr.bo)) {
      return intel_canonical_address(addr.bo->offset + addr.offset);
   } else {
      return intel_canonical_address(addr.offset);
   }
}

static inline struct anv_address
anv_address_add(struct anv_address addr, uint64_t offset)
{
   addr.offset += offset;
   return addr;
}
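
/* Usage sketch (illustrative only): composing and resolving an anv_address.
 * For a pinned (softpin) BO, the GPU address is bo->offset plus the offset,
 * folded back into canonical form.
 */
#if 0
   struct anv_address addr = { .bo = bo, .offset = 0 };
   addr = anv_address_add(addr, 256);
   uint64_t gpu_addr = anv_address_physical(addr);  /* bo->offset + 256 */
#endif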

/* Represents a lock-free linked list of "free" things.  This is used by
 * both the block pool and the state pools.  Unfortunately, in order to
 * solve the ABA problem, we can't use a single uint32_t head.
 */
union anv_free_list {
   struct {
      uint32_t offset;

      /* A simple count that is incremented every time the head changes. */
      uint32_t count;
   };
   /* Make sure it's aligned to 64 bits. This will make atomic operations
    * faster on 32 bit platforms.
    */
   uint64_t u64 __attribute__ ((aligned (8)));
};

#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
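
/* Minimal sketch of why the { offset, count } pair is needed (this mirrors,
 * but is not, the implementation in anv_allocator.c).  Bumping the count on
 * every head change makes an interleaved pop/push visible to the 64-bit
 * compare-and-swap, which a bare uint32_t head could not detect (ABA).
 */
#if 0
static void
sketch_free_list_push(union anv_free_list *list, uint32_t offset)
{
   union anv_free_list current, old, new;
   current.u64 = list->u64;
   do {
      old = current;
      /* ... link the entry at "offset" to old.offset here ... */
      new.offset = offset;
      new.count = old.count + 1;
      current.u64 = __sync_val_compare_and_swap(&list->u64, old.u64, new.u64);
   } while (current.u64 != old.u64);
}
#endif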

struct anv_block_state {
   union {
      struct {
         uint32_t next;
         uint32_t end;
      };
      /* Make sure it's aligned to 64 bits. This will make atomic operations
       * faster on 32 bit platforms.
       */
      uint64_t u64 __attribute__ ((aligned (8)));
   };
};
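
/* Illustrative sketch: packing "next" and "end" into one uint64_t lets a
 * block be carved out with a single atomic add; only when the cursor passes
 * "end" does the caller have to take a lock and grow the pool.  This is a
 * simplified (little-endian) view of what the allocator does, not the actual
 * code.
 */
#if 0
   struct anv_block_state old;
   old.u64 = __sync_fetch_and_add(&pool->state.u64, block_size);
   if (old.next + block_size <= old.end) {
      /* fast path: the block starting at offset old.next is ours */
   } else {
      /* slow path: grow the pool under a mutex and retry */
   }
#endif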

#define anv_block_pool_foreach_bo(bo, pool)  \
   for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
        _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
        _pp_bo++)

#define ANV_MAX_BLOCK_POOL_BOS 20

struct anv_block_pool {
   const char *name;

   struct anv_device *device;
   bool use_relocations;

   /* Wrapper BO for use in relocation lists.  This BO is simply a wrapper
    * around the actual BO so that we can grow the pool after the wrapper BO
    * has been put in a relocation list.  This is only used in the
    * non-softpin case.
    */
   struct anv_bo wrapper_bo;

   struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
   struct anv_bo *bo;
   uint32_t nbos;

   uint64_t size;

   /* The address where the start of the pool is pinned. The various bos that
    * are created as the pool grows will have addresses in the range
    * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
    */
   uint64_t start_address;

   /* The offset from the start of the bo to the "center" of the block
    * pool.  Pointers to allocated blocks are given by
    * bo.map + center_bo_offset + offsets.
    */
   uint32_t center_bo_offset;

   /* Current memory map of the block pool.  This pointer may or may not
    * point to the actual beginning of the block pool memory.  If
    * anv_block_pool_alloc_back has ever been called, then this pointer
    * will point to the "center" position of the buffer and all offsets
    * (negative or positive) given out by the block pool alloc functions
    * will be valid relative to this pointer.
    *
    * In particular, map == bo.map + center_offset
    *
    * DO NOT access this pointer directly. Use anv_block_pool_map() instead,
    * since it will handle the softpin case as well, where this points to NULL.
    */
   void *map;
   int fd;

   /**
    * Array of mmaps and gem handles owned by the block pool, reclaimed when
    * the block pool is destroyed.
    */
   struct u_vector mmap_cleanups;

   struct anv_block_state state;

   struct anv_block_state back_state;
};

/* Block pools are backed by a fixed-size 1GB memfd */
#define BLOCK_POOL_MEMFD_SIZE (1ul << 30)

/* The center of the block pool is also the middle of the memfd.  This may
 * change in the future if we decide differently for some reason.
 */
#define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)

static inline uint32_t
anv_block_pool_size(struct anv_block_pool *pool)
{
   return pool->state.end + pool->back_state.end;
}

struct anv_state {
   int32_t offset;
   uint32_t alloc_size;
   void *map;
   uint32_t idx;
};

#define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })

struct anv_fixed_size_state_pool {
   union anv_free_list free_list;
   struct anv_block_state block;
};

#define ANV_MIN_STATE_SIZE_LOG2 6
#define ANV_MAX_STATE_SIZE_LOG2 22

#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)

struct anv_free_entry {
   uint32_t next;
   struct anv_state state;
};

struct anv_state_table {
   struct anv_device *device;
   int fd;
   struct anv_free_entry *map;
   uint32_t size;
   struct anv_block_state state;
   struct u_vector cleanups;
};

struct anv_state_pool {
   struct anv_block_pool block_pool;

   /* Offset into the relevant state base address where the state pool starts
    * allocating memory.
    */
   int32_t start_offset;

   struct anv_state_table table;

   /* The size of blocks which will be allocated from the block pool */
   uint32_t block_size;

   /** Free list for "back" allocations */
   union anv_free_list back_alloc_free_list;

   struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
};

struct anv_state_reserved_pool {
   struct anv_state_pool *pool;
   union anv_free_list reserved_blocks;
   uint32_t count;
};

struct anv_state_stream {
   struct anv_state_pool *state_pool;

   /* The size of blocks to allocate from the state pool */
   uint32_t block_size;

   /* Current block we're allocating from */
   struct anv_state block;

   /* Offset into the current block at which to allocate the next state */
   uint32_t next;

   /* List of all blocks allocated from this pool */
   struct util_dynarray all_blocks;
};

/* The block_pool functions are exported for testing only.  The block pool
 * should only be used via a state pool (see below).
 */
VkResult anv_block_pool_init(struct anv_block_pool *pool,
                             struct anv_device *device,
                             const char *name,
                             uint64_t start_address,
                             uint32_t initial_size);
void anv_block_pool_finish(struct anv_block_pool *pool);
int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
                             uint32_t block_size, uint32_t *padding);
int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
                                  uint32_t block_size);
void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset,
                         uint32_t size);

VkResult anv_state_pool_init(struct anv_state_pool *pool,
                             struct anv_device *device,
                             const char *name,
                             uint64_t base_address,
                             int32_t start_offset,
                             uint32_t block_size);
void anv_state_pool_finish(struct anv_state_pool *pool);
struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
                                      uint32_t state_size, uint32_t alignment);
struct anv_state anv_state_pool_alloc_back(struct anv_state_pool *pool);
void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
void anv_state_stream_init(struct anv_state_stream *stream,
                           struct anv_state_pool *state_pool,
                           uint32_t block_size);
void anv_state_stream_finish(struct anv_state_stream *stream);
struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
                                        uint32_t size, uint32_t alignment);
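
/* Usage sketch (illustrative only): a state stream is a linear allocator on
 * top of a state pool, typically used for per-command-buffer state.  The
 * block size is an arbitrary example value.
 */
#if 0
   struct anv_state_stream stream;
   anv_state_stream_init(&stream, &device->dynamic_state_pool, 16384);
   struct anv_state state = anv_state_stream_alloc(&stream, 64, 32);
   /* ... fill state.map, reference the state by state.offset ... */
   anv_state_stream_finish(&stream);   /* returns all blocks to the pool */
#endif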

void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
                                      struct anv_state_pool *parent,
                                      uint32_t count, uint32_t size,
                                      uint32_t alignment);
void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
                                  struct anv_state state);

VkResult anv_state_table_init(struct anv_state_table *table,
                             struct anv_device *device,
                             uint32_t initial_entries);
void anv_state_table_finish(struct anv_state_table *table);
VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
                             uint32_t count);
void anv_free_list_push(union anv_free_list *list,
                        struct anv_state_table *table,
                        uint32_t idx, uint32_t count);
struct anv_state* anv_free_list_pop(union anv_free_list *list,
                                    struct anv_state_table *table);


static inline struct anv_state *
anv_state_table_get(struct anv_state_table *table, uint32_t idx)
{
   return &table->map[idx].state;
}

/**
 * Implements a pool of re-usable BOs.  The interface is identical to that
 * of block_pool except that each block is its own BO.
 */
struct anv_bo_pool {
   const char *name;

   struct anv_device *device;

   struct util_sparse_array_free_list free_list[16];
};

void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
                      const char *name);
void anv_bo_pool_finish(struct anv_bo_pool *pool);
VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
                           struct anv_bo **bo_out);
void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
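
/* Usage sketch (illustrative only): unlike the block/state pools, the BO
 * pool hands out whole BOs, e.g. for batch buffers.  The size is an example.
 */
#if 0
   struct anv_bo *bo;
   if (anv_bo_pool_alloc(&device->batch_bo_pool, 4096, &bo) == VK_SUCCESS) {
      /* ... write commands into bo->map ... */
      anv_bo_pool_free(&device->batch_bo_pool, bo);
   }
#endif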

struct anv_scratch_pool {
   /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
   struct anv_bo *bos[16][MESA_SHADER_STAGES];
   uint32_t surfs[16];
   struct anv_state surf_states[16];
};

void anv_scratch_pool_init(struct anv_device *device,
                           struct anv_scratch_pool *pool);
void anv_scratch_pool_finish(struct anv_device *device,
                             struct anv_scratch_pool *pool);
struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
                                      struct anv_scratch_pool *pool,
                                      gl_shader_stage stage,
                                      unsigned per_thread_scratch);
uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
                                   struct anv_scratch_pool *pool,
                                   unsigned per_thread_scratch);

/** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
struct anv_bo_cache {
   struct util_sparse_array bo_map;
   pthread_mutex_t mutex;
};

VkResult anv_bo_cache_init(struct anv_bo_cache *cache,
                           struct anv_device *device);
void anv_bo_cache_finish(struct anv_bo_cache *cache);

struct anv_queue_family {
   /* Standard bits passed on to the client */
   VkQueueFlags   queueFlags;
   uint32_t       queueCount;

   /* Driver internal information */
   enum drm_i915_gem_engine_class engine_class;
};

#define ANV_MAX_QUEUE_FAMILIES 3

struct anv_memory_type {
   /* Standard bits passed on to the client */
   VkMemoryPropertyFlags   propertyFlags;
   uint32_t                heapIndex;
};

struct anv_memory_heap {
   /* Standard bits passed on to the client */
   VkDeviceSize      size;
   VkMemoryHeapFlags flags;

   /** Driver-internal book-keeping.
    *
    * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
    */
   VkDeviceSize      used __attribute__ ((aligned (8)));

   bool              is_local_mem;
};

struct anv_memregion {
   struct drm_i915_gem_memory_class_instance region;
   uint64_t size;
   uint64_t available;
};

struct anv_physical_device {
    struct vk_physical_device                   vk;

    /* Link in anv_instance::physical_devices */
    struct list_head                            link;

    struct anv_instance *                       instance;
    char                                        path[20];
    struct intel_device_info                      info;
    /** Amount of "GPU memory" we want to advertise
     *
     * Clearly, this value is bogus since Intel is a UMA architecture.  On
     * gfx7 platforms, we are limited by GTT size unless we want to implement
     * fine-grained tracking and GTT splitting.  On Broadwell and above we are
     * practically unlimited.  However, we will never report more than 3/4 of
     * the total system ram to try and avoid running out of RAM.
     */
    bool                                        supports_48bit_addresses;
    struct brw_compiler *                       compiler;
    struct isl_device                           isl_dev;
    struct intel_perf_config *                    perf;
    /* True if hardware support is incomplete/alpha */
    bool                                        is_alpha;
    /*
     * Number of commands required to implement a performance query begin +
     * end.
     */
    uint32_t                                    n_perf_query_commands;
    int                                         cmd_parser_version;
    bool                                        has_exec_async;
    bool                                        has_exec_capture;
    int                                         max_context_priority;
    bool                                        has_context_isolation;
    bool                                        has_mmap_offset;
    bool                                        has_userptr_probe;
    uint64_t                                    gtt_size;

    bool                                        use_relocations;
    bool                                        use_softpin;
    bool                                        always_use_bindless;
    bool                                        use_call_secondary;

    /** True if we can access buffers using A64 messages */
    bool                                        has_a64_buffer_access;
    /** True if we can use bindless access for images */
    bool                                        has_bindless_images;
    /** True if we can use bindless access for samplers */
    bool                                        has_bindless_samplers;
    /** True if we can use timeline semaphores through execbuf */
    bool                                        has_exec_timeline;

    /** True if we can read the GPU timestamp register
     *
     * When running in a virtual context, the timestamp register is unreadable
     * on Gfx12+.
     */
    bool                                        has_reg_timestamp;

    /** True if this device has implicit AUX
     *
     * If true, CCS is handled as an implicit attachment to the BO rather than
     * as an explicitly bound surface.
     */
    bool                                        has_implicit_ccs;

    bool                                        always_flush_cache;

    struct {
      uint32_t                                  family_count;
      struct anv_queue_family                   families[ANV_MAX_QUEUE_FAMILIES];
    } queue;

    struct {
      uint32_t                                  type_count;
      struct anv_memory_type                    types[VK_MAX_MEMORY_TYPES];
      uint32_t                                  heap_count;
      struct anv_memory_heap                    heaps[VK_MAX_MEMORY_HEAPS];
      bool                                      need_clflush;
    } memory;

    /* Either we have a single vram region and it's all mappable, or we have
     * both mappable & non-mappable parts. System memory is always available.
     */
    struct anv_memregion                        vram_mappable;
    struct anv_memregion                        vram_non_mappable;
    struct anv_memregion                        sys;
    uint8_t                                     driver_build_sha1[20];
    uint8_t                                     pipeline_cache_uuid[VK_UUID_SIZE];
    uint8_t                                     driver_uuid[VK_UUID_SIZE];
    uint8_t                                     device_uuid[VK_UUID_SIZE];

    struct vk_sync_type                         sync_syncobj_type;
    struct vk_sync_timeline_type                sync_timeline_type;
    const struct vk_sync_type *                 sync_types[4];

    struct wsi_device                       wsi_device;
    int                                         local_fd;
    bool                                        has_local;
    int64_t                                     local_major;
    int64_t                                     local_minor;
    int                                         master_fd;
    bool                                        has_master;
    int64_t                                     master_major;
    int64_t                                     master_minor;
    struct drm_i915_query_engine_info *         engine_info;

    void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address, bool);
    struct intel_measure_device                 measure_device;
};

static inline bool
anv_physical_device_has_vram(const struct anv_physical_device *device)
{
   return device->vram_mappable.size > 0;
}

struct anv_app_info {
   const char*        app_name;
   uint32_t           app_version;
   const char*        engine_name;
   uint32_t           engine_version;
   uint32_t           api_version;
};

struct anv_instance {
    struct vk_instance                          vk;

    bool                                        physical_devices_enumerated;
    struct list_head                            physical_devices;

    struct driOptionCache                       dri_options;
    struct driOptionCache                       available_dri_options;

    /**
     * Workarounds for game bugs.
     */
    bool                                        assume_full_subgroups;
    bool                                        limit_trig_input_range;
    bool                                        sample_mask_out_opengl_behaviour;
};

VkResult anv_init_wsi(struct anv_physical_device *physical_device);
void anv_finish_wsi(struct anv_physical_device *physical_device);

struct anv_queue {
   struct vk_queue                           vk;

   struct anv_device *                       device;

   const struct anv_queue_family *           family;

   uint32_t                                  index_in_family;

   uint32_t                                  exec_flags;

   /** Synchronization object for debug purposes (DEBUG_SYNC) */
   struct vk_sync                           *sync;

   struct intel_ds_queue *                   ds;
};

struct nir_xfb_info;
struct anv_pipeline_bind_map;

extern const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2];

struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device *device,
                             struct vk_pipeline_cache *cache,
                             const void *key_data, uint32_t key_size,
                             bool *user_cache_bit);

struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device *device,
                         struct vk_pipeline_cache *cache,
                         gl_shader_stage stage,
                         const void *key_data, uint32_t key_size,
                         const void *kernel_data, uint32_t kernel_size,
                         const struct brw_stage_prog_data *prog_data,
                         uint32_t prog_data_size,
                         const struct brw_compile_stats *stats,
                         uint32_t num_stats,
                         const struct nir_xfb_info *xfb_info,
                         const struct anv_pipeline_bind_map *bind_map);

struct nir_shader;
struct nir_shader_compiler_options;

struct nir_shader *
anv_device_search_for_nir(struct anv_device *device,
                          struct vk_pipeline_cache *cache,
                          const struct nir_shader_compiler_options *nir_options,
                          unsigned char sha1_key[20],
                          void *mem_ctx);

void
anv_device_upload_nir(struct anv_device *device,
                      struct vk_pipeline_cache *cache,
                      const struct nir_shader *nir,
                      unsigned char sha1_key[20]);

struct anv_device {
    struct vk_device                            vk;

    struct anv_physical_device *                physical;
    struct intel_device_info                      info;
    struct isl_device                           isl_dev;
    int                                         context_id;
    int                                         fd;
    bool                                        can_chain_batches;
    bool                                        robust_buffer_access;

    pthread_mutex_t                             vma_mutex;
    struct util_vma_heap                        vma_lo;
    struct util_vma_heap                        vma_cva;
    struct util_vma_heap                        vma_hi;

    /** List of all anv_device_memory objects */
    struct list_head                            memory_objects;

    struct anv_bo_pool                          batch_bo_pool;
    struct anv_bo_pool                          utrace_bo_pool;

    struct anv_bo_cache                         bo_cache;

    struct anv_state_pool                       general_state_pool;
    struct anv_state_pool                       dynamic_state_pool;
    struct anv_state_pool                       instruction_state_pool;
    struct anv_state_pool                       binding_table_pool;
    struct anv_state_pool                       surface_state_pool;

    struct anv_state_reserved_pool              custom_border_colors;

    /** BO used for various workarounds
     *
     * There are a number of workarounds on our hardware which require writing
     * data somewhere and it doesn't really matter where.  For that, we use
     * this BO and just write to the first dword or so.
     *
     * We also need to be able to handle NULL buffers bound as pushed UBOs.
     * For that, we use the high bytes (>= 1024) of the workaround BO.
     */
    struct anv_bo *                             workaround_bo;
    struct anv_address                          workaround_address;

    struct anv_bo *                             trivial_batch_bo;
    struct anv_state                            null_surface_state;

    struct vk_pipeline_cache *                  default_pipeline_cache;
    struct vk_pipeline_cache *                  internal_cache;
    struct blorp_context                        blorp;

    struct anv_state                            border_colors;

    struct anv_state                            slice_hash;

    /** An array of CPS_STATE structures grouped by MAX_VIEWPORTS elements
     *
     * We need to emit CPS_STATE structures for each viewport accessible by a
     * pipeline. So rather than write many identical CPS_STATE structures
     * dynamically, we can enumerate all possible combinations and then just
     * emit a 3DSTATE_CPS_POINTERS instruction with the right offset into this
     * array.
     */
    struct anv_state                            cps_states;

    uint32_t                                    queue_count;
    struct anv_queue  *                         queues;

    struct anv_scratch_pool                     scratch_pool;
    struct anv_bo                              *rt_scratch_bos[16];

    /** Shadow ray query BO
     *
     * The ray_query_bo only holds the current ray being traced. When using
     * more than 1 ray query per thread, we cannot fit all the queries in
     * there, so we need another buffer to hold query data that is not
     * currently being used by the HW for tracing, similar to a scratch space.
     *
     * The size of the shadow buffer depends on the number of queries per
     * shader.
     */
    struct anv_bo                              *ray_query_shadow_bos[16];
    /** Ray query buffer used to communicate with the HW unit.
     */
    struct anv_bo                              *ray_query_bo;

    struct anv_shader_bin                      *rt_trampoline;
    struct anv_shader_bin                      *rt_trivial_return;

    pthread_mutex_t                             mutex;
    pthread_cond_t                              queue_submit;

    struct intel_batch_decode_ctx               decoder_ctx;
    /*
     * When decoding an anv_cmd_buffer, we might need to search for BOs through
     * the cmd_buffer's list.
     */
    struct anv_cmd_buffer                      *cmd_buffer_being_decoded;

    int                                         perf_fd; /* -1 if not opened */
    uint64_t                                    perf_metric; /* 0 if unset */

    struct intel_aux_map_context                *aux_map_ctx;

    const struct intel_l3_config                *l3_config;

    struct intel_debug_block_frame              *debug_frame_desc;

    struct intel_ds_device                       ds;
};

#if defined(GFX_VERx10) && GFX_VERx10 >= 90
#define ANV_ALWAYS_SOFTPIN true
#else
#define ANV_ALWAYS_SOFTPIN false
#endif

static inline bool
anv_use_relocations(const struct anv_physical_device *pdevice)
{
#if defined(GFX_VERx10) && GFX_VERx10 >= 90
   /* Sky Lake and later always uses softpin */
   assert(!pdevice->use_relocations);
   return false;
#elif defined(GFX_VERx10) && GFX_VERx10 < 80
   /* Haswell and earlier never use softpin */
   assert(pdevice->use_relocations);
   return true;
#else
   /* If we don't have a GFX_VERx10 #define, we need to look at the physical
    * device.  Also, for GFX version 8, we need to look at the physical
    * device because Broadwell softpins but Cherryview doesn't.
    */
   return pdevice->use_relocations;
#endif
}

static inline struct anv_state_pool *
anv_binding_table_pool(struct anv_device *device)
{
   if (anv_use_relocations(device->physical))
      return &device->surface_state_pool;
   else
      return &device->binding_table_pool;
}

static inline struct anv_state
anv_binding_table_pool_alloc(struct anv_device *device)
{
   if (anv_use_relocations(device->physical))
      return anv_state_pool_alloc_back(&device->surface_state_pool);
   else
      return anv_state_pool_alloc(&device->binding_table_pool,
                                  device->binding_table_pool.block_size, 0);
}

static inline void
anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
   anv_state_pool_free(anv_binding_table_pool(device), state);
}

static inline uint32_t
anv_mocs(const struct anv_device *device,
         const struct anv_bo *bo,
         isl_surf_usage_flags_t usage)
{
   return isl_mocs(&device->isl_dev, usage, bo && bo->is_external);
}

void anv_device_init_blorp(struct anv_device *device);
void anv_device_finish_blorp(struct anv_device *device);

enum anv_bo_alloc_flags {
   /** Specifies that the BO must have a 32-bit address
    *
    * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
    */
   ANV_BO_ALLOC_32BIT_ADDRESS =  (1 << 0),

   /** Specifies that the BO may be shared externally */
   ANV_BO_ALLOC_EXTERNAL =       (1 << 1),

   /** Specifies that the BO should be mapped */
   ANV_BO_ALLOC_MAPPED =         (1 << 2),

   /** Specifies that the BO should be snooped so we get coherency */
   ANV_BO_ALLOC_SNOOPED =        (1 << 3),

   /** Specifies that the BO should be captured in error states */
   ANV_BO_ALLOC_CAPTURE =        (1 << 4),

   /** Specifies that the BO will have an address assigned by the caller
    *
    * Such BOs do not exist in any VMA heap.
    */
   ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),

   /** Enables implicit synchronization on the BO
    *
    * This is the opposite of EXEC_OBJECT_ASYNC.
    */
   ANV_BO_ALLOC_IMPLICIT_SYNC =  (1 << 6),

   /** Enables implicit synchronization on the BO
    *
    * This is equivalent to EXEC_OBJECT_WRITE.
    */
   ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),

   /** Has an address which is visible to the client */
   ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),

   /** This buffer has implicit CCS data attached to it */
   ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),

   /** This buffer is allocated from local memory */
   ANV_BO_ALLOC_LOCAL_MEM = (1 << 10),

   /** This buffer is allocated from local memory and should be cpu visible */
   ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 11),
};

VkResult anv_device_alloc_bo(struct anv_device *device,
                             const char *name, uint64_t size,
                             enum anv_bo_alloc_flags alloc_flags,
                             uint64_t explicit_address,
                             struct anv_bo **bo);
VkResult anv_device_map_bo(struct anv_device *device,
                           struct anv_bo *bo,
                           uint64_t offset,
                           size_t size,
                           uint32_t gem_flags,
                           void **map_out);
void anv_device_unmap_bo(struct anv_device *device,
                         struct anv_bo *bo,
                         void *map, size_t map_size);
VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
                                            void *host_ptr, uint32_t size,
                                            enum anv_bo_alloc_flags alloc_flags,
                                            uint64_t client_address,
                                            struct anv_bo **bo_out);
VkResult anv_device_import_bo(struct anv_device *device, int fd,
                              enum anv_bo_alloc_flags alloc_flags,
                              uint64_t client_address,
                              struct anv_bo **bo);
VkResult anv_device_export_bo(struct anv_device *device,
                              struct anv_bo *bo, int *fd_out);
VkResult anv_device_get_bo_tiling(struct anv_device *device,
                                  struct anv_bo *bo,
                                  enum isl_tiling *tiling_out);
VkResult anv_device_set_bo_tiling(struct anv_device *device,
                                  struct anv_bo *bo,
                                  uint32_t row_pitch_B,
                                  enum isl_tiling tiling);
void anv_device_release_bo(struct anv_device *device,
                           struct anv_bo *bo);
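
/* Usage sketch (illustrative only): typical lifetime of a device-allocated
 * BO using the entry points above.  The name, size and data are made up for
 * the example; ANV_BO_ALLOC_MAPPED makes bo->map valid per the anv_bo
 * comment.
 */
#if 0
   struct anv_bo *bo;
   VkResult result = anv_device_alloc_bo(device, "example", 64 * 1024,
                                         ANV_BO_ALLOC_MAPPED,
                                         0 /* explicit_address */, &bo);
   if (result == VK_SUCCESS) {
      memcpy(bo->map, data, data_size);
      anv_device_release_bo(device, bo);
   }
#endif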

static inline struct anv_bo *
anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
{
   return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
}

VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
                         int64_t timeout);

VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
                        uint32_t exec_flags,
                        const VkDeviceQueueCreateInfo *pCreateInfo,
                        uint32_t index_in_family);
void anv_queue_finish(struct anv_queue *queue);

VkResult anv_queue_submit(struct vk_queue *queue,
                          struct vk_queue_submit *submit);
VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
                                       struct anv_batch *batch);

void* anv_gem_mmap(struct anv_device *device,
                   uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
uint32_t anv_gem_create(struct anv_device *device, uint64_t size);
void anv_gem_close(struct anv_device *device, uint32_t gem_handle);
uint32_t anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
                                uint32_t flags, uint32_t num_regions,
                                struct drm_i915_gem_memory_class_instance *regions);
uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
int anv_gem_busy(struct anv_device *device, uint32_t gem_handle);
int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
int anv_gem_execbuffer(struct anv_device *device,
                       struct drm_i915_gem_execbuffer2 *execbuf);
int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
                       uint32_t stride, uint32_t tiling);
int anv_gem_create_context(struct anv_device *device);
bool anv_gem_has_context_priority(int fd, int priority);
int anv_gem_destroy_context(struct anv_device *device, int context);
int anv_gem_set_context_param(int fd, int context, uint32_t param,
                              uint64_t value);
int anv_gem_get_param(int fd, uint32_t param);
int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
int anv_gem_context_get_reset_stats(int fd, int context,
                                    uint32_t *active, uint32_t *pending);
int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
int anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result);
uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
                       uint32_t read_domains, uint32_t write_domain);
int anv_i915_query(int fd, uint64_t query_id, void *buffer,
                   int32_t *buffer_len);
struct drm_i915_query_engine_info *anv_gem_get_engine_info(int fd);

uint64_t anv_vma_alloc(struct anv_device *device,
                       uint64_t size, uint64_t align,
                       enum anv_bo_alloc_flags alloc_flags,
                       uint64_t client_address);
void anv_vma_free(struct anv_device *device,
                  uint64_t address, uint64_t size);

struct anv_reloc_list {
   uint32_t                                     num_relocs;
   uint32_t                                     array_length;
   struct drm_i915_gem_relocation_entry *       relocs;
   struct anv_bo **                             reloc_bos;
   uint32_t                                     dep_words;
   BITSET_WORD *                                deps;
};

VkResult anv_reloc_list_init(struct anv_reloc_list *list,
                             const VkAllocationCallbacks *alloc);
void anv_reloc_list_finish(struct anv_reloc_list *list,
                           const VkAllocationCallbacks *alloc);

VkResult anv_reloc_list_add(struct anv_reloc_list *list,
                            const VkAllocationCallbacks *alloc,
                            uint32_t offset, struct anv_bo *target_bo,
                            uint32_t delta, uint64_t *address_u64_out);

VkResult anv_reloc_list_add_bo(struct anv_reloc_list *list,
                               const VkAllocationCallbacks *alloc,
                               struct anv_bo *target_bo);

struct anv_batch_bo {
   /* Link in the anv_cmd_buffer.owned_batch_bos list */
   struct list_head                             link;

   struct anv_bo *                              bo;

   /* Bytes actually consumed in this batch BO */
   uint32_t                                     length;

   /* When this batch BO is used as part of a primary batch buffer, this
    * tracks whether it is chained to another primary batch buffer.
    *
    * If this is the case, the relocation list's last entry points to the
    * location of the MI_BATCH_BUFFER_START chaining to the next batch.
    */
1520   bool                                         chained;
1521
1522   struct anv_reloc_list                        relocs;
1523};
1524
1525struct anv_batch {
1526   const VkAllocationCallbacks *                alloc;
1527
1528   struct anv_address                           start_addr;
1529
1530   void *                                       start;
1531   void *                                       end;
1532   void *                                       next;
1533
1534   struct anv_reloc_list *                      relocs;
1535
1536   /* This callback is called (with the associated user data) in the event
1537    * that the batch runs out of space.
1538    */
1539   VkResult (*extend_cb)(struct anv_batch *, void *);
1540   void *                                       user_data;
1541
1542   /**
1543    * Current error status of the command buffer. Used to track inconsistent
1544    * or incomplete command buffer states that are the consequence of run-time
1545    * errors such as out of memory scenarios. We want to track this in the
1546    * batch because the command buffer object is not visible to some parts
1547    * of the driver.
1548    */
1549   VkResult                                     status;
1550};
1551
1552void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
1553void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
1554struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
1555
1556static inline void
1557anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
1558                      void *map, size_t size)
1559{
1560   batch->start_addr = addr;
1561   batch->next = batch->start = map;
1562   batch->end = map + size;
1563}
1564
1565static inline VkResult
1566anv_batch_set_error(struct anv_batch *batch, VkResult error)
1567{
1568   assert(error != VK_SUCCESS);
1569   if (batch->status == VK_SUCCESS)
1570      batch->status = error;
1571   return batch->status;
1572}
1573
1574static inline bool
1575anv_batch_has_error(struct anv_batch *batch)
1576{
1577   return batch->status != VK_SUCCESS;
1578}
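
/* A minimal sketch of the usual call-site pattern around these helpers
 * (illustrative only; "do_something" is a hypothetical helper): bail out
 * early once an error has been recorded and funnel new failures through
 * anv_batch_set_error() so that the first error is the one that sticks.
 *
 *    if (anv_batch_has_error(&cmd_buffer->batch))
 *       return;
 *
 *    VkResult result = do_something(cmd_buffer);
 *    if (result != VK_SUCCESS) {
 *       anv_batch_set_error(&cmd_buffer->batch, result);
 *       return;
 *    }
 */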
1579
1580static inline uint64_t
1581anv_batch_emit_reloc(struct anv_batch *batch,
1582                     void *location, struct anv_bo *bo, uint32_t delta)
1583{
1584   uint64_t address_u64 = 0;
1585   VkResult result;
1586
1587   if (ANV_ALWAYS_SOFTPIN) {
1588      address_u64 = bo->offset + delta;
1589      result = anv_reloc_list_add_bo(batch->relocs, batch->alloc, bo);
1590   } else {
1591      result = anv_reloc_list_add(batch->relocs, batch->alloc,
1592                                  location - batch->start, bo, delta,
1593                                  &address_u64);
1594   }
1595   if (unlikely(result != VK_SUCCESS)) {
1596      anv_batch_set_error(batch, result);
1597      return 0;
1598   }
1599
1600   return address_u64;
1601}
1602
1603static inline void
1604write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
1605{
1606   unsigned reloc_size = 0;
1607   if (device->info.ver >= 8) {
1608      reloc_size = sizeof(uint64_t);
1609      *(uint64_t *)p = intel_canonical_address(v);
1610   } else {
1611      reloc_size = sizeof(uint32_t);
1612      *(uint32_t *)p = v;
1613   }
1614
1615   if (flush && device->physical->memory.need_clflush)
1616      intel_flush_range(p, reloc_size);
1617}
1618
1619static inline uint64_t
1620_anv_combine_address(struct anv_batch *batch, void *location,
1621                     const struct anv_address address, uint32_t delta)
1622{
1623   if (address.bo == NULL) {
1624      return address.offset + delta;
1625   } else if (batch == NULL) {
1626      assert(anv_bo_is_pinned(address.bo));
1627      return anv_address_physical(anv_address_add(address, delta));
1628   } else {
1629      assert(batch->start <= location && location < batch->end);
1630      /* i915 relocations are signed. */
1631      assert(INT32_MIN <= address.offset && address.offset <= INT32_MAX);
1632      return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta);
1633   }
1634}
1635
1636#define __gen_address_type struct anv_address
1637#define __gen_user_data struct anv_batch
1638#define __gen_combine_address _anv_combine_address
1639
1640/* Wrapper macros needed to work around preprocessor argument issues.  In
1641 * particular, arguments don't get pre-evaluated if they are concatenated.
1642 * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
1643 * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
1644 * We can work around this easily enough with these helpers.
1645 */
1646#define __anv_cmd_length(cmd) cmd ## _length
1647#define __anv_cmd_length_bias(cmd) cmd ## _length_bias
1648#define __anv_cmd_header(cmd) cmd ## _header
1649#define __anv_cmd_pack(cmd) cmd ## _pack
1650#define __anv_reg_num(reg) reg ## _num
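
/* As a small illustration of the problem (the "bad_length" macro below is
 * hypothetical and exists only to show the failure mode):
 *
 *    #define bad_length(cmd) cmd ## _length
 *    bad_length(GENX(PIPE_CONTROL))
 *
 * Because the argument is pasted directly with ##, it is not macro-expanded
 * first, so the paste operates on the raw "GENX(PIPE_CONTROL)" tokens instead
 * of the expanded command name.  Routing the argument through
 * __anv_cmd_length(cmd) adds an extra expansion step, so GENX() is evaluated
 * before the concatenation happens.
 */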
1651
1652#define anv_pack_struct(dst, struc, ...) do {                              \
1653      struct struc __template = {                                          \
1654         __VA_ARGS__                                                       \
1655      };                                                                   \
1656      __anv_cmd_pack(struc)(NULL, dst, &__template);                       \
1657      VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
1658   } while (0)
1659
1660#define anv_batch_emitn(batch, n, cmd, ...) ({             \
1661      void *__dst = anv_batch_emit_dwords(batch, n);       \
1662      if (__dst) {                                         \
1663         struct cmd __template = {                         \
1664            __anv_cmd_header(cmd),                         \
1665           .DWordLength = n - __anv_cmd_length_bias(cmd),  \
1666            __VA_ARGS__                                    \
1667         };                                                \
1668         __anv_cmd_pack(cmd)(batch, __dst, &__template);   \
1669      }                                                    \
1670      __dst;                                               \
1671   })
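
/* Illustrative use of anv_batch_emitn() for a variable-length packet (the
 * dword math follows the usual "1 header dword + 4 dwords per vertex buffer"
 * layout, but treat the details as an example rather than a contract):
 *
 *    const uint32_t num_dwords = 1 + num_buffers * 4;
 *    uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
 *                                  GENX(3DSTATE_VERTEX_BUFFERS));
 *    if (p != NULL) {
 *       // pack one GENX(VERTEX_BUFFER_STATE) at p + 1 + i * 4 for each i
 *    }
 */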
1672
1673#define anv_batch_emit_merge(batch, dwords0, dwords1)                   \
1674   do {                                                                 \
1675      uint32_t *dw;                                                     \
1676                                                                        \
1677      STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1));        \
1678      dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0));         \
1679      if (!dw)                                                          \
1680         break;                                                         \
1681      for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++)                \
1682         dw[i] = (dwords0)[i] | (dwords1)[i];                           \
1683      VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\
1684   } while (0)
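
/* Illustrative pack-then-merge sketch: pack the dynamic half of a state
 * packet on the stack with anv_pack_struct() and OR it with dwords that were
 * pre-packed at pipeline creation time.  "line_width" and the pipeline field
 * name are assumptions made for the example.
 *
 *    uint32_t dwords[GENX(3DSTATE_SF_length)];
 *    anv_pack_struct(dwords, GENX(3DSTATE_SF), .LineWidth = line_width);
 *    anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.sf);
 */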
1685
1686#define anv_batch_emit(batch, cmd, name)                            \
1687   for (struct cmd name = { __anv_cmd_header(cmd) },                    \
1688        *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
1689        __builtin_expect(_dst != NULL, 1);                              \
1690        ({ __anv_cmd_pack(cmd)(batch, _dst, &name);                     \
1691           VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
1692           _dst = NULL;                                                 \
1693         }))
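
/* Typical usage sketch: the block body fills in the template struct and the
 * packet is packed into the batch when the hidden for-loop finishes.  The
 * exact fields set depend on the packet and hardware generation.
 *
 *    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
 *       pc.CommandStreamerStallEnable = true;
 *       pc.DepthCacheFlushEnable      = true;
 *    }
 */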
1694
1695#define anv_batch_write_reg(batch, reg, name)                           \
1696   for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL;  \
1697        ({                                                              \
1698            uint32_t _dw[__anv_cmd_length(reg)];                        \
1699            __anv_cmd_pack(reg)(NULL, _dw, &name);                      \
1700            for (unsigned i = 0; i < __anv_cmd_length(reg); i++) {      \
1701               anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
1702                  lri.RegisterOffset   = __anv_reg_num(reg);            \
1703                  lri.DataDWord        = _dw[i];                        \
1704               }                                                        \
1705            }                                                           \
1706           _cont = NULL;                                                \
1707         }))
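
/* Usage sketch for a masked register write (the register field names below
 * are placeholders; masked registers generally require setting the matching
 * *Mask bit alongside the value):
 *
 *    anv_batch_write_reg(&cmd_buffer->batch, GENX(CACHE_MODE_1), cm1) {
 *       cm1.SomeWorkaroundEnable     = true;
 *       cm1.SomeWorkaroundEnableMask = true;
 *    }
 */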
1708
1709/* #define __gen_get_batch_dwords anv_batch_emit_dwords */
1710/* #define __gen_get_batch_address anv_batch_address */
1711/* #define __gen_address_value anv_address_physical */
1712/* #define __gen_address_offset anv_address_add */
1713
1714struct anv_device_memory {
1715   struct vk_object_base                        base;
1716
1717   struct list_head                             link;
1718
1719   struct anv_bo *                              bo;
1720   const struct anv_memory_type *               type;
1721
1722   void *                                       map;
1723   size_t                                       map_size;
1724
   /* The map, from the user PoV, is map + map_delta */
   uint64_t                                     map_delta;
1730
1731   /* If set, we are holding reference to AHardwareBuffer
1732    * which we must release when memory is freed.
1733    */
1734   struct AHardwareBuffer *                     ahw;
1735
1736   /* If set, this memory comes from a host pointer. */
1737   void *                                       host_ptr;
1738};
1739
1740/**
1741 * Header for Vertex URB Entry (VUE)
1742 */
1743struct anv_vue_header {
1744   uint32_t Reserved;
1745   uint32_t RTAIndex; /* RenderTargetArrayIndex */
1746   uint32_t ViewportIndex;
1747   float PointWidth;
1748};
1749
1750/** Struct representing a sampled image descriptor
1751 *
1752 * This descriptor layout is used for sampled images, bare sampler, and
1753 * combined image/sampler descriptors.
1754 */
1755struct anv_sampled_image_descriptor {
1756   /** Bindless image handle
1757    *
1758    * This is expected to already be shifted such that the 20-bit
1759    * SURFACE_STATE table index is in the top 20 bits.
1760    */
1761   uint32_t image;
1762
1763   /** Bindless sampler handle
1764    *
1765    * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
1766    * to the dynamic state base address.
1767    */
1768   uint32_t sampler;
1769};
1770
1771struct anv_texture_swizzle_descriptor {
1772   /** Texture swizzle
1773    *
1774    * See also nir_intrinsic_channel_select_intel
1775    */
1776   uint8_t swizzle[4];
1777
1778   /** Unused padding to ensure the struct is a multiple of 64 bits */
1779   uint32_t _pad;
1780};
1781
1782/** Struct representing a storage image descriptor */
1783struct anv_storage_image_descriptor {
1784   /** Bindless image handles
1785    *
1786    * These are expected to already be shifted such that the 20-bit
1787    * SURFACE_STATE table index is in the top 20 bits.
1788    */
1789   uint32_t vanilla;
1790   uint32_t lowered;
1791};
1792
/** Struct representing an address/range descriptor
 *
 * The fields of this struct correspond directly to the data layout of
 * nir_address_format_64bit_bounded_global addresses.  The last field is the
 * offset in the NIR address, so it must be zero so that loading the
 * descriptor yields a pointer to the start of the range.
 */
1800struct anv_address_range_descriptor {
1801   uint64_t address;
1802   uint32_t range;
1803   uint32_t zero;
1804};
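
/* A sketch of how such a descriptor might be written into the descriptor
 * buffer (desc_map, bind_addr and bind_range are assumed locals for the
 * example):
 *
 *    struct anv_address_range_descriptor desc = {
 *       .address = anv_address_physical(bind_addr),
 *       .range   = bind_range,
 *       .zero    = 0,
 *    };
 *    memcpy(desc_map, &desc, sizeof(desc));
 */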
1805
1806enum anv_descriptor_data {
1807   /** The descriptor contains a BTI reference to a surface state */
1808   ANV_DESCRIPTOR_SURFACE_STATE  = (1 << 0),
1809   /** The descriptor contains a BTI reference to a sampler state */
1810   ANV_DESCRIPTOR_SAMPLER_STATE  = (1 << 1),
1811   /** The descriptor contains an actual buffer view */
1812   ANV_DESCRIPTOR_BUFFER_VIEW    = (1 << 2),
1813   /** The descriptor contains auxiliary image layout data */
1814   ANV_DESCRIPTOR_IMAGE_PARAM    = (1 << 3),
   /** The descriptor contains inline uniform data */
   ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
1817   /** anv_address_range_descriptor with a buffer address and range */
1818   ANV_DESCRIPTOR_ADDRESS_RANGE  = (1 << 5),
1819   /** Bindless surface handle */
1820   ANV_DESCRIPTOR_SAMPLED_IMAGE  = (1 << 6),
1821   /** Storage image handles */
1822   ANV_DESCRIPTOR_STORAGE_IMAGE  = (1 << 7),
   /** Texture swizzle (see anv_texture_swizzle_descriptor) */
   ANV_DESCRIPTOR_TEXTURE_SWIZZLE  = (1 << 8),
1825};
1826
1827struct anv_descriptor_set_binding_layout {
1828   /* The type of the descriptors in this binding */
1829   VkDescriptorType type;
1830
1831   /* Flags provided when this binding was created */
1832   VkDescriptorBindingFlags flags;
1833
1834   /* Bitfield representing the type of data this descriptor contains */
1835   enum anv_descriptor_data data;
1836
1837   /* Maximum number of YCbCr texture/sampler planes */
1838   uint8_t max_plane_count;
1839
1840   /* Number of array elements in this binding (or size in bytes for inline
1841    * uniform data)
1842    */
1843   uint32_t array_size;
1844
1845   /* Index into the flattened descriptor set */
1846   uint32_t descriptor_index;
1847
1848   /* Index into the dynamic state array for a dynamic buffer */
1849   int16_t dynamic_offset_index;
1850
1851   /* Index into the descriptor set buffer views */
1852   int32_t buffer_view_index;
1853
1854   /* Offset into the descriptor buffer where this descriptor lives */
1855   uint32_t descriptor_offset;
1856
1857   /* Pre computed stride */
1858   unsigned descriptor_stride;
1859
1860   /* Immutable samplers (or NULL if no immutable samplers) */
1861   struct anv_sampler **immutable_samplers;
1862};
1863
1864bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
1865                                      const struct anv_descriptor_set_binding_layout *binding,
1866                                      bool sampler);
1867
1868bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
1869                                      const struct anv_descriptor_set_binding_layout *binding,
1870                                      bool sampler);
1871
1872struct anv_descriptor_set_layout {
1873   struct vk_object_base base;
1874
1875   /* Descriptor set layouts can be destroyed at almost any time */
1876   uint32_t ref_cnt;
1877
1878   /* Number of bindings in this descriptor set */
1879   uint32_t binding_count;
1880
1881   /* Total number of descriptors */
1882   uint32_t descriptor_count;
1883
1884   /* Shader stages affected by this descriptor set */
1885   uint16_t shader_stages;
1886
1887   /* Number of buffer views in this descriptor set */
1888   uint32_t buffer_view_count;
1889
1890   /* Number of dynamic offsets used by this descriptor set */
1891   uint16_t dynamic_offset_count;
1892
1893   /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
1894    * this buffer
1895    */
1896   VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];
1897
1898   /* Size of the descriptor buffer for this descriptor set */
1899   uint32_t descriptor_buffer_size;
1900
1901   /* Bindings in this descriptor set */
1902   struct anv_descriptor_set_binding_layout binding[0];
1903};
1904
1905void anv_descriptor_set_layout_destroy(struct anv_device *device,
1906                                       struct anv_descriptor_set_layout *layout);
1907
1908static inline void
1909anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
1910{
1911   assert(layout && layout->ref_cnt >= 1);
1912   p_atomic_inc(&layout->ref_cnt);
1913}
1914
1915static inline void
1916anv_descriptor_set_layout_unref(struct anv_device *device,
1917                                struct anv_descriptor_set_layout *layout)
1918{
1919   assert(layout && layout->ref_cnt >= 1);
1920   if (p_atomic_dec_zero(&layout->ref_cnt))
1921      anv_descriptor_set_layout_destroy(device, layout);
1922}
1923
1924struct anv_descriptor {
1925   VkDescriptorType type;
1926
1927   union {
1928      struct {
1929         VkImageLayout layout;
1930         struct anv_image_view *image_view;
1931         struct anv_sampler *sampler;
1932      };
1933
1934      struct {
1935         struct anv_buffer_view *set_buffer_view;
1936         struct anv_buffer *buffer;
1937         uint64_t offset;
1938         uint64_t range;
1939      };
1940
1941      struct anv_buffer_view *buffer_view;
1942
1943      struct anv_acceleration_structure *accel_struct;
1944   };
1945};
1946
1947struct anv_descriptor_set {
1948   struct vk_object_base base;
1949
1950   struct anv_descriptor_pool *pool;
1951   struct anv_descriptor_set_layout *layout;
1952
   /* Amount of space occupied in the pool by this descriptor set. It can
1954    * be larger than the size of the descriptor set.
1955    */
1956   uint32_t size;
1957
1958   /* State relative to anv_descriptor_pool::bo */
1959   struct anv_state desc_mem;
1960   /* Surface state for the descriptor buffer */
1961   struct anv_state desc_surface_state;
1962
1963   /* Descriptor set address. */
1964   struct anv_address desc_addr;
1965
1966   uint32_t buffer_view_count;
1967   struct anv_buffer_view *buffer_views;
1968
   /* Link to the descriptor pool's desc_sets list. */
1970   struct list_head pool_link;
1971
1972   uint32_t descriptor_count;
1973   struct anv_descriptor descriptors[0];
1974};
1975
1976static inline bool
1977anv_descriptor_set_is_push(struct anv_descriptor_set *set)
1978{
1979   return set->pool == NULL;
1980}
1981
1982struct anv_buffer_view {
1983   struct vk_object_base base;
1984
1985   uint64_t range; /**< VkBufferViewCreateInfo::range */
1986
1987   struct anv_address address;
1988
1989   struct anv_state surface_state;
1990   struct anv_state storage_surface_state;
1991   struct anv_state lowered_storage_surface_state;
1992
1993   struct brw_image_param lowered_storage_image_param;
1994};
1995
1996struct anv_push_descriptor_set {
1997   struct anv_descriptor_set set;
1998
1999   /* Put this field right behind anv_descriptor_set so it fills up the
2000    * descriptors[0] field. */
2001   struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
2002
2003   /** True if the descriptor set buffer has been referenced by a draw or
2004    * dispatch command.
2005    */
2006   bool set_used_on_gpu;
2007
2008   struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
2009};
2010
2011static inline struct anv_address
2012anv_descriptor_set_address(struct anv_descriptor_set *set)
2013{
2014   if (anv_descriptor_set_is_push(set)) {
      /* We have to flag the push descriptor set as used on the GPU
       * so that the next time we push descriptors, we grab new memory.
2017       */
2018      struct anv_push_descriptor_set *push_set =
2019         (struct anv_push_descriptor_set *)set;
2020      push_set->set_used_on_gpu = true;
2021   }
2022
2023   return set->desc_addr;
2024}
2025
2026struct anv_descriptor_pool {
2027   struct vk_object_base base;
2028
2029   uint32_t size;
2030   uint32_t next;
2031   uint32_t free_list;
2032
2033   struct anv_bo *bo;
2034   struct util_vma_heap bo_heap;
2035
2036   struct anv_state_stream surface_state_stream;
2037   void *surface_state_free_list;
2038
2039   struct list_head desc_sets;
2040
2041   bool host_only;
2042
2043   char data[0];
2044};
2045
2046struct anv_descriptor_template_entry {
2047   /* The type of descriptor in this entry */
2048   VkDescriptorType type;
2049
2050   /* Binding in the descriptor set */
2051   uint32_t binding;
2052
2053   /* Offset at which to write into the descriptor set binding */
2054   uint32_t array_element;
2055
2056   /* Number of elements to write into the descriptor set binding */
2057   uint32_t array_count;
2058
2059   /* Offset into the user provided data */
2060   size_t offset;
2061
2062   /* Stride between elements into the user provided data */
2063   size_t stride;
2064};
2065
2066struct anv_descriptor_update_template {
2067    struct vk_object_base base;
2068
2069    VkPipelineBindPoint bind_point;
2070
2071   /* The descriptor set this template corresponds to. This value is only
2072    * valid if the template was created with the templateType
2073    * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
2074    */
2075   uint8_t set;
2076
2077   /* Number of entries in this template */
2078   uint32_t entry_count;
2079
2080   /* Entries of the template */
2081   struct anv_descriptor_template_entry entries[0];
2082};
2083
2084size_t
2085anv_descriptor_set_layout_size(const struct anv_descriptor_set_layout *layout,
2086                               uint32_t var_desc_count);
2087
2088uint32_t
2089anv_descriptor_set_layout_descriptor_buffer_size(const struct anv_descriptor_set_layout *set_layout,
2090                                                 uint32_t var_desc_count);
2091
2092void
2093anv_descriptor_set_write_image_view(struct anv_device *device,
2094                                    struct anv_descriptor_set *set,
2095                                    const VkDescriptorImageInfo * const info,
2096                                    VkDescriptorType type,
2097                                    uint32_t binding,
2098                                    uint32_t element);
2099
2100void
2101anv_descriptor_set_write_buffer_view(struct anv_device *device,
2102                                     struct anv_descriptor_set *set,
2103                                     VkDescriptorType type,
2104                                     struct anv_buffer_view *buffer_view,
2105                                     uint32_t binding,
2106                                     uint32_t element);
2107
2108void
2109anv_descriptor_set_write_buffer(struct anv_device *device,
2110                                struct anv_descriptor_set *set,
2111                                struct anv_state_stream *alloc_stream,
2112                                VkDescriptorType type,
2113                                struct anv_buffer *buffer,
2114                                uint32_t binding,
2115                                uint32_t element,
2116                                VkDeviceSize offset,
2117                                VkDeviceSize range);
2118
2119void
2120anv_descriptor_set_write_acceleration_structure(struct anv_device *device,
2121                                                struct anv_descriptor_set *set,
2122                                                struct anv_acceleration_structure *accel,
2123                                                uint32_t binding,
2124                                                uint32_t element);
2125
2126void
2127anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
2128                                             struct anv_descriptor_set *set,
2129                                             uint32_t binding,
2130                                             const void *data,
2131                                             size_t offset,
2132                                             size_t size);
2133
2134void
2135anv_descriptor_set_write_template(struct anv_device *device,
2136                                  struct anv_descriptor_set *set,
2137                                  struct anv_state_stream *alloc_stream,
2138                                  const struct anv_descriptor_update_template *template,
2139                                  const void *data);
2140
2141#define ANV_DESCRIPTOR_SET_NULL             (UINT8_MAX - 5)
2142#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS   (UINT8_MAX - 4)
2143#define ANV_DESCRIPTOR_SET_DESCRIPTORS      (UINT8_MAX - 3)
2144#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS  (UINT8_MAX - 2)
2145#define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
2146#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
2147
2148struct anv_pipeline_binding {
2149   /** Index in the descriptor set
2150    *
2151    * This is a flattened index; the descriptor set layout is already taken
2152    * into account.
2153    */
2154   uint32_t index;
2155
2156   /** The descriptor set this surface corresponds to.
2157    *
    * The special ANV_DESCRIPTOR_SET_* values above indicate that this
2159    * binding is not a normal descriptor set but something else.
2160    */
2161   uint8_t set;
2162
2163   union {
2164      /** Plane in the binding index for images */
2165      uint8_t plane;
2166
2167      /** Dynamic offset index (for dynamic UBOs and SSBOs) */
2168      uint8_t dynamic_offset_index;
2169   };
2170
2171   /** For a storage image, whether it requires a lowered surface */
2172   uint8_t lowered_storage_surface;
2173
2174   /** Pad to 64 bits so that there are no holes and we can safely memcmp
2175    * assuming POD zero-initialization.
2176    */
2177   uint8_t pad;
2178};
2179
2180struct anv_push_range {
2181   /** Index in the descriptor set */
2182   uint32_t index;
2183
2184   /** Descriptor set index */
2185   uint8_t set;
2186
2187   /** Dynamic offset index (for dynamic UBOs) */
2188   uint8_t dynamic_offset_index;
2189
2190   /** Start offset in units of 32B */
2191   uint8_t start;
2192
2193   /** Range in units of 32B */
2194   uint8_t length;
2195};
2196
2197struct anv_pipeline_layout {
2198   struct vk_object_base base;
2199
2200   struct {
2201      struct anv_descriptor_set_layout *layout;
2202      uint32_t dynamic_offset_start;
2203   } set[MAX_SETS];
2204
2205   uint32_t num_sets;
2206
2207   unsigned char sha1[20];
2208};
2209
2210struct anv_buffer {
2211   struct vk_buffer vk;
2212
2213   /* Set when bound */
2214   struct anv_address address;
2215};
2216
2217enum anv_cmd_dirty_bits {
2218   ANV_CMD_DIRTY_PIPELINE                            = 1 << 0,
2219   ANV_CMD_DIRTY_INDEX_BUFFER                        = 1 << 1,
2220   ANV_CMD_DIRTY_RENDER_TARGETS                      = 1 << 2,
2221   ANV_CMD_DIRTY_XFB_ENABLE                          = 1 << 3,
2222};
2223typedef enum anv_cmd_dirty_bits anv_cmd_dirty_mask_t;
2224
2225enum anv_pipe_bits {
2226   ANV_PIPE_DEPTH_CACHE_FLUSH_BIT            = (1 << 0),
2227   ANV_PIPE_STALL_AT_SCOREBOARD_BIT          = (1 << 1),
2228   ANV_PIPE_STATE_CACHE_INVALIDATE_BIT       = (1 << 2),
2229   ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT    = (1 << 3),
2230   ANV_PIPE_VF_CACHE_INVALIDATE_BIT          = (1 << 4),
2231   ANV_PIPE_DATA_CACHE_FLUSH_BIT             = (1 << 5),
2232   ANV_PIPE_TILE_CACHE_FLUSH_BIT             = (1 << 6),
2233   ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT     = (1 << 10),
2234   ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
2235   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT    = (1 << 12),
2236   ANV_PIPE_DEPTH_STALL_BIT                  = (1 << 13),
2237
2238   /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data
2239    * cache work has completed.  Available on Gfx12+.  For earlier Gfx we
2240    * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT.
2241    */
2242   ANV_PIPE_HDC_PIPELINE_FLUSH_BIT           = (1 << 14),
2243   ANV_PIPE_PSS_STALL_SYNC_BIT               = (1 << 15),
2244   ANV_PIPE_CS_STALL_BIT                     = (1 << 20),
2245   ANV_PIPE_END_OF_PIPE_SYNC_BIT             = (1 << 21),
2246
2247   /* This bit does not exist directly in PIPE_CONTROL.  Instead it means that
2248    * a flush has happened but not a CS stall.  The next time we do any sort
2249    * of invalidation we need to insert a CS stall at that time.  Otherwise,
2250    * we would have to CS stall on every flush which could be bad.
2251    */
2252   ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT       = (1 << 22),
2253
2254   /* This bit does not exist directly in PIPE_CONTROL. It means that render
2255    * target operations related to transfer commands with VkBuffer as
2256    * destination are ongoing. Some operations like copies on the command
2257    * streamer might need to be aware of this to trigger the appropriate stall
2258    * before they can proceed with the copy.
2259    */
2260   ANV_PIPE_RENDER_TARGET_BUFFER_WRITES      = (1 << 23),
2261
2262   /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
2263    * AUX-TT data has changed and we need to invalidate AUX-TT data.  This is
2264    * done by writing the AUX-TT register.
2265    */
2266   ANV_PIPE_AUX_TABLE_INVALIDATE_BIT         = (1 << 24),
2267
2268   /* This bit does not exist directly in PIPE_CONTROL. It means that a
2269    * PIPE_CONTROL with a post-sync operation will follow. This is used to
2270    * implement a workaround for Gfx9.
2271    */
2272   ANV_PIPE_POST_SYNC_BIT                    = (1 << 25),
2273};
2274
2275#define ANV_PIPE_FLUSH_BITS ( \
2276   ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
2277   ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
2278   ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
2279   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
2280   ANV_PIPE_TILE_CACHE_FLUSH_BIT)
2281
2282#define ANV_PIPE_STALL_BITS ( \
2283   ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
2284   ANV_PIPE_DEPTH_STALL_BIT | \
2285   ANV_PIPE_CS_STALL_BIT)
2286
2287#define ANV_PIPE_INVALIDATE_BITS ( \
2288   ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
2289   ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
2290   ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
2291   ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
2292   ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
2293   ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
2294
2295enum intel_ds_stall_flag
2296anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits);
2297
2298static inline enum anv_pipe_bits
2299anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
2300                                     VkAccessFlags2 flags)
2301{
2302   enum anv_pipe_bits pipe_bits = 0;
2303
2304   u_foreach_bit64(b, flags) {
2305      switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
2306      case VK_ACCESS_2_SHADER_WRITE_BIT:
2307      case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT:
         /* We're transitioning a buffer that was previously used as a write
2309          * destination through the data port. To make its content available
2310          * to future operations, flush the hdc pipeline.
2311          */
2312         pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
2313         break;
2314      case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT:
2315         /* We're transitioning a buffer that was previously used as render
2316          * target. To make its content available to future operations, flush
2317          * the render target cache.
2318          */
2319         pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
2320         break;
2321      case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
2322         /* We're transitioning a buffer that was previously used as depth
2323          * buffer. To make its content available to future operations, flush
2324          * the depth cache.
2325          */
2326         pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
2327         break;
2328      case VK_ACCESS_2_TRANSFER_WRITE_BIT:
2329         /* We're transitioning a buffer that was previously used as a
2330          * transfer write destination. Generic write operations include color
2331          * & depth operations as well as buffer operations like :
2332          *     - vkCmdClearColorImage()
2333          *     - vkCmdClearDepthStencilImage()
2334          *     - vkCmdBlitImage()
2335          *     - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
2336          *
2337          * Most of these operations are implemented using Blorp which writes
2338          * through the render target, so flush that cache to make it visible
2339          * to future operations. And for depth related operations we also
2340          * need to flush the depth cache.
2341          */
2342         pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
2343         pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
2344         break;
2345      case VK_ACCESS_2_MEMORY_WRITE_BIT:
2346         /* We're transitioning a buffer for generic write operations. Flush
2347          * all the caches.
2348          */
2349         pipe_bits |= ANV_PIPE_FLUSH_BITS;
2350         break;
2351      case VK_ACCESS_2_HOST_WRITE_BIT:
2352         /* We're transitioning a buffer for access by CPU. Invalidate
2353          * all the caches. Since data and tile caches don't have invalidate,
2354          * we are forced to flush those as well.
2355          */
2356         pipe_bits |= ANV_PIPE_FLUSH_BITS;
2357         pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
2358         break;
2359      case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
2360      case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
         /* We're transitioning a buffer written either from the VS stage or from
2362          * the command streamer (see CmdEndTransformFeedbackEXT), we just
2363          * need to stall the CS.
2364          */
2365         pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2366         break;
2367      default:
2368         break; /* Nothing to do */
2369      }
2370   }
2371
2372   return pipe_bits;
2373}
2374
2375static inline enum anv_pipe_bits
2376anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
2377                                          VkAccessFlags2 flags)
2378{
2379   enum anv_pipe_bits pipe_bits = 0;
2380
2381   u_foreach_bit64(b, flags) {
2382      switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
2383      case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT:
2384         /* Indirect draw commands take a buffer as input that we're going to
2385          * read from the command streamer to load some of the HW registers
2386          * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
2387          * command streamer stall so that all the cache flushes have
2388          * completed before the command streamer loads from memory.
2389          */
2390         pipe_bits |=  ANV_PIPE_CS_STALL_BIT;
2391         /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
2392          * through a vertex buffer, so invalidate that cache.
2393          */
2394         pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
         /* For CmdDispatchIndirect, we also load gl_NumWorkGroups through a
2396          * UBO from the buffer, so we need to invalidate constant cache.
2397          */
2398         pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
2399         pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
         /* Tile cache flush needed for CmdDispatchIndirect since the command
          * streamer and vertex fetch aren't L3 coherent.
2402          */
2403         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
2404         break;
2405      case VK_ACCESS_2_INDEX_READ_BIT:
2406      case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT:
         /* We're transitioning a buffer to be used as input for vkCmdDraw*
2408          * commands, so we invalidate the VF cache to make sure there is no
2409          * stale data when we start rendering.
2410          */
2411         pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
2412         break;
2413      case VK_ACCESS_2_UNIFORM_READ_BIT:
         /* We're transitioning a buffer to be used as uniform data. Because
2415          * uniform is accessed through the data port & sampler, we need to
2416          * invalidate the texture cache (sampler) & constant cache (data
2417          * port) to avoid stale data.
2418          */
2419         pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
2420         if (device->physical->compiler->indirect_ubos_use_sampler)
2421            pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2422         else
2423            pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
2424         break;
2425      case VK_ACCESS_2_SHADER_READ_BIT:
2426      case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT:
2427      case VK_ACCESS_2_TRANSFER_READ_BIT:
         /* Transitioning a buffer to be read through the sampler, so
          * invalidate the texture cache; we don't want any stale data.
2430          */
2431         pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2432         break;
2433      case VK_ACCESS_2_MEMORY_READ_BIT:
2434         /* Transitioning a buffer for generic read, invalidate all the
2435          * caches.
2436          */
2437         pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
2438         break;
2439      case VK_ACCESS_2_MEMORY_WRITE_BIT:
2440         /* Generic write, make sure all previously written things land in
2441          * memory.
2442          */
2443         pipe_bits |= ANV_PIPE_FLUSH_BITS;
2444         break;
2445      case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
2446      case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
         /* Transitioning a buffer for conditional rendering or transform
          * feedback. We'll load the content of this buffer into HW registers
          * using the command streamer, so we need to stall the command
          * streamer to make sure any in-flight flush operations have
          * completed.
2452          */
2453         pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2454         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
2455         pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
2456         break;
2457      case VK_ACCESS_2_HOST_READ_BIT:
2458         /* We're transitioning a buffer that was written by CPU.  Flush
2459          * all the caches.
2460          */
2461         pipe_bits |= ANV_PIPE_FLUSH_BITS;
2462         break;
2463      case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
2464         /* We're transitioning a buffer to be written by the streamout fixed
2465          * function. This one is apparently not L3 coherent, so we need a
2466          * tile cache flush to make sure any previous write is not going to
2467          * create WaW hazards.
2468          */
2469         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
2470         break;
2471      default:
2472         break; /* Nothing to do */
2473      }
2474   }
2475
2476   return pipe_bits;
2477}
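
/* A minimal sketch of how a pipeline barrier implementation might combine
 * the two helpers above (src_flags/dst_flags stand for the access masks
 * accumulated from the dependency info; the real code paths involve more
 * bookkeeping):
 *
 *    cmd_buffer->state.pending_pipe_bits |=
 *       anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
 *       anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
 */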
2478
2479#define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV (         \
2480   VK_IMAGE_ASPECT_COLOR_BIT | \
2481   VK_IMAGE_ASPECT_PLANE_0_BIT | \
2482   VK_IMAGE_ASPECT_PLANE_1_BIT | \
2483   VK_IMAGE_ASPECT_PLANE_2_BIT)
2484#define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
2485   VK_IMAGE_ASPECT_PLANE_0_BIT | \
2486   VK_IMAGE_ASPECT_PLANE_1_BIT | \
2487   VK_IMAGE_ASPECT_PLANE_2_BIT)
2488
2489struct anv_vertex_binding {
2490   struct anv_buffer *                          buffer;
2491   VkDeviceSize                                 offset;
2492   VkDeviceSize                                 size;
2493};
2494
2495struct anv_xfb_binding {
2496   struct anv_buffer *                          buffer;
2497   VkDeviceSize                                 offset;
2498   VkDeviceSize                                 size;
2499};
2500
2501struct anv_push_constants {
2502   /** Push constant data provided by the client through vkPushConstants */
2503   uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
2504
2505   /** Dynamic offsets for dynamic UBOs and SSBOs */
2506   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
2507
2508   /* Robust access pushed registers. */
2509   uint64_t push_reg_mask[MESA_SHADER_STAGES];
2510
2511   /** Ray query globals (RT_DISPATCH_GLOBALS) */
2512   uint64_t ray_query_globals;
2513
2514   /* Base addresses for descriptor sets */
2515   uint64_t desc_sets[MAX_SETS];
2516
2517   struct {
2518      /** Base workgroup ID
2519       *
2520       * Used for vkCmdDispatchBase.
2521       */
2522      uint32_t base_work_group_id[3];
2523
2524      /** Subgroup ID
2525       *
2526       * This is never set by software but is implicitly filled out when
2527       * uploading the push constants for compute shaders.
2528       */
2529      uint32_t subgroup_id;
2530   } cs;
2531};
2532
2533struct anv_surface_state {
2534   struct anv_state state;
2535   /** Address of the surface referred to by this state
2536    *
2537    * This address is relative to the start of the BO.
2538    */
2539   struct anv_address address;
2540   /* Address of the aux surface, if any
2541    *
2542    * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
2543    *
2544    * With the exception of gfx8, the bottom 12 bits of this address' offset
2545    * include extra aux information.
2546    */
2547   struct anv_address aux_address;
2548   /* Address of the clear color, if any
2549    *
2550    * This address is relative to the start of the BO.
2551    */
2552   struct anv_address clear_address;
2553};
2554
2555struct anv_attachment {
2556   VkFormat vk_format;
2557   const struct anv_image_view *iview;
2558   VkImageLayout layout;
2559   enum isl_aux_usage aux_usage;
2560   struct anv_surface_state surface_state;
2561
2562   VkResolveModeFlagBits resolve_mode;
2563   const struct anv_image_view *resolve_iview;
2564   VkImageLayout resolve_layout;
2565};
2566
2567/** State tracking for vertex buffer flushes
2568 *
2569 * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
2570 * addresses.  If you happen to have two vertex buffers which get placed
2571 * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
2572 * collisions.  In order to solve this problem, we track vertex address ranges
2573 * which are live in the cache and invalidate the cache if one ever exceeds 32
2574 * bits.
2575 */
2576struct anv_vb_cache_range {
2577   /* Virtual address at which the live vertex buffer cache range starts for
2578    * this vertex buffer index.
2579    */
2580   uint64_t start;
2581
2582   /* Virtual address of the byte after where vertex buffer cache range ends.
2583    * This is exclusive such that end - start is the size of the range.
2584    */
2585   uint64_t end;
2586};
2587
/* Check whether we need to apply the Gfx8-9 vertex buffer workaround. */
2589static inline bool
2590anv_gfx8_9_vb_cache_range_needs_workaround(struct anv_vb_cache_range *bound,
2591                                           struct anv_vb_cache_range *dirty,
2592                                           struct anv_address vb_address,
2593                                           uint32_t vb_size)
2594{
2595   if (vb_size == 0) {
2596      bound->start = 0;
2597      bound->end = 0;
2598      return false;
2599   }
2600
2601   assert(vb_address.bo && anv_bo_is_pinned(vb_address.bo));
2602   bound->start = intel_48b_address(anv_address_physical(vb_address));
2603   bound->end = bound->start + vb_size;
2604   assert(bound->end > bound->start); /* No overflow */
2605
2606   /* Align everything to a cache line */
2607   bound->start &= ~(64ull - 1ull);
2608   bound->end = align_u64(bound->end, 64);
2609
2610   /* Compute the dirty range */
2611   dirty->start = MIN2(dirty->start, bound->start);
2612   dirty->end = MAX2(dirty->end, bound->end);
2613
2614   /* If our range is larger than 32 bits, we have to flush */
2615   assert(bound->end - bound->start <= (1ull << 32));
2616   return (dirty->end - dirty->start) > (1ull << 32);
2617}
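
/* A sketch of how the helper above might be used when binding vertex buffer
 * "vb_index" (the surrounding bookkeeping is simplified for illustration):
 *
 *    if (anv_gfx8_9_vb_cache_range_needs_workaround(
 *           &cmd_buffer->state.gfx.vb_bound_ranges[vb_index],
 *           &cmd_buffer->state.gfx.vb_dirty_ranges[vb_index],
 *           vb_address, vb_size))
 *       cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
 */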
2618
2619/** State tracking for particular pipeline bind point
2620 *
2621 * This struct is the base struct for anv_cmd_graphics_state and
2622 * anv_cmd_compute_state.  These are used to track state which is bound to a
2623 * particular type of pipeline.  Generic state that applies per-stage such as
2624 * binding table offsets and push constants is tracked generically with a
2625 * per-stage array in anv_cmd_state.
2626 */
2627struct anv_cmd_pipeline_state {
2628   struct anv_descriptor_set *descriptors[MAX_SETS];
2629   struct anv_push_descriptor_set *push_descriptors[MAX_SETS];
2630
2631   struct anv_push_constants push_constants;
2632
2633   /* Push constant state allocated when flushing push constants. */
2634   struct anv_state          push_constants_state;
2635};
2636
2637/** State tracking for graphics pipeline
2638 *
2639 * This has anv_cmd_pipeline_state as a base struct to track things which get
2640 * bound to a graphics pipeline.  Along with general pipeline bind point state
2641 * which is in the anv_cmd_pipeline_state base struct, it also contains other
2642 * state which is graphics-specific.
2643 */
2644struct anv_cmd_graphics_state {
2645   struct anv_cmd_pipeline_state base;
2646
2647   struct anv_graphics_pipeline *pipeline;
2648
2649   VkRenderingFlags rendering_flags;
2650   VkRect2D render_area;
2651   uint32_t layer_count;
2652   uint32_t samples;
2653   uint32_t view_mask;
2654   uint32_t color_att_count;
2655   struct anv_state att_states;
2656   struct anv_attachment color_att[MAX_RTS];
2657   struct anv_attachment depth_att;
2658   struct anv_attachment stencil_att;
2659   struct anv_state null_surface_state;
2660
2661   anv_cmd_dirty_mask_t dirty;
2662   uint32_t vb_dirty;
2663
2664   struct anv_vb_cache_range ib_bound_range;
2665   struct anv_vb_cache_range ib_dirty_range;
2666   struct anv_vb_cache_range vb_bound_ranges[33];
2667   struct anv_vb_cache_range vb_dirty_ranges[33];
2668
2669   uint32_t restart_index;
2670
2671   VkShaderStageFlags push_constant_stages;
2672
2673   uint32_t primitive_topology;
2674
2675   struct anv_buffer *index_buffer;
2676   uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
2677   uint32_t index_offset;
2678
2679   struct vk_sample_locations_state sample_locations;
2680};
2681
2682enum anv_depth_reg_mode {
2683   ANV_DEPTH_REG_MODE_UNKNOWN = 0,
2684   ANV_DEPTH_REG_MODE_HW_DEFAULT,
2685   ANV_DEPTH_REG_MODE_D16_1X_MSAA,
2686};
2687
2688/** State tracking for compute pipeline
2689 *
2690 * This has anv_cmd_pipeline_state as a base struct to track things which get
2691 * bound to a compute pipeline.  Along with general pipeline bind point state
2692 * which is in the anv_cmd_pipeline_state base struct, it also contains other
2693 * state which is compute-specific.
2694 */
2695struct anv_cmd_compute_state {
2696   struct anv_cmd_pipeline_state base;
2697
2698   struct anv_compute_pipeline *pipeline;
2699
2700   bool pipeline_dirty;
2701
2702   struct anv_state push_data;
2703
2704   struct anv_address num_workgroups;
2705};
2706
2707struct anv_cmd_ray_tracing_state {
2708   struct anv_cmd_pipeline_state base;
2709
2710   struct anv_ray_tracing_pipeline *pipeline;
2711
2712   bool pipeline_dirty;
2713
2714   struct {
2715      struct anv_bo *bo;
2716      struct brw_rt_scratch_layout layout;
2717   } scratch;
2718};
2719
2720/** State required while building cmd buffer */
2721struct anv_cmd_state {
2722   /* PIPELINE_SELECT.PipelineSelection */
2723   uint32_t                                     current_pipeline;
2724   const struct intel_l3_config *               current_l3_config;
2725   uint32_t                                     last_aux_map_state;
2726
2727   struct anv_cmd_graphics_state                gfx;
2728   struct anv_cmd_compute_state                 compute;
2729   struct anv_cmd_ray_tracing_state             rt;
2730
2731   enum anv_pipe_bits                           pending_pipe_bits;
2732   VkShaderStageFlags                           descriptors_dirty;
2733   VkShaderStageFlags                           push_constants_dirty;
2734
2735   struct anv_vertex_binding                    vertex_bindings[MAX_VBS];
2736   bool                                         xfb_enabled;
2737   struct anv_xfb_binding                       xfb_bindings[MAX_XFB_BUFFERS];
2738   struct anv_state                             binding_tables[MESA_VULKAN_SHADER_STAGES];
2739   struct anv_state                             samplers[MESA_VULKAN_SHADER_STAGES];
2740
2741   unsigned char                                sampler_sha1s[MESA_VULKAN_SHADER_STAGES][20];
2742   unsigned char                                surface_sha1s[MESA_VULKAN_SHADER_STAGES][20];
2743   unsigned char                                push_sha1s[MESA_VULKAN_SHADER_STAGES][20];
2744
2745   /**
    * Whether or not the gfx8 PMA fix is enabled.  We ensure that it is
    * disabled at the top of any command buffer by disabling it in
    * EndCommandBuffer and before invoking the secondary in ExecuteCommands.
2749    */
2750   bool                                         pma_fix_enabled;
2751
2752   /**
2753    * Whether or not we know for certain that HiZ is enabled for the current
2754    * subpass.  If, for whatever reason, we are unsure as to whether HiZ is
2755    * enabled or not, this will be false.
2756    */
2757   bool                                         hiz_enabled;
2758
2759   /* We ensure the registers for the gfx12 D16 fix are initialized at the
2760    * first non-NULL depth stencil packet emission of every command buffer.
2761    * For secondary command buffer execution, we transfer the state from the
2762    * last command buffer to the primary (if known).
2763    */
2764   enum anv_depth_reg_mode                      depth_reg_mode;
2765
2766   /**
2767    * Whether RHWO optimization is enabled (Wa_1508744258).
2768    */
2769   bool                                         rhwo_optimization_enabled;
2770
2771   /**
2772    * Pending state of the RHWO optimization, to be applied at the next
2773    * genX(cmd_buffer_apply_pipe_flushes).
2774    */
2775   bool                                         pending_rhwo_optimization_enabled;
2776
2777   bool                                         conditional_render_enabled;
2778
2779   /**
2780    * Last rendering scale argument provided to
2781    * genX(cmd_buffer_emit_hashing_mode)().
2782    */
2783   unsigned                                     current_hash_scale;
2784
2785   /**
2786    * A buffer used for spill/fill of ray queries.
2787    */
2788   struct anv_bo *                              ray_query_shadow_bo;
2789};
2790
2791#define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192
2792#define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024)
2793
2794enum anv_cmd_buffer_exec_mode {
2795   ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
2796   ANV_CMD_BUFFER_EXEC_MODE_EMIT,
2797   ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
2798   ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
2799   ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
2800   ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
2801};
2802
2803struct anv_measure_batch;
2804
2805struct anv_cmd_buffer {
2806   struct vk_command_buffer                     vk;
2807
2808   struct anv_device *                          device;
2809   struct anv_queue_family *                    queue_family;
2810
2811   struct anv_batch                             batch;
2812
2813   /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
2814    * recorded upon calling vkEndCommandBuffer(). This is useful if we need to
    * rewrite the end to chain multiple batches together at vkQueueSubmit().
2816    */
2817   void *                                       batch_end;
2818
2819   /* Fields required for the actual chain of anv_batch_bo's.
2820    *
2821    * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
2822    */
2823   struct list_head                             batch_bos;
2824   enum anv_cmd_buffer_exec_mode                exec_mode;
2825
2826   /* A vector of anv_batch_bo pointers for every batch or surface buffer
2827    * referenced by this command buffer
2828    *
2829    * initialized by anv_cmd_buffer_init_batch_bo_chain()
2830    */
2831   struct u_vector                            seen_bbos;
2832
2833   /* A vector of int32_t's for every block of binding tables.
2834    *
2835    * initialized by anv_cmd_buffer_init_batch_bo_chain()
2836    */
2837   struct u_vector                              bt_block_states;
2838   struct anv_state                             bt_next;
2839
2840   struct anv_reloc_list                        surface_relocs;
2841   /** Last seen surface state block pool center bo offset */
2842   uint32_t                                     last_ss_pool_center;
2843
2844   /* Serial for tracking buffer completion */
2845   uint32_t                                     serial;
2846
2847   /* Stream objects for storing temporary data */
2848   struct anv_state_stream                      surface_state_stream;
2849   struct anv_state_stream                      dynamic_state_stream;
2850   struct anv_state_stream                      general_state_stream;
2851
2852   VkCommandBufferUsageFlags                    usage_flags;
2853
2854   struct anv_query_pool                       *perf_query_pool;
2855
2856   struct anv_cmd_state                         state;
2857
2858   struct anv_address                           return_addr;
2859
2860   /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
2861   uint64_t                                     intel_perf_marker;
2862
2863   struct anv_measure_batch *measure;
2864
2865   /**
    * KHR_performance_query requires self-modifying command buffers and this
    * array holds the locations of the modifying commands for the query begin
    * and end instructions storing performance counters. The array length is
2869    * anv_physical_device::n_perf_query_commands.
2870    */
2871   struct mi_address_token                  *self_mod_locations;
2872
2873   /**
2874    * Index tracking which of the self_mod_locations items have already been
2875    * used.
2876    */
2877   uint32_t                                      perf_reloc_idx;
2878
2879   /**
2880    * Sum of all the anv_batch_bo sizes allocated for this command buffer.
2881    * Used to increase allocation size for long command buffers.
2882    */
2883   uint32_t                                     total_batch_size;
2884
   /**
    * Trace points recorded in this command buffer (see util/perf/u_trace.h).
    */
2888   struct u_trace                               trace;
2889};
2890
/* Determine whether we can chain a given cmd_buffer to another one. We need
 * softpin and we also need to make sure that we can edit the end of the batch
 * to point to the next one, which requires the command buffer not to be used
 * simultaneously.
 */
2896static inline bool
2897anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer)
2898{
2899   return !anv_use_relocations(cmd_buffer->device->physical) &&
2900      !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
2901}
2902
2903VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
2904void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
2905void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
2906void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
2907void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
2908                                  struct anv_cmd_buffer *secondary);
2909void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
2910VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
2911                                struct anv_cmd_buffer *cmd_buffer,
2912                                const VkSemaphore *in_semaphores,
2913                                const uint64_t *in_wait_values,
2914                                uint32_t num_in_semaphores,
2915                                const VkSemaphore *out_semaphores,
2916                                const uint64_t *out_signal_values,
2917                                uint32_t num_out_semaphores,
2918                                VkFence fence,
2919                                int perf_query_pass);
2920
2921VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);
2922
2923struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
2924                                             const void *data, uint32_t size, uint32_t alignment);
2925struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
2926                                              uint32_t *a, uint32_t *b,
2927                                              uint32_t dwords, uint32_t alignment);
2928
2929struct anv_address
2930anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
2931struct anv_state
2932anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
2933                                   uint32_t entries, uint32_t *state_offset);
2934struct anv_state
2935anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer);
2936struct anv_state
2937anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
2938                                   uint32_t size, uint32_t alignment);
2939
2940VkResult
2941anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
2942
2943void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
2944
2945struct anv_state
2946anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
2947struct anv_state
2948anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
2949
2950VkResult
2951anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
2952                                         uint32_t num_entries,
2953                                         uint32_t *state_offset,
2954                                         struct anv_state *bt_state);
2955
2956void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
2957
2958void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
2959
2960enum anv_bo_sync_state {
2961   /** Indicates that this is a new (or newly reset fence) */
2962   ANV_BO_SYNC_STATE_RESET,
2963
2964   /** Indicates that this fence has been submitted to the GPU but is still
2965    * (as far as we know) in use by the GPU.
2966    */
2967   ANV_BO_SYNC_STATE_SUBMITTED,
2968
2969   ANV_BO_SYNC_STATE_SIGNALED,
2970};
2971
2972struct anv_bo_sync {
2973   struct vk_sync sync;
2974
2975   enum anv_bo_sync_state state;
2976   struct anv_bo *bo;
2977};
2978
2979extern const struct vk_sync_type anv_bo_sync_type;
2980
2981static inline bool
2982vk_sync_is_anv_bo_sync(const struct vk_sync *sync)
2983{
2984   return sync->type == &anv_bo_sync_type;
2985}
2986
2987VkResult anv_create_sync_for_memory(struct vk_device *device,
2988                                    VkDeviceMemory memory,
2989                                    bool signal_memory,
2990                                    struct vk_sync **sync_out);
2991
2992struct anv_event {
2993   struct vk_object_base                        base;
2994   uint64_t                                     semaphore;
2995   struct anv_state                             state;
2996};
2997
2998#define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)
2999
3000#define anv_foreach_stage(stage, stage_bits)                         \
3001   for (gl_shader_stage stage,                                       \
3002        __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK);    \
3003        stage = __builtin_ffs(__tmp) - 1, __tmp;                     \
3004        __tmp &= ~(1 << (stage)))
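
/* A minimal usage sketch of anv_foreach_stage(), assuming the standard
 * gl_shader_stage enum values (MESA_SHADER_VERTEX == 0, etc.); it walks the
 * set bits of the mask from the lowest stage to the highest:
 *
 *    uint32_t stages = (1 << MESA_SHADER_VERTEX) |
 *                      (1 << MESA_SHADER_FRAGMENT);
 *    anv_foreach_stage(s, stages) {
 *       // s is MESA_SHADER_VERTEX on the first iteration and
 *       // MESA_SHADER_FRAGMENT on the second.
 *    }
 */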
3005
3006struct anv_pipeline_bind_map {
3007   unsigned char                                surface_sha1[20];
3008   unsigned char                                sampler_sha1[20];
3009   unsigned char                                push_sha1[20];
3010
3011   uint32_t surface_count;
3012   uint32_t sampler_count;
3013
3014   struct anv_pipeline_binding *                surface_to_descriptor;
3015   struct anv_pipeline_binding *                sampler_to_descriptor;
3016
3017   struct anv_push_range                        push_ranges[4];
3018};
3019
3020struct anv_shader_bin {
3021   struct vk_pipeline_cache_object base;
3022
3023   gl_shader_stage stage;
3024
3025   struct anv_state kernel;
3026   uint32_t kernel_size;
3027
3028   const struct brw_stage_prog_data *prog_data;
3029   uint32_t prog_data_size;
3030
3031   struct brw_compile_stats stats[3];
3032   uint32_t num_stats;
3033
3034   struct nir_xfb_info *xfb_info;
3035
3036   struct anv_pipeline_bind_map bind_map;
3037};
3038
3039struct anv_shader_bin *
3040anv_shader_bin_create(struct anv_device *device,
3041                      gl_shader_stage stage,
3042                      const void *key, uint32_t key_size,
3043                      const void *kernel, uint32_t kernel_size,
3044                      const struct brw_stage_prog_data *prog_data,
3045                      uint32_t prog_data_size,
3046                      const struct brw_compile_stats *stats, uint32_t num_stats,
3047                      const struct nir_xfb_info *xfb_info,
3048                      const struct anv_pipeline_bind_map *bind_map);
3049
3050static inline void
3051anv_shader_bin_ref(struct anv_shader_bin *shader)
3052{
3053   vk_pipeline_cache_object_ref(&shader->base);
3054}
3055
3056static inline void
3057anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
3058{
3059   vk_pipeline_cache_object_unref(&shader->base);
3060}
3061
3062#define anv_shader_bin_get_bsr(bin, local_arg_offset) ({             \
3063   assert((local_arg_offset) % 8 == 0);                              \
3064   const struct brw_bs_prog_data *prog_data =                        \
3065      brw_bs_prog_data_const(bin->prog_data);                        \
3066   assert(prog_data->simd_size == 8 || prog_data->simd_size == 16);  \
3067                                                                     \
3068   (struct GFX_BINDLESS_SHADER_RECORD) {                             \
3069      .OffsetToLocalArguments = (local_arg_offset) / 8,              \
3070      .BindlessShaderDispatchMode =                                  \
3071         prog_data->simd_size == 16 ? RT_SIMD16 : RT_SIMD8,          \
3072      .KernelStartPointer = bin->kernel.offset,                      \
3073   };                                                                \
3074})
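
/* Illustrative sketch of how ray-tracing code might consume this macro when
 * writing shader group handles. GFX_BINDLESS_SHADER_RECORD and its _pack()
 * helper come from the genxml headers and are only usable in gen-specific
 * files; the surrounding names here (group, dst) are hypothetical:
 *
 *    struct GFX_BINDLESS_SHADER_RECORD bsr =
 *       anv_shader_bin_get_bsr(group->closest_hit, local_arg_offset);
 *    GFX_BINDLESS_SHADER_RECORD_pack(NULL, dst, &bsr);
 */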
3075
3076struct anv_pipeline_executable {
3077   gl_shader_stage stage;
3078
3079   struct brw_compile_stats stats;
3080
3081   char *nir;
3082   char *disasm;
3083};
3084
3085enum anv_pipeline_type {
3086   ANV_PIPELINE_GRAPHICS,
3087   ANV_PIPELINE_COMPUTE,
3088   ANV_PIPELINE_RAY_TRACING,
3089};
3090
3091struct anv_pipeline {
3092   struct vk_object_base                        base;
3093
3094   struct anv_device *                          device;
3095
3096   struct anv_batch                             batch;
3097   struct anv_reloc_list                        batch_relocs;
3098
3099   void *                                       mem_ctx;
3100
3101   enum anv_pipeline_type                       type;
3102   VkPipelineCreateFlags                        flags;
3103
3104   uint32_t                                     ray_queries;
3105
3106   struct util_dynarray                         executables;
3107
3108   const struct intel_l3_config *               l3_config;
3109};
3110
3111struct anv_graphics_pipeline {
3112   struct anv_pipeline                          base;
3113
3114   /* Shaders */
3115   struct anv_shader_bin *                      shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT];
3116
3117   VkShaderStageFlags                           active_stages;
3118
3119   struct vk_sample_locations_state             sample_locations;
3120   struct vk_dynamic_graphics_state             dynamic_state;
3121
   /* These fields are required with dynamic primitive topology;
    * rasterization_samples is used only with gen < 8.
    */
3125   VkLineRasterizationModeEXT                   line_mode;
3126   VkPolygonMode                                polygon_mode;
3127   uint32_t                                     patch_control_points;
3128   uint32_t                                     rasterization_samples;
3129
3130   VkColorComponentFlags                        color_comp_writes[MAX_RTS];
3131
3132   uint32_t                                     view_mask;
3133   uint32_t                                     instance_multiplier;
3134
3135   bool                                         depth_clamp_enable;
3136   bool                                         depth_clip_enable;
3137   bool                                         kill_pixel;
3138   bool                                         force_fragment_thread_dispatch;
3139   bool                                         negative_one_to_one;
3140
3141   /* When primitive replication is used, subpass->view_mask will describe what
3142    * views to replicate.
3143    */
3144   bool                                         use_primitive_replication;
3145
3146   uint32_t                                     vb_used;
3147   struct anv_pipeline_vertex_binding {
3148      uint32_t                                  stride;
3149      bool                                      instanced;
3150      uint32_t                                  instance_divisor;
3151   } vb[MAX_VBS];
3152
   /* Pre-computed CS instructions that can be copied directly into
    * anv_cmd_buffer.
    */
3156   uint32_t                                     batch_data[512];
3157
   /* Pre-packed CS instructions & structures that need to be merged later
    * with dynamic state.
    */
3161   struct {
3162      uint32_t                                  sf[7];
3163      uint32_t                                  clip[4];
3164      uint32_t                                  xfb_bo_pitch[4];
3165      uint32_t                                  wm[3];
3166      uint32_t                                  blend_state[MAX_RTS * 2];
3167      uint32_t                                  streamout_state[3];
3168   } gfx7;
3169
3170   struct {
3171      uint32_t                                  sf[4];
3172      uint32_t                                  raster[5];
3173      uint32_t                                  wm[2];
3174      uint32_t                                  ps_blend[2];
3175      uint32_t                                  blend_state[1 + MAX_RTS * 2];
3176      uint32_t                                  streamout_state[5];
3177   } gfx8;
3178};
3179
3180struct anv_compute_pipeline {
3181   struct anv_pipeline                          base;
3182
3183   struct anv_shader_bin *                      cs;
3184   uint32_t                                     batch_data[9];
3185   uint32_t                                     interface_descriptor_data[8];
3186};
3187
3188struct anv_rt_shader_group {
3189   VkRayTracingShaderGroupTypeKHR type;
3190
3191   struct anv_shader_bin *general;
3192   struct anv_shader_bin *closest_hit;
3193   struct anv_shader_bin *any_hit;
3194   struct anv_shader_bin *intersection;
3195
3196   /* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */
3197   uint32_t handle[8];
3198};
3199
3200struct anv_ray_tracing_pipeline {
3201   struct anv_pipeline                          base;
3202
3203   /* All shaders in the pipeline */
3204   struct util_dynarray                         shaders;
3205
3206   uint32_t                                     group_count;
3207   struct anv_rt_shader_group *                 groups;
3208
3209   /* If non-zero, this is the default computed stack size as per the stack
3210    * size computation in the Vulkan spec.  If zero, that indicates that the
3211    * client has requested a dynamic stack size.
3212    */
3213   uint32_t                                     stack_size;
3214};
3215
3216#define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)             \
3217   static inline struct anv_##pipe_type##_pipeline *                 \
3218   anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline)      \
3219   {                                                                 \
3220      assert(pipeline->type == pipe_enum);                           \
3221      return (struct anv_##pipe_type##_pipeline *) pipeline;         \
3222   }
3223
3224ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
3225ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
3226ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)
3227
3228static inline bool
3229anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
3230                       gl_shader_stage stage)
3231{
3232   return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
3233}
3234
3235static inline bool
3236anv_pipeline_is_primitive(const struct anv_graphics_pipeline *pipeline)
3237{
3238   return anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX);
3239}
3240
3241static inline bool
3242anv_pipeline_is_mesh(const struct anv_graphics_pipeline *pipeline)
3243{
3244   return anv_pipeline_has_stage(pipeline, MESA_SHADER_MESH);
3245}
3246
3247static inline bool
3248anv_cmd_buffer_all_color_write_masked(const struct anv_cmd_buffer *cmd_buffer)
3249{
3250   const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
3251   const struct vk_dynamic_graphics_state *dyn =
3252      &cmd_buffer->vk.dynamic_graphics_state;
3253   uint8_t color_writes = dyn->cb.color_write_enables;
3254
3255   /* All writes disabled through vkCmdSetColorWriteEnableEXT */
3256   if ((color_writes & ((1u << state->color_att_count) - 1)) == 0)
3257      return true;
3258
3259   /* Or all write masks are empty */
3260   for (uint32_t i = 0; i < state->color_att_count; i++) {
3261      if (state->pipeline->color_comp_writes[i] != 0)
3262         return false;
3263   }
3264
3265   return true;
3266}
3267
3268#define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage)             \
3269static inline const struct brw_##prefix##_prog_data *                   \
3270get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline)  \
3271{                                                                       \
3272   if (anv_pipeline_has_stage(pipeline, stage)) {                       \
3273      return (const struct brw_##prefix##_prog_data *)                  \
3274             pipeline->shaders[stage]->prog_data;                       \
3275   } else {                                                             \
3276      return NULL;                                                      \
3277   }                                                                    \
3278}
3279
3280ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
3281ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
3282ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
3283ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
3284ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
3285ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(mesh, MESA_SHADER_MESH)
3286ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(task, MESA_SHADER_TASK)
3287
3288static inline const struct brw_cs_prog_data *
3289get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
3290{
3291   assert(pipeline->cs);
3292   return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
3293}
3294
3295static inline const struct brw_vue_prog_data *
3296anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
3297{
3298   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
3299      return &get_gs_prog_data(pipeline)->base;
3300   else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
3301      return &get_tes_prog_data(pipeline)->base;
3302   else
3303      return &get_vs_prog_data(pipeline)->base;
3304}
3305
3306VkResult
3307anv_device_init_rt_shaders(struct anv_device *device);
3308
3309void
3310anv_device_finish_rt_shaders(struct anv_device *device);
3311
3312VkResult
3313anv_pipeline_init(struct anv_pipeline *pipeline,
3314                  struct anv_device *device,
3315                  enum anv_pipeline_type type,
3316                  VkPipelineCreateFlags flags,
3317                  const VkAllocationCallbacks *pAllocator);
3318
3319void
3320anv_pipeline_finish(struct anv_pipeline *pipeline,
3321                    struct anv_device *device,
3322                    const VkAllocationCallbacks *pAllocator);
3323
3324struct anv_format_plane {
3325   enum isl_format isl_format:16;
3326   struct isl_swizzle swizzle;
3327
3328   /* Whether this plane contains chroma channels */
3329   bool has_chroma;
3330
3331   /* For downscaling of YUV planes */
3332   uint8_t denominator_scales[2];
3333
   /* How to map sampled YCbCr planes to a single 4-component element. */
3335   struct isl_swizzle ycbcr_swizzle;
3336
3337   /* What aspect is associated to this plane */
3338   VkImageAspectFlags aspect;
3339};
3340
3341
3342struct anv_format {
3343   struct anv_format_plane planes[3];
3344   VkFormat vk_format;
3345   uint8_t n_planes;
3346   bool can_ycbcr;
3347};
3348
3349static inline void
3350anv_assert_valid_aspect_set(VkImageAspectFlags aspects)
3351{
3352   if (util_bitcount(aspects) == 1) {
3353      assert(aspects & (VK_IMAGE_ASPECT_COLOR_BIT |
3354                        VK_IMAGE_ASPECT_DEPTH_BIT |
3355                        VK_IMAGE_ASPECT_STENCIL_BIT |
3356                        VK_IMAGE_ASPECT_PLANE_0_BIT |
3357                        VK_IMAGE_ASPECT_PLANE_1_BIT |
3358                        VK_IMAGE_ASPECT_PLANE_2_BIT));
3359   } else if (aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) {
3360      assert(aspects == VK_IMAGE_ASPECT_PLANE_0_BIT ||
3361             aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
3362                         VK_IMAGE_ASPECT_PLANE_1_BIT) ||
3363             aspects == (VK_IMAGE_ASPECT_PLANE_0_BIT |
3364                         VK_IMAGE_ASPECT_PLANE_1_BIT |
3365                         VK_IMAGE_ASPECT_PLANE_2_BIT));
3366   } else {
3367      assert(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
3368                         VK_IMAGE_ASPECT_STENCIL_BIT));
3369   }
3370}
3371
3372/**
3373 * Return the aspect's plane relative to all_aspects.  For an image, for
3374 * instance, all_aspects would be the set of aspects in the image.  For
3375 * an image view, all_aspects would be the subset of aspects represented
3376 * by that particular view.
3377 */
3378static inline uint32_t
3379anv_aspect_to_plane(VkImageAspectFlags all_aspects,
3380                    VkImageAspectFlagBits aspect)
3381{
3382   anv_assert_valid_aspect_set(all_aspects);
3383   assert(util_bitcount(aspect) == 1);
3384   assert(!(aspect & ~all_aspects));
3385
3386   /* Because we always put image and view planes in aspect-bit-order, the
3387    * plane index is the number of bits in all_aspects before aspect.
3388    */
3389   return util_bitcount(all_aspects & (aspect - 1));
3390}
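
/* Worked example: for a 3-plane image, all_aspects is PLANE_0 | PLANE_1 |
 * PLANE_2.  With aspect == VK_IMAGE_ASPECT_PLANE_2_BIT, the bits of
 * all_aspects below PLANE_2 are PLANE_0 and PLANE_1, so the plane index is 2.
 * For a combined depth/stencil image (all_aspects == DEPTH | STENCIL), the
 * stencil aspect similarly maps to plane 1.
 */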
3391
3392#define anv_foreach_image_aspect_bit(b, image, aspects) \
3393   u_foreach_bit(b, vk_image_expand_aspect_mask(&(image)->vk, aspects))
3394
3395const struct anv_format *
3396anv_get_format(VkFormat format);
3397
3398static inline uint32_t
3399anv_get_format_planes(VkFormat vk_format)
3400{
3401   const struct anv_format *format = anv_get_format(vk_format);
3402
3403   return format != NULL ? format->n_planes : 0;
3404}
3405
3406struct anv_format_plane
3407anv_get_format_plane(const struct intel_device_info *devinfo,
3408                     VkFormat vk_format, uint32_t plane,
3409                     VkImageTiling tiling);
3410
3411struct anv_format_plane
3412anv_get_format_aspect(const struct intel_device_info *devinfo,
3413                      VkFormat vk_format,
3414                      VkImageAspectFlagBits aspect, VkImageTiling tiling);
3415
3416static inline enum isl_format
3417anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format,
3418                   VkImageAspectFlags aspect, VkImageTiling tiling)
3419{
3420   return anv_get_format_aspect(devinfo, vk_format, aspect, tiling).isl_format;
3421}
3422
3423bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo,
3424                                  VkImageCreateFlags create_flags,
3425                                  VkFormat vk_format, VkImageTiling vk_tiling,
3426                                  VkImageUsageFlags vk_usage,
3427                                  const VkImageFormatListCreateInfo *fmt_list);
3428
3429extern VkFormat
3430vk_format_from_android(unsigned android_format, unsigned android_usage);
3431
3432static inline struct isl_swizzle
3433anv_swizzle_for_render(struct isl_swizzle swizzle)
3434{
3435   /* Sometimes the swizzle will have alpha map to one.  We do this to fake
3436    * RGB as RGBA for texturing
3437    */
3438   assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
3439          swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
3440
3441   /* But it doesn't matter what we render to that channel */
3442   swizzle.a = ISL_CHANNEL_SELECT_ALPHA;
3443
3444   return swizzle;
3445}
3446
3447void
3448anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);
3449
3450/**
3451 * Describes how each part of anv_image will be bound to memory.
3452 */
3453struct anv_image_memory_range {
3454   /**
3455    * Disjoint bindings into which each portion of the image will be bound.
3456    *
    * Binding images to memory can be complicated and involve binding different
3458    * portions of the image to different memory objects or regions.  For most
3459    * images, everything lives in the MAIN binding and gets bound by
3460    * vkBindImageMemory.  For disjoint multi-planar images, each plane has
3461    * a unique, disjoint binding and gets bound by vkBindImageMemory2 with
3462    * VkBindImagePlaneMemoryInfo.  There may also exist bits of memory which are
3463    * implicit or driver-managed and live in special-case bindings.
3464    */
3465   enum anv_image_memory_binding {
3466      /**
3467       * Used if and only if image is not multi-planar disjoint. Bound by
3468       * vkBindImageMemory2 without VkBindImagePlaneMemoryInfo.
3469       */
3470      ANV_IMAGE_MEMORY_BINDING_MAIN,
3471
3472      /**
3473       * Used if and only if image is multi-planar disjoint.  Bound by
3474       * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo.
3475       */
3476      ANV_IMAGE_MEMORY_BINDING_PLANE_0,
3477      ANV_IMAGE_MEMORY_BINDING_PLANE_1,
3478      ANV_IMAGE_MEMORY_BINDING_PLANE_2,
3479
3480      /**
3481       * Driver-private bo. In special cases we may store the aux surface and/or
3482       * aux state in this binding.
3483       */
3484      ANV_IMAGE_MEMORY_BINDING_PRIVATE,
3485
3486      /** Sentinel */
3487      ANV_IMAGE_MEMORY_BINDING_END,
3488   } binding;
3489
3490   /**
3491    * Offset is relative to the start of the binding created by
3492    * vkBindImageMemory, not to the start of the bo.
3493    */
3494   uint64_t offset;
3495
3496   uint64_t size;
3497   uint32_t alignment;
3498};
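
/* Rough sketch of how these ranges compose (illustrative values only): a
 * non-disjoint color image with driver-managed fast-clear state might
 * describe itself as
 *
 *    primary surface    -> { MAIN,    .offset = 0,   .size = ... }
 *    aux (CCS) surface  -> { MAIN,    .offset = ..., .size = ... }
 *    fast clear state   -> { PRIVATE, .offset = 0,   .size = ... }
 *
 * Only ranges with size > 0 are considered valid and get bound to memory.
 */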
3499
3500/**
3501 * Subsurface of an anv_image.
3502 */
3503struct anv_surface {
3504   struct isl_surf isl;
3505   struct anv_image_memory_range memory_range;
3506};
3507
3508static inline bool MUST_CHECK
3509anv_surface_is_valid(const struct anv_surface *surface)
3510{
3511   return surface->isl.size_B > 0 && surface->memory_range.size > 0;
3512}
3513
3514struct anv_image {
3515   struct vk_image vk;
3516
3517   uint32_t n_planes;
3518
3519   /**
3520    * Image has multi-planar format and was created with
3521    * VK_IMAGE_CREATE_DISJOINT_BIT.
3522    */
3523   bool disjoint;
3524
3525   /**
    * Image was imported from an AHardwareBuffer.  We have to delay final
    * image creation until bind time.
3528    */
3529   bool from_ahb;
3530
3531   /**
3532    * Image was imported from gralloc with VkNativeBufferANDROID. The gralloc bo
3533    * must be released when the image is destroyed.
3534    */
3535   bool from_gralloc;
3536
3537   /**
3538    * The memory bindings created by vkCreateImage and vkBindImageMemory.
3539    *
3540    * For details on the image's memory layout, see check_memory_bindings().
3541    *
3542    * vkCreateImage constructs the `memory_range` for each
3543    * anv_image_memory_binding.  After vkCreateImage, each binding is valid if
3544    * and only if `memory_range::size > 0`.
3545    *
3546    * vkBindImageMemory binds each valid `memory_range` to an `address`.
3547    * Usually, the app will provide the address via the parameters of
3548    * vkBindImageMemory.  However, special-case bindings may be bound to
3549    * driver-private memory.
3550    */
3551   struct anv_image_binding {
3552      struct anv_image_memory_range memory_range;
3553      struct anv_address address;
3554   } bindings[ANV_IMAGE_MEMORY_BINDING_END];
3555
3556   /**
3557    * Image subsurfaces
3558    *
    * For each aspect, anv_image::planes[plane].primary_surface is valid if
    * and only if anv_image::vk.aspects contains that aspect. Refer to
    * anv_image_aspect_to_plane() to determine the plane index associated
    * with a given aspect.
3562    *
3563    * The hardware requires that the depth buffer and stencil buffer be
3564    * separate surfaces.  From Vulkan's perspective, though, depth and stencil
3565    * reside in the same VkImage.  To satisfy both the hardware and Vulkan, we
3566    * allocate the depth and stencil buffers as separate surfaces in the same
3567    * bo.
3568    */
3569   struct anv_image_plane {
3570      struct anv_surface primary_surface;
3571
3572      /**
3573       * A surface which shadows the main surface and may have different
3574       * tiling. This is used for sampling using a tiling that isn't supported
3575       * for other operations.
3576       */
3577      struct anv_surface shadow_surface;
3578
3579      /**
3580       * The base aux usage for this image.  For color images, this can be
3581       * either CCS_E or CCS_D depending on whether or not we can reliably
3582       * leave CCS on all the time.
3583       */
3584      enum isl_aux_usage aux_usage;
3585
3586      struct anv_surface aux_surface;
3587
3588      /** Location of the fast clear state.  */
3589      struct anv_image_memory_range fast_clear_memory_range;
3590
3591      /**
3592       * Whether this image can be fast cleared with non-zero clear colors.
3593       * This can happen with mutable images when formats of different bit
3594       * sizes per components are used.
3595       *
      * On Gfx9+, because the clear colors are stored as four 32-bit
      * component values, we can clear in R16G16_UNORM (storing two 16-bit
      * values in components 0 & 1 of the clear color) and then draw in
      * R32_UINT, which would interpret the clear color as a single component
      * value, using only the first 16-bit component of the previously
      * written clear color.
      *
      * On Gfx7/7.5/8, only CC_ZERO/CC_ONE clear colors are supported; this
      * boolean prevents the use of CC_ONE.
3604       */
3605      bool can_non_zero_fast_clear;
3606   } planes[3];
3607};
3608
3609static inline bool
3610anv_image_is_externally_shared(const struct anv_image *image)
3611{
3612   return image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID ||
3613          image->vk.external_handle_types != 0;
3614}
3615
3616static inline bool
3617anv_image_has_private_binding(const struct anv_image *image)
3618{
3619   const struct anv_image_binding private_binding =
3620      image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE];
3621   return private_binding.memory_range.size != 0;
3622}
3623
3624/* The ordering of this enum is important */
3625enum anv_fast_clear_type {
3626   /** Image does not have/support any fast-clear blocks */
3627   ANV_FAST_CLEAR_NONE = 0,
3628   /** Image has/supports fast-clear but only to the default value */
3629   ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
3630   /** Image has/supports fast-clear with an arbitrary fast-clear value */
3631   ANV_FAST_CLEAR_ANY = 2,
3632};
3633
3634/**
3635 * Return the aspect's _format_ plane, not its _memory_ plane (using the
3636 * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
3637 * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
3638 * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
3639 */
3640static inline uint32_t
3641anv_image_aspect_to_plane(const struct anv_image *image,
3642                          VkImageAspectFlagBits aspect)
3643{
3644   return anv_aspect_to_plane(image->vk.aspects, aspect);
3645}
3646
3647/* Returns the number of auxiliary buffer levels attached to an image. */
3648static inline uint8_t
3649anv_image_aux_levels(const struct anv_image * const image,
3650                     VkImageAspectFlagBits aspect)
3651{
3652   uint32_t plane = anv_image_aspect_to_plane(image, aspect);
3653   if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
3654      return 0;
3655
3656   return image->vk.mip_levels;
3657}
3658
3659/* Returns the number of auxiliary buffer layers attached to an image. */
3660static inline uint32_t
3661anv_image_aux_layers(const struct anv_image * const image,
3662                     VkImageAspectFlagBits aspect,
3663                     const uint8_t miplevel)
3664{
3665   assert(image);
3666
3667   /* The miplevel must exist in the main buffer. */
3668   assert(miplevel < image->vk.mip_levels);
3669
3670   if (miplevel >= anv_image_aux_levels(image, aspect)) {
3671      /* There are no layers with auxiliary data because the miplevel has no
3672       * auxiliary data.
3673       */
3674      return 0;
3675   }
3676
3677   return MAX2(image->vk.array_layers, image->vk.extent.depth >> miplevel);
3678}
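
/* Worked example: a 2D array image with 8 layers reports 8 aux layers for
 * every miplevel that has aux data.  A 3D image (array_layers == 1) with
 * extent.depth == 16 reports MAX2(1, 16 >> 2) == 4 aux layers at miplevel 2.
 */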
3679
3680static inline struct anv_address MUST_CHECK
3681anv_image_address(const struct anv_image *image,
3682                  const struct anv_image_memory_range *mem_range)
3683{
3684   const struct anv_image_binding *binding = &image->bindings[mem_range->binding];
3685   assert(binding->memory_range.offset == 0);
3686
3687   if (mem_range->size == 0)
3688      return ANV_NULL_ADDRESS;
3689
3690   return anv_address_add(binding->address, mem_range->offset);
3691}
3692
3693static inline struct anv_address
3694anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
3695                               const struct anv_image *image,
3696                               VkImageAspectFlagBits aspect)
3697{
3698   assert(image->vk.aspects & (VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV |
3699                               VK_IMAGE_ASPECT_DEPTH_BIT));
3700
3701   uint32_t plane = anv_image_aspect_to_plane(image, aspect);
3702   const struct anv_image_memory_range *mem_range =
3703      &image->planes[plane].fast_clear_memory_range;
3704
3705   return anv_image_address(image, mem_range);
3706}
3707
3708static inline struct anv_address
3709anv_image_get_fast_clear_type_addr(const struct anv_device *device,
3710                                   const struct anv_image *image,
3711                                   VkImageAspectFlagBits aspect)
3712{
3713   struct anv_address addr =
3714      anv_image_get_clear_color_addr(device, image, aspect);
3715
3716   const unsigned clear_color_state_size = device->info.ver >= 10 ?
3717      device->isl_dev.ss.clear_color_state_size :
3718      device->isl_dev.ss.clear_value_size;
3719   return anv_address_add(addr, clear_color_state_size);
3720}
3721
3722static inline struct anv_address
3723anv_image_get_compression_state_addr(const struct anv_device *device,
3724                                     const struct anv_image *image,
3725                                     VkImageAspectFlagBits aspect,
3726                                     uint32_t level, uint32_t array_layer)
3727{
3728   assert(level < anv_image_aux_levels(image, aspect));
3729   assert(array_layer < anv_image_aux_layers(image, aspect, level));
3730   UNUSED uint32_t plane = anv_image_aspect_to_plane(image, aspect);
3731   assert(image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E);
3732
3733   /* Relative to start of the plane's fast clear memory range */
3734   uint32_t offset;
3735
3736   offset = 4; /* Go past the fast clear type */
3737
3738   if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
3739      for (uint32_t l = 0; l < level; l++)
3740         offset += anv_minify(image->vk.extent.depth, l) * 4;
3741   } else {
3742      offset += level * image->vk.array_layers * 4;
3743   }
3744
3745   offset += array_layer * 4;
3746
3747   assert(offset < image->planes[plane].fast_clear_memory_range.size);
3748
3749   return anv_address_add(
3750      anv_image_get_fast_clear_type_addr(device, image, aspect),
3751      offset);
3752}
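
/* Worked example (illustrative numbers): for a 2D image with
 * vk.array_layers == 6, level == 2 and array_layer == 3, the offset relative
 * to the fast-clear type address is 4 + 2 * 6 * 4 + 3 * 4 == 64 bytes, i.e.
 * one dword of compression state per (level, layer) pair after the
 * fast-clear type dword.
 */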
3753
3754/* Returns true if a HiZ-enabled depth buffer can be sampled from. */
3755static inline bool
3756anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,
3757                        const struct anv_image *image)
3758{
3759   if (!(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
3760      return false;
3761
3762   /* For Gfx8-11, there are some restrictions around sampling from HiZ.
3763    * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
3764    * say:
3765    *
3766    *    "If this field is set to AUX_HIZ, Number of Multisamples must
3767    *    be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
3768    */
3769   if (image->vk.image_type == VK_IMAGE_TYPE_3D)
3770      return false;
3771
3772   /* Allow this feature on BDW even though it is disabled in the BDW devinfo
3773    * struct. There's documentation which suggests that this feature actually
3774    * reduces performance on BDW, but it has only been observed to help so
3775    * far. Sampling fast-cleared blocks on BDW must also be handled with care
3776    * (see depth_stencil_attachment_compute_aux_usage() for more info).
3777    */
3778   if (devinfo->ver != 8 && !devinfo->has_sample_with_hiz)
3779      return false;
3780
3781   return image->vk.samples == 1;
3782}
3783
3784/* Returns true if an MCS-enabled buffer can be sampled from. */
3785static inline bool
3786anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo,
3787                              const struct anv_image *image)
3788{
3789   assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
3790   const uint32_t plane =
3791      anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_COLOR_BIT);
3792
3793   assert(isl_aux_usage_has_mcs(image->planes[plane].aux_usage));
3794
3795   const struct anv_surface *anv_surf = &image->planes[plane].primary_surface;
3796
3797   /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast clears.
3798    * See HSD 1707282275, wa_14013111325. Due to the use of
3799    * format-reinterpretation, a simplified workaround is implemented.
3800    */
3801   if (devinfo->ver >= 12 &&
3802       isl_format_get_layout(anv_surf->isl.format)->bpb <= 16) {
3803      return false;
3804   }
3805
3806   return true;
3807}
3808
3809static inline bool
3810anv_image_plane_uses_aux_map(const struct anv_device *device,
3811                             const struct anv_image *image,
3812                             uint32_t plane)
3813{
3814   return device->info.has_aux_map &&
3815      isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
3816}
3817
3818void
3819anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
3820                                  const struct anv_image *image,
3821                                  VkImageAspectFlagBits aspect,
3822                                  enum isl_aux_usage aux_usage,
3823                                  uint32_t level,
3824                                  uint32_t base_layer,
3825                                  uint32_t layer_count);
3826
3827void
3828anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
3829                      const struct anv_image *image,
3830                      VkImageAspectFlagBits aspect,
3831                      enum isl_aux_usage aux_usage,
3832                      enum isl_format format, struct isl_swizzle swizzle,
3833                      uint32_t level, uint32_t base_layer, uint32_t layer_count,
3834                      VkRect2D area, union isl_color_value clear_color);
3835void
3836anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
3837                              const struct anv_image *image,
3838                              VkImageAspectFlags aspects,
3839                              enum isl_aux_usage depth_aux_usage,
3840                              uint32_t level,
3841                              uint32_t base_layer, uint32_t layer_count,
3842                              VkRect2D area,
3843                              float depth_value, uint8_t stencil_value);
3844void
3845anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
3846                       const struct anv_image *src_image,
3847                       enum isl_aux_usage src_aux_usage,
3848                       uint32_t src_level, uint32_t src_base_layer,
3849                       const struct anv_image *dst_image,
3850                       enum isl_aux_usage dst_aux_usage,
3851                       uint32_t dst_level, uint32_t dst_base_layer,
3852                       VkImageAspectFlagBits aspect,
3853                       uint32_t src_x, uint32_t src_y,
3854                       uint32_t dst_x, uint32_t dst_y,
3855                       uint32_t width, uint32_t height,
3856                       uint32_t layer_count,
3857                       enum blorp_filter filter);
3858void
3859anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
3860                 const struct anv_image *image,
3861                 VkImageAspectFlagBits aspect, uint32_t level,
3862                 uint32_t base_layer, uint32_t layer_count,
3863                 enum isl_aux_op hiz_op);
3864void
3865anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
3866                    const struct anv_image *image,
3867                    VkImageAspectFlags aspects,
3868                    uint32_t level,
3869                    uint32_t base_layer, uint32_t layer_count,
3870                    VkRect2D area, uint8_t stencil_value);
3871void
3872anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
3873                 const struct anv_image *image,
3874                 enum isl_format format, struct isl_swizzle swizzle,
3875                 VkImageAspectFlagBits aspect,
3876                 uint32_t base_layer, uint32_t layer_count,
3877                 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
3878                 bool predicate);
3879void
3880anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
3881                 const struct anv_image *image,
3882                 enum isl_format format, struct isl_swizzle swizzle,
3883                 VkImageAspectFlagBits aspect, uint32_t level,
3884                 uint32_t base_layer, uint32_t layer_count,
3885                 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
3886                 bool predicate);
3887
3888void
3889anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
3890                         const struct anv_image *image,
3891                         VkImageAspectFlagBits aspect,
3892                         uint32_t base_level, uint32_t level_count,
3893                         uint32_t base_layer, uint32_t layer_count);
3894
3895enum isl_aux_state ATTRIBUTE_PURE
3896anv_layout_to_aux_state(const struct intel_device_info * const devinfo,
3897                        const struct anv_image *image,
3898                        const VkImageAspectFlagBits aspect,
3899                        const VkImageLayout layout);
3900
3901enum isl_aux_usage ATTRIBUTE_PURE
3902anv_layout_to_aux_usage(const struct intel_device_info * const devinfo,
3903                        const struct anv_image *image,
3904                        const VkImageAspectFlagBits aspect,
3905                        const VkImageUsageFlagBits usage,
3906                        const VkImageLayout layout);
3907
3908enum anv_fast_clear_type ATTRIBUTE_PURE
3909anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
3910                              const struct anv_image * const image,
3911                              const VkImageAspectFlagBits aspect,
3912                              const VkImageLayout layout);
3913
3914static inline bool
3915anv_image_aspects_compatible(VkImageAspectFlags aspects1,
3916                             VkImageAspectFlags aspects2)
3917{
3918   if (aspects1 == aspects2)
3919      return true;
3920
   /* Otherwise, only color aspects are compatible with one another. */
3922   if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
3923       (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
3924       util_bitcount(aspects1) == util_bitcount(aspects2))
3925      return true;
3926
3927   return false;
3928}
3929
3930struct anv_image_view {
3931   struct vk_image_view vk;
3932
3933   const struct anv_image *image; /**< VkImageViewCreateInfo::image */
3934
3935   unsigned n_planes;
3936   struct {
3937      uint32_t image_plane;
3938
3939      struct isl_view isl;
3940
3941      /**
3942       * RENDER_SURFACE_STATE when using image as a sampler surface with an
3943       * image layout of SHADER_READ_ONLY_OPTIMAL or
3944       * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
3945       */
3946      struct anv_surface_state optimal_sampler_surface_state;
3947
3948      /**
3949       * RENDER_SURFACE_STATE when using image as a sampler surface with an
3950       * image layout of GENERAL.
3951       */
3952      struct anv_surface_state general_sampler_surface_state;
3953
3954      /**
3955       * RENDER_SURFACE_STATE when using image as a storage image. Separate
3956       * states for vanilla (with the original format) and one which has been
3957       * lowered to a format suitable for reading.  This may be a raw surface
3958       * in extreme cases or simply a surface with a different format where we
3959       * expect some conversion to be done in the shader.
3960       */
3961      struct anv_surface_state storage_surface_state;
3962      struct anv_surface_state lowered_storage_surface_state;
3963
3964      struct brw_image_param lowered_storage_image_param;
3965   } planes[3];
3966};
3967
3968enum anv_image_view_state_flags {
3969   ANV_IMAGE_VIEW_STATE_STORAGE_LOWERED      = (1 << 0),
3970   ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL      = (1 << 1),
3971};
3972
3973void anv_image_fill_surface_state(struct anv_device *device,
3974                                  const struct anv_image *image,
3975                                  VkImageAspectFlagBits aspect,
3976                                  const struct isl_view *view,
3977                                  isl_surf_usage_flags_t view_usage,
3978                                  enum isl_aux_usage aux_usage,
3979                                  const union isl_color_value *clear_color,
3980                                  enum anv_image_view_state_flags flags,
3981                                  struct anv_surface_state *state_inout,
3982                                  struct brw_image_param *image_param_out);
3983
3984struct anv_image_create_info {
3985   const VkImageCreateInfo *vk_info;
3986
3987   /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
3988   isl_tiling_flags_t isl_tiling_flags;
3989
   /** These flags will be added to any flags derived from VkImageCreateInfo. */
3991   isl_surf_usage_flags_t isl_extra_usage_flags;
3992};
3993
3994VkResult anv_image_init(struct anv_device *device, struct anv_image *image,
3995                        const struct anv_image_create_info *create_info);
3996
3997void anv_image_finish(struct anv_image *image);
3998
3999void anv_image_get_memory_requirements(struct anv_device *device,
4000                                       struct anv_image *image,
4001                                       VkImageAspectFlags aspects,
4002                                       VkMemoryRequirements2 *pMemoryRequirements);
4003
4004enum isl_format
4005anv_isl_format_for_descriptor_type(const struct anv_device *device,
4006                                   VkDescriptorType type);
4007
4008static inline uint32_t
4009anv_rasterization_aa_mode(VkPolygonMode raster_mode,
4010                          VkLineRasterizationModeEXT line_mode)
4011{
4012   if (raster_mode == VK_POLYGON_MODE_LINE &&
4013       line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT)
4014      return true;
4015   return false;
4016}
4017
4018VkFormatFeatureFlags2
4019anv_get_image_format_features2(const struct intel_device_info *devinfo,
4020                               VkFormat vk_format,
4021                               const struct anv_format *anv_format,
4022                               VkImageTiling vk_tiling,
4023                               const struct isl_drm_modifier_info *isl_mod_info);
4024
4025void anv_fill_buffer_surface_state(struct anv_device *device,
4026                                   struct anv_state state,
4027                                   enum isl_format format,
4028                                   struct isl_swizzle swizzle,
4029                                   isl_surf_usage_flags_t usage,
4030                                   struct anv_address address,
4031                                   uint32_t range, uint32_t stride);
4032
4033
4034/* Haswell border color is a bit of a disaster.  Float and unorm formats use a
4035 * straightforward 32-bit float color in the first 64 bytes.  Instead of using
4036 * a nice float/integer union like Gfx8+, Haswell specifies the integer border
4037 * color as a separate entry /after/ the float color.  The layout of this entry
4038 * also depends on the format's bpp (with extra hacks for RG32), and overlaps.
4039 *
4040 * Since we don't know the format/bpp, we can't make any of the border colors
4041 * containing '1' work for all formats, as it would be in the wrong place for
4042 * some of them.  We opt to make 32-bit integers work as this seems like the
4043 * most common option.  Fortunately, transparent black works regardless, as
4044 * all zeroes is the same in every bit-size.
4045 */
4046struct hsw_border_color {
4047   float float32[4];
4048   uint32_t _pad0[12];
4049   uint32_t uint32[4];
4050   uint32_t _pad1[108];
4051};
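
/* Layout implied by the fields above: float32 starts at byte offset 0,
 * uint32 at byte offset 64, and the trailing pad brings
 * sizeof(struct hsw_border_color) to 512 bytes.
 */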
4052
4053struct gfx8_border_color {
4054   union {
4055      float float32[4];
4056      uint32_t uint32[4];
4057   };
4058   /* Pad out to 64 bytes */
4059   uint32_t _pad[12];
4060};
4061
4062struct anv_ycbcr_conversion {
4063   struct vk_object_base base;
4064
4065   const struct anv_format *        format;
4066   VkSamplerYcbcrModelConversion    ycbcr_model;
4067   VkSamplerYcbcrRange              ycbcr_range;
4068   VkComponentSwizzle               mapping[4];
4069   VkChromaLocation                 chroma_offsets[2];
4070   VkFilter                         chroma_filter;
4071   bool                             chroma_reconstruction;
4072};
4073
4074struct anv_sampler {
4075   struct vk_object_base        base;
4076
4077   uint32_t                     state[3][4];
4078   uint32_t                     n_planes;
4079   struct anv_ycbcr_conversion *conversion;
4080
4081   /* Blob of sampler state data which is guaranteed to be 32-byte aligned
4082    * and with a 32-byte stride for use as bindless samplers.
4083    */
4084   struct anv_state             bindless_state;
4085
4086   struct anv_state             custom_border_color;
4087};
4088
4089#define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
4090
4091struct anv_query_pool {
4092   struct vk_object_base                        base;
4093
4094   VkQueryType                                  type;
4095   VkQueryPipelineStatisticFlags                pipeline_statistics;
4096   /** Stride between slots, in bytes */
4097   uint32_t                                     stride;
4098   /** Number of slots in this query pool */
4099   uint32_t                                     slots;
4100   struct anv_bo *                              bo;
4101
4102   /* KHR perf queries : */
4103   uint32_t                                     pass_size;
4104   uint32_t                                     data_offset;
4105   uint32_t                                     snapshot_size;
4106   uint32_t                                     n_counters;
4107   struct intel_perf_counter_pass                *counter_pass;
4108   uint32_t                                     n_passes;
4109   struct intel_perf_query_info                 **pass_query;
4110};
4111
4112static inline uint32_t khr_perf_query_preamble_offset(const struct anv_query_pool *pool,
4113                                                      uint32_t pass)
4114{
4115   return pool->pass_size * pass + 8;
4116}
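
/* Example with illustrative numbers: if pass_size were 1024 bytes, the
 * preamble for pass 2 would live at byte offset 1024 * 2 + 8 == 2056 within
 * the query pool BO.
 */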
4117
4118struct anv_acceleration_structure {
4119   struct vk_object_base                        base;
4120
4121   VkDeviceSize                                 size;
4122   struct anv_address                           address;
4123};
4124
4125int anv_get_instance_entrypoint_index(const char *name);
4126int anv_get_device_entrypoint_index(const char *name);
4127int anv_get_physical_device_entrypoint_index(const char *name);
4128
4129const char *anv_get_instance_entry_name(int index);
4130const char *anv_get_physical_device_entry_name(int index);
4131const char *anv_get_device_entry_name(int index);
4132
4133bool
4134anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
4135                                   const struct vk_instance_extension_table *instance);
4136bool
4137anv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
4138                                          const struct vk_instance_extension_table *instance);
4139bool
4140anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
4141                                 const struct vk_instance_extension_table *instance,
4142                                 const struct vk_device_extension_table *device);
4143
4144const struct vk_device_dispatch_table *
4145anv_get_device_dispatch_table(const struct intel_device_info *devinfo);
4146
4147void
4148anv_dump_pipe_bits(enum anv_pipe_bits bits);
4149
4150static inline void
4151anv_add_pending_pipe_bits(struct anv_cmd_buffer* cmd_buffer,
4152                          enum anv_pipe_bits bits,
4153                          const char* reason)
4154{
4155   cmd_buffer->state.pending_pipe_bits |= bits;
   if (INTEL_DEBUG(DEBUG_PIPE_CONTROL) && bits) {
4158      fputs("pc: add ", stderr);
4159      anv_dump_pipe_bits(bits);
4160      fprintf(stderr, "reason: %s\n", reason);
4161   }
4162}
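
/* Typical usage sketch (the bit names below come from enum anv_pipe_bits
 * declared earlier in this header; the reason string is free-form):
 *
 *    anv_add_pending_pipe_bits(cmd_buffer,
 *                              ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
 *                              ANV_PIPE_CS_STALL_BIT,
 *                              "flush after color attachment write");
 *
 * The bits accumulate in cmd_buffer->state.pending_pipe_bits until the
 * driver eventually emits a PIPE_CONTROL for them.
 */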
4163
4164struct anv_performance_configuration_intel {
4165   struct vk_object_base      base;
4166
4167   struct intel_perf_registers *register_config;
4168
4169   uint64_t                   config_id;
4170};
4171
4172void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
4173void anv_device_perf_init(struct anv_device *device);
4174void anv_perf_write_pass_results(struct intel_perf_config *perf,
4175                                 struct anv_query_pool *pool, uint32_t pass,
4176                                 const struct intel_perf_query_result *accumulated_results,
4177                                 union VkPerformanceCounterResultKHR *results);
4178
4179/* Use to emit a series of memcpy operations */
4180struct anv_memcpy_state {
4181   struct anv_device *device;
4182   struct anv_batch *batch;
4183
4184   struct anv_vb_cache_range vb_bound;
4185   struct anv_vb_cache_range vb_dirty;
4186};
4187
4188struct anv_utrace_flush_copy {
4189   /* Needs to be the first field */
4190   struct intel_ds_flush_data ds;
4191
   /* Batch used to implement a copy of timestamps recorded in another
    * buffer.
    */
4195   struct anv_reloc_list relocs;
4196   struct anv_batch batch;
4197   struct anv_bo *batch_bo;
4198
4199   /* Buffer of 64bits timestamps */
4200   struct anv_bo *trace_bo;
4201
4202   /* Syncobj to be signaled when the batch completes */
4203   struct vk_sync *sync;
4204
4205   /* Queue on which all the recorded traces are submitted */
4206   struct anv_queue *queue;
4207
4208   struct anv_memcpy_state memcpy_state;
4209};
4210
4211void anv_device_utrace_init(struct anv_device *device);
4212void anv_device_utrace_finish(struct anv_device *device);
4213VkResult
4214anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
4215                                    uint32_t cmd_buffer_count,
4216                                    struct anv_cmd_buffer **cmd_buffers,
4217                                    struct anv_utrace_flush_copy **out_flush_data);
4218
4219#ifdef HAVE_PERFETTO
4220void anv_perfetto_init(void);
4221uint64_t anv_perfetto_begin_submit(struct anv_queue *queue);
4222void anv_perfetto_end_submit(struct anv_queue *queue, uint32_t submission_id,
4223                             uint64_t start_ts);
4224#else
4225static inline void anv_perfetto_init(void)
4226{
4227}
4228static inline uint64_t anv_perfetto_begin_submit(struct anv_queue *queue)
4229{
4230   return 0;
4231}
4232static inline void anv_perfetto_end_submit(struct anv_queue *queue,
4233                                           uint32_t submission_id,
4234                                           uint64_t start_ts)
4235{}
4236#endif

#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
   VK_FROM_HANDLE(__anv_type, __name, __handle)

VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, vk.base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(anv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)

VK_DEFINE_NONDISP_HANDLE_CASTS(anv_acceleration_structure, base,
                               VkAccelerationStructureKHR,
                               VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, vk.base, VkBuffer,
                               VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, base, VkBufferView,
                               VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, base,
                               VkDescriptorUpdateTemplate,
                               VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, base, VkDeviceMemory,
                               VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
                               VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, base, VkQueryPool,
                               VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, base, VkSampler,
                               VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, base,
                               VkSamplerYcbcrConversion,
                               VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
                               VkPerformanceConfigurationINTEL,
                               VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)
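
/* Typical entrypoint usage of the casts above (the entrypoint body below is
 * an illustrative sketch, not driver code):
 *
 *    VkResult anv_CreateSampler(VkDevice _device,
 *                               const VkSamplerCreateInfo *pCreateInfo,
 *                               const VkAllocationCallbacks *pAllocator,
 *                               VkSampler *pSampler)
 *    {
 *       ANV_FROM_HANDLE(anv_device, device, _device);
 *       // `device` is now a struct anv_device * recovered from the
 *       // dispatchable VkDevice handle.
 *       ...
 *    }
 */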

#define anv_genX(devinfo, thing) ({             \
   __typeof(&gfx9_##thing) genX_thing;          \
   switch ((devinfo)->verx10) {                 \
   case 70:                                     \
      genX_thing = &gfx7_##thing;               \
      break;                                    \
   case 75:                                     \
      genX_thing = &gfx75_##thing;              \
      break;                                    \
   case 80:                                     \
      genX_thing = &gfx8_##thing;               \
      break;                                    \
   case 90:                                     \
      genX_thing = &gfx9_##thing;               \
      break;                                    \
   case 110:                                    \
      genX_thing = &gfx11_##thing;              \
      break;                                    \
   case 120:                                    \
      genX_thing = &gfx12_##thing;              \
      break;                                    \
   case 125:                                    \
      genX_thing = &gfx125_##thing;             \
      break;                                    \
   default:                                     \
      unreachable("Unknown hardware generation"); \
   }                                            \
   genX_thing;                                  \
})
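
/* Example (a sketch): dispatch at runtime to the variant matching the
 * device's verx10, assuming `devinfo` is a const struct intel_device_info *
 * and that a per-gen entrypoint such as init_device_state is declared via
 * anv_genX.h below:
 *
 *    VkResult result = anv_genX(devinfo, init_device_state)(device);
 */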

/* Gen-specific function declarations */
#ifdef genX
#  include "anv_genX.h"
#else
#  define genX(x) gfx7_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx75_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx8_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx9_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx11_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx12_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx125_##x
#  include "anv_genX.h"
#  undef genX
#endif
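
/* How the block above is consumed (a sketch of the assumed build convention,
 * not verbatim): a per-gen compilation unit defines genX(x) before including
 * this header, e.g.
 *
 *    #define genX(x) gfx9_##x      // normally provided per GFX_VERx10
 *    #include "anv_private.h"      // takes the #ifdef genX branch above
 *
 * so it only sees its own prototypes, while gen-agnostic code includes the
 * header without genX defined, gets every gfx*_ variant declared, and
 * selects between them at runtime through anv_genX().
 */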

#endif /* ANV_PRIVATE_H */