1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2012 Advanced Micro Devices, Inc.
3bf215546Sopenharmony_ci * All Rights Reserved.
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
8bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub
9bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom
10bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions:
11bf215546Sopenharmony_ci *
12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
14bf215546Sopenharmony_ci * Software.
15bf215546Sopenharmony_ci *
16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE.
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
/* The compiler middle-end architecture: Explaining (non-)monolithic shaders
 * -------------------------------------------------------------------------
 *
 * Typically, there is a one-to-one correspondence between API and HW shaders,
 * that is, for every API shader, there is exactly one shader binary in
 * the driver.
 *
 * The problem with that is that we also have to emulate some API states
 * (e.g. alpha-test, and many others) in shaders too. The two obvious ways
 * to deal with it are:
 * - each shader has multiple variants for each combination of emulated states,
 *   and the variants are compiled on demand, possibly relying on a shader
 *   cache for good performance
 * - patch shaders at the binary level
 *
 * This driver uses something completely different. The emulated states are
 * usually implemented at the beginning or end of shaders. Therefore, we can
 * split the shader into 3 parts:
 * - prolog part (shader code dependent on states)
 * - main part (the API shader)
 * - epilog part (shader code dependent on states)
 *
 * Each part is compiled as a separate shader and the final binaries are
 * concatenated. This type of shader is called non-monolithic, because it
 * consists of multiple independent binaries. Creating a new shader variant
 * is therefore only a concatenation of shader parts (binaries) and doesn't
 * involve any compilation. The main shader parts are the only parts that are
 * compiled when applications create shader objects. The prolog and epilog
 * parts are compiled on the first use and saved, so that their binaries can
 * be reused by many other shaders.
 *
 * One of the roles of the prolog part is to compute vertex buffer addresses
 * for vertex shaders. A few of the roles of the epilog part are color buffer
 * format conversions in pixel shaders that we have to do manually, and write
 * tessellation factors in tessellation control shaders. The prolog and epilog
 * have many other important responsibilities in various shader stages.
 * They don't just "emulate legacy stuff".
 *
 * Monolithic shaders are shaders where the parts are combined before LLVM
 * compilation, and the whole thing is compiled and optimized as one unit with
 * one binary on the output. The result is the same as the non-monolithic
 * shader, but the final code can be better, because LLVM can optimize across
 * all shader parts. Monolithic shaders aren't usually used except for these
 * special cases:
 *
 * 1) Some rarely-used states require modification of the main shader part
 *    itself, and in such cases, only the monolithic shader variant is
 *    compiled, and that's always done on the first use.
 *
 * 2) When we do cross-stage optimizations for separate shader objects and
 *    e.g. eliminate unused shader varyings, the resulting optimized shader
 *    variants are always compiled as monolithic shaders, and always
 *    asynchronously (i.e. not stalling ongoing rendering). We call them
 *    "optimized monolithic" shaders. The important property here is that
 *    the non-monolithic unoptimized shader variant is always available for use
 *    when the asynchronous compilation of the optimized shader is not done
 *    yet.
 *
 * Starting with GFX9 chips, some shader stages are merged, and the number of
 * shader parts per shader increased. The complete new list of shader parts is:
 * - 1st shader: prolog part
 * - 1st shader: main part
 * - 2nd shader: prolog part
 * - 2nd shader: main part
 * - 2nd shader: epilog part
 */
91bf215546Sopenharmony_ci
/* How linking shader inputs and outputs between vertex, tessellation, and
 * geometry shaders works.
 *
 * Inputs and outputs between shaders are stored in a buffer. This buffer
 * lives in LDS (typical case for tessellation), but it can also live
 * in memory (ESGS). Each input or output has a fixed location within a vertex.
 * The highest used input or output determines the stride between vertices.
 *
 * Since GS and tessellation are only possible in the OpenGL core profile,
 * only these semantics are valid for per-vertex data:
 *
 *   Name             Location
 *
 *   POSITION         0
 *   PSIZE            1
 *   CLIPDIST0..1     2..3
 *   CULLDIST0..1     (not implemented)
 *   GENERIC0..31     4..35
 *
 * For example, a shader only writing GENERIC0 has an output stride of 5.
 *
 * Only these semantics are valid for per-patch data:
 *
 *   Name             Location
 *
 *   TESSOUTER        0
 *   TESSINNER        1
 *   PATCH0..29       2..31
 *
 * That's how independent shaders agree on input and output locations.
 * The si_shader_io_get_unique_index function assigns the locations.
 *
 * For tessellation, other required information for calculating the input and
 * output addresses like the vertex stride, the patch stride, and the offsets
 * where per-vertex and per-patch data start, is passed to the shader via
 * user data SGPRs. The offsets and strides are calculated at draw time and
 * aren't available at compile time.
 */
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci#ifndef SI_SHADER_H
132bf215546Sopenharmony_ci#define SI_SHADER_H
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci#include "ac_binary.h"
135bf215546Sopenharmony_ci#include "ac_llvm_build.h"
136bf215546Sopenharmony_ci#include "ac_llvm_util.h"
137bf215546Sopenharmony_ci#include "util/simple_mtx.h"
138bf215546Sopenharmony_ci#include "util/u_inlines.h"
139bf215546Sopenharmony_ci#include "util/u_live_shader_cache.h"
140bf215546Sopenharmony_ci#include "util/u_queue.h"
141bf215546Sopenharmony_ci#include "si_pm4.h"
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci#include <stdio.h>
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_ci#ifdef __cplusplus
146bf215546Sopenharmony_ciextern "C" {
147bf215546Sopenharmony_ci#endif
148bf215546Sopenharmony_ci
/* Use LDS symbols when supported by LLVM. Can be disabled for testing the old
 * path on newer LLVM for now. Should be removed in the long term.
 */
#define USE_LDS_SYMBOLS (true)

/* Forward declarations. */
struct nir_shader;
struct si_shader;
struct si_context;

#define SI_MAX_ATTRIBS    16
#define SI_MAX_VS_OUTPUTS 40
#define SI_USER_CLIP_PLANE_MASK  0x3F /* low 6 bits set */

/* Bits 9, 19, and 29. */
#define SI_NGG_PRIM_EDGE_FLAG_BITS ((1 << 9) | (1 << 19) | (1 << 29))

/* Both values use OFFSET = 0x20 and differ only in DEFAULT_VAL (0 vs. 3). */
#define SI_PS_INPUT_CNTL_0000          (S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(0))
#define SI_PS_INPUT_CNTL_0001          (S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(3))
#define SI_PS_INPUT_CNTL_UNUSED        SI_PS_INPUT_CNTL_0000
/* D3D9 behaviour for COLOR0 requires 0001. GL is undefined. */
#define SI_PS_INPUT_CNTL_UNUSED_COLOR0 SI_PS_INPUT_CNTL_0001
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_ci/* SGPR user data indices */
170bf215546Sopenharmony_cienum
171bf215546Sopenharmony_ci{
172bf215546Sopenharmony_ci   SI_SGPR_INTERNAL_BINDINGS,
173bf215546Sopenharmony_ci   SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
174bf215546Sopenharmony_ci   SI_SGPR_CONST_AND_SHADER_BUFFERS, /* or just a constant buffer 0 pointer */
175bf215546Sopenharmony_ci   SI_SGPR_SAMPLERS_AND_IMAGES,
176bf215546Sopenharmony_ci   SI_NUM_RESOURCE_SGPRS,
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci   /* API VS, TES without GS, GS copy shader */
179bf215546Sopenharmony_ci   SI_SGPR_VS_STATE_BITS = SI_NUM_RESOURCE_SGPRS,
180bf215546Sopenharmony_ci   SI_NUM_VS_STATE_RESOURCE_SGPRS,
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci   /* all VS variants */
183bf215546Sopenharmony_ci   SI_SGPR_BASE_VERTEX = SI_NUM_VS_STATE_RESOURCE_SGPRS,
184bf215546Sopenharmony_ci   SI_SGPR_DRAWID,
185bf215546Sopenharmony_ci   SI_SGPR_START_INSTANCE,
186bf215546Sopenharmony_ci   SI_VS_NUM_USER_SGPR,
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci   SI_SGPR_VS_BLIT_DATA = SI_SGPR_CONST_AND_SHADER_BUFFERS,
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   /* TES */
191bf215546Sopenharmony_ci   SI_SGPR_TES_OFFCHIP_LAYOUT = SI_NUM_VS_STATE_RESOURCE_SGPRS,
192bf215546Sopenharmony_ci   SI_SGPR_TES_OFFCHIP_ADDR,
193bf215546Sopenharmony_ci   SI_TES_NUM_USER_SGPR,
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci   /* GFX6-8: TCS only */
196bf215546Sopenharmony_ci   GFX6_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
197bf215546Sopenharmony_ci   GFX6_SGPR_TCS_OUT_OFFSETS,
198bf215546Sopenharmony_ci   GFX6_SGPR_TCS_OUT_LAYOUT,
199bf215546Sopenharmony_ci   GFX6_SGPR_TCS_IN_LAYOUT,
200bf215546Sopenharmony_ci   GFX6_TCS_NUM_USER_SGPR,
201bf215546Sopenharmony_ci
202bf215546Sopenharmony_ci   /* GFX9: Merged LS-HS (VS-TCS) only. */
203bf215546Sopenharmony_ci   GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR,
204bf215546Sopenharmony_ci   GFX9_SGPR_TCS_OUT_OFFSETS,
205bf215546Sopenharmony_ci   GFX9_SGPR_TCS_OUT_LAYOUT,
206bf215546Sopenharmony_ci   GFX9_TCS_NUM_USER_SGPR,
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci   /* GS limits */
209bf215546Sopenharmony_ci   GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
210bf215546Sopenharmony_ci   SI_GSCOPY_NUM_USER_SGPR = SI_NUM_VS_STATE_RESOURCE_SGPRS,
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_ci   GFX9_SGPR_SMALL_PRIM_CULL_INFO = MAX2(SI_VS_NUM_USER_SGPR, SI_TES_NUM_USER_SGPR),
213bf215546Sopenharmony_ci   GFX9_SGPR_ATTRIBUTE_RING_ADDR,
214bf215546Sopenharmony_ci   GFX9_GS_NUM_USER_SGPR,
215bf215546Sopenharmony_ci
216bf215546Sopenharmony_ci   /* PS only */
217bf215546Sopenharmony_ci   SI_SGPR_ALPHA_REF = SI_NUM_RESOURCE_SGPRS,
218bf215546Sopenharmony_ci   SI_PS_NUM_USER_SGPR,
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ci   /* The value has to be 12, because the hw requires that descriptors
221bf215546Sopenharmony_ci    * are aligned to 4 SGPRs.
222bf215546Sopenharmony_ci    */
223bf215546Sopenharmony_ci   SI_SGPR_VS_VB_DESCRIPTOR_FIRST = 12,
224bf215546Sopenharmony_ci};
225bf215546Sopenharmony_ci
/* LLVM function parameter indices.
 *
 * The first SI_NUM_RESOURCE_PARAMS indices (0..3) precede the PS-only
 * parameters, which are numbered consecutively from SI_PARAM_ALPHA_REF (4)
 * to SI_PARAM_POS_FIXED_PT (21).
 */
enum
{
   SI_NUM_RESOURCE_PARAMS = 4,

   /* PS only parameters */
   SI_PARAM_ALPHA_REF = SI_NUM_RESOURCE_PARAMS,
   SI_PARAM_PRIM_MASK,
   SI_PARAM_PERSP_SAMPLE,
   SI_PARAM_PERSP_CENTER,
   SI_PARAM_PERSP_CENTROID,
   SI_PARAM_PERSP_PULL_MODEL,
   SI_PARAM_LINEAR_SAMPLE,
   SI_PARAM_LINEAR_CENTER,
   SI_PARAM_LINEAR_CENTROID,
   SI_PARAM_LINE_STIPPLE_TEX,
   SI_PARAM_POS_X_FLOAT,
   SI_PARAM_POS_Y_FLOAT,
   SI_PARAM_POS_Z_FLOAT,
   SI_PARAM_POS_W_FLOAT,
   SI_PARAM_FRONT_FACE,
   SI_PARAM_ANCILLARY,
   SI_PARAM_SAMPLE_COVERAGE,
   SI_PARAM_POS_FIXED_PT,

   /* +1 turns the last index into a count, +8 for COLOR[0..1]. */
   SI_NUM_PARAMS = SI_PARAM_POS_FIXED_PT + 9,
};
253bf215546Sopenharmony_ci
/* Every field below is described by a pair of macros: <FIELD>__SHIFT is the
 * position of the field's lowest bit and <FIELD>__MASK is the unshifted mask
 * of the field's width. The bit ranges noted in the comments follow from
 * those two values.
 */

/* These fields are only set in current_vs_state (except INDEXED) in si_context, and they are
 * accessible in the shader via vs_state_bits in VS, TES, and GS.
 */
#define VS_STATE_CLAMP_VERTEX_COLOR__SHIFT   0   /* bit 0 */
#define VS_STATE_CLAMP_VERTEX_COLOR__MASK    0x1 /* Shared by VS and GS */
#define VS_STATE_INDEXED__SHIFT              1   /* bit 1 */
#define VS_STATE_INDEXED__MASK               0x1 /* Shared by VS and GS */

/* These fields are only set in current_vs_state in si_context, and they are accessible
 * in the shader via vs_state_bits in LS/HS.
 */
/* bit gap */
#define VS_STATE_LS_OUT_PATCH_SIZE__SHIFT    11     /* bits 11..23 */
#define VS_STATE_LS_OUT_PATCH_SIZE__MASK     0x1fff
#define VS_STATE_LS_OUT_VERTEX_SIZE__SHIFT   24     /* bits 24..31 */
#define VS_STATE_LS_OUT_VERTEX_SIZE__MASK    0xff

/* These fields are only set in current_gs_state in si_context, and they are accessible
 * in the shader via vs_state_bits in legacy GS, the GS copy shader, and any NGG shader.
 */
/* bit gap */
#define GS_STATE_SMALL_PRIM_PRECISION_NO_AA__SHIFT 18 /* bits 18..21 */
#define GS_STATE_SMALL_PRIM_PRECISION_NO_AA__MASK  0xf
#define GS_STATE_SMALL_PRIM_PRECISION__SHIFT    22    /* bits 22..25 */
#define GS_STATE_SMALL_PRIM_PRECISION__MASK     0xf
#define GS_STATE_STREAMOUT_QUERY_ENABLED__SHIFT 26    /* bit 26 */
#define GS_STATE_STREAMOUT_QUERY_ENABLED__MASK  0x1
#define GS_STATE_PROVOKING_VTX_INDEX__SHIFT     27    /* bits 27..28 */
#define GS_STATE_PROVOKING_VTX_INDEX__MASK      0x3
#define GS_STATE_OUTPRIM__SHIFT                 29    /* bits 29..30 */
#define GS_STATE_OUTPRIM__MASK                  0x3
#define GS_STATE_PIPELINE_STATS_EMU__SHIFT      31    /* bit 31 */
#define GS_STATE_PIPELINE_STATS_EMU__MASK       0x1
287bf215546Sopenharmony_ci
/* Helpers for packed state words. "field" is a name prefix for which both
 * field##__MASK (unshifted width mask) and field##__SHIFT (bit position)
 * are defined.
 */

/* Truncate "value" to the field width and move it to the field position. */
#define ENCODE_FIELD(field, value) (((unsigned)(value) & field##__MASK) << field##__SHIFT)
/* Mask with all bits set except those of the field (AND it to clear the field). */
#define CLEAR_FIELD(field) (~((unsigned)field##__MASK << field##__SHIFT))

/* This is called by functions that change states.
 *
 * Replaces the field in "var" with "value". The assertion fires if "value"
 * does not fit into the field. Note that "var" and "value" are evaluated more
 * than once, so neither argument may have side effects.
 */
#define SET_FIELD(var, field, value) do { \
   assert((value) == ((unsigned)(value) & field##__MASK)); \
   (var) &= CLEAR_FIELD(field); \
   (var) |= ENCODE_FIELD(field, value); \
} while (0)

/* This is called during shader compilation and returns LLVMValueRef.
 *
 * Passes the field's shift and its bit count (the population count of the
 * mask) to si_unpack_param together with (ctx)->vs_state_bits.
 */
#define GET_FIELD(ctx, field) si_unpack_param((ctx), (ctx)->vs_state_bits, field##__SHIFT, \
                                             util_bitcount(field##__MASK))
301bf215546Sopenharmony_ci
/* SGPR counts for the VS blit shader flavors (position only, position +
 * color, position + texcoord).
 */
enum
{
   /* These represent the number of SGPRs the shader uses. */
   SI_VS_BLIT_SGPRS_POS = 3,
   SI_VS_BLIT_SGPRS_POS_COLOR = 7,
   SI_VS_BLIT_SGPRS_POS_TEXCOORD = 9,
};
309bf215546Sopenharmony_ci
/* NGG culling options: bits 0..4 are individual flags, bits 5..12 hold an
 * 8-bit clip plane enable mask.
 */
#define SI_NGG_CULL_TRIANGLES                (1 << 0)   /* this implies W, view.xy, and small prim culling */
#define SI_NGG_CULL_BACK_FACE                (1 << 1)   /* back faces */
#define SI_NGG_CULL_FRONT_FACE               (1 << 2)   /* front faces */
#define SI_NGG_CULL_LINES                    (1 << 3)   /* the primitive type is lines */
#define SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT (1 << 4)   /* cull small lines according to the diamond exit rule */
/* Pack / unpack the clip plane enable mask; only the low 8 bits are kept. */
#define SI_NGG_CULL_CLIP_PLANE_ENABLE(enable) (((enable) & 0xff) << 5)
#define SI_NGG_CULL_GET_CLIP_PLANE_ENABLE(x)  (((x) >> 5) & 0xff)

/* Shader profile flags (single-bit values that can be ORed together). */
#define SI_PROFILE_WAVE32                    (1 << 0)
#define SI_PROFILE_WAVE64                    (1 << 1)
#define SI_PROFILE_IGNORE_LLVM13_DISCARD_BUG (1 << 2)
#define SI_PROFILE_VS_NO_BINNING             (1 << 3)
#define SI_PROFILE_PS_NO_BINNING             (1 << 4)
#define SI_PROFILE_CLAMP_DIV_BY_ZERO         (1 << 5)
324bf215546Sopenharmony_ci
/**
 * For VS shader keys, describe any fixups required for vertex fetch.
 *
 * \ref log_size, \ref format, and the number of channels are interpreted as
 * by \ref ac_build_opencoded_load_format.
 *
 * Note: all bits 0 (size = 1 byte, num channels = 1, format = float) is an
 * impossible format and indicates that no fixup is needed (just use
 * buffer_load_format_xyzw).
 *
 * The four bit-fields add up to 8 bits; \ref bits aliases them as one byte so
 * that a descriptor can be compared against 0 or copied as a whole.
 */
union si_vs_fix_fetch {
   struct {
      uint8_t log_size : 2;        /* log2 of the bytes per channel (1, 2, 4, or 8 bytes) */
      uint8_t num_channels_m1 : 2; /* number of channels minus 1 */
      uint8_t format : 3;          /* AC_FETCH_FORMAT_xxx */
      uint8_t reverse : 1;         /* reverse XYZ channels */
   } u;
   uint8_t bits;
};
344bf215546Sopenharmony_ci
345bf215546Sopenharmony_cistruct si_shader;
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_ci/* State of the context creating the shader object. */
348bf215546Sopenharmony_cistruct si_compiler_ctx_state {
349bf215546Sopenharmony_ci   /* Should only be used by si_init_shader_selector_async and
350bf215546Sopenharmony_ci    * si_build_shader_variant if thread_index == -1 (non-threaded). */
351bf215546Sopenharmony_ci   struct ac_llvm_compiler *compiler;
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ci   /* Used if thread_index == -1 or if debug.async is true. */
354bf215546Sopenharmony_ci   struct util_debug_callback debug;
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci   /* Used for creating the log string for gallium/ddebug. */
357bf215546Sopenharmony_ci   bool is_debug_context;
358bf215546Sopenharmony_ci};
359bf215546Sopenharmony_ci
/* Type of a color output. The four values (0..3) fit into 2 bits. */
enum si_color_output_type {
   SI_TYPE_ANY32,   /* 0 */
   SI_TYPE_FLOAT16, /* 1 */
   SI_TYPE_INT16,   /* 2 */
   SI_TYPE_UINT16,  /* 3 */
};
366bf215546Sopenharmony_ci
367bf215546Sopenharmony_ciunion si_input_info {
368bf215546Sopenharmony_ci   struct {
369bf215546Sopenharmony_ci      ubyte semantic;
370bf215546Sopenharmony_ci      ubyte interpolate;
371bf215546Sopenharmony_ci      ubyte fp16_lo_hi_valid;
372bf215546Sopenharmony_ci      ubyte usage_mask;
373bf215546Sopenharmony_ci   };
374bf215546Sopenharmony_ci   uint32_t _unused; /* this just forces 4-byte alignment */
375bf215546Sopenharmony_ci};
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_cistruct si_shader_info {
378bf215546Sopenharmony_ci   shader_info base;
379bf215546Sopenharmony_ci
380bf215546Sopenharmony_ci   uint32_t options; /* bitmask of SI_PROFILE_* */
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_ci   ubyte num_inputs;
383bf215546Sopenharmony_ci   ubyte num_outputs;
384bf215546Sopenharmony_ci   union si_input_info input[PIPE_MAX_SHADER_INPUTS];
385bf215546Sopenharmony_ci   ubyte output_semantic[PIPE_MAX_SHADER_OUTPUTS];
386bf215546Sopenharmony_ci   ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS];
387bf215546Sopenharmony_ci   ubyte output_readmask[PIPE_MAX_SHADER_OUTPUTS];
388bf215546Sopenharmony_ci   ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS];
389bf215546Sopenharmony_ci   ubyte output_type[PIPE_MAX_SHADER_OUTPUTS]; /* enum nir_alu_type */
390bf215546Sopenharmony_ci
391bf215546Sopenharmony_ci   ubyte num_vs_inputs;
392bf215546Sopenharmony_ci   ubyte num_vbos_in_user_sgprs;
393bf215546Sopenharmony_ci   ubyte num_stream_output_components[4];
394bf215546Sopenharmony_ci   uint16_t enabled_streamout_buffer_mask;
395bf215546Sopenharmony_ci
396bf215546Sopenharmony_ci   uint64_t inputs_read; /* "get_unique_index" bits */
397bf215546Sopenharmony_ci   uint64_t tcs_vgpr_only_inputs; /* TCS inputs that are only in VGPRs, not LDS. */
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_ci   uint64_t outputs_written_before_ps; /* "get_unique_index" bits */
400bf215546Sopenharmony_ci   uint64_t outputs_written;           /* "get_unique_index" bits */
401bf215546Sopenharmony_ci   uint32_t patch_outputs_written;     /* "get_unique_index_patch" bits */
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_ci   ubyte clipdist_mask;
404bf215546Sopenharmony_ci   ubyte culldist_mask;
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_ci   uint16_t lshs_vertex_stride;
407bf215546Sopenharmony_ci   uint16_t esgs_itemsize; /* vertex stride */
408bf215546Sopenharmony_ci   uint16_t gsvs_vertex_size;
409bf215546Sopenharmony_ci   ubyte gs_input_verts_per_prim;
410bf215546Sopenharmony_ci   unsigned max_gsvs_emit_size;
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_ci   /* Set 0xf or 0x0 (4 bits) per each written output.
413bf215546Sopenharmony_ci    * ANDed with spi_shader_col_format.
414bf215546Sopenharmony_ci    */
415bf215546Sopenharmony_ci   unsigned colors_written_4bit;
416bf215546Sopenharmony_ci
417bf215546Sopenharmony_ci   int constbuf0_num_slots;
418bf215546Sopenharmony_ci   uint num_memory_stores;
419bf215546Sopenharmony_ci   ubyte color_attr_index[2];
420bf215546Sopenharmony_ci   ubyte color_interpolate[2];
421bf215546Sopenharmony_ci   ubyte color_interpolate_loc[2];
422bf215546Sopenharmony_ci   ubyte colors_read; /**< which color components are read by the FS */
423bf215546Sopenharmony_ci   ubyte colors_written;
424bf215546Sopenharmony_ci   uint16_t output_color_types; /**< Each bit pair is enum si_color_output_type */
425bf215546Sopenharmony_ci   bool vs_needs_prolog;
426bf215546Sopenharmony_ci   bool color0_writes_all_cbufs; /**< gl_FragColor */
427bf215546Sopenharmony_ci   bool reads_samplemask;   /**< does fragment shader read sample mask? */
428bf215546Sopenharmony_ci   bool reads_tess_factors; /**< If TES reads TESSINNER or TESSOUTER */
429bf215546Sopenharmony_ci   bool writes_z;           /**< does fragment shader write Z value? */
430bf215546Sopenharmony_ci   bool writes_stencil;     /**< does fragment shader write stencil value? */
431bf215546Sopenharmony_ci   bool writes_samplemask;  /**< does fragment shader write sample mask? */
432bf215546Sopenharmony_ci   bool writes_edgeflag;    /**< vertex shader outputs edgeflag */
433bf215546Sopenharmony_ci   bool uses_interp_color;
434bf215546Sopenharmony_ci   bool uses_persp_center_color;
435bf215546Sopenharmony_ci   bool uses_persp_centroid_color;
436bf215546Sopenharmony_ci   bool uses_persp_sample_color;
437bf215546Sopenharmony_ci   bool uses_persp_center;
438bf215546Sopenharmony_ci   bool uses_persp_centroid;
439bf215546Sopenharmony_ci   bool uses_persp_sample;
440bf215546Sopenharmony_ci   bool uses_linear_center;
441bf215546Sopenharmony_ci   bool uses_linear_centroid;
442bf215546Sopenharmony_ci   bool uses_linear_sample;
443bf215546Sopenharmony_ci   bool uses_interp_at_sample;
444bf215546Sopenharmony_ci   bool uses_instanceid;
445bf215546Sopenharmony_ci   bool uses_base_vertex;
446bf215546Sopenharmony_ci   bool uses_base_instance;
447bf215546Sopenharmony_ci   bool uses_drawid;
448bf215546Sopenharmony_ci   bool uses_primid;
449bf215546Sopenharmony_ci   bool uses_frontface;
450bf215546Sopenharmony_ci   bool uses_invocationid;
451bf215546Sopenharmony_ci   bool uses_thread_id[3];
452bf215546Sopenharmony_ci   bool uses_block_id[3];
453bf215546Sopenharmony_ci   bool uses_variable_block_size;
454bf215546Sopenharmony_ci   bool uses_grid_size;
455bf215546Sopenharmony_ci   bool uses_subgroup_info;
456bf215546Sopenharmony_ci   bool writes_position;
457bf215546Sopenharmony_ci   bool writes_psize;
458bf215546Sopenharmony_ci   bool writes_clipvertex;
459bf215546Sopenharmony_ci   bool writes_primid;
460bf215546Sopenharmony_ci   bool writes_viewport_index;
461bf215546Sopenharmony_ci   bool writes_layer;
462bf215546Sopenharmony_ci   bool uses_bindless_samplers;
463bf215546Sopenharmony_ci   bool uses_bindless_images;
464bf215546Sopenharmony_ci   bool uses_indirect_descriptor;
465bf215546Sopenharmony_ci   bool has_divergent_loop;
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci   bool uses_vmem_sampler_or_bvh;
468bf215546Sopenharmony_ci   bool uses_vmem_load_other; /* all other VMEM loads and atomics with return */
469bf215546Sopenharmony_ci
470bf215546Sopenharmony_ci   /** Whether all codepaths write tess factors in all invocations. */
471bf215546Sopenharmony_ci   bool tessfactors_are_def_in_all_invocs;
472bf215546Sopenharmony_ci
473bf215546Sopenharmony_ci   /* A flag to check if vrs2x2 can be enabled to reduce number of
474bf215546Sopenharmony_ci    * fragment shader invocations if flat shading.
475bf215546Sopenharmony_ci    */
476bf215546Sopenharmony_ci   bool allow_flat_shading;
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ci   /* Optimization: if the texture bound to this texunit has been cleared to 1,
479bf215546Sopenharmony_ci    * then the draw can be skipped (see si_draw_vbo_skip_noop). Initially the
480bf215546Sopenharmony_ci    * value is 0xff (undetermined) and can be later changed to 0 (= false) or
481bf215546Sopenharmony_ci    * texunit + 1.
482bf215546Sopenharmony_ci    */
483bf215546Sopenharmony_ci   uint8_t writes_1_if_tex_is_1;
484bf215546Sopenharmony_ci};
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci/* A shader selector is a gallium CSO and contains shader variants and
487bf215546Sopenharmony_ci * binaries for one NIR program. This can be shared by multiple contexts.
488bf215546Sopenharmony_ci */
489bf215546Sopenharmony_cistruct si_shader_selector {
490bf215546Sopenharmony_ci   struct util_live_shader base;
491bf215546Sopenharmony_ci   struct si_screen *screen;
492bf215546Sopenharmony_ci   struct util_queue_fence ready;
493bf215546Sopenharmony_ci   struct si_compiler_ctx_state compiler_ctx_state;
494bf215546Sopenharmony_ci   gl_shader_stage stage;
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_ci   simple_mtx_t mutex;
497bf215546Sopenharmony_ci   union si_shader_key *keys;
498bf215546Sopenharmony_ci   unsigned variants_count;
499bf215546Sopenharmony_ci   unsigned variants_max_count;
500bf215546Sopenharmony_ci   struct si_shader **variants;
501bf215546Sopenharmony_ci
502bf215546Sopenharmony_ci   /* The compiled NIR shader without a prolog and/or epilog (not
503bf215546Sopenharmony_ci    * uploaded to a buffer object).
504bf215546Sopenharmony_ci    */
505bf215546Sopenharmony_ci   struct si_shader *main_shader_part;
506bf215546Sopenharmony_ci   struct si_shader *main_shader_part_ls;     /* as_ls is set in the key */
507bf215546Sopenharmony_ci   struct si_shader *main_shader_part_es;     /* as_es is set in the key */
508bf215546Sopenharmony_ci   struct si_shader *main_shader_part_ngg;    /* as_ngg is set in the key */
509bf215546Sopenharmony_ci   struct si_shader *main_shader_part_ngg_es; /* for Wave32 TES before legacy GS */
510bf215546Sopenharmony_ci
511bf215546Sopenharmony_ci   struct nir_shader *nir;
512bf215546Sopenharmony_ci   void *nir_binary;
513bf215546Sopenharmony_ci   unsigned nir_size;
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_ci   struct si_shader_info info;
516bf215546Sopenharmony_ci
517bf215546Sopenharmony_ci   enum pipe_shader_type pipe_shader_type;
518bf215546Sopenharmony_ci   ubyte const_and_shader_buf_descriptors_index;
519bf215546Sopenharmony_ci   ubyte sampler_and_images_descriptors_index;
520bf215546Sopenharmony_ci   ubyte cs_shaderbufs_sgpr_index;
521bf215546Sopenharmony_ci   ubyte cs_num_shaderbufs_in_user_sgprs;
522bf215546Sopenharmony_ci   ubyte cs_images_sgpr_index;
523bf215546Sopenharmony_ci   ubyte cs_images_num_sgprs;
524bf215546Sopenharmony_ci   ubyte cs_num_images_in_user_sgprs;
525bf215546Sopenharmony_ci   unsigned ngg_cull_vert_threshold; /* UINT32_MAX = disabled */
526bf215546Sopenharmony_ci   enum pipe_prim_type rast_prim;
527bf215546Sopenharmony_ci
528bf215546Sopenharmony_ci   /* GS parameters. */
529bf215546Sopenharmony_ci   bool tess_turns_off_ngg;
530bf215546Sopenharmony_ci
531bf215546Sopenharmony_ci   /* bitmasks of used descriptor slots */
532bf215546Sopenharmony_ci   uint64_t active_const_and_shader_buffers;
533bf215546Sopenharmony_ci   uint64_t active_samplers_and_images;
534bf215546Sopenharmony_ci};
535bf215546Sopenharmony_ci
/* Valid shader configurations:
 *
 * API shaders           VS | TCS | TES | GS |pass| PS
 * are compiled as:         |     |     |    |thru|
 *                          |     |     |    |    |
 * Only VS & PS:         VS |     |     |    |    | PS
 * GFX6     - with GS:   ES |     |     | GS | VS | PS
 *          - with tess: LS | HS  | VS  |    |    | PS
 *          - with both: LS | HS  | ES  | GS | VS | PS
 * GFX9     - with GS:   -> |     |     | GS | VS | PS
 *          - with tess: -> | HS  | VS  |    |    | PS
 *          - with both: -> | HS  | ->  | GS | VS | PS
 *                          |     |     |    |    |
 * NGG      - VS & PS:   GS |     |     |    |    | PS
 * (GFX10+) - with GS:   -> |     |     | GS |    | PS
 *          - with tess: -> | HS  | GS  |    |    | PS
 *          - with both: -> | HS  | ->  | GS |    | PS
 *
 * -> = merged with the next stage
 */
556bf215546Sopenharmony_ci
/* Use byte alignment (no padding between members) for every structure
 * declared from here until the matching "#pragma pack(pop)", so that shader
 * keys have the smallest possible memory footprint.
 */
#pragma pack(push, 1)
561bf215546Sopenharmony_ci
/* Common VS bits between the shader key and the prolog key. */
struct si_vs_prolog_bits {
   /* Instance divisor selection; each mask has one bit per vertex input:
    * - If neither "is_one" nor "is_fetched" has a bit set, the instance
    *   divisor is 0.
    * - If "is_one" has a bit set, the instance divisor is 1.
    * - If "is_fetched" has a bit set, the instance divisor will be loaded
    *   from the constant buffer.
    */
   uint16_t instance_divisor_is_one;     /* bitmask of inputs */
   uint16_t instance_divisor_is_fetched; /* bitmask of inputs */
   unsigned ls_vgpr_fix : 1;
};
574bf215546Sopenharmony_ci
/* Common TCS bits between the shader key and the epilog key. */
struct si_tcs_epilog_bits {
   unsigned prim_mode : 3;
   unsigned invoc0_tess_factors_are_def : 1;
   unsigned tes_reads_tess_factors : 1;
};
581bf215546Sopenharmony_ci
/* Common PS bits between the shader key and the prolog key. */
struct si_ps_prolog_bits {
   unsigned color_two_side : 1;
   unsigned flatshade_colors : 1;
   unsigned poly_stipple : 1;
   /* Interpolation mode overrides. */
   unsigned force_persp_sample_interp : 1;
   unsigned force_linear_sample_interp : 1;
   unsigned force_persp_center_interp : 1;
   unsigned force_linear_center_interp : 1;
   unsigned bc_optimize_for_persp : 1;
   unsigned bc_optimize_for_linear : 1;
   unsigned samplemask_log_ps_iter : 3;
};
595bf215546Sopenharmony_ci
/* Common PS bits between the shader key and the epilog key. */
struct si_ps_epilog_bits {
   unsigned spi_shader_col_format;
   unsigned color_is_int8 : 8;
   unsigned color_is_int10 : 8;
   unsigned last_cbuf : 3;
   unsigned alpha_func : 3; /* compared against PIPE_FUNC_* by si_shader_uses_discard */
   unsigned alpha_to_one : 1;
   unsigned alpha_to_coverage_via_mrtz : 1;  /* gfx11+ */
   unsigned clamp_color : 1;
   unsigned dual_src_blend_swizzle : 1;      /* gfx11+ */
};
608bf215546Sopenharmony_ci
609bf215546Sopenharmony_ciunion si_shader_part_key {
610bf215546Sopenharmony_ci   struct {
611bf215546Sopenharmony_ci      struct si_vs_prolog_bits states;
612bf215546Sopenharmony_ci      unsigned wave32 : 1;
613bf215546Sopenharmony_ci      unsigned num_input_sgprs : 6;
614bf215546Sopenharmony_ci      /* For merged stages such as LS-HS, HS input VGPRs are first. */
615bf215546Sopenharmony_ci      unsigned num_merged_next_stage_vgprs : 3;
616bf215546Sopenharmony_ci      unsigned num_inputs : 5;
617bf215546Sopenharmony_ci      unsigned as_ls : 1;
618bf215546Sopenharmony_ci      unsigned as_es : 1;
619bf215546Sopenharmony_ci      unsigned as_ngg : 1;
620bf215546Sopenharmony_ci      unsigned load_vgprs_after_culling : 1;
621bf215546Sopenharmony_ci      /* Prologs for monolithic shaders shouldn't set EXEC. */
622bf215546Sopenharmony_ci      unsigned is_monolithic : 1;
623bf215546Sopenharmony_ci   } vs_prolog;
624bf215546Sopenharmony_ci   struct {
625bf215546Sopenharmony_ci      struct si_tcs_epilog_bits states;
626bf215546Sopenharmony_ci      unsigned wave32 : 1;
627bf215546Sopenharmony_ci      unsigned noop_s_barrier : 1;
628bf215546Sopenharmony_ci   } tcs_epilog;
629bf215546Sopenharmony_ci   struct {
630bf215546Sopenharmony_ci      struct si_ps_prolog_bits states;
631bf215546Sopenharmony_ci      unsigned wave32 : 1;
632bf215546Sopenharmony_ci      unsigned num_input_sgprs : 6;
633bf215546Sopenharmony_ci      unsigned num_input_vgprs : 5;
634bf215546Sopenharmony_ci      /* Color interpolation and two-side color selection. */
635bf215546Sopenharmony_ci      unsigned colors_read : 8;       /* color input components read */
636bf215546Sopenharmony_ci      unsigned num_interp_inputs : 5; /* BCOLOR is at this location */
637bf215546Sopenharmony_ci      unsigned face_vgpr_index : 5;
638bf215546Sopenharmony_ci      unsigned ancillary_vgpr_index : 5;
639bf215546Sopenharmony_ci      unsigned sample_coverage_vgpr_index : 5;
640bf215546Sopenharmony_ci      unsigned wqm : 1;
641bf215546Sopenharmony_ci      char color_attr_index[2];
642bf215546Sopenharmony_ci      signed char color_interp_vgpr_index[2]; /* -1 == constant */
643bf215546Sopenharmony_ci   } ps_prolog;
644bf215546Sopenharmony_ci   struct {
645bf215546Sopenharmony_ci      struct si_ps_epilog_bits states;
646bf215546Sopenharmony_ci      unsigned wave32 : 1;
647bf215546Sopenharmony_ci      unsigned uses_discard : 1;
648bf215546Sopenharmony_ci      unsigned colors_written : 8;
649bf215546Sopenharmony_ci      unsigned color_types : 16;
650bf215546Sopenharmony_ci      unsigned writes_z : 1;
651bf215546Sopenharmony_ci      unsigned writes_stencil : 1;
652bf215546Sopenharmony_ci      unsigned writes_samplemask : 1;
653bf215546Sopenharmony_ci   } ps_epilog;
654bf215546Sopenharmony_ci};
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_ci/* The shader key for geometry stages (VS, TCS, TES, GS) */
657bf215546Sopenharmony_cistruct si_shader_key_ge {
658bf215546Sopenharmony_ci   /* Prolog and epilog flags. */
659bf215546Sopenharmony_ci   union {
660bf215546Sopenharmony_ci      struct {
661bf215546Sopenharmony_ci         struct si_vs_prolog_bits prolog;
662bf215546Sopenharmony_ci      } vs;
663bf215546Sopenharmony_ci      struct {
664bf215546Sopenharmony_ci         struct si_vs_prolog_bits ls_prolog; /* for merged LS-HS */
665bf215546Sopenharmony_ci         struct si_shader_selector *ls;      /* for merged LS-HS */
666bf215546Sopenharmony_ci         struct si_tcs_epilog_bits epilog;
667bf215546Sopenharmony_ci      } tcs; /* tessellation control shader */
668bf215546Sopenharmony_ci      struct {
669bf215546Sopenharmony_ci         struct si_vs_prolog_bits vs_prolog; /* for merged ES-GS */
670bf215546Sopenharmony_ci         struct si_shader_selector *es;      /* for merged ES-GS */
671bf215546Sopenharmony_ci      } gs;
672bf215546Sopenharmony_ci   } part;
673bf215546Sopenharmony_ci
674bf215546Sopenharmony_ci   /* These three are initially set according to the NEXT_SHADER property,
675bf215546Sopenharmony_ci    * or guessed if the property doesn't seem correct.
676bf215546Sopenharmony_ci    */
677bf215546Sopenharmony_ci   unsigned as_es : 1;  /* whether it's a shader before GS */
678bf215546Sopenharmony_ci   unsigned as_ls : 1;  /* whether it's VS before TCS */
679bf215546Sopenharmony_ci   unsigned as_ngg : 1; /* whether it's the last GE stage and NGG is enabled,
680bf215546Sopenharmony_ci                           also set for the stage right before GS */
681bf215546Sopenharmony_ci
682bf215546Sopenharmony_ci   /* Flags for monolithic compilation only. */
683bf215546Sopenharmony_ci   struct {
684bf215546Sopenharmony_ci      /* Whether fetch should be opencoded according to vs_fix_fetch.
685bf215546Sopenharmony_ci       * Otherwise, if vs_fix_fetch is non-zero, buffer_load_format_xyzw
686bf215546Sopenharmony_ci       * with minimal fixups is used. */
687bf215546Sopenharmony_ci      uint16_t vs_fetch_opencode;
688bf215546Sopenharmony_ci      union si_vs_fix_fetch vs_fix_fetch[SI_MAX_ATTRIBS];
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci      union {
691bf215546Sopenharmony_ci         /* When PS needs PrimID and GS is disabled. */
692bf215546Sopenharmony_ci         unsigned vs_export_prim_id : 1;    /* VS and TES only */
693bf215546Sopenharmony_ci         unsigned gs_tri_strip_adj_fix : 1; /* GS only */
694bf215546Sopenharmony_ci      } u;
695bf215546Sopenharmony_ci   } mono;
696bf215546Sopenharmony_ci
697bf215546Sopenharmony_ci   /* Optimization flags for asynchronous compilation only. */
698bf215546Sopenharmony_ci   struct {
699bf215546Sopenharmony_ci      /* For HW VS (it can be VS, TES, GS) */
700bf215546Sopenharmony_ci      uint64_t kill_outputs; /* "get_unique_index" bits */
701bf215546Sopenharmony_ci      unsigned kill_clip_distances : 8;
702bf215546Sopenharmony_ci      unsigned kill_pointsize : 1;
703bf215546Sopenharmony_ci      unsigned remove_streamout : 1;
704bf215546Sopenharmony_ci
705bf215546Sopenharmony_ci      /* For NGG VS and TES. */
706bf215546Sopenharmony_ci      unsigned ngg_culling : 13; /* SI_NGG_CULL_* */
707bf215546Sopenharmony_ci
708bf215546Sopenharmony_ci      /* For shaders where monolithic variants have better code.
709bf215546Sopenharmony_ci       *
710bf215546Sopenharmony_ci       * This is a flag that has no effect on code generation,
711bf215546Sopenharmony_ci       * but forces monolithic shaders to be used as soon as
712bf215546Sopenharmony_ci       * possible, because it's in the "opt" group.
713bf215546Sopenharmony_ci       */
714bf215546Sopenharmony_ci      unsigned prefer_mono : 1;
715bf215546Sopenharmony_ci
716bf215546Sopenharmony_ci      /* VS and TCS have the same number of patch vertices. */
717bf215546Sopenharmony_ci      unsigned same_patch_vertices:1;
718bf215546Sopenharmony_ci
719bf215546Sopenharmony_ci      unsigned inline_uniforms:1;
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci      /* This must be kept last to limit the number of variants
722bf215546Sopenharmony_ci       * depending only on the uniform values.
723bf215546Sopenharmony_ci       */
724bf215546Sopenharmony_ci      uint32_t inlined_uniform_values[MAX_INLINABLE_UNIFORMS];
725bf215546Sopenharmony_ci   } opt;
726bf215546Sopenharmony_ci};
727bf215546Sopenharmony_ci
728bf215546Sopenharmony_cistruct si_shader_key_ps {
729bf215546Sopenharmony_ci   struct {
730bf215546Sopenharmony_ci      /* Prolog and epilog flags. */
731bf215546Sopenharmony_ci      struct si_ps_prolog_bits prolog;
732bf215546Sopenharmony_ci      struct si_ps_epilog_bits epilog;
733bf215546Sopenharmony_ci   } part;
734bf215546Sopenharmony_ci
735bf215546Sopenharmony_ci   /* Flags for monolithic compilation only. */
736bf215546Sopenharmony_ci   struct {
737bf215546Sopenharmony_ci      unsigned poly_line_smoothing : 1;
738bf215546Sopenharmony_ci      unsigned point_smoothing : 1;
739bf215546Sopenharmony_ci      unsigned interpolate_at_sample_force_center : 1;
740bf215546Sopenharmony_ci      unsigned fbfetch_msaa : 1;
741bf215546Sopenharmony_ci      unsigned fbfetch_is_1D : 1;
742bf215546Sopenharmony_ci      unsigned fbfetch_layered : 1;
743bf215546Sopenharmony_ci   } mono;
744bf215546Sopenharmony_ci
745bf215546Sopenharmony_ci   /* Optimization flags for asynchronous compilation only. */
746bf215546Sopenharmony_ci   struct {
747bf215546Sopenharmony_ci      /* For shaders where monolithic variants have better code.
748bf215546Sopenharmony_ci       *
749bf215546Sopenharmony_ci       * This is a flag that has no effect on code generation,
750bf215546Sopenharmony_ci       * but forces monolithic shaders to be used as soon as
751bf215546Sopenharmony_ci       * possible, because it's in the "opt" group.
752bf215546Sopenharmony_ci       */
753bf215546Sopenharmony_ci      unsigned prefer_mono : 1;
754bf215546Sopenharmony_ci      unsigned inline_uniforms:1;
755bf215546Sopenharmony_ci
756bf215546Sopenharmony_ci      /* This must be kept last to limit the number of variants
757bf215546Sopenharmony_ci       * depending only on the uniform values.
758bf215546Sopenharmony_ci       */
759bf215546Sopenharmony_ci      uint32_t inlined_uniform_values[MAX_INLINABLE_UNIFORMS];
760bf215546Sopenharmony_ci   } opt;
761bf215546Sopenharmony_ci};
762bf215546Sopenharmony_ci
763bf215546Sopenharmony_ciunion si_shader_key {
764bf215546Sopenharmony_ci   struct si_shader_key_ge ge; /* geometry engine shaders */
765bf215546Sopenharmony_ci   struct si_shader_key_ps ps;
766bf215546Sopenharmony_ci};
767bf215546Sopenharmony_ci
/* Restore the pack alignment to default; structures declared below use the
 * compiler's natural member alignment again.
 */
#pragma pack(pop)
770bf215546Sopenharmony_ci
771bf215546Sopenharmony_ci/* GCN-specific shader info. */
772bf215546Sopenharmony_cistruct si_shader_binary_info {
773bf215546Sopenharmony_ci   ubyte vs_output_param_offset[NUM_TOTAL_VARYING_SLOTS];
774bf215546Sopenharmony_ci   uint64_t vs_output_param_mask; /* which params to export, indexed by "base" */
775bf215546Sopenharmony_ci   uint32_t vs_output_ps_input_cntl[NUM_TOTAL_VARYING_SLOTS];
776bf215546Sopenharmony_ci   ubyte num_input_sgprs;
777bf215546Sopenharmony_ci   ubyte num_input_vgprs;
778bf215546Sopenharmony_ci   bool uses_vmem_load_other; /* all other VMEM loads and atomics with return */
779bf215546Sopenharmony_ci   bool uses_vmem_sampler_or_bvh;
780bf215546Sopenharmony_ci   signed char face_vgpr_index;
781bf215546Sopenharmony_ci   signed char ancillary_vgpr_index;
782bf215546Sopenharmony_ci   signed char sample_coverage_vgpr_index;
783bf215546Sopenharmony_ci   bool uses_instanceid;
784bf215546Sopenharmony_ci   ubyte nr_pos_exports;
785bf215546Sopenharmony_ci   ubyte nr_param_exports;
786bf215546Sopenharmony_ci   unsigned private_mem_vgprs;
787bf215546Sopenharmony_ci   unsigned max_simd_waves;
788bf215546Sopenharmony_ci};
789bf215546Sopenharmony_ci
/* Binary code of a shader or shader part: an ELF image with its size, an
 * uploaded-code buffer with its size, and an LLVM IR string.
 * NOTE(review): ownership of these buffers is not visible here; see
 * si_shader_binary_clean() for how they are released.
 */
struct si_shader_binary {
   const char *elf_buffer;
   size_t elf_size;

   char *uploaded_code;
   size_t uploaded_code_size;

   char *llvm_ir_string;
};
799bf215546Sopenharmony_ci
/* Subgroup sizing parameters for GS; filled in by gfx9_get_gs_info(). */
struct gfx9_gs_info {
   unsigned es_verts_per_subgroup;
   unsigned gs_prims_per_subgroup;
   unsigned gs_inst_prims_in_subgroup;
   unsigned max_prims_per_subgroup;
   unsigned esgs_ring_size; /* in bytes */
};
807bf215546Sopenharmony_ci
/* Number of bits in the VGT_SHADER_STAGES key, and the number of distinct
 * key values those bits can encode.
 */
#define SI_NUM_VGT_STAGES_KEY_BITS 8
#define SI_NUM_VGT_STAGES_STATES   (1 << SI_NUM_VGT_STAGES_KEY_BITS)
810bf215546Sopenharmony_ci
/* The VGT_SHADER_STAGES key used to index the table of precomputed values.
 * Some fields are set by state-change calls, most are set by draw_vbo.
 *
 * The eight 1-bit flags in "u" overlay the single byte "index". The field
 * order is mirrored between the two endianness branches.
 */
union si_vgt_stages_key {
   struct {
#if UTIL_ARCH_LITTLE_ENDIAN
      uint8_t tess : 1;
      uint8_t gs : 1;
      uint8_t ngg_passthrough : 1;
      uint8_t ngg : 1;       /* gfx10+ */
      uint8_t streamout : 1; /* only used with NGG */
      uint8_t hs_wave32 : 1;
      uint8_t gs_wave32 : 1;
      uint8_t vs_wave32 : 1;
#else /* UTIL_ARCH_BIG_ENDIAN */
      uint8_t vs_wave32 : 1;
      uint8_t gs_wave32 : 1;
      uint8_t hs_wave32 : 1;
      uint8_t streamout : 1;
      uint8_t ngg : 1;
      uint8_t ngg_passthrough : 1;
      uint8_t gs : 1;
      uint8_t tess : 1;
#endif
   } u;
   uint8_t index;
};
838bf215546Sopenharmony_ci
839bf215546Sopenharmony_cistruct si_shader {
840bf215546Sopenharmony_ci   struct si_pm4_state pm4; /* base class */
841bf215546Sopenharmony_ci   struct si_compiler_ctx_state compiler_ctx_state;
842bf215546Sopenharmony_ci
843bf215546Sopenharmony_ci   struct si_shader_selector *selector;
844bf215546Sopenharmony_ci   struct si_shader_selector *previous_stage_sel; /* for refcounting */
845bf215546Sopenharmony_ci
846bf215546Sopenharmony_ci   struct si_shader_part *prolog;
847bf215546Sopenharmony_ci   struct si_shader *previous_stage; /* for GFX9 */
848bf215546Sopenharmony_ci   struct si_shader_part *epilog;
849bf215546Sopenharmony_ci   struct si_shader *gs_copy_shader;
850bf215546Sopenharmony_ci
851bf215546Sopenharmony_ci   struct si_resource *bo;
852bf215546Sopenharmony_ci   struct si_resource *scratch_bo;
853bf215546Sopenharmony_ci   union si_shader_key key;
854bf215546Sopenharmony_ci   struct util_queue_fence ready;
855bf215546Sopenharmony_ci   bool compilation_failed;
856bf215546Sopenharmony_ci   bool is_monolithic;
857bf215546Sopenharmony_ci   bool is_optimized;
858bf215546Sopenharmony_ci   bool is_binary_shared;
859bf215546Sopenharmony_ci   bool is_gs_copy_shader;
860bf215546Sopenharmony_ci   uint8_t wave_size;
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci   /* The following data is all that's needed for binary shaders. */
863bf215546Sopenharmony_ci   struct si_shader_binary binary;
864bf215546Sopenharmony_ci   struct ac_shader_config config;
865bf215546Sopenharmony_ci   struct si_shader_binary_info info;
866bf215546Sopenharmony_ci
867bf215546Sopenharmony_ci   /* SI_SGPR_VS_STATE_BITS */
868bf215546Sopenharmony_ci   bool uses_vs_state_provoking_vertex;
869bf215546Sopenharmony_ci   bool uses_gs_state_outprim;
870bf215546Sopenharmony_ci
871bf215546Sopenharmony_ci   bool uses_base_instance;
872bf215546Sopenharmony_ci
873bf215546Sopenharmony_ci   struct {
874bf215546Sopenharmony_ci      uint16_t ngg_emit_size; /* in dwords */
875bf215546Sopenharmony_ci      uint16_t hw_max_esverts;
876bf215546Sopenharmony_ci      uint16_t max_gsprims;
877bf215546Sopenharmony_ci      uint16_t max_out_verts;
878bf215546Sopenharmony_ci      uint16_t prim_amp_factor;
879bf215546Sopenharmony_ci      bool max_vert_out_per_gs_instance;
880bf215546Sopenharmony_ci   } ngg;
881bf215546Sopenharmony_ci
882bf215546Sopenharmony_ci   /* Shader key + LLVM IR + disassembly + statistics.
883bf215546Sopenharmony_ci    * Generated for debug contexts only.
884bf215546Sopenharmony_ci    */
885bf215546Sopenharmony_ci   char *shader_log;
886bf215546Sopenharmony_ci   size_t shader_log_size;
887bf215546Sopenharmony_ci
888bf215546Sopenharmony_ci   struct gfx9_gs_info gs_info;
889bf215546Sopenharmony_ci
890bf215546Sopenharmony_ci   /* For save precompute context registers values. */
891bf215546Sopenharmony_ci   union {
892bf215546Sopenharmony_ci      struct {
893bf215546Sopenharmony_ci         unsigned vgt_gsvs_ring_offset_1;
894bf215546Sopenharmony_ci         unsigned vgt_gsvs_ring_offset_2;
895bf215546Sopenharmony_ci         unsigned vgt_gsvs_ring_offset_3;
896bf215546Sopenharmony_ci         unsigned vgt_gsvs_ring_itemsize;
897bf215546Sopenharmony_ci         unsigned vgt_gs_max_vert_out;
898bf215546Sopenharmony_ci         unsigned vgt_gs_vert_itemsize;
899bf215546Sopenharmony_ci         unsigned vgt_gs_vert_itemsize_1;
900bf215546Sopenharmony_ci         unsigned vgt_gs_vert_itemsize_2;
901bf215546Sopenharmony_ci         unsigned vgt_gs_vert_itemsize_3;
902bf215546Sopenharmony_ci         unsigned vgt_gs_instance_cnt;
903bf215546Sopenharmony_ci         unsigned vgt_gs_onchip_cntl;
904bf215546Sopenharmony_ci         unsigned vgt_gs_max_prims_per_subgroup;
905bf215546Sopenharmony_ci         unsigned vgt_esgs_ring_itemsize;
906bf215546Sopenharmony_ci         unsigned spi_shader_pgm_rsrc3_gs;
907bf215546Sopenharmony_ci         unsigned spi_shader_pgm_rsrc4_gs;
908bf215546Sopenharmony_ci      } gs;
909bf215546Sopenharmony_ci
910bf215546Sopenharmony_ci      struct {
911bf215546Sopenharmony_ci         unsigned ge_max_output_per_subgroup;
912bf215546Sopenharmony_ci         unsigned ge_ngg_subgrp_cntl;
913bf215546Sopenharmony_ci         unsigned vgt_primitiveid_en;
914bf215546Sopenharmony_ci         unsigned vgt_gs_onchip_cntl;
915bf215546Sopenharmony_ci         unsigned vgt_gs_instance_cnt;
916bf215546Sopenharmony_ci         unsigned vgt_esgs_ring_itemsize;
917bf215546Sopenharmony_ci         unsigned spi_vs_out_config;
918bf215546Sopenharmony_ci         unsigned spi_shader_idx_format;
919bf215546Sopenharmony_ci         unsigned spi_shader_pos_format;
920bf215546Sopenharmony_ci         unsigned pa_cl_vte_cntl;
921bf215546Sopenharmony_ci         unsigned pa_cl_ngg_cntl;
922bf215546Sopenharmony_ci         unsigned vgt_gs_max_vert_out; /* for API GS */
923bf215546Sopenharmony_ci         unsigned ge_pc_alloc;         /* uconfig register */
924bf215546Sopenharmony_ci         unsigned spi_shader_pgm_rsrc3_gs;
925bf215546Sopenharmony_ci         unsigned spi_shader_pgm_rsrc4_gs;
926bf215546Sopenharmony_ci         union si_vgt_stages_key vgt_stages;
927bf215546Sopenharmony_ci      } ngg;
928bf215546Sopenharmony_ci
929bf215546Sopenharmony_ci      struct {
930bf215546Sopenharmony_ci         unsigned vgt_gs_mode;
931bf215546Sopenharmony_ci         unsigned vgt_primitiveid_en;
932bf215546Sopenharmony_ci         unsigned vgt_reuse_off;
933bf215546Sopenharmony_ci         unsigned spi_vs_out_config;
934bf215546Sopenharmony_ci         unsigned spi_shader_pos_format;
935bf215546Sopenharmony_ci         unsigned pa_cl_vte_cntl;
936bf215546Sopenharmony_ci         unsigned ge_pc_alloc; /* uconfig register */
937bf215546Sopenharmony_ci      } vs;
938bf215546Sopenharmony_ci
939bf215546Sopenharmony_ci      struct {
940bf215546Sopenharmony_ci         unsigned spi_ps_input_ena;
941bf215546Sopenharmony_ci         unsigned spi_ps_input_addr;
942bf215546Sopenharmony_ci         unsigned spi_baryc_cntl;
943bf215546Sopenharmony_ci         unsigned spi_ps_in_control;
944bf215546Sopenharmony_ci         unsigned spi_shader_z_format;
945bf215546Sopenharmony_ci         unsigned spi_shader_col_format;
946bf215546Sopenharmony_ci         unsigned cb_shader_mask;
947bf215546Sopenharmony_ci         unsigned db_shader_control;
948bf215546Sopenharmony_ci         unsigned num_interp;
949bf215546Sopenharmony_ci      } ps;
950bf215546Sopenharmony_ci   } ctx_reg;
951bf215546Sopenharmony_ci
952bf215546Sopenharmony_ci   /*For save precompute registers value */
953bf215546Sopenharmony_ci   unsigned vgt_tf_param;                /* VGT_TF_PARAM */
954bf215546Sopenharmony_ci   unsigned vgt_vertex_reuse_block_cntl; /* VGT_VERTEX_REUSE_BLOCK_CNTL */
955bf215546Sopenharmony_ci   unsigned pa_cl_vs_out_cntl;
956bf215546Sopenharmony_ci   unsigned ge_cntl;
957bf215546Sopenharmony_ci};
958bf215546Sopenharmony_ci
959bf215546Sopenharmony_cistruct si_shader_part {
960bf215546Sopenharmony_ci   struct si_shader_part *next;
961bf215546Sopenharmony_ci   union si_shader_part_key key;
962bf215546Sopenharmony_ci   struct si_shader_binary binary;
963bf215546Sopenharmony_ci   struct ac_shader_config config;
964bf215546Sopenharmony_ci};
965bf215546Sopenharmony_ci
966bf215546Sopenharmony_ci/* si_shader.c */
967bf215546Sopenharmony_civoid si_update_shader_binary_info(struct si_shader *shader, nir_shader *nir);
968bf215546Sopenharmony_cibool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
969bf215546Sopenharmony_ci                       struct si_shader *shader, struct util_debug_callback *debug);
970bf215546Sopenharmony_cibool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
971bf215546Sopenharmony_ci                              struct si_shader *shader, struct util_debug_callback *debug);
972bf215546Sopenharmony_civoid si_shader_destroy(struct si_shader *shader);
973bf215546Sopenharmony_ciunsigned si_shader_io_get_unique_index_patch(unsigned semantic);
974bf215546Sopenharmony_ciunsigned si_shader_io_get_unique_index(unsigned semantic, bool is_varying);
975bf215546Sopenharmony_cibool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader,
976bf215546Sopenharmony_ci                             uint64_t scratch_va);
977bf215546Sopenharmony_civoid si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
978bf215546Sopenharmony_ci                    struct util_debug_callback *debug, FILE *f, bool check_debug_option);
979bf215546Sopenharmony_civoid si_shader_dump_stats_for_shader_db(struct si_screen *screen, struct si_shader *shader,
980bf215546Sopenharmony_ci                                        struct util_debug_callback *debug);
981bf215546Sopenharmony_civoid si_multiwave_lds_size_workaround(struct si_screen *sscreen, unsigned *lds_size);
982bf215546Sopenharmony_ciconst char *si_get_shader_name(const struct si_shader *shader);
983bf215546Sopenharmony_civoid si_shader_binary_clean(struct si_shader_binary *binary);
984bf215546Sopenharmony_cistruct nir_shader *si_deserialize_shader(struct si_shader_selector *sel);
985bf215546Sopenharmony_ciunsigned si_get_ps_num_interp(struct si_shader *ps);
986bf215546Sopenharmony_ci
/* si_shader_info.c */
/* Takes a read-only NIR shader and an "info" structure to write into. */
void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
                        struct si_shader_info *info);
990bf215546Sopenharmony_ci
/* si_shader_llvm_gs.c */
/* Returns a shader for the given GS selector.
 * NOTE(review): presumably NULL on failure - confirm in the implementation.
 */
struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
                                             struct ac_llvm_compiler *compiler,
                                             struct si_shader_selector *gs_selector,
                                             const struct pipe_stream_output_info *so,
                                             struct util_debug_callback *debug);
997bf215546Sopenharmony_ci
998bf215546Sopenharmony_ci/* si_shader_nir.c */
999bf215546Sopenharmony_civoid si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool first);
1000bf215546Sopenharmony_civoid si_nir_late_opts(nir_shader *nir);
1001bf215546Sopenharmony_cichar *si_finalize_nir(struct pipe_screen *screen, void *nirptr);
1002bf215546Sopenharmony_ci
/* si_state_shaders.cpp */
unsigned si_determine_wave_size(struct si_screen *sscreen, struct si_shader *shader);
/* Writes its result into "out". */
void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *gs,
                      struct gfx9_gs_info *out);
bool gfx10_is_ngg_passthrough(struct si_shader *shader);
1008bf215546Sopenharmony_ci
1009bf215546Sopenharmony_ci/* Inline helpers. */
1010bf215546Sopenharmony_ci
1011bf215546Sopenharmony_ci/* Return the pointer to the main shader part's pointer. */
1012bf215546Sopenharmony_cistatic inline struct si_shader **si_get_main_shader_part(struct si_shader_selector *sel,
1013bf215546Sopenharmony_ci                                                         const union si_shader_key *key)
1014bf215546Sopenharmony_ci{
1015bf215546Sopenharmony_ci   if (sel->stage <= MESA_SHADER_GEOMETRY) {
1016bf215546Sopenharmony_ci      if (key->ge.as_ls)
1017bf215546Sopenharmony_ci         return &sel->main_shader_part_ls;
1018bf215546Sopenharmony_ci      if (key->ge.as_es && key->ge.as_ngg)
1019bf215546Sopenharmony_ci         return &sel->main_shader_part_ngg_es;
1020bf215546Sopenharmony_ci      if (key->ge.as_es)
1021bf215546Sopenharmony_ci         return &sel->main_shader_part_es;
1022bf215546Sopenharmony_ci      if (key->ge.as_ngg)
1023bf215546Sopenharmony_ci         return &sel->main_shader_part_ngg;
1024bf215546Sopenharmony_ci   }
1025bf215546Sopenharmony_ci   return &sel->main_shader_part;
1026bf215546Sopenharmony_ci}
1027bf215546Sopenharmony_ci
1028bf215546Sopenharmony_cistatic inline bool si_shader_uses_bindless_samplers(struct si_shader_selector *selector)
1029bf215546Sopenharmony_ci{
1030bf215546Sopenharmony_ci   return selector ? selector->info.uses_bindless_samplers : false;
1031bf215546Sopenharmony_ci}
1032bf215546Sopenharmony_ci
1033bf215546Sopenharmony_cistatic inline bool si_shader_uses_bindless_images(struct si_shader_selector *selector)
1034bf215546Sopenharmony_ci{
1035bf215546Sopenharmony_ci   return selector ? selector->info.uses_bindless_images : false;
1036bf215546Sopenharmony_ci}
1037bf215546Sopenharmony_ci
1038bf215546Sopenharmony_cistatic inline bool gfx10_edgeflags_have_effect(struct si_shader *shader)
1039bf215546Sopenharmony_ci{
1040bf215546Sopenharmony_ci   if (shader->selector->stage == MESA_SHADER_VERTEX &&
1041bf215546Sopenharmony_ci       !shader->selector->info.base.vs.blit_sgprs_amd &&
1042bf215546Sopenharmony_ci       !(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES))
1043bf215546Sopenharmony_ci      return true;
1044bf215546Sopenharmony_ci
1045bf215546Sopenharmony_ci   return false;
1046bf215546Sopenharmony_ci}
1047bf215546Sopenharmony_ci
1048bf215546Sopenharmony_cistatic inline bool gfx10_ngg_writes_user_edgeflags(struct si_shader *shader)
1049bf215546Sopenharmony_ci{
1050bf215546Sopenharmony_ci   return gfx10_edgeflags_have_effect(shader) &&
1051bf215546Sopenharmony_ci          shader->selector->info.writes_edgeflag;
1052bf215546Sopenharmony_ci}
1053bf215546Sopenharmony_ci
1054bf215546Sopenharmony_cistatic inline bool si_shader_uses_streamout(struct si_shader *shader)
1055bf215546Sopenharmony_ci{
1056bf215546Sopenharmony_ci   return shader->selector->stage <= MESA_SHADER_GEOMETRY &&
1057bf215546Sopenharmony_ci          shader->selector->info.enabled_streamout_buffer_mask &&
1058bf215546Sopenharmony_ci          !shader->key.ge.opt.remove_streamout;
1059bf215546Sopenharmony_ci}
1060bf215546Sopenharmony_ci
1061bf215546Sopenharmony_cistatic inline bool si_shader_uses_discard(struct si_shader *shader)
1062bf215546Sopenharmony_ci{
1063bf215546Sopenharmony_ci   /* Changes to this should also update ps_modifies_zs. */
1064bf215546Sopenharmony_ci   return shader->selector->info.base.fs.uses_discard ||
1065bf215546Sopenharmony_ci          shader->key.ps.part.prolog.poly_stipple ||
1066bf215546Sopenharmony_ci          shader->key.ps.mono.point_smoothing ||
1067bf215546Sopenharmony_ci          shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS;
1068bf215546Sopenharmony_ci}
1069bf215546Sopenharmony_ci
1070bf215546Sopenharmony_ci#ifdef __cplusplus
1071bf215546Sopenharmony_ci}
1072bf215546Sopenharmony_ci#endif
1073bf215546Sopenharmony_ci
1074bf215546Sopenharmony_ci#endif
1075