1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *    Rob Clark <robclark@freedesktop.org>
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#ifndef IR3_COMPILER_H_
28bf215546Sopenharmony_ci#define IR3_COMPILER_H_
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "compiler/nir/nir.h"
31bf215546Sopenharmony_ci#include "util/disk_cache.h"
32bf215546Sopenharmony_ci#include "util/log.h"
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_ci#include "freedreno_dev_info.h"
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_ci#include "ir3.h"
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_cistruct ir3_ra_reg_set;
39bf215546Sopenharmony_cistruct ir3_shader;
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_cistruct ir3_compiler {
42bf215546Sopenharmony_ci   struct fd_device *dev;
43bf215546Sopenharmony_ci   const struct fd_dev_id *dev_id;
44bf215546Sopenharmony_ci   uint8_t gen;
45bf215546Sopenharmony_ci   uint32_t shader_count;
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ci   struct disk_cache *disk_cache;
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_ci   struct nir_shader_compiler_options nir_options;
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci   bool robust_buffer_access2;
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_ci   /*
54bf215546Sopenharmony_ci    * Configuration options for things that are handled differently on
55bf215546Sopenharmony_ci    * different generations:
56bf215546Sopenharmony_ci    */
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_ci   /* a4xx (and later) drops SP_FS_FLAT_SHAD_MODE_REG_* for flat-interpolate
59bf215546Sopenharmony_ci    * so we need to use ldlv.u32 to load the varying directly:
60bf215546Sopenharmony_ci    */
61bf215546Sopenharmony_ci   bool flat_bypass;
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_ci   /* on a3xx, we need to add one to # of array levels:
64bf215546Sopenharmony_ci    */
65bf215546Sopenharmony_ci   bool levels_add_one;
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci   /* on a3xx, we need to scale up integer coords for isaml based
68bf215546Sopenharmony_ci    * on LoD:
69bf215546Sopenharmony_ci    */
70bf215546Sopenharmony_ci   bool unminify_coords;
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci   /* on a3xx do txf_ms w/ isaml and scaled coords: */
73bf215546Sopenharmony_ci   bool txf_ms_with_isaml;
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci   /* on a4xx, for array textures we need to add 0.5 to the array
76bf215546Sopenharmony_ci    * index coordinate:
77bf215546Sopenharmony_ci    */
78bf215546Sopenharmony_ci   bool array_index_add_half;
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci   /* on a6xx, rewrite samgp to sequence of samgq0-3 in vertex shaders:
81bf215546Sopenharmony_ci    */
82bf215546Sopenharmony_ci   bool samgq_workaround;
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_ci   /* on a650, vertex shader <-> tess control io uses LDL/STL */
85bf215546Sopenharmony_ci   bool tess_use_shared;
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ci   /* The maximum number of constants, in vec4's, across the entire graphics
88bf215546Sopenharmony_ci    * pipeline.
89bf215546Sopenharmony_ci    */
90bf215546Sopenharmony_ci   uint16_t max_const_pipeline;
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_ci   /* The maximum number of constants, in vec4's, for VS+HS+DS+GS. */
93bf215546Sopenharmony_ci   uint16_t max_const_geom;
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci   /* The maximum number of constants, in vec4's, for FS. */
96bf215546Sopenharmony_ci   uint16_t max_const_frag;
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci   /* A "safe" max constlen that can be applied to each shader in the
99bf215546Sopenharmony_ci    * pipeline which we guarantee will never exceed any combined limits.
100bf215546Sopenharmony_ci    */
101bf215546Sopenharmony_ci   uint16_t max_const_safe;
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci   /* The maximum number of constants, in vec4's, for compute shaders. */
104bf215546Sopenharmony_ci   uint16_t max_const_compute;
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci   /* Number of instructions that the shader's base address and length
107bf215546Sopenharmony_ci    * (instrlen divides instruction count by this) must be aligned to.
108bf215546Sopenharmony_ci    */
109bf215546Sopenharmony_ci   uint32_t instr_align;
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_ci   /* on a3xx, the unit of indirect const load is higher than later gens (in
112bf215546Sopenharmony_ci    * vec4 units):
113bf215546Sopenharmony_ci    */
114bf215546Sopenharmony_ci   uint32_t const_upload_unit;
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_ci   /* The base number of threads per wave. Some stages may be able to double
117bf215546Sopenharmony_ci    * this.
118bf215546Sopenharmony_ci    */
119bf215546Sopenharmony_ci   uint32_t threadsize_base;
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci   /* On at least a6xx, waves are always launched in pairs. In calculations
122bf215546Sopenharmony_ci    * about occupancy, we pretend that each wave pair is actually one wave,
123bf215546Sopenharmony_ci    * which simplifies many of the calculations, but means we have to
124bf215546Sopenharmony_ci    * multiply threadsize_base by this number.
125bf215546Sopenharmony_ci    */
126bf215546Sopenharmony_ci   uint32_t wave_granularity;
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci   /* The maximum number of simultaneous waves per core. */
129bf215546Sopenharmony_ci   uint32_t max_waves;
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci   /* This is theoretical maximum number of vec4 registers that one wave of
132bf215546Sopenharmony_ci    * the base threadsize could use. To get the actual size of the register
133bf215546Sopenharmony_ci    * file in bytes one would need to compute:
134bf215546Sopenharmony_ci    *
135bf215546Sopenharmony_ci    * reg_size_vec4 * threadsize_base * wave_granularity * 16 (bytes per vec4)
136bf215546Sopenharmony_ci    *
137bf215546Sopenharmony_ci    * However this number is more often what we actually need. For example, a
138bf215546Sopenharmony_ci    * max_reg more than half of this will result in a doubled threadsize
139bf215546Sopenharmony_ci    * being impossible (because double-sized waves take up twice as many
140bf215546Sopenharmony_ci    * registers). Also, the formula for the occupancy given a particular
141bf215546Sopenharmony_ci    * register footprint is simpler.
142bf215546Sopenharmony_ci    *
143bf215546Sopenharmony_ci    * It is in vec4 units because the register file is allocated
144bf215546Sopenharmony_ci    * with vec4 granularity, so it's in the same units as max_reg.
145bf215546Sopenharmony_ci    */
146bf215546Sopenharmony_ci   uint32_t reg_size_vec4;
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci   /* The size of local memory in bytes */
149bf215546Sopenharmony_ci   uint32_t local_mem_size;
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci   /* The number of total branch stack entries, divided by wave_granularity. */
152bf215546Sopenharmony_ci   uint32_t branchstack_size;
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci   /* Whether clip+cull distances are supported */
155bf215546Sopenharmony_ci   bool has_clip_cull;
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci   /* Whether private memory is supported */
158bf215546Sopenharmony_ci   bool has_pvtmem;
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_ci   /* True if 16-bit descriptors are used for both 16-bit and 32-bit access. */
161bf215546Sopenharmony_ci   bool storage_16bit;
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci   /* True if getfiberid, getlast.w8, brcst.active, and quad_shuffle
164bf215546Sopenharmony_ci    * instructions are supported which are necessary to support
165bf215546Sopenharmony_ci    * subgroup quad and arithmetic operations.
166bf215546Sopenharmony_ci    */
167bf215546Sopenharmony_ci   bool has_getfiberid;
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_ci   /* MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB */
170bf215546Sopenharmony_ci   uint32_t max_variable_workgroup_size;
171bf215546Sopenharmony_ci
172bf215546Sopenharmony_ci   bool has_dp2acc;
173bf215546Sopenharmony_ci   bool has_dp4acc;
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci   /* Type to use for 1b nir bools: */
176bf215546Sopenharmony_ci   type_t bool_type;
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci   /* Whether compute invocation params are passed in via shared regfile or
179bf215546Sopenharmony_ci    * constbuf. a5xx+ has the shared regfile.
180bf215546Sopenharmony_ci    */
181bf215546Sopenharmony_ci   bool has_shared_regfile;
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_ci   /* True if preamble instructions (shps, shpe, etc.) are supported */
184bf215546Sopenharmony_ci   bool has_preamble;
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci   bool push_ubo_with_preamble;
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci   /* Where the shared consts start in constants file, in vec4's. */
189bf215546Sopenharmony_ci   uint16_t shared_consts_base_offset;
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_ci   /* The size of shared consts for CS and FS(in vec4's).
192bf215546Sopenharmony_ci    * Also the size that is actually used on geometry stages (on a6xx).
193bf215546Sopenharmony_ci    */
194bf215546Sopenharmony_ci   uint64_t shared_consts_size;
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci   /* Found on a6xx for geometry stages, that is different from
197bf215546Sopenharmony_ci    * actually used shared consts.
198bf215546Sopenharmony_ci    *
199bf215546Sopenharmony_ci    * TODO: Keep an eye on this for next gens.
200bf215546Sopenharmony_ci    */
201bf215546Sopenharmony_ci   uint64_t geom_shared_consts_size_quirk;
202bf215546Sopenharmony_ci};
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_cistruct ir3_compiler_options {
205bf215546Sopenharmony_ci   /* If true, UBO/SSBO accesses are assumed to be bounds-checked as defined by
206bf215546Sopenharmony_ci    * VK_EXT_robustness2 and optimizations may have to be more conservative.
207bf215546Sopenharmony_ci    */
208bf215546Sopenharmony_ci   bool robust_buffer_access2;
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci   /* If true, promote UBOs (except for constant data) to constants using ldc.k
211bf215546Sopenharmony_ci    * in the preamble. The driver should ignore everything in ubo_state except
212bf215546Sopenharmony_ci    * for the constant data UBO, which is excluded because the command pushing
213bf215546Sopenharmony_ci    * constants for it can be pre-baked when compiling the shader.
214bf215546Sopenharmony_ci    */
215bf215546Sopenharmony_ci   bool push_ubo_with_preamble;
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci   /* If true, disable the shader cache. The driver is then responsible for
218bf215546Sopenharmony_ci    * caching.
219bf215546Sopenharmony_ci    */
220bf215546Sopenharmony_ci   bool disable_cache;
221bf215546Sopenharmony_ci};
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_civoid ir3_compiler_destroy(struct ir3_compiler *compiler);
224bf215546Sopenharmony_cistruct ir3_compiler *ir3_compiler_create(struct fd_device *dev,
225bf215546Sopenharmony_ci                                         const struct fd_dev_id *dev_id,
226bf215546Sopenharmony_ci                                         const struct ir3_compiler_options *options);
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_civoid ir3_disk_cache_init(struct ir3_compiler *compiler);
229bf215546Sopenharmony_civoid ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler,
230bf215546Sopenharmony_ci                                    struct ir3_shader *shader);
231bf215546Sopenharmony_cistruct ir3_shader_variant *ir3_retrieve_variant(struct blob_reader *blob,
232bf215546Sopenharmony_ci                                                struct ir3_compiler *compiler,
233bf215546Sopenharmony_ci                                                void *mem_ctx);
234bf215546Sopenharmony_civoid ir3_store_variant(struct blob *blob, struct ir3_shader_variant *v);
235bf215546Sopenharmony_cibool ir3_disk_cache_retrieve(struct ir3_shader *shader,
236bf215546Sopenharmony_ci                             struct ir3_shader_variant *v);
237bf215546Sopenharmony_civoid ir3_disk_cache_store(struct ir3_shader *shader,
238bf215546Sopenharmony_ci                          struct ir3_shader_variant *v);
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ciconst nir_shader_compiler_options *
241bf215546Sopenharmony_ciir3_get_compiler_options(struct ir3_compiler *compiler);
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ciint ir3_compile_shader_nir(struct ir3_compiler *compiler,
244bf215546Sopenharmony_ci                           struct ir3_shader *shader,
245bf215546Sopenharmony_ci                           struct ir3_shader_variant *so);
246bf215546Sopenharmony_ci
247bf215546Sopenharmony_ci/* gpu pointer size in units of 32bit registers/slots */
248bf215546Sopenharmony_cistatic inline unsigned
249bf215546Sopenharmony_ciir3_pointer_size(struct ir3_compiler *compiler)
250bf215546Sopenharmony_ci{
251bf215546Sopenharmony_ci   return fd_dev_64b(compiler->dev_id) ? 2 : 1;
252bf215546Sopenharmony_ci}
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_cienum ir3_shader_debug {
255bf215546Sopenharmony_ci   IR3_DBG_SHADER_VS = BITFIELD_BIT(0),
256bf215546Sopenharmony_ci   IR3_DBG_SHADER_TCS = BITFIELD_BIT(1),
257bf215546Sopenharmony_ci   IR3_DBG_SHADER_TES = BITFIELD_BIT(2),
258bf215546Sopenharmony_ci   IR3_DBG_SHADER_GS = BITFIELD_BIT(3),
259bf215546Sopenharmony_ci   IR3_DBG_SHADER_FS = BITFIELD_BIT(4),
260bf215546Sopenharmony_ci   IR3_DBG_SHADER_CS = BITFIELD_BIT(5),
261bf215546Sopenharmony_ci   IR3_DBG_DISASM = BITFIELD_BIT(6),
262bf215546Sopenharmony_ci   IR3_DBG_OPTMSGS = BITFIELD_BIT(7),
263bf215546Sopenharmony_ci   IR3_DBG_FORCES2EN = BITFIELD_BIT(8),
264bf215546Sopenharmony_ci   IR3_DBG_NOUBOOPT = BITFIELD_BIT(9),
265bf215546Sopenharmony_ci   IR3_DBG_NOFP16 = BITFIELD_BIT(10),
266bf215546Sopenharmony_ci   IR3_DBG_NOCACHE = BITFIELD_BIT(11),
267bf215546Sopenharmony_ci   IR3_DBG_SPILLALL = BITFIELD_BIT(12),
268bf215546Sopenharmony_ci   IR3_DBG_NOPREAMBLE = BITFIELD_BIT(13),
269bf215546Sopenharmony_ci
270bf215546Sopenharmony_ci   /* DEBUG-only options: */
271bf215546Sopenharmony_ci   IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20),
272bf215546Sopenharmony_ci   IR3_DBG_RAMSGS = BITFIELD_BIT(21),
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci   /* Only used for the disk-caching logic: */
275bf215546Sopenharmony_ci   IR3_DBG_ROBUST_UBO_ACCESS = BITFIELD_BIT(30),
276bf215546Sopenharmony_ci};
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ciextern enum ir3_shader_debug ir3_shader_debug;
279bf215546Sopenharmony_ciextern const char *ir3_shader_override_path;
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_cistatic inline bool
282bf215546Sopenharmony_cishader_debug_enabled(gl_shader_stage type)
283bf215546Sopenharmony_ci{
284bf215546Sopenharmony_ci   if (ir3_shader_debug & IR3_DBG_DISASM)
285bf215546Sopenharmony_ci      return true;
286bf215546Sopenharmony_ci
287bf215546Sopenharmony_ci   switch (type) {
288bf215546Sopenharmony_ci   case MESA_SHADER_VERTEX:
289bf215546Sopenharmony_ci      return !!(ir3_shader_debug & IR3_DBG_SHADER_VS);
290bf215546Sopenharmony_ci   case MESA_SHADER_TESS_CTRL:
291bf215546Sopenharmony_ci      return !!(ir3_shader_debug & IR3_DBG_SHADER_TCS);
292bf215546Sopenharmony_ci   case MESA_SHADER_TESS_EVAL:
293bf215546Sopenharmony_ci      return !!(ir3_shader_debug & IR3_DBG_SHADER_TES);
294bf215546Sopenharmony_ci   case MESA_SHADER_GEOMETRY:
295bf215546Sopenharmony_ci      return !!(ir3_shader_debug & IR3_DBG_SHADER_GS);
296bf215546Sopenharmony_ci   case MESA_SHADER_FRAGMENT:
297bf215546Sopenharmony_ci      return !!(ir3_shader_debug & IR3_DBG_SHADER_FS);
298bf215546Sopenharmony_ci   case MESA_SHADER_COMPUTE:
299bf215546Sopenharmony_ci   case MESA_SHADER_KERNEL:
300bf215546Sopenharmony_ci      return !!(ir3_shader_debug & IR3_DBG_SHADER_CS);
301bf215546Sopenharmony_ci   default:
302bf215546Sopenharmony_ci      assert(0);
303bf215546Sopenharmony_ci      return false;
304bf215546Sopenharmony_ci   }
305bf215546Sopenharmony_ci}
306bf215546Sopenharmony_ci
307bf215546Sopenharmony_cistatic inline void
308bf215546Sopenharmony_ciir3_debug_print(struct ir3 *ir, const char *when)
309bf215546Sopenharmony_ci{
310bf215546Sopenharmony_ci   if (ir3_shader_debug & IR3_DBG_OPTMSGS) {
311bf215546Sopenharmony_ci      mesa_logi("%s:", when);
312bf215546Sopenharmony_ci      ir3_print(ir);
313bf215546Sopenharmony_ci   }
314bf215546Sopenharmony_ci}
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci#endif /* IR3_COMPILER_H_ */
317