1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2020 Collabora, Ltd.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#ifndef __PAN_IR_H
25bf215546Sopenharmony_ci#define __PAN_IR_H
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include <stdint.h>
28bf215546Sopenharmony_ci#include "compiler/nir/nir.h"
29bf215546Sopenharmony_ci#include "util/u_dynarray.h"
30bf215546Sopenharmony_ci#include "util/hash_table.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_ci/* On Valhall, the driver gives the hardware a table of resource tables.
33bf215546Sopenharmony_ci * Resources are addressed as the index of the table together with the index of
34bf215546Sopenharmony_ci * the resource within the table. For simplicity, we put one type of resource
35bf215546Sopenharmony_ci * in each table and fix the numbering of the tables.
36bf215546Sopenharmony_ci *
37bf215546Sopenharmony_ci * This numbering is arbitrary. It is a software ABI between the
38bf215546Sopenharmony_ci * Gallium driver and the Valhall compiler.
39bf215546Sopenharmony_ci */
enum pan_resource_table {
        /* Table indices are spelled out because they form a software ABI */
        PAN_TABLE_UBO              = 0,
        PAN_TABLE_ATTRIBUTE        = 1,
        PAN_TABLE_ATTRIBUTE_BUFFER = 2,
        PAN_TABLE_SAMPLER          = 3,
        PAN_TABLE_TEXTURE          = 4,
        PAN_TABLE_IMAGE            = 5,

        /* Number of tables above. Keep last. */
        PAN_NUM_RESOURCE_TABLES
};
50bf215546Sopenharmony_ci
/* Indices for named (non-XFB) varyings that are present. These are packed
 * tightly so they correspond to a bitfield of present varyings (P) indexed by
 * (1 << PAN_VARY_*). This has the nice property that you can look up the
 * buffer index of a given special field given a shift S by:
 *
 *      idx = popcount(P & ((1 << S) - 1))
 *
 * That is, look at all of the present varyings that come earlier and count
 * them; because indices are zero-based, that count is the index of the
 * varying itself. Likewise, the total number of special buffers required is
 * simply popcount(P).
 */

enum pan_special_varying {
        PAN_VARY_GENERAL = 0,
        PAN_VARY_POSITION = 1,
        PAN_VARY_PSIZ = 2,
        PAN_VARY_PNTCOORD = 3,
        PAN_VARY_FACE = 4,
        PAN_VARY_FRAGCOORD = 5,

        /* Keep last: evaluates to the number of entries above */
        PAN_VARY_MAX,
};
74bf215546Sopenharmony_ci
/* Maximum number of attribute descriptors required for varyings. These include
 * up to MAX_VARYING source-level varyings plus one descriptor for each
 * non-GENERAL special varying (hence PAN_VARY_MAX - 1). */
#define PAN_MAX_VARYINGS (MAX_VARYING + PAN_VARY_MAX - 1)
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci/* Define the general compiler entry point */
81bf215546Sopenharmony_ci
/* Capacity of the sysvals[] array in struct panfrost_sysvals */
#define MAX_SYSVAL_COUNT 32

/* Sysval identifiers are two-dimensional: the class (one of the PAN_SYSVAL_*
 * enumerants, named by `type` without the prefix) is stored in the low 16
 * bits and a per-class ID `no` in the bits above. A nonparametric sysval is
 * encoded with ID 0, so its encoded value equals its class and the two can be
 * compared directly. */

#define PAN_SYSVAL(type, no) (((no) << 16) | PAN_SYSVAL_##type)
#define PAN_SYSVAL_TYPE(sysval) ((sysval) & 0xffff)
#define PAN_SYSVAL_ID(sysval) ((sysval) >> 16)
90bf215546Sopenharmony_ci
/* Define some common sysval classes. We start at one for easy indexing of
 * hash tables internal to the compiler. These values occupy the low 16 bits
 * of an identifier built with PAN_SYSVAL(). Note that the value 6 is
 * currently unassigned. */

enum {
        PAN_SYSVAL_VIEWPORT_SCALE = 1,
        PAN_SYSVAL_VIEWPORT_OFFSET = 2,
        PAN_SYSVAL_TEXTURE_SIZE = 3,
        PAN_SYSVAL_SSBO = 4,
        PAN_SYSVAL_NUM_WORK_GROUPS = 5,
        PAN_SYSVAL_SAMPLER = 7,
        PAN_SYSVAL_LOCAL_GROUP_SIZE = 8,
        PAN_SYSVAL_WORK_DIM = 9,
        PAN_SYSVAL_IMAGE_SIZE = 10,
        PAN_SYSVAL_SAMPLE_POSITIONS = 11,
        PAN_SYSVAL_MULTISAMPLED = 12,
        PAN_SYSVAL_RT_CONVERSION = 13,
        PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS = 14,
        PAN_SYSVAL_DRAWID = 15,
        PAN_SYSVAL_BLEND_CONSTANTS = 16,
        PAN_SYSVAL_XFB = 17,
        PAN_SYSVAL_NUM_VERTICES = 18,
};
113bf215546Sopenharmony_ci
/* Pack the parameters of a texture-size query into a sysval ID:
 *
 *      bits 0..6  texture index
 *      bits 7..8  dimension
 *      bit  9     set if the texture is an array
 *
 * The arguments are not masked, so callers must pass a texture index below
 * 128 and a dimension below 4 for the fields to stay disjoint.
 */
#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array)          \
        ((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0))

/* Unpack the fields stored by PAN_TXS_SYSVAL_ID. Each expansion is fully
 * parenthesized so it can be embedded in any larger expression. */
#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id)        ((id) & 0x7f)
#define PAN_SYSVAL_ID_TO_TXS_DIM(id)            (((id) >> 7) & 0x3)
#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id)       (!!((id) & (1 << 9)))
120bf215546Sopenharmony_ci
/* Special attribute slots for vertex builtins. Sort of arbitrary but let's be
 * consistent with the blob so we can compare traces more easily.
 * PAN_MAX_ATTRIBUTE directly follows PAN_INSTANCE_ID and therefore evaluates
 * to 18. */

enum {
        PAN_VERTEX_ID   = 16,
        PAN_INSTANCE_ID = 17,
        PAN_MAX_ATTRIBUTE
};
129bf215546Sopenharmony_ci
struct panfrost_sysvals {
        /* The mapping of sysvals to uniforms and the number of entries in use.
         *
         * NOTE(review): an earlier version of this comment also mentioned an
         * "off-by-one inverse", but this struct has no such member; the
         * inverse lookup presumably lives in the hash table returned by
         * panfrost_init_sysvals() -- confirm. */
        unsigned sysvals[MAX_SYSVAL_COUNT];
        unsigned sysval_count;
};
135bf215546Sopenharmony_ci
/* Architecturally, Bifrost/Valhall can address 128 FAU slots of 64 bits each.
 * In practice, the maximum number of FAU slots is limited by implementation.
 * All known Bifrost and Valhall devices limit to 64 FAU slots. Therefore the
 * maximum number of 32-bit words is 128, since there are 2 words per FAU slot.
 *
 * Midgard can push at most 92 words, so this bound suffices. The Midgard
 * compiler pushes fewer than this, as Midgard uses register-mapped uniforms
 * instead of FAU, preventing large numbers of uniforms from being pushed for
 * nontrivial programs.
 */
#define PAN_MAX_PUSH 128
147bf215546Sopenharmony_ci
/* Architectural invariants (Midgard and Bifrost): UBO must be <= 2^16 bytes so
 * an offset to a word must be < 2^16. There are fewer than 2^8 UBOs, so both
 * members fit comfortably in 16 bits. */

/* One pushed word, identified by the UBO it comes from and its offset within
 * that UBO (presumably in bytes, given the invariant above -- confirm). */
struct panfrost_ubo_word {
        uint16_t ubo;
        uint16_t offset;
};

/* List of pushed UBO words; `count` entries of `words` are in use, and the
 * array holds at most PAN_MAX_PUSH entries. */
struct panfrost_ubo_push {
        unsigned count;
        struct panfrost_ubo_word words[PAN_MAX_PUSH];
};
160bf215546Sopenharmony_ci
/* Helper for searching a struct panfrost_ubo_push. Note this is O(N) in the
 * number of pushed constants, so do not run it in the draw call hot path.
 *
 * NOTE(review): presumably returns the index of the pushed word matching
 * (ubo, offs); the behaviour when no word matches is not visible from this
 * header -- confirm against the definition. */

unsigned
pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs);

/* NOTE(review): presumably initializes `sysvals` (optionally from a fixed
 * layout) and returns a sysval-to-ID hash table allocated against `memctx`
 * -- confirm against the definition. */
struct hash_table_u64 *
panfrost_init_sysvals(struct panfrost_sysvals *sysvals,
                      struct panfrost_sysvals *fixed_sysvals,
                      void *memctx);

/* NOTE(review): presumably maps an encoded sysval (see PAN_SYSVAL) to its
 * index in `sysvals`, using `sysval_to_id` as a cache -- confirm. */
unsigned
pan_lookup_sysval(struct hash_table_u64 *sysval_to_id,
                  struct panfrost_sysvals *sysvals,
                  int sysval);

/* NOTE(review): presumably returns the encoded sysval required by `instr`
 * and reports the instruction's destination through `dest` -- confirm. */
int
panfrost_sysval_for_instr(nir_instr *instr, nir_dest *dest);
179bf215546Sopenharmony_ci
/* Parameters describing how a shader should be compiled.
 * NOTE(review): presumably filled in by the driver and handed to the
 * compiler entry point -- confirm against the callers. */
struct panfrost_compile_inputs {
        unsigned gpu_id;
        bool is_blend, is_blit;

        /* NOTE(review): presumably only meaningful when is_blend is set */
        struct {
                unsigned rt;
                unsigned nr_samples;
                uint64_t bifrost_blend_desc;
        } blend;
        int fixed_sysval_ubo;
        struct panfrost_sysvals *fixed_sysval_layout;
        bool shaderdb;
        bool no_idvs;
        bool no_ubo_to_push;

        enum pipe_format rt_formats[8];
        uint8_t raw_fmt_mask;
        unsigned nr_cbufs;

        /* Used on Valhall.
         *
         * Bit mask of special desktop-only varyings (e.g VARYING_SLOT_TEX0)
         * written by the previous stage (fragment shader) or written by this
         * stage (vertex shader). Bits are slots from gl_varying_slot.
         *
         * For modern APIs (GLES or VK), this should be 0.
         */
        uint32_t fixed_varying_mask;

        /* Architecture-specific inputs */
        union {
                struct {
                        bool static_rt_conv;
                        uint32_t rt_conv[8];
                } bifrost;
        };
};
215bf215546Sopenharmony_ci
/* A single varying: its slot together with the format of its data */
struct pan_shader_varying {
        gl_varying_slot location;
        enum pipe_format format;
};
220bf215546Sopenharmony_ci
/* Per-render-target blend information; struct bifrost_shader_info holds an
 * array of eight of these. */
struct bifrost_shader_blend_info {
        nir_alu_type type;

        /* NOTE(review): presumably the offset in the shader binary that a
         * blend shader returns to -- confirm */
        uint32_t return_offset;

        /* mali_bifrost_register_file_format corresponding to nir_alu_type */
        unsigned format;
};
228bf215546Sopenharmony_ci
/*
 * Unpacked form of a v7 message preload descriptor, produced by the compiler's
 * message preload optimization. By splitting out this struct, the compiler does
 * not need to know about data structure packing, avoiding a dependency on
 * GenXML.
 */
struct bifrost_message_preload {
        /* Whether to preload this message */
        bool enabled;

        /* Varying to load from */
        unsigned varying_index;

        /* Register type: FP16 if set, FP32 otherwise */
        bool fp16;

        /* Number of components, ignored if texturing */
        unsigned num_components;

        /* If texture is set, performs a texture instruction according to
         * texture_index, skip, and zero_lod. If texture is unset, only the
         * varying load is performed.
         */
        bool texture, skip, zero_lod;
        unsigned texture_index;
};
255bf215546Sopenharmony_ci
/* Bifrost-specific shader metadata; embedded in struct pan_shader_info
 * through a union shared with the Midgard equivalent. */
struct bifrost_shader_info {
        /* One entry per render target */
        struct bifrost_shader_blend_info blend[8];
        nir_alu_type blend_src1_type;
        bool wait_6, wait_7;

        /* Up to two message preload descriptors */
        struct bifrost_message_preload messages[2];

        /* Whether any flat varyings are loaded. This may disable optimizations
         * that change the provoking vertex, since that would load incorrect
         * values for flat varyings.
         */
        bool uses_flat_shading;
};
268bf215546Sopenharmony_ci
/* Midgard-specific shader metadata; embedded in struct pan_shader_info
 * through a union shared with the Bifrost equivalent. */
struct midgard_shader_info {
        /* NOTE(review): presumably the tag of the first instruction bundle
         * of the shader -- confirm */
        unsigned first_tag;
};
272bf215546Sopenharmony_ci
/* Metadata describing a compiled shader: resource usage, stage-specific
 * properties, varying layout, sysvals, pushed uniforms and the
 * architecture-specific information. */
struct pan_shader_info {
        gl_shader_stage stage;
        unsigned work_reg_count;
        unsigned tls_size;
        unsigned wls_size;

        /* Bit mask of preloaded registers */
        uint64_t preload;

        /* Stage-specific properties. NOTE(review): presumably selected by
         * `stage` above -- confirm. */
        union {
                struct {
                        bool reads_frag_coord;
                        bool reads_point_coord;
                        bool reads_face;
                        bool can_discard;
                        bool writes_depth;
                        bool writes_stencil;
                        bool writes_coverage;
                        bool sidefx;
                        bool sample_shading;
                        bool early_fragment_tests;
                        bool can_early_z, can_fpk;

                        /* Distinct from the stage-independent 64-bit
                         * outputs_written member further below */
                        BITSET_WORD outputs_read;
                        BITSET_WORD outputs_written;
                } fs;

                struct {
                        bool writes_point_size;

                        /* If the primary shader writes point size, the Valhall
                         * driver may need a variant that does not write point
                         * size. Offset to such a shader in the program binary.
                         *
                         * Zero if no such variant is required.
                         *
                         * Only used with IDVS on Valhall.
                         */
                        unsigned no_psiz_offset;

                        /* Set if Index-Driven Vertex Shading is in use */
                        bool idvs;

                        /* If IDVS is used, whether a varying shader is used */
                        bool secondary_enable;

                        /* If a varying shader is used, the varying shader's
                         * offset in the program binary
                         */
                        unsigned secondary_offset;

                        /* If IDVS is in use, number of work registers used by
                         * the varying shader
                         */
                        unsigned secondary_work_reg_count;

                        /* If IDVS is in use, bit mask of preloaded registers
                         * used by the varying shader
                         */
                        uint64_t secondary_preload;
                } vs;

                struct {
                        /* Is it legal to merge workgroups? This is true if the
                         * shader uses neither barriers nor shared memory.
                         *
                         * Used by the Valhall hardware.
                         */
                        bool allow_merging_workgroups;
                } cs;
        };

        /* Does the shader contain a barrier? Or (for fragment shaders) does it
         * require helper invocations, which demand the same ordering guarantees
         * of the hardware? These notions are unified in the hardware, so we
         * unify them here as well.
         */
        bool contains_barrier;
        bool separable;
        bool writes_global;
        uint64_t outputs_written;

        unsigned sampler_count;
        unsigned texture_count;
        unsigned ubo_count;
        unsigned attributes_read_count;
        unsigned attribute_count;
        unsigned attributes_read;

        /* Varyings read and written by the shader; each array holds at most
         * PAN_MAX_VARYINGS entries */
        struct {
                unsigned input_count;
                struct pan_shader_varying input[PAN_MAX_VARYINGS];
                unsigned output_count;
                struct pan_shader_varying output[PAN_MAX_VARYINGS];
        } varyings;

        struct panfrost_sysvals sysvals;

        /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access
         * Uniforms (Bifrost) */
        struct panfrost_ubo_push push;

        uint32_t ubo_mask;

        /* Architecture-specific information */
        union {
                struct bifrost_shader_info bifrost;
                struct midgard_shader_info midgard;
        };
};
381bf215546Sopenharmony_ci
/* Basic block shared by the compiler backends: a node of the control flow
 * graph that owns a list of instructions and per-block liveness state. */
typedef struct pan_block {
        /* Link to next block. Must be first for mir_get_block */
        struct list_head link;

        /* List of instructions emitted for the current block */
        struct list_head instructions;

        /* Index of the block in source order */
        unsigned name;

        /* Control flow graph: at most two successors (unused slots are NULL,
         * see pan_foreach_successor) and a set of predecessor blocks */
        struct pan_block *successors[2];
        struct set *predecessors;
        bool unconditional_jumps;

        /* In liveness analysis, these are live masks (per-component) for
         * indices for the block. Scalar compilers have the luxury of using
         * simple bit fields, but for us, liveness is a vector idea. */
        uint16_t *live_in;
        uint16_t *live_out;
} pan_block;
403bf215546Sopenharmony_ci
/* Common header of an instruction: just the list node used to chain it into
 * pan_block::instructions (see pan_foreach_instr_in_block_rev). */
struct pan_instruction {
        struct list_head link;
};
407bf215546Sopenharmony_ci
/* Walk the instruction list of `block` with list_for_each_entry_rev, binding
 * each entry to `v` as a struct pan_instruction pointer. `block` is
 * parenthesized so that any pointer-valued expression may be passed, not only
 * a plain identifier. */
#define pan_foreach_instr_in_block_rev(block, v) \
        list_for_each_entry_rev(struct pan_instruction, v, &(block)->instructions, link)
410bf215546Sopenharmony_ci
/* Iterate over the non-NULL successors of `blk`, binding each to `v`.
 * Iteration stops at the first NULL slot or after both slots were visited.
 *
 * The macro declares `v` and the cursor `_v` in the enclosing scope, so it
 * can be used at most once per scope.
 *
 * The increment step only dereferences the cursor while it still points
 * inside successors[]; once it reaches the one-past-the-end position, `v` is
 * set to NULL instead of reading beyond the array.
 */
#define pan_foreach_successor(blk, v) \
        pan_block *v; \
        pan_block **_v; \
        for (_v = (pan_block **) &(blk)->successors[0], \
                v = *_v; \
                v != NULL && _v < (pan_block **) &(blk)->successors[2]; \
                _v++, \
                v = (_v < (pan_block **) &(blk)->successors[2]) ? *_v : NULL)
418bf215546Sopenharmony_ci
/* Iterate over the predecessor set of `blk`, binding each stored key to `v`
 * as a struct pan_block pointer.
 *
 * The macro declares `v` and the cursor `_entry_<v>` in the enclosing scope.
 * `blk` is parenthesized so that any pointer-valued expression may be
 * passed; note that it is evaluated once per iteration.
 */
#define pan_foreach_predecessor(blk, v) \
        struct set_entry *_entry_##v; \
        struct pan_block *v; \
        for (_entry_##v = _mesa_set_next_entry((blk)->predecessors, NULL), \
                v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL);  \
                _entry_##v != NULL; \
                _entry_##v = _mesa_set_next_entry((blk)->predecessors, _entry_##v), \
                v = (struct pan_block *) (_entry_##v ? _entry_##v->key : NULL))
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_cistatic inline pan_block *
429bf215546Sopenharmony_cipan_exit_block(struct list_head *blocks)
430bf215546Sopenharmony_ci{
431bf215546Sopenharmony_ci        pan_block *last = list_last_entry(blocks, pan_block, link);
432bf215546Sopenharmony_ci        assert(!last->successors[0] && !last->successors[1]);
433bf215546Sopenharmony_ci        return last;
434bf215546Sopenharmony_ci}
435bf215546Sopenharmony_ci
/* Callback type passed to pan_compute_liveness. It receives a live mask
 * array, an opaque pointer and `max`.
 * NOTE(review): presumably the opaque pointer is an instruction and `max` is
 * the number of nodes -- confirm against the definition. */
typedef void (*pan_liveness_update)(uint16_t *, void *, unsigned max);

/* Helpers operating on a per-node live mask array `live`.
 * NOTE(review): presumably gen/kill set/clear the bits of `mask` for `node`
 * (ignoring nodes >= max) and get reports whether `node` is live -- confirm.
 * NOTE(review): `max` is uint16_t in pan_liveness_get but unsigned in the
 * other two; confirm this narrowing is intended. */
void pan_liveness_gen(uint16_t *live, unsigned node, unsigned max, uint16_t mask);
void pan_liveness_kill(uint16_t *live, unsigned node, unsigned max, uint16_t mask);
bool pan_liveness_get(uint16_t *live, unsigned node, uint16_t max);

/* NOTE(review): presumably fills pan_block::live_in/live_out for every block
 * in `blocks`, using `callback` to update liveness per instruction --
 * confirm. */
void pan_compute_liveness(struct list_head *blocks,
                unsigned temp_count,
                pan_liveness_update callback);

/* NOTE(review): presumably releases the live_in/live_out arrays allocated by
 * pan_compute_liveness -- confirm. */
void pan_free_liveness(struct list_head *blocks);

/* NOTE(review): presumably expands a component mask into a per-byte mask for
 * components that are `bytes` wide -- confirm. */
uint16_t
pan_to_bytemask(unsigned bytes, unsigned mask);

/* NOTE(review): presumably records `successor` in block->successors and
 * `block` in successor->predecessors -- confirm. */
void pan_block_add_successor(pan_block *block, pan_block *successor);
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_ci/* IR indexing */
454bf215546Sopenharmony_ci#define PAN_IS_REG (1)
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_cistatic inline unsigned
457bf215546Sopenharmony_cipan_ssa_index(nir_ssa_def *ssa)
458bf215546Sopenharmony_ci{
459bf215546Sopenharmony_ci        /* Off-by-one ensures BIR_NO_ARG is skipped */
460bf215546Sopenharmony_ci        return ((ssa->index + 1) << 1) | 0;
461bf215546Sopenharmony_ci}
462bf215546Sopenharmony_ci
463bf215546Sopenharmony_cistatic inline unsigned
464bf215546Sopenharmony_cipan_src_index(nir_src *src)
465bf215546Sopenharmony_ci{
466bf215546Sopenharmony_ci        if (src->is_ssa)
467bf215546Sopenharmony_ci                return pan_ssa_index(src->ssa);
468bf215546Sopenharmony_ci        else {
469bf215546Sopenharmony_ci                assert(!src->reg.indirect);
470bf215546Sopenharmony_ci                return (src->reg.reg->index << 1) | PAN_IS_REG;
471bf215546Sopenharmony_ci        }
472bf215546Sopenharmony_ci}
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_cistatic inline unsigned
475bf215546Sopenharmony_cipan_dest_index(nir_dest *dst)
476bf215546Sopenharmony_ci{
477bf215546Sopenharmony_ci        if (dst->is_ssa)
478bf215546Sopenharmony_ci                return pan_ssa_index(&dst->ssa);
479bf215546Sopenharmony_ci        else {
480bf215546Sopenharmony_ci                assert(!dst->reg.indirect);
481bf215546Sopenharmony_ci                return (dst->reg.reg->index << 1) | PAN_IS_REG;
482bf215546Sopenharmony_ci        }
483bf215546Sopenharmony_ci}
484bf215546Sopenharmony_ci
/* IR printing helpers */

/* NOTE(review): presumably writes a textual form of `t` to `fp` -- confirm */
void pan_print_alu_type(nir_alu_type t, FILE *fp);

/* Until it can be upstreamed..
 * NOTE(review): presumably these report whether a source/destination
 * modifier implementing `op` is attached to the operand; note that
 * pan_has_dest_mod takes a pointer to a nir_dest pointer -- confirm the
 * contract against the definitions. */
bool pan_has_source_mod(nir_alu_src *src, nir_op op);
bool pan_has_dest_mod(nir_dest **dest, nir_op op);
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci/* NIR passes to do some backend-specific lowering */
493bf215546Sopenharmony_ci
/* Writeout flags, one bit each so they can be OR-ed into a mask.
 * NOTE(review): presumably C = colour, Z = depth, S = stencil and
 * 2 = second (dual-source) colour -- confirm. */
#define PAN_WRITEOUT_C (1 << 0)
#define PAN_WRITEOUT_Z (1 << 1)
#define PAN_WRITEOUT_S (1 << 2)
#define PAN_WRITEOUT_2 (1 << 3)
498bf215546Sopenharmony_ci
/* NIR lowering passes, each taking the shader to lower.
 * NOTE(review): presumably each returns true when it changed the shader, as
 * is conventional for NIR passes -- confirm against the definitions. */
bool pan_nir_lower_zs_store(nir_shader *nir);

bool pan_nir_lower_64bit_intrin(nir_shader *shader);

bool pan_lower_helper_invocation(nir_shader *shader);
bool pan_lower_sample_pos(nir_shader *shader);
bool pan_lower_xfb(nir_shader *nir);
506bf215546Sopenharmony_ci
/*
 * Helper returning the subgroup size for architecture version `arch`.
 * Generally, this is equal to the number of threads in a warp. For Midgard
 * (including warping models), this returns 1, as subgroups are not supported.
 *
 *      arch < 6        1
 *      arch == 6       4
 *      arch 7..8       8
 *      arch >= 9       16
 */
static inline unsigned
pan_subgroup_size(unsigned arch)
{
        if (arch < 6)
                return 1;

        if (arch == 6)
                return 4;

        if (arch < 9)
                return 8;

        return 16;
}
524bf215546Sopenharmony_ci
525bf215546Sopenharmony_ci#endif
526