1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2020 Collabora Ltd.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors (Collabora):
24bf215546Sopenharmony_ci *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#ifndef __BIFROST_COMPILER_H
28bf215546Sopenharmony_ci#define __BIFROST_COMPILER_H
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "bifrost.h"
31bf215546Sopenharmony_ci#include "bi_opcodes.h"
32bf215546Sopenharmony_ci#include "compiler/nir/nir.h"
33bf215546Sopenharmony_ci#include "panfrost/util/pan_ir.h"
34bf215546Sopenharmony_ci#include "util/u_math.h"
35bf215546Sopenharmony_ci#include "util/half_float.h"
36bf215546Sopenharmony_ci#include "util/u_worklist.h"
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_ci#ifdef __cplusplus
39bf215546Sopenharmony_ciextern "C" {
40bf215546Sopenharmony_ci#endif
41bf215546Sopenharmony_ci
/* Swizzles across bytes in a 32-bit word. Expresses swz in the XML directly.
 * To express widen, use the corresponding replicated form, i.e. H01 = identity
 * for widen = none, H00 for widen = h0, B1111 for widen = b1. For lane, also
 * use the replicated form (interpretation is governed by the opcode). For
 * 8-bit lanes with two channels, use replicated forms for replicated forms
 * (TODO: what about others?). For 8-bit lanes with four channels, use the
 * matching form (TODO: what about others?).
 *
 * The numeric values are load-bearing: bi_swz_16() builds a 16-bit swizzle as
 * BI_SWIZZLE_H00 | (x << 1) | y and bi_byte() builds a byte replicate as
 * BI_SWIZZLE_B0000 + lane, so the first eight enumerants must keep their
 * order. Every value must also fit the 4-bit swizzle field of bi_index.
 */

enum bi_swizzle {
        /* 16-bit swizzle ordering deliberate for fast compute */
        BI_SWIZZLE_H00 = 0, /* = B0101 */
        BI_SWIZZLE_H01 = 1, /* = B0123 = W0 */
        BI_SWIZZLE_H10 = 2, /* = B2301 */
        BI_SWIZZLE_H11 = 3, /* = B2323 */

        /* replication order should be maintained for fast compute */
        BI_SWIZZLE_B0000 = 4, /* single channel (replicate) */
        BI_SWIZZLE_B1111 = 5,
        BI_SWIZZLE_B2222 = 6,
        BI_SWIZZLE_B3333 = 7,

        /* totally special for explicit pattern matching */
        BI_SWIZZLE_B0011 = 8, /* +SWZ.v4i8 */
        BI_SWIZZLE_B2233 = 9, /* +SWZ.v4i8 */
        BI_SWIZZLE_B1032 = 10, /* +SWZ.v4i8 */
        BI_SWIZZLE_B3210 = 11, /* +SWZ.v4i8 */

        BI_SWIZZLE_B0022 = 12, /* for b02 lanes */
};
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci/* Given a packed i16vec2/i8vec4 constant, apply a swizzle. Useful for constant
74bf215546Sopenharmony_ci * folding and Valhall constant optimization. */
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_cistatic inline uint32_t
77bf215546Sopenharmony_cibi_apply_swizzle(uint32_t value, enum bi_swizzle swz)
78bf215546Sopenharmony_ci{
79bf215546Sopenharmony_ci   const uint16_t *h = (const uint16_t *) &value;
80bf215546Sopenharmony_ci   const uint8_t  *b = (const uint8_t *) &value;
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci#define H(h0, h1) (h[h0] | (h[h1] << 16))
83bf215546Sopenharmony_ci#define B(b0, b1, b2, b3) (b[b0] | (b[b1] << 8) | (b[b2] << 16) | (b[b3] << 24))
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci   switch (swz) {
86bf215546Sopenharmony_ci   case BI_SWIZZLE_H00: return H(0, 0);
87bf215546Sopenharmony_ci   case BI_SWIZZLE_H01: return H(0, 1);
88bf215546Sopenharmony_ci   case BI_SWIZZLE_H10: return H(1, 0);
89bf215546Sopenharmony_ci   case BI_SWIZZLE_H11: return H(1, 1);
90bf215546Sopenharmony_ci   case BI_SWIZZLE_B0000: return B(0, 0, 0, 0);
91bf215546Sopenharmony_ci   case BI_SWIZZLE_B1111: return B(1, 1, 1, 1);
92bf215546Sopenharmony_ci   case BI_SWIZZLE_B2222: return B(2, 2, 2, 2);
93bf215546Sopenharmony_ci   case BI_SWIZZLE_B3333: return B(3, 3, 3, 3);
94bf215546Sopenharmony_ci   case BI_SWIZZLE_B0011: return B(0, 0, 1, 1);
95bf215546Sopenharmony_ci   case BI_SWIZZLE_B2233: return B(2, 2, 3, 3);
96bf215546Sopenharmony_ci   case BI_SWIZZLE_B1032: return B(1, 0, 3, 2);
97bf215546Sopenharmony_ci   case BI_SWIZZLE_B3210: return B(3, 2, 1, 0);
98bf215546Sopenharmony_ci   case BI_SWIZZLE_B0022: return B(0, 0, 2, 2);
99bf215546Sopenharmony_ci   }
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_ci#undef H
102bf215546Sopenharmony_ci#undef B
103bf215546Sopenharmony_ci
104bf215546Sopenharmony_ci   unreachable("Invalid swizzle");
105bf215546Sopenharmony_ci}
106bf215546Sopenharmony_ci
/* Discriminates what the `value` field of a bi_index refers to. Values are
 * explicit and must fit the 3-bit `type` bitfield of bi_index. */
enum bi_index_type {
        /* No operand; what bi_null() returns and bi_is_null() tests for */
        BI_INDEX_NULL = 0,

        /* Built by bi_get_index(); counts as SSA when the reg flag is clear
         * (see bi_is_ssa) */
        BI_INDEX_NORMAL = 1,

        /* Built by bi_register(); value is a register number below 64 */
        BI_INDEX_REGISTER = 2,

        /* Built by bi_imm_u32() and friends; value is the immediate itself */
        BI_INDEX_CONSTANT = 3,

        /* Built by bi_passthrough(); value is an enum bifrost_packed_src */
        BI_INDEX_PASS = 4,

        /* NOTE(review): presumably a Fast-Access-Uniform reference; no
         * constructor is visible in this part of the file -- confirm */
        BI_INDEX_FAU = 5
};
115bf215546Sopenharmony_ci
/* An operand reference (source or destination) together with its modifiers.
 * `value` is interpreted according to `type`. The bitfields below add up to
 * 14 bits, and `padding` fills the rest of the second 32-bit word so that the
 * whole index can be hashed as 64 bits. Field order is part of that layout;
 * do not reorder. */
typedef struct {
        uint32_t value;

        /* modifiers, should only be set if applicable for a given instruction.
         * For *IDP.v4i8, abs plays the role of sign. For bitwise ops where
         * applicable, neg plays the role of not */
        bool abs : 1;
        bool neg : 1;

        /* The last use of a value, should be purged from the register cache.
         * Set by liveness analysis. */
        bool discard : 1;

        /* For a source, the swizzle. For a destination, acts a bit like a
         * write mask. Identity for the full 32-bit, H00 for only caring about
         * the lower half, other values unused. */
        enum bi_swizzle swizzle : 4;

        /* Word offset; compared by bi_is_word_equiv(). Only 3 bits wide, so
         * larger values are truncated on assignment. */
        uint32_t offset : 3;

        /* Set from the is_reg argument of bi_get_index(); a NORMAL index
         * with this flag clear is SSA (see bi_is_ssa) */
        bool reg : 1;
        enum bi_index_type type : 3;

        /* Must be zeroed so we can hash the whole 64-bits at a time */
        unsigned padding : (32 - 14);
} bi_index;
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_cistatic inline bi_index
142bf215546Sopenharmony_cibi_get_index(unsigned value, bool is_reg, unsigned offset)
143bf215546Sopenharmony_ci{
144bf215546Sopenharmony_ci        return (bi_index) {
145bf215546Sopenharmony_ci                .value = value,
146bf215546Sopenharmony_ci                .swizzle = BI_SWIZZLE_H01,
147bf215546Sopenharmony_ci                .offset = offset,
148bf215546Sopenharmony_ci                .reg = is_reg,
149bf215546Sopenharmony_ci                .type = BI_INDEX_NORMAL,
150bf215546Sopenharmony_ci        };
151bf215546Sopenharmony_ci}
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_cistatic inline bi_index
154bf215546Sopenharmony_cibi_register(unsigned reg)
155bf215546Sopenharmony_ci{
156bf215546Sopenharmony_ci        assert(reg < 64);
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci        return (bi_index) {
159bf215546Sopenharmony_ci                .value = reg,
160bf215546Sopenharmony_ci                .swizzle = BI_SWIZZLE_H01,
161bf215546Sopenharmony_ci                .type = BI_INDEX_REGISTER,
162bf215546Sopenharmony_ci        };
163bf215546Sopenharmony_ci}
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_cistatic inline bi_index
166bf215546Sopenharmony_cibi_imm_u32(uint32_t imm)
167bf215546Sopenharmony_ci{
168bf215546Sopenharmony_ci        return (bi_index) {
169bf215546Sopenharmony_ci                .value = imm,
170bf215546Sopenharmony_ci                .swizzle = BI_SWIZZLE_H01,
171bf215546Sopenharmony_ci                .type = BI_INDEX_CONSTANT,
172bf215546Sopenharmony_ci        };
173bf215546Sopenharmony_ci}
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_cistatic inline bi_index
176bf215546Sopenharmony_cibi_imm_f32(float imm)
177bf215546Sopenharmony_ci{
178bf215546Sopenharmony_ci        return bi_imm_u32(fui(imm));
179bf215546Sopenharmony_ci}
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_cistatic inline bi_index
182bf215546Sopenharmony_cibi_null()
183bf215546Sopenharmony_ci{
184bf215546Sopenharmony_ci        return (bi_index) { .type = BI_INDEX_NULL };
185bf215546Sopenharmony_ci}
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_cistatic inline bi_index
188bf215546Sopenharmony_cibi_zero()
189bf215546Sopenharmony_ci{
190bf215546Sopenharmony_ci        return bi_imm_u32(0);
191bf215546Sopenharmony_ci}
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_cistatic inline bi_index
194bf215546Sopenharmony_cibi_passthrough(enum bifrost_packed_src value)
195bf215546Sopenharmony_ci{
196bf215546Sopenharmony_ci        return (bi_index) {
197bf215546Sopenharmony_ci                .value = value,
198bf215546Sopenharmony_ci                .swizzle = BI_SWIZZLE_H01,
199bf215546Sopenharmony_ci                .type = BI_INDEX_PASS,
200bf215546Sopenharmony_ci        };
201bf215546Sopenharmony_ci}
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci/* Helps construct swizzles */
204bf215546Sopenharmony_cistatic inline bi_index
205bf215546Sopenharmony_cibi_swz_16(bi_index idx, bool x, bool y)
206bf215546Sopenharmony_ci{
207bf215546Sopenharmony_ci        assert(idx.swizzle == BI_SWIZZLE_H01);
208bf215546Sopenharmony_ci        idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_H00 | (x << 1) | y);
209bf215546Sopenharmony_ci        return idx;
210bf215546Sopenharmony_ci}
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_cistatic inline bi_index
213bf215546Sopenharmony_cibi_half(bi_index idx, bool upper)
214bf215546Sopenharmony_ci{
215bf215546Sopenharmony_ci        return bi_swz_16(idx, upper, upper);
216bf215546Sopenharmony_ci}
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_cistatic inline bi_index
219bf215546Sopenharmony_cibi_byte(bi_index idx, unsigned lane)
220bf215546Sopenharmony_ci{
221bf215546Sopenharmony_ci        assert(idx.swizzle == BI_SWIZZLE_H01);
222bf215546Sopenharmony_ci        assert(lane < 4);
223bf215546Sopenharmony_ci        idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_B0000 + lane);
224bf215546Sopenharmony_ci        return idx;
225bf215546Sopenharmony_ci}
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_cistatic inline bi_index
228bf215546Sopenharmony_cibi_abs(bi_index idx)
229bf215546Sopenharmony_ci{
230bf215546Sopenharmony_ci        idx.abs = true;
231bf215546Sopenharmony_ci        return idx;
232bf215546Sopenharmony_ci}
233bf215546Sopenharmony_ci
234bf215546Sopenharmony_cistatic inline bi_index
235bf215546Sopenharmony_cibi_neg(bi_index idx)
236bf215546Sopenharmony_ci{
237bf215546Sopenharmony_ci        idx.neg ^= true;
238bf215546Sopenharmony_ci        return idx;
239bf215546Sopenharmony_ci}
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_cistatic inline bi_index
242bf215546Sopenharmony_cibi_discard(bi_index idx)
243bf215546Sopenharmony_ci{
244bf215546Sopenharmony_ci        idx.discard = true;
245bf215546Sopenharmony_ci        return idx;
246bf215546Sopenharmony_ci}
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_ci/* Additive identity in IEEE 754 arithmetic */
249bf215546Sopenharmony_cistatic inline bi_index
250bf215546Sopenharmony_cibi_negzero()
251bf215546Sopenharmony_ci{
252bf215546Sopenharmony_ci        return bi_neg(bi_zero());
253bf215546Sopenharmony_ci}
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_ci/* Replaces an index, preserving any modifiers */
256bf215546Sopenharmony_ci
257bf215546Sopenharmony_cistatic inline bi_index
258bf215546Sopenharmony_cibi_replace_index(bi_index old, bi_index replacement)
259bf215546Sopenharmony_ci{
260bf215546Sopenharmony_ci        replacement.abs = old.abs;
261bf215546Sopenharmony_ci        replacement.neg = old.neg;
262bf215546Sopenharmony_ci        replacement.swizzle = old.swizzle;
263bf215546Sopenharmony_ci        replacement.discard = false; /* needs liveness analysis to set */
264bf215546Sopenharmony_ci        return replacement;
265bf215546Sopenharmony_ci}
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci/* Remove any modifiers. This has the property:
268bf215546Sopenharmony_ci *
269bf215546Sopenharmony_ci *     replace_index(x, strip_index(x)) = x
270bf215546Sopenharmony_ci *
271bf215546Sopenharmony_ci * This ensures it is suitable to use when lowering sources to moves */
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_cistatic inline bi_index
274bf215546Sopenharmony_cibi_strip_index(bi_index index)
275bf215546Sopenharmony_ci{
276bf215546Sopenharmony_ci        index.abs = index.neg = false;
277bf215546Sopenharmony_ci        index.swizzle = BI_SWIZZLE_H01;
278bf215546Sopenharmony_ci        return index;
279bf215546Sopenharmony_ci}
280bf215546Sopenharmony_ci
/* For bitwise instructions, where the neg modifier plays the role of not.
 * This is an alias of bi_neg(), so it toggles the flag too. */
#define bi_not(x) bi_neg(x)
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_cistatic inline bi_index
285bf215546Sopenharmony_cibi_imm_u8(uint8_t imm)
286bf215546Sopenharmony_ci{
287bf215546Sopenharmony_ci        return bi_byte(bi_imm_u32(imm), 0);
288bf215546Sopenharmony_ci}
289bf215546Sopenharmony_ci
290bf215546Sopenharmony_cistatic inline bi_index
291bf215546Sopenharmony_cibi_imm_u16(uint16_t imm)
292bf215546Sopenharmony_ci{
293bf215546Sopenharmony_ci        return bi_half(bi_imm_u32(imm), false);
294bf215546Sopenharmony_ci}
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_cistatic inline bi_index
297bf215546Sopenharmony_cibi_imm_uintN(uint32_t imm, unsigned sz)
298bf215546Sopenharmony_ci{
299bf215546Sopenharmony_ci        assert(sz == 8 || sz == 16 || sz == 32);
300bf215546Sopenharmony_ci        return (sz == 8) ? bi_imm_u8(imm) :
301bf215546Sopenharmony_ci                (sz == 16) ? bi_imm_u16(imm) :
302bf215546Sopenharmony_ci                bi_imm_u32(imm);
303bf215546Sopenharmony_ci}
304bf215546Sopenharmony_ci
305bf215546Sopenharmony_cistatic inline bi_index
306bf215546Sopenharmony_cibi_imm_f16(float imm)
307bf215546Sopenharmony_ci{
308bf215546Sopenharmony_ci        return bi_imm_u16(_mesa_float_to_half(imm));
309bf215546Sopenharmony_ci}
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_cistatic inline bool
312bf215546Sopenharmony_cibi_is_null(bi_index idx)
313bf215546Sopenharmony_ci{
314bf215546Sopenharmony_ci        return idx.type == BI_INDEX_NULL;
315bf215546Sopenharmony_ci}
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_cistatic inline bool
318bf215546Sopenharmony_cibi_is_ssa(bi_index idx)
319bf215546Sopenharmony_ci{
320bf215546Sopenharmony_ci        return idx.type == BI_INDEX_NORMAL && !idx.reg;
321bf215546Sopenharmony_ci}
322bf215546Sopenharmony_ci
323bf215546Sopenharmony_ci/* Compares equivalence as references. Does not compare offsets, swizzles, or
324bf215546Sopenharmony_ci * modifiers. In other words, this forms bi_index equivalence classes by
325bf215546Sopenharmony_ci * partitioning memory. E.g. -abs(foo[1].yx) == foo.xy but foo != bar */
326bf215546Sopenharmony_ci
327bf215546Sopenharmony_cistatic inline bool
328bf215546Sopenharmony_cibi_is_equiv(bi_index left, bi_index right)
329bf215546Sopenharmony_ci{
330bf215546Sopenharmony_ci        return (left.type == right.type) &&
331bf215546Sopenharmony_ci                (left.reg == right.reg) &&
332bf215546Sopenharmony_ci                (left.value == right.value);
333bf215546Sopenharmony_ci}
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci/* A stronger equivalence relation that requires the indices access the
336bf215546Sopenharmony_ci * same offset, useful for RA/scheduling to see what registers will
337bf215546Sopenharmony_ci * correspond to */
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_cistatic inline bool
340bf215546Sopenharmony_cibi_is_word_equiv(bi_index left, bi_index right)
341bf215546Sopenharmony_ci{
342bf215546Sopenharmony_ci        return bi_is_equiv(left, right) && left.offset == right.offset;
343bf215546Sopenharmony_ci}
344bf215546Sopenharmony_ci
345bf215546Sopenharmony_ci/* An even stronger equivalence that checks if indices correspond to the
346bf215546Sopenharmony_ci * right value when evaluated
347bf215546Sopenharmony_ci */
348bf215546Sopenharmony_cistatic inline bool
349bf215546Sopenharmony_cibi_is_value_equiv(bi_index left, bi_index right)
350bf215546Sopenharmony_ci{
351bf215546Sopenharmony_ci        if (left.type == BI_INDEX_CONSTANT && right.type == BI_INDEX_CONSTANT) {
352bf215546Sopenharmony_ci                return (bi_apply_swizzle(left.value, left.swizzle) ==
353bf215546Sopenharmony_ci                        bi_apply_swizzle(right.value, right.swizzle)) &&
354bf215546Sopenharmony_ci                       (left.abs == right.abs) &&
355bf215546Sopenharmony_ci                       (left.neg == right.neg);
356bf215546Sopenharmony_ci        } else {
357bf215546Sopenharmony_ci                return (left.value == right.value) &&
358bf215546Sopenharmony_ci                       (left.abs == right.abs) &&
359bf215546Sopenharmony_ci                       (left.neg == right.neg) &&
360bf215546Sopenharmony_ci                       (left.swizzle == right.swizzle) &&
361bf215546Sopenharmony_ci                       (left.offset == right.offset) &&
362bf215546Sopenharmony_ci                       (left.reg == right.reg) &&
363bf215546Sopenharmony_ci                       (left.type == right.type);
364bf215546Sopenharmony_ci        }
365bf215546Sopenharmony_ci}
366bf215546Sopenharmony_ci
/* Compile-time capacity limits. BI_MAX_DESTS and BI_MAX_SRCS size the dest[]
 * and src[] arrays of bi_instr.
 * NOTE(review): BI_MAX_VEC is presumably the widest vector the compiler
 * handles; its users are not visible here -- confirm. */
#define BI_MAX_VEC 8
#define BI_MAX_DESTS 4
#define BI_MAX_SRCS 6
370bf215546Sopenharmony_ci
/* A single IR instruction: an opcode, its destination and source operands,
 * and a set of opcode-dependent modifiers and immediates. The modifiers and
 * immediates live in anonymous unions, so which member is meaningful depends
 * on `op`. */
typedef struct {
        /* Must be first */
        struct list_head link;

        enum bi_opcode op;

        /* NOTE(review): presumably the number of valid entries in src[] and
         * dest[] respectively -- confirm */
        uint8_t nr_srcs;
        uint8_t nr_dests;

        /* Data flow */
        bi_index dest[BI_MAX_DESTS];
        bi_index src[BI_MAX_SRCS];

        /* For a branch */
        struct bi_block *branch_target;

        /* These don't fit neatly with anything else.. */
        enum bi_register_format register_format;
        enum bi_vecsize vecsize;

        /* Flow control associated with a Valhall instruction */
        uint8_t flow;

        /* Slot associated with a message-passing instruction */
        uint8_t slot;

        /* Can we spill the value written here? Used to prevent
         * useless double fills.
         * NOTE(review): the field name suggests true means the value must
         * NOT be spilled -- confirm against the spiller. */
        bool no_spill;

        /* On Bifrost: A value of bi_table to override the table, inducing a
         * DTSEL_IMM pair if nonzero.
         *
         * On Valhall: the table index to use for resource instructions.
         *
         * These two interpretations are equivalent if you squint a bit.
         */
        unsigned table;

        /* Everything after this MUST NOT be accessed directly, since
         * interpretation depends on opcodes */

        /* Destination modifiers */
        union {
                enum bi_clamp clamp;
                bool saturate;
                bool not_result;
                unsigned dest_mod;
        };

        /* Immediates. All seen alone in an instruction, except for varying/texture
         * which are specified jointly for VARTEX */
        union {
                uint32_t shift;
                uint32_t fill;
                uint32_t index;
                uint32_t attribute_index;

                struct {
                        uint32_t varying_index;
                        uint32_t sampler_index;
                        uint32_t texture_index;
                };

                /* TEXC, ATOM_CX: # of staging registers used */
                struct {
                        uint32_t sr_count;
                        uint32_t sr_count_2;

                        union {
                                /* Atomics effectively require all three */
                                int32_t byte_offset;

                                /* BLEND requires all three */
                                int32_t branch_offset;
                        };
                };
        };

        /* Modifiers specific to particular instructions are thrown in a union */
        union {
                enum bi_adj adj; /* FEXP_TABLE.u4 */
                enum bi_atom_opc atom_opc; /* atomics */
                enum bi_func func; /* FPOW_SC_DET */
                enum bi_function function; /* LD_VAR_FLAT */
                enum bi_mux mux; /* MUX */
                enum bi_sem sem; /* FMAX, FMIN */
                enum bi_source source; /* LD_GCLK */
                bool scale; /* VN_ASST2, FSINCOS_OFFSET */
                bool offset; /* FSIN_TABLE, FCOS_TABLE */
                bool mask; /* CLZ */
                bool threads; /* IMULD, IMOV_FMA */
                bool combine; /* BRANCHC */
                bool format; /* LEA_TEX */

                struct {
                        enum bi_special special; /* FADD_RSCALE, FMA_RSCALE */
                        enum bi_round round; /* FMA, converts, FADD, _RSCALE, etc */
                        bool ftz; /* Flush-to-zero for F16_TO_F32 */
                };

                struct {
                        enum bi_result_type result_type; /* FCMP, ICMP */
                        enum bi_cmpf cmpf; /* CSEL, FCMP, ICMP, BRANCH */
                };

                struct {
                        enum bi_stack_mode stack_mode; /* JUMP_EX */
                        bool test_mode;
                };

                struct {
                        enum bi_seg seg; /* LOAD, STORE, SEG_ADD, SEG_SUB */
                        bool preserve_null; /* SEG_ADD, SEG_SUB */
                        enum bi_extend extend; /* LOAD, IMUL */
                };

                struct {
                        enum bi_sample sample; /* VAR_TEX, LD_VAR */
                        enum bi_update update; /* VAR_TEX, LD_VAR */
                        enum bi_varying_name varying_name; /* LD_VAR_SPECIAL */
                        bool skip; /* VAR_TEX, TEXS, TEXC */
                        bool lod_mode; /* VAR_TEX, TEXS, implicitly for TEXC */
                        enum bi_source_format source_format; /* LD_VAR_BUF */

                        /* Used for valhall texturing */
                        bool shadow;
                        bool texel_offset;
                        bool array_enable;
                        bool integer_coordinates;
                        enum bi_fetch_component fetch_component;
                        enum bi_va_lod_mode va_lod_mode;
                        enum bi_dimension dimension;
                        enum bi_write_mask write_mask;
                };

                /* Maximum size, for hashing. This array must stay at least
                 * as large as the biggest struct in this union. */
                unsigned flags[14];

                struct {
                        enum bi_subgroup subgroup; /* WMASK, CLPER */
                        enum bi_inactive_result inactive_result; /* CLPER */
                        enum bi_lane_op lane_op; /* CLPER */
                };

                struct {
                        bool z; /* ZS_EMIT */
                        bool stencil; /* ZS_EMIT */
                };

                struct {
                        bool h; /* VN_ASST1.f16 */
                        bool l; /* VN_ASST1.f16 */
                };

                struct {
                        bool bytes2; /* RROT_DOUBLE, FRSHIFT_DOUBLE */
                        bool result_word;
                        bool arithmetic; /* ARSHIFT_OR */
                };

                struct {
                        bool sqrt; /* FREXPM */
                        bool log; /* FREXPM */
                };

                struct {
                        enum bi_mode mode; /* FLOG_TABLE */
                        enum bi_precision precision; /* FLOG_TABLE */
                        bool divzero; /* FRSQ_APPROX, FRSQ */
                };
        };
} bi_instr;
543bf215546Sopenharmony_ci
544bf215546Sopenharmony_cistatic inline bool
545bf215546Sopenharmony_cibi_is_staging_src(const bi_instr *I, unsigned s)
546bf215546Sopenharmony_ci{
547bf215546Sopenharmony_ci        return (s == 0 || s == 4) && bi_opcode_props[I->op].sr_read;
548bf215546Sopenharmony_ci}
549bf215546Sopenharmony_ci
/* Represents the assignment of slots for a given bi_tuple. There are four
 * slots; `enabled` has only two entries, covering the read slots, and slots
 * 2/3 are configured through `slot23`. */

typedef struct {
        /* Register to assign to each slot */
        unsigned slot[4];

        /* Read slots can be disabled */
        bool enabled[2];

        /* Configuration for slots 2/3 */
        struct bifrost_reg_ctrl_23 slot23;

        /* Fast-Access-Uniform RAM index */
        uint8_t fau_idx;

        /* Whether writes are actually for the last instruction */
        bool first_instruction;
} bi_registers;
568bf215546Sopenharmony_ci
/* A bi_tuple contains two paired instruction pointers. If a slot is unfilled,
 * leave it NULL; the emitter will fill in a nop. Instructions reference
 * registers via slots which are assigned per tuple.
 *
 * NOTE(review): fau_idx exists both here and inside `regs`; which copy is
 * authoritative is not visible from this part of the file -- confirm.
 */

typedef struct {
        uint8_t fau_idx;
        bi_registers regs;
        bi_instr *fma;
        bi_instr *add;
} bi_tuple;
580bf215546Sopenharmony_ci
/* Forward declaration: bi_clause points back at its block, which is defined
 * further down */
struct bi_block;

/* A clause: a group of at most 8 tuples plus the constants, scoreboard and
 * flow-control metadata that go with it. Clauses are chained through `link`
 * on their block's `clauses` list. */
typedef struct {
        struct list_head link;

        /* Link back up for branch calculations */
        struct bi_block *block;

        /* Architectural limit of 8 tuples/clause */
        unsigned tuple_count;
        bi_tuple tuples[8];

        /* For scoreboarding -- the clause ID (this is not globally unique!)
         * and its dependencies in terms of other clauses, computed during
         * scheduling and used when emitting code. Dependencies expressed as a
         * bitfield matching the hardware, except shifted by a clause (the
         * shift back to the ISA's off-by-one encoding is worked out when
         * emitting clauses) */
        unsigned scoreboard_id;
        uint8_t dependencies;

        /* See ISA header for description */
        enum bifrost_flow flow_control;

        /* Can we prefetch the next clause? Usually it makes sense, except for
         * clauses ending in unconditional branches */
        bool next_clause_prefetch;

        /* Assigned data register */
        unsigned staging_register;

        /* Corresponds to the usual bit but shifted by a clause */
        bool staging_barrier;

        /* Constants read by this clause. ISA limit. Must satisfy:
         *
         *      constant_count + tuple_count <= 13
         *
         * Also implicitly constant_count <= tuple_count since a tuple only
         * reads a single constant.
         */
        uint64_t constants[8];
        unsigned constant_count;

        /* Index of a constant to be PC-relative */
        unsigned pcrel_idx;

        /* Branches encode a constant offset relative to the program counter
         * with some magic flags. By convention, if there is a branch, its
         * constant will be last. Set this flag to indicate this is required.
         */
        bool branch_constant;

        /* Unique in a clause: at most one message, with its type and the
         * instruction that issues it */
        enum bifrost_message_type message_type;
        bi_instr *message;

        /* Discard helper threads */
        bool td;

        /* Should flush-to-zero mode be enabled for this clause? */
        bool ftz;
} bi_clause;
644bf215546Sopenharmony_ci
/* Number of scoreboard slots tracked by the model below; also the width of
 * its per-slot bit-fields */
#define BI_NUM_SLOTS 8

/* A model for the state of the scoreboard */
struct bi_scoreboard_state {
        /** Bitmap of registers read/written by a slot (one 64-bit mask per
         * slot) */
        uint64_t read[BI_NUM_SLOTS];
        uint64_t write[BI_NUM_SLOTS];

        /* Nonregister dependencies present by a slot: one bit per slot */
        uint8_t varying : BI_NUM_SLOTS;
        uint8_t memory : BI_NUM_SLOTS;
};
657bf215546Sopenharmony_ci
/* A basic block of the IR. Blocks live on the context's `blocks` list and
 * hold either a list of instructions or, once scheduled, a list of clauses. */
typedef struct bi_block {
        /* Link to next block. Must be first for mir_get_block */
        struct list_head link;

        /* List of instructions emitted for the current block */
        struct list_head instructions;

        /* Index of the block in source order */
        unsigned index;

        /* Control flow graph: up to two successors (unused entries are NULL)
         * and a dynamic array of bi_block * predecessors */
        struct bi_block *successors[2];
        struct util_dynarray predecessors;

        /* When set, bi_block_add_successor() refuses to add further edges */
        bool unconditional_jumps;

        /* Per 32-bit word live masks for the block indexed by node */
        uint8_t *live_in;
        uint8_t *live_out;

        /* If true, uses clauses; if false, uses instructions */
        bool scheduled;
        struct list_head clauses; /* list of bi_clause */

        /* Post-RA liveness */
        uint64_t reg_live_in, reg_live_out;

        /* Scoreboard state at the start/end of block */
        struct bi_scoreboard_state scoreboard_in, scoreboard_out;

        /* On Valhall, indicates we need a terminal NOP to implement jumps to
         * the end of the shader.
         */
        bool needs_nop;

        /* Flags available for pass-internal use */
        uint8_t pass_flags;
} bi_block;
695bf215546Sopenharmony_ci
696bf215546Sopenharmony_cistatic inline unsigned
697bf215546Sopenharmony_cibi_num_predecessors(bi_block *block)
698bf215546Sopenharmony_ci{
699bf215546Sopenharmony_ci        return util_dynarray_num_elements(&block->predecessors, bi_block *);
700bf215546Sopenharmony_ci}
701bf215546Sopenharmony_ci
702bf215546Sopenharmony_cistatic inline bi_block *
703bf215546Sopenharmony_cibi_start_block(struct list_head *blocks)
704bf215546Sopenharmony_ci{
705bf215546Sopenharmony_ci        bi_block *first = list_first_entry(blocks, bi_block, link);
706bf215546Sopenharmony_ci        assert(bi_num_predecessors(first) == 0);
707bf215546Sopenharmony_ci        return first;
708bf215546Sopenharmony_ci}
709bf215546Sopenharmony_ci
710bf215546Sopenharmony_cistatic inline bi_block *
711bf215546Sopenharmony_cibi_exit_block(struct list_head *blocks)
712bf215546Sopenharmony_ci{
713bf215546Sopenharmony_ci        bi_block *last = list_last_entry(blocks, bi_block, link);
714bf215546Sopenharmony_ci        assert(!last->successors[0] && !last->successors[1]);
715bf215546Sopenharmony_ci        return last;
716bf215546Sopenharmony_ci}
717bf215546Sopenharmony_ci
718bf215546Sopenharmony_cistatic inline void
719bf215546Sopenharmony_cibi_block_add_successor(bi_block *block, bi_block *successor)
720bf215546Sopenharmony_ci{
721bf215546Sopenharmony_ci        assert(block != NULL && successor != NULL);
722bf215546Sopenharmony_ci
723bf215546Sopenharmony_ci        /* Cull impossible edges */
724bf215546Sopenharmony_ci        if (block->unconditional_jumps)
725bf215546Sopenharmony_ci                return;
726bf215546Sopenharmony_ci
727bf215546Sopenharmony_ci        for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
728bf215546Sopenharmony_ci                if (block->successors[i]) {
729bf215546Sopenharmony_ci                       if (block->successors[i] == successor)
730bf215546Sopenharmony_ci                               return;
731bf215546Sopenharmony_ci                       else
732bf215546Sopenharmony_ci                               continue;
733bf215546Sopenharmony_ci                }
734bf215546Sopenharmony_ci
735bf215546Sopenharmony_ci                block->successors[i] = successor;
736bf215546Sopenharmony_ci                util_dynarray_append(&successor->predecessors, bi_block *, block);
737bf215546Sopenharmony_ci                return;
738bf215546Sopenharmony_ci        }
739bf215546Sopenharmony_ci
740bf215546Sopenharmony_ci        unreachable("Too many successors");
741bf215546Sopenharmony_ci}
742bf215546Sopenharmony_ci
/* Subset of pan_shader_info needed per-variant, in order to support IDVS.
 * The three pointer members refer to structures defined outside this header;
 * NOTE(review): ownership/lifetime of the pointees is not visible here. */
struct bi_shader_info {
        struct panfrost_ubo_push *push;
        struct bifrost_shader_info *bifrost;
        struct panfrost_sysvals *sysvals;
        unsigned tls_size;
        unsigned work_reg_count;
        unsigned push_offset;
};
752bf215546Sopenharmony_ci
/* State of index-driven vertex shading (IDVS) for current shader. The zero
 * value means IDVS is off. */
enum bi_idvs_mode {
        /* IDVS not in use */
        BI_IDVS_NONE = 0,

        /* IDVS in use. Compiling a position shader */
        BI_IDVS_POSITION = 1,

        /* IDVS in use. Compiling a varying shader */
        BI_IDVS_VARYING = 2,
};
764bf215546Sopenharmony_ci
/* Per-shader compilation state. Passes declared later in this header take a
 * pointer to this structure. */
typedef struct {
       const struct panfrost_compile_inputs *inputs;
       nir_shader *nir;
       struct bi_shader_info info;
       gl_shader_stage stage;
       struct list_head blocks; /* list of bi_block */
       struct hash_table_u64 *sysval_to_id;
       uint32_t quirks;
       unsigned arch;
       enum bi_idvs_mode idvs;
       unsigned num_blocks;

       /* In any graphics shader, whether the "IDVS with memory
        * allocation" flow is used. This affects how varyings are loaded and
        * stored. Ignore for compute.
        */
       bool malloc_idvs;

       /* During NIR->BIR */
       bi_block *current_block;
       bi_block *after_block;
       bi_block *break_block;
       bi_block *continue_block;
       bool emitted_atest;

       /* During NIR->BIR, the coverage bitmap. If this is NULL, the default
        * coverage bitmap should be sourced from preloaded register r60. This
        * is written by ATEST and ZS_EMIT
        */
       bi_index coverage;

       /* During NIR->BIR, table of preloaded registers, or NULL if never
        * preloaded.
        */
       bi_index preloaded[64];

       /* For creating temporaries: next free index handed out by bi_temp()
        * and bi_temp_reg() respectively */
       unsigned ssa_alloc;
       unsigned reg_alloc;

       /* Mask of UBOs that need to be uploaded */
       uint32_t ubo_mask;

       /* During instruction selection, map from vector bi_index to its scalar
        * components, populated by a split.
        */
       struct hash_table_u64 *allocated_vec;

       /* Stats for shader-db */
       unsigned instruction_count;
       unsigned loop_count;
       unsigned spills;
       unsigned fills;
} bi_context;
819bf215546Sopenharmony_ci
820bf215546Sopenharmony_cistatic inline void
821bf215546Sopenharmony_cibi_remove_instruction(bi_instr *ins)
822bf215546Sopenharmony_ci{
823bf215546Sopenharmony_ci        list_del(&ins->link);
824bf215546Sopenharmony_ci}
825bf215546Sopenharmony_ci
/* Values carried by a BI_INDEX_FAU index (see bi_fau()). The low values name
 * individual entries; BIR_FAU_UNIFORM and BIR_FAU_IMMEDIATE are flag bits
 * that are ORed with an index (va_lut() does this for BIR_FAU_IMMEDIATE). */
enum bir_fau {
        BIR_FAU_ZERO = 0,
        BIR_FAU_LANE_ID = 1,
        BIR_FAU_WARP_ID = 2,
        BIR_FAU_CORE_ID = 3,
        BIR_FAU_FB_EXTENT = 4,
        BIR_FAU_ATEST_PARAM = 5,
        BIR_FAU_SAMPLE_POS_ARRAY = 6,
        BIR_FAU_BLEND_0 = 8,
        /* blend descs 1 - 7 */
        BIR_FAU_TYPE_MASK = 15,

        /* Valhall only */
        BIR_FAU_TLS_PTR = 16,
        BIR_FAU_WLS_PTR = 17,
        BIR_FAU_PROGRAM_COUNTER = 18,

        BIR_FAU_UNIFORM = (1 << 7),
        /* Look up table on Valhall */
        BIR_FAU_IMMEDIATE = (1 << 8),

};
848bf215546Sopenharmony_ci
849bf215546Sopenharmony_cistatic inline bi_index
850bf215546Sopenharmony_cibi_fau(enum bir_fau value, bool hi)
851bf215546Sopenharmony_ci{
852bf215546Sopenharmony_ci        return (bi_index) {
853bf215546Sopenharmony_ci                .value = value,
854bf215546Sopenharmony_ci                .swizzle = BI_SWIZZLE_H01,
855bf215546Sopenharmony_ci                .offset = hi ? 1u : 0u,
856bf215546Sopenharmony_ci                .type = BI_INDEX_FAU,
857bf215546Sopenharmony_ci        };
858bf215546Sopenharmony_ci}
859bf215546Sopenharmony_ci
860bf215546Sopenharmony_ci/*
861bf215546Sopenharmony_ci * Builder for Valhall LUT entries. Generally, constants are modeled with
862bf215546Sopenharmony_ci * BI_INDEX_IMMEDIATE in the intermediate representation. This helper is only
863bf215546Sopenharmony_ci * necessary for passes running after lowering constants, as well as when
864bf215546Sopenharmony_ci * lowering constants.
865bf215546Sopenharmony_ci *
866bf215546Sopenharmony_ci */
867bf215546Sopenharmony_cistatic inline bi_index
868bf215546Sopenharmony_civa_lut(unsigned index)
869bf215546Sopenharmony_ci{
870bf215546Sopenharmony_ci        return bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | (index >> 1)),
871bf215546Sopenharmony_ci                      index & 1);
872bf215546Sopenharmony_ci}
873bf215546Sopenharmony_ci
874bf215546Sopenharmony_ci/*
875bf215546Sopenharmony_ci * va_lut_zero is like bi_zero but only works on Valhall. It is intended for
876bf215546Sopenharmony_ci * use by late passes that run after constants are lowered, specifically
877bf215546Sopenharmony_ci * register allocation. bi_zero() is preferred where possible.
878bf215546Sopenharmony_ci */
879bf215546Sopenharmony_cistatic inline bi_index
880bf215546Sopenharmony_civa_zero_lut()
881bf215546Sopenharmony_ci{
882bf215546Sopenharmony_ci        return va_lut(0);
883bf215546Sopenharmony_ci}
884bf215546Sopenharmony_ci
885bf215546Sopenharmony_cistatic inline unsigned
886bf215546Sopenharmony_cibi_max_temp(bi_context *ctx)
887bf215546Sopenharmony_ci{
888bf215546Sopenharmony_ci        return (MAX2(ctx->reg_alloc, ctx->ssa_alloc) + 2) << 1;
889bf215546Sopenharmony_ci}
890bf215546Sopenharmony_ci
891bf215546Sopenharmony_cistatic inline bi_index
892bf215546Sopenharmony_cibi_temp(bi_context *ctx)
893bf215546Sopenharmony_ci{
894bf215546Sopenharmony_ci        return bi_get_index(ctx->ssa_alloc++, false, 0);
895bf215546Sopenharmony_ci}
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_cistatic inline bi_index
898bf215546Sopenharmony_cibi_temp_reg(bi_context *ctx)
899bf215546Sopenharmony_ci{
900bf215546Sopenharmony_ci        return bi_get_index(ctx->reg_alloc++, true, 0);
901bf215546Sopenharmony_ci}
902bf215546Sopenharmony_ci
903bf215546Sopenharmony_ci/* Inline constants automatically, will be lowered out by bi_lower_fau where a
904bf215546Sopenharmony_ci * constant is not allowed. load_const_to_scalar gaurantees that this makes
905bf215546Sopenharmony_ci * sense */
906bf215546Sopenharmony_ci
907bf215546Sopenharmony_cistatic inline bi_index
908bf215546Sopenharmony_cibi_src_index(nir_src *src)
909bf215546Sopenharmony_ci{
910bf215546Sopenharmony_ci        if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32)
911bf215546Sopenharmony_ci                return bi_imm_u32(nir_src_as_uint(*src));
912bf215546Sopenharmony_ci        else if (src->is_ssa)
913bf215546Sopenharmony_ci                return bi_get_index(src->ssa->index, false, 0);
914bf215546Sopenharmony_ci        else {
915bf215546Sopenharmony_ci                assert(!src->reg.indirect);
916bf215546Sopenharmony_ci                return bi_get_index(src->reg.reg->index, true, 0);
917bf215546Sopenharmony_ci        }
918bf215546Sopenharmony_ci}
919bf215546Sopenharmony_ci
920bf215546Sopenharmony_cistatic inline bi_index
921bf215546Sopenharmony_cibi_dest_index(nir_dest *dst)
922bf215546Sopenharmony_ci{
923bf215546Sopenharmony_ci        if (dst->is_ssa)
924bf215546Sopenharmony_ci                return bi_get_index(dst->ssa.index, false, 0);
925bf215546Sopenharmony_ci        else {
926bf215546Sopenharmony_ci                assert(!dst->reg.indirect);
927bf215546Sopenharmony_ci                return bi_get_index(dst->reg.reg->index, true, 0);
928bf215546Sopenharmony_ci        }
929bf215546Sopenharmony_ci}
930bf215546Sopenharmony_ci
931bf215546Sopenharmony_cistatic inline unsigned
932bf215546Sopenharmony_cibi_get_node(bi_index index)
933bf215546Sopenharmony_ci{
934bf215546Sopenharmony_ci        if (bi_is_null(index) || index.type != BI_INDEX_NORMAL)
935bf215546Sopenharmony_ci                return ~0;
936bf215546Sopenharmony_ci        else
937bf215546Sopenharmony_ci                return (index.value << 1) | index.reg;
938bf215546Sopenharmony_ci}
939bf215546Sopenharmony_ci
940bf215546Sopenharmony_cistatic inline bi_index
941bf215546Sopenharmony_cibi_node_to_index(unsigned node, unsigned node_count)
942bf215546Sopenharmony_ci{
943bf215546Sopenharmony_ci        assert(node < node_count);
944bf215546Sopenharmony_ci        assert(node_count < ~0u);
945bf215546Sopenharmony_ci
946bf215546Sopenharmony_ci        return bi_get_index(node >> 1, node & PAN_IS_REG, 0);
947bf215546Sopenharmony_ci}
948bf215546Sopenharmony_ci
/* Iterators for Bifrost IR */

/* Block iterators: walk the bi_block entries on ctx->blocks, declaring `v` as
 * the loop variable. The _rev forms walk backwards and the _from forms start
 * at `from` rather than at the head of the list. `ctx` is parenthesized so
 * that any pointer expression may be passed, not just a plain identifier. */

#define bi_foreach_block(ctx, v) \
        list_for_each_entry(bi_block, v, &(ctx)->blocks, link)

#define bi_foreach_block_rev(ctx, v) \
        list_for_each_entry_rev(bi_block, v, &(ctx)->blocks, link)

#define bi_foreach_block_from(ctx, from, v) \
        list_for_each_entry_from(bi_block, v, from, &(ctx)->blocks, link)

#define bi_foreach_block_from_rev(ctx, from, v) \
        list_for_each_entry_from_rev(bi_block, v, from, &(ctx)->blocks, link)
962bf215546Sopenharmony_ci
/* Instruction iterators: walk the bi_instr entries on block->instructions,
 * declaring `v` as the loop variable. _rev walks backwards, _from starts at
 * `from`. NOTE(review): the _safe forms presumably tolerate removal of the
 * current entry, as is usual for list_for_each_entry_safe -- confirm against
 * util/list.h. */

#define bi_foreach_instr_in_block(block, v) \
        list_for_each_entry(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_rev(block, v) \
        list_for_each_entry_rev(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe(block, v) \
        list_for_each_entry_safe(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe_rev(block, v) \
        list_for_each_entry_safe_rev(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from(block, v, from) \
        list_for_each_entry_from(bi_instr, v, from, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from_rev(block, v, from) \
        list_for_each_entry_from_rev(bi_instr, v, from, &(block)->instructions, link)

/* Clause iterators: same shapes as above, over the bi_clause entries on
 * block->clauses */

#define bi_foreach_clause_in_block(block, v) \
        list_for_each_entry(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_rev(block, v) \
        list_for_each_entry_rev(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_safe(block, v) \
        list_for_each_entry_safe(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from(block, v, from) \
        list_for_each_entry_from(bi_clause, v, from, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from_rev(block, v, from) \
        list_for_each_entry_from_rev(bi_clause, v, from, &(block)->clauses, link)
995bf215546Sopenharmony_ci
/* Whole-shader instruction iterators: an outer loop over the blocks of `ctx`
 * with an inner loop over each block's instructions. The outer loop variable
 * is always named `v_block`, so it is visible in the loop body and these
 * macros must not be nested inside one another. */

#define bi_foreach_instr_global(ctx, v) \
        bi_foreach_block(ctx, v_block) \
                bi_foreach_instr_in_block(v_block, v)

#define bi_foreach_instr_global_rev(ctx, v) \
        bi_foreach_block_rev(ctx, v_block) \
                bi_foreach_instr_in_block_rev(v_block, v)

#define bi_foreach_instr_global_safe(ctx, v) \
        bi_foreach_block(ctx, v_block) \
                bi_foreach_instr_in_block_safe(v_block, v)

/* The per-block reverse/safe iterator is spelled ..._safe_rev; there is no
 * ..._rev_safe per-block macro to expand to. */
#define bi_foreach_instr_global_rev_safe(ctx, v) \
        bi_foreach_block_rev(ctx, v_block) \
                bi_foreach_instr_in_block_safe_rev(v_block, v)
1011bf215546Sopenharmony_ci
/* Visits the non-NULL instructions of a tuple: `fma` first (if set), then
 * `add` (if set). The loop starts at fma, falling back to add when fma is
 * NULL, and ends after add has been visited. Relies on the GNU `a ?: b`
 * conditional with omitted middle operand. */
#define bi_foreach_instr_in_tuple(tuple, v) \
        for (bi_instr *v = (tuple)->fma ?: (tuple)->add; \
                        v != NULL; \
                        v = (v == (tuple)->add) ? NULL : (tuple)->add)
1016bf215546Sopenharmony_ci
/* Iterates over the successors of `blk`, stopping at the first NULL entry or
 * after both entries have been visited.
 *
 * Declares `v` (the current successor) and a cursor `_v` in the enclosing
 * scope, so it can be used at most once per scope. The bounds check runs
 * before the cursor is dereferenced, so nothing past successors[1] is ever
 * read.
 */
#define bi_foreach_successor(blk, v) \
        bi_block *v; \
        bi_block **_v; \
        for (_v = &(blk)->successors[0]; \
             _v < &(blk)->successors[2] && (v = *_v) != NULL; \
             _v++)

/* Iterates over the predecessor array of `blk`, whose elements are
 * bi_block *. NOTE(review): with util_dynarray_foreach, `v` is presumably a
 * pointer to the element (bi_block **) rather than the element itself --
 * confirm against util/u_dynarray.h. */
#define bi_foreach_predecessor(blk, v) \
        util_dynarray_foreach(&(blk)->predecessors, bi_block *, v)
1027bf215546Sopenharmony_ci
/* Index loops over every entry of an instruction's src[] / dest[] array. `v`
 * is declared as an unsigned index, not as the operand itself. `ins` is
 * parenthesized so that any pointer expression may be passed. */
#define bi_foreach_src(ins, v) \
        for (unsigned v = 0; v < ARRAY_SIZE((ins)->src); ++v)

#define bi_foreach_dest(ins, v) \
        for (unsigned v = 0; v < ARRAY_SIZE((ins)->dest); ++v)
1033bf215546Sopenharmony_ci
/* Nested loop: for every instruction `ins` in `tuple`, for every source index
 * `s` of that instruction */
#define bi_foreach_instr_and_src_in_tuple(tuple, ins, s) \
        bi_foreach_instr_in_tuple(tuple, ins) \
                bi_foreach_src(ins, s)
1037bf215546Sopenharmony_ci
1038bf215546Sopenharmony_cistatic inline bi_instr *
1039bf215546Sopenharmony_cibi_prev_op(bi_instr *ins)
1040bf215546Sopenharmony_ci{
1041bf215546Sopenharmony_ci        return list_last_entry(&(ins->link), bi_instr, link);
1042bf215546Sopenharmony_ci}
1043bf215546Sopenharmony_ci
1044bf215546Sopenharmony_cistatic inline bi_instr *
1045bf215546Sopenharmony_cibi_next_op(bi_instr *ins)
1046bf215546Sopenharmony_ci{
1047bf215546Sopenharmony_ci        return list_first_entry(&(ins->link), bi_instr, link);
1048bf215546Sopenharmony_ci}
1049bf215546Sopenharmony_ci
1050bf215546Sopenharmony_cistatic inline bi_block *
1051bf215546Sopenharmony_cibi_next_block(bi_block *block)
1052bf215546Sopenharmony_ci{
1053bf215546Sopenharmony_ci        return list_first_entry(&(block->link), bi_block, link);
1054bf215546Sopenharmony_ci}
1055bf215546Sopenharmony_ci
1056bf215546Sopenharmony_cistatic inline bi_block *
1057bf215546Sopenharmony_cibi_entry_block(bi_context *ctx)
1058bf215546Sopenharmony_ci{
1059bf215546Sopenharmony_ci        return list_first_entry(&ctx->blocks, bi_block, link);
1060bf215546Sopenharmony_ci}
1061bf215546Sopenharmony_ci
/* BIR manipulation. Declarations only; the definitions live outside this
 * header. */

bool bi_has_arg(const bi_instr *ins, bi_index arg);
unsigned bi_count_read_registers(const bi_instr *ins, unsigned src);
unsigned bi_count_write_registers(const bi_instr *ins, unsigned dest);
bool bi_is_regfmt_16(enum bi_register_format fmt);
unsigned bi_writemask(const bi_instr *ins, unsigned dest);
bi_clause * bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause);
bool bi_side_effects(const bi_instr *I);
bool bi_reconverge_branches(bi_block *block);

bool bi_can_replace_with_csel(bi_instr *I);
void bi_replace_mux_with_csel(bi_instr *I, bool must_sign);

/* Printing: each routine takes the object to print and the stdio stream to
 * print it to */
void bi_print_instr(const bi_instr *I, FILE *fp);
void bi_print_slots(bi_registers *regs, FILE *fp);
void bi_print_tuple(bi_tuple *tuple, FILE *fp);
void bi_print_clause(bi_clause *clause, FILE *fp);
void bi_print_block(bi_block *block, FILE *fp);
void bi_print_shader(bi_context *ctx, FILE *fp);
1082bf215546Sopenharmony_ci
/* BIR passes. Declarations only. The whole-shader entry points take the
 * bi_context; the remaining helpers query a single instruction or block. */

bool bi_instr_uses_helpers(bi_instr *I);
bool bi_block_terminates_helpers(bi_block *block);
void bi_analyze_helper_terminate(bi_context *ctx);
void bi_mark_clauses_td(bi_context *ctx);

void bi_analyze_helper_requirements(bi_context *ctx);
void bi_opt_copy_prop(bi_context *ctx);
void bi_opt_cse(bi_context *ctx);
void bi_opt_mod_prop_forward(bi_context *ctx);
void bi_opt_mod_prop_backward(bi_context *ctx);
void bi_opt_dead_code_eliminate(bi_context *ctx);
void bi_opt_fuse_dual_texture(bi_context *ctx);
void bi_opt_dce_post_ra(bi_context *ctx);
void bi_opt_message_preload(bi_context *ctx);
void bi_opt_push_ubo(bi_context *ctx);
void bi_opt_reorder_push(bi_context *ctx);
void bi_lower_swizzle(bi_context *ctx);
void bi_lower_fau(bi_context *ctx);
void bi_assign_scoreboard(bi_context *ctx);
void bi_register_allocate(bi_context *ctx);
void va_optimize(bi_context *ctx);
void va_lower_split_64bit(bi_context *ctx);

void bi_lower_opt_instruction(bi_instr *I);

/* Scheduling entry points, followed by per-instruction predicates */
void bi_pressure_schedule(bi_context *ctx);
void bi_schedule(bi_context *ctx);
bool bi_can_fma(bi_instr *ins);
bool bi_can_add(bi_instr *ins);
bool bi_must_message(bi_instr *ins);
bool bi_reads_zero(bi_instr *ins);
bool bi_reads_temps(bi_instr *ins, unsigned src);
bool bi_reads_t(bi_instr *ins, unsigned src);
/* IR validation exists only in debug builds. When NDEBUG is defined the two
 * entry points collapse to inline stubs: bi_validate_initialization() always
 * reports success and bi_validate() does nothing, so callers need no #ifdefs
 * of their own. */
#ifndef NDEBUG
bool bi_validate_initialization(bi_context *ctx);
void bi_validate(bi_context *ctx, const char *after_str);
#else
static inline bool bi_validate_initialization(UNUSED bi_context *ctx) { return true; }
static inline void bi_validate(UNUSED bi_context *ctx, UNUSED const char *after_str) { return; }
#endif

/* Constant folding. NOTE(review): *unsupported is presumably set when the
 * instruction cannot be folded -- confirm against the definition. */
uint32_t bi_fold_constant(bi_instr *I, bool *unsupported);
bool bi_opt_constant_fold(bi_context *ctx);
1129bf215546Sopenharmony_ci
1130bf215546Sopenharmony_ci/* Liveness */
1131bf215546Sopenharmony_ci
/* Whole-shader liveness computation plus a per-instruction update that works
 * on a caller-provided byte array.
 * NOTE(review): `max` presumably bounds the `live` array -- confirm. */
void bi_compute_liveness(bi_context *ctx);
void bi_liveness_ins_update(uint8_t *live, bi_instr *ins, unsigned max);

/* Post-RA variant. The per-instruction helper takes the live set as a 64-bit
 * value and returns the updated one; it is marked MUST_CHECK, so the return
 * value is the result and must not be dropped. */
void bi_postra_liveness(bi_context *ctx);
uint64_t MUST_CHECK bi_postra_liveness_ins(uint64_t live, bi_instr *ins);
1137bf215546Sopenharmony_ci
1138bf215546Sopenharmony_ci/* Layout */
1139bf215546Sopenharmony_ci
/* bi_block_offset returns a signed value for a (starting clause, target block)
 * pair. NOTE(review): presumably the branch offset from `start` to `target`;
 * confirm the units against the implementation.
 * bi_ec0_packed answers a yes/no question from a tuple count alone. */
signed bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target);
bool bi_ec0_packed(unsigned tuple_count);
1142bf215546Sopenharmony_ci
1143bf215546Sopenharmony_ci/* Check if there are no more instructions starting with a given block, this
1144bf215546Sopenharmony_ci * needs to recurse in case a shader ends with multiple empty blocks */
1145bf215546Sopenharmony_ci
1146bf215546Sopenharmony_cistatic inline bool
1147bf215546Sopenharmony_cibi_is_terminal_block(bi_block *block)
1148bf215546Sopenharmony_ci{
1149bf215546Sopenharmony_ci        return (block == NULL) ||
1150bf215546Sopenharmony_ci                (list_is_empty(&block->instructions) &&
1151bf215546Sopenharmony_ci                 bi_is_terminal_block(block->successors[0]) &&
1152bf215546Sopenharmony_ci                 bi_is_terminal_block(block->successors[1]));
1153bf215546Sopenharmony_ci}
1154bf215546Sopenharmony_ci
1155bf215546Sopenharmony_ci/* Code emit */
1156bf215546Sopenharmony_ci
/* Code emission entry points; both take the shader and a util_dynarray named
 * `emission`. NOTE(review): presumably the packed binary is appended to
 * `emission` -- confirm.
 * bi_pack returns the size of the final clause; bi_pack_valhall returns
 * nothing. */
unsigned bi_pack(bi_context *ctx, struct util_dynarray *emission);
void bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission);
1160bf215546Sopenharmony_ci
/* A packed tuple, stored as two 64-bit words.
 * NOTE(review): presumably lo/hi hold the low and high bits of one encoded
 * tuple -- confirm against the packing code. */
struct bi_packed_tuple {
        uint64_t lo;
        uint64_t hi;
};
1165bf215546Sopenharmony_ci
/* Helpers that produce pieces of a packed clause from a bi_clause_subword
 * selector. Those taking `tuples` also take `tuple_count`, which is marked
 * ASSERTED. NOTE(review): ASSERTED presumably means the count is only used in
 * assertions -- confirm. */
uint8_t bi_pack_literal(enum bi_clause_subword literal);

uint8_t
bi_pack_upper(enum bi_clause_subword upper,
                struct bi_packed_tuple *tuples,
                ASSERTED unsigned tuple_count);

/* NOTE(review): presumably returns `nbits` bits starting at bit `offset` of
 * the tuple selected by `idx` -- confirm against the implementation. */
uint64_t
bi_pack_tuple_bits(enum bi_clause_subword idx,
                struct bi_packed_tuple *tuples,
                ASSERTED unsigned tuple_count,
                unsigned offset, unsigned nbits);
1177bf215546Sopenharmony_ci
/* Produces one byte from three subword selectors, the packed tuples and a
 * flag `z`. NOTE(review): the meaning of `z` is not visible from this header
 * -- see the implementation. */
uint8_t
bi_pack_sync(enum bi_clause_subword t1,
             enum bi_clause_subword t2,
             enum bi_clause_subword t3,
             struct bi_packed_tuple *tuples,
             ASSERTED unsigned tuple_count,
             bool z);

/* Takes the `emission` dynarray, a format `index`, the packed tuples, and the
 * clause header / ec0 / m0 / z values.
 * NOTE(review): presumably appends one packed clause word in the given format
 * to `emission` -- confirm against the implementation. */
void
bi_pack_format(struct util_dynarray *emission,
                unsigned index,
                struct bi_packed_tuple *tuples,
                ASSERTED unsigned tuple_count,
                uint64_t header, uint64_t ec0,
                unsigned m0, bool z);
1193bf215546Sopenharmony_ci
/* Per-instruction packers; each takes the instruction and four already
 * encoded source selectors (src0..src3).
 * NOTE(review): presumably the returned unsigned is the instruction encoding
 * for the FMA / ADD unit respectively -- confirm. */
unsigned bi_pack_fma(bi_instr *I,
                enum bifrost_packed_src src0,
                enum bifrost_packed_src src1,
                enum bifrost_packed_src src2,
                enum bifrost_packed_src src3);
unsigned bi_pack_add(bi_instr *I,
                enum bifrost_packed_src src0,
                enum bifrost_packed_src src1,
                enum bifrost_packed_src src2,
                enum bifrost_packed_src src3);
1204bf215546Sopenharmony_ci
1205bf215546Sopenharmony_ci/* Like in NIR, for use with the builder */
1206bf215546Sopenharmony_ci
/* Where a cursor points relative to its block or instruction; consumed by
 * bi_builder_insert. */
enum bi_cursor_option {
    /* Insert at the tail of the block's instruction list */
    bi_cursor_after_block,
    /* Insert relative to a given instruction: before it... */
    bi_cursor_before_instr,
    /* ...or after it */
    bi_cursor_after_instr
};
1212bf215546Sopenharmony_ci
/* An insertion point in the IR. `option` selects which union member is
 * meaningful: `block` for bi_cursor_after_block, `instr` for the two
 * instruction-relative options. */
typedef struct {
    enum bi_cursor_option option;

    union {
        bi_block *block;
        bi_instr *instr;
    };
} bi_cursor;
1221bf215546Sopenharmony_ci
1222bf215546Sopenharmony_cistatic inline bi_cursor
1223bf215546Sopenharmony_cibi_after_block(bi_block *block)
1224bf215546Sopenharmony_ci{
1225bf215546Sopenharmony_ci    return (bi_cursor) {
1226bf215546Sopenharmony_ci        .option = bi_cursor_after_block,
1227bf215546Sopenharmony_ci        .block = block
1228bf215546Sopenharmony_ci    };
1229bf215546Sopenharmony_ci}
1230bf215546Sopenharmony_ci
1231bf215546Sopenharmony_cistatic inline bi_cursor
1232bf215546Sopenharmony_cibi_before_instr(bi_instr *instr)
1233bf215546Sopenharmony_ci{
1234bf215546Sopenharmony_ci    return (bi_cursor) {
1235bf215546Sopenharmony_ci        .option = bi_cursor_before_instr,
1236bf215546Sopenharmony_ci        .instr = instr
1237bf215546Sopenharmony_ci    };
1238bf215546Sopenharmony_ci}
1239bf215546Sopenharmony_ci
1240bf215546Sopenharmony_cistatic inline bi_cursor
1241bf215546Sopenharmony_cibi_after_instr(bi_instr *instr)
1242bf215546Sopenharmony_ci{
1243bf215546Sopenharmony_ci    return (bi_cursor) {
1244bf215546Sopenharmony_ci        .option = bi_cursor_after_instr,
1245bf215546Sopenharmony_ci        .instr = instr
1246bf215546Sopenharmony_ci    };
1247bf215546Sopenharmony_ci}
1248bf215546Sopenharmony_ci
1249bf215546Sopenharmony_cistatic inline bi_cursor
1250bf215546Sopenharmony_cibi_before_nonempty_block(bi_block *block)
1251bf215546Sopenharmony_ci{
1252bf215546Sopenharmony_ci        bi_instr *I = list_first_entry(&block->instructions, bi_instr, link);
1253bf215546Sopenharmony_ci        assert(I != NULL);
1254bf215546Sopenharmony_ci
1255bf215546Sopenharmony_ci        return bi_before_instr(I);
1256bf215546Sopenharmony_ci}
1257bf215546Sopenharmony_ci
1258bf215546Sopenharmony_cistatic inline bi_cursor
1259bf215546Sopenharmony_cibi_before_block(bi_block *block)
1260bf215546Sopenharmony_ci{
1261bf215546Sopenharmony_ci        if (list_is_empty(&block->instructions))
1262bf215546Sopenharmony_ci                return bi_after_block(block);
1263bf215546Sopenharmony_ci        else
1264bf215546Sopenharmony_ci                return bi_before_nonempty_block(block);
1265bf215546Sopenharmony_ci}
1266bf215546Sopenharmony_ci
1267bf215546Sopenharmony_ci/* Invariant: a tuple must be nonempty UNLESS it is the last tuple of a clause,
1268bf215546Sopenharmony_ci * in which case there must exist a nonempty penultimate tuple */
1269bf215546Sopenharmony_ci
1270bf215546Sopenharmony_ciATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
1271bf215546Sopenharmony_cibi_first_instr_in_tuple(bi_tuple *tuple)
1272bf215546Sopenharmony_ci{
1273bf215546Sopenharmony_ci        bi_instr *instr = tuple->fma ?: tuple->add;
1274bf215546Sopenharmony_ci        assert(instr != NULL);
1275bf215546Sopenharmony_ci        return instr;
1276bf215546Sopenharmony_ci}
1277bf215546Sopenharmony_ci
1278bf215546Sopenharmony_ciATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
1279bf215546Sopenharmony_cibi_first_instr_in_clause(bi_clause *clause)
1280bf215546Sopenharmony_ci{
1281bf215546Sopenharmony_ci        return bi_first_instr_in_tuple(&clause->tuples[0]);
1282bf215546Sopenharmony_ci}
1283bf215546Sopenharmony_ci
1284bf215546Sopenharmony_ciATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
1285bf215546Sopenharmony_cibi_last_instr_in_clause(bi_clause *clause)
1286bf215546Sopenharmony_ci{
1287bf215546Sopenharmony_ci        bi_tuple tuple = clause->tuples[clause->tuple_count - 1];
1288bf215546Sopenharmony_ci        bi_instr *instr = tuple.add ?: tuple.fma;
1289bf215546Sopenharmony_ci
1290bf215546Sopenharmony_ci        if (!instr) {
1291bf215546Sopenharmony_ci                assert(clause->tuple_count >= 2);
1292bf215546Sopenharmony_ci                tuple = clause->tuples[clause->tuple_count - 2];
1293bf215546Sopenharmony_ci                instr = tuple.add ?: tuple.fma;
1294bf215546Sopenharmony_ci        }
1295bf215546Sopenharmony_ci
1296bf215546Sopenharmony_ci        assert(instr != NULL);
1297bf215546Sopenharmony_ci        return instr;
1298bf215546Sopenharmony_ci}
1299bf215546Sopenharmony_ci
1300bf215546Sopenharmony_ci/* Implemented by expanding bi_foreach_instr_in_block_from(_rev) with the start
1301bf215546Sopenharmony_ci * (end) of the clause and adding a condition for the clause boundary */
1302bf215546Sopenharmony_ci
/* Walks `pos` forward from the first instruction of `clause`, following
 * link.next. The loop stops when `pos` reaches the head of `block`'s
 * instruction list or equals bi_next_op() of the clause's last instruction.
 *
 * `clause` is re-evaluated in the loop condition on every iteration, so pass
 * an expression without side effects.
 *
 * NOTE(review): list_entry() is applied to a bi_instr pointer rather than to
 * its link, which only yields the same instruction if `link` sits at offset 0
 * of bi_instr -- confirm against the bi_instr definition. */
#define bi_foreach_instr_in_clause(block, clause, pos) \
   for (bi_instr *pos = list_entry(bi_first_instr_in_clause(clause), bi_instr, link); \
	(&pos->link != &(block)->instructions) \
                && (pos != bi_next_op(bi_last_instr_in_clause(clause))); \
	pos = list_entry(pos->link.next, bi_instr, link))
1308bf215546Sopenharmony_ci
/* Reverse counterpart: walks `pos` backward from the last instruction of
 * `clause`, following link.prev. The loop stops when `pos` reaches the head of
 * `block`'s instruction list or equals bi_prev_op() of the clause's first
 * instruction.
 *
 * `clause` is re-evaluated in the loop condition on every iteration, so pass
 * an expression without side effects.
 *
 * NOTE(review): as in the forward variant, list_entry() on a bi_instr pointer
 * relies on `link` being at offset 0 of bi_instr -- confirm. */
#define bi_foreach_instr_in_clause_rev(block, clause, pos) \
   for (bi_instr *pos = list_entry(bi_last_instr_in_clause(clause), bi_instr, link); \
	(&pos->link != &(block)->instructions) \
	        && pos != bi_prev_op(bi_first_instr_in_clause(clause)); \
	pos = list_entry(pos->link.prev, bi_instr, link))
1314bf215546Sopenharmony_ci
1315bf215546Sopenharmony_cistatic inline bi_cursor
1316bf215546Sopenharmony_cibi_before_clause(bi_clause *clause)
1317bf215546Sopenharmony_ci{
1318bf215546Sopenharmony_ci    return bi_before_instr(bi_first_instr_in_clause(clause));
1319bf215546Sopenharmony_ci}
1320bf215546Sopenharmony_ci
1321bf215546Sopenharmony_cistatic inline bi_cursor
1322bf215546Sopenharmony_cibi_before_tuple(bi_tuple *tuple)
1323bf215546Sopenharmony_ci{
1324bf215546Sopenharmony_ci    return bi_before_instr(bi_first_instr_in_tuple(tuple));
1325bf215546Sopenharmony_ci}
1326bf215546Sopenharmony_ci
1327bf215546Sopenharmony_cistatic inline bi_cursor
1328bf215546Sopenharmony_cibi_after_clause(bi_clause *clause)
1329bf215546Sopenharmony_ci{
1330bf215546Sopenharmony_ci    return bi_after_instr(bi_last_instr_in_clause(clause));
1331bf215546Sopenharmony_ci}
1332bf215546Sopenharmony_ci
1333bf215546Sopenharmony_ci/* IR builder in terms of cursor infrastructure */
1334bf215546Sopenharmony_ci
/* Builder state: the shader being built and the cursor at which the next
 * instruction will be inserted. */
typedef struct {
    bi_context *shader;
    bi_cursor cursor;
} bi_builder;
1339bf215546Sopenharmony_ci
1340bf215546Sopenharmony_cistatic inline bi_builder
1341bf215546Sopenharmony_cibi_init_builder(bi_context *ctx, bi_cursor cursor)
1342bf215546Sopenharmony_ci{
1343bf215546Sopenharmony_ci        return (bi_builder) {
1344bf215546Sopenharmony_ci                .shader = ctx,
1345bf215546Sopenharmony_ci                .cursor = cursor
1346bf215546Sopenharmony_ci        };
1347bf215546Sopenharmony_ci}
1348bf215546Sopenharmony_ci
1349bf215546Sopenharmony_ci/* Insert an instruction at the cursor and move the cursor */
1350bf215546Sopenharmony_ci
1351bf215546Sopenharmony_cistatic inline void
1352bf215546Sopenharmony_cibi_builder_insert(bi_cursor *cursor, bi_instr *I)
1353bf215546Sopenharmony_ci{
1354bf215546Sopenharmony_ci    switch (cursor->option) {
1355bf215546Sopenharmony_ci    case bi_cursor_after_instr:
1356bf215546Sopenharmony_ci        list_add(&I->link, &cursor->instr->link);
1357bf215546Sopenharmony_ci        cursor->instr = I;
1358bf215546Sopenharmony_ci        return;
1359bf215546Sopenharmony_ci
1360bf215546Sopenharmony_ci    case bi_cursor_after_block:
1361bf215546Sopenharmony_ci        list_addtail(&I->link, &cursor->block->instructions);
1362bf215546Sopenharmony_ci        cursor->option = bi_cursor_after_instr;
1363bf215546Sopenharmony_ci        cursor->instr = I;
1364bf215546Sopenharmony_ci        return;
1365bf215546Sopenharmony_ci
1366bf215546Sopenharmony_ci    case bi_cursor_before_instr:
1367bf215546Sopenharmony_ci        list_addtail(&I->link, &cursor->instr->link);
1368bf215546Sopenharmony_ci        cursor->option = bi_cursor_after_instr;
1369bf215546Sopenharmony_ci        cursor->instr = I;
1370bf215546Sopenharmony_ci        return;
1371bf215546Sopenharmony_ci    }
1372bf215546Sopenharmony_ci
1373bf215546Sopenharmony_ci    unreachable("Invalid cursor option");
1374bf215546Sopenharmony_ci}
1375bf215546Sopenharmony_ci
1376bf215546Sopenharmony_ci/* Read back power-efficent garbage, TODO maybe merge with null? */
1377bf215546Sopenharmony_cistatic inline bi_index
1378bf215546Sopenharmony_cibi_dontcare(bi_builder *b)
1379bf215546Sopenharmony_ci{
1380bf215546Sopenharmony_ci        if (b->shader->arch >= 9)
1381bf215546Sopenharmony_ci               return bi_zero();
1382bf215546Sopenharmony_ci        else
1383bf215546Sopenharmony_ci               return bi_passthrough(BIFROST_SRC_FAU_HI);
1384bf215546Sopenharmony_ci}
1385bf215546Sopenharmony_ci
/* Thin wrappers that specialise the generic u_worklist API for bi_block.
 *
 * bi_worklist_init sizes the worklist from ctx->num_blocks and passes ctx as
 * the final u_worklist_init argument. The ctx argument is parenthesised
 * before the member access so that any pointer expression expands correctly.
 *
 * NOTE(review): `index` is handed to the u_worklist macros as a bare token,
 * presumably naming the bi_block member that identifies a block -- confirm
 * against u_worklist.h. */
#define bi_worklist_init(ctx, w) u_worklist_init(w, (ctx)->num_blocks, ctx)
#define bi_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define bi_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
#define bi_worklist_peek_head(w) u_worklist_peek_head(w, bi_block, index)
#define bi_worklist_pop_head(w)  u_worklist_pop_head( w, bi_block, index)
#define bi_worklist_peek_tail(w) u_worklist_peek_tail(w, bi_block, index)
#define bi_worklist_pop_tail(w)  u_worklist_pop_tail( w, bi_block, index)
1393bf215546Sopenharmony_ci
1394bf215546Sopenharmony_ci/* NIR passes */
1395bf215546Sopenharmony_ci
/* Runs on a NIR shader rather than on the backend IR.
 * NOTE(review): `lanes` is presumably the number of lanes per warp/subgroup
 * and the bool result presumably signals progress -- confirm. */
bool bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes);
1397bf215546Sopenharmony_ci
1398bf215546Sopenharmony_ci#ifdef __cplusplus
1399bf215546Sopenharmony_ci} /* extern C */
1400bf215546Sopenharmony_ci#endif
1401bf215546Sopenharmony_ci
1402bf215546Sopenharmony_ci#endif
1403