/*
 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#ifndef IR3_H_
25bf215546Sopenharmony_ci#define IR3_H_
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include <stdbool.h>
28bf215546Sopenharmony_ci#include <stdint.h>
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "compiler/shader_enums.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_ci#include "util/bitscan.h"
33bf215546Sopenharmony_ci#include "util/list.h"
34bf215546Sopenharmony_ci#include "util/set.h"
35bf215546Sopenharmony_ci#include "util/u_debug.h"
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci#include "instr-a3xx.h"
38bf215546Sopenharmony_ci
/* Low-level intermediate representation of an Adreno shader program. */
40bf215546Sopenharmony_ci
/* Forward declarations: these types are referenced by pointer in the
 * definitions below before (or without) being fully defined here.
 */
struct ir3_compiler;
struct ir3;
struct ir3_instruction;
struct ir3_block;
45bf215546Sopenharmony_ci
/* Per-shader bookkeeping: binary size and offsets, instruction counts,
 * register high-water marks, and sync/stall estimates.
 */
struct ir3_info {
   void *data; /* used internally in ir3 assembler */
   /* Size in bytes of the shader binary, including NIR constants and
    * padding
    */
   uint32_t size;
   /* byte offset from start of the shader to the NIR constant data. */
   uint32_t constant_data_offset;
   /* Size in dwords of the instructions. */
   uint16_t sizedwords;
   uint16_t instrs_count; /* expanded to account for rpt's */
   uint16_t nops_count;   /* # of nop instructions, including nopN */
   /* Presumably per-opcode instruction counts (mov/cov/stp/ldp) --
    * confirm against the code that fills them in.
    */
   uint16_t mov_count;
   uint16_t cov_count;
   uint16_t stp_count;
   uint16_t ldp_count;
   /* NOTE: max_reg, etc, does not include registers not touched
    * by the shader (ie. vertex fetched via VFD_DECODE but not
    * touched by shader)
    */
   int8_t max_reg; /* highest GPR # used by shader */
   int8_t max_half_reg;
   int16_t max_const;
   /* This is the maximum # of waves that can be executed at once in one
    * core, assuming that they are all executing this shader.
    */
   int8_t max_waves;
   bool double_threadsize;
   bool multi_dword_ldp_stp;

   /* number of sync bits: */
   uint16_t ss, sy;

   /* estimate of number of cycles stalled on (ss) */
   uint16_t sstall;
   /* estimate of number of cycles stalled on (sy) */
   uint16_t systall;

   uint16_t last_baryf; /* instruction # of last varying fetch */

   /* Number of instructions of a given category: */
   uint16_t instrs_per_cat[8];
};
89bf215546Sopenharmony_ci
/* A set of registers grouped together (see ir3_register::merge_set and
 * ir3_register::merge_set_offset).
 *
 * NOTE(review): the field names suggest this is consumed by register
 * allocation / spilling; the exact semantics are not visible in this
 * header -- confirm against the pass that builds merge sets.
 */
struct ir3_merge_set {
   uint16_t preferred_reg;
   uint16_t size;
   uint16_t alignment;

   unsigned interval_start;
   unsigned spill_slot;

   /* Number of entries in, and storage for, the member registers: */
   unsigned regs_count;
   struct ir3_register **regs;
};
101bf215546Sopenharmony_ci
/* A single source or destination operand of an ir3_instruction.
 *
 * Depending on 'flags' it describes a GPR, a const, an immediate (via the
 * iim_val/uim_val/fim_val union members) or a relative array access (via
 * the 'array' union member).
 */
struct ir3_register {
   enum {
      IR3_REG_CONST = 0x001,
      IR3_REG_IMMED = 0x002,
      IR3_REG_HALF = 0x004,
      /* Shared registers have the same value for all threads when read.
       * They can only be written when one thread is active (that is, inside
       * a "getone" block).
       */
      IR3_REG_SHARED = 0x008,
      IR3_REG_RELATIV = 0x010,
      IR3_REG_R = 0x020,
      /* Most instructions, it seems, can do float abs/neg but not
       * integer.  The CP pass needs to know what is intended (int or
       * float) in order to do the right thing.  For this reason the
       * abs/neg flags are split out into float and int variants.  In
       * addition, .b (bitwise) operations, the negate is actually a
       * bitwise not, so split that out into a new flag to make it
       * more clear.
       */
      IR3_REG_FNEG = 0x040,
      IR3_REG_FABS = 0x080,
      IR3_REG_SNEG = 0x100,
      IR3_REG_SABS = 0x200,
      IR3_REG_BNOT = 0x400,
      /* (ei) flag, end-input?  Set on last bary, presumably to signal
       * that the shader needs no more input:
       *
       * Note: Has different meaning on other instructions like add.s/u
       */
      IR3_REG_EI = 0x2000,
      /* meta-flags, for intermediate stages of IR, ie.
       * before register assignment is done:
       */
      IR3_REG_SSA = 0x4000, /* 'def' is ptr to assigning destination */
      IR3_REG_ARRAY = 0x8000,

      /* Set on a use whenever the SSA value becomes dead after the current
       * instruction.
       */
      IR3_REG_KILL = 0x10000,

      /* Similar to IR3_REG_KILL, except that if there are multiple uses of the
       * same SSA value in a single instruction, this is only set on the first
       * use.
       */
      IR3_REG_FIRST_KILL = 0x20000,

      /* Set when a destination doesn't have any uses and is dead immediately
       * after the instruction. This can happen even after optimizations for
       * corner cases such as destinations of atomic instructions.
       */
      IR3_REG_UNUSED = 0x40000,

      /* "Early-clobber" on a destination means that the destination is
       * (potentially) written before any sources are read and therefore
       * interferes with the sources of the instruction.
       */
      IR3_REG_EARLY_CLOBBER = 0x80000,
   } flags;

   unsigned name;

   /* used for cat5 instructions, but also for internal/IR level
    * tracking of what registers are read/written by an instruction.
    * wrmask may be a bad name since it is used to represent both
    * src and dst that touch multiple adjacent registers.
    */
   unsigned wrmask : 16; /* up to vec16 */

   /* for relative addressing, 32bits for array size is too small,
    * but otoh we don't need to deal with disjoint sets, so instead
    * use a simple size field (number of scalar components).
    *
    * Note the size field isn't important for relative const (since
    * we don't have to do register allocation for constants).
    */
   unsigned size : 16;

   /* normal registers:
    * the component is in the low two bits of the reg #, so
    * rN.x becomes: (N << 2) | x
    */
   uint16_t num;
   union {
      /* immediate: */
      int32_t iim_val;
      uint32_t uim_val;
      float fim_val;
      /* relative: */
      struct {
         uint16_t id;
         int16_t offset;
         uint16_t base;
      } array;
   };

   /* For IR3_REG_SSA, dst registers contain pointer back to the instruction
    * containing this register.
    */
   struct ir3_instruction *instr;

   /* For IR3_REG_SSA, src registers contain ptr back to assigning
    * instruction.
    *
    * For IR3_REG_ARRAY, the pointer is back to the last dependent
    * array access (although the net effect is the same, it points
    * back to a previous instruction that we depend on).
    */
   struct ir3_register *def;

   /* Pointer to another register in the instruction that must share the same
    * physical register. Each destination can be tied with one source, and
    * they must have "tied" pointing to each other.
    */
   struct ir3_register *tied;

   unsigned spill_slot, next_use;

   /* Merge set this register belongs to (if any) and its offset within
    * that set:
    */
   unsigned merge_set_offset;
   struct ir3_merge_set *merge_set;
   unsigned interval_start, interval_end;
};
225bf215546Sopenharmony_ci
/*
 * Stupid/simple growable array implementation:
 *
 * DECLARE_ARRAY(type, name) declares three members/variables:
 *   - name##_count: number of elements currently stored
 *   - name##_sz:    allocated capacity, in elements
 *   - name:         pointer to the element storage
 *
 * The expansion already ends in ';', so the customary trailing ';' at the
 * use site yields an (accepted) empty declaration.
 */
#define DECLARE_ARRAY(type, name)                                              \
   unsigned name##_count, name##_sz;                                           \
   type *name;
232bf215546Sopenharmony_ci
/* Append a value to an array declared with DECLARE_ARRAY().
 *
 *   ctx: allocation context handed to reralloc_size()
 *   arr: the array itself (e.g. obj->deps); "_count"/"_sz" are token-pasted
 *        onto it, so it must end in the array's identifier
 *   ...: the value to store
 *
 * When the array is full the capacity becomes MAX2(2 * capacity, 16) and the
 * storage is resized with reralloc_size().  Note that 'arr' is expanded
 * several times and that the reralloc_size() result is not checked.
 */
#define array_insert(ctx, arr, ...)                                            \
   do {                                                                        \
      if (arr##_count == arr##_sz) {                                           \
         arr##_sz = MAX2(2 * arr##_sz, 16);                                    \
         arr = reralloc_size(ctx, arr, arr##_sz * sizeof(arr[0]));             \
      }                                                                        \
      arr[arr##_count++] = __VA_ARGS__;                                        \
   } while (0)
241bf215546Sopenharmony_ci
/* Reduction operation selector (stored in ir3_instruction::cat1.reduce_op).
 *
 * NOTE(review): the _U/_S/_F/_B suffixes presumably mean unsigned, signed,
 * float and bitwise respectively -- confirm against the users.
 */
typedef enum {
   REDUCE_OP_ADD_U,
   REDUCE_OP_ADD_F,
   REDUCE_OP_MUL_U,
   REDUCE_OP_MUL_F,
   REDUCE_OP_MIN_U,
   REDUCE_OP_MIN_S,
   REDUCE_OP_MIN_F,
   REDUCE_OP_MAX_U,
   REDUCE_OP_MAX_S,
   REDUCE_OP_MAX_F,
   REDUCE_OP_AND_B,
   REDUCE_OP_OR_B,
   REDUCE_OP_XOR_B,
} reduce_op_t;
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_cistruct ir3_instruction {
259bf215546Sopenharmony_ci   struct ir3_block *block;
260bf215546Sopenharmony_ci   opc_t opc;
261bf215546Sopenharmony_ci   enum {
262bf215546Sopenharmony_ci      /* (sy) flag is set on first instruction, and after sample
263bf215546Sopenharmony_ci       * instructions (probably just on RAW hazard).
264bf215546Sopenharmony_ci       */
265bf215546Sopenharmony_ci      IR3_INSTR_SY = 0x001,
266bf215546Sopenharmony_ci      /* (ss) flag is set on first instruction, and first instruction
267bf215546Sopenharmony_ci       * to depend on the result of "long" instructions (RAW hazard):
268bf215546Sopenharmony_ci       *
269bf215546Sopenharmony_ci       *   rcp, rsq, log2, exp2, sin, cos, sqrt
270bf215546Sopenharmony_ci       *
271bf215546Sopenharmony_ci       * It seems to synchronize until all in-flight instructions are
272bf215546Sopenharmony_ci       * completed, for example:
273bf215546Sopenharmony_ci       *
274bf215546Sopenharmony_ci       *   rsq hr1.w, hr1.w
275bf215546Sopenharmony_ci       *   add.f hr2.z, (neg)hr2.z, hc0.y
276bf215546Sopenharmony_ci       *   mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
277bf215546Sopenharmony_ci       *   rsq hr2.x, hr2.x
278bf215546Sopenharmony_ci       *   (rpt1)nop
279bf215546Sopenharmony_ci       *   mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
280bf215546Sopenharmony_ci       *   nop
281bf215546Sopenharmony_ci       *   mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
282bf215546Sopenharmony_ci       *   (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
283bf215546Sopenharmony_ci       *   (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
284bf215546Sopenharmony_ci       *
285bf215546Sopenharmony_ci       * The last mul.f does not have (ss) set, presumably because the
286bf215546Sopenharmony_ci       * (ss) on the previous instruction does the job.
287bf215546Sopenharmony_ci       *
288bf215546Sopenharmony_ci       * The blob driver also seems to set it on WAR hazards, although
289bf215546Sopenharmony_ci       * not really clear if this is needed or just blob compiler being
290bf215546Sopenharmony_ci       * sloppy.  So far I haven't found a case where removing the (ss)
291bf215546Sopenharmony_ci       * causes problems for WAR hazard, but I could just be getting
292bf215546Sopenharmony_ci       * lucky:
293bf215546Sopenharmony_ci       *
294bf215546Sopenharmony_ci       *   rcp r1.y, r3.y
295bf215546Sopenharmony_ci       *   (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
296bf215546Sopenharmony_ci       *
297bf215546Sopenharmony_ci       */
298bf215546Sopenharmony_ci      IR3_INSTR_SS = 0x002,
299bf215546Sopenharmony_ci      /* (jp) flag is set on jump targets:
300bf215546Sopenharmony_ci       */
301bf215546Sopenharmony_ci      IR3_INSTR_JP = 0x004,
302bf215546Sopenharmony_ci      IR3_INSTR_UL = 0x008,
303bf215546Sopenharmony_ci      IR3_INSTR_3D = 0x010,
304bf215546Sopenharmony_ci      IR3_INSTR_A = 0x020,
305bf215546Sopenharmony_ci      IR3_INSTR_O = 0x040,
306bf215546Sopenharmony_ci      IR3_INSTR_P = 0x080,
307bf215546Sopenharmony_ci      IR3_INSTR_S = 0x100,
308bf215546Sopenharmony_ci      IR3_INSTR_S2EN = 0x200,
309bf215546Sopenharmony_ci      IR3_INSTR_SAT = 0x400,
310bf215546Sopenharmony_ci      /* (cat5/cat6) Bindless */
311bf215546Sopenharmony_ci      IR3_INSTR_B = 0x800,
312bf215546Sopenharmony_ci      /* (cat5/cat6) nonuniform */
313bf215546Sopenharmony_ci      IR3_INSTR_NONUNIF = 0x1000,
314bf215546Sopenharmony_ci      /* (cat5-only) Get some parts of the encoding from a1.x */
315bf215546Sopenharmony_ci      IR3_INSTR_A1EN = 0x02000,
316bf215546Sopenharmony_ci      /* meta-flags, for intermediate stages of IR, ie.
317bf215546Sopenharmony_ci       * before register assignment is done:
318bf215546Sopenharmony_ci       */
319bf215546Sopenharmony_ci      IR3_INSTR_MARK = 0x04000,
320bf215546Sopenharmony_ci      IR3_INSTR_UNUSED = 0x08000,
321bf215546Sopenharmony_ci   } flags;
322bf215546Sopenharmony_ci   uint8_t repeat;
323bf215546Sopenharmony_ci   uint8_t nop;
324bf215546Sopenharmony_ci#ifdef DEBUG
325bf215546Sopenharmony_ci   unsigned srcs_max, dsts_max;
326bf215546Sopenharmony_ci#endif
327bf215546Sopenharmony_ci   unsigned srcs_count, dsts_count;
328bf215546Sopenharmony_ci   struct ir3_register **dsts;
329bf215546Sopenharmony_ci   struct ir3_register **srcs;
330bf215546Sopenharmony_ci   union {
331bf215546Sopenharmony_ci      struct {
332bf215546Sopenharmony_ci         char inv1, inv2;
333bf215546Sopenharmony_ci         char comp1, comp2;
334bf215546Sopenharmony_ci         int immed;
335bf215546Sopenharmony_ci         struct ir3_block *target;
336bf215546Sopenharmony_ci         const char *target_label;
337bf215546Sopenharmony_ci         brtype_t brtype;
338bf215546Sopenharmony_ci         unsigned idx; /* for brac.N */
339bf215546Sopenharmony_ci      } cat0;
340bf215546Sopenharmony_ci      struct {
341bf215546Sopenharmony_ci         type_t src_type, dst_type;
342bf215546Sopenharmony_ci         round_t round;
343bf215546Sopenharmony_ci         reduce_op_t reduce_op;
344bf215546Sopenharmony_ci      } cat1;
345bf215546Sopenharmony_ci      struct {
346bf215546Sopenharmony_ci         enum {
347bf215546Sopenharmony_ci            IR3_COND_LT = 0,
348bf215546Sopenharmony_ci            IR3_COND_LE = 1,
349bf215546Sopenharmony_ci            IR3_COND_GT = 2,
350bf215546Sopenharmony_ci            IR3_COND_GE = 3,
351bf215546Sopenharmony_ci            IR3_COND_EQ = 4,
352bf215546Sopenharmony_ci            IR3_COND_NE = 5,
353bf215546Sopenharmony_ci         } condition;
354bf215546Sopenharmony_ci      } cat2;
355bf215546Sopenharmony_ci      struct {
356bf215546Sopenharmony_ci         enum {
357bf215546Sopenharmony_ci            IR3_SRC_UNSIGNED = 0,
358bf215546Sopenharmony_ci            IR3_SRC_MIXED = 1,
359bf215546Sopenharmony_ci         } signedness;
360bf215546Sopenharmony_ci         enum {
361bf215546Sopenharmony_ci            IR3_SRC_PACKED_LOW = 0,
362bf215546Sopenharmony_ci            IR3_SRC_PACKED_HIGH = 1,
363bf215546Sopenharmony_ci         } packed;
364bf215546Sopenharmony_ci         bool swapped;
365bf215546Sopenharmony_ci      } cat3;
366bf215546Sopenharmony_ci      struct {
367bf215546Sopenharmony_ci         unsigned samp, tex;
368bf215546Sopenharmony_ci         unsigned tex_base : 3;
369bf215546Sopenharmony_ci         unsigned cluster_size : 4;
370bf215546Sopenharmony_ci         type_t type;
371bf215546Sopenharmony_ci      } cat5;
372bf215546Sopenharmony_ci      struct {
373bf215546Sopenharmony_ci         type_t type;
374bf215546Sopenharmony_ci         /* TODO remove dst_offset and handle as a ir3_register
375bf215546Sopenharmony_ci          * which might be IMMED, similar to how src_offset is
376bf215546Sopenharmony_ci          * handled.
377bf215546Sopenharmony_ci          */
378bf215546Sopenharmony_ci         int dst_offset;
379bf215546Sopenharmony_ci         int iim_val;       /* for ldgb/stgb, # of components */
380bf215546Sopenharmony_ci         unsigned d    : 3; /* for ldc, component offset */
381bf215546Sopenharmony_ci         bool typed    : 1;
382bf215546Sopenharmony_ci         unsigned base : 3;
383bf215546Sopenharmony_ci      } cat6;
384bf215546Sopenharmony_ci      struct {
385bf215546Sopenharmony_ci         unsigned w : 1; /* write */
386bf215546Sopenharmony_ci         unsigned r : 1; /* read */
387bf215546Sopenharmony_ci         unsigned l : 1; /* local */
388bf215546Sopenharmony_ci         unsigned g : 1; /* global */
389bf215546Sopenharmony_ci      } cat7;
390bf215546Sopenharmony_ci      /* for meta-instructions, just used to hold extra data
391bf215546Sopenharmony_ci       * before instruction scheduling, etc
392bf215546Sopenharmony_ci       */
393bf215546Sopenharmony_ci      struct {
394bf215546Sopenharmony_ci         int off; /* component/offset */
395bf215546Sopenharmony_ci      } split;
396bf215546Sopenharmony_ci      struct {
397bf215546Sopenharmony_ci         /* Per-source index back to the entry in the
398bf215546Sopenharmony_ci          * ir3_shader_variant::outputs table.
399bf215546Sopenharmony_ci          */
400bf215546Sopenharmony_ci         unsigned *outidxs;
401bf215546Sopenharmony_ci      } end;
402bf215546Sopenharmony_ci      struct {
403bf215546Sopenharmony_ci         /* used to temporarily hold reference to nir_phi_instr
404bf215546Sopenharmony_ci          * until we resolve the phi srcs
405bf215546Sopenharmony_ci          */
406bf215546Sopenharmony_ci         void *nphi;
407bf215546Sopenharmony_ci      } phi;
408bf215546Sopenharmony_ci      struct {
409bf215546Sopenharmony_ci         unsigned samp, tex;
410bf215546Sopenharmony_ci         unsigned input_offset;
411bf215546Sopenharmony_ci         unsigned samp_base : 3;
412bf215546Sopenharmony_ci         unsigned tex_base  : 3;
413bf215546Sopenharmony_ci      } prefetch;
414bf215546Sopenharmony_ci      struct {
415bf215546Sopenharmony_ci         /* maps back to entry in ir3_shader_variant::inputs table: */
416bf215546Sopenharmony_ci         int inidx;
417bf215546Sopenharmony_ci         /* for sysvals, identifies the sysval type.  Mostly so we can
418bf215546Sopenharmony_ci          * identify the special cases where a sysval should not be DCE'd
419bf215546Sopenharmony_ci          * (currently, just pre-fs texture fetch)
420bf215546Sopenharmony_ci          */
421bf215546Sopenharmony_ci         gl_system_value sysval;
422bf215546Sopenharmony_ci      } input;
423bf215546Sopenharmony_ci   };
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci   /* For assigning jump offsets, we need instruction's position: */
426bf215546Sopenharmony_ci   uint32_t ip;
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci   /* used for per-pass extra instruction data.
429bf215546Sopenharmony_ci    *
430bf215546Sopenharmony_ci    * TODO we should remove the per-pass data like this and 'use_count'
431bf215546Sopenharmony_ci    * and do something similar to what RA does w/ ir3_ra_instr_data..
432bf215546Sopenharmony_ci    * ie. use the ir3_count_instructions pass, and then use instr->ip
433bf215546Sopenharmony_ci    * to index into a table of pass-private data.
434bf215546Sopenharmony_ci    */
435bf215546Sopenharmony_ci   void *data;
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci   /**
438bf215546Sopenharmony_ci    * Valid if pass calls ir3_find_ssa_uses().. see foreach_ssa_use()
439bf215546Sopenharmony_ci    */
440bf215546Sopenharmony_ci   struct set *uses;
441bf215546Sopenharmony_ci
442bf215546Sopenharmony_ci   int use_count; /* currently just updated/used by cp */
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci   /* an instruction can reference at most one address register amongst
445bf215546Sopenharmony_ci    * it's src/dst registers.  Beyond that, you need to insert mov's.
446bf215546Sopenharmony_ci    *
447bf215546Sopenharmony_ci    * NOTE: do not write this directly, use ir3_instr_set_address()
448bf215546Sopenharmony_ci    */
449bf215546Sopenharmony_ci   struct ir3_register *address;
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci   /* Tracking for additional dependent instructions.  Used to handle
452bf215546Sopenharmony_ci    * barriers, WAR hazards for arrays/SSBOs/etc.
453bf215546Sopenharmony_ci    */
454bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ir3_instruction *, deps);
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_ci   /*
457bf215546Sopenharmony_ci    * From PoV of instruction scheduling, not execution (ie. ignores global/
458bf215546Sopenharmony_ci    * local distinction):
459bf215546Sopenharmony_ci    *                            shared  image  atomic  SSBO  everything
460bf215546Sopenharmony_ci    *   barrier()/            -   R/W     R/W    R/W     R/W       X
461bf215546Sopenharmony_ci    *     groupMemoryBarrier()
462bf215546Sopenharmony_ci    *     memoryBarrier()
463bf215546Sopenharmony_ci    *     (but only images declared coherent?)
464bf215546Sopenharmony_ci    *   memoryBarrierAtomic() -                  R/W
465bf215546Sopenharmony_ci    *   memoryBarrierBuffer() -                          R/W
466bf215546Sopenharmony_ci    *   memoryBarrierImage()  -           R/W
467bf215546Sopenharmony_ci    *   memoryBarrierShared() -   R/W
468bf215546Sopenharmony_ci    *
469bf215546Sopenharmony_ci    * TODO I think for SSBO/image/shared, in cases where we can determine
470bf215546Sopenharmony_ci    * which variable is accessed, we don't need to care about accesses to
471bf215546Sopenharmony_ci    * different variables (unless declared coherent??)
472bf215546Sopenharmony_ci    */
473bf215546Sopenharmony_ci   enum {
474bf215546Sopenharmony_ci      IR3_BARRIER_EVERYTHING = 1 << 0,
475bf215546Sopenharmony_ci      IR3_BARRIER_SHARED_R = 1 << 1,
476bf215546Sopenharmony_ci      IR3_BARRIER_SHARED_W = 1 << 2,
477bf215546Sopenharmony_ci      IR3_BARRIER_IMAGE_R = 1 << 3,
478bf215546Sopenharmony_ci      IR3_BARRIER_IMAGE_W = 1 << 4,
479bf215546Sopenharmony_ci      IR3_BARRIER_BUFFER_R = 1 << 5,
480bf215546Sopenharmony_ci      IR3_BARRIER_BUFFER_W = 1 << 6,
481bf215546Sopenharmony_ci      IR3_BARRIER_ARRAY_R = 1 << 7,
482bf215546Sopenharmony_ci      IR3_BARRIER_ARRAY_W = 1 << 8,
483bf215546Sopenharmony_ci      IR3_BARRIER_PRIVATE_R = 1 << 9,
484bf215546Sopenharmony_ci      IR3_BARRIER_PRIVATE_W = 1 << 10,
485bf215546Sopenharmony_ci      IR3_BARRIER_CONST_W = 1 << 11,
486bf215546Sopenharmony_ci      IR3_BARRIER_ACTIVE_FIBERS_R = 1 << 12,
487bf215546Sopenharmony_ci      IR3_BARRIER_ACTIVE_FIBERS_W = 1 << 13,
488bf215546Sopenharmony_ci   } barrier_class,
489bf215546Sopenharmony_ci      barrier_conflict;
490bf215546Sopenharmony_ci
491bf215546Sopenharmony_ci   /* Entry in ir3_block's instruction list: */
492bf215546Sopenharmony_ci   struct list_head node;
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci   uint32_t serialno;
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_ci   // TODO only computerator/assembler:
497bf215546Sopenharmony_ci   int line;
498bf215546Sopenharmony_ci};
499bf215546Sopenharmony_ci
500bf215546Sopenharmony_cistruct ir3 {
501bf215546Sopenharmony_ci   struct ir3_compiler *compiler;
502bf215546Sopenharmony_ci   gl_shader_stage type;
503bf215546Sopenharmony_ci
504bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ir3_instruction *, inputs);
505bf215546Sopenharmony_ci
506bf215546Sopenharmony_ci   /* Track bary.f (and ldlv) instructions.. this is needed in
507bf215546Sopenharmony_ci    * scheduling to ensure that all varying fetches happen before
508bf215546Sopenharmony_ci    * any potential kill instructions.  The hw gets grumpy if all
509bf215546Sopenharmony_ci    * threads in a group are killed before the last bary.f gets
510bf215546Sopenharmony_ci    * a chance to signal end of input (ei).
511bf215546Sopenharmony_ci    */
512bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ir3_instruction *, baryfs);
513bf215546Sopenharmony_ci
514bf215546Sopenharmony_ci   /* Track all indirect instructions (read and write).  To avoid
515bf215546Sopenharmony_ci    * deadlock scenario where an address register gets scheduled,
516bf215546Sopenharmony_ci    * but other dependent src instructions cannot be scheduled due
517bf215546Sopenharmony_ci    * to dependency on a *different* address register value, the
518bf215546Sopenharmony_ci    * scheduler needs to ensure that all dependencies other than
519bf215546Sopenharmony_ci    * the instruction other than the address register are scheduled
520bf215546Sopenharmony_ci    * before the one that writes the address register.  Having a
521bf215546Sopenharmony_ci    * convenient list of instructions that reference some address
522bf215546Sopenharmony_ci    * register simplifies this.
523bf215546Sopenharmony_ci    */
524bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ir3_instruction *, a0_users);
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_ci   /* same for a1.x: */
527bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ir3_instruction *, a1_users);
528bf215546Sopenharmony_ci
529bf215546Sopenharmony_ci   /* and same for instructions that consume predicate register: */
530bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ir3_instruction *, predicates);
531bf215546Sopenharmony_ci
532bf215546Sopenharmony_ci   /* Track texture sample instructions which need texture state
533bf215546Sopenharmony_ci    * patched in (for astc-srgb workaround):
534bf215546Sopenharmony_ci    */
535bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ir3_instruction *, astc_srgb);
536bf215546Sopenharmony_ci
537bf215546Sopenharmony_ci   /* Track tg4 instructions which need texture state patched in (for tg4
538bf215546Sopenharmony_ci    * swizzling workaround):
539bf215546Sopenharmony_ci    */
540bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ir3_instruction *, tg4);
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci   /* List of blocks: */
543bf215546Sopenharmony_ci   struct list_head block_list;
544bf215546Sopenharmony_ci
545bf215546Sopenharmony_ci   /* List of ir3_array's: */
546bf215546Sopenharmony_ci   struct list_head array_list;
547bf215546Sopenharmony_ci
548bf215546Sopenharmony_ci#ifdef DEBUG
549bf215546Sopenharmony_ci   unsigned block_count;
550bf215546Sopenharmony_ci#endif
551bf215546Sopenharmony_ci   unsigned instr_count;
552bf215546Sopenharmony_ci};
553bf215546Sopenharmony_ci
554bf215546Sopenharmony_cistruct ir3_array {
555bf215546Sopenharmony_ci   struct list_head node;
556bf215546Sopenharmony_ci   unsigned length;
557bf215546Sopenharmony_ci   unsigned id;
558bf215546Sopenharmony_ci
559bf215546Sopenharmony_ci   struct nir_register *r;
560bf215546Sopenharmony_ci
561bf215546Sopenharmony_ci   /* To avoid array write's from getting DCE'd, keep track of the
562bf215546Sopenharmony_ci    * most recent write.  Any array access depends on the most
563bf215546Sopenharmony_ci    * recent write.  This way, nothing depends on writes after the
564bf215546Sopenharmony_ci    * last read.  But all the writes that happen before that have
565bf215546Sopenharmony_ci    * something depending on them
566bf215546Sopenharmony_ci    */
567bf215546Sopenharmony_ci   struct ir3_register *last_write;
568bf215546Sopenharmony_ci
569bf215546Sopenharmony_ci   /* extra stuff used in RA pass: */
570bf215546Sopenharmony_ci   unsigned base; /* base vreg name */
571bf215546Sopenharmony_ci   unsigned reg;  /* base physical reg */
572bf215546Sopenharmony_ci   uint16_t start_ip, end_ip;
573bf215546Sopenharmony_ci
574bf215546Sopenharmony_ci   /* Indicates if half-precision */
575bf215546Sopenharmony_ci   bool half;
576bf215546Sopenharmony_ci
577bf215546Sopenharmony_ci   bool unused;
578bf215546Sopenharmony_ci};
579bf215546Sopenharmony_ci
/* Look up an ir3_array of 'ir' by id.  Only the prototype is visible in this
 * header; presumably the result is the array whose 'id' field matches, and
 * the behavior when nothing matches must be checked in the definition.
 */
struct ir3_array *ir3_lookup_array(struct ir3 *ir, unsigned id);
581bf215546Sopenharmony_ci
/* Kind of branch terminating a block (see ir3_block::brtype). */
enum ir3_branch_type {
   /* condition */
   IR3_BRANCH_COND = 0,
   /* subgroupAny(condition) */
   IR3_BRANCH_ANY,
   /* subgroupAll(condition) */
   IR3_BRANCH_ALL,
   /* subgroupElect() */
   IR3_BRANCH_GETONE,
   /* preamble start */
   IR3_BRANCH_SHPS,
};
589bf215546Sopenharmony_ci
590bf215546Sopenharmony_cistruct ir3_block {
591bf215546Sopenharmony_ci   struct list_head node;
592bf215546Sopenharmony_ci   struct ir3 *shader;
593bf215546Sopenharmony_ci
594bf215546Sopenharmony_ci   const struct nir_block *nblock;
595bf215546Sopenharmony_ci
596bf215546Sopenharmony_ci   struct list_head instr_list; /* list of ir3_instruction */
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci   /* The actual branch condition, if there are two successors */
599bf215546Sopenharmony_ci   enum ir3_branch_type brtype;
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_ci   /* each block has either one or two successors.. in case of two
602bf215546Sopenharmony_ci    * successors, 'condition' decides which one to follow.  A block preceding
603bf215546Sopenharmony_ci    * an if/else has two successors.
604bf215546Sopenharmony_ci    *
605bf215546Sopenharmony_ci    * In some cases the path that the machine actually takes through the
606bf215546Sopenharmony_ci    * program may not match the per-thread view of the CFG. In particular
607bf215546Sopenharmony_ci    * this is the case for if/else, where the machine jumps from the end of
608bf215546Sopenharmony_ci    * the if to the beginning of the else and switches active lanes. While
609bf215546Sopenharmony_ci    * most things only care about the per-thread view, we need to use the
610bf215546Sopenharmony_ci    * "physical" view when allocating shared registers. "successors" contains
611bf215546Sopenharmony_ci    * the per-thread successors, and "physical_successors" contains the
612bf215546Sopenharmony_ci    * physical successors which includes the fallthrough edge from the if to
613bf215546Sopenharmony_ci    * the else.
614bf215546Sopenharmony_ci    */
615bf215546Sopenharmony_ci   struct ir3_instruction *condition;
616bf215546Sopenharmony_ci   struct ir3_block *successors[2];
617bf215546Sopenharmony_ci   struct ir3_block *physical_successors[2];
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ir3_block *, predecessors);
620bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ir3_block *, physical_predecessors);
621bf215546Sopenharmony_ci
622bf215546Sopenharmony_ci   uint16_t start_ip, end_ip;
623bf215546Sopenharmony_ci
624bf215546Sopenharmony_ci   /* Track instructions which do not write a register but other-
625bf215546Sopenharmony_ci    * wise must not be discarded (such as kill, stg, etc)
626bf215546Sopenharmony_ci    */
627bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ir3_instruction *, keeps);
628bf215546Sopenharmony_ci
629bf215546Sopenharmony_ci   /* used for per-pass extra block data.  Mainly used right
630bf215546Sopenharmony_ci    * now in RA step to track livein/liveout.
631bf215546Sopenharmony_ci    */
632bf215546Sopenharmony_ci   void *data;
633bf215546Sopenharmony_ci
634bf215546Sopenharmony_ci   uint32_t index;
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci   struct ir3_block *imm_dom;
637bf215546Sopenharmony_ci   DECLARE_ARRAY(struct ir3_block *, dom_children);
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_ci   uint32_t dom_pre_index;
640bf215546Sopenharmony_ci   uint32_t dom_post_index;
641bf215546Sopenharmony_ci
642bf215546Sopenharmony_ci   uint32_t loop_id;
643bf215546Sopenharmony_ci   uint32_t loop_depth;
644bf215546Sopenharmony_ci
645bf215546Sopenharmony_ci#ifdef DEBUG
646bf215546Sopenharmony_ci   uint32_t serialno;
647bf215546Sopenharmony_ci#endif
648bf215546Sopenharmony_ci};
649bf215546Sopenharmony_ci
/* Return a 32-bit identifier for 'block'.
 *
 * DEBUG builds report the block's serialno.  Other builds derive the id from
 * the block's address, truncated to 32 bits.  The pointer is converted via
 * uintptr_t rather than unsigned long, because unsigned long is narrower
 * than a pointer on LLP64 targets.
 */
static inline uint32_t
block_id(struct ir3_block *block)
{
#ifdef DEBUG
   return block->serialno;
#else
   return (uint32_t)(uintptr_t)block;
#endif
}
659bf215546Sopenharmony_ci
660bf215546Sopenharmony_cistatic inline struct ir3_block *
661bf215546Sopenharmony_ciir3_start_block(struct ir3 *ir)
662bf215546Sopenharmony_ci{
663bf215546Sopenharmony_ci   return list_first_entry(&ir->block_list, struct ir3_block, node);
664bf215546Sopenharmony_ci}
665bf215546Sopenharmony_ci
666bf215546Sopenharmony_cistatic inline struct ir3_block *
667bf215546Sopenharmony_ciir3_after_preamble(struct ir3 *ir)
668bf215546Sopenharmony_ci{
669bf215546Sopenharmony_ci   struct ir3_block *block = ir3_start_block(ir);
670bf215546Sopenharmony_ci   /* The preamble will have a usually-empty else branch, and we want to skip
671bf215546Sopenharmony_ci    * that to get to the block after the preamble.
672bf215546Sopenharmony_ci    */
673bf215546Sopenharmony_ci   if (block->brtype == IR3_BRANCH_SHPS)
674bf215546Sopenharmony_ci      return block->successors[1]->successors[0];
675bf215546Sopenharmony_ci   else
676bf215546Sopenharmony_ci      return block;
677bf215546Sopenharmony_ci}
678bf215546Sopenharmony_ci
/* CFG edge bookkeeping for blocks.  Only the prototypes are visible in this
 * header; going by the names these add/remove 'pred' in a block's
 * predecessors / physical_predecessors arrays and look up a predecessor's
 * index -- confirm the details against the implementation.
 */
void ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred);
void ir3_block_add_physical_predecessor(struct ir3_block *block,
                                        struct ir3_block *pred);
void ir3_block_remove_predecessor(struct ir3_block *block,
                                  struct ir3_block *pred);
void ir3_block_remove_physical_predecessor(struct ir3_block *block,
                                           struct ir3_block *pred);
unsigned ir3_block_get_pred_index(struct ir3_block *block,
                                  struct ir3_block *pred);

/* Dominance queries (implementation not visible here; presumably
 * ir3_calc_dominance() must run before ir3_block_dominates() is
 * meaningful -- confirm).
 */
void ir3_calc_dominance(struct ir3 *ir);
bool ir3_block_dominates(struct ir3_block *a, struct ir3_block *b);
691bf215546Sopenharmony_ci
/* Forward declaration: only pointers to the variant are needed below. */
struct ir3_shader_variant;

/* Shader lifetime (definitions live outside this header). */
struct ir3 *ir3_create(struct ir3_compiler *compiler,
                       struct ir3_shader_variant *v);
void ir3_destroy(struct ir3 *shader);

void ir3_collect_info(struct ir3_shader_variant *v);
/* Allocate 'sz' bytes associated with 'shader' (ownership/lifetime rules
 * are not visible here -- check the definition).
 */
void *ir3_alloc(struct ir3 *shader, int sz);

/* Wave-count / threadsize helpers; semantics are defined by the
 * implementation, which is not part of this header.
 */
unsigned ir3_get_reg_dependent_max_waves(const struct ir3_compiler *compiler,
                                         unsigned reg_count,
                                         bool double_threadsize);

unsigned ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v,
                                           bool double_threadsize);

bool ir3_should_double_threadsize(struct ir3_shader_variant *v,
                                  unsigned regs_count);

/* Create a new block belonging to 'shader'. */
struct ir3_block *ir3_block_create(struct ir3 *shader);
712bf215546Sopenharmony_ci
/* Instruction construction helpers (definitions not visible here).
 * 'ndst' / 'nsrc' are presumably the number of destination / source
 * registers to reserve room for -- confirm against the implementation.
 */
struct ir3_instruction *ir3_instr_create(struct ir3_block *block, opc_t opc,
                                         int ndst, int nsrc);
struct ir3_instruction *ir3_instr_clone(struct ir3_instruction *instr);
void ir3_instr_add_dep(struct ir3_instruction *instr,
                       struct ir3_instruction *dep);
const char *ir3_instr_name(struct ir3_instruction *instr);

/* Register construction helpers: create a source / destination register on
 * 'instr', or clone an existing register (definitions not visible here).
 */
struct ir3_register *ir3_src_create(struct ir3_instruction *instr, int num,
                                    int flags);
struct ir3_register *ir3_dst_create(struct ir3_instruction *instr, int num,
                                    int flags);
struct ir3_register *ir3_reg_clone(struct ir3 *shader,
                                   struct ir3_register *reg);
726bf215546Sopenharmony_ci
727bf215546Sopenharmony_cistatic inline void
728bf215546Sopenharmony_ciir3_reg_tie(struct ir3_register *dst, struct ir3_register *src)
729bf215546Sopenharmony_ci{
730bf215546Sopenharmony_ci   assert(!dst->tied && !src->tied);
731bf215546Sopenharmony_ci   dst->tied = src;
732bf215546Sopenharmony_ci   src->tied = dst;
733bf215546Sopenharmony_ci}
734bf215546Sopenharmony_ci
/* Array / address-register bookkeeping helpers.  Only the prototypes are
 * visible in this header; see the definitions for exact semantics.
 */
void ir3_reg_set_last_array(struct ir3_instruction *instr,
                            struct ir3_register *reg,
                            struct ir3_register *last_write);

void ir3_instr_set_address(struct ir3_instruction *instr,
                           struct ir3_instruction *addr);
741bf215546Sopenharmony_ci
742bf215546Sopenharmony_cistatic inline bool
743bf215546Sopenharmony_ciir3_instr_check_mark(struct ir3_instruction *instr)
744bf215546Sopenharmony_ci{
745bf215546Sopenharmony_ci   if (instr->flags & IR3_INSTR_MARK)
746bf215546Sopenharmony_ci      return true; /* already visited */
747bf215546Sopenharmony_ci   instr->flags |= IR3_INSTR_MARK;
748bf215546Sopenharmony_ci   return false;
749bf215546Sopenharmony_ci}
750bf215546Sopenharmony_ci
/* Clear instruction marks for one block / the whole shader (presumably the
 * IR3_INSTR_MARK flag set by ir3_instr_check_mark() -- confirm against the
 * definitions, which are not visible here).
 */
void ir3_block_clear_mark(struct ir3_block *block);
void ir3_clear_mark(struct ir3 *shader);

/* Instruction counting helpers; what exactly is counted (and any side
 * effects) is determined by the definitions, not visible in this header.
 */
unsigned ir3_count_instructions(struct ir3 *ir);
unsigned ir3_count_instructions_ra(struct ir3 *ir);
756bf215546Sopenharmony_ci
757bf215546Sopenharmony_ci/**
758bf215546Sopenharmony_ci * Move 'instr' to just before 'after'
759bf215546Sopenharmony_ci */
760bf215546Sopenharmony_cistatic inline void
761bf215546Sopenharmony_ciir3_instr_move_before(struct ir3_instruction *instr,
762bf215546Sopenharmony_ci                      struct ir3_instruction *after)
763bf215546Sopenharmony_ci{
764bf215546Sopenharmony_ci   list_delinit(&instr->node);
765bf215546Sopenharmony_ci   list_addtail(&instr->node, &after->node);
766bf215546Sopenharmony_ci}
767bf215546Sopenharmony_ci
768bf215546Sopenharmony_ci/**
769bf215546Sopenharmony_ci * Move 'instr' to just after 'before':
770bf215546Sopenharmony_ci */
771bf215546Sopenharmony_cistatic inline void
772bf215546Sopenharmony_ciir3_instr_move_after(struct ir3_instruction *instr,
773bf215546Sopenharmony_ci                     struct ir3_instruction *before)
774bf215546Sopenharmony_ci{
775bf215546Sopenharmony_ci   list_delinit(&instr->node);
776bf215546Sopenharmony_ci   list_add(&instr->node, &before->node);
777bf215546Sopenharmony_ci}
778bf215546Sopenharmony_ci
779bf215546Sopenharmony_ci/**
780bf215546Sopenharmony_ci * Move 'instr' to the beginning of the block:
781bf215546Sopenharmony_ci */
782bf215546Sopenharmony_cistatic inline void
783bf215546Sopenharmony_ciir3_instr_move_before_block(struct ir3_instruction *instr,
784bf215546Sopenharmony_ci                            struct ir3_block *block)
785bf215546Sopenharmony_ci{
786bf215546Sopenharmony_ci   list_delinit(&instr->node);
787bf215546Sopenharmony_ci   list_add(&instr->node, &block->instr_list);
788bf215546Sopenharmony_ci}
789bf215546Sopenharmony_ci
/* Collect SSA use information for the shader (presumably what populates the
 * per-instruction 'uses' set walked by foreach_ssa_use -- confirm against
 * the definition).
 */
void ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps);

/* Type fix-up helpers; definitions are not visible in this header. */
void ir3_set_dst_type(struct ir3_instruction *instr, bool half);
void ir3_fixup_src_type(struct ir3_instruction *instr);

int ir3_flut(struct ir3_register *src_reg);

/* Validity checks for source flags on src 'n' of 'instr' and for an
 * immediate value on 'instr' (exact rules live in the definitions).
 */
bool ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags);

bool ir3_valid_immediate(struct ir3_instruction *instr, int32_t immed);
800bf215546Sopenharmony_ci
801bf215546Sopenharmony_ci#include "util/set.h"
/* Iterate over the entries of (__instr)->uses, binding each key to __use.
 *
 * The outer for-loop only declares __use and skips everything when
 * (__instr)->uses is NULL; it runs at most once because its step expression
 * sets __use to NULL.  The inner set_foreach walks the set, and the trailing
 * `if` assigns the entry's key to __use (skipping NULL keys) before the
 * caller-supplied loop body runs.
 */
#define foreach_ssa_use(__use, __instr)                                        \
   for (struct ir3_instruction *__use = (void *)~0; __use && (__instr)->uses;  \
        __use = NULL)                                                          \
      set_foreach ((__instr)->uses, __entry)                                   \
         if ((__use = (void *)__entry->key))
807bf215546Sopenharmony_ci
808bf215546Sopenharmony_cistatic inline uint32_t
809bf215546Sopenharmony_cireg_num(const struct ir3_register *reg)
810bf215546Sopenharmony_ci{
811bf215546Sopenharmony_ci   return reg->num >> 2;
812bf215546Sopenharmony_ci}
813bf215546Sopenharmony_ci
814bf215546Sopenharmony_cistatic inline uint32_t
815bf215546Sopenharmony_cireg_comp(const struct ir3_register *reg)
816bf215546Sopenharmony_ci{
817bf215546Sopenharmony_ci   return reg->num & 0x3;
818bf215546Sopenharmony_ci}
819bf215546Sopenharmony_ci
820bf215546Sopenharmony_cistatic inline bool
821bf215546Sopenharmony_ciis_flow(struct ir3_instruction *instr)
822bf215546Sopenharmony_ci{
823bf215546Sopenharmony_ci   return (opc_cat(instr->opc) == 0);
824bf215546Sopenharmony_ci}
825bf215546Sopenharmony_ci
826bf215546Sopenharmony_cistatic inline bool
827bf215546Sopenharmony_ciis_kill_or_demote(struct ir3_instruction *instr)
828bf215546Sopenharmony_ci{
829bf215546Sopenharmony_ci   return instr->opc == OPC_KILL || instr->opc == OPC_DEMOTE;
830bf215546Sopenharmony_ci}
831bf215546Sopenharmony_ci
832bf215546Sopenharmony_cistatic inline bool
833bf215546Sopenharmony_ciis_nop(struct ir3_instruction *instr)
834bf215546Sopenharmony_ci{
835bf215546Sopenharmony_ci   return instr->opc == OPC_NOP;
836bf215546Sopenharmony_ci}
837bf215546Sopenharmony_ci
838bf215546Sopenharmony_cistatic inline bool
839bf215546Sopenharmony_ciis_same_type_reg(struct ir3_register *dst, struct ir3_register *src)
840bf215546Sopenharmony_ci{
841bf215546Sopenharmony_ci   unsigned dst_type = (dst->flags & IR3_REG_HALF);
842bf215546Sopenharmony_ci   unsigned src_type = (src->flags & IR3_REG_HALF);
843bf215546Sopenharmony_ci
844bf215546Sopenharmony_ci   /* Treat shared->normal copies as same-type, because they can generally be
845bf215546Sopenharmony_ci    * folded, but not normal->shared copies.
846bf215546Sopenharmony_ci    */
847bf215546Sopenharmony_ci   if (dst_type != src_type ||
848bf215546Sopenharmony_ci       ((dst->flags & IR3_REG_SHARED) && !(src->flags & IR3_REG_SHARED)))
849bf215546Sopenharmony_ci      return false;
850bf215546Sopenharmony_ci   else
851bf215546Sopenharmony_ci      return true;
852bf215546Sopenharmony_ci}
853bf215546Sopenharmony_ci
854bf215546Sopenharmony_ci/* Is it a non-transformative (ie. not type changing) mov?  This can
855bf215546Sopenharmony_ci * also include absneg.s/absneg.f, which for the most part can be
856bf215546Sopenharmony_ci * treated as a mov (single src argument).
857bf215546Sopenharmony_ci */
858bf215546Sopenharmony_cistatic inline bool
859bf215546Sopenharmony_ciis_same_type_mov(struct ir3_instruction *instr)
860bf215546Sopenharmony_ci{
861bf215546Sopenharmony_ci   struct ir3_register *dst;
862bf215546Sopenharmony_ci
863bf215546Sopenharmony_ci   switch (instr->opc) {
864bf215546Sopenharmony_ci   case OPC_MOV:
865bf215546Sopenharmony_ci      if (instr->cat1.src_type != instr->cat1.dst_type)
866bf215546Sopenharmony_ci         return false;
867bf215546Sopenharmony_ci      /* If the type of dest reg and src reg are different,
868bf215546Sopenharmony_ci       * it shouldn't be considered as same type mov
869bf215546Sopenharmony_ci       */
870bf215546Sopenharmony_ci      if (!is_same_type_reg(instr->dsts[0], instr->srcs[0]))
871bf215546Sopenharmony_ci         return false;
872bf215546Sopenharmony_ci      break;
873bf215546Sopenharmony_ci   case OPC_ABSNEG_F:
874bf215546Sopenharmony_ci   case OPC_ABSNEG_S:
875bf215546Sopenharmony_ci      if (instr->flags & IR3_INSTR_SAT)
876bf215546Sopenharmony_ci         return false;
877bf215546Sopenharmony_ci      /* If the type of dest reg and src reg are different,
878bf215546Sopenharmony_ci       * it shouldn't be considered as same type mov
879bf215546Sopenharmony_ci       */
880bf215546Sopenharmony_ci      if (!is_same_type_reg(instr->dsts[0], instr->srcs[0]))
881bf215546Sopenharmony_ci         return false;
882bf215546Sopenharmony_ci      break;
883bf215546Sopenharmony_ci   case OPC_META_PHI:
884bf215546Sopenharmony_ci      return instr->srcs_count == 1;
885bf215546Sopenharmony_ci   default:
886bf215546Sopenharmony_ci      return false;
887bf215546Sopenharmony_ci   }
888bf215546Sopenharmony_ci
889bf215546Sopenharmony_ci   dst = instr->dsts[0];
890bf215546Sopenharmony_ci
891bf215546Sopenharmony_ci   /* mov's that write to a0 or p0.x are special: */
892bf215546Sopenharmony_ci   if (dst->num == regid(REG_P0, 0))
893bf215546Sopenharmony_ci      return false;
894bf215546Sopenharmony_ci   if (reg_num(dst) == REG_A0)
895bf215546Sopenharmony_ci      return false;
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_ci   if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
898bf215546Sopenharmony_ci      return false;
899bf215546Sopenharmony_ci
900bf215546Sopenharmony_ci   return true;
901bf215546Sopenharmony_ci}
902bf215546Sopenharmony_ci
903bf215546Sopenharmony_ci/* A move from const, which changes size but not type, can also be
904bf215546Sopenharmony_ci * folded into dest instruction in some cases.
905bf215546Sopenharmony_ci */
906bf215546Sopenharmony_cistatic inline bool
907bf215546Sopenharmony_ciis_const_mov(struct ir3_instruction *instr)
908bf215546Sopenharmony_ci{
909bf215546Sopenharmony_ci   if (instr->opc != OPC_MOV)
910bf215546Sopenharmony_ci      return false;
911bf215546Sopenharmony_ci
912bf215546Sopenharmony_ci   if (!(instr->srcs[0]->flags & IR3_REG_CONST))
913bf215546Sopenharmony_ci      return false;
914bf215546Sopenharmony_ci
915bf215546Sopenharmony_ci   type_t src_type = instr->cat1.src_type;
916bf215546Sopenharmony_ci   type_t dst_type = instr->cat1.dst_type;
917bf215546Sopenharmony_ci
918bf215546Sopenharmony_ci   return (type_float(src_type) && type_float(dst_type)) ||
919bf215546Sopenharmony_ci          (type_uint(src_type) && type_uint(dst_type)) ||
920bf215546Sopenharmony_ci          (type_sint(src_type) && type_sint(dst_type));
921bf215546Sopenharmony_ci}
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_cistatic inline bool
924bf215546Sopenharmony_ciis_subgroup_cond_mov_macro(struct ir3_instruction *instr)
925bf215546Sopenharmony_ci{
926bf215546Sopenharmony_ci   switch (instr->opc) {
927bf215546Sopenharmony_ci   case OPC_BALLOT_MACRO:
928bf215546Sopenharmony_ci   case OPC_ANY_MACRO:
929bf215546Sopenharmony_ci   case OPC_ALL_MACRO:
930bf215546Sopenharmony_ci   case OPC_ELECT_MACRO:
931bf215546Sopenharmony_ci   case OPC_READ_COND_MACRO:
932bf215546Sopenharmony_ci   case OPC_READ_FIRST_MACRO:
933bf215546Sopenharmony_ci   case OPC_SWZ_SHARED_MACRO:
934bf215546Sopenharmony_ci   case OPC_SCAN_MACRO:
935bf215546Sopenharmony_ci      return true;
936bf215546Sopenharmony_ci   default:
937bf215546Sopenharmony_ci      return false;
938bf215546Sopenharmony_ci   }
939bf215546Sopenharmony_ci}
940bf215546Sopenharmony_ci
941bf215546Sopenharmony_cistatic inline bool
942bf215546Sopenharmony_ciis_alu(struct ir3_instruction *instr)
943bf215546Sopenharmony_ci{
944bf215546Sopenharmony_ci   return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3);
945bf215546Sopenharmony_ci}
946bf215546Sopenharmony_ci
947bf215546Sopenharmony_cistatic inline bool
948bf215546Sopenharmony_ciis_sfu(struct ir3_instruction *instr)
949bf215546Sopenharmony_ci{
950bf215546Sopenharmony_ci   return (opc_cat(instr->opc) == 4) || instr->opc == OPC_GETFIBERID;
951bf215546Sopenharmony_ci}
952bf215546Sopenharmony_ci
953bf215546Sopenharmony_cistatic inline bool
954bf215546Sopenharmony_ciis_tex(struct ir3_instruction *instr)
955bf215546Sopenharmony_ci{
956bf215546Sopenharmony_ci   return (opc_cat(instr->opc) == 5);
957bf215546Sopenharmony_ci}
958bf215546Sopenharmony_ci
959bf215546Sopenharmony_cistatic inline bool
960bf215546Sopenharmony_ciis_tex_or_prefetch(struct ir3_instruction *instr)
961bf215546Sopenharmony_ci{
962bf215546Sopenharmony_ci   return is_tex(instr) || (instr->opc == OPC_META_TEX_PREFETCH);
963bf215546Sopenharmony_ci}
964bf215546Sopenharmony_ci
965bf215546Sopenharmony_cistatic inline bool
966bf215546Sopenharmony_ciis_mem(struct ir3_instruction *instr)
967bf215546Sopenharmony_ci{
968bf215546Sopenharmony_ci   return (opc_cat(instr->opc) == 6) && instr->opc != OPC_GETFIBERID;
969bf215546Sopenharmony_ci}
970bf215546Sopenharmony_ci
971bf215546Sopenharmony_cistatic inline bool
972bf215546Sopenharmony_ciis_barrier(struct ir3_instruction *instr)
973bf215546Sopenharmony_ci{
974bf215546Sopenharmony_ci   return (opc_cat(instr->opc) == 7);
975bf215546Sopenharmony_ci}
976bf215546Sopenharmony_ci
977bf215546Sopenharmony_cistatic inline bool
978bf215546Sopenharmony_ciis_half(struct ir3_instruction *instr)
979bf215546Sopenharmony_ci{
980bf215546Sopenharmony_ci   return !!(instr->dsts[0]->flags & IR3_REG_HALF);
981bf215546Sopenharmony_ci}
982bf215546Sopenharmony_ci
983bf215546Sopenharmony_cistatic inline bool
984bf215546Sopenharmony_ciis_shared(struct ir3_instruction *instr)
985bf215546Sopenharmony_ci{
986bf215546Sopenharmony_ci   return !!(instr->dsts[0]->flags & IR3_REG_SHARED);
987bf215546Sopenharmony_ci}
988bf215546Sopenharmony_ci
989bf215546Sopenharmony_cistatic inline bool
990bf215546Sopenharmony_ciis_store(struct ir3_instruction *instr)
991bf215546Sopenharmony_ci{
992bf215546Sopenharmony_ci   /* these instructions, the "destination" register is
993bf215546Sopenharmony_ci    * actually a source, the address to store to.
994bf215546Sopenharmony_ci    */
995bf215546Sopenharmony_ci   switch (instr->opc) {
996bf215546Sopenharmony_ci   case OPC_STG:
997bf215546Sopenharmony_ci   case OPC_STG_A:
998bf215546Sopenharmony_ci   case OPC_STGB:
999bf215546Sopenharmony_ci   case OPC_STIB:
1000bf215546Sopenharmony_ci   case OPC_STP:
1001bf215546Sopenharmony_ci   case OPC_STL:
1002bf215546Sopenharmony_ci   case OPC_STLW:
1003bf215546Sopenharmony_ci   case OPC_L2G:
1004bf215546Sopenharmony_ci   case OPC_G2L:
1005bf215546Sopenharmony_ci      return true;
1006bf215546Sopenharmony_ci   default:
1007bf215546Sopenharmony_ci      return false;
1008bf215546Sopenharmony_ci   }
1009bf215546Sopenharmony_ci}
1010bf215546Sopenharmony_ci
1011bf215546Sopenharmony_cistatic inline bool
1012bf215546Sopenharmony_ciis_load(struct ir3_instruction *instr)
1013bf215546Sopenharmony_ci{
1014bf215546Sopenharmony_ci   switch (instr->opc) {
1015bf215546Sopenharmony_ci   case OPC_LDG:
1016bf215546Sopenharmony_ci   case OPC_LDG_A:
1017bf215546Sopenharmony_ci   case OPC_LDGB:
1018bf215546Sopenharmony_ci   case OPC_LDIB:
1019bf215546Sopenharmony_ci   case OPC_LDL:
1020bf215546Sopenharmony_ci   case OPC_LDP:
1021bf215546Sopenharmony_ci   case OPC_L2G:
1022bf215546Sopenharmony_ci   case OPC_LDLW:
1023bf215546Sopenharmony_ci   case OPC_LDC:
1024bf215546Sopenharmony_ci   case OPC_LDLV:
1025bf215546Sopenharmony_ci      /* probably some others too.. */
1026bf215546Sopenharmony_ci      return true;
1027bf215546Sopenharmony_ci   default:
1028bf215546Sopenharmony_ci      return false;
1029bf215546Sopenharmony_ci   }
1030bf215546Sopenharmony_ci}
1031bf215546Sopenharmony_ci
1032bf215546Sopenharmony_cistatic inline bool
1033bf215546Sopenharmony_ciis_input(struct ir3_instruction *instr)
1034bf215546Sopenharmony_ci{
1035bf215546Sopenharmony_ci   /* in some cases, ldlv is used to fetch varying without
1036bf215546Sopenharmony_ci    * interpolation.. fortunately inloc is the first src
1037bf215546Sopenharmony_ci    * register in either case
1038bf215546Sopenharmony_ci    */
1039bf215546Sopenharmony_ci   switch (instr->opc) {
1040bf215546Sopenharmony_ci   case OPC_LDLV:
1041bf215546Sopenharmony_ci   case OPC_BARY_F:
1042bf215546Sopenharmony_ci   case OPC_FLAT_B:
1043bf215546Sopenharmony_ci      return true;
1044bf215546Sopenharmony_ci   default:
1045bf215546Sopenharmony_ci      return false;
1046bf215546Sopenharmony_ci   }
1047bf215546Sopenharmony_ci}
1048bf215546Sopenharmony_ci
1049bf215546Sopenharmony_cistatic inline bool
1050bf215546Sopenharmony_ciis_bool(struct ir3_instruction *instr)
1051bf215546Sopenharmony_ci{
1052bf215546Sopenharmony_ci   switch (instr->opc) {
1053bf215546Sopenharmony_ci   case OPC_CMPS_F:
1054bf215546Sopenharmony_ci   case OPC_CMPS_S:
1055bf215546Sopenharmony_ci   case OPC_CMPS_U:
1056bf215546Sopenharmony_ci      return true;
1057bf215546Sopenharmony_ci   default:
1058bf215546Sopenharmony_ci      return false;
1059bf215546Sopenharmony_ci   }
1060bf215546Sopenharmony_ci}
1061bf215546Sopenharmony_ci
1062bf215546Sopenharmony_cistatic inline opc_t
1063bf215546Sopenharmony_cicat3_half_opc(opc_t opc)
1064bf215546Sopenharmony_ci{
1065bf215546Sopenharmony_ci   switch (opc) {
1066bf215546Sopenharmony_ci   case OPC_MAD_F32:
1067bf215546Sopenharmony_ci      return OPC_MAD_F16;
1068bf215546Sopenharmony_ci   case OPC_SEL_B32:
1069bf215546Sopenharmony_ci      return OPC_SEL_B16;
1070bf215546Sopenharmony_ci   case OPC_SEL_S32:
1071bf215546Sopenharmony_ci      return OPC_SEL_S16;
1072bf215546Sopenharmony_ci   case OPC_SEL_F32:
1073bf215546Sopenharmony_ci      return OPC_SEL_F16;
1074bf215546Sopenharmony_ci   case OPC_SAD_S32:
1075bf215546Sopenharmony_ci      return OPC_SAD_S16;
1076bf215546Sopenharmony_ci   default:
1077bf215546Sopenharmony_ci      return opc;
1078bf215546Sopenharmony_ci   }
1079bf215546Sopenharmony_ci}
1080bf215546Sopenharmony_ci
1081bf215546Sopenharmony_cistatic inline opc_t
1082bf215546Sopenharmony_cicat3_full_opc(opc_t opc)
1083bf215546Sopenharmony_ci{
1084bf215546Sopenharmony_ci   switch (opc) {
1085bf215546Sopenharmony_ci   case OPC_MAD_F16:
1086bf215546Sopenharmony_ci      return OPC_MAD_F32;
1087bf215546Sopenharmony_ci   case OPC_SEL_B16:
1088bf215546Sopenharmony_ci      return OPC_SEL_B32;
1089bf215546Sopenharmony_ci   case OPC_SEL_S16:
1090bf215546Sopenharmony_ci      return OPC_SEL_S32;
1091bf215546Sopenharmony_ci   case OPC_SEL_F16:
1092bf215546Sopenharmony_ci      return OPC_SEL_F32;
1093bf215546Sopenharmony_ci   case OPC_SAD_S16:
1094bf215546Sopenharmony_ci      return OPC_SAD_S32;
1095bf215546Sopenharmony_ci   default:
1096bf215546Sopenharmony_ci      return opc;
1097bf215546Sopenharmony_ci   }
1098bf215546Sopenharmony_ci}
1099bf215546Sopenharmony_ci
1100bf215546Sopenharmony_cistatic inline opc_t
1101bf215546Sopenharmony_cicat4_half_opc(opc_t opc)
1102bf215546Sopenharmony_ci{
1103bf215546Sopenharmony_ci   switch (opc) {
1104bf215546Sopenharmony_ci   case OPC_RSQ:
1105bf215546Sopenharmony_ci      return OPC_HRSQ;
1106bf215546Sopenharmony_ci   case OPC_LOG2:
1107bf215546Sopenharmony_ci      return OPC_HLOG2;
1108bf215546Sopenharmony_ci   case OPC_EXP2:
1109bf215546Sopenharmony_ci      return OPC_HEXP2;
1110bf215546Sopenharmony_ci   default:
1111bf215546Sopenharmony_ci      return opc;
1112bf215546Sopenharmony_ci   }
1113bf215546Sopenharmony_ci}
1114bf215546Sopenharmony_ci
1115bf215546Sopenharmony_cistatic inline opc_t
1116bf215546Sopenharmony_cicat4_full_opc(opc_t opc)
1117bf215546Sopenharmony_ci{
1118bf215546Sopenharmony_ci   switch (opc) {
1119bf215546Sopenharmony_ci   case OPC_HRSQ:
1120bf215546Sopenharmony_ci      return OPC_RSQ;
1121bf215546Sopenharmony_ci   case OPC_HLOG2:
1122bf215546Sopenharmony_ci      return OPC_LOG2;
1123bf215546Sopenharmony_ci   case OPC_HEXP2:
1124bf215546Sopenharmony_ci      return OPC_EXP2;
1125bf215546Sopenharmony_ci   default:
1126bf215546Sopenharmony_ci      return opc;
1127bf215546Sopenharmony_ci   }
1128bf215546Sopenharmony_ci}
1129bf215546Sopenharmony_ci
1130bf215546Sopenharmony_cistatic inline bool
1131bf215546Sopenharmony_ciis_meta(struct ir3_instruction *instr)
1132bf215546Sopenharmony_ci{
1133bf215546Sopenharmony_ci   return (opc_cat(instr->opc) == -1);
1134bf215546Sopenharmony_ci}
1135bf215546Sopenharmony_ci
1136bf215546Sopenharmony_cistatic inline unsigned
1137bf215546Sopenharmony_cireg_elems(const struct ir3_register *reg)
1138bf215546Sopenharmony_ci{
1139bf215546Sopenharmony_ci   if (reg->flags & IR3_REG_ARRAY)
1140bf215546Sopenharmony_ci      return reg->size;
1141bf215546Sopenharmony_ci   else
1142bf215546Sopenharmony_ci      return util_last_bit(reg->wrmask);
1143bf215546Sopenharmony_ci}
1144bf215546Sopenharmony_ci
1145bf215546Sopenharmony_cistatic inline unsigned
1146bf215546Sopenharmony_cireg_elem_size(const struct ir3_register *reg)
1147bf215546Sopenharmony_ci{
1148bf215546Sopenharmony_ci   return (reg->flags & IR3_REG_HALF) ? 1 : 2;
1149bf215546Sopenharmony_ci}
1150bf215546Sopenharmony_ci
/* Total size of 'reg': element count times per-element size. */
static inline unsigned
reg_size(const struct ir3_register *reg)
{
   const unsigned count = reg_elems(reg);
   const unsigned elem_size = reg_elem_size(reg);

   return count * elem_size;
}
1156bf215546Sopenharmony_ci
1157bf215546Sopenharmony_cistatic inline unsigned
1158bf215546Sopenharmony_cidest_regs(struct ir3_instruction *instr)
1159bf215546Sopenharmony_ci{
1160bf215546Sopenharmony_ci   if (instr->dsts_count == 0)
1161bf215546Sopenharmony_ci      return 0;
1162bf215546Sopenharmony_ci
1163bf215546Sopenharmony_ci   assert(instr->dsts_count == 1);
1164bf215546Sopenharmony_ci   return util_last_bit(instr->dsts[0]->wrmask);
1165bf215546Sopenharmony_ci}
1166bf215546Sopenharmony_ci
1167bf215546Sopenharmony_ci/* is dst a normal temp register: */
1168bf215546Sopenharmony_cistatic inline bool
1169bf215546Sopenharmony_ciis_dest_gpr(struct ir3_register *dst)
1170bf215546Sopenharmony_ci{
1171bf215546Sopenharmony_ci   if (dst->wrmask == 0)
1172bf215546Sopenharmony_ci      return false;
1173bf215546Sopenharmony_ci   if ((reg_num(dst) == REG_A0) || (dst->num == regid(REG_P0, 0)))
1174bf215546Sopenharmony_ci      return false;
1175bf215546Sopenharmony_ci   return true;
1176bf215546Sopenharmony_ci}
1177bf215546Sopenharmony_ci
1178bf215546Sopenharmony_cistatic inline bool
1179bf215546Sopenharmony_ciwrites_gpr(struct ir3_instruction *instr)
1180bf215546Sopenharmony_ci{
1181bf215546Sopenharmony_ci   if (dest_regs(instr) == 0)
1182bf215546Sopenharmony_ci      return false;
1183bf215546Sopenharmony_ci   return is_dest_gpr(instr->dsts[0]);
1184bf215546Sopenharmony_ci}
1185bf215546Sopenharmony_ci
1186bf215546Sopenharmony_cistatic inline bool
1187bf215546Sopenharmony_ciwrites_addr0(struct ir3_instruction *instr)
1188bf215546Sopenharmony_ci{
1189bf215546Sopenharmony_ci   /* Note: only the first dest can write to a0.x */
1190bf215546Sopenharmony_ci   if (instr->dsts_count > 0) {
1191bf215546Sopenharmony_ci      struct ir3_register *dst = instr->dsts[0];
1192bf215546Sopenharmony_ci      return dst->num == regid(REG_A0, 0);
1193bf215546Sopenharmony_ci   }
1194bf215546Sopenharmony_ci   return false;
1195bf215546Sopenharmony_ci}
1196bf215546Sopenharmony_ci
1197bf215546Sopenharmony_cistatic inline bool
1198bf215546Sopenharmony_ciwrites_addr1(struct ir3_instruction *instr)
1199bf215546Sopenharmony_ci{
1200bf215546Sopenharmony_ci   /* Note: only the first dest can write to a1.x */
1201bf215546Sopenharmony_ci   if (instr->dsts_count > 0) {
1202bf215546Sopenharmony_ci      struct ir3_register *dst = instr->dsts[0];
1203bf215546Sopenharmony_ci      return dst->num == regid(REG_A0, 1);
1204bf215546Sopenharmony_ci   }
1205bf215546Sopenharmony_ci   return false;
1206bf215546Sopenharmony_ci}
1207bf215546Sopenharmony_ci
1208bf215546Sopenharmony_cistatic inline bool
1209bf215546Sopenharmony_ciwrites_pred(struct ir3_instruction *instr)
1210bf215546Sopenharmony_ci{
1211bf215546Sopenharmony_ci   /* Note: only the first dest can write to p0.x */
1212bf215546Sopenharmony_ci   if (instr->dsts_count > 0) {
1213bf215546Sopenharmony_ci      struct ir3_register *dst = instr->dsts[0];
1214bf215546Sopenharmony_ci      return reg_num(dst) == REG_P0;
1215bf215546Sopenharmony_ci   }
1216bf215546Sopenharmony_ci   return false;
1217bf215546Sopenharmony_ci}
1218bf215546Sopenharmony_ci
1219bf215546Sopenharmony_ci/* Is it something other than a normal register. Shared regs, p0, and a0/a1
1220bf215546Sopenharmony_ci * are considered special here. Special registers are always accessed with one
1221bf215546Sopenharmony_ci * size and never alias normal registers, even though a naive calculation
1222bf215546Sopenharmony_ci * would sometimes make it seem like e.g. r30.z aliases a0.x.
1223bf215546Sopenharmony_ci */
1224bf215546Sopenharmony_cistatic inline bool
1225bf215546Sopenharmony_ciis_reg_special(const struct ir3_register *reg)
1226bf215546Sopenharmony_ci{
1227bf215546Sopenharmony_ci   return (reg->flags & IR3_REG_SHARED) || (reg_num(reg) == REG_A0) ||
1228bf215546Sopenharmony_ci          (reg_num(reg) == REG_P0);
1229bf215546Sopenharmony_ci}
1230bf215546Sopenharmony_ci
/* Variant of the special-register check for when only a flat register number
 * is at hand: r48.x and everything above it is shared/special.
 */
static inline bool
is_reg_num_special(unsigned num)
{
   /* flat number of r48.x */
   const unsigned first_special = 48 * 4;

   return !(num < first_special);
}
1239bf215546Sopenharmony_ci
1240bf215546Sopenharmony_ci/* returns defining instruction for reg */
1241bf215546Sopenharmony_ci/* TODO better name */
1242bf215546Sopenharmony_cistatic inline struct ir3_instruction *
1243bf215546Sopenharmony_cissa(struct ir3_register *reg)
1244bf215546Sopenharmony_ci{
1245bf215546Sopenharmony_ci   if ((reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) && reg->def)
1246bf215546Sopenharmony_ci      return reg->def->instr;
1247bf215546Sopenharmony_ci   return NULL;
1248bf215546Sopenharmony_ci}
1249bf215546Sopenharmony_ci
1250bf215546Sopenharmony_cistatic inline bool
1251bf215546Sopenharmony_ciconflicts(struct ir3_register *a, struct ir3_register *b)
1252bf215546Sopenharmony_ci{
1253bf215546Sopenharmony_ci   return (a && b) && (a->def != b->def);
1254bf215546Sopenharmony_ci}
1255bf215546Sopenharmony_ci
1256bf215546Sopenharmony_cistatic inline bool
1257bf215546Sopenharmony_cireg_gpr(struct ir3_register *r)
1258bf215546Sopenharmony_ci{
1259bf215546Sopenharmony_ci   if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED))
1260bf215546Sopenharmony_ci      return false;
1261bf215546Sopenharmony_ci   if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
1262bf215546Sopenharmony_ci      return false;
1263bf215546Sopenharmony_ci   return true;
1264bf215546Sopenharmony_ci}
1265bf215546Sopenharmony_ci
1266bf215546Sopenharmony_cistatic inline type_t
1267bf215546Sopenharmony_cihalf_type(type_t type)
1268bf215546Sopenharmony_ci{
1269bf215546Sopenharmony_ci   switch (type) {
1270bf215546Sopenharmony_ci   case TYPE_F32:
1271bf215546Sopenharmony_ci      return TYPE_F16;
1272bf215546Sopenharmony_ci   case TYPE_U32:
1273bf215546Sopenharmony_ci      return TYPE_U16;
1274bf215546Sopenharmony_ci   case TYPE_S32:
1275bf215546Sopenharmony_ci      return TYPE_S16;
1276bf215546Sopenharmony_ci   case TYPE_F16:
1277bf215546Sopenharmony_ci   case TYPE_U16:
1278bf215546Sopenharmony_ci   case TYPE_S16:
1279bf215546Sopenharmony_ci      return type;
1280bf215546Sopenharmony_ci   case TYPE_U8:
1281bf215546Sopenharmony_ci   case TYPE_S8:
1282bf215546Sopenharmony_ci      return type;
1283bf215546Sopenharmony_ci   default:
1284bf215546Sopenharmony_ci      assert(0);
1285bf215546Sopenharmony_ci      return ~0;
1286bf215546Sopenharmony_ci   }
1287bf215546Sopenharmony_ci}
1288bf215546Sopenharmony_ci
1289bf215546Sopenharmony_cistatic inline type_t
1290bf215546Sopenharmony_cifull_type(type_t type)
1291bf215546Sopenharmony_ci{
1292bf215546Sopenharmony_ci   switch (type) {
1293bf215546Sopenharmony_ci   case TYPE_F16:
1294bf215546Sopenharmony_ci      return TYPE_F32;
1295bf215546Sopenharmony_ci   case TYPE_U8:
1296bf215546Sopenharmony_ci   case TYPE_U16:
1297bf215546Sopenharmony_ci      return TYPE_U32;
1298bf215546Sopenharmony_ci   case TYPE_S8:
1299bf215546Sopenharmony_ci   case TYPE_S16:
1300bf215546Sopenharmony_ci      return TYPE_S32;
1301bf215546Sopenharmony_ci   case TYPE_F32:
1302bf215546Sopenharmony_ci   case TYPE_U32:
1303bf215546Sopenharmony_ci   case TYPE_S32:
1304bf215546Sopenharmony_ci      return type;
1305bf215546Sopenharmony_ci   default:
1306bf215546Sopenharmony_ci      assert(0);
1307bf215546Sopenharmony_ci      return ~0;
1308bf215546Sopenharmony_ci   }
1309bf215546Sopenharmony_ci}
1310bf215546Sopenharmony_ci
1311bf215546Sopenharmony_ci/* some cat2 instructions (ie. those which are not float) can embed an
1312bf215546Sopenharmony_ci * immediate:
1313bf215546Sopenharmony_ci */
1314bf215546Sopenharmony_cistatic inline bool
1315bf215546Sopenharmony_ciir3_cat2_int(opc_t opc)
1316bf215546Sopenharmony_ci{
1317bf215546Sopenharmony_ci   switch (opc) {
1318bf215546Sopenharmony_ci   case OPC_ADD_U:
1319bf215546Sopenharmony_ci   case OPC_ADD_S:
1320bf215546Sopenharmony_ci   case OPC_SUB_U:
1321bf215546Sopenharmony_ci   case OPC_SUB_S:
1322bf215546Sopenharmony_ci   case OPC_CMPS_U:
1323bf215546Sopenharmony_ci   case OPC_CMPS_S:
1324bf215546Sopenharmony_ci   case OPC_MIN_U:
1325bf215546Sopenharmony_ci   case OPC_MIN_S:
1326bf215546Sopenharmony_ci   case OPC_MAX_U:
1327bf215546Sopenharmony_ci   case OPC_MAX_S:
1328bf215546Sopenharmony_ci   case OPC_CMPV_U:
1329bf215546Sopenharmony_ci   case OPC_CMPV_S:
1330bf215546Sopenharmony_ci   case OPC_MUL_U24:
1331bf215546Sopenharmony_ci   case OPC_MUL_S24:
1332bf215546Sopenharmony_ci   case OPC_MULL_U:
1333bf215546Sopenharmony_ci   case OPC_CLZ_S:
1334bf215546Sopenharmony_ci   case OPC_ABSNEG_S:
1335bf215546Sopenharmony_ci   case OPC_AND_B:
1336bf215546Sopenharmony_ci   case OPC_OR_B:
1337bf215546Sopenharmony_ci   case OPC_NOT_B:
1338bf215546Sopenharmony_ci   case OPC_XOR_B:
1339bf215546Sopenharmony_ci   case OPC_BFREV_B:
1340bf215546Sopenharmony_ci   case OPC_CLZ_B:
1341bf215546Sopenharmony_ci   case OPC_SHL_B:
1342bf215546Sopenharmony_ci   case OPC_SHR_B:
1343bf215546Sopenharmony_ci   case OPC_ASHR_B:
1344bf215546Sopenharmony_ci   case OPC_MGEN_B:
1345bf215546Sopenharmony_ci   case OPC_GETBIT_B:
1346bf215546Sopenharmony_ci   case OPC_CBITS_B:
1347bf215546Sopenharmony_ci   case OPC_BARY_F:
1348bf215546Sopenharmony_ci   case OPC_FLAT_B:
1349bf215546Sopenharmony_ci      return true;
1350bf215546Sopenharmony_ci
1351bf215546Sopenharmony_ci   default:
1352bf215546Sopenharmony_ci      return false;
1353bf215546Sopenharmony_ci   }
1354bf215546Sopenharmony_ci}
1355bf215546Sopenharmony_ci
1356bf215546Sopenharmony_ci/* map cat2 instruction to valid abs/neg flags: */
1357bf215546Sopenharmony_cistatic inline unsigned
1358bf215546Sopenharmony_ciir3_cat2_absneg(opc_t opc)
1359bf215546Sopenharmony_ci{
1360bf215546Sopenharmony_ci   switch (opc) {
1361bf215546Sopenharmony_ci   case OPC_ADD_F:
1362bf215546Sopenharmony_ci   case OPC_MIN_F:
1363bf215546Sopenharmony_ci   case OPC_MAX_F:
1364bf215546Sopenharmony_ci   case OPC_MUL_F:
1365bf215546Sopenharmony_ci   case OPC_SIGN_F:
1366bf215546Sopenharmony_ci   case OPC_CMPS_F:
1367bf215546Sopenharmony_ci   case OPC_ABSNEG_F:
1368bf215546Sopenharmony_ci   case OPC_CMPV_F:
1369bf215546Sopenharmony_ci   case OPC_FLOOR_F:
1370bf215546Sopenharmony_ci   case OPC_CEIL_F:
1371bf215546Sopenharmony_ci   case OPC_RNDNE_F:
1372bf215546Sopenharmony_ci   case OPC_RNDAZ_F:
1373bf215546Sopenharmony_ci   case OPC_TRUNC_F:
1374bf215546Sopenharmony_ci   case OPC_BARY_F:
1375bf215546Sopenharmony_ci      return IR3_REG_FABS | IR3_REG_FNEG;
1376bf215546Sopenharmony_ci
1377bf215546Sopenharmony_ci   case OPC_ADD_U:
1378bf215546Sopenharmony_ci   case OPC_ADD_S:
1379bf215546Sopenharmony_ci   case OPC_SUB_U:
1380bf215546Sopenharmony_ci   case OPC_SUB_S:
1381bf215546Sopenharmony_ci   case OPC_CMPS_U:
1382bf215546Sopenharmony_ci   case OPC_CMPS_S:
1383bf215546Sopenharmony_ci   case OPC_MIN_U:
1384bf215546Sopenharmony_ci   case OPC_MIN_S:
1385bf215546Sopenharmony_ci   case OPC_MAX_U:
1386bf215546Sopenharmony_ci   case OPC_MAX_S:
1387bf215546Sopenharmony_ci   case OPC_CMPV_U:
1388bf215546Sopenharmony_ci   case OPC_CMPV_S:
1389bf215546Sopenharmony_ci   case OPC_MUL_U24:
1390bf215546Sopenharmony_ci   case OPC_MUL_S24:
1391bf215546Sopenharmony_ci   case OPC_MULL_U:
1392bf215546Sopenharmony_ci   case OPC_CLZ_S:
1393bf215546Sopenharmony_ci      return 0;
1394bf215546Sopenharmony_ci
1395bf215546Sopenharmony_ci   case OPC_ABSNEG_S:
1396bf215546Sopenharmony_ci      return IR3_REG_SABS | IR3_REG_SNEG;
1397bf215546Sopenharmony_ci
1398bf215546Sopenharmony_ci   case OPC_AND_B:
1399bf215546Sopenharmony_ci   case OPC_OR_B:
1400bf215546Sopenharmony_ci   case OPC_NOT_B:
1401bf215546Sopenharmony_ci   case OPC_XOR_B:
1402bf215546Sopenharmony_ci   case OPC_BFREV_B:
1403bf215546Sopenharmony_ci   case OPC_CLZ_B:
1404bf215546Sopenharmony_ci   case OPC_SHL_B:
1405bf215546Sopenharmony_ci   case OPC_SHR_B:
1406bf215546Sopenharmony_ci   case OPC_ASHR_B:
1407bf215546Sopenharmony_ci   case OPC_MGEN_B:
1408bf215546Sopenharmony_ci   case OPC_GETBIT_B:
1409bf215546Sopenharmony_ci   case OPC_CBITS_B:
1410bf215546Sopenharmony_ci      return IR3_REG_BNOT;
1411bf215546Sopenharmony_ci
1412bf215546Sopenharmony_ci   default:
1413bf215546Sopenharmony_ci      return 0;
1414bf215546Sopenharmony_ci   }
1415bf215546Sopenharmony_ci}
1416bf215546Sopenharmony_ci
1417bf215546Sopenharmony_ci/* map cat3 instructions to valid abs/neg flags: */
1418bf215546Sopenharmony_cistatic inline unsigned
1419bf215546Sopenharmony_ciir3_cat3_absneg(opc_t opc)
1420bf215546Sopenharmony_ci{
1421bf215546Sopenharmony_ci   switch (opc) {
1422bf215546Sopenharmony_ci   case OPC_MAD_F16:
1423bf215546Sopenharmony_ci   case OPC_MAD_F32:
1424bf215546Sopenharmony_ci   case OPC_SEL_F16:
1425bf215546Sopenharmony_ci   case OPC_SEL_F32:
1426bf215546Sopenharmony_ci      return IR3_REG_FNEG;
1427bf215546Sopenharmony_ci
1428bf215546Sopenharmony_ci   case OPC_MAD_U16:
1429bf215546Sopenharmony_ci   case OPC_MADSH_U16:
1430bf215546Sopenharmony_ci   case OPC_MAD_S16:
1431bf215546Sopenharmony_ci   case OPC_MADSH_M16:
1432bf215546Sopenharmony_ci   case OPC_MAD_U24:
1433bf215546Sopenharmony_ci   case OPC_MAD_S24:
1434bf215546Sopenharmony_ci   case OPC_SEL_S16:
1435bf215546Sopenharmony_ci   case OPC_SEL_S32:
1436bf215546Sopenharmony_ci   case OPC_SAD_S16:
1437bf215546Sopenharmony_ci   case OPC_SAD_S32:
1438bf215546Sopenharmony_ci      /* neg *may* work on 3rd src.. */
1439bf215546Sopenharmony_ci
1440bf215546Sopenharmony_ci   case OPC_SEL_B16:
1441bf215546Sopenharmony_ci   case OPC_SEL_B32:
1442bf215546Sopenharmony_ci
1443bf215546Sopenharmony_ci   case OPC_SHRM:
1444bf215546Sopenharmony_ci   case OPC_SHLM:
1445bf215546Sopenharmony_ci   case OPC_SHRG:
1446bf215546Sopenharmony_ci   case OPC_SHLG:
1447bf215546Sopenharmony_ci   case OPC_ANDG:
1448bf215546Sopenharmony_ci   case OPC_WMM:
1449bf215546Sopenharmony_ci   case OPC_WMM_ACCU:
1450bf215546Sopenharmony_ci
1451bf215546Sopenharmony_ci   default:
1452bf215546Sopenharmony_ci      return 0;
1453bf215546Sopenharmony_ci   }
1454bf215546Sopenharmony_ci}
1455bf215546Sopenharmony_ci
1456bf215546Sopenharmony_ci/* Return the type (float, int, or uint) the op uses when converting from the
1457bf215546Sopenharmony_ci * internal result of the op (which is assumed to be the same size as the
1458bf215546Sopenharmony_ci * sources) to the destination when they are not the same size. If F32 it does
1459bf215546Sopenharmony_ci * a floating-point conversion, if U32 it does a truncation/zero-extension, if
1460bf215546Sopenharmony_ci * S32 it does a truncation/sign-extension. "can_fold" will be false if it
1461bf215546Sopenharmony_ci * doesn't do anything sensible or is unknown.
1462bf215546Sopenharmony_ci */
1463bf215546Sopenharmony_cistatic inline type_t
1464bf215546Sopenharmony_ciir3_output_conv_type(struct ir3_instruction *instr, bool *can_fold)
1465bf215546Sopenharmony_ci{
1466bf215546Sopenharmony_ci   *can_fold = true;
1467bf215546Sopenharmony_ci   switch (instr->opc) {
1468bf215546Sopenharmony_ci   case OPC_ADD_F:
1469bf215546Sopenharmony_ci   case OPC_MUL_F:
1470bf215546Sopenharmony_ci   case OPC_BARY_F:
1471bf215546Sopenharmony_ci   case OPC_MAD_F32:
1472bf215546Sopenharmony_ci   case OPC_MAD_F16:
1473bf215546Sopenharmony_ci   case OPC_WMM:
1474bf215546Sopenharmony_ci   case OPC_WMM_ACCU:
1475bf215546Sopenharmony_ci      return TYPE_F32;
1476bf215546Sopenharmony_ci
1477bf215546Sopenharmony_ci   case OPC_ADD_U:
1478bf215546Sopenharmony_ci   case OPC_SUB_U:
1479bf215546Sopenharmony_ci   case OPC_MIN_U:
1480bf215546Sopenharmony_ci   case OPC_MAX_U:
1481bf215546Sopenharmony_ci   case OPC_AND_B:
1482bf215546Sopenharmony_ci   case OPC_OR_B:
1483bf215546Sopenharmony_ci   case OPC_NOT_B:
1484bf215546Sopenharmony_ci   case OPC_XOR_B:
1485bf215546Sopenharmony_ci   case OPC_MUL_U24:
1486bf215546Sopenharmony_ci   case OPC_MULL_U:
1487bf215546Sopenharmony_ci   case OPC_SHL_B:
1488bf215546Sopenharmony_ci   case OPC_SHR_B:
1489bf215546Sopenharmony_ci   case OPC_ASHR_B:
1490bf215546Sopenharmony_ci   case OPC_MAD_U24:
1491bf215546Sopenharmony_ci   case OPC_SHRM:
1492bf215546Sopenharmony_ci   case OPC_SHLM:
1493bf215546Sopenharmony_ci   case OPC_SHRG:
1494bf215546Sopenharmony_ci   case OPC_SHLG:
1495bf215546Sopenharmony_ci   case OPC_ANDG:
1496bf215546Sopenharmony_ci   /* Comparison ops zero-extend/truncate their results, so consider them as
1497bf215546Sopenharmony_ci    * unsigned here.
1498bf215546Sopenharmony_ci    */
1499bf215546Sopenharmony_ci   case OPC_CMPS_F:
1500bf215546Sopenharmony_ci   case OPC_CMPV_F:
1501bf215546Sopenharmony_ci   case OPC_CMPS_U:
1502bf215546Sopenharmony_ci   case OPC_CMPS_S:
1503bf215546Sopenharmony_ci      return TYPE_U32;
1504bf215546Sopenharmony_ci
1505bf215546Sopenharmony_ci   case OPC_ADD_S:
1506bf215546Sopenharmony_ci   case OPC_SUB_S:
1507bf215546Sopenharmony_ci   case OPC_MIN_S:
1508bf215546Sopenharmony_ci   case OPC_MAX_S:
1509bf215546Sopenharmony_ci   case OPC_ABSNEG_S:
1510bf215546Sopenharmony_ci   case OPC_MUL_S24:
1511bf215546Sopenharmony_ci   case OPC_MAD_S24:
1512bf215546Sopenharmony_ci      return TYPE_S32;
1513bf215546Sopenharmony_ci
1514bf215546Sopenharmony_ci   /* We assume that any move->move folding that could be done was done by
1515bf215546Sopenharmony_ci    * NIR.
1516bf215546Sopenharmony_ci    */
1517bf215546Sopenharmony_ci   case OPC_MOV:
1518bf215546Sopenharmony_ci   default:
1519bf215546Sopenharmony_ci      *can_fold = false;
1520bf215546Sopenharmony_ci      return TYPE_U32;
1521bf215546Sopenharmony_ci   }
1522bf215546Sopenharmony_ci}
1523bf215546Sopenharmony_ci
1524bf215546Sopenharmony_ci/* Return the src and dst types for the conversion which is already folded
1525bf215546Sopenharmony_ci * into the op. We can assume that instr has folded in a conversion from
1526bf215546Sopenharmony_ci * ir3_output_conv_src_type() to ir3_output_conv_dst_type(). Only makes sense
1527bf215546Sopenharmony_ci * to call if ir3_output_conv_type() returns can_fold = true.
1528bf215546Sopenharmony_ci */
1529bf215546Sopenharmony_cistatic inline type_t
1530bf215546Sopenharmony_ciir3_output_conv_src_type(struct ir3_instruction *instr, type_t base_type)
1531bf215546Sopenharmony_ci{
1532bf215546Sopenharmony_ci   switch (instr->opc) {
1533bf215546Sopenharmony_ci   case OPC_CMPS_F:
1534bf215546Sopenharmony_ci   case OPC_CMPV_F:
1535bf215546Sopenharmony_ci   case OPC_CMPS_U:
1536bf215546Sopenharmony_ci   case OPC_CMPS_S:
1537bf215546Sopenharmony_ci      /* Comparisons only return 0/1 and the size of the comparison sources
1538bf215546Sopenharmony_ci       * is irrelevant, never consider them as having an output conversion
1539bf215546Sopenharmony_ci       * by returning a type with the dest size here:
1540bf215546Sopenharmony_ci       */
1541bf215546Sopenharmony_ci      return (instr->dsts[0]->flags & IR3_REG_HALF) ? half_type(base_type)
1542bf215546Sopenharmony_ci                                                    : full_type(base_type);
1543bf215546Sopenharmony_ci
1544bf215546Sopenharmony_ci   case OPC_BARY_F:
1545bf215546Sopenharmony_ci      /* bary.f doesn't have an explicit source, but we can assume here that
1546bf215546Sopenharmony_ci       * the varying data it reads is in fp32.
1547bf215546Sopenharmony_ci       *
1548bf215546Sopenharmony_ci       * This may be fp16 on older gen's depending on some register
1549bf215546Sopenharmony_ci       * settings, but it's probably not worth plumbing that through for a
1550bf215546Sopenharmony_ci       * small improvement that NIR would hopefully handle for us anyway.
1551bf215546Sopenharmony_ci       */
1552bf215546Sopenharmony_ci      return TYPE_F32;
1553bf215546Sopenharmony_ci
1554bf215546Sopenharmony_ci   case OPC_FLAT_B:
1555bf215546Sopenharmony_ci      /* Treat the input data as u32 if not interpolating. */
1556bf215546Sopenharmony_ci      return TYPE_U32;
1557bf215546Sopenharmony_ci
1558bf215546Sopenharmony_ci   default:
1559bf215546Sopenharmony_ci      return (instr->srcs[0]->flags & IR3_REG_HALF) ? half_type(base_type)
1560bf215546Sopenharmony_ci                                                    : full_type(base_type);
1561bf215546Sopenharmony_ci   }
1562bf215546Sopenharmony_ci}
1563bf215546Sopenharmony_ci
1564bf215546Sopenharmony_cistatic inline type_t
1565bf215546Sopenharmony_ciir3_output_conv_dst_type(struct ir3_instruction *instr, type_t base_type)
1566bf215546Sopenharmony_ci{
1567bf215546Sopenharmony_ci   return (instr->dsts[0]->flags & IR3_REG_HALF) ? half_type(base_type)
1568bf215546Sopenharmony_ci                                                 : full_type(base_type);
1569bf215546Sopenharmony_ci}
1570bf215546Sopenharmony_ci
1571bf215546Sopenharmony_ci/* Some instructions have signed/unsigned variants which are identical except
1572bf215546Sopenharmony_ci * for whether the folded conversion sign-extends or zero-extends, and we can
1573bf215546Sopenharmony_ci * fold in a mismatching move by rewriting the opcode. Return the opcode to
1574bf215546Sopenharmony_ci * switch signedness, and whether one exists.
1575bf215546Sopenharmony_ci */
1576bf215546Sopenharmony_cistatic inline opc_t
1577bf215546Sopenharmony_ciir3_try_swap_signedness(opc_t opc, bool *can_swap)
1578bf215546Sopenharmony_ci{
1579bf215546Sopenharmony_ci   switch (opc) {
1580bf215546Sopenharmony_ci#define PAIR(u, s)                                                             \
1581bf215546Sopenharmony_ci   case OPC_##u:                                                               \
1582bf215546Sopenharmony_ci      return OPC_##s;                                                          \
1583bf215546Sopenharmony_ci   case OPC_##s:                                                               \
1584bf215546Sopenharmony_ci      return OPC_##u;
1585bf215546Sopenharmony_ci      PAIR(ADD_U, ADD_S)
1586bf215546Sopenharmony_ci      PAIR(SUB_U, SUB_S)
1587bf215546Sopenharmony_ci      /* Note: these are only identical when the sources are half, but that's
1588bf215546Sopenharmony_ci       * the only case we call this function for anyway.
1589bf215546Sopenharmony_ci       */
1590bf215546Sopenharmony_ci      PAIR(MUL_U24, MUL_S24)
1591bf215546Sopenharmony_ci
1592bf215546Sopenharmony_ci   default:
1593bf215546Sopenharmony_ci      *can_swap = false;
1594bf215546Sopenharmony_ci      return opc;
1595bf215546Sopenharmony_ci   }
1596bf215546Sopenharmony_ci}
1597bf215546Sopenharmony_ci
/* Bit mask with the low n bits set. The shift operates on a plain int 1, so
 * n has to be small enough for (1 << n) to be representable in an int;
 * larger values are undefined behavior in C.
 */
#define MASK(n) ((1 << (n)) - 1)
1599bf215546Sopenharmony_ci
/* iterator for an instruction's sources (reg), also returns src #:
 *
 * Declares __srcreg (struct ir3_register *) and the index __n for the loop
 * body. The outer for-loop exists only to declare __srcreg: it starts from a
 * non-NULL dummy value so the body is entered once, and resets it to NULL to
 * terminate. The inner loop walks srcs[0 .. srcs_count), with the count read
 * once up front, and the trailing if skips NULL entries.
 *
 * Caveats:
 *  - the expansion starts with an unbraced if, so an else written after a
 *    use of this macro binds to one of the macro's own ifs;
 *  - __instr is evaluated more than once;
 *  - the name __cnt is used internally and must not be passed as __n.
 */
#define foreach_src_n(__srcreg, __n, __instr)                                  \
   if ((__instr)->srcs_count)                                                  \
      for (struct ir3_register *__srcreg = (void *)~0; __srcreg;               \
           __srcreg = NULL)                                                    \
         for (unsigned __cnt = (__instr)->srcs_count, __n = 0; __n < __cnt;    \
              __n++)                                                           \
            if ((__srcreg = (__instr)->srcs[__n]))
1608bf215546Sopenharmony_ci
/* iterator for an instruction's sources (reg): same as foreach_src_n() with
 * the source index kept in a loop variable named __i.
 */
#define foreach_src(__srcreg, __instr) foreach_src_n (__srcreg, __i, __instr)
1611bf215546Sopenharmony_ci
/* iterator for an instruction's destinations (reg), also returns dst #:
 *
 * Declares __dstreg (struct ir3_register *) and the index __n for the loop
 * body. The outer for-loop exists only to declare __dstreg: it starts from a
 * non-NULL dummy value so the body is entered once, and resets it to NULL to
 * terminate. The inner loop walks dsts[0 .. dsts_count), with the count read
 * once up front, and the trailing if skips NULL entries.
 *
 * Caveats:
 *  - the expansion starts with an unbraced if, so an else written after a
 *    use of this macro binds to one of the macro's own ifs;
 *  - __instr is evaluated more than once;
 *  - the name __cnt is used internally and must not be passed as __n.
 */
#define foreach_dst_n(__dstreg, __n, __instr)                                  \
   if ((__instr)->dsts_count)                                                  \
      for (struct ir3_register *__dstreg = (void *)~0; __dstreg;               \
           __dstreg = NULL)                                                    \
         for (unsigned __cnt = (__instr)->dsts_count, __n = 0; __n < __cnt;    \
              __n++)                                                           \
            if ((__dstreg = (__instr)->dsts[__n]))
1620bf215546Sopenharmony_ci
/* iterator for an instruction's destinations (reg): same as foreach_dst_n()
 * with the destination index kept in a loop variable named __i.
 */
#define foreach_dst(__dstreg, __instr) foreach_dst_n (__dstreg, __i, __instr)
1623bf215546Sopenharmony_ci
1624bf215546Sopenharmony_cistatic inline unsigned
1625bf215546Sopenharmony_ci__ssa_src_cnt(struct ir3_instruction *instr)
1626bf215546Sopenharmony_ci{
1627bf215546Sopenharmony_ci   return instr->srcs_count + instr->deps_count;
1628bf215546Sopenharmony_ci}
1629bf215546Sopenharmony_ci
1630bf215546Sopenharmony_cistatic inline bool
1631bf215546Sopenharmony_ci__is_false_dep(struct ir3_instruction *instr, unsigned n)
1632bf215546Sopenharmony_ci{
1633bf215546Sopenharmony_ci   if (n >= instr->srcs_count)
1634bf215546Sopenharmony_ci      return true;
1635bf215546Sopenharmony_ci   return false;
1636bf215546Sopenharmony_ci}
1637bf215546Sopenharmony_ci
1638bf215546Sopenharmony_cistatic inline struct ir3_instruction **
1639bf215546Sopenharmony_ci__ssa_srcp_n(struct ir3_instruction *instr, unsigned n)
1640bf215546Sopenharmony_ci{
1641bf215546Sopenharmony_ci   if (__is_false_dep(instr, n))
1642bf215546Sopenharmony_ci      return &instr->deps[n - instr->srcs_count];
1643bf215546Sopenharmony_ci   if (ssa(instr->srcs[n]))
1644bf215546Sopenharmony_ci      return &instr->srcs[n]->def->instr;
1645bf215546Sopenharmony_ci   return NULL;
1646bf215546Sopenharmony_ci}
1647bf215546Sopenharmony_ci
/* iterator for pointers to an instruction's SSA sources, also returns the
 * slot #:
 *
 * Declares __srcp (struct ir3_instruction **) and the index __n for the loop
 * body. Slots are numbered as in __ssa_src_cnt()/__ssa_srcp_n(): regular
 * sources first, then the deps entries. The slot count is computed once up
 * front, and slots for which __ssa_srcp_n() returns NULL are skipped.
 *
 * The outer for-loop exists only to declare __srcp (non-NULL dummy start
 * value, reset to NULL to terminate). __instr is not parenthesized and is
 * evaluated on every iteration; __cnt is used internally and must not be
 * passed as __n.
 */
#define foreach_ssa_srcp_n(__srcp, __n, __instr)                               \
   for (struct ir3_instruction **__srcp = (void *)~0; __srcp; __srcp = NULL)   \
      for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt;      \
           __n++)                                                              \
         if ((__srcp = __ssa_srcp_n(__instr, __n)))
1653bf215546Sopenharmony_ci
/* iterator for pointers to an instruction's SSA sources: same as
 * foreach_ssa_srcp_n() with the slot index kept in a loop variable named __i.
 */
#define foreach_ssa_srcp(__srcp, __instr)                                      \
   foreach_ssa_srcp_n (__srcp, __i, __instr)
1656bf215546Sopenharmony_ci
/* iterator for an instruction's SSA sources (instr), also returns src #:
 *
 * Built on foreach_ssa_srcp_n(): declares __srcinst (struct ir3_instruction
 * *) for the loop body and assigns it the instruction stored in each visited
 * slot; slots holding a NULL instruction are skipped.
 *
 * The inner slot pointer always uses the fixed name __srcp, so the body must
 * not rely on an outer variable of that name.
 */
#define foreach_ssa_src_n(__srcinst, __n, __instr)                             \
   for (struct ir3_instruction *__srcinst = (void *)~0; __srcinst;             \
        __srcinst = NULL)                                                      \
      foreach_ssa_srcp_n (__srcp, __n, __instr)                                \
         if ((__srcinst = *__srcp))
1663bf215546Sopenharmony_ci
/* iterator for an instruction's SSA sources (instr): same as
 * foreach_ssa_src_n() with the slot index kept in a loop variable named __i.
 */
#define foreach_ssa_src(__srcinst, __instr)                                    \
   foreach_ssa_src_n (__srcinst, __i, __instr)
1667bf215546Sopenharmony_ci
/* iterators for shader inputs:
 *
 * foreach_input_n() declares __ininstr (struct ir3_instruction *) and the
 * index __cnt for the loop body and walks (__ir)->inputs, skipping NULL
 * entries. inputs_count is re-read on every iteration. The outer for-loop
 * exists only to declare __ininstr (non-NULL dummy start value, reset to NULL
 * to terminate).
 *
 * foreach_input() is the same with the index kept in a loop variable named
 * __i.
 */
#define foreach_input_n(__ininstr, __cnt, __ir)                                \
   for (struct ir3_instruction *__ininstr = (void *)~0; __ininstr;             \
        __ininstr = NULL)                                                      \
      for (unsigned __cnt = 0; __cnt < (__ir)->inputs_count; __cnt++)          \
         if ((__ininstr = (__ir)->inputs[__cnt]))
#define foreach_input(__ininstr, __ir) foreach_input_n (__ininstr, __i, __ir)
1675bf215546Sopenharmony_ci
/* iterators for instructions:
 *
 * Thin wrappers around the list_for_each_entry*() helpers with the entry type
 * fixed to struct ir3_instruction and the link member fixed to "node".
 * NOTE(review): the exact guarantees of the _rev/_safe/_from_safe variants
 * come from the list helpers, which are defined outside this header.
 */
#define foreach_instr(__instr, __list)                                         \
   list_for_each_entry (struct ir3_instruction, __instr, __list, node)
#define foreach_instr_rev(__instr, __list)                                     \
   list_for_each_entry_rev (struct ir3_instruction, __instr, __list, node)
#define foreach_instr_safe(__instr, __list)                                    \
   list_for_each_entry_safe (struct ir3_instruction, __instr, __list, node)
#define foreach_instr_from_safe(__instr, __start, __list)                      \
   list_for_each_entry_from_safe(struct ir3_instruction, __instr, __start,     \
                                 __list, node)
1686bf215546Sopenharmony_ci
/* iterators for blocks:
 *
 * Thin wrappers around the list_for_each_entry*() helpers with the entry type
 * fixed to struct ir3_block and the link member fixed to "node".
 */
#define foreach_block(__block, __list)                                         \
   list_for_each_entry (struct ir3_block, __block, __list, node)
#define foreach_block_safe(__block, __list)                                    \
   list_for_each_entry_safe (struct ir3_block, __block, __list, node)
#define foreach_block_rev(__block, __list)                                     \
   list_for_each_entry_rev (struct ir3_block, __block, __list, node)
1694bf215546Sopenharmony_ci
/* iterators for arrays:
 *
 * Thin wrappers around the list_for_each_entry*() helpers with the entry type
 * fixed to struct ir3_array and the link member fixed to "node".
 */
#define foreach_array(__array, __list)                                         \
   list_for_each_entry (struct ir3_array, __array, __list, node)
#define foreach_array_safe(__array, __list)                                    \
   list_for_each_entry_safe (struct ir3_array, __array, __list, node)
1700bf215546Sopenharmony_ci
/* Run a pass and evaluate to its boolean result.
 *
 * Calls pass(ir, <extra args>). If the pass reports progress, the IR is
 * printed via ir3_debug_print() with the tag "AFTER: <pass name>" and then
 * checked with ir3_validate(); neither happens when there was no progress.
 *
 * Notes:
 *  - relies on the GNU statement-expression and ", ##__VA_ARGS__" extensions;
 *  - ir is evaluated more than once;
 *  - a local named "progress" is declared inside the expansion, so extra
 *    arguments must not refer to a caller variable of that name.
 */
#define IR3_PASS(ir, pass, ...)                                                \
   ({                                                                          \
      bool progress = pass(ir, ##__VA_ARGS__);                                 \
      if (progress) {                                                          \
         ir3_debug_print(ir, "AFTER: " #pass);                                 \
         ir3_validate(ir);                                                     \
      }                                                                        \
      progress;                                                                \
   })
1710bf215546Sopenharmony_ci
/* validate:
 *
 * Prototype only, the definition is not part of this header section. Invoked
 * by IR3_PASS() after every pass that reports progress.
 */
void ir3_validate(struct ir3 *ir);
1713bf215546Sopenharmony_ci
/* dump:
 *
 * Prototypes only; one entry point takes a whole ir3, the other a single
 * instruction. NOTE(review): where the output goes is decided by the
 * implementation, which is not visible here.
 */
void ir3_print(struct ir3 *ir);
void ir3_print_instr(struct ir3_instruction *instr);

/* Forward declaration is enough, the stream is only passed by pointer. */
struct log_stream;
void ir3_print_instr_stream(struct log_stream *stream, struct ir3_instruction *instr);
1720bf215546Sopenharmony_ci
/* delay calculation:
 *
 * Prototypes only, the definitions are not part of this header section.
 * NOTE(review): n / assigner_n / consumer_n presumably select a source or
 * destination of the respective instruction, and "soft" presumably selects an
 * estimated rather than a required delay -- confirm against the
 * implementation before relying on it.
 */
int ir3_delayslots(struct ir3_instruction *assigner,
                   struct ir3_instruction *consumer, unsigned n, bool soft);
unsigned ir3_delayslots_with_repeat(struct ir3_instruction *assigner,
                                    struct ir3_instruction *consumer,
                                    unsigned assigner_n, unsigned consumer_n);
unsigned ir3_delay_calc(struct ir3_block *block,
                        struct ir3_instruction *instr, bool mergedregs);
1729bf215546Sopenharmony_ci
1730bf215546Sopenharmony_ci/* estimated (ss)/(sy) delay calculation */
1731bf215546Sopenharmony_ci
1732bf215546Sopenharmony_cistatic inline bool
1733bf215546Sopenharmony_ciis_local_mem_load(struct ir3_instruction *instr)
1734bf215546Sopenharmony_ci{
1735bf215546Sopenharmony_ci   return instr->opc == OPC_LDL || instr->opc == OPC_LDLV ||
1736bf215546Sopenharmony_ci      instr->opc == OPC_LDLW;
1737bf215546Sopenharmony_ci}
1738bf215546Sopenharmony_ci
1739bf215546Sopenharmony_ci/* Does this instruction need (ss) to wait for its result? */
1740bf215546Sopenharmony_cistatic inline bool
1741bf215546Sopenharmony_ciis_ss_producer(struct ir3_instruction *instr)
1742bf215546Sopenharmony_ci{
1743bf215546Sopenharmony_ci   foreach_dst (dst, instr) {
1744bf215546Sopenharmony_ci      if (dst->flags & IR3_REG_SHARED)
1745bf215546Sopenharmony_ci         return true;
1746bf215546Sopenharmony_ci   }
1747bf215546Sopenharmony_ci   return is_sfu(instr) || is_local_mem_load(instr);
1748bf215546Sopenharmony_ci}
1749bf215546Sopenharmony_ci
/* The soft delay for approximating the cost of (ss).
 *
 * On a6xx, the number of delay slots it takes to get an SFU result back
 * (i.e. when using nops instead of (ss)) is:
 *
 *     8 - single warp
 *     9 - two warps
 *    10 - four warps
 *
 * and so on.  It is not quite clear where this tapers out (i.e. how many
 * warps share an SFU unit), but 10 seems like a reasonable number to choose.
 * Local memory loads are given the same estimate.
 *
 * For everything else: the blob adds 6 nops between shared producers and
 * consumers, and before (ss) was used this was sufficient in most cases.
 */
static inline unsigned
soft_ss_delay(struct ir3_instruction *instr)
{
   const bool long_latency = is_sfu(instr) || is_local_mem_load(instr);

   return long_latency ? 10 : 6;
}
1772bf215546Sopenharmony_ci
1773bf215546Sopenharmony_cistatic inline bool
1774bf215546Sopenharmony_ciis_sy_producer(struct ir3_instruction *instr)
1775bf215546Sopenharmony_ci{
1776bf215546Sopenharmony_ci   return is_tex_or_prefetch(instr) ||
1777bf215546Sopenharmony_ci      (is_load(instr) && !is_local_mem_load(instr)) ||
1778bf215546Sopenharmony_ci      is_atomic(instr->opc);
1779bf215546Sopenharmony_ci}
1780bf215546Sopenharmony_ci
1781bf215546Sopenharmony_cistatic inline unsigned
1782bf215546Sopenharmony_cisoft_sy_delay(struct ir3_instruction *instr, struct ir3 *shader)
1783bf215546Sopenharmony_ci{
1784bf215546Sopenharmony_ci   /* TODO: this is just an optimistic guess, we can do better post-RA.
1785bf215546Sopenharmony_ci    */
1786bf215546Sopenharmony_ci   bool double_wavesize =
1787bf215546Sopenharmony_ci      shader->type == MESA_SHADER_FRAGMENT ||
1788bf215546Sopenharmony_ci      shader->type == MESA_SHADER_COMPUTE;
1789bf215546Sopenharmony_ci
1790bf215546Sopenharmony_ci   unsigned components = reg_elems(instr->dsts[0]);
1791bf215546Sopenharmony_ci
1792bf215546Sopenharmony_ci   /* These numbers come from counting the number of delay slots to get
1793bf215546Sopenharmony_ci    * cat5/cat6 results back using nops instead of (sy). Note that these numbers
1794bf215546Sopenharmony_ci    * are with the result preloaded to cache by loading it before in the same
1795bf215546Sopenharmony_ci    * shader - uncached results are much larger.
1796bf215546Sopenharmony_ci    *
1797bf215546Sopenharmony_ci    * Note: most ALU instructions can't complete at the full doubled rate, so
1798bf215546Sopenharmony_ci    * they take 2 cycles. The only exception is fp16 instructions with no
1799bf215546Sopenharmony_ci    * built-in conversions. Therefore divide the latency by 2.
1800bf215546Sopenharmony_ci    *
1801bf215546Sopenharmony_ci    * TODO: Handle this properly in the scheduler and remove this.
1802bf215546Sopenharmony_ci    */
1803bf215546Sopenharmony_ci   if (instr->opc == OPC_LDC) {
1804bf215546Sopenharmony_ci      if (double_wavesize)
1805bf215546Sopenharmony_ci         return (21 + 8 * components) / 2;
1806bf215546Sopenharmony_ci      else
1807bf215546Sopenharmony_ci         return 18 + 4 * components;
1808bf215546Sopenharmony_ci   } else if (is_tex_or_prefetch(instr)) {
1809bf215546Sopenharmony_ci      if (double_wavesize) {
1810bf215546Sopenharmony_ci         switch (components) {
1811bf215546Sopenharmony_ci         case 1: return 58 / 2;
1812bf215546Sopenharmony_ci         case 2: return 60 / 2;
1813bf215546Sopenharmony_ci         case 3: return 77 / 2;
1814bf215546Sopenharmony_ci         case 4: return 79 / 2;
1815bf215546Sopenharmony_ci         default: unreachable("bad number of components");
1816bf215546Sopenharmony_ci         }
1817bf215546Sopenharmony_ci      } else {
1818bf215546Sopenharmony_ci         switch (components) {
1819bf215546Sopenharmony_ci         case 1: return 51;
1820bf215546Sopenharmony_ci         case 2: return 53;
1821bf215546Sopenharmony_ci         case 3: return 62;
1822bf215546Sopenharmony_ci         case 4: return 64;
1823bf215546Sopenharmony_ci         default: unreachable("bad number of components");
1824bf215546Sopenharmony_ci         }
1825bf215546Sopenharmony_ci      }
1826bf215546Sopenharmony_ci   } else {
1827bf215546Sopenharmony_ci      /* TODO: measure other cat6 opcodes like ldg */
1828bf215546Sopenharmony_ci      if (double_wavesize)
1829bf215546Sopenharmony_ci         return (172 + components) / 2;
1830bf215546Sopenharmony_ci      else
1831bf215546Sopenharmony_ci         return 109 + components;
1832bf215546Sopenharmony_ci   }
1833bf215546Sopenharmony_ci}
1834bf215546Sopenharmony_ci
1835bf215546Sopenharmony_ci
/* Pass entry points.
 *
 * The pass-runner statement-expression earlier in this header invokes a pass
 * as `pass(ir, ...)` and stores the result in a `bool progress`.
 * NOTE(review): the bool-returning passes below presumably report progress
 * in that sense, and the int-returning ones (ir3_sched, ir3_ra) presumably
 * return a status code -- confirm against the implementations.
 */

/* unreachable block elimination: */
bool ir3_remove_unreachable(struct ir3 *ir);

/* dead code elimination: */
struct ir3_shader_variant;
bool ir3_dce(struct ir3 *ir, struct ir3_shader_variant *so);

/* fp16 conversion folding */
bool ir3_cf(struct ir3 *ir);

/* copy-propagate: */
bool ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so);

/* common subexpression elimination: */
bool ir3_cse(struct ir3 *ir);

/* Make arrays SSA */
bool ir3_array_to_ssa(struct ir3 *ir);

/* scheduling: */
bool ir3_sched_add_deps(struct ir3 *ir);
int ir3_sched(struct ir3 *ir);

/* Forward declaration only; none of the prototypes in this group use it. */
struct ir3_context;
bool ir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v);

/* register assignment (takes the shader variant rather than the IR): */
int ir3_ra(struct ir3_shader_variant *v);

/* lower subgroup ops: */
bool ir3_lower_subgroups(struct ir3 *ir);

/* legalize: */
bool ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary);
bool ir3_legalize_relative(struct ir3 *ir);
1871bf215546Sopenharmony_ci
1872bf215546Sopenharmony_cistatic inline bool
1873bf215546Sopenharmony_ciir3_has_latency_to_hide(struct ir3 *ir)
1874bf215546Sopenharmony_ci{
1875bf215546Sopenharmony_ci   /* VS/GS/TCS/TESS  co-exist with frag shader invocations, but we don't
1876bf215546Sopenharmony_ci    * know the nature of the fragment shader.  Just assume it will have
1877bf215546Sopenharmony_ci    * latency to hide:
1878bf215546Sopenharmony_ci    */
1879bf215546Sopenharmony_ci   if (ir->type != MESA_SHADER_FRAGMENT)
1880bf215546Sopenharmony_ci      return true;
1881bf215546Sopenharmony_ci
1882bf215546Sopenharmony_ci   foreach_block (block, &ir->block_list) {
1883bf215546Sopenharmony_ci      foreach_instr (instr, &block->instr_list) {
1884bf215546Sopenharmony_ci         if (is_tex_or_prefetch(instr))
1885bf215546Sopenharmony_ci            return true;
1886bf215546Sopenharmony_ci
1887bf215546Sopenharmony_ci         if (is_load(instr)) {
1888bf215546Sopenharmony_ci            switch (instr->opc) {
1889bf215546Sopenharmony_ci            case OPC_LDLV:
1890bf215546Sopenharmony_ci            case OPC_LDL:
1891bf215546Sopenharmony_ci            case OPC_LDLW:
1892bf215546Sopenharmony_ci               break;
1893bf215546Sopenharmony_ci            default:
1894bf215546Sopenharmony_ci               return true;
1895bf215546Sopenharmony_ci            }
1896bf215546Sopenharmony_ci         }
1897bf215546Sopenharmony_ci      }
1898bf215546Sopenharmony_ci   }
1899bf215546Sopenharmony_ci
1900bf215546Sopenharmony_ci   return false;
1901bf215546Sopenharmony_ci}
1902bf215546Sopenharmony_ci
1903bf215546Sopenharmony_ci/* ************************************************************************* */
1904bf215546Sopenharmony_ci/* instruction helpers */
1905bf215546Sopenharmony_ci
1906bf215546Sopenharmony_ci/* creates SSA src of correct type (ie. half vs full precision) */
1907bf215546Sopenharmony_cistatic inline struct ir3_register *
1908bf215546Sopenharmony_ci__ssa_src(struct ir3_instruction *instr, struct ir3_instruction *src,
1909bf215546Sopenharmony_ci          unsigned flags)
1910bf215546Sopenharmony_ci{
1911bf215546Sopenharmony_ci   struct ir3_register *reg;
1912bf215546Sopenharmony_ci   if (src->dsts[0]->flags & IR3_REG_HALF)
1913bf215546Sopenharmony_ci      flags |= IR3_REG_HALF;
1914bf215546Sopenharmony_ci   reg = ir3_src_create(instr, INVALID_REG, IR3_REG_SSA | flags);
1915bf215546Sopenharmony_ci   reg->def = src->dsts[0];
1916bf215546Sopenharmony_ci   reg->wrmask = src->dsts[0]->wrmask;
1917bf215546Sopenharmony_ci   return reg;
1918bf215546Sopenharmony_ci}
1919bf215546Sopenharmony_ci
1920bf215546Sopenharmony_cistatic inline struct ir3_register *
1921bf215546Sopenharmony_ci__ssa_dst(struct ir3_instruction *instr)
1922bf215546Sopenharmony_ci{
1923bf215546Sopenharmony_ci   struct ir3_register *reg = ir3_dst_create(instr, INVALID_REG, IR3_REG_SSA);
1924bf215546Sopenharmony_ci   reg->instr = instr;
1925bf215546Sopenharmony_ci   return reg;
1926bf215546Sopenharmony_ci}
1927bf215546Sopenharmony_ci
1928bf215546Sopenharmony_cistatic inline struct ir3_instruction *
1929bf215546Sopenharmony_cicreate_immed_typed(struct ir3_block *block, uint32_t val, type_t type)
1930bf215546Sopenharmony_ci{
1931bf215546Sopenharmony_ci   struct ir3_instruction *mov;
1932bf215546Sopenharmony_ci   unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0;
1933bf215546Sopenharmony_ci
1934bf215546Sopenharmony_ci   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
1935bf215546Sopenharmony_ci   mov->cat1.src_type = type;
1936bf215546Sopenharmony_ci   mov->cat1.dst_type = type;
1937bf215546Sopenharmony_ci   __ssa_dst(mov)->flags |= flags;
1938bf215546Sopenharmony_ci   ir3_src_create(mov, 0, IR3_REG_IMMED | flags)->uim_val = val;
1939bf215546Sopenharmony_ci
1940bf215546Sopenharmony_ci   return mov;
1941bf215546Sopenharmony_ci}
1942bf215546Sopenharmony_ci
1943bf215546Sopenharmony_cistatic inline struct ir3_instruction *
1944bf215546Sopenharmony_cicreate_immed(struct ir3_block *block, uint32_t val)
1945bf215546Sopenharmony_ci{
1946bf215546Sopenharmony_ci   return create_immed_typed(block, val, TYPE_U32);
1947bf215546Sopenharmony_ci}
1948bf215546Sopenharmony_ci
1949bf215546Sopenharmony_cistatic inline struct ir3_instruction *
1950bf215546Sopenharmony_cicreate_uniform_typed(struct ir3_block *block, unsigned n, type_t type)
1951bf215546Sopenharmony_ci{
1952bf215546Sopenharmony_ci   struct ir3_instruction *mov;
1953bf215546Sopenharmony_ci   unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0;
1954bf215546Sopenharmony_ci
1955bf215546Sopenharmony_ci   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
1956bf215546Sopenharmony_ci   mov->cat1.src_type = type;
1957bf215546Sopenharmony_ci   mov->cat1.dst_type = type;
1958bf215546Sopenharmony_ci   __ssa_dst(mov)->flags |= flags;
1959bf215546Sopenharmony_ci   ir3_src_create(mov, n, IR3_REG_CONST | flags);
1960bf215546Sopenharmony_ci
1961bf215546Sopenharmony_ci   return mov;
1962bf215546Sopenharmony_ci}
1963bf215546Sopenharmony_ci
1964bf215546Sopenharmony_cistatic inline struct ir3_instruction *
1965bf215546Sopenharmony_cicreate_uniform(struct ir3_block *block, unsigned n)
1966bf215546Sopenharmony_ci{
1967bf215546Sopenharmony_ci   return create_uniform_typed(block, n, TYPE_F32);
1968bf215546Sopenharmony_ci}
1969bf215546Sopenharmony_ci
1970bf215546Sopenharmony_cistatic inline struct ir3_instruction *
1971bf215546Sopenharmony_cicreate_uniform_indirect(struct ir3_block *block, int n, type_t type,
1972bf215546Sopenharmony_ci                        struct ir3_instruction *address)
1973bf215546Sopenharmony_ci{
1974bf215546Sopenharmony_ci   struct ir3_instruction *mov;
1975bf215546Sopenharmony_ci
1976bf215546Sopenharmony_ci   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
1977bf215546Sopenharmony_ci   mov->cat1.src_type = type;
1978bf215546Sopenharmony_ci   mov->cat1.dst_type = type;
1979bf215546Sopenharmony_ci   __ssa_dst(mov);
1980bf215546Sopenharmony_ci   ir3_src_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n;
1981bf215546Sopenharmony_ci
1982bf215546Sopenharmony_ci   ir3_instr_set_address(mov, address);
1983bf215546Sopenharmony_ci
1984bf215546Sopenharmony_ci   return mov;
1985bf215546Sopenharmony_ci}
1986bf215546Sopenharmony_ci
1987bf215546Sopenharmony_cistatic inline struct ir3_instruction *
1988bf215546Sopenharmony_ciir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
1989bf215546Sopenharmony_ci{
1990bf215546Sopenharmony_ci   struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV, 1, 1);
1991bf215546Sopenharmony_ci   unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0;
1992bf215546Sopenharmony_ci
1993bf215546Sopenharmony_ci   __ssa_dst(instr)->flags |= flags;
1994bf215546Sopenharmony_ci   if (src->dsts[0]->flags & IR3_REG_ARRAY) {
1995bf215546Sopenharmony_ci      struct ir3_register *src_reg = __ssa_src(instr, src, IR3_REG_ARRAY);
1996bf215546Sopenharmony_ci      src_reg->array = src->dsts[0]->array;
1997bf215546Sopenharmony_ci   } else {
1998bf215546Sopenharmony_ci      __ssa_src(instr, src, src->dsts[0]->flags & IR3_REG_SHARED);
1999bf215546Sopenharmony_ci   }
2000bf215546Sopenharmony_ci   assert(!(src->dsts[0]->flags & IR3_REG_RELATIV));
2001bf215546Sopenharmony_ci   instr->cat1.src_type = type;
2002bf215546Sopenharmony_ci   instr->cat1.dst_type = type;
2003bf215546Sopenharmony_ci   return instr;
2004bf215546Sopenharmony_ci}
2005bf215546Sopenharmony_ci
2006bf215546Sopenharmony_cistatic inline struct ir3_instruction *
2007bf215546Sopenharmony_ciir3_COV(struct ir3_block *block, struct ir3_instruction *src, type_t src_type,
2008bf215546Sopenharmony_ci        type_t dst_type)
2009bf215546Sopenharmony_ci{
2010bf215546Sopenharmony_ci   struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV, 1, 1);
2011bf215546Sopenharmony_ci   unsigned dst_flags = (type_size(dst_type) < 32) ? IR3_REG_HALF : 0;
2012bf215546Sopenharmony_ci   unsigned src_flags = (type_size(src_type) < 32) ? IR3_REG_HALF : 0;
2013bf215546Sopenharmony_ci
2014bf215546Sopenharmony_ci   assert((src->dsts[0]->flags & IR3_REG_HALF) == src_flags);
2015bf215546Sopenharmony_ci
2016bf215546Sopenharmony_ci   __ssa_dst(instr)->flags |= dst_flags;
2017bf215546Sopenharmony_ci   __ssa_src(instr, src, 0);
2018bf215546Sopenharmony_ci   instr->cat1.src_type = src_type;
2019bf215546Sopenharmony_ci   instr->cat1.dst_type = dst_type;
2020bf215546Sopenharmony_ci   assert(!(src->dsts[0]->flags & IR3_REG_ARRAY));
2021bf215546Sopenharmony_ci   return instr;
2022bf215546Sopenharmony_ci}
2023bf215546Sopenharmony_ci
2024bf215546Sopenharmony_cistatic inline struct ir3_instruction *
2025bf215546Sopenharmony_ciir3_MOVMSK(struct ir3_block *block, unsigned components)
2026bf215546Sopenharmony_ci{
2027bf215546Sopenharmony_ci   struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOVMSK, 1, 0);
2028bf215546Sopenharmony_ci
2029bf215546Sopenharmony_ci   struct ir3_register *dst = __ssa_dst(instr);
2030bf215546Sopenharmony_ci   dst->flags |= IR3_REG_SHARED;
2031bf215546Sopenharmony_ci   dst->wrmask = (1 << components) - 1;
2032bf215546Sopenharmony_ci   instr->repeat = components - 1;
2033bf215546Sopenharmony_ci   return instr;
2034bf215546Sopenharmony_ci}
2035bf215546Sopenharmony_ci
2036bf215546Sopenharmony_cistatic inline struct ir3_instruction *
2037bf215546Sopenharmony_ciir3_BALLOT_MACRO(struct ir3_block *block, struct ir3_instruction *src,
2038bf215546Sopenharmony_ci                 unsigned components)
2039bf215546Sopenharmony_ci{
2040bf215546Sopenharmony_ci   struct ir3_instruction *instr =
2041bf215546Sopenharmony_ci      ir3_instr_create(block, OPC_BALLOT_MACRO, 1, 1);
2042bf215546Sopenharmony_ci
2043bf215546Sopenharmony_ci   struct ir3_register *dst = __ssa_dst(instr);
2044bf215546Sopenharmony_ci   dst->flags |= IR3_REG_SHARED;
2045bf215546Sopenharmony_ci   dst->wrmask = (1 << components) - 1;
2046bf215546Sopenharmony_ci
2047bf215546Sopenharmony_ci   __ssa_src(instr, src, 0);
2048bf215546Sopenharmony_ci
2049bf215546Sopenharmony_ci   return instr;
2050bf215546Sopenharmony_ci}
2051bf215546Sopenharmony_ci
2052bf215546Sopenharmony_cistatic inline struct ir3_instruction *
2053bf215546Sopenharmony_ciir3_NOP(struct ir3_block *block)
2054bf215546Sopenharmony_ci{
2055bf215546Sopenharmony_ci   return ir3_instr_create(block, OPC_NOP, 0, 0);
2056bf215546Sopenharmony_ci}
2057bf215546Sopenharmony_ci
/* Generator for source-less builders: defines ir3_<name>(block), which
 * creates an instruction with opcode `opc` and ORs `flag` into its flags.
 *
 * NOTE(review): room for one destination is requested from
 * ir3_instr_create() but no destination register is created here (ir3_NOP()
 * requests none) -- confirm that this is intended.
 */
/* clang-format off */
#define __INSTR0(flag, name, opc)                                              \
static inline struct ir3_instruction *ir3_##name(struct ir3_block *block)      \
{                                                                              \
   struct ir3_instruction *insn;                                               \
   insn = ir3_instr_create(block, opc, 1, 0);                                  \
   insn->flags |= flag;                                                        \
   return insn;                                                                \
}
/* clang-format on */
/* INSTR0F(f, X) defines ir3_X_f() with IR3_INSTR_f set; INSTR0(X) defines
 * ir3_X() with no extra flag.
 */
#define INSTR0F(f, name) __INSTR0(IR3_INSTR_##f, name##_##f, OPC_##name)
#define INSTR0(name)     __INSTR0(0, name, OPC_##name)
2069bf215546Sopenharmony_ci
/* Generator for one-source builders: defines ir3_<name>(block, a, aflags),
 * which creates an instruction with opcode `opc`, `dst_count` SSA
 * destinations and one SSA source referring to `a` (with `aflags` applied),
 * then ORs `flag` into the instruction flags.
 */
/* clang-format off */
#define __INSTR1(flag, dst_count, name, opc)                                   \
static inline struct ir3_instruction *ir3_##name(                              \
   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags)        \
{                                                                              \
   struct ir3_instruction *insn;                                               \
   insn = ir3_instr_create(block, opc, dst_count, 1);                          \
   for (unsigned left = (dst_count); left > 0; left--)                         \
      __ssa_dst(insn);                                                         \
   __ssa_src(insn, a, aflags);                                                 \
   insn->flags |= flag;                                                        \
   return insn;                                                                \
}
/* clang-format on */
/* INSTR1F(f, X): ir3_X_f() with IR3_INSTR_f set and one destination.
 * INSTR1(X): ir3_X() with one destination.
 * INSTR1NODST(X): ir3_X() with no destination.
 */
#define INSTR1F(f, name)  __INSTR1(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
#define INSTR1(name)      __INSTR1(0, 1, name, OPC_##name)
#define INSTR1NODST(name) __INSTR1(0, 0, name, OPC_##name)
2087bf215546Sopenharmony_ci
/* Generator for two-source builders: defines
 * ir3_<name>(block, a, aflags, b, bflags), which creates an instruction with
 * opcode `opc`, `dst_count` SSA destinations and SSA sources referring to
 * `a` and `b` in that order, then ORs `flag` into the instruction flags.
 */
/* clang-format off */
#define __INSTR2(flag, dst_count, name, opc)                                   \
static inline struct ir3_instruction *ir3_##name(                              \
   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
   struct ir3_instruction *b, unsigned bflags)                                 \
{                                                                              \
   struct ir3_instruction *insn;                                               \
   insn = ir3_instr_create(block, opc, dst_count, 2);                          \
   for (unsigned left = (dst_count); left > 0; left--)                         \
      __ssa_dst(insn);                                                         \
   __ssa_src(insn, a, aflags);                                                 \
   __ssa_src(insn, b, bflags);                                                 \
   insn->flags |= flag;                                                        \
   return insn;                                                                \
}
/* clang-format on */
/* INSTR2F(f, X): ir3_X_f() with IR3_INSTR_f set and one destination.
 * INSTR2(X): ir3_X() with one destination.
 * INSTR2NODST(X): ir3_X() with no destination.
 */
#define INSTR2F(f, name)   __INSTR2(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
#define INSTR2(name)       __INSTR2(0, 1, name, OPC_##name)
#define INSTR2NODST(name)  __INSTR2(0, 0, name, OPC_##name)
2106bf215546Sopenharmony_ci
/* Generator for three-source builders: defines
 * ir3_<name>(block, a, aflags, b, bflags, c, cflags), which creates an
 * instruction with opcode `opc`, `dst_count` SSA destinations and SSA
 * sources referring to `a`, `b` and `c` in that order, then ORs `flag` into
 * the instruction flags.
 */
/* clang-format off */
#define __INSTR3(flag, dst_count, name, opc)                                   \
static inline struct ir3_instruction *ir3_##name(                              \
   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
   struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c,      \
   unsigned cflags)                                                            \
{                                                                              \
   struct ir3_instruction *insn;                                               \
   insn = ir3_instr_create(block, opc, dst_count, 3);                          \
   for (unsigned left = (dst_count); left > 0; left--)                         \
      __ssa_dst(insn);                                                         \
   __ssa_src(insn, a, aflags);                                                 \
   __ssa_src(insn, b, bflags);                                                 \
   __ssa_src(insn, c, cflags);                                                 \
   insn->flags |= flag;                                                        \
   return insn;                                                                \
}
/* clang-format on */
/* INSTR3F(f, X): ir3_X_f() with IR3_INSTR_f set and one destination.
 * INSTR3(X): ir3_X() with one destination.
 * INSTR3NODST(X): ir3_X() with no destination.
 */
#define INSTR3F(f, name)  __INSTR3(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
#define INSTR3(name)      __INSTR3(0, 1, name, OPC_##name)
#define INSTR3NODST(name) __INSTR3(0, 0, name, OPC_##name)
2128bf215546Sopenharmony_ci
/* Generator for four-source builders: defines
 * ir3_<name>(block, a, aflags, b, bflags, c, cflags, d, dflags), which
 * creates an instruction with opcode `opc`, `dst_count` SSA destinations and
 * SSA sources referring to `a` .. `d` in that order, then ORs `flag` into
 * the instruction flags.
 */
/* clang-format off */
#define __INSTR4(flag, dst_count, name, opc)                                   \
static inline struct ir3_instruction *ir3_##name(                              \
   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
   struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c,      \
   unsigned cflags, struct ir3_instruction *d, unsigned dflags)                \
{                                                                              \
   struct ir3_instruction *insn;                                               \
   insn = ir3_instr_create(block, opc, dst_count, 4);                          \
   for (unsigned left = (dst_count); left > 0; left--)                         \
      __ssa_dst(insn);                                                         \
   __ssa_src(insn, a, aflags);                                                 \
   __ssa_src(insn, b, bflags);                                                 \
   __ssa_src(insn, c, cflags);                                                 \
   __ssa_src(insn, d, dflags);                                                 \
   insn->flags |= flag;                                                        \
   return insn;                                                                \
}
/* clang-format on */
/* INSTR4F(f, X): ir3_X_f() with IR3_INSTR_f set and one destination.
 * INSTR4(X): ir3_X() with one destination.
 * INSTR4NODST(X): ir3_X() with no destination.
 */
#define INSTR4F(f, name)  __INSTR4(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
#define INSTR4(name)      __INSTR4(0, 1, name, OPC_##name)
#define INSTR4NODST(name) __INSTR4(0, 0, name, OPC_##name)
2151bf215546Sopenharmony_ci
/* Generator for five-source builders: defines
 * ir3_<name>(block, a, aflags, ..., e, eflags), which creates an instruction
 * with opcode `opc`, exactly one SSA destination and SSA sources referring
 * to `a` .. `e` in that order, then ORs `flag` into the instruction flags.
 * Unlike the 1..4 source generators there is no dst_count parameter.
 */
/* clang-format off */
#define __INSTR5(flag, name, opc)                                              \
static inline struct ir3_instruction *ir3_##name(                              \
   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
   struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c,      \
   unsigned cflags, struct ir3_instruction *d, unsigned dflags,                \
   struct ir3_instruction *e, unsigned eflags)                                 \
{                                                                              \
   struct ir3_instruction *insn;                                               \
   insn = ir3_instr_create(block, opc, 1, 5);                                  \
   __ssa_dst(insn);                                                            \
   __ssa_src(insn, a, aflags);                                                 \
   __ssa_src(insn, b, bflags);                                                 \
   __ssa_src(insn, c, cflags);                                                 \
   __ssa_src(insn, d, dflags);                                                 \
   __ssa_src(insn, e, eflags);                                                 \
   insn->flags |= flag;                                                        \
   return insn;                                                                \
}
/* clang-format on */
/* INSTR5F(f, X): ir3_X_f() with IR3_INSTR_f set; INSTR5(X): ir3_X(). */
#define INSTR5F(f, name) __INSTR5(IR3_INSTR_##f, name##_##f, OPC_##name)
#define INSTR5(name)     __INSTR5(0, name, OPC_##name)
2173bf215546Sopenharmony_ci
/* Generator for six-source builders: defines
 * ir3_<name>(block, a, aflags, ..., f, fflags), which creates an instruction
 * with opcode `opc`, `dst_count` SSA destinations and SSA sources referring
 * to `a` .. `f` in that order, then ORs `flag` into the instruction flags.
 *
 * The destination count handed to ir3_instr_create() is `dst_count`, the
 * same value that drives the destination loop, matching __INSTR1..__INSTR4.
 * (It used to be a hard-coded 1, so the NODST variant asked for a
 * destination it never created.)
 */
/* clang-format off */
#define __INSTR6(flag, dst_count, name, opc)                                   \
static inline struct ir3_instruction *ir3_##name(                              \
   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
   struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c,      \
   unsigned cflags, struct ir3_instruction *d, unsigned dflags,                \
   struct ir3_instruction *e, unsigned eflags, struct ir3_instruction *f,      \
   unsigned fflags)                                                            \
{                                                                              \
   struct ir3_instruction *instr =                                             \
      ir3_instr_create(block, opc, dst_count, 6);                              \
   for (unsigned i = 0; i < dst_count; i++)                                    \
      __ssa_dst(instr);                                                        \
   __ssa_src(instr, a, aflags);                                                \
   __ssa_src(instr, b, bflags);                                                \
   __ssa_src(instr, c, cflags);                                                \
   __ssa_src(instr, d, dflags);                                                \
   __ssa_src(instr, e, eflags);                                                \
   __ssa_src(instr, f, fflags);                                                \
   instr->flags |= flag;                                                       \
   return instr;                                                               \
}
/* clang-format on */
/* INSTR6F(f, X): ir3_X_f() with IR3_INSTR_f set and one destination.
 * INSTR6(X): ir3_X() with one destination.
 * INSTR6NODST(X): ir3_X() with no destination.
 */
#define INSTR6F(f, name)  __INSTR6(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
#define INSTR6(name)      __INSTR6(0, 1, name, OPC_##name)
#define INSTR6NODST(name) __INSTR6(0, 0, name, OPC_##name)
2199bf215546Sopenharmony_ci
/* cat0 instructions:
 *
 * Each line expands to a static inline builder named ir3_<NAME>():
 *   INSTR0(X)      -> ir3_X(block), no sources
 *   INSTR1NODST(X) -> ir3_X(block, a, aflags), one SSA source, no destination
 */
INSTR1NODST(B)
INSTR0(JUMP)
INSTR1NODST(KILL)
INSTR1NODST(DEMOTE)
INSTR0(END)
INSTR0(CHSH)
INSTR0(CHMASK)
INSTR1NODST(PREDT)
INSTR0(PREDF)
INSTR0(PREDE)
INSTR0(GETONE)
INSTR0(SHPS)
INSTR0(SHPE)

/* cat1 macros
 *
 * INSTR1(X) -> ir3_X(block, a, aflags) and
 * INSTR2(X) -> ir3_X(block, a, aflags, b, bflags); both create one SSA
 * destination.
 */
INSTR1(ANY_MACRO)
INSTR1(ALL_MACRO)
INSTR1(READ_FIRST_MACRO)
INSTR2(READ_COND_MACRO)
2220bf215546Sopenharmony_ci
2221bf215546Sopenharmony_cistatic inline struct ir3_instruction *
2222bf215546Sopenharmony_ciir3_ELECT_MACRO(struct ir3_block *block)
2223bf215546Sopenharmony_ci{
2224bf215546Sopenharmony_ci   struct ir3_instruction *instr =
2225bf215546Sopenharmony_ci      ir3_instr_create(block, OPC_ELECT_MACRO, 1, 0);
2226bf215546Sopenharmony_ci   __ssa_dst(instr);
2227bf215546Sopenharmony_ci   return instr;
2228bf215546Sopenharmony_ci}
2229bf215546Sopenharmony_ci
2230bf215546Sopenharmony_cistatic inline struct ir3_instruction *
2231bf215546Sopenharmony_ciir3_SHPS_MACRO(struct ir3_block *block)
2232bf215546Sopenharmony_ci{
2233bf215546Sopenharmony_ci   struct ir3_instruction *instr =
2234bf215546Sopenharmony_ci      ir3_instr_create(block, OPC_SHPS_MACRO, 1, 0);
2235bf215546Sopenharmony_ci   __ssa_dst(instr);
2236bf215546Sopenharmony_ci   return instr;
2237bf215546Sopenharmony_ci}
2238bf215546Sopenharmony_ci
2239bf215546Sopenharmony_ci/* cat2 instructions; most take 2 sources, but some take only 1: */
2240bf215546Sopenharmony_ciINSTR2(ADD_F)
2241bf215546Sopenharmony_ciINSTR2(MIN_F)
2242bf215546Sopenharmony_ciINSTR2(MAX_F)
2243bf215546Sopenharmony_ciINSTR2(MUL_F)
2244bf215546Sopenharmony_ciINSTR1(SIGN_F)
2245bf215546Sopenharmony_ciINSTR2(CMPS_F)
2246bf215546Sopenharmony_ciINSTR1(ABSNEG_F)
2247bf215546Sopenharmony_ciINSTR2(CMPV_F)
2248bf215546Sopenharmony_ciINSTR1(FLOOR_F)
2249bf215546Sopenharmony_ciINSTR1(CEIL_F)
2250bf215546Sopenharmony_ciINSTR1(RNDNE_F)
2251bf215546Sopenharmony_ciINSTR1(RNDAZ_F)
2252bf215546Sopenharmony_ciINSTR1(TRUNC_F)
2253bf215546Sopenharmony_ciINSTR2(ADD_U)
2254bf215546Sopenharmony_ciINSTR2(ADD_S)
2255bf215546Sopenharmony_ciINSTR2(SUB_U)
2256bf215546Sopenharmony_ciINSTR2(SUB_S)
2257bf215546Sopenharmony_ciINSTR2(CMPS_U)
2258bf215546Sopenharmony_ciINSTR2(CMPS_S)
2259bf215546Sopenharmony_ciINSTR2(MIN_U)
2260bf215546Sopenharmony_ciINSTR2(MIN_S)
2261bf215546Sopenharmony_ciINSTR2(MAX_U)
2262bf215546Sopenharmony_ciINSTR2(MAX_S)
2263bf215546Sopenharmony_ciINSTR1(ABSNEG_S)
2264bf215546Sopenharmony_ciINSTR2(AND_B)
2265bf215546Sopenharmony_ciINSTR2(OR_B)
2266bf215546Sopenharmony_ciINSTR1(NOT_B)
2267bf215546Sopenharmony_ciINSTR2(XOR_B)
2268bf215546Sopenharmony_ciINSTR2(CMPV_U)
2269bf215546Sopenharmony_ciINSTR2(CMPV_S)
2270bf215546Sopenharmony_ciINSTR2(MUL_U24)
2271bf215546Sopenharmony_ciINSTR2(MUL_S24)
2272bf215546Sopenharmony_ciINSTR2(MULL_U)
2273bf215546Sopenharmony_ciINSTR1(BFREV_B)
2274bf215546Sopenharmony_ciINSTR1(CLZ_S)
2275bf215546Sopenharmony_ciINSTR1(CLZ_B)
2276bf215546Sopenharmony_ciINSTR2(SHL_B)
2277bf215546Sopenharmony_ciINSTR2(SHR_B)
2278bf215546Sopenharmony_ciINSTR2(ASHR_B)
2279bf215546Sopenharmony_ciINSTR2(BARY_F)
2280bf215546Sopenharmony_ciINSTR2(FLAT_B)
2281bf215546Sopenharmony_ciINSTR2(MGEN_B)
2282bf215546Sopenharmony_ciINSTR2(GETBIT_B)
2283bf215546Sopenharmony_ciINSTR1(SETRM)
2284bf215546Sopenharmony_ciINSTR1(CBITS_B)
2285bf215546Sopenharmony_ciINSTR2(SHB)
2286bf215546Sopenharmony_ciINSTR2(MSAD)
2287bf215546Sopenharmony_ci
2288bf215546Sopenharmony_ci/* cat3 instructions (all declared with 3 sources): */
2289bf215546Sopenharmony_ciINSTR3(MAD_U16)
2290bf215546Sopenharmony_ciINSTR3(MADSH_U16)
2291bf215546Sopenharmony_ciINSTR3(MAD_S16)
2292bf215546Sopenharmony_ciINSTR3(MADSH_M16)
2293bf215546Sopenharmony_ciINSTR3(MAD_U24)
2294bf215546Sopenharmony_ciINSTR3(MAD_S24)
2295bf215546Sopenharmony_ciINSTR3(MAD_F16)
2296bf215546Sopenharmony_ciINSTR3(MAD_F32)
2297bf215546Sopenharmony_ciINSTR3(DP2ACC)
2298bf215546Sopenharmony_ciINSTR3(DP4ACC)
2299bf215546Sopenharmony_ci/* NOTE: SEL_B32 checks for zero vs nonzero */
2300bf215546Sopenharmony_ciINSTR3(SEL_B16)
2301bf215546Sopenharmony_ciINSTR3(SEL_B32)
2302bf215546Sopenharmony_ciINSTR3(SEL_S16)
2303bf215546Sopenharmony_ciINSTR3(SEL_S32)
2304bf215546Sopenharmony_ciINSTR3(SEL_F16)
2305bf215546Sopenharmony_ciINSTR3(SEL_F32)
2306bf215546Sopenharmony_ciINSTR3(SAD_S16)
2307bf215546Sopenharmony_ciINSTR3(SAD_S32)
2308bf215546Sopenharmony_ci
2309bf215546Sopenharmony_ci/* cat4 instructions (all declared with 1 source): */
2310bf215546Sopenharmony_ciINSTR1(RCP)
2311bf215546Sopenharmony_ciINSTR1(RSQ)
2312bf215546Sopenharmony_ciINSTR1(HRSQ)
2313bf215546Sopenharmony_ciINSTR1(LOG2)
2314bf215546Sopenharmony_ciINSTR1(HLOG2)
2315bf215546Sopenharmony_ciINSTR1(EXP2)
2316bf215546Sopenharmony_ciINSTR1(HEXP2)
2317bf215546Sopenharmony_ciINSTR1(SIN)
2318bf215546Sopenharmony_ciINSTR1(COS)
2319bf215546Sopenharmony_ciINSTR1(SQRT)
2320bf215546Sopenharmony_ci
2321bf215546Sopenharmony_ci/* cat5 instructions.  NOTE(review): INSTR1F(3D, X) presumably follows the
 * INSTR6F pattern, i.e. builds X with IR3_INSTR_3D set under the name X_3D --
 * confirm against the INSTR1F definition.  Sample-style instructions are
 * built by ir3_SAM() below instead of a generated builder.
 */
2322bf215546Sopenharmony_ciINSTR1(DSX)
2323bf215546Sopenharmony_ciINSTR1(DSXPP_MACRO)
2324bf215546Sopenharmony_ciINSTR1(DSY)
2325bf215546Sopenharmony_ciINSTR1(DSYPP_MACRO)
2326bf215546Sopenharmony_ciINSTR1F(3D, DSX)
2327bf215546Sopenharmony_ciINSTR1F(3D, DSY)
2328bf215546Sopenharmony_ciINSTR1(RGETPOS)
2329bf215546Sopenharmony_ci
2330bf215546Sopenharmony_cistatic inline struct ir3_instruction *
2331bf215546Sopenharmony_ciir3_SAM(struct ir3_block *block, opc_t opc, type_t type, unsigned wrmask,
2332bf215546Sopenharmony_ci        unsigned flags, struct ir3_instruction *samp_tex,
2333bf215546Sopenharmony_ci        struct ir3_instruction *src0, struct ir3_instruction *src1)
2334bf215546Sopenharmony_ci{
2335bf215546Sopenharmony_ci   struct ir3_instruction *sam;
2336bf215546Sopenharmony_ci   unsigned nreg = 0;
2337bf215546Sopenharmony_ci
2338bf215546Sopenharmony_ci   if (flags & IR3_INSTR_S2EN) {
2339bf215546Sopenharmony_ci      nreg++;
2340bf215546Sopenharmony_ci   }
2341bf215546Sopenharmony_ci   if (src0) {
2342bf215546Sopenharmony_ci      nreg++;
2343bf215546Sopenharmony_ci   }
2344bf215546Sopenharmony_ci   if (src1) {
2345bf215546Sopenharmony_ci      nreg++;
2346bf215546Sopenharmony_ci   }
2347bf215546Sopenharmony_ci
2348bf215546Sopenharmony_ci   sam = ir3_instr_create(block, opc, 1, nreg);
2349bf215546Sopenharmony_ci   sam->flags |= flags;
2350bf215546Sopenharmony_ci   __ssa_dst(sam)->wrmask = wrmask;
2351bf215546Sopenharmony_ci   if (flags & IR3_INSTR_S2EN) {
2352bf215546Sopenharmony_ci      __ssa_src(sam, samp_tex, (flags & IR3_INSTR_B) ? 0 : IR3_REG_HALF);
2353bf215546Sopenharmony_ci   }
2354bf215546Sopenharmony_ci   if (src0) {
2355bf215546Sopenharmony_ci      __ssa_src(sam, src0, 0);
2356bf215546Sopenharmony_ci   }
2357bf215546Sopenharmony_ci   if (src1) {
2358bf215546Sopenharmony_ci      __ssa_src(sam, src1, 0);
2359bf215546Sopenharmony_ci   }
2360bf215546Sopenharmony_ci   sam->cat5.type = type;
2361bf215546Sopenharmony_ci
2362bf215546Sopenharmony_ci   return sam;
2363bf215546Sopenharmony_ci}
2364bf215546Sopenharmony_ci
2365bf215546Sopenharmony_ci/* cat6 instructions: */
2366bf215546Sopenharmony_ciINSTR0(GETFIBERID)
2367bf215546Sopenharmony_ciINSTR2(LDLV)
2368bf215546Sopenharmony_ciINSTR3(LDG)
2369bf215546Sopenharmony_ciINSTR3(LDL)
2370bf215546Sopenharmony_ciINSTR3(LDLW)
2371bf215546Sopenharmony_ciINSTR3(LDP)
2372bf215546Sopenharmony_ciINSTR4NODST(STG)
2373bf215546Sopenharmony_ciINSTR3NODST(STL)
2374bf215546Sopenharmony_ciINSTR3NODST(STLW)
2375bf215546Sopenharmony_ciINSTR3NODST(STP)
2376bf215546Sopenharmony_ciINSTR1(RESINFO)
2377bf215546Sopenharmony_ciINSTR1(RESFMT)
2378bf215546Sopenharmony_ciINSTR2(ATOMIC_ADD)
2379bf215546Sopenharmony_ciINSTR2(ATOMIC_SUB)
2380bf215546Sopenharmony_ciINSTR2(ATOMIC_XCHG)
2381bf215546Sopenharmony_ciINSTR2(ATOMIC_INC)
2382bf215546Sopenharmony_ciINSTR2(ATOMIC_DEC)
2383bf215546Sopenharmony_ciINSTR2(ATOMIC_CMPXCHG)
2384bf215546Sopenharmony_ciINSTR2(ATOMIC_MIN)
2385bf215546Sopenharmony_ciINSTR2(ATOMIC_MAX)
2386bf215546Sopenharmony_ciINSTR2(ATOMIC_AND)
2387bf215546Sopenharmony_ciINSTR2(ATOMIC_OR)
2388bf215546Sopenharmony_ciINSTR2(ATOMIC_XOR)
2389bf215546Sopenharmony_ciINSTR2(LDC)
2390bf215546Sopenharmony_ciINSTR2(QUAD_SHUFFLE_BRCST)
2391bf215546Sopenharmony_ciINSTR1(QUAD_SHUFFLE_HORIZ)
2392bf215546Sopenharmony_ciINSTR1(QUAD_SHUFFLE_VERT)
2393bf215546Sopenharmony_ciINSTR1(QUAD_SHUFFLE_DIAG)
2394bf215546Sopenharmony_ciINSTR2NODST(LDC_K)
2395bf215546Sopenharmony_ciINSTR2NODST(STC)
2396bf215546Sopenharmony_ci#if GPU >= 600
2397bf215546Sopenharmony_ciINSTR3NODST(STIB);
2398bf215546Sopenharmony_ciINSTR2(LDIB);
2399bf215546Sopenharmony_ciINSTR5(LDG_A);
2400bf215546Sopenharmony_ciINSTR6NODST(STG_A);
2401bf215546Sopenharmony_ciINSTR2(ATOMIC_G_ADD)
2402bf215546Sopenharmony_ciINSTR2(ATOMIC_G_SUB)
2403bf215546Sopenharmony_ciINSTR2(ATOMIC_G_XCHG)
2404bf215546Sopenharmony_ciINSTR2(ATOMIC_G_INC)
2405bf215546Sopenharmony_ciINSTR2(ATOMIC_G_DEC)
2406bf215546Sopenharmony_ciINSTR2(ATOMIC_G_CMPXCHG)
2407bf215546Sopenharmony_ciINSTR2(ATOMIC_G_MIN)
2408bf215546Sopenharmony_ciINSTR2(ATOMIC_G_MAX)
2409bf215546Sopenharmony_ciINSTR2(ATOMIC_G_AND)
2410bf215546Sopenharmony_ciINSTR2(ATOMIC_G_OR)
2411bf215546Sopenharmony_ciINSTR2(ATOMIC_G_XOR)
2412bf215546Sopenharmony_ciINSTR3(ATOMIC_B_ADD)
2413bf215546Sopenharmony_ciINSTR3(ATOMIC_B_SUB)
2414bf215546Sopenharmony_ciINSTR3(ATOMIC_B_XCHG)
2415bf215546Sopenharmony_ciINSTR3(ATOMIC_B_INC)
2416bf215546Sopenharmony_ciINSTR3(ATOMIC_B_DEC)
2417bf215546Sopenharmony_ciINSTR3(ATOMIC_B_CMPXCHG)
2418bf215546Sopenharmony_ciINSTR3(ATOMIC_B_MIN)
2419bf215546Sopenharmony_ciINSTR3(ATOMIC_B_MAX)
2420bf215546Sopenharmony_ciINSTR3(ATOMIC_B_AND)
2421bf215546Sopenharmony_ciINSTR3(ATOMIC_B_OR)
2422bf215546Sopenharmony_ciINSTR3(ATOMIC_B_XOR)
2423bf215546Sopenharmony_ci#elif GPU >= 400
2424bf215546Sopenharmony_ciINSTR3(LDGB)
2425bf215546Sopenharmony_ci#if GPU >= 500
2426bf215546Sopenharmony_ciINSTR3(LDIB)
2427bf215546Sopenharmony_ci#endif
2428bf215546Sopenharmony_ciINSTR4NODST(STGB)
2429bf215546Sopenharmony_ciINSTR4NODST(STIB)
2430bf215546Sopenharmony_ciINSTR4(ATOMIC_S_ADD)
2431bf215546Sopenharmony_ciINSTR4(ATOMIC_S_SUB)
2432bf215546Sopenharmony_ciINSTR4(ATOMIC_S_XCHG)
2433bf215546Sopenharmony_ciINSTR4(ATOMIC_S_INC)
2434bf215546Sopenharmony_ciINSTR4(ATOMIC_S_DEC)
2435bf215546Sopenharmony_ciINSTR4(ATOMIC_S_CMPXCHG)
2436bf215546Sopenharmony_ciINSTR4(ATOMIC_S_MIN)
2437bf215546Sopenharmony_ciINSTR4(ATOMIC_S_MAX)
2438bf215546Sopenharmony_ciINSTR4(ATOMIC_S_AND)
2439bf215546Sopenharmony_ciINSTR4(ATOMIC_S_OR)
2440bf215546Sopenharmony_ciINSTR4(ATOMIC_S_XOR)
2441bf215546Sopenharmony_ci#endif
2442bf215546Sopenharmony_ci
2443bf215546Sopenharmony_ci/* cat7 instructions: */
2444bf215546Sopenharmony_ciINSTR0(BAR)
2445bf215546Sopenharmony_ciINSTR0(FENCE)
2446bf215546Sopenharmony_ci
2447bf215546Sopenharmony_ci/* ************************************************************************* */
2448bf215546Sopenharmony_ci#include "bitset.h"
2449bf215546Sopenharmony_ci
/* Offset added to a half-precision register number when the register files
 * are tracked separately (see __regmask_get/__regmask_set/__regmask_clear).
 * The backing bitset holds 2 * MAX_REG bits.
 */
2450bf215546Sopenharmony_ci#define MAX_REG 256
2451bf215546Sopenharmony_ci
/* Raw bitset storage for a register mask. */
2452bf215546Sopenharmony_citypedef BITSET_DECLARE(regmaskstate_t, 2 * MAX_REG);
2453bf215546Sopenharmony_ci
/* A set of registers, together with the indexing scheme used for its bits. */
2454bf215546Sopenharmony_citypedef struct {
   /* true:  merged register file; half register n maps to bit n, and a full
    *        (or special) register n maps to bits 2n and 2n+1.
    * false: separate files; full register n maps to bit n, and half register
    *        n maps to bit n + MAX_REG.
    */
2455bf215546Sopenharmony_ci   bool mergedregs;
   /* One bit per tracked register slot, indexed as described above. */
2456bf215546Sopenharmony_ci   regmaskstate_t mask;
2457bf215546Sopenharmony_ci} regmask_t;
2458bf215546Sopenharmony_ci
2459bf215546Sopenharmony_cistatic inline bool
2460bf215546Sopenharmony_ci__regmask_get(regmask_t *regmask, bool half, unsigned n)
2461bf215546Sopenharmony_ci{
2462bf215546Sopenharmony_ci   if (regmask->mergedregs) {
2463bf215546Sopenharmony_ci      /* a6xx+ case, with merged register file, we track things in terms
2464bf215546Sopenharmony_ci       * of half-precision registers, with a full precisions register
2465bf215546Sopenharmony_ci       * using two half-precision slots.
2466bf215546Sopenharmony_ci       *
2467bf215546Sopenharmony_ci       * Pretend that special regs (a0.x, a1.x, etc.) are full registers to
2468bf215546Sopenharmony_ci       * avoid having them alias normal full regs.
2469bf215546Sopenharmony_ci       */
2470bf215546Sopenharmony_ci      if (half && !is_reg_num_special(n)) {
2471bf215546Sopenharmony_ci         return BITSET_TEST(regmask->mask, n);
2472bf215546Sopenharmony_ci      } else {
2473bf215546Sopenharmony_ci         n *= 2;
2474bf215546Sopenharmony_ci         return BITSET_TEST(regmask->mask, n) ||
2475bf215546Sopenharmony_ci                BITSET_TEST(regmask->mask, n + 1);
2476bf215546Sopenharmony_ci      }
2477bf215546Sopenharmony_ci   } else {
2478bf215546Sopenharmony_ci      /* pre a6xx case, with separate register file for half and full
2479bf215546Sopenharmony_ci       * precision:
2480bf215546Sopenharmony_ci       */
2481bf215546Sopenharmony_ci      if (half)
2482bf215546Sopenharmony_ci         n += MAX_REG;
2483bf215546Sopenharmony_ci      return BITSET_TEST(regmask->mask, n);
2484bf215546Sopenharmony_ci   }
2485bf215546Sopenharmony_ci}
2486bf215546Sopenharmony_ci
2487bf215546Sopenharmony_cistatic inline void
2488bf215546Sopenharmony_ci__regmask_set(regmask_t *regmask, bool half, unsigned n)
2489bf215546Sopenharmony_ci{
2490bf215546Sopenharmony_ci   if (regmask->mergedregs) {
2491bf215546Sopenharmony_ci      /* a6xx+ case, with merged register file, we track things in terms
2492bf215546Sopenharmony_ci       * of half-precision registers, with a full precisions register
2493bf215546Sopenharmony_ci       * using two half-precision slots:
2494bf215546Sopenharmony_ci       */
2495bf215546Sopenharmony_ci      if (half && !is_reg_num_special(n)) {
2496bf215546Sopenharmony_ci         BITSET_SET(regmask->mask, n);
2497bf215546Sopenharmony_ci      } else {
2498bf215546Sopenharmony_ci         n *= 2;
2499bf215546Sopenharmony_ci         BITSET_SET(regmask->mask, n);
2500bf215546Sopenharmony_ci         BITSET_SET(regmask->mask, n + 1);
2501bf215546Sopenharmony_ci      }
2502bf215546Sopenharmony_ci   } else {
2503bf215546Sopenharmony_ci      /* pre a6xx case, with separate register file for half and full
2504bf215546Sopenharmony_ci       * precision:
2505bf215546Sopenharmony_ci       */
2506bf215546Sopenharmony_ci      if (half)
2507bf215546Sopenharmony_ci         n += MAX_REG;
2508bf215546Sopenharmony_ci      BITSET_SET(regmask->mask, n);
2509bf215546Sopenharmony_ci   }
2510bf215546Sopenharmony_ci}
2511bf215546Sopenharmony_ci
2512bf215546Sopenharmony_cistatic inline void
2513bf215546Sopenharmony_ci__regmask_clear(regmask_t *regmask, bool half, unsigned n)
2514bf215546Sopenharmony_ci{
2515bf215546Sopenharmony_ci   if (regmask->mergedregs) {
2516bf215546Sopenharmony_ci      /* a6xx+ case, with merged register file, we track things in terms
2517bf215546Sopenharmony_ci       * of half-precision registers, with a full precisions register
2518bf215546Sopenharmony_ci       * using two half-precision slots:
2519bf215546Sopenharmony_ci       */
2520bf215546Sopenharmony_ci      if (half && !is_reg_num_special(n)) {
2521bf215546Sopenharmony_ci         BITSET_CLEAR(regmask->mask, n);
2522bf215546Sopenharmony_ci      } else {
2523bf215546Sopenharmony_ci         n *= 2;
2524bf215546Sopenharmony_ci         BITSET_CLEAR(regmask->mask, n);
2525bf215546Sopenharmony_ci         BITSET_CLEAR(regmask->mask, n + 1);
2526bf215546Sopenharmony_ci      }
2527bf215546Sopenharmony_ci   } else {
2528bf215546Sopenharmony_ci      /* pre a6xx case, with separate register file for half and full
2529bf215546Sopenharmony_ci       * precision:
2530bf215546Sopenharmony_ci       */
2531bf215546Sopenharmony_ci      if (half)
2532bf215546Sopenharmony_ci         n += MAX_REG;
2533bf215546Sopenharmony_ci      BITSET_CLEAR(regmask->mask, n);
2534bf215546Sopenharmony_ci   }
2535bf215546Sopenharmony_ci}
2536bf215546Sopenharmony_ci
2537bf215546Sopenharmony_cistatic inline void
2538bf215546Sopenharmony_ciregmask_init(regmask_t *regmask, bool mergedregs)
2539bf215546Sopenharmony_ci{
2540bf215546Sopenharmony_ci   memset(&regmask->mask, 0, sizeof(regmask->mask));
2541bf215546Sopenharmony_ci   regmask->mergedregs = mergedregs;
2542bf215546Sopenharmony_ci}
2543bf215546Sopenharmony_ci
2544bf215546Sopenharmony_cistatic inline void
2545bf215546Sopenharmony_ciregmask_or(regmask_t *dst, regmask_t *a, regmask_t *b)
2546bf215546Sopenharmony_ci{
2547bf215546Sopenharmony_ci   assert(dst->mergedregs == a->mergedregs);
2548bf215546Sopenharmony_ci   assert(dst->mergedregs == b->mergedregs);
2549bf215546Sopenharmony_ci
2550bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(dst->mask); i++)
2551bf215546Sopenharmony_ci      dst->mask[i] = a->mask[i] | b->mask[i];
2552bf215546Sopenharmony_ci}
2553bf215546Sopenharmony_ci
2554bf215546Sopenharmony_cistatic inline void
2555bf215546Sopenharmony_ciregmask_or_shared(regmask_t *dst, regmask_t *a, regmask_t *b)
2556bf215546Sopenharmony_ci{
2557bf215546Sopenharmony_ci   regmaskstate_t shared_mask;
2558bf215546Sopenharmony_ci   BITSET_ZERO(shared_mask);
2559bf215546Sopenharmony_ci
2560bf215546Sopenharmony_ci   if (b->mergedregs) {
2561bf215546Sopenharmony_ci      BITSET_SET_RANGE(shared_mask, 2 * 4 * 48, 2 * 4 * 56 - 1);
2562bf215546Sopenharmony_ci   } else {
2563bf215546Sopenharmony_ci      BITSET_SET_RANGE(shared_mask, 4 * 48, 4 * 56 - 1);
2564bf215546Sopenharmony_ci   }
2565bf215546Sopenharmony_ci
2566bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(dst->mask); i++)
2567bf215546Sopenharmony_ci      dst->mask[i] = a->mask[i] | (b->mask[i] & shared_mask[i]);
2568bf215546Sopenharmony_ci}
2569bf215546Sopenharmony_ci
2570bf215546Sopenharmony_cistatic inline void
2571bf215546Sopenharmony_ciregmask_set(regmask_t *regmask, struct ir3_register *reg)
2572bf215546Sopenharmony_ci{
2573bf215546Sopenharmony_ci   bool half = reg->flags & IR3_REG_HALF;
2574bf215546Sopenharmony_ci   if (reg->flags & IR3_REG_RELATIV) {
2575bf215546Sopenharmony_ci      for (unsigned i = 0; i < reg->size; i++)
2576bf215546Sopenharmony_ci         __regmask_set(regmask, half, reg->array.base + i);
2577bf215546Sopenharmony_ci   } else {
2578bf215546Sopenharmony_ci      for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++)
2579bf215546Sopenharmony_ci         if (mask & 1)
2580bf215546Sopenharmony_ci            __regmask_set(regmask, half, n);
2581bf215546Sopenharmony_ci   }
2582bf215546Sopenharmony_ci}
2583bf215546Sopenharmony_ci
2584bf215546Sopenharmony_cistatic inline void
2585bf215546Sopenharmony_ciregmask_clear(regmask_t *regmask, struct ir3_register *reg)
2586bf215546Sopenharmony_ci{
2587bf215546Sopenharmony_ci   bool half = reg->flags & IR3_REG_HALF;
2588bf215546Sopenharmony_ci   if (reg->flags & IR3_REG_RELATIV) {
2589bf215546Sopenharmony_ci      for (unsigned i = 0; i < reg->size; i++)
2590bf215546Sopenharmony_ci         __regmask_clear(regmask, half, reg->array.base + i);
2591bf215546Sopenharmony_ci   } else {
2592bf215546Sopenharmony_ci      for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++)
2593bf215546Sopenharmony_ci         if (mask & 1)
2594bf215546Sopenharmony_ci            __regmask_clear(regmask, half, n);
2595bf215546Sopenharmony_ci   }
2596bf215546Sopenharmony_ci}
2597bf215546Sopenharmony_ci
2598bf215546Sopenharmony_cistatic inline bool
2599bf215546Sopenharmony_ciregmask_get(regmask_t *regmask, struct ir3_register *reg)
2600bf215546Sopenharmony_ci{
2601bf215546Sopenharmony_ci   bool half = reg->flags & IR3_REG_HALF;
2602bf215546Sopenharmony_ci   if (reg->flags & IR3_REG_RELATIV) {
2603bf215546Sopenharmony_ci      for (unsigned i = 0; i < reg->size; i++)
2604bf215546Sopenharmony_ci         if (__regmask_get(regmask, half, reg->array.base + i))
2605bf215546Sopenharmony_ci            return true;
2606bf215546Sopenharmony_ci   } else {
2607bf215546Sopenharmony_ci      for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++)
2608bf215546Sopenharmony_ci         if (mask & 1)
2609bf215546Sopenharmony_ci            if (__regmask_get(regmask, half, n))
2610bf215546Sopenharmony_ci               return true;
2611bf215546Sopenharmony_ci   }
2612bf215546Sopenharmony_ci   return false;
2613bf215546Sopenharmony_ci}
2614bf215546Sopenharmony_ci/* ************************************************************************* */
2615bf215546Sopenharmony_ci
2616bf215546Sopenharmony_ci#endif /* IR3_H_ */
2617