1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (c) 2013 Rob Clark <robdclark@gmail.com> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 
22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#ifndef IR3_H_ 25bf215546Sopenharmony_ci#define IR3_H_ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include <stdbool.h> 28bf215546Sopenharmony_ci#include <stdint.h> 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "compiler/shader_enums.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci#include "util/bitscan.h" 33bf215546Sopenharmony_ci#include "util/list.h" 34bf215546Sopenharmony_ci#include "util/set.h" 35bf215546Sopenharmony_ci#include "util/u_debug.h" 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci#include "instr-a3xx.h" 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci/* low level intermediate representation of an adreno shader program */ 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_cistruct ir3_compiler; 42bf215546Sopenharmony_cistruct ir3; 43bf215546Sopenharmony_cistruct ir3_instruction; 44bf215546Sopenharmony_cistruct ir3_block; 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_cistruct ir3_info { 47bf215546Sopenharmony_ci void *data; /* used internally in ir3 assembler */ 48bf215546Sopenharmony_ci /* Size in bytes of the shader binary, including NIR constants and 49bf215546Sopenharmony_ci * padding 50bf215546Sopenharmony_ci */ 51bf215546Sopenharmony_ci uint32_t size; 52bf215546Sopenharmony_ci /* byte offset from start of the shader to the NIR constant data. */ 53bf215546Sopenharmony_ci uint32_t constant_data_offset; 54bf215546Sopenharmony_ci /* Size in dwords of the instructions. 
*/ 55bf215546Sopenharmony_ci uint16_t sizedwords; 56bf215546Sopenharmony_ci uint16_t instrs_count; /* expanded to account for rpt's */ 57bf215546Sopenharmony_ci uint16_t nops_count; /* # of nop instructions, including nopN */ 58bf215546Sopenharmony_ci uint16_t mov_count; 59bf215546Sopenharmony_ci uint16_t cov_count; 60bf215546Sopenharmony_ci uint16_t stp_count; 61bf215546Sopenharmony_ci uint16_t ldp_count; 62bf215546Sopenharmony_ci /* NOTE: max_reg, etc, does not include registers not touched 63bf215546Sopenharmony_ci * by the shader (ie. vertex fetched via VFD_DECODE but not 64bf215546Sopenharmony_ci * touched by shader) 65bf215546Sopenharmony_ci */ 66bf215546Sopenharmony_ci int8_t max_reg; /* highest GPR # used by shader */ 67bf215546Sopenharmony_ci int8_t max_half_reg; 68bf215546Sopenharmony_ci int16_t max_const; 69bf215546Sopenharmony_ci /* This is the maximum # of waves that can executed at once in one core, 70bf215546Sopenharmony_ci * assuming that they are all executing this shader. 
71bf215546Sopenharmony_ci */ 72bf215546Sopenharmony_ci int8_t max_waves; 73bf215546Sopenharmony_ci bool double_threadsize; 74bf215546Sopenharmony_ci bool multi_dword_ldp_stp; 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci /* number of sync bits: */ 77bf215546Sopenharmony_ci uint16_t ss, sy; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci /* estimate of number of cycles stalled on (ss) */ 80bf215546Sopenharmony_ci uint16_t sstall; 81bf215546Sopenharmony_ci /* estimate of number of cycles stalled on (sy) */ 82bf215546Sopenharmony_ci uint16_t systall; 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci uint16_t last_baryf; /* instruction # of last varying fetch */ 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci /* Number of instructions of a given category: */ 87bf215546Sopenharmony_ci uint16_t instrs_per_cat[8]; 88bf215546Sopenharmony_ci}; 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_cistruct ir3_merge_set { 91bf215546Sopenharmony_ci uint16_t preferred_reg; 92bf215546Sopenharmony_ci uint16_t size; 93bf215546Sopenharmony_ci uint16_t alignment; 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci unsigned interval_start; 96bf215546Sopenharmony_ci unsigned spill_slot; 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci unsigned regs_count; 99bf215546Sopenharmony_ci struct ir3_register **regs; 100bf215546Sopenharmony_ci}; 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_cistruct ir3_register { 103bf215546Sopenharmony_ci enum { 104bf215546Sopenharmony_ci IR3_REG_CONST = 0x001, 105bf215546Sopenharmony_ci IR3_REG_IMMED = 0x002, 106bf215546Sopenharmony_ci IR3_REG_HALF = 0x004, 107bf215546Sopenharmony_ci /* Shared registers have the same value for all threads when read. 108bf215546Sopenharmony_ci * They can only be written when one thread is active (that is, inside 109bf215546Sopenharmony_ci * a "getone" block). 
110bf215546Sopenharmony_ci */ 111bf215546Sopenharmony_ci IR3_REG_SHARED = 0x008, 112bf215546Sopenharmony_ci IR3_REG_RELATIV = 0x010, 113bf215546Sopenharmony_ci IR3_REG_R = 0x020, 114bf215546Sopenharmony_ci /* Most instructions, it seems, can do float abs/neg but not 115bf215546Sopenharmony_ci * integer. The CP pass needs to know what is intended (int or 116bf215546Sopenharmony_ci * float) in order to do the right thing. For this reason the 117bf215546Sopenharmony_ci * abs/neg flags are split out into float and int variants. In 118bf215546Sopenharmony_ci * addition, .b (bitwise) operations, the negate is actually a 119bf215546Sopenharmony_ci * bitwise not, so split that out into a new flag to make it 120bf215546Sopenharmony_ci * more clear. 121bf215546Sopenharmony_ci */ 122bf215546Sopenharmony_ci IR3_REG_FNEG = 0x040, 123bf215546Sopenharmony_ci IR3_REG_FABS = 0x080, 124bf215546Sopenharmony_ci IR3_REG_SNEG = 0x100, 125bf215546Sopenharmony_ci IR3_REG_SABS = 0x200, 126bf215546Sopenharmony_ci IR3_REG_BNOT = 0x400, 127bf215546Sopenharmony_ci /* (ei) flag, end-input? Set on last bary, presumably to signal 128bf215546Sopenharmony_ci * that the shader needs no more input: 129bf215546Sopenharmony_ci * 130bf215546Sopenharmony_ci * Note: Has different meaning on other instructions like add.s/u 131bf215546Sopenharmony_ci */ 132bf215546Sopenharmony_ci IR3_REG_EI = 0x2000, 133bf215546Sopenharmony_ci /* meta-flags, for intermediate stages of IR, ie. 134bf215546Sopenharmony_ci * before register assignment is done: 135bf215546Sopenharmony_ci */ 136bf215546Sopenharmony_ci IR3_REG_SSA = 0x4000, /* 'def' is ptr to assigning destination */ 137bf215546Sopenharmony_ci IR3_REG_ARRAY = 0x8000, 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci /* Set on a use whenever the SSA value becomes dead after the current 140bf215546Sopenharmony_ci * instruction. 
141bf215546Sopenharmony_ci */ 142bf215546Sopenharmony_ci IR3_REG_KILL = 0x10000, 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci /* Similar to IR3_REG_KILL, except that if there are multiple uses of the 145bf215546Sopenharmony_ci * same SSA value in a single instruction, this is only set on the first 146bf215546Sopenharmony_ci * use. 147bf215546Sopenharmony_ci */ 148bf215546Sopenharmony_ci IR3_REG_FIRST_KILL = 0x20000, 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci /* Set when a destination doesn't have any uses and is dead immediately 151bf215546Sopenharmony_ci * after the instruction. This can happen even after optimizations for 152bf215546Sopenharmony_ci * corner cases such as destinations of atomic instructions. 153bf215546Sopenharmony_ci */ 154bf215546Sopenharmony_ci IR3_REG_UNUSED = 0x40000, 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci /* "Early-clobber" on a destination means that the destination is 157bf215546Sopenharmony_ci * (potentially) written before any sources are read and therefore 158bf215546Sopenharmony_ci * interferes with the sources of the instruction. 159bf215546Sopenharmony_ci */ 160bf215546Sopenharmony_ci IR3_REG_EARLY_CLOBBER = 0x80000, 161bf215546Sopenharmony_ci } flags; 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci unsigned name; 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci /* used for cat5 instructions, but also for internal/IR level 166bf215546Sopenharmony_ci * tracking of what registers are read/written by an instruction. 167bf215546Sopenharmony_ci * wrmask may be a bad name since it is used to represent both 168bf215546Sopenharmony_ci * src and dst that touch multiple adjacent registers. 
169bf215546Sopenharmony_ci */ 170bf215546Sopenharmony_ci unsigned wrmask : 16; /* up to vec16 */ 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci /* for relative addressing, 32bits for array size is too small, 173bf215546Sopenharmony_ci * but otoh we don't need to deal with disjoint sets, so instead 174bf215546Sopenharmony_ci * use a simple size field (number of scalar components). 175bf215546Sopenharmony_ci * 176bf215546Sopenharmony_ci * Note the size field isn't important for relative const (since 177bf215546Sopenharmony_ci * we don't have to do register allocation for constants). 178bf215546Sopenharmony_ci */ 179bf215546Sopenharmony_ci unsigned size : 16; 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci /* normal registers: 182bf215546Sopenharmony_ci * the component is in the low two bits of the reg #, so 183bf215546Sopenharmony_ci * rN.x becomes: (N << 2) | x 184bf215546Sopenharmony_ci */ 185bf215546Sopenharmony_ci uint16_t num; 186bf215546Sopenharmony_ci union { 187bf215546Sopenharmony_ci /* immediate: */ 188bf215546Sopenharmony_ci int32_t iim_val; 189bf215546Sopenharmony_ci uint32_t uim_val; 190bf215546Sopenharmony_ci float fim_val; 191bf215546Sopenharmony_ci /* relative: */ 192bf215546Sopenharmony_ci struct { 193bf215546Sopenharmony_ci uint16_t id; 194bf215546Sopenharmony_ci int16_t offset; 195bf215546Sopenharmony_ci uint16_t base; 196bf215546Sopenharmony_ci } array; 197bf215546Sopenharmony_ci }; 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci /* For IR3_REG_SSA, dst registers contain pointer back to the instruction 200bf215546Sopenharmony_ci * containing this register. 201bf215546Sopenharmony_ci */ 202bf215546Sopenharmony_ci struct ir3_instruction *instr; 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci /* For IR3_REG_SSA, src registers contain ptr back to assigning 205bf215546Sopenharmony_ci * instruction. 
206bf215546Sopenharmony_ci * 207bf215546Sopenharmony_ci * For IR3_REG_ARRAY, the pointer is back to the last dependent 208bf215546Sopenharmony_ci * array access (although the net effect is the same, it points 209bf215546Sopenharmony_ci * back to a previous instruction that we depend on). 210bf215546Sopenharmony_ci */ 211bf215546Sopenharmony_ci struct ir3_register *def; 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci /* Pointer to another register in the instruction that must share the same 214bf215546Sopenharmony_ci * physical register. Each destination can be tied with one source, and 215bf215546Sopenharmony_ci * they must have "tied" pointing to each other. 216bf215546Sopenharmony_ci */ 217bf215546Sopenharmony_ci struct ir3_register *tied; 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci unsigned spill_slot, next_use; 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci unsigned merge_set_offset; 222bf215546Sopenharmony_ci struct ir3_merge_set *merge_set; 223bf215546Sopenharmony_ci unsigned interval_start, interval_end; 224bf215546Sopenharmony_ci}; 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci/* 227bf215546Sopenharmony_ci * Stupid/simple growable array implementation: 228bf215546Sopenharmony_ci */ 229bf215546Sopenharmony_ci#define DECLARE_ARRAY(type, name) \ 230bf215546Sopenharmony_ci unsigned name##_count, name##_sz; \ 231bf215546Sopenharmony_ci type *name; 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci#define array_insert(ctx, arr, ...) 
\ 234bf215546Sopenharmony_ci do { \ 235bf215546Sopenharmony_ci if (arr##_count == arr##_sz) { \ 236bf215546Sopenharmony_ci arr##_sz = MAX2(2 * arr##_sz, 16); \ 237bf215546Sopenharmony_ci arr = reralloc_size(ctx, arr, arr##_sz * sizeof(arr[0])); \ 238bf215546Sopenharmony_ci } \ 239bf215546Sopenharmony_ci arr[arr##_count++] = __VA_ARGS__; \ 240bf215546Sopenharmony_ci } while (0) 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_citypedef enum { 243bf215546Sopenharmony_ci REDUCE_OP_ADD_U, 244bf215546Sopenharmony_ci REDUCE_OP_ADD_F, 245bf215546Sopenharmony_ci REDUCE_OP_MUL_U, 246bf215546Sopenharmony_ci REDUCE_OP_MUL_F, 247bf215546Sopenharmony_ci REDUCE_OP_MIN_U, 248bf215546Sopenharmony_ci REDUCE_OP_MIN_S, 249bf215546Sopenharmony_ci REDUCE_OP_MIN_F, 250bf215546Sopenharmony_ci REDUCE_OP_MAX_U, 251bf215546Sopenharmony_ci REDUCE_OP_MAX_S, 252bf215546Sopenharmony_ci REDUCE_OP_MAX_F, 253bf215546Sopenharmony_ci REDUCE_OP_AND_B, 254bf215546Sopenharmony_ci REDUCE_OP_OR_B, 255bf215546Sopenharmony_ci REDUCE_OP_XOR_B, 256bf215546Sopenharmony_ci} reduce_op_t; 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_cistruct ir3_instruction { 259bf215546Sopenharmony_ci struct ir3_block *block; 260bf215546Sopenharmony_ci opc_t opc; 261bf215546Sopenharmony_ci enum { 262bf215546Sopenharmony_ci /* (sy) flag is set on first instruction, and after sample 263bf215546Sopenharmony_ci * instructions (probably just on RAW hazard). 
264bf215546Sopenharmony_ci */ 265bf215546Sopenharmony_ci IR3_INSTR_SY = 0x001, 266bf215546Sopenharmony_ci /* (ss) flag is set on first instruction, and first instruction 267bf215546Sopenharmony_ci * to depend on the result of "long" instructions (RAW hazard): 268bf215546Sopenharmony_ci * 269bf215546Sopenharmony_ci * rcp, rsq, log2, exp2, sin, cos, sqrt 270bf215546Sopenharmony_ci * 271bf215546Sopenharmony_ci * It seems to synchronize until all in-flight instructions are 272bf215546Sopenharmony_ci * completed, for example: 273bf215546Sopenharmony_ci * 274bf215546Sopenharmony_ci * rsq hr1.w, hr1.w 275bf215546Sopenharmony_ci * add.f hr2.z, (neg)hr2.z, hc0.y 276bf215546Sopenharmony_ci * mul.f hr2.w, (neg)hr2.y, (neg)hr2.y 277bf215546Sopenharmony_ci * rsq hr2.x, hr2.x 278bf215546Sopenharmony_ci * (rpt1)nop 279bf215546Sopenharmony_ci * mad.f16 hr2.w, hr2.z, hr2.z, hr2.w 280bf215546Sopenharmony_ci * nop 281bf215546Sopenharmony_ci * mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w 282bf215546Sopenharmony_ci * (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w 283bf215546Sopenharmony_ci * (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x 284bf215546Sopenharmony_ci * 285bf215546Sopenharmony_ci * The last mul.f does not have (ss) set, presumably because the 286bf215546Sopenharmony_ci * (ss) on the previous instruction does the job. 287bf215546Sopenharmony_ci * 288bf215546Sopenharmony_ci * The blob driver also seems to set it on WAR hazards, although 289bf215546Sopenharmony_ci * not really clear if this is needed or just blob compiler being 290bf215546Sopenharmony_ci * sloppy. 
So far I haven't found a case where removing the (ss) 291bf215546Sopenharmony_ci * causes problems for WAR hazard, but I could just be getting 292bf215546Sopenharmony_ci * lucky: 293bf215546Sopenharmony_ci * 294bf215546Sopenharmony_ci * rcp r1.y, r3.y 295bf215546Sopenharmony_ci * (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z 296bf215546Sopenharmony_ci * 297bf215546Sopenharmony_ci */ 298bf215546Sopenharmony_ci IR3_INSTR_SS = 0x002, 299bf215546Sopenharmony_ci /* (jp) flag is set on jump targets: 300bf215546Sopenharmony_ci */ 301bf215546Sopenharmony_ci IR3_INSTR_JP = 0x004, 302bf215546Sopenharmony_ci IR3_INSTR_UL = 0x008, 303bf215546Sopenharmony_ci IR3_INSTR_3D = 0x010, 304bf215546Sopenharmony_ci IR3_INSTR_A = 0x020, 305bf215546Sopenharmony_ci IR3_INSTR_O = 0x040, 306bf215546Sopenharmony_ci IR3_INSTR_P = 0x080, 307bf215546Sopenharmony_ci IR3_INSTR_S = 0x100, 308bf215546Sopenharmony_ci IR3_INSTR_S2EN = 0x200, 309bf215546Sopenharmony_ci IR3_INSTR_SAT = 0x400, 310bf215546Sopenharmony_ci /* (cat5/cat6) Bindless */ 311bf215546Sopenharmony_ci IR3_INSTR_B = 0x800, 312bf215546Sopenharmony_ci /* (cat5/cat6) nonuniform */ 313bf215546Sopenharmony_ci IR3_INSTR_NONUNIF = 0x1000, 314bf215546Sopenharmony_ci /* (cat5-only) Get some parts of the encoding from a1.x */ 315bf215546Sopenharmony_ci IR3_INSTR_A1EN = 0x02000, 316bf215546Sopenharmony_ci /* meta-flags, for intermediate stages of IR, ie. 
317bf215546Sopenharmony_ci * before register assignment is done: 318bf215546Sopenharmony_ci */ 319bf215546Sopenharmony_ci IR3_INSTR_MARK = 0x04000, 320bf215546Sopenharmony_ci IR3_INSTR_UNUSED = 0x08000, 321bf215546Sopenharmony_ci } flags; 322bf215546Sopenharmony_ci uint8_t repeat; 323bf215546Sopenharmony_ci uint8_t nop; 324bf215546Sopenharmony_ci#ifdef DEBUG 325bf215546Sopenharmony_ci unsigned srcs_max, dsts_max; 326bf215546Sopenharmony_ci#endif 327bf215546Sopenharmony_ci unsigned srcs_count, dsts_count; 328bf215546Sopenharmony_ci struct ir3_register **dsts; 329bf215546Sopenharmony_ci struct ir3_register **srcs; 330bf215546Sopenharmony_ci union { 331bf215546Sopenharmony_ci struct { 332bf215546Sopenharmony_ci char inv1, inv2; 333bf215546Sopenharmony_ci char comp1, comp2; 334bf215546Sopenharmony_ci int immed; 335bf215546Sopenharmony_ci struct ir3_block *target; 336bf215546Sopenharmony_ci const char *target_label; 337bf215546Sopenharmony_ci brtype_t brtype; 338bf215546Sopenharmony_ci unsigned idx; /* for brac.N */ 339bf215546Sopenharmony_ci } cat0; 340bf215546Sopenharmony_ci struct { 341bf215546Sopenharmony_ci type_t src_type, dst_type; 342bf215546Sopenharmony_ci round_t round; 343bf215546Sopenharmony_ci reduce_op_t reduce_op; 344bf215546Sopenharmony_ci } cat1; 345bf215546Sopenharmony_ci struct { 346bf215546Sopenharmony_ci enum { 347bf215546Sopenharmony_ci IR3_COND_LT = 0, 348bf215546Sopenharmony_ci IR3_COND_LE = 1, 349bf215546Sopenharmony_ci IR3_COND_GT = 2, 350bf215546Sopenharmony_ci IR3_COND_GE = 3, 351bf215546Sopenharmony_ci IR3_COND_EQ = 4, 352bf215546Sopenharmony_ci IR3_COND_NE = 5, 353bf215546Sopenharmony_ci } condition; 354bf215546Sopenharmony_ci } cat2; 355bf215546Sopenharmony_ci struct { 356bf215546Sopenharmony_ci enum { 357bf215546Sopenharmony_ci IR3_SRC_UNSIGNED = 0, 358bf215546Sopenharmony_ci IR3_SRC_MIXED = 1, 359bf215546Sopenharmony_ci } signedness; 360bf215546Sopenharmony_ci enum { 361bf215546Sopenharmony_ci IR3_SRC_PACKED_LOW = 0, 
362bf215546Sopenharmony_ci IR3_SRC_PACKED_HIGH = 1, 363bf215546Sopenharmony_ci } packed; 364bf215546Sopenharmony_ci bool swapped; 365bf215546Sopenharmony_ci } cat3; 366bf215546Sopenharmony_ci struct { 367bf215546Sopenharmony_ci unsigned samp, tex; 368bf215546Sopenharmony_ci unsigned tex_base : 3; 369bf215546Sopenharmony_ci unsigned cluster_size : 4; 370bf215546Sopenharmony_ci type_t type; 371bf215546Sopenharmony_ci } cat5; 372bf215546Sopenharmony_ci struct { 373bf215546Sopenharmony_ci type_t type; 374bf215546Sopenharmony_ci /* TODO remove dst_offset and handle as a ir3_register 375bf215546Sopenharmony_ci * which might be IMMED, similar to how src_offset is 376bf215546Sopenharmony_ci * handled. 377bf215546Sopenharmony_ci */ 378bf215546Sopenharmony_ci int dst_offset; 379bf215546Sopenharmony_ci int iim_val; /* for ldgb/stgb, # of components */ 380bf215546Sopenharmony_ci unsigned d : 3; /* for ldc, component offset */ 381bf215546Sopenharmony_ci bool typed : 1; 382bf215546Sopenharmony_ci unsigned base : 3; 383bf215546Sopenharmony_ci } cat6; 384bf215546Sopenharmony_ci struct { 385bf215546Sopenharmony_ci unsigned w : 1; /* write */ 386bf215546Sopenharmony_ci unsigned r : 1; /* read */ 387bf215546Sopenharmony_ci unsigned l : 1; /* local */ 388bf215546Sopenharmony_ci unsigned g : 1; /* global */ 389bf215546Sopenharmony_ci } cat7; 390bf215546Sopenharmony_ci /* for meta-instructions, just used to hold extra data 391bf215546Sopenharmony_ci * before instruction scheduling, etc 392bf215546Sopenharmony_ci */ 393bf215546Sopenharmony_ci struct { 394bf215546Sopenharmony_ci int off; /* component/offset */ 395bf215546Sopenharmony_ci } split; 396bf215546Sopenharmony_ci struct { 397bf215546Sopenharmony_ci /* Per-source index back to the entry in the 398bf215546Sopenharmony_ci * ir3_shader_variant::outputs table. 
399bf215546Sopenharmony_ci */ 400bf215546Sopenharmony_ci unsigned *outidxs; 401bf215546Sopenharmony_ci } end; 402bf215546Sopenharmony_ci struct { 403bf215546Sopenharmony_ci /* used to temporarily hold reference to nir_phi_instr 404bf215546Sopenharmony_ci * until we resolve the phi srcs 405bf215546Sopenharmony_ci */ 406bf215546Sopenharmony_ci void *nphi; 407bf215546Sopenharmony_ci } phi; 408bf215546Sopenharmony_ci struct { 409bf215546Sopenharmony_ci unsigned samp, tex; 410bf215546Sopenharmony_ci unsigned input_offset; 411bf215546Sopenharmony_ci unsigned samp_base : 3; 412bf215546Sopenharmony_ci unsigned tex_base : 3; 413bf215546Sopenharmony_ci } prefetch; 414bf215546Sopenharmony_ci struct { 415bf215546Sopenharmony_ci /* maps back to entry in ir3_shader_variant::inputs table: */ 416bf215546Sopenharmony_ci int inidx; 417bf215546Sopenharmony_ci /* for sysvals, identifies the sysval type. Mostly so we can 418bf215546Sopenharmony_ci * identify the special cases where a sysval should not be DCE'd 419bf215546Sopenharmony_ci * (currently, just pre-fs texture fetch) 420bf215546Sopenharmony_ci */ 421bf215546Sopenharmony_ci gl_system_value sysval; 422bf215546Sopenharmony_ci } input; 423bf215546Sopenharmony_ci }; 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci /* For assigning jump offsets, we need instruction's position: */ 426bf215546Sopenharmony_ci uint32_t ip; 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci /* used for per-pass extra instruction data. 429bf215546Sopenharmony_ci * 430bf215546Sopenharmony_ci * TODO we should remove the per-pass data like this and 'use_count' 431bf215546Sopenharmony_ci * and do something similar to what RA does w/ ir3_ra_instr_data.. 432bf215546Sopenharmony_ci * ie. use the ir3_count_instructions pass, and then use instr->ip 433bf215546Sopenharmony_ci * to index into a table of pass-private data. 
434bf215546Sopenharmony_ci */ 435bf215546Sopenharmony_ci void *data; 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci /** 438bf215546Sopenharmony_ci * Valid if pass calls ir3_find_ssa_uses().. see foreach_ssa_use() 439bf215546Sopenharmony_ci */ 440bf215546Sopenharmony_ci struct set *uses; 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_ci int use_count; /* currently just updated/used by cp */ 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci /* an instruction can reference at most one address register amongst 445bf215546Sopenharmony_ci * it's src/dst registers. Beyond that, you need to insert mov's. 446bf215546Sopenharmony_ci * 447bf215546Sopenharmony_ci * NOTE: do not write this directly, use ir3_instr_set_address() 448bf215546Sopenharmony_ci */ 449bf215546Sopenharmony_ci struct ir3_register *address; 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci /* Tracking for additional dependent instructions. Used to handle 452bf215546Sopenharmony_ci * barriers, WAR hazards for arrays/SSBOs/etc. 453bf215546Sopenharmony_ci */ 454bf215546Sopenharmony_ci DECLARE_ARRAY(struct ir3_instruction *, deps); 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci /* 457bf215546Sopenharmony_ci * From PoV of instruction scheduling, not execution (ie. ignores global/ 458bf215546Sopenharmony_ci * local distinction): 459bf215546Sopenharmony_ci * shared image atomic SSBO everything 460bf215546Sopenharmony_ci * barrier()/ - R/W R/W R/W R/W X 461bf215546Sopenharmony_ci * groupMemoryBarrier() 462bf215546Sopenharmony_ci * memoryBarrier() 463bf215546Sopenharmony_ci * (but only images declared coherent?) 
464bf215546Sopenharmony_ci * memoryBarrierAtomic() - R/W 465bf215546Sopenharmony_ci * memoryBarrierBuffer() - R/W 466bf215546Sopenharmony_ci * memoryBarrierImage() - R/W 467bf215546Sopenharmony_ci * memoryBarrierShared() - R/W 468bf215546Sopenharmony_ci * 469bf215546Sopenharmony_ci * TODO I think for SSBO/image/shared, in cases where we can determine 470bf215546Sopenharmony_ci * which variable is accessed, we don't need to care about accesses to 471bf215546Sopenharmony_ci * different variables (unless declared coherent??) 472bf215546Sopenharmony_ci */ 473bf215546Sopenharmony_ci enum { 474bf215546Sopenharmony_ci IR3_BARRIER_EVERYTHING = 1 << 0, 475bf215546Sopenharmony_ci IR3_BARRIER_SHARED_R = 1 << 1, 476bf215546Sopenharmony_ci IR3_BARRIER_SHARED_W = 1 << 2, 477bf215546Sopenharmony_ci IR3_BARRIER_IMAGE_R = 1 << 3, 478bf215546Sopenharmony_ci IR3_BARRIER_IMAGE_W = 1 << 4, 479bf215546Sopenharmony_ci IR3_BARRIER_BUFFER_R = 1 << 5, 480bf215546Sopenharmony_ci IR3_BARRIER_BUFFER_W = 1 << 6, 481bf215546Sopenharmony_ci IR3_BARRIER_ARRAY_R = 1 << 7, 482bf215546Sopenharmony_ci IR3_BARRIER_ARRAY_W = 1 << 8, 483bf215546Sopenharmony_ci IR3_BARRIER_PRIVATE_R = 1 << 9, 484bf215546Sopenharmony_ci IR3_BARRIER_PRIVATE_W = 1 << 10, 485bf215546Sopenharmony_ci IR3_BARRIER_CONST_W = 1 << 11, 486bf215546Sopenharmony_ci IR3_BARRIER_ACTIVE_FIBERS_R = 1 << 12, 487bf215546Sopenharmony_ci IR3_BARRIER_ACTIVE_FIBERS_W = 1 << 13, 488bf215546Sopenharmony_ci } barrier_class, 489bf215546Sopenharmony_ci barrier_conflict; 490bf215546Sopenharmony_ci 491bf215546Sopenharmony_ci /* Entry in ir3_block's instruction list: */ 492bf215546Sopenharmony_ci struct list_head node; 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci uint32_t serialno; 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci // TODO only computerator/assembler: 497bf215546Sopenharmony_ci int line; 498bf215546Sopenharmony_ci}; 499bf215546Sopenharmony_ci 500bf215546Sopenharmony_cistruct ir3 { 501bf215546Sopenharmony_ci struct 
ir3_compiler *compiler; 502bf215546Sopenharmony_ci gl_shader_stage type; 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_ci DECLARE_ARRAY(struct ir3_instruction *, inputs); 505bf215546Sopenharmony_ci 506bf215546Sopenharmony_ci /* Track bary.f (and ldlv) instructions.. this is needed in 507bf215546Sopenharmony_ci * scheduling to ensure that all varying fetches happen before 508bf215546Sopenharmony_ci * any potential kill instructions. The hw gets grumpy if all 509bf215546Sopenharmony_ci * threads in a group are killed before the last bary.f gets 510bf215546Sopenharmony_ci * a chance to signal end of input (ei). 511bf215546Sopenharmony_ci */ 512bf215546Sopenharmony_ci DECLARE_ARRAY(struct ir3_instruction *, baryfs); 513bf215546Sopenharmony_ci 514bf215546Sopenharmony_ci /* Track all indirect instructions (read and write). To avoid 515bf215546Sopenharmony_ci * deadlock scenario where an address register gets scheduled, 516bf215546Sopenharmony_ci * but other dependent src instructions cannot be scheduled due 517bf215546Sopenharmony_ci * to dependency on a *different* address register value, the 518bf215546Sopenharmony_ci * scheduler needs to ensure that all dependencies other than 519bf215546Sopenharmony_ci * the instruction other than the address register are scheduled 520bf215546Sopenharmony_ci * before the one that writes the address register. Having a 521bf215546Sopenharmony_ci * convenient list of instructions that reference some address 522bf215546Sopenharmony_ci * register simplifies this. 
523bf215546Sopenharmony_ci */ 524bf215546Sopenharmony_ci DECLARE_ARRAY(struct ir3_instruction *, a0_users); 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci /* same for a1.x: */ 527bf215546Sopenharmony_ci DECLARE_ARRAY(struct ir3_instruction *, a1_users); 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_ci /* and same for instructions that consume predicate register: */ 530bf215546Sopenharmony_ci DECLARE_ARRAY(struct ir3_instruction *, predicates); 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_ci /* Track texture sample instructions which need texture state 533bf215546Sopenharmony_ci * patched in (for astc-srgb workaround): 534bf215546Sopenharmony_ci */ 535bf215546Sopenharmony_ci DECLARE_ARRAY(struct ir3_instruction *, astc_srgb); 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci /* Track tg4 instructions which need texture state patched in (for tg4 538bf215546Sopenharmony_ci * swizzling workaround): 539bf215546Sopenharmony_ci */ 540bf215546Sopenharmony_ci DECLARE_ARRAY(struct ir3_instruction *, tg4); 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci /* List of blocks: */ 543bf215546Sopenharmony_ci struct list_head block_list; 544bf215546Sopenharmony_ci 545bf215546Sopenharmony_ci /* List of ir3_array's: */ 546bf215546Sopenharmony_ci struct list_head array_list; 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci#ifdef DEBUG 549bf215546Sopenharmony_ci unsigned block_count; 550bf215546Sopenharmony_ci#endif 551bf215546Sopenharmony_ci unsigned instr_count; 552bf215546Sopenharmony_ci}; 553bf215546Sopenharmony_ci 554bf215546Sopenharmony_cistruct ir3_array { 555bf215546Sopenharmony_ci struct list_head node; 556bf215546Sopenharmony_ci unsigned length; 557bf215546Sopenharmony_ci unsigned id; 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci struct nir_register *r; 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci /* To avoid array write's from getting DCE'd, keep track of the 562bf215546Sopenharmony_ci * most recent write. 
Any array access depends on the most 563bf215546Sopenharmony_ci * recent write. This way, nothing depends on writes after the 564bf215546Sopenharmony_ci * last read. But all the writes that happen before that have 565bf215546Sopenharmony_ci * something depending on them 566bf215546Sopenharmony_ci */ 567bf215546Sopenharmony_ci struct ir3_register *last_write; 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_ci /* extra stuff used in RA pass: */ 570bf215546Sopenharmony_ci unsigned base; /* base vreg name */ 571bf215546Sopenharmony_ci unsigned reg; /* base physical reg */ 572bf215546Sopenharmony_ci uint16_t start_ip, end_ip; 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_ci /* Indicates if half-precision */ 575bf215546Sopenharmony_ci bool half; 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_ci bool unused; 578bf215546Sopenharmony_ci}; 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_cistruct ir3_array *ir3_lookup_array(struct ir3 *ir, unsigned id); 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_cienum ir3_branch_type { 583bf215546Sopenharmony_ci IR3_BRANCH_COND, /* condition */ 584bf215546Sopenharmony_ci IR3_BRANCH_ANY, /* subgroupAny(condition) */ 585bf215546Sopenharmony_ci IR3_BRANCH_ALL, /* subgroupAll(condition) */ 586bf215546Sopenharmony_ci IR3_BRANCH_GETONE, /* subgroupElect() */ 587bf215546Sopenharmony_ci IR3_BRANCH_SHPS, /* preamble start */ 588bf215546Sopenharmony_ci}; 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_cistruct ir3_block { 591bf215546Sopenharmony_ci struct list_head node; 592bf215546Sopenharmony_ci struct ir3 *shader; 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci const struct nir_block *nblock; 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci struct list_head instr_list; /* list of ir3_instruction */ 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci /* The actual branch condition, if there are two successors */ 599bf215546Sopenharmony_ci enum ir3_branch_type brtype; 600bf215546Sopenharmony_ci 
601bf215546Sopenharmony_ci /* each block has either one or two successors.. in case of two 602bf215546Sopenharmony_ci * successors, 'condition' decides which one to follow. A block preceding 603bf215546Sopenharmony_ci * an if/else has two successors. 604bf215546Sopenharmony_ci * 605bf215546Sopenharmony_ci * In some cases the path that the machine actually takes through the 606bf215546Sopenharmony_ci * program may not match the per-thread view of the CFG. In particular 607bf215546Sopenharmony_ci * this is the case for if/else, where the machine jumps from the end of 608bf215546Sopenharmony_ci * the if to the beginning of the else and switches active lanes. While 609bf215546Sopenharmony_ci * most things only care about the per-thread view, we need to use the 610bf215546Sopenharmony_ci * "physical" view when allocating shared registers. "successors" contains 611bf215546Sopenharmony_ci * the per-thread successors, and "physical_successors" contains the 612bf215546Sopenharmony_ci * physical successors which includes the fallthrough edge from the if to 613bf215546Sopenharmony_ci * the else. 614bf215546Sopenharmony_ci */ 615bf215546Sopenharmony_ci struct ir3_instruction *condition; 616bf215546Sopenharmony_ci struct ir3_block *successors[2]; 617bf215546Sopenharmony_ci struct ir3_block *physical_successors[2]; 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_ci DECLARE_ARRAY(struct ir3_block *, predecessors); 620bf215546Sopenharmony_ci DECLARE_ARRAY(struct ir3_block *, physical_predecessors); 621bf215546Sopenharmony_ci 622bf215546Sopenharmony_ci uint16_t start_ip, end_ip; 623bf215546Sopenharmony_ci 624bf215546Sopenharmony_ci /* Track instructions which do not write a register but other- 625bf215546Sopenharmony_ci * wise must not be discarded (such as kill, stg, etc) 626bf215546Sopenharmony_ci */ 627bf215546Sopenharmony_ci DECLARE_ARRAY(struct ir3_instruction *, keeps); 628bf215546Sopenharmony_ci 629bf215546Sopenharmony_ci /* used for per-pass extra block data. 
Mainly used right 630bf215546Sopenharmony_ci * now in RA step to track livein/liveout. 631bf215546Sopenharmony_ci */ 632bf215546Sopenharmony_ci void *data; 633bf215546Sopenharmony_ci 634bf215546Sopenharmony_ci uint32_t index; 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci struct ir3_block *imm_dom; 637bf215546Sopenharmony_ci DECLARE_ARRAY(struct ir3_block *, dom_children); 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_ci uint32_t dom_pre_index; 640bf215546Sopenharmony_ci uint32_t dom_post_index; 641bf215546Sopenharmony_ci 642bf215546Sopenharmony_ci uint32_t loop_id; 643bf215546Sopenharmony_ci uint32_t loop_depth; 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci#ifdef DEBUG 646bf215546Sopenharmony_ci uint32_t serialno; 647bf215546Sopenharmony_ci#endif 648bf215546Sopenharmony_ci}; 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_cistatic inline uint32_t 651bf215546Sopenharmony_ciblock_id(struct ir3_block *block) 652bf215546Sopenharmony_ci{ 653bf215546Sopenharmony_ci#ifdef DEBUG 654bf215546Sopenharmony_ci return block->serialno; 655bf215546Sopenharmony_ci#else 656bf215546Sopenharmony_ci return (uint32_t)(unsigned long)block; 657bf215546Sopenharmony_ci#endif 658bf215546Sopenharmony_ci} 659bf215546Sopenharmony_ci 660bf215546Sopenharmony_cistatic inline struct ir3_block * 661bf215546Sopenharmony_ciir3_start_block(struct ir3 *ir) 662bf215546Sopenharmony_ci{ 663bf215546Sopenharmony_ci return list_first_entry(&ir->block_list, struct ir3_block, node); 664bf215546Sopenharmony_ci} 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_cistatic inline struct ir3_block * 667bf215546Sopenharmony_ciir3_after_preamble(struct ir3 *ir) 668bf215546Sopenharmony_ci{ 669bf215546Sopenharmony_ci struct ir3_block *block = ir3_start_block(ir); 670bf215546Sopenharmony_ci /* The preamble will have a usually-empty else branch, and we want to skip 671bf215546Sopenharmony_ci * that to get to the block after the preamble. 
672bf215546Sopenharmony_ci */ 673bf215546Sopenharmony_ci if (block->brtype == IR3_BRANCH_SHPS) 674bf215546Sopenharmony_ci return block->successors[1]->successors[0]; 675bf215546Sopenharmony_ci else 676bf215546Sopenharmony_ci return block; 677bf215546Sopenharmony_ci} 678bf215546Sopenharmony_ci 679bf215546Sopenharmony_civoid ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred); 680bf215546Sopenharmony_civoid ir3_block_add_physical_predecessor(struct ir3_block *block, 681bf215546Sopenharmony_ci struct ir3_block *pred); 682bf215546Sopenharmony_civoid ir3_block_remove_predecessor(struct ir3_block *block, 683bf215546Sopenharmony_ci struct ir3_block *pred); 684bf215546Sopenharmony_civoid ir3_block_remove_physical_predecessor(struct ir3_block *block, 685bf215546Sopenharmony_ci struct ir3_block *pred); 686bf215546Sopenharmony_ciunsigned ir3_block_get_pred_index(struct ir3_block *block, 687bf215546Sopenharmony_ci struct ir3_block *pred); 688bf215546Sopenharmony_ci 689bf215546Sopenharmony_civoid ir3_calc_dominance(struct ir3 *ir); 690bf215546Sopenharmony_cibool ir3_block_dominates(struct ir3_block *a, struct ir3_block *b); 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_cistruct ir3_shader_variant; 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_cistruct ir3 *ir3_create(struct ir3_compiler *compiler, 695bf215546Sopenharmony_ci struct ir3_shader_variant *v); 696bf215546Sopenharmony_civoid ir3_destroy(struct ir3 *shader); 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_civoid ir3_collect_info(struct ir3_shader_variant *v); 699bf215546Sopenharmony_civoid *ir3_alloc(struct ir3 *shader, int sz); 700bf215546Sopenharmony_ci 701bf215546Sopenharmony_ciunsigned ir3_get_reg_dependent_max_waves(const struct ir3_compiler *compiler, 702bf215546Sopenharmony_ci unsigned reg_count, 703bf215546Sopenharmony_ci bool double_threadsize); 704bf215546Sopenharmony_ci 705bf215546Sopenharmony_ciunsigned ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v, 
706bf215546Sopenharmony_ci bool double_threadsize); 707bf215546Sopenharmony_ci 708bf215546Sopenharmony_cibool ir3_should_double_threadsize(struct ir3_shader_variant *v, 709bf215546Sopenharmony_ci unsigned regs_count); 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_cistruct ir3_block *ir3_block_create(struct ir3 *shader); 712bf215546Sopenharmony_ci 713bf215546Sopenharmony_cistruct ir3_instruction *ir3_instr_create(struct ir3_block *block, opc_t opc, 714bf215546Sopenharmony_ci int ndst, int nsrc); 715bf215546Sopenharmony_cistruct ir3_instruction *ir3_instr_clone(struct ir3_instruction *instr); 716bf215546Sopenharmony_civoid ir3_instr_add_dep(struct ir3_instruction *instr, 717bf215546Sopenharmony_ci struct ir3_instruction *dep); 718bf215546Sopenharmony_ciconst char *ir3_instr_name(struct ir3_instruction *instr); 719bf215546Sopenharmony_ci 720bf215546Sopenharmony_cistruct ir3_register *ir3_src_create(struct ir3_instruction *instr, int num, 721bf215546Sopenharmony_ci int flags); 722bf215546Sopenharmony_cistruct ir3_register *ir3_dst_create(struct ir3_instruction *instr, int num, 723bf215546Sopenharmony_ci int flags); 724bf215546Sopenharmony_cistruct ir3_register *ir3_reg_clone(struct ir3 *shader, 725bf215546Sopenharmony_ci struct ir3_register *reg); 726bf215546Sopenharmony_ci 727bf215546Sopenharmony_cistatic inline void 728bf215546Sopenharmony_ciir3_reg_tie(struct ir3_register *dst, struct ir3_register *src) 729bf215546Sopenharmony_ci{ 730bf215546Sopenharmony_ci assert(!dst->tied && !src->tied); 731bf215546Sopenharmony_ci dst->tied = src; 732bf215546Sopenharmony_ci src->tied = dst; 733bf215546Sopenharmony_ci} 734bf215546Sopenharmony_ci 735bf215546Sopenharmony_civoid ir3_reg_set_last_array(struct ir3_instruction *instr, 736bf215546Sopenharmony_ci struct ir3_register *reg, 737bf215546Sopenharmony_ci struct ir3_register *last_write); 738bf215546Sopenharmony_ci 739bf215546Sopenharmony_civoid ir3_instr_set_address(struct ir3_instruction *instr, 740bf215546Sopenharmony_ci 
struct ir3_instruction *addr); 741bf215546Sopenharmony_ci 742bf215546Sopenharmony_cistatic inline bool 743bf215546Sopenharmony_ciir3_instr_check_mark(struct ir3_instruction *instr) 744bf215546Sopenharmony_ci{ 745bf215546Sopenharmony_ci if (instr->flags & IR3_INSTR_MARK) 746bf215546Sopenharmony_ci return true; /* already visited */ 747bf215546Sopenharmony_ci instr->flags |= IR3_INSTR_MARK; 748bf215546Sopenharmony_ci return false; 749bf215546Sopenharmony_ci} 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_civoid ir3_block_clear_mark(struct ir3_block *block); 752bf215546Sopenharmony_civoid ir3_clear_mark(struct ir3 *shader); 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ciunsigned ir3_count_instructions(struct ir3 *ir); 755bf215546Sopenharmony_ciunsigned ir3_count_instructions_ra(struct ir3 *ir); 756bf215546Sopenharmony_ci 757bf215546Sopenharmony_ci/** 758bf215546Sopenharmony_ci * Move 'instr' to just before 'after' 759bf215546Sopenharmony_ci */ 760bf215546Sopenharmony_cistatic inline void 761bf215546Sopenharmony_ciir3_instr_move_before(struct ir3_instruction *instr, 762bf215546Sopenharmony_ci struct ir3_instruction *after) 763bf215546Sopenharmony_ci{ 764bf215546Sopenharmony_ci list_delinit(&instr->node); 765bf215546Sopenharmony_ci list_addtail(&instr->node, &after->node); 766bf215546Sopenharmony_ci} 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ci/** 769bf215546Sopenharmony_ci * Move 'instr' to just after 'before': 770bf215546Sopenharmony_ci */ 771bf215546Sopenharmony_cistatic inline void 772bf215546Sopenharmony_ciir3_instr_move_after(struct ir3_instruction *instr, 773bf215546Sopenharmony_ci struct ir3_instruction *before) 774bf215546Sopenharmony_ci{ 775bf215546Sopenharmony_ci list_delinit(&instr->node); 776bf215546Sopenharmony_ci list_add(&instr->node, &before->node); 777bf215546Sopenharmony_ci} 778bf215546Sopenharmony_ci 779bf215546Sopenharmony_ci/** 780bf215546Sopenharmony_ci * Move 'instr' to the beginning of the block: 781bf215546Sopenharmony_ci */ 
782bf215546Sopenharmony_cistatic inline void 783bf215546Sopenharmony_ciir3_instr_move_before_block(struct ir3_instruction *instr, 784bf215546Sopenharmony_ci struct ir3_block *block) 785bf215546Sopenharmony_ci{ 786bf215546Sopenharmony_ci list_delinit(&instr->node); 787bf215546Sopenharmony_ci list_add(&instr->node, &block->instr_list); 788bf215546Sopenharmony_ci} 789bf215546Sopenharmony_ci 790bf215546Sopenharmony_civoid ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps); 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_civoid ir3_set_dst_type(struct ir3_instruction *instr, bool half); 793bf215546Sopenharmony_civoid ir3_fixup_src_type(struct ir3_instruction *instr); 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_ciint ir3_flut(struct ir3_register *src_reg); 796bf215546Sopenharmony_ci 797bf215546Sopenharmony_cibool ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags); 798bf215546Sopenharmony_ci 799bf215546Sopenharmony_cibool ir3_valid_immediate(struct ir3_instruction *instr, int32_t immed); 800bf215546Sopenharmony_ci 801bf215546Sopenharmony_ci#include "util/set.h" 802bf215546Sopenharmony_ci#define foreach_ssa_use(__use, __instr) \ 803bf215546Sopenharmony_ci for (struct ir3_instruction *__use = (void *)~0; __use && (__instr)->uses; \ 804bf215546Sopenharmony_ci __use = NULL) \ 805bf215546Sopenharmony_ci set_foreach ((__instr)->uses, __entry) \ 806bf215546Sopenharmony_ci if ((__use = (void *)__entry->key)) 807bf215546Sopenharmony_ci 808bf215546Sopenharmony_cistatic inline uint32_t 809bf215546Sopenharmony_cireg_num(const struct ir3_register *reg) 810bf215546Sopenharmony_ci{ 811bf215546Sopenharmony_ci return reg->num >> 2; 812bf215546Sopenharmony_ci} 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_cistatic inline uint32_t 815bf215546Sopenharmony_cireg_comp(const struct ir3_register *reg) 816bf215546Sopenharmony_ci{ 817bf215546Sopenharmony_ci return reg->num & 0x3; 818bf215546Sopenharmony_ci} 819bf215546Sopenharmony_ci 
820bf215546Sopenharmony_cistatic inline bool 821bf215546Sopenharmony_ciis_flow(struct ir3_instruction *instr) 822bf215546Sopenharmony_ci{ 823bf215546Sopenharmony_ci return (opc_cat(instr->opc) == 0); 824bf215546Sopenharmony_ci} 825bf215546Sopenharmony_ci 826bf215546Sopenharmony_cistatic inline bool 827bf215546Sopenharmony_ciis_kill_or_demote(struct ir3_instruction *instr) 828bf215546Sopenharmony_ci{ 829bf215546Sopenharmony_ci return instr->opc == OPC_KILL || instr->opc == OPC_DEMOTE; 830bf215546Sopenharmony_ci} 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_cistatic inline bool 833bf215546Sopenharmony_ciis_nop(struct ir3_instruction *instr) 834bf215546Sopenharmony_ci{ 835bf215546Sopenharmony_ci return instr->opc == OPC_NOP; 836bf215546Sopenharmony_ci} 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_cistatic inline bool 839bf215546Sopenharmony_ciis_same_type_reg(struct ir3_register *dst, struct ir3_register *src) 840bf215546Sopenharmony_ci{ 841bf215546Sopenharmony_ci unsigned dst_type = (dst->flags & IR3_REG_HALF); 842bf215546Sopenharmony_ci unsigned src_type = (src->flags & IR3_REG_HALF); 843bf215546Sopenharmony_ci 844bf215546Sopenharmony_ci /* Treat shared->normal copies as same-type, because they can generally be 845bf215546Sopenharmony_ci * folded, but not normal->shared copies. 846bf215546Sopenharmony_ci */ 847bf215546Sopenharmony_ci if (dst_type != src_type || 848bf215546Sopenharmony_ci ((dst->flags & IR3_REG_SHARED) && !(src->flags & IR3_REG_SHARED))) 849bf215546Sopenharmony_ci return false; 850bf215546Sopenharmony_ci else 851bf215546Sopenharmony_ci return true; 852bf215546Sopenharmony_ci} 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_ci/* Is it a non-transformative (ie. not type changing) mov? This can 855bf215546Sopenharmony_ci * also include absneg.s/absneg.f, which for the most part can be 856bf215546Sopenharmony_ci * treated as a mov (single src argument). 
857bf215546Sopenharmony_ci */ 858bf215546Sopenharmony_cistatic inline bool 859bf215546Sopenharmony_ciis_same_type_mov(struct ir3_instruction *instr) 860bf215546Sopenharmony_ci{ 861bf215546Sopenharmony_ci struct ir3_register *dst; 862bf215546Sopenharmony_ci 863bf215546Sopenharmony_ci switch (instr->opc) { 864bf215546Sopenharmony_ci case OPC_MOV: 865bf215546Sopenharmony_ci if (instr->cat1.src_type != instr->cat1.dst_type) 866bf215546Sopenharmony_ci return false; 867bf215546Sopenharmony_ci /* If the type of dest reg and src reg are different, 868bf215546Sopenharmony_ci * it shouldn't be considered as same type mov 869bf215546Sopenharmony_ci */ 870bf215546Sopenharmony_ci if (!is_same_type_reg(instr->dsts[0], instr->srcs[0])) 871bf215546Sopenharmony_ci return false; 872bf215546Sopenharmony_ci break; 873bf215546Sopenharmony_ci case OPC_ABSNEG_F: 874bf215546Sopenharmony_ci case OPC_ABSNEG_S: 875bf215546Sopenharmony_ci if (instr->flags & IR3_INSTR_SAT) 876bf215546Sopenharmony_ci return false; 877bf215546Sopenharmony_ci /* If the type of dest reg and src reg are different, 878bf215546Sopenharmony_ci * it shouldn't be considered as same type mov 879bf215546Sopenharmony_ci */ 880bf215546Sopenharmony_ci if (!is_same_type_reg(instr->dsts[0], instr->srcs[0])) 881bf215546Sopenharmony_ci return false; 882bf215546Sopenharmony_ci break; 883bf215546Sopenharmony_ci case OPC_META_PHI: 884bf215546Sopenharmony_ci return instr->srcs_count == 1; 885bf215546Sopenharmony_ci default: 886bf215546Sopenharmony_ci return false; 887bf215546Sopenharmony_ci } 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_ci dst = instr->dsts[0]; 890bf215546Sopenharmony_ci 891bf215546Sopenharmony_ci /* mov's that write to a0 or p0.x are special: */ 892bf215546Sopenharmony_ci if (dst->num == regid(REG_P0, 0)) 893bf215546Sopenharmony_ci return false; 894bf215546Sopenharmony_ci if (reg_num(dst) == REG_A0) 895bf215546Sopenharmony_ci return false; 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci if (dst->flags & 
(IR3_REG_RELATIV | IR3_REG_ARRAY)) 898bf215546Sopenharmony_ci return false; 899bf215546Sopenharmony_ci 900bf215546Sopenharmony_ci return true; 901bf215546Sopenharmony_ci} 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_ci/* A move from const, which changes size but not type, can also be 904bf215546Sopenharmony_ci * folded into dest instruction in some cases. 905bf215546Sopenharmony_ci */ 906bf215546Sopenharmony_cistatic inline bool 907bf215546Sopenharmony_ciis_const_mov(struct ir3_instruction *instr) 908bf215546Sopenharmony_ci{ 909bf215546Sopenharmony_ci if (instr->opc != OPC_MOV) 910bf215546Sopenharmony_ci return false; 911bf215546Sopenharmony_ci 912bf215546Sopenharmony_ci if (!(instr->srcs[0]->flags & IR3_REG_CONST)) 913bf215546Sopenharmony_ci return false; 914bf215546Sopenharmony_ci 915bf215546Sopenharmony_ci type_t src_type = instr->cat1.src_type; 916bf215546Sopenharmony_ci type_t dst_type = instr->cat1.dst_type; 917bf215546Sopenharmony_ci 918bf215546Sopenharmony_ci return (type_float(src_type) && type_float(dst_type)) || 919bf215546Sopenharmony_ci (type_uint(src_type) && type_uint(dst_type)) || 920bf215546Sopenharmony_ci (type_sint(src_type) && type_sint(dst_type)); 921bf215546Sopenharmony_ci} 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_cistatic inline bool 924bf215546Sopenharmony_ciis_subgroup_cond_mov_macro(struct ir3_instruction *instr) 925bf215546Sopenharmony_ci{ 926bf215546Sopenharmony_ci switch (instr->opc) { 927bf215546Sopenharmony_ci case OPC_BALLOT_MACRO: 928bf215546Sopenharmony_ci case OPC_ANY_MACRO: 929bf215546Sopenharmony_ci case OPC_ALL_MACRO: 930bf215546Sopenharmony_ci case OPC_ELECT_MACRO: 931bf215546Sopenharmony_ci case OPC_READ_COND_MACRO: 932bf215546Sopenharmony_ci case OPC_READ_FIRST_MACRO: 933bf215546Sopenharmony_ci case OPC_SWZ_SHARED_MACRO: 934bf215546Sopenharmony_ci case OPC_SCAN_MACRO: 935bf215546Sopenharmony_ci return true; 936bf215546Sopenharmony_ci default: 937bf215546Sopenharmony_ci return false; 938bf215546Sopenharmony_ci 
} 939bf215546Sopenharmony_ci} 940bf215546Sopenharmony_ci 941bf215546Sopenharmony_cistatic inline bool 942bf215546Sopenharmony_ciis_alu(struct ir3_instruction *instr) 943bf215546Sopenharmony_ci{ 944bf215546Sopenharmony_ci return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3); 945bf215546Sopenharmony_ci} 946bf215546Sopenharmony_ci 947bf215546Sopenharmony_cistatic inline bool 948bf215546Sopenharmony_ciis_sfu(struct ir3_instruction *instr) 949bf215546Sopenharmony_ci{ 950bf215546Sopenharmony_ci return (opc_cat(instr->opc) == 4) || instr->opc == OPC_GETFIBERID; 951bf215546Sopenharmony_ci} 952bf215546Sopenharmony_ci 953bf215546Sopenharmony_cistatic inline bool 954bf215546Sopenharmony_ciis_tex(struct ir3_instruction *instr) 955bf215546Sopenharmony_ci{ 956bf215546Sopenharmony_ci return (opc_cat(instr->opc) == 5); 957bf215546Sopenharmony_ci} 958bf215546Sopenharmony_ci 959bf215546Sopenharmony_cistatic inline bool 960bf215546Sopenharmony_ciis_tex_or_prefetch(struct ir3_instruction *instr) 961bf215546Sopenharmony_ci{ 962bf215546Sopenharmony_ci return is_tex(instr) || (instr->opc == OPC_META_TEX_PREFETCH); 963bf215546Sopenharmony_ci} 964bf215546Sopenharmony_ci 965bf215546Sopenharmony_cistatic inline bool 966bf215546Sopenharmony_ciis_mem(struct ir3_instruction *instr) 967bf215546Sopenharmony_ci{ 968bf215546Sopenharmony_ci return (opc_cat(instr->opc) == 6) && instr->opc != OPC_GETFIBERID; 969bf215546Sopenharmony_ci} 970bf215546Sopenharmony_ci 971bf215546Sopenharmony_cistatic inline bool 972bf215546Sopenharmony_ciis_barrier(struct ir3_instruction *instr) 973bf215546Sopenharmony_ci{ 974bf215546Sopenharmony_ci return (opc_cat(instr->opc) == 7); 975bf215546Sopenharmony_ci} 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_cistatic inline bool 978bf215546Sopenharmony_ciis_half(struct ir3_instruction *instr) 979bf215546Sopenharmony_ci{ 980bf215546Sopenharmony_ci return !!(instr->dsts[0]->flags & IR3_REG_HALF); 981bf215546Sopenharmony_ci} 982bf215546Sopenharmony_ci 
983bf215546Sopenharmony_cistatic inline bool 984bf215546Sopenharmony_ciis_shared(struct ir3_instruction *instr) 985bf215546Sopenharmony_ci{ 986bf215546Sopenharmony_ci return !!(instr->dsts[0]->flags & IR3_REG_SHARED); 987bf215546Sopenharmony_ci} 988bf215546Sopenharmony_ci 989bf215546Sopenharmony_cistatic inline bool 990bf215546Sopenharmony_ciis_store(struct ir3_instruction *instr) 991bf215546Sopenharmony_ci{ 992bf215546Sopenharmony_ci /* these instructions, the "destination" register is 993bf215546Sopenharmony_ci * actually a source, the address to store to. 994bf215546Sopenharmony_ci */ 995bf215546Sopenharmony_ci switch (instr->opc) { 996bf215546Sopenharmony_ci case OPC_STG: 997bf215546Sopenharmony_ci case OPC_STG_A: 998bf215546Sopenharmony_ci case OPC_STGB: 999bf215546Sopenharmony_ci case OPC_STIB: 1000bf215546Sopenharmony_ci case OPC_STP: 1001bf215546Sopenharmony_ci case OPC_STL: 1002bf215546Sopenharmony_ci case OPC_STLW: 1003bf215546Sopenharmony_ci case OPC_L2G: 1004bf215546Sopenharmony_ci case OPC_G2L: 1005bf215546Sopenharmony_ci return true; 1006bf215546Sopenharmony_ci default: 1007bf215546Sopenharmony_ci return false; 1008bf215546Sopenharmony_ci } 1009bf215546Sopenharmony_ci} 1010bf215546Sopenharmony_ci 1011bf215546Sopenharmony_cistatic inline bool 1012bf215546Sopenharmony_ciis_load(struct ir3_instruction *instr) 1013bf215546Sopenharmony_ci{ 1014bf215546Sopenharmony_ci switch (instr->opc) { 1015bf215546Sopenharmony_ci case OPC_LDG: 1016bf215546Sopenharmony_ci case OPC_LDG_A: 1017bf215546Sopenharmony_ci case OPC_LDGB: 1018bf215546Sopenharmony_ci case OPC_LDIB: 1019bf215546Sopenharmony_ci case OPC_LDL: 1020bf215546Sopenharmony_ci case OPC_LDP: 1021bf215546Sopenharmony_ci case OPC_L2G: 1022bf215546Sopenharmony_ci case OPC_LDLW: 1023bf215546Sopenharmony_ci case OPC_LDC: 1024bf215546Sopenharmony_ci case OPC_LDLV: 1025bf215546Sopenharmony_ci /* probably some others too.. 
*/ 1026bf215546Sopenharmony_ci return true; 1027bf215546Sopenharmony_ci default: 1028bf215546Sopenharmony_ci return false; 1029bf215546Sopenharmony_ci } 1030bf215546Sopenharmony_ci} 1031bf215546Sopenharmony_ci 1032bf215546Sopenharmony_cistatic inline bool 1033bf215546Sopenharmony_ciis_input(struct ir3_instruction *instr) 1034bf215546Sopenharmony_ci{ 1035bf215546Sopenharmony_ci /* in some cases, ldlv is used to fetch varying without 1036bf215546Sopenharmony_ci * interpolation.. fortunately inloc is the first src 1037bf215546Sopenharmony_ci * register in either case 1038bf215546Sopenharmony_ci */ 1039bf215546Sopenharmony_ci switch (instr->opc) { 1040bf215546Sopenharmony_ci case OPC_LDLV: 1041bf215546Sopenharmony_ci case OPC_BARY_F: 1042bf215546Sopenharmony_ci case OPC_FLAT_B: 1043bf215546Sopenharmony_ci return true; 1044bf215546Sopenharmony_ci default: 1045bf215546Sopenharmony_ci return false; 1046bf215546Sopenharmony_ci } 1047bf215546Sopenharmony_ci} 1048bf215546Sopenharmony_ci 1049bf215546Sopenharmony_cistatic inline bool 1050bf215546Sopenharmony_ciis_bool(struct ir3_instruction *instr) 1051bf215546Sopenharmony_ci{ 1052bf215546Sopenharmony_ci switch (instr->opc) { 1053bf215546Sopenharmony_ci case OPC_CMPS_F: 1054bf215546Sopenharmony_ci case OPC_CMPS_S: 1055bf215546Sopenharmony_ci case OPC_CMPS_U: 1056bf215546Sopenharmony_ci return true; 1057bf215546Sopenharmony_ci default: 1058bf215546Sopenharmony_ci return false; 1059bf215546Sopenharmony_ci } 1060bf215546Sopenharmony_ci} 1061bf215546Sopenharmony_ci 1062bf215546Sopenharmony_cistatic inline opc_t 1063bf215546Sopenharmony_cicat3_half_opc(opc_t opc) 1064bf215546Sopenharmony_ci{ 1065bf215546Sopenharmony_ci switch (opc) { 1066bf215546Sopenharmony_ci case OPC_MAD_F32: 1067bf215546Sopenharmony_ci return OPC_MAD_F16; 1068bf215546Sopenharmony_ci case OPC_SEL_B32: 1069bf215546Sopenharmony_ci return OPC_SEL_B16; 1070bf215546Sopenharmony_ci case OPC_SEL_S32: 1071bf215546Sopenharmony_ci return OPC_SEL_S16; 
1072bf215546Sopenharmony_ci case OPC_SEL_F32: 1073bf215546Sopenharmony_ci return OPC_SEL_F16; 1074bf215546Sopenharmony_ci case OPC_SAD_S32: 1075bf215546Sopenharmony_ci return OPC_SAD_S16; 1076bf215546Sopenharmony_ci default: 1077bf215546Sopenharmony_ci return opc; 1078bf215546Sopenharmony_ci } 1079bf215546Sopenharmony_ci} 1080bf215546Sopenharmony_ci 1081bf215546Sopenharmony_cistatic inline opc_t 1082bf215546Sopenharmony_cicat3_full_opc(opc_t opc) 1083bf215546Sopenharmony_ci{ 1084bf215546Sopenharmony_ci switch (opc) { 1085bf215546Sopenharmony_ci case OPC_MAD_F16: 1086bf215546Sopenharmony_ci return OPC_MAD_F32; 1087bf215546Sopenharmony_ci case OPC_SEL_B16: 1088bf215546Sopenharmony_ci return OPC_SEL_B32; 1089bf215546Sopenharmony_ci case OPC_SEL_S16: 1090bf215546Sopenharmony_ci return OPC_SEL_S32; 1091bf215546Sopenharmony_ci case OPC_SEL_F16: 1092bf215546Sopenharmony_ci return OPC_SEL_F32; 1093bf215546Sopenharmony_ci case OPC_SAD_S16: 1094bf215546Sopenharmony_ci return OPC_SAD_S32; 1095bf215546Sopenharmony_ci default: 1096bf215546Sopenharmony_ci return opc; 1097bf215546Sopenharmony_ci } 1098bf215546Sopenharmony_ci} 1099bf215546Sopenharmony_ci 1100bf215546Sopenharmony_cistatic inline opc_t 1101bf215546Sopenharmony_cicat4_half_opc(opc_t opc) 1102bf215546Sopenharmony_ci{ 1103bf215546Sopenharmony_ci switch (opc) { 1104bf215546Sopenharmony_ci case OPC_RSQ: 1105bf215546Sopenharmony_ci return OPC_HRSQ; 1106bf215546Sopenharmony_ci case OPC_LOG2: 1107bf215546Sopenharmony_ci return OPC_HLOG2; 1108bf215546Sopenharmony_ci case OPC_EXP2: 1109bf215546Sopenharmony_ci return OPC_HEXP2; 1110bf215546Sopenharmony_ci default: 1111bf215546Sopenharmony_ci return opc; 1112bf215546Sopenharmony_ci } 1113bf215546Sopenharmony_ci} 1114bf215546Sopenharmony_ci 1115bf215546Sopenharmony_cistatic inline opc_t 1116bf215546Sopenharmony_cicat4_full_opc(opc_t opc) 1117bf215546Sopenharmony_ci{ 1118bf215546Sopenharmony_ci switch (opc) { 1119bf215546Sopenharmony_ci case OPC_HRSQ: 1120bf215546Sopenharmony_ci 
return OPC_RSQ; 1121bf215546Sopenharmony_ci case OPC_HLOG2: 1122bf215546Sopenharmony_ci return OPC_LOG2; 1123bf215546Sopenharmony_ci case OPC_HEXP2: 1124bf215546Sopenharmony_ci return OPC_EXP2; 1125bf215546Sopenharmony_ci default: 1126bf215546Sopenharmony_ci return opc; 1127bf215546Sopenharmony_ci } 1128bf215546Sopenharmony_ci} 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_cistatic inline bool 1131bf215546Sopenharmony_ciis_meta(struct ir3_instruction *instr) 1132bf215546Sopenharmony_ci{ 1133bf215546Sopenharmony_ci return (opc_cat(instr->opc) == -1); 1134bf215546Sopenharmony_ci} 1135bf215546Sopenharmony_ci 1136bf215546Sopenharmony_cistatic inline unsigned 1137bf215546Sopenharmony_cireg_elems(const struct ir3_register *reg) 1138bf215546Sopenharmony_ci{ 1139bf215546Sopenharmony_ci if (reg->flags & IR3_REG_ARRAY) 1140bf215546Sopenharmony_ci return reg->size; 1141bf215546Sopenharmony_ci else 1142bf215546Sopenharmony_ci return util_last_bit(reg->wrmask); 1143bf215546Sopenharmony_ci} 1144bf215546Sopenharmony_ci 1145bf215546Sopenharmony_cistatic inline unsigned 1146bf215546Sopenharmony_cireg_elem_size(const struct ir3_register *reg) 1147bf215546Sopenharmony_ci{ 1148bf215546Sopenharmony_ci return (reg->flags & IR3_REG_HALF) ? 
1 : 2; 1149bf215546Sopenharmony_ci} 1150bf215546Sopenharmony_ci 1151bf215546Sopenharmony_cistatic inline unsigned 1152bf215546Sopenharmony_cireg_size(const struct ir3_register *reg) 1153bf215546Sopenharmony_ci{ 1154bf215546Sopenharmony_ci return reg_elems(reg) * reg_elem_size(reg); 1155bf215546Sopenharmony_ci} 1156bf215546Sopenharmony_ci 1157bf215546Sopenharmony_cistatic inline unsigned 1158bf215546Sopenharmony_cidest_regs(struct ir3_instruction *instr) 1159bf215546Sopenharmony_ci{ 1160bf215546Sopenharmony_ci if (instr->dsts_count == 0) 1161bf215546Sopenharmony_ci return 0; 1162bf215546Sopenharmony_ci 1163bf215546Sopenharmony_ci assert(instr->dsts_count == 1); 1164bf215546Sopenharmony_ci return util_last_bit(instr->dsts[0]->wrmask); 1165bf215546Sopenharmony_ci} 1166bf215546Sopenharmony_ci 1167bf215546Sopenharmony_ci/* is dst a normal temp register: */ 1168bf215546Sopenharmony_cistatic inline bool 1169bf215546Sopenharmony_ciis_dest_gpr(struct ir3_register *dst) 1170bf215546Sopenharmony_ci{ 1171bf215546Sopenharmony_ci if (dst->wrmask == 0) 1172bf215546Sopenharmony_ci return false; 1173bf215546Sopenharmony_ci if ((reg_num(dst) == REG_A0) || (dst->num == regid(REG_P0, 0))) 1174bf215546Sopenharmony_ci return false; 1175bf215546Sopenharmony_ci return true; 1176bf215546Sopenharmony_ci} 1177bf215546Sopenharmony_ci 1178bf215546Sopenharmony_cistatic inline bool 1179bf215546Sopenharmony_ciwrites_gpr(struct ir3_instruction *instr) 1180bf215546Sopenharmony_ci{ 1181bf215546Sopenharmony_ci if (dest_regs(instr) == 0) 1182bf215546Sopenharmony_ci return false; 1183bf215546Sopenharmony_ci return is_dest_gpr(instr->dsts[0]); 1184bf215546Sopenharmony_ci} 1185bf215546Sopenharmony_ci 1186bf215546Sopenharmony_cistatic inline bool 1187bf215546Sopenharmony_ciwrites_addr0(struct ir3_instruction *instr) 1188bf215546Sopenharmony_ci{ 1189bf215546Sopenharmony_ci /* Note: only the first dest can write to a0.x */ 1190bf215546Sopenharmony_ci if (instr->dsts_count > 0) { 1191bf215546Sopenharmony_ci 
struct ir3_register *dst = instr->dsts[0]; 1192bf215546Sopenharmony_ci return dst->num == regid(REG_A0, 0); 1193bf215546Sopenharmony_ci } 1194bf215546Sopenharmony_ci return false; 1195bf215546Sopenharmony_ci} 1196bf215546Sopenharmony_ci 1197bf215546Sopenharmony_cistatic inline bool 1198bf215546Sopenharmony_ciwrites_addr1(struct ir3_instruction *instr) 1199bf215546Sopenharmony_ci{ 1200bf215546Sopenharmony_ci /* Note: only the first dest can write to a1.x */ 1201bf215546Sopenharmony_ci if (instr->dsts_count > 0) { 1202bf215546Sopenharmony_ci struct ir3_register *dst = instr->dsts[0]; 1203bf215546Sopenharmony_ci return dst->num == regid(REG_A0, 1); 1204bf215546Sopenharmony_ci } 1205bf215546Sopenharmony_ci return false; 1206bf215546Sopenharmony_ci} 1207bf215546Sopenharmony_ci 1208bf215546Sopenharmony_cistatic inline bool 1209bf215546Sopenharmony_ciwrites_pred(struct ir3_instruction *instr) 1210bf215546Sopenharmony_ci{ 1211bf215546Sopenharmony_ci /* Note: only the first dest can write to p0.x */ 1212bf215546Sopenharmony_ci if (instr->dsts_count > 0) { 1213bf215546Sopenharmony_ci struct ir3_register *dst = instr->dsts[0]; 1214bf215546Sopenharmony_ci return reg_num(dst) == REG_P0; 1215bf215546Sopenharmony_ci } 1216bf215546Sopenharmony_ci return false; 1217bf215546Sopenharmony_ci} 1218bf215546Sopenharmony_ci 1219bf215546Sopenharmony_ci/* Is it something other than a normal register. Shared regs, p0, and a0/a1 1220bf215546Sopenharmony_ci * are considered special here. Special registers are always accessed with one 1221bf215546Sopenharmony_ci * size and never alias normal registers, even though a naive calculation 1222bf215546Sopenharmony_ci * would sometimes make it seem like e.g. r30.z aliases a0.x. 
1223bf215546Sopenharmony_ci */ 1224bf215546Sopenharmony_cistatic inline bool 1225bf215546Sopenharmony_ciis_reg_special(const struct ir3_register *reg) 1226bf215546Sopenharmony_ci{ 1227bf215546Sopenharmony_ci return (reg->flags & IR3_REG_SHARED) || (reg_num(reg) == REG_A0) || 1228bf215546Sopenharmony_ci (reg_num(reg) == REG_P0); 1229bf215546Sopenharmony_ci} 1230bf215546Sopenharmony_ci 1231bf215546Sopenharmony_ci/* Same as above but in cases where we don't have a register. r48.x and above 1232bf215546Sopenharmony_ci * are shared/special. 1233bf215546Sopenharmony_ci */ 1234bf215546Sopenharmony_cistatic inline bool 1235bf215546Sopenharmony_ciis_reg_num_special(unsigned num) 1236bf215546Sopenharmony_ci{ 1237bf215546Sopenharmony_ci return num >= 48 * 4; 1238bf215546Sopenharmony_ci} 1239bf215546Sopenharmony_ci 1240bf215546Sopenharmony_ci/* returns defining instruction for reg */ 1241bf215546Sopenharmony_ci/* TODO better name */ 1242bf215546Sopenharmony_cistatic inline struct ir3_instruction * 1243bf215546Sopenharmony_cissa(struct ir3_register *reg) 1244bf215546Sopenharmony_ci{ 1245bf215546Sopenharmony_ci if ((reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) && reg->def) 1246bf215546Sopenharmony_ci return reg->def->instr; 1247bf215546Sopenharmony_ci return NULL; 1248bf215546Sopenharmony_ci} 1249bf215546Sopenharmony_ci 1250bf215546Sopenharmony_cistatic inline bool 1251bf215546Sopenharmony_ciconflicts(struct ir3_register *a, struct ir3_register *b) 1252bf215546Sopenharmony_ci{ 1253bf215546Sopenharmony_ci return (a && b) && (a->def != b->def); 1254bf215546Sopenharmony_ci} 1255bf215546Sopenharmony_ci 1256bf215546Sopenharmony_cistatic inline bool 1257bf215546Sopenharmony_cireg_gpr(struct ir3_register *r) 1258bf215546Sopenharmony_ci{ 1259bf215546Sopenharmony_ci if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED)) 1260bf215546Sopenharmony_ci return false; 1261bf215546Sopenharmony_ci if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0)) 1262bf215546Sopenharmony_ci return false; 
1263bf215546Sopenharmony_ci return true; 1264bf215546Sopenharmony_ci} 1265bf215546Sopenharmony_ci 1266bf215546Sopenharmony_cistatic inline type_t 1267bf215546Sopenharmony_cihalf_type(type_t type) 1268bf215546Sopenharmony_ci{ 1269bf215546Sopenharmony_ci switch (type) { 1270bf215546Sopenharmony_ci case TYPE_F32: 1271bf215546Sopenharmony_ci return TYPE_F16; 1272bf215546Sopenharmony_ci case TYPE_U32: 1273bf215546Sopenharmony_ci return TYPE_U16; 1274bf215546Sopenharmony_ci case TYPE_S32: 1275bf215546Sopenharmony_ci return TYPE_S16; 1276bf215546Sopenharmony_ci case TYPE_F16: 1277bf215546Sopenharmony_ci case TYPE_U16: 1278bf215546Sopenharmony_ci case TYPE_S16: 1279bf215546Sopenharmony_ci return type; 1280bf215546Sopenharmony_ci case TYPE_U8: 1281bf215546Sopenharmony_ci case TYPE_S8: 1282bf215546Sopenharmony_ci return type; 1283bf215546Sopenharmony_ci default: 1284bf215546Sopenharmony_ci assert(0); 1285bf215546Sopenharmony_ci return ~0; 1286bf215546Sopenharmony_ci } 1287bf215546Sopenharmony_ci} 1288bf215546Sopenharmony_ci 1289bf215546Sopenharmony_cistatic inline type_t 1290bf215546Sopenharmony_cifull_type(type_t type) 1291bf215546Sopenharmony_ci{ 1292bf215546Sopenharmony_ci switch (type) { 1293bf215546Sopenharmony_ci case TYPE_F16: 1294bf215546Sopenharmony_ci return TYPE_F32; 1295bf215546Sopenharmony_ci case TYPE_U8: 1296bf215546Sopenharmony_ci case TYPE_U16: 1297bf215546Sopenharmony_ci return TYPE_U32; 1298bf215546Sopenharmony_ci case TYPE_S8: 1299bf215546Sopenharmony_ci case TYPE_S16: 1300bf215546Sopenharmony_ci return TYPE_S32; 1301bf215546Sopenharmony_ci case TYPE_F32: 1302bf215546Sopenharmony_ci case TYPE_U32: 1303bf215546Sopenharmony_ci case TYPE_S32: 1304bf215546Sopenharmony_ci return type; 1305bf215546Sopenharmony_ci default: 1306bf215546Sopenharmony_ci assert(0); 1307bf215546Sopenharmony_ci return ~0; 1308bf215546Sopenharmony_ci } 1309bf215546Sopenharmony_ci} 1310bf215546Sopenharmony_ci 1311bf215546Sopenharmony_ci/* some cat2 instructions (ie. 
those which are not float) can embed an 1312bf215546Sopenharmony_ci * immediate: 1313bf215546Sopenharmony_ci */ 1314bf215546Sopenharmony_cistatic inline bool 1315bf215546Sopenharmony_ciir3_cat2_int(opc_t opc) 1316bf215546Sopenharmony_ci{ 1317bf215546Sopenharmony_ci switch (opc) { 1318bf215546Sopenharmony_ci case OPC_ADD_U: 1319bf215546Sopenharmony_ci case OPC_ADD_S: 1320bf215546Sopenharmony_ci case OPC_SUB_U: 1321bf215546Sopenharmony_ci case OPC_SUB_S: 1322bf215546Sopenharmony_ci case OPC_CMPS_U: 1323bf215546Sopenharmony_ci case OPC_CMPS_S: 1324bf215546Sopenharmony_ci case OPC_MIN_U: 1325bf215546Sopenharmony_ci case OPC_MIN_S: 1326bf215546Sopenharmony_ci case OPC_MAX_U: 1327bf215546Sopenharmony_ci case OPC_MAX_S: 1328bf215546Sopenharmony_ci case OPC_CMPV_U: 1329bf215546Sopenharmony_ci case OPC_CMPV_S: 1330bf215546Sopenharmony_ci case OPC_MUL_U24: 1331bf215546Sopenharmony_ci case OPC_MUL_S24: 1332bf215546Sopenharmony_ci case OPC_MULL_U: 1333bf215546Sopenharmony_ci case OPC_CLZ_S: 1334bf215546Sopenharmony_ci case OPC_ABSNEG_S: 1335bf215546Sopenharmony_ci case OPC_AND_B: 1336bf215546Sopenharmony_ci case OPC_OR_B: 1337bf215546Sopenharmony_ci case OPC_NOT_B: 1338bf215546Sopenharmony_ci case OPC_XOR_B: 1339bf215546Sopenharmony_ci case OPC_BFREV_B: 1340bf215546Sopenharmony_ci case OPC_CLZ_B: 1341bf215546Sopenharmony_ci case OPC_SHL_B: 1342bf215546Sopenharmony_ci case OPC_SHR_B: 1343bf215546Sopenharmony_ci case OPC_ASHR_B: 1344bf215546Sopenharmony_ci case OPC_MGEN_B: 1345bf215546Sopenharmony_ci case OPC_GETBIT_B: 1346bf215546Sopenharmony_ci case OPC_CBITS_B: 1347bf215546Sopenharmony_ci case OPC_BARY_F: 1348bf215546Sopenharmony_ci case OPC_FLAT_B: 1349bf215546Sopenharmony_ci return true; 1350bf215546Sopenharmony_ci 1351bf215546Sopenharmony_ci default: 1352bf215546Sopenharmony_ci return false; 1353bf215546Sopenharmony_ci } 1354bf215546Sopenharmony_ci} 1355bf215546Sopenharmony_ci 1356bf215546Sopenharmony_ci/* map cat2 instruction to valid abs/neg flags: */ 
1357bf215546Sopenharmony_cistatic inline unsigned 1358bf215546Sopenharmony_ciir3_cat2_absneg(opc_t opc) 1359bf215546Sopenharmony_ci{ 1360bf215546Sopenharmony_ci switch (opc) { 1361bf215546Sopenharmony_ci case OPC_ADD_F: 1362bf215546Sopenharmony_ci case OPC_MIN_F: 1363bf215546Sopenharmony_ci case OPC_MAX_F: 1364bf215546Sopenharmony_ci case OPC_MUL_F: 1365bf215546Sopenharmony_ci case OPC_SIGN_F: 1366bf215546Sopenharmony_ci case OPC_CMPS_F: 1367bf215546Sopenharmony_ci case OPC_ABSNEG_F: 1368bf215546Sopenharmony_ci case OPC_CMPV_F: 1369bf215546Sopenharmony_ci case OPC_FLOOR_F: 1370bf215546Sopenharmony_ci case OPC_CEIL_F: 1371bf215546Sopenharmony_ci case OPC_RNDNE_F: 1372bf215546Sopenharmony_ci case OPC_RNDAZ_F: 1373bf215546Sopenharmony_ci case OPC_TRUNC_F: 1374bf215546Sopenharmony_ci case OPC_BARY_F: 1375bf215546Sopenharmony_ci return IR3_REG_FABS | IR3_REG_FNEG; 1376bf215546Sopenharmony_ci 1377bf215546Sopenharmony_ci case OPC_ADD_U: 1378bf215546Sopenharmony_ci case OPC_ADD_S: 1379bf215546Sopenharmony_ci case OPC_SUB_U: 1380bf215546Sopenharmony_ci case OPC_SUB_S: 1381bf215546Sopenharmony_ci case OPC_CMPS_U: 1382bf215546Sopenharmony_ci case OPC_CMPS_S: 1383bf215546Sopenharmony_ci case OPC_MIN_U: 1384bf215546Sopenharmony_ci case OPC_MIN_S: 1385bf215546Sopenharmony_ci case OPC_MAX_U: 1386bf215546Sopenharmony_ci case OPC_MAX_S: 1387bf215546Sopenharmony_ci case OPC_CMPV_U: 1388bf215546Sopenharmony_ci case OPC_CMPV_S: 1389bf215546Sopenharmony_ci case OPC_MUL_U24: 1390bf215546Sopenharmony_ci case OPC_MUL_S24: 1391bf215546Sopenharmony_ci case OPC_MULL_U: 1392bf215546Sopenharmony_ci case OPC_CLZ_S: 1393bf215546Sopenharmony_ci return 0; 1394bf215546Sopenharmony_ci 1395bf215546Sopenharmony_ci case OPC_ABSNEG_S: 1396bf215546Sopenharmony_ci return IR3_REG_SABS | IR3_REG_SNEG; 1397bf215546Sopenharmony_ci 1398bf215546Sopenharmony_ci case OPC_AND_B: 1399bf215546Sopenharmony_ci case OPC_OR_B: 1400bf215546Sopenharmony_ci case OPC_NOT_B: 1401bf215546Sopenharmony_ci case OPC_XOR_B: 
1402bf215546Sopenharmony_ci case OPC_BFREV_B: 1403bf215546Sopenharmony_ci case OPC_CLZ_B: 1404bf215546Sopenharmony_ci case OPC_SHL_B: 1405bf215546Sopenharmony_ci case OPC_SHR_B: 1406bf215546Sopenharmony_ci case OPC_ASHR_B: 1407bf215546Sopenharmony_ci case OPC_MGEN_B: 1408bf215546Sopenharmony_ci case OPC_GETBIT_B: 1409bf215546Sopenharmony_ci case OPC_CBITS_B: 1410bf215546Sopenharmony_ci return IR3_REG_BNOT; 1411bf215546Sopenharmony_ci 1412bf215546Sopenharmony_ci default: 1413bf215546Sopenharmony_ci return 0; 1414bf215546Sopenharmony_ci } 1415bf215546Sopenharmony_ci} 1416bf215546Sopenharmony_ci 1417bf215546Sopenharmony_ci/* map cat3 instructions to valid abs/neg flags: */ 1418bf215546Sopenharmony_cistatic inline unsigned 1419bf215546Sopenharmony_ciir3_cat3_absneg(opc_t opc) 1420bf215546Sopenharmony_ci{ 1421bf215546Sopenharmony_ci switch (opc) { 1422bf215546Sopenharmony_ci case OPC_MAD_F16: 1423bf215546Sopenharmony_ci case OPC_MAD_F32: 1424bf215546Sopenharmony_ci case OPC_SEL_F16: 1425bf215546Sopenharmony_ci case OPC_SEL_F32: 1426bf215546Sopenharmony_ci return IR3_REG_FNEG; 1427bf215546Sopenharmony_ci 1428bf215546Sopenharmony_ci case OPC_MAD_U16: 1429bf215546Sopenharmony_ci case OPC_MADSH_U16: 1430bf215546Sopenharmony_ci case OPC_MAD_S16: 1431bf215546Sopenharmony_ci case OPC_MADSH_M16: 1432bf215546Sopenharmony_ci case OPC_MAD_U24: 1433bf215546Sopenharmony_ci case OPC_MAD_S24: 1434bf215546Sopenharmony_ci case OPC_SEL_S16: 1435bf215546Sopenharmony_ci case OPC_SEL_S32: 1436bf215546Sopenharmony_ci case OPC_SAD_S16: 1437bf215546Sopenharmony_ci case OPC_SAD_S32: 1438bf215546Sopenharmony_ci /* neg *may* work on 3rd src.. 
*/ 1439bf215546Sopenharmony_ci 1440bf215546Sopenharmony_ci case OPC_SEL_B16: 1441bf215546Sopenharmony_ci case OPC_SEL_B32: 1442bf215546Sopenharmony_ci 1443bf215546Sopenharmony_ci case OPC_SHRM: 1444bf215546Sopenharmony_ci case OPC_SHLM: 1445bf215546Sopenharmony_ci case OPC_SHRG: 1446bf215546Sopenharmony_ci case OPC_SHLG: 1447bf215546Sopenharmony_ci case OPC_ANDG: 1448bf215546Sopenharmony_ci case OPC_WMM: 1449bf215546Sopenharmony_ci case OPC_WMM_ACCU: 1450bf215546Sopenharmony_ci 1451bf215546Sopenharmony_ci default: 1452bf215546Sopenharmony_ci return 0; 1453bf215546Sopenharmony_ci } 1454bf215546Sopenharmony_ci} 1455bf215546Sopenharmony_ci 1456bf215546Sopenharmony_ci/* Return the type (float, int, or uint) the op uses when converting from the 1457bf215546Sopenharmony_ci * internal result of the op (which is assumed to be the same size as the 1458bf215546Sopenharmony_ci * sources) to the destination when they are not the same size. If F32 it does 1459bf215546Sopenharmony_ci * a floating-point conversion, if U32 it does a truncation/zero-extension, if 1460bf215546Sopenharmony_ci * S32 it does a truncation/sign-extension. "can_fold" will be false if it 1461bf215546Sopenharmony_ci * doesn't do anything sensible or is unknown. 
1462bf215546Sopenharmony_ci */ 1463bf215546Sopenharmony_cistatic inline type_t 1464bf215546Sopenharmony_ciir3_output_conv_type(struct ir3_instruction *instr, bool *can_fold) 1465bf215546Sopenharmony_ci{ 1466bf215546Sopenharmony_ci *can_fold = true; 1467bf215546Sopenharmony_ci switch (instr->opc) { 1468bf215546Sopenharmony_ci case OPC_ADD_F: 1469bf215546Sopenharmony_ci case OPC_MUL_F: 1470bf215546Sopenharmony_ci case OPC_BARY_F: 1471bf215546Sopenharmony_ci case OPC_MAD_F32: 1472bf215546Sopenharmony_ci case OPC_MAD_F16: 1473bf215546Sopenharmony_ci case OPC_WMM: 1474bf215546Sopenharmony_ci case OPC_WMM_ACCU: 1475bf215546Sopenharmony_ci return TYPE_F32; 1476bf215546Sopenharmony_ci 1477bf215546Sopenharmony_ci case OPC_ADD_U: 1478bf215546Sopenharmony_ci case OPC_SUB_U: 1479bf215546Sopenharmony_ci case OPC_MIN_U: 1480bf215546Sopenharmony_ci case OPC_MAX_U: 1481bf215546Sopenharmony_ci case OPC_AND_B: 1482bf215546Sopenharmony_ci case OPC_OR_B: 1483bf215546Sopenharmony_ci case OPC_NOT_B: 1484bf215546Sopenharmony_ci case OPC_XOR_B: 1485bf215546Sopenharmony_ci case OPC_MUL_U24: 1486bf215546Sopenharmony_ci case OPC_MULL_U: 1487bf215546Sopenharmony_ci case OPC_SHL_B: 1488bf215546Sopenharmony_ci case OPC_SHR_B: 1489bf215546Sopenharmony_ci case OPC_ASHR_B: 1490bf215546Sopenharmony_ci case OPC_MAD_U24: 1491bf215546Sopenharmony_ci case OPC_SHRM: 1492bf215546Sopenharmony_ci case OPC_SHLM: 1493bf215546Sopenharmony_ci case OPC_SHRG: 1494bf215546Sopenharmony_ci case OPC_SHLG: 1495bf215546Sopenharmony_ci case OPC_ANDG: 1496bf215546Sopenharmony_ci /* Comparison ops zero-extend/truncate their results, so consider them as 1497bf215546Sopenharmony_ci * unsigned here. 
1498bf215546Sopenharmony_ci */ 1499bf215546Sopenharmony_ci case OPC_CMPS_F: 1500bf215546Sopenharmony_ci case OPC_CMPV_F: 1501bf215546Sopenharmony_ci case OPC_CMPS_U: 1502bf215546Sopenharmony_ci case OPC_CMPS_S: 1503bf215546Sopenharmony_ci return TYPE_U32; 1504bf215546Sopenharmony_ci 1505bf215546Sopenharmony_ci case OPC_ADD_S: 1506bf215546Sopenharmony_ci case OPC_SUB_S: 1507bf215546Sopenharmony_ci case OPC_MIN_S: 1508bf215546Sopenharmony_ci case OPC_MAX_S: 1509bf215546Sopenharmony_ci case OPC_ABSNEG_S: 1510bf215546Sopenharmony_ci case OPC_MUL_S24: 1511bf215546Sopenharmony_ci case OPC_MAD_S24: 1512bf215546Sopenharmony_ci return TYPE_S32; 1513bf215546Sopenharmony_ci 1514bf215546Sopenharmony_ci /* We assume that any move->move folding that could be done was done by 1515bf215546Sopenharmony_ci * NIR. 1516bf215546Sopenharmony_ci */ 1517bf215546Sopenharmony_ci case OPC_MOV: 1518bf215546Sopenharmony_ci default: 1519bf215546Sopenharmony_ci *can_fold = false; 1520bf215546Sopenharmony_ci return TYPE_U32; 1521bf215546Sopenharmony_ci } 1522bf215546Sopenharmony_ci} 1523bf215546Sopenharmony_ci 1524bf215546Sopenharmony_ci/* Return the src and dst types for the conversion which is already folded 1525bf215546Sopenharmony_ci * into the op. We can assume that instr has folded in a conversion from 1526bf215546Sopenharmony_ci * ir3_output_conv_src_type() to ir3_output_conv_dst_type(). Only makes sense 1527bf215546Sopenharmony_ci * to call if ir3_output_conv_type() returns can_fold = true. 
1528bf215546Sopenharmony_ci */ 1529bf215546Sopenharmony_cistatic inline type_t 1530bf215546Sopenharmony_ciir3_output_conv_src_type(struct ir3_instruction *instr, type_t base_type) 1531bf215546Sopenharmony_ci{ 1532bf215546Sopenharmony_ci switch (instr->opc) { 1533bf215546Sopenharmony_ci case OPC_CMPS_F: 1534bf215546Sopenharmony_ci case OPC_CMPV_F: 1535bf215546Sopenharmony_ci case OPC_CMPS_U: 1536bf215546Sopenharmony_ci case OPC_CMPS_S: 1537bf215546Sopenharmony_ci /* Comparisons only return 0/1 and the size of the comparison sources 1538bf215546Sopenharmony_ci * is irrelevant, never consider them as having an output conversion 1539bf215546Sopenharmony_ci * by returning a type with the dest size here: 1540bf215546Sopenharmony_ci */ 1541bf215546Sopenharmony_ci return (instr->dsts[0]->flags & IR3_REG_HALF) ? half_type(base_type) 1542bf215546Sopenharmony_ci : full_type(base_type); 1543bf215546Sopenharmony_ci 1544bf215546Sopenharmony_ci case OPC_BARY_F: 1545bf215546Sopenharmony_ci /* bary.f doesn't have an explicit source, but we can assume here that 1546bf215546Sopenharmony_ci * the varying data it reads is in fp32. 1547bf215546Sopenharmony_ci * 1548bf215546Sopenharmony_ci * This may be fp16 on older gen's depending on some register 1549bf215546Sopenharmony_ci * settings, but it's probably not worth plumbing that through for a 1550bf215546Sopenharmony_ci * small improvement that NIR would hopefully handle for us anyway. 1551bf215546Sopenharmony_ci */ 1552bf215546Sopenharmony_ci return TYPE_F32; 1553bf215546Sopenharmony_ci 1554bf215546Sopenharmony_ci case OPC_FLAT_B: 1555bf215546Sopenharmony_ci /* Treat the input data as u32 if not interpolating. */ 1556bf215546Sopenharmony_ci return TYPE_U32; 1557bf215546Sopenharmony_ci 1558bf215546Sopenharmony_ci default: 1559bf215546Sopenharmony_ci return (instr->srcs[0]->flags & IR3_REG_HALF) ? 
half_type(base_type) 1560bf215546Sopenharmony_ci : full_type(base_type); 1561bf215546Sopenharmony_ci } 1562bf215546Sopenharmony_ci} 1563bf215546Sopenharmony_ci 1564bf215546Sopenharmony_cistatic inline type_t 1565bf215546Sopenharmony_ciir3_output_conv_dst_type(struct ir3_instruction *instr, type_t base_type) 1566bf215546Sopenharmony_ci{ 1567bf215546Sopenharmony_ci return (instr->dsts[0]->flags & IR3_REG_HALF) ? half_type(base_type) 1568bf215546Sopenharmony_ci : full_type(base_type); 1569bf215546Sopenharmony_ci} 1570bf215546Sopenharmony_ci 1571bf215546Sopenharmony_ci/* Some instructions have signed/unsigned variants which are identical except 1572bf215546Sopenharmony_ci * for whether the folded conversion sign-extends or zero-extends, and we can 1573bf215546Sopenharmony_ci * fold in a mismatching move by rewriting the opcode. Return the opcode to 1574bf215546Sopenharmony_ci * switch signedness, and whether one exists. 1575bf215546Sopenharmony_ci */ 1576bf215546Sopenharmony_cistatic inline opc_t 1577bf215546Sopenharmony_ciir3_try_swap_signedness(opc_t opc, bool *can_swap) 1578bf215546Sopenharmony_ci{ 1579bf215546Sopenharmony_ci switch (opc) { 1580bf215546Sopenharmony_ci#define PAIR(u, s) \ 1581bf215546Sopenharmony_ci case OPC_##u: \ 1582bf215546Sopenharmony_ci return OPC_##s; \ 1583bf215546Sopenharmony_ci case OPC_##s: \ 1584bf215546Sopenharmony_ci return OPC_##u; 1585bf215546Sopenharmony_ci PAIR(ADD_U, ADD_S) 1586bf215546Sopenharmony_ci PAIR(SUB_U, SUB_S) 1587bf215546Sopenharmony_ci /* Note: these are only identical when the sources are half, but that's 1588bf215546Sopenharmony_ci * the only case we call this function for anyway. 
1589bf215546Sopenharmony_ci */ 1590bf215546Sopenharmony_ci PAIR(MUL_U24, MUL_S24) 1591bf215546Sopenharmony_ci 1592bf215546Sopenharmony_ci default: 1593bf215546Sopenharmony_ci *can_swap = false; 1594bf215546Sopenharmony_ci return opc; 1595bf215546Sopenharmony_ci } 1596bf215546Sopenharmony_ci} 1597bf215546Sopenharmony_ci 1598bf215546Sopenharmony_ci#define MASK(n) ((1 << (n)) - 1) 1599bf215546Sopenharmony_ci 1600bf215546Sopenharmony_ci/* iterator for an instructions's sources (reg), also returns src #: */ 1601bf215546Sopenharmony_ci#define foreach_src_n(__srcreg, __n, __instr) \ 1602bf215546Sopenharmony_ci if ((__instr)->srcs_count) \ 1603bf215546Sopenharmony_ci for (struct ir3_register *__srcreg = (void *)~0; __srcreg; \ 1604bf215546Sopenharmony_ci __srcreg = NULL) \ 1605bf215546Sopenharmony_ci for (unsigned __cnt = (__instr)->srcs_count, __n = 0; __n < __cnt; \ 1606bf215546Sopenharmony_ci __n++) \ 1607bf215546Sopenharmony_ci if ((__srcreg = (__instr)->srcs[__n])) 1608bf215546Sopenharmony_ci 1609bf215546Sopenharmony_ci/* iterator for an instructions's sources (reg): */ 1610bf215546Sopenharmony_ci#define foreach_src(__srcreg, __instr) foreach_src_n (__srcreg, __i, __instr) 1611bf215546Sopenharmony_ci 1612bf215546Sopenharmony_ci/* iterator for an instructions's destinations (reg), also returns dst #: */ 1613bf215546Sopenharmony_ci#define foreach_dst_n(__dstreg, __n, __instr) \ 1614bf215546Sopenharmony_ci if ((__instr)->dsts_count) \ 1615bf215546Sopenharmony_ci for (struct ir3_register *__dstreg = (void *)~0; __dstreg; \ 1616bf215546Sopenharmony_ci __dstreg = NULL) \ 1617bf215546Sopenharmony_ci for (unsigned __cnt = (__instr)->dsts_count, __n = 0; __n < __cnt; \ 1618bf215546Sopenharmony_ci __n++) \ 1619bf215546Sopenharmony_ci if ((__dstreg = (__instr)->dsts[__n])) 1620bf215546Sopenharmony_ci 1621bf215546Sopenharmony_ci/* iterator for an instructions's destinations (reg): */ 1622bf215546Sopenharmony_ci#define foreach_dst(__dstreg, __instr) foreach_dst_n (__dstreg, __i, 
__instr) 1623bf215546Sopenharmony_ci 1624bf215546Sopenharmony_cistatic inline unsigned 1625bf215546Sopenharmony_ci__ssa_src_cnt(struct ir3_instruction *instr) 1626bf215546Sopenharmony_ci{ 1627bf215546Sopenharmony_ci return instr->srcs_count + instr->deps_count; 1628bf215546Sopenharmony_ci} 1629bf215546Sopenharmony_ci 1630bf215546Sopenharmony_cistatic inline bool 1631bf215546Sopenharmony_ci__is_false_dep(struct ir3_instruction *instr, unsigned n) 1632bf215546Sopenharmony_ci{ 1633bf215546Sopenharmony_ci if (n >= instr->srcs_count) 1634bf215546Sopenharmony_ci return true; 1635bf215546Sopenharmony_ci return false; 1636bf215546Sopenharmony_ci} 1637bf215546Sopenharmony_ci 1638bf215546Sopenharmony_cistatic inline struct ir3_instruction ** 1639bf215546Sopenharmony_ci__ssa_srcp_n(struct ir3_instruction *instr, unsigned n) 1640bf215546Sopenharmony_ci{ 1641bf215546Sopenharmony_ci if (__is_false_dep(instr, n)) 1642bf215546Sopenharmony_ci return &instr->deps[n - instr->srcs_count]; 1643bf215546Sopenharmony_ci if (ssa(instr->srcs[n])) 1644bf215546Sopenharmony_ci return &instr->srcs[n]->def->instr; 1645bf215546Sopenharmony_ci return NULL; 1646bf215546Sopenharmony_ci} 1647bf215546Sopenharmony_ci 1648bf215546Sopenharmony_ci#define foreach_ssa_srcp_n(__srcp, __n, __instr) \ 1649bf215546Sopenharmony_ci for (struct ir3_instruction **__srcp = (void *)~0; __srcp; __srcp = NULL) \ 1650bf215546Sopenharmony_ci for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; \ 1651bf215546Sopenharmony_ci __n++) \ 1652bf215546Sopenharmony_ci if ((__srcp = __ssa_srcp_n(__instr, __n))) 1653bf215546Sopenharmony_ci 1654bf215546Sopenharmony_ci#define foreach_ssa_srcp(__srcp, __instr) \ 1655bf215546Sopenharmony_ci foreach_ssa_srcp_n (__srcp, __i, __instr) 1656bf215546Sopenharmony_ci 1657bf215546Sopenharmony_ci/* iterator for an instruction's SSA sources (instr), also returns src #: */ 1658bf215546Sopenharmony_ci#define foreach_ssa_src_n(__srcinst, __n, __instr) \ 1659bf215546Sopenharmony_ci for 
(struct ir3_instruction *__srcinst = (void *)~0; __srcinst; \ 1660bf215546Sopenharmony_ci __srcinst = NULL) \ 1661bf215546Sopenharmony_ci foreach_ssa_srcp_n (__srcp, __n, __instr) \ 1662bf215546Sopenharmony_ci if ((__srcinst = *__srcp)) 1663bf215546Sopenharmony_ci 1664bf215546Sopenharmony_ci/* iterator for an instruction's SSA sources (instr): */ 1665bf215546Sopenharmony_ci#define foreach_ssa_src(__srcinst, __instr) \ 1666bf215546Sopenharmony_ci foreach_ssa_src_n (__srcinst, __i, __instr) 1667bf215546Sopenharmony_ci 1668bf215546Sopenharmony_ci/* iterators for shader inputs: */ 1669bf215546Sopenharmony_ci#define foreach_input_n(__ininstr, __cnt, __ir) \ 1670bf215546Sopenharmony_ci for (struct ir3_instruction *__ininstr = (void *)~0; __ininstr; \ 1671bf215546Sopenharmony_ci __ininstr = NULL) \ 1672bf215546Sopenharmony_ci for (unsigned __cnt = 0; __cnt < (__ir)->inputs_count; __cnt++) \ 1673bf215546Sopenharmony_ci if ((__ininstr = (__ir)->inputs[__cnt])) 1674bf215546Sopenharmony_ci#define foreach_input(__ininstr, __ir) foreach_input_n (__ininstr, __i, __ir) 1675bf215546Sopenharmony_ci 1676bf215546Sopenharmony_ci/* iterators for instructions: */ 1677bf215546Sopenharmony_ci#define foreach_instr(__instr, __list) \ 1678bf215546Sopenharmony_ci list_for_each_entry (struct ir3_instruction, __instr, __list, node) 1679bf215546Sopenharmony_ci#define foreach_instr_rev(__instr, __list) \ 1680bf215546Sopenharmony_ci list_for_each_entry_rev (struct ir3_instruction, __instr, __list, node) 1681bf215546Sopenharmony_ci#define foreach_instr_safe(__instr, __list) \ 1682bf215546Sopenharmony_ci list_for_each_entry_safe (struct ir3_instruction, __instr, __list, node) 1683bf215546Sopenharmony_ci#define foreach_instr_from_safe(__instr, __start, __list) \ 1684bf215546Sopenharmony_ci list_for_each_entry_from_safe(struct ir3_instruction, __instr, __start, \ 1685bf215546Sopenharmony_ci __list, node) 1686bf215546Sopenharmony_ci 1687bf215546Sopenharmony_ci/* iterators for blocks: */ 
1688bf215546Sopenharmony_ci#define foreach_block(__block, __list) \ 1689bf215546Sopenharmony_ci list_for_each_entry (struct ir3_block, __block, __list, node) 1690bf215546Sopenharmony_ci#define foreach_block_safe(__block, __list) \ 1691bf215546Sopenharmony_ci list_for_each_entry_safe (struct ir3_block, __block, __list, node) 1692bf215546Sopenharmony_ci#define foreach_block_rev(__block, __list) \ 1693bf215546Sopenharmony_ci list_for_each_entry_rev (struct ir3_block, __block, __list, node) 1694bf215546Sopenharmony_ci 1695bf215546Sopenharmony_ci/* iterators for arrays: */ 1696bf215546Sopenharmony_ci#define foreach_array(__array, __list) \ 1697bf215546Sopenharmony_ci list_for_each_entry (struct ir3_array, __array, __list, node) 1698bf215546Sopenharmony_ci#define foreach_array_safe(__array, __list) \ 1699bf215546Sopenharmony_ci list_for_each_entry_safe (struct ir3_array, __array, __list, node) 1700bf215546Sopenharmony_ci 1701bf215546Sopenharmony_ci#define IR3_PASS(ir, pass, ...) \ 1702bf215546Sopenharmony_ci ({ \ 1703bf215546Sopenharmony_ci bool progress = pass(ir, ##__VA_ARGS__); \ 1704bf215546Sopenharmony_ci if (progress) { \ 1705bf215546Sopenharmony_ci ir3_debug_print(ir, "AFTER: " #pass); \ 1706bf215546Sopenharmony_ci ir3_validate(ir); \ 1707bf215546Sopenharmony_ci } \ 1708bf215546Sopenharmony_ci progress; \ 1709bf215546Sopenharmony_ci }) 1710bf215546Sopenharmony_ci 1711bf215546Sopenharmony_ci/* validate: */ 1712bf215546Sopenharmony_civoid ir3_validate(struct ir3 *ir); 1713bf215546Sopenharmony_ci 1714bf215546Sopenharmony_ci/* dump: */ 1715bf215546Sopenharmony_civoid ir3_print(struct ir3 *ir); 1716bf215546Sopenharmony_civoid ir3_print_instr(struct ir3_instruction *instr); 1717bf215546Sopenharmony_ci 1718bf215546Sopenharmony_cistruct log_stream; 1719bf215546Sopenharmony_civoid ir3_print_instr_stream(struct log_stream *stream, struct ir3_instruction *instr); 1720bf215546Sopenharmony_ci 1721bf215546Sopenharmony_ci/* delay calculation: */ 1722bf215546Sopenharmony_ciint 
ir3_delayslots(struct ir3_instruction *assigner, 1723bf215546Sopenharmony_ci struct ir3_instruction *consumer, unsigned n, bool soft); 1724bf215546Sopenharmony_ciunsigned ir3_delayslots_with_repeat(struct ir3_instruction *assigner, 1725bf215546Sopenharmony_ci struct ir3_instruction *consumer, 1726bf215546Sopenharmony_ci unsigned assigner_n, unsigned consumer_n); 1727bf215546Sopenharmony_ciunsigned ir3_delay_calc(struct ir3_block *block, 1728bf215546Sopenharmony_ci struct ir3_instruction *instr, bool mergedregs); 1729bf215546Sopenharmony_ci 1730bf215546Sopenharmony_ci/* estimated (ss)/(sy) delay calculation */ 1731bf215546Sopenharmony_ci 1732bf215546Sopenharmony_cistatic inline bool 1733bf215546Sopenharmony_ciis_local_mem_load(struct ir3_instruction *instr) 1734bf215546Sopenharmony_ci{ 1735bf215546Sopenharmony_ci return instr->opc == OPC_LDL || instr->opc == OPC_LDLV || 1736bf215546Sopenharmony_ci instr->opc == OPC_LDLW; 1737bf215546Sopenharmony_ci} 1738bf215546Sopenharmony_ci 1739bf215546Sopenharmony_ci/* Does this instruction need (ss) to wait for its result? */ 1740bf215546Sopenharmony_cistatic inline bool 1741bf215546Sopenharmony_ciis_ss_producer(struct ir3_instruction *instr) 1742bf215546Sopenharmony_ci{ 1743bf215546Sopenharmony_ci foreach_dst (dst, instr) { 1744bf215546Sopenharmony_ci if (dst->flags & IR3_REG_SHARED) 1745bf215546Sopenharmony_ci return true; 1746bf215546Sopenharmony_ci } 1747bf215546Sopenharmony_ci return is_sfu(instr) || is_local_mem_load(instr); 1748bf215546Sopenharmony_ci} 1749bf215546Sopenharmony_ci 1750bf215546Sopenharmony_ci/* The soft delay for approximating the cost of (ss). */ 1751bf215546Sopenharmony_cistatic inline unsigned 1752bf215546Sopenharmony_cisoft_ss_delay(struct ir3_instruction *instr) 1753bf215546Sopenharmony_ci{ 1754bf215546Sopenharmony_ci /* On a6xx, it takes the number of delay slots to get a SFU result back (ie. 
1755bf215546Sopenharmony_ci * using nop's instead of (ss) is: 1756bf215546Sopenharmony_ci * 1757bf215546Sopenharmony_ci * 8 - single warp 1758bf215546Sopenharmony_ci * 9 - two warps 1759bf215546Sopenharmony_ci * 10 - four warps 1760bf215546Sopenharmony_ci * 1761bf215546Sopenharmony_ci * and so on. Not quite sure where it tapers out (ie. how many warps share an 1762bf215546Sopenharmony_ci * SFU unit). But 10 seems like a reasonable # to choose: 1763bf215546Sopenharmony_ci */ 1764bf215546Sopenharmony_ci if (is_sfu(instr) || is_local_mem_load(instr)) 1765bf215546Sopenharmony_ci return 10; 1766bf215546Sopenharmony_ci 1767bf215546Sopenharmony_ci /* The blob adds 6 nops between shared producers and consumers, and before we 1768bf215546Sopenharmony_ci * used (ss) this was sufficient in most cases. 1769bf215546Sopenharmony_ci */ 1770bf215546Sopenharmony_ci return 6; 1771bf215546Sopenharmony_ci} 1772bf215546Sopenharmony_ci 1773bf215546Sopenharmony_cistatic inline bool 1774bf215546Sopenharmony_ciis_sy_producer(struct ir3_instruction *instr) 1775bf215546Sopenharmony_ci{ 1776bf215546Sopenharmony_ci return is_tex_or_prefetch(instr) || 1777bf215546Sopenharmony_ci (is_load(instr) && !is_local_mem_load(instr)) || 1778bf215546Sopenharmony_ci is_atomic(instr->opc); 1779bf215546Sopenharmony_ci} 1780bf215546Sopenharmony_ci 1781bf215546Sopenharmony_cistatic inline unsigned 1782bf215546Sopenharmony_cisoft_sy_delay(struct ir3_instruction *instr, struct ir3 *shader) 1783bf215546Sopenharmony_ci{ 1784bf215546Sopenharmony_ci /* TODO: this is just an optimistic guess, we can do better post-RA. 
1785bf215546Sopenharmony_ci */ 1786bf215546Sopenharmony_ci bool double_wavesize = 1787bf215546Sopenharmony_ci shader->type == MESA_SHADER_FRAGMENT || 1788bf215546Sopenharmony_ci shader->type == MESA_SHADER_COMPUTE; 1789bf215546Sopenharmony_ci 1790bf215546Sopenharmony_ci unsigned components = reg_elems(instr->dsts[0]); 1791bf215546Sopenharmony_ci 1792bf215546Sopenharmony_ci /* These numbers come from counting the number of delay slots to get 1793bf215546Sopenharmony_ci * cat5/cat6 results back using nops instead of (sy). Note that these numbers 1794bf215546Sopenharmony_ci * are with the result preloaded to cache by loading it before in the same 1795bf215546Sopenharmony_ci * shader - uncached results are much larger. 1796bf215546Sopenharmony_ci * 1797bf215546Sopenharmony_ci * Note: most ALU instructions can't complete at the full doubled rate, so 1798bf215546Sopenharmony_ci * they take 2 cycles. The only exception is fp16 instructions with no 1799bf215546Sopenharmony_ci * built-in conversions. Therefore divide the latency by 2. 1800bf215546Sopenharmony_ci * 1801bf215546Sopenharmony_ci * TODO: Handle this properly in the scheduler and remove this. 
1802bf215546Sopenharmony_ci */ 1803bf215546Sopenharmony_ci if (instr->opc == OPC_LDC) { 1804bf215546Sopenharmony_ci if (double_wavesize) 1805bf215546Sopenharmony_ci return (21 + 8 * components) / 2; 1806bf215546Sopenharmony_ci else 1807bf215546Sopenharmony_ci return 18 + 4 * components; 1808bf215546Sopenharmony_ci } else if (is_tex_or_prefetch(instr)) { 1809bf215546Sopenharmony_ci if (double_wavesize) { 1810bf215546Sopenharmony_ci switch (components) { 1811bf215546Sopenharmony_ci case 1: return 58 / 2; 1812bf215546Sopenharmony_ci case 2: return 60 / 2; 1813bf215546Sopenharmony_ci case 3: return 77 / 2; 1814bf215546Sopenharmony_ci case 4: return 79 / 2; 1815bf215546Sopenharmony_ci default: unreachable("bad number of components"); 1816bf215546Sopenharmony_ci } 1817bf215546Sopenharmony_ci } else { 1818bf215546Sopenharmony_ci switch (components) { 1819bf215546Sopenharmony_ci case 1: return 51; 1820bf215546Sopenharmony_ci case 2: return 53; 1821bf215546Sopenharmony_ci case 3: return 62; 1822bf215546Sopenharmony_ci case 4: return 64; 1823bf215546Sopenharmony_ci default: unreachable("bad number of components"); 1824bf215546Sopenharmony_ci } 1825bf215546Sopenharmony_ci } 1826bf215546Sopenharmony_ci } else { 1827bf215546Sopenharmony_ci /* TODO: measure other cat6 opcodes like ldg */ 1828bf215546Sopenharmony_ci if (double_wavesize) 1829bf215546Sopenharmony_ci return (172 + components) / 2; 1830bf215546Sopenharmony_ci else 1831bf215546Sopenharmony_ci return 109 + components; 1832bf215546Sopenharmony_ci } 1833bf215546Sopenharmony_ci} 1834bf215546Sopenharmony_ci 1835bf215546Sopenharmony_ci 1836bf215546Sopenharmony_ci/* unreachable block elimination: */ 1837bf215546Sopenharmony_cibool ir3_remove_unreachable(struct ir3 *ir); 1838bf215546Sopenharmony_ci 1839bf215546Sopenharmony_ci/* dead code elimination: */ 1840bf215546Sopenharmony_cistruct ir3_shader_variant; 1841bf215546Sopenharmony_cibool ir3_dce(struct ir3 *ir, struct ir3_shader_variant *so); 1842bf215546Sopenharmony_ci 
1843bf215546Sopenharmony_ci/* fp16 conversion folding */ 1844bf215546Sopenharmony_cibool ir3_cf(struct ir3 *ir); 1845bf215546Sopenharmony_ci 1846bf215546Sopenharmony_ci/* copy-propagate: */ 1847bf215546Sopenharmony_cibool ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so); 1848bf215546Sopenharmony_ci 1849bf215546Sopenharmony_ci/* common subexpression elimination: */ 1850bf215546Sopenharmony_cibool ir3_cse(struct ir3 *ir); 1851bf215546Sopenharmony_ci 1852bf215546Sopenharmony_ci/* Make arrays SSA */ 1853bf215546Sopenharmony_cibool ir3_array_to_ssa(struct ir3 *ir); 1854bf215546Sopenharmony_ci 1855bf215546Sopenharmony_ci/* scheduling: */ 1856bf215546Sopenharmony_cibool ir3_sched_add_deps(struct ir3 *ir); 1857bf215546Sopenharmony_ciint ir3_sched(struct ir3 *ir); 1858bf215546Sopenharmony_ci 1859bf215546Sopenharmony_cistruct ir3_context; 1860bf215546Sopenharmony_cibool ir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v); 1861bf215546Sopenharmony_ci 1862bf215546Sopenharmony_ci/* register assignment: */ 1863bf215546Sopenharmony_ciint ir3_ra(struct ir3_shader_variant *v); 1864bf215546Sopenharmony_ci 1865bf215546Sopenharmony_ci/* lower subgroup ops: */ 1866bf215546Sopenharmony_cibool ir3_lower_subgroups(struct ir3 *ir); 1867bf215546Sopenharmony_ci 1868bf215546Sopenharmony_ci/* legalize: */ 1869bf215546Sopenharmony_cibool ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary); 1870bf215546Sopenharmony_cibool ir3_legalize_relative(struct ir3 *ir); 1871bf215546Sopenharmony_ci 1872bf215546Sopenharmony_cistatic inline bool 1873bf215546Sopenharmony_ciir3_has_latency_to_hide(struct ir3 *ir) 1874bf215546Sopenharmony_ci{ 1875bf215546Sopenharmony_ci /* VS/GS/TCS/TESS co-exist with frag shader invocations, but we don't 1876bf215546Sopenharmony_ci * know the nature of the fragment shader. 
Just assume it will have 1877bf215546Sopenharmony_ci * latency to hide: 1878bf215546Sopenharmony_ci */ 1879bf215546Sopenharmony_ci if (ir->type != MESA_SHADER_FRAGMENT) 1880bf215546Sopenharmony_ci return true; 1881bf215546Sopenharmony_ci 1882bf215546Sopenharmony_ci foreach_block (block, &ir->block_list) { 1883bf215546Sopenharmony_ci foreach_instr (instr, &block->instr_list) { 1884bf215546Sopenharmony_ci if (is_tex_or_prefetch(instr)) 1885bf215546Sopenharmony_ci return true; 1886bf215546Sopenharmony_ci 1887bf215546Sopenharmony_ci if (is_load(instr)) { 1888bf215546Sopenharmony_ci switch (instr->opc) { 1889bf215546Sopenharmony_ci case OPC_LDLV: 1890bf215546Sopenharmony_ci case OPC_LDL: 1891bf215546Sopenharmony_ci case OPC_LDLW: 1892bf215546Sopenharmony_ci break; 1893bf215546Sopenharmony_ci default: 1894bf215546Sopenharmony_ci return true; 1895bf215546Sopenharmony_ci } 1896bf215546Sopenharmony_ci } 1897bf215546Sopenharmony_ci } 1898bf215546Sopenharmony_ci } 1899bf215546Sopenharmony_ci 1900bf215546Sopenharmony_ci return false; 1901bf215546Sopenharmony_ci} 1902bf215546Sopenharmony_ci 1903bf215546Sopenharmony_ci/* ************************************************************************* */ 1904bf215546Sopenharmony_ci/* instruction helpers */ 1905bf215546Sopenharmony_ci 1906bf215546Sopenharmony_ci/* creates SSA src of correct type (ie. 
half vs full precision) */ 1907bf215546Sopenharmony_cistatic inline struct ir3_register * 1908bf215546Sopenharmony_ci__ssa_src(struct ir3_instruction *instr, struct ir3_instruction *src, 1909bf215546Sopenharmony_ci unsigned flags) 1910bf215546Sopenharmony_ci{ 1911bf215546Sopenharmony_ci struct ir3_register *reg; 1912bf215546Sopenharmony_ci if (src->dsts[0]->flags & IR3_REG_HALF) 1913bf215546Sopenharmony_ci flags |= IR3_REG_HALF; 1914bf215546Sopenharmony_ci reg = ir3_src_create(instr, INVALID_REG, IR3_REG_SSA | flags); 1915bf215546Sopenharmony_ci reg->def = src->dsts[0]; 1916bf215546Sopenharmony_ci reg->wrmask = src->dsts[0]->wrmask; 1917bf215546Sopenharmony_ci return reg; 1918bf215546Sopenharmony_ci} 1919bf215546Sopenharmony_ci 1920bf215546Sopenharmony_cistatic inline struct ir3_register * 1921bf215546Sopenharmony_ci__ssa_dst(struct ir3_instruction *instr) 1922bf215546Sopenharmony_ci{ 1923bf215546Sopenharmony_ci struct ir3_register *reg = ir3_dst_create(instr, INVALID_REG, IR3_REG_SSA); 1924bf215546Sopenharmony_ci reg->instr = instr; 1925bf215546Sopenharmony_ci return reg; 1926bf215546Sopenharmony_ci} 1927bf215546Sopenharmony_ci 1928bf215546Sopenharmony_cistatic inline struct ir3_instruction * 1929bf215546Sopenharmony_cicreate_immed_typed(struct ir3_block *block, uint32_t val, type_t type) 1930bf215546Sopenharmony_ci{ 1931bf215546Sopenharmony_ci struct ir3_instruction *mov; 1932bf215546Sopenharmony_ci unsigned flags = (type_size(type) < 32) ? 
IR3_REG_HALF : 0; 1933bf215546Sopenharmony_ci 1934bf215546Sopenharmony_ci mov = ir3_instr_create(block, OPC_MOV, 1, 1); 1935bf215546Sopenharmony_ci mov->cat1.src_type = type; 1936bf215546Sopenharmony_ci mov->cat1.dst_type = type; 1937bf215546Sopenharmony_ci __ssa_dst(mov)->flags |= flags; 1938bf215546Sopenharmony_ci ir3_src_create(mov, 0, IR3_REG_IMMED | flags)->uim_val = val; 1939bf215546Sopenharmony_ci 1940bf215546Sopenharmony_ci return mov; 1941bf215546Sopenharmony_ci} 1942bf215546Sopenharmony_ci 1943bf215546Sopenharmony_cistatic inline struct ir3_instruction * 1944bf215546Sopenharmony_cicreate_immed(struct ir3_block *block, uint32_t val) 1945bf215546Sopenharmony_ci{ 1946bf215546Sopenharmony_ci return create_immed_typed(block, val, TYPE_U32); 1947bf215546Sopenharmony_ci} 1948bf215546Sopenharmony_ci 1949bf215546Sopenharmony_cistatic inline struct ir3_instruction * 1950bf215546Sopenharmony_cicreate_uniform_typed(struct ir3_block *block, unsigned n, type_t type) 1951bf215546Sopenharmony_ci{ 1952bf215546Sopenharmony_ci struct ir3_instruction *mov; 1953bf215546Sopenharmony_ci unsigned flags = (type_size(type) < 32) ? 
IR3_REG_HALF : 0; 1954bf215546Sopenharmony_ci 1955bf215546Sopenharmony_ci mov = ir3_instr_create(block, OPC_MOV, 1, 1); 1956bf215546Sopenharmony_ci mov->cat1.src_type = type; 1957bf215546Sopenharmony_ci mov->cat1.dst_type = type; 1958bf215546Sopenharmony_ci __ssa_dst(mov)->flags |= flags; 1959bf215546Sopenharmony_ci ir3_src_create(mov, n, IR3_REG_CONST | flags); 1960bf215546Sopenharmony_ci 1961bf215546Sopenharmony_ci return mov; 1962bf215546Sopenharmony_ci} 1963bf215546Sopenharmony_ci 1964bf215546Sopenharmony_cistatic inline struct ir3_instruction * 1965bf215546Sopenharmony_cicreate_uniform(struct ir3_block *block, unsigned n) 1966bf215546Sopenharmony_ci{ 1967bf215546Sopenharmony_ci return create_uniform_typed(block, n, TYPE_F32); 1968bf215546Sopenharmony_ci} 1969bf215546Sopenharmony_ci 1970bf215546Sopenharmony_cistatic inline struct ir3_instruction * 1971bf215546Sopenharmony_cicreate_uniform_indirect(struct ir3_block *block, int n, type_t type, 1972bf215546Sopenharmony_ci struct ir3_instruction *address) 1973bf215546Sopenharmony_ci{ 1974bf215546Sopenharmony_ci struct ir3_instruction *mov; 1975bf215546Sopenharmony_ci 1976bf215546Sopenharmony_ci mov = ir3_instr_create(block, OPC_MOV, 1, 1); 1977bf215546Sopenharmony_ci mov->cat1.src_type = type; 1978bf215546Sopenharmony_ci mov->cat1.dst_type = type; 1979bf215546Sopenharmony_ci __ssa_dst(mov); 1980bf215546Sopenharmony_ci ir3_src_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n; 1981bf215546Sopenharmony_ci 1982bf215546Sopenharmony_ci ir3_instr_set_address(mov, address); 1983bf215546Sopenharmony_ci 1984bf215546Sopenharmony_ci return mov; 1985bf215546Sopenharmony_ci} 1986bf215546Sopenharmony_ci 1987bf215546Sopenharmony_cistatic inline struct ir3_instruction * 1988bf215546Sopenharmony_ciir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type) 1989bf215546Sopenharmony_ci{ 1990bf215546Sopenharmony_ci struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV, 1, 1); 
1991bf215546Sopenharmony_ci unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0; 1992bf215546Sopenharmony_ci 1993bf215546Sopenharmony_ci __ssa_dst(instr)->flags |= flags; 1994bf215546Sopenharmony_ci if (src->dsts[0]->flags & IR3_REG_ARRAY) { 1995bf215546Sopenharmony_ci struct ir3_register *src_reg = __ssa_src(instr, src, IR3_REG_ARRAY); 1996bf215546Sopenharmony_ci src_reg->array = src->dsts[0]->array; 1997bf215546Sopenharmony_ci } else { 1998bf215546Sopenharmony_ci __ssa_src(instr, src, src->dsts[0]->flags & IR3_REG_SHARED); 1999bf215546Sopenharmony_ci } 2000bf215546Sopenharmony_ci assert(!(src->dsts[0]->flags & IR3_REG_RELATIV)); 2001bf215546Sopenharmony_ci instr->cat1.src_type = type; 2002bf215546Sopenharmony_ci instr->cat1.dst_type = type; 2003bf215546Sopenharmony_ci return instr; 2004bf215546Sopenharmony_ci} 2005bf215546Sopenharmony_ci 2006bf215546Sopenharmony_cistatic inline struct ir3_instruction * 2007bf215546Sopenharmony_ciir3_COV(struct ir3_block *block, struct ir3_instruction *src, type_t src_type, 2008bf215546Sopenharmony_ci type_t dst_type) 2009bf215546Sopenharmony_ci{ 2010bf215546Sopenharmony_ci struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV, 1, 1); 2011bf215546Sopenharmony_ci unsigned dst_flags = (type_size(dst_type) < 32) ? IR3_REG_HALF : 0; 2012bf215546Sopenharmony_ci unsigned src_flags = (type_size(src_type) < 32) ? 
IR3_REG_HALF : 0; 2013bf215546Sopenharmony_ci 2014bf215546Sopenharmony_ci assert((src->dsts[0]->flags & IR3_REG_HALF) == src_flags); 2015bf215546Sopenharmony_ci 2016bf215546Sopenharmony_ci __ssa_dst(instr)->flags |= dst_flags; 2017bf215546Sopenharmony_ci __ssa_src(instr, src, 0); 2018bf215546Sopenharmony_ci instr->cat1.src_type = src_type; 2019bf215546Sopenharmony_ci instr->cat1.dst_type = dst_type; 2020bf215546Sopenharmony_ci assert(!(src->dsts[0]->flags & IR3_REG_ARRAY)); 2021bf215546Sopenharmony_ci return instr; 2022bf215546Sopenharmony_ci} 2023bf215546Sopenharmony_ci 2024bf215546Sopenharmony_cistatic inline struct ir3_instruction * 2025bf215546Sopenharmony_ciir3_MOVMSK(struct ir3_block *block, unsigned components) 2026bf215546Sopenharmony_ci{ 2027bf215546Sopenharmony_ci struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOVMSK, 1, 0); 2028bf215546Sopenharmony_ci 2029bf215546Sopenharmony_ci struct ir3_register *dst = __ssa_dst(instr); 2030bf215546Sopenharmony_ci dst->flags |= IR3_REG_SHARED; 2031bf215546Sopenharmony_ci dst->wrmask = (1 << components) - 1; 2032bf215546Sopenharmony_ci instr->repeat = components - 1; 2033bf215546Sopenharmony_ci return instr; 2034bf215546Sopenharmony_ci} 2035bf215546Sopenharmony_ci 2036bf215546Sopenharmony_cistatic inline struct ir3_instruction * 2037bf215546Sopenharmony_ciir3_BALLOT_MACRO(struct ir3_block *block, struct ir3_instruction *src, 2038bf215546Sopenharmony_ci unsigned components) 2039bf215546Sopenharmony_ci{ 2040bf215546Sopenharmony_ci struct ir3_instruction *instr = 2041bf215546Sopenharmony_ci ir3_instr_create(block, OPC_BALLOT_MACRO, 1, 1); 2042bf215546Sopenharmony_ci 2043bf215546Sopenharmony_ci struct ir3_register *dst = __ssa_dst(instr); 2044bf215546Sopenharmony_ci dst->flags |= IR3_REG_SHARED; 2045bf215546Sopenharmony_ci dst->wrmask = (1 << components) - 1; 2046bf215546Sopenharmony_ci 2047bf215546Sopenharmony_ci __ssa_src(instr, src, 0); 2048bf215546Sopenharmony_ci 2049bf215546Sopenharmony_ci return instr; 
2050bf215546Sopenharmony_ci} 2051bf215546Sopenharmony_ci 2052bf215546Sopenharmony_cistatic inline struct ir3_instruction * 2053bf215546Sopenharmony_ciir3_NOP(struct ir3_block *block) 2054bf215546Sopenharmony_ci{ 2055bf215546Sopenharmony_ci return ir3_instr_create(block, OPC_NOP, 0, 0); 2056bf215546Sopenharmony_ci} 2057bf215546Sopenharmony_ci 2058bf215546Sopenharmony_ci/* clang-format off */ 2059bf215546Sopenharmony_ci#define __INSTR0(flag, name, opc) \ 2060bf215546Sopenharmony_cistatic inline struct ir3_instruction *ir3_##name(struct ir3_block *block) \ 2061bf215546Sopenharmony_ci{ \ 2062bf215546Sopenharmony_ci struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 0); \ 2063bf215546Sopenharmony_ci instr->flags |= flag; \ 2064bf215546Sopenharmony_ci return instr; \ 2065bf215546Sopenharmony_ci} 2066bf215546Sopenharmony_ci/* clang-format on */ 2067bf215546Sopenharmony_ci#define INSTR0F(f, name) __INSTR0(IR3_INSTR_##f, name##_##f, OPC_##name) 2068bf215546Sopenharmony_ci#define INSTR0(name) __INSTR0(0, name, OPC_##name) 2069bf215546Sopenharmony_ci 2070bf215546Sopenharmony_ci/* clang-format off */ 2071bf215546Sopenharmony_ci#define __INSTR1(flag, dst_count, name, opc) \ 2072bf215546Sopenharmony_cistatic inline struct ir3_instruction *ir3_##name( \ 2073bf215546Sopenharmony_ci struct ir3_block *block, struct ir3_instruction *a, unsigned aflags) \ 2074bf215546Sopenharmony_ci{ \ 2075bf215546Sopenharmony_ci struct ir3_instruction *instr = \ 2076bf215546Sopenharmony_ci ir3_instr_create(block, opc, dst_count, 1); \ 2077bf215546Sopenharmony_ci for (unsigned i = 0; i < dst_count; i++) \ 2078bf215546Sopenharmony_ci __ssa_dst(instr); \ 2079bf215546Sopenharmony_ci __ssa_src(instr, a, aflags); \ 2080bf215546Sopenharmony_ci instr->flags |= flag; \ 2081bf215546Sopenharmony_ci return instr; \ 2082bf215546Sopenharmony_ci} 2083bf215546Sopenharmony_ci/* clang-format on */ 2084bf215546Sopenharmony_ci#define INSTR1F(f, name) __INSTR1(IR3_INSTR_##f, 1, name##_##f, OPC_##name) 
2085bf215546Sopenharmony_ci#define INSTR1(name) __INSTR1(0, 1, name, OPC_##name) 2086bf215546Sopenharmony_ci#define INSTR1NODST(name) __INSTR1(0, 0, name, OPC_##name) 2087bf215546Sopenharmony_ci 2088bf215546Sopenharmony_ci/* clang-format off */ 2089bf215546Sopenharmony_ci#define __INSTR2(flag, dst_count, name, opc) \ 2090bf215546Sopenharmony_cistatic inline struct ir3_instruction *ir3_##name( \ 2091bf215546Sopenharmony_ci struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \ 2092bf215546Sopenharmony_ci struct ir3_instruction *b, unsigned bflags) \ 2093bf215546Sopenharmony_ci{ \ 2094bf215546Sopenharmony_ci struct ir3_instruction *instr = ir3_instr_create(block, opc, dst_count, 2); \ 2095bf215546Sopenharmony_ci for (unsigned i = 0; i < dst_count; i++) \ 2096bf215546Sopenharmony_ci __ssa_dst(instr); \ 2097bf215546Sopenharmony_ci __ssa_src(instr, a, aflags); \ 2098bf215546Sopenharmony_ci __ssa_src(instr, b, bflags); \ 2099bf215546Sopenharmony_ci instr->flags |= flag; \ 2100bf215546Sopenharmony_ci return instr; \ 2101bf215546Sopenharmony_ci} 2102bf215546Sopenharmony_ci/* clang-format on */ 2103bf215546Sopenharmony_ci#define INSTR2F(f, name) __INSTR2(IR3_INSTR_##f, 1, name##_##f, OPC_##name) 2104bf215546Sopenharmony_ci#define INSTR2(name) __INSTR2(0, 1, name, OPC_##name) 2105bf215546Sopenharmony_ci#define INSTR2NODST(name) __INSTR2(0, 0, name, OPC_##name) 2106bf215546Sopenharmony_ci 2107bf215546Sopenharmony_ci/* clang-format off */ 2108bf215546Sopenharmony_ci#define __INSTR3(flag, dst_count, name, opc) \ 2109bf215546Sopenharmony_cistatic inline struct ir3_instruction *ir3_##name( \ 2110bf215546Sopenharmony_ci struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \ 2111bf215546Sopenharmony_ci struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c, \ 2112bf215546Sopenharmony_ci unsigned cflags) \ 2113bf215546Sopenharmony_ci{ \ 2114bf215546Sopenharmony_ci struct ir3_instruction *instr = \ 2115bf215546Sopenharmony_ci 
ir3_instr_create(block, opc, dst_count, 3); \ 2116bf215546Sopenharmony_ci for (unsigned i = 0; i < dst_count; i++) \ 2117bf215546Sopenharmony_ci __ssa_dst(instr); \ 2118bf215546Sopenharmony_ci __ssa_src(instr, a, aflags); \ 2119bf215546Sopenharmony_ci __ssa_src(instr, b, bflags); \ 2120bf215546Sopenharmony_ci __ssa_src(instr, c, cflags); \ 2121bf215546Sopenharmony_ci instr->flags |= flag; \ 2122bf215546Sopenharmony_ci return instr; \ 2123bf215546Sopenharmony_ci} 2124bf215546Sopenharmony_ci/* clang-format on */ 2125bf215546Sopenharmony_ci#define INSTR3F(f, name) __INSTR3(IR3_INSTR_##f, 1, name##_##f, OPC_##name) 2126bf215546Sopenharmony_ci#define INSTR3(name) __INSTR3(0, 1, name, OPC_##name) 2127bf215546Sopenharmony_ci#define INSTR3NODST(name) __INSTR3(0, 0, name, OPC_##name) 2128bf215546Sopenharmony_ci 2129bf215546Sopenharmony_ci/* clang-format off */ 2130bf215546Sopenharmony_ci#define __INSTR4(flag, dst_count, name, opc) \ 2131bf215546Sopenharmony_cistatic inline struct ir3_instruction *ir3_##name( \ 2132bf215546Sopenharmony_ci struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \ 2133bf215546Sopenharmony_ci struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c, \ 2134bf215546Sopenharmony_ci unsigned cflags, struct ir3_instruction *d, unsigned dflags) \ 2135bf215546Sopenharmony_ci{ \ 2136bf215546Sopenharmony_ci struct ir3_instruction *instr = \ 2137bf215546Sopenharmony_ci ir3_instr_create(block, opc, dst_count, 4); \ 2138bf215546Sopenharmony_ci for (unsigned i = 0; i < dst_count; i++) \ 2139bf215546Sopenharmony_ci __ssa_dst(instr); \ 2140bf215546Sopenharmony_ci __ssa_src(instr, a, aflags); \ 2141bf215546Sopenharmony_ci __ssa_src(instr, b, bflags); \ 2142bf215546Sopenharmony_ci __ssa_src(instr, c, cflags); \ 2143bf215546Sopenharmony_ci __ssa_src(instr, d, dflags); \ 2144bf215546Sopenharmony_ci instr->flags |= flag; \ 2145bf215546Sopenharmony_ci return instr; \ 2146bf215546Sopenharmony_ci} 2147bf215546Sopenharmony_ci/* clang-format 
on */ 2148bf215546Sopenharmony_ci#define INSTR4F(f, name) __INSTR4(IR3_INSTR_##f, 1, name##_##f, OPC_##name) 2149bf215546Sopenharmony_ci#define INSTR4(name) __INSTR4(0, 1, name, OPC_##name) 2150bf215546Sopenharmony_ci#define INSTR4NODST(name) __INSTR4(0, 0, name, OPC_##name) 2151bf215546Sopenharmony_ci 2152bf215546Sopenharmony_ci/* clang-format off */ 2153bf215546Sopenharmony_ci#define __INSTR5(flag, name, opc) \ 2154bf215546Sopenharmony_cistatic inline struct ir3_instruction *ir3_##name( \ 2155bf215546Sopenharmony_ci struct ir3_block *block, struct ir3_instruction *a, unsigned aflags, \ 2156bf215546Sopenharmony_ci struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c, \ 2157bf215546Sopenharmony_ci unsigned cflags, struct ir3_instruction *d, unsigned dflags, \ 2158bf215546Sopenharmony_ci struct ir3_instruction *e, unsigned eflags) \ 2159bf215546Sopenharmony_ci{ \ 2160bf215546Sopenharmony_ci struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 5); \ 2161bf215546Sopenharmony_ci __ssa_dst(instr); \ 2162bf215546Sopenharmony_ci __ssa_src(instr, a, aflags); \ 2163bf215546Sopenharmony_ci __ssa_src(instr, b, bflags); \ 2164bf215546Sopenharmony_ci __ssa_src(instr, c, cflags); \ 2165bf215546Sopenharmony_ci __ssa_src(instr, d, dflags); \ 2166bf215546Sopenharmony_ci __ssa_src(instr, e, eflags); \ 2167bf215546Sopenharmony_ci instr->flags |= flag; \ 2168bf215546Sopenharmony_ci return instr; \ 2169bf215546Sopenharmony_ci} 2170bf215546Sopenharmony_ci/* clang-format on */ 2171bf215546Sopenharmony_ci#define INSTR5F(f, name) __INSTR5(IR3_INSTR_##f, name##_##f, OPC_##name) 2172bf215546Sopenharmony_ci#define INSTR5(name) __INSTR5(0, name, OPC_##name) 2173bf215546Sopenharmony_ci 2174bf215546Sopenharmony_ci/* clang-format off */ 2175bf215546Sopenharmony_ci#define __INSTR6(flag, dst_count, name, opc) \ 2176bf215546Sopenharmony_cistatic inline struct ir3_instruction *ir3_##name( \ 2177bf215546Sopenharmony_ci struct ir3_block *block, struct ir3_instruction *a, 
unsigned aflags, \ 2178bf215546Sopenharmony_ci struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c, \ 2179bf215546Sopenharmony_ci unsigned cflags, struct ir3_instruction *d, unsigned dflags, \ 2180bf215546Sopenharmony_ci struct ir3_instruction *e, unsigned eflags, struct ir3_instruction *f, \ 2181bf215546Sopenharmony_ci unsigned fflags) \ 2182bf215546Sopenharmony_ci{ \ 2183bf215546Sopenharmony_ci struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 6); \ 2184bf215546Sopenharmony_ci for (unsigned i = 0; i < dst_count; i++) \ 2185bf215546Sopenharmony_ci __ssa_dst(instr); \ 2186bf215546Sopenharmony_ci __ssa_src(instr, a, aflags); \ 2187bf215546Sopenharmony_ci __ssa_src(instr, b, bflags); \ 2188bf215546Sopenharmony_ci __ssa_src(instr, c, cflags); \ 2189bf215546Sopenharmony_ci __ssa_src(instr, d, dflags); \ 2190bf215546Sopenharmony_ci __ssa_src(instr, e, eflags); \ 2191bf215546Sopenharmony_ci __ssa_src(instr, f, fflags); \ 2192bf215546Sopenharmony_ci instr->flags |= flag; \ 2193bf215546Sopenharmony_ci return instr; \ 2194bf215546Sopenharmony_ci} 2195bf215546Sopenharmony_ci/* clang-format on */ 2196bf215546Sopenharmony_ci#define INSTR6F(f, name) __INSTR6(IR3_INSTR_##f, 1, name##_##f, OPC_##name) 2197bf215546Sopenharmony_ci#define INSTR6(name) __INSTR6(0, 1, name, OPC_##name) 2198bf215546Sopenharmony_ci#define INSTR6NODST(name) __INSTR6(0, 0, name, OPC_##name) 2199bf215546Sopenharmony_ci 2200bf215546Sopenharmony_ci/* cat0 instructions: */ 2201bf215546Sopenharmony_ciINSTR1NODST(B) 2202bf215546Sopenharmony_ciINSTR0(JUMP) 2203bf215546Sopenharmony_ciINSTR1NODST(KILL) 2204bf215546Sopenharmony_ciINSTR1NODST(DEMOTE) 2205bf215546Sopenharmony_ciINSTR0(END) 2206bf215546Sopenharmony_ciINSTR0(CHSH) 2207bf215546Sopenharmony_ciINSTR0(CHMASK) 2208bf215546Sopenharmony_ciINSTR1NODST(PREDT) 2209bf215546Sopenharmony_ciINSTR0(PREDF) 2210bf215546Sopenharmony_ciINSTR0(PREDE) 2211bf215546Sopenharmony_ciINSTR0(GETONE) 2212bf215546Sopenharmony_ciINSTR0(SHPS) 
2213bf215546Sopenharmony_ciINSTR0(SHPE) 2214bf215546Sopenharmony_ci 2215bf215546Sopenharmony_ci/* cat1 macros */ 2216bf215546Sopenharmony_ciINSTR1(ANY_MACRO) 2217bf215546Sopenharmony_ciINSTR1(ALL_MACRO) 2218bf215546Sopenharmony_ciINSTR1(READ_FIRST_MACRO) 2219bf215546Sopenharmony_ciINSTR2(READ_COND_MACRO) 2220bf215546Sopenharmony_ci 2221bf215546Sopenharmony_cistatic inline struct ir3_instruction * 2222bf215546Sopenharmony_ciir3_ELECT_MACRO(struct ir3_block *block) 2223bf215546Sopenharmony_ci{ 2224bf215546Sopenharmony_ci struct ir3_instruction *instr = 2225bf215546Sopenharmony_ci ir3_instr_create(block, OPC_ELECT_MACRO, 1, 0); 2226bf215546Sopenharmony_ci __ssa_dst(instr); 2227bf215546Sopenharmony_ci return instr; 2228bf215546Sopenharmony_ci} 2229bf215546Sopenharmony_ci 2230bf215546Sopenharmony_cistatic inline struct ir3_instruction * 2231bf215546Sopenharmony_ciir3_SHPS_MACRO(struct ir3_block *block) 2232bf215546Sopenharmony_ci{ 2233bf215546Sopenharmony_ci struct ir3_instruction *instr = 2234bf215546Sopenharmony_ci ir3_instr_create(block, OPC_SHPS_MACRO, 1, 0); 2235bf215546Sopenharmony_ci __ssa_dst(instr); 2236bf215546Sopenharmony_ci return instr; 2237bf215546Sopenharmony_ci} 2238bf215546Sopenharmony_ci 2239bf215546Sopenharmony_ci/* cat2 instructions, most 2 src but some 1 src: */ 2240bf215546Sopenharmony_ciINSTR2(ADD_F) 2241bf215546Sopenharmony_ciINSTR2(MIN_F) 2242bf215546Sopenharmony_ciINSTR2(MAX_F) 2243bf215546Sopenharmony_ciINSTR2(MUL_F) 2244bf215546Sopenharmony_ciINSTR1(SIGN_F) 2245bf215546Sopenharmony_ciINSTR2(CMPS_F) 2246bf215546Sopenharmony_ciINSTR1(ABSNEG_F) 2247bf215546Sopenharmony_ciINSTR2(CMPV_F) 2248bf215546Sopenharmony_ciINSTR1(FLOOR_F) 2249bf215546Sopenharmony_ciINSTR1(CEIL_F) 2250bf215546Sopenharmony_ciINSTR1(RNDNE_F) 2251bf215546Sopenharmony_ciINSTR1(RNDAZ_F) 2252bf215546Sopenharmony_ciINSTR1(TRUNC_F) 2253bf215546Sopenharmony_ciINSTR2(ADD_U) 2254bf215546Sopenharmony_ciINSTR2(ADD_S) 2255bf215546Sopenharmony_ciINSTR2(SUB_U) 
/* (cat2 builders, continued)
 *
 * Each INSTRn(NAME) line expands to a static inline builder ir3_NAME()
 * taking the block plus n (instruction, flags) source pairs and creating
 * an OPC_NAME instruction with one SSA destination.
 */
INSTR2(SUB_S)
INSTR2(CMPS_U)
INSTR2(CMPS_S)
INSTR2(MIN_U)
INSTR2(MIN_S)
INSTR2(MAX_U)
INSTR2(MAX_S)
INSTR1(ABSNEG_S)
INSTR2(AND_B)
INSTR2(OR_B)
INSTR1(NOT_B)
INSTR2(XOR_B)
INSTR2(CMPV_U)
INSTR2(CMPV_S)
INSTR2(MUL_U24)
INSTR2(MUL_S24)
INSTR2(MULL_U)
INSTR1(BFREV_B)
INSTR1(CLZ_S)
INSTR1(CLZ_B)
INSTR2(SHL_B)
INSTR2(SHR_B)
INSTR2(ASHR_B)
INSTR2(BARY_F)
INSTR2(FLAT_B)
INSTR2(MGEN_B)
INSTR2(GETBIT_B)
INSTR1(SETRM)
INSTR1(CBITS_B)
INSTR2(SHB)
INSTR2(MSAD)

/* cat3 instructions (three sources each): */
INSTR3(MAD_U16)
INSTR3(MADSH_U16)
INSTR3(MAD_S16)
INSTR3(MADSH_M16)
INSTR3(MAD_U24)
INSTR3(MAD_S24)
INSTR3(MAD_F16)
INSTR3(MAD_F32)
INSTR3(DP2ACC)
INSTR3(DP4ACC)
/* NOTE: SEL_B32 checks for zero vs nonzero */
INSTR3(SEL_B16)
INSTR3(SEL_B32)
2302bf215546Sopenharmony_ciINSTR3(SEL_S16) 2303bf215546Sopenharmony_ciINSTR3(SEL_S32) 2304bf215546Sopenharmony_ciINSTR3(SEL_F16) 2305bf215546Sopenharmony_ciINSTR3(SEL_F32) 2306bf215546Sopenharmony_ciINSTR3(SAD_S16) 2307bf215546Sopenharmony_ciINSTR3(SAD_S32) 2308bf215546Sopenharmony_ci 2309bf215546Sopenharmony_ci/* cat4 instructions: */ 2310bf215546Sopenharmony_ciINSTR1(RCP) 2311bf215546Sopenharmony_ciINSTR1(RSQ) 2312bf215546Sopenharmony_ciINSTR1(HRSQ) 2313bf215546Sopenharmony_ciINSTR1(LOG2) 2314bf215546Sopenharmony_ciINSTR1(HLOG2) 2315bf215546Sopenharmony_ciINSTR1(EXP2) 2316bf215546Sopenharmony_ciINSTR1(HEXP2) 2317bf215546Sopenharmony_ciINSTR1(SIN) 2318bf215546Sopenharmony_ciINSTR1(COS) 2319bf215546Sopenharmony_ciINSTR1(SQRT) 2320bf215546Sopenharmony_ci 2321bf215546Sopenharmony_ci/* cat5 instructions: */ 2322bf215546Sopenharmony_ciINSTR1(DSX) 2323bf215546Sopenharmony_ciINSTR1(DSXPP_MACRO) 2324bf215546Sopenharmony_ciINSTR1(DSY) 2325bf215546Sopenharmony_ciINSTR1(DSYPP_MACRO) 2326bf215546Sopenharmony_ciINSTR1F(3D, DSX) 2327bf215546Sopenharmony_ciINSTR1F(3D, DSY) 2328bf215546Sopenharmony_ciINSTR1(RGETPOS) 2329bf215546Sopenharmony_ci 2330bf215546Sopenharmony_cistatic inline struct ir3_instruction * 2331bf215546Sopenharmony_ciir3_SAM(struct ir3_block *block, opc_t opc, type_t type, unsigned wrmask, 2332bf215546Sopenharmony_ci unsigned flags, struct ir3_instruction *samp_tex, 2333bf215546Sopenharmony_ci struct ir3_instruction *src0, struct ir3_instruction *src1) 2334bf215546Sopenharmony_ci{ 2335bf215546Sopenharmony_ci struct ir3_instruction *sam; 2336bf215546Sopenharmony_ci unsigned nreg = 0; 2337bf215546Sopenharmony_ci 2338bf215546Sopenharmony_ci if (flags & IR3_INSTR_S2EN) { 2339bf215546Sopenharmony_ci nreg++; 2340bf215546Sopenharmony_ci } 2341bf215546Sopenharmony_ci if (src0) { 2342bf215546Sopenharmony_ci nreg++; 2343bf215546Sopenharmony_ci } 2344bf215546Sopenharmony_ci if (src1) { 2345bf215546Sopenharmony_ci nreg++; 2346bf215546Sopenharmony_ci } 
2347bf215546Sopenharmony_ci 2348bf215546Sopenharmony_ci sam = ir3_instr_create(block, opc, 1, nreg); 2349bf215546Sopenharmony_ci sam->flags |= flags; 2350bf215546Sopenharmony_ci __ssa_dst(sam)->wrmask = wrmask; 2351bf215546Sopenharmony_ci if (flags & IR3_INSTR_S2EN) { 2352bf215546Sopenharmony_ci __ssa_src(sam, samp_tex, (flags & IR3_INSTR_B) ? 0 : IR3_REG_HALF); 2353bf215546Sopenharmony_ci } 2354bf215546Sopenharmony_ci if (src0) { 2355bf215546Sopenharmony_ci __ssa_src(sam, src0, 0); 2356bf215546Sopenharmony_ci } 2357bf215546Sopenharmony_ci if (src1) { 2358bf215546Sopenharmony_ci __ssa_src(sam, src1, 0); 2359bf215546Sopenharmony_ci } 2360bf215546Sopenharmony_ci sam->cat5.type = type; 2361bf215546Sopenharmony_ci 2362bf215546Sopenharmony_ci return sam; 2363bf215546Sopenharmony_ci} 2364bf215546Sopenharmony_ci 2365bf215546Sopenharmony_ci/* cat6 instructions: */ 2366bf215546Sopenharmony_ciINSTR0(GETFIBERID) 2367bf215546Sopenharmony_ciINSTR2(LDLV) 2368bf215546Sopenharmony_ciINSTR3(LDG) 2369bf215546Sopenharmony_ciINSTR3(LDL) 2370bf215546Sopenharmony_ciINSTR3(LDLW) 2371bf215546Sopenharmony_ciINSTR3(LDP) 2372bf215546Sopenharmony_ciINSTR4NODST(STG) 2373bf215546Sopenharmony_ciINSTR3NODST(STL) 2374bf215546Sopenharmony_ciINSTR3NODST(STLW) 2375bf215546Sopenharmony_ciINSTR3NODST(STP) 2376bf215546Sopenharmony_ciINSTR1(RESINFO) 2377bf215546Sopenharmony_ciINSTR1(RESFMT) 2378bf215546Sopenharmony_ciINSTR2(ATOMIC_ADD) 2379bf215546Sopenharmony_ciINSTR2(ATOMIC_SUB) 2380bf215546Sopenharmony_ciINSTR2(ATOMIC_XCHG) 2381bf215546Sopenharmony_ciINSTR2(ATOMIC_INC) 2382bf215546Sopenharmony_ciINSTR2(ATOMIC_DEC) 2383bf215546Sopenharmony_ciINSTR2(ATOMIC_CMPXCHG) 2384bf215546Sopenharmony_ciINSTR2(ATOMIC_MIN) 2385bf215546Sopenharmony_ciINSTR2(ATOMIC_MAX) 2386bf215546Sopenharmony_ciINSTR2(ATOMIC_AND) 2387bf215546Sopenharmony_ciINSTR2(ATOMIC_OR) 2388bf215546Sopenharmony_ciINSTR2(ATOMIC_XOR) 2389bf215546Sopenharmony_ciINSTR2(LDC) 2390bf215546Sopenharmony_ciINSTR2(QUAD_SHUFFLE_BRCST) 
2391bf215546Sopenharmony_ciINSTR1(QUAD_SHUFFLE_HORIZ) 2392bf215546Sopenharmony_ciINSTR1(QUAD_SHUFFLE_VERT) 2393bf215546Sopenharmony_ciINSTR1(QUAD_SHUFFLE_DIAG) 2394bf215546Sopenharmony_ciINSTR2NODST(LDC_K) 2395bf215546Sopenharmony_ciINSTR2NODST(STC) 2396bf215546Sopenharmony_ci#if GPU >= 600 2397bf215546Sopenharmony_ciINSTR3NODST(STIB); 2398bf215546Sopenharmony_ciINSTR2(LDIB); 2399bf215546Sopenharmony_ciINSTR5(LDG_A); 2400bf215546Sopenharmony_ciINSTR6NODST(STG_A); 2401bf215546Sopenharmony_ciINSTR2(ATOMIC_G_ADD) 2402bf215546Sopenharmony_ciINSTR2(ATOMIC_G_SUB) 2403bf215546Sopenharmony_ciINSTR2(ATOMIC_G_XCHG) 2404bf215546Sopenharmony_ciINSTR2(ATOMIC_G_INC) 2405bf215546Sopenharmony_ciINSTR2(ATOMIC_G_DEC) 2406bf215546Sopenharmony_ciINSTR2(ATOMIC_G_CMPXCHG) 2407bf215546Sopenharmony_ciINSTR2(ATOMIC_G_MIN) 2408bf215546Sopenharmony_ciINSTR2(ATOMIC_G_MAX) 2409bf215546Sopenharmony_ciINSTR2(ATOMIC_G_AND) 2410bf215546Sopenharmony_ciINSTR2(ATOMIC_G_OR) 2411bf215546Sopenharmony_ciINSTR2(ATOMIC_G_XOR) 2412bf215546Sopenharmony_ciINSTR3(ATOMIC_B_ADD) 2413bf215546Sopenharmony_ciINSTR3(ATOMIC_B_SUB) 2414bf215546Sopenharmony_ciINSTR3(ATOMIC_B_XCHG) 2415bf215546Sopenharmony_ciINSTR3(ATOMIC_B_INC) 2416bf215546Sopenharmony_ciINSTR3(ATOMIC_B_DEC) 2417bf215546Sopenharmony_ciINSTR3(ATOMIC_B_CMPXCHG) 2418bf215546Sopenharmony_ciINSTR3(ATOMIC_B_MIN) 2419bf215546Sopenharmony_ciINSTR3(ATOMIC_B_MAX) 2420bf215546Sopenharmony_ciINSTR3(ATOMIC_B_AND) 2421bf215546Sopenharmony_ciINSTR3(ATOMIC_B_OR) 2422bf215546Sopenharmony_ciINSTR3(ATOMIC_B_XOR) 2423bf215546Sopenharmony_ci#elif GPU >= 400 2424bf215546Sopenharmony_ciINSTR3(LDGB) 2425bf215546Sopenharmony_ci#if GPU >= 500 2426bf215546Sopenharmony_ciINSTR3(LDIB) 2427bf215546Sopenharmony_ci#endif 2428bf215546Sopenharmony_ciINSTR4NODST(STGB) 2429bf215546Sopenharmony_ciINSTR4NODST(STIB) 2430bf215546Sopenharmony_ciINSTR4(ATOMIC_S_ADD) 2431bf215546Sopenharmony_ciINSTR4(ATOMIC_S_SUB) 2432bf215546Sopenharmony_ciINSTR4(ATOMIC_S_XCHG) 
2433bf215546Sopenharmony_ciINSTR4(ATOMIC_S_INC) 2434bf215546Sopenharmony_ciINSTR4(ATOMIC_S_DEC) 2435bf215546Sopenharmony_ciINSTR4(ATOMIC_S_CMPXCHG) 2436bf215546Sopenharmony_ciINSTR4(ATOMIC_S_MIN) 2437bf215546Sopenharmony_ciINSTR4(ATOMIC_S_MAX) 2438bf215546Sopenharmony_ciINSTR4(ATOMIC_S_AND) 2439bf215546Sopenharmony_ciINSTR4(ATOMIC_S_OR) 2440bf215546Sopenharmony_ciINSTR4(ATOMIC_S_XOR) 2441bf215546Sopenharmony_ci#endif 2442bf215546Sopenharmony_ci 2443bf215546Sopenharmony_ci/* cat7 instructions: */ 2444bf215546Sopenharmony_ciINSTR0(BAR) 2445bf215546Sopenharmony_ciINSTR0(FENCE) 2446bf215546Sopenharmony_ci 2447bf215546Sopenharmony_ci/* ************************************************************************* */ 2448bf215546Sopenharmony_ci#include "bitset.h" 2449bf215546Sopenharmony_ci 2450bf215546Sopenharmony_ci#define MAX_REG 256 2451bf215546Sopenharmony_ci 2452bf215546Sopenharmony_citypedef BITSET_DECLARE(regmaskstate_t, 2 * MAX_REG); 2453bf215546Sopenharmony_ci 2454bf215546Sopenharmony_citypedef struct { 2455bf215546Sopenharmony_ci bool mergedregs; 2456bf215546Sopenharmony_ci regmaskstate_t mask; 2457bf215546Sopenharmony_ci} regmask_t; 2458bf215546Sopenharmony_ci 2459bf215546Sopenharmony_cistatic inline bool 2460bf215546Sopenharmony_ci__regmask_get(regmask_t *regmask, bool half, unsigned n) 2461bf215546Sopenharmony_ci{ 2462bf215546Sopenharmony_ci if (regmask->mergedregs) { 2463bf215546Sopenharmony_ci /* a6xx+ case, with merged register file, we track things in terms 2464bf215546Sopenharmony_ci * of half-precision registers, with a full precisions register 2465bf215546Sopenharmony_ci * using two half-precision slots. 2466bf215546Sopenharmony_ci * 2467bf215546Sopenharmony_ci * Pretend that special regs (a0.x, a1.x, etc.) are full registers to 2468bf215546Sopenharmony_ci * avoid having them alias normal full regs. 
2469bf215546Sopenharmony_ci */ 2470bf215546Sopenharmony_ci if (half && !is_reg_num_special(n)) { 2471bf215546Sopenharmony_ci return BITSET_TEST(regmask->mask, n); 2472bf215546Sopenharmony_ci } else { 2473bf215546Sopenharmony_ci n *= 2; 2474bf215546Sopenharmony_ci return BITSET_TEST(regmask->mask, n) || 2475bf215546Sopenharmony_ci BITSET_TEST(regmask->mask, n + 1); 2476bf215546Sopenharmony_ci } 2477bf215546Sopenharmony_ci } else { 2478bf215546Sopenharmony_ci /* pre a6xx case, with separate register file for half and full 2479bf215546Sopenharmony_ci * precision: 2480bf215546Sopenharmony_ci */ 2481bf215546Sopenharmony_ci if (half) 2482bf215546Sopenharmony_ci n += MAX_REG; 2483bf215546Sopenharmony_ci return BITSET_TEST(regmask->mask, n); 2484bf215546Sopenharmony_ci } 2485bf215546Sopenharmony_ci} 2486bf215546Sopenharmony_ci 2487bf215546Sopenharmony_cistatic inline void 2488bf215546Sopenharmony_ci__regmask_set(regmask_t *regmask, bool half, unsigned n) 2489bf215546Sopenharmony_ci{ 2490bf215546Sopenharmony_ci if (regmask->mergedregs) { 2491bf215546Sopenharmony_ci /* a6xx+ case, with merged register file, we track things in terms 2492bf215546Sopenharmony_ci * of half-precision registers, with a full precisions register 2493bf215546Sopenharmony_ci * using two half-precision slots: 2494bf215546Sopenharmony_ci */ 2495bf215546Sopenharmony_ci if (half && !is_reg_num_special(n)) { 2496bf215546Sopenharmony_ci BITSET_SET(regmask->mask, n); 2497bf215546Sopenharmony_ci } else { 2498bf215546Sopenharmony_ci n *= 2; 2499bf215546Sopenharmony_ci BITSET_SET(regmask->mask, n); 2500bf215546Sopenharmony_ci BITSET_SET(regmask->mask, n + 1); 2501bf215546Sopenharmony_ci } 2502bf215546Sopenharmony_ci } else { 2503bf215546Sopenharmony_ci /* pre a6xx case, with separate register file for half and full 2504bf215546Sopenharmony_ci * precision: 2505bf215546Sopenharmony_ci */ 2506bf215546Sopenharmony_ci if (half) 2507bf215546Sopenharmony_ci n += MAX_REG; 2508bf215546Sopenharmony_ci 
BITSET_SET(regmask->mask, n); 2509bf215546Sopenharmony_ci } 2510bf215546Sopenharmony_ci} 2511bf215546Sopenharmony_ci 2512bf215546Sopenharmony_cistatic inline void 2513bf215546Sopenharmony_ci__regmask_clear(regmask_t *regmask, bool half, unsigned n) 2514bf215546Sopenharmony_ci{ 2515bf215546Sopenharmony_ci if (regmask->mergedregs) { 2516bf215546Sopenharmony_ci /* a6xx+ case, with merged register file, we track things in terms 2517bf215546Sopenharmony_ci * of half-precision registers, with a full precisions register 2518bf215546Sopenharmony_ci * using two half-precision slots: 2519bf215546Sopenharmony_ci */ 2520bf215546Sopenharmony_ci if (half && !is_reg_num_special(n)) { 2521bf215546Sopenharmony_ci BITSET_CLEAR(regmask->mask, n); 2522bf215546Sopenharmony_ci } else { 2523bf215546Sopenharmony_ci n *= 2; 2524bf215546Sopenharmony_ci BITSET_CLEAR(regmask->mask, n); 2525bf215546Sopenharmony_ci BITSET_CLEAR(regmask->mask, n + 1); 2526bf215546Sopenharmony_ci } 2527bf215546Sopenharmony_ci } else { 2528bf215546Sopenharmony_ci /* pre a6xx case, with separate register file for half and full 2529bf215546Sopenharmony_ci * precision: 2530bf215546Sopenharmony_ci */ 2531bf215546Sopenharmony_ci if (half) 2532bf215546Sopenharmony_ci n += MAX_REG; 2533bf215546Sopenharmony_ci BITSET_CLEAR(regmask->mask, n); 2534bf215546Sopenharmony_ci } 2535bf215546Sopenharmony_ci} 2536bf215546Sopenharmony_ci 2537bf215546Sopenharmony_cistatic inline void 2538bf215546Sopenharmony_ciregmask_init(regmask_t *regmask, bool mergedregs) 2539bf215546Sopenharmony_ci{ 2540bf215546Sopenharmony_ci memset(®mask->mask, 0, sizeof(regmask->mask)); 2541bf215546Sopenharmony_ci regmask->mergedregs = mergedregs; 2542bf215546Sopenharmony_ci} 2543bf215546Sopenharmony_ci 2544bf215546Sopenharmony_cistatic inline void 2545bf215546Sopenharmony_ciregmask_or(regmask_t *dst, regmask_t *a, regmask_t *b) 2546bf215546Sopenharmony_ci{ 2547bf215546Sopenharmony_ci assert(dst->mergedregs == a->mergedregs); 2548bf215546Sopenharmony_ci 
assert(dst->mergedregs == b->mergedregs); 2549bf215546Sopenharmony_ci 2550bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(dst->mask); i++) 2551bf215546Sopenharmony_ci dst->mask[i] = a->mask[i] | b->mask[i]; 2552bf215546Sopenharmony_ci} 2553bf215546Sopenharmony_ci 2554bf215546Sopenharmony_cistatic inline void 2555bf215546Sopenharmony_ciregmask_or_shared(regmask_t *dst, regmask_t *a, regmask_t *b) 2556bf215546Sopenharmony_ci{ 2557bf215546Sopenharmony_ci regmaskstate_t shared_mask; 2558bf215546Sopenharmony_ci BITSET_ZERO(shared_mask); 2559bf215546Sopenharmony_ci 2560bf215546Sopenharmony_ci if (b->mergedregs) { 2561bf215546Sopenharmony_ci BITSET_SET_RANGE(shared_mask, 2 * 4 * 48, 2 * 4 * 56 - 1); 2562bf215546Sopenharmony_ci } else { 2563bf215546Sopenharmony_ci BITSET_SET_RANGE(shared_mask, 4 * 48, 4 * 56 - 1); 2564bf215546Sopenharmony_ci } 2565bf215546Sopenharmony_ci 2566bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(dst->mask); i++) 2567bf215546Sopenharmony_ci dst->mask[i] = a->mask[i] | (b->mask[i] & shared_mask[i]); 2568bf215546Sopenharmony_ci} 2569bf215546Sopenharmony_ci 2570bf215546Sopenharmony_cistatic inline void 2571bf215546Sopenharmony_ciregmask_set(regmask_t *regmask, struct ir3_register *reg) 2572bf215546Sopenharmony_ci{ 2573bf215546Sopenharmony_ci bool half = reg->flags & IR3_REG_HALF; 2574bf215546Sopenharmony_ci if (reg->flags & IR3_REG_RELATIV) { 2575bf215546Sopenharmony_ci for (unsigned i = 0; i < reg->size; i++) 2576bf215546Sopenharmony_ci __regmask_set(regmask, half, reg->array.base + i); 2577bf215546Sopenharmony_ci } else { 2578bf215546Sopenharmony_ci for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++) 2579bf215546Sopenharmony_ci if (mask & 1) 2580bf215546Sopenharmony_ci __regmask_set(regmask, half, n); 2581bf215546Sopenharmony_ci } 2582bf215546Sopenharmony_ci} 2583bf215546Sopenharmony_ci 2584bf215546Sopenharmony_cistatic inline void 2585bf215546Sopenharmony_ciregmask_clear(regmask_t *regmask, struct 
ir3_register *reg) 2586bf215546Sopenharmony_ci{ 2587bf215546Sopenharmony_ci bool half = reg->flags & IR3_REG_HALF; 2588bf215546Sopenharmony_ci if (reg->flags & IR3_REG_RELATIV) { 2589bf215546Sopenharmony_ci for (unsigned i = 0; i < reg->size; i++) 2590bf215546Sopenharmony_ci __regmask_clear(regmask, half, reg->array.base + i); 2591bf215546Sopenharmony_ci } else { 2592bf215546Sopenharmony_ci for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++) 2593bf215546Sopenharmony_ci if (mask & 1) 2594bf215546Sopenharmony_ci __regmask_clear(regmask, half, n); 2595bf215546Sopenharmony_ci } 2596bf215546Sopenharmony_ci} 2597bf215546Sopenharmony_ci 2598bf215546Sopenharmony_cistatic inline bool 2599bf215546Sopenharmony_ciregmask_get(regmask_t *regmask, struct ir3_register *reg) 2600bf215546Sopenharmony_ci{ 2601bf215546Sopenharmony_ci bool half = reg->flags & IR3_REG_HALF; 2602bf215546Sopenharmony_ci if (reg->flags & IR3_REG_RELATIV) { 2603bf215546Sopenharmony_ci for (unsigned i = 0; i < reg->size; i++) 2604bf215546Sopenharmony_ci if (__regmask_get(regmask, half, reg->array.base + i)) 2605bf215546Sopenharmony_ci return true; 2606bf215546Sopenharmony_ci } else { 2607bf215546Sopenharmony_ci for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++) 2608bf215546Sopenharmony_ci if (mask & 1) 2609bf215546Sopenharmony_ci if (__regmask_get(regmask, half, n)) 2610bf215546Sopenharmony_ci return true; 2611bf215546Sopenharmony_ci } 2612bf215546Sopenharmony_ci return false; 2613bf215546Sopenharmony_ci} 2614bf215546Sopenharmony_ci/* ************************************************************************* */ 2615bf215546Sopenharmony_ci 2616bf215546Sopenharmony_ci#endif /* IR3_H_ */ 2617