1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2020 Collabora Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 
22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors (Collabora): 24bf215546Sopenharmony_ci * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#ifndef __BIFROST_COMPILER_H 28bf215546Sopenharmony_ci#define __BIFROST_COMPILER_H 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "bifrost.h" 31bf215546Sopenharmony_ci#include "bi_opcodes.h" 32bf215546Sopenharmony_ci#include "compiler/nir/nir.h" 33bf215546Sopenharmony_ci#include "panfrost/util/pan_ir.h" 34bf215546Sopenharmony_ci#include "util/u_math.h" 35bf215546Sopenharmony_ci#include "util/half_float.h" 36bf215546Sopenharmony_ci#include "util/u_worklist.h" 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci#ifdef __cplusplus 39bf215546Sopenharmony_ciextern "C" { 40bf215546Sopenharmony_ci#endif 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci/* Swizzles across bytes in a 32-bit word. Expresses swz in the XML directly. 43bf215546Sopenharmony_ci * To express widen, use the corresponding replicated form, i.e. H01 = identity 44bf215546Sopenharmony_ci * for widen = none, H00 for widen = h0, B1111 for widen = b1. For lane, also 45bf215546Sopenharmony_ci * use the replicated form (interpretation is governed by the opcode). For 46bf215546Sopenharmony_ci * 8-bit lanes with two channels, use replicated forms for replicated forms 47bf215546Sopenharmony_ci * (TODO: what about others?). For 8-bit lanes with four channels using 48bf215546Sopenharmony_ci * matching form (TODO: what about others?). 
49bf215546Sopenharmony_ci */ 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_cienum bi_swizzle { 52bf215546Sopenharmony_ci /* 16-bit swizzle ordering deliberate for fast compute */ 53bf215546Sopenharmony_ci BI_SWIZZLE_H00 = 0, /* = B0101 */ 54bf215546Sopenharmony_ci BI_SWIZZLE_H01 = 1, /* = B0123 = W0 */ 55bf215546Sopenharmony_ci BI_SWIZZLE_H10 = 2, /* = B2301 */ 56bf215546Sopenharmony_ci BI_SWIZZLE_H11 = 3, /* = B2323 */ 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci /* replication order should be maintained for fast compute */ 59bf215546Sopenharmony_ci BI_SWIZZLE_B0000 = 4, /* single channel (replicate) */ 60bf215546Sopenharmony_ci BI_SWIZZLE_B1111 = 5, 61bf215546Sopenharmony_ci BI_SWIZZLE_B2222 = 6, 62bf215546Sopenharmony_ci BI_SWIZZLE_B3333 = 7, 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci /* totally special for explicit pattern matching */ 65bf215546Sopenharmony_ci BI_SWIZZLE_B0011 = 8, /* +SWZ.v4i8 */ 66bf215546Sopenharmony_ci BI_SWIZZLE_B2233 = 9, /* +SWZ.v4i8 */ 67bf215546Sopenharmony_ci BI_SWIZZLE_B1032 = 10, /* +SWZ.v4i8 */ 68bf215546Sopenharmony_ci BI_SWIZZLE_B3210 = 11, /* +SWZ.v4i8 */ 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci BI_SWIZZLE_B0022 = 12, /* for b02 lanes */ 71bf215546Sopenharmony_ci}; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci/* Given a packed i16vec2/i8vec4 constant, apply a swizzle. Useful for constant 74bf215546Sopenharmony_ci * folding and Valhall constant optimization. 
*/ 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_cistatic inline uint32_t 77bf215546Sopenharmony_cibi_apply_swizzle(uint32_t value, enum bi_swizzle swz) 78bf215546Sopenharmony_ci{ 79bf215546Sopenharmony_ci const uint16_t *h = (const uint16_t *) &value; 80bf215546Sopenharmony_ci const uint8_t *b = (const uint8_t *) &value; 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ci#define H(h0, h1) (h[h0] | (h[h1] << 16)) 83bf215546Sopenharmony_ci#define B(b0, b1, b2, b3) (b[b0] | (b[b1] << 8) | (b[b2] << 16) | (b[b3] << 24)) 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci switch (swz) { 86bf215546Sopenharmony_ci case BI_SWIZZLE_H00: return H(0, 0); 87bf215546Sopenharmony_ci case BI_SWIZZLE_H01: return H(0, 1); 88bf215546Sopenharmony_ci case BI_SWIZZLE_H10: return H(1, 0); 89bf215546Sopenharmony_ci case BI_SWIZZLE_H11: return H(1, 1); 90bf215546Sopenharmony_ci case BI_SWIZZLE_B0000: return B(0, 0, 0, 0); 91bf215546Sopenharmony_ci case BI_SWIZZLE_B1111: return B(1, 1, 1, 1); 92bf215546Sopenharmony_ci case BI_SWIZZLE_B2222: return B(2, 2, 2, 2); 93bf215546Sopenharmony_ci case BI_SWIZZLE_B3333: return B(3, 3, 3, 3); 94bf215546Sopenharmony_ci case BI_SWIZZLE_B0011: return B(0, 0, 1, 1); 95bf215546Sopenharmony_ci case BI_SWIZZLE_B2233: return B(2, 2, 3, 3); 96bf215546Sopenharmony_ci case BI_SWIZZLE_B1032: return B(1, 0, 3, 2); 97bf215546Sopenharmony_ci case BI_SWIZZLE_B3210: return B(3, 2, 1, 0); 98bf215546Sopenharmony_ci case BI_SWIZZLE_B0022: return B(0, 0, 2, 2); 99bf215546Sopenharmony_ci } 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci#undef H 102bf215546Sopenharmony_ci#undef B 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci unreachable("Invalid swizzle"); 105bf215546Sopenharmony_ci} 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_cienum bi_index_type { 108bf215546Sopenharmony_ci BI_INDEX_NULL = 0, 109bf215546Sopenharmony_ci BI_INDEX_NORMAL = 1, 110bf215546Sopenharmony_ci BI_INDEX_REGISTER = 2, 111bf215546Sopenharmony_ci BI_INDEX_CONSTANT = 3, 
112bf215546Sopenharmony_ci BI_INDEX_PASS = 4, 113bf215546Sopenharmony_ci BI_INDEX_FAU = 5 114bf215546Sopenharmony_ci}; 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_citypedef struct { 117bf215546Sopenharmony_ci uint32_t value; 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci /* modifiers, should only be set if applicable for a given instruction. 120bf215546Sopenharmony_ci * For *IDP.v4i8, abs plays the role of sign. For bitwise ops where 121bf215546Sopenharmony_ci * applicable, neg plays the role of not */ 122bf215546Sopenharmony_ci bool abs : 1; 123bf215546Sopenharmony_ci bool neg : 1; 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci /* The last use of a value, should be purged from the register cache. 126bf215546Sopenharmony_ci * Set by liveness analysis. */ 127bf215546Sopenharmony_ci bool discard : 1; 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci /* For a source, the swizzle. For a destination, acts a bit like a 130bf215546Sopenharmony_ci * write mask. Identity for the full 32-bit, H00 for only caring about 131bf215546Sopenharmony_ci * the lower half, other values unused. 
*/ 132bf215546Sopenharmony_ci enum bi_swizzle swizzle : 4; 133bf215546Sopenharmony_ci uint32_t offset : 3; 134bf215546Sopenharmony_ci bool reg : 1; 135bf215546Sopenharmony_ci enum bi_index_type type : 3; 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci /* Must be zeroed so we can hash the whole 64-bits at a time */ 138bf215546Sopenharmony_ci unsigned padding : (32 - 14); 139bf215546Sopenharmony_ci} bi_index; 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_cistatic inline bi_index 142bf215546Sopenharmony_cibi_get_index(unsigned value, bool is_reg, unsigned offset) 143bf215546Sopenharmony_ci{ 144bf215546Sopenharmony_ci return (bi_index) { 145bf215546Sopenharmony_ci .value = value, 146bf215546Sopenharmony_ci .swizzle = BI_SWIZZLE_H01, 147bf215546Sopenharmony_ci .offset = offset, 148bf215546Sopenharmony_ci .reg = is_reg, 149bf215546Sopenharmony_ci .type = BI_INDEX_NORMAL, 150bf215546Sopenharmony_ci }; 151bf215546Sopenharmony_ci} 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_cistatic inline bi_index 154bf215546Sopenharmony_cibi_register(unsigned reg) 155bf215546Sopenharmony_ci{ 156bf215546Sopenharmony_ci assert(reg < 64); 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci return (bi_index) { 159bf215546Sopenharmony_ci .value = reg, 160bf215546Sopenharmony_ci .swizzle = BI_SWIZZLE_H01, 161bf215546Sopenharmony_ci .type = BI_INDEX_REGISTER, 162bf215546Sopenharmony_ci }; 163bf215546Sopenharmony_ci} 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_cistatic inline bi_index 166bf215546Sopenharmony_cibi_imm_u32(uint32_t imm) 167bf215546Sopenharmony_ci{ 168bf215546Sopenharmony_ci return (bi_index) { 169bf215546Sopenharmony_ci .value = imm, 170bf215546Sopenharmony_ci .swizzle = BI_SWIZZLE_H01, 171bf215546Sopenharmony_ci .type = BI_INDEX_CONSTANT, 172bf215546Sopenharmony_ci }; 173bf215546Sopenharmony_ci} 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_cistatic inline bi_index 176bf215546Sopenharmony_cibi_imm_f32(float imm) 177bf215546Sopenharmony_ci{ 
178bf215546Sopenharmony_ci return bi_imm_u32(fui(imm)); 179bf215546Sopenharmony_ci} 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_cistatic inline bi_index 182bf215546Sopenharmony_cibi_null() 183bf215546Sopenharmony_ci{ 184bf215546Sopenharmony_ci return (bi_index) { .type = BI_INDEX_NULL }; 185bf215546Sopenharmony_ci} 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_cistatic inline bi_index 188bf215546Sopenharmony_cibi_zero() 189bf215546Sopenharmony_ci{ 190bf215546Sopenharmony_ci return bi_imm_u32(0); 191bf215546Sopenharmony_ci} 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_cistatic inline bi_index 194bf215546Sopenharmony_cibi_passthrough(enum bifrost_packed_src value) 195bf215546Sopenharmony_ci{ 196bf215546Sopenharmony_ci return (bi_index) { 197bf215546Sopenharmony_ci .value = value, 198bf215546Sopenharmony_ci .swizzle = BI_SWIZZLE_H01, 199bf215546Sopenharmony_ci .type = BI_INDEX_PASS, 200bf215546Sopenharmony_ci }; 201bf215546Sopenharmony_ci} 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci/* Helps construct swizzles */ 204bf215546Sopenharmony_cistatic inline bi_index 205bf215546Sopenharmony_cibi_swz_16(bi_index idx, bool x, bool y) 206bf215546Sopenharmony_ci{ 207bf215546Sopenharmony_ci assert(idx.swizzle == BI_SWIZZLE_H01); 208bf215546Sopenharmony_ci idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_H00 | (x << 1) | y); 209bf215546Sopenharmony_ci return idx; 210bf215546Sopenharmony_ci} 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_cistatic inline bi_index 213bf215546Sopenharmony_cibi_half(bi_index idx, bool upper) 214bf215546Sopenharmony_ci{ 215bf215546Sopenharmony_ci return bi_swz_16(idx, upper, upper); 216bf215546Sopenharmony_ci} 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_cistatic inline bi_index 219bf215546Sopenharmony_cibi_byte(bi_index idx, unsigned lane) 220bf215546Sopenharmony_ci{ 221bf215546Sopenharmony_ci assert(idx.swizzle == BI_SWIZZLE_H01); 222bf215546Sopenharmony_ci assert(lane < 4); 223bf215546Sopenharmony_ci idx.swizzle = 
(enum bi_swizzle)(BI_SWIZZLE_B0000 + lane); 224bf215546Sopenharmony_ci return idx; 225bf215546Sopenharmony_ci} 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_cistatic inline bi_index 228bf215546Sopenharmony_cibi_abs(bi_index idx) 229bf215546Sopenharmony_ci{ 230bf215546Sopenharmony_ci idx.abs = true; 231bf215546Sopenharmony_ci return idx; 232bf215546Sopenharmony_ci} 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_cistatic inline bi_index 235bf215546Sopenharmony_cibi_neg(bi_index idx) 236bf215546Sopenharmony_ci{ 237bf215546Sopenharmony_ci idx.neg ^= true; 238bf215546Sopenharmony_ci return idx; 239bf215546Sopenharmony_ci} 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_cistatic inline bi_index 242bf215546Sopenharmony_cibi_discard(bi_index idx) 243bf215546Sopenharmony_ci{ 244bf215546Sopenharmony_ci idx.discard = true; 245bf215546Sopenharmony_ci return idx; 246bf215546Sopenharmony_ci} 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci/* Additive identity in IEEE 754 arithmetic */ 249bf215546Sopenharmony_cistatic inline bi_index 250bf215546Sopenharmony_cibi_negzero() 251bf215546Sopenharmony_ci{ 252bf215546Sopenharmony_ci return bi_neg(bi_zero()); 253bf215546Sopenharmony_ci} 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci/* Replaces an index, preserving any modifiers */ 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_cistatic inline bi_index 258bf215546Sopenharmony_cibi_replace_index(bi_index old, bi_index replacement) 259bf215546Sopenharmony_ci{ 260bf215546Sopenharmony_ci replacement.abs = old.abs; 261bf215546Sopenharmony_ci replacement.neg = old.neg; 262bf215546Sopenharmony_ci replacement.swizzle = old.swizzle; 263bf215546Sopenharmony_ci replacement.discard = false; /* needs liveness analysis to set */ 264bf215546Sopenharmony_ci return replacement; 265bf215546Sopenharmony_ci} 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci/* Remove any modifiers. 
This has the property: 268bf215546Sopenharmony_ci * 269bf215546Sopenharmony_ci * replace_index(x, strip_index(x)) = x 270bf215546Sopenharmony_ci * 271bf215546Sopenharmony_ci * This ensures it is suitable to use when lowering sources to moves */ 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_cistatic inline bi_index 274bf215546Sopenharmony_cibi_strip_index(bi_index index) 275bf215546Sopenharmony_ci{ 276bf215546Sopenharmony_ci index.abs = index.neg = false; 277bf215546Sopenharmony_ci index.swizzle = BI_SWIZZLE_H01; 278bf215546Sopenharmony_ci return index; 279bf215546Sopenharmony_ci} 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci/* For bitwise instructions */ 282bf215546Sopenharmony_ci#define bi_not(x) bi_neg(x) 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_cistatic inline bi_index 285bf215546Sopenharmony_cibi_imm_u8(uint8_t imm) 286bf215546Sopenharmony_ci{ 287bf215546Sopenharmony_ci return bi_byte(bi_imm_u32(imm), 0); 288bf215546Sopenharmony_ci} 289bf215546Sopenharmony_ci 290bf215546Sopenharmony_cistatic inline bi_index 291bf215546Sopenharmony_cibi_imm_u16(uint16_t imm) 292bf215546Sopenharmony_ci{ 293bf215546Sopenharmony_ci return bi_half(bi_imm_u32(imm), false); 294bf215546Sopenharmony_ci} 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_cistatic inline bi_index 297bf215546Sopenharmony_cibi_imm_uintN(uint32_t imm, unsigned sz) 298bf215546Sopenharmony_ci{ 299bf215546Sopenharmony_ci assert(sz == 8 || sz == 16 || sz == 32); 300bf215546Sopenharmony_ci return (sz == 8) ? bi_imm_u8(imm) : 301bf215546Sopenharmony_ci (sz == 16) ? 
bi_imm_u16(imm) : 302bf215546Sopenharmony_ci bi_imm_u32(imm); 303bf215546Sopenharmony_ci} 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_cistatic inline bi_index 306bf215546Sopenharmony_cibi_imm_f16(float imm) 307bf215546Sopenharmony_ci{ 308bf215546Sopenharmony_ci return bi_imm_u16(_mesa_float_to_half(imm)); 309bf215546Sopenharmony_ci} 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_cistatic inline bool 312bf215546Sopenharmony_cibi_is_null(bi_index idx) 313bf215546Sopenharmony_ci{ 314bf215546Sopenharmony_ci return idx.type == BI_INDEX_NULL; 315bf215546Sopenharmony_ci} 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_cistatic inline bool 318bf215546Sopenharmony_cibi_is_ssa(bi_index idx) 319bf215546Sopenharmony_ci{ 320bf215546Sopenharmony_ci return idx.type == BI_INDEX_NORMAL && !idx.reg; 321bf215546Sopenharmony_ci} 322bf215546Sopenharmony_ci 323bf215546Sopenharmony_ci/* Compares equivalence as references. Does not compare offsets, swizzles, or 324bf215546Sopenharmony_ci * modifiers. In other words, this forms bi_index equivalence classes by 325bf215546Sopenharmony_ci * partitioning memory. E.g. 
-abs(foo[1].yx) == foo.xy but foo != bar */ 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_cistatic inline bool 328bf215546Sopenharmony_cibi_is_equiv(bi_index left, bi_index right) 329bf215546Sopenharmony_ci{ 330bf215546Sopenharmony_ci return (left.type == right.type) && 331bf215546Sopenharmony_ci (left.reg == right.reg) && 332bf215546Sopenharmony_ci (left.value == right.value); 333bf215546Sopenharmony_ci} 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci/* A stronger equivalence relation that requires the indices access the 336bf215546Sopenharmony_ci * same offset, useful for RA/scheduling to see what registers will 337bf215546Sopenharmony_ci * correspond to */ 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_cistatic inline bool 340bf215546Sopenharmony_cibi_is_word_equiv(bi_index left, bi_index right) 341bf215546Sopenharmony_ci{ 342bf215546Sopenharmony_ci return bi_is_equiv(left, right) && left.offset == right.offset; 343bf215546Sopenharmony_ci} 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci/* An even stronger equivalence that checks if indices correspond to the 346bf215546Sopenharmony_ci * right value when evaluated 347bf215546Sopenharmony_ci */ 348bf215546Sopenharmony_cistatic inline bool 349bf215546Sopenharmony_cibi_is_value_equiv(bi_index left, bi_index right) 350bf215546Sopenharmony_ci{ 351bf215546Sopenharmony_ci if (left.type == BI_INDEX_CONSTANT && right.type == BI_INDEX_CONSTANT) { 352bf215546Sopenharmony_ci return (bi_apply_swizzle(left.value, left.swizzle) == 353bf215546Sopenharmony_ci bi_apply_swizzle(right.value, right.swizzle)) && 354bf215546Sopenharmony_ci (left.abs == right.abs) && 355bf215546Sopenharmony_ci (left.neg == right.neg); 356bf215546Sopenharmony_ci } else { 357bf215546Sopenharmony_ci return (left.value == right.value) && 358bf215546Sopenharmony_ci (left.abs == right.abs) && 359bf215546Sopenharmony_ci (left.neg == right.neg) && 360bf215546Sopenharmony_ci (left.swizzle == right.swizzle) && 361bf215546Sopenharmony_ci 
(left.offset == right.offset) && 362bf215546Sopenharmony_ci (left.reg == right.reg) && 363bf215546Sopenharmony_ci (left.type == right.type); 364bf215546Sopenharmony_ci } 365bf215546Sopenharmony_ci} 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_ci#define BI_MAX_VEC 8 368bf215546Sopenharmony_ci#define BI_MAX_DESTS 4 369bf215546Sopenharmony_ci#define BI_MAX_SRCS 6 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_citypedef struct { 372bf215546Sopenharmony_ci /* Must be first */ 373bf215546Sopenharmony_ci struct list_head link; 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci enum bi_opcode op; 376bf215546Sopenharmony_ci uint8_t nr_srcs; 377bf215546Sopenharmony_ci uint8_t nr_dests; 378bf215546Sopenharmony_ci 379bf215546Sopenharmony_ci /* Data flow */ 380bf215546Sopenharmony_ci bi_index dest[BI_MAX_DESTS]; 381bf215546Sopenharmony_ci bi_index src[BI_MAX_SRCS]; 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci /* For a branch */ 384bf215546Sopenharmony_ci struct bi_block *branch_target; 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_ci /* These don't fit neatly with anything else.. */ 387bf215546Sopenharmony_ci enum bi_register_format register_format; 388bf215546Sopenharmony_ci enum bi_vecsize vecsize; 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci /* Flow control associated with a Valhall instruction */ 391bf215546Sopenharmony_ci uint8_t flow; 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci /* Slot associated with a message-passing instruction */ 394bf215546Sopenharmony_ci uint8_t slot; 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_ci /* Can we spill the value written here? Used to prevent 397bf215546Sopenharmony_ci * useless double fills */ 398bf215546Sopenharmony_ci bool no_spill; 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_ci /* On Bifrost: A value of bi_table to override the table, inducing a 401bf215546Sopenharmony_ci * DTSEL_IMM pair if nonzero. 
402bf215546Sopenharmony_ci * 403bf215546Sopenharmony_ci * On Valhall: the table index to use for resource instructions. 404bf215546Sopenharmony_ci * 405bf215546Sopenharmony_ci * These two interpretations are equivalent if you squint a bit. 406bf215546Sopenharmony_ci */ 407bf215546Sopenharmony_ci unsigned table; 408bf215546Sopenharmony_ci 409bf215546Sopenharmony_ci /* Everything after this MUST NOT be accessed directly, since 410bf215546Sopenharmony_ci * interpretation depends on opcodes */ 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci /* Destination modifiers */ 413bf215546Sopenharmony_ci union { 414bf215546Sopenharmony_ci enum bi_clamp clamp; 415bf215546Sopenharmony_ci bool saturate; 416bf215546Sopenharmony_ci bool not_result; 417bf215546Sopenharmony_ci unsigned dest_mod; 418bf215546Sopenharmony_ci }; 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci /* Immediates. All seen alone in an instruction, except for varying/texture 421bf215546Sopenharmony_ci * which are specified jointly for VARTEX */ 422bf215546Sopenharmony_ci union { 423bf215546Sopenharmony_ci uint32_t shift; 424bf215546Sopenharmony_ci uint32_t fill; 425bf215546Sopenharmony_ci uint32_t index; 426bf215546Sopenharmony_ci uint32_t attribute_index; 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci struct { 429bf215546Sopenharmony_ci uint32_t varying_index; 430bf215546Sopenharmony_ci uint32_t sampler_index; 431bf215546Sopenharmony_ci uint32_t texture_index; 432bf215546Sopenharmony_ci }; 433bf215546Sopenharmony_ci 434bf215546Sopenharmony_ci /* TEXC, ATOM_CX: # of staging registers used */ 435bf215546Sopenharmony_ci struct { 436bf215546Sopenharmony_ci uint32_t sr_count; 437bf215546Sopenharmony_ci uint32_t sr_count_2; 438bf215546Sopenharmony_ci 439bf215546Sopenharmony_ci union { 440bf215546Sopenharmony_ci /* Atomics effectively require all three */ 441bf215546Sopenharmony_ci int32_t byte_offset; 442bf215546Sopenharmony_ci 443bf215546Sopenharmony_ci /* BLEND requires all three */ 
444bf215546Sopenharmony_ci int32_t branch_offset; 445bf215546Sopenharmony_ci }; 446bf215546Sopenharmony_ci }; 447bf215546Sopenharmony_ci }; 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci /* Modifiers specific to particular instructions are thrown in a union */ 450bf215546Sopenharmony_ci union { 451bf215546Sopenharmony_ci enum bi_adj adj; /* FEXP_TABLE.u4 */ 452bf215546Sopenharmony_ci enum bi_atom_opc atom_opc; /* atomics */ 453bf215546Sopenharmony_ci enum bi_func func; /* FPOW_SC_DET */ 454bf215546Sopenharmony_ci enum bi_function function; /* LD_VAR_FLAT */ 455bf215546Sopenharmony_ci enum bi_mux mux; /* MUX */ 456bf215546Sopenharmony_ci enum bi_sem sem; /* FMAX, FMIN */ 457bf215546Sopenharmony_ci enum bi_source source; /* LD_GCLK */ 458bf215546Sopenharmony_ci bool scale; /* VN_ASST2, FSINCOS_OFFSET */ 459bf215546Sopenharmony_ci bool offset; /* FSIN_TABLE, FOCS_TABLE */ 460bf215546Sopenharmony_ci bool mask; /* CLZ */ 461bf215546Sopenharmony_ci bool threads; /* IMULD, IMOV_FMA */ 462bf215546Sopenharmony_ci bool combine; /* BRANCHC */ 463bf215546Sopenharmony_ci bool format; /* LEA_TEX */ 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci struct { 466bf215546Sopenharmony_ci enum bi_special special; /* FADD_RSCALE, FMA_RSCALE */ 467bf215546Sopenharmony_ci enum bi_round round; /* FMA, converts, FADD, _RSCALE, etc */ 468bf215546Sopenharmony_ci bool ftz; /* Flush-to-zero for F16_TO_F32 */ 469bf215546Sopenharmony_ci }; 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci struct { 472bf215546Sopenharmony_ci enum bi_result_type result_type; /* FCMP, ICMP */ 473bf215546Sopenharmony_ci enum bi_cmpf cmpf; /* CSEL, FCMP, ICMP, BRANCH */ 474bf215546Sopenharmony_ci }; 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_ci struct { 477bf215546Sopenharmony_ci enum bi_stack_mode stack_mode; /* JUMP_EX */ 478bf215546Sopenharmony_ci bool test_mode; 479bf215546Sopenharmony_ci }; 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci struct { 482bf215546Sopenharmony_ci enum 
bi_seg seg; /* LOAD, STORE, SEG_ADD, SEG_SUB */ 483bf215546Sopenharmony_ci bool preserve_null; /* SEG_ADD, SEG_SUB */ 484bf215546Sopenharmony_ci enum bi_extend extend; /* LOAD, IMUL */ 485bf215546Sopenharmony_ci }; 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci struct { 488bf215546Sopenharmony_ci enum bi_sample sample; /* VAR_TEX, LD_VAR */ 489bf215546Sopenharmony_ci enum bi_update update; /* VAR_TEX, LD_VAR */ 490bf215546Sopenharmony_ci enum bi_varying_name varying_name; /* LD_VAR_SPECIAL */ 491bf215546Sopenharmony_ci bool skip; /* VAR_TEX, TEXS, TEXC */ 492bf215546Sopenharmony_ci bool lod_mode; /* VAR_TEX, TEXS, implicitly for TEXC */ 493bf215546Sopenharmony_ci enum bi_source_format source_format; /* LD_VAR_BUF */ 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci /* Used for valhall texturing */ 496bf215546Sopenharmony_ci bool shadow; 497bf215546Sopenharmony_ci bool texel_offset; 498bf215546Sopenharmony_ci bool array_enable; 499bf215546Sopenharmony_ci bool integer_coordinates; 500bf215546Sopenharmony_ci enum bi_fetch_component fetch_component; 501bf215546Sopenharmony_ci enum bi_va_lod_mode va_lod_mode; 502bf215546Sopenharmony_ci enum bi_dimension dimension; 503bf215546Sopenharmony_ci enum bi_write_mask write_mask; 504bf215546Sopenharmony_ci }; 505bf215546Sopenharmony_ci 506bf215546Sopenharmony_ci /* Maximum size, for hashing */ 507bf215546Sopenharmony_ci unsigned flags[14]; 508bf215546Sopenharmony_ci 509bf215546Sopenharmony_ci struct { 510bf215546Sopenharmony_ci enum bi_subgroup subgroup; /* WMASK, CLPER */ 511bf215546Sopenharmony_ci enum bi_inactive_result inactive_result; /* CLPER */ 512bf215546Sopenharmony_ci enum bi_lane_op lane_op; /* CLPER */ 513bf215546Sopenharmony_ci }; 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci struct { 516bf215546Sopenharmony_ci bool z; /* ZS_EMIT */ 517bf215546Sopenharmony_ci bool stencil; /* ZS_EMIT */ 518bf215546Sopenharmony_ci }; 519bf215546Sopenharmony_ci 520bf215546Sopenharmony_ci struct { 
521bf215546Sopenharmony_ci bool h; /* VN_ASST1.f16 */ 522bf215546Sopenharmony_ci bool l; /* VN_ASST1.f16 */ 523bf215546Sopenharmony_ci }; 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ci struct { 526bf215546Sopenharmony_ci bool bytes2; /* RROT_DOUBLE, FRSHIFT_DOUBLE */ 527bf215546Sopenharmony_ci bool result_word; 528bf215546Sopenharmony_ci bool arithmetic; /* ARSHIFT_OR */ 529bf215546Sopenharmony_ci }; 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci struct { 532bf215546Sopenharmony_ci bool sqrt; /* FREXPM */ 533bf215546Sopenharmony_ci bool log; /* FREXPM */ 534bf215546Sopenharmony_ci }; 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci struct { 537bf215546Sopenharmony_ci enum bi_mode mode; /* FLOG_TABLE */ 538bf215546Sopenharmony_ci enum bi_precision precision; /* FLOG_TABLE */ 539bf215546Sopenharmony_ci bool divzero; /* FRSQ_APPROX, FRSQ */ 540bf215546Sopenharmony_ci }; 541bf215546Sopenharmony_ci }; 542bf215546Sopenharmony_ci} bi_instr; 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_cistatic inline bool 545bf215546Sopenharmony_cibi_is_staging_src(const bi_instr *I, unsigned s) 546bf215546Sopenharmony_ci{ 547bf215546Sopenharmony_ci return (s == 0 || s == 4) && bi_opcode_props[I->op].sr_read; 548bf215546Sopenharmony_ci} 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_ci/* Represents the assignment of slots for a given bi_tuple */ 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_citypedef struct { 553bf215546Sopenharmony_ci /* Register to assign to each slot */ 554bf215546Sopenharmony_ci unsigned slot[4]; 555bf215546Sopenharmony_ci 556bf215546Sopenharmony_ci /* Read slots can be disabled */ 557bf215546Sopenharmony_ci bool enabled[2]; 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci /* Configuration for slots 2/3 */ 560bf215546Sopenharmony_ci struct bifrost_reg_ctrl_23 slot23; 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci /* Fast-Access-Uniform RAM index */ 563bf215546Sopenharmony_ci uint8_t fau_idx; 564bf215546Sopenharmony_ci 
565bf215546Sopenharmony_ci /* Whether writes are actually for the last instruction */ 566bf215546Sopenharmony_ci bool first_instruction; 567bf215546Sopenharmony_ci} bi_registers; 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_ci/* A bi_tuple contains two paired instruction pointers. If a slot is unfilled, 570bf215546Sopenharmony_ci * leave it NULL; the emitter will fill in a nop. Instructions reference 571bf215546Sopenharmony_ci * registers via slots which are assigned per tuple. 572bf215546Sopenharmony_ci */ 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_citypedef struct { 575bf215546Sopenharmony_ci uint8_t fau_idx; 576bf215546Sopenharmony_ci bi_registers regs; 577bf215546Sopenharmony_ci bi_instr *fma; 578bf215546Sopenharmony_ci bi_instr *add; 579bf215546Sopenharmony_ci} bi_tuple; 580bf215546Sopenharmony_ci 581bf215546Sopenharmony_cistruct bi_block; 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_citypedef struct { 584bf215546Sopenharmony_ci struct list_head link; 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci /* Link back up for branch calculations */ 587bf215546Sopenharmony_ci struct bi_block *block; 588bf215546Sopenharmony_ci 589bf215546Sopenharmony_ci /* Architectural limit of 8 tuples/clause */ 590bf215546Sopenharmony_ci unsigned tuple_count; 591bf215546Sopenharmony_ci bi_tuple tuples[8]; 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci /* For scoreboarding -- the clause ID (this is not globally unique!) 594bf215546Sopenharmony_ci * and its dependencies in terms of other clauses, computed during 595bf215546Sopenharmony_ci * scheduling and used when emitting code. 
Dependencies expressed as a 596bf215546Sopenharmony_ci * bitfield matching the hardware, except shifted by a clause (the 597bf215546Sopenharmony_ci * shift back to the ISA's off-by-one encoding is worked out when 598bf215546Sopenharmony_ci * emitting clauses) */ 599bf215546Sopenharmony_ci unsigned scoreboard_id; 600bf215546Sopenharmony_ci uint8_t dependencies; 601bf215546Sopenharmony_ci 602bf215546Sopenharmony_ci /* See ISA header for description */ 603bf215546Sopenharmony_ci enum bifrost_flow flow_control; 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_ci /* Can we prefetch the next clause? Usually it makes sense, except for 606bf215546Sopenharmony_ci * clauses ending in unconditional branches */ 607bf215546Sopenharmony_ci bool next_clause_prefetch; 608bf215546Sopenharmony_ci 609bf215546Sopenharmony_ci /* Assigned data register */ 610bf215546Sopenharmony_ci unsigned staging_register; 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci /* Corresponds to the usual bit but shifted by a clause */ 613bf215546Sopenharmony_ci bool staging_barrier; 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_ci /* Constants read by this clause. ISA limit. Must satisfy: 616bf215546Sopenharmony_ci * 617bf215546Sopenharmony_ci * constant_count + tuple_count <= 13 618bf215546Sopenharmony_ci * 619bf215546Sopenharmony_ci * Also implicitly constant_count <= tuple_count since a tuple only 620bf215546Sopenharmony_ci * reads a single constant. 621bf215546Sopenharmony_ci */ 622bf215546Sopenharmony_ci uint64_t constants[8]; 623bf215546Sopenharmony_ci unsigned constant_count; 624bf215546Sopenharmony_ci 625bf215546Sopenharmony_ci /* Index of a constant to be PC-relative */ 626bf215546Sopenharmony_ci unsigned pcrel_idx; 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_ci /* Branches encode a constant offset relative to the program counter 629bf215546Sopenharmony_ci * with some magic flags. By convention, if there is a branch, its 630bf215546Sopenharmony_ci * constant will be last. 
Set this flag to indicate this is required. 631bf215546Sopenharmony_ci */ 632bf215546Sopenharmony_ci bool branch_constant; 633bf215546Sopenharmony_ci 634bf215546Sopenharmony_ci /* Unique in a clause */ 635bf215546Sopenharmony_ci enum bifrost_message_type message_type; 636bf215546Sopenharmony_ci bi_instr *message; 637bf215546Sopenharmony_ci 638bf215546Sopenharmony_ci /* Discard helper threads */ 639bf215546Sopenharmony_ci bool td; 640bf215546Sopenharmony_ci 641bf215546Sopenharmony_ci /* Should flush-to-zero mode be enabled for this clause? */ 642bf215546Sopenharmony_ci bool ftz; 643bf215546Sopenharmony_ci} bi_clause; 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci#define BI_NUM_SLOTS 8 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_ci/* A model for the state of the scoreboard */ 648bf215546Sopenharmony_cistruct bi_scoreboard_state { 649bf215546Sopenharmony_ci /** Bitmap of registers read/written by a slot */ 650bf215546Sopenharmony_ci uint64_t read[BI_NUM_SLOTS]; 651bf215546Sopenharmony_ci uint64_t write[BI_NUM_SLOTS]; 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_ci /* Nonregister dependencies present by a slot */ 654bf215546Sopenharmony_ci uint8_t varying : BI_NUM_SLOTS; 655bf215546Sopenharmony_ci uint8_t memory : BI_NUM_SLOTS; 656bf215546Sopenharmony_ci}; 657bf215546Sopenharmony_ci 658bf215546Sopenharmony_citypedef struct bi_block { 659bf215546Sopenharmony_ci /* Link to next block. 
Must be first for mir_get_block */ 660bf215546Sopenharmony_ci struct list_head link; 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_ci /* List of instructions emitted for the current block */ 663bf215546Sopenharmony_ci struct list_head instructions; 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci /* Index of the block in source order */ 666bf215546Sopenharmony_ci unsigned index; 667bf215546Sopenharmony_ci 668bf215546Sopenharmony_ci /* Control flow graph */ 669bf215546Sopenharmony_ci struct bi_block *successors[2]; 670bf215546Sopenharmony_ci struct util_dynarray predecessors; 671bf215546Sopenharmony_ci bool unconditional_jumps; 672bf215546Sopenharmony_ci 673bf215546Sopenharmony_ci /* Per 32-bit word live masks for the block indexed by node */ 674bf215546Sopenharmony_ci uint8_t *live_in; 675bf215546Sopenharmony_ci uint8_t *live_out; 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci /* If true, uses clauses; if false, uses instructions */ 678bf215546Sopenharmony_ci bool scheduled; 679bf215546Sopenharmony_ci struct list_head clauses; /* list of bi_clause */ 680bf215546Sopenharmony_ci 681bf215546Sopenharmony_ci /* Post-RA liveness */ 682bf215546Sopenharmony_ci uint64_t reg_live_in, reg_live_out; 683bf215546Sopenharmony_ci 684bf215546Sopenharmony_ci /* Scoreboard state at the start/end of block */ 685bf215546Sopenharmony_ci struct bi_scoreboard_state scoreboard_in, scoreboard_out; 686bf215546Sopenharmony_ci 687bf215546Sopenharmony_ci /* On Valhall, indicates we need a terminal NOP to implement jumps to 688bf215546Sopenharmony_ci * the end of the shader. 
689bf215546Sopenharmony_ci */ 690bf215546Sopenharmony_ci bool needs_nop; 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_ci /* Flags available for pass-internal use */ 693bf215546Sopenharmony_ci uint8_t pass_flags; 694bf215546Sopenharmony_ci} bi_block; 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_cistatic inline unsigned 697bf215546Sopenharmony_cibi_num_predecessors(bi_block *block) 698bf215546Sopenharmony_ci{ 699bf215546Sopenharmony_ci return util_dynarray_num_elements(&block->predecessors, bi_block *); 700bf215546Sopenharmony_ci} 701bf215546Sopenharmony_ci 702bf215546Sopenharmony_cistatic inline bi_block * 703bf215546Sopenharmony_cibi_start_block(struct list_head *blocks) 704bf215546Sopenharmony_ci{ 705bf215546Sopenharmony_ci bi_block *first = list_first_entry(blocks, bi_block, link); 706bf215546Sopenharmony_ci assert(bi_num_predecessors(first) == 0); 707bf215546Sopenharmony_ci return first; 708bf215546Sopenharmony_ci} 709bf215546Sopenharmony_ci 710bf215546Sopenharmony_cistatic inline bi_block * 711bf215546Sopenharmony_cibi_exit_block(struct list_head *blocks) 712bf215546Sopenharmony_ci{ 713bf215546Sopenharmony_ci bi_block *last = list_last_entry(blocks, bi_block, link); 714bf215546Sopenharmony_ci assert(!last->successors[0] && !last->successors[1]); 715bf215546Sopenharmony_ci return last; 716bf215546Sopenharmony_ci} 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_cistatic inline void 719bf215546Sopenharmony_cibi_block_add_successor(bi_block *block, bi_block *successor) 720bf215546Sopenharmony_ci{ 721bf215546Sopenharmony_ci assert(block != NULL && successor != NULL); 722bf215546Sopenharmony_ci 723bf215546Sopenharmony_ci /* Cull impossible edges */ 724bf215546Sopenharmony_ci if (block->unconditional_jumps) 725bf215546Sopenharmony_ci return; 726bf215546Sopenharmony_ci 727bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) { 728bf215546Sopenharmony_ci if (block->successors[i]) { 729bf215546Sopenharmony_ci if 
(block->successors[i] == successor) 730bf215546Sopenharmony_ci return; 731bf215546Sopenharmony_ci else 732bf215546Sopenharmony_ci continue; 733bf215546Sopenharmony_ci } 734bf215546Sopenharmony_ci 735bf215546Sopenharmony_ci block->successors[i] = successor; 736bf215546Sopenharmony_ci util_dynarray_append(&successor->predecessors, bi_block *, block); 737bf215546Sopenharmony_ci return; 738bf215546Sopenharmony_ci } 739bf215546Sopenharmony_ci 740bf215546Sopenharmony_ci unreachable("Too many successors"); 741bf215546Sopenharmony_ci} 742bf215546Sopenharmony_ci 743bf215546Sopenharmony_ci/* Subset of pan_shader_info needed per-variant, in order to support IDVS */ 744bf215546Sopenharmony_cistruct bi_shader_info { 745bf215546Sopenharmony_ci struct panfrost_ubo_push *push; 746bf215546Sopenharmony_ci struct bifrost_shader_info *bifrost; 747bf215546Sopenharmony_ci struct panfrost_sysvals *sysvals; 748bf215546Sopenharmony_ci unsigned tls_size; 749bf215546Sopenharmony_ci unsigned work_reg_count; 750bf215546Sopenharmony_ci unsigned push_offset; 751bf215546Sopenharmony_ci}; 752bf215546Sopenharmony_ci 753bf215546Sopenharmony_ci/* State of index-driven vertex shading for current shader */ 754bf215546Sopenharmony_cienum bi_idvs_mode { 755bf215546Sopenharmony_ci /* IDVS not in use */ 756bf215546Sopenharmony_ci BI_IDVS_NONE = 0, 757bf215546Sopenharmony_ci 758bf215546Sopenharmony_ci /* IDVS in use. Compiling a position shader */ 759bf215546Sopenharmony_ci BI_IDVS_POSITION = 1, 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_ci /* IDVS in use. 
Compiling a varying shader */ 762bf215546Sopenharmony_ci BI_IDVS_VARYING = 2, 763bf215546Sopenharmony_ci}; 764bf215546Sopenharmony_ci 765bf215546Sopenharmony_citypedef struct { 766bf215546Sopenharmony_ci const struct panfrost_compile_inputs *inputs; 767bf215546Sopenharmony_ci nir_shader *nir; 768bf215546Sopenharmony_ci struct bi_shader_info info; 769bf215546Sopenharmony_ci gl_shader_stage stage; 770bf215546Sopenharmony_ci struct list_head blocks; /* list of bi_block */ 771bf215546Sopenharmony_ci struct hash_table_u64 *sysval_to_id; 772bf215546Sopenharmony_ci uint32_t quirks; 773bf215546Sopenharmony_ci unsigned arch; 774bf215546Sopenharmony_ci enum bi_idvs_mode idvs; 775bf215546Sopenharmony_ci unsigned num_blocks; 776bf215546Sopenharmony_ci 777bf215546Sopenharmony_ci /* In any graphics shader, whether the "IDVS with memory 778bf215546Sopenharmony_ci * allocation" flow is used. This affects how varyings are loaded and 779bf215546Sopenharmony_ci * stored. Ignore for compute. 780bf215546Sopenharmony_ci */ 781bf215546Sopenharmony_ci bool malloc_idvs; 782bf215546Sopenharmony_ci 783bf215546Sopenharmony_ci /* During NIR->BIR */ 784bf215546Sopenharmony_ci bi_block *current_block; 785bf215546Sopenharmony_ci bi_block *after_block; 786bf215546Sopenharmony_ci bi_block *break_block; 787bf215546Sopenharmony_ci bi_block *continue_block; 788bf215546Sopenharmony_ci bool emitted_atest; 789bf215546Sopenharmony_ci 790bf215546Sopenharmony_ci /* During NIR->BIR, the coverage bitmap. If this is NULL, the default 791bf215546Sopenharmony_ci * coverage bitmap should be source from preloaded register r60. This is 792bf215546Sopenharmony_ci * written by ATEST and ZS_EMIT 793bf215546Sopenharmony_ci */ 794bf215546Sopenharmony_ci bi_index coverage; 795bf215546Sopenharmony_ci 796bf215546Sopenharmony_ci /* During NIR->BIR, table of preloaded registers, or NULL if never 797bf215546Sopenharmony_ci * preloaded. 
798bf215546Sopenharmony_ci */ 799bf215546Sopenharmony_ci bi_index preloaded[64]; 800bf215546Sopenharmony_ci 801bf215546Sopenharmony_ci /* For creating temporaries */ 802bf215546Sopenharmony_ci unsigned ssa_alloc; 803bf215546Sopenharmony_ci unsigned reg_alloc; 804bf215546Sopenharmony_ci 805bf215546Sopenharmony_ci /* Mask of UBOs that need to be uploaded */ 806bf215546Sopenharmony_ci uint32_t ubo_mask; 807bf215546Sopenharmony_ci 808bf215546Sopenharmony_ci /* During instruction selection, map from vector bi_index to its scalar 809bf215546Sopenharmony_ci * components, populated by a split. 810bf215546Sopenharmony_ci */ 811bf215546Sopenharmony_ci struct hash_table_u64 *allocated_vec; 812bf215546Sopenharmony_ci 813bf215546Sopenharmony_ci /* Stats for shader-db */ 814bf215546Sopenharmony_ci unsigned instruction_count; 815bf215546Sopenharmony_ci unsigned loop_count; 816bf215546Sopenharmony_ci unsigned spills; 817bf215546Sopenharmony_ci unsigned fills; 818bf215546Sopenharmony_ci} bi_context; 819bf215546Sopenharmony_ci 820bf215546Sopenharmony_cistatic inline void 821bf215546Sopenharmony_cibi_remove_instruction(bi_instr *ins) 822bf215546Sopenharmony_ci{ 823bf215546Sopenharmony_ci list_del(&ins->link); 824bf215546Sopenharmony_ci} 825bf215546Sopenharmony_ci 826bf215546Sopenharmony_cienum bir_fau { 827bf215546Sopenharmony_ci BIR_FAU_ZERO = 0, 828bf215546Sopenharmony_ci BIR_FAU_LANE_ID = 1, 829bf215546Sopenharmony_ci BIR_FAU_WARP_ID = 2, 830bf215546Sopenharmony_ci BIR_FAU_CORE_ID = 3, 831bf215546Sopenharmony_ci BIR_FAU_FB_EXTENT = 4, 832bf215546Sopenharmony_ci BIR_FAU_ATEST_PARAM = 5, 833bf215546Sopenharmony_ci BIR_FAU_SAMPLE_POS_ARRAY = 6, 834bf215546Sopenharmony_ci BIR_FAU_BLEND_0 = 8, 835bf215546Sopenharmony_ci /* blend descs 1 - 7 */ 836bf215546Sopenharmony_ci BIR_FAU_TYPE_MASK = 15, 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_ci /* Valhall only */ 839bf215546Sopenharmony_ci BIR_FAU_TLS_PTR = 16, 840bf215546Sopenharmony_ci BIR_FAU_WLS_PTR = 17, 
841bf215546Sopenharmony_ci BIR_FAU_PROGRAM_COUNTER = 18, 842bf215546Sopenharmony_ci 843bf215546Sopenharmony_ci BIR_FAU_UNIFORM = (1 << 7), 844bf215546Sopenharmony_ci /* Look up table on Valhall */ 845bf215546Sopenharmony_ci BIR_FAU_IMMEDIATE = (1 << 8), 846bf215546Sopenharmony_ci 847bf215546Sopenharmony_ci}; 848bf215546Sopenharmony_ci 849bf215546Sopenharmony_cistatic inline bi_index 850bf215546Sopenharmony_cibi_fau(enum bir_fau value, bool hi) 851bf215546Sopenharmony_ci{ 852bf215546Sopenharmony_ci return (bi_index) { 853bf215546Sopenharmony_ci .value = value, 854bf215546Sopenharmony_ci .swizzle = BI_SWIZZLE_H01, 855bf215546Sopenharmony_ci .offset = hi ? 1u : 0u, 856bf215546Sopenharmony_ci .type = BI_INDEX_FAU, 857bf215546Sopenharmony_ci }; 858bf215546Sopenharmony_ci} 859bf215546Sopenharmony_ci 860bf215546Sopenharmony_ci/* 861bf215546Sopenharmony_ci * Builder for Valhall LUT entries. Generally, constants are modeled with 862bf215546Sopenharmony_ci * BI_INDEX_IMMEDIATE in the intermediate representation. This helper is only 863bf215546Sopenharmony_ci * necessary for passes running after lowering constants, as well as when 864bf215546Sopenharmony_ci * lowering constants. 865bf215546Sopenharmony_ci * 866bf215546Sopenharmony_ci */ 867bf215546Sopenharmony_cistatic inline bi_index 868bf215546Sopenharmony_civa_lut(unsigned index) 869bf215546Sopenharmony_ci{ 870bf215546Sopenharmony_ci return bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | (index >> 1)), 871bf215546Sopenharmony_ci index & 1); 872bf215546Sopenharmony_ci} 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_ci/* 875bf215546Sopenharmony_ci * va_lut_zero is like bi_zero but only works on Valhall. It is intended for 876bf215546Sopenharmony_ci * use by late passes that run after constants are lowered, specifically 877bf215546Sopenharmony_ci * register allocation. bi_zero() is preferred where possible. 
878bf215546Sopenharmony_ci */ 879bf215546Sopenharmony_cistatic inline bi_index 880bf215546Sopenharmony_civa_zero_lut() 881bf215546Sopenharmony_ci{ 882bf215546Sopenharmony_ci return va_lut(0); 883bf215546Sopenharmony_ci} 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_cistatic inline unsigned 886bf215546Sopenharmony_cibi_max_temp(bi_context *ctx) 887bf215546Sopenharmony_ci{ 888bf215546Sopenharmony_ci return (MAX2(ctx->reg_alloc, ctx->ssa_alloc) + 2) << 1; 889bf215546Sopenharmony_ci} 890bf215546Sopenharmony_ci 891bf215546Sopenharmony_cistatic inline bi_index 892bf215546Sopenharmony_cibi_temp(bi_context *ctx) 893bf215546Sopenharmony_ci{ 894bf215546Sopenharmony_ci return bi_get_index(ctx->ssa_alloc++, false, 0); 895bf215546Sopenharmony_ci} 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_cistatic inline bi_index 898bf215546Sopenharmony_cibi_temp_reg(bi_context *ctx) 899bf215546Sopenharmony_ci{ 900bf215546Sopenharmony_ci return bi_get_index(ctx->reg_alloc++, true, 0); 901bf215546Sopenharmony_ci} 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_ci/* Inline constants automatically, will be lowered out by bi_lower_fau where a 904bf215546Sopenharmony_ci * constant is not allowed. 
load_const_to_scalar gaurantees that this makes 905bf215546Sopenharmony_ci * sense */ 906bf215546Sopenharmony_ci 907bf215546Sopenharmony_cistatic inline bi_index 908bf215546Sopenharmony_cibi_src_index(nir_src *src) 909bf215546Sopenharmony_ci{ 910bf215546Sopenharmony_ci if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32) 911bf215546Sopenharmony_ci return bi_imm_u32(nir_src_as_uint(*src)); 912bf215546Sopenharmony_ci else if (src->is_ssa) 913bf215546Sopenharmony_ci return bi_get_index(src->ssa->index, false, 0); 914bf215546Sopenharmony_ci else { 915bf215546Sopenharmony_ci assert(!src->reg.indirect); 916bf215546Sopenharmony_ci return bi_get_index(src->reg.reg->index, true, 0); 917bf215546Sopenharmony_ci } 918bf215546Sopenharmony_ci} 919bf215546Sopenharmony_ci 920bf215546Sopenharmony_cistatic inline bi_index 921bf215546Sopenharmony_cibi_dest_index(nir_dest *dst) 922bf215546Sopenharmony_ci{ 923bf215546Sopenharmony_ci if (dst->is_ssa) 924bf215546Sopenharmony_ci return bi_get_index(dst->ssa.index, false, 0); 925bf215546Sopenharmony_ci else { 926bf215546Sopenharmony_ci assert(!dst->reg.indirect); 927bf215546Sopenharmony_ci return bi_get_index(dst->reg.reg->index, true, 0); 928bf215546Sopenharmony_ci } 929bf215546Sopenharmony_ci} 930bf215546Sopenharmony_ci 931bf215546Sopenharmony_cistatic inline unsigned 932bf215546Sopenharmony_cibi_get_node(bi_index index) 933bf215546Sopenharmony_ci{ 934bf215546Sopenharmony_ci if (bi_is_null(index) || index.type != BI_INDEX_NORMAL) 935bf215546Sopenharmony_ci return ~0; 936bf215546Sopenharmony_ci else 937bf215546Sopenharmony_ci return (index.value << 1) | index.reg; 938bf215546Sopenharmony_ci} 939bf215546Sopenharmony_ci 940bf215546Sopenharmony_cistatic inline bi_index 941bf215546Sopenharmony_cibi_node_to_index(unsigned node, unsigned node_count) 942bf215546Sopenharmony_ci{ 943bf215546Sopenharmony_ci assert(node < node_count); 944bf215546Sopenharmony_ci assert(node_count < ~0u); 945bf215546Sopenharmony_ci 946bf215546Sopenharmony_ci 
return bi_get_index(node >> 1, node & PAN_IS_REG, 0); 947bf215546Sopenharmony_ci} 948bf215546Sopenharmony_ci 949bf215546Sopenharmony_ci/* Iterators for Bifrost IR */ 950bf215546Sopenharmony_ci 951bf215546Sopenharmony_ci#define bi_foreach_block(ctx, v) \ 952bf215546Sopenharmony_ci list_for_each_entry(bi_block, v, &ctx->blocks, link) 953bf215546Sopenharmony_ci 954bf215546Sopenharmony_ci#define bi_foreach_block_rev(ctx, v) \ 955bf215546Sopenharmony_ci list_for_each_entry_rev(bi_block, v, &ctx->blocks, link) 956bf215546Sopenharmony_ci 957bf215546Sopenharmony_ci#define bi_foreach_block_from(ctx, from, v) \ 958bf215546Sopenharmony_ci list_for_each_entry_from(bi_block, v, from, &ctx->blocks, link) 959bf215546Sopenharmony_ci 960bf215546Sopenharmony_ci#define bi_foreach_block_from_rev(ctx, from, v) \ 961bf215546Sopenharmony_ci list_for_each_entry_from_rev(bi_block, v, from, &ctx->blocks, link) 962bf215546Sopenharmony_ci 963bf215546Sopenharmony_ci#define bi_foreach_instr_in_block(block, v) \ 964bf215546Sopenharmony_ci list_for_each_entry(bi_instr, v, &(block)->instructions, link) 965bf215546Sopenharmony_ci 966bf215546Sopenharmony_ci#define bi_foreach_instr_in_block_rev(block, v) \ 967bf215546Sopenharmony_ci list_for_each_entry_rev(bi_instr, v, &(block)->instructions, link) 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_ci#define bi_foreach_instr_in_block_safe(block, v) \ 970bf215546Sopenharmony_ci list_for_each_entry_safe(bi_instr, v, &(block)->instructions, link) 971bf215546Sopenharmony_ci 972bf215546Sopenharmony_ci#define bi_foreach_instr_in_block_safe_rev(block, v) \ 973bf215546Sopenharmony_ci list_for_each_entry_safe_rev(bi_instr, v, &(block)->instructions, link) 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci#define bi_foreach_instr_in_block_from(block, v, from) \ 976bf215546Sopenharmony_ci list_for_each_entry_from(bi_instr, v, from, &(block)->instructions, link) 977bf215546Sopenharmony_ci 978bf215546Sopenharmony_ci#define 
bi_foreach_instr_in_block_from_rev(block, v, from) \ 979bf215546Sopenharmony_ci list_for_each_entry_from_rev(bi_instr, v, from, &(block)->instructions, link) 980bf215546Sopenharmony_ci 981bf215546Sopenharmony_ci#define bi_foreach_clause_in_block(block, v) \ 982bf215546Sopenharmony_ci list_for_each_entry(bi_clause, v, &(block)->clauses, link) 983bf215546Sopenharmony_ci 984bf215546Sopenharmony_ci#define bi_foreach_clause_in_block_rev(block, v) \ 985bf215546Sopenharmony_ci list_for_each_entry_rev(bi_clause, v, &(block)->clauses, link) 986bf215546Sopenharmony_ci 987bf215546Sopenharmony_ci#define bi_foreach_clause_in_block_safe(block, v) \ 988bf215546Sopenharmony_ci list_for_each_entry_safe(bi_clause, v, &(block)->clauses, link) 989bf215546Sopenharmony_ci 990bf215546Sopenharmony_ci#define bi_foreach_clause_in_block_from(block, v, from) \ 991bf215546Sopenharmony_ci list_for_each_entry_from(bi_clause, v, from, &(block)->clauses, link) 992bf215546Sopenharmony_ci 993bf215546Sopenharmony_ci#define bi_foreach_clause_in_block_from_rev(block, v, from) \ 994bf215546Sopenharmony_ci list_for_each_entry_from_rev(bi_clause, v, from, &(block)->clauses, link) 995bf215546Sopenharmony_ci 996bf215546Sopenharmony_ci#define bi_foreach_instr_global(ctx, v) \ 997bf215546Sopenharmony_ci bi_foreach_block(ctx, v_block) \ 998bf215546Sopenharmony_ci bi_foreach_instr_in_block(v_block, v) 999bf215546Sopenharmony_ci 1000bf215546Sopenharmony_ci#define bi_foreach_instr_global_rev(ctx, v) \ 1001bf215546Sopenharmony_ci bi_foreach_block_rev(ctx, v_block) \ 1002bf215546Sopenharmony_ci bi_foreach_instr_in_block_rev(v_block, v) 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_ci#define bi_foreach_instr_global_safe(ctx, v) \ 1005bf215546Sopenharmony_ci bi_foreach_block(ctx, v_block) \ 1006bf215546Sopenharmony_ci bi_foreach_instr_in_block_safe(v_block, v) 1007bf215546Sopenharmony_ci 1008bf215546Sopenharmony_ci#define bi_foreach_instr_global_rev_safe(ctx, v) \ 1009bf215546Sopenharmony_ci 
bi_foreach_block_rev(ctx, v_block) \ 1010bf215546Sopenharmony_ci bi_foreach_instr_in_block_rev_safe(v_block, v) 1011bf215546Sopenharmony_ci 1012bf215546Sopenharmony_ci#define bi_foreach_instr_in_tuple(tuple, v) \ 1013bf215546Sopenharmony_ci for (bi_instr *v = (tuple)->fma ?: (tuple)->add; \ 1014bf215546Sopenharmony_ci v != NULL; \ 1015bf215546Sopenharmony_ci v = (v == (tuple)->add) ? NULL : (tuple)->add) 1016bf215546Sopenharmony_ci 1017bf215546Sopenharmony_ci#define bi_foreach_successor(blk, v) \ 1018bf215546Sopenharmony_ci bi_block *v; \ 1019bf215546Sopenharmony_ci bi_block **_v; \ 1020bf215546Sopenharmony_ci for (_v = &blk->successors[0], \ 1021bf215546Sopenharmony_ci v = *_v; \ 1022bf215546Sopenharmony_ci v != NULL && _v < &blk->successors[2]; \ 1023bf215546Sopenharmony_ci _v++, v = *_v) \ 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci#define bi_foreach_predecessor(blk, v) \ 1026bf215546Sopenharmony_ci util_dynarray_foreach(&(blk)->predecessors, bi_block *, v) 1027bf215546Sopenharmony_ci 1028bf215546Sopenharmony_ci#define bi_foreach_src(ins, v) \ 1029bf215546Sopenharmony_ci for (unsigned v = 0; v < ARRAY_SIZE(ins->src); ++v) 1030bf215546Sopenharmony_ci 1031bf215546Sopenharmony_ci#define bi_foreach_dest(ins, v) \ 1032bf215546Sopenharmony_ci for (unsigned v = 0; v < ARRAY_SIZE(ins->dest); ++v) 1033bf215546Sopenharmony_ci 1034bf215546Sopenharmony_ci#define bi_foreach_instr_and_src_in_tuple(tuple, ins, s) \ 1035bf215546Sopenharmony_ci bi_foreach_instr_in_tuple(tuple, ins) \ 1036bf215546Sopenharmony_ci bi_foreach_src(ins, s) 1037bf215546Sopenharmony_ci 1038bf215546Sopenharmony_cistatic inline bi_instr * 1039bf215546Sopenharmony_cibi_prev_op(bi_instr *ins) 1040bf215546Sopenharmony_ci{ 1041bf215546Sopenharmony_ci return list_last_entry(&(ins->link), bi_instr, link); 1042bf215546Sopenharmony_ci} 1043bf215546Sopenharmony_ci 1044bf215546Sopenharmony_cistatic inline bi_instr * 1045bf215546Sopenharmony_cibi_next_op(bi_instr *ins) 1046bf215546Sopenharmony_ci{ 
1047bf215546Sopenharmony_ci return list_first_entry(&(ins->link), bi_instr, link); 1048bf215546Sopenharmony_ci} 1049bf215546Sopenharmony_ci 1050bf215546Sopenharmony_cistatic inline bi_block * 1051bf215546Sopenharmony_cibi_next_block(bi_block *block) 1052bf215546Sopenharmony_ci{ 1053bf215546Sopenharmony_ci return list_first_entry(&(block->link), bi_block, link); 1054bf215546Sopenharmony_ci} 1055bf215546Sopenharmony_ci 1056bf215546Sopenharmony_cistatic inline bi_block * 1057bf215546Sopenharmony_cibi_entry_block(bi_context *ctx) 1058bf215546Sopenharmony_ci{ 1059bf215546Sopenharmony_ci return list_first_entry(&ctx->blocks, bi_block, link); 1060bf215546Sopenharmony_ci} 1061bf215546Sopenharmony_ci 1062bf215546Sopenharmony_ci/* BIR manipulation */ 1063bf215546Sopenharmony_ci 1064bf215546Sopenharmony_cibool bi_has_arg(const bi_instr *ins, bi_index arg); 1065bf215546Sopenharmony_ciunsigned bi_count_read_registers(const bi_instr *ins, unsigned src); 1066bf215546Sopenharmony_ciunsigned bi_count_write_registers(const bi_instr *ins, unsigned dest); 1067bf215546Sopenharmony_cibool bi_is_regfmt_16(enum bi_register_format fmt); 1068bf215546Sopenharmony_ciunsigned bi_writemask(const bi_instr *ins, unsigned dest); 1069bf215546Sopenharmony_cibi_clause * bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause); 1070bf215546Sopenharmony_cibool bi_side_effects(const bi_instr *I); 1071bf215546Sopenharmony_cibool bi_reconverge_branches(bi_block *block); 1072bf215546Sopenharmony_ci 1073bf215546Sopenharmony_cibool bi_can_replace_with_csel(bi_instr *I); 1074bf215546Sopenharmony_civoid bi_replace_mux_with_csel(bi_instr *I, bool must_sign); 1075bf215546Sopenharmony_ci 1076bf215546Sopenharmony_civoid bi_print_instr(const bi_instr *I, FILE *fp); 1077bf215546Sopenharmony_civoid bi_print_slots(bi_registers *regs, FILE *fp); 1078bf215546Sopenharmony_civoid bi_print_tuple(bi_tuple *tuple, FILE *fp); 1079bf215546Sopenharmony_civoid bi_print_clause(bi_clause *clause, FILE *fp); 
1080bf215546Sopenharmony_civoid bi_print_block(bi_block *block, FILE *fp); 1081bf215546Sopenharmony_civoid bi_print_shader(bi_context *ctx, FILE *fp); 1082bf215546Sopenharmony_ci 1083bf215546Sopenharmony_ci/* BIR passes */ 1084bf215546Sopenharmony_ci 1085bf215546Sopenharmony_cibool bi_instr_uses_helpers(bi_instr *I); 1086bf215546Sopenharmony_cibool bi_block_terminates_helpers(bi_block *block); 1087bf215546Sopenharmony_civoid bi_analyze_helper_terminate(bi_context *ctx); 1088bf215546Sopenharmony_civoid bi_mark_clauses_td(bi_context *ctx); 1089bf215546Sopenharmony_ci 1090bf215546Sopenharmony_civoid bi_analyze_helper_requirements(bi_context *ctx); 1091bf215546Sopenharmony_civoid bi_opt_copy_prop(bi_context *ctx); 1092bf215546Sopenharmony_civoid bi_opt_cse(bi_context *ctx); 1093bf215546Sopenharmony_civoid bi_opt_mod_prop_forward(bi_context *ctx); 1094bf215546Sopenharmony_civoid bi_opt_mod_prop_backward(bi_context *ctx); 1095bf215546Sopenharmony_civoid bi_opt_dead_code_eliminate(bi_context *ctx); 1096bf215546Sopenharmony_civoid bi_opt_fuse_dual_texture(bi_context *ctx); 1097bf215546Sopenharmony_civoid bi_opt_dce_post_ra(bi_context *ctx); 1098bf215546Sopenharmony_civoid bi_opt_message_preload(bi_context *ctx); 1099bf215546Sopenharmony_civoid bi_opt_push_ubo(bi_context *ctx); 1100bf215546Sopenharmony_civoid bi_opt_reorder_push(bi_context *ctx); 1101bf215546Sopenharmony_civoid bi_lower_swizzle(bi_context *ctx); 1102bf215546Sopenharmony_civoid bi_lower_fau(bi_context *ctx); 1103bf215546Sopenharmony_civoid bi_assign_scoreboard(bi_context *ctx); 1104bf215546Sopenharmony_civoid bi_register_allocate(bi_context *ctx); 1105bf215546Sopenharmony_civoid va_optimize(bi_context *ctx); 1106bf215546Sopenharmony_civoid va_lower_split_64bit(bi_context *ctx); 1107bf215546Sopenharmony_ci 1108bf215546Sopenharmony_civoid bi_lower_opt_instruction(bi_instr *I); 1109bf215546Sopenharmony_ci 1110bf215546Sopenharmony_civoid bi_pressure_schedule(bi_context *ctx); 1111bf215546Sopenharmony_civoid 
bi_schedule(bi_context *ctx); 1112bf215546Sopenharmony_cibool bi_can_fma(bi_instr *ins); 1113bf215546Sopenharmony_cibool bi_can_add(bi_instr *ins); 1114bf215546Sopenharmony_cibool bi_must_message(bi_instr *ins); 1115bf215546Sopenharmony_cibool bi_reads_zero(bi_instr *ins); 1116bf215546Sopenharmony_cibool bi_reads_temps(bi_instr *ins, unsigned src); 1117bf215546Sopenharmony_cibool bi_reads_t(bi_instr *ins, unsigned src); 1118bf215546Sopenharmony_ci 1119bf215546Sopenharmony_ci#ifndef NDEBUG 1120bf215546Sopenharmony_cibool bi_validate_initialization(bi_context *ctx); 1121bf215546Sopenharmony_civoid bi_validate(bi_context *ctx, const char *after_str); 1122bf215546Sopenharmony_ci#else 1123bf215546Sopenharmony_cistatic inline bool bi_validate_initialization(UNUSED bi_context *ctx) { return true; } 1124bf215546Sopenharmony_cistatic inline void bi_validate(UNUSED bi_context *ctx, UNUSED const char *after_str) { return; } 1125bf215546Sopenharmony_ci#endif 1126bf215546Sopenharmony_ci 1127bf215546Sopenharmony_ciuint32_t bi_fold_constant(bi_instr *I, bool *unsupported); 1128bf215546Sopenharmony_cibool bi_opt_constant_fold(bi_context *ctx); 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_ci/* Liveness */ 1131bf215546Sopenharmony_ci 1132bf215546Sopenharmony_civoid bi_compute_liveness(bi_context *ctx); 1133bf215546Sopenharmony_civoid bi_liveness_ins_update(uint8_t *live, bi_instr *ins, unsigned max); 1134bf215546Sopenharmony_ci 1135bf215546Sopenharmony_civoid bi_postra_liveness(bi_context *ctx); 1136bf215546Sopenharmony_ciuint64_t MUST_CHECK bi_postra_liveness_ins(uint64_t live, bi_instr *ins); 1137bf215546Sopenharmony_ci 1138bf215546Sopenharmony_ci/* Layout */ 1139bf215546Sopenharmony_ci 1140bf215546Sopenharmony_cisigned bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target); 1141bf215546Sopenharmony_cibool bi_ec0_packed(unsigned tuple_count); 1142bf215546Sopenharmony_ci 1143bf215546Sopenharmony_ci/* Check if there are no more instructions starting with a 
given block, this 1144bf215546Sopenharmony_ci * needs to recurse in case a shader ends with multiple empty blocks */ 1145bf215546Sopenharmony_ci 1146bf215546Sopenharmony_cistatic inline bool 1147bf215546Sopenharmony_cibi_is_terminal_block(bi_block *block) 1148bf215546Sopenharmony_ci{ 1149bf215546Sopenharmony_ci return (block == NULL) || 1150bf215546Sopenharmony_ci (list_is_empty(&block->instructions) && 1151bf215546Sopenharmony_ci bi_is_terminal_block(block->successors[0]) && 1152bf215546Sopenharmony_ci bi_is_terminal_block(block->successors[1])); 1153bf215546Sopenharmony_ci} 1154bf215546Sopenharmony_ci 1155bf215546Sopenharmony_ci/* Code emit */ 1156bf215546Sopenharmony_ci 1157bf215546Sopenharmony_ci/* Returns the size of the final clause */ 1158bf215546Sopenharmony_ciunsigned bi_pack(bi_context *ctx, struct util_dynarray *emission); 1159bf215546Sopenharmony_civoid bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission); 1160bf215546Sopenharmony_ci 1161bf215546Sopenharmony_cistruct bi_packed_tuple { 1162bf215546Sopenharmony_ci uint64_t lo; 1163bf215546Sopenharmony_ci uint64_t hi; 1164bf215546Sopenharmony_ci}; 1165bf215546Sopenharmony_ci 1166bf215546Sopenharmony_ciuint8_t bi_pack_literal(enum bi_clause_subword literal); 1167bf215546Sopenharmony_ci 1168bf215546Sopenharmony_ciuint8_t 1169bf215546Sopenharmony_cibi_pack_upper(enum bi_clause_subword upper, 1170bf215546Sopenharmony_ci struct bi_packed_tuple *tuples, 1171bf215546Sopenharmony_ci ASSERTED unsigned tuple_count); 1172bf215546Sopenharmony_ciuint64_t 1173bf215546Sopenharmony_cibi_pack_tuple_bits(enum bi_clause_subword idx, 1174bf215546Sopenharmony_ci struct bi_packed_tuple *tuples, 1175bf215546Sopenharmony_ci ASSERTED unsigned tuple_count, 1176bf215546Sopenharmony_ci unsigned offset, unsigned nbits); 1177bf215546Sopenharmony_ci 1178bf215546Sopenharmony_ciuint8_t 1179bf215546Sopenharmony_cibi_pack_sync(enum bi_clause_subword t1, 1180bf215546Sopenharmony_ci enum bi_clause_subword t2, 
1181bf215546Sopenharmony_ci enum bi_clause_subword t3, 1182bf215546Sopenharmony_ci struct bi_packed_tuple *tuples, 1183bf215546Sopenharmony_ci ASSERTED unsigned tuple_count, 1184bf215546Sopenharmony_ci bool z); 1185bf215546Sopenharmony_ci 1186bf215546Sopenharmony_civoid 1187bf215546Sopenharmony_cibi_pack_format(struct util_dynarray *emission, 1188bf215546Sopenharmony_ci unsigned index, 1189bf215546Sopenharmony_ci struct bi_packed_tuple *tuples, 1190bf215546Sopenharmony_ci ASSERTED unsigned tuple_count, 1191bf215546Sopenharmony_ci uint64_t header, uint64_t ec0, 1192bf215546Sopenharmony_ci unsigned m0, bool z); 1193bf215546Sopenharmony_ci 1194bf215546Sopenharmony_ciunsigned bi_pack_fma(bi_instr *I, 1195bf215546Sopenharmony_ci enum bifrost_packed_src src0, 1196bf215546Sopenharmony_ci enum bifrost_packed_src src1, 1197bf215546Sopenharmony_ci enum bifrost_packed_src src2, 1198bf215546Sopenharmony_ci enum bifrost_packed_src src3); 1199bf215546Sopenharmony_ciunsigned bi_pack_add(bi_instr *I, 1200bf215546Sopenharmony_ci enum bifrost_packed_src src0, 1201bf215546Sopenharmony_ci enum bifrost_packed_src src1, 1202bf215546Sopenharmony_ci enum bifrost_packed_src src2, 1203bf215546Sopenharmony_ci enum bifrost_packed_src src3); 1204bf215546Sopenharmony_ci 1205bf215546Sopenharmony_ci/* Like in NIR, for use with the builder */ 1206bf215546Sopenharmony_ci 1207bf215546Sopenharmony_cienum bi_cursor_option { 1208bf215546Sopenharmony_ci bi_cursor_after_block, 1209bf215546Sopenharmony_ci bi_cursor_before_instr, 1210bf215546Sopenharmony_ci bi_cursor_after_instr 1211bf215546Sopenharmony_ci}; 1212bf215546Sopenharmony_ci 1213bf215546Sopenharmony_citypedef struct { 1214bf215546Sopenharmony_ci enum bi_cursor_option option; 1215bf215546Sopenharmony_ci 1216bf215546Sopenharmony_ci union { 1217bf215546Sopenharmony_ci bi_block *block; 1218bf215546Sopenharmony_ci bi_instr *instr; 1219bf215546Sopenharmony_ci }; 1220bf215546Sopenharmony_ci} bi_cursor; 1221bf215546Sopenharmony_ci 
1222bf215546Sopenharmony_cistatic inline bi_cursor 1223bf215546Sopenharmony_cibi_after_block(bi_block *block) 1224bf215546Sopenharmony_ci{ 1225bf215546Sopenharmony_ci return (bi_cursor) { 1226bf215546Sopenharmony_ci .option = bi_cursor_after_block, 1227bf215546Sopenharmony_ci .block = block 1228bf215546Sopenharmony_ci }; 1229bf215546Sopenharmony_ci} 1230bf215546Sopenharmony_ci 1231bf215546Sopenharmony_cistatic inline bi_cursor 1232bf215546Sopenharmony_cibi_before_instr(bi_instr *instr) 1233bf215546Sopenharmony_ci{ 1234bf215546Sopenharmony_ci return (bi_cursor) { 1235bf215546Sopenharmony_ci .option = bi_cursor_before_instr, 1236bf215546Sopenharmony_ci .instr = instr 1237bf215546Sopenharmony_ci }; 1238bf215546Sopenharmony_ci} 1239bf215546Sopenharmony_ci 1240bf215546Sopenharmony_cistatic inline bi_cursor 1241bf215546Sopenharmony_cibi_after_instr(bi_instr *instr) 1242bf215546Sopenharmony_ci{ 1243bf215546Sopenharmony_ci return (bi_cursor) { 1244bf215546Sopenharmony_ci .option = bi_cursor_after_instr, 1245bf215546Sopenharmony_ci .instr = instr 1246bf215546Sopenharmony_ci }; 1247bf215546Sopenharmony_ci} 1248bf215546Sopenharmony_ci 1249bf215546Sopenharmony_cistatic inline bi_cursor 1250bf215546Sopenharmony_cibi_before_nonempty_block(bi_block *block) 1251bf215546Sopenharmony_ci{ 1252bf215546Sopenharmony_ci bi_instr *I = list_first_entry(&block->instructions, bi_instr, link); 1253bf215546Sopenharmony_ci assert(I != NULL); 1254bf215546Sopenharmony_ci 1255bf215546Sopenharmony_ci return bi_before_instr(I); 1256bf215546Sopenharmony_ci} 1257bf215546Sopenharmony_ci 1258bf215546Sopenharmony_cistatic inline bi_cursor 1259bf215546Sopenharmony_cibi_before_block(bi_block *block) 1260bf215546Sopenharmony_ci{ 1261bf215546Sopenharmony_ci if (list_is_empty(&block->instructions)) 1262bf215546Sopenharmony_ci return bi_after_block(block); 1263bf215546Sopenharmony_ci else 1264bf215546Sopenharmony_ci return bi_before_nonempty_block(block); 1265bf215546Sopenharmony_ci} 
1266bf215546Sopenharmony_ci 1267bf215546Sopenharmony_ci/* Invariant: a tuple must be nonempty UNLESS it is the last tuple of a clause, 1268bf215546Sopenharmony_ci * in which case there must exist a nonempty penultimate tuple */ 1269bf215546Sopenharmony_ci 1270bf215546Sopenharmony_ciATTRIBUTE_RETURNS_NONNULL static inline bi_instr * 1271bf215546Sopenharmony_cibi_first_instr_in_tuple(bi_tuple *tuple) 1272bf215546Sopenharmony_ci{ 1273bf215546Sopenharmony_ci bi_instr *instr = tuple->fma ?: tuple->add; 1274bf215546Sopenharmony_ci assert(instr != NULL); 1275bf215546Sopenharmony_ci return instr; 1276bf215546Sopenharmony_ci} 1277bf215546Sopenharmony_ci 1278bf215546Sopenharmony_ciATTRIBUTE_RETURNS_NONNULL static inline bi_instr * 1279bf215546Sopenharmony_cibi_first_instr_in_clause(bi_clause *clause) 1280bf215546Sopenharmony_ci{ 1281bf215546Sopenharmony_ci return bi_first_instr_in_tuple(&clause->tuples[0]); 1282bf215546Sopenharmony_ci} 1283bf215546Sopenharmony_ci 1284bf215546Sopenharmony_ciATTRIBUTE_RETURNS_NONNULL static inline bi_instr * 1285bf215546Sopenharmony_cibi_last_instr_in_clause(bi_clause *clause) 1286bf215546Sopenharmony_ci{ 1287bf215546Sopenharmony_ci bi_tuple tuple = clause->tuples[clause->tuple_count - 1]; 1288bf215546Sopenharmony_ci bi_instr *instr = tuple.add ?: tuple.fma; 1289bf215546Sopenharmony_ci 1290bf215546Sopenharmony_ci if (!instr) { 1291bf215546Sopenharmony_ci assert(clause->tuple_count >= 2); 1292bf215546Sopenharmony_ci tuple = clause->tuples[clause->tuple_count - 2]; 1293bf215546Sopenharmony_ci instr = tuple.add ?: tuple.fma; 1294bf215546Sopenharmony_ci } 1295bf215546Sopenharmony_ci 1296bf215546Sopenharmony_ci assert(instr != NULL); 1297bf215546Sopenharmony_ci return instr; 1298bf215546Sopenharmony_ci} 1299bf215546Sopenharmony_ci 1300bf215546Sopenharmony_ci/* Implemented by expanding bi_foreach_instr_in_block_from(_rev) with the start 1301bf215546Sopenharmony_ci * (end) of the clause and adding a condition for the clause boundary */ 
1302bf215546Sopenharmony_ci 1303bf215546Sopenharmony_ci#define bi_foreach_instr_in_clause(block, clause, pos) \ 1304bf215546Sopenharmony_ci for (bi_instr *pos = list_entry(bi_first_instr_in_clause(clause), bi_instr, link); \ 1305bf215546Sopenharmony_ci (&pos->link != &(block)->instructions) \ 1306bf215546Sopenharmony_ci && (pos != bi_next_op(bi_last_instr_in_clause(clause))); \ 1307bf215546Sopenharmony_ci pos = list_entry(pos->link.next, bi_instr, link)) 1308bf215546Sopenharmony_ci 1309bf215546Sopenharmony_ci#define bi_foreach_instr_in_clause_rev(block, clause, pos) \ 1310bf215546Sopenharmony_ci for (bi_instr *pos = list_entry(bi_last_instr_in_clause(clause), bi_instr, link); \ 1311bf215546Sopenharmony_ci (&pos->link != &(block)->instructions) \ 1312bf215546Sopenharmony_ci && pos != bi_prev_op(bi_first_instr_in_clause(clause)); \ 1313bf215546Sopenharmony_ci pos = list_entry(pos->link.prev, bi_instr, link)) 1314bf215546Sopenharmony_ci 1315bf215546Sopenharmony_cistatic inline bi_cursor 1316bf215546Sopenharmony_cibi_before_clause(bi_clause *clause) 1317bf215546Sopenharmony_ci{ 1318bf215546Sopenharmony_ci return bi_before_instr(bi_first_instr_in_clause(clause)); 1319bf215546Sopenharmony_ci} 1320bf215546Sopenharmony_ci 1321bf215546Sopenharmony_cistatic inline bi_cursor 1322bf215546Sopenharmony_cibi_before_tuple(bi_tuple *tuple) 1323bf215546Sopenharmony_ci{ 1324bf215546Sopenharmony_ci return bi_before_instr(bi_first_instr_in_tuple(tuple)); 1325bf215546Sopenharmony_ci} 1326bf215546Sopenharmony_ci 1327bf215546Sopenharmony_cistatic inline bi_cursor 1328bf215546Sopenharmony_cibi_after_clause(bi_clause *clause) 1329bf215546Sopenharmony_ci{ 1330bf215546Sopenharmony_ci return bi_after_instr(bi_last_instr_in_clause(clause)); 1331bf215546Sopenharmony_ci} 1332bf215546Sopenharmony_ci 1333bf215546Sopenharmony_ci/* IR builder in terms of cursor infrastructure */ 1334bf215546Sopenharmony_ci 1335bf215546Sopenharmony_citypedef struct { 1336bf215546Sopenharmony_ci bi_context *shader; 
1337bf215546Sopenharmony_ci bi_cursor cursor; 1338bf215546Sopenharmony_ci} bi_builder; 1339bf215546Sopenharmony_ci 1340bf215546Sopenharmony_cistatic inline bi_builder 1341bf215546Sopenharmony_cibi_init_builder(bi_context *ctx, bi_cursor cursor) 1342bf215546Sopenharmony_ci{ 1343bf215546Sopenharmony_ci return (bi_builder) { 1344bf215546Sopenharmony_ci .shader = ctx, 1345bf215546Sopenharmony_ci .cursor = cursor 1346bf215546Sopenharmony_ci }; 1347bf215546Sopenharmony_ci} 1348bf215546Sopenharmony_ci 1349bf215546Sopenharmony_ci/* Insert an instruction at the cursor and move the cursor */ 1350bf215546Sopenharmony_ci 1351bf215546Sopenharmony_cistatic inline void 1352bf215546Sopenharmony_cibi_builder_insert(bi_cursor *cursor, bi_instr *I) 1353bf215546Sopenharmony_ci{ 1354bf215546Sopenharmony_ci switch (cursor->option) { 1355bf215546Sopenharmony_ci case bi_cursor_after_instr: 1356bf215546Sopenharmony_ci list_add(&I->link, &cursor->instr->link); 1357bf215546Sopenharmony_ci cursor->instr = I; 1358bf215546Sopenharmony_ci return; 1359bf215546Sopenharmony_ci 1360bf215546Sopenharmony_ci case bi_cursor_after_block: 1361bf215546Sopenharmony_ci list_addtail(&I->link, &cursor->block->instructions); 1362bf215546Sopenharmony_ci cursor->option = bi_cursor_after_instr; 1363bf215546Sopenharmony_ci cursor->instr = I; 1364bf215546Sopenharmony_ci return; 1365bf215546Sopenharmony_ci 1366bf215546Sopenharmony_ci case bi_cursor_before_instr: 1367bf215546Sopenharmony_ci list_addtail(&I->link, &cursor->instr->link); 1368bf215546Sopenharmony_ci cursor->option = bi_cursor_after_instr; 1369bf215546Sopenharmony_ci cursor->instr = I; 1370bf215546Sopenharmony_ci return; 1371bf215546Sopenharmony_ci } 1372bf215546Sopenharmony_ci 1373bf215546Sopenharmony_ci unreachable("Invalid cursor option"); 1374bf215546Sopenharmony_ci} 1375bf215546Sopenharmony_ci 1376bf215546Sopenharmony_ci/* Read back power-efficent garbage, TODO maybe merge with null? 
*/ 1377bf215546Sopenharmony_cistatic inline bi_index 1378bf215546Sopenharmony_cibi_dontcare(bi_builder *b) 1379bf215546Sopenharmony_ci{ 1380bf215546Sopenharmony_ci if (b->shader->arch >= 9) 1381bf215546Sopenharmony_ci return bi_zero(); 1382bf215546Sopenharmony_ci else 1383bf215546Sopenharmony_ci return bi_passthrough(BIFROST_SRC_FAU_HI); 1384bf215546Sopenharmony_ci} 1385bf215546Sopenharmony_ci 1386bf215546Sopenharmony_ci#define bi_worklist_init(ctx, w) u_worklist_init(w, ctx->num_blocks, ctx) 1387bf215546Sopenharmony_ci#define bi_worklist_push_head(w, block) u_worklist_push_head(w, block, index) 1388bf215546Sopenharmony_ci#define bi_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index) 1389bf215546Sopenharmony_ci#define bi_worklist_peek_head(w) u_worklist_peek_head(w, bi_block, index) 1390bf215546Sopenharmony_ci#define bi_worklist_pop_head(w) u_worklist_pop_head( w, bi_block, index) 1391bf215546Sopenharmony_ci#define bi_worklist_peek_tail(w) u_worklist_peek_tail(w, bi_block, index) 1392bf215546Sopenharmony_ci#define bi_worklist_pop_tail(w) u_worklist_pop_tail( w, bi_block, index) 1393bf215546Sopenharmony_ci 1394bf215546Sopenharmony_ci/* NIR passes */ 1395bf215546Sopenharmony_ci 1396bf215546Sopenharmony_cibool bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes); 1397bf215546Sopenharmony_ci 1398bf215546Sopenharmony_ci#ifdef __cplusplus 1399bf215546Sopenharmony_ci} /* extern C */ 1400bf215546Sopenharmony_ci#endif 1401bf215546Sopenharmony_ci 1402bf215546Sopenharmony_ci#endif 1403