1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#ifndef ACO_IR_H 26bf215546Sopenharmony_ci#define ACO_IR_H 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include "aco_opcodes.h" 29bf215546Sopenharmony_ci#include "aco_util.h" 30bf215546Sopenharmony_ci#include "aco_interface.h" 31bf215546Sopenharmony_ci#include "aco_shader_info.h" 32bf215546Sopenharmony_ci#include "vulkan/radv_shader.h" 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci#include "nir.h" 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci#include <bitset> 37bf215546Sopenharmony_ci#include <memory> 38bf215546Sopenharmony_ci#include <vector> 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_cistruct radv_shader_args; 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_cinamespace aco { 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ciextern uint64_t debug_flags; 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_cienum { 47bf215546Sopenharmony_ci DEBUG_VALIDATE_IR = 0x1, 48bf215546Sopenharmony_ci DEBUG_VALIDATE_RA = 0x2, 49bf215546Sopenharmony_ci DEBUG_PERFWARN = 0x4, 50bf215546Sopenharmony_ci DEBUG_FORCE_WAITCNT = 0x8, 51bf215546Sopenharmony_ci DEBUG_NO_VN = 0x10, 52bf215546Sopenharmony_ci DEBUG_NO_OPT = 0x20, 53bf215546Sopenharmony_ci DEBUG_NO_SCHED = 0x40, 54bf215546Sopenharmony_ci DEBUG_PERF_INFO = 0x80, 55bf215546Sopenharmony_ci DEBUG_LIVE_INFO = 0x100, 56bf215546Sopenharmony_ci}; 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci/** 59bf215546Sopenharmony_ci * Representation of the instruction's microcode encoding format 60bf215546Sopenharmony_ci * Note: Some Vector ALU Formats can be combined, such that: 61bf215546Sopenharmony_ci * - VOP2* | VOP3 represents a VOP2 instruction in VOP3 encoding 62bf215546Sopenharmony_ci * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive. 63bf215546Sopenharmony_ci * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing. 64bf215546Sopenharmony_ci * 65bf215546Sopenharmony_ci * (*) The same is applicable for VOP1 and VOPC instructions. 66bf215546Sopenharmony_ci */ 67bf215546Sopenharmony_cienum class Format : std::uint16_t { 68bf215546Sopenharmony_ci /* Pseudo Instruction Format */ 69bf215546Sopenharmony_ci PSEUDO = 0, 70bf215546Sopenharmony_ci /* Scalar ALU & Control Formats */ 71bf215546Sopenharmony_ci SOP1 = 1, 72bf215546Sopenharmony_ci SOP2 = 2, 73bf215546Sopenharmony_ci SOPK = 3, 74bf215546Sopenharmony_ci SOPP = 4, 75bf215546Sopenharmony_ci SOPC = 5, 76bf215546Sopenharmony_ci /* Scalar Memory Format */ 77bf215546Sopenharmony_ci SMEM = 6, 78bf215546Sopenharmony_ci /* LDS/GDS Format */ 79bf215546Sopenharmony_ci DS = 8, 80bf215546Sopenharmony_ci /* Vector Memory Buffer Formats */ 81bf215546Sopenharmony_ci MTBUF = 9, 82bf215546Sopenharmony_ci MUBUF = 10, 83bf215546Sopenharmony_ci /* Vector Memory Image Format */ 84bf215546Sopenharmony_ci MIMG = 11, 85bf215546Sopenharmony_ci /* Export Format */ 86bf215546Sopenharmony_ci EXP = 12, 87bf215546Sopenharmony_ci /* Flat Formats */ 88bf215546Sopenharmony_ci FLAT = 13, 89bf215546Sopenharmony_ci GLOBAL = 14, 90bf215546Sopenharmony_ci SCRATCH = 15, 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci PSEUDO_BRANCH = 16, 93bf215546Sopenharmony_ci PSEUDO_BARRIER = 17, 94bf215546Sopenharmony_ci PSEUDO_REDUCTION = 18, 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci /* Vector ALU Formats */ 97bf215546Sopenharmony_ci VOP3P = 19, 98bf215546Sopenharmony_ci VOP1 = 1 << 8, 99bf215546Sopenharmony_ci VOP2 = 1 << 9, 100bf215546Sopenharmony_ci VOPC = 1 << 10, 101bf215546Sopenharmony_ci VOP3 = 1 << 11, 102bf215546Sopenharmony_ci /* Vector Parameter Interpolation Format */ 103bf215546Sopenharmony_ci VINTRP = 1 << 12, 104bf215546Sopenharmony_ci DPP16 = 1 << 13, 105bf215546Sopenharmony_ci SDWA = 1 << 14, 106bf215546Sopenharmony_ci DPP8 = 1 << 15, 107bf215546Sopenharmony_ci}; 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_cienum class instr_class : uint8_t { 110bf215546Sopenharmony_ci valu32 = 0, 111bf215546Sopenharmony_ci valu_convert32 = 1, 112bf215546Sopenharmony_ci valu64 = 2, 113bf215546Sopenharmony_ci valu_quarter_rate32 = 3, 114bf215546Sopenharmony_ci valu_fma = 4, 115bf215546Sopenharmony_ci valu_transcendental32 = 5, 116bf215546Sopenharmony_ci valu_double = 6, 117bf215546Sopenharmony_ci valu_double_add = 7, 118bf215546Sopenharmony_ci valu_double_convert = 8, 119bf215546Sopenharmony_ci valu_double_transcendental = 9, 120bf215546Sopenharmony_ci salu = 10, 121bf215546Sopenharmony_ci smem = 11, 122bf215546Sopenharmony_ci barrier = 12, 123bf215546Sopenharmony_ci branch = 13, 124bf215546Sopenharmony_ci sendmsg = 14, 125bf215546Sopenharmony_ci ds = 15, 126bf215546Sopenharmony_ci exp = 16, 127bf215546Sopenharmony_ci vmem = 17, 128bf215546Sopenharmony_ci waitcnt = 18, 129bf215546Sopenharmony_ci other = 19, 130bf215546Sopenharmony_ci count, 131bf215546Sopenharmony_ci}; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_cienum storage_class : uint8_t { 134bf215546Sopenharmony_ci storage_none = 0x0, /* no synchronization and can be reordered around aliasing stores */ 135bf215546Sopenharmony_ci storage_buffer = 0x1, /* SSBOs and global memory */ 136bf215546Sopenharmony_ci storage_atomic_counter = 0x2, /* not used for Vulkan */ 137bf215546Sopenharmony_ci storage_image = 0x4, 138bf215546Sopenharmony_ci storage_shared = 0x8, /* or TCS output */ 139bf215546Sopenharmony_ci storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */ 140bf215546Sopenharmony_ci storage_task_payload = 0x20,/* Task-Mesh payload */ 141bf215546Sopenharmony_ci storage_scratch = 0x40, 142bf215546Sopenharmony_ci storage_vgpr_spill = 0x80, 143bf215546Sopenharmony_ci storage_count = 8, /* not counting storage_none */ 144bf215546Sopenharmony_ci}; 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_cienum memory_semantics : uint8_t { 147bf215546Sopenharmony_ci semantic_none = 0x0, 148bf215546Sopenharmony_ci /* for loads: don't move any access after this load to before this load (even other loads) 149bf215546Sopenharmony_ci * for barriers: don't move any access after the barrier to before any 150bf215546Sopenharmony_ci * atomics/control_barriers/sendmsg_gs_done before the barrier */ 151bf215546Sopenharmony_ci semantic_acquire = 0x1, 152bf215546Sopenharmony_ci /* for stores: don't move any access before this store to after this store 153bf215546Sopenharmony_ci * for barriers: don't move any access before the barrier to after any 154bf215546Sopenharmony_ci * atomics/control_barriers/sendmsg_gs_done after the barrier */ 155bf215546Sopenharmony_ci semantic_release = 0x2, 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci /* the rest are for load/stores/atomics only */ 158bf215546Sopenharmony_ci /* cannot be DCE'd or CSE'd */ 159bf215546Sopenharmony_ci semantic_volatile = 0x4, 160bf215546Sopenharmony_ci /* does not interact with barriers and assumes this lane is the only lane 161bf215546Sopenharmony_ci * accessing this memory */ 162bf215546Sopenharmony_ci semantic_private = 0x8, 163bf215546Sopenharmony_ci /* this operation can be reordered around operations of the same storage. 164bf215546Sopenharmony_ci * says nothing about barriers */ 165bf215546Sopenharmony_ci semantic_can_reorder = 0x10, 166bf215546Sopenharmony_ci /* this is a atomic instruction (may only read or write memory) */ 167bf215546Sopenharmony_ci semantic_atomic = 0x20, 168bf215546Sopenharmony_ci /* this is instruction both reads and writes memory */ 169bf215546Sopenharmony_ci semantic_rmw = 0x40, 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci semantic_acqrel = semantic_acquire | semantic_release, 172bf215546Sopenharmony_ci semantic_atomicrmw = semantic_volatile | semantic_atomic | semantic_rmw, 173bf215546Sopenharmony_ci}; 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_cienum sync_scope : uint8_t { 176bf215546Sopenharmony_ci scope_invocation = 0, 177bf215546Sopenharmony_ci scope_subgroup = 1, 178bf215546Sopenharmony_ci scope_workgroup = 2, 179bf215546Sopenharmony_ci scope_queuefamily = 3, 180bf215546Sopenharmony_ci scope_device = 4, 181bf215546Sopenharmony_ci}; 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_cistruct memory_sync_info { 184bf215546Sopenharmony_ci memory_sync_info() : storage(storage_none), semantics(semantic_none), scope(scope_invocation) {} 185bf215546Sopenharmony_ci memory_sync_info(int storage_, int semantics_ = 0, sync_scope scope_ = scope_invocation) 186bf215546Sopenharmony_ci : storage((storage_class)storage_), semantics((memory_semantics)semantics_), scope(scope_) 187bf215546Sopenharmony_ci {} 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci storage_class storage : 8; 190bf215546Sopenharmony_ci memory_semantics semantics : 8; 191bf215546Sopenharmony_ci sync_scope scope : 8; 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci bool operator==(const memory_sync_info& rhs) const 194bf215546Sopenharmony_ci { 195bf215546Sopenharmony_ci return storage == rhs.storage && semantics == rhs.semantics && scope == rhs.scope; 196bf215546Sopenharmony_ci } 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci bool can_reorder() const 199bf215546Sopenharmony_ci { 200bf215546Sopenharmony_ci if (semantics & semantic_acqrel) 201bf215546Sopenharmony_ci return false; 202bf215546Sopenharmony_ci /* Also check storage so that zero-initialized memory_sync_info can be 203bf215546Sopenharmony_ci * reordered. */ 204bf215546Sopenharmony_ci return (!storage || (semantics & semantic_can_reorder)) && !(semantics & semantic_volatile); 205bf215546Sopenharmony_ci } 206bf215546Sopenharmony_ci}; 207bf215546Sopenharmony_cistatic_assert(sizeof(memory_sync_info) == 3, "Unexpected padding"); 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_cienum fp_round { 210bf215546Sopenharmony_ci fp_round_ne = 0, 211bf215546Sopenharmony_ci fp_round_pi = 1, 212bf215546Sopenharmony_ci fp_round_ni = 2, 213bf215546Sopenharmony_ci fp_round_tz = 3, 214bf215546Sopenharmony_ci}; 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_cienum fp_denorm { 217bf215546Sopenharmony_ci /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and 218bf215546Sopenharmony_ci * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */ 219bf215546Sopenharmony_ci fp_denorm_flush = 0x0, 220bf215546Sopenharmony_ci fp_denorm_keep_in = 0x1, 221bf215546Sopenharmony_ci fp_denorm_keep_out = 0x2, 222bf215546Sopenharmony_ci fp_denorm_keep = 0x3, 223bf215546Sopenharmony_ci}; 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_cistruct float_mode { 226bf215546Sopenharmony_ci /* matches encoding of the MODE register */ 227bf215546Sopenharmony_ci union { 228bf215546Sopenharmony_ci struct { 229bf215546Sopenharmony_ci fp_round round32 : 2; 230bf215546Sopenharmony_ci fp_round round16_64 : 2; 231bf215546Sopenharmony_ci unsigned denorm32 : 2; 232bf215546Sopenharmony_ci unsigned denorm16_64 : 2; 233bf215546Sopenharmony_ci }; 234bf215546Sopenharmony_ci struct { 235bf215546Sopenharmony_ci uint8_t round : 4; 236bf215546Sopenharmony_ci uint8_t denorm : 4; 237bf215546Sopenharmony_ci }; 238bf215546Sopenharmony_ci uint8_t val = 0; 239bf215546Sopenharmony_ci }; 240bf215546Sopenharmony_ci /* if false, optimizations which may remove infs/nan/-0.0 can be done */ 241bf215546Sopenharmony_ci bool preserve_signed_zero_inf_nan32 : 1; 242bf215546Sopenharmony_ci bool preserve_signed_zero_inf_nan16_64 : 1; 243bf215546Sopenharmony_ci /* if false, optimizations which may remove denormal flushing can be done */ 244bf215546Sopenharmony_ci bool must_flush_denorms32 : 1; 245bf215546Sopenharmony_ci bool must_flush_denorms16_64 : 1; 246bf215546Sopenharmony_ci bool care_about_round32 : 1; 247bf215546Sopenharmony_ci bool care_about_round16_64 : 1; 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci /* Returns true if instructions using the mode "other" can safely use the 250bf215546Sopenharmony_ci * current one instead. */ 251bf215546Sopenharmony_ci bool canReplace(float_mode other) const noexcept 252bf215546Sopenharmony_ci { 253bf215546Sopenharmony_ci return val == other.val && 254bf215546Sopenharmony_ci (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) && 255bf215546Sopenharmony_ci (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) && 256bf215546Sopenharmony_ci (must_flush_denorms32 || !other.must_flush_denorms32) && 257bf215546Sopenharmony_ci (must_flush_denorms16_64 || !other.must_flush_denorms16_64) && 258bf215546Sopenharmony_ci (care_about_round32 || !other.care_about_round32) && 259bf215546Sopenharmony_ci (care_about_round16_64 || !other.care_about_round16_64); 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci}; 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_cistruct wait_imm { 264bf215546Sopenharmony_ci static const uint8_t unset_counter = 0xff; 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci uint8_t vm; 267bf215546Sopenharmony_ci uint8_t exp; 268bf215546Sopenharmony_ci uint8_t lgkm; 269bf215546Sopenharmony_ci uint8_t vs; 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci wait_imm(); 272bf215546Sopenharmony_ci wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_); 273bf215546Sopenharmony_ci wait_imm(enum amd_gfx_level chip, uint16_t packed); 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci uint16_t pack(enum amd_gfx_level chip) const; 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci bool combine(const wait_imm& other); 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci bool empty() const; 280bf215546Sopenharmony_ci}; 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ciconstexpr Format 283bf215546Sopenharmony_ciasVOP3(Format format) 284bf215546Sopenharmony_ci{ 285bf215546Sopenharmony_ci return (Format)((uint32_t)Format::VOP3 | (uint32_t)format); 286bf215546Sopenharmony_ci}; 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ciconstexpr Format 289bf215546Sopenharmony_ciasSDWA(Format format) 290bf215546Sopenharmony_ci{ 291bf215546Sopenharmony_ci assert(format == Format::VOP1 || format == Format::VOP2 || format == Format::VOPC); 292bf215546Sopenharmony_ci return (Format)((uint32_t)Format::SDWA | (uint32_t)format); 293bf215546Sopenharmony_ci} 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ciconstexpr Format 296bf215546Sopenharmony_ciwithoutDPP(Format format) 297bf215546Sopenharmony_ci{ 298bf215546Sopenharmony_ci return (Format)((uint32_t)format & ~((uint32_t)Format::DPP16 | (uint32_t)Format::DPP8)); 299bf215546Sopenharmony_ci} 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_cienum class RegType { 302bf215546Sopenharmony_ci none = 0, 303bf215546Sopenharmony_ci sgpr, 304bf215546Sopenharmony_ci vgpr, 305bf215546Sopenharmony_ci linear_vgpr, 306bf215546Sopenharmony_ci}; 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_cistruct RegClass { 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci enum RC : uint8_t { 311bf215546Sopenharmony_ci s1 = 1, 312bf215546Sopenharmony_ci s2 = 2, 313bf215546Sopenharmony_ci s3 = 3, 314bf215546Sopenharmony_ci s4 = 4, 315bf215546Sopenharmony_ci s6 = 6, 316bf215546Sopenharmony_ci s8 = 8, 317bf215546Sopenharmony_ci s16 = 16, 318bf215546Sopenharmony_ci v1 = s1 | (1 << 5), 319bf215546Sopenharmony_ci v2 = s2 | (1 << 5), 320bf215546Sopenharmony_ci v3 = s3 | (1 << 5), 321bf215546Sopenharmony_ci v4 = s4 | (1 << 5), 322bf215546Sopenharmony_ci v5 = 5 | (1 << 5), 323bf215546Sopenharmony_ci v6 = 6 | (1 << 5), 324bf215546Sopenharmony_ci v7 = 7 | (1 << 5), 325bf215546Sopenharmony_ci v8 = 8 | (1 << 5), 326bf215546Sopenharmony_ci /* byte-sized register class */ 327bf215546Sopenharmony_ci v1b = v1 | (1 << 7), 328bf215546Sopenharmony_ci v2b = v2 | (1 << 7), 329bf215546Sopenharmony_ci v3b = v3 | (1 << 7), 330bf215546Sopenharmony_ci v4b = v4 | (1 << 7), 331bf215546Sopenharmony_ci v6b = v6 | (1 << 7), 332bf215546Sopenharmony_ci v8b = v8 | (1 << 7), 333bf215546Sopenharmony_ci /* these are used for WWM and spills to vgpr */ 334bf215546Sopenharmony_ci v1_linear = v1 | (1 << 6), 335bf215546Sopenharmony_ci v2_linear = v2 | (1 << 6), 336bf215546Sopenharmony_ci }; 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci RegClass() = default; 339bf215546Sopenharmony_ci constexpr RegClass(RC rc_) : rc(rc_) {} 340bf215546Sopenharmony_ci constexpr RegClass(RegType type, unsigned size) 341bf215546Sopenharmony_ci : rc((RC)((type == RegType::vgpr ? 1 << 5 : 0) | size)) 342bf215546Sopenharmony_ci {} 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci constexpr operator RC() const { return rc; } 345bf215546Sopenharmony_ci explicit operator bool() = delete; 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; } 348bf215546Sopenharmony_ci constexpr bool is_linear_vgpr() const { return rc & (1 << 6); }; 349bf215546Sopenharmony_ci constexpr bool is_subdword() const { return rc & (1 << 7); } 350bf215546Sopenharmony_ci constexpr unsigned bytes() const { return ((unsigned)rc & 0x1F) * (is_subdword() ? 1 : 4); } 351bf215546Sopenharmony_ci // TODO: use size() less in favor of bytes() 352bf215546Sopenharmony_ci constexpr unsigned size() const { return (bytes() + 3) >> 2; } 353bf215546Sopenharmony_ci constexpr bool is_linear() const { return rc <= RC::s16 || is_linear_vgpr(); } 354bf215546Sopenharmony_ci constexpr RegClass as_linear() const { return RegClass((RC)(rc | (1 << 6))); } 355bf215546Sopenharmony_ci constexpr RegClass as_subdword() const { return RegClass((RC)(rc | 1 << 7)); } 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci static constexpr RegClass get(RegType type, unsigned bytes) 358bf215546Sopenharmony_ci { 359bf215546Sopenharmony_ci if (type == RegType::sgpr) { 360bf215546Sopenharmony_ci return RegClass(type, DIV_ROUND_UP(bytes, 4u)); 361bf215546Sopenharmony_ci } else { 362bf215546Sopenharmony_ci return bytes % 4u ? RegClass(type, bytes).as_subdword() : RegClass(type, bytes / 4u); 363bf215546Sopenharmony_ci } 364bf215546Sopenharmony_ci } 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci constexpr RegClass resize(unsigned bytes) const 367bf215546Sopenharmony_ci { 368bf215546Sopenharmony_ci if (is_linear_vgpr()) { 369bf215546Sopenharmony_ci assert(bytes % 4u == 0); 370bf215546Sopenharmony_ci return get(RegType::vgpr, bytes).as_linear(); 371bf215546Sopenharmony_ci } 372bf215546Sopenharmony_ci return get(type(), bytes); 373bf215546Sopenharmony_ci } 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ciprivate: 376bf215546Sopenharmony_ci RC rc; 377bf215546Sopenharmony_ci}; 378bf215546Sopenharmony_ci 379bf215546Sopenharmony_ci/* transitional helper expressions */ 380bf215546Sopenharmony_cistatic constexpr RegClass s1{RegClass::s1}; 381bf215546Sopenharmony_cistatic constexpr RegClass s2{RegClass::s2}; 382bf215546Sopenharmony_cistatic constexpr RegClass s3{RegClass::s3}; 383bf215546Sopenharmony_cistatic constexpr RegClass s4{RegClass::s4}; 384bf215546Sopenharmony_cistatic constexpr RegClass s8{RegClass::s8}; 385bf215546Sopenharmony_cistatic constexpr RegClass s16{RegClass::s16}; 386bf215546Sopenharmony_cistatic constexpr RegClass v1{RegClass::v1}; 387bf215546Sopenharmony_cistatic constexpr RegClass v2{RegClass::v2}; 388bf215546Sopenharmony_cistatic constexpr RegClass v3{RegClass::v3}; 389bf215546Sopenharmony_cistatic constexpr RegClass v4{RegClass::v4}; 390bf215546Sopenharmony_cistatic constexpr RegClass v5{RegClass::v5}; 391bf215546Sopenharmony_cistatic constexpr RegClass v6{RegClass::v6}; 392bf215546Sopenharmony_cistatic constexpr RegClass v7{RegClass::v7}; 393bf215546Sopenharmony_cistatic constexpr RegClass v8{RegClass::v8}; 394bf215546Sopenharmony_cistatic constexpr RegClass v1b{RegClass::v1b}; 395bf215546Sopenharmony_cistatic constexpr RegClass v2b{RegClass::v2b}; 396bf215546Sopenharmony_cistatic constexpr RegClass v3b{RegClass::v3b}; 397bf215546Sopenharmony_cistatic constexpr RegClass v4b{RegClass::v4b}; 398bf215546Sopenharmony_cistatic constexpr RegClass v6b{RegClass::v6b}; 399bf215546Sopenharmony_cistatic constexpr RegClass v8b{RegClass::v8b}; 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci/** 402bf215546Sopenharmony_ci * Temp Class 403bf215546Sopenharmony_ci * Each temporary virtual register has a 404bf215546Sopenharmony_ci * register class (i.e. size and type) 405bf215546Sopenharmony_ci * and SSA id. 406bf215546Sopenharmony_ci */ 407bf215546Sopenharmony_cistruct Temp { 408bf215546Sopenharmony_ci Temp() noexcept : id_(0), reg_class(0) {} 409bf215546Sopenharmony_ci constexpr Temp(uint32_t id, RegClass cls) noexcept : id_(id), reg_class(uint8_t(cls)) {} 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci constexpr uint32_t id() const noexcept { return id_; } 412bf215546Sopenharmony_ci constexpr RegClass regClass() const noexcept { return (RegClass::RC)reg_class; } 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci constexpr unsigned bytes() const noexcept { return regClass().bytes(); } 415bf215546Sopenharmony_ci constexpr unsigned size() const noexcept { return regClass().size(); } 416bf215546Sopenharmony_ci constexpr RegType type() const noexcept { return regClass().type(); } 417bf215546Sopenharmony_ci constexpr bool is_linear() const noexcept { return regClass().is_linear(); } 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci constexpr bool operator<(Temp other) const noexcept { return id() < other.id(); } 420bf215546Sopenharmony_ci constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); } 421bf215546Sopenharmony_ci constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); } 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ciprivate: 424bf215546Sopenharmony_ci uint32_t id_ : 24; 425bf215546Sopenharmony_ci uint32_t reg_class : 8; 426bf215546Sopenharmony_ci}; 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci/** 429bf215546Sopenharmony_ci * PhysReg 430bf215546Sopenharmony_ci * Represents the physical register for each 431bf215546Sopenharmony_ci * Operand and Definition. 432bf215546Sopenharmony_ci */ 433bf215546Sopenharmony_cistruct PhysReg { 434bf215546Sopenharmony_ci constexpr PhysReg() = default; 435bf215546Sopenharmony_ci explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {} 436bf215546Sopenharmony_ci constexpr unsigned reg() const { return reg_b >> 2; } 437bf215546Sopenharmony_ci constexpr unsigned byte() const { return reg_b & 0x3; } 438bf215546Sopenharmony_ci constexpr operator unsigned() const { return reg(); } 439bf215546Sopenharmony_ci constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; } 440bf215546Sopenharmony_ci constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; } 441bf215546Sopenharmony_ci constexpr bool operator<(PhysReg other) const { return reg_b < other.reg_b; } 442bf215546Sopenharmony_ci constexpr PhysReg advance(int bytes) const 443bf215546Sopenharmony_ci { 444bf215546Sopenharmony_ci PhysReg res = *this; 445bf215546Sopenharmony_ci res.reg_b += bytes; 446bf215546Sopenharmony_ci return res; 447bf215546Sopenharmony_ci } 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci uint16_t reg_b = 0; 450bf215546Sopenharmony_ci}; 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_ci/* helper expressions for special registers */ 453bf215546Sopenharmony_cistatic constexpr PhysReg m0{124}; 454bf215546Sopenharmony_cistatic constexpr PhysReg flat_scr_lo{102}; /* GFX8-GFX9, encoded differently on GFX6-7 */ 455bf215546Sopenharmony_cistatic constexpr PhysReg flat_scr_hi{103}; /* GFX8-GFX9, encoded differently on GFX6-7 */ 456bf215546Sopenharmony_cistatic constexpr PhysReg vcc{106}; 457bf215546Sopenharmony_cistatic constexpr PhysReg vcc_hi{107}; 458bf215546Sopenharmony_cistatic constexpr PhysReg tba{108}; /* GFX6-GFX8 */ 459bf215546Sopenharmony_cistatic constexpr PhysReg tma{110}; /* GFX6-GFX8 */ 460bf215546Sopenharmony_cistatic constexpr PhysReg ttmp0{112}; 461bf215546Sopenharmony_cistatic constexpr PhysReg ttmp1{113}; 462bf215546Sopenharmony_cistatic constexpr PhysReg ttmp2{114}; 463bf215546Sopenharmony_cistatic constexpr PhysReg ttmp3{115}; 464bf215546Sopenharmony_cistatic constexpr PhysReg ttmp4{116}; 465bf215546Sopenharmony_cistatic constexpr PhysReg ttmp5{117}; 466bf215546Sopenharmony_cistatic constexpr PhysReg ttmp6{118}; 467bf215546Sopenharmony_cistatic constexpr PhysReg ttmp7{119}; 468bf215546Sopenharmony_cistatic constexpr PhysReg ttmp8{120}; 469bf215546Sopenharmony_cistatic constexpr PhysReg ttmp9{121}; 470bf215546Sopenharmony_cistatic constexpr PhysReg ttmp10{122}; 471bf215546Sopenharmony_cistatic constexpr PhysReg ttmp11{123}; 472bf215546Sopenharmony_cistatic constexpr PhysReg sgpr_null{125}; /* GFX10+ */ 473bf215546Sopenharmony_cistatic constexpr PhysReg exec{126}; 474bf215546Sopenharmony_cistatic constexpr PhysReg exec_lo{126}; 475bf215546Sopenharmony_cistatic constexpr PhysReg exec_hi{127}; 476bf215546Sopenharmony_cistatic constexpr PhysReg vccz{251}; 477bf215546Sopenharmony_cistatic constexpr PhysReg execz{252}; 478bf215546Sopenharmony_cistatic constexpr PhysReg scc{253}; 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci/** 481bf215546Sopenharmony_ci * Operand Class 482bf215546Sopenharmony_ci * Initially, each Operand refers to either 483bf215546Sopenharmony_ci * a temporary virtual register 484bf215546Sopenharmony_ci * or to a constant value 485bf215546Sopenharmony_ci * Temporary registers get mapped to physical register during RA 486bf215546Sopenharmony_ci * Constant values are inlined into the instruction sequence. 487bf215546Sopenharmony_ci */ 488bf215546Sopenharmony_ciclass Operand final { 489bf215546Sopenharmony_cipublic: 490bf215546Sopenharmony_ci constexpr Operand() 491bf215546Sopenharmony_ci : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false), isKill_(false), 492bf215546Sopenharmony_ci isUndef_(true), isFirstKill_(false), constSize(0), isLateKill_(false), is16bit_(false), 493bf215546Sopenharmony_ci is24bit_(false), signext(false) 494bf215546Sopenharmony_ci {} 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci explicit Operand(Temp r) noexcept 497bf215546Sopenharmony_ci { 498bf215546Sopenharmony_ci data_.temp = r; 499bf215546Sopenharmony_ci if (r.id()) { 500bf215546Sopenharmony_ci isTemp_ = true; 501bf215546Sopenharmony_ci } else { 502bf215546Sopenharmony_ci isUndef_ = true; 503bf215546Sopenharmony_ci setFixed(PhysReg{128}); 504bf215546Sopenharmony_ci } 505bf215546Sopenharmony_ci }; 506bf215546Sopenharmony_ci explicit Operand(Temp r, PhysReg reg) noexcept 507bf215546Sopenharmony_ci { 508bf215546Sopenharmony_ci assert(r.id()); /* Don't allow fixing an undef to a register */ 509bf215546Sopenharmony_ci data_.temp = r; 510bf215546Sopenharmony_ci isTemp_ = true; 511bf215546Sopenharmony_ci setFixed(reg); 512bf215546Sopenharmony_ci }; 513bf215546Sopenharmony_ci 514bf215546Sopenharmony_ci /* 8-bit constant */ 515bf215546Sopenharmony_ci static Operand c8(uint8_t v) noexcept 516bf215546Sopenharmony_ci { 517bf215546Sopenharmony_ci /* 8-bit constants are only used for copies and copies from any 8-bit 518bf215546Sopenharmony_ci * constant can be implemented with a SDWA v_mul_u32_u24. So consider all 519bf215546Sopenharmony_ci * to be inline constants. */ 520bf215546Sopenharmony_ci Operand op; 521bf215546Sopenharmony_ci op.control_ = 0; 522bf215546Sopenharmony_ci op.data_.i = v; 523bf215546Sopenharmony_ci op.isConstant_ = true; 524bf215546Sopenharmony_ci op.constSize = 0; 525bf215546Sopenharmony_ci op.setFixed(PhysReg{0u}); 526bf215546Sopenharmony_ci return op; 527bf215546Sopenharmony_ci }; 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_ci /* 16-bit constant */ 530bf215546Sopenharmony_ci static Operand c16(uint16_t v) noexcept 531bf215546Sopenharmony_ci { 532bf215546Sopenharmony_ci Operand op; 533bf215546Sopenharmony_ci op.control_ = 0; 534bf215546Sopenharmony_ci op.data_.i = v; 535bf215546Sopenharmony_ci op.isConstant_ = true; 536bf215546Sopenharmony_ci op.constSize = 1; 537bf215546Sopenharmony_ci if (v <= 64) 538bf215546Sopenharmony_ci op.setFixed(PhysReg{128u + v}); 539bf215546Sopenharmony_ci else if (v >= 0xFFF0) /* [-16 .. -1] */ 540bf215546Sopenharmony_ci op.setFixed(PhysReg{(unsigned)(192 - (int16_t)v)}); 541bf215546Sopenharmony_ci else if (v == 0x3800) /* 0.5 */ 542bf215546Sopenharmony_ci op.setFixed(PhysReg{240}); 543bf215546Sopenharmony_ci else if (v == 0xB800) /* -0.5 */ 544bf215546Sopenharmony_ci op.setFixed(PhysReg{241}); 545bf215546Sopenharmony_ci else if (v == 0x3C00) /* 1.0 */ 546bf215546Sopenharmony_ci op.setFixed(PhysReg{242}); 547bf215546Sopenharmony_ci else if (v == 0xBC00) /* -1.0 */ 548bf215546Sopenharmony_ci op.setFixed(PhysReg{243}); 549bf215546Sopenharmony_ci else if (v == 0x4000) /* 2.0 */ 550bf215546Sopenharmony_ci op.setFixed(PhysReg{244}); 551bf215546Sopenharmony_ci else if (v == 0xC000) /* -2.0 */ 552bf215546Sopenharmony_ci op.setFixed(PhysReg{245}); 553bf215546Sopenharmony_ci else if (v == 0x4400) /* 4.0 */ 554bf215546Sopenharmony_ci op.setFixed(PhysReg{246}); 555bf215546Sopenharmony_ci else if (v == 0xC400) /* -4.0 */ 556bf215546Sopenharmony_ci op.setFixed(PhysReg{247}); 557bf215546Sopenharmony_ci else if (v == 0x3118) /* 1/2 PI */ 558bf215546Sopenharmony_ci op.setFixed(PhysReg{248}); 559bf215546Sopenharmony_ci else /* Literal Constant */ 560bf215546Sopenharmony_ci op.setFixed(PhysReg{255}); 561bf215546Sopenharmony_ci return op; 562bf215546Sopenharmony_ci } 563bf215546Sopenharmony_ci 564bf215546Sopenharmony_ci /* 32-bit constant */ 565bf215546Sopenharmony_ci static Operand c32(uint32_t v) noexcept { return c32_or_c64(v, false); } 566bf215546Sopenharmony_ci 567bf215546Sopenharmony_ci /* 64-bit constant */ 568bf215546Sopenharmony_ci static Operand c64(uint64_t v) noexcept 569bf215546Sopenharmony_ci { 570bf215546Sopenharmony_ci Operand op; 571bf215546Sopenharmony_ci op.control_ = 0; 572bf215546Sopenharmony_ci op.isConstant_ = true; 573bf215546Sopenharmony_ci op.constSize = 3; 574bf215546Sopenharmony_ci if (v <= 64) { 575bf215546Sopenharmony_ci op.data_.i = (uint32_t)v; 576bf215546Sopenharmony_ci op.setFixed(PhysReg{128 + (uint32_t)v}); 577bf215546Sopenharmony_ci } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */ 578bf215546Sopenharmony_ci op.data_.i = (uint32_t)v; 579bf215546Sopenharmony_ci op.setFixed(PhysReg{192 - (uint32_t)v}); 580bf215546Sopenharmony_ci } else if (v == 0x3FE0000000000000) { /* 0.5 */ 581bf215546Sopenharmony_ci op.data_.i = 0x3f000000; 582bf215546Sopenharmony_ci op.setFixed(PhysReg{240}); 583bf215546Sopenharmony_ci } else if (v == 0xBFE0000000000000) { /* -0.5 */ 584bf215546Sopenharmony_ci op.data_.i = 0xbf000000; 585bf215546Sopenharmony_ci op.setFixed(PhysReg{241}); 586bf215546Sopenharmony_ci } else if (v == 0x3FF0000000000000) { /* 1.0 */ 587bf215546Sopenharmony_ci op.data_.i = 0x3f800000; 588bf215546Sopenharmony_ci op.setFixed(PhysReg{242}); 589bf215546Sopenharmony_ci } else if (v == 0xBFF0000000000000) { /* -1.0 */ 590bf215546Sopenharmony_ci op.data_.i = 0xbf800000; 591bf215546Sopenharmony_ci op.setFixed(PhysReg{243}); 592bf215546Sopenharmony_ci } else if (v == 0x4000000000000000) { /* 2.0 */ 593bf215546Sopenharmony_ci op.data_.i = 0x40000000; 594bf215546Sopenharmony_ci op.setFixed(PhysReg{244}); 595bf215546Sopenharmony_ci } else if (v == 0xC000000000000000) { /* -2.0 */ 596bf215546Sopenharmony_ci op.data_.i = 0xc0000000; 597bf215546Sopenharmony_ci op.setFixed(PhysReg{245}); 598bf215546Sopenharmony_ci } else if (v == 0x4010000000000000) { /* 4.0 */ 599bf215546Sopenharmony_ci op.data_.i = 0x40800000; 600bf215546Sopenharmony_ci op.setFixed(PhysReg{246}); 601bf215546Sopenharmony_ci } else if (v == 0xC010000000000000) { /* -4.0 */ 602bf215546Sopenharmony_ci op.data_.i = 0xc0800000; 603bf215546Sopenharmony_ci op.setFixed(PhysReg{247}); 604bf215546Sopenharmony_ci } else { /* Literal Constant: we don't know if it is a long or double.*/ 605bf215546Sopenharmony_ci op.signext = v >> 63; 606bf215546Sopenharmony_ci op.data_.i = v & 0xffffffffu; 607bf215546Sopenharmony_ci op.setFixed(PhysReg{255}); 608bf215546Sopenharmony_ci assert(op.constantValue64() == v && 609bf215546Sopenharmony_ci "attempt to create a unrepresentable 64-bit literal constant"); 610bf215546Sopenharmony_ci } 611bf215546Sopenharmony_ci return op; 612bf215546Sopenharmony_ci } 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci /* 32-bit constant stored as a 32-bit or 64-bit operand */ 615bf215546Sopenharmony_ci static Operand c32_or_c64(uint32_t v, bool is64bit) noexcept 616bf215546Sopenharmony_ci { 617bf215546Sopenharmony_ci Operand op; 618bf215546Sopenharmony_ci op.control_ = 0; 619bf215546Sopenharmony_ci op.data_.i = v; 620bf215546Sopenharmony_ci op.isConstant_ = true; 621bf215546Sopenharmony_ci op.constSize = is64bit ? 3 : 2; 622bf215546Sopenharmony_ci if (v <= 64) 623bf215546Sopenharmony_ci op.setFixed(PhysReg{128 + v}); 624bf215546Sopenharmony_ci else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */ 625bf215546Sopenharmony_ci op.setFixed(PhysReg{192 - v}); 626bf215546Sopenharmony_ci else if (v == 0x3f000000) /* 0.5 */ 627bf215546Sopenharmony_ci op.setFixed(PhysReg{240}); 628bf215546Sopenharmony_ci else if (v == 0xbf000000) /* -0.5 */ 629bf215546Sopenharmony_ci op.setFixed(PhysReg{241}); 630bf215546Sopenharmony_ci else if (v == 0x3f800000) /* 1.0 */ 631bf215546Sopenharmony_ci op.setFixed(PhysReg{242}); 632bf215546Sopenharmony_ci else if (v == 0xbf800000) /* -1.0 */ 633bf215546Sopenharmony_ci op.setFixed(PhysReg{243}); 634bf215546Sopenharmony_ci else if (v == 0x40000000) /* 2.0 */ 635bf215546Sopenharmony_ci op.setFixed(PhysReg{244}); 636bf215546Sopenharmony_ci else if (v == 0xc0000000) /* -2.0 */ 637bf215546Sopenharmony_ci op.setFixed(PhysReg{245}); 638bf215546Sopenharmony_ci else if (v == 0x40800000) /* 4.0 */ 639bf215546Sopenharmony_ci op.setFixed(PhysReg{246}); 640bf215546Sopenharmony_ci else if (v == 0xc0800000) /* -4.0 */ 641bf215546Sopenharmony_ci op.setFixed(PhysReg{247}); 642bf215546Sopenharmony_ci else { /* Literal Constant */ 643bf215546Sopenharmony_ci assert(!is64bit && "attempt to create a 64-bit literal constant"); 644bf215546Sopenharmony_ci op.setFixed(PhysReg{255}); 645bf215546Sopenharmony_ci } 646bf215546Sopenharmony_ci return op; 647bf215546Sopenharmony_ci } 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_ci static Operand literal32(uint32_t v) noexcept 650bf215546Sopenharmony_ci { 651bf215546Sopenharmony_ci Operand op; 652bf215546Sopenharmony_ci op.control_ = 0; 653bf215546Sopenharmony_ci op.data_.i = v; 654bf215546Sopenharmony_ci op.isConstant_ = true; 655bf215546Sopenharmony_ci op.constSize = 2; 656bf215546Sopenharmony_ci op.setFixed(PhysReg{255}); 657bf215546Sopenharmony_ci return op; 658bf215546Sopenharmony_ci } 659bf215546Sopenharmony_ci 660bf215546Sopenharmony_ci explicit Operand(RegClass type) noexcept 661bf215546Sopenharmony_ci { 662bf215546Sopenharmony_ci isUndef_ = true; 663bf215546Sopenharmony_ci data_.temp = Temp(0, type); 664bf215546Sopenharmony_ci setFixed(PhysReg{128}); 665bf215546Sopenharmony_ci }; 666bf215546Sopenharmony_ci explicit Operand(PhysReg reg, RegClass type) noexcept 667bf215546Sopenharmony_ci { 668bf215546Sopenharmony_ci data_.temp = Temp(0, type); 669bf215546Sopenharmony_ci setFixed(reg); 670bf215546Sopenharmony_ci } 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci static Operand zero(unsigned bytes = 4) noexcept 673bf215546Sopenharmony_ci { 674bf215546Sopenharmony_ci if (bytes == 8) 675bf215546Sopenharmony_ci return Operand::c64(0); 676bf215546Sopenharmony_ci else if (bytes == 4) 677bf215546Sopenharmony_ci return Operand::c32(0); 678bf215546Sopenharmony_ci else if (bytes == 2) 679bf215546Sopenharmony_ci return Operand::c16(0); 680bf215546Sopenharmony_ci assert(bytes == 1); 681bf215546Sopenharmony_ci return Operand::c8(0); 682bf215546Sopenharmony_ci } 683bf215546Sopenharmony_ci 684bf215546Sopenharmony_ci /* This is useful over the constructors when you want to take a gfx level 685bf215546Sopenharmony_ci * for 1/2 PI or an unknown operand size. 686bf215546Sopenharmony_ci */ 687bf215546Sopenharmony_ci static Operand get_const(enum amd_gfx_level chip, uint64_t val, unsigned bytes) 688bf215546Sopenharmony_ci { 689bf215546Sopenharmony_ci if (val == 0x3e22f983 && bytes == 4 && chip >= GFX8) { 690bf215546Sopenharmony_ci /* 1/2 PI can be an inline constant on GFX8+ */ 691bf215546Sopenharmony_ci Operand op = Operand::c32(val); 692bf215546Sopenharmony_ci op.setFixed(PhysReg{248}); 693bf215546Sopenharmony_ci return op; 694bf215546Sopenharmony_ci } 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_ci if (bytes == 8) 697bf215546Sopenharmony_ci return Operand::c64(val); 698bf215546Sopenharmony_ci else if (bytes == 4) 699bf215546Sopenharmony_ci return Operand::c32(val); 700bf215546Sopenharmony_ci else if (bytes == 2) 701bf215546Sopenharmony_ci return Operand::c16(val); 702bf215546Sopenharmony_ci assert(bytes == 1); 703bf215546Sopenharmony_ci return Operand::c8(val); 704bf215546Sopenharmony_ci } 705bf215546Sopenharmony_ci 706bf215546Sopenharmony_ci static bool is_constant_representable(uint64_t val, unsigned bytes, bool zext = false, 707bf215546Sopenharmony_ci bool sext = false) 708bf215546Sopenharmony_ci { 709bf215546Sopenharmony_ci if (bytes <= 4) 710bf215546Sopenharmony_ci return true; 711bf215546Sopenharmony_ci 712bf215546Sopenharmony_ci if (zext && (val & 0xFFFFFFFF00000000) == 0x0000000000000000) 713bf215546Sopenharmony_ci return true; 714bf215546Sopenharmony_ci uint64_t upper33 = val & 0xFFFFFFFF80000000; 715bf215546Sopenharmony_ci if (sext && (upper33 == 0xFFFFFFFF80000000 || upper33 == 0)) 716bf215546Sopenharmony_ci return true; 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci return val >= 0xFFFFFFFFFFFFFFF0 || val <= 64 || /* [-16 .. 64] */ 719bf215546Sopenharmony_ci val == 0x3FE0000000000000 || /* 0.5 */ 720bf215546Sopenharmony_ci val == 0xBFE0000000000000 || /* -0.5 */ 721bf215546Sopenharmony_ci val == 0x3FF0000000000000 || /* 1.0 */ 722bf215546Sopenharmony_ci val == 0xBFF0000000000000 || /* -1.0 */ 723bf215546Sopenharmony_ci val == 0x4000000000000000 || /* 2.0 */ 724bf215546Sopenharmony_ci val == 0xC000000000000000 || /* -2.0 */ 725bf215546Sopenharmony_ci val == 0x4010000000000000 || /* 4.0 */ 726bf215546Sopenharmony_ci val == 0xC010000000000000; /* -4.0 */ 727bf215546Sopenharmony_ci } 728bf215546Sopenharmony_ci 729bf215546Sopenharmony_ci constexpr bool isTemp() const noexcept { return isTemp_; } 730bf215546Sopenharmony_ci 731bf215546Sopenharmony_ci constexpr void setTemp(Temp t) noexcept 732bf215546Sopenharmony_ci { 733bf215546Sopenharmony_ci assert(!isConstant_); 734bf215546Sopenharmony_ci isTemp_ = true; 735bf215546Sopenharmony_ci data_.temp = t; 736bf215546Sopenharmony_ci } 737bf215546Sopenharmony_ci 738bf215546Sopenharmony_ci constexpr Temp getTemp() const noexcept { return data_.temp; } 739bf215546Sopenharmony_ci 740bf215546Sopenharmony_ci constexpr uint32_t tempId() const noexcept { return data_.temp.id(); } 741bf215546Sopenharmony_ci 742bf215546Sopenharmony_ci constexpr bool hasRegClass() const noexcept { return isTemp() || isUndefined(); } 743bf215546Sopenharmony_ci 744bf215546Sopenharmony_ci constexpr RegClass regClass() const noexcept { return data_.temp.regClass(); } 745bf215546Sopenharmony_ci 746bf215546Sopenharmony_ci constexpr unsigned bytes() const noexcept 747bf215546Sopenharmony_ci { 748bf215546Sopenharmony_ci if (isConstant()) 749bf215546Sopenharmony_ci return 1 << constSize; 750bf215546Sopenharmony_ci else 751bf215546Sopenharmony_ci return data_.temp.bytes(); 752bf215546Sopenharmony_ci } 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ci constexpr unsigned size() const noexcept 755bf215546Sopenharmony_ci { 756bf215546Sopenharmony_ci if (isConstant()) 757bf215546Sopenharmony_ci return constSize > 2 ? 2 : 1; 758bf215546Sopenharmony_ci else 759bf215546Sopenharmony_ci return data_.temp.size(); 760bf215546Sopenharmony_ci } 761bf215546Sopenharmony_ci 762bf215546Sopenharmony_ci constexpr bool isFixed() const noexcept { return isFixed_; } 763bf215546Sopenharmony_ci 764bf215546Sopenharmony_ci constexpr PhysReg physReg() const noexcept { return reg_; } 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci constexpr void setFixed(PhysReg reg) noexcept 767bf215546Sopenharmony_ci { 768bf215546Sopenharmony_ci isFixed_ = reg != unsigned(-1); 769bf215546Sopenharmony_ci reg_ = reg; 770bf215546Sopenharmony_ci } 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_ci constexpr bool isConstant() const noexcept { return isConstant_; } 773bf215546Sopenharmony_ci 774bf215546Sopenharmony_ci constexpr bool isLiteral() const noexcept { return isConstant() && reg_ == 255; } 775bf215546Sopenharmony_ci 776bf215546Sopenharmony_ci constexpr bool isUndefined() const noexcept { return isUndef_; } 777bf215546Sopenharmony_ci 778bf215546Sopenharmony_ci constexpr uint32_t constantValue() const noexcept { return data_.i; } 779bf215546Sopenharmony_ci 780bf215546Sopenharmony_ci constexpr bool constantEquals(uint32_t cmp) const noexcept 781bf215546Sopenharmony_ci { 782bf215546Sopenharmony_ci return isConstant() && constantValue() == cmp; 783bf215546Sopenharmony_ci } 784bf215546Sopenharmony_ci 785bf215546Sopenharmony_ci constexpr uint64_t constantValue64() const noexcept 786bf215546Sopenharmony_ci { 787bf215546Sopenharmony_ci if (constSize == 3) { 788bf215546Sopenharmony_ci if (reg_ <= 192) 789bf215546Sopenharmony_ci return reg_ - 128; 790bf215546Sopenharmony_ci else if (reg_ <= 208) 791bf215546Sopenharmony_ci return 0xFFFFFFFFFFFFFFFF - (reg_ - 193); 792bf215546Sopenharmony_ci 793bf215546Sopenharmony_ci switch (reg_) { 794bf215546Sopenharmony_ci case 240: return 0x3FE0000000000000; 795bf215546Sopenharmony_ci case 241: return 0xBFE0000000000000; 796bf215546Sopenharmony_ci case 242: return 0x3FF0000000000000; 797bf215546Sopenharmony_ci case 243: return 0xBFF0000000000000; 798bf215546Sopenharmony_ci case 244: return 0x4000000000000000; 799bf215546Sopenharmony_ci case 245: return 0xC000000000000000; 800bf215546Sopenharmony_ci case 246: return 0x4010000000000000; 801bf215546Sopenharmony_ci case 247: return 0xC010000000000000; 802bf215546Sopenharmony_ci case 255: 803bf215546Sopenharmony_ci return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i; 804bf215546Sopenharmony_ci } 805bf215546Sopenharmony_ci unreachable("invalid register for 64-bit constant"); 806bf215546Sopenharmony_ci } else { 807bf215546Sopenharmony_ci return data_.i; 808bf215546Sopenharmony_ci } 809bf215546Sopenharmony_ci } 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_ci /* Value if this were used with vop3/opsel or vop3p. */ 812bf215546Sopenharmony_ci constexpr uint16_t constantValue16(bool opsel) const noexcept 813bf215546Sopenharmony_ci { 814bf215546Sopenharmony_ci assert(bytes() == 2 || bytes() == 4); 815bf215546Sopenharmony_ci if (opsel) { 816bf215546Sopenharmony_ci if (bytes() == 2 && int16_t(data_.i) >= -16 && int16_t(data_.i) <= 64 && !isLiteral()) 817bf215546Sopenharmony_ci return int16_t(data_.i) >> 16; /* 16-bit inline integers are sign-extended, even with fp16 instrs */ 818bf215546Sopenharmony_ci else 819bf215546Sopenharmony_ci return data_.i >> 16; 820bf215546Sopenharmony_ci } 821bf215546Sopenharmony_ci return data_.i; 822bf215546Sopenharmony_ci } 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci constexpr bool isOfType(RegType type) const noexcept 825bf215546Sopenharmony_ci { 826bf215546Sopenharmony_ci return hasRegClass() && regClass().type() == type; 827bf215546Sopenharmony_ci } 828bf215546Sopenharmony_ci 829bf215546Sopenharmony_ci /* Indicates that the killed operand's live range intersects with the 830bf215546Sopenharmony_ci * instruction's definitions. Unlike isKill() and isFirstKill(), this is 831bf215546Sopenharmony_ci * not set by liveness analysis. */ 832bf215546Sopenharmony_ci constexpr void setLateKill(bool flag) noexcept { isLateKill_ = flag; } 833bf215546Sopenharmony_ci 834bf215546Sopenharmony_ci constexpr bool isLateKill() const noexcept { return isLateKill_; } 835bf215546Sopenharmony_ci 836bf215546Sopenharmony_ci constexpr void setKill(bool flag) noexcept 837bf215546Sopenharmony_ci { 838bf215546Sopenharmony_ci isKill_ = flag; 839bf215546Sopenharmony_ci if (!flag) 840bf215546Sopenharmony_ci setFirstKill(false); 841bf215546Sopenharmony_ci } 842bf215546Sopenharmony_ci 843bf215546Sopenharmony_ci constexpr bool isKill() const noexcept { return isKill_ || isFirstKill(); } 844bf215546Sopenharmony_ci 845bf215546Sopenharmony_ci constexpr void setFirstKill(bool flag) noexcept 846bf215546Sopenharmony_ci { 847bf215546Sopenharmony_ci isFirstKill_ = flag; 848bf215546Sopenharmony_ci if (flag) 849bf215546Sopenharmony_ci setKill(flag); 850bf215546Sopenharmony_ci } 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci /* When there are multiple operands killing the same temporary, 853bf215546Sopenharmony_ci * isFirstKill() is only returns true for the first one. */ 854bf215546Sopenharmony_ci constexpr bool isFirstKill() const noexcept { return isFirstKill_; } 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci constexpr bool isKillBeforeDef() const noexcept { return isKill() && !isLateKill(); } 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci constexpr bool isFirstKillBeforeDef() const noexcept { return isFirstKill() && !isLateKill(); } 859bf215546Sopenharmony_ci 860bf215546Sopenharmony_ci constexpr bool operator==(Operand other) const noexcept 861bf215546Sopenharmony_ci { 862bf215546Sopenharmony_ci if (other.size() != size()) 863bf215546Sopenharmony_ci return false; 864bf215546Sopenharmony_ci if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef()) 865bf215546Sopenharmony_ci return false; 866bf215546Sopenharmony_ci if (isFixed() && other.isFixed() && physReg() != other.physReg()) 867bf215546Sopenharmony_ci return false; 868bf215546Sopenharmony_ci if (isLiteral()) 869bf215546Sopenharmony_ci return other.isLiteral() && other.constantValue() == constantValue(); 870bf215546Sopenharmony_ci else if (isConstant()) 871bf215546Sopenharmony_ci return other.isConstant() && other.physReg() == physReg(); 872bf215546Sopenharmony_ci else if (isUndefined()) 873bf215546Sopenharmony_ci return other.isUndefined() && other.regClass() == regClass(); 874bf215546Sopenharmony_ci else 875bf215546Sopenharmony_ci return other.isTemp() && other.getTemp() == getTemp(); 876bf215546Sopenharmony_ci } 877bf215546Sopenharmony_ci 878bf215546Sopenharmony_ci constexpr bool operator!=(Operand other) const noexcept { return !operator==(other); } 879bf215546Sopenharmony_ci 880bf215546Sopenharmony_ci constexpr void set16bit(bool flag) noexcept { is16bit_ = flag; } 881bf215546Sopenharmony_ci 882bf215546Sopenharmony_ci constexpr bool is16bit() const noexcept { return is16bit_; } 883bf215546Sopenharmony_ci 884bf215546Sopenharmony_ci constexpr void set24bit(bool flag) noexcept { is24bit_ = flag; } 885bf215546Sopenharmony_ci 886bf215546Sopenharmony_ci constexpr bool is24bit() const noexcept { return is24bit_; } 887bf215546Sopenharmony_ci 888bf215546Sopenharmony_ciprivate: 889bf215546Sopenharmony_ci union { 890bf215546Sopenharmony_ci Temp temp; 891bf215546Sopenharmony_ci uint32_t i; 892bf215546Sopenharmony_ci float f; 893bf215546Sopenharmony_ci } data_ = {Temp(0, s1)}; 894bf215546Sopenharmony_ci PhysReg reg_; 895bf215546Sopenharmony_ci union { 896bf215546Sopenharmony_ci struct { 897bf215546Sopenharmony_ci uint8_t isTemp_ : 1; 898bf215546Sopenharmony_ci uint8_t isFixed_ : 1; 899bf215546Sopenharmony_ci uint8_t isConstant_ : 1; 900bf215546Sopenharmony_ci uint8_t isKill_ : 1; 901bf215546Sopenharmony_ci uint8_t isUndef_ : 1; 902bf215546Sopenharmony_ci uint8_t isFirstKill_ : 1; 903bf215546Sopenharmony_ci uint8_t constSize : 2; 904bf215546Sopenharmony_ci uint8_t isLateKill_ : 1; 905bf215546Sopenharmony_ci uint8_t is16bit_ : 1; 906bf215546Sopenharmony_ci uint8_t is24bit_ : 1; 907bf215546Sopenharmony_ci uint8_t signext : 1; 908bf215546Sopenharmony_ci }; 909bf215546Sopenharmony_ci /* can't initialize bit-fields in c++11, so work around using a union */ 910bf215546Sopenharmony_ci uint16_t control_ = 0; 911bf215546Sopenharmony_ci }; 912bf215546Sopenharmony_ci}; 913bf215546Sopenharmony_ci 914bf215546Sopenharmony_ci/** 915bf215546Sopenharmony_ci * Definition Class 916bf215546Sopenharmony_ci * Definitions are the results of Instructions 917bf215546Sopenharmony_ci * and refer to temporary virtual registers 918bf215546Sopenharmony_ci * which are later mapped to physical registers 919bf215546Sopenharmony_ci */ 920bf215546Sopenharmony_ciclass Definition final { 921bf215546Sopenharmony_cipublic: 922bf215546Sopenharmony_ci constexpr Definition() 923bf215546Sopenharmony_ci : temp(Temp(0, s1)), reg_(0), isFixed_(0), isKill_(0), isPrecise_(0), isNUW_(0), isNoCSE_(0) 924bf215546Sopenharmony_ci {} 925bf215546Sopenharmony_ci Definition(uint32_t index, RegClass type) noexcept : temp(index, type) {} 926bf215546Sopenharmony_ci explicit Definition(Temp tmp) noexcept : temp(tmp) {} 927bf215546Sopenharmony_ci Definition(PhysReg reg, RegClass type) noexcept : temp(Temp(0, type)) { setFixed(reg); } 928bf215546Sopenharmony_ci Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept : temp(Temp(tmpId, type)) 929bf215546Sopenharmony_ci { 930bf215546Sopenharmony_ci setFixed(reg); 931bf215546Sopenharmony_ci } 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_ci constexpr bool isTemp() const noexcept { return tempId() > 0; } 934bf215546Sopenharmony_ci 935bf215546Sopenharmony_ci constexpr Temp getTemp() const noexcept { return temp; } 936bf215546Sopenharmony_ci 937bf215546Sopenharmony_ci constexpr uint32_t tempId() const noexcept { return temp.id(); } 938bf215546Sopenharmony_ci 939bf215546Sopenharmony_ci constexpr void setTemp(Temp t) noexcept { temp = t; } 940bf215546Sopenharmony_ci 941bf215546Sopenharmony_ci void swapTemp(Definition& other) noexcept { std::swap(temp, other.temp); } 942bf215546Sopenharmony_ci 943bf215546Sopenharmony_ci constexpr RegClass regClass() const noexcept { return temp.regClass(); } 944bf215546Sopenharmony_ci 945bf215546Sopenharmony_ci constexpr unsigned bytes() const noexcept { return temp.bytes(); } 946bf215546Sopenharmony_ci 947bf215546Sopenharmony_ci constexpr unsigned size() const noexcept { return temp.size(); } 948bf215546Sopenharmony_ci 949bf215546Sopenharmony_ci constexpr bool isFixed() const noexcept { return isFixed_; } 950bf215546Sopenharmony_ci 951bf215546Sopenharmony_ci constexpr PhysReg physReg() const noexcept { return reg_; } 952bf215546Sopenharmony_ci 953bf215546Sopenharmony_ci constexpr void setFixed(PhysReg reg) noexcept 954bf215546Sopenharmony_ci { 955bf215546Sopenharmony_ci isFixed_ = 1; 956bf215546Sopenharmony_ci reg_ = reg; 957bf215546Sopenharmony_ci } 958bf215546Sopenharmony_ci 959bf215546Sopenharmony_ci constexpr void setKill(bool flag) noexcept { isKill_ = flag; } 960bf215546Sopenharmony_ci 961bf215546Sopenharmony_ci constexpr bool isKill() const noexcept { return isKill_; } 962bf215546Sopenharmony_ci 963bf215546Sopenharmony_ci constexpr void setPrecise(bool precise) noexcept { isPrecise_ = precise; } 964bf215546Sopenharmony_ci 965bf215546Sopenharmony_ci constexpr bool isPrecise() const noexcept { return isPrecise_; } 966bf215546Sopenharmony_ci 967bf215546Sopenharmony_ci /* No Unsigned Wrap */ 968bf215546Sopenharmony_ci constexpr void setNUW(bool nuw) noexcept { isNUW_ = nuw; } 969bf215546Sopenharmony_ci 970bf215546Sopenharmony_ci constexpr bool isNUW() const noexcept { return isNUW_; } 971bf215546Sopenharmony_ci 972bf215546Sopenharmony_ci constexpr void setNoCSE(bool noCSE) noexcept { isNoCSE_ = noCSE; } 973bf215546Sopenharmony_ci 974bf215546Sopenharmony_ci constexpr bool isNoCSE() const noexcept { return isNoCSE_; } 975bf215546Sopenharmony_ci 976bf215546Sopenharmony_ciprivate: 977bf215546Sopenharmony_ci Temp temp = Temp(0, s1); 978bf215546Sopenharmony_ci PhysReg reg_; 979bf215546Sopenharmony_ci union { 980bf215546Sopenharmony_ci struct { 981bf215546Sopenharmony_ci uint8_t isFixed_ : 1; 982bf215546Sopenharmony_ci uint8_t isKill_ : 1; 983bf215546Sopenharmony_ci uint8_t isPrecise_ : 1; 984bf215546Sopenharmony_ci uint8_t isNUW_ : 1; 985bf215546Sopenharmony_ci uint8_t isNoCSE_ : 1; 986bf215546Sopenharmony_ci }; 987bf215546Sopenharmony_ci /* can't initialize bit-fields in c++11, so work around using a union */ 988bf215546Sopenharmony_ci uint8_t control_ = 0; 989bf215546Sopenharmony_ci }; 990bf215546Sopenharmony_ci}; 991bf215546Sopenharmony_ci 992bf215546Sopenharmony_cistruct Block; 993bf215546Sopenharmony_cistruct Instruction; 994bf215546Sopenharmony_cistruct Pseudo_instruction; 995bf215546Sopenharmony_cistruct SOP1_instruction; 996bf215546Sopenharmony_cistruct SOP2_instruction; 997bf215546Sopenharmony_cistruct SOPK_instruction; 998bf215546Sopenharmony_cistruct SOPP_instruction; 999bf215546Sopenharmony_cistruct SOPC_instruction; 1000bf215546Sopenharmony_cistruct SMEM_instruction; 1001bf215546Sopenharmony_cistruct DS_instruction; 1002bf215546Sopenharmony_cistruct MTBUF_instruction; 1003bf215546Sopenharmony_cistruct MUBUF_instruction; 1004bf215546Sopenharmony_cistruct MIMG_instruction; 1005bf215546Sopenharmony_cistruct Export_instruction; 1006bf215546Sopenharmony_cistruct FLAT_instruction; 1007bf215546Sopenharmony_cistruct Pseudo_branch_instruction; 1008bf215546Sopenharmony_cistruct Pseudo_barrier_instruction; 1009bf215546Sopenharmony_cistruct Pseudo_reduction_instruction; 1010bf215546Sopenharmony_cistruct VOP3P_instruction; 1011bf215546Sopenharmony_cistruct VOP1_instruction; 1012bf215546Sopenharmony_cistruct VOP2_instruction; 1013bf215546Sopenharmony_cistruct VOPC_instruction; 1014bf215546Sopenharmony_cistruct VOP3_instruction; 1015bf215546Sopenharmony_cistruct Interp_instruction; 1016bf215546Sopenharmony_cistruct DPP16_instruction; 1017bf215546Sopenharmony_cistruct DPP8_instruction; 1018bf215546Sopenharmony_cistruct SDWA_instruction; 1019bf215546Sopenharmony_ci 1020bf215546Sopenharmony_cistruct Instruction { 1021bf215546Sopenharmony_ci aco_opcode opcode; 1022bf215546Sopenharmony_ci Format format; 1023bf215546Sopenharmony_ci uint32_t pass_flags; 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci aco::span<Operand> operands; 1026bf215546Sopenharmony_ci aco::span<Definition> definitions; 1027bf215546Sopenharmony_ci 1028bf215546Sopenharmony_ci constexpr bool usesModifiers() const noexcept; 1029bf215546Sopenharmony_ci 1030bf215546Sopenharmony_ci constexpr bool reads_exec() const noexcept 1031bf215546Sopenharmony_ci { 1032bf215546Sopenharmony_ci for (const Operand& op : operands) { 1033bf215546Sopenharmony_ci if (op.isFixed() && op.physReg() == exec) 1034bf215546Sopenharmony_ci return true; 1035bf215546Sopenharmony_ci } 1036bf215546Sopenharmony_ci return false; 1037bf215546Sopenharmony_ci } 1038bf215546Sopenharmony_ci 1039bf215546Sopenharmony_ci Pseudo_instruction& pseudo() noexcept 1040bf215546Sopenharmony_ci { 1041bf215546Sopenharmony_ci assert(isPseudo()); 1042bf215546Sopenharmony_ci return *(Pseudo_instruction*)this; 1043bf215546Sopenharmony_ci } 1044bf215546Sopenharmony_ci const Pseudo_instruction& pseudo() const noexcept 1045bf215546Sopenharmony_ci { 1046bf215546Sopenharmony_ci assert(isPseudo()); 1047bf215546Sopenharmony_ci return *(Pseudo_instruction*)this; 1048bf215546Sopenharmony_ci } 1049bf215546Sopenharmony_ci constexpr bool isPseudo() const noexcept { return format == Format::PSEUDO; } 1050bf215546Sopenharmony_ci SOP1_instruction& sop1() noexcept 1051bf215546Sopenharmony_ci { 1052bf215546Sopenharmony_ci assert(isSOP1()); 1053bf215546Sopenharmony_ci return *(SOP1_instruction*)this; 1054bf215546Sopenharmony_ci } 1055bf215546Sopenharmony_ci const SOP1_instruction& sop1() const noexcept 1056bf215546Sopenharmony_ci { 1057bf215546Sopenharmony_ci assert(isSOP1()); 1058bf215546Sopenharmony_ci return *(SOP1_instruction*)this; 1059bf215546Sopenharmony_ci } 1060bf215546Sopenharmony_ci constexpr bool isSOP1() const noexcept { return format == Format::SOP1; } 1061bf215546Sopenharmony_ci SOP2_instruction& sop2() noexcept 1062bf215546Sopenharmony_ci { 1063bf215546Sopenharmony_ci assert(isSOP2()); 1064bf215546Sopenharmony_ci return *(SOP2_instruction*)this; 1065bf215546Sopenharmony_ci } 1066bf215546Sopenharmony_ci const SOP2_instruction& sop2() const noexcept 1067bf215546Sopenharmony_ci { 1068bf215546Sopenharmony_ci assert(isSOP2()); 1069bf215546Sopenharmony_ci return *(SOP2_instruction*)this; 1070bf215546Sopenharmony_ci } 1071bf215546Sopenharmony_ci constexpr bool isSOP2() const noexcept { return format == Format::SOP2; } 1072bf215546Sopenharmony_ci SOPK_instruction& sopk() noexcept 1073bf215546Sopenharmony_ci { 1074bf215546Sopenharmony_ci assert(isSOPK()); 1075bf215546Sopenharmony_ci return *(SOPK_instruction*)this; 1076bf215546Sopenharmony_ci } 1077bf215546Sopenharmony_ci const SOPK_instruction& sopk() const noexcept 1078bf215546Sopenharmony_ci { 1079bf215546Sopenharmony_ci assert(isSOPK()); 1080bf215546Sopenharmony_ci return *(SOPK_instruction*)this; 1081bf215546Sopenharmony_ci } 1082bf215546Sopenharmony_ci constexpr bool isSOPK() const noexcept { return format == Format::SOPK; } 1083bf215546Sopenharmony_ci SOPP_instruction& sopp() noexcept 1084bf215546Sopenharmony_ci { 1085bf215546Sopenharmony_ci assert(isSOPP()); 1086bf215546Sopenharmony_ci return *(SOPP_instruction*)this; 1087bf215546Sopenharmony_ci } 1088bf215546Sopenharmony_ci const SOPP_instruction& sopp() const noexcept 1089bf215546Sopenharmony_ci { 1090bf215546Sopenharmony_ci assert(isSOPP()); 1091bf215546Sopenharmony_ci return *(SOPP_instruction*)this; 1092bf215546Sopenharmony_ci } 1093bf215546Sopenharmony_ci constexpr bool isSOPP() const noexcept { return format == Format::SOPP; } 1094bf215546Sopenharmony_ci SOPC_instruction& sopc() noexcept 1095bf215546Sopenharmony_ci { 1096bf215546Sopenharmony_ci assert(isSOPC()); 1097bf215546Sopenharmony_ci return *(SOPC_instruction*)this; 1098bf215546Sopenharmony_ci } 1099bf215546Sopenharmony_ci const SOPC_instruction& sopc() const noexcept 1100bf215546Sopenharmony_ci { 1101bf215546Sopenharmony_ci assert(isSOPC()); 1102bf215546Sopenharmony_ci return *(SOPC_instruction*)this; 1103bf215546Sopenharmony_ci } 1104bf215546Sopenharmony_ci constexpr bool isSOPC() const noexcept { return format == Format::SOPC; } 1105bf215546Sopenharmony_ci SMEM_instruction& smem() noexcept 1106bf215546Sopenharmony_ci { 1107bf215546Sopenharmony_ci assert(isSMEM()); 1108bf215546Sopenharmony_ci return *(SMEM_instruction*)this; 1109bf215546Sopenharmony_ci } 1110bf215546Sopenharmony_ci const SMEM_instruction& smem() const noexcept 1111bf215546Sopenharmony_ci { 1112bf215546Sopenharmony_ci assert(isSMEM()); 1113bf215546Sopenharmony_ci return *(SMEM_instruction*)this; 1114bf215546Sopenharmony_ci } 1115bf215546Sopenharmony_ci constexpr bool isSMEM() const noexcept { return format == Format::SMEM; } 1116bf215546Sopenharmony_ci DS_instruction& ds() noexcept 1117bf215546Sopenharmony_ci { 1118bf215546Sopenharmony_ci assert(isDS()); 1119bf215546Sopenharmony_ci return *(DS_instruction*)this; 1120bf215546Sopenharmony_ci } 1121bf215546Sopenharmony_ci const DS_instruction& ds() const noexcept 1122bf215546Sopenharmony_ci { 1123bf215546Sopenharmony_ci assert(isDS()); 1124bf215546Sopenharmony_ci return *(DS_instruction*)this; 1125bf215546Sopenharmony_ci } 1126bf215546Sopenharmony_ci constexpr bool isDS() const noexcept { return format == Format::DS; } 1127bf215546Sopenharmony_ci MTBUF_instruction& mtbuf() noexcept 1128bf215546Sopenharmony_ci { 1129bf215546Sopenharmony_ci assert(isMTBUF()); 1130bf215546Sopenharmony_ci return *(MTBUF_instruction*)this; 1131bf215546Sopenharmony_ci } 1132bf215546Sopenharmony_ci const MTBUF_instruction& mtbuf() const noexcept 1133bf215546Sopenharmony_ci { 1134bf215546Sopenharmony_ci assert(isMTBUF()); 1135bf215546Sopenharmony_ci return *(MTBUF_instruction*)this; 1136bf215546Sopenharmony_ci } 1137bf215546Sopenharmony_ci constexpr bool isMTBUF() const noexcept { return format == Format::MTBUF; } 1138bf215546Sopenharmony_ci MUBUF_instruction& mubuf() noexcept 1139bf215546Sopenharmony_ci { 1140bf215546Sopenharmony_ci assert(isMUBUF()); 1141bf215546Sopenharmony_ci return *(MUBUF_instruction*)this; 1142bf215546Sopenharmony_ci } 1143bf215546Sopenharmony_ci const MUBUF_instruction& mubuf() const noexcept 1144bf215546Sopenharmony_ci { 1145bf215546Sopenharmony_ci assert(isMUBUF()); 1146bf215546Sopenharmony_ci return *(MUBUF_instruction*)this; 1147bf215546Sopenharmony_ci } 1148bf215546Sopenharmony_ci constexpr bool isMUBUF() const noexcept { return format == Format::MUBUF; } 1149bf215546Sopenharmony_ci MIMG_instruction& mimg() noexcept 1150bf215546Sopenharmony_ci { 1151bf215546Sopenharmony_ci assert(isMIMG()); 1152bf215546Sopenharmony_ci return *(MIMG_instruction*)this; 1153bf215546Sopenharmony_ci } 1154bf215546Sopenharmony_ci const MIMG_instruction& mimg() const noexcept 1155bf215546Sopenharmony_ci { 1156bf215546Sopenharmony_ci assert(isMIMG()); 1157bf215546Sopenharmony_ci return *(MIMG_instruction*)this; 1158bf215546Sopenharmony_ci } 1159bf215546Sopenharmony_ci constexpr bool isMIMG() const noexcept { return format == Format::MIMG; } 1160bf215546Sopenharmony_ci Export_instruction& exp() noexcept 1161bf215546Sopenharmony_ci { 1162bf215546Sopenharmony_ci assert(isEXP()); 1163bf215546Sopenharmony_ci return *(Export_instruction*)this; 1164bf215546Sopenharmony_ci } 1165bf215546Sopenharmony_ci const Export_instruction& exp() const noexcept 1166bf215546Sopenharmony_ci { 1167bf215546Sopenharmony_ci assert(isEXP()); 1168bf215546Sopenharmony_ci return *(Export_instruction*)this; 1169bf215546Sopenharmony_ci } 1170bf215546Sopenharmony_ci constexpr bool isEXP() const noexcept { return format == Format::EXP; } 1171bf215546Sopenharmony_ci FLAT_instruction& flat() noexcept 1172bf215546Sopenharmony_ci { 1173bf215546Sopenharmony_ci assert(isFlat()); 1174bf215546Sopenharmony_ci return *(FLAT_instruction*)this; 1175bf215546Sopenharmony_ci } 1176bf215546Sopenharmony_ci const FLAT_instruction& flat() const noexcept 1177bf215546Sopenharmony_ci { 1178bf215546Sopenharmony_ci assert(isFlat()); 1179bf215546Sopenharmony_ci return *(FLAT_instruction*)this; 1180bf215546Sopenharmony_ci } 1181bf215546Sopenharmony_ci constexpr bool isFlat() const noexcept { return format == Format::FLAT; } 1182bf215546Sopenharmony_ci FLAT_instruction& global() noexcept 1183bf215546Sopenharmony_ci { 1184bf215546Sopenharmony_ci assert(isGlobal()); 1185bf215546Sopenharmony_ci return *(FLAT_instruction*)this; 1186bf215546Sopenharmony_ci } 1187bf215546Sopenharmony_ci const FLAT_instruction& global() const noexcept 1188bf215546Sopenharmony_ci { 1189bf215546Sopenharmony_ci assert(isGlobal()); 1190bf215546Sopenharmony_ci return *(FLAT_instruction*)this; 1191bf215546Sopenharmony_ci } 1192bf215546Sopenharmony_ci constexpr bool isGlobal() const noexcept { return format == Format::GLOBAL; } 1193bf215546Sopenharmony_ci FLAT_instruction& scratch() noexcept 1194bf215546Sopenharmony_ci { 1195bf215546Sopenharmony_ci assert(isScratch()); 1196bf215546Sopenharmony_ci return *(FLAT_instruction*)this; 1197bf215546Sopenharmony_ci } 1198bf215546Sopenharmony_ci const FLAT_instruction& scratch() const noexcept 1199bf215546Sopenharmony_ci { 1200bf215546Sopenharmony_ci assert(isScratch()); 1201bf215546Sopenharmony_ci return *(FLAT_instruction*)this; 1202bf215546Sopenharmony_ci } 1203bf215546Sopenharmony_ci constexpr bool isScratch() const noexcept { return format == Format::SCRATCH; } 1204bf215546Sopenharmony_ci Pseudo_branch_instruction& branch() noexcept 1205bf215546Sopenharmony_ci { 1206bf215546Sopenharmony_ci assert(isBranch()); 1207bf215546Sopenharmony_ci return *(Pseudo_branch_instruction*)this; 1208bf215546Sopenharmony_ci } 1209bf215546Sopenharmony_ci const Pseudo_branch_instruction& branch() const noexcept 1210bf215546Sopenharmony_ci { 1211bf215546Sopenharmony_ci assert(isBranch()); 1212bf215546Sopenharmony_ci return *(Pseudo_branch_instruction*)this; 1213bf215546Sopenharmony_ci } 1214bf215546Sopenharmony_ci constexpr bool isBranch() const noexcept { return format == Format::PSEUDO_BRANCH; } 1215bf215546Sopenharmony_ci Pseudo_barrier_instruction& barrier() noexcept 1216bf215546Sopenharmony_ci { 1217bf215546Sopenharmony_ci assert(isBarrier()); 1218bf215546Sopenharmony_ci return *(Pseudo_barrier_instruction*)this; 1219bf215546Sopenharmony_ci } 1220bf215546Sopenharmony_ci const Pseudo_barrier_instruction& barrier() const noexcept 1221bf215546Sopenharmony_ci { 1222bf215546Sopenharmony_ci assert(isBarrier()); 1223bf215546Sopenharmony_ci return *(Pseudo_barrier_instruction*)this; 1224bf215546Sopenharmony_ci } 1225bf215546Sopenharmony_ci constexpr bool isBarrier() const noexcept { return format == Format::PSEUDO_BARRIER; } 1226bf215546Sopenharmony_ci Pseudo_reduction_instruction& reduction() noexcept 1227bf215546Sopenharmony_ci { 1228bf215546Sopenharmony_ci assert(isReduction()); 1229bf215546Sopenharmony_ci return *(Pseudo_reduction_instruction*)this; 1230bf215546Sopenharmony_ci } 1231bf215546Sopenharmony_ci const Pseudo_reduction_instruction& reduction() const noexcept 1232bf215546Sopenharmony_ci { 1233bf215546Sopenharmony_ci assert(isReduction()); 1234bf215546Sopenharmony_ci return *(Pseudo_reduction_instruction*)this; 1235bf215546Sopenharmony_ci } 1236bf215546Sopenharmony_ci constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; } 1237bf215546Sopenharmony_ci VOP3P_instruction& vop3p() noexcept 1238bf215546Sopenharmony_ci { 1239bf215546Sopenharmony_ci assert(isVOP3P()); 1240bf215546Sopenharmony_ci return *(VOP3P_instruction*)this; 1241bf215546Sopenharmony_ci } 1242bf215546Sopenharmony_ci const VOP3P_instruction& vop3p() const noexcept 1243bf215546Sopenharmony_ci { 1244bf215546Sopenharmony_ci assert(isVOP3P()); 1245bf215546Sopenharmony_ci return *(VOP3P_instruction*)this; 1246bf215546Sopenharmony_ci } 1247bf215546Sopenharmony_ci constexpr bool isVOP3P() const noexcept { return format == Format::VOP3P; } 1248bf215546Sopenharmony_ci VOP1_instruction& vop1() noexcept 1249bf215546Sopenharmony_ci { 1250bf215546Sopenharmony_ci assert(isVOP1()); 1251bf215546Sopenharmony_ci return *(VOP1_instruction*)this; 1252bf215546Sopenharmony_ci } 1253bf215546Sopenharmony_ci const VOP1_instruction& vop1() const noexcept 1254bf215546Sopenharmony_ci { 1255bf215546Sopenharmony_ci assert(isVOP1()); 1256bf215546Sopenharmony_ci return *(VOP1_instruction*)this; 1257bf215546Sopenharmony_ci } 1258bf215546Sopenharmony_ci constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; } 1259bf215546Sopenharmony_ci VOP2_instruction& vop2() noexcept 1260bf215546Sopenharmony_ci { 1261bf215546Sopenharmony_ci assert(isVOP2()); 1262bf215546Sopenharmony_ci return *(VOP2_instruction*)this; 1263bf215546Sopenharmony_ci } 1264bf215546Sopenharmony_ci const VOP2_instruction& vop2() const noexcept 1265bf215546Sopenharmony_ci { 1266bf215546Sopenharmony_ci assert(isVOP2()); 1267bf215546Sopenharmony_ci return *(VOP2_instruction*)this; 1268bf215546Sopenharmony_ci } 1269bf215546Sopenharmony_ci constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; } 1270bf215546Sopenharmony_ci VOPC_instruction& vopc() noexcept 1271bf215546Sopenharmony_ci { 1272bf215546Sopenharmony_ci assert(isVOPC()); 1273bf215546Sopenharmony_ci return *(VOPC_instruction*)this; 1274bf215546Sopenharmony_ci } 1275bf215546Sopenharmony_ci const VOPC_instruction& vopc() const noexcept 1276bf215546Sopenharmony_ci { 1277bf215546Sopenharmony_ci assert(isVOPC()); 1278bf215546Sopenharmony_ci return *(VOPC_instruction*)this; 1279bf215546Sopenharmony_ci } 1280bf215546Sopenharmony_ci constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; } 1281bf215546Sopenharmony_ci VOP3_instruction& vop3() noexcept 1282bf215546Sopenharmony_ci { 1283bf215546Sopenharmony_ci assert(isVOP3()); 1284bf215546Sopenharmony_ci return *(VOP3_instruction*)this; 1285bf215546Sopenharmony_ci } 1286bf215546Sopenharmony_ci const VOP3_instruction& vop3() const noexcept 1287bf215546Sopenharmony_ci { 1288bf215546Sopenharmony_ci assert(isVOP3()); 1289bf215546Sopenharmony_ci return *(VOP3_instruction*)this; 1290bf215546Sopenharmony_ci } 1291bf215546Sopenharmony_ci constexpr bool isVOP3() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3; } 1292bf215546Sopenharmony_ci Interp_instruction& vintrp() noexcept 1293bf215546Sopenharmony_ci { 1294bf215546Sopenharmony_ci assert(isVINTRP()); 1295bf215546Sopenharmony_ci return *(Interp_instruction*)this; 1296bf215546Sopenharmony_ci } 1297bf215546Sopenharmony_ci const Interp_instruction& vintrp() const noexcept 1298bf215546Sopenharmony_ci { 1299bf215546Sopenharmony_ci assert(isVINTRP()); 1300bf215546Sopenharmony_ci return *(Interp_instruction*)this; 1301bf215546Sopenharmony_ci } 1302bf215546Sopenharmony_ci constexpr bool isVINTRP() const noexcept { return (uint16_t)format & (uint16_t)Format::VINTRP; } 1303bf215546Sopenharmony_ci DPP16_instruction& dpp16() noexcept 1304bf215546Sopenharmony_ci { 1305bf215546Sopenharmony_ci assert(isDPP16()); 1306bf215546Sopenharmony_ci return *(DPP16_instruction*)this; 1307bf215546Sopenharmony_ci } 1308bf215546Sopenharmony_ci const DPP16_instruction& dpp16() const noexcept 1309bf215546Sopenharmony_ci { 1310bf215546Sopenharmony_ci assert(isDPP16()); 1311bf215546Sopenharmony_ci return *(DPP16_instruction*)this; 1312bf215546Sopenharmony_ci } 1313bf215546Sopenharmony_ci constexpr bool isDPP16() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP16; } 1314bf215546Sopenharmony_ci DPP8_instruction& dpp8() noexcept 1315bf215546Sopenharmony_ci { 1316bf215546Sopenharmony_ci assert(isDPP8()); 1317bf215546Sopenharmony_ci return *(DPP8_instruction*)this; 1318bf215546Sopenharmony_ci } 1319bf215546Sopenharmony_ci const DPP8_instruction& dpp8() const noexcept 1320bf215546Sopenharmony_ci { 1321bf215546Sopenharmony_ci assert(isDPP8()); 1322bf215546Sopenharmony_ci return *(DPP8_instruction*)this; 1323bf215546Sopenharmony_ci } 1324bf215546Sopenharmony_ci constexpr bool isDPP8() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP8; } 1325bf215546Sopenharmony_ci constexpr bool isDPP() const noexcept { return isDPP16() || isDPP8(); } 1326bf215546Sopenharmony_ci SDWA_instruction& sdwa() noexcept 1327bf215546Sopenharmony_ci { 1328bf215546Sopenharmony_ci assert(isSDWA()); 1329bf215546Sopenharmony_ci return *(SDWA_instruction*)this; 1330bf215546Sopenharmony_ci } 1331bf215546Sopenharmony_ci const SDWA_instruction& sdwa() const noexcept 1332bf215546Sopenharmony_ci { 1333bf215546Sopenharmony_ci assert(isSDWA()); 1334bf215546Sopenharmony_ci return *(SDWA_instruction*)this; 1335bf215546Sopenharmony_ci } 1336bf215546Sopenharmony_ci constexpr bool isSDWA() const noexcept { return (uint16_t)format & (uint16_t)Format::SDWA; } 1337bf215546Sopenharmony_ci 1338bf215546Sopenharmony_ci FLAT_instruction& flatlike() { return *(FLAT_instruction*)this; } 1339bf215546Sopenharmony_ci 1340bf215546Sopenharmony_ci const FLAT_instruction& flatlike() const { return *(FLAT_instruction*)this; } 1341bf215546Sopenharmony_ci 1342bf215546Sopenharmony_ci constexpr bool isFlatLike() const noexcept { return isFlat() || isGlobal() || isScratch(); } 1343bf215546Sopenharmony_ci 1344bf215546Sopenharmony_ci constexpr bool isVALU() const noexcept 1345bf215546Sopenharmony_ci { 1346bf215546Sopenharmony_ci return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P(); 1347bf215546Sopenharmony_ci } 1348bf215546Sopenharmony_ci 1349bf215546Sopenharmony_ci constexpr bool isSALU() const noexcept 1350bf215546Sopenharmony_ci { 1351bf215546Sopenharmony_ci return isSOP1() || isSOP2() || isSOPC() || isSOPK() || isSOPP(); 1352bf215546Sopenharmony_ci } 1353bf215546Sopenharmony_ci 1354bf215546Sopenharmony_ci constexpr bool isVMEM() const noexcept { return isMTBUF() || isMUBUF() || isMIMG(); } 1355bf215546Sopenharmony_ci}; 1356bf215546Sopenharmony_cistatic_assert(sizeof(Instruction) == 16, "Unexpected padding"); 1357bf215546Sopenharmony_ci 1358bf215546Sopenharmony_cistruct SOPK_instruction : public Instruction { 1359bf215546Sopenharmony_ci uint16_t imm; 1360bf215546Sopenharmony_ci uint16_t padding; 1361bf215546Sopenharmony_ci}; 1362bf215546Sopenharmony_cistatic_assert(sizeof(SOPK_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); 1363bf215546Sopenharmony_ci 1364bf215546Sopenharmony_cistruct SOPP_instruction : public Instruction { 1365bf215546Sopenharmony_ci uint32_t imm; 1366bf215546Sopenharmony_ci int block; 1367bf215546Sopenharmony_ci}; 1368bf215546Sopenharmony_cistatic_assert(sizeof(SOPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_cistruct SOPC_instruction : public Instruction { 1371bf215546Sopenharmony_ci uint32_t padding; 1372bf215546Sopenharmony_ci}; 1373bf215546Sopenharmony_cistatic_assert(sizeof(SOPC_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); 1374bf215546Sopenharmony_ci 1375bf215546Sopenharmony_cistruct SOP1_instruction : public Instruction {}; 1376bf215546Sopenharmony_cistatic_assert(sizeof(SOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); 1377bf215546Sopenharmony_ci 1378bf215546Sopenharmony_cistruct SOP2_instruction : public Instruction { 1379bf215546Sopenharmony_ci uint32_t padding; 1380bf215546Sopenharmony_ci}; 1381bf215546Sopenharmony_cistatic_assert(sizeof(SOP2_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); 1382bf215546Sopenharmony_ci 1383bf215546Sopenharmony_ci/** 1384bf215546Sopenharmony_ci * Scalar Memory Format: 1385bf215546Sopenharmony_ci * For s_(buffer_)load_dword*: 1386bf215546Sopenharmony_ci * Operand(0): SBASE - SGPR-pair which provides base address 1387bf215546Sopenharmony_ci * Operand(1): Offset - immediate (un)signed offset or SGPR 1388bf215546Sopenharmony_ci * Operand(2) / Definition(0): SDATA - SGPR for read / write result 1389bf215546Sopenharmony_ci * Operand(n-1): SOffset - SGPR offset (Vega only) 1390bf215546Sopenharmony_ci * 1391bf215546Sopenharmony_ci * Having no operands is also valid for instructions such as s_dcache_inv. 1392bf215546Sopenharmony_ci * 1393bf215546Sopenharmony_ci */ 1394bf215546Sopenharmony_cistruct SMEM_instruction : public Instruction { 1395bf215546Sopenharmony_ci memory_sync_info sync; 1396bf215546Sopenharmony_ci bool glc : 1; /* VI+: globally coherent */ 1397bf215546Sopenharmony_ci bool dlc : 1; /* NAVI: device level coherent */ 1398bf215546Sopenharmony_ci bool nv : 1; /* VEGA only: Non-volatile */ 1399bf215546Sopenharmony_ci bool disable_wqm : 1; 1400bf215546Sopenharmony_ci bool prevent_overflow : 1; /* avoid overflow when combining additions */ 1401bf215546Sopenharmony_ci uint8_t padding : 3; 1402bf215546Sopenharmony_ci}; 1403bf215546Sopenharmony_cistatic_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); 1404bf215546Sopenharmony_ci 1405bf215546Sopenharmony_cistruct VOP1_instruction : public Instruction {}; 1406bf215546Sopenharmony_cistatic_assert(sizeof(VOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); 1407bf215546Sopenharmony_ci 1408bf215546Sopenharmony_cistruct VOP2_instruction : public Instruction {}; 1409bf215546Sopenharmony_cistatic_assert(sizeof(VOP2_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); 1410bf215546Sopenharmony_ci 1411bf215546Sopenharmony_cistruct VOPC_instruction : public Instruction {}; 1412bf215546Sopenharmony_cistatic_assert(sizeof(VOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); 1413bf215546Sopenharmony_ci 1414bf215546Sopenharmony_cistruct VOP3_instruction : public Instruction { 1415bf215546Sopenharmony_ci bool abs[3]; 1416bf215546Sopenharmony_ci bool neg[3]; 1417bf215546Sopenharmony_ci uint8_t opsel : 4; 1418bf215546Sopenharmony_ci uint8_t omod : 2; 1419bf215546Sopenharmony_ci bool clamp : 1; 1420bf215546Sopenharmony_ci uint8_t padding0 : 1; 1421bf215546Sopenharmony_ci uint8_t padding1; 1422bf215546Sopenharmony_ci}; 1423bf215546Sopenharmony_cistatic_assert(sizeof(VOP3_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 1424bf215546Sopenharmony_ci 1425bf215546Sopenharmony_cistruct VOP3P_instruction : public Instruction { 1426bf215546Sopenharmony_ci bool neg_lo[3]; 1427bf215546Sopenharmony_ci bool neg_hi[3]; /* abs modifier, for v_mad_mix/v_fma_mix */ 1428bf215546Sopenharmony_ci uint8_t opsel_lo : 3; 1429bf215546Sopenharmony_ci uint8_t opsel_hi : 3; 1430bf215546Sopenharmony_ci bool clamp : 1; 1431bf215546Sopenharmony_ci uint8_t padding0 : 1; 1432bf215546Sopenharmony_ci uint8_t padding1; 1433bf215546Sopenharmony_ci}; 1434bf215546Sopenharmony_cistatic_assert(sizeof(VOP3P_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 1435bf215546Sopenharmony_ci 1436bf215546Sopenharmony_ci/** 1437bf215546Sopenharmony_ci * Data Parallel Primitives Format: 1438bf215546Sopenharmony_ci * This format can be used for VOP1, VOP2 or VOPC instructions. 1439bf215546Sopenharmony_ci * The swizzle applies to the src0 operand. 1440bf215546Sopenharmony_ci * 1441bf215546Sopenharmony_ci */ 1442bf215546Sopenharmony_cistruct DPP16_instruction : public Instruction { 1443bf215546Sopenharmony_ci bool abs[2]; 1444bf215546Sopenharmony_ci bool neg[2]; 1445bf215546Sopenharmony_ci uint16_t dpp_ctrl; 1446bf215546Sopenharmony_ci uint8_t row_mask : 4; 1447bf215546Sopenharmony_ci uint8_t bank_mask : 4; 1448bf215546Sopenharmony_ci bool bound_ctrl : 1; 1449bf215546Sopenharmony_ci uint8_t padding : 7; 1450bf215546Sopenharmony_ci}; 1451bf215546Sopenharmony_cistatic_assert(sizeof(DPP16_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 1452bf215546Sopenharmony_ci 1453bf215546Sopenharmony_cistruct DPP8_instruction : public Instruction { 1454bf215546Sopenharmony_ci uint8_t lane_sel[8]; 1455bf215546Sopenharmony_ci}; 1456bf215546Sopenharmony_cistatic_assert(sizeof(DPP8_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 1457bf215546Sopenharmony_ci 1458bf215546Sopenharmony_cistruct SubdwordSel { 1459bf215546Sopenharmony_ci enum sdwa_sel : uint8_t { 1460bf215546Sopenharmony_ci ubyte = 0x4, 1461bf215546Sopenharmony_ci uword = 0x8, 1462bf215546Sopenharmony_ci dword = 0x10, 1463bf215546Sopenharmony_ci sext = 0x20, 1464bf215546Sopenharmony_ci sbyte = ubyte | sext, 1465bf215546Sopenharmony_ci sword = uword | sext, 1466bf215546Sopenharmony_ci 1467bf215546Sopenharmony_ci ubyte0 = ubyte, 1468bf215546Sopenharmony_ci ubyte1 = ubyte | 1, 1469bf215546Sopenharmony_ci ubyte2 = ubyte | 2, 1470bf215546Sopenharmony_ci ubyte3 = ubyte | 3, 1471bf215546Sopenharmony_ci sbyte0 = sbyte, 1472bf215546Sopenharmony_ci sbyte1 = sbyte | 1, 1473bf215546Sopenharmony_ci sbyte2 = sbyte | 2, 1474bf215546Sopenharmony_ci sbyte3 = sbyte | 3, 1475bf215546Sopenharmony_ci uword0 = uword, 1476bf215546Sopenharmony_ci uword1 = uword | 2, 1477bf215546Sopenharmony_ci sword0 = sword, 1478bf215546Sopenharmony_ci sword1 = sword | 2, 1479bf215546Sopenharmony_ci }; 1480bf215546Sopenharmony_ci 1481bf215546Sopenharmony_ci SubdwordSel() : sel((sdwa_sel)0) {} 1482bf215546Sopenharmony_ci constexpr SubdwordSel(sdwa_sel sel_) : sel(sel_) {} 1483bf215546Sopenharmony_ci constexpr SubdwordSel(unsigned size, unsigned offset, bool sign_extend) 1484bf215546Sopenharmony_ci : sel((sdwa_sel)((sign_extend ? sext : 0) | size << 2 | offset)) 1485bf215546Sopenharmony_ci {} 1486bf215546Sopenharmony_ci constexpr operator sdwa_sel() const { return sel; } 1487bf215546Sopenharmony_ci explicit operator bool() const { return sel != 0; } 1488bf215546Sopenharmony_ci 1489bf215546Sopenharmony_ci constexpr unsigned size() const { return (sel >> 2) & 0x7; } 1490bf215546Sopenharmony_ci constexpr unsigned offset() const { return sel & 0x3; } 1491bf215546Sopenharmony_ci constexpr bool sign_extend() const { return sel & sext; } 1492bf215546Sopenharmony_ci constexpr unsigned to_sdwa_sel(unsigned reg_byte_offset) const 1493bf215546Sopenharmony_ci { 1494bf215546Sopenharmony_ci reg_byte_offset += offset(); 1495bf215546Sopenharmony_ci if (size() == 1) 1496bf215546Sopenharmony_ci return reg_byte_offset; 1497bf215546Sopenharmony_ci else if (size() == 2) 1498bf215546Sopenharmony_ci return 4 + (reg_byte_offset >> 1); 1499bf215546Sopenharmony_ci else 1500bf215546Sopenharmony_ci return 6; 1501bf215546Sopenharmony_ci } 1502bf215546Sopenharmony_ci 1503bf215546Sopenharmony_ciprivate: 1504bf215546Sopenharmony_ci sdwa_sel sel; 1505bf215546Sopenharmony_ci}; 1506bf215546Sopenharmony_ci 1507bf215546Sopenharmony_ci/** 1508bf215546Sopenharmony_ci * Sub-Dword Addressing Format: 1509bf215546Sopenharmony_ci * This format can be used for VOP1, VOP2 or VOPC instructions. 1510bf215546Sopenharmony_ci * 1511bf215546Sopenharmony_ci * omod and SGPR/constant operands are only available on GFX9+. For VOPC, 1512bf215546Sopenharmony_ci * the definition doesn't have to be VCC on GFX9+. 1513bf215546Sopenharmony_ci * 1514bf215546Sopenharmony_ci */ 1515bf215546Sopenharmony_cistruct SDWA_instruction : public Instruction { 1516bf215546Sopenharmony_ci /* these destination modifiers aren't available with VOPC except for 1517bf215546Sopenharmony_ci * clamp on GFX8 */ 1518bf215546Sopenharmony_ci SubdwordSel sel[2]; 1519bf215546Sopenharmony_ci SubdwordSel dst_sel; 1520bf215546Sopenharmony_ci bool neg[2]; 1521bf215546Sopenharmony_ci bool abs[2]; 1522bf215546Sopenharmony_ci bool clamp : 1; 1523bf215546Sopenharmony_ci uint8_t omod : 2; /* GFX9+ */ 1524bf215546Sopenharmony_ci uint8_t padding : 5; 1525bf215546Sopenharmony_ci}; 1526bf215546Sopenharmony_cistatic_assert(sizeof(SDWA_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 1527bf215546Sopenharmony_ci 1528bf215546Sopenharmony_cistruct Interp_instruction : public Instruction { 1529bf215546Sopenharmony_ci uint8_t attribute; 1530bf215546Sopenharmony_ci uint8_t component; 1531bf215546Sopenharmony_ci uint16_t padding; 1532bf215546Sopenharmony_ci}; 1533bf215546Sopenharmony_cistatic_assert(sizeof(Interp_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); 1534bf215546Sopenharmony_ci 1535bf215546Sopenharmony_ci/** 1536bf215546Sopenharmony_ci * Local and Global Data Sharing instructions 1537bf215546Sopenharmony_ci * Operand(0): ADDR - VGPR which supplies the address. 1538bf215546Sopenharmony_ci * Operand(1): DATA0 - First data VGPR. 1539bf215546Sopenharmony_ci * Operand(2): DATA1 - Second data VGPR. 1540bf215546Sopenharmony_ci * Operand(n-1): M0 - LDS size. 1541bf215546Sopenharmony_ci * Definition(0): VDST - Destination VGPR when results returned to VGPRs. 1542bf215546Sopenharmony_ci * 1543bf215546Sopenharmony_ci */ 1544bf215546Sopenharmony_cistruct DS_instruction : public Instruction { 1545bf215546Sopenharmony_ci memory_sync_info sync; 1546bf215546Sopenharmony_ci bool gds; 1547bf215546Sopenharmony_ci uint16_t offset0; 1548bf215546Sopenharmony_ci uint8_t offset1; 1549bf215546Sopenharmony_ci uint8_t padding; 1550bf215546Sopenharmony_ci}; 1551bf215546Sopenharmony_cistatic_assert(sizeof(DS_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 1552bf215546Sopenharmony_ci 1553bf215546Sopenharmony_ci/** 1554bf215546Sopenharmony_ci * Vector Memory Untyped-buffer Instructions 1555bf215546Sopenharmony_ci * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant) 1556bf215546Sopenharmony_ci * Operand(1): VADDR - Address source. Can carry an index and/or offset 1557bf215546Sopenharmony_ci * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant) 1558bf215546Sopenharmony_ci * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data 1559bf215546Sopenharmony_ci * 1560bf215546Sopenharmony_ci */ 1561bf215546Sopenharmony_cistruct MUBUF_instruction : public Instruction { 1562bf215546Sopenharmony_ci memory_sync_info sync; 1563bf215546Sopenharmony_ci bool offen : 1; /* Supply an offset from VGPR (VADDR) */ 1564bf215546Sopenharmony_ci bool idxen : 1; /* Supply an index from VGPR (VADDR) */ 1565bf215546Sopenharmony_ci bool addr64 : 1; /* SI, CIK: Address size is 64-bit */ 1566bf215546Sopenharmony_ci bool glc : 1; /* globally coherent */ 1567bf215546Sopenharmony_ci bool dlc : 1; /* NAVI: device level coherent */ 1568bf215546Sopenharmony_ci bool slc : 1; /* system level coherent */ 1569bf215546Sopenharmony_ci bool tfe : 1; /* texture fail enable */ 1570bf215546Sopenharmony_ci bool lds : 1; /* Return read-data to LDS instead of VGPRs */ 1571bf215546Sopenharmony_ci uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */ 1572bf215546Sopenharmony_ci uint16_t offset : 12; /* Unsigned byte offset - 12 bit */ 1573bf215546Sopenharmony_ci uint16_t swizzled : 1; 1574bf215546Sopenharmony_ci uint16_t padding0 : 2; 1575bf215546Sopenharmony_ci uint16_t vtx_binding : 6; /* 0 if this is not a vertex attribute load */ 1576bf215546Sopenharmony_ci uint16_t padding1 : 10; 1577bf215546Sopenharmony_ci}; 1578bf215546Sopenharmony_cistatic_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 1579bf215546Sopenharmony_ci 1580bf215546Sopenharmony_ci/** 1581bf215546Sopenharmony_ci * Vector Memory Typed-buffer Instructions 1582bf215546Sopenharmony_ci * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant) 1583bf215546Sopenharmony_ci * Operand(1): VADDR - Address source. Can carry an index and/or offset 1584bf215546Sopenharmony_ci * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant) 1585bf215546Sopenharmony_ci * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data 1586bf215546Sopenharmony_ci * 1587bf215546Sopenharmony_ci */ 1588bf215546Sopenharmony_cistruct MTBUF_instruction : public Instruction { 1589bf215546Sopenharmony_ci memory_sync_info sync; 1590bf215546Sopenharmony_ci uint8_t dfmt : 4; /* Data Format of data in memory buffer */ 1591bf215546Sopenharmony_ci uint8_t nfmt : 3; /* Numeric format of data in memory */ 1592bf215546Sopenharmony_ci bool offen : 1; /* Supply an offset from VGPR (VADDR) */ 1593bf215546Sopenharmony_ci uint16_t idxen : 1; /* Supply an index from VGPR (VADDR) */ 1594bf215546Sopenharmony_ci uint16_t glc : 1; /* globally coherent */ 1595bf215546Sopenharmony_ci uint16_t dlc : 1; /* NAVI: device level coherent */ 1596bf215546Sopenharmony_ci uint16_t slc : 1; /* system level coherent */ 1597bf215546Sopenharmony_ci uint16_t tfe : 1; /* texture fail enable */ 1598bf215546Sopenharmony_ci uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */ 1599bf215546Sopenharmony_ci uint16_t vtx_binding : 6; /* 0 if this is not a vertex attribute load */ 1600bf215546Sopenharmony_ci uint16_t padding : 4; 1601bf215546Sopenharmony_ci uint16_t offset; /* Unsigned byte offset - 12 bit */ 1602bf215546Sopenharmony_ci}; 1603bf215546Sopenharmony_cistatic_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 1604bf215546Sopenharmony_ci 1605bf215546Sopenharmony_ci/** 1606bf215546Sopenharmony_ci * Vector Memory Image Instructions 1607bf215546Sopenharmony_ci * Operand(0) SRSRC - Scalar GPR that specifies the resource constant. 1608bf215546Sopenharmony_ci * Operand(1): SSAMP - Scalar GPR that specifies sampler constant. 1609bf215546Sopenharmony_ci * Operand(2): VDATA - Vector GPR for write data or zero if TFE/LWE=1. 1610bf215546Sopenharmony_ci * Operand(3): VADDR - Address source. Can carry an offset or an index. 1611bf215546Sopenharmony_ci * Definition(0): VDATA - Vector GPR for read result. 1612bf215546Sopenharmony_ci * 1613bf215546Sopenharmony_ci */ 1614bf215546Sopenharmony_cistruct MIMG_instruction : public Instruction { 1615bf215546Sopenharmony_ci memory_sync_info sync; 1616bf215546Sopenharmony_ci uint8_t dmask; /* Data VGPR enable mask */ 1617bf215546Sopenharmony_ci uint8_t dim : 3; /* NAVI: dimensionality */ 1618bf215546Sopenharmony_ci bool unrm : 1; /* Force address to be un-normalized */ 1619bf215546Sopenharmony_ci bool dlc : 1; /* NAVI: device level coherent */ 1620bf215546Sopenharmony_ci bool glc : 1; /* globally coherent */ 1621bf215546Sopenharmony_ci bool slc : 1; /* system level coherent */ 1622bf215546Sopenharmony_ci bool tfe : 1; /* texture fail enable */ 1623bf215546Sopenharmony_ci bool da : 1; /* declare an array */ 1624bf215546Sopenharmony_ci bool lwe : 1; /* LOD warning enable */ 1625bf215546Sopenharmony_ci bool r128 : 1; /* NAVI: Texture resource size */ 1626bf215546Sopenharmony_ci bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */ 1627bf215546Sopenharmony_ci bool d16 : 1; /* Convert 32-bit data to 16-bit data */ 1628bf215546Sopenharmony_ci bool disable_wqm : 1; /* Require an exec mask without helper invocations */ 1629bf215546Sopenharmony_ci uint8_t padding0 : 2; 1630bf215546Sopenharmony_ci uint8_t padding1; 1631bf215546Sopenharmony_ci uint8_t padding2; 1632bf215546Sopenharmony_ci}; 1633bf215546Sopenharmony_cistatic_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 1634bf215546Sopenharmony_ci 1635bf215546Sopenharmony_ci/** 1636bf215546Sopenharmony_ci * Flat/Scratch/Global Instructions 1637bf215546Sopenharmony_ci * Operand(0): ADDR 1638bf215546Sopenharmony_ci * Operand(1): SADDR 1639bf215546Sopenharmony_ci * Operand(2) / Definition(0): DATA/VDST 1640bf215546Sopenharmony_ci * 1641bf215546Sopenharmony_ci */ 1642bf215546Sopenharmony_cistruct FLAT_instruction : public Instruction { 1643bf215546Sopenharmony_ci memory_sync_info sync; 1644bf215546Sopenharmony_ci bool slc : 1; /* system level coherent */ 1645bf215546Sopenharmony_ci bool glc : 1; /* globally coherent */ 1646bf215546Sopenharmony_ci bool dlc : 1; /* NAVI: device level coherent */ 1647bf215546Sopenharmony_ci bool lds : 1; 1648bf215546Sopenharmony_ci bool nv : 1; 1649bf215546Sopenharmony_ci bool disable_wqm : 1; /* Require an exec mask without helper invocations */ 1650bf215546Sopenharmony_ci uint8_t padding0 : 2; 1651bf215546Sopenharmony_ci int16_t offset; /* Vega/Navi only */ 1652bf215546Sopenharmony_ci uint16_t padding1; 1653bf215546Sopenharmony_ci}; 1654bf215546Sopenharmony_cistatic_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 1655bf215546Sopenharmony_ci 1656bf215546Sopenharmony_cistruct Export_instruction : public Instruction { 1657bf215546Sopenharmony_ci uint8_t enabled_mask; 1658bf215546Sopenharmony_ci uint8_t dest; 1659bf215546Sopenharmony_ci bool compressed : 1; 1660bf215546Sopenharmony_ci bool done : 1; 1661bf215546Sopenharmony_ci bool valid_mask : 1; 1662bf215546Sopenharmony_ci uint8_t padding0 : 5; 1663bf215546Sopenharmony_ci uint8_t padding1; 1664bf215546Sopenharmony_ci}; 1665bf215546Sopenharmony_cistatic_assert(sizeof(Export_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); 1666bf215546Sopenharmony_ci 1667bf215546Sopenharmony_cistruct Pseudo_instruction : public Instruction { 1668bf215546Sopenharmony_ci PhysReg scratch_sgpr; /* might not be valid if it's not needed */ 1669bf215546Sopenharmony_ci bool tmp_in_scc; 1670bf215546Sopenharmony_ci uint8_t padding; 1671bf215546Sopenharmony_ci}; 1672bf215546Sopenharmony_cistatic_assert(sizeof(Pseudo_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); 1673bf215546Sopenharmony_ci 1674bf215546Sopenharmony_cistruct Pseudo_branch_instruction : public Instruction { 1675bf215546Sopenharmony_ci /* target[0] is the block index of the branch target. 1676bf215546Sopenharmony_ci * For conditional branches, target[1] contains the fall-through alternative. 1677bf215546Sopenharmony_ci * A value of 0 means the target has not been initialized (BB0 cannot be a branch target). 1678bf215546Sopenharmony_ci */ 1679bf215546Sopenharmony_ci uint32_t target[2]; 1680bf215546Sopenharmony_ci}; 1681bf215546Sopenharmony_cistatic_assert(sizeof(Pseudo_branch_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); 1682bf215546Sopenharmony_ci 1683bf215546Sopenharmony_cistruct Pseudo_barrier_instruction : public Instruction { 1684bf215546Sopenharmony_ci memory_sync_info sync; 1685bf215546Sopenharmony_ci sync_scope exec_scope; 1686bf215546Sopenharmony_ci}; 1687bf215546Sopenharmony_cistatic_assert(sizeof(Pseudo_barrier_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); 1688bf215546Sopenharmony_ci 1689bf215546Sopenharmony_cienum ReduceOp : uint16_t { 1690bf215546Sopenharmony_ci // clang-format off 1691bf215546Sopenharmony_ci iadd8, iadd16, iadd32, iadd64, 1692bf215546Sopenharmony_ci imul8, imul16, imul32, imul64, 1693bf215546Sopenharmony_ci fadd16, fadd32, fadd64, 1694bf215546Sopenharmony_ci fmul16, fmul32, fmul64, 1695bf215546Sopenharmony_ci imin8, imin16, imin32, imin64, 1696bf215546Sopenharmony_ci imax8, imax16, imax32, imax64, 1697bf215546Sopenharmony_ci umin8, umin16, umin32, umin64, 1698bf215546Sopenharmony_ci umax8, umax16, umax32, umax64, 1699bf215546Sopenharmony_ci fmin16, fmin32, fmin64, 1700bf215546Sopenharmony_ci fmax16, fmax32, fmax64, 1701bf215546Sopenharmony_ci iand8, iand16, iand32, iand64, 1702bf215546Sopenharmony_ci ior8, ior16, ior32, ior64, 1703bf215546Sopenharmony_ci ixor8, ixor16, ixor32, ixor64, 1704bf215546Sopenharmony_ci num_reduce_ops, 1705bf215546Sopenharmony_ci // clang-format on 1706bf215546Sopenharmony_ci}; 1707bf215546Sopenharmony_ci 1708bf215546Sopenharmony_ci/** 1709bf215546Sopenharmony_ci * Subgroup Reduction Instructions, everything except for the data to be 1710bf215546Sopenharmony_ci * reduced and the result as inserted by setup_reduce_temp(). 1711bf215546Sopenharmony_ci * Operand(0): data to be reduced 1712bf215546Sopenharmony_ci * Operand(1): reduce temporary 1713bf215546Sopenharmony_ci * Operand(2): vector temporary 1714bf215546Sopenharmony_ci * Definition(0): result 1715bf215546Sopenharmony_ci * Definition(1): scalar temporary 1716bf215546Sopenharmony_ci * Definition(2): scalar identity temporary (not used to store identity on GFX10) 1717bf215546Sopenharmony_ci * Definition(3): scc clobber 1718bf215546Sopenharmony_ci * Definition(4): vcc clobber 1719bf215546Sopenharmony_ci * 1720bf215546Sopenharmony_ci */ 1721bf215546Sopenharmony_cistruct Pseudo_reduction_instruction : public Instruction { 1722bf215546Sopenharmony_ci ReduceOp reduce_op; 1723bf215546Sopenharmony_ci uint16_t cluster_size; // must be 0 for scans 1724bf215546Sopenharmony_ci}; 1725bf215546Sopenharmony_cistatic_assert(sizeof(Pseudo_reduction_instruction) == sizeof(Instruction) + 4, 1726bf215546Sopenharmony_ci "Unexpected padding"); 1727bf215546Sopenharmony_ci 1728bf215546Sopenharmony_cistruct instr_deleter_functor { 1729bf215546Sopenharmony_ci void operator()(void* p) { free(p); } 1730bf215546Sopenharmony_ci}; 1731bf215546Sopenharmony_ci 1732bf215546Sopenharmony_citemplate <typename T> using aco_ptr = std::unique_ptr<T, instr_deleter_functor>; 1733bf215546Sopenharmony_ci 1734bf215546Sopenharmony_citemplate <typename T> 1735bf215546Sopenharmony_ciT* 1736bf215546Sopenharmony_cicreate_instruction(aco_opcode opcode, Format format, uint32_t num_operands, 1737bf215546Sopenharmony_ci uint32_t num_definitions) 1738bf215546Sopenharmony_ci{ 1739bf215546Sopenharmony_ci std::size_t size = 1740bf215546Sopenharmony_ci sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition); 1741bf215546Sopenharmony_ci char* data = (char*)calloc(1, size); 1742bf215546Sopenharmony_ci T* inst = (T*)data; 1743bf215546Sopenharmony_ci 1744bf215546Sopenharmony_ci inst->opcode = opcode; 1745bf215546Sopenharmony_ci inst->format = format; 1746bf215546Sopenharmony_ci 1747bf215546Sopenharmony_ci uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands; 1748bf215546Sopenharmony_ci inst->operands = aco::span<Operand>(operands_offset, num_operands); 1749bf215546Sopenharmony_ci uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions; 1750bf215546Sopenharmony_ci inst->definitions = aco::span<Definition>(definitions_offset, num_definitions); 1751bf215546Sopenharmony_ci 1752bf215546Sopenharmony_ci return inst; 1753bf215546Sopenharmony_ci} 1754bf215546Sopenharmony_ci 1755bf215546Sopenharmony_ciconstexpr bool 1756bf215546Sopenharmony_ciInstruction::usesModifiers() const noexcept 1757bf215546Sopenharmony_ci{ 1758bf215546Sopenharmony_ci if (isDPP() || isSDWA()) 1759bf215546Sopenharmony_ci return true; 1760bf215546Sopenharmony_ci 1761bf215546Sopenharmony_ci if (isVOP3P()) { 1762bf215546Sopenharmony_ci const VOP3P_instruction& vop3p = this->vop3p(); 1763bf215546Sopenharmony_ci for (unsigned i = 0; i < operands.size(); i++) { 1764bf215546Sopenharmony_ci if (vop3p.neg_lo[i] || vop3p.neg_hi[i]) 1765bf215546Sopenharmony_ci return true; 1766bf215546Sopenharmony_ci 1767bf215546Sopenharmony_ci /* opsel_hi must be 1 to not be considered a modifier - even for constants */ 1768bf215546Sopenharmony_ci if (!(vop3p.opsel_hi & (1 << i))) 1769bf215546Sopenharmony_ci return true; 1770bf215546Sopenharmony_ci } 1771bf215546Sopenharmony_ci return vop3p.opsel_lo || vop3p.clamp; 1772bf215546Sopenharmony_ci } else if (isVOP3()) { 1773bf215546Sopenharmony_ci const VOP3_instruction& vop3 = this->vop3(); 1774bf215546Sopenharmony_ci for (unsigned i = 0; i < operands.size(); i++) { 1775bf215546Sopenharmony_ci if (vop3.abs[i] || vop3.neg[i]) 1776bf215546Sopenharmony_ci return true; 1777bf215546Sopenharmony_ci } 1778bf215546Sopenharmony_ci return vop3.opsel || vop3.clamp || vop3.omod; 1779bf215546Sopenharmony_ci } 1780bf215546Sopenharmony_ci return false; 1781bf215546Sopenharmony_ci} 1782bf215546Sopenharmony_ci 1783bf215546Sopenharmony_ciconstexpr bool 1784bf215546Sopenharmony_ciis_phi(Instruction* instr) 1785bf215546Sopenharmony_ci{ 1786bf215546Sopenharmony_ci return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi; 1787bf215546Sopenharmony_ci} 1788bf215546Sopenharmony_ci 1789bf215546Sopenharmony_cistatic inline bool 1790bf215546Sopenharmony_ciis_phi(aco_ptr<Instruction>& instr) 1791bf215546Sopenharmony_ci{ 1792bf215546Sopenharmony_ci return is_phi(instr.get()); 1793bf215546Sopenharmony_ci} 1794bf215546Sopenharmony_ci 1795bf215546Sopenharmony_cimemory_sync_info get_sync_info(const Instruction* instr); 1796bf215546Sopenharmony_ci 1797bf215546Sopenharmony_cibool is_dead(const std::vector<uint16_t>& uses, Instruction* instr); 1798bf215546Sopenharmony_ci 1799bf215546Sopenharmony_cibool can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx); 1800bf215546Sopenharmony_cibool instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op); 1801bf215546Sopenharmony_cibool can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool pre_ra); 1802bf215546Sopenharmony_cibool can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra, bool dpp8); 1803bf215546Sopenharmony_ci/* updates "instr" and returns the old instruction (or NULL if no update was needed) */ 1804bf215546Sopenharmony_ciaco_ptr<Instruction> convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr); 1805bf215546Sopenharmony_ciaco_ptr<Instruction> convert_to_DPP(aco_ptr<Instruction>& instr, bool dpp8); 1806bf215546Sopenharmony_cibool needs_exec_mask(const Instruction* instr); 1807bf215546Sopenharmony_ci 1808bf215546Sopenharmony_ciaco_opcode get_ordered(aco_opcode op); 1809bf215546Sopenharmony_ciaco_opcode get_unordered(aco_opcode op); 1810bf215546Sopenharmony_ciaco_opcode get_inverse(aco_opcode op); 1811bf215546Sopenharmony_ciaco_opcode get_f32_cmp(aco_opcode op); 1812bf215546Sopenharmony_ciaco_opcode get_vcmpx(aco_opcode op); 1813bf215546Sopenharmony_ciunsigned get_cmp_bitsize(aco_opcode op); 1814bf215546Sopenharmony_cibool is_cmp(aco_opcode op); 1815bf215546Sopenharmony_ci 1816bf215546Sopenharmony_cibool can_swap_operands(aco_ptr<Instruction>& instr, aco_opcode* new_op); 1817bf215546Sopenharmony_ci 1818bf215546Sopenharmony_ciuint32_t get_reduction_identity(ReduceOp op, unsigned idx); 1819bf215546Sopenharmony_ci 1820bf215546Sopenharmony_ciunsigned get_mimg_nsa_dwords(const Instruction* instr); 1821bf215546Sopenharmony_ci 1822bf215546Sopenharmony_cibool should_form_clause(const Instruction* a, const Instruction* b); 1823bf215546Sopenharmony_ci 1824bf215546Sopenharmony_cienum block_kind { 1825bf215546Sopenharmony_ci /* uniform indicates that leaving this block, 1826bf215546Sopenharmony_ci * all actives lanes stay active */ 1827bf215546Sopenharmony_ci block_kind_uniform = 1 << 0, 1828bf215546Sopenharmony_ci block_kind_top_level = 1 << 1, 1829bf215546Sopenharmony_ci block_kind_loop_preheader = 1 << 2, 1830bf215546Sopenharmony_ci block_kind_loop_header = 1 << 3, 1831bf215546Sopenharmony_ci block_kind_loop_exit = 1 << 4, 1832bf215546Sopenharmony_ci block_kind_continue = 1 << 5, 1833bf215546Sopenharmony_ci block_kind_break = 1 << 6, 1834bf215546Sopenharmony_ci block_kind_continue_or_break = 1 << 7, 1835bf215546Sopenharmony_ci block_kind_branch = 1 << 8, 1836bf215546Sopenharmony_ci block_kind_merge = 1 << 9, 1837bf215546Sopenharmony_ci block_kind_invert = 1 << 10, 1838bf215546Sopenharmony_ci block_kind_uses_discard = 1 << 12, 1839bf215546Sopenharmony_ci block_kind_needs_lowering = 1 << 13, 1840bf215546Sopenharmony_ci block_kind_export_end = 1 << 15, 1841bf215546Sopenharmony_ci}; 1842bf215546Sopenharmony_ci 1843bf215546Sopenharmony_cistruct RegisterDemand { 1844bf215546Sopenharmony_ci constexpr RegisterDemand() = default; 1845bf215546Sopenharmony_ci constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept : vgpr{v}, sgpr{s} {} 1846bf215546Sopenharmony_ci int16_t vgpr = 0; 1847bf215546Sopenharmony_ci int16_t sgpr = 0; 1848bf215546Sopenharmony_ci 1849bf215546Sopenharmony_ci constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept 1850bf215546Sopenharmony_ci { 1851bf215546Sopenharmony_ci return a.vgpr == b.vgpr && a.sgpr == b.sgpr; 1852bf215546Sopenharmony_ci } 1853bf215546Sopenharmony_ci 1854bf215546Sopenharmony_ci constexpr bool exceeds(const RegisterDemand other) const noexcept 1855bf215546Sopenharmony_ci { 1856bf215546Sopenharmony_ci return vgpr > other.vgpr || sgpr > other.sgpr; 1857bf215546Sopenharmony_ci } 1858bf215546Sopenharmony_ci 1859bf215546Sopenharmony_ci constexpr RegisterDemand operator+(const Temp t) const noexcept 1860bf215546Sopenharmony_ci { 1861bf215546Sopenharmony_ci if (t.type() == RegType::sgpr) 1862bf215546Sopenharmony_ci return RegisterDemand(vgpr, sgpr + t.size()); 1863bf215546Sopenharmony_ci else 1864bf215546Sopenharmony_ci return RegisterDemand(vgpr + t.size(), sgpr); 1865bf215546Sopenharmony_ci } 1866bf215546Sopenharmony_ci 1867bf215546Sopenharmony_ci constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept 1868bf215546Sopenharmony_ci { 1869bf215546Sopenharmony_ci return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr); 1870bf215546Sopenharmony_ci } 1871bf215546Sopenharmony_ci 1872bf215546Sopenharmony_ci constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept 1873bf215546Sopenharmony_ci { 1874bf215546Sopenharmony_ci return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr); 1875bf215546Sopenharmony_ci } 1876bf215546Sopenharmony_ci 1877bf215546Sopenharmony_ci constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept 1878bf215546Sopenharmony_ci { 1879bf215546Sopenharmony_ci vgpr += other.vgpr; 1880bf215546Sopenharmony_ci sgpr += other.sgpr; 1881bf215546Sopenharmony_ci return *this; 1882bf215546Sopenharmony_ci } 1883bf215546Sopenharmony_ci 1884bf215546Sopenharmony_ci constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept 1885bf215546Sopenharmony_ci { 1886bf215546Sopenharmony_ci vgpr -= other.vgpr; 1887bf215546Sopenharmony_ci sgpr -= other.sgpr; 1888bf215546Sopenharmony_ci return *this; 1889bf215546Sopenharmony_ci } 1890bf215546Sopenharmony_ci 1891bf215546Sopenharmony_ci constexpr RegisterDemand& operator+=(const Temp t) noexcept 1892bf215546Sopenharmony_ci { 1893bf215546Sopenharmony_ci if (t.type() == RegType::sgpr) 1894bf215546Sopenharmony_ci sgpr += t.size(); 1895bf215546Sopenharmony_ci else 1896bf215546Sopenharmony_ci vgpr += t.size(); 1897bf215546Sopenharmony_ci return *this; 1898bf215546Sopenharmony_ci } 1899bf215546Sopenharmony_ci 1900bf215546Sopenharmony_ci constexpr RegisterDemand& operator-=(const Temp t) noexcept 1901bf215546Sopenharmony_ci { 1902bf215546Sopenharmony_ci if (t.type() == RegType::sgpr) 1903bf215546Sopenharmony_ci sgpr -= t.size(); 1904bf215546Sopenharmony_ci else 1905bf215546Sopenharmony_ci vgpr -= t.size(); 1906bf215546Sopenharmony_ci return *this; 1907bf215546Sopenharmony_ci } 1908bf215546Sopenharmony_ci 1909bf215546Sopenharmony_ci constexpr void update(const RegisterDemand other) noexcept 1910bf215546Sopenharmony_ci { 1911bf215546Sopenharmony_ci vgpr = std::max(vgpr, other.vgpr); 1912bf215546Sopenharmony_ci sgpr = std::max(sgpr, other.sgpr); 1913bf215546Sopenharmony_ci } 1914bf215546Sopenharmony_ci}; 1915bf215546Sopenharmony_ci 1916bf215546Sopenharmony_ci/* CFG */ 1917bf215546Sopenharmony_cistruct Block { 1918bf215546Sopenharmony_ci float_mode fp_mode; 1919bf215546Sopenharmony_ci unsigned index; 1920bf215546Sopenharmony_ci unsigned offset = 0; 1921bf215546Sopenharmony_ci std::vector<aco_ptr<Instruction>> instructions; 1922bf215546Sopenharmony_ci std::vector<unsigned> logical_preds; 1923bf215546Sopenharmony_ci std::vector<unsigned> linear_preds; 1924bf215546Sopenharmony_ci std::vector<unsigned> logical_succs; 1925bf215546Sopenharmony_ci std::vector<unsigned> linear_succs; 1926bf215546Sopenharmony_ci RegisterDemand register_demand = RegisterDemand(); 1927bf215546Sopenharmony_ci uint16_t loop_nest_depth = 0; 1928bf215546Sopenharmony_ci uint16_t divergent_if_logical_depth = 0; 1929bf215546Sopenharmony_ci uint16_t uniform_if_depth = 0; 1930bf215546Sopenharmony_ci uint16_t kind = 0; 1931bf215546Sopenharmony_ci int logical_idom = -1; 1932bf215546Sopenharmony_ci int linear_idom = -1; 1933bf215546Sopenharmony_ci 1934bf215546Sopenharmony_ci /* this information is needed for predecessors to blocks with phis when 1935bf215546Sopenharmony_ci * moving out of ssa */ 1936bf215546Sopenharmony_ci bool scc_live_out = false; 1937bf215546Sopenharmony_ci 1938bf215546Sopenharmony_ci Block() : index(0) {} 1939bf215546Sopenharmony_ci}; 1940bf215546Sopenharmony_ci 1941bf215546Sopenharmony_ci/* 1942bf215546Sopenharmony_ci * Shader stages as provided in Vulkan by the application. Contrast this to HWStage. 1943bf215546Sopenharmony_ci */ 1944bf215546Sopenharmony_cienum class SWStage : uint16_t { 1945bf215546Sopenharmony_ci None = 0, 1946bf215546Sopenharmony_ci VS = 1 << 0, /* Vertex Shader */ 1947bf215546Sopenharmony_ci GS = 1 << 1, /* Geometry Shader */ 1948bf215546Sopenharmony_ci TCS = 1 << 2, /* Tessellation Control aka Hull Shader */ 1949bf215546Sopenharmony_ci TES = 1 << 3, /* Tessellation Evaluation aka Domain Shader */ 1950bf215546Sopenharmony_ci FS = 1 << 4, /* Fragment aka Pixel Shader */ 1951bf215546Sopenharmony_ci CS = 1 << 5, /* Compute Shader */ 1952bf215546Sopenharmony_ci TS = 1 << 6, /* Task Shader */ 1953bf215546Sopenharmony_ci MS = 1 << 7, /* Mesh Shader */ 1954bf215546Sopenharmony_ci GSCopy = 1 << 8, /* GS Copy Shader (internal) */ 1955bf215546Sopenharmony_ci 1956bf215546Sopenharmony_ci /* Stage combinations merged to run on a single HWStage */ 1957bf215546Sopenharmony_ci VS_GS = VS | GS, 1958bf215546Sopenharmony_ci VS_TCS = VS | TCS, 1959bf215546Sopenharmony_ci TES_GS = TES | GS, 1960bf215546Sopenharmony_ci}; 1961bf215546Sopenharmony_ci 1962bf215546Sopenharmony_ciconstexpr SWStage 1963bf215546Sopenharmony_cioperator|(SWStage a, SWStage b) 1964bf215546Sopenharmony_ci{ 1965bf215546Sopenharmony_ci return static_cast<SWStage>(static_cast<uint16_t>(a) | static_cast<uint16_t>(b)); 1966bf215546Sopenharmony_ci} 1967bf215546Sopenharmony_ci 1968bf215546Sopenharmony_ci/* 1969bf215546Sopenharmony_ci * Shader stages as running on the AMD GPU. 1970bf215546Sopenharmony_ci * 1971bf215546Sopenharmony_ci * The relation between HWStages and SWStages is not a one-to-one mapping: 1972bf215546Sopenharmony_ci * Some SWStages are merged by ACO to run on a single HWStage. 1973bf215546Sopenharmony_ci * See README.md for details. 1974bf215546Sopenharmony_ci */ 1975bf215546Sopenharmony_cienum class HWStage : uint8_t { 1976bf215546Sopenharmony_ci VS, 1977bf215546Sopenharmony_ci ES, /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */ 1978bf215546Sopenharmony_ci GS, /* Geometry shader on GFX10/legacy and GFX6-9. */ 1979bf215546Sopenharmony_ci NGG, /* Primitive shader, used to implement VS, TES, GS. */ 1980bf215546Sopenharmony_ci LS, /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */ 1981bf215546Sopenharmony_ci HS, /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */ 1982bf215546Sopenharmony_ci FS, 1983bf215546Sopenharmony_ci CS, 1984bf215546Sopenharmony_ci}; 1985bf215546Sopenharmony_ci 1986bf215546Sopenharmony_ci/* 1987bf215546Sopenharmony_ci * Set of SWStages to be merged into a single shader paired with the 1988bf215546Sopenharmony_ci * HWStage it will run on. 1989bf215546Sopenharmony_ci */ 1990bf215546Sopenharmony_cistruct Stage { 1991bf215546Sopenharmony_ci constexpr Stage() = default; 1992bf215546Sopenharmony_ci 1993bf215546Sopenharmony_ci explicit constexpr Stage(HWStage hw_, SWStage sw_) : sw(sw_), hw(hw_) {} 1994bf215546Sopenharmony_ci 1995bf215546Sopenharmony_ci /* Check if the given SWStage is included */ 1996bf215546Sopenharmony_ci constexpr bool has(SWStage stage) const 1997bf215546Sopenharmony_ci { 1998bf215546Sopenharmony_ci return (static_cast<uint16_t>(sw) & static_cast<uint16_t>(stage)); 1999bf215546Sopenharmony_ci } 2000bf215546Sopenharmony_ci 2001bf215546Sopenharmony_ci unsigned num_sw_stages() const { return util_bitcount(static_cast<uint16_t>(sw)); } 2002bf215546Sopenharmony_ci 2003bf215546Sopenharmony_ci constexpr bool operator==(const Stage& other) const { return sw == other.sw && hw == other.hw; } 2004bf215546Sopenharmony_ci 2005bf215546Sopenharmony_ci constexpr bool operator!=(const Stage& other) const { return sw != other.sw || hw != other.hw; } 2006bf215546Sopenharmony_ci 2007bf215546Sopenharmony_ci /* Mask of merged software stages */ 2008bf215546Sopenharmony_ci SWStage sw = SWStage::None; 2009bf215546Sopenharmony_ci 2010bf215546Sopenharmony_ci /* Active hardware stage */ 2011bf215546Sopenharmony_ci HWStage hw{}; 2012bf215546Sopenharmony_ci}; 2013bf215546Sopenharmony_ci 2014bf215546Sopenharmony_ci/* possible settings of Program::stage */ 2015bf215546Sopenharmony_cistatic constexpr Stage vertex_vs(HWStage::VS, SWStage::VS); 2016bf215546Sopenharmony_cistatic constexpr Stage fragment_fs(HWStage::FS, SWStage::FS); 2017bf215546Sopenharmony_cistatic constexpr Stage compute_cs(HWStage::CS, SWStage::CS); 2018bf215546Sopenharmony_cistatic constexpr Stage tess_eval_vs(HWStage::VS, SWStage::TES); 2019bf215546Sopenharmony_cistatic constexpr Stage gs_copy_vs(HWStage::VS, SWStage::GSCopy); 2020bf215546Sopenharmony_ci/* Mesh shading pipeline */ 2021bf215546Sopenharmony_cistatic constexpr Stage task_cs(HWStage::CS, SWStage::TS); 2022bf215546Sopenharmony_cistatic constexpr Stage mesh_ngg(HWStage::NGG, SWStage::MS); 2023bf215546Sopenharmony_ci/* GFX10/NGG */ 2024bf215546Sopenharmony_cistatic constexpr Stage vertex_ngg(HWStage::NGG, SWStage::VS); 2025bf215546Sopenharmony_cistatic constexpr Stage vertex_geometry_ngg(HWStage::NGG, SWStage::VS_GS); 2026bf215546Sopenharmony_cistatic constexpr Stage tess_eval_ngg(HWStage::NGG, SWStage::TES); 2027bf215546Sopenharmony_cistatic constexpr Stage tess_eval_geometry_ngg(HWStage::NGG, SWStage::TES_GS); 2028bf215546Sopenharmony_ci/* GFX9 (and GFX10 if NGG isn't used) */ 2029bf215546Sopenharmony_cistatic constexpr Stage vertex_geometry_gs(HWStage::GS, SWStage::VS_GS); 2030bf215546Sopenharmony_cistatic constexpr Stage vertex_tess_control_hs(HWStage::HS, SWStage::VS_TCS); 2031bf215546Sopenharmony_cistatic constexpr Stage tess_eval_geometry_gs(HWStage::GS, SWStage::TES_GS); 2032bf215546Sopenharmony_ci/* pre-GFX9 */ 2033bf215546Sopenharmony_cistatic constexpr Stage vertex_ls(HWStage::LS, SWStage::VS); /* vertex before tesselation control */ 2034bf215546Sopenharmony_cistatic constexpr Stage vertex_es(HWStage::ES, SWStage::VS); /* vertex before geometry */ 2035bf215546Sopenharmony_cistatic constexpr Stage tess_control_hs(HWStage::HS, SWStage::TCS); 2036bf215546Sopenharmony_cistatic constexpr Stage tess_eval_es(HWStage::ES, 2037bf215546Sopenharmony_ci SWStage::TES); /* tesselation evaluation before geometry */ 2038bf215546Sopenharmony_cistatic constexpr Stage geometry_gs(HWStage::GS, SWStage::GS); 2039bf215546Sopenharmony_ci 2040bf215546Sopenharmony_cienum statistic { 2041bf215546Sopenharmony_ci statistic_hash, 2042bf215546Sopenharmony_ci statistic_instructions, 2043bf215546Sopenharmony_ci statistic_copies, 2044bf215546Sopenharmony_ci statistic_branches, 2045bf215546Sopenharmony_ci statistic_latency, 2046bf215546Sopenharmony_ci statistic_inv_throughput, 2047bf215546Sopenharmony_ci statistic_vmem_clauses, 2048bf215546Sopenharmony_ci statistic_smem_clauses, 2049bf215546Sopenharmony_ci statistic_sgpr_presched, 2050bf215546Sopenharmony_ci statistic_vgpr_presched, 2051bf215546Sopenharmony_ci num_statistics 2052bf215546Sopenharmony_ci}; 2053bf215546Sopenharmony_ci 2054bf215546Sopenharmony_cistruct DeviceInfo { 2055bf215546Sopenharmony_ci uint16_t lds_encoding_granule; 2056bf215546Sopenharmony_ci uint16_t lds_alloc_granule; 2057bf215546Sopenharmony_ci uint32_t lds_limit; /* in bytes */ 2058bf215546Sopenharmony_ci bool has_16bank_lds; 2059bf215546Sopenharmony_ci uint16_t physical_sgprs; 2060bf215546Sopenharmony_ci uint16_t physical_vgprs; 2061bf215546Sopenharmony_ci uint16_t vgpr_limit; 2062bf215546Sopenharmony_ci uint16_t sgpr_limit; 2063bf215546Sopenharmony_ci uint16_t sgpr_alloc_granule; 2064bf215546Sopenharmony_ci uint16_t vgpr_alloc_granule; /* must be power of two */ 2065bf215546Sopenharmony_ci unsigned max_wave64_per_simd; 2066bf215546Sopenharmony_ci unsigned simd_per_cu; 2067bf215546Sopenharmony_ci bool has_fast_fma32 = false; 2068bf215546Sopenharmony_ci bool has_mac_legacy32 = false; 2069bf215546Sopenharmony_ci bool fused_mad_mix = false; 2070bf215546Sopenharmony_ci bool xnack_enabled = false; 2071bf215546Sopenharmony_ci bool sram_ecc_enabled = false; 2072bf215546Sopenharmony_ci 2073bf215546Sopenharmony_ci int16_t scratch_global_offset_min; 2074bf215546Sopenharmony_ci int16_t scratch_global_offset_max; 2075bf215546Sopenharmony_ci}; 2076bf215546Sopenharmony_ci 2077bf215546Sopenharmony_cienum class CompilationProgress { 2078bf215546Sopenharmony_ci after_isel, 2079bf215546Sopenharmony_ci after_spilling, 2080bf215546Sopenharmony_ci after_ra, 2081bf215546Sopenharmony_ci}; 2082bf215546Sopenharmony_ci 2083bf215546Sopenharmony_ciclass Program final { 2084bf215546Sopenharmony_cipublic: 2085bf215546Sopenharmony_ci std::vector<Block> blocks; 2086bf215546Sopenharmony_ci std::vector<RegClass> temp_rc = {s1}; 2087bf215546Sopenharmony_ci RegisterDemand max_reg_demand = RegisterDemand(); 2088bf215546Sopenharmony_ci ac_shader_config* config; 2089bf215546Sopenharmony_ci struct aco_shader_info info; 2090bf215546Sopenharmony_ci enum amd_gfx_level gfx_level; 2091bf215546Sopenharmony_ci enum radeon_family family; 2092bf215546Sopenharmony_ci DeviceInfo dev; 2093bf215546Sopenharmony_ci unsigned wave_size; 2094bf215546Sopenharmony_ci RegClass lane_mask; 2095bf215546Sopenharmony_ci Stage stage; 2096bf215546Sopenharmony_ci bool needs_exact = false; /* there exists an instruction with disable_wqm = true */ 2097bf215546Sopenharmony_ci bool needs_wqm = false; /* there exists a p_wqm instruction */ 2098bf215546Sopenharmony_ci 2099bf215546Sopenharmony_ci std::vector<uint8_t> constant_data; 2100bf215546Sopenharmony_ci Temp private_segment_buffer; 2101bf215546Sopenharmony_ci Temp scratch_offset; 2102bf215546Sopenharmony_ci 2103bf215546Sopenharmony_ci uint16_t num_waves = 0; 2104bf215546Sopenharmony_ci uint16_t min_waves = 0; 2105bf215546Sopenharmony_ci unsigned workgroup_size; /* if known; otherwise UINT_MAX */ 2106bf215546Sopenharmony_ci bool wgp_mode; 2107bf215546Sopenharmony_ci bool early_rast = false; /* whether rasterization can start as soon as the 1st DONE pos export */ 2108bf215546Sopenharmony_ci 2109bf215546Sopenharmony_ci bool needs_vcc = false; 2110bf215546Sopenharmony_ci 2111bf215546Sopenharmony_ci CompilationProgress progress; 2112bf215546Sopenharmony_ci 2113bf215546Sopenharmony_ci bool collect_statistics = false; 2114bf215546Sopenharmony_ci uint32_t statistics[num_statistics]; 2115bf215546Sopenharmony_ci 2116bf215546Sopenharmony_ci float_mode next_fp_mode; 2117bf215546Sopenharmony_ci unsigned next_loop_depth = 0; 2118bf215546Sopenharmony_ci unsigned next_divergent_if_logical_depth = 0; 2119bf215546Sopenharmony_ci unsigned next_uniform_if_depth = 0; 2120bf215546Sopenharmony_ci 2121bf215546Sopenharmony_ci std::vector<Definition> vs_inputs; 2122bf215546Sopenharmony_ci 2123bf215546Sopenharmony_ci struct { 2124bf215546Sopenharmony_ci FILE* output = stderr; 2125bf215546Sopenharmony_ci bool shorten_messages = false; 2126bf215546Sopenharmony_ci void (*func)(void* private_data, enum aco_compiler_debug_level level, const char* message); 2127bf215546Sopenharmony_ci void* private_data; 2128bf215546Sopenharmony_ci } debug; 2129bf215546Sopenharmony_ci 2130bf215546Sopenharmony_ci uint32_t allocateId(RegClass rc) 2131bf215546Sopenharmony_ci { 2132bf215546Sopenharmony_ci assert(allocationID <= 16777215); 2133bf215546Sopenharmony_ci temp_rc.push_back(rc); 2134bf215546Sopenharmony_ci return allocationID++; 2135bf215546Sopenharmony_ci } 2136bf215546Sopenharmony_ci 2137bf215546Sopenharmony_ci void allocateRange(unsigned amount) 2138bf215546Sopenharmony_ci { 2139bf215546Sopenharmony_ci assert(allocationID + amount <= 16777216); 2140bf215546Sopenharmony_ci temp_rc.resize(temp_rc.size() + amount); 2141bf215546Sopenharmony_ci allocationID += amount; 2142bf215546Sopenharmony_ci } 2143bf215546Sopenharmony_ci 2144bf215546Sopenharmony_ci Temp allocateTmp(RegClass rc) { return Temp(allocateId(rc), rc); } 2145bf215546Sopenharmony_ci 2146bf215546Sopenharmony_ci uint32_t peekAllocationId() { return allocationID; } 2147bf215546Sopenharmony_ci 2148bf215546Sopenharmony_ci friend void reindex_ssa(Program* program); 2149bf215546Sopenharmony_ci friend void reindex_ssa(Program* program, std::vector<IDSet>& live_out); 2150bf215546Sopenharmony_ci 2151bf215546Sopenharmony_ci Block* create_and_insert_block() 2152bf215546Sopenharmony_ci { 2153bf215546Sopenharmony_ci Block block; 2154bf215546Sopenharmony_ci return insert_block(std::move(block)); 2155bf215546Sopenharmony_ci } 2156bf215546Sopenharmony_ci 2157bf215546Sopenharmony_ci Block* insert_block(Block&& block) 2158bf215546Sopenharmony_ci { 2159bf215546Sopenharmony_ci block.index = blocks.size(); 2160bf215546Sopenharmony_ci block.fp_mode = next_fp_mode; 2161bf215546Sopenharmony_ci block.loop_nest_depth = next_loop_depth; 2162bf215546Sopenharmony_ci block.divergent_if_logical_depth = next_divergent_if_logical_depth; 2163bf215546Sopenharmony_ci block.uniform_if_depth = next_uniform_if_depth; 2164bf215546Sopenharmony_ci blocks.emplace_back(std::move(block)); 2165bf215546Sopenharmony_ci return &blocks.back(); 2166bf215546Sopenharmony_ci } 2167bf215546Sopenharmony_ci 2168bf215546Sopenharmony_ciprivate: 2169bf215546Sopenharmony_ci uint32_t allocationID = 1; 2170bf215546Sopenharmony_ci}; 2171bf215546Sopenharmony_ci 2172bf215546Sopenharmony_cistruct live { 2173bf215546Sopenharmony_ci /* live temps out per block */ 2174bf215546Sopenharmony_ci std::vector<IDSet> live_out; 2175bf215546Sopenharmony_ci /* register demand (sgpr/vgpr) per instruction per block */ 2176bf215546Sopenharmony_ci std::vector<std::vector<RegisterDemand>> register_demand; 2177bf215546Sopenharmony_ci}; 2178bf215546Sopenharmony_ci 2179bf215546Sopenharmony_cistruct ra_test_policy { 2180bf215546Sopenharmony_ci /* Force RA to always use its pessimistic fallback algorithm */ 2181bf215546Sopenharmony_ci bool skip_optimistic_path = false; 2182bf215546Sopenharmony_ci}; 2183bf215546Sopenharmony_ci 2184bf215546Sopenharmony_civoid init(); 2185bf215546Sopenharmony_ci 2186bf215546Sopenharmony_civoid init_program(Program* program, Stage stage, const struct aco_shader_info* info, 2187bf215546Sopenharmony_ci enum amd_gfx_level gfx_level, enum radeon_family family, bool wgp_mode, 2188bf215546Sopenharmony_ci ac_shader_config* config); 2189bf215546Sopenharmony_ci 2190bf215546Sopenharmony_civoid select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders, 2191bf215546Sopenharmony_ci ac_shader_config* config, const struct aco_compiler_options* options, 2192bf215546Sopenharmony_ci const struct aco_shader_info* info, 2193bf215546Sopenharmony_ci const struct radv_shader_args* args); 2194bf215546Sopenharmony_civoid select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_config* config, 2195bf215546Sopenharmony_ci const struct aco_compiler_options* options, 2196bf215546Sopenharmony_ci const struct aco_shader_info* info, 2197bf215546Sopenharmony_ci const struct radv_shader_args* args); 2198bf215546Sopenharmony_civoid select_trap_handler_shader(Program* program, struct nir_shader* shader, 2199bf215546Sopenharmony_ci ac_shader_config* config, 2200bf215546Sopenharmony_ci const struct aco_compiler_options* options, 2201bf215546Sopenharmony_ci const struct aco_shader_info* info, 2202bf215546Sopenharmony_ci const struct radv_shader_args* args); 2203bf215546Sopenharmony_civoid select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, 2204bf215546Sopenharmony_ci ac_shader_config* config, 2205bf215546Sopenharmony_ci const struct aco_compiler_options* options, 2206bf215546Sopenharmony_ci const struct aco_shader_info* info, 2207bf215546Sopenharmony_ci const struct radv_shader_args* args, 2208bf215546Sopenharmony_ci unsigned* num_preserved_sgprs); 2209bf215546Sopenharmony_ci 2210bf215546Sopenharmony_civoid select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, 2211bf215546Sopenharmony_ci ac_shader_config* config, 2212bf215546Sopenharmony_ci const struct aco_compiler_options* options, 2213bf215546Sopenharmony_ci const struct aco_shader_info* info, 2214bf215546Sopenharmony_ci const struct radv_shader_args* args); 2215bf215546Sopenharmony_ci 2216bf215546Sopenharmony_civoid lower_phis(Program* program); 2217bf215546Sopenharmony_civoid calc_min_waves(Program* program); 2218bf215546Sopenharmony_civoid update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand); 2219bf215546Sopenharmony_cilive live_var_analysis(Program* program); 2220bf215546Sopenharmony_cistd::vector<uint16_t> dead_code_analysis(Program* program); 2221bf215546Sopenharmony_civoid dominator_tree(Program* program); 2222bf215546Sopenharmony_civoid insert_exec_mask(Program* program); 2223bf215546Sopenharmony_civoid value_numbering(Program* program); 2224bf215546Sopenharmony_civoid optimize(Program* program); 2225bf215546Sopenharmony_civoid optimize_postRA(Program* program); 2226bf215546Sopenharmony_civoid setup_reduce_temp(Program* program); 2227bf215546Sopenharmony_civoid lower_to_cssa(Program* program, live& live_vars); 2228bf215546Sopenharmony_civoid register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, 2229bf215546Sopenharmony_ci ra_test_policy = {}); 2230bf215546Sopenharmony_civoid ssa_elimination(Program* program); 2231bf215546Sopenharmony_civoid lower_to_hw_instr(Program* program); 2232bf215546Sopenharmony_civoid schedule_program(Program* program, live& live_vars); 2233bf215546Sopenharmony_civoid spill(Program* program, live& live_vars); 2234bf215546Sopenharmony_civoid insert_wait_states(Program* program); 2235bf215546Sopenharmony_civoid insert_NOPs(Program* program); 2236bf215546Sopenharmony_civoid form_hard_clauses(Program* program); 2237bf215546Sopenharmony_ciunsigned emit_program(Program* program, std::vector<uint32_t>& code); 2238bf215546Sopenharmony_ci/** 2239bf215546Sopenharmony_ci * Returns true if print_asm can disassemble the given program for the current build/runtime 2240bf215546Sopenharmony_ci * configuration 2241bf215546Sopenharmony_ci */ 2242bf215546Sopenharmony_cibool check_print_asm_support(Program* program); 2243bf215546Sopenharmony_cibool print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output); 2244bf215546Sopenharmony_cibool validate_ir(Program* program); 2245bf215546Sopenharmony_cibool validate_ra(Program* program); 2246bf215546Sopenharmony_ci#ifndef NDEBUG 2247bf215546Sopenharmony_civoid perfwarn(Program* program, bool cond, const char* msg, Instruction* instr = NULL); 2248bf215546Sopenharmony_ci#else 2249bf215546Sopenharmony_ci#define perfwarn(program, cond, msg, ...) \ 2250bf215546Sopenharmony_ci do { \ 2251bf215546Sopenharmony_ci } while (0) 2252bf215546Sopenharmony_ci#endif 2253bf215546Sopenharmony_ci 2254bf215546Sopenharmony_civoid collect_presched_stats(Program* program); 2255bf215546Sopenharmony_civoid collect_preasm_stats(Program* program); 2256bf215546Sopenharmony_civoid collect_postasm_stats(Program* program, const std::vector<uint32_t>& code); 2257bf215546Sopenharmony_ci 2258bf215546Sopenharmony_cienum print_flags { 2259bf215546Sopenharmony_ci print_no_ssa = 0x1, 2260bf215546Sopenharmony_ci print_perf_info = 0x2, 2261bf215546Sopenharmony_ci print_kill = 0x4, 2262bf215546Sopenharmony_ci print_live_vars = 0x8, 2263bf215546Sopenharmony_ci}; 2264bf215546Sopenharmony_ci 2265bf215546Sopenharmony_civoid aco_print_operand(const Operand* operand, FILE* output, unsigned flags = 0); 2266bf215546Sopenharmony_civoid aco_print_instr(const Instruction* instr, FILE* output, unsigned flags = 0); 2267bf215546Sopenharmony_civoid aco_print_program(const Program* program, FILE* output, unsigned flags = 0); 2268bf215546Sopenharmony_civoid aco_print_program(const Program* program, FILE* output, const live& live_vars, 2269bf215546Sopenharmony_ci unsigned flags = 0); 2270bf215546Sopenharmony_ci 2271bf215546Sopenharmony_civoid _aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...); 2272bf215546Sopenharmony_civoid _aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...); 2273bf215546Sopenharmony_ci 2274bf215546Sopenharmony_ci#define aco_perfwarn(program, ...) _aco_perfwarn(program, __FILE__, __LINE__, __VA_ARGS__) 2275bf215546Sopenharmony_ci#define aco_err(program, ...) _aco_err(program, __FILE__, __LINE__, __VA_ARGS__) 2276bf215546Sopenharmony_ci 2277bf215546Sopenharmony_ci/* utilities for dealing with register demand */ 2278bf215546Sopenharmony_ciRegisterDemand get_live_changes(aco_ptr<Instruction>& instr); 2279bf215546Sopenharmony_ciRegisterDemand get_temp_registers(aco_ptr<Instruction>& instr); 2280bf215546Sopenharmony_ciRegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr, 2281bf215546Sopenharmony_ci aco_ptr<Instruction>& instr_before); 2282bf215546Sopenharmony_ci 2283bf215546Sopenharmony_ci/* number of sgprs that need to be allocated but might notbe addressable as s0-s105 */ 2284bf215546Sopenharmony_ciuint16_t get_extra_sgprs(Program* program); 2285bf215546Sopenharmony_ci 2286bf215546Sopenharmony_ci/* adjust num_waves for workgroup size and LDS limits */ 2287bf215546Sopenharmony_ciuint16_t max_suitable_waves(Program* program, uint16_t waves); 2288bf215546Sopenharmony_ci 2289bf215546Sopenharmony_ci/* get number of sgprs/vgprs allocated required to address a number of sgprs/vgprs */ 2290bf215546Sopenharmony_ciuint16_t get_sgpr_alloc(Program* program, uint16_t addressable_sgprs); 2291bf215546Sopenharmony_ciuint16_t get_vgpr_alloc(Program* program, uint16_t addressable_vgprs); 2292bf215546Sopenharmony_ci 2293bf215546Sopenharmony_ci/* return number of addressable sgprs/vgprs for max_waves */ 2294bf215546Sopenharmony_ciuint16_t get_addr_sgpr_from_waves(Program* program, uint16_t max_waves); 2295bf215546Sopenharmony_ciuint16_t get_addr_vgpr_from_waves(Program* program, uint16_t max_waves); 2296bf215546Sopenharmony_ci 2297bf215546Sopenharmony_citypedef struct { 2298bf215546Sopenharmony_ci const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)]; 2299bf215546Sopenharmony_ci const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)]; 2300bf215546Sopenharmony_ci const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)]; 2301bf215546Sopenharmony_ci const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers; 2302bf215546Sopenharmony_ci const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers; 2303bf215546Sopenharmony_ci const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic; 2304bf215546Sopenharmony_ci const char* name[static_cast<int>(aco_opcode::num_opcodes)]; 2305bf215546Sopenharmony_ci const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)]; 2306bf215546Sopenharmony_ci /* sizes used for input/output modifiers and constants */ 2307bf215546Sopenharmony_ci const unsigned operand_size[static_cast<int>(aco_opcode::num_opcodes)]; 2308bf215546Sopenharmony_ci const instr_class classes[static_cast<int>(aco_opcode::num_opcodes)]; 2309bf215546Sopenharmony_ci} Info; 2310bf215546Sopenharmony_ci 2311bf215546Sopenharmony_ciextern const Info instr_info; 2312bf215546Sopenharmony_ci 2313bf215546Sopenharmony_ci} // namespace aco 2314bf215546Sopenharmony_ci 2315bf215546Sopenharmony_ci#endif /* ACO_IR_H */ 2316