1bf215546Sopenharmony_ci 2bf215546Sopenharmony_citemplate = """\ 3bf215546Sopenharmony_ci/* 4bf215546Sopenharmony_ci * Copyright (c) 2019 Valve Corporation 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 8bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 9bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 11bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 12bf215546Sopenharmony_ci * 13bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 14bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 15bf215546Sopenharmony_ci * Software. 16bf215546Sopenharmony_ci * 17bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 23bf215546Sopenharmony_ci * IN THE SOFTWARE. 24bf215546Sopenharmony_ci * 25bf215546Sopenharmony_ci * This file was generated by aco_builder_h.py 26bf215546Sopenharmony_ci */ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#ifndef _ACO_BUILDER_ 29bf215546Sopenharmony_ci#define _ACO_BUILDER_ 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "aco_ir.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_cinamespace aco { 34bf215546Sopenharmony_cienum dpp_ctrl { 35bf215546Sopenharmony_ci _dpp_quad_perm = 0x000, 36bf215546Sopenharmony_ci _dpp_row_sl = 0x100, 37bf215546Sopenharmony_ci _dpp_row_sr = 0x110, 38bf215546Sopenharmony_ci _dpp_row_rr = 0x120, 39bf215546Sopenharmony_ci dpp_wf_sl1 = 0x130, 40bf215546Sopenharmony_ci dpp_wf_rl1 = 0x134, 41bf215546Sopenharmony_ci dpp_wf_sr1 = 0x138, 42bf215546Sopenharmony_ci dpp_wf_rr1 = 0x13C, 43bf215546Sopenharmony_ci dpp_row_mirror = 0x140, 44bf215546Sopenharmony_ci dpp_row_half_mirror = 0x141, 45bf215546Sopenharmony_ci dpp_row_bcast15 = 0x142, 46bf215546Sopenharmony_ci dpp_row_bcast31 = 0x143 47bf215546Sopenharmony_ci}; 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ciinline dpp_ctrl 50bf215546Sopenharmony_cidpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3) 51bf215546Sopenharmony_ci{ 52bf215546Sopenharmony_ci assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4); 53bf215546Sopenharmony_ci return (dpp_ctrl)(lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6)); 54bf215546Sopenharmony_ci} 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ciinline dpp_ctrl 57bf215546Sopenharmony_cidpp_row_sl(unsigned amount) 58bf215546Sopenharmony_ci{ 59bf215546Sopenharmony_ci assert(amount > 0 && amount < 16); 60bf215546Sopenharmony_ci return (dpp_ctrl)(((unsigned) _dpp_row_sl) | amount); 61bf215546Sopenharmony_ci} 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ciinline dpp_ctrl 64bf215546Sopenharmony_cidpp_row_sr(unsigned amount) 65bf215546Sopenharmony_ci{ 66bf215546Sopenharmony_ci assert(amount > 0 && amount < 16); 67bf215546Sopenharmony_ci return (dpp_ctrl)(((unsigned) _dpp_row_sr) | amount); 68bf215546Sopenharmony_ci} 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ciinline dpp_ctrl 71bf215546Sopenharmony_cidpp_row_rr(unsigned amount) 72bf215546Sopenharmony_ci{ 73bf215546Sopenharmony_ci assert(amount > 0 && amount < 16); 74bf215546Sopenharmony_ci return (dpp_ctrl)(((unsigned) _dpp_row_rr) | amount); 75bf215546Sopenharmony_ci} 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ciinline unsigned 78bf215546Sopenharmony_cids_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask) 79bf215546Sopenharmony_ci{ 80bf215546Sopenharmony_ci assert(and_mask < 32 && or_mask < 32 && xor_mask < 32); 81bf215546Sopenharmony_ci return and_mask | (or_mask << 5) | (xor_mask << 10); 82bf215546Sopenharmony_ci} 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ciaco_ptr<Instruction> create_s_mov(Definition dst, Operand src); 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_cienum sendmsg { 87bf215546Sopenharmony_ci sendmsg_none = 0, 88bf215546Sopenharmony_ci _sendmsg_gs = 2, 89bf215546Sopenharmony_ci _sendmsg_gs_done = 3, 90bf215546Sopenharmony_ci sendmsg_save_wave = 4, 91bf215546Sopenharmony_ci sendmsg_stall_wave_gen = 5, 92bf215546Sopenharmony_ci sendmsg_halt_waves = 6, 93bf215546Sopenharmony_ci sendmsg_ordered_ps_done = 7, 94bf215546Sopenharmony_ci sendmsg_early_prim_dealloc = 8, 95bf215546Sopenharmony_ci sendmsg_gs_alloc_req = 9, 96bf215546Sopenharmony_ci sendmsg_id_mask = 0xf, 97bf215546Sopenharmony_ci}; 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ciinline sendmsg 100bf215546Sopenharmony_cisendmsg_gs(bool cut, bool emit, unsigned stream) 101bf215546Sopenharmony_ci{ 102bf215546Sopenharmony_ci assert(stream < 4); 103bf215546Sopenharmony_ci return (sendmsg)((unsigned)_sendmsg_gs | (cut << 4) | (emit << 5) | (stream << 8)); 104bf215546Sopenharmony_ci} 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ciinline sendmsg 107bf215546Sopenharmony_cisendmsg_gs_done(bool cut, bool emit, unsigned stream) 108bf215546Sopenharmony_ci{ 109bf215546Sopenharmony_ci assert(stream < 4); 110bf215546Sopenharmony_ci return (sendmsg)((unsigned)_sendmsg_gs_done | (cut << 4) | (emit << 5) | (stream << 8)); 111bf215546Sopenharmony_ci} 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_cienum bperm_swiz { 114bf215546Sopenharmony_ci bperm_b1_sign = 8, 115bf215546Sopenharmony_ci bperm_b3_sign = 9, 116bf215546Sopenharmony_ci bperm_b5_sign = 10, 117bf215546Sopenharmony_ci bperm_b7_sign = 11, 118bf215546Sopenharmony_ci bperm_0 = 12, 119bf215546Sopenharmony_ci bperm_255 = 13, 120bf215546Sopenharmony_ci}; 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ciclass Builder { 123bf215546Sopenharmony_cipublic: 124bf215546Sopenharmony_ci struct Result { 125bf215546Sopenharmony_ci Instruction *instr; 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci Result(Instruction *instr_) : instr(instr_) {} 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci operator Instruction *() const { 130bf215546Sopenharmony_ci return instr; 131bf215546Sopenharmony_ci } 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci operator Temp() const { 134bf215546Sopenharmony_ci return instr->definitions[0].getTemp(); 135bf215546Sopenharmony_ci } 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci operator Operand() const { 138bf215546Sopenharmony_ci return Operand((Temp)*this); 139bf215546Sopenharmony_ci } 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci Definition& def(unsigned index) const { 142bf215546Sopenharmony_ci return instr->definitions[index]; 143bf215546Sopenharmony_ci } 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci aco_ptr<Instruction> get_ptr() const { 146bf215546Sopenharmony_ci return aco_ptr<Instruction>(instr); 147bf215546Sopenharmony_ci } 148bf215546Sopenharmony_ci }; 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci struct Op { 151bf215546Sopenharmony_ci Operand op; 152bf215546Sopenharmony_ci Op(Temp tmp) : op(tmp) {} 153bf215546Sopenharmony_ci Op(Operand op_) : op(op_) {} 154bf215546Sopenharmony_ci Op(Result res) : op((Temp)res) {} 155bf215546Sopenharmony_ci }; 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci enum WaveSpecificOpcode { 158bf215546Sopenharmony_ci s_cselect = (unsigned) aco_opcode::s_cselect_b64, 159bf215546Sopenharmony_ci s_cmp_lg = (unsigned) aco_opcode::s_cmp_lg_u64, 160bf215546Sopenharmony_ci s_and = (unsigned) aco_opcode::s_and_b64, 161bf215546Sopenharmony_ci s_andn2 = (unsigned) aco_opcode::s_andn2_b64, 162bf215546Sopenharmony_ci s_or = (unsigned) aco_opcode::s_or_b64, 163bf215546Sopenharmony_ci s_orn2 = (unsigned) aco_opcode::s_orn2_b64, 164bf215546Sopenharmony_ci s_not = (unsigned) aco_opcode::s_not_b64, 165bf215546Sopenharmony_ci s_mov = (unsigned) aco_opcode::s_mov_b64, 166bf215546Sopenharmony_ci s_wqm = (unsigned) aco_opcode::s_wqm_b64, 167bf215546Sopenharmony_ci s_and_saveexec = (unsigned) aco_opcode::s_and_saveexec_b64, 168bf215546Sopenharmony_ci s_or_saveexec = (unsigned) aco_opcode::s_or_saveexec_b64, 169bf215546Sopenharmony_ci s_xnor = (unsigned) aco_opcode::s_xnor_b64, 170bf215546Sopenharmony_ci s_xor = (unsigned) aco_opcode::s_xor_b64, 171bf215546Sopenharmony_ci s_bcnt1_i32 = (unsigned) aco_opcode::s_bcnt1_i32_b64, 172bf215546Sopenharmony_ci s_bitcmp1 = (unsigned) aco_opcode::s_bitcmp1_b64, 173bf215546Sopenharmony_ci s_ff1_i32 = (unsigned) aco_opcode::s_ff1_i32_b64, 174bf215546Sopenharmony_ci s_flbit_i32 = (unsigned) aco_opcode::s_flbit_i32_b64, 175bf215546Sopenharmony_ci s_lshl = (unsigned) aco_opcode::s_lshl_b64, 176bf215546Sopenharmony_ci }; 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci Program *program; 179bf215546Sopenharmony_ci bool use_iterator; 180bf215546Sopenharmony_ci bool start; // only when use_iterator == false 181bf215546Sopenharmony_ci RegClass lm; 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci std::vector<aco_ptr<Instruction>> *instructions; 184bf215546Sopenharmony_ci std::vector<aco_ptr<Instruction>>::iterator it; 185bf215546Sopenharmony_ci bool is_precise = false; 186bf215546Sopenharmony_ci bool is_nuw = false; 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci Builder(Program *pgm) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(NULL) {} 189bf215546Sopenharmony_ci Builder(Program *pgm, Block *block) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(&block->instructions) {} 190bf215546Sopenharmony_ci Builder(Program *pgm, std::vector<aco_ptr<Instruction>> *instrs) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(instrs) {} 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci Builder precise() const { 193bf215546Sopenharmony_ci Builder res = *this; 194bf215546Sopenharmony_ci res.is_precise = true; 195bf215546Sopenharmony_ci return res; 196bf215546Sopenharmony_ci }; 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci Builder nuw() const { 199bf215546Sopenharmony_ci Builder res = *this; 200bf215546Sopenharmony_ci res.is_nuw = true; 201bf215546Sopenharmony_ci return res; 202bf215546Sopenharmony_ci } 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci void moveEnd(Block *block) { 205bf215546Sopenharmony_ci instructions = &block->instructions; 206bf215546Sopenharmony_ci } 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci void reset() { 209bf215546Sopenharmony_ci use_iterator = false; 210bf215546Sopenharmony_ci start = false; 211bf215546Sopenharmony_ci instructions = NULL; 212bf215546Sopenharmony_ci } 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci void reset(Block *block) { 215bf215546Sopenharmony_ci use_iterator = false; 216bf215546Sopenharmony_ci start = false; 217bf215546Sopenharmony_ci instructions = &block->instructions; 218bf215546Sopenharmony_ci } 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci void reset(std::vector<aco_ptr<Instruction>> *instrs) { 221bf215546Sopenharmony_ci use_iterator = false; 222bf215546Sopenharmony_ci start = false; 223bf215546Sopenharmony_ci instructions = instrs; 224bf215546Sopenharmony_ci } 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci void reset(std::vector<aco_ptr<Instruction>> *instrs, std::vector<aco_ptr<Instruction>>::iterator instr_it) { 227bf215546Sopenharmony_ci use_iterator = true; 228bf215546Sopenharmony_ci start = false; 229bf215546Sopenharmony_ci instructions = instrs; 230bf215546Sopenharmony_ci it = instr_it; 231bf215546Sopenharmony_ci } 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci Result insert(aco_ptr<Instruction> instr) { 234bf215546Sopenharmony_ci Instruction *instr_ptr = instr.get(); 235bf215546Sopenharmony_ci if (instructions) { 236bf215546Sopenharmony_ci if (use_iterator) { 237bf215546Sopenharmony_ci it = instructions->emplace(it, std::move(instr)); 238bf215546Sopenharmony_ci it = std::next(it); 239bf215546Sopenharmony_ci } else if (!start) { 240bf215546Sopenharmony_ci instructions->emplace_back(std::move(instr)); 241bf215546Sopenharmony_ci } else { 242bf215546Sopenharmony_ci instructions->emplace(instructions->begin(), std::move(instr)); 243bf215546Sopenharmony_ci } 244bf215546Sopenharmony_ci } 245bf215546Sopenharmony_ci return Result(instr_ptr); 246bf215546Sopenharmony_ci } 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci Result insert(Instruction* instr) { 249bf215546Sopenharmony_ci if (instructions) { 250bf215546Sopenharmony_ci if (use_iterator) { 251bf215546Sopenharmony_ci it = instructions->emplace(it, aco_ptr<Instruction>(instr)); 252bf215546Sopenharmony_ci it = std::next(it); 253bf215546Sopenharmony_ci } else if (!start) { 254bf215546Sopenharmony_ci instructions->emplace_back(aco_ptr<Instruction>(instr)); 255bf215546Sopenharmony_ci } else { 256bf215546Sopenharmony_ci instructions->emplace(instructions->begin(), aco_ptr<Instruction>(instr)); 257bf215546Sopenharmony_ci } 258bf215546Sopenharmony_ci } 259bf215546Sopenharmony_ci return Result(instr); 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci Temp tmp(RegClass rc) { 263bf215546Sopenharmony_ci return program->allocateTmp(rc); 264bf215546Sopenharmony_ci } 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci Temp tmp(RegType type, unsigned size) { 267bf215546Sopenharmony_ci return tmp(RegClass(type, size)); 268bf215546Sopenharmony_ci } 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci Definition def(RegClass rc) { 271bf215546Sopenharmony_ci return Definition(program->allocateTmp(rc)); 272bf215546Sopenharmony_ci } 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci Definition def(RegType type, unsigned size) { 275bf215546Sopenharmony_ci return def(RegClass(type, size)); 276bf215546Sopenharmony_ci } 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci Definition def(RegClass rc, PhysReg reg) { 279bf215546Sopenharmony_ci return Definition(program->allocateId(rc), reg, rc); 280bf215546Sopenharmony_ci } 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ci inline aco_opcode w64or32(WaveSpecificOpcode opcode) const { 283bf215546Sopenharmony_ci if (program->wave_size == 64) 284bf215546Sopenharmony_ci return (aco_opcode) opcode; 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_ci switch (opcode) { 287bf215546Sopenharmony_ci case s_cselect: 288bf215546Sopenharmony_ci return aco_opcode::s_cselect_b32; 289bf215546Sopenharmony_ci case s_cmp_lg: 290bf215546Sopenharmony_ci return aco_opcode::s_cmp_lg_u32; 291bf215546Sopenharmony_ci case s_and: 292bf215546Sopenharmony_ci return aco_opcode::s_and_b32; 293bf215546Sopenharmony_ci case s_andn2: 294bf215546Sopenharmony_ci return aco_opcode::s_andn2_b32; 295bf215546Sopenharmony_ci case s_or: 296bf215546Sopenharmony_ci return aco_opcode::s_or_b32; 297bf215546Sopenharmony_ci case s_orn2: 298bf215546Sopenharmony_ci return aco_opcode::s_orn2_b32; 299bf215546Sopenharmony_ci case s_not: 300bf215546Sopenharmony_ci return aco_opcode::s_not_b32; 301bf215546Sopenharmony_ci case s_mov: 302bf215546Sopenharmony_ci return aco_opcode::s_mov_b32; 303bf215546Sopenharmony_ci case s_wqm: 304bf215546Sopenharmony_ci return aco_opcode::s_wqm_b32; 305bf215546Sopenharmony_ci case s_and_saveexec: 306bf215546Sopenharmony_ci return aco_opcode::s_and_saveexec_b32; 307bf215546Sopenharmony_ci case s_or_saveexec: 308bf215546Sopenharmony_ci return aco_opcode::s_or_saveexec_b32; 309bf215546Sopenharmony_ci case s_xnor: 310bf215546Sopenharmony_ci return aco_opcode::s_xnor_b32; 311bf215546Sopenharmony_ci case s_xor: 312bf215546Sopenharmony_ci return aco_opcode::s_xor_b32; 313bf215546Sopenharmony_ci case s_bcnt1_i32: 314bf215546Sopenharmony_ci return aco_opcode::s_bcnt1_i32_b32; 315bf215546Sopenharmony_ci case s_bitcmp1: 316bf215546Sopenharmony_ci return aco_opcode::s_bitcmp1_b32; 317bf215546Sopenharmony_ci case s_ff1_i32: 318bf215546Sopenharmony_ci return aco_opcode::s_ff1_i32_b32; 319bf215546Sopenharmony_ci case s_flbit_i32: 320bf215546Sopenharmony_ci return aco_opcode::s_flbit_i32_b32; 321bf215546Sopenharmony_ci case s_lshl: 322bf215546Sopenharmony_ci return aco_opcode::s_lshl_b32; 323bf215546Sopenharmony_ci default: 324bf215546Sopenharmony_ci unreachable("Unsupported wave specific opcode."); 325bf215546Sopenharmony_ci } 326bf215546Sopenharmony_ci } 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci% for fixed in ['m0', 'vcc', 'exec', 'scc']: 329bf215546Sopenharmony_ci Operand ${fixed}(Temp tmp) { 330bf215546Sopenharmony_ci % if fixed == 'vcc' or fixed == 'exec': 331bf215546Sopenharmony_ci //vcc_hi and exec_hi can still be used in wave32 332bf215546Sopenharmony_ci assert(tmp.type() == RegType::sgpr && tmp.bytes() <= 8); 333bf215546Sopenharmony_ci % endif 334bf215546Sopenharmony_ci Operand op(tmp); 335bf215546Sopenharmony_ci op.setFixed(aco::${fixed}); 336bf215546Sopenharmony_ci return op; 337bf215546Sopenharmony_ci } 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci Definition ${fixed}(Definition def) { 340bf215546Sopenharmony_ci % if fixed == 'vcc' or fixed == 'exec': 341bf215546Sopenharmony_ci //vcc_hi and exec_hi can still be used in wave32 342bf215546Sopenharmony_ci assert(def.regClass().type() == RegType::sgpr && def.bytes() <= 8); 343bf215546Sopenharmony_ci % endif 344bf215546Sopenharmony_ci def.setFixed(aco::${fixed}); 345bf215546Sopenharmony_ci return def; 346bf215546Sopenharmony_ci } 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci% endfor 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ci Operand set16bit(Operand op) { 351bf215546Sopenharmony_ci op.set16bit(true); 352bf215546Sopenharmony_ci return op; 353bf215546Sopenharmony_ci } 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci Operand set24bit(Operand op) { 356bf215546Sopenharmony_ci op.set24bit(true); 357bf215546Sopenharmony_ci return op; 358bf215546Sopenharmony_ci } 359bf215546Sopenharmony_ci 360bf215546Sopenharmony_ci /* hand-written helpers */ 361bf215546Sopenharmony_ci Temp as_uniform(Op op) 362bf215546Sopenharmony_ci { 363bf215546Sopenharmony_ci assert(op.op.isTemp()); 364bf215546Sopenharmony_ci if (op.op.getTemp().type() == RegType::vgpr) 365bf215546Sopenharmony_ci return pseudo(aco_opcode::p_as_uniform, def(RegType::sgpr, op.op.size()), op); 366bf215546Sopenharmony_ci else 367bf215546Sopenharmony_ci return op.op.getTemp(); 368bf215546Sopenharmony_ci } 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci Result v_mul_imm(Definition dst, Temp tmp, uint32_t imm, bool bits24=false) 371bf215546Sopenharmony_ci { 372bf215546Sopenharmony_ci assert(tmp.type() == RegType::vgpr); 373bf215546Sopenharmony_ci bool has_lshl_add = program->gfx_level >= GFX9; 374bf215546Sopenharmony_ci /* v_mul_lo_u32 has 1.6x the latency of most VALU on GFX10 (8 vs 5 cycles), 375bf215546Sopenharmony_ci * compared to 4x the latency on <GFX10. */ 376bf215546Sopenharmony_ci unsigned mul_cost = program->gfx_level >= GFX10 ? 1 : (4 + Operand::c32(imm).isLiteral()); 377bf215546Sopenharmony_ci if (imm == 0) { 378bf215546Sopenharmony_ci return copy(dst, Operand::zero()); 379bf215546Sopenharmony_ci } else if (imm == 1) { 380bf215546Sopenharmony_ci return copy(dst, Operand(tmp)); 381bf215546Sopenharmony_ci } else if (util_is_power_of_two_or_zero(imm)) { 382bf215546Sopenharmony_ci return vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(ffs(imm) - 1u), tmp); 383bf215546Sopenharmony_ci } else if (bits24) { 384bf215546Sopenharmony_ci return vop2(aco_opcode::v_mul_u32_u24, dst, Operand::c32(imm), tmp); 385bf215546Sopenharmony_ci } else if (util_is_power_of_two_nonzero(imm - 1u)) { 386bf215546Sopenharmony_ci return vadd32(dst, vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(ffs(imm - 1u) - 1u), tmp), tmp); 387bf215546Sopenharmony_ci } else if (mul_cost > 2 && util_is_power_of_two_nonzero(imm + 1u)) { 388bf215546Sopenharmony_ci return vsub32(dst, vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(ffs(imm + 1u) - 1u), tmp), tmp); 389bf215546Sopenharmony_ci } 390bf215546Sopenharmony_ci 391bf215546Sopenharmony_ci unsigned instrs_required = util_bitcount(imm); 392bf215546Sopenharmony_ci if (!has_lshl_add) { 393bf215546Sopenharmony_ci instrs_required = util_bitcount(imm) - (imm & 0x1); /* shifts */ 394bf215546Sopenharmony_ci instrs_required += util_bitcount(imm) - 1; /* additions */ 395bf215546Sopenharmony_ci } 396bf215546Sopenharmony_ci if (instrs_required < mul_cost) { 397bf215546Sopenharmony_ci Result res(NULL); 398bf215546Sopenharmony_ci Temp cur; 399bf215546Sopenharmony_ci while (imm) { 400bf215546Sopenharmony_ci unsigned shift = u_bit_scan(&imm); 401bf215546Sopenharmony_ci Definition tmp_dst = imm ? def(v1) : dst; 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci if (shift && cur.id()) 404bf215546Sopenharmony_ci res = vadd32(Definition(tmp_dst), vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(shift), tmp), cur); 405bf215546Sopenharmony_ci else if (shift) 406bf215546Sopenharmony_ci res = vop2(aco_opcode::v_lshlrev_b32, Definition(tmp_dst), Operand::c32(shift), tmp); 407bf215546Sopenharmony_ci else if (cur.id()) 408bf215546Sopenharmony_ci res = vadd32(Definition(tmp_dst), tmp, cur); 409bf215546Sopenharmony_ci else 410bf215546Sopenharmony_ci tmp_dst = Definition(tmp); 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci cur = tmp_dst.getTemp(); 413bf215546Sopenharmony_ci } 414bf215546Sopenharmony_ci return res; 415bf215546Sopenharmony_ci } 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_ci Temp imm_tmp = copy(def(s1), Operand::c32(imm)); 418bf215546Sopenharmony_ci return vop3(aco_opcode::v_mul_lo_u32, dst, imm_tmp, tmp); 419bf215546Sopenharmony_ci } 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci Result v_mul24_imm(Definition dst, Temp tmp, uint32_t imm) 422bf215546Sopenharmony_ci { 423bf215546Sopenharmony_ci return v_mul_imm(dst, tmp, imm, true); 424bf215546Sopenharmony_ci } 425bf215546Sopenharmony_ci 426bf215546Sopenharmony_ci Result copy(Definition dst, Op op) { 427bf215546Sopenharmony_ci return pseudo(aco_opcode::p_parallelcopy, dst, op); 428bf215546Sopenharmony_ci } 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_ci Result vadd32(Definition dst, Op a, Op b, bool carry_out=false, Op carry_in=Op(Operand(s2)), bool post_ra=false) { 431bf215546Sopenharmony_ci if (b.op.isConstant() || b.op.regClass().type() != RegType::vgpr) 432bf215546Sopenharmony_ci std::swap(a, b); 433bf215546Sopenharmony_ci if (!post_ra && (!b.op.hasRegClass() || b.op.regClass().type() == RegType::sgpr)) 434bf215546Sopenharmony_ci b = copy(def(v1), b); 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci if (!carry_in.op.isUndefined()) 437bf215546Sopenharmony_ci return vop2(aco_opcode::v_addc_co_u32, Definition(dst), def(lm), a, b, carry_in); 438bf215546Sopenharmony_ci else if (program->gfx_level >= GFX10 && carry_out) 439bf215546Sopenharmony_ci return vop3(aco_opcode::v_add_co_u32_e64, Definition(dst), def(lm), a, b); 440bf215546Sopenharmony_ci else if (program->gfx_level < GFX9 || carry_out) 441bf215546Sopenharmony_ci return vop2(aco_opcode::v_add_co_u32, Definition(dst), def(lm), a, b); 442bf215546Sopenharmony_ci else 443bf215546Sopenharmony_ci return vop2(aco_opcode::v_add_u32, Definition(dst), a, b); 444bf215546Sopenharmony_ci } 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci Result vsub32(Definition dst, Op a, Op b, bool carry_out=false, Op borrow=Op(Operand(s2))) 447bf215546Sopenharmony_ci { 448bf215546Sopenharmony_ci if (!borrow.op.isUndefined() || program->gfx_level < GFX9) 449bf215546Sopenharmony_ci carry_out = true; 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci bool reverse = !b.op.isTemp() || b.op.regClass().type() != RegType::vgpr; 452bf215546Sopenharmony_ci if (reverse) 453bf215546Sopenharmony_ci std::swap(a, b); 454bf215546Sopenharmony_ci if (!b.op.hasRegClass() || b.op.regClass().type() == RegType::sgpr) 455bf215546Sopenharmony_ci b = copy(def(v1), b); 456bf215546Sopenharmony_ci 457bf215546Sopenharmony_ci aco_opcode op; 458bf215546Sopenharmony_ci Temp carry; 459bf215546Sopenharmony_ci if (carry_out) { 460bf215546Sopenharmony_ci carry = tmp(s2); 461bf215546Sopenharmony_ci if (borrow.op.isUndefined()) 462bf215546Sopenharmony_ci op = reverse ? aco_opcode::v_subrev_co_u32 : aco_opcode::v_sub_co_u32; 463bf215546Sopenharmony_ci else 464bf215546Sopenharmony_ci op = reverse ? aco_opcode::v_subbrev_co_u32 : aco_opcode::v_subb_co_u32; 465bf215546Sopenharmony_ci } else { 466bf215546Sopenharmony_ci op = reverse ? aco_opcode::v_subrev_u32 : aco_opcode::v_sub_u32; 467bf215546Sopenharmony_ci } 468bf215546Sopenharmony_ci bool vop3 = false; 469bf215546Sopenharmony_ci if (program->gfx_level >= GFX10 && op == aco_opcode::v_subrev_co_u32) { 470bf215546Sopenharmony_ci vop3 = true; 471bf215546Sopenharmony_ci op = aco_opcode::v_subrev_co_u32_e64; 472bf215546Sopenharmony_ci } else if (program->gfx_level >= GFX10 && op == aco_opcode::v_sub_co_u32) { 473bf215546Sopenharmony_ci vop3 = true; 474bf215546Sopenharmony_ci op = aco_opcode::v_sub_co_u32_e64; 475bf215546Sopenharmony_ci } 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_ci int num_ops = borrow.op.isUndefined() ? 2 : 3; 478bf215546Sopenharmony_ci int num_defs = carry_out ? 2 : 1; 479bf215546Sopenharmony_ci aco_ptr<Instruction> sub; 480bf215546Sopenharmony_ci if (vop3) 481bf215546Sopenharmony_ci sub.reset(create_instruction<VOP3_instruction>(op, Format::VOP3, num_ops, num_defs)); 482bf215546Sopenharmony_ci else 483bf215546Sopenharmony_ci sub.reset(create_instruction<VOP2_instruction>(op, Format::VOP2, num_ops, num_defs)); 484bf215546Sopenharmony_ci sub->operands[0] = a.op; 485bf215546Sopenharmony_ci sub->operands[1] = b.op; 486bf215546Sopenharmony_ci if (!borrow.op.isUndefined()) 487bf215546Sopenharmony_ci sub->operands[2] = borrow.op; 488bf215546Sopenharmony_ci sub->definitions[0] = dst; 489bf215546Sopenharmony_ci if (carry_out) 490bf215546Sopenharmony_ci sub->definitions[1] = Definition(carry); 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci return insert(std::move(sub)); 493bf215546Sopenharmony_ci } 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci Result readlane(Definition dst, Op vsrc, Op lane) 496bf215546Sopenharmony_ci { 497bf215546Sopenharmony_ci if (program->gfx_level >= GFX8) 498bf215546Sopenharmony_ci return vop3(aco_opcode::v_readlane_b32_e64, dst, vsrc, lane); 499bf215546Sopenharmony_ci else 500bf215546Sopenharmony_ci return vop2(aco_opcode::v_readlane_b32, dst, vsrc, lane); 501bf215546Sopenharmony_ci } 502bf215546Sopenharmony_ci Result writelane(Definition dst, Op val, Op lane, Op vsrc) { 503bf215546Sopenharmony_ci if (program->gfx_level >= GFX8) 504bf215546Sopenharmony_ci return vop3(aco_opcode::v_writelane_b32_e64, dst, val, lane, vsrc); 505bf215546Sopenharmony_ci else 506bf215546Sopenharmony_ci return vop2(aco_opcode::v_writelane_b32, dst, val, lane, vsrc); 507bf215546Sopenharmony_ci } 508bf215546Sopenharmony_ci<% 509bf215546Sopenharmony_ciimport itertools 510bf215546Sopenharmony_ciformats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.product(range(5), range(6))) + [(8, 1), (1, 8)]), 511bf215546Sopenharmony_ci ("sop1", [Format.SOP1], 'SOP1_instruction', [(0, 1), (1, 0), (1, 1), (2, 1), (3, 2)]), 512bf215546Sopenharmony_ci ("sop2", [Format.SOP2], 'SOP2_instruction', itertools.product([1, 2], [2, 3])), 513bf215546Sopenharmony_ci ("sopk", [Format.SOPK], 'SOPK_instruction', itertools.product([0, 1, 2], [0, 1])), 514bf215546Sopenharmony_ci ("sopp", [Format.SOPP], 'SOPP_instruction', itertools.product([0, 1], [0, 1])), 515bf215546Sopenharmony_ci ("sopc", [Format.SOPC], 'SOPC_instruction', [(1, 2)]), 516bf215546Sopenharmony_ci ("smem", [Format.SMEM], 'SMEM_instruction', [(0, 4), (0, 3), (1, 0), (1, 3), (1, 2), (0, 0)]), 517bf215546Sopenharmony_ci ("ds", [Format.DS], 'DS_instruction', [(1, 1), (1, 2), (0, 3), (0, 4)]), 518bf215546Sopenharmony_ci ("mubuf", [Format.MUBUF], 'MUBUF_instruction', [(0, 4), (1, 3)]), 519bf215546Sopenharmony_ci ("mtbuf", [Format.MTBUF], 'MTBUF_instruction', [(0, 4), (1, 3)]), 520bf215546Sopenharmony_ci ("mimg", [Format.MIMG], 'MIMG_instruction', itertools.product([0, 1], [3, 4, 5, 6, 7])), 521bf215546Sopenharmony_ci ("exp", [Format.EXP], 'Export_instruction', [(0, 4)]), 522bf215546Sopenharmony_ci ("branch", [Format.PSEUDO_BRANCH], 'Pseudo_branch_instruction', itertools.product([1], [0, 1])), 523bf215546Sopenharmony_ci ("barrier", [Format.PSEUDO_BARRIER], 'Pseudo_barrier_instruction', [(0, 0)]), 524bf215546Sopenharmony_ci ("reduction", [Format.PSEUDO_REDUCTION], 'Pseudo_reduction_instruction', [(3, 2)]), 525bf215546Sopenharmony_ci ("vop1", [Format.VOP1], 'VOP1_instruction', [(0, 0), (1, 1), (2, 2)]), 526bf215546Sopenharmony_ci ("vop1_sdwa", [Format.VOP1, Format.SDWA], 'SDWA_instruction', [(1, 1)]), 527bf215546Sopenharmony_ci ("vop2", [Format.VOP2], 'VOP2_instruction', itertools.product([1, 2], [2, 3])), 528bf215546Sopenharmony_ci ("vop2_sdwa", [Format.VOP2, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2, 3])), 529bf215546Sopenharmony_ci ("vopc", [Format.VOPC], 'VOPC_instruction', itertools.product([1, 2], [2])), 530bf215546Sopenharmony_ci ("vopc_sdwa", [Format.VOPC, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2])), 531bf215546Sopenharmony_ci ("vop3", [Format.VOP3], 'VOP3_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]), 532bf215546Sopenharmony_ci ("vop3p", [Format.VOP3P], 'VOP3P_instruction', [(1, 2), (1, 3)]), 533bf215546Sopenharmony_ci ("vintrp", [Format.VINTRP], 'Interp_instruction', [(1, 2), (1, 3)]), 534bf215546Sopenharmony_ci ("vop1_dpp", [Format.VOP1, Format.DPP16], 'DPP16_instruction', [(1, 1)]), 535bf215546Sopenharmony_ci ("vop2_dpp", [Format.VOP2, Format.DPP16], 'DPP16_instruction', itertools.product([1, 2], [2, 3])), 536bf215546Sopenharmony_ci ("vopc_dpp", [Format.VOPC, Format.DPP16], 'DPP16_instruction', itertools.product([1, 2], [2])), 537bf215546Sopenharmony_ci ("vop1_dpp8", [Format.VOP1, Format.DPP8], 'DPP8_instruction', [(1, 1)]), 538bf215546Sopenharmony_ci ("vop2_dpp8", [Format.VOP2, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2, 3])), 539bf215546Sopenharmony_ci ("vopc_dpp8", [Format.VOPC, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2])), 540bf215546Sopenharmony_ci ("vop1_e64", [Format.VOP1, Format.VOP3], 'VOP3_instruction', itertools.product([1], [1])), 541bf215546Sopenharmony_ci ("vop2_e64", [Format.VOP2, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2, 3])), 542bf215546Sopenharmony_ci ("vopc_e64", [Format.VOPC, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2])), 543bf215546Sopenharmony_ci ("flat", [Format.FLAT], 'FLAT_instruction', [(0, 3), (1, 2)]), 544bf215546Sopenharmony_ci ("global", [Format.GLOBAL], 'FLAT_instruction', [(0, 3), (1, 2)]), 545bf215546Sopenharmony_ci ("scratch", [Format.SCRATCH], 'FLAT_instruction', [(0, 3), (1, 2)])] 546bf215546Sopenharmony_ciformats = [(f if len(f) == 5 else f + ('',)) for f in formats] 547bf215546Sopenharmony_ci%>\\ 548bf215546Sopenharmony_ci% for name, formats, struct, shapes, extra_field_setup in formats: 549bf215546Sopenharmony_ci % for num_definitions, num_operands in shapes: 550bf215546Sopenharmony_ci <% 551bf215546Sopenharmony_ci args = ['aco_opcode opcode'] 552bf215546Sopenharmony_ci for i in range(num_definitions): 553bf215546Sopenharmony_ci args.append('Definition def%d' % i) 554bf215546Sopenharmony_ci for i in range(num_operands): 555bf215546Sopenharmony_ci args.append('Op op%d' % i) 556bf215546Sopenharmony_ci for f in formats: 557bf215546Sopenharmony_ci args += f.get_builder_field_decls() 558bf215546Sopenharmony_ci %>\\ 559bf215546Sopenharmony_ci 560bf215546Sopenharmony_ci Result ${name}(${', '.join(args)}) 561bf215546Sopenharmony_ci { 562bf215546Sopenharmony_ci ${struct} *instr = create_instruction<${struct}>(opcode, (Format)(${'|'.join('(int)Format::%s' % f.name for f in formats)}), ${num_operands}, ${num_definitions}); 563bf215546Sopenharmony_ci % for i in range(num_definitions): 564bf215546Sopenharmony_ci instr->definitions[${i}] = def${i}; 565bf215546Sopenharmony_ci instr->definitions[${i}].setPrecise(is_precise); 566bf215546Sopenharmony_ci instr->definitions[${i}].setNUW(is_nuw); 567bf215546Sopenharmony_ci % endfor 568bf215546Sopenharmony_ci % for i in range(num_operands): 569bf215546Sopenharmony_ci instr->operands[${i}] = op${i}.op; 570bf215546Sopenharmony_ci % endfor 571bf215546Sopenharmony_ci % for f in formats: 572bf215546Sopenharmony_ci % for dest, field_name in zip(f.get_builder_field_dests(), f.get_builder_field_names()): 573bf215546Sopenharmony_ci instr->${dest} = ${field_name}; 574bf215546Sopenharmony_ci % endfor 575bf215546Sopenharmony_ci ${f.get_builder_initialization(num_operands)} 576bf215546Sopenharmony_ci % endfor 577bf215546Sopenharmony_ci ${extra_field_setup} 578bf215546Sopenharmony_ci return insert(instr); 579bf215546Sopenharmony_ci } 580bf215546Sopenharmony_ci 581bf215546Sopenharmony_ci % if name == 'sop1' or name == 'sop2' or name == 'sopc': 582bf215546Sopenharmony_ci <% 583bf215546Sopenharmony_ci args[0] = 'WaveSpecificOpcode opcode' 584bf215546Sopenharmony_ci params = [] 585bf215546Sopenharmony_ci for i in range(num_definitions): 586bf215546Sopenharmony_ci params.append('def%d' % i) 587bf215546Sopenharmony_ci for i in range(num_operands): 588bf215546Sopenharmony_ci params.append('op%d' % i) 589bf215546Sopenharmony_ci %>\\ 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci inline Result ${name}(${', '.join(args)}) 592bf215546Sopenharmony_ci { 593bf215546Sopenharmony_ci return ${name}(w64or32(opcode), ${', '.join(params)}); 594bf215546Sopenharmony_ci } 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci % endif 597bf215546Sopenharmony_ci % endfor 598bf215546Sopenharmony_ci% endfor 599bf215546Sopenharmony_ci}; 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci} // namespace aco 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci#endif /* _ACO_BUILDER_ */""" 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_cifrom aco_opcodes import opcodes, Format 606bf215546Sopenharmony_cifrom mako.template import Template 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ciprint(Template(template).render(opcodes=opcodes, Format=Format)) 609