1bf215546Sopenharmony_ci/* -*- mesa-c++ -*- 2bf215546Sopenharmony_ci * 3bf215546Sopenharmony_ci * Copyright (c) 2022 Collabora LTD 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Author: Gert Wollny <gert.wollny@collabora.com> 6bf215546Sopenharmony_ci * 7bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 8bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 9bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 10bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 11bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 12bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 15bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 16bf215546Sopenharmony_ci * Software. 17bf215546Sopenharmony_ci * 18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 21bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 22bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 23bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 24bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "sfn_assembler.h" 28bf215546Sopenharmony_ci#include "sfn_debug.h" 29bf215546Sopenharmony_ci#include "sfn_instr_alugroup.h" 30bf215546Sopenharmony_ci#include "sfn_instr_controlflow.h" 31bf215546Sopenharmony_ci#include "sfn_instr_fetch.h" 32bf215546Sopenharmony_ci#include "sfn_instr_export.h" 33bf215546Sopenharmony_ci#include "sfn_instr_mem.h" 34bf215546Sopenharmony_ci#include "sfn_instr_tex.h" 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci#include "sfn_conditionaljumptracker.h" 37bf215546Sopenharmony_ci#include "sfn_callstack.h" 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci#include "../eg_sq.h" 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_cinamespace r600 { 42bf215546Sopenharmony_ciAssembler::Assembler(r600_shader *sh, const r600_shader_key& key): 43bf215546Sopenharmony_ci m_sh(sh), m_key(key) 44bf215546Sopenharmony_ci{ 45bf215546Sopenharmony_ci} 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ciextern const std::map<ESDOp, int> ds_opcode_map; 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ciclass AssamblerVisitor : public ConstInstrVisitor { 50bf215546Sopenharmony_cipublic: 51bf215546Sopenharmony_ci AssamblerVisitor(r600_shader *sh, const r600_shader_key& key); 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci void visit(const AluInstr& instr) override; 54bf215546Sopenharmony_ci void visit(const AluGroup& instr) override; 55bf215546Sopenharmony_ci void visit(const TexInstr& instr) override; 56bf215546Sopenharmony_ci void visit(const ExportInstr& instr) override; 57bf215546Sopenharmony_ci void visit(const FetchInstr& instr) override; 58bf215546Sopenharmony_ci void visit(const Block& instr) override; 59bf215546Sopenharmony_ci void visit(const IfInstr& instr) override; 60bf215546Sopenharmony_ci void visit(const ControlFlowInstr& instr) override; 61bf215546Sopenharmony_ci void visit(const ScratchIOInstr& instr) override; 62bf215546Sopenharmony_ci void visit(const StreamOutInstr& instr) override; 63bf215546Sopenharmony_ci void visit(const MemRingOutInstr& instr) override; 64bf215546Sopenharmony_ci void visit(const EmitVertexInstr& instr) override; 65bf215546Sopenharmony_ci void visit(const GDSInstr& instr) override; 66bf215546Sopenharmony_ci void visit(const WriteTFInstr& instr) override; 67bf215546Sopenharmony_ci void visit(const LDSAtomicInstr& instr) override; 68bf215546Sopenharmony_ci void visit(const LDSReadInstr& instr) override; 69bf215546Sopenharmony_ci void visit(const RatInstr& instr) override; 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci void finalize(); 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci const uint32_t sf_vtx = 1; 74bf215546Sopenharmony_ci const uint32_t sf_tex = 2; 75bf215546Sopenharmony_ci const uint32_t sf_alu = 4; 76bf215546Sopenharmony_ci const uint32_t sf_addr_register = 8; 77bf215546Sopenharmony_ci const uint32_t sf_all = 0xf; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci void clear_states(const uint32_t& states); 80bf215546Sopenharmony_ci bool copy_dst(r600_bytecode_alu_dst& dst, const Register& d, bool write); 81bf215546Sopenharmony_ci PVirtualValue copy_src(r600_bytecode_alu_src& src, const VirtualValue& s); 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci EBufferIndexMode 84bf215546Sopenharmony_ci emit_index_reg(const VirtualValue& addr, unsigned idx); 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci void emit_endif(); 87bf215546Sopenharmony_ci void emit_else(); 88bf215546Sopenharmony_ci void emit_loop_begin(bool vpm); 89bf215546Sopenharmony_ci void emit_loop_end(); 90bf215546Sopenharmony_ci void emit_loop_break(); 91bf215546Sopenharmony_ci void emit_loop_cont(); 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci void emit_alu_op(const AluInstr& ai); 94bf215546Sopenharmony_ci void emit_lds_op(const AluInstr& lds); 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci void emit_wait_ack(); 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci /* Start initialized in constructor */ 99bf215546Sopenharmony_ci const r600_shader_key& m_key; 100bf215546Sopenharmony_ci r600_shader *m_shader; 101bf215546Sopenharmony_ci r600_bytecode *m_bc; 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci ConditionalJumpTracker m_jump_tracker; 104bf215546Sopenharmony_ci CallStack m_callstack; 105bf215546Sopenharmony_ci bool ps_alpha_to_one; 106bf215546Sopenharmony_ci /* End initialized in constructor */ 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci std::set<uint32_t> m_nliterals_in_group; 109bf215546Sopenharmony_ci std::set<int> vtx_fetch_results; 110bf215546Sopenharmony_ci std::set<int> tex_fetch_results; 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci PRegister m_last_addr{nullptr}; 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci unsigned m_max_color_exports{0}; 115bf215546Sopenharmony_ci int m_loop_nesting{0}; 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci bool m_ack_suggested{false}; 118bf215546Sopenharmony_ci bool m_has_param_output{false}; 119bf215546Sopenharmony_ci bool m_has_pos_output{false}; 120bf215546Sopenharmony_ci bool m_last_op_was_barrier{false}; 121bf215546Sopenharmony_ci bool m_result{true}; 122bf215546Sopenharmony_ci}; 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_cibool Assembler::lower(Shader *shader) 125bf215546Sopenharmony_ci{ 126bf215546Sopenharmony_ci AssamblerVisitor ass(m_sh, m_key); 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci auto& blocks = shader->func(); 129bf215546Sopenharmony_ci for (auto b : blocks) { 130bf215546Sopenharmony_ci b->accept(ass); 131bf215546Sopenharmony_ci if (!ass.m_result) 132bf215546Sopenharmony_ci return false; 133bf215546Sopenharmony_ci } 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci ass.finalize(); 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci return ass.m_result; 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci} 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ciAssamblerVisitor::AssamblerVisitor(r600_shader *sh, const r600_shader_key& key): 142bf215546Sopenharmony_ci m_key(key), 143bf215546Sopenharmony_ci m_shader(sh), 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci m_bc(&sh->bc), 146bf215546Sopenharmony_ci m_callstack(sh->bc), 147bf215546Sopenharmony_ci ps_alpha_to_one(key.ps.alpha_to_one) 148bf215546Sopenharmony_ci{ 149bf215546Sopenharmony_ci if (m_shader->processor_type == PIPE_SHADER_FRAGMENT) 150bf215546Sopenharmony_ci m_max_color_exports = MAX2(m_key.ps.nr_cbufs, 1); 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci if (m_shader->processor_type == PIPE_SHADER_VERTEX && 153bf215546Sopenharmony_ci m_shader->ninput > 0) 154bf215546Sopenharmony_ci r600_bytecode_add_cfinst(m_bc, CF_OP_CALL_FS); 155bf215546Sopenharmony_ci} 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_civoid AssamblerVisitor::finalize() 158bf215546Sopenharmony_ci{ 159bf215546Sopenharmony_ci const struct cf_op_info *last = nullptr; 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci if (m_bc->cf_last) 162bf215546Sopenharmony_ci last = r600_isa_cf(m_bc->cf_last->op); 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci /* alu clause instructions don't have EOP bit, so add NOP */ 165bf215546Sopenharmony_ci if (m_shader->bc.gfx_level < CAYMAN && 166bf215546Sopenharmony_ci (!last || last->flags & CF_ALU || m_bc->cf_last->op == CF_OP_LOOP_END 167bf215546Sopenharmony_ci || m_bc->cf_last->op == CF_OP_POP)) 168bf215546Sopenharmony_ci r600_bytecode_add_cfinst(m_bc, CF_OP_NOP); 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci /* A fetch shader only can't be EOP (results in hang), but we can replace it 171bf215546Sopenharmony_ci * by a NOP */ 172bf215546Sopenharmony_ci else if (last && m_bc->cf_last->op == CF_OP_CALL_FS) 173bf215546Sopenharmony_ci m_bc->cf_last->op = CF_OP_NOP; 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_ci if (m_shader->bc.gfx_level != CAYMAN) 176bf215546Sopenharmony_ci m_bc->cf_last->end_of_program = 1; 177bf215546Sopenharmony_ci else 178bf215546Sopenharmony_ci cm_bytecode_add_cf_end(m_bc); 179bf215546Sopenharmony_ci} 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ciextern const std::map<EAluOp, int> opcode_map; 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_civoid AssamblerVisitor::visit(const AluInstr& ai) 184bf215546Sopenharmony_ci{ 185bf215546Sopenharmony_ci assert(vtx_fetch_results.empty()); 186bf215546Sopenharmony_ci assert(tex_fetch_results.empty()); 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci if (unlikely(ai.has_alu_flag(alu_is_lds))) 189bf215546Sopenharmony_ci emit_lds_op(ai); 190bf215546Sopenharmony_ci else 191bf215546Sopenharmony_ci emit_alu_op(ai); 192bf215546Sopenharmony_ci} 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_civoid AssamblerVisitor::emit_lds_op(const AluInstr& lds) 195bf215546Sopenharmony_ci{ 196bf215546Sopenharmony_ci struct r600_bytecode_alu alu; 197bf215546Sopenharmony_ci memset(&alu, 0, sizeof(alu)); 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci alu.is_lds_idx_op = true; 200bf215546Sopenharmony_ci alu.op = lds.lds_opcode(); 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci bool has_lds_fetch = false; 203bf215546Sopenharmony_ci switch (alu.op) { 204bf215546Sopenharmony_ci case LDS_WRITE: 205bf215546Sopenharmony_ci alu.op =LDS_OP2_LDS_WRITE; 206bf215546Sopenharmony_ci break; 207bf215546Sopenharmony_ci case LDS_WRITE_REL: 208bf215546Sopenharmony_ci alu.op = LDS_OP3_LDS_WRITE_REL; 209bf215546Sopenharmony_ci alu.lds_idx = 1; 210bf215546Sopenharmony_ci break; 211bf215546Sopenharmony_ci case DS_OP_READ_RET: 212bf215546Sopenharmony_ci alu.op = LDS_OP1_LDS_READ_RET; 213bf215546Sopenharmony_ci FALLTHROUGH; 214bf215546Sopenharmony_ci case LDS_ADD_RET: 215bf215546Sopenharmony_ci case LDS_AND_RET: 216bf215546Sopenharmony_ci case LDS_OR_RET: 217bf215546Sopenharmony_ci case LDS_MAX_INT_RET: 218bf215546Sopenharmony_ci case LDS_MAX_UINT_RET: 219bf215546Sopenharmony_ci case LDS_MIN_INT_RET: 220bf215546Sopenharmony_ci case LDS_MIN_UINT_RET: 221bf215546Sopenharmony_ci case LDS_XOR_RET: 222bf215546Sopenharmony_ci case LDS_XCHG_RET: 223bf215546Sopenharmony_ci case LDS_CMP_XCHG_RET: 224bf215546Sopenharmony_ci has_lds_fetch = true; 225bf215546Sopenharmony_ci break; 226bf215546Sopenharmony_ci case LDS_ADD: 227bf215546Sopenharmony_ci case LDS_AND: 228bf215546Sopenharmony_ci case LDS_OR: 229bf215546Sopenharmony_ci case LDS_MAX_INT: 230bf215546Sopenharmony_ci case LDS_MAX_UINT: 231bf215546Sopenharmony_ci case LDS_MIN_INT: 232bf215546Sopenharmony_ci case LDS_MIN_UINT: 233bf215546Sopenharmony_ci case LDS_XOR: 234bf215546Sopenharmony_ci break; 235bf215546Sopenharmony_ci default: 236bf215546Sopenharmony_ci std::cerr << "\n R600: error op: " << lds << "\n"; 237bf215546Sopenharmony_ci unreachable("Unhandled LDS op"); 238bf215546Sopenharmony_ci } 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci copy_src(alu.src[0], lds.src(0)); 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci if (lds.n_sources() > 1) 243bf215546Sopenharmony_ci copy_src(alu.src[1], lds.src(1)); 244bf215546Sopenharmony_ci else 245bf215546Sopenharmony_ci alu.src[1].sel = V_SQ_ALU_SRC_0; 246bf215546Sopenharmony_ci 247bf215546Sopenharmony_ci if (lds.n_sources() > 2) 248bf215546Sopenharmony_ci copy_src(alu.src[2], lds.src(2)); 249bf215546Sopenharmony_ci else 250bf215546Sopenharmony_ci alu.src[2].sel = V_SQ_ALU_SRC_0; 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci alu.last = lds.has_alu_flag(alu_last_instr); 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci int r = r600_bytecode_add_alu(m_bc, &alu); 255bf215546Sopenharmony_ci if (has_lds_fetch) 256bf215546Sopenharmony_ci m_bc->cf_last->nlds_read++; 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci if (r) 259bf215546Sopenharmony_ci m_result = false; 260bf215546Sopenharmony_ci} 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_civoid AssamblerVisitor::emit_alu_op(const AluInstr& ai) 263bf215546Sopenharmony_ci{ 264bf215546Sopenharmony_ci struct r600_bytecode_alu alu; 265bf215546Sopenharmony_ci memset(&alu, 0, sizeof(alu)); 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci if (opcode_map.find(ai.opcode()) == opcode_map.end()) { 268bf215546Sopenharmony_ci std::cerr << "Opcode not handled for " << ai <<"\n"; 269bf215546Sopenharmony_ci m_result = false; 270bf215546Sopenharmony_ci return; 271bf215546Sopenharmony_ci } 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ci // skip multiple barriers 274bf215546Sopenharmony_ci if (m_last_op_was_barrier && ai.opcode() == op0_group_barrier) 275bf215546Sopenharmony_ci return; 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci m_last_op_was_barrier = ai.opcode() == op0_group_barrier; 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci alu.op = opcode_map.at(ai.opcode()); 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci auto dst = ai.dest(); 282bf215546Sopenharmony_ci if (dst) { 283bf215546Sopenharmony_ci if (!copy_dst(alu.dst, *dst, ai.has_alu_flag(alu_write))) { 284bf215546Sopenharmony_ci m_result = false; 285bf215546Sopenharmony_ci return; 286bf215546Sopenharmony_ci } 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci alu.dst.write = ai.has_alu_flag(alu_write); 289bf215546Sopenharmony_ci alu.dst.clamp = ai.has_alu_flag(alu_dst_clamp); 290bf215546Sopenharmony_ci alu.dst.rel = dst->addr() ? 1 : 0; 291bf215546Sopenharmony_ci } else { 292bf215546Sopenharmony_ci alu.dst.chan = ai.dest_chan(); 293bf215546Sopenharmony_ci } 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci alu.is_op3 = ai.n_sources() == 3; 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci EBufferIndexMode kcache_index_mode = bim_none; 298bf215546Sopenharmony_ci PVirtualValue buffer_offset = nullptr; 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci for (unsigned i = 0; i < ai.n_sources(); ++i) { 301bf215546Sopenharmony_ci buffer_offset = copy_src(alu.src[i], ai.src(i)); 302bf215546Sopenharmony_ci alu.src[i].neg = ai.has_alu_flag(AluInstr::src_neg_flags[i]); 303bf215546Sopenharmony_ci if (!alu.is_op3) 304bf215546Sopenharmony_ci alu.src[i].abs = ai.has_alu_flag(AluInstr::src_abs_flags[i]); 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci if (buffer_offset && kcache_index_mode == bim_none) { 307bf215546Sopenharmony_ci kcache_index_mode = bim_zero; 308bf215546Sopenharmony_ci alu.src[i].kc_bank = 1; 309bf215546Sopenharmony_ci alu.src[i].kc_rel = 1; 310bf215546Sopenharmony_ci } 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci if (ai.has_lds_queue_read()) { 313bf215546Sopenharmony_ci assert(m_bc->cf_last->nlds_read > 0); 314bf215546Sopenharmony_ci m_bc->cf_last->nlds_read--; 315bf215546Sopenharmony_ci } 316bf215546Sopenharmony_ci } 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci if (ai.bank_swizzle() != alu_vec_unknown) 319bf215546Sopenharmony_ci alu.bank_swizzle_force = ai.bank_swizzle(); 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci alu.last = ai.has_alu_flag(alu_last_instr); 322bf215546Sopenharmony_ci alu.execute_mask = ai.has_alu_flag(alu_update_exec); 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci /* If the destination register is equal to the last loaded address register 325bf215546Sopenharmony_ci * then clear the latter one, because the values will no longer be identical */ 326bf215546Sopenharmony_ci if (m_last_addr) 327bf215546Sopenharmony_ci sfn_log << SfnLog::assembly << " Current address register is " << *m_last_addr << "\n"; 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci if (dst) 330bf215546Sopenharmony_ci sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n"; 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci if (dst && m_last_addr && *dst == *m_last_addr) { 333bf215546Sopenharmony_ci sfn_log << SfnLog::assembly << " Clear address register (was " << *m_last_addr << "\n"; 334bf215546Sopenharmony_ci m_last_addr = nullptr; 335bf215546Sopenharmony_ci } 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci auto cf_op = ai.cf_type(); 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci unsigned type = 0; 340bf215546Sopenharmony_ci switch (cf_op) { 341bf215546Sopenharmony_ci case cf_alu: type = CF_OP_ALU; break; 342bf215546Sopenharmony_ci case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break; 343bf215546Sopenharmony_ci case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break; 344bf215546Sopenharmony_ci case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break; 345bf215546Sopenharmony_ci case cf_alu_break: type = CF_OP_ALU_BREAK; break; 346bf215546Sopenharmony_ci case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break; 347bf215546Sopenharmony_ci case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break; 348bf215546Sopenharmony_ci case cf_alu_extended: type = CF_OP_ALU_EXT; break; 349bf215546Sopenharmony_ci default: 350bf215546Sopenharmony_ci assert(0 && "cf_alu_undefined should have been replaced"); 351bf215546Sopenharmony_ci } 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci if (alu.last) 354bf215546Sopenharmony_ci m_nliterals_in_group.clear(); 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci m_result = !r600_bytecode_add_alu_type(m_bc, &alu, type); 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci if (ai.opcode() == op1_mova_int) 360bf215546Sopenharmony_ci m_bc->ar_loaded = 0; 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci if (ai.opcode() == op1_set_cf_idx0) 363bf215546Sopenharmony_ci m_bc->index_loaded[0] = 1; 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_ci if (ai.opcode() == op1_set_cf_idx1) 366bf215546Sopenharmony_ci m_bc->index_loaded[1] = 1; 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_ci m_bc->force_add_cf |= (ai.opcode() == op2_kille || 369bf215546Sopenharmony_ci ai.opcode() == op2_killne_int || 370bf215546Sopenharmony_ci ai.opcode() == op1_set_cf_idx0 || 371bf215546Sopenharmony_ci ai.opcode() == op1_set_cf_idx1); 372bf215546Sopenharmony_ci} 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_civoid AssamblerVisitor::visit(const AluGroup& group) 375bf215546Sopenharmony_ci{ 376bf215546Sopenharmony_ci clear_states(sf_vtx | sf_tex); 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_ci if (group.slots() == 0) 379bf215546Sopenharmony_ci return; 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_ci if (group.has_lds_group_start()) { 382bf215546Sopenharmony_ci if (m_bc->cf_last->ndw + 2 * (*group.begin())->required_slots() > 220) { 383bf215546Sopenharmony_ci assert(m_bc->cf_last->nlds_read == 0); 384bf215546Sopenharmony_ci m_bc->force_add_cf = 1; 385bf215546Sopenharmony_ci m_last_addr = nullptr; 386bf215546Sopenharmony_ci } 387bf215546Sopenharmony_ci } else if (m_bc->cf_last) { 388bf215546Sopenharmony_ci if (m_bc->cf_last->ndw + 2 * group.slots() > 240) { 389bf215546Sopenharmony_ci assert(m_bc->cf_last->nlds_read == 0); 390bf215546Sopenharmony_ci m_bc->force_add_cf = 1; 391bf215546Sopenharmony_ci m_last_addr = nullptr; 392bf215546Sopenharmony_ci } else { 393bf215546Sopenharmony_ci auto instr = *group.begin(); 394bf215546Sopenharmony_ci if (instr && 395bf215546Sopenharmony_ci !instr->has_alu_flag(alu_is_lds) && 396bf215546Sopenharmony_ci instr->opcode() == op0_group_barrier && 397bf215546Sopenharmony_ci m_bc->cf_last->ndw + 14 > 240) { 398bf215546Sopenharmony_ci assert(m_bc->cf_last->nlds_read == 0); 399bf215546Sopenharmony_ci m_bc->force_add_cf = 1; 400bf215546Sopenharmony_ci m_last_addr = nullptr; 401bf215546Sopenharmony_ci } 402bf215546Sopenharmony_ci } 403bf215546Sopenharmony_ci } 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_ci auto addr = group.addr(); 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci if (addr.first) { 408bf215546Sopenharmony_ci if (!addr.second) { 409bf215546Sopenharmony_ci if (!m_last_addr || !m_bc->ar_loaded || 410bf215546Sopenharmony_ci !m_last_addr->equal_to(*addr.first)) { 411bf215546Sopenharmony_ci m_bc->ar_reg = addr.first->sel(); 412bf215546Sopenharmony_ci m_bc->ar_chan = addr.first->chan(); 413bf215546Sopenharmony_ci m_last_addr = addr.first; 414bf215546Sopenharmony_ci m_bc->ar_loaded = 0; 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci r600_load_ar(m_bc, group.addr_for_src()); 417bf215546Sopenharmony_ci } 418bf215546Sopenharmony_ci } else { 419bf215546Sopenharmony_ci emit_index_reg(*addr.first, 0); 420bf215546Sopenharmony_ci } 421bf215546Sopenharmony_ci } 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci for (auto& i : group) { 424bf215546Sopenharmony_ci if (i) 425bf215546Sopenharmony_ci i->accept(*this); 426bf215546Sopenharmony_ci } 427bf215546Sopenharmony_ci} 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_civoid AssamblerVisitor::visit(const TexInstr& tex_instr) 430bf215546Sopenharmony_ci{ 431bf215546Sopenharmony_ci clear_states(sf_vtx | sf_alu); 432bf215546Sopenharmony_ci 433bf215546Sopenharmony_ci int sampler_offset = 0; 434bf215546Sopenharmony_ci auto addr = tex_instr.sampler_offset(); 435bf215546Sopenharmony_ci EBufferIndexMode index_mode = bim_none; 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci if (addr) 438bf215546Sopenharmony_ci index_mode = emit_index_reg(*addr, 1); 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci if (tex_fetch_results.find(tex_instr.src().sel()) != 441bf215546Sopenharmony_ci tex_fetch_results.end()) { 442bf215546Sopenharmony_ci m_bc->force_add_cf = 1; 443bf215546Sopenharmony_ci tex_fetch_results.clear(); 444bf215546Sopenharmony_ci } 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci r600_bytecode_tex tex; 447bf215546Sopenharmony_ci memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 448bf215546Sopenharmony_ci tex.op = tex_instr.opcode(); 449bf215546Sopenharmony_ci tex.sampler_id = tex_instr.sampler_id() + sampler_offset; 450bf215546Sopenharmony_ci tex.resource_id = tex_instr.resource_id() + sampler_offset; 451bf215546Sopenharmony_ci tex.src_gpr = tex_instr.src().sel(); 452bf215546Sopenharmony_ci tex.dst_gpr = tex_instr.dst().sel(); 453bf215546Sopenharmony_ci tex.dst_sel_x = tex_instr.dest_swizzle(0); 454bf215546Sopenharmony_ci tex.dst_sel_y = tex_instr.dest_swizzle(1); 455bf215546Sopenharmony_ci tex.dst_sel_z = tex_instr.dest_swizzle(2); 456bf215546Sopenharmony_ci tex.dst_sel_w = tex_instr.dest_swizzle(3); 457bf215546Sopenharmony_ci tex.src_sel_x = tex_instr.src()[0]->chan(); 458bf215546Sopenharmony_ci tex.src_sel_y = tex_instr.src()[1]->chan(); 459bf215546Sopenharmony_ci tex.src_sel_z = tex_instr.src()[2]->chan(); 460bf215546Sopenharmony_ci tex.src_sel_w = tex_instr.src()[3]->chan(); 461bf215546Sopenharmony_ci tex.coord_type_x = !tex_instr.has_tex_flag(TexInstr::x_unnormalized); 462bf215546Sopenharmony_ci tex.coord_type_y = !tex_instr.has_tex_flag(TexInstr::y_unnormalized); 463bf215546Sopenharmony_ci tex.coord_type_z = !tex_instr.has_tex_flag(TexInstr::z_unnormalized); 464bf215546Sopenharmony_ci tex.coord_type_w = !tex_instr.has_tex_flag(TexInstr::w_unnormalized); 465bf215546Sopenharmony_ci tex.offset_x = tex_instr.get_offset(0); 466bf215546Sopenharmony_ci tex.offset_y = tex_instr.get_offset(1); 467bf215546Sopenharmony_ci tex.offset_z = tex_instr.get_offset(2); 468bf215546Sopenharmony_ci tex.resource_index_mode = index_mode; 469bf215546Sopenharmony_ci tex.sampler_index_mode = index_mode; 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci if (tex.dst_sel_x < 4 && 472bf215546Sopenharmony_ci tex.dst_sel_y < 4 && 473bf215546Sopenharmony_ci tex.dst_sel_z < 4 && 474bf215546Sopenharmony_ci tex.dst_sel_w < 4) 475bf215546Sopenharmony_ci tex_fetch_results.insert(tex.dst_gpr); 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_ci if (tex_instr.opcode() == TexInstr::get_gradient_h || 478bf215546Sopenharmony_ci tex_instr.opcode() == TexInstr::get_gradient_v) 479bf215546Sopenharmony_ci tex.inst_mod = tex_instr.has_tex_flag(TexInstr::grad_fine) ? 1 : 0; 480bf215546Sopenharmony_ci else 481bf215546Sopenharmony_ci tex.inst_mod = tex_instr.inst_mode(); 482bf215546Sopenharmony_ci if (r600_bytecode_add_tex(m_bc, &tex)) { 483bf215546Sopenharmony_ci R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); 484bf215546Sopenharmony_ci m_result = false; 485bf215546Sopenharmony_ci } 486bf215546Sopenharmony_ci} 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_civoid AssamblerVisitor::visit(const ExportInstr& exi) 489bf215546Sopenharmony_ci{ 490bf215546Sopenharmony_ci const auto& value = exi.value(); 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci r600_bytecode_output output; 493bf215546Sopenharmony_ci memset(&output, 0, sizeof(output)); 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci output.gpr = value.sel(); 496bf215546Sopenharmony_ci output.elem_size = 3; 497bf215546Sopenharmony_ci output.swizzle_x = value[0]->chan(); 498bf215546Sopenharmony_ci output.swizzle_y = value[1]->chan(); 499bf215546Sopenharmony_ci output.swizzle_z = value[2]->chan(); 500bf215546Sopenharmony_ci output.burst_count = 1; 501bf215546Sopenharmony_ci output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT; 502bf215546Sopenharmony_ci output.type = exi.export_type(); 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci clear_states(sf_all); 506bf215546Sopenharmony_ci switch (exi.export_type()) { 507bf215546Sopenharmony_ci case ExportInstr::pixel: 508bf215546Sopenharmony_ci output.swizzle_w = ps_alpha_to_one ? 5 : exi.value()[3]->chan(); 509bf215546Sopenharmony_ci output.array_base = exi.location(); 510bf215546Sopenharmony_ci break; 511bf215546Sopenharmony_ci case ExportInstr::pos: 512bf215546Sopenharmony_ci output.swizzle_w = exi.value()[3]->chan(); 513bf215546Sopenharmony_ci output.array_base = 60 + exi.location(); 514bf215546Sopenharmony_ci break; 515bf215546Sopenharmony_ci case ExportInstr::param: 516bf215546Sopenharmony_ci output.swizzle_w = exi.value()[3]->chan(); 517bf215546Sopenharmony_ci output.array_base = exi.location(); 518bf215546Sopenharmony_ci break; 519bf215546Sopenharmony_ci default: 520bf215546Sopenharmony_ci R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type()); 521bf215546Sopenharmony_ci m_result = false; 522bf215546Sopenharmony_ci } 523bf215546Sopenharmony_ci 524bf215546Sopenharmony_ci /* If all register elements pinned to fixed values 525bf215546Sopenharmony_ci * we can override the gpr (the register allocator doesn't see 526bf215546Sopenharmony_ci * this because it doesn't take these channels into account. */ 527bf215546Sopenharmony_ci if (output.swizzle_x > 3 && output.swizzle_y > 3 && 528bf215546Sopenharmony_ci output.swizzle_z > 3 && output.swizzle_w > 3) 529bf215546Sopenharmony_ci output.gpr = 0; 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci int r = 0; 532bf215546Sopenharmony_ci if ((r =r600_bytecode_add_output(m_bc, &output))) { 533bf215546Sopenharmony_ci R600_ERR("Error adding export at location %d : err: %d\n", exi.location(), r); 534bf215546Sopenharmony_ci m_result = false; 535bf215546Sopenharmony_ci } 536bf215546Sopenharmony_ci} 537bf215546Sopenharmony_ci 538bf215546Sopenharmony_civoid AssamblerVisitor::visit(const ScratchIOInstr& instr) 539bf215546Sopenharmony_ci{ 540bf215546Sopenharmony_ci clear_states(sf_all); 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci struct r600_bytecode_output cf; 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci memset(&cf, 0, sizeof(struct r600_bytecode_output)); 545bf215546Sopenharmony_ci 546bf215546Sopenharmony_ci cf.op = CF_OP_MEM_SCRATCH; 547bf215546Sopenharmony_ci cf.elem_size = 3; 548bf215546Sopenharmony_ci cf.gpr = instr.value().sel(); 549bf215546Sopenharmony_ci cf.mark = !instr.is_read(); 550bf215546Sopenharmony_ci cf.comp_mask = instr.is_read() ? 0xf : instr.write_mask(); 551bf215546Sopenharmony_ci cf.swizzle_x = 0; 552bf215546Sopenharmony_ci cf.swizzle_y = 1; 553bf215546Sopenharmony_ci cf.swizzle_z = 2; 554bf215546Sopenharmony_ci cf.swizzle_w = 3; 555bf215546Sopenharmony_ci cf.burst_count = 1; 556bf215546Sopenharmony_ci 557bf215546Sopenharmony_ci assert(!instr.is_read() || m_bc->gfx_level < R700); 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci if (instr.address()) { 560bf215546Sopenharmony_ci cf.type = instr.is_read() || m_bc->gfx_level > R600 ? 3 : 1; 561bf215546Sopenharmony_ci cf.index_gpr = instr.address()->sel(); 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci /* The docu seems to be wrong here: In indirect addressing the 564bf215546Sopenharmony_ci * address_base seems to be the array_size */ 565bf215546Sopenharmony_ci cf.array_size = instr.array_size(); 566bf215546Sopenharmony_ci } else { 567bf215546Sopenharmony_ci cf.type = instr.is_read() || m_bc->gfx_level > R600 ? 2 : 0; 568bf215546Sopenharmony_ci cf.array_base = instr.location(); 569bf215546Sopenharmony_ci } 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci if (r600_bytecode_add_output(m_bc, &cf)){ 572bf215546Sopenharmony_ci R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n"); 573bf215546Sopenharmony_ci m_result = false; 574bf215546Sopenharmony_ci } 575bf215546Sopenharmony_ci} 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_civoid AssamblerVisitor::visit(const StreamOutInstr& instr) 578bf215546Sopenharmony_ci{ 579bf215546Sopenharmony_ci struct r600_bytecode_output output; 580bf215546Sopenharmony_ci memset(&output, 0, sizeof(struct r600_bytecode_output)); 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_ci output.gpr = instr.value().sel(); 583bf215546Sopenharmony_ci output.elem_size = instr.element_size(); 584bf215546Sopenharmony_ci output.array_base = instr.array_base(); 585bf215546Sopenharmony_ci output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 586bf215546Sopenharmony_ci output.burst_count = instr.burst_count(); 587bf215546Sopenharmony_ci output.array_size = instr.array_size(); 588bf215546Sopenharmony_ci output.comp_mask = instr.comp_mask(); 589bf215546Sopenharmony_ci output.op = instr.op(m_shader->bc.gfx_level); 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci 592bf215546Sopenharmony_ci if (r600_bytecode_add_output(m_bc, &output)) { 593bf215546Sopenharmony_ci R600_ERR("shader_from_nir: Error creating stream output instruction\n"); 594bf215546Sopenharmony_ci m_result = false; 595bf215546Sopenharmony_ci } 596bf215546Sopenharmony_ci} 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_civoid AssamblerVisitor::visit(const MemRingOutInstr& instr) 599bf215546Sopenharmony_ci{ 600bf215546Sopenharmony_ci struct r600_bytecode_output output; 601bf215546Sopenharmony_ci memset(&output, 0, sizeof(struct r600_bytecode_output)); 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci output.gpr = instr.value().sel(); 604bf215546Sopenharmony_ci output.type = instr.type(); 605bf215546Sopenharmony_ci output.elem_size = 3; 606bf215546Sopenharmony_ci output.comp_mask = 0xf; 607bf215546Sopenharmony_ci output.burst_count = 1; 608bf215546Sopenharmony_ci output.op = instr.op(); 609bf215546Sopenharmony_ci if (instr.type() == MemRingOutInstr::mem_write_ind || 610bf215546Sopenharmony_ci instr.type() == MemRingOutInstr::mem_write_ind_ack) { 611bf215546Sopenharmony_ci output.index_gpr = instr.index_reg(); 612bf215546Sopenharmony_ci output.array_size = 0xfff; 613bf215546Sopenharmony_ci } 614bf215546Sopenharmony_ci output.array_base = instr.array_base(); 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci if (r600_bytecode_add_output(m_bc, &output)) { 617bf215546Sopenharmony_ci R600_ERR("shader_from_nir: Error creating mem ring write instruction\n"); 618bf215546Sopenharmony_ci m_result = false; 619bf215546Sopenharmony_ci } 620bf215546Sopenharmony_ci} 621bf215546Sopenharmony_ci 622bf215546Sopenharmony_civoid AssamblerVisitor::visit(const EmitVertexInstr& instr) 623bf215546Sopenharmony_ci{ 624bf215546Sopenharmony_ci int r = r600_bytecode_add_cfinst(m_bc, instr.op()); 625bf215546Sopenharmony_ci if (!r) 626bf215546Sopenharmony_ci m_bc->cf_last->count = instr.stream(); 627bf215546Sopenharmony_ci else 628bf215546Sopenharmony_ci m_result = false; 629bf215546Sopenharmony_ci assert(m_bc->cf_last->count < 4); 630bf215546Sopenharmony_ci} 631bf215546Sopenharmony_ci 632bf215546Sopenharmony_civoid AssamblerVisitor::visit(const FetchInstr& fetch_instr) 633bf215546Sopenharmony_ci{ 634bf215546Sopenharmony_ci clear_states(sf_tex | sf_alu); 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci auto buffer_offset = fetch_instr.resource_offset(); 637bf215546Sopenharmony_ci EBufferIndexMode rat_index_mode = bim_none; 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_ci if (buffer_offset) 640bf215546Sopenharmony_ci rat_index_mode = emit_index_reg(*buffer_offset, 0); 641bf215546Sopenharmony_ci 642bf215546Sopenharmony_ci if (fetch_instr.has_fetch_flag(FetchInstr::wait_ack)) 643bf215546Sopenharmony_ci emit_wait_ack(); 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci bool use_tc = fetch_instr.has_fetch_flag(FetchInstr::use_tc) || 646bf215546Sopenharmony_ci (m_bc->gfx_level == CAYMAN); 647bf215546Sopenharmony_ci if (!use_tc && 648bf215546Sopenharmony_ci vtx_fetch_results.find(fetch_instr.src().sel()) != 649bf215546Sopenharmony_ci vtx_fetch_results.end()) { 650bf215546Sopenharmony_ci m_bc->force_add_cf = 1; 651bf215546Sopenharmony_ci vtx_fetch_results.clear(); 652bf215546Sopenharmony_ci } 653bf215546Sopenharmony_ci 654bf215546Sopenharmony_ci if (fetch_instr.has_fetch_flag(FetchInstr::use_tc) && 655bf215546Sopenharmony_ci tex_fetch_results.find(fetch_instr.src().sel()) != 656bf215546Sopenharmony_ci tex_fetch_results.end()) { 657bf215546Sopenharmony_ci m_bc->force_add_cf = 1; 658bf215546Sopenharmony_ci tex_fetch_results.clear(); 659bf215546Sopenharmony_ci } 660bf215546Sopenharmony_ci 661bf215546Sopenharmony_ci if (use_tc) 662bf215546Sopenharmony_ci tex_fetch_results.insert(fetch_instr.dst().sel()); 663bf215546Sopenharmony_ci else 664bf215546Sopenharmony_ci vtx_fetch_results.insert(fetch_instr.dst().sel()); 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_ci struct r600_bytecode_vtx vtx; 667bf215546Sopenharmony_ci memset(&vtx, 0, sizeof(vtx)); 668bf215546Sopenharmony_ci vtx.op = fetch_instr.opcode(); 669bf215546Sopenharmony_ci vtx.buffer_id = fetch_instr.resource_id(); 670bf215546Sopenharmony_ci vtx.fetch_type = fetch_instr.fetch_type(); 671bf215546Sopenharmony_ci vtx.src_gpr = fetch_instr.src().sel(); 672bf215546Sopenharmony_ci vtx.src_sel_x = fetch_instr.src().chan(); 673bf215546Sopenharmony_ci vtx.mega_fetch_count = fetch_instr.mega_fetch_count(); 674bf215546Sopenharmony_ci vtx.dst_gpr = fetch_instr.dst().sel(); 675bf215546Sopenharmony_ci vtx.dst_sel_x = fetch_instr.dest_swizzle(0); /* SEL_X */ 676bf215546Sopenharmony_ci vtx.dst_sel_y = fetch_instr.dest_swizzle(1); /* SEL_Y */ 677bf215546Sopenharmony_ci vtx.dst_sel_z = fetch_instr.dest_swizzle(2); /* SEL_Z */ 678bf215546Sopenharmony_ci vtx.dst_sel_w = fetch_instr.dest_swizzle(3); /* SEL_W */ 679bf215546Sopenharmony_ci vtx.use_const_fields = fetch_instr.has_fetch_flag(FetchInstr::use_const_field); 680bf215546Sopenharmony_ci vtx.data_format = fetch_instr.data_format(); 681bf215546Sopenharmony_ci vtx.num_format_all = fetch_instr.num_format(); /* NUM_FORMAT_SCALED */ 682bf215546Sopenharmony_ci vtx.format_comp_all = fetch_instr.has_fetch_flag(FetchInstr::format_comp_signed); 683bf215546Sopenharmony_ci vtx.endian = fetch_instr.endian_swap(); 684bf215546Sopenharmony_ci vtx.buffer_index_mode = rat_index_mode; 685bf215546Sopenharmony_ci vtx.offset = fetch_instr.src_offset(); 686bf215546Sopenharmony_ci vtx.indexed = fetch_instr.has_fetch_flag(FetchInstr::indexed); 687bf215546Sopenharmony_ci vtx.uncached = fetch_instr.has_fetch_flag(FetchInstr::uncached); 688bf215546Sopenharmony_ci vtx.elem_size = fetch_instr.elm_size(); 689bf215546Sopenharmony_ci vtx.array_base = fetch_instr.array_base(); 690bf215546Sopenharmony_ci vtx.array_size = fetch_instr.array_size(); 691bf215546Sopenharmony_ci vtx.srf_mode_all = fetch_instr.has_fetch_flag(FetchInstr::srf_mode); 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_ci if (fetch_instr.has_fetch_flag(FetchInstr::use_tc)) { 694bf215546Sopenharmony_ci if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) { 695bf215546Sopenharmony_ci R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); 696bf215546Sopenharmony_ci m_result = false; 697bf215546Sopenharmony_ci } 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci } else { 700bf215546Sopenharmony_ci if ((r600_bytecode_add_vtx(m_bc, &vtx))) { 701bf215546Sopenharmony_ci R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); 702bf215546Sopenharmony_ci m_result = false; 703bf215546Sopenharmony_ci } 704bf215546Sopenharmony_ci } 705bf215546Sopenharmony_ci 706bf215546Sopenharmony_ci m_bc->cf_last->vpm = (m_bc->type == PIPE_SHADER_FRAGMENT) && 707bf215546Sopenharmony_ci fetch_instr.has_fetch_flag(FetchInstr::vpm); 708bf215546Sopenharmony_ci m_bc->cf_last->barrier = 1; 709bf215546Sopenharmony_ci} 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_civoid AssamblerVisitor::visit(const WriteTFInstr& instr) 712bf215546Sopenharmony_ci{ 713bf215546Sopenharmony_ci struct r600_bytecode_gds gds; 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_ci auto& value = instr.value(); 716bf215546Sopenharmony_ci 717bf215546Sopenharmony_ci memset(&gds, 0, sizeof(struct r600_bytecode_gds)); 718bf215546Sopenharmony_ci gds.src_gpr = value.sel(); 719bf215546Sopenharmony_ci gds.src_sel_x = value[0]->chan(); 720bf215546Sopenharmony_ci gds.src_sel_y = value[1]->chan(); 721bf215546Sopenharmony_ci gds.src_sel_z = 4; 722bf215546Sopenharmony_ci gds.dst_sel_x = 7; 723bf215546Sopenharmony_ci gds.dst_sel_y = 7; 724bf215546Sopenharmony_ci gds.dst_sel_z = 7; 725bf215546Sopenharmony_ci gds.dst_sel_w = 7; 726bf215546Sopenharmony_ci gds.op = FETCH_OP_TF_WRITE; 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_ci if (r600_bytecode_add_gds(m_bc, &gds) != 0) { 729bf215546Sopenharmony_ci m_result = false; 730bf215546Sopenharmony_ci return; 731bf215546Sopenharmony_ci } 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci if (value[2]->chan() != 7) { 734bf215546Sopenharmony_ci memset(&gds, 0, sizeof(struct r600_bytecode_gds)); 735bf215546Sopenharmony_ci gds.src_gpr = value.sel(); 736bf215546Sopenharmony_ci gds.src_sel_x = value[2]->chan(); 737bf215546Sopenharmony_ci gds.src_sel_y = value[3]->chan(); 738bf215546Sopenharmony_ci gds.src_sel_z = 4; 739bf215546Sopenharmony_ci gds.dst_sel_x = 7; 740bf215546Sopenharmony_ci gds.dst_sel_y = 7; 741bf215546Sopenharmony_ci gds.dst_sel_z = 7; 742bf215546Sopenharmony_ci gds.dst_sel_w = 7; 743bf215546Sopenharmony_ci gds.op = FETCH_OP_TF_WRITE; 744bf215546Sopenharmony_ci 745bf215546Sopenharmony_ci if (r600_bytecode_add_gds(m_bc, &gds)) { 746bf215546Sopenharmony_ci m_result = false; 747bf215546Sopenharmony_ci return; 748bf215546Sopenharmony_ci } 749bf215546Sopenharmony_ci } 750bf215546Sopenharmony_ci} 751bf215546Sopenharmony_ci 752bf215546Sopenharmony_civoid AssamblerVisitor::visit(const RatInstr& instr) 753bf215546Sopenharmony_ci{ 754bf215546Sopenharmony_ci struct r600_bytecode_gds gds; 755bf215546Sopenharmony_ci 756bf215546Sopenharmony_ci /* The instruction writes to the retuen buffer loaction, and 757bf215546Sopenharmony_ci * the value will actually be read bach, so make sure all previous writes 758bf215546Sopenharmony_ci * have been finished */ 759bf215546Sopenharmony_ci if (m_ack_suggested /*&& instr.has_instr_flag(Instr::ack_rat_return_write)*/) 760bf215546Sopenharmony_ci emit_wait_ack(); 761bf215546Sopenharmony_ci 762bf215546Sopenharmony_ci int rat_idx = instr.rat_id(); 763bf215546Sopenharmony_ci EBufferIndexMode rat_index_mode = bim_none; 764bf215546Sopenharmony_ci auto addr = instr.rat_id_offset(); 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci if (addr) 767bf215546Sopenharmony_ci rat_index_mode = emit_index_reg(*addr, 1); 768bf215546Sopenharmony_ci 769bf215546Sopenharmony_ci memset(&gds, 0, sizeof(struct r600_bytecode_gds)); 770bf215546Sopenharmony_ci 771bf215546Sopenharmony_ci r600_bytecode_add_cfinst(m_bc, instr.cf_opcode()); 772bf215546Sopenharmony_ci auto cf = m_bc->cf_last; 773bf215546Sopenharmony_ci cf->rat.id = rat_idx + m_shader->rat_base; 774bf215546Sopenharmony_ci cf->rat.inst = instr.rat_op(); 775bf215546Sopenharmony_ci cf->rat.index_mode = rat_index_mode; 776bf215546Sopenharmony_ci cf->output.type = instr.need_ack() ? 3 : 1; 777bf215546Sopenharmony_ci cf->output.gpr = instr.data_gpr(); 778bf215546Sopenharmony_ci cf->output.index_gpr = instr.index_gpr(); 779bf215546Sopenharmony_ci cf->output.comp_mask = instr.comp_mask(); 780bf215546Sopenharmony_ci cf->output.burst_count = instr.burst_count(); 781bf215546Sopenharmony_ci assert(instr.data_swz(0) == PIPE_SWIZZLE_X); 782bf215546Sopenharmony_ci if (cf->rat.inst != RatInstr::STORE_TYPED) { 783bf215546Sopenharmony_ci assert(instr.data_swz(1) == PIPE_SWIZZLE_Y || 784bf215546Sopenharmony_ci instr.data_swz(1) == PIPE_SWIZZLE_MAX) ; 785bf215546Sopenharmony_ci assert(instr.data_swz(2) == PIPE_SWIZZLE_Z || 786bf215546Sopenharmony_ci instr.data_swz(2) == PIPE_SWIZZLE_MAX) ; 787bf215546Sopenharmony_ci } 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci cf->vpm = m_bc->type == PIPE_SHADER_FRAGMENT; 790bf215546Sopenharmony_ci cf->barrier = 1; 791bf215546Sopenharmony_ci cf->mark = instr.need_ack(); 792bf215546Sopenharmony_ci cf->output.elem_size = instr.elm_size(); 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci m_ack_suggested |= instr.need_ack(); 795bf215546Sopenharmony_ci} 796bf215546Sopenharmony_ci 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_civoid AssamblerVisitor::clear_states(const uint32_t& states) 799bf215546Sopenharmony_ci{ 800bf215546Sopenharmony_ci if (states & sf_vtx) 801bf215546Sopenharmony_ci vtx_fetch_results.clear(); 802bf215546Sopenharmony_ci 803bf215546Sopenharmony_ci if (states & sf_tex) 804bf215546Sopenharmony_ci tex_fetch_results.clear(); 805bf215546Sopenharmony_ci 806bf215546Sopenharmony_ci if (states & sf_alu) { 807bf215546Sopenharmony_ci m_last_op_was_barrier = false; 808bf215546Sopenharmony_ci m_last_addr = nullptr; 809bf215546Sopenharmony_ci } 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_ci} 812bf215546Sopenharmony_ci 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_civoid AssamblerVisitor::visit(const Block& block) 815bf215546Sopenharmony_ci{ 816bf215546Sopenharmony_ci if (block.empty()) 817bf215546Sopenharmony_ci return; 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_ci m_bc->force_add_cf = block.has_instr_flag(Instr::force_cf); 820bf215546Sopenharmony_ci sfn_log << SfnLog::assembly << "Translate block size: " << block.size() << " new_cf:" << m_bc->force_add_cf << "\n"; 821bf215546Sopenharmony_ci 822bf215546Sopenharmony_ci for (const auto& i : block) { 823bf215546Sopenharmony_ci sfn_log << SfnLog::assembly << "Translate " << *i << " "; 824bf215546Sopenharmony_ci i->accept(*this); 825bf215546Sopenharmony_ci sfn_log << SfnLog::assembly << (m_result ? "good" : "fail") << "\n"; 826bf215546Sopenharmony_ci 827bf215546Sopenharmony_ci if (!m_result) 828bf215546Sopenharmony_ci break; 829bf215546Sopenharmony_ci } 830bf215546Sopenharmony_ci} 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_civoid AssamblerVisitor::visit(const IfInstr& instr) 833bf215546Sopenharmony_ci{ 834bf215546Sopenharmony_ci int elems = m_callstack.push(FC_PUSH_VPM); 835bf215546Sopenharmony_ci bool needs_workaround = false; 836bf215546Sopenharmony_ci 837bf215546Sopenharmony_ci if (m_bc->gfx_level == CAYMAN && m_bc->stack.loop > 1) 838bf215546Sopenharmony_ci needs_workaround = true; 839bf215546Sopenharmony_ci 840bf215546Sopenharmony_ci if (m_bc->gfx_level == EVERGREEN && 841bf215546Sopenharmony_ci m_bc->family != CHIP_HEMLOCK && 842bf215546Sopenharmony_ci m_bc->family != CHIP_CYPRESS && 843bf215546Sopenharmony_ci m_bc->family != CHIP_JUNIPER) { 844bf215546Sopenharmony_ci unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size; 845bf215546Sopenharmony_ci unsigned dmod2 = (elems) % m_bc->stack.entry_size; 846bf215546Sopenharmony_ci 847bf215546Sopenharmony_ci if (elems && (!dmod1 || !dmod2)) 848bf215546Sopenharmony_ci needs_workaround = true; 849bf215546Sopenharmony_ci } 850bf215546Sopenharmony_ci 851bf215546Sopenharmony_ci auto pred = instr.predicate(); 852bf215546Sopenharmony_ci auto [addr, dummy0, dummy1 ] = pred->indirect_addr(); {} 853bf215546Sopenharmony_ci if (addr) { 854bf215546Sopenharmony_ci if (!m_last_addr || !m_bc->ar_loaded || 855bf215546Sopenharmony_ci !m_last_addr->equal_to(*addr)) { 856bf215546Sopenharmony_ci m_bc->ar_reg = addr->sel(); 857bf215546Sopenharmony_ci m_bc->ar_chan = addr->chan(); 858bf215546Sopenharmony_ci m_last_addr = addr; 859bf215546Sopenharmony_ci m_bc->ar_loaded = 0; 860bf215546Sopenharmony_ci 861bf215546Sopenharmony_ci r600_load_ar(m_bc, true); 862bf215546Sopenharmony_ci } 863bf215546Sopenharmony_ci } 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_ci if (needs_workaround) { 866bf215546Sopenharmony_ci r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH); 867bf215546Sopenharmony_ci m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2; 868bf215546Sopenharmony_ci pred->set_cf_type(cf_alu); 869bf215546Sopenharmony_ci } 870bf215546Sopenharmony_ci 871bf215546Sopenharmony_ci clear_states(sf_tex|sf_vtx); 872bf215546Sopenharmony_ci pred->accept(*this); 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_ci r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP); 875bf215546Sopenharmony_ci clear_states(sf_all); 876bf215546Sopenharmony_ci 877bf215546Sopenharmony_ci m_jump_tracker.push(m_bc->cf_last, jt_if); 878bf215546Sopenharmony_ci} 879bf215546Sopenharmony_ci 880bf215546Sopenharmony_civoid AssamblerVisitor::visit(const ControlFlowInstr& instr) 881bf215546Sopenharmony_ci{ 882bf215546Sopenharmony_ci clear_states(sf_all); 883bf215546Sopenharmony_ci switch (instr.cf_type()) { 884bf215546Sopenharmony_ci case ControlFlowInstr::cf_else: 885bf215546Sopenharmony_ci emit_else(); 886bf215546Sopenharmony_ci break; 887bf215546Sopenharmony_ci case ControlFlowInstr::cf_endif: 888bf215546Sopenharmony_ci emit_endif(); 889bf215546Sopenharmony_ci break; 890bf215546Sopenharmony_ci case ControlFlowInstr::cf_loop_begin: 891bf215546Sopenharmony_ci emit_loop_begin(instr.has_instr_flag(Instr::vpm)); 892bf215546Sopenharmony_ci break; 893bf215546Sopenharmony_ci case ControlFlowInstr::cf_loop_end: 894bf215546Sopenharmony_ci emit_loop_end(); 895bf215546Sopenharmony_ci break; 896bf215546Sopenharmony_ci case ControlFlowInstr::cf_loop_break: 897bf215546Sopenharmony_ci emit_loop_break(); 898bf215546Sopenharmony_ci break; 899bf215546Sopenharmony_ci case ControlFlowInstr::cf_loop_continue: 900bf215546Sopenharmony_ci emit_loop_cont(); 901bf215546Sopenharmony_ci break; 902bf215546Sopenharmony_ci case ControlFlowInstr::cf_wait_ack: 903bf215546Sopenharmony_ci { 904bf215546Sopenharmony_ci int r = r600_bytecode_add_cfinst(m_bc, CF_OP_WAIT_ACK); 905bf215546Sopenharmony_ci if (!r) { 906bf215546Sopenharmony_ci m_bc->cf_last->cf_addr = 0; 907bf215546Sopenharmony_ci m_bc->cf_last->barrier = 1; 908bf215546Sopenharmony_ci m_ack_suggested = false; 909bf215546Sopenharmony_ci } else { 910bf215546Sopenharmony_ci m_result = false; 911bf215546Sopenharmony_ci } 912bf215546Sopenharmony_ci } 913bf215546Sopenharmony_ci break; 914bf215546Sopenharmony_ci default: 915bf215546Sopenharmony_ci unreachable("Unknown CF instruction type"); 916bf215546Sopenharmony_ci } 917bf215546Sopenharmony_ci} 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_civoid AssamblerVisitor::visit(const GDSInstr& instr) 920bf215546Sopenharmony_ci{ 921bf215546Sopenharmony_ci struct r600_bytecode_gds gds; 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci bool indirect = false; 924bf215546Sopenharmony_ci auto addr = instr.uav_id(); 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_ci if (addr) { 927bf215546Sopenharmony_ci indirect = true; 928bf215546Sopenharmony_ci emit_index_reg(*addr, 1); 929bf215546Sopenharmony_ci } 930bf215546Sopenharmony_ci 931bf215546Sopenharmony_ci memset(&gds, 0, sizeof(struct r600_bytecode_gds)); 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_ci gds.op = ds_opcode_map.at(instr.opcode()); 934bf215546Sopenharmony_ci gds.dst_gpr = instr.dest()->sel(); 935bf215546Sopenharmony_ci gds.uav_id = instr.uav_base(); 936bf215546Sopenharmony_ci gds.uav_index_mode = indirect ? bim_one : bim_none; 937bf215546Sopenharmony_ci gds.src_gpr = instr.src().sel(); 938bf215546Sopenharmony_ci 939bf215546Sopenharmony_ci gds.src_sel_x = instr.src()[0]->chan() < 7 ? instr.src()[0]->chan() : 4; 940bf215546Sopenharmony_ci gds.src_sel_y = instr.src()[1]->chan(); 941bf215546Sopenharmony_ci gds.src_sel_z = instr.src()[2]->chan() < 7 ? instr.src()[2]->chan() : 4; 942bf215546Sopenharmony_ci 943bf215546Sopenharmony_ci gds.dst_sel_x = 7; 944bf215546Sopenharmony_ci gds.dst_sel_y = 7; 945bf215546Sopenharmony_ci gds.dst_sel_z = 7; 946bf215546Sopenharmony_ci gds.dst_sel_w = 7; 947bf215546Sopenharmony_ci 948bf215546Sopenharmony_ci switch (instr.dest()->chan()) { 949bf215546Sopenharmony_ci case 0: gds.dst_sel_x = 0;break; 950bf215546Sopenharmony_ci case 1: gds.dst_sel_y = 0;break; 951bf215546Sopenharmony_ci case 2: gds.dst_sel_z = 0;break; 952bf215546Sopenharmony_ci case 3: gds.dst_sel_w = 0; 953bf215546Sopenharmony_ci } 954bf215546Sopenharmony_ci 955bf215546Sopenharmony_ci gds.src_gpr2 = 0; 956bf215546Sopenharmony_ci gds.alloc_consume = m_bc->gfx_level < CAYMAN ? 1 : 0; // Not Cayman 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_ci int r = r600_bytecode_add_gds(m_bc, &gds); 959bf215546Sopenharmony_ci if (r) { 960bf215546Sopenharmony_ci m_result = false; 961bf215546Sopenharmony_ci return; 962bf215546Sopenharmony_ci } 963bf215546Sopenharmony_ci m_bc->cf_last->vpm = PIPE_SHADER_FRAGMENT == m_bc->type; 964bf215546Sopenharmony_ci m_bc->cf_last->barrier = 1; 965bf215546Sopenharmony_ci} 966bf215546Sopenharmony_ci 967bf215546Sopenharmony_civoid AssamblerVisitor::visit(const LDSAtomicInstr& instr) 968bf215546Sopenharmony_ci{ 969bf215546Sopenharmony_ci (void)instr; 970bf215546Sopenharmony_ci unreachable("LDSAtomicInstr must be lowered to ALUInstr"); 971bf215546Sopenharmony_ci} 972bf215546Sopenharmony_ci 973bf215546Sopenharmony_civoid AssamblerVisitor::visit(const LDSReadInstr& instr) 974bf215546Sopenharmony_ci{ 975bf215546Sopenharmony_ci (void)instr; 976bf215546Sopenharmony_ci unreachable("LDSReadInstr must be lowered to ALUInstr"); 977bf215546Sopenharmony_ci} 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ciEBufferIndexMode 980bf215546Sopenharmony_ciAssamblerVisitor::emit_index_reg(const VirtualValue& addr, unsigned idx) 981bf215546Sopenharmony_ci{ 982bf215546Sopenharmony_ci assert(idx < 2); 983bf215546Sopenharmony_ci 984bf215546Sopenharmony_ci if (!m_bc->index_loaded[idx] || m_loop_nesting || 985bf215546Sopenharmony_ci m_bc->index_reg[idx] != (unsigned)addr.sel() 986bf215546Sopenharmony_ci || m_bc->index_reg_chan[idx] != (unsigned)addr.chan()) { 987bf215546Sopenharmony_ci struct r600_bytecode_alu alu; 988bf215546Sopenharmony_ci 989bf215546Sopenharmony_ci // Make sure MOVA is not last instr in clause 990bf215546Sopenharmony_ci 991bf215546Sopenharmony_ci if (!m_bc->cf_last || (m_bc->cf_last->ndw>>1) >= 110) 992bf215546Sopenharmony_ci m_bc->force_add_cf = 1; 993bf215546Sopenharmony_ci 994bf215546Sopenharmony_ci if (m_bc->gfx_level != CAYMAN) { 995bf215546Sopenharmony_ci 996bf215546Sopenharmony_ci EAluOp idxop = idx ? op1_set_cf_idx1 : op1_set_cf_idx0; 997bf215546Sopenharmony_ci 998bf215546Sopenharmony_ci memset(&alu, 0, sizeof(alu)); 999bf215546Sopenharmony_ci alu.op = opcode_map.at(op1_mova_int); 1000bf215546Sopenharmony_ci alu.dst.chan = 0; 1001bf215546Sopenharmony_ci alu.src[0].sel = addr.sel(); 1002bf215546Sopenharmony_ci alu.src[0].chan = addr.chan(); 1003bf215546Sopenharmony_ci alu.last = 1; 1004bf215546Sopenharmony_ci sfn_log << SfnLog::assembly << " mova_int, "; 1005bf215546Sopenharmony_ci int r = r600_bytecode_add_alu(m_bc, &alu); 1006bf215546Sopenharmony_ci if (r) 1007bf215546Sopenharmony_ci return bim_invalid; 1008bf215546Sopenharmony_ci 1009bf215546Sopenharmony_ci alu.op = opcode_map.at(idxop); 1010bf215546Sopenharmony_ci alu.dst.chan = 0; 1011bf215546Sopenharmony_ci alu.src[0].sel = 0; 1012bf215546Sopenharmony_ci alu.src[0].chan = 0; 1013bf215546Sopenharmony_ci alu.last = 1; 1014bf215546Sopenharmony_ci sfn_log << SfnLog::assembly << "op1_set_cf_idx" << idx; 1015bf215546Sopenharmony_ci r = r600_bytecode_add_alu(m_bc, &alu); 1016bf215546Sopenharmony_ci if (r) 1017bf215546Sopenharmony_ci return bim_invalid; 1018bf215546Sopenharmony_ci } else { 1019bf215546Sopenharmony_ci memset(&alu, 0, sizeof(alu)); 1020bf215546Sopenharmony_ci alu.op = opcode_map.at(op1_mova_int); 1021bf215546Sopenharmony_ci alu.dst.sel = idx == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1; 1022bf215546Sopenharmony_ci alu.dst.chan = 0; 1023bf215546Sopenharmony_ci alu.src[0].sel = addr.sel(); 1024bf215546Sopenharmony_ci alu.src[0].chan = addr.chan(); 1025bf215546Sopenharmony_ci alu.last = 1; 1026bf215546Sopenharmony_ci sfn_log << SfnLog::assembly << " mova_int, "; 1027bf215546Sopenharmony_ci int r = r600_bytecode_add_alu(m_bc, &alu); 1028bf215546Sopenharmony_ci if (r) 1029bf215546Sopenharmony_ci return bim_invalid; 1030bf215546Sopenharmony_ci } 1031bf215546Sopenharmony_ci 1032bf215546Sopenharmony_ci m_bc->ar_loaded = 0; 1033bf215546Sopenharmony_ci m_bc->index_reg[idx] = addr.sel(); 1034bf215546Sopenharmony_ci m_bc->index_reg_chan[idx] = addr.chan(); 1035bf215546Sopenharmony_ci m_bc->index_loaded[idx] = true; 1036bf215546Sopenharmony_ci m_bc->force_add_cf = 1; 1037bf215546Sopenharmony_ci sfn_log << SfnLog::assembly << "\n"; 1038bf215546Sopenharmony_ci } 1039bf215546Sopenharmony_ci return idx == 0 ? bim_zero : bim_one; 1040bf215546Sopenharmony_ci} 1041bf215546Sopenharmony_ci 1042bf215546Sopenharmony_civoid AssamblerVisitor::emit_else() 1043bf215546Sopenharmony_ci{ 1044bf215546Sopenharmony_ci r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE); 1045bf215546Sopenharmony_ci m_bc->cf_last->pop_count = 1; 1046bf215546Sopenharmony_ci m_result &= m_jump_tracker.add_mid(m_bc->cf_last, jt_if); 1047bf215546Sopenharmony_ci} 1048bf215546Sopenharmony_ci 1049bf215546Sopenharmony_civoid AssamblerVisitor::emit_endif() 1050bf215546Sopenharmony_ci{ 1051bf215546Sopenharmony_ci m_callstack.pop(FC_PUSH_VPM); 1052bf215546Sopenharmony_ci 1053bf215546Sopenharmony_ci unsigned force_pop = m_bc->force_add_cf; 1054bf215546Sopenharmony_ci if (!force_pop) { 1055bf215546Sopenharmony_ci int alu_pop = 3; 1056bf215546Sopenharmony_ci if (m_bc->cf_last) { 1057bf215546Sopenharmony_ci if (m_bc->cf_last->op == CF_OP_ALU) 1058bf215546Sopenharmony_ci alu_pop = 0; 1059bf215546Sopenharmony_ci else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER) 1060bf215546Sopenharmony_ci alu_pop = 1; 1061bf215546Sopenharmony_ci } 1062bf215546Sopenharmony_ci alu_pop += 1; 1063bf215546Sopenharmony_ci if (alu_pop == 1) { 1064bf215546Sopenharmony_ci m_bc->cf_last->op = CF_OP_ALU_POP_AFTER; 1065bf215546Sopenharmony_ci m_bc->force_add_cf = 1; 1066bf215546Sopenharmony_ci } else { 1067bf215546Sopenharmony_ci force_pop = 1; 1068bf215546Sopenharmony_ci } 1069bf215546Sopenharmony_ci } 1070bf215546Sopenharmony_ci 1071bf215546Sopenharmony_ci if (force_pop) { 1072bf215546Sopenharmony_ci r600_bytecode_add_cfinst(m_bc, CF_OP_POP); 1073bf215546Sopenharmony_ci m_bc->cf_last->pop_count = 1; 1074bf215546Sopenharmony_ci m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2; 1075bf215546Sopenharmony_ci } 1076bf215546Sopenharmony_ci 1077bf215546Sopenharmony_ci m_result &= m_jump_tracker.pop(m_bc->cf_last, jt_if); 1078bf215546Sopenharmony_ci} 1079bf215546Sopenharmony_ci 1080bf215546Sopenharmony_civoid AssamblerVisitor::emit_loop_begin(bool vpm) 1081bf215546Sopenharmony_ci{ 1082bf215546Sopenharmony_ci r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10); 1083bf215546Sopenharmony_ci m_bc->cf_last->vpm = vpm && m_bc->type == PIPE_SHADER_FRAGMENT; 1084bf215546Sopenharmony_ci m_jump_tracker.push(m_bc->cf_last, jt_loop); 1085bf215546Sopenharmony_ci m_callstack.push(FC_LOOP); 1086bf215546Sopenharmony_ci ++m_loop_nesting; 1087bf215546Sopenharmony_ci} 1088bf215546Sopenharmony_ci 1089bf215546Sopenharmony_civoid AssamblerVisitor::emit_loop_end() 1090bf215546Sopenharmony_ci{ 1091bf215546Sopenharmony_ci r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END); 1092bf215546Sopenharmony_ci m_callstack.pop(FC_LOOP); 1093bf215546Sopenharmony_ci assert(m_loop_nesting); 1094bf215546Sopenharmony_ci --m_loop_nesting; 1095bf215546Sopenharmony_ci m_result |= m_jump_tracker.pop(m_bc->cf_last, jt_loop); 1096bf215546Sopenharmony_ci} 1097bf215546Sopenharmony_ci 1098bf215546Sopenharmony_civoid AssamblerVisitor::emit_loop_break() 1099bf215546Sopenharmony_ci{ 1100bf215546Sopenharmony_ci r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK); 1101bf215546Sopenharmony_ci m_result |= m_jump_tracker.add_mid(m_bc->cf_last, jt_loop); 1102bf215546Sopenharmony_ci} 1103bf215546Sopenharmony_ci 1104bf215546Sopenharmony_civoid AssamblerVisitor::emit_loop_cont() 1105bf215546Sopenharmony_ci{ 1106bf215546Sopenharmony_ci r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE); 1107bf215546Sopenharmony_ci m_result |= m_jump_tracker.add_mid(m_bc->cf_last, jt_loop); 1108bf215546Sopenharmony_ci} 1109bf215546Sopenharmony_ci 1110bf215546Sopenharmony_cibool AssamblerVisitor::copy_dst(r600_bytecode_alu_dst& dst, 1111bf215546Sopenharmony_ci const Register& d, bool write) 1112bf215546Sopenharmony_ci{ 1113bf215546Sopenharmony_ci if (write && d.sel() > 124) { 1114bf215546Sopenharmony_ci R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", 1115bf215546Sopenharmony_ci d.sel()); 1116bf215546Sopenharmony_ci m_result = false; 1117bf215546Sopenharmony_ci return false; 1118bf215546Sopenharmony_ci } 1119bf215546Sopenharmony_ci 1120bf215546Sopenharmony_ci dst.sel = d.sel(); 1121bf215546Sopenharmony_ci dst.chan = d.chan(); 1122bf215546Sopenharmony_ci 1123bf215546Sopenharmony_ci if (m_bc->index_reg[1] == dst.sel && 1124bf215546Sopenharmony_ci m_bc->index_reg_chan[1] == dst.chan) 1125bf215546Sopenharmony_ci m_bc->index_loaded[1] = false; 1126bf215546Sopenharmony_ci 1127bf215546Sopenharmony_ci if (m_bc->index_reg[0] == dst.sel && 1128bf215546Sopenharmony_ci m_bc->index_reg_chan[0] == dst.chan) 1129bf215546Sopenharmony_ci m_bc->index_loaded[0] = false; 1130bf215546Sopenharmony_ci 1131bf215546Sopenharmony_ci return true; 1132bf215546Sopenharmony_ci} 1133bf215546Sopenharmony_ci 1134bf215546Sopenharmony_civoid AssamblerVisitor::emit_wait_ack() 1135bf215546Sopenharmony_ci{ 1136bf215546Sopenharmony_ci int r = r600_bytecode_add_cfinst(m_bc, CF_OP_WAIT_ACK); 1137bf215546Sopenharmony_ci if (!r) { 1138bf215546Sopenharmony_ci m_bc->cf_last->cf_addr = 0; 1139bf215546Sopenharmony_ci m_bc->cf_last->barrier = 1; 1140bf215546Sopenharmony_ci m_ack_suggested = false; 1141bf215546Sopenharmony_ci } else 1142bf215546Sopenharmony_ci m_result = false; 1143bf215546Sopenharmony_ci} 1144bf215546Sopenharmony_ci 1145bf215546Sopenharmony_ciclass EncodeSourceVisitor : public ConstRegisterVisitor { 1146bf215546Sopenharmony_cipublic: 1147bf215546Sopenharmony_ci 1148bf215546Sopenharmony_ci EncodeSourceVisitor(r600_bytecode_alu_src& s, r600_bytecode *bc); 1149bf215546Sopenharmony_ci void visit(const Register& value) override; 1150bf215546Sopenharmony_ci void visit(const LocalArray& value) override; 1151bf215546Sopenharmony_ci void visit(const LocalArrayValue& value) override; 1152bf215546Sopenharmony_ci void visit(const UniformValue& value) override; 1153bf215546Sopenharmony_ci void visit(const LiteralConstant& value) override; 1154bf215546Sopenharmony_ci void visit(const InlineConstant& value) override; 1155bf215546Sopenharmony_ci 1156bf215546Sopenharmony_ci r600_bytecode_alu_src& src; 1157bf215546Sopenharmony_ci r600_bytecode *m_bc; 1158bf215546Sopenharmony_ci PVirtualValue m_buffer_offset{nullptr}; 1159bf215546Sopenharmony_ci}; 1160bf215546Sopenharmony_ci 1161bf215546Sopenharmony_ciPVirtualValue AssamblerVisitor::copy_src(r600_bytecode_alu_src& src, const VirtualValue& s) 1162bf215546Sopenharmony_ci{ 1163bf215546Sopenharmony_ci 1164bf215546Sopenharmony_ci EncodeSourceVisitor visitor(src, m_bc); 1165bf215546Sopenharmony_ci src.sel = s.sel(); 1166bf215546Sopenharmony_ci src.chan = s.chan(); 1167bf215546Sopenharmony_ci 1168bf215546Sopenharmony_ci s.accept(visitor); 1169bf215546Sopenharmony_ci return visitor.m_buffer_offset; 1170bf215546Sopenharmony_ci} 1171bf215546Sopenharmony_ci 1172bf215546Sopenharmony_ciEncodeSourceVisitor::EncodeSourceVisitor(r600_bytecode_alu_src& s, r600_bytecode *bc): 1173bf215546Sopenharmony_ci src(s), m_bc(bc) 1174bf215546Sopenharmony_ci{ 1175bf215546Sopenharmony_ci} 1176bf215546Sopenharmony_ci 1177bf215546Sopenharmony_civoid EncodeSourceVisitor::visit(const Register& value) 1178bf215546Sopenharmony_ci{ 1179bf215546Sopenharmony_ci assert(value.sel() <= 124 && "Only have 124 registers"); 1180bf215546Sopenharmony_ci} 1181bf215546Sopenharmony_ci 1182bf215546Sopenharmony_civoid EncodeSourceVisitor::visit(const LocalArray& value) 1183bf215546Sopenharmony_ci{ 1184bf215546Sopenharmony_ci (void)value; 1185bf215546Sopenharmony_ci unreachable("An array can't be a source register"); 1186bf215546Sopenharmony_ci} 1187bf215546Sopenharmony_ci 1188bf215546Sopenharmony_civoid EncodeSourceVisitor::visit(const LocalArrayValue& value) 1189bf215546Sopenharmony_ci{ 1190bf215546Sopenharmony_ci src.rel = value.addr() ? 1 : 0; 1191bf215546Sopenharmony_ci} 1192bf215546Sopenharmony_ci 1193bf215546Sopenharmony_civoid EncodeSourceVisitor::visit(const UniformValue& value) 1194bf215546Sopenharmony_ci{ 1195bf215546Sopenharmony_ci assert(value.sel() >= 512 && "Uniform values must have a sel >= 512"); 1196bf215546Sopenharmony_ci m_buffer_offset = value.buf_addr(); 1197bf215546Sopenharmony_ci src.kc_bank = value.kcache_bank(); 1198bf215546Sopenharmony_ci} 1199bf215546Sopenharmony_ci 1200bf215546Sopenharmony_civoid EncodeSourceVisitor::visit(const LiteralConstant& value) 1201bf215546Sopenharmony_ci{ 1202bf215546Sopenharmony_ci src.value = value.value(); 1203bf215546Sopenharmony_ci} 1204bf215546Sopenharmony_ci 1205bf215546Sopenharmony_civoid EncodeSourceVisitor::visit(const InlineConstant& value) 1206bf215546Sopenharmony_ci{ 1207bf215546Sopenharmony_ci (void)value; 1208bf215546Sopenharmony_ci} 1209bf215546Sopenharmony_ci 1210bf215546Sopenharmony_ci 1211bf215546Sopenharmony_ci 1212bf215546Sopenharmony_ciconst std::map<EAluOp, int> opcode_map = { 1213bf215546Sopenharmony_ci 1214bf215546Sopenharmony_ci {op2_add, ALU_OP2_ADD}, 1215bf215546Sopenharmony_ci {op2_mul, ALU_OP2_MUL}, 1216bf215546Sopenharmony_ci {op2_mul_ieee, ALU_OP2_MUL_IEEE}, 1217bf215546Sopenharmony_ci {op2_max, ALU_OP2_MAX}, 1218bf215546Sopenharmony_ci {op2_min, ALU_OP2_MIN}, 1219bf215546Sopenharmony_ci {op2_max_dx10, ALU_OP2_MAX_DX10}, 1220bf215546Sopenharmony_ci {op2_min_dx10, ALU_OP2_MIN_DX10}, 1221bf215546Sopenharmony_ci {op2_sete, ALU_OP2_SETE}, 1222bf215546Sopenharmony_ci {op2_setgt, ALU_OP2_SETGT}, 1223bf215546Sopenharmony_ci {op2_setge, ALU_OP2_SETGE}, 1224bf215546Sopenharmony_ci {op2_setne, ALU_OP2_SETNE}, 1225bf215546Sopenharmony_ci {op2_sete_dx10, ALU_OP2_SETE_DX10}, 1226bf215546Sopenharmony_ci {op2_setgt_dx10, ALU_OP2_SETGT_DX10}, 1227bf215546Sopenharmony_ci {op2_setge_dx10, ALU_OP2_SETGE_DX10}, 1228bf215546Sopenharmony_ci {op2_setne_dx10, ALU_OP2_SETNE_DX10}, 1229bf215546Sopenharmony_ci {op1_fract, ALU_OP1_FRACT}, 1230bf215546Sopenharmony_ci {op1_trunc, ALU_OP1_TRUNC}, 1231bf215546Sopenharmony_ci {op1_ceil, ALU_OP1_CEIL}, 1232bf215546Sopenharmony_ci {op1_rndne, ALU_OP1_RNDNE}, 1233bf215546Sopenharmony_ci {op1_floor, ALU_OP1_FLOOR}, 1234bf215546Sopenharmony_ci {op2_ashr_int, ALU_OP2_ASHR_INT}, 1235bf215546Sopenharmony_ci {op2_lshr_int, ALU_OP2_LSHR_INT}, 1236bf215546Sopenharmony_ci {op2_lshl_int, ALU_OP2_LSHL_INT}, 1237bf215546Sopenharmony_ci {op1_mov, ALU_OP1_MOV}, 1238bf215546Sopenharmony_ci {op0_nop, ALU_OP0_NOP}, 1239bf215546Sopenharmony_ci {op2_mul_64, ALU_OP2_MUL_64}, 1240bf215546Sopenharmony_ci {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32}, 1241bf215546Sopenharmony_ci {op1v_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64}, 1242bf215546Sopenharmony_ci {op2_prede_int, ALU_OP2_PRED_SETE_INT}, 1243bf215546Sopenharmony_ci {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT}, 1244bf215546Sopenharmony_ci {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT}, 1245bf215546Sopenharmony_ci {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT}, 1246bf215546Sopenharmony_ci {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT}, 1247bf215546Sopenharmony_ci {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT}, 1248bf215546Sopenharmony_ci {op2_pred_sete, ALU_OP2_PRED_SETE}, 1249bf215546Sopenharmony_ci {op2_pred_setgt, ALU_OP2_PRED_SETGT}, 1250bf215546Sopenharmony_ci {op2_pred_setge, ALU_OP2_PRED_SETGE}, 1251bf215546Sopenharmony_ci {op2_pred_setne, ALU_OP2_PRED_SETNE}, 1252bf215546Sopenharmony_ci {op0_pred_set_clr, ALU_OP0_PRED_SET_CLR}, 1253bf215546Sopenharmony_ci {op1_pred_set_restore, ALU_OP1_PRED_SET_RESTORE}, 1254bf215546Sopenharmony_ci {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH}, 1255bf215546Sopenharmony_ci {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH}, 1256bf215546Sopenharmony_ci {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH}, 1257bf215546Sopenharmony_ci {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH}, 1258bf215546Sopenharmony_ci {op2_kille, ALU_OP2_KILLE}, 1259bf215546Sopenharmony_ci {op2_killgt, ALU_OP2_KILLGT}, 1260bf215546Sopenharmony_ci {op2_killge, ALU_OP2_KILLGE}, 1261bf215546Sopenharmony_ci {op2_killne, ALU_OP2_KILLNE}, 1262bf215546Sopenharmony_ci {op2_and_int, ALU_OP2_AND_INT}, 1263bf215546Sopenharmony_ci {op2_or_int, ALU_OP2_OR_INT}, 1264bf215546Sopenharmony_ci {op2_xor_int, ALU_OP2_XOR_INT}, 1265bf215546Sopenharmony_ci {op1_not_int, ALU_OP1_NOT_INT}, 1266bf215546Sopenharmony_ci {op2_add_int, ALU_OP2_ADD_INT}, 1267bf215546Sopenharmony_ci {op2_sub_int, ALU_OP2_SUB_INT}, 1268bf215546Sopenharmony_ci {op2_max_int, ALU_OP2_MAX_INT}, 1269bf215546Sopenharmony_ci {op2_min_int, ALU_OP2_MIN_INT}, 1270bf215546Sopenharmony_ci {op2_max_uint, ALU_OP2_MAX_UINT}, 1271bf215546Sopenharmony_ci {op2_min_uint, ALU_OP2_MIN_UINT}, 1272bf215546Sopenharmony_ci {op2_sete_int, ALU_OP2_SETE_INT}, 1273bf215546Sopenharmony_ci {op2_setgt_int, ALU_OP2_SETGT_INT}, 1274bf215546Sopenharmony_ci {op2_setge_int, ALU_OP2_SETGE_INT}, 1275bf215546Sopenharmony_ci {op2_setne_int, ALU_OP2_SETNE_INT}, 1276bf215546Sopenharmony_ci {op2_setgt_uint, ALU_OP2_SETGT_UINT}, 1277bf215546Sopenharmony_ci {op2_setge_uint, ALU_OP2_SETGE_UINT}, 1278bf215546Sopenharmony_ci {op2_killgt_uint, ALU_OP2_KILLGT_UINT}, 1279bf215546Sopenharmony_ci {op2_killge_uint, ALU_OP2_KILLGE_UINT}, 1280bf215546Sopenharmony_ci {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT}, 1281bf215546Sopenharmony_ci {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT}, 1282bf215546Sopenharmony_ci {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT}, 1283bf215546Sopenharmony_ci {op2_kille_int, ALU_OP2_KILLE_INT}, 1284bf215546Sopenharmony_ci {op2_killgt_int, ALU_OP2_KILLGT_INT}, 1285bf215546Sopenharmony_ci {op2_killge_int, ALU_OP2_KILLGE_INT}, 1286bf215546Sopenharmony_ci {op2_killne_int, ALU_OP2_KILLNE_INT}, 1287bf215546Sopenharmony_ci {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT}, 1288bf215546Sopenharmony_ci {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT}, 1289bf215546Sopenharmony_ci {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT}, 1290bf215546Sopenharmony_ci {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT}, 1291bf215546Sopenharmony_ci {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT}, 1292bf215546Sopenharmony_ci {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT}, 1293bf215546Sopenharmony_ci {op1_flt_to_int, ALU_OP1_FLT_TO_INT}, 1294bf215546Sopenharmony_ci {op1_bfrev_int, ALU_OP1_BFREV_INT}, 1295bf215546Sopenharmony_ci {op2_addc_uint, ALU_OP2_ADDC_UINT}, 1296bf215546Sopenharmony_ci {op2_subb_uint, ALU_OP2_SUBB_UINT}, 1297bf215546Sopenharmony_ci {op0_group_barrier, ALU_OP0_GROUP_BARRIER}, 1298bf215546Sopenharmony_ci {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN}, 1299bf215546Sopenharmony_ci {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END}, 1300bf215546Sopenharmony_ci {op2_set_mode, ALU_OP2_SET_MODE}, 1301bf215546Sopenharmony_ci {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0}, 1302bf215546Sopenharmony_ci {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1}, 1303bf215546Sopenharmony_ci {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE}, 1304bf215546Sopenharmony_ci {op1_exp_ieee, ALU_OP1_EXP_IEEE}, 1305bf215546Sopenharmony_ci {op1_log_clamped, ALU_OP1_LOG_CLAMPED}, 1306bf215546Sopenharmony_ci {op1_log_ieee, ALU_OP1_LOG_IEEE}, 1307bf215546Sopenharmony_ci {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED}, 1308bf215546Sopenharmony_ci {op1_recip_ff, ALU_OP1_RECIP_FF}, 1309bf215546Sopenharmony_ci {op1_recip_ieee, ALU_OP1_RECIP_IEEE}, 1310bf215546Sopenharmony_ci {op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED}, 1311bf215546Sopenharmony_ci {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF}, 1312bf215546Sopenharmony_ci {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE}, 1313bf215546Sopenharmony_ci {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE}, 1314bf215546Sopenharmony_ci {op1_sin, ALU_OP1_SIN}, 1315bf215546Sopenharmony_ci {op1_cos, ALU_OP1_COS}, 1316bf215546Sopenharmony_ci {op2_mullo_int, ALU_OP2_MULLO_INT}, 1317bf215546Sopenharmony_ci {op2_mulhi_int, ALU_OP2_MULHI_INT}, 1318bf215546Sopenharmony_ci {op2_mullo_uint, ALU_OP2_MULLO_UINT}, 1319bf215546Sopenharmony_ci {op2_mulhi_uint, ALU_OP2_MULHI_UINT}, 1320bf215546Sopenharmony_ci {op1_recip_int, ALU_OP1_RECIP_INT}, 1321bf215546Sopenharmony_ci {op1_recip_uint, ALU_OP1_RECIP_UINT}, 1322bf215546Sopenharmony_ci {op1_recip_64, ALU_OP2_RECIP_64}, 1323bf215546Sopenharmony_ci {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64}, 1324bf215546Sopenharmony_ci {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64}, 1325bf215546Sopenharmony_ci {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64}, 1326bf215546Sopenharmony_ci {op1_sqrt_64, ALU_OP2_SQRT_64}, 1327bf215546Sopenharmony_ci {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT}, 1328bf215546Sopenharmony_ci {op1_int_to_flt, ALU_OP1_INT_TO_FLT}, 1329bf215546Sopenharmony_ci {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT}, 1330bf215546Sopenharmony_ci {op2_bfm_int, ALU_OP2_BFM_INT}, 1331bf215546Sopenharmony_ci {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16}, 1332bf215546Sopenharmony_ci {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32}, 1333bf215546Sopenharmony_ci {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT}, 1334bf215546Sopenharmony_ci {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT}, 1335bf215546Sopenharmony_ci {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT}, 1336bf215546Sopenharmony_ci {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT}, 1337bf215546Sopenharmony_ci {op1_bcnt_int, ALU_OP1_BCNT_INT}, 1338bf215546Sopenharmony_ci {op1_ffbh_uint, ALU_OP1_FFBH_UINT}, 1339bf215546Sopenharmony_ci {op1_ffbl_int, ALU_OP1_FFBL_INT}, 1340bf215546Sopenharmony_ci {op1_ffbh_int, ALU_OP1_FFBH_INT}, 1341bf215546Sopenharmony_ci {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4}, 1342bf215546Sopenharmony_ci {op2_dot_ieee, ALU_OP2_DOT_IEEE}, 1343bf215546Sopenharmony_ci {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI}, 1344bf215546Sopenharmony_ci {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR}, 1345bf215546Sopenharmony_ci {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24}, 1346bf215546Sopenharmony_ci {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT}, 1347bf215546Sopenharmony_ci {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT}, 1348bf215546Sopenharmony_ci {op2_mul_uint24, ALU_OP2_MUL_UINT24}, 1349bf215546Sopenharmony_ci {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT}, 1350bf215546Sopenharmony_ci {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT}, 1351bf215546Sopenharmony_ci {op2_sete_64, ALU_OP2_SETE_64}, 1352bf215546Sopenharmony_ci {op2_setne_64, ALU_OP2_SETNE_64}, 1353bf215546Sopenharmony_ci {op2_setgt_64, ALU_OP2_SETGT_64}, 1354bf215546Sopenharmony_ci {op2_setge_64, ALU_OP2_SETGE_64}, 1355bf215546Sopenharmony_ci {op2_min_64, ALU_OP2_MIN_64}, 1356bf215546Sopenharmony_ci {op2_max_64, ALU_OP2_MAX_64}, 1357bf215546Sopenharmony_ci {op2_dot4, ALU_OP2_DOT4}, 1358bf215546Sopenharmony_ci {op2_dot4_ieee, ALU_OP2_DOT4_IEEE}, 1359bf215546Sopenharmony_ci {op2_cube, ALU_OP2_CUBE}, 1360bf215546Sopenharmony_ci {op1_max4, ALU_OP1_MAX4}, 1361bf215546Sopenharmony_ci {op1_frexp_64, ALU_OP1_FREXP_64}, 1362bf215546Sopenharmony_ci {op1_ldexp_64, ALU_OP2_LDEXP_64}, 1363bf215546Sopenharmony_ci {op1_fract_64, ALU_OP1_FRACT_64}, 1364bf215546Sopenharmony_ci {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64}, 1365bf215546Sopenharmony_ci {op2_pred_sete_64, ALU_OP2_PRED_SETE_64}, 1366bf215546Sopenharmony_ci {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64}, 1367bf215546Sopenharmony_ci {op2_add_64, ALU_OP2_ADD_64}, 1368bf215546Sopenharmony_ci {op1_mova_int, ALU_OP1_MOVA_INT}, 1369bf215546Sopenharmony_ci {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32}, 1370bf215546Sopenharmony_ci {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64}, 1371bf215546Sopenharmony_ci {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT}, 1372bf215546Sopenharmony_ci {op2_dot, ALU_OP2_DOT}, 1373bf215546Sopenharmony_ci {op1_mul_prev, ALU_OP1_MUL_PREV}, 1374bf215546Sopenharmony_ci {op1_mul_ieee_prev, ALU_OP1_MUL_IEEE_PREV}, 1375bf215546Sopenharmony_ci {op1_add_prev, ALU_OP1_ADD_PREV}, 1376bf215546Sopenharmony_ci {op2_muladd_prev, ALU_OP2_MULADD_PREV}, 1377bf215546Sopenharmony_ci {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV}, 1378bf215546Sopenharmony_ci {op2_interp_xy, ALU_OP2_INTERP_XY}, 1379bf215546Sopenharmony_ci {op2_interp_zw, ALU_OP2_INTERP_ZW}, 1380bf215546Sopenharmony_ci {op2_interp_x, ALU_OP2_INTERP_X}, 1381bf215546Sopenharmony_ci {op2_interp_z, ALU_OP2_INTERP_Z}, 1382bf215546Sopenharmony_ci {op0_store_flags, ALU_OP1_STORE_FLAGS}, 1383bf215546Sopenharmony_ci {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS}, 1384bf215546Sopenharmony_ci {op0_lds_1a, ALU_OP2_LDS_1A}, 1385bf215546Sopenharmony_ci {op0_lds_1a1d, ALU_OP2_LDS_1A1D}, 1386bf215546Sopenharmony_ci {op0_lds_2a, ALU_OP2_LDS_2A}, 1387bf215546Sopenharmony_ci {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0}, 1388bf215546Sopenharmony_ci {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10}, 1389bf215546Sopenharmony_ci {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20}, 1390bf215546Sopenharmony_ci {op3_bfe_uint, ALU_OP3_BFE_UINT}, 1391bf215546Sopenharmony_ci {op3_bfe_int, ALU_OP3_BFE_INT}, 1392bf215546Sopenharmony_ci {op3_bfi_int, ALU_OP3_BFI_INT}, 1393bf215546Sopenharmony_ci {op3_fma, ALU_OP3_FMA}, 1394bf215546Sopenharmony_ci {op3_cndne_64, ALU_OP3_CNDNE_64}, 1395bf215546Sopenharmony_ci {op3_fma_64, ALU_OP3_FMA_64}, 1396bf215546Sopenharmony_ci {op3_lerp_uint, ALU_OP3_LERP_UINT}, 1397bf215546Sopenharmony_ci {op3_bit_align_int, ALU_OP3_BIT_ALIGN_INT}, 1398bf215546Sopenharmony_ci {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT}, 1399bf215546Sopenharmony_ci {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT}, 1400bf215546Sopenharmony_ci {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT}, 1401bf215546Sopenharmony_ci {op3_muladd_uint24, ALU_OP3_MULADD_UINT24}, 1402bf215546Sopenharmony_ci {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP}, 1403bf215546Sopenharmony_ci {op3_muladd, ALU_OP3_MULADD}, 1404bf215546Sopenharmony_ci {op3_muladd_m2, ALU_OP3_MULADD_M2}, 1405bf215546Sopenharmony_ci {op3_muladd_m4, ALU_OP3_MULADD_M4}, 1406bf215546Sopenharmony_ci {op3_muladd_d2, ALU_OP3_MULADD_D2}, 1407bf215546Sopenharmony_ci {op3_muladd_ieee, ALU_OP3_MULADD_IEEE}, 1408bf215546Sopenharmony_ci {op3_cnde, ALU_OP3_CNDE}, 1409bf215546Sopenharmony_ci {op3_cndgt, ALU_OP3_CNDGT}, 1410bf215546Sopenharmony_ci {op3_cndge, ALU_OP3_CNDGE}, 1411bf215546Sopenharmony_ci {op3_cnde_int, ALU_OP3_CNDE_INT}, 1412bf215546Sopenharmony_ci {op3_cndgt_int, ALU_OP3_CNDGT_INT}, 1413bf215546Sopenharmony_ci {op3_cndge_int, ALU_OP3_CNDGE_INT}, 1414bf215546Sopenharmony_ci {op3_mul_lit, ALU_OP3_MUL_LIT}, 1415bf215546Sopenharmony_ci}; 1416bf215546Sopenharmony_ci 1417bf215546Sopenharmony_ciconst std::map<ESDOp, int> ds_opcode_map = { 1418bf215546Sopenharmony_ci {DS_OP_ADD, FETCH_OP_GDS_ADD}, 1419bf215546Sopenharmony_ci {DS_OP_SUB, FETCH_OP_GDS_SUB}, 1420bf215546Sopenharmony_ci {DS_OP_RSUB, FETCH_OP_GDS_RSUB}, 1421bf215546Sopenharmony_ci {DS_OP_INC, FETCH_OP_GDS_INC}, 1422bf215546Sopenharmony_ci {DS_OP_DEC, FETCH_OP_GDS_DEC}, 1423bf215546Sopenharmony_ci {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT}, 1424bf215546Sopenharmony_ci {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT}, 1425bf215546Sopenharmony_ci {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT}, 1426bf215546Sopenharmony_ci {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT}, 1427bf215546Sopenharmony_ci {DS_OP_AND, FETCH_OP_GDS_AND}, 1428bf215546Sopenharmony_ci {DS_OP_OR, FETCH_OP_GDS_OR}, 1429bf215546Sopenharmony_ci {DS_OP_XOR, FETCH_OP_GDS_XOR}, 1430bf215546Sopenharmony_ci {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR}, 1431bf215546Sopenharmony_ci {DS_OP_WRITE, FETCH_OP_GDS_WRITE}, 1432bf215546Sopenharmony_ci {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL}, 1433bf215546Sopenharmony_ci {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2}, 1434bf215546Sopenharmony_ci {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE}, 1435bf215546Sopenharmony_ci {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF}, 1436bf215546Sopenharmony_ci {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE}, 1437bf215546Sopenharmony_ci {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE}, 1438bf215546Sopenharmony_ci {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET}, 1439bf215546Sopenharmony_ci {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET}, 1440bf215546Sopenharmony_ci {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET}, 1441bf215546Sopenharmony_ci {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET}, 1442bf215546Sopenharmony_ci {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET}, 1443bf215546Sopenharmony_ci {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET}, 1444bf215546Sopenharmony_ci {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET}, 1445bf215546Sopenharmony_ci {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET}, 1446bf215546Sopenharmony_ci {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET}, 1447bf215546Sopenharmony_ci {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET}, 1448bf215546Sopenharmony_ci {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET}, 1449bf215546Sopenharmony_ci {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET}, 1450bf215546Sopenharmony_ci {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET}, 1451bf215546Sopenharmony_ci {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET}, 1452bf215546Sopenharmony_ci {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET}, 1453bf215546Sopenharmony_ci {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET}, 1454bf215546Sopenharmony_ci {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET}, 1455bf215546Sopenharmony_ci {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET}, 1456bf215546Sopenharmony_ci {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET}, 1457bf215546Sopenharmony_ci {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET}, 1458bf215546Sopenharmony_ci {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET}, 1459bf215546Sopenharmony_ci {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET}, 1460bf215546Sopenharmony_ci {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET}, 1461bf215546Sopenharmony_ci {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET}, 1462bf215546Sopenharmony_ci {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET}, 1463bf215546Sopenharmony_ci {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET}, 1464bf215546Sopenharmony_ci {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC}, 1465bf215546Sopenharmony_ci {DS_OP_INVALID, 0}, 1466bf215546Sopenharmony_ci}; 1467bf215546Sopenharmony_ci 1468bf215546Sopenharmony_ci} 1469