/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "aco_builder.h" 26bf215546Sopenharmony_ci#include "aco_ir.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include "common/sid.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "util/memstream.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci#include <algorithm> 33bf215546Sopenharmony_ci#include <map> 34bf215546Sopenharmony_ci#include <vector> 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_cinamespace aco { 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_cistruct constaddr_info { 39bf215546Sopenharmony_ci unsigned getpc_end; 40bf215546Sopenharmony_ci unsigned add_literal; 41bf215546Sopenharmony_ci}; 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_cistruct asm_context { 44bf215546Sopenharmony_ci Program* program; 45bf215546Sopenharmony_ci enum amd_gfx_level gfx_level; 46bf215546Sopenharmony_ci std::vector<std::pair<int, SOPP_instruction*>> branches; 47bf215546Sopenharmony_ci std::map<unsigned, constaddr_info> constaddrs; 48bf215546Sopenharmony_ci const int16_t* opcode; 49bf215546Sopenharmony_ci // TODO: keep track of branch instructions referring blocks 50bf215546Sopenharmony_ci // and, when emitting the block, correct the offset in instr 51bf215546Sopenharmony_ci asm_context(Program* program_) : program(program_), gfx_level(program->gfx_level) 52bf215546Sopenharmony_ci { 53bf215546Sopenharmony_ci if (gfx_level <= GFX7) 54bf215546Sopenharmony_ci opcode = &instr_info.opcode_gfx7[0]; 55bf215546Sopenharmony_ci else if (gfx_level <= GFX9) 56bf215546Sopenharmony_ci opcode = &instr_info.opcode_gfx9[0]; 57bf215546Sopenharmony_ci else if (gfx_level >= GFX10) 58bf215546Sopenharmony_ci opcode = &instr_info.opcode_gfx10[0]; 59bf215546Sopenharmony_ci } 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci int subvector_begin_pos = -1; 62bf215546Sopenharmony_ci}; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ciunsigned 
65bf215546Sopenharmony_ciget_mimg_nsa_dwords(const Instruction* instr) 66bf215546Sopenharmony_ci{ 67bf215546Sopenharmony_ci unsigned addr_dwords = instr->operands.size() - 3; 68bf215546Sopenharmony_ci for (unsigned i = 1; i < addr_dwords; i++) { 69bf215546Sopenharmony_ci if (instr->operands[3 + i].physReg() != instr->operands[3].physReg().advance(i * 4)) 70bf215546Sopenharmony_ci return DIV_ROUND_UP(addr_dwords - 1, 4); 71bf215546Sopenharmony_ci } 72bf215546Sopenharmony_ci return 0; 73bf215546Sopenharmony_ci} 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_civoid 76bf215546Sopenharmony_ciemit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr) 77bf215546Sopenharmony_ci{ 78bf215546Sopenharmony_ci /* lower remaining pseudo-instructions */ 79bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_constaddr_getpc) { 80bf215546Sopenharmony_ci ctx.constaddrs[instr->operands[0].constantValue()].getpc_end = out.size() + 1; 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ci instr->opcode = aco_opcode::s_getpc_b64; 83bf215546Sopenharmony_ci instr->operands.pop_back(); 84bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::p_constaddr_addlo) { 85bf215546Sopenharmony_ci ctx.constaddrs[instr->operands[2].constantValue()].add_literal = out.size() + 1; 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci instr->opcode = aco_opcode::s_add_u32; 88bf215546Sopenharmony_ci instr->operands.pop_back(); 89bf215546Sopenharmony_ci assert(instr->operands[1].isConstant()); 90bf215546Sopenharmony_ci /* in case it's an inline constant, make it a literal */ 91bf215546Sopenharmony_ci instr->operands[1] = Operand::literal32(instr->operands[1].constantValue()); 92bf215546Sopenharmony_ci } 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci uint32_t opcode = ctx.opcode[(int)instr->opcode]; 95bf215546Sopenharmony_ci if (opcode == (uint32_t)-1) { 96bf215546Sopenharmony_ci char* outmem; 97bf215546Sopenharmony_ci size_t outsize; 98bf215546Sopenharmony_ci struct 
u_memstream mem; 99bf215546Sopenharmony_ci u_memstream_open(&mem, &outmem, &outsize); 100bf215546Sopenharmony_ci FILE* const memf = u_memstream_get(&mem); 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci fprintf(memf, "Unsupported opcode: "); 103bf215546Sopenharmony_ci aco_print_instr(instr, memf); 104bf215546Sopenharmony_ci u_memstream_close(&mem); 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci aco_err(ctx.program, outmem); 107bf215546Sopenharmony_ci free(outmem); 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci abort(); 110bf215546Sopenharmony_ci } 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci switch (instr->format) { 113bf215546Sopenharmony_ci case Format::SOP2: { 114bf215546Sopenharmony_ci uint32_t encoding = (0b10 << 30); 115bf215546Sopenharmony_ci encoding |= opcode << 23; 116bf215546Sopenharmony_ci encoding |= !instr->definitions.empty() ? instr->definitions[0].physReg() << 16 : 0; 117bf215546Sopenharmony_ci encoding |= instr->operands.size() >= 2 ? instr->operands[1].physReg() << 8 : 0; 118bf215546Sopenharmony_ci encoding |= !instr->operands.empty() ? 
instr->operands[0].physReg() : 0; 119bf215546Sopenharmony_ci out.push_back(encoding); 120bf215546Sopenharmony_ci break; 121bf215546Sopenharmony_ci } 122bf215546Sopenharmony_ci case Format::SOPK: { 123bf215546Sopenharmony_ci SOPK_instruction& sopk = instr->sopk(); 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::s_subvector_loop_begin) { 126bf215546Sopenharmony_ci assert(ctx.gfx_level >= GFX10); 127bf215546Sopenharmony_ci assert(ctx.subvector_begin_pos == -1); 128bf215546Sopenharmony_ci ctx.subvector_begin_pos = out.size(); 129bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::s_subvector_loop_end) { 130bf215546Sopenharmony_ci assert(ctx.gfx_level >= GFX10); 131bf215546Sopenharmony_ci assert(ctx.subvector_begin_pos != -1); 132bf215546Sopenharmony_ci /* Adjust s_subvector_loop_begin instruction to the address after the end */ 133bf215546Sopenharmony_ci out[ctx.subvector_begin_pos] |= (out.size() - ctx.subvector_begin_pos); 134bf215546Sopenharmony_ci /* Adjust s_subvector_loop_end instruction to the address after the beginning */ 135bf215546Sopenharmony_ci sopk.imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size()); 136bf215546Sopenharmony_ci ctx.subvector_begin_pos = -1; 137bf215546Sopenharmony_ci } 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci uint32_t encoding = (0b1011 << 28); 140bf215546Sopenharmony_ci encoding |= opcode << 23; 141bf215546Sopenharmony_ci encoding |= !instr->definitions.empty() && !(instr->definitions[0].physReg() == scc) 142bf215546Sopenharmony_ci ? instr->definitions[0].physReg() << 16 143bf215546Sopenharmony_ci : !instr->operands.empty() && instr->operands[0].physReg() <= 127 144bf215546Sopenharmony_ci ? 
instr->operands[0].physReg() << 16 145bf215546Sopenharmony_ci : 0; 146bf215546Sopenharmony_ci encoding |= sopk.imm; 147bf215546Sopenharmony_ci out.push_back(encoding); 148bf215546Sopenharmony_ci break; 149bf215546Sopenharmony_ci } 150bf215546Sopenharmony_ci case Format::SOP1: { 151bf215546Sopenharmony_ci uint32_t encoding = (0b101111101 << 23); 152bf215546Sopenharmony_ci encoding |= !instr->definitions.empty() ? instr->definitions[0].physReg() << 16 : 0; 153bf215546Sopenharmony_ci encoding |= opcode << 8; 154bf215546Sopenharmony_ci encoding |= !instr->operands.empty() ? instr->operands[0].physReg() : 0; 155bf215546Sopenharmony_ci out.push_back(encoding); 156bf215546Sopenharmony_ci break; 157bf215546Sopenharmony_ci } 158bf215546Sopenharmony_ci case Format::SOPC: { 159bf215546Sopenharmony_ci uint32_t encoding = (0b101111110 << 23); 160bf215546Sopenharmony_ci encoding |= opcode << 16; 161bf215546Sopenharmony_ci encoding |= instr->operands.size() == 2 ? instr->operands[1].physReg() << 8 : 0; 162bf215546Sopenharmony_ci encoding |= !instr->operands.empty() ? 
instr->operands[0].physReg() : 0; 163bf215546Sopenharmony_ci out.push_back(encoding); 164bf215546Sopenharmony_ci break; 165bf215546Sopenharmony_ci } 166bf215546Sopenharmony_ci case Format::SOPP: { 167bf215546Sopenharmony_ci SOPP_instruction& sopp = instr->sopp(); 168bf215546Sopenharmony_ci uint32_t encoding = (0b101111111 << 23); 169bf215546Sopenharmony_ci encoding |= opcode << 16; 170bf215546Sopenharmony_ci encoding |= (uint16_t)sopp.imm; 171bf215546Sopenharmony_ci if (sopp.block != -1) { 172bf215546Sopenharmony_ci sopp.pass_flags = 0; 173bf215546Sopenharmony_ci ctx.branches.emplace_back(out.size(), &sopp); 174bf215546Sopenharmony_ci } 175bf215546Sopenharmony_ci out.push_back(encoding); 176bf215546Sopenharmony_ci break; 177bf215546Sopenharmony_ci } 178bf215546Sopenharmony_ci case Format::SMEM: { 179bf215546Sopenharmony_ci SMEM_instruction& smem = instr->smem(); 180bf215546Sopenharmony_ci bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4); 181bf215546Sopenharmony_ci bool is_load = !instr->definitions.empty(); 182bf215546Sopenharmony_ci uint32_t encoding = 0; 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci if (ctx.gfx_level <= GFX7) { 185bf215546Sopenharmony_ci encoding = (0b11000 << 27); 186bf215546Sopenharmony_ci encoding |= opcode << 22; 187bf215546Sopenharmony_ci encoding |= instr->definitions.size() ? instr->definitions[0].physReg() << 15 : 0; 188bf215546Sopenharmony_ci encoding |= instr->operands.size() ? 
(instr->operands[0].physReg() >> 1) << 9 : 0; 189bf215546Sopenharmony_ci if (instr->operands.size() >= 2) { 190bf215546Sopenharmony_ci if (!instr->operands[1].isConstant()) { 191bf215546Sopenharmony_ci encoding |= instr->operands[1].physReg().reg(); 192bf215546Sopenharmony_ci } else if (instr->operands[1].constantValue() >= 1024) { 193bf215546Sopenharmony_ci encoding |= 255; /* SQ_SRC_LITERAL */ 194bf215546Sopenharmony_ci } else { 195bf215546Sopenharmony_ci encoding |= instr->operands[1].constantValue() >> 2; 196bf215546Sopenharmony_ci encoding |= 1 << 8; 197bf215546Sopenharmony_ci } 198bf215546Sopenharmony_ci } 199bf215546Sopenharmony_ci out.push_back(encoding); 200bf215546Sopenharmony_ci /* SMRD instructions can take a literal on GFX7 */ 201bf215546Sopenharmony_ci if (instr->operands.size() >= 2 && instr->operands[1].isConstant() && 202bf215546Sopenharmony_ci instr->operands[1].constantValue() >= 1024) 203bf215546Sopenharmony_ci out.push_back(instr->operands[1].constantValue() >> 2); 204bf215546Sopenharmony_ci return; 205bf215546Sopenharmony_ci } 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci if (ctx.gfx_level <= GFX9) { 208bf215546Sopenharmony_ci encoding = (0b110000 << 26); 209bf215546Sopenharmony_ci assert(!smem.dlc); /* Device-level coherent is not supported on GFX9 and lower */ 210bf215546Sopenharmony_ci encoding |= smem.nv ? 1 << 15 : 0; 211bf215546Sopenharmony_ci } else { 212bf215546Sopenharmony_ci encoding = (0b111101 << 26); 213bf215546Sopenharmony_ci assert(!smem.nv); /* Non-volatile is not supported on GFX10 */ 214bf215546Sopenharmony_ci encoding |= smem.dlc ? 1 << 14 : 0; 215bf215546Sopenharmony_ci } 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci encoding |= opcode << 18; 218bf215546Sopenharmony_ci encoding |= smem.glc ? 
1 << 16 : 0; 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci if (ctx.gfx_level <= GFX9) { 221bf215546Sopenharmony_ci if (instr->operands.size() >= 2) 222bf215546Sopenharmony_ci encoding |= instr->operands[1].isConstant() ? 1 << 17 : 0; /* IMM - immediate enable */ 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci if (ctx.gfx_level == GFX9) { 225bf215546Sopenharmony_ci encoding |= soe ? 1 << 14 : 0; 226bf215546Sopenharmony_ci } 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci if (is_load || instr->operands.size() >= 3) { /* SDATA */ 229bf215546Sopenharmony_ci encoding |= (is_load ? instr->definitions[0].physReg() : instr->operands[2].physReg()) 230bf215546Sopenharmony_ci << 6; 231bf215546Sopenharmony_ci } 232bf215546Sopenharmony_ci if (instr->operands.size() >= 1) { /* SBASE */ 233bf215546Sopenharmony_ci encoding |= instr->operands[0].physReg() >> 1; 234bf215546Sopenharmony_ci } 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci out.push_back(encoding); 237bf215546Sopenharmony_ci encoding = 0; 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci int32_t offset = 0; 240bf215546Sopenharmony_ci uint32_t soffset = ctx.gfx_level >= GFX10 241bf215546Sopenharmony_ci ? sgpr_null /* On GFX10 this is disabled by specifying SGPR_NULL */ 242bf215546Sopenharmony_ci : 0; /* On GFX9, it is disabled by the SOE bit (and it's not present on 243bf215546Sopenharmony_ci GFX8 and below) */ 244bf215546Sopenharmony_ci if (instr->operands.size() >= 2) { 245bf215546Sopenharmony_ci const Operand& op_off1 = instr->operands[1]; 246bf215546Sopenharmony_ci if (ctx.gfx_level <= GFX9) { 247bf215546Sopenharmony_ci offset = op_off1.isConstant() ? 
op_off1.constantValue() : op_off1.physReg(); 248bf215546Sopenharmony_ci } else { 249bf215546Sopenharmony_ci /* GFX10 only supports constants in OFFSET, so put the operand in SOFFSET if it's an 250bf215546Sopenharmony_ci * SGPR */ 251bf215546Sopenharmony_ci if (op_off1.isConstant()) { 252bf215546Sopenharmony_ci offset = op_off1.constantValue(); 253bf215546Sopenharmony_ci } else { 254bf215546Sopenharmony_ci soffset = op_off1.physReg(); 255bf215546Sopenharmony_ci assert(!soe); /* There is no place to put the other SGPR offset, if any */ 256bf215546Sopenharmony_ci } 257bf215546Sopenharmony_ci } 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci if (soe) { 260bf215546Sopenharmony_ci const Operand& op_off2 = instr->operands.back(); 261bf215546Sopenharmony_ci assert(ctx.gfx_level >= GFX9); /* GFX8 and below don't support specifying a constant 262bf215546Sopenharmony_ci and an SGPR at the same time */ 263bf215546Sopenharmony_ci assert(!op_off2.isConstant()); 264bf215546Sopenharmony_ci soffset = op_off2.physReg(); 265bf215546Sopenharmony_ci } 266bf215546Sopenharmony_ci } 267bf215546Sopenharmony_ci encoding |= offset; 268bf215546Sopenharmony_ci encoding |= soffset << 25; 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci out.push_back(encoding); 271bf215546Sopenharmony_ci return; 272bf215546Sopenharmony_ci } 273bf215546Sopenharmony_ci case Format::VOP2: { 274bf215546Sopenharmony_ci uint32_t encoding = 0; 275bf215546Sopenharmony_ci encoding |= opcode << 25; 276bf215546Sopenharmony_ci encoding |= (0xFF & instr->definitions[0].physReg()) << 17; 277bf215546Sopenharmony_ci encoding |= (0xFF & instr->operands[1].physReg()) << 9; 278bf215546Sopenharmony_ci encoding |= instr->operands[0].physReg(); 279bf215546Sopenharmony_ci out.push_back(encoding); 280bf215546Sopenharmony_ci break; 281bf215546Sopenharmony_ci } 282bf215546Sopenharmony_ci case Format::VOP1: { 283bf215546Sopenharmony_ci uint32_t encoding = (0b0111111 << 25); 284bf215546Sopenharmony_ci if 
(!instr->definitions.empty()) 285bf215546Sopenharmony_ci encoding |= (0xFF & instr->definitions[0].physReg()) << 17; 286bf215546Sopenharmony_ci encoding |= opcode << 9; 287bf215546Sopenharmony_ci if (!instr->operands.empty()) 288bf215546Sopenharmony_ci encoding |= instr->operands[0].physReg(); 289bf215546Sopenharmony_ci out.push_back(encoding); 290bf215546Sopenharmony_ci break; 291bf215546Sopenharmony_ci } 292bf215546Sopenharmony_ci case Format::VOPC: { 293bf215546Sopenharmony_ci uint32_t encoding = (0b0111110 << 25); 294bf215546Sopenharmony_ci encoding |= opcode << 17; 295bf215546Sopenharmony_ci encoding |= (0xFF & instr->operands[1].physReg()) << 9; 296bf215546Sopenharmony_ci encoding |= instr->operands[0].physReg(); 297bf215546Sopenharmony_ci out.push_back(encoding); 298bf215546Sopenharmony_ci break; 299bf215546Sopenharmony_ci } 300bf215546Sopenharmony_ci case Format::VINTRP: { 301bf215546Sopenharmony_ci Interp_instruction& interp = instr->vintrp(); 302bf215546Sopenharmony_ci uint32_t encoding = 0; 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::v_interp_p1ll_f16 || 305bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_interp_p1lv_f16 || 306bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_interp_p2_legacy_f16 || 307bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_interp_p2_f16) { 308bf215546Sopenharmony_ci if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) { 309bf215546Sopenharmony_ci encoding = (0b110100 << 26); 310bf215546Sopenharmony_ci } else if (ctx.gfx_level >= GFX10) { 311bf215546Sopenharmony_ci encoding = (0b110101 << 26); 312bf215546Sopenharmony_ci } else { 313bf215546Sopenharmony_ci unreachable("Unknown gfx_level."); 314bf215546Sopenharmony_ci } 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci encoding |= opcode << 16; 317bf215546Sopenharmony_ci encoding |= (0xFF & instr->definitions[0].physReg()); 318bf215546Sopenharmony_ci out.push_back(encoding); 319bf215546Sopenharmony_ci 
320bf215546Sopenharmony_ci encoding = 0; 321bf215546Sopenharmony_ci encoding |= interp.attribute; 322bf215546Sopenharmony_ci encoding |= interp.component << 6; 323bf215546Sopenharmony_ci encoding |= instr->operands[0].physReg() << 9; 324bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::v_interp_p2_f16 || 325bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_interp_p2_legacy_f16 || 326bf215546Sopenharmony_ci instr->opcode == aco_opcode::v_interp_p1lv_f16) { 327bf215546Sopenharmony_ci encoding |= instr->operands[2].physReg() << 18; 328bf215546Sopenharmony_ci } 329bf215546Sopenharmony_ci out.push_back(encoding); 330bf215546Sopenharmony_ci } else { 331bf215546Sopenharmony_ci if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) { 332bf215546Sopenharmony_ci encoding = (0b110101 << 26); /* Vega ISA doc says 110010 but it's wrong */ 333bf215546Sopenharmony_ci } else { 334bf215546Sopenharmony_ci encoding = (0b110010 << 26); 335bf215546Sopenharmony_ci } 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci assert(encoding); 338bf215546Sopenharmony_ci encoding |= (0xFF & instr->definitions[0].physReg()) << 18; 339bf215546Sopenharmony_ci encoding |= opcode << 16; 340bf215546Sopenharmony_ci encoding |= interp.attribute << 10; 341bf215546Sopenharmony_ci encoding |= interp.component << 8; 342bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::v_interp_mov_f32) 343bf215546Sopenharmony_ci encoding |= (0x3 & instr->operands[0].constantValue()); 344bf215546Sopenharmony_ci else 345bf215546Sopenharmony_ci encoding |= (0xFF & instr->operands[0].physReg()); 346bf215546Sopenharmony_ci out.push_back(encoding); 347bf215546Sopenharmony_ci } 348bf215546Sopenharmony_ci break; 349bf215546Sopenharmony_ci } 350bf215546Sopenharmony_ci case Format::DS: { 351bf215546Sopenharmony_ci DS_instruction& ds = instr->ds(); 352bf215546Sopenharmony_ci uint32_t encoding = (0b110110 << 26); 353bf215546Sopenharmony_ci if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) { 354bf215546Sopenharmony_ci 
encoding |= opcode << 17; 355bf215546Sopenharmony_ci encoding |= (ds.gds ? 1 : 0) << 16; 356bf215546Sopenharmony_ci } else { 357bf215546Sopenharmony_ci encoding |= opcode << 18; 358bf215546Sopenharmony_ci encoding |= (ds.gds ? 1 : 0) << 17; 359bf215546Sopenharmony_ci } 360bf215546Sopenharmony_ci encoding |= ((0xFF & ds.offset1) << 8); 361bf215546Sopenharmony_ci encoding |= (0xFFFF & ds.offset0); 362bf215546Sopenharmony_ci out.push_back(encoding); 363bf215546Sopenharmony_ci encoding = 0; 364bf215546Sopenharmony_ci unsigned reg = !instr->definitions.empty() ? instr->definitions[0].physReg() : 0; 365bf215546Sopenharmony_ci encoding |= (0xFF & reg) << 24; 366bf215546Sopenharmony_ci reg = instr->operands.size() >= 3 && !(instr->operands[2].physReg() == m0) 367bf215546Sopenharmony_ci ? instr->operands[2].physReg() 368bf215546Sopenharmony_ci : 0; 369bf215546Sopenharmony_ci encoding |= (0xFF & reg) << 16; 370bf215546Sopenharmony_ci reg = instr->operands.size() >= 2 && !(instr->operands[1].physReg() == m0) 371bf215546Sopenharmony_ci ? instr->operands[1].physReg() 372bf215546Sopenharmony_ci : 0; 373bf215546Sopenharmony_ci encoding |= (0xFF & reg) << 8; 374bf215546Sopenharmony_ci encoding |= (0xFF & instr->operands[0].physReg()); 375bf215546Sopenharmony_ci out.push_back(encoding); 376bf215546Sopenharmony_ci break; 377bf215546Sopenharmony_ci } 378bf215546Sopenharmony_ci case Format::MUBUF: { 379bf215546Sopenharmony_ci MUBUF_instruction& mubuf = instr->mubuf(); 380bf215546Sopenharmony_ci uint32_t encoding = (0b111000 << 26); 381bf215546Sopenharmony_ci encoding |= opcode << 18; 382bf215546Sopenharmony_ci encoding |= (mubuf.lds ? 1 : 0) << 16; 383bf215546Sopenharmony_ci encoding |= (mubuf.glc ? 1 : 0) << 14; 384bf215546Sopenharmony_ci encoding |= (mubuf.idxen ? 1 : 0) << 13; 385bf215546Sopenharmony_ci assert(!mubuf.addr64 || ctx.gfx_level <= GFX7); 386bf215546Sopenharmony_ci if (ctx.gfx_level == GFX6 || ctx.gfx_level == GFX7) 387bf215546Sopenharmony_ci encoding |= (mubuf.addr64 ? 
1 : 0) << 15; 388bf215546Sopenharmony_ci encoding |= (mubuf.offen ? 1 : 0) << 12; 389bf215546Sopenharmony_ci if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) { 390bf215546Sopenharmony_ci assert(!mubuf.dlc); /* Device-level coherent is not supported on GFX9 and lower */ 391bf215546Sopenharmony_ci encoding |= (mubuf.slc ? 1 : 0) << 17; 392bf215546Sopenharmony_ci } else if (ctx.gfx_level >= GFX10) { 393bf215546Sopenharmony_ci encoding |= (mubuf.dlc ? 1 : 0) << 15; 394bf215546Sopenharmony_ci } 395bf215546Sopenharmony_ci encoding |= 0x0FFF & mubuf.offset; 396bf215546Sopenharmony_ci out.push_back(encoding); 397bf215546Sopenharmony_ci encoding = 0; 398bf215546Sopenharmony_ci if (ctx.gfx_level <= GFX7 || ctx.gfx_level >= GFX10) { 399bf215546Sopenharmony_ci encoding |= (mubuf.slc ? 1 : 0) << 22; 400bf215546Sopenharmony_ci } 401bf215546Sopenharmony_ci encoding |= instr->operands[2].physReg() << 24; 402bf215546Sopenharmony_ci encoding |= (mubuf.tfe ? 1 : 0) << 23; 403bf215546Sopenharmony_ci encoding |= (instr->operands[0].physReg() >> 2) << 16; 404bf215546Sopenharmony_ci unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() 405bf215546Sopenharmony_ci : instr->definitions[0].physReg(); 406bf215546Sopenharmony_ci encoding |= (0xFF & reg) << 8; 407bf215546Sopenharmony_ci encoding |= (0xFF & instr->operands[1].physReg()); 408bf215546Sopenharmony_ci out.push_back(encoding); 409bf215546Sopenharmony_ci break; 410bf215546Sopenharmony_ci } 411bf215546Sopenharmony_ci case Format::MTBUF: { 412bf215546Sopenharmony_ci MTBUF_instruction& mtbuf = instr->mtbuf(); 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci uint32_t img_format = ac_get_tbuffer_format(ctx.gfx_level, mtbuf.dfmt, mtbuf.nfmt); 415bf215546Sopenharmony_ci uint32_t encoding = (0b111010 << 26); 416bf215546Sopenharmony_ci assert(img_format <= 0x7F); 417bf215546Sopenharmony_ci assert(!mtbuf.dlc || ctx.gfx_level >= GFX10); 418bf215546Sopenharmony_ci encoding |= (mtbuf.dlc ? 
1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */ 419bf215546Sopenharmony_ci encoding |= (mtbuf.glc ? 1 : 0) << 14; 420bf215546Sopenharmony_ci encoding |= (mtbuf.idxen ? 1 : 0) << 13; 421bf215546Sopenharmony_ci encoding |= (mtbuf.offen ? 1 : 0) << 12; 422bf215546Sopenharmony_ci encoding |= 0x0FFF & mtbuf.offset; 423bf215546Sopenharmony_ci encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */ 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) { 426bf215546Sopenharmony_ci encoding |= opcode << 15; 427bf215546Sopenharmony_ci } else { 428bf215546Sopenharmony_ci encoding |= (opcode & 0x07) << 16; /* 3 LSBs of 4-bit OPCODE */ 429bf215546Sopenharmony_ci } 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci out.push_back(encoding); 432bf215546Sopenharmony_ci encoding = 0; 433bf215546Sopenharmony_ci 434bf215546Sopenharmony_ci encoding |= instr->operands[2].physReg() << 24; 435bf215546Sopenharmony_ci encoding |= (mtbuf.tfe ? 1 : 0) << 23; 436bf215546Sopenharmony_ci encoding |= (mtbuf.slc ? 1 : 0) << 22; 437bf215546Sopenharmony_ci encoding |= (instr->operands[0].physReg() >> 2) << 16; 438bf215546Sopenharmony_ci unsigned reg = instr->operands.size() > 3 ? 
instr->operands[3].physReg() 439bf215546Sopenharmony_ci : instr->definitions[0].physReg(); 440bf215546Sopenharmony_ci encoding |= (0xFF & reg) << 8; 441bf215546Sopenharmony_ci encoding |= (0xFF & instr->operands[1].physReg()); 442bf215546Sopenharmony_ci 443bf215546Sopenharmony_ci if (ctx.gfx_level >= GFX10) { 444bf215546Sopenharmony_ci encoding |= (((opcode & 0x08) >> 3) << 21); /* MSB of 4-bit OPCODE */ 445bf215546Sopenharmony_ci } 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci out.push_back(encoding); 448bf215546Sopenharmony_ci break; 449bf215546Sopenharmony_ci } 450bf215546Sopenharmony_ci case Format::MIMG: { 451bf215546Sopenharmony_ci unsigned nsa_dwords = get_mimg_nsa_dwords(instr); 452bf215546Sopenharmony_ci assert(!nsa_dwords || ctx.gfx_level >= GFX10); 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci MIMG_instruction& mimg = instr->mimg(); 455bf215546Sopenharmony_ci uint32_t encoding = (0b111100 << 26); 456bf215546Sopenharmony_ci encoding |= mimg.slc ? 1 << 25 : 0; 457bf215546Sopenharmony_ci encoding |= (opcode & 0x7f) << 18; 458bf215546Sopenharmony_ci encoding |= (opcode >> 7) & 1; 459bf215546Sopenharmony_ci encoding |= mimg.lwe ? 1 << 17 : 0; 460bf215546Sopenharmony_ci encoding |= mimg.tfe ? 1 << 16 : 0; 461bf215546Sopenharmony_ci encoding |= mimg.glc ? 1 << 13 : 0; 462bf215546Sopenharmony_ci encoding |= mimg.unrm ? 1 << 12 : 0; 463bf215546Sopenharmony_ci if (ctx.gfx_level <= GFX9) { 464bf215546Sopenharmony_ci assert(!mimg.dlc); /* Device-level coherent is not supported on GFX9 and lower */ 465bf215546Sopenharmony_ci assert(!mimg.r128); 466bf215546Sopenharmony_ci encoding |= mimg.a16 ? 1 << 15 : 0; 467bf215546Sopenharmony_ci encoding |= mimg.da ? 1 << 14 : 0; 468bf215546Sopenharmony_ci } else { 469bf215546Sopenharmony_ci encoding |= mimg.r128 ? 
1 << 15 470bf215546Sopenharmony_ci : 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */ 471bf215546Sopenharmony_ci encoding |= nsa_dwords << 1; 472bf215546Sopenharmony_ci encoding |= mimg.dim << 3; /* GFX10: dimensionality instead of declare array */ 473bf215546Sopenharmony_ci encoding |= mimg.dlc ? 1 << 7 : 0; 474bf215546Sopenharmony_ci } 475bf215546Sopenharmony_ci encoding |= (0xF & mimg.dmask) << 8; 476bf215546Sopenharmony_ci out.push_back(encoding); 477bf215546Sopenharmony_ci encoding = (0xFF & instr->operands[3].physReg()); /* VADDR */ 478bf215546Sopenharmony_ci if (!instr->definitions.empty()) { 479bf215546Sopenharmony_ci encoding |= (0xFF & instr->definitions[0].physReg()) << 8; /* VDATA */ 480bf215546Sopenharmony_ci } else if (!instr->operands[2].isUndefined()) { 481bf215546Sopenharmony_ci encoding |= (0xFF & instr->operands[2].physReg()) << 8; /* VDATA */ 482bf215546Sopenharmony_ci } 483bf215546Sopenharmony_ci encoding |= (0x1F & (instr->operands[0].physReg() >> 2)) << 16; /* T# (resource) */ 484bf215546Sopenharmony_ci if (!instr->operands[1].isUndefined()) 485bf215546Sopenharmony_ci encoding |= (0x1F & (instr->operands[1].physReg() >> 2)) << 21; /* sampler */ 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci assert(!mimg.d16 || ctx.gfx_level >= GFX9); 488bf215546Sopenharmony_ci encoding |= mimg.d16 ? 1 << 31 : 0; 489bf215546Sopenharmony_ci if (ctx.gfx_level >= GFX10) { 490bf215546Sopenharmony_ci /* GFX10: A16 still exists, but is in a different place */ 491bf215546Sopenharmony_ci encoding |= mimg.a16 ? 
1 << 30 : 0; 492bf215546Sopenharmony_ci } 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci out.push_back(encoding); 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci if (nsa_dwords) { 497bf215546Sopenharmony_ci out.resize(out.size() + nsa_dwords); 498bf215546Sopenharmony_ci std::vector<uint32_t>::iterator nsa = std::prev(out.end(), nsa_dwords); 499bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size() - 4u; i++) 500bf215546Sopenharmony_ci nsa[i / 4] |= (0xFF & instr->operands[4 + i].physReg().reg()) << (i % 4 * 8); 501bf215546Sopenharmony_ci } 502bf215546Sopenharmony_ci break; 503bf215546Sopenharmony_ci } 504bf215546Sopenharmony_ci case Format::FLAT: 505bf215546Sopenharmony_ci case Format::SCRATCH: 506bf215546Sopenharmony_ci case Format::GLOBAL: { 507bf215546Sopenharmony_ci FLAT_instruction& flat = instr->flatlike(); 508bf215546Sopenharmony_ci uint32_t encoding = (0b110111 << 26); 509bf215546Sopenharmony_ci encoding |= opcode << 18; 510bf215546Sopenharmony_ci if (ctx.gfx_level == GFX9 || ctx.gfx_level >= GFX11) { 511bf215546Sopenharmony_ci if (instr->isFlat()) 512bf215546Sopenharmony_ci assert(flat.offset <= 0xfff); 513bf215546Sopenharmony_ci else 514bf215546Sopenharmony_ci assert(flat.offset >= -4096 && flat.offset < 4096); 515bf215546Sopenharmony_ci encoding |= flat.offset & 0x1fff; 516bf215546Sopenharmony_ci } else if (ctx.gfx_level <= GFX8 || instr->isFlat()) { 517bf215546Sopenharmony_ci /* GFX10 has a 12-bit immediate OFFSET field, 518bf215546Sopenharmony_ci * but it has a hw bug: it ignores the offset, called FlatSegmentOffsetBug 519bf215546Sopenharmony_ci */ 520bf215546Sopenharmony_ci assert(flat.offset == 0); 521bf215546Sopenharmony_ci } else { 522bf215546Sopenharmony_ci assert(flat.offset >= -2048 && flat.offset <= 2047); 523bf215546Sopenharmony_ci encoding |= flat.offset & 0xfff; 524bf215546Sopenharmony_ci } 525bf215546Sopenharmony_ci if (instr->isScratch()) 526bf215546Sopenharmony_ci encoding |= 1 << 14; 
527bf215546Sopenharmony_ci else if (instr->isGlobal()) 528bf215546Sopenharmony_ci encoding |= 2 << 14; 529bf215546Sopenharmony_ci encoding |= flat.lds ? 1 << 13 : 0; 530bf215546Sopenharmony_ci encoding |= flat.glc ? 1 << 16 : 0; 531bf215546Sopenharmony_ci encoding |= flat.slc ? 1 << 17 : 0; 532bf215546Sopenharmony_ci if (ctx.gfx_level >= GFX10) { 533bf215546Sopenharmony_ci assert(!flat.nv); 534bf215546Sopenharmony_ci encoding |= flat.dlc ? 1 << 12 : 0; 535bf215546Sopenharmony_ci } else { 536bf215546Sopenharmony_ci assert(!flat.dlc); 537bf215546Sopenharmony_ci } 538bf215546Sopenharmony_ci out.push_back(encoding); 539bf215546Sopenharmony_ci encoding = (0xFF & instr->operands[0].physReg()); 540bf215546Sopenharmony_ci if (!instr->definitions.empty()) 541bf215546Sopenharmony_ci encoding |= (0xFF & instr->definitions[0].physReg()) << 24; 542bf215546Sopenharmony_ci if (instr->operands.size() >= 3) 543bf215546Sopenharmony_ci encoding |= (0xFF & instr->operands[2].physReg()) << 8; 544bf215546Sopenharmony_ci if (!instr->operands[1].isUndefined()) { 545bf215546Sopenharmony_ci assert(ctx.gfx_level >= GFX10 || instr->operands[1].physReg() != 0x7F); 546bf215546Sopenharmony_ci assert(instr->format != Format::FLAT); 547bf215546Sopenharmony_ci encoding |= instr->operands[1].physReg() << 16; 548bf215546Sopenharmony_ci } else if (instr->format != Format::FLAT || 549bf215546Sopenharmony_ci ctx.gfx_level >= GFX10) { /* SADDR is actually used with FLAT on GFX10 */ 550bf215546Sopenharmony_ci /* For GFX10.3 scratch, 0x7F disables both ADDR and SADDR, unlike sgpr_null, which only 551bf215546Sopenharmony_ci * disables SADDR. 
552bf215546Sopenharmony_ci */ 553bf215546Sopenharmony_ci if (ctx.gfx_level <= GFX9 || 554bf215546Sopenharmony_ci (instr->format == Format::SCRATCH && instr->operands[0].isUndefined())) 555bf215546Sopenharmony_ci encoding |= 0x7F << 16; 556bf215546Sopenharmony_ci else 557bf215546Sopenharmony_ci encoding |= sgpr_null << 16; 558bf215546Sopenharmony_ci } 559bf215546Sopenharmony_ci encoding |= flat.nv ? 1 << 23 : 0; 560bf215546Sopenharmony_ci out.push_back(encoding); 561bf215546Sopenharmony_ci break; 562bf215546Sopenharmony_ci } 563bf215546Sopenharmony_ci case Format::EXP: { 564bf215546Sopenharmony_ci Export_instruction& exp = instr->exp(); 565bf215546Sopenharmony_ci uint32_t encoding; 566bf215546Sopenharmony_ci if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) { 567bf215546Sopenharmony_ci encoding = (0b110001 << 26); 568bf215546Sopenharmony_ci } else { 569bf215546Sopenharmony_ci encoding = (0b111110 << 26); 570bf215546Sopenharmony_ci } 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_ci encoding |= exp.valid_mask ? 0b1 << 12 : 0; 573bf215546Sopenharmony_ci encoding |= exp.done ? 0b1 << 11 : 0; 574bf215546Sopenharmony_ci encoding |= exp.compressed ? 
0b1 << 10 : 0; 575bf215546Sopenharmony_ci encoding |= exp.dest << 4; 576bf215546Sopenharmony_ci encoding |= exp.enabled_mask; 577bf215546Sopenharmony_ci out.push_back(encoding); 578bf215546Sopenharmony_ci encoding = 0xFF & exp.operands[0].physReg(); 579bf215546Sopenharmony_ci encoding |= (0xFF & exp.operands[1].physReg()) << 8; 580bf215546Sopenharmony_ci encoding |= (0xFF & exp.operands[2].physReg()) << 16; 581bf215546Sopenharmony_ci encoding |= (0xFF & exp.operands[3].physReg()) << 24; 582bf215546Sopenharmony_ci out.push_back(encoding); 583bf215546Sopenharmony_ci break; 584bf215546Sopenharmony_ci } 585bf215546Sopenharmony_ci case Format::PSEUDO: 586bf215546Sopenharmony_ci case Format::PSEUDO_BARRIER: 587bf215546Sopenharmony_ci if (instr->opcode != aco_opcode::p_unit_test) 588bf215546Sopenharmony_ci unreachable("Pseudo instructions should be lowered before assembly."); 589bf215546Sopenharmony_ci break; 590bf215546Sopenharmony_ci default: 591bf215546Sopenharmony_ci if (instr->isVOP3()) { 592bf215546Sopenharmony_ci VOP3_instruction& vop3 = instr->vop3(); 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci if (instr->isVOP2()) { 595bf215546Sopenharmony_ci opcode = opcode + 0x100; 596bf215546Sopenharmony_ci } else if (instr->isVOP1()) { 597bf215546Sopenharmony_ci if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) 598bf215546Sopenharmony_ci opcode = opcode + 0x140; 599bf215546Sopenharmony_ci else 600bf215546Sopenharmony_ci opcode = opcode + 0x180; 601bf215546Sopenharmony_ci } else if (instr->isVOPC()) { 602bf215546Sopenharmony_ci opcode = opcode + 0x0; 603bf215546Sopenharmony_ci } else if (instr->isVINTRP()) { 604bf215546Sopenharmony_ci opcode = opcode + 0x270; 605bf215546Sopenharmony_ci } 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci uint32_t encoding; 608bf215546Sopenharmony_ci if (ctx.gfx_level <= GFX9) { 609bf215546Sopenharmony_ci encoding = (0b110100 << 26); 610bf215546Sopenharmony_ci } else if (ctx.gfx_level >= GFX10) { 611bf215546Sopenharmony_ci 
encoding = (0b110101 << 26); 612bf215546Sopenharmony_ci } else { 613bf215546Sopenharmony_ci unreachable("Unknown gfx_level."); 614bf215546Sopenharmony_ci } 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci if (ctx.gfx_level <= GFX7) { 617bf215546Sopenharmony_ci encoding |= opcode << 17; 618bf215546Sopenharmony_ci encoding |= (vop3.clamp ? 1 : 0) << 11; 619bf215546Sopenharmony_ci } else { 620bf215546Sopenharmony_ci encoding |= opcode << 16; 621bf215546Sopenharmony_ci encoding |= (vop3.clamp ? 1 : 0) << 15; 622bf215546Sopenharmony_ci } 623bf215546Sopenharmony_ci encoding |= vop3.opsel << 11; 624bf215546Sopenharmony_ci for (unsigned i = 0; i < 3; i++) 625bf215546Sopenharmony_ci encoding |= vop3.abs[i] << (8 + i); 626bf215546Sopenharmony_ci if (instr->definitions.size() == 2) 627bf215546Sopenharmony_ci encoding |= instr->definitions[1].physReg() << 8; 628bf215546Sopenharmony_ci encoding |= (0xFF & instr->definitions[0].physReg()); 629bf215546Sopenharmony_ci out.push_back(encoding); 630bf215546Sopenharmony_ci encoding = 0; 631bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::v_interp_mov_f32) { 632bf215546Sopenharmony_ci encoding = 0x3 & instr->operands[0].constantValue(); 633bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::v_writelane_b32_e64) { 634bf215546Sopenharmony_ci encoding |= instr->operands[0].physReg() << 0; 635bf215546Sopenharmony_ci encoding |= instr->operands[1].physReg() << 9; 636bf215546Sopenharmony_ci /* Encoding src2 works fine with hardware but breaks some disassemblers. 
*/ 637bf215546Sopenharmony_ci } else { 638bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) 639bf215546Sopenharmony_ci encoding |= instr->operands[i].physReg() << (i * 9); 640bf215546Sopenharmony_ci } 641bf215546Sopenharmony_ci encoding |= vop3.omod << 27; 642bf215546Sopenharmony_ci for (unsigned i = 0; i < 3; i++) 643bf215546Sopenharmony_ci encoding |= vop3.neg[i] << (29 + i); 644bf215546Sopenharmony_ci out.push_back(encoding); 645bf215546Sopenharmony_ci 646bf215546Sopenharmony_ci } else if (instr->isVOP3P()) { 647bf215546Sopenharmony_ci VOP3P_instruction& vop3 = instr->vop3p(); 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_ci uint32_t encoding; 650bf215546Sopenharmony_ci if (ctx.gfx_level == GFX9) { 651bf215546Sopenharmony_ci encoding = (0b110100111 << 23); 652bf215546Sopenharmony_ci } else if (ctx.gfx_level >= GFX10) { 653bf215546Sopenharmony_ci encoding = (0b110011 << 26); 654bf215546Sopenharmony_ci } else { 655bf215546Sopenharmony_ci unreachable("Unknown gfx_level."); 656bf215546Sopenharmony_ci } 657bf215546Sopenharmony_ci 658bf215546Sopenharmony_ci encoding |= opcode << 16; 659bf215546Sopenharmony_ci encoding |= (vop3.clamp ? 1 : 0) << 15; 660bf215546Sopenharmony_ci encoding |= vop3.opsel_lo << 11; 661bf215546Sopenharmony_ci encoding |= ((vop3.opsel_hi & 0x4) ? 
1 : 0) << 14; 662bf215546Sopenharmony_ci for (unsigned i = 0; i < 3; i++) 663bf215546Sopenharmony_ci encoding |= vop3.neg_hi[i] << (8 + i); 664bf215546Sopenharmony_ci encoding |= (0xFF & instr->definitions[0].physReg()); 665bf215546Sopenharmony_ci out.push_back(encoding); 666bf215546Sopenharmony_ci encoding = 0; 667bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->operands.size(); i++) 668bf215546Sopenharmony_ci encoding |= instr->operands[i].physReg() << (i * 9); 669bf215546Sopenharmony_ci encoding |= (vop3.opsel_hi & 0x3) << 27; 670bf215546Sopenharmony_ci for (unsigned i = 0; i < 3; i++) 671bf215546Sopenharmony_ci encoding |= vop3.neg_lo[i] << (29 + i); 672bf215546Sopenharmony_ci out.push_back(encoding); 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_ci } else if (instr->isDPP16()) { 675bf215546Sopenharmony_ci assert(ctx.gfx_level >= GFX8); 676bf215546Sopenharmony_ci DPP16_instruction& dpp = instr->dpp16(); 677bf215546Sopenharmony_ci 678bf215546Sopenharmony_ci /* first emit the instruction without the DPP operand */ 679bf215546Sopenharmony_ci Operand dpp_op = instr->operands[0]; 680bf215546Sopenharmony_ci instr->operands[0] = Operand(PhysReg{250}, v1); 681bf215546Sopenharmony_ci instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16); 682bf215546Sopenharmony_ci emit_instruction(ctx, out, instr); 683bf215546Sopenharmony_ci uint32_t encoding = (0xF & dpp.row_mask) << 28; 684bf215546Sopenharmony_ci encoding |= (0xF & dpp.bank_mask) << 24; 685bf215546Sopenharmony_ci encoding |= dpp.abs[1] << 23; 686bf215546Sopenharmony_ci encoding |= dpp.neg[1] << 22; 687bf215546Sopenharmony_ci encoding |= dpp.abs[0] << 21; 688bf215546Sopenharmony_ci encoding |= dpp.neg[0] << 20; 689bf215546Sopenharmony_ci if (ctx.gfx_level >= GFX10) 690bf215546Sopenharmony_ci encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */ 691bf215546Sopenharmony_ci encoding |= dpp.bound_ctrl << 19; 692bf215546Sopenharmony_ci encoding |= dpp.dpp_ctrl << 8; 
693bf215546Sopenharmony_ci encoding |= (0xFF) & dpp_op.physReg(); 694bf215546Sopenharmony_ci out.push_back(encoding); 695bf215546Sopenharmony_ci return; 696bf215546Sopenharmony_ci } else if (instr->isDPP8()) { 697bf215546Sopenharmony_ci assert(ctx.gfx_level >= GFX10); 698bf215546Sopenharmony_ci DPP8_instruction& dpp = instr->dpp8(); 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_ci /* first emit the instruction without the DPP operand */ 701bf215546Sopenharmony_ci Operand dpp_op = instr->operands[0]; 702bf215546Sopenharmony_ci instr->operands[0] = Operand(PhysReg{234}, v1); 703bf215546Sopenharmony_ci instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8); 704bf215546Sopenharmony_ci emit_instruction(ctx, out, instr); 705bf215546Sopenharmony_ci uint32_t encoding = (0xFF) & dpp_op.physReg(); 706bf215546Sopenharmony_ci for (unsigned i = 0; i < 8; ++i) 707bf215546Sopenharmony_ci encoding |= dpp.lane_sel[i] << (8 + i * 3); 708bf215546Sopenharmony_ci out.push_back(encoding); 709bf215546Sopenharmony_ci return; 710bf215546Sopenharmony_ci } else if (instr->isSDWA()) { 711bf215546Sopenharmony_ci assert(ctx.gfx_level >= GFX8 && ctx.gfx_level < GFX11); 712bf215546Sopenharmony_ci SDWA_instruction& sdwa = instr->sdwa(); 713bf215546Sopenharmony_ci 714bf215546Sopenharmony_ci /* first emit the instruction without the SDWA operand */ 715bf215546Sopenharmony_ci Operand sdwa_op = instr->operands[0]; 716bf215546Sopenharmony_ci instr->operands[0] = Operand(PhysReg{249}, v1); 717bf215546Sopenharmony_ci instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::SDWA); 718bf215546Sopenharmony_ci emit_instruction(ctx, out, instr); 719bf215546Sopenharmony_ci 720bf215546Sopenharmony_ci uint32_t encoding = 0; 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_ci if (instr->isVOPC()) { 723bf215546Sopenharmony_ci if (instr->definitions[0].physReg() != vcc) { 724bf215546Sopenharmony_ci encoding |= instr->definitions[0].physReg() << 8; 725bf215546Sopenharmony_ci 
encoding |= 1 << 15; 726bf215546Sopenharmony_ci } 727bf215546Sopenharmony_ci encoding |= (sdwa.clamp ? 1 : 0) << 13; 728bf215546Sopenharmony_ci } else { 729bf215546Sopenharmony_ci encoding |= sdwa.dst_sel.to_sdwa_sel(instr->definitions[0].physReg().byte()) << 8; 730bf215546Sopenharmony_ci uint32_t dst_u = sdwa.dst_sel.sign_extend() ? 1 : 0; 731bf215546Sopenharmony_ci if (instr->definitions[0].bytes() < 4) /* dst_preserve */ 732bf215546Sopenharmony_ci dst_u = 2; 733bf215546Sopenharmony_ci encoding |= dst_u << 11; 734bf215546Sopenharmony_ci encoding |= (sdwa.clamp ? 1 : 0) << 13; 735bf215546Sopenharmony_ci encoding |= sdwa.omod << 14; 736bf215546Sopenharmony_ci } 737bf215546Sopenharmony_ci 738bf215546Sopenharmony_ci encoding |= sdwa.sel[0].to_sdwa_sel(sdwa_op.physReg().byte()) << 16; 739bf215546Sopenharmony_ci encoding |= sdwa.sel[0].sign_extend() ? 1 << 19 : 0; 740bf215546Sopenharmony_ci encoding |= sdwa.abs[0] << 21; 741bf215546Sopenharmony_ci encoding |= sdwa.neg[0] << 20; 742bf215546Sopenharmony_ci 743bf215546Sopenharmony_ci if (instr->operands.size() >= 2) { 744bf215546Sopenharmony_ci encoding |= sdwa.sel[1].to_sdwa_sel(instr->operands[1].physReg().byte()) << 24; 745bf215546Sopenharmony_ci encoding |= sdwa.sel[1].sign_extend() ? 
1 << 27 : 0; 746bf215546Sopenharmony_ci encoding |= sdwa.abs[1] << 29; 747bf215546Sopenharmony_ci encoding |= sdwa.neg[1] << 28; 748bf215546Sopenharmony_ci } 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci encoding |= 0xFF & sdwa_op.physReg(); 751bf215546Sopenharmony_ci encoding |= (sdwa_op.physReg() < 256) << 23; 752bf215546Sopenharmony_ci if (instr->operands.size() >= 2) 753bf215546Sopenharmony_ci encoding |= (instr->operands[1].physReg() < 256) << 31; 754bf215546Sopenharmony_ci out.push_back(encoding); 755bf215546Sopenharmony_ci } else { 756bf215546Sopenharmony_ci unreachable("unimplemented instruction format"); 757bf215546Sopenharmony_ci } 758bf215546Sopenharmony_ci break; 759bf215546Sopenharmony_ci } 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_ci /* append literal dword */ 762bf215546Sopenharmony_ci for (const Operand& op : instr->operands) { 763bf215546Sopenharmony_ci if (op.isLiteral()) { 764bf215546Sopenharmony_ci out.push_back(op.constantValue()); 765bf215546Sopenharmony_ci break; 766bf215546Sopenharmony_ci } 767bf215546Sopenharmony_ci } 768bf215546Sopenharmony_ci} 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_civoid 771bf215546Sopenharmony_ciemit_block(asm_context& ctx, std::vector<uint32_t>& out, Block& block) 772bf215546Sopenharmony_ci{ 773bf215546Sopenharmony_ci for (aco_ptr<Instruction>& instr : block.instructions) { 774bf215546Sopenharmony_ci#if 0 775bf215546Sopenharmony_ci int start_idx = out.size(); 776bf215546Sopenharmony_ci std::cerr << "Encoding:\t" << std::endl; 777bf215546Sopenharmony_ci aco_print_instr(&*instr, stderr); 778bf215546Sopenharmony_ci std::cerr << std::endl; 779bf215546Sopenharmony_ci#endif 780bf215546Sopenharmony_ci emit_instruction(ctx, out, instr.get()); 781bf215546Sopenharmony_ci#if 0 782bf215546Sopenharmony_ci for (int i = start_idx; i < out.size(); i++) 783bf215546Sopenharmony_ci std::cerr << "encoding: " << "0x" << std::setfill('0') << std::setw(8) << std::hex << out[i] << std::endl; 
784bf215546Sopenharmony_ci#endif 785bf215546Sopenharmony_ci } 786bf215546Sopenharmony_ci} 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_civoid 789bf215546Sopenharmony_cifix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program) 790bf215546Sopenharmony_ci{ 791bf215546Sopenharmony_ci bool exported = false; 792bf215546Sopenharmony_ci for (Block& block : program->blocks) { 793bf215546Sopenharmony_ci if (!(block.kind & block_kind_export_end)) 794bf215546Sopenharmony_ci continue; 795bf215546Sopenharmony_ci std::vector<aco_ptr<Instruction>>::reverse_iterator it = block.instructions.rbegin(); 796bf215546Sopenharmony_ci while (it != block.instructions.rend()) { 797bf215546Sopenharmony_ci if ((*it)->isEXP()) { 798bf215546Sopenharmony_ci Export_instruction& exp = (*it)->exp(); 799bf215546Sopenharmony_ci if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG) { 800bf215546Sopenharmony_ci if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= (V_008DFC_SQ_EXP_POS + 3)) { 801bf215546Sopenharmony_ci exp.done = true; 802bf215546Sopenharmony_ci exported = true; 803bf215546Sopenharmony_ci break; 804bf215546Sopenharmony_ci } 805bf215546Sopenharmony_ci } else { 806bf215546Sopenharmony_ci if (!program->info.ps.has_epilog) { 807bf215546Sopenharmony_ci exp.done = true; 808bf215546Sopenharmony_ci exp.valid_mask = true; 809bf215546Sopenharmony_ci } 810bf215546Sopenharmony_ci exported = true; 811bf215546Sopenharmony_ci break; 812bf215546Sopenharmony_ci } 813bf215546Sopenharmony_ci } else if ((*it)->definitions.size() && (*it)->definitions[0].physReg() == exec) { 814bf215546Sopenharmony_ci break; 815bf215546Sopenharmony_ci } else if ((*it)->opcode == aco_opcode::s_setpc_b64) { 816bf215546Sopenharmony_ci /* Do not abort if the main FS has an epilog because it only 817bf215546Sopenharmony_ci * exports MRTZ (if present) and the epilog exports colors. 
818bf215546Sopenharmony_ci */ 819bf215546Sopenharmony_ci exported |= program->stage.hw == HWStage::FS && program->info.ps.has_epilog; 820bf215546Sopenharmony_ci } 821bf215546Sopenharmony_ci ++it; 822bf215546Sopenharmony_ci } 823bf215546Sopenharmony_ci } 824bf215546Sopenharmony_ci 825bf215546Sopenharmony_ci if (!exported) { 826bf215546Sopenharmony_ci /* Abort in order to avoid a GPU hang. */ 827bf215546Sopenharmony_ci bool is_vertex_or_ngg = 828bf215546Sopenharmony_ci (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG); 829bf215546Sopenharmony_ci aco_err(program, 830bf215546Sopenharmony_ci "Missing export in %s shader:", is_vertex_or_ngg ? "vertex or NGG" : "fragment"); 831bf215546Sopenharmony_ci aco_print_program(program, stderr); 832bf215546Sopenharmony_ci abort(); 833bf215546Sopenharmony_ci } 834bf215546Sopenharmony_ci} 835bf215546Sopenharmony_ci 836bf215546Sopenharmony_cistatic void 837bf215546Sopenharmony_ciinsert_code(asm_context& ctx, std::vector<uint32_t>& out, unsigned insert_before, 838bf215546Sopenharmony_ci unsigned insert_count, const uint32_t* insert_data) 839bf215546Sopenharmony_ci{ 840bf215546Sopenharmony_ci out.insert(out.begin() + insert_before, insert_data, insert_data + insert_count); 841bf215546Sopenharmony_ci 842bf215546Sopenharmony_ci /* Update the offset of each affected block */ 843bf215546Sopenharmony_ci for (Block& block : ctx.program->blocks) { 844bf215546Sopenharmony_ci if (block.offset >= insert_before) 845bf215546Sopenharmony_ci block.offset += insert_count; 846bf215546Sopenharmony_ci } 847bf215546Sopenharmony_ci 848bf215546Sopenharmony_ci /* Find first branch after the inserted code */ 849bf215546Sopenharmony_ci auto branch_it = std::find_if(ctx.branches.begin(), ctx.branches.end(), 850bf215546Sopenharmony_ci [insert_before](const auto& branch) -> bool 851bf215546Sopenharmony_ci { return (unsigned)branch.first >= insert_before; }); 852bf215546Sopenharmony_ci 853bf215546Sopenharmony_ci /* Update the locations of 
branches */ 854bf215546Sopenharmony_ci for (; branch_it != ctx.branches.end(); ++branch_it) 855bf215546Sopenharmony_ci branch_it->first += insert_count; 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_ci /* Update the locations of p_constaddr instructions */ 858bf215546Sopenharmony_ci for (auto& constaddr : ctx.constaddrs) { 859bf215546Sopenharmony_ci constaddr_info& info = constaddr.second; 860bf215546Sopenharmony_ci if (info.getpc_end >= insert_before) 861bf215546Sopenharmony_ci info.getpc_end += insert_count; 862bf215546Sopenharmony_ci if (info.add_literal >= insert_before) 863bf215546Sopenharmony_ci info.add_literal += insert_count; 864bf215546Sopenharmony_ci } 865bf215546Sopenharmony_ci} 866bf215546Sopenharmony_ci 867bf215546Sopenharmony_cistatic void 868bf215546Sopenharmony_cifix_branches_gfx10(asm_context& ctx, std::vector<uint32_t>& out) 869bf215546Sopenharmony_ci{ 870bf215546Sopenharmony_ci /* Branches with an offset of 0x3f are buggy on GFX10, 871bf215546Sopenharmony_ci * we workaround by inserting NOPs if needed. 
872bf215546Sopenharmony_ci */ 873bf215546Sopenharmony_ci bool gfx10_3f_bug = false; 874bf215546Sopenharmony_ci 875bf215546Sopenharmony_ci do { 876bf215546Sopenharmony_ci auto buggy_branch_it = std::find_if( 877bf215546Sopenharmony_ci ctx.branches.begin(), ctx.branches.end(), 878bf215546Sopenharmony_ci [&ctx](const auto& branch) -> bool { 879bf215546Sopenharmony_ci return ((int)ctx.program->blocks[branch.second->block].offset - branch.first - 1) == 880bf215546Sopenharmony_ci 0x3f; 881bf215546Sopenharmony_ci }); 882bf215546Sopenharmony_ci 883bf215546Sopenharmony_ci gfx10_3f_bug = buggy_branch_it != ctx.branches.end(); 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_ci if (gfx10_3f_bug) { 886bf215546Sopenharmony_ci /* Insert an s_nop after the branch */ 887bf215546Sopenharmony_ci constexpr uint32_t s_nop_0 = 0xbf800000u; 888bf215546Sopenharmony_ci insert_code(ctx, out, buggy_branch_it->first + 1, 1, &s_nop_0); 889bf215546Sopenharmony_ci } 890bf215546Sopenharmony_ci } while (gfx10_3f_bug); 891bf215546Sopenharmony_ci} 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_civoid 894bf215546Sopenharmony_ciemit_long_jump(asm_context& ctx, SOPP_instruction* branch, bool backwards, 895bf215546Sopenharmony_ci std::vector<uint32_t>& out) 896bf215546Sopenharmony_ci{ 897bf215546Sopenharmony_ci Builder bld(ctx.program); 898bf215546Sopenharmony_ci 899bf215546Sopenharmony_ci Definition def_tmp_lo(branch->definitions[0].physReg(), s1); 900bf215546Sopenharmony_ci Operand op_tmp_lo(branch->definitions[0].physReg(), s1); 901bf215546Sopenharmony_ci Definition def_tmp_hi(branch->definitions[0].physReg().advance(4), s1); 902bf215546Sopenharmony_ci Operand op_tmp_hi(branch->definitions[0].physReg().advance(4), s1); 903bf215546Sopenharmony_ci 904bf215546Sopenharmony_ci aco_ptr<Instruction> instr; 905bf215546Sopenharmony_ci 906bf215546Sopenharmony_ci if (branch->opcode != aco_opcode::s_branch) { 907bf215546Sopenharmony_ci /* for conditional branches, skip the long jump if the condition is 
false */ 908bf215546Sopenharmony_ci aco_opcode inv; 909bf215546Sopenharmony_ci switch (branch->opcode) { 910bf215546Sopenharmony_ci case aco_opcode::s_cbranch_scc0: inv = aco_opcode::s_cbranch_scc1; break; 911bf215546Sopenharmony_ci case aco_opcode::s_cbranch_scc1: inv = aco_opcode::s_cbranch_scc0; break; 912bf215546Sopenharmony_ci case aco_opcode::s_cbranch_vccz: inv = aco_opcode::s_cbranch_vccnz; break; 913bf215546Sopenharmony_ci case aco_opcode::s_cbranch_vccnz: inv = aco_opcode::s_cbranch_vccz; break; 914bf215546Sopenharmony_ci case aco_opcode::s_cbranch_execz: inv = aco_opcode::s_cbranch_execnz; break; 915bf215546Sopenharmony_ci case aco_opcode::s_cbranch_execnz: inv = aco_opcode::s_cbranch_execz; break; 916bf215546Sopenharmony_ci default: unreachable("Unhandled long jump."); 917bf215546Sopenharmony_ci } 918bf215546Sopenharmony_ci instr.reset(bld.sopp(inv, -1, 6)); 919bf215546Sopenharmony_ci emit_instruction(ctx, out, instr.get()); 920bf215546Sopenharmony_ci } 921bf215546Sopenharmony_ci 922bf215546Sopenharmony_ci /* create the new PC and stash SCC in the LSB */ 923bf215546Sopenharmony_ci instr.reset(bld.sop1(aco_opcode::s_getpc_b64, branch->definitions[0]).instr); 924bf215546Sopenharmony_ci emit_instruction(ctx, out, instr.get()); 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_ci instr.reset( 927bf215546Sopenharmony_ci bld.sop2(aco_opcode::s_addc_u32, def_tmp_lo, op_tmp_lo, Operand::literal32(0)).instr); 928bf215546Sopenharmony_ci emit_instruction(ctx, out, instr.get()); 929bf215546Sopenharmony_ci branch->pass_flags = out.size(); 930bf215546Sopenharmony_ci 931bf215546Sopenharmony_ci /* s_addc_u32 for high 32 bits not needed because the program is in a 32-bit VA range */ 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_ci /* restore SCC and clear the LSB of the new PC */ 934bf215546Sopenharmony_ci instr.reset(bld.sopc(aco_opcode::s_bitcmp1_b32, def_tmp_lo, op_tmp_lo, Operand::zero()).instr); 935bf215546Sopenharmony_ci emit_instruction(ctx, out, 
instr.get()); 936bf215546Sopenharmony_ci instr.reset(bld.sop1(aco_opcode::s_bitset0_b32, def_tmp_lo, Operand::zero()).instr); 937bf215546Sopenharmony_ci emit_instruction(ctx, out, instr.get()); 938bf215546Sopenharmony_ci 939bf215546Sopenharmony_ci /* create the s_setpc_b64 to jump */ 940bf215546Sopenharmony_ci instr.reset( 941bf215546Sopenharmony_ci bld.sop1(aco_opcode::s_setpc_b64, Operand(branch->definitions[0].physReg(), s2)).instr); 942bf215546Sopenharmony_ci emit_instruction(ctx, out, instr.get()); 943bf215546Sopenharmony_ci} 944bf215546Sopenharmony_ci 945bf215546Sopenharmony_civoid 946bf215546Sopenharmony_cifix_branches(asm_context& ctx, std::vector<uint32_t>& out) 947bf215546Sopenharmony_ci{ 948bf215546Sopenharmony_ci bool repeat = false; 949bf215546Sopenharmony_ci do { 950bf215546Sopenharmony_ci repeat = false; 951bf215546Sopenharmony_ci 952bf215546Sopenharmony_ci if (ctx.gfx_level == GFX10) 953bf215546Sopenharmony_ci fix_branches_gfx10(ctx, out); 954bf215546Sopenharmony_ci 955bf215546Sopenharmony_ci for (std::pair<int, SOPP_instruction*>& branch : ctx.branches) { 956bf215546Sopenharmony_ci int offset = (int)ctx.program->blocks[branch.second->block].offset - branch.first - 1; 957bf215546Sopenharmony_ci if ((offset < INT16_MIN || offset > INT16_MAX) && !branch.second->pass_flags) { 958bf215546Sopenharmony_ci std::vector<uint32_t> long_jump; 959bf215546Sopenharmony_ci bool backwards = 960bf215546Sopenharmony_ci ctx.program->blocks[branch.second->block].offset < (unsigned)branch.first; 961bf215546Sopenharmony_ci emit_long_jump(ctx, branch.second, backwards, long_jump); 962bf215546Sopenharmony_ci 963bf215546Sopenharmony_ci out[branch.first] = long_jump[0]; 964bf215546Sopenharmony_ci insert_code(ctx, out, branch.first + 1, long_jump.size() - 1, long_jump.data() + 1); 965bf215546Sopenharmony_ci 966bf215546Sopenharmony_ci repeat = true; 967bf215546Sopenharmony_ci break; 968bf215546Sopenharmony_ci } 969bf215546Sopenharmony_ci 970bf215546Sopenharmony_ci if 
(branch.second->pass_flags) { 971bf215546Sopenharmony_ci int after_getpc = branch.first + branch.second->pass_flags - 2; 972bf215546Sopenharmony_ci offset = (int)ctx.program->blocks[branch.second->block].offset - after_getpc; 973bf215546Sopenharmony_ci out[branch.first + branch.second->pass_flags - 1] = offset * 4; 974bf215546Sopenharmony_ci } else { 975bf215546Sopenharmony_ci out[branch.first] &= 0xffff0000u; 976bf215546Sopenharmony_ci out[branch.first] |= (uint16_t)offset; 977bf215546Sopenharmony_ci } 978bf215546Sopenharmony_ci } 979bf215546Sopenharmony_ci } while (repeat); 980bf215546Sopenharmony_ci} 981bf215546Sopenharmony_ci 982bf215546Sopenharmony_civoid 983bf215546Sopenharmony_cifix_constaddrs(asm_context& ctx, std::vector<uint32_t>& out) 984bf215546Sopenharmony_ci{ 985bf215546Sopenharmony_ci for (auto& constaddr : ctx.constaddrs) { 986bf215546Sopenharmony_ci constaddr_info& info = constaddr.second; 987bf215546Sopenharmony_ci out[info.add_literal] += (out.size() - info.getpc_end) * 4u; 988bf215546Sopenharmony_ci } 989bf215546Sopenharmony_ci} 990bf215546Sopenharmony_ci 991bf215546Sopenharmony_ciunsigned 992bf215546Sopenharmony_ciemit_program(Program* program, std::vector<uint32_t>& code) 993bf215546Sopenharmony_ci{ 994bf215546Sopenharmony_ci asm_context ctx(program); 995bf215546Sopenharmony_ci 996bf215546Sopenharmony_ci if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::FS || 997bf215546Sopenharmony_ci program->stage.hw == HWStage::NGG) 998bf215546Sopenharmony_ci fix_exports(ctx, code, program); 999bf215546Sopenharmony_ci 1000bf215546Sopenharmony_ci for (Block& block : program->blocks) { 1001bf215546Sopenharmony_ci block.offset = code.size(); 1002bf215546Sopenharmony_ci emit_block(ctx, code, block); 1003bf215546Sopenharmony_ci } 1004bf215546Sopenharmony_ci 1005bf215546Sopenharmony_ci fix_branches(ctx, code); 1006bf215546Sopenharmony_ci 1007bf215546Sopenharmony_ci unsigned exec_size = code.size() * sizeof(uint32_t); 
1008bf215546Sopenharmony_ci 1009bf215546Sopenharmony_ci if (program->gfx_level >= GFX10) { 1010bf215546Sopenharmony_ci /* Pad output with s_code_end so instruction prefetching doesn't cause 1011bf215546Sopenharmony_ci * page faults */ 1012bf215546Sopenharmony_ci unsigned final_size = align(code.size() + 3 * 16, 16); 1013bf215546Sopenharmony_ci while (code.size() < final_size) 1014bf215546Sopenharmony_ci code.push_back(0xbf9f0000u); 1015bf215546Sopenharmony_ci } 1016bf215546Sopenharmony_ci 1017bf215546Sopenharmony_ci fix_constaddrs(ctx, code); 1018bf215546Sopenharmony_ci 1019bf215546Sopenharmony_ci while (program->constant_data.size() % 4u) 1020bf215546Sopenharmony_ci program->constant_data.push_back(0); 1021bf215546Sopenharmony_ci /* Copy constant data */ 1022bf215546Sopenharmony_ci code.insert(code.end(), (uint32_t*)program->constant_data.data(), 1023bf215546Sopenharmony_ci (uint32_t*)(program->constant_data.data() + program->constant_data.size())); 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci return exec_size; 1026bf215546Sopenharmony_ci} 1027bf215546Sopenharmony_ci 1028bf215546Sopenharmony_ci} // namespace aco 1029