1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2018 Valve Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "aco_builder.h"
26bf215546Sopenharmony_ci#include "aco_ir.h"
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include "common/sid.h"
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "util/memstream.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_ci#include <algorithm>
33bf215546Sopenharmony_ci#include <map>
34bf215546Sopenharmony_ci#include <vector>
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_cinamespace aco {
37bf215546Sopenharmony_ci
/* Output positions recorded while lowering one p_constaddr_getpc / p_constaddr_addlo pair.
 *
 * Both values are indices into the emitted dword stream. They default to 0 so that an
 * instance is never left with indeterminate members, regardless of how it is created.
 */
struct constaddr_info {
   /* out.size() + 1 at the moment p_constaddr_getpc is lowered to s_getpc_b64, i.e. the
    * index just past the dword that instruction is about to occupy. */
   unsigned getpc_end = 0;
   /* out.size() + 1 at the moment p_constaddr_addlo is lowered to s_add_u32; presumably the
    * index of the literal dword that follows the instruction word. */
   unsigned add_literal = 0;
};
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_cistruct asm_context {
44bf215546Sopenharmony_ci   Program* program;
45bf215546Sopenharmony_ci   enum amd_gfx_level gfx_level;
46bf215546Sopenharmony_ci   std::vector<std::pair<int, SOPP_instruction*>> branches;
47bf215546Sopenharmony_ci   std::map<unsigned, constaddr_info> constaddrs;
48bf215546Sopenharmony_ci   const int16_t* opcode;
49bf215546Sopenharmony_ci   // TODO: keep track of branch instructions referring blocks
50bf215546Sopenharmony_ci   // and, when emitting the block, correct the offset in instr
51bf215546Sopenharmony_ci   asm_context(Program* program_) : program(program_), gfx_level(program->gfx_level)
52bf215546Sopenharmony_ci   {
53bf215546Sopenharmony_ci      if (gfx_level <= GFX7)
54bf215546Sopenharmony_ci         opcode = &instr_info.opcode_gfx7[0];
55bf215546Sopenharmony_ci      else if (gfx_level <= GFX9)
56bf215546Sopenharmony_ci         opcode = &instr_info.opcode_gfx9[0];
57bf215546Sopenharmony_ci      else if (gfx_level >= GFX10)
58bf215546Sopenharmony_ci         opcode = &instr_info.opcode_gfx10[0];
59bf215546Sopenharmony_ci   }
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci   int subvector_begin_pos = -1;
62bf215546Sopenharmony_ci};
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ciunsigned
65bf215546Sopenharmony_ciget_mimg_nsa_dwords(const Instruction* instr)
66bf215546Sopenharmony_ci{
67bf215546Sopenharmony_ci   unsigned addr_dwords = instr->operands.size() - 3;
68bf215546Sopenharmony_ci   for (unsigned i = 1; i < addr_dwords; i++) {
69bf215546Sopenharmony_ci      if (instr->operands[3 + i].physReg() != instr->operands[3].physReg().advance(i * 4))
70bf215546Sopenharmony_ci         return DIV_ROUND_UP(addr_dwords - 1, 4);
71bf215546Sopenharmony_ci   }
72bf215546Sopenharmony_ci   return 0;
73bf215546Sopenharmony_ci}
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_civoid
76bf215546Sopenharmony_ciemit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
77bf215546Sopenharmony_ci{
78bf215546Sopenharmony_ci   /* lower remaining pseudo-instructions */
79bf215546Sopenharmony_ci   if (instr->opcode == aco_opcode::p_constaddr_getpc) {
80bf215546Sopenharmony_ci      ctx.constaddrs[instr->operands[0].constantValue()].getpc_end = out.size() + 1;
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci      instr->opcode = aco_opcode::s_getpc_b64;
83bf215546Sopenharmony_ci      instr->operands.pop_back();
84bf215546Sopenharmony_ci   } else if (instr->opcode == aco_opcode::p_constaddr_addlo) {
85bf215546Sopenharmony_ci      ctx.constaddrs[instr->operands[2].constantValue()].add_literal = out.size() + 1;
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ci      instr->opcode = aco_opcode::s_add_u32;
88bf215546Sopenharmony_ci      instr->operands.pop_back();
89bf215546Sopenharmony_ci      assert(instr->operands[1].isConstant());
90bf215546Sopenharmony_ci      /* in case it's an inline constant, make it a literal */
91bf215546Sopenharmony_ci      instr->operands[1] = Operand::literal32(instr->operands[1].constantValue());
92bf215546Sopenharmony_ci   }
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_ci   uint32_t opcode = ctx.opcode[(int)instr->opcode];
95bf215546Sopenharmony_ci   if (opcode == (uint32_t)-1) {
96bf215546Sopenharmony_ci      char* outmem;
97bf215546Sopenharmony_ci      size_t outsize;
98bf215546Sopenharmony_ci      struct u_memstream mem;
99bf215546Sopenharmony_ci      u_memstream_open(&mem, &outmem, &outsize);
100bf215546Sopenharmony_ci      FILE* const memf = u_memstream_get(&mem);
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_ci      fprintf(memf, "Unsupported opcode: ");
103bf215546Sopenharmony_ci      aco_print_instr(instr, memf);
104bf215546Sopenharmony_ci      u_memstream_close(&mem);
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci      aco_err(ctx.program, outmem);
107bf215546Sopenharmony_ci      free(outmem);
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_ci      abort();
110bf215546Sopenharmony_ci   }
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci   switch (instr->format) {
113bf215546Sopenharmony_ci   case Format::SOP2: {
114bf215546Sopenharmony_ci      uint32_t encoding = (0b10 << 30);
115bf215546Sopenharmony_ci      encoding |= opcode << 23;
116bf215546Sopenharmony_ci      encoding |= !instr->definitions.empty() ? instr->definitions[0].physReg() << 16 : 0;
117bf215546Sopenharmony_ci      encoding |= instr->operands.size() >= 2 ? instr->operands[1].physReg() << 8 : 0;
118bf215546Sopenharmony_ci      encoding |= !instr->operands.empty() ? instr->operands[0].physReg() : 0;
119bf215546Sopenharmony_ci      out.push_back(encoding);
120bf215546Sopenharmony_ci      break;
121bf215546Sopenharmony_ci   }
122bf215546Sopenharmony_ci   case Format::SOPK: {
123bf215546Sopenharmony_ci      SOPK_instruction& sopk = instr->sopk();
124bf215546Sopenharmony_ci
125bf215546Sopenharmony_ci      if (instr->opcode == aco_opcode::s_subvector_loop_begin) {
126bf215546Sopenharmony_ci         assert(ctx.gfx_level >= GFX10);
127bf215546Sopenharmony_ci         assert(ctx.subvector_begin_pos == -1);
128bf215546Sopenharmony_ci         ctx.subvector_begin_pos = out.size();
129bf215546Sopenharmony_ci      } else if (instr->opcode == aco_opcode::s_subvector_loop_end) {
130bf215546Sopenharmony_ci         assert(ctx.gfx_level >= GFX10);
131bf215546Sopenharmony_ci         assert(ctx.subvector_begin_pos != -1);
132bf215546Sopenharmony_ci         /* Adjust s_subvector_loop_begin instruction to the address after the end  */
133bf215546Sopenharmony_ci         out[ctx.subvector_begin_pos] |= (out.size() - ctx.subvector_begin_pos);
134bf215546Sopenharmony_ci         /* Adjust s_subvector_loop_end instruction to the address after the beginning  */
135bf215546Sopenharmony_ci         sopk.imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size());
136bf215546Sopenharmony_ci         ctx.subvector_begin_pos = -1;
137bf215546Sopenharmony_ci      }
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci      uint32_t encoding = (0b1011 << 28);
140bf215546Sopenharmony_ci      encoding |= opcode << 23;
141bf215546Sopenharmony_ci      encoding |= !instr->definitions.empty() && !(instr->definitions[0].physReg() == scc)
142bf215546Sopenharmony_ci                     ? instr->definitions[0].physReg() << 16
143bf215546Sopenharmony_ci                  : !instr->operands.empty() && instr->operands[0].physReg() <= 127
144bf215546Sopenharmony_ci                     ? instr->operands[0].physReg() << 16
145bf215546Sopenharmony_ci                     : 0;
146bf215546Sopenharmony_ci      encoding |= sopk.imm;
147bf215546Sopenharmony_ci      out.push_back(encoding);
148bf215546Sopenharmony_ci      break;
149bf215546Sopenharmony_ci   }
150bf215546Sopenharmony_ci   case Format::SOP1: {
151bf215546Sopenharmony_ci      uint32_t encoding = (0b101111101 << 23);
152bf215546Sopenharmony_ci      encoding |= !instr->definitions.empty() ? instr->definitions[0].physReg() << 16 : 0;
153bf215546Sopenharmony_ci      encoding |= opcode << 8;
154bf215546Sopenharmony_ci      encoding |= !instr->operands.empty() ? instr->operands[0].physReg() : 0;
155bf215546Sopenharmony_ci      out.push_back(encoding);
156bf215546Sopenharmony_ci      break;
157bf215546Sopenharmony_ci   }
158bf215546Sopenharmony_ci   case Format::SOPC: {
159bf215546Sopenharmony_ci      uint32_t encoding = (0b101111110 << 23);
160bf215546Sopenharmony_ci      encoding |= opcode << 16;
161bf215546Sopenharmony_ci      encoding |= instr->operands.size() == 2 ? instr->operands[1].physReg() << 8 : 0;
162bf215546Sopenharmony_ci      encoding |= !instr->operands.empty() ? instr->operands[0].physReg() : 0;
163bf215546Sopenharmony_ci      out.push_back(encoding);
164bf215546Sopenharmony_ci      break;
165bf215546Sopenharmony_ci   }
166bf215546Sopenharmony_ci   case Format::SOPP: {
167bf215546Sopenharmony_ci      SOPP_instruction& sopp = instr->sopp();
168bf215546Sopenharmony_ci      uint32_t encoding = (0b101111111 << 23);
169bf215546Sopenharmony_ci      encoding |= opcode << 16;
170bf215546Sopenharmony_ci      encoding |= (uint16_t)sopp.imm;
171bf215546Sopenharmony_ci      if (sopp.block != -1) {
172bf215546Sopenharmony_ci         sopp.pass_flags = 0;
173bf215546Sopenharmony_ci         ctx.branches.emplace_back(out.size(), &sopp);
174bf215546Sopenharmony_ci      }
175bf215546Sopenharmony_ci      out.push_back(encoding);
176bf215546Sopenharmony_ci      break;
177bf215546Sopenharmony_ci   }
178bf215546Sopenharmony_ci   case Format::SMEM: {
179bf215546Sopenharmony_ci      SMEM_instruction& smem = instr->smem();
180bf215546Sopenharmony_ci      bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4);
181bf215546Sopenharmony_ci      bool is_load = !instr->definitions.empty();
182bf215546Sopenharmony_ci      uint32_t encoding = 0;
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci      if (ctx.gfx_level <= GFX7) {
185bf215546Sopenharmony_ci         encoding = (0b11000 << 27);
186bf215546Sopenharmony_ci         encoding |= opcode << 22;
187bf215546Sopenharmony_ci         encoding |= instr->definitions.size() ? instr->definitions[0].physReg() << 15 : 0;
188bf215546Sopenharmony_ci         encoding |= instr->operands.size() ? (instr->operands[0].physReg() >> 1) << 9 : 0;
189bf215546Sopenharmony_ci         if (instr->operands.size() >= 2) {
190bf215546Sopenharmony_ci            if (!instr->operands[1].isConstant()) {
191bf215546Sopenharmony_ci               encoding |= instr->operands[1].physReg().reg();
192bf215546Sopenharmony_ci            } else if (instr->operands[1].constantValue() >= 1024) {
193bf215546Sopenharmony_ci               encoding |= 255; /* SQ_SRC_LITERAL */
194bf215546Sopenharmony_ci            } else {
195bf215546Sopenharmony_ci               encoding |= instr->operands[1].constantValue() >> 2;
196bf215546Sopenharmony_ci               encoding |= 1 << 8;
197bf215546Sopenharmony_ci            }
198bf215546Sopenharmony_ci         }
199bf215546Sopenharmony_ci         out.push_back(encoding);
200bf215546Sopenharmony_ci         /* SMRD instructions can take a literal on GFX7 */
201bf215546Sopenharmony_ci         if (instr->operands.size() >= 2 && instr->operands[1].isConstant() &&
202bf215546Sopenharmony_ci             instr->operands[1].constantValue() >= 1024)
203bf215546Sopenharmony_ci            out.push_back(instr->operands[1].constantValue() >> 2);
204bf215546Sopenharmony_ci         return;
205bf215546Sopenharmony_ci      }
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_ci      if (ctx.gfx_level <= GFX9) {
208bf215546Sopenharmony_ci         encoding = (0b110000 << 26);
209bf215546Sopenharmony_ci         assert(!smem.dlc); /* Device-level coherent is not supported on GFX9 and lower */
210bf215546Sopenharmony_ci         encoding |= smem.nv ? 1 << 15 : 0;
211bf215546Sopenharmony_ci      } else {
212bf215546Sopenharmony_ci         encoding = (0b111101 << 26);
213bf215546Sopenharmony_ci         assert(!smem.nv); /* Non-volatile is not supported on GFX10 */
214bf215546Sopenharmony_ci         encoding |= smem.dlc ? 1 << 14 : 0;
215bf215546Sopenharmony_ci      }
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci      encoding |= opcode << 18;
218bf215546Sopenharmony_ci      encoding |= smem.glc ? 1 << 16 : 0;
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ci      if (ctx.gfx_level <= GFX9) {
221bf215546Sopenharmony_ci         if (instr->operands.size() >= 2)
222bf215546Sopenharmony_ci            encoding |= instr->operands[1].isConstant() ? 1 << 17 : 0; /* IMM - immediate enable */
223bf215546Sopenharmony_ci      }
224bf215546Sopenharmony_ci      if (ctx.gfx_level == GFX9) {
225bf215546Sopenharmony_ci         encoding |= soe ? 1 << 14 : 0;
226bf215546Sopenharmony_ci      }
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci      if (is_load || instr->operands.size() >= 3) { /* SDATA */
229bf215546Sopenharmony_ci         encoding |= (is_load ? instr->definitions[0].physReg() : instr->operands[2].physReg())
230bf215546Sopenharmony_ci                     << 6;
231bf215546Sopenharmony_ci      }
232bf215546Sopenharmony_ci      if (instr->operands.size() >= 1) { /* SBASE */
233bf215546Sopenharmony_ci         encoding |= instr->operands[0].physReg() >> 1;
234bf215546Sopenharmony_ci      }
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci      out.push_back(encoding);
237bf215546Sopenharmony_ci      encoding = 0;
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_ci      int32_t offset = 0;
240bf215546Sopenharmony_ci      uint32_t soffset = ctx.gfx_level >= GFX10
241bf215546Sopenharmony_ci                            ? sgpr_null /* On GFX10 this is disabled by specifying SGPR_NULL */
242bf215546Sopenharmony_ci                            : 0; /* On GFX9, it is disabled by the SOE bit (and it's not present on
243bf215546Sopenharmony_ci                                    GFX8 and below) */
244bf215546Sopenharmony_ci      if (instr->operands.size() >= 2) {
245bf215546Sopenharmony_ci         const Operand& op_off1 = instr->operands[1];
246bf215546Sopenharmony_ci         if (ctx.gfx_level <= GFX9) {
247bf215546Sopenharmony_ci            offset = op_off1.isConstant() ? op_off1.constantValue() : op_off1.physReg();
248bf215546Sopenharmony_ci         } else {
249bf215546Sopenharmony_ci            /* GFX10 only supports constants in OFFSET, so put the operand in SOFFSET if it's an
250bf215546Sopenharmony_ci             * SGPR */
251bf215546Sopenharmony_ci            if (op_off1.isConstant()) {
252bf215546Sopenharmony_ci               offset = op_off1.constantValue();
253bf215546Sopenharmony_ci            } else {
254bf215546Sopenharmony_ci               soffset = op_off1.physReg();
255bf215546Sopenharmony_ci               assert(!soe); /* There is no place to put the other SGPR offset, if any */
256bf215546Sopenharmony_ci            }
257bf215546Sopenharmony_ci         }
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci         if (soe) {
260bf215546Sopenharmony_ci            const Operand& op_off2 = instr->operands.back();
261bf215546Sopenharmony_ci            assert(ctx.gfx_level >= GFX9); /* GFX8 and below don't support specifying a constant
262bf215546Sopenharmony_ci                                               and an SGPR at the same time */
263bf215546Sopenharmony_ci            assert(!op_off2.isConstant());
264bf215546Sopenharmony_ci            soffset = op_off2.physReg();
265bf215546Sopenharmony_ci         }
266bf215546Sopenharmony_ci      }
267bf215546Sopenharmony_ci      encoding |= offset;
268bf215546Sopenharmony_ci      encoding |= soffset << 25;
269bf215546Sopenharmony_ci
270bf215546Sopenharmony_ci      out.push_back(encoding);
271bf215546Sopenharmony_ci      return;
272bf215546Sopenharmony_ci   }
273bf215546Sopenharmony_ci   case Format::VOP2: {
274bf215546Sopenharmony_ci      uint32_t encoding = 0;
275bf215546Sopenharmony_ci      encoding |= opcode << 25;
276bf215546Sopenharmony_ci      encoding |= (0xFF & instr->definitions[0].physReg()) << 17;
277bf215546Sopenharmony_ci      encoding |= (0xFF & instr->operands[1].physReg()) << 9;
278bf215546Sopenharmony_ci      encoding |= instr->operands[0].physReg();
279bf215546Sopenharmony_ci      out.push_back(encoding);
280bf215546Sopenharmony_ci      break;
281bf215546Sopenharmony_ci   }
282bf215546Sopenharmony_ci   case Format::VOP1: {
283bf215546Sopenharmony_ci      uint32_t encoding = (0b0111111 << 25);
284bf215546Sopenharmony_ci      if (!instr->definitions.empty())
285bf215546Sopenharmony_ci         encoding |= (0xFF & instr->definitions[0].physReg()) << 17;
286bf215546Sopenharmony_ci      encoding |= opcode << 9;
287bf215546Sopenharmony_ci      if (!instr->operands.empty())
288bf215546Sopenharmony_ci         encoding |= instr->operands[0].physReg();
289bf215546Sopenharmony_ci      out.push_back(encoding);
290bf215546Sopenharmony_ci      break;
291bf215546Sopenharmony_ci   }
292bf215546Sopenharmony_ci   case Format::VOPC: {
293bf215546Sopenharmony_ci      uint32_t encoding = (0b0111110 << 25);
294bf215546Sopenharmony_ci      encoding |= opcode << 17;
295bf215546Sopenharmony_ci      encoding |= (0xFF & instr->operands[1].physReg()) << 9;
296bf215546Sopenharmony_ci      encoding |= instr->operands[0].physReg();
297bf215546Sopenharmony_ci      out.push_back(encoding);
298bf215546Sopenharmony_ci      break;
299bf215546Sopenharmony_ci   }
300bf215546Sopenharmony_ci   case Format::VINTRP: {
301bf215546Sopenharmony_ci      Interp_instruction& interp = instr->vintrp();
302bf215546Sopenharmony_ci      uint32_t encoding = 0;
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_ci      if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
305bf215546Sopenharmony_ci          instr->opcode == aco_opcode::v_interp_p1lv_f16 ||
306bf215546Sopenharmony_ci          instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
307bf215546Sopenharmony_ci          instr->opcode == aco_opcode::v_interp_p2_f16) {
308bf215546Sopenharmony_ci         if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) {
309bf215546Sopenharmony_ci            encoding = (0b110100 << 26);
310bf215546Sopenharmony_ci         } else if (ctx.gfx_level >= GFX10) {
311bf215546Sopenharmony_ci            encoding = (0b110101 << 26);
312bf215546Sopenharmony_ci         } else {
313bf215546Sopenharmony_ci            unreachable("Unknown gfx_level.");
314bf215546Sopenharmony_ci         }
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci         encoding |= opcode << 16;
317bf215546Sopenharmony_ci         encoding |= (0xFF & instr->definitions[0].physReg());
318bf215546Sopenharmony_ci         out.push_back(encoding);
319bf215546Sopenharmony_ci
320bf215546Sopenharmony_ci         encoding = 0;
321bf215546Sopenharmony_ci         encoding |= interp.attribute;
322bf215546Sopenharmony_ci         encoding |= interp.component << 6;
323bf215546Sopenharmony_ci         encoding |= instr->operands[0].physReg() << 9;
324bf215546Sopenharmony_ci         if (instr->opcode == aco_opcode::v_interp_p2_f16 ||
325bf215546Sopenharmony_ci             instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
326bf215546Sopenharmony_ci             instr->opcode == aco_opcode::v_interp_p1lv_f16) {
327bf215546Sopenharmony_ci            encoding |= instr->operands[2].physReg() << 18;
328bf215546Sopenharmony_ci         }
329bf215546Sopenharmony_ci         out.push_back(encoding);
330bf215546Sopenharmony_ci      } else {
331bf215546Sopenharmony_ci         if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) {
332bf215546Sopenharmony_ci            encoding = (0b110101 << 26); /* Vega ISA doc says 110010 but it's wrong */
333bf215546Sopenharmony_ci         } else {
334bf215546Sopenharmony_ci            encoding = (0b110010 << 26);
335bf215546Sopenharmony_ci         }
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_ci         assert(encoding);
338bf215546Sopenharmony_ci         encoding |= (0xFF & instr->definitions[0].physReg()) << 18;
339bf215546Sopenharmony_ci         encoding |= opcode << 16;
340bf215546Sopenharmony_ci         encoding |= interp.attribute << 10;
341bf215546Sopenharmony_ci         encoding |= interp.component << 8;
342bf215546Sopenharmony_ci         if (instr->opcode == aco_opcode::v_interp_mov_f32)
343bf215546Sopenharmony_ci            encoding |= (0x3 & instr->operands[0].constantValue());
344bf215546Sopenharmony_ci         else
345bf215546Sopenharmony_ci            encoding |= (0xFF & instr->operands[0].physReg());
346bf215546Sopenharmony_ci         out.push_back(encoding);
347bf215546Sopenharmony_ci      }
348bf215546Sopenharmony_ci      break;
349bf215546Sopenharmony_ci   }
350bf215546Sopenharmony_ci   case Format::DS: {
351bf215546Sopenharmony_ci      DS_instruction& ds = instr->ds();
352bf215546Sopenharmony_ci      uint32_t encoding = (0b110110 << 26);
353bf215546Sopenharmony_ci      if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) {
354bf215546Sopenharmony_ci         encoding |= opcode << 17;
355bf215546Sopenharmony_ci         encoding |= (ds.gds ? 1 : 0) << 16;
356bf215546Sopenharmony_ci      } else {
357bf215546Sopenharmony_ci         encoding |= opcode << 18;
358bf215546Sopenharmony_ci         encoding |= (ds.gds ? 1 : 0) << 17;
359bf215546Sopenharmony_ci      }
360bf215546Sopenharmony_ci      encoding |= ((0xFF & ds.offset1) << 8);
361bf215546Sopenharmony_ci      encoding |= (0xFFFF & ds.offset0);
362bf215546Sopenharmony_ci      out.push_back(encoding);
363bf215546Sopenharmony_ci      encoding = 0;
364bf215546Sopenharmony_ci      unsigned reg = !instr->definitions.empty() ? instr->definitions[0].physReg() : 0;
365bf215546Sopenharmony_ci      encoding |= (0xFF & reg) << 24;
366bf215546Sopenharmony_ci      reg = instr->operands.size() >= 3 && !(instr->operands[2].physReg() == m0)
367bf215546Sopenharmony_ci               ? instr->operands[2].physReg()
368bf215546Sopenharmony_ci               : 0;
369bf215546Sopenharmony_ci      encoding |= (0xFF & reg) << 16;
370bf215546Sopenharmony_ci      reg = instr->operands.size() >= 2 && !(instr->operands[1].physReg() == m0)
371bf215546Sopenharmony_ci               ? instr->operands[1].physReg()
372bf215546Sopenharmony_ci               : 0;
373bf215546Sopenharmony_ci      encoding |= (0xFF & reg) << 8;
374bf215546Sopenharmony_ci      encoding |= (0xFF & instr->operands[0].physReg());
375bf215546Sopenharmony_ci      out.push_back(encoding);
376bf215546Sopenharmony_ci      break;
377bf215546Sopenharmony_ci   }
378bf215546Sopenharmony_ci   case Format::MUBUF: {
379bf215546Sopenharmony_ci      MUBUF_instruction& mubuf = instr->mubuf();
380bf215546Sopenharmony_ci      uint32_t encoding = (0b111000 << 26);
381bf215546Sopenharmony_ci      encoding |= opcode << 18;
382bf215546Sopenharmony_ci      encoding |= (mubuf.lds ? 1 : 0) << 16;
383bf215546Sopenharmony_ci      encoding |= (mubuf.glc ? 1 : 0) << 14;
384bf215546Sopenharmony_ci      encoding |= (mubuf.idxen ? 1 : 0) << 13;
385bf215546Sopenharmony_ci      assert(!mubuf.addr64 || ctx.gfx_level <= GFX7);
386bf215546Sopenharmony_ci      if (ctx.gfx_level == GFX6 || ctx.gfx_level == GFX7)
387bf215546Sopenharmony_ci         encoding |= (mubuf.addr64 ? 1 : 0) << 15;
388bf215546Sopenharmony_ci      encoding |= (mubuf.offen ? 1 : 0) << 12;
389bf215546Sopenharmony_ci      if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) {
390bf215546Sopenharmony_ci         assert(!mubuf.dlc); /* Device-level coherent is not supported on GFX9 and lower */
391bf215546Sopenharmony_ci         encoding |= (mubuf.slc ? 1 : 0) << 17;
392bf215546Sopenharmony_ci      } else if (ctx.gfx_level >= GFX10) {
393bf215546Sopenharmony_ci         encoding |= (mubuf.dlc ? 1 : 0) << 15;
394bf215546Sopenharmony_ci      }
395bf215546Sopenharmony_ci      encoding |= 0x0FFF & mubuf.offset;
396bf215546Sopenharmony_ci      out.push_back(encoding);
397bf215546Sopenharmony_ci      encoding = 0;
398bf215546Sopenharmony_ci      if (ctx.gfx_level <= GFX7 || ctx.gfx_level >= GFX10) {
399bf215546Sopenharmony_ci         encoding |= (mubuf.slc ? 1 : 0) << 22;
400bf215546Sopenharmony_ci      }
401bf215546Sopenharmony_ci      encoding |= instr->operands[2].physReg() << 24;
402bf215546Sopenharmony_ci      encoding |= (mubuf.tfe ? 1 : 0) << 23;
403bf215546Sopenharmony_ci      encoding |= (instr->operands[0].physReg() >> 2) << 16;
404bf215546Sopenharmony_ci      unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg()
405bf215546Sopenharmony_ci                                                : instr->definitions[0].physReg();
406bf215546Sopenharmony_ci      encoding |= (0xFF & reg) << 8;
407bf215546Sopenharmony_ci      encoding |= (0xFF & instr->operands[1].physReg());
408bf215546Sopenharmony_ci      out.push_back(encoding);
409bf215546Sopenharmony_ci      break;
410bf215546Sopenharmony_ci   }
411bf215546Sopenharmony_ci   case Format::MTBUF: {
412bf215546Sopenharmony_ci      MTBUF_instruction& mtbuf = instr->mtbuf();
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_ci      uint32_t img_format = ac_get_tbuffer_format(ctx.gfx_level, mtbuf.dfmt, mtbuf.nfmt);
415bf215546Sopenharmony_ci      uint32_t encoding = (0b111010 << 26);
416bf215546Sopenharmony_ci      assert(img_format <= 0x7F);
417bf215546Sopenharmony_ci      assert(!mtbuf.dlc || ctx.gfx_level >= GFX10);
418bf215546Sopenharmony_ci      encoding |= (mtbuf.dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */
419bf215546Sopenharmony_ci      encoding |= (mtbuf.glc ? 1 : 0) << 14;
420bf215546Sopenharmony_ci      encoding |= (mtbuf.idxen ? 1 : 0) << 13;
421bf215546Sopenharmony_ci      encoding |= (mtbuf.offen ? 1 : 0) << 12;
422bf215546Sopenharmony_ci      encoding |= 0x0FFF & mtbuf.offset;
423bf215546Sopenharmony_ci      encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci      if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) {
426bf215546Sopenharmony_ci         encoding |= opcode << 15;
427bf215546Sopenharmony_ci      } else {
428bf215546Sopenharmony_ci         encoding |= (opcode & 0x07) << 16; /* 3 LSBs of 4-bit OPCODE */
429bf215546Sopenharmony_ci      }
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_ci      out.push_back(encoding);
432bf215546Sopenharmony_ci      encoding = 0;
433bf215546Sopenharmony_ci
434bf215546Sopenharmony_ci      encoding |= instr->operands[2].physReg() << 24;
435bf215546Sopenharmony_ci      encoding |= (mtbuf.tfe ? 1 : 0) << 23;
436bf215546Sopenharmony_ci      encoding |= (mtbuf.slc ? 1 : 0) << 22;
437bf215546Sopenharmony_ci      encoding |= (instr->operands[0].physReg() >> 2) << 16;
438bf215546Sopenharmony_ci      unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg()
439bf215546Sopenharmony_ci                                                : instr->definitions[0].physReg();
440bf215546Sopenharmony_ci      encoding |= (0xFF & reg) << 8;
441bf215546Sopenharmony_ci      encoding |= (0xFF & instr->operands[1].physReg());
442bf215546Sopenharmony_ci
443bf215546Sopenharmony_ci      if (ctx.gfx_level >= GFX10) {
444bf215546Sopenharmony_ci         encoding |= (((opcode & 0x08) >> 3) << 21); /* MSB of 4-bit OPCODE */
445bf215546Sopenharmony_ci      }
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_ci      out.push_back(encoding);
448bf215546Sopenharmony_ci      break;
449bf215546Sopenharmony_ci   }
450bf215546Sopenharmony_ci   case Format::MIMG: {
451bf215546Sopenharmony_ci      unsigned nsa_dwords = get_mimg_nsa_dwords(instr);
452bf215546Sopenharmony_ci      assert(!nsa_dwords || ctx.gfx_level >= GFX10);
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_ci      MIMG_instruction& mimg = instr->mimg();
455bf215546Sopenharmony_ci      uint32_t encoding = (0b111100 << 26);
456bf215546Sopenharmony_ci      encoding |= mimg.slc ? 1 << 25 : 0;
457bf215546Sopenharmony_ci      encoding |= (opcode & 0x7f) << 18;
458bf215546Sopenharmony_ci      encoding |= (opcode >> 7) & 1;
459bf215546Sopenharmony_ci      encoding |= mimg.lwe ? 1 << 17 : 0;
460bf215546Sopenharmony_ci      encoding |= mimg.tfe ? 1 << 16 : 0;
461bf215546Sopenharmony_ci      encoding |= mimg.glc ? 1 << 13 : 0;
462bf215546Sopenharmony_ci      encoding |= mimg.unrm ? 1 << 12 : 0;
463bf215546Sopenharmony_ci      if (ctx.gfx_level <= GFX9) {
464bf215546Sopenharmony_ci         assert(!mimg.dlc); /* Device-level coherent is not supported on GFX9 and lower */
465bf215546Sopenharmony_ci         assert(!mimg.r128);
466bf215546Sopenharmony_ci         encoding |= mimg.a16 ? 1 << 15 : 0;
467bf215546Sopenharmony_ci         encoding |= mimg.da ? 1 << 14 : 0;
468bf215546Sopenharmony_ci      } else {
469bf215546Sopenharmony_ci         encoding |= mimg.r128 ? 1 << 15
470bf215546Sopenharmony_ci                               : 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */
471bf215546Sopenharmony_ci         encoding |= nsa_dwords << 1;
472bf215546Sopenharmony_ci         encoding |= mimg.dim << 3; /* GFX10: dimensionality instead of declare array */
473bf215546Sopenharmony_ci         encoding |= mimg.dlc ? 1 << 7 : 0;
474bf215546Sopenharmony_ci      }
475bf215546Sopenharmony_ci      encoding |= (0xF & mimg.dmask) << 8;
476bf215546Sopenharmony_ci      out.push_back(encoding);
477bf215546Sopenharmony_ci      encoding = (0xFF & instr->operands[3].physReg()); /* VADDR */
478bf215546Sopenharmony_ci      if (!instr->definitions.empty()) {
479bf215546Sopenharmony_ci         encoding |= (0xFF & instr->definitions[0].physReg()) << 8; /* VDATA */
480bf215546Sopenharmony_ci      } else if (!instr->operands[2].isUndefined()) {
481bf215546Sopenharmony_ci         encoding |= (0xFF & instr->operands[2].physReg()) << 8; /* VDATA */
482bf215546Sopenharmony_ci      }
483bf215546Sopenharmony_ci      encoding |= (0x1F & (instr->operands[0].physReg() >> 2)) << 16; /* T# (resource) */
484bf215546Sopenharmony_ci      if (!instr->operands[1].isUndefined())
485bf215546Sopenharmony_ci         encoding |= (0x1F & (instr->operands[1].physReg() >> 2)) << 21; /* sampler */
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci      assert(!mimg.d16 || ctx.gfx_level >= GFX9);
488bf215546Sopenharmony_ci      encoding |= mimg.d16 ? 1 << 31 : 0;
489bf215546Sopenharmony_ci      if (ctx.gfx_level >= GFX10) {
490bf215546Sopenharmony_ci         /* GFX10: A16 still exists, but is in a different place */
491bf215546Sopenharmony_ci         encoding |= mimg.a16 ? 1 << 30 : 0;
492bf215546Sopenharmony_ci      }
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci      out.push_back(encoding);
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_ci      if (nsa_dwords) {
497bf215546Sopenharmony_ci         out.resize(out.size() + nsa_dwords);
498bf215546Sopenharmony_ci         std::vector<uint32_t>::iterator nsa = std::prev(out.end(), nsa_dwords);
499bf215546Sopenharmony_ci         for (unsigned i = 0; i < instr->operands.size() - 4u; i++)
500bf215546Sopenharmony_ci            nsa[i / 4] |= (0xFF & instr->operands[4 + i].physReg().reg()) << (i % 4 * 8);
501bf215546Sopenharmony_ci      }
502bf215546Sopenharmony_ci      break;
503bf215546Sopenharmony_ci   }
504bf215546Sopenharmony_ci   case Format::FLAT:
505bf215546Sopenharmony_ci   case Format::SCRATCH:
506bf215546Sopenharmony_ci   case Format::GLOBAL: {
507bf215546Sopenharmony_ci      FLAT_instruction& flat = instr->flatlike();
508bf215546Sopenharmony_ci      uint32_t encoding = (0b110111 << 26);
509bf215546Sopenharmony_ci      encoding |= opcode << 18;
510bf215546Sopenharmony_ci      if (ctx.gfx_level == GFX9 || ctx.gfx_level >= GFX11) {
511bf215546Sopenharmony_ci         if (instr->isFlat())
512bf215546Sopenharmony_ci            assert(flat.offset <= 0xfff);
513bf215546Sopenharmony_ci         else
514bf215546Sopenharmony_ci            assert(flat.offset >= -4096 && flat.offset < 4096);
515bf215546Sopenharmony_ci         encoding |= flat.offset & 0x1fff;
516bf215546Sopenharmony_ci      } else if (ctx.gfx_level <= GFX8 || instr->isFlat()) {
517bf215546Sopenharmony_ci         /* GFX10 has a 12-bit immediate OFFSET field,
518bf215546Sopenharmony_ci          * but it has a hw bug: it ignores the offset, called FlatSegmentOffsetBug
519bf215546Sopenharmony_ci          */
520bf215546Sopenharmony_ci         assert(flat.offset == 0);
521bf215546Sopenharmony_ci      } else {
522bf215546Sopenharmony_ci         assert(flat.offset >= -2048 && flat.offset <= 2047);
523bf215546Sopenharmony_ci         encoding |= flat.offset & 0xfff;
524bf215546Sopenharmony_ci      }
525bf215546Sopenharmony_ci      if (instr->isScratch())
526bf215546Sopenharmony_ci         encoding |= 1 << 14;
527bf215546Sopenharmony_ci      else if (instr->isGlobal())
528bf215546Sopenharmony_ci         encoding |= 2 << 14;
529bf215546Sopenharmony_ci      encoding |= flat.lds ? 1 << 13 : 0;
530bf215546Sopenharmony_ci      encoding |= flat.glc ? 1 << 16 : 0;
531bf215546Sopenharmony_ci      encoding |= flat.slc ? 1 << 17 : 0;
532bf215546Sopenharmony_ci      if (ctx.gfx_level >= GFX10) {
533bf215546Sopenharmony_ci         assert(!flat.nv);
534bf215546Sopenharmony_ci         encoding |= flat.dlc ? 1 << 12 : 0;
535bf215546Sopenharmony_ci      } else {
536bf215546Sopenharmony_ci         assert(!flat.dlc);
537bf215546Sopenharmony_ci      }
538bf215546Sopenharmony_ci      out.push_back(encoding);
539bf215546Sopenharmony_ci      encoding = (0xFF & instr->operands[0].physReg());
540bf215546Sopenharmony_ci      if (!instr->definitions.empty())
541bf215546Sopenharmony_ci         encoding |= (0xFF & instr->definitions[0].physReg()) << 24;
542bf215546Sopenharmony_ci      if (instr->operands.size() >= 3)
543bf215546Sopenharmony_ci         encoding |= (0xFF & instr->operands[2].physReg()) << 8;
544bf215546Sopenharmony_ci      if (!instr->operands[1].isUndefined()) {
545bf215546Sopenharmony_ci         assert(ctx.gfx_level >= GFX10 || instr->operands[1].physReg() != 0x7F);
546bf215546Sopenharmony_ci         assert(instr->format != Format::FLAT);
547bf215546Sopenharmony_ci         encoding |= instr->operands[1].physReg() << 16;
548bf215546Sopenharmony_ci      } else if (instr->format != Format::FLAT ||
549bf215546Sopenharmony_ci                 ctx.gfx_level >= GFX10) { /* SADDR is actually used with FLAT on GFX10 */
550bf215546Sopenharmony_ci         /* For GFX10.3 scratch, 0x7F disables both ADDR and SADDR, unlike sgpr_null, which only
551bf215546Sopenharmony_ci          * disables SADDR.
552bf215546Sopenharmony_ci          */
553bf215546Sopenharmony_ci         if (ctx.gfx_level <= GFX9 ||
554bf215546Sopenharmony_ci             (instr->format == Format::SCRATCH && instr->operands[0].isUndefined()))
555bf215546Sopenharmony_ci            encoding |= 0x7F << 16;
556bf215546Sopenharmony_ci         else
557bf215546Sopenharmony_ci            encoding |= sgpr_null << 16;
558bf215546Sopenharmony_ci      }
559bf215546Sopenharmony_ci      encoding |= flat.nv ? 1 << 23 : 0;
560bf215546Sopenharmony_ci      out.push_back(encoding);
561bf215546Sopenharmony_ci      break;
562bf215546Sopenharmony_ci   }
563bf215546Sopenharmony_ci   case Format::EXP: {
564bf215546Sopenharmony_ci      Export_instruction& exp = instr->exp();
565bf215546Sopenharmony_ci      uint32_t encoding;
566bf215546Sopenharmony_ci      if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9) {
567bf215546Sopenharmony_ci         encoding = (0b110001 << 26);
568bf215546Sopenharmony_ci      } else {
569bf215546Sopenharmony_ci         encoding = (0b111110 << 26);
570bf215546Sopenharmony_ci      }
571bf215546Sopenharmony_ci
572bf215546Sopenharmony_ci      encoding |= exp.valid_mask ? 0b1 << 12 : 0;
573bf215546Sopenharmony_ci      encoding |= exp.done ? 0b1 << 11 : 0;
574bf215546Sopenharmony_ci      encoding |= exp.compressed ? 0b1 << 10 : 0;
575bf215546Sopenharmony_ci      encoding |= exp.dest << 4;
576bf215546Sopenharmony_ci      encoding |= exp.enabled_mask;
577bf215546Sopenharmony_ci      out.push_back(encoding);
578bf215546Sopenharmony_ci      encoding = 0xFF & exp.operands[0].physReg();
579bf215546Sopenharmony_ci      encoding |= (0xFF & exp.operands[1].physReg()) << 8;
580bf215546Sopenharmony_ci      encoding |= (0xFF & exp.operands[2].physReg()) << 16;
581bf215546Sopenharmony_ci      encoding |= (0xFF & exp.operands[3].physReg()) << 24;
582bf215546Sopenharmony_ci      out.push_back(encoding);
583bf215546Sopenharmony_ci      break;
584bf215546Sopenharmony_ci   }
585bf215546Sopenharmony_ci   case Format::PSEUDO:
586bf215546Sopenharmony_ci   case Format::PSEUDO_BARRIER:
587bf215546Sopenharmony_ci      if (instr->opcode != aco_opcode::p_unit_test)
588bf215546Sopenharmony_ci         unreachable("Pseudo instructions should be lowered before assembly.");
589bf215546Sopenharmony_ci      break;
590bf215546Sopenharmony_ci   default:
591bf215546Sopenharmony_ci      if (instr->isVOP3()) {
592bf215546Sopenharmony_ci         VOP3_instruction& vop3 = instr->vop3();
593bf215546Sopenharmony_ci
594bf215546Sopenharmony_ci         if (instr->isVOP2()) {
595bf215546Sopenharmony_ci            opcode = opcode + 0x100;
596bf215546Sopenharmony_ci         } else if (instr->isVOP1()) {
597bf215546Sopenharmony_ci            if (ctx.gfx_level == GFX8 || ctx.gfx_level == GFX9)
598bf215546Sopenharmony_ci               opcode = opcode + 0x140;
599bf215546Sopenharmony_ci            else
600bf215546Sopenharmony_ci               opcode = opcode + 0x180;
601bf215546Sopenharmony_ci         } else if (instr->isVOPC()) {
602bf215546Sopenharmony_ci            opcode = opcode + 0x0;
603bf215546Sopenharmony_ci         } else if (instr->isVINTRP()) {
604bf215546Sopenharmony_ci            opcode = opcode + 0x270;
605bf215546Sopenharmony_ci         }
606bf215546Sopenharmony_ci
607bf215546Sopenharmony_ci         uint32_t encoding;
608bf215546Sopenharmony_ci         if (ctx.gfx_level <= GFX9) {
609bf215546Sopenharmony_ci            encoding = (0b110100 << 26);
610bf215546Sopenharmony_ci         } else if (ctx.gfx_level >= GFX10) {
611bf215546Sopenharmony_ci            encoding = (0b110101 << 26);
612bf215546Sopenharmony_ci         } else {
613bf215546Sopenharmony_ci            unreachable("Unknown gfx_level.");
614bf215546Sopenharmony_ci         }
615bf215546Sopenharmony_ci
616bf215546Sopenharmony_ci         if (ctx.gfx_level <= GFX7) {
617bf215546Sopenharmony_ci            encoding |= opcode << 17;
618bf215546Sopenharmony_ci            encoding |= (vop3.clamp ? 1 : 0) << 11;
619bf215546Sopenharmony_ci         } else {
620bf215546Sopenharmony_ci            encoding |= opcode << 16;
621bf215546Sopenharmony_ci            encoding |= (vop3.clamp ? 1 : 0) << 15;
622bf215546Sopenharmony_ci         }
623bf215546Sopenharmony_ci         encoding |= vop3.opsel << 11;
624bf215546Sopenharmony_ci         for (unsigned i = 0; i < 3; i++)
625bf215546Sopenharmony_ci            encoding |= vop3.abs[i] << (8 + i);
626bf215546Sopenharmony_ci         if (instr->definitions.size() == 2)
627bf215546Sopenharmony_ci            encoding |= instr->definitions[1].physReg() << 8;
628bf215546Sopenharmony_ci         encoding |= (0xFF & instr->definitions[0].physReg());
629bf215546Sopenharmony_ci         out.push_back(encoding);
630bf215546Sopenharmony_ci         encoding = 0;
631bf215546Sopenharmony_ci         if (instr->opcode == aco_opcode::v_interp_mov_f32) {
632bf215546Sopenharmony_ci            encoding = 0x3 & instr->operands[0].constantValue();
633bf215546Sopenharmony_ci         } else if (instr->opcode == aco_opcode::v_writelane_b32_e64) {
634bf215546Sopenharmony_ci            encoding |= instr->operands[0].physReg() << 0;
635bf215546Sopenharmony_ci            encoding |= instr->operands[1].physReg() << 9;
636bf215546Sopenharmony_ci            /* Encoding src2 works fine with hardware but breaks some disassemblers. */
637bf215546Sopenharmony_ci         } else {
638bf215546Sopenharmony_ci            for (unsigned i = 0; i < instr->operands.size(); i++)
639bf215546Sopenharmony_ci               encoding |= instr->operands[i].physReg() << (i * 9);
640bf215546Sopenharmony_ci         }
641bf215546Sopenharmony_ci         encoding |= vop3.omod << 27;
642bf215546Sopenharmony_ci         for (unsigned i = 0; i < 3; i++)
643bf215546Sopenharmony_ci            encoding |= vop3.neg[i] << (29 + i);
644bf215546Sopenharmony_ci         out.push_back(encoding);
645bf215546Sopenharmony_ci
646bf215546Sopenharmony_ci      } else if (instr->isVOP3P()) {
647bf215546Sopenharmony_ci         VOP3P_instruction& vop3 = instr->vop3p();
648bf215546Sopenharmony_ci
649bf215546Sopenharmony_ci         uint32_t encoding;
650bf215546Sopenharmony_ci         if (ctx.gfx_level == GFX9) {
651bf215546Sopenharmony_ci            encoding = (0b110100111 << 23);
652bf215546Sopenharmony_ci         } else if (ctx.gfx_level >= GFX10) {
653bf215546Sopenharmony_ci            encoding = (0b110011 << 26);
654bf215546Sopenharmony_ci         } else {
655bf215546Sopenharmony_ci            unreachable("Unknown gfx_level.");
656bf215546Sopenharmony_ci         }
657bf215546Sopenharmony_ci
658bf215546Sopenharmony_ci         encoding |= opcode << 16;
659bf215546Sopenharmony_ci         encoding |= (vop3.clamp ? 1 : 0) << 15;
660bf215546Sopenharmony_ci         encoding |= vop3.opsel_lo << 11;
661bf215546Sopenharmony_ci         encoding |= ((vop3.opsel_hi & 0x4) ? 1 : 0) << 14;
662bf215546Sopenharmony_ci         for (unsigned i = 0; i < 3; i++)
663bf215546Sopenharmony_ci            encoding |= vop3.neg_hi[i] << (8 + i);
664bf215546Sopenharmony_ci         encoding |= (0xFF & instr->definitions[0].physReg());
665bf215546Sopenharmony_ci         out.push_back(encoding);
666bf215546Sopenharmony_ci         encoding = 0;
667bf215546Sopenharmony_ci         for (unsigned i = 0; i < instr->operands.size(); i++)
668bf215546Sopenharmony_ci            encoding |= instr->operands[i].physReg() << (i * 9);
669bf215546Sopenharmony_ci         encoding |= (vop3.opsel_hi & 0x3) << 27;
670bf215546Sopenharmony_ci         for (unsigned i = 0; i < 3; i++)
671bf215546Sopenharmony_ci            encoding |= vop3.neg_lo[i] << (29 + i);
672bf215546Sopenharmony_ci         out.push_back(encoding);
673bf215546Sopenharmony_ci
674bf215546Sopenharmony_ci      } else if (instr->isDPP16()) {
675bf215546Sopenharmony_ci         assert(ctx.gfx_level >= GFX8);
676bf215546Sopenharmony_ci         DPP16_instruction& dpp = instr->dpp16();
677bf215546Sopenharmony_ci
678bf215546Sopenharmony_ci         /* first emit the instruction without the DPP operand */
679bf215546Sopenharmony_ci         Operand dpp_op = instr->operands[0];
680bf215546Sopenharmony_ci         instr->operands[0] = Operand(PhysReg{250}, v1);
681bf215546Sopenharmony_ci         instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16);
682bf215546Sopenharmony_ci         emit_instruction(ctx, out, instr);
683bf215546Sopenharmony_ci         uint32_t encoding = (0xF & dpp.row_mask) << 28;
684bf215546Sopenharmony_ci         encoding |= (0xF & dpp.bank_mask) << 24;
685bf215546Sopenharmony_ci         encoding |= dpp.abs[1] << 23;
686bf215546Sopenharmony_ci         encoding |= dpp.neg[1] << 22;
687bf215546Sopenharmony_ci         encoding |= dpp.abs[0] << 21;
688bf215546Sopenharmony_ci         encoding |= dpp.neg[0] << 20;
689bf215546Sopenharmony_ci         if (ctx.gfx_level >= GFX10)
690bf215546Sopenharmony_ci            encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */
691bf215546Sopenharmony_ci         encoding |= dpp.bound_ctrl << 19;
692bf215546Sopenharmony_ci         encoding |= dpp.dpp_ctrl << 8;
693bf215546Sopenharmony_ci         encoding |= (0xFF) & dpp_op.physReg();
694bf215546Sopenharmony_ci         out.push_back(encoding);
695bf215546Sopenharmony_ci         return;
696bf215546Sopenharmony_ci      } else if (instr->isDPP8()) {
697bf215546Sopenharmony_ci         assert(ctx.gfx_level >= GFX10);
698bf215546Sopenharmony_ci         DPP8_instruction& dpp = instr->dpp8();
699bf215546Sopenharmony_ci
700bf215546Sopenharmony_ci         /* first emit the instruction without the DPP operand */
701bf215546Sopenharmony_ci         Operand dpp_op = instr->operands[0];
702bf215546Sopenharmony_ci         instr->operands[0] = Operand(PhysReg{234}, v1);
703bf215546Sopenharmony_ci         instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8);
704bf215546Sopenharmony_ci         emit_instruction(ctx, out, instr);
705bf215546Sopenharmony_ci         uint32_t encoding = (0xFF) & dpp_op.physReg();
706bf215546Sopenharmony_ci         for (unsigned i = 0; i < 8; ++i)
707bf215546Sopenharmony_ci            encoding |= dpp.lane_sel[i] << (8 + i * 3);
708bf215546Sopenharmony_ci         out.push_back(encoding);
709bf215546Sopenharmony_ci         return;
710bf215546Sopenharmony_ci      } else if (instr->isSDWA()) {
711bf215546Sopenharmony_ci         assert(ctx.gfx_level >= GFX8 && ctx.gfx_level < GFX11);
712bf215546Sopenharmony_ci         SDWA_instruction& sdwa = instr->sdwa();
713bf215546Sopenharmony_ci
714bf215546Sopenharmony_ci         /* first emit the instruction without the SDWA operand */
715bf215546Sopenharmony_ci         Operand sdwa_op = instr->operands[0];
716bf215546Sopenharmony_ci         instr->operands[0] = Operand(PhysReg{249}, v1);
717bf215546Sopenharmony_ci         instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::SDWA);
718bf215546Sopenharmony_ci         emit_instruction(ctx, out, instr);
719bf215546Sopenharmony_ci
720bf215546Sopenharmony_ci         uint32_t encoding = 0;
721bf215546Sopenharmony_ci
722bf215546Sopenharmony_ci         if (instr->isVOPC()) {
723bf215546Sopenharmony_ci            if (instr->definitions[0].physReg() != vcc) {
724bf215546Sopenharmony_ci               encoding |= instr->definitions[0].physReg() << 8;
725bf215546Sopenharmony_ci               encoding |= 1 << 15;
726bf215546Sopenharmony_ci            }
727bf215546Sopenharmony_ci            encoding |= (sdwa.clamp ? 1 : 0) << 13;
728bf215546Sopenharmony_ci         } else {
729bf215546Sopenharmony_ci            encoding |= sdwa.dst_sel.to_sdwa_sel(instr->definitions[0].physReg().byte()) << 8;
730bf215546Sopenharmony_ci            uint32_t dst_u = sdwa.dst_sel.sign_extend() ? 1 : 0;
731bf215546Sopenharmony_ci            if (instr->definitions[0].bytes() < 4) /* dst_preserve */
732bf215546Sopenharmony_ci               dst_u = 2;
733bf215546Sopenharmony_ci            encoding |= dst_u << 11;
734bf215546Sopenharmony_ci            encoding |= (sdwa.clamp ? 1 : 0) << 13;
735bf215546Sopenharmony_ci            encoding |= sdwa.omod << 14;
736bf215546Sopenharmony_ci         }
737bf215546Sopenharmony_ci
738bf215546Sopenharmony_ci         encoding |= sdwa.sel[0].to_sdwa_sel(sdwa_op.physReg().byte()) << 16;
739bf215546Sopenharmony_ci         encoding |= sdwa.sel[0].sign_extend() ? 1 << 19 : 0;
740bf215546Sopenharmony_ci         encoding |= sdwa.abs[0] << 21;
741bf215546Sopenharmony_ci         encoding |= sdwa.neg[0] << 20;
742bf215546Sopenharmony_ci
743bf215546Sopenharmony_ci         if (instr->operands.size() >= 2) {
744bf215546Sopenharmony_ci            encoding |= sdwa.sel[1].to_sdwa_sel(instr->operands[1].physReg().byte()) << 24;
745bf215546Sopenharmony_ci            encoding |= sdwa.sel[1].sign_extend() ? 1 << 27 : 0;
746bf215546Sopenharmony_ci            encoding |= sdwa.abs[1] << 29;
747bf215546Sopenharmony_ci            encoding |= sdwa.neg[1] << 28;
748bf215546Sopenharmony_ci         }
749bf215546Sopenharmony_ci
750bf215546Sopenharmony_ci         encoding |= 0xFF & sdwa_op.physReg();
751bf215546Sopenharmony_ci         encoding |= (sdwa_op.physReg() < 256) << 23;
752bf215546Sopenharmony_ci         if (instr->operands.size() >= 2)
753bf215546Sopenharmony_ci            encoding |= (instr->operands[1].physReg() < 256) << 31;
754bf215546Sopenharmony_ci         out.push_back(encoding);
755bf215546Sopenharmony_ci      } else {
756bf215546Sopenharmony_ci         unreachable("unimplemented instruction format");
757bf215546Sopenharmony_ci      }
758bf215546Sopenharmony_ci      break;
759bf215546Sopenharmony_ci   }
760bf215546Sopenharmony_ci
761bf215546Sopenharmony_ci   /* append literal dword */
762bf215546Sopenharmony_ci   for (const Operand& op : instr->operands) {
763bf215546Sopenharmony_ci      if (op.isLiteral()) {
764bf215546Sopenharmony_ci         out.push_back(op.constantValue());
765bf215546Sopenharmony_ci         break;
766bf215546Sopenharmony_ci      }
767bf215546Sopenharmony_ci   }
768bf215546Sopenharmony_ci}
769bf215546Sopenharmony_ci
770bf215546Sopenharmony_civoid
771bf215546Sopenharmony_ciemit_block(asm_context& ctx, std::vector<uint32_t>& out, Block& block)
772bf215546Sopenharmony_ci{
773bf215546Sopenharmony_ci   for (aco_ptr<Instruction>& instr : block.instructions) {
774bf215546Sopenharmony_ci#if 0
775bf215546Sopenharmony_ci      int start_idx = out.size();
776bf215546Sopenharmony_ci      std::cerr << "Encoding:\t" << std::endl;
777bf215546Sopenharmony_ci      aco_print_instr(&*instr, stderr);
778bf215546Sopenharmony_ci      std::cerr << std::endl;
779bf215546Sopenharmony_ci#endif
780bf215546Sopenharmony_ci      emit_instruction(ctx, out, instr.get());
781bf215546Sopenharmony_ci#if 0
782bf215546Sopenharmony_ci      for (int i = start_idx; i < out.size(); i++)
783bf215546Sopenharmony_ci         std::cerr << "encoding: " << "0x" << std::setfill('0') << std::setw(8) << std::hex << out[i] << std::endl;
784bf215546Sopenharmony_ci#endif
785bf215546Sopenharmony_ci   }
786bf215546Sopenharmony_ci}
787bf215546Sopenharmony_ci
788bf215546Sopenharmony_civoid
789bf215546Sopenharmony_cifix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
790bf215546Sopenharmony_ci{
791bf215546Sopenharmony_ci   bool exported = false;
792bf215546Sopenharmony_ci   for (Block& block : program->blocks) {
793bf215546Sopenharmony_ci      if (!(block.kind & block_kind_export_end))
794bf215546Sopenharmony_ci         continue;
795bf215546Sopenharmony_ci      std::vector<aco_ptr<Instruction>>::reverse_iterator it = block.instructions.rbegin();
796bf215546Sopenharmony_ci      while (it != block.instructions.rend()) {
797bf215546Sopenharmony_ci         if ((*it)->isEXP()) {
798bf215546Sopenharmony_ci            Export_instruction& exp = (*it)->exp();
799bf215546Sopenharmony_ci            if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG) {
800bf215546Sopenharmony_ci               if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= (V_008DFC_SQ_EXP_POS + 3)) {
801bf215546Sopenharmony_ci                  exp.done = true;
802bf215546Sopenharmony_ci                  exported = true;
803bf215546Sopenharmony_ci                  break;
804bf215546Sopenharmony_ci               }
805bf215546Sopenharmony_ci            } else {
806bf215546Sopenharmony_ci               if (!program->info.ps.has_epilog) {
807bf215546Sopenharmony_ci                  exp.done = true;
808bf215546Sopenharmony_ci                  exp.valid_mask = true;
809bf215546Sopenharmony_ci               }
810bf215546Sopenharmony_ci               exported = true;
811bf215546Sopenharmony_ci               break;
812bf215546Sopenharmony_ci            }
813bf215546Sopenharmony_ci         } else if ((*it)->definitions.size() && (*it)->definitions[0].physReg() == exec) {
814bf215546Sopenharmony_ci            break;
815bf215546Sopenharmony_ci         } else if ((*it)->opcode == aco_opcode::s_setpc_b64) {
816bf215546Sopenharmony_ci            /* Do not abort if the main FS has an epilog because it only
817bf215546Sopenharmony_ci             * exports MRTZ (if present) and the epilog exports colors.
818bf215546Sopenharmony_ci             */
819bf215546Sopenharmony_ci            exported |= program->stage.hw == HWStage::FS && program->info.ps.has_epilog;
820bf215546Sopenharmony_ci         }
821bf215546Sopenharmony_ci         ++it;
822bf215546Sopenharmony_ci      }
823bf215546Sopenharmony_ci   }
824bf215546Sopenharmony_ci
825bf215546Sopenharmony_ci   if (!exported) {
826bf215546Sopenharmony_ci      /* Abort in order to avoid a GPU hang. */
827bf215546Sopenharmony_ci      bool is_vertex_or_ngg =
828bf215546Sopenharmony_ci         (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG);
829bf215546Sopenharmony_ci      aco_err(program,
830bf215546Sopenharmony_ci              "Missing export in %s shader:", is_vertex_or_ngg ? "vertex or NGG" : "fragment");
831bf215546Sopenharmony_ci      aco_print_program(program, stderr);
832bf215546Sopenharmony_ci      abort();
833bf215546Sopenharmony_ci   }
834bf215546Sopenharmony_ci}
835bf215546Sopenharmony_ci
836bf215546Sopenharmony_cistatic void
837bf215546Sopenharmony_ciinsert_code(asm_context& ctx, std::vector<uint32_t>& out, unsigned insert_before,
838bf215546Sopenharmony_ci            unsigned insert_count, const uint32_t* insert_data)
839bf215546Sopenharmony_ci{
840bf215546Sopenharmony_ci   out.insert(out.begin() + insert_before, insert_data, insert_data + insert_count);
841bf215546Sopenharmony_ci
842bf215546Sopenharmony_ci   /* Update the offset of each affected block */
843bf215546Sopenharmony_ci   for (Block& block : ctx.program->blocks) {
844bf215546Sopenharmony_ci      if (block.offset >= insert_before)
845bf215546Sopenharmony_ci         block.offset += insert_count;
846bf215546Sopenharmony_ci   }
847bf215546Sopenharmony_ci
848bf215546Sopenharmony_ci   /* Find first branch after the inserted code */
849bf215546Sopenharmony_ci   auto branch_it = std::find_if(ctx.branches.begin(), ctx.branches.end(),
850bf215546Sopenharmony_ci                                 [insert_before](const auto& branch) -> bool
851bf215546Sopenharmony_ci                                 { return (unsigned)branch.first >= insert_before; });
852bf215546Sopenharmony_ci
853bf215546Sopenharmony_ci   /* Update the locations of branches */
854bf215546Sopenharmony_ci   for (; branch_it != ctx.branches.end(); ++branch_it)
855bf215546Sopenharmony_ci      branch_it->first += insert_count;
856bf215546Sopenharmony_ci
857bf215546Sopenharmony_ci   /* Update the locations of p_constaddr instructions */
858bf215546Sopenharmony_ci   for (auto& constaddr : ctx.constaddrs) {
859bf215546Sopenharmony_ci      constaddr_info& info = constaddr.second;
860bf215546Sopenharmony_ci      if (info.getpc_end >= insert_before)
861bf215546Sopenharmony_ci         info.getpc_end += insert_count;
862bf215546Sopenharmony_ci      if (info.add_literal >= insert_before)
863bf215546Sopenharmony_ci         info.add_literal += insert_count;
864bf215546Sopenharmony_ci   }
865bf215546Sopenharmony_ci}
866bf215546Sopenharmony_ci
867bf215546Sopenharmony_cistatic void
868bf215546Sopenharmony_cifix_branches_gfx10(asm_context& ctx, std::vector<uint32_t>& out)
869bf215546Sopenharmony_ci{
870bf215546Sopenharmony_ci   /* Branches with an offset of 0x3f are buggy on GFX10,
871bf215546Sopenharmony_ci    * we workaround by inserting NOPs if needed.
872bf215546Sopenharmony_ci    */
873bf215546Sopenharmony_ci   bool gfx10_3f_bug = false;
874bf215546Sopenharmony_ci
875bf215546Sopenharmony_ci   do {
876bf215546Sopenharmony_ci      auto buggy_branch_it = std::find_if(
877bf215546Sopenharmony_ci         ctx.branches.begin(), ctx.branches.end(),
878bf215546Sopenharmony_ci         [&ctx](const auto& branch) -> bool {
879bf215546Sopenharmony_ci            return ((int)ctx.program->blocks[branch.second->block].offset - branch.first - 1) ==
880bf215546Sopenharmony_ci                   0x3f;
881bf215546Sopenharmony_ci         });
882bf215546Sopenharmony_ci
883bf215546Sopenharmony_ci      gfx10_3f_bug = buggy_branch_it != ctx.branches.end();
884bf215546Sopenharmony_ci
885bf215546Sopenharmony_ci      if (gfx10_3f_bug) {
886bf215546Sopenharmony_ci         /* Insert an s_nop after the branch */
887bf215546Sopenharmony_ci         constexpr uint32_t s_nop_0 = 0xbf800000u;
888bf215546Sopenharmony_ci         insert_code(ctx, out, buggy_branch_it->first + 1, 1, &s_nop_0);
889bf215546Sopenharmony_ci      }
890bf215546Sopenharmony_ci   } while (gfx10_3f_bug);
891bf215546Sopenharmony_ci}
892bf215546Sopenharmony_ci
893bf215546Sopenharmony_civoid
894bf215546Sopenharmony_ciemit_long_jump(asm_context& ctx, SOPP_instruction* branch, bool backwards,
895bf215546Sopenharmony_ci               std::vector<uint32_t>& out)
896bf215546Sopenharmony_ci{
897bf215546Sopenharmony_ci   Builder bld(ctx.program);
898bf215546Sopenharmony_ci
899bf215546Sopenharmony_ci   Definition def_tmp_lo(branch->definitions[0].physReg(), s1);
900bf215546Sopenharmony_ci   Operand op_tmp_lo(branch->definitions[0].physReg(), s1);
901bf215546Sopenharmony_ci   Definition def_tmp_hi(branch->definitions[0].physReg().advance(4), s1);
902bf215546Sopenharmony_ci   Operand op_tmp_hi(branch->definitions[0].physReg().advance(4), s1);
903bf215546Sopenharmony_ci
904bf215546Sopenharmony_ci   aco_ptr<Instruction> instr;
905bf215546Sopenharmony_ci
906bf215546Sopenharmony_ci   if (branch->opcode != aco_opcode::s_branch) {
907bf215546Sopenharmony_ci      /* for conditional branches, skip the long jump if the condition is false */
908bf215546Sopenharmony_ci      aco_opcode inv;
909bf215546Sopenharmony_ci      switch (branch->opcode) {
910bf215546Sopenharmony_ci      case aco_opcode::s_cbranch_scc0: inv = aco_opcode::s_cbranch_scc1; break;
911bf215546Sopenharmony_ci      case aco_opcode::s_cbranch_scc1: inv = aco_opcode::s_cbranch_scc0; break;
912bf215546Sopenharmony_ci      case aco_opcode::s_cbranch_vccz: inv = aco_opcode::s_cbranch_vccnz; break;
913bf215546Sopenharmony_ci      case aco_opcode::s_cbranch_vccnz: inv = aco_opcode::s_cbranch_vccz; break;
914bf215546Sopenharmony_ci      case aco_opcode::s_cbranch_execz: inv = aco_opcode::s_cbranch_execnz; break;
915bf215546Sopenharmony_ci      case aco_opcode::s_cbranch_execnz: inv = aco_opcode::s_cbranch_execz; break;
916bf215546Sopenharmony_ci      default: unreachable("Unhandled long jump.");
917bf215546Sopenharmony_ci      }
918bf215546Sopenharmony_ci      instr.reset(bld.sopp(inv, -1, 6));
919bf215546Sopenharmony_ci      emit_instruction(ctx, out, instr.get());
920bf215546Sopenharmony_ci   }
921bf215546Sopenharmony_ci
922bf215546Sopenharmony_ci   /* create the new PC and stash SCC in the LSB */
923bf215546Sopenharmony_ci   instr.reset(bld.sop1(aco_opcode::s_getpc_b64, branch->definitions[0]).instr);
924bf215546Sopenharmony_ci   emit_instruction(ctx, out, instr.get());
925bf215546Sopenharmony_ci
926bf215546Sopenharmony_ci   instr.reset(
927bf215546Sopenharmony_ci      bld.sop2(aco_opcode::s_addc_u32, def_tmp_lo, op_tmp_lo, Operand::literal32(0)).instr);
928bf215546Sopenharmony_ci   emit_instruction(ctx, out, instr.get());
929bf215546Sopenharmony_ci   branch->pass_flags = out.size();
930bf215546Sopenharmony_ci
931bf215546Sopenharmony_ci   /* s_addc_u32 for high 32 bits not needed because the program is in a 32-bit VA range */
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_ci   /* restore SCC and clear the LSB of the new PC */
934bf215546Sopenharmony_ci   instr.reset(bld.sopc(aco_opcode::s_bitcmp1_b32, def_tmp_lo, op_tmp_lo, Operand::zero()).instr);
935bf215546Sopenharmony_ci   emit_instruction(ctx, out, instr.get());
936bf215546Sopenharmony_ci   instr.reset(bld.sop1(aco_opcode::s_bitset0_b32, def_tmp_lo, Operand::zero()).instr);
937bf215546Sopenharmony_ci   emit_instruction(ctx, out, instr.get());
938bf215546Sopenharmony_ci
939bf215546Sopenharmony_ci   /* create the s_setpc_b64 to jump */
940bf215546Sopenharmony_ci   instr.reset(
941bf215546Sopenharmony_ci      bld.sop1(aco_opcode::s_setpc_b64, Operand(branch->definitions[0].physReg(), s2)).instr);
942bf215546Sopenharmony_ci   emit_instruction(ctx, out, instr.get());
943bf215546Sopenharmony_ci}
944bf215546Sopenharmony_ci
945bf215546Sopenharmony_civoid
946bf215546Sopenharmony_cifix_branches(asm_context& ctx, std::vector<uint32_t>& out)
947bf215546Sopenharmony_ci{
948bf215546Sopenharmony_ci   bool repeat = false;
949bf215546Sopenharmony_ci   do {
950bf215546Sopenharmony_ci      repeat = false;
951bf215546Sopenharmony_ci
952bf215546Sopenharmony_ci      if (ctx.gfx_level == GFX10)
953bf215546Sopenharmony_ci         fix_branches_gfx10(ctx, out);
954bf215546Sopenharmony_ci
955bf215546Sopenharmony_ci      for (std::pair<int, SOPP_instruction*>& branch : ctx.branches) {
956bf215546Sopenharmony_ci         int offset = (int)ctx.program->blocks[branch.second->block].offset - branch.first - 1;
957bf215546Sopenharmony_ci         if ((offset < INT16_MIN || offset > INT16_MAX) && !branch.second->pass_flags) {
958bf215546Sopenharmony_ci            std::vector<uint32_t> long_jump;
959bf215546Sopenharmony_ci            bool backwards =
960bf215546Sopenharmony_ci               ctx.program->blocks[branch.second->block].offset < (unsigned)branch.first;
961bf215546Sopenharmony_ci            emit_long_jump(ctx, branch.second, backwards, long_jump);
962bf215546Sopenharmony_ci
963bf215546Sopenharmony_ci            out[branch.first] = long_jump[0];
964bf215546Sopenharmony_ci            insert_code(ctx, out, branch.first + 1, long_jump.size() - 1, long_jump.data() + 1);
965bf215546Sopenharmony_ci
966bf215546Sopenharmony_ci            repeat = true;
967bf215546Sopenharmony_ci            break;
968bf215546Sopenharmony_ci         }
969bf215546Sopenharmony_ci
970bf215546Sopenharmony_ci         if (branch.second->pass_flags) {
971bf215546Sopenharmony_ci            int after_getpc = branch.first + branch.second->pass_flags - 2;
972bf215546Sopenharmony_ci            offset = (int)ctx.program->blocks[branch.second->block].offset - after_getpc;
973bf215546Sopenharmony_ci            out[branch.first + branch.second->pass_flags - 1] = offset * 4;
974bf215546Sopenharmony_ci         } else {
975bf215546Sopenharmony_ci            out[branch.first] &= 0xffff0000u;
976bf215546Sopenharmony_ci            out[branch.first] |= (uint16_t)offset;
977bf215546Sopenharmony_ci         }
978bf215546Sopenharmony_ci      }
979bf215546Sopenharmony_ci   } while (repeat);
980bf215546Sopenharmony_ci}
981bf215546Sopenharmony_ci
982bf215546Sopenharmony_civoid
983bf215546Sopenharmony_cifix_constaddrs(asm_context& ctx, std::vector<uint32_t>& out)
984bf215546Sopenharmony_ci{
985bf215546Sopenharmony_ci   for (auto& constaddr : ctx.constaddrs) {
986bf215546Sopenharmony_ci      constaddr_info& info = constaddr.second;
987bf215546Sopenharmony_ci      out[info.add_literal] += (out.size() - info.getpc_end) * 4u;
988bf215546Sopenharmony_ci   }
989bf215546Sopenharmony_ci}
990bf215546Sopenharmony_ci
991bf215546Sopenharmony_ciunsigned
992bf215546Sopenharmony_ciemit_program(Program* program, std::vector<uint32_t>& code)
993bf215546Sopenharmony_ci{
994bf215546Sopenharmony_ci   asm_context ctx(program);
995bf215546Sopenharmony_ci
996bf215546Sopenharmony_ci   if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::FS ||
997bf215546Sopenharmony_ci       program->stage.hw == HWStage::NGG)
998bf215546Sopenharmony_ci      fix_exports(ctx, code, program);
999bf215546Sopenharmony_ci
1000bf215546Sopenharmony_ci   for (Block& block : program->blocks) {
1001bf215546Sopenharmony_ci      block.offset = code.size();
1002bf215546Sopenharmony_ci      emit_block(ctx, code, block);
1003bf215546Sopenharmony_ci   }
1004bf215546Sopenharmony_ci
1005bf215546Sopenharmony_ci   fix_branches(ctx, code);
1006bf215546Sopenharmony_ci
1007bf215546Sopenharmony_ci   unsigned exec_size = code.size() * sizeof(uint32_t);
1008bf215546Sopenharmony_ci
1009bf215546Sopenharmony_ci   if (program->gfx_level >= GFX10) {
1010bf215546Sopenharmony_ci      /* Pad output with s_code_end so instruction prefetching doesn't cause
1011bf215546Sopenharmony_ci       * page faults */
1012bf215546Sopenharmony_ci      unsigned final_size = align(code.size() + 3 * 16, 16);
1013bf215546Sopenharmony_ci      while (code.size() < final_size)
1014bf215546Sopenharmony_ci         code.push_back(0xbf9f0000u);
1015bf215546Sopenharmony_ci   }
1016bf215546Sopenharmony_ci
1017bf215546Sopenharmony_ci   fix_constaddrs(ctx, code);
1018bf215546Sopenharmony_ci
1019bf215546Sopenharmony_ci   while (program->constant_data.size() % 4u)
1020bf215546Sopenharmony_ci      program->constant_data.push_back(0);
1021bf215546Sopenharmony_ci   /* Copy constant data */
1022bf215546Sopenharmony_ci   code.insert(code.end(), (uint32_t*)program->constant_data.data(),
1023bf215546Sopenharmony_ci               (uint32_t*)(program->constant_data.data() + program->constant_data.size()));
1024bf215546Sopenharmony_ci
1025bf215546Sopenharmony_ci   return exec_size;
1026bf215546Sopenharmony_ci}
1027bf215546Sopenharmony_ci
1028bf215546Sopenharmony_ci} // namespace aco
1029