1bf215546Sopenharmony_ci/* -*- mesa-c++  -*-
2bf215546Sopenharmony_ci *
3bf215546Sopenharmony_ci * Copyright (c) 2022 Collabora LTD
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Author: Gert Wollny <gert.wollny@collabora.com>
6bf215546Sopenharmony_ci *
7bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
8bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
9bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
10bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub
11bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom
12bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions:
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
15bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
16bf215546Sopenharmony_ci * Software.
17bf215546Sopenharmony_ci *
18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE.
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "sfn_assembler.h"
28bf215546Sopenharmony_ci#include "sfn_debug.h"
29bf215546Sopenharmony_ci#include "sfn_instr_alugroup.h"
30bf215546Sopenharmony_ci#include "sfn_instr_controlflow.h"
31bf215546Sopenharmony_ci#include "sfn_instr_fetch.h"
32bf215546Sopenharmony_ci#include "sfn_instr_export.h"
33bf215546Sopenharmony_ci#include "sfn_instr_mem.h"
34bf215546Sopenharmony_ci#include "sfn_instr_tex.h"
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_ci#include "sfn_conditionaljumptracker.h"
37bf215546Sopenharmony_ci#include "sfn_callstack.h"
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_ci#include "../eg_sq.h"
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_cinamespace r600 {
42bf215546Sopenharmony_ciAssembler::Assembler(r600_shader *sh, const r600_shader_key& key):
43bf215546Sopenharmony_ci   m_sh(sh), m_key(key)
44bf215546Sopenharmony_ci{
45bf215546Sopenharmony_ci}
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ciextern const std::map<ESDOp, int> ds_opcode_map;
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_ciclass AssamblerVisitor : public ConstInstrVisitor {
50bf215546Sopenharmony_cipublic:
51bf215546Sopenharmony_ci   AssamblerVisitor(r600_shader *sh, const r600_shader_key& key);
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_ci   void visit(const AluInstr& instr) override;
54bf215546Sopenharmony_ci   void visit(const AluGroup& instr) override;
55bf215546Sopenharmony_ci   void visit(const TexInstr& instr) override;
56bf215546Sopenharmony_ci   void visit(const ExportInstr& instr) override;
57bf215546Sopenharmony_ci   void visit(const FetchInstr& instr) override;
58bf215546Sopenharmony_ci   void visit(const Block& instr) override;
59bf215546Sopenharmony_ci   void visit(const IfInstr& instr) override;
60bf215546Sopenharmony_ci   void visit(const ControlFlowInstr& instr) override;
61bf215546Sopenharmony_ci   void visit(const ScratchIOInstr& instr) override;
62bf215546Sopenharmony_ci   void visit(const StreamOutInstr& instr) override;
63bf215546Sopenharmony_ci   void visit(const MemRingOutInstr& instr) override;
64bf215546Sopenharmony_ci   void visit(const EmitVertexInstr& instr) override;
65bf215546Sopenharmony_ci   void visit(const GDSInstr& instr) override;
66bf215546Sopenharmony_ci   void visit(const WriteTFInstr& instr) override;
67bf215546Sopenharmony_ci   void visit(const LDSAtomicInstr& instr) override;
68bf215546Sopenharmony_ci   void visit(const LDSReadInstr& instr) override;
69bf215546Sopenharmony_ci   void visit(const RatInstr& instr) override;
70bf215546Sopenharmony_ci
71bf215546Sopenharmony_ci   void finalize();
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci   const uint32_t sf_vtx = 1;
74bf215546Sopenharmony_ci   const uint32_t sf_tex = 2;
75bf215546Sopenharmony_ci   const uint32_t sf_alu = 4;
76bf215546Sopenharmony_ci   const uint32_t sf_addr_register = 8;
77bf215546Sopenharmony_ci   const uint32_t sf_all = 0xf;
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_ci   void clear_states(const uint32_t& states);
80bf215546Sopenharmony_ci   bool copy_dst(r600_bytecode_alu_dst& dst, const Register& d, bool write);
81bf215546Sopenharmony_ci   PVirtualValue copy_src(r600_bytecode_alu_src& src, const VirtualValue& s);
82bf215546Sopenharmony_ci
83bf215546Sopenharmony_ci   EBufferIndexMode
84bf215546Sopenharmony_ci   emit_index_reg(const VirtualValue& addr, unsigned idx);
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci   void emit_endif();
87bf215546Sopenharmony_ci   void emit_else();
88bf215546Sopenharmony_ci   void emit_loop_begin(bool vpm);
89bf215546Sopenharmony_ci   void emit_loop_end();
90bf215546Sopenharmony_ci   void emit_loop_break();
91bf215546Sopenharmony_ci   void emit_loop_cont();
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci   void emit_alu_op(const AluInstr& ai);
94bf215546Sopenharmony_ci   void emit_lds_op(const AluInstr& lds);
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci   void emit_wait_ack();
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci   /* Start initialized in constructor */
99bf215546Sopenharmony_ci   const r600_shader_key& m_key;
100bf215546Sopenharmony_ci   r600_shader *m_shader;
101bf215546Sopenharmony_ci   r600_bytecode *m_bc;
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci   ConditionalJumpTracker m_jump_tracker;
104bf215546Sopenharmony_ci   CallStack m_callstack;
105bf215546Sopenharmony_ci   bool ps_alpha_to_one;
106bf215546Sopenharmony_ci   /* End initialized in constructor */
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_ci   std::set<uint32_t> m_nliterals_in_group;
109bf215546Sopenharmony_ci   std::set<int> vtx_fetch_results;
110bf215546Sopenharmony_ci   std::set<int> tex_fetch_results;
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci   PRegister m_last_addr{nullptr};
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci   unsigned m_max_color_exports{0};
115bf215546Sopenharmony_ci   int m_loop_nesting{0};
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci   bool m_ack_suggested{false};
118bf215546Sopenharmony_ci   bool m_has_param_output{false};
119bf215546Sopenharmony_ci   bool m_has_pos_output{false};
120bf215546Sopenharmony_ci   bool m_last_op_was_barrier{false};
121bf215546Sopenharmony_ci   bool m_result{true};
122bf215546Sopenharmony_ci};
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_cibool Assembler::lower(Shader *shader)
125bf215546Sopenharmony_ci{
126bf215546Sopenharmony_ci   AssamblerVisitor ass(m_sh, m_key);
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci   auto& blocks = shader->func();
129bf215546Sopenharmony_ci   for (auto b : blocks) {
130bf215546Sopenharmony_ci      b->accept(ass);
131bf215546Sopenharmony_ci      if (!ass.m_result)
132bf215546Sopenharmony_ci         return false;
133bf215546Sopenharmony_ci   }
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci   ass.finalize();
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_ci   return ass.m_result;
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci}
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ciAssamblerVisitor::AssamblerVisitor(r600_shader *sh, const r600_shader_key& key):
142bf215546Sopenharmony_ci   m_key(key),
143bf215546Sopenharmony_ci   m_shader(sh),
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_ci   m_bc(&sh->bc),
146bf215546Sopenharmony_ci   m_callstack(sh->bc),
147bf215546Sopenharmony_ci   ps_alpha_to_one(key.ps.alpha_to_one)
148bf215546Sopenharmony_ci{
149bf215546Sopenharmony_ci   if (m_shader->processor_type == PIPE_SHADER_FRAGMENT)
150bf215546Sopenharmony_ci      m_max_color_exports = MAX2(m_key.ps.nr_cbufs, 1);
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci   if (m_shader->processor_type == PIPE_SHADER_VERTEX &&
153bf215546Sopenharmony_ci       m_shader->ninput > 0)
154bf215546Sopenharmony_ci         r600_bytecode_add_cfinst(m_bc, CF_OP_CALL_FS);
155bf215546Sopenharmony_ci}
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_civoid AssamblerVisitor::finalize()
158bf215546Sopenharmony_ci{
159bf215546Sopenharmony_ci   const struct cf_op_info *last = nullptr;
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci   if (m_bc->cf_last)
162bf215546Sopenharmony_ci      last = r600_isa_cf(m_bc->cf_last->op);
163bf215546Sopenharmony_ci
164bf215546Sopenharmony_ci   /* alu clause instructions don't have EOP bit, so add NOP */
165bf215546Sopenharmony_ci   if (m_shader->bc.gfx_level < CAYMAN &&
166bf215546Sopenharmony_ci       (!last || last->flags & CF_ALU || m_bc->cf_last->op == CF_OP_LOOP_END
167bf215546Sopenharmony_ci       || m_bc->cf_last->op == CF_OP_POP))
168bf215546Sopenharmony_ci      r600_bytecode_add_cfinst(m_bc, CF_OP_NOP);
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_ci   /* A fetch shader only can't be EOP (results in hang), but we can replace it
171bf215546Sopenharmony_ci        * by a NOP */
172bf215546Sopenharmony_ci   else if (last && m_bc->cf_last->op == CF_OP_CALL_FS)
173bf215546Sopenharmony_ci      m_bc->cf_last->op = CF_OP_NOP;
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci   if (m_shader->bc.gfx_level != CAYMAN)
176bf215546Sopenharmony_ci      m_bc->cf_last->end_of_program = 1;
177bf215546Sopenharmony_ci   else
178bf215546Sopenharmony_ci      cm_bytecode_add_cf_end(m_bc);
179bf215546Sopenharmony_ci}
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ciextern const std::map<EAluOp, int> opcode_map;
182bf215546Sopenharmony_ci
183bf215546Sopenharmony_civoid AssamblerVisitor::visit(const AluInstr& ai)
184bf215546Sopenharmony_ci{
185bf215546Sopenharmony_ci   assert(vtx_fetch_results.empty());
186bf215546Sopenharmony_ci   assert(tex_fetch_results.empty());
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci   if (unlikely(ai.has_alu_flag(alu_is_lds)))
189bf215546Sopenharmony_ci      emit_lds_op(ai);
190bf215546Sopenharmony_ci   else
191bf215546Sopenharmony_ci      emit_alu_op(ai);
192bf215546Sopenharmony_ci}
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_civoid AssamblerVisitor::emit_lds_op(const AluInstr& lds)
195bf215546Sopenharmony_ci{
196bf215546Sopenharmony_ci   struct r600_bytecode_alu alu;
197bf215546Sopenharmony_ci   memset(&alu, 0, sizeof(alu));
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_ci   alu.is_lds_idx_op = true;
200bf215546Sopenharmony_ci   alu.op = lds.lds_opcode();
201bf215546Sopenharmony_ci
202bf215546Sopenharmony_ci   bool has_lds_fetch = false;
203bf215546Sopenharmony_ci   switch (alu.op) {
204bf215546Sopenharmony_ci   case LDS_WRITE:
205bf215546Sopenharmony_ci      alu.op =LDS_OP2_LDS_WRITE;
206bf215546Sopenharmony_ci      break;
207bf215546Sopenharmony_ci   case LDS_WRITE_REL:
208bf215546Sopenharmony_ci      alu.op = LDS_OP3_LDS_WRITE_REL;
209bf215546Sopenharmony_ci      alu.lds_idx = 1;
210bf215546Sopenharmony_ci      break;
211bf215546Sopenharmony_ci   case DS_OP_READ_RET:
212bf215546Sopenharmony_ci      alu.op = LDS_OP1_LDS_READ_RET;
213bf215546Sopenharmony_ci      FALLTHROUGH;
214bf215546Sopenharmony_ci   case LDS_ADD_RET:
215bf215546Sopenharmony_ci   case LDS_AND_RET:
216bf215546Sopenharmony_ci   case LDS_OR_RET:
217bf215546Sopenharmony_ci   case LDS_MAX_INT_RET:
218bf215546Sopenharmony_ci   case LDS_MAX_UINT_RET:
219bf215546Sopenharmony_ci   case LDS_MIN_INT_RET:
220bf215546Sopenharmony_ci   case LDS_MIN_UINT_RET:
221bf215546Sopenharmony_ci   case LDS_XOR_RET:
222bf215546Sopenharmony_ci   case LDS_XCHG_RET:
223bf215546Sopenharmony_ci   case LDS_CMP_XCHG_RET:
224bf215546Sopenharmony_ci      has_lds_fetch = true;
225bf215546Sopenharmony_ci      break;
226bf215546Sopenharmony_ci   case LDS_ADD:
227bf215546Sopenharmony_ci   case LDS_AND:
228bf215546Sopenharmony_ci   case LDS_OR:
229bf215546Sopenharmony_ci   case LDS_MAX_INT:
230bf215546Sopenharmony_ci   case LDS_MAX_UINT:
231bf215546Sopenharmony_ci   case LDS_MIN_INT:
232bf215546Sopenharmony_ci   case LDS_MIN_UINT:
233bf215546Sopenharmony_ci   case LDS_XOR:
234bf215546Sopenharmony_ci      break;
235bf215546Sopenharmony_ci   default:
236bf215546Sopenharmony_ci      std::cerr << "\n R600: error op: " << lds << "\n";
237bf215546Sopenharmony_ci      unreachable("Unhandled LDS op");
238bf215546Sopenharmony_ci   }
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci   copy_src(alu.src[0], lds.src(0));
241bf215546Sopenharmony_ci
242bf215546Sopenharmony_ci   if (lds.n_sources() > 1)
243bf215546Sopenharmony_ci      copy_src(alu.src[1], lds.src(1));
244bf215546Sopenharmony_ci   else
245bf215546Sopenharmony_ci      alu.src[1].sel = V_SQ_ALU_SRC_0;
246bf215546Sopenharmony_ci
247bf215546Sopenharmony_ci   if (lds.n_sources() > 2)
248bf215546Sopenharmony_ci      copy_src(alu.src[2], lds.src(2));
249bf215546Sopenharmony_ci   else
250bf215546Sopenharmony_ci      alu.src[2].sel = V_SQ_ALU_SRC_0;
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_ci   alu.last = lds.has_alu_flag(alu_last_instr);
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci   int r = r600_bytecode_add_alu(m_bc, &alu);
255bf215546Sopenharmony_ci   if (has_lds_fetch)
256bf215546Sopenharmony_ci      m_bc->cf_last->nlds_read++;
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_ci   if (r)
259bf215546Sopenharmony_ci      m_result = false;
260bf215546Sopenharmony_ci}
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_civoid AssamblerVisitor::emit_alu_op(const AluInstr& ai)
263bf215546Sopenharmony_ci{
264bf215546Sopenharmony_ci   struct r600_bytecode_alu alu;
265bf215546Sopenharmony_ci   memset(&alu, 0, sizeof(alu));
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci   if (opcode_map.find(ai.opcode()) == opcode_map.end()) {
268bf215546Sopenharmony_ci      std::cerr << "Opcode not handled for " << ai <<"\n";
269bf215546Sopenharmony_ci      m_result = false;
270bf215546Sopenharmony_ci      return;
271bf215546Sopenharmony_ci   }
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ci   // skip multiple barriers
274bf215546Sopenharmony_ci   if (m_last_op_was_barrier && ai.opcode() == op0_group_barrier)
275bf215546Sopenharmony_ci      return;
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci   m_last_op_was_barrier = ai.opcode() == op0_group_barrier;
278bf215546Sopenharmony_ci
279bf215546Sopenharmony_ci   alu.op = opcode_map.at(ai.opcode());
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci   auto dst = ai.dest();
282bf215546Sopenharmony_ci   if (dst) {
283bf215546Sopenharmony_ci      if (!copy_dst(alu.dst, *dst, ai.has_alu_flag(alu_write))) {
284bf215546Sopenharmony_ci         m_result = false;
285bf215546Sopenharmony_ci         return;
286bf215546Sopenharmony_ci      }
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_ci      alu.dst.write = ai.has_alu_flag(alu_write);
289bf215546Sopenharmony_ci      alu.dst.clamp = ai.has_alu_flag(alu_dst_clamp);
290bf215546Sopenharmony_ci      alu.dst.rel = dst->addr() ? 1 : 0;
291bf215546Sopenharmony_ci   } else {
292bf215546Sopenharmony_ci      alu.dst.chan = ai.dest_chan();
293bf215546Sopenharmony_ci   }
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci   alu.is_op3 = ai.n_sources() == 3;
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_ci   EBufferIndexMode kcache_index_mode = bim_none;
298bf215546Sopenharmony_ci   PVirtualValue buffer_offset = nullptr;
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci   for (unsigned i = 0; i < ai.n_sources(); ++i) {
301bf215546Sopenharmony_ci      buffer_offset = copy_src(alu.src[i], ai.src(i));
302bf215546Sopenharmony_ci      alu.src[i].neg = ai.has_alu_flag(AluInstr::src_neg_flags[i]);
303bf215546Sopenharmony_ci      if (!alu.is_op3)
304bf215546Sopenharmony_ci         alu.src[i].abs = ai.has_alu_flag(AluInstr::src_abs_flags[i]);
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci      if (buffer_offset && kcache_index_mode == bim_none) {
307bf215546Sopenharmony_ci         kcache_index_mode = bim_zero;
308bf215546Sopenharmony_ci         alu.src[i].kc_bank = 1;
309bf215546Sopenharmony_ci         alu.src[i].kc_rel = 1;
310bf215546Sopenharmony_ci      }
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci      if (ai.has_lds_queue_read()) {
313bf215546Sopenharmony_ci         assert(m_bc->cf_last->nlds_read > 0);
314bf215546Sopenharmony_ci         m_bc->cf_last->nlds_read--;
315bf215546Sopenharmony_ci      }
316bf215546Sopenharmony_ci   }
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci   if (ai.bank_swizzle() != alu_vec_unknown)
319bf215546Sopenharmony_ci      alu.bank_swizzle_force = ai.bank_swizzle();
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci   alu.last = ai.has_alu_flag(alu_last_instr);
322bf215546Sopenharmony_ci   alu.execute_mask = ai.has_alu_flag(alu_update_exec);
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci   /* If the destination register is equal to the last loaded address register
325bf215546Sopenharmony_ci    * then clear the latter one, because the values will no longer be identical */
326bf215546Sopenharmony_ci   if (m_last_addr)
327bf215546Sopenharmony_ci      sfn_log << SfnLog::assembly << "  Current address register is " << *m_last_addr << "\n";
328bf215546Sopenharmony_ci
329bf215546Sopenharmony_ci   if (dst)
330bf215546Sopenharmony_ci      sfn_log << SfnLog::assembly << "  Current dst register is " << *dst << "\n";
331bf215546Sopenharmony_ci
332bf215546Sopenharmony_ci   if (dst && m_last_addr && *dst == *m_last_addr) {
333bf215546Sopenharmony_ci      sfn_log << SfnLog::assembly << "  Clear address register (was " << *m_last_addr << "\n";
334bf215546Sopenharmony_ci      m_last_addr = nullptr;
335bf215546Sopenharmony_ci   }
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_ci   auto cf_op = ai.cf_type();
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_ci   unsigned type = 0;
340bf215546Sopenharmony_ci   switch (cf_op) {
341bf215546Sopenharmony_ci   case cf_alu: type = CF_OP_ALU; break;
342bf215546Sopenharmony_ci   case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break;
343bf215546Sopenharmony_ci   case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break;
344bf215546Sopenharmony_ci   case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break;
345bf215546Sopenharmony_ci   case cf_alu_break: type = CF_OP_ALU_BREAK; break;
346bf215546Sopenharmony_ci   case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break;
347bf215546Sopenharmony_ci   case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break;
348bf215546Sopenharmony_ci   case cf_alu_extended: type = CF_OP_ALU_EXT; break;
349bf215546Sopenharmony_ci   default:
350bf215546Sopenharmony_ci      assert(0 && "cf_alu_undefined should have been replaced");
351bf215546Sopenharmony_ci   }
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ci   if (alu.last)
354bf215546Sopenharmony_ci      m_nliterals_in_group.clear();
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci   m_result = !r600_bytecode_add_alu_type(m_bc, &alu, type);
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci   if (ai.opcode() == op1_mova_int)
360bf215546Sopenharmony_ci      m_bc->ar_loaded = 0;
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci   if (ai.opcode() == op1_set_cf_idx0)
363bf215546Sopenharmony_ci      m_bc->index_loaded[0] = 1;
364bf215546Sopenharmony_ci
365bf215546Sopenharmony_ci   if (ai.opcode() == op1_set_cf_idx1)
366bf215546Sopenharmony_ci      m_bc->index_loaded[1] = 1;
367bf215546Sopenharmony_ci
368bf215546Sopenharmony_ci   m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
369bf215546Sopenharmony_ci                          ai.opcode() == op2_killne_int ||
370bf215546Sopenharmony_ci                          ai.opcode() == op1_set_cf_idx0 ||
371bf215546Sopenharmony_ci                          ai.opcode() == op1_set_cf_idx1);
372bf215546Sopenharmony_ci}
373bf215546Sopenharmony_ci
374bf215546Sopenharmony_civoid AssamblerVisitor::visit(const AluGroup& group)
375bf215546Sopenharmony_ci{
376bf215546Sopenharmony_ci   clear_states(sf_vtx | sf_tex);
377bf215546Sopenharmony_ci
378bf215546Sopenharmony_ci   if (group.slots() == 0)
379bf215546Sopenharmony_ci      return;
380bf215546Sopenharmony_ci
381bf215546Sopenharmony_ci   if (group.has_lds_group_start()) {
382bf215546Sopenharmony_ci      if (m_bc->cf_last->ndw + 2 * (*group.begin())->required_slots() > 220) {
383bf215546Sopenharmony_ci         assert(m_bc->cf_last->nlds_read == 0);
384bf215546Sopenharmony_ci         m_bc->force_add_cf = 1;
385bf215546Sopenharmony_ci         m_last_addr = nullptr;
386bf215546Sopenharmony_ci      }
387bf215546Sopenharmony_ci   } else if (m_bc->cf_last) {
388bf215546Sopenharmony_ci      if (m_bc->cf_last->ndw + 2 * group.slots() > 240) {
389bf215546Sopenharmony_ci         assert(m_bc->cf_last->nlds_read == 0);
390bf215546Sopenharmony_ci         m_bc->force_add_cf = 1;
391bf215546Sopenharmony_ci         m_last_addr = nullptr;
392bf215546Sopenharmony_ci      } else {
393bf215546Sopenharmony_ci         auto instr = *group.begin();
394bf215546Sopenharmony_ci         if (instr &&
395bf215546Sopenharmony_ci             !instr->has_alu_flag(alu_is_lds) &&
396bf215546Sopenharmony_ci             instr->opcode() == op0_group_barrier &&
397bf215546Sopenharmony_ci             m_bc->cf_last->ndw + 14 > 240) {
398bf215546Sopenharmony_ci            assert(m_bc->cf_last->nlds_read == 0);
399bf215546Sopenharmony_ci            m_bc->force_add_cf = 1;
400bf215546Sopenharmony_ci            m_last_addr = nullptr;
401bf215546Sopenharmony_ci         }
402bf215546Sopenharmony_ci      }
403bf215546Sopenharmony_ci   }
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_ci   auto addr = group.addr();
406bf215546Sopenharmony_ci
407bf215546Sopenharmony_ci   if (addr.first) {
408bf215546Sopenharmony_ci      if (!addr.second) {
409bf215546Sopenharmony_ci         if (!m_last_addr || !m_bc->ar_loaded ||
410bf215546Sopenharmony_ci             !m_last_addr->equal_to(*addr.first)) {
411bf215546Sopenharmony_ci            m_bc->ar_reg = addr.first->sel();
412bf215546Sopenharmony_ci            m_bc->ar_chan = addr.first->chan();
413bf215546Sopenharmony_ci            m_last_addr = addr.first;
414bf215546Sopenharmony_ci            m_bc->ar_loaded = 0;
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci            r600_load_ar(m_bc, group.addr_for_src());
417bf215546Sopenharmony_ci         }
418bf215546Sopenharmony_ci      } else {
419bf215546Sopenharmony_ci         emit_index_reg(*addr.first, 0);
420bf215546Sopenharmony_ci      }
421bf215546Sopenharmony_ci   }
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_ci   for (auto& i : group) {
424bf215546Sopenharmony_ci      if (i)
425bf215546Sopenharmony_ci         i->accept(*this);
426bf215546Sopenharmony_ci   }
427bf215546Sopenharmony_ci}
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_civoid AssamblerVisitor::visit(const TexInstr& tex_instr)
430bf215546Sopenharmony_ci{
431bf215546Sopenharmony_ci   clear_states(sf_vtx | sf_alu);
432bf215546Sopenharmony_ci
433bf215546Sopenharmony_ci   int sampler_offset = 0;
434bf215546Sopenharmony_ci   auto addr = tex_instr.sampler_offset();
435bf215546Sopenharmony_ci   EBufferIndexMode index_mode = bim_none;
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci   if (addr)
438bf215546Sopenharmony_ci      index_mode = emit_index_reg(*addr, 1);
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ci   if (tex_fetch_results.find(tex_instr.src().sel()) !=
441bf215546Sopenharmony_ci       tex_fetch_results.end()) {
442bf215546Sopenharmony_ci      m_bc->force_add_cf = 1;
443bf215546Sopenharmony_ci      tex_fetch_results.clear();
444bf215546Sopenharmony_ci   }
445bf215546Sopenharmony_ci
446bf215546Sopenharmony_ci   r600_bytecode_tex tex;
447bf215546Sopenharmony_ci   memset(&tex, 0, sizeof(struct r600_bytecode_tex));
448bf215546Sopenharmony_ci   tex.op = tex_instr.opcode();
449bf215546Sopenharmony_ci   tex.sampler_id = tex_instr.sampler_id() + sampler_offset;
450bf215546Sopenharmony_ci   tex.resource_id = tex_instr.resource_id() + sampler_offset;
451bf215546Sopenharmony_ci   tex.src_gpr = tex_instr.src().sel();
452bf215546Sopenharmony_ci   tex.dst_gpr = tex_instr.dst().sel();
453bf215546Sopenharmony_ci   tex.dst_sel_x = tex_instr.dest_swizzle(0);
454bf215546Sopenharmony_ci   tex.dst_sel_y = tex_instr.dest_swizzle(1);
455bf215546Sopenharmony_ci   tex.dst_sel_z = tex_instr.dest_swizzle(2);
456bf215546Sopenharmony_ci   tex.dst_sel_w = tex_instr.dest_swizzle(3);
457bf215546Sopenharmony_ci   tex.src_sel_x = tex_instr.src()[0]->chan();
458bf215546Sopenharmony_ci   tex.src_sel_y = tex_instr.src()[1]->chan();
459bf215546Sopenharmony_ci   tex.src_sel_z = tex_instr.src()[2]->chan();
460bf215546Sopenharmony_ci   tex.src_sel_w = tex_instr.src()[3]->chan();
461bf215546Sopenharmony_ci   tex.coord_type_x = !tex_instr.has_tex_flag(TexInstr::x_unnormalized);
462bf215546Sopenharmony_ci   tex.coord_type_y = !tex_instr.has_tex_flag(TexInstr::y_unnormalized);
463bf215546Sopenharmony_ci   tex.coord_type_z = !tex_instr.has_tex_flag(TexInstr::z_unnormalized);
464bf215546Sopenharmony_ci   tex.coord_type_w = !tex_instr.has_tex_flag(TexInstr::w_unnormalized);
465bf215546Sopenharmony_ci   tex.offset_x = tex_instr.get_offset(0);
466bf215546Sopenharmony_ci   tex.offset_y = tex_instr.get_offset(1);
467bf215546Sopenharmony_ci   tex.offset_z = tex_instr.get_offset(2);
468bf215546Sopenharmony_ci   tex.resource_index_mode = index_mode;
469bf215546Sopenharmony_ci   tex.sampler_index_mode = index_mode;
470bf215546Sopenharmony_ci
471bf215546Sopenharmony_ci   if (tex.dst_sel_x < 4 &&
472bf215546Sopenharmony_ci       tex.dst_sel_y < 4 &&
473bf215546Sopenharmony_ci       tex.dst_sel_z < 4 &&
474bf215546Sopenharmony_ci       tex.dst_sel_w < 4)
475bf215546Sopenharmony_ci      tex_fetch_results.insert(tex.dst_gpr);
476bf215546Sopenharmony_ci
477bf215546Sopenharmony_ci   if (tex_instr.opcode() == TexInstr::get_gradient_h ||
478bf215546Sopenharmony_ci       tex_instr.opcode() == TexInstr::get_gradient_v)
479bf215546Sopenharmony_ci      tex.inst_mod = tex_instr.has_tex_flag(TexInstr::grad_fine) ? 1 : 0;
480bf215546Sopenharmony_ci   else
481bf215546Sopenharmony_ci      tex.inst_mod = tex_instr.inst_mode();
482bf215546Sopenharmony_ci   if (r600_bytecode_add_tex(m_bc, &tex)) {
483bf215546Sopenharmony_ci      R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
484bf215546Sopenharmony_ci      m_result = false;
485bf215546Sopenharmony_ci   }
486bf215546Sopenharmony_ci}
487bf215546Sopenharmony_ci
488bf215546Sopenharmony_civoid AssamblerVisitor::visit(const ExportInstr& exi)
489bf215546Sopenharmony_ci{
490bf215546Sopenharmony_ci   const auto& value = exi.value();
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci   r600_bytecode_output output;
493bf215546Sopenharmony_ci   memset(&output, 0, sizeof(output));
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_ci   output.gpr = value.sel();
496bf215546Sopenharmony_ci   output.elem_size = 3;
497bf215546Sopenharmony_ci   output.swizzle_x = value[0]->chan();
498bf215546Sopenharmony_ci   output.swizzle_y = value[1]->chan();
499bf215546Sopenharmony_ci   output.swizzle_z = value[2]->chan();
500bf215546Sopenharmony_ci   output.burst_count = 1;
501bf215546Sopenharmony_ci   output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
502bf215546Sopenharmony_ci   output.type = exi.export_type();
503bf215546Sopenharmony_ci
504bf215546Sopenharmony_ci
505bf215546Sopenharmony_ci   clear_states(sf_all);
506bf215546Sopenharmony_ci   switch (exi.export_type()) {
507bf215546Sopenharmony_ci   case ExportInstr::pixel:
508bf215546Sopenharmony_ci      output.swizzle_w = ps_alpha_to_one ? 5 : exi.value()[3]->chan();
509bf215546Sopenharmony_ci      output.array_base = exi.location();
510bf215546Sopenharmony_ci   break;
511bf215546Sopenharmony_ci   case ExportInstr::pos:
512bf215546Sopenharmony_ci      output.swizzle_w = exi.value()[3]->chan();
513bf215546Sopenharmony_ci      output.array_base = 60 + exi.location();
514bf215546Sopenharmony_ci   break;
515bf215546Sopenharmony_ci   case ExportInstr::param:
516bf215546Sopenharmony_ci      output.swizzle_w = exi.value()[3]->chan();
517bf215546Sopenharmony_ci      output.array_base = exi.location();
518bf215546Sopenharmony_ci   break;
519bf215546Sopenharmony_ci   default:
520bf215546Sopenharmony_ci      R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type());
521bf215546Sopenharmony_ci      m_result = false;
522bf215546Sopenharmony_ci   }
523bf215546Sopenharmony_ci
524bf215546Sopenharmony_ci   /* If all register elements pinned to fixed values
525bf215546Sopenharmony_ci    * we can override the gpr (the register allocator doesn't see
526bf215546Sopenharmony_ci    * this because it doesn't take these channels into account. */
527bf215546Sopenharmony_ci   if (output.swizzle_x > 3 && output.swizzle_y > 3 &&
528bf215546Sopenharmony_ci       output.swizzle_z > 3 && output.swizzle_w > 3)
529bf215546Sopenharmony_ci       output.gpr = 0;
530bf215546Sopenharmony_ci
531bf215546Sopenharmony_ci   int r = 0;
532bf215546Sopenharmony_ci   if ((r =r600_bytecode_add_output(m_bc, &output))) {
533bf215546Sopenharmony_ci      R600_ERR("Error adding export at location %d : err: %d\n", exi.location(), r);
534bf215546Sopenharmony_ci      m_result = false;
535bf215546Sopenharmony_ci   }
536bf215546Sopenharmony_ci}
537bf215546Sopenharmony_ci
538bf215546Sopenharmony_civoid AssamblerVisitor::visit(const ScratchIOInstr& instr)
539bf215546Sopenharmony_ci{
540bf215546Sopenharmony_ci   clear_states(sf_all);
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci   struct r600_bytecode_output cf;
543bf215546Sopenharmony_ci
544bf215546Sopenharmony_ci   memset(&cf, 0, sizeof(struct r600_bytecode_output));
545bf215546Sopenharmony_ci
546bf215546Sopenharmony_ci   cf.op = CF_OP_MEM_SCRATCH;
547bf215546Sopenharmony_ci   cf.elem_size = 3;
548bf215546Sopenharmony_ci   cf.gpr = instr.value().sel();
549bf215546Sopenharmony_ci   cf.mark = !instr.is_read();
550bf215546Sopenharmony_ci   cf.comp_mask = instr.is_read() ? 0xf : instr.write_mask();
551bf215546Sopenharmony_ci   cf.swizzle_x = 0;
552bf215546Sopenharmony_ci   cf.swizzle_y = 1;
553bf215546Sopenharmony_ci   cf.swizzle_z = 2;
554bf215546Sopenharmony_ci   cf.swizzle_w = 3;
555bf215546Sopenharmony_ci   cf.burst_count = 1;
556bf215546Sopenharmony_ci
557bf215546Sopenharmony_ci   assert(!instr.is_read() || m_bc->gfx_level < R700);
558bf215546Sopenharmony_ci
559bf215546Sopenharmony_ci   if (instr.address()) {
560bf215546Sopenharmony_ci      cf.type = instr.is_read() || m_bc->gfx_level > R600 ? 3 : 1;
561bf215546Sopenharmony_ci      cf.index_gpr = instr.address()->sel();
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci      /* The docu seems to be wrong here: In indirect addressing the
564bf215546Sopenharmony_ci       * address_base seems to be the array_size */
565bf215546Sopenharmony_ci      cf.array_size = instr.array_size();
566bf215546Sopenharmony_ci   } else {
567bf215546Sopenharmony_ci      cf.type = instr.is_read() || m_bc->gfx_level > R600 ? 2 : 0;
568bf215546Sopenharmony_ci      cf.array_base = instr.location();
569bf215546Sopenharmony_ci   }
570bf215546Sopenharmony_ci
571bf215546Sopenharmony_ci   if (r600_bytecode_add_output(m_bc, &cf)){
572bf215546Sopenharmony_ci      R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
573bf215546Sopenharmony_ci      m_result = false;
574bf215546Sopenharmony_ci   }
575bf215546Sopenharmony_ci}
576bf215546Sopenharmony_ci
577bf215546Sopenharmony_civoid AssamblerVisitor::visit(const StreamOutInstr& instr)
578bf215546Sopenharmony_ci{
579bf215546Sopenharmony_ci   struct r600_bytecode_output output;
580bf215546Sopenharmony_ci   memset(&output, 0, sizeof(struct r600_bytecode_output));
581bf215546Sopenharmony_ci
582bf215546Sopenharmony_ci   output.gpr = instr.value().sel();
583bf215546Sopenharmony_ci   output.elem_size = instr.element_size();
584bf215546Sopenharmony_ci   output.array_base = instr.array_base();
585bf215546Sopenharmony_ci   output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
586bf215546Sopenharmony_ci   output.burst_count = instr.burst_count();
587bf215546Sopenharmony_ci   output.array_size = instr.array_size();
588bf215546Sopenharmony_ci   output.comp_mask = instr.comp_mask();
589bf215546Sopenharmony_ci   output.op = instr.op(m_shader->bc.gfx_level);
590bf215546Sopenharmony_ci
591bf215546Sopenharmony_ci
592bf215546Sopenharmony_ci   if (r600_bytecode_add_output(m_bc, &output))  {
593bf215546Sopenharmony_ci      R600_ERR("shader_from_nir: Error creating stream output instruction\n");
594bf215546Sopenharmony_ci      m_result = false;
595bf215546Sopenharmony_ci   }
596bf215546Sopenharmony_ci}
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_civoid AssamblerVisitor::visit(const MemRingOutInstr& instr)
599bf215546Sopenharmony_ci{
600bf215546Sopenharmony_ci   struct r600_bytecode_output output;
601bf215546Sopenharmony_ci   memset(&output, 0, sizeof(struct r600_bytecode_output));
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_ci   output.gpr = instr.value().sel();
604bf215546Sopenharmony_ci   output.type = instr.type();
605bf215546Sopenharmony_ci   output.elem_size = 3;
606bf215546Sopenharmony_ci   output.comp_mask = 0xf;
607bf215546Sopenharmony_ci   output.burst_count = 1;
608bf215546Sopenharmony_ci   output.op = instr.op();
609bf215546Sopenharmony_ci   if (instr.type() == MemRingOutInstr::mem_write_ind ||
610bf215546Sopenharmony_ci       instr.type() == MemRingOutInstr::mem_write_ind_ack) {
611bf215546Sopenharmony_ci      output.index_gpr = instr.index_reg();
612bf215546Sopenharmony_ci      output.array_size = 0xfff;
613bf215546Sopenharmony_ci   }
614bf215546Sopenharmony_ci   output.array_base = instr.array_base();
615bf215546Sopenharmony_ci
616bf215546Sopenharmony_ci   if (r600_bytecode_add_output(m_bc, &output)) {
617bf215546Sopenharmony_ci      R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
618bf215546Sopenharmony_ci      m_result = false;
619bf215546Sopenharmony_ci   }
620bf215546Sopenharmony_ci}
621bf215546Sopenharmony_ci
622bf215546Sopenharmony_civoid AssamblerVisitor::visit(const EmitVertexInstr& instr)
623bf215546Sopenharmony_ci{
624bf215546Sopenharmony_ci   int r = r600_bytecode_add_cfinst(m_bc, instr.op());
625bf215546Sopenharmony_ci   if (!r)
626bf215546Sopenharmony_ci      m_bc->cf_last->count = instr.stream();
627bf215546Sopenharmony_ci   else
628bf215546Sopenharmony_ci      m_result = false;
629bf215546Sopenharmony_ci   assert(m_bc->cf_last->count < 4);
630bf215546Sopenharmony_ci}
631bf215546Sopenharmony_ci
632bf215546Sopenharmony_civoid AssamblerVisitor::visit(const FetchInstr& fetch_instr)
633bf215546Sopenharmony_ci{
634bf215546Sopenharmony_ci   clear_states(sf_tex | sf_alu);
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci   auto buffer_offset = fetch_instr.resource_offset();
637bf215546Sopenharmony_ci   EBufferIndexMode rat_index_mode = bim_none;
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_ci   if (buffer_offset)
640bf215546Sopenharmony_ci      rat_index_mode = emit_index_reg(*buffer_offset, 0);
641bf215546Sopenharmony_ci
642bf215546Sopenharmony_ci   if (fetch_instr.has_fetch_flag(FetchInstr::wait_ack))
643bf215546Sopenharmony_ci      emit_wait_ack();
644bf215546Sopenharmony_ci
645bf215546Sopenharmony_ci   bool use_tc = fetch_instr.has_fetch_flag(FetchInstr::use_tc) ||
646bf215546Sopenharmony_ci                 (m_bc->gfx_level == CAYMAN);
647bf215546Sopenharmony_ci   if (!use_tc &&
648bf215546Sopenharmony_ci       vtx_fetch_results.find(fetch_instr.src().sel()) !=
649bf215546Sopenharmony_ci       vtx_fetch_results.end()) {
650bf215546Sopenharmony_ci      m_bc->force_add_cf = 1;
651bf215546Sopenharmony_ci      vtx_fetch_results.clear();
652bf215546Sopenharmony_ci   }
653bf215546Sopenharmony_ci
654bf215546Sopenharmony_ci   if (fetch_instr.has_fetch_flag(FetchInstr::use_tc) &&
655bf215546Sopenharmony_ci       tex_fetch_results.find(fetch_instr.src().sel()) !=
656bf215546Sopenharmony_ci       tex_fetch_results.end()) {
657bf215546Sopenharmony_ci      m_bc->force_add_cf = 1;
658bf215546Sopenharmony_ci      tex_fetch_results.clear();
659bf215546Sopenharmony_ci   }
660bf215546Sopenharmony_ci
661bf215546Sopenharmony_ci   if (use_tc)
662bf215546Sopenharmony_ci      tex_fetch_results.insert(fetch_instr.dst().sel());
663bf215546Sopenharmony_ci   else
664bf215546Sopenharmony_ci      vtx_fetch_results.insert(fetch_instr.dst().sel());
665bf215546Sopenharmony_ci
666bf215546Sopenharmony_ci   struct r600_bytecode_vtx vtx;
667bf215546Sopenharmony_ci   memset(&vtx, 0, sizeof(vtx));
668bf215546Sopenharmony_ci   vtx.op = fetch_instr.opcode();
669bf215546Sopenharmony_ci   vtx.buffer_id = fetch_instr.resource_id();
670bf215546Sopenharmony_ci   vtx.fetch_type = fetch_instr.fetch_type();
671bf215546Sopenharmony_ci   vtx.src_gpr = fetch_instr.src().sel();
672bf215546Sopenharmony_ci   vtx.src_sel_x = fetch_instr.src().chan();
673bf215546Sopenharmony_ci   vtx.mega_fetch_count = fetch_instr.mega_fetch_count();
674bf215546Sopenharmony_ci   vtx.dst_gpr = fetch_instr.dst().sel();
675bf215546Sopenharmony_ci   vtx.dst_sel_x = fetch_instr.dest_swizzle(0);		/* SEL_X */
676bf215546Sopenharmony_ci   vtx.dst_sel_y = fetch_instr.dest_swizzle(1);		/* SEL_Y */
677bf215546Sopenharmony_ci   vtx.dst_sel_z = fetch_instr.dest_swizzle(2);		/* SEL_Z */
678bf215546Sopenharmony_ci   vtx.dst_sel_w = fetch_instr.dest_swizzle(3);		/* SEL_W */
679bf215546Sopenharmony_ci   vtx.use_const_fields = fetch_instr.has_fetch_flag(FetchInstr::use_const_field);
680bf215546Sopenharmony_ci   vtx.data_format = fetch_instr.data_format();
681bf215546Sopenharmony_ci   vtx.num_format_all = fetch_instr.num_format();		/* NUM_FORMAT_SCALED */
682bf215546Sopenharmony_ci   vtx.format_comp_all = fetch_instr.has_fetch_flag(FetchInstr::format_comp_signed);
683bf215546Sopenharmony_ci   vtx.endian = fetch_instr.endian_swap();
684bf215546Sopenharmony_ci   vtx.buffer_index_mode = rat_index_mode;
685bf215546Sopenharmony_ci   vtx.offset = fetch_instr.src_offset();
686bf215546Sopenharmony_ci   vtx.indexed = fetch_instr.has_fetch_flag(FetchInstr::indexed);
687bf215546Sopenharmony_ci   vtx.uncached = fetch_instr.has_fetch_flag(FetchInstr::uncached);
688bf215546Sopenharmony_ci   vtx.elem_size = fetch_instr.elm_size();
689bf215546Sopenharmony_ci   vtx.array_base = fetch_instr.array_base();
690bf215546Sopenharmony_ci   vtx.array_size = fetch_instr.array_size();
691bf215546Sopenharmony_ci   vtx.srf_mode_all = fetch_instr.has_fetch_flag(FetchInstr::srf_mode);
692bf215546Sopenharmony_ci
693bf215546Sopenharmony_ci   if (fetch_instr.has_fetch_flag(FetchInstr::use_tc)) {
694bf215546Sopenharmony_ci      if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) {
695bf215546Sopenharmony_ci         R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
696bf215546Sopenharmony_ci         m_result = false;
697bf215546Sopenharmony_ci      }
698bf215546Sopenharmony_ci
699bf215546Sopenharmony_ci   } else {
700bf215546Sopenharmony_ci      if ((r600_bytecode_add_vtx(m_bc, &vtx))) {
701bf215546Sopenharmony_ci         R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
702bf215546Sopenharmony_ci         m_result = false;
703bf215546Sopenharmony_ci      }
704bf215546Sopenharmony_ci   }
705bf215546Sopenharmony_ci
706bf215546Sopenharmony_ci   m_bc->cf_last->vpm = (m_bc->type == PIPE_SHADER_FRAGMENT) &&
707bf215546Sopenharmony_ci         fetch_instr.has_fetch_flag(FetchInstr::vpm);
708bf215546Sopenharmony_ci   m_bc->cf_last->barrier = 1;
709bf215546Sopenharmony_ci}
710bf215546Sopenharmony_ci
711bf215546Sopenharmony_civoid AssamblerVisitor::visit(const WriteTFInstr& instr)
712bf215546Sopenharmony_ci{
713bf215546Sopenharmony_ci   struct r600_bytecode_gds gds;
714bf215546Sopenharmony_ci
715bf215546Sopenharmony_ci   auto& value = instr.value();
716bf215546Sopenharmony_ci
717bf215546Sopenharmony_ci   memset(&gds, 0, sizeof(struct r600_bytecode_gds));
718bf215546Sopenharmony_ci   gds.src_gpr = value.sel();
719bf215546Sopenharmony_ci   gds.src_sel_x = value[0]->chan();
720bf215546Sopenharmony_ci   gds.src_sel_y = value[1]->chan();
721bf215546Sopenharmony_ci   gds.src_sel_z = 4;
722bf215546Sopenharmony_ci   gds.dst_sel_x = 7;
723bf215546Sopenharmony_ci   gds.dst_sel_y = 7;
724bf215546Sopenharmony_ci   gds.dst_sel_z = 7;
725bf215546Sopenharmony_ci   gds.dst_sel_w = 7;
726bf215546Sopenharmony_ci   gds.op = FETCH_OP_TF_WRITE;
727bf215546Sopenharmony_ci
728bf215546Sopenharmony_ci   if (r600_bytecode_add_gds(m_bc, &gds) != 0) {
729bf215546Sopenharmony_ci      m_result = false;
730bf215546Sopenharmony_ci      return;
731bf215546Sopenharmony_ci   }
732bf215546Sopenharmony_ci
733bf215546Sopenharmony_ci   if (value[2]->chan() != 7) {
734bf215546Sopenharmony_ci      memset(&gds, 0, sizeof(struct r600_bytecode_gds));
735bf215546Sopenharmony_ci      gds.src_gpr = value.sel();
736bf215546Sopenharmony_ci      gds.src_sel_x = value[2]->chan();
737bf215546Sopenharmony_ci      gds.src_sel_y = value[3]->chan();
738bf215546Sopenharmony_ci      gds.src_sel_z = 4;
739bf215546Sopenharmony_ci      gds.dst_sel_x = 7;
740bf215546Sopenharmony_ci      gds.dst_sel_y = 7;
741bf215546Sopenharmony_ci      gds.dst_sel_z = 7;
742bf215546Sopenharmony_ci      gds.dst_sel_w = 7;
743bf215546Sopenharmony_ci      gds.op = FETCH_OP_TF_WRITE;
744bf215546Sopenharmony_ci
745bf215546Sopenharmony_ci      if (r600_bytecode_add_gds(m_bc, &gds)) {
746bf215546Sopenharmony_ci         m_result = false;
747bf215546Sopenharmony_ci         return;
748bf215546Sopenharmony_ci      }
749bf215546Sopenharmony_ci   }
750bf215546Sopenharmony_ci}
751bf215546Sopenharmony_ci
752bf215546Sopenharmony_civoid AssamblerVisitor::visit(const RatInstr& instr)
753bf215546Sopenharmony_ci{
754bf215546Sopenharmony_ci   struct r600_bytecode_gds gds;
755bf215546Sopenharmony_ci
756bf215546Sopenharmony_ci   /* The instruction writes to the retuen buffer loaction, and
757bf215546Sopenharmony_ci    * the value will actually be read bach, so make sure all previous writes
758bf215546Sopenharmony_ci    * have been finished */
759bf215546Sopenharmony_ci   if (m_ack_suggested /*&& instr.has_instr_flag(Instr::ack_rat_return_write)*/)
760bf215546Sopenharmony_ci      emit_wait_ack();
761bf215546Sopenharmony_ci
762bf215546Sopenharmony_ci   int rat_idx = instr.rat_id();
763bf215546Sopenharmony_ci   EBufferIndexMode rat_index_mode = bim_none;
764bf215546Sopenharmony_ci   auto addr = instr.rat_id_offset();
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_ci   if (addr)
767bf215546Sopenharmony_ci      rat_index_mode = emit_index_reg(*addr, 1);
768bf215546Sopenharmony_ci
769bf215546Sopenharmony_ci   memset(&gds, 0, sizeof(struct r600_bytecode_gds));
770bf215546Sopenharmony_ci
771bf215546Sopenharmony_ci   r600_bytecode_add_cfinst(m_bc, instr.cf_opcode());
772bf215546Sopenharmony_ci   auto cf = m_bc->cf_last;
773bf215546Sopenharmony_ci   cf->rat.id = rat_idx + m_shader->rat_base;
774bf215546Sopenharmony_ci   cf->rat.inst = instr.rat_op();
775bf215546Sopenharmony_ci   cf->rat.index_mode = rat_index_mode;
776bf215546Sopenharmony_ci   cf->output.type = instr.need_ack() ? 3 : 1;
777bf215546Sopenharmony_ci   cf->output.gpr = instr.data_gpr();
778bf215546Sopenharmony_ci   cf->output.index_gpr = instr.index_gpr();
779bf215546Sopenharmony_ci   cf->output.comp_mask = instr.comp_mask();
780bf215546Sopenharmony_ci   cf->output.burst_count = instr.burst_count();
781bf215546Sopenharmony_ci   assert(instr.data_swz(0) == PIPE_SWIZZLE_X);
782bf215546Sopenharmony_ci   if (cf->rat.inst != RatInstr::STORE_TYPED) {
783bf215546Sopenharmony_ci      assert(instr.data_swz(1) == PIPE_SWIZZLE_Y ||
784bf215546Sopenharmony_ci             instr.data_swz(1) == PIPE_SWIZZLE_MAX) ;
785bf215546Sopenharmony_ci      assert(instr.data_swz(2) == PIPE_SWIZZLE_Z ||
786bf215546Sopenharmony_ci             instr.data_swz(2) == PIPE_SWIZZLE_MAX) ;
787bf215546Sopenharmony_ci   }
788bf215546Sopenharmony_ci
789bf215546Sopenharmony_ci   cf->vpm = m_bc->type == PIPE_SHADER_FRAGMENT;
790bf215546Sopenharmony_ci   cf->barrier = 1;
791bf215546Sopenharmony_ci   cf->mark = instr.need_ack();
792bf215546Sopenharmony_ci   cf->output.elem_size = instr.elm_size();
793bf215546Sopenharmony_ci
794bf215546Sopenharmony_ci   m_ack_suggested |= instr.need_ack();
795bf215546Sopenharmony_ci}
796bf215546Sopenharmony_ci
797bf215546Sopenharmony_ci
798bf215546Sopenharmony_civoid AssamblerVisitor::clear_states(const uint32_t& states)
799bf215546Sopenharmony_ci{
800bf215546Sopenharmony_ci   if (states & sf_vtx)
801bf215546Sopenharmony_ci      vtx_fetch_results.clear();
802bf215546Sopenharmony_ci
803bf215546Sopenharmony_ci   if (states & sf_tex)
804bf215546Sopenharmony_ci      tex_fetch_results.clear();
805bf215546Sopenharmony_ci
806bf215546Sopenharmony_ci   if (states & sf_alu) {
807bf215546Sopenharmony_ci      m_last_op_was_barrier = false;
808bf215546Sopenharmony_ci      m_last_addr = nullptr;
809bf215546Sopenharmony_ci   }
810bf215546Sopenharmony_ci
811bf215546Sopenharmony_ci}
812bf215546Sopenharmony_ci
813bf215546Sopenharmony_ci
814bf215546Sopenharmony_civoid AssamblerVisitor::visit(const Block& block)
815bf215546Sopenharmony_ci{
816bf215546Sopenharmony_ci   if (block.empty())
817bf215546Sopenharmony_ci      return;
818bf215546Sopenharmony_ci
819bf215546Sopenharmony_ci   m_bc->force_add_cf = block.has_instr_flag(Instr::force_cf);
820bf215546Sopenharmony_ci   sfn_log << SfnLog::assembly << "Translate block  size: " << block.size() << " new_cf:" << m_bc->force_add_cf << "\n";
821bf215546Sopenharmony_ci
822bf215546Sopenharmony_ci   for (const auto& i : block) {
823bf215546Sopenharmony_ci      sfn_log << SfnLog::assembly << "Translate " << *i << " ";
824bf215546Sopenharmony_ci      i->accept(*this);
825bf215546Sopenharmony_ci      sfn_log << SfnLog::assembly << (m_result ? "good" : "fail") << "\n";
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_ci      if (!m_result)
828bf215546Sopenharmony_ci         break;
829bf215546Sopenharmony_ci   }
830bf215546Sopenharmony_ci}
831bf215546Sopenharmony_ci
832bf215546Sopenharmony_civoid AssamblerVisitor::visit(const IfInstr& instr)
833bf215546Sopenharmony_ci{
834bf215546Sopenharmony_ci   int elems = m_callstack.push(FC_PUSH_VPM);
835bf215546Sopenharmony_ci   bool needs_workaround = false;
836bf215546Sopenharmony_ci
837bf215546Sopenharmony_ci   if (m_bc->gfx_level == CAYMAN && m_bc->stack.loop > 1)
838bf215546Sopenharmony_ci      needs_workaround = true;
839bf215546Sopenharmony_ci
840bf215546Sopenharmony_ci   if (m_bc->gfx_level == EVERGREEN &&
841bf215546Sopenharmony_ci       m_bc->family != CHIP_HEMLOCK &&
842bf215546Sopenharmony_ci       m_bc->family != CHIP_CYPRESS &&
843bf215546Sopenharmony_ci       m_bc->family != CHIP_JUNIPER) {
844bf215546Sopenharmony_ci      unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
845bf215546Sopenharmony_ci      unsigned dmod2 = (elems) % m_bc->stack.entry_size;
846bf215546Sopenharmony_ci
847bf215546Sopenharmony_ci      if (elems && (!dmod1 || !dmod2))
848bf215546Sopenharmony_ci         needs_workaround = true;
849bf215546Sopenharmony_ci   }
850bf215546Sopenharmony_ci
851bf215546Sopenharmony_ci   auto pred = instr.predicate();
852bf215546Sopenharmony_ci   auto [addr, dummy0, dummy1 ] = pred->indirect_addr(); {}
853bf215546Sopenharmony_ci   if (addr) {
854bf215546Sopenharmony_ci      if (!m_last_addr || !m_bc->ar_loaded ||
855bf215546Sopenharmony_ci          !m_last_addr->equal_to(*addr)) {
856bf215546Sopenharmony_ci         m_bc->ar_reg = addr->sel();
857bf215546Sopenharmony_ci            m_bc->ar_chan = addr->chan();
858bf215546Sopenharmony_ci            m_last_addr = addr;
859bf215546Sopenharmony_ci            m_bc->ar_loaded = 0;
860bf215546Sopenharmony_ci
861bf215546Sopenharmony_ci            r600_load_ar(m_bc, true);
862bf215546Sopenharmony_ci      }
863bf215546Sopenharmony_ci   }
864bf215546Sopenharmony_ci
865bf215546Sopenharmony_ci   if (needs_workaround) {
866bf215546Sopenharmony_ci      r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
867bf215546Sopenharmony_ci      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
868bf215546Sopenharmony_ci      pred->set_cf_type(cf_alu);
869bf215546Sopenharmony_ci   }
870bf215546Sopenharmony_ci
871bf215546Sopenharmony_ci   clear_states(sf_tex|sf_vtx);
872bf215546Sopenharmony_ci   pred->accept(*this);
873bf215546Sopenharmony_ci
874bf215546Sopenharmony_ci   r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);
875bf215546Sopenharmony_ci   clear_states(sf_all);
876bf215546Sopenharmony_ci
877bf215546Sopenharmony_ci   m_jump_tracker.push(m_bc->cf_last, jt_if);
878bf215546Sopenharmony_ci}
879bf215546Sopenharmony_ci
880bf215546Sopenharmony_civoid AssamblerVisitor::visit(const ControlFlowInstr& instr)
881bf215546Sopenharmony_ci{
882bf215546Sopenharmony_ci   clear_states(sf_all);
883bf215546Sopenharmony_ci   switch (instr.cf_type()) {
884bf215546Sopenharmony_ci   case ControlFlowInstr::cf_else:
885bf215546Sopenharmony_ci      emit_else();
886bf215546Sopenharmony_ci      break;
887bf215546Sopenharmony_ci   case ControlFlowInstr::cf_endif:
888bf215546Sopenharmony_ci      emit_endif();
889bf215546Sopenharmony_ci      break;
890bf215546Sopenharmony_ci   case ControlFlowInstr::cf_loop_begin:
891bf215546Sopenharmony_ci      emit_loop_begin(instr.has_instr_flag(Instr::vpm));
892bf215546Sopenharmony_ci      break;
893bf215546Sopenharmony_ci   case ControlFlowInstr::cf_loop_end:
894bf215546Sopenharmony_ci      emit_loop_end();
895bf215546Sopenharmony_ci      break;
896bf215546Sopenharmony_ci   case ControlFlowInstr::cf_loop_break:
897bf215546Sopenharmony_ci      emit_loop_break();
898bf215546Sopenharmony_ci      break;
899bf215546Sopenharmony_ci   case ControlFlowInstr::cf_loop_continue:
900bf215546Sopenharmony_ci      emit_loop_cont();
901bf215546Sopenharmony_ci      break;
902bf215546Sopenharmony_ci   case ControlFlowInstr::cf_wait_ack:
903bf215546Sopenharmony_ci   {
904bf215546Sopenharmony_ci      int r = r600_bytecode_add_cfinst(m_bc, CF_OP_WAIT_ACK);
905bf215546Sopenharmony_ci      if (!r) {
906bf215546Sopenharmony_ci         m_bc->cf_last->cf_addr = 0;
907bf215546Sopenharmony_ci         m_bc->cf_last->barrier = 1;
908bf215546Sopenharmony_ci         m_ack_suggested = false;
909bf215546Sopenharmony_ci      } else {
910bf215546Sopenharmony_ci         m_result = false;
911bf215546Sopenharmony_ci      }
912bf215546Sopenharmony_ci   }
913bf215546Sopenharmony_ci      break;
914bf215546Sopenharmony_ci   default:
915bf215546Sopenharmony_ci      unreachable("Unknown CF instruction type");
916bf215546Sopenharmony_ci   }
917bf215546Sopenharmony_ci}
918bf215546Sopenharmony_ci
919bf215546Sopenharmony_civoid AssamblerVisitor::visit(const GDSInstr& instr)
920bf215546Sopenharmony_ci{
921bf215546Sopenharmony_ci   struct r600_bytecode_gds gds;
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_ci   bool indirect = false;
924bf215546Sopenharmony_ci   auto addr = instr.uav_id();
925bf215546Sopenharmony_ci
926bf215546Sopenharmony_ci   if (addr) {
927bf215546Sopenharmony_ci      indirect = true;
928bf215546Sopenharmony_ci      emit_index_reg(*addr, 1);
929bf215546Sopenharmony_ci   }
930bf215546Sopenharmony_ci
931bf215546Sopenharmony_ci   memset(&gds, 0, sizeof(struct r600_bytecode_gds));
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_ci   gds.op = ds_opcode_map.at(instr.opcode());
934bf215546Sopenharmony_ci   gds.dst_gpr = instr.dest()->sel();
935bf215546Sopenharmony_ci   gds.uav_id = instr.uav_base();
936bf215546Sopenharmony_ci   gds.uav_index_mode = indirect ? bim_one : bim_none;
937bf215546Sopenharmony_ci   gds.src_gpr = instr.src().sel();
938bf215546Sopenharmony_ci
939bf215546Sopenharmony_ci   gds.src_sel_x = instr.src()[0]->chan() < 7 ? instr.src()[0]->chan() : 4;
940bf215546Sopenharmony_ci   gds.src_sel_y = instr.src()[1]->chan();
941bf215546Sopenharmony_ci   gds.src_sel_z = instr.src()[2]->chan() < 7 ? instr.src()[2]->chan() : 4;
942bf215546Sopenharmony_ci
943bf215546Sopenharmony_ci   gds.dst_sel_x = 7;
944bf215546Sopenharmony_ci   gds.dst_sel_y = 7;
945bf215546Sopenharmony_ci   gds.dst_sel_z = 7;
946bf215546Sopenharmony_ci   gds.dst_sel_w = 7;
947bf215546Sopenharmony_ci
948bf215546Sopenharmony_ci   switch (instr.dest()->chan()) {
949bf215546Sopenharmony_ci   case 0: gds.dst_sel_x = 0;break;
950bf215546Sopenharmony_ci   case 1: gds.dst_sel_y = 0;break;
951bf215546Sopenharmony_ci   case 2: gds.dst_sel_z = 0;break;
952bf215546Sopenharmony_ci   case 3: gds.dst_sel_w = 0;
953bf215546Sopenharmony_ci   }
954bf215546Sopenharmony_ci
955bf215546Sopenharmony_ci   gds.src_gpr2 = 0;
956bf215546Sopenharmony_ci   gds.alloc_consume = m_bc->gfx_level < CAYMAN ? 1 : 0; // Not Cayman
957bf215546Sopenharmony_ci
958bf215546Sopenharmony_ci   int r = r600_bytecode_add_gds(m_bc, &gds);
959bf215546Sopenharmony_ci   if (r) {
960bf215546Sopenharmony_ci      m_result = false;
961bf215546Sopenharmony_ci      return;
962bf215546Sopenharmony_ci   }
963bf215546Sopenharmony_ci   m_bc->cf_last->vpm = PIPE_SHADER_FRAGMENT == m_bc->type;
964bf215546Sopenharmony_ci   m_bc->cf_last->barrier = 1;
965bf215546Sopenharmony_ci}
966bf215546Sopenharmony_ci
967bf215546Sopenharmony_civoid AssamblerVisitor::visit(const LDSAtomicInstr& instr)
968bf215546Sopenharmony_ci{
969bf215546Sopenharmony_ci   (void)instr;
970bf215546Sopenharmony_ci   unreachable("LDSAtomicInstr must be lowered to ALUInstr");
971bf215546Sopenharmony_ci}
972bf215546Sopenharmony_ci
973bf215546Sopenharmony_civoid AssamblerVisitor::visit(const LDSReadInstr& instr)
974bf215546Sopenharmony_ci{
975bf215546Sopenharmony_ci   (void)instr;
976bf215546Sopenharmony_ci   unreachable("LDSReadInstr must be lowered to ALUInstr");
977bf215546Sopenharmony_ci}
978bf215546Sopenharmony_ci
979bf215546Sopenharmony_ciEBufferIndexMode
980bf215546Sopenharmony_ciAssamblerVisitor::emit_index_reg(const VirtualValue& addr, unsigned idx)
981bf215546Sopenharmony_ci{
982bf215546Sopenharmony_ci   assert(idx < 2);
983bf215546Sopenharmony_ci
984bf215546Sopenharmony_ci   if (!m_bc->index_loaded[idx] || m_loop_nesting ||
985bf215546Sopenharmony_ci       m_bc->index_reg[idx] != (unsigned)addr.sel()
986bf215546Sopenharmony_ci       ||  m_bc->index_reg_chan[idx] != (unsigned)addr.chan()) {
987bf215546Sopenharmony_ci      struct r600_bytecode_alu alu;
988bf215546Sopenharmony_ci
989bf215546Sopenharmony_ci      // Make sure MOVA is not last instr in clause
990bf215546Sopenharmony_ci
991bf215546Sopenharmony_ci      if (!m_bc->cf_last || (m_bc->cf_last->ndw>>1) >= 110)
992bf215546Sopenharmony_ci         m_bc->force_add_cf = 1;
993bf215546Sopenharmony_ci
994bf215546Sopenharmony_ci      if (m_bc->gfx_level != CAYMAN) {
995bf215546Sopenharmony_ci
996bf215546Sopenharmony_ci         EAluOp idxop = idx ? op1_set_cf_idx1 : op1_set_cf_idx0;
997bf215546Sopenharmony_ci
998bf215546Sopenharmony_ci         memset(&alu, 0, sizeof(alu));
999bf215546Sopenharmony_ci         alu.op = opcode_map.at(op1_mova_int);
1000bf215546Sopenharmony_ci         alu.dst.chan = 0;
1001bf215546Sopenharmony_ci         alu.src[0].sel = addr.sel();
1002bf215546Sopenharmony_ci         alu.src[0].chan = addr.chan();
1003bf215546Sopenharmony_ci         alu.last = 1;
1004bf215546Sopenharmony_ci         sfn_log << SfnLog::assembly << "   mova_int, ";
1005bf215546Sopenharmony_ci         int r = r600_bytecode_add_alu(m_bc, &alu);
1006bf215546Sopenharmony_ci         if (r)
1007bf215546Sopenharmony_ci            return bim_invalid;
1008bf215546Sopenharmony_ci
1009bf215546Sopenharmony_ci         alu.op = opcode_map.at(idxop);
1010bf215546Sopenharmony_ci         alu.dst.chan = 0;
1011bf215546Sopenharmony_ci         alu.src[0].sel = 0;
1012bf215546Sopenharmony_ci         alu.src[0].chan = 0;
1013bf215546Sopenharmony_ci         alu.last = 1;
1014bf215546Sopenharmony_ci         sfn_log << SfnLog::assembly << "op1_set_cf_idx" << idx;
1015bf215546Sopenharmony_ci         r = r600_bytecode_add_alu(m_bc, &alu);
1016bf215546Sopenharmony_ci         if (r)
1017bf215546Sopenharmony_ci            return bim_invalid;
1018bf215546Sopenharmony_ci      } else {
1019bf215546Sopenharmony_ci         memset(&alu, 0, sizeof(alu));
1020bf215546Sopenharmony_ci         alu.op = opcode_map.at(op1_mova_int);
1021bf215546Sopenharmony_ci         alu.dst.sel = idx == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
1022bf215546Sopenharmony_ci         alu.dst.chan = 0;
1023bf215546Sopenharmony_ci         alu.src[0].sel = addr.sel();
1024bf215546Sopenharmony_ci         alu.src[0].chan = addr.chan();
1025bf215546Sopenharmony_ci         alu.last = 1;
1026bf215546Sopenharmony_ci         sfn_log << SfnLog::assembly << "   mova_int, ";
1027bf215546Sopenharmony_ci         int r = r600_bytecode_add_alu(m_bc, &alu);
1028bf215546Sopenharmony_ci         if (r)
1029bf215546Sopenharmony_ci            return bim_invalid;
1030bf215546Sopenharmony_ci      }
1031bf215546Sopenharmony_ci
1032bf215546Sopenharmony_ci      m_bc->ar_loaded = 0;
1033bf215546Sopenharmony_ci      m_bc->index_reg[idx] = addr.sel();
1034bf215546Sopenharmony_ci      m_bc->index_reg_chan[idx] = addr.chan();
1035bf215546Sopenharmony_ci      m_bc->index_loaded[idx] = true;
1036bf215546Sopenharmony_ci      m_bc->force_add_cf = 1;
1037bf215546Sopenharmony_ci      sfn_log << SfnLog::assembly << "\n";
1038bf215546Sopenharmony_ci   }
1039bf215546Sopenharmony_ci   return idx == 0 ? bim_zero : bim_one;
1040bf215546Sopenharmony_ci}
1041bf215546Sopenharmony_ci
1042bf215546Sopenharmony_civoid AssamblerVisitor::emit_else()
1043bf215546Sopenharmony_ci{
1044bf215546Sopenharmony_ci   r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
1045bf215546Sopenharmony_ci   m_bc->cf_last->pop_count = 1;
1046bf215546Sopenharmony_ci   m_result &= m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
1047bf215546Sopenharmony_ci}
1048bf215546Sopenharmony_ci
1049bf215546Sopenharmony_civoid AssamblerVisitor::emit_endif()
1050bf215546Sopenharmony_ci{
1051bf215546Sopenharmony_ci   m_callstack.pop(FC_PUSH_VPM);
1052bf215546Sopenharmony_ci
1053bf215546Sopenharmony_ci   unsigned force_pop = m_bc->force_add_cf;
1054bf215546Sopenharmony_ci   if (!force_pop) {
1055bf215546Sopenharmony_ci      int alu_pop = 3;
1056bf215546Sopenharmony_ci      if (m_bc->cf_last) {
1057bf215546Sopenharmony_ci         if (m_bc->cf_last->op == CF_OP_ALU)
1058bf215546Sopenharmony_ci            alu_pop = 0;
1059bf215546Sopenharmony_ci         else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
1060bf215546Sopenharmony_ci            alu_pop = 1;
1061bf215546Sopenharmony_ci      }
1062bf215546Sopenharmony_ci      alu_pop += 1;
1063bf215546Sopenharmony_ci      if (alu_pop == 1) {
1064bf215546Sopenharmony_ci         m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
1065bf215546Sopenharmony_ci         m_bc->force_add_cf = 1;
1066bf215546Sopenharmony_ci      } else {
1067bf215546Sopenharmony_ci         force_pop = 1;
1068bf215546Sopenharmony_ci      }
1069bf215546Sopenharmony_ci   }
1070bf215546Sopenharmony_ci
1071bf215546Sopenharmony_ci   if (force_pop) {
1072bf215546Sopenharmony_ci      r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
1073bf215546Sopenharmony_ci      m_bc->cf_last->pop_count = 1;
1074bf215546Sopenharmony_ci      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
1075bf215546Sopenharmony_ci   }
1076bf215546Sopenharmony_ci
1077bf215546Sopenharmony_ci   m_result &= m_jump_tracker.pop(m_bc->cf_last, jt_if);
1078bf215546Sopenharmony_ci}
1079bf215546Sopenharmony_ci
1080bf215546Sopenharmony_civoid AssamblerVisitor::emit_loop_begin(bool vpm)
1081bf215546Sopenharmony_ci{
1082bf215546Sopenharmony_ci   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
1083bf215546Sopenharmony_ci   m_bc->cf_last->vpm = vpm && m_bc->type == PIPE_SHADER_FRAGMENT;
1084bf215546Sopenharmony_ci   m_jump_tracker.push(m_bc->cf_last, jt_loop);
1085bf215546Sopenharmony_ci   m_callstack.push(FC_LOOP);
1086bf215546Sopenharmony_ci   ++m_loop_nesting;
1087bf215546Sopenharmony_ci}
1088bf215546Sopenharmony_ci
1089bf215546Sopenharmony_civoid AssamblerVisitor::emit_loop_end()
1090bf215546Sopenharmony_ci{
1091bf215546Sopenharmony_ci   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
1092bf215546Sopenharmony_ci   m_callstack.pop(FC_LOOP);
1093bf215546Sopenharmony_ci   assert(m_loop_nesting);
1094bf215546Sopenharmony_ci   --m_loop_nesting;
1095bf215546Sopenharmony_ci   m_result |= m_jump_tracker.pop(m_bc->cf_last, jt_loop);
1096bf215546Sopenharmony_ci}
1097bf215546Sopenharmony_ci
1098bf215546Sopenharmony_civoid AssamblerVisitor::emit_loop_break()
1099bf215546Sopenharmony_ci{
1100bf215546Sopenharmony_ci   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
1101bf215546Sopenharmony_ci   m_result |= m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
1102bf215546Sopenharmony_ci}
1103bf215546Sopenharmony_ci
1104bf215546Sopenharmony_civoid AssamblerVisitor::emit_loop_cont()
1105bf215546Sopenharmony_ci{
1106bf215546Sopenharmony_ci   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
1107bf215546Sopenharmony_ci   m_result |= m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
1108bf215546Sopenharmony_ci}
1109bf215546Sopenharmony_ci
1110bf215546Sopenharmony_cibool AssamblerVisitor::copy_dst(r600_bytecode_alu_dst& dst,
1111bf215546Sopenharmony_ci                                const Register& d, bool write)
1112bf215546Sopenharmony_ci{
1113bf215546Sopenharmony_ci   if (write && d.sel() > 124) {
1114bf215546Sopenharmony_ci      R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n",
1115bf215546Sopenharmony_ci               d.sel());
1116bf215546Sopenharmony_ci      m_result = false;
1117bf215546Sopenharmony_ci      return false;
1118bf215546Sopenharmony_ci   }
1119bf215546Sopenharmony_ci
1120bf215546Sopenharmony_ci   dst.sel = d.sel();
1121bf215546Sopenharmony_ci   dst.chan = d.chan();
1122bf215546Sopenharmony_ci
1123bf215546Sopenharmony_ci   if (m_bc->index_reg[1] == dst.sel &&
1124bf215546Sopenharmony_ci       m_bc->index_reg_chan[1] == dst.chan)
1125bf215546Sopenharmony_ci      m_bc->index_loaded[1] = false;
1126bf215546Sopenharmony_ci
1127bf215546Sopenharmony_ci   if (m_bc->index_reg[0] == dst.sel &&
1128bf215546Sopenharmony_ci       m_bc->index_reg_chan[0] == dst.chan)
1129bf215546Sopenharmony_ci      m_bc->index_loaded[0] = false;
1130bf215546Sopenharmony_ci
1131bf215546Sopenharmony_ci   return true;
1132bf215546Sopenharmony_ci}
1133bf215546Sopenharmony_ci
1134bf215546Sopenharmony_civoid AssamblerVisitor::emit_wait_ack()
1135bf215546Sopenharmony_ci{
1136bf215546Sopenharmony_ci   int r = r600_bytecode_add_cfinst(m_bc, CF_OP_WAIT_ACK);
1137bf215546Sopenharmony_ci   if (!r) {
1138bf215546Sopenharmony_ci      m_bc->cf_last->cf_addr = 0;
1139bf215546Sopenharmony_ci      m_bc->cf_last->barrier = 1;
1140bf215546Sopenharmony_ci      m_ack_suggested = false;
1141bf215546Sopenharmony_ci   } else
1142bf215546Sopenharmony_ci      m_result = false;
1143bf215546Sopenharmony_ci}
1144bf215546Sopenharmony_ci
1145bf215546Sopenharmony_ciclass EncodeSourceVisitor : public ConstRegisterVisitor {
1146bf215546Sopenharmony_cipublic:
1147bf215546Sopenharmony_ci
1148bf215546Sopenharmony_ci   EncodeSourceVisitor(r600_bytecode_alu_src& s, r600_bytecode *bc);
1149bf215546Sopenharmony_ci   void visit(const Register& value) override;
1150bf215546Sopenharmony_ci   void visit(const LocalArray& value) override;
1151bf215546Sopenharmony_ci   void visit(const LocalArrayValue& value) override;
1152bf215546Sopenharmony_ci   void visit(const UniformValue& value) override;
1153bf215546Sopenharmony_ci   void visit(const LiteralConstant& value) override;
1154bf215546Sopenharmony_ci   void visit(const InlineConstant& value) override;
1155bf215546Sopenharmony_ci
1156bf215546Sopenharmony_ci   r600_bytecode_alu_src& src;
1157bf215546Sopenharmony_ci   r600_bytecode *m_bc;
1158bf215546Sopenharmony_ci   PVirtualValue m_buffer_offset{nullptr};
1159bf215546Sopenharmony_ci};
1160bf215546Sopenharmony_ci
1161bf215546Sopenharmony_ciPVirtualValue AssamblerVisitor::copy_src(r600_bytecode_alu_src& src, const VirtualValue& s)
1162bf215546Sopenharmony_ci{
1163bf215546Sopenharmony_ci
1164bf215546Sopenharmony_ci   EncodeSourceVisitor visitor(src, m_bc);
1165bf215546Sopenharmony_ci   src.sel = s.sel();
1166bf215546Sopenharmony_ci   src.chan = s.chan();
1167bf215546Sopenharmony_ci
1168bf215546Sopenharmony_ci   s.accept(visitor);
1169bf215546Sopenharmony_ci   return visitor.m_buffer_offset;
1170bf215546Sopenharmony_ci}
1171bf215546Sopenharmony_ci
1172bf215546Sopenharmony_ciEncodeSourceVisitor::EncodeSourceVisitor(r600_bytecode_alu_src& s, r600_bytecode *bc):
1173bf215546Sopenharmony_ci   src(s), m_bc(bc)
1174bf215546Sopenharmony_ci{
1175bf215546Sopenharmony_ci}
1176bf215546Sopenharmony_ci
1177bf215546Sopenharmony_civoid EncodeSourceVisitor::visit(const Register& value)
1178bf215546Sopenharmony_ci{
1179bf215546Sopenharmony_ci   assert(value.sel() <= 124 && "Only have 124 registers");
1180bf215546Sopenharmony_ci}
1181bf215546Sopenharmony_ci
1182bf215546Sopenharmony_civoid EncodeSourceVisitor::visit(const LocalArray& value)
1183bf215546Sopenharmony_ci{
1184bf215546Sopenharmony_ci   (void)value;
1185bf215546Sopenharmony_ci   unreachable("An array can't be a source register");
1186bf215546Sopenharmony_ci}
1187bf215546Sopenharmony_ci
1188bf215546Sopenharmony_civoid EncodeSourceVisitor::visit(const LocalArrayValue& value)
1189bf215546Sopenharmony_ci{
1190bf215546Sopenharmony_ci   src.rel = value.addr() ? 1 : 0;
1191bf215546Sopenharmony_ci}
1192bf215546Sopenharmony_ci
1193bf215546Sopenharmony_civoid EncodeSourceVisitor::visit(const UniformValue& value)
1194bf215546Sopenharmony_ci{
1195bf215546Sopenharmony_ci   assert(value.sel() >= 512 && "Uniform values must have a sel >= 512");
1196bf215546Sopenharmony_ci   m_buffer_offset = value.buf_addr();
1197bf215546Sopenharmony_ci   src.kc_bank = value.kcache_bank();
1198bf215546Sopenharmony_ci}
1199bf215546Sopenharmony_ci
1200bf215546Sopenharmony_civoid EncodeSourceVisitor::visit(const LiteralConstant& value)
1201bf215546Sopenharmony_ci{
1202bf215546Sopenharmony_ci   src.value = value.value();
1203bf215546Sopenharmony_ci}
1204bf215546Sopenharmony_ci
1205bf215546Sopenharmony_civoid EncodeSourceVisitor::visit(const InlineConstant& value)
1206bf215546Sopenharmony_ci{
1207bf215546Sopenharmony_ci   (void)value;
1208bf215546Sopenharmony_ci}
1209bf215546Sopenharmony_ci
1210bf215546Sopenharmony_ci
1211bf215546Sopenharmony_ci
1212bf215546Sopenharmony_ciconst std::map<EAluOp, int> opcode_map = {
1213bf215546Sopenharmony_ci
1214bf215546Sopenharmony_ci   {op2_add, ALU_OP2_ADD},
1215bf215546Sopenharmony_ci   {op2_mul, ALU_OP2_MUL},
1216bf215546Sopenharmony_ci   {op2_mul_ieee, ALU_OP2_MUL_IEEE},
1217bf215546Sopenharmony_ci   {op2_max, ALU_OP2_MAX},
1218bf215546Sopenharmony_ci   {op2_min, ALU_OP2_MIN},
1219bf215546Sopenharmony_ci   {op2_max_dx10, ALU_OP2_MAX_DX10},
1220bf215546Sopenharmony_ci   {op2_min_dx10, ALU_OP2_MIN_DX10},
1221bf215546Sopenharmony_ci   {op2_sete, ALU_OP2_SETE},
1222bf215546Sopenharmony_ci   {op2_setgt, ALU_OP2_SETGT},
1223bf215546Sopenharmony_ci   {op2_setge, ALU_OP2_SETGE},
1224bf215546Sopenharmony_ci   {op2_setne, ALU_OP2_SETNE},
1225bf215546Sopenharmony_ci   {op2_sete_dx10, ALU_OP2_SETE_DX10},
1226bf215546Sopenharmony_ci   {op2_setgt_dx10, ALU_OP2_SETGT_DX10},
1227bf215546Sopenharmony_ci   {op2_setge_dx10, ALU_OP2_SETGE_DX10},
1228bf215546Sopenharmony_ci   {op2_setne_dx10, ALU_OP2_SETNE_DX10},
1229bf215546Sopenharmony_ci   {op1_fract, ALU_OP1_FRACT},
1230bf215546Sopenharmony_ci   {op1_trunc, ALU_OP1_TRUNC},
1231bf215546Sopenharmony_ci   {op1_ceil, ALU_OP1_CEIL},
1232bf215546Sopenharmony_ci   {op1_rndne, ALU_OP1_RNDNE},
1233bf215546Sopenharmony_ci   {op1_floor, ALU_OP1_FLOOR},
1234bf215546Sopenharmony_ci   {op2_ashr_int, ALU_OP2_ASHR_INT},
1235bf215546Sopenharmony_ci   {op2_lshr_int, ALU_OP2_LSHR_INT},
1236bf215546Sopenharmony_ci   {op2_lshl_int, ALU_OP2_LSHL_INT},
1237bf215546Sopenharmony_ci   {op1_mov, ALU_OP1_MOV},
1238bf215546Sopenharmony_ci   {op0_nop, ALU_OP0_NOP},
1239bf215546Sopenharmony_ci   {op2_mul_64, ALU_OP2_MUL_64},
1240bf215546Sopenharmony_ci   {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
1241bf215546Sopenharmony_ci   {op1v_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
1242bf215546Sopenharmony_ci   {op2_prede_int, ALU_OP2_PRED_SETE_INT},
1243bf215546Sopenharmony_ci   {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
1244bf215546Sopenharmony_ci   {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
1245bf215546Sopenharmony_ci   {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
1246bf215546Sopenharmony_ci   {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT},
1247bf215546Sopenharmony_ci   {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT},
1248bf215546Sopenharmony_ci   {op2_pred_sete, ALU_OP2_PRED_SETE},
1249bf215546Sopenharmony_ci   {op2_pred_setgt, ALU_OP2_PRED_SETGT},
1250bf215546Sopenharmony_ci   {op2_pred_setge, ALU_OP2_PRED_SETGE},
1251bf215546Sopenharmony_ci   {op2_pred_setne, ALU_OP2_PRED_SETNE},
1252bf215546Sopenharmony_ci   {op0_pred_set_clr, ALU_OP0_PRED_SET_CLR},
1253bf215546Sopenharmony_ci   {op1_pred_set_restore, ALU_OP1_PRED_SET_RESTORE},
1254bf215546Sopenharmony_ci   {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH},
1255bf215546Sopenharmony_ci   {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH},
1256bf215546Sopenharmony_ci   {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH},
1257bf215546Sopenharmony_ci   {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH},
1258bf215546Sopenharmony_ci   {op2_kille, ALU_OP2_KILLE},
1259bf215546Sopenharmony_ci   {op2_killgt, ALU_OP2_KILLGT},
1260bf215546Sopenharmony_ci   {op2_killge, ALU_OP2_KILLGE},
1261bf215546Sopenharmony_ci   {op2_killne, ALU_OP2_KILLNE},
1262bf215546Sopenharmony_ci   {op2_and_int, ALU_OP2_AND_INT},
1263bf215546Sopenharmony_ci   {op2_or_int, ALU_OP2_OR_INT},
1264bf215546Sopenharmony_ci   {op2_xor_int, ALU_OP2_XOR_INT},
1265bf215546Sopenharmony_ci   {op1_not_int, ALU_OP1_NOT_INT},
1266bf215546Sopenharmony_ci   {op2_add_int, ALU_OP2_ADD_INT},
1267bf215546Sopenharmony_ci   {op2_sub_int, ALU_OP2_SUB_INT},
1268bf215546Sopenharmony_ci   {op2_max_int, ALU_OP2_MAX_INT},
1269bf215546Sopenharmony_ci   {op2_min_int, ALU_OP2_MIN_INT},
1270bf215546Sopenharmony_ci   {op2_max_uint, ALU_OP2_MAX_UINT},
1271bf215546Sopenharmony_ci   {op2_min_uint, ALU_OP2_MIN_UINT},
1272bf215546Sopenharmony_ci   {op2_sete_int, ALU_OP2_SETE_INT},
1273bf215546Sopenharmony_ci   {op2_setgt_int, ALU_OP2_SETGT_INT},
1274bf215546Sopenharmony_ci   {op2_setge_int, ALU_OP2_SETGE_INT},
1275bf215546Sopenharmony_ci   {op2_setne_int, ALU_OP2_SETNE_INT},
1276bf215546Sopenharmony_ci   {op2_setgt_uint, ALU_OP2_SETGT_UINT},
1277bf215546Sopenharmony_ci   {op2_setge_uint, ALU_OP2_SETGE_UINT},
1278bf215546Sopenharmony_ci   {op2_killgt_uint, ALU_OP2_KILLGT_UINT},
1279bf215546Sopenharmony_ci   {op2_killge_uint, ALU_OP2_KILLGE_UINT},
1280bf215546Sopenharmony_ci   {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
1281bf215546Sopenharmony_ci   {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
1282bf215546Sopenharmony_ci   {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
1283bf215546Sopenharmony_ci   {op2_kille_int, ALU_OP2_KILLE_INT},
1284bf215546Sopenharmony_ci   {op2_killgt_int, ALU_OP2_KILLGT_INT},
1285bf215546Sopenharmony_ci   {op2_killge_int, ALU_OP2_KILLGE_INT},
1286bf215546Sopenharmony_ci   {op2_killne_int, ALU_OP2_KILLNE_INT},
1287bf215546Sopenharmony_ci   {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT},
1288bf215546Sopenharmony_ci   {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT},
1289bf215546Sopenharmony_ci   {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT},
1290bf215546Sopenharmony_ci   {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT},
1291bf215546Sopenharmony_ci   {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT},
1292bf215546Sopenharmony_ci   {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT},
1293bf215546Sopenharmony_ci   {op1_flt_to_int, ALU_OP1_FLT_TO_INT},
1294bf215546Sopenharmony_ci   {op1_bfrev_int, ALU_OP1_BFREV_INT},
1295bf215546Sopenharmony_ci   {op2_addc_uint, ALU_OP2_ADDC_UINT},
1296bf215546Sopenharmony_ci   {op2_subb_uint, ALU_OP2_SUBB_UINT},
1297bf215546Sopenharmony_ci   {op0_group_barrier, ALU_OP0_GROUP_BARRIER},
1298bf215546Sopenharmony_ci   {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN},
1299bf215546Sopenharmony_ci   {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END},
1300bf215546Sopenharmony_ci   {op2_set_mode, ALU_OP2_SET_MODE},
1301bf215546Sopenharmony_ci   {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0},
1302bf215546Sopenharmony_ci   {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1},
1303bf215546Sopenharmony_ci   {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE},
1304bf215546Sopenharmony_ci   {op1_exp_ieee, ALU_OP1_EXP_IEEE},
1305bf215546Sopenharmony_ci   {op1_log_clamped, ALU_OP1_LOG_CLAMPED},
1306bf215546Sopenharmony_ci   {op1_log_ieee, ALU_OP1_LOG_IEEE},
1307bf215546Sopenharmony_ci   {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED},
1308bf215546Sopenharmony_ci   {op1_recip_ff, ALU_OP1_RECIP_FF},
1309bf215546Sopenharmony_ci   {op1_recip_ieee, ALU_OP1_RECIP_IEEE},
1310bf215546Sopenharmony_ci   {op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED},
1311bf215546Sopenharmony_ci   {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF},
1312bf215546Sopenharmony_ci   {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE},
1313bf215546Sopenharmony_ci   {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE},
1314bf215546Sopenharmony_ci   {op1_sin, ALU_OP1_SIN},
1315bf215546Sopenharmony_ci   {op1_cos, ALU_OP1_COS},
1316bf215546Sopenharmony_ci   {op2_mullo_int, ALU_OP2_MULLO_INT},
1317bf215546Sopenharmony_ci   {op2_mulhi_int, ALU_OP2_MULHI_INT},
1318bf215546Sopenharmony_ci   {op2_mullo_uint, ALU_OP2_MULLO_UINT},
1319bf215546Sopenharmony_ci   {op2_mulhi_uint, ALU_OP2_MULHI_UINT},
1320bf215546Sopenharmony_ci   {op1_recip_int, ALU_OP1_RECIP_INT},
1321bf215546Sopenharmony_ci   {op1_recip_uint, ALU_OP1_RECIP_UINT},
1322bf215546Sopenharmony_ci   {op1_recip_64, ALU_OP2_RECIP_64},
1323bf215546Sopenharmony_ci   {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64},
1324bf215546Sopenharmony_ci   {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64},
1325bf215546Sopenharmony_ci   {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64},
1326bf215546Sopenharmony_ci   {op1_sqrt_64, ALU_OP2_SQRT_64},
1327bf215546Sopenharmony_ci   {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT},
1328bf215546Sopenharmony_ci   {op1_int_to_flt, ALU_OP1_INT_TO_FLT},
1329bf215546Sopenharmony_ci   {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT},
1330bf215546Sopenharmony_ci   {op2_bfm_int, ALU_OP2_BFM_INT},
1331bf215546Sopenharmony_ci   {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16},
1332bf215546Sopenharmony_ci   {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32},
1333bf215546Sopenharmony_ci   {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT},
1334bf215546Sopenharmony_ci   {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT},
1335bf215546Sopenharmony_ci   {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT},
1336bf215546Sopenharmony_ci   {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT},
1337bf215546Sopenharmony_ci   {op1_bcnt_int, ALU_OP1_BCNT_INT},
1338bf215546Sopenharmony_ci   {op1_ffbh_uint, ALU_OP1_FFBH_UINT},
1339bf215546Sopenharmony_ci   {op1_ffbl_int, ALU_OP1_FFBL_INT},
1340bf215546Sopenharmony_ci   {op1_ffbh_int, ALU_OP1_FFBH_INT},
1341bf215546Sopenharmony_ci   {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4},
1342bf215546Sopenharmony_ci   {op2_dot_ieee, ALU_OP2_DOT_IEEE},
1343bf215546Sopenharmony_ci   {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI},
1344bf215546Sopenharmony_ci   {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR},
1345bf215546Sopenharmony_ci   {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24},
1346bf215546Sopenharmony_ci   {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT},
1347bf215546Sopenharmony_ci   {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT},
1348bf215546Sopenharmony_ci   {op2_mul_uint24, ALU_OP2_MUL_UINT24},
1349bf215546Sopenharmony_ci   {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT},
1350bf215546Sopenharmony_ci   {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT},
1351bf215546Sopenharmony_ci   {op2_sete_64, ALU_OP2_SETE_64},
1352bf215546Sopenharmony_ci   {op2_setne_64, ALU_OP2_SETNE_64},
1353bf215546Sopenharmony_ci   {op2_setgt_64, ALU_OP2_SETGT_64},
1354bf215546Sopenharmony_ci   {op2_setge_64, ALU_OP2_SETGE_64},
1355bf215546Sopenharmony_ci   {op2_min_64, ALU_OP2_MIN_64},
1356bf215546Sopenharmony_ci   {op2_max_64, ALU_OP2_MAX_64},
1357bf215546Sopenharmony_ci   {op2_dot4, ALU_OP2_DOT4},
1358bf215546Sopenharmony_ci   {op2_dot4_ieee, ALU_OP2_DOT4_IEEE},
1359bf215546Sopenharmony_ci   {op2_cube, ALU_OP2_CUBE},
1360bf215546Sopenharmony_ci   {op1_max4, ALU_OP1_MAX4},
1361bf215546Sopenharmony_ci   {op1_frexp_64, ALU_OP1_FREXP_64},
1362bf215546Sopenharmony_ci   {op1_ldexp_64, ALU_OP2_LDEXP_64},
1363bf215546Sopenharmony_ci   {op1_fract_64, ALU_OP1_FRACT_64},
1364bf215546Sopenharmony_ci   {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64},
1365bf215546Sopenharmony_ci   {op2_pred_sete_64, ALU_OP2_PRED_SETE_64},
1366bf215546Sopenharmony_ci   {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64},
1367bf215546Sopenharmony_ci   {op2_add_64, ALU_OP2_ADD_64},
1368bf215546Sopenharmony_ci   {op1_mova_int, ALU_OP1_MOVA_INT},
1369bf215546Sopenharmony_ci   {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
1370bf215546Sopenharmony_ci   {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
1371bf215546Sopenharmony_ci   {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT},
1372bf215546Sopenharmony_ci   {op2_dot, ALU_OP2_DOT},
1373bf215546Sopenharmony_ci   {op1_mul_prev, ALU_OP1_MUL_PREV},
1374bf215546Sopenharmony_ci   {op1_mul_ieee_prev, ALU_OP1_MUL_IEEE_PREV},
1375bf215546Sopenharmony_ci   {op1_add_prev, ALU_OP1_ADD_PREV},
1376bf215546Sopenharmony_ci   {op2_muladd_prev, ALU_OP2_MULADD_PREV},
1377bf215546Sopenharmony_ci   {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV},
1378bf215546Sopenharmony_ci   {op2_interp_xy, ALU_OP2_INTERP_XY},
1379bf215546Sopenharmony_ci   {op2_interp_zw, ALU_OP2_INTERP_ZW},
1380bf215546Sopenharmony_ci   {op2_interp_x, ALU_OP2_INTERP_X},
1381bf215546Sopenharmony_ci   {op2_interp_z, ALU_OP2_INTERP_Z},
1382bf215546Sopenharmony_ci   {op0_store_flags, ALU_OP1_STORE_FLAGS},
1383bf215546Sopenharmony_ci   {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS},
1384bf215546Sopenharmony_ci   {op0_lds_1a, ALU_OP2_LDS_1A},
1385bf215546Sopenharmony_ci   {op0_lds_1a1d, ALU_OP2_LDS_1A1D},
1386bf215546Sopenharmony_ci   {op0_lds_2a, ALU_OP2_LDS_2A},
1387bf215546Sopenharmony_ci   {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0},
1388bf215546Sopenharmony_ci   {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10},
1389bf215546Sopenharmony_ci   {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20},
1390bf215546Sopenharmony_ci   {op3_bfe_uint, ALU_OP3_BFE_UINT},
1391bf215546Sopenharmony_ci   {op3_bfe_int, ALU_OP3_BFE_INT},
1392bf215546Sopenharmony_ci   {op3_bfi_int, ALU_OP3_BFI_INT},
1393bf215546Sopenharmony_ci   {op3_fma, ALU_OP3_FMA},
1394bf215546Sopenharmony_ci   {op3_cndne_64, ALU_OP3_CNDNE_64},
1395bf215546Sopenharmony_ci   {op3_fma_64, ALU_OP3_FMA_64},
1396bf215546Sopenharmony_ci   {op3_lerp_uint, ALU_OP3_LERP_UINT},
1397bf215546Sopenharmony_ci   {op3_bit_align_int, ALU_OP3_BIT_ALIGN_INT},
1398bf215546Sopenharmony_ci   {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT},
1399bf215546Sopenharmony_ci   {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT},
1400bf215546Sopenharmony_ci   {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT},
1401bf215546Sopenharmony_ci   {op3_muladd_uint24, ALU_OP3_MULADD_UINT24},
1402bf215546Sopenharmony_ci   {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP},
1403bf215546Sopenharmony_ci   {op3_muladd, ALU_OP3_MULADD},
1404bf215546Sopenharmony_ci   {op3_muladd_m2, ALU_OP3_MULADD_M2},
1405bf215546Sopenharmony_ci   {op3_muladd_m4, ALU_OP3_MULADD_M4},
1406bf215546Sopenharmony_ci   {op3_muladd_d2, ALU_OP3_MULADD_D2},
1407bf215546Sopenharmony_ci   {op3_muladd_ieee, ALU_OP3_MULADD_IEEE},
1408bf215546Sopenharmony_ci   {op3_cnde, ALU_OP3_CNDE},
1409bf215546Sopenharmony_ci   {op3_cndgt, ALU_OP3_CNDGT},
1410bf215546Sopenharmony_ci   {op3_cndge, ALU_OP3_CNDGE},
1411bf215546Sopenharmony_ci   {op3_cnde_int, ALU_OP3_CNDE_INT},
1412bf215546Sopenharmony_ci   {op3_cndgt_int, ALU_OP3_CNDGT_INT},
1413bf215546Sopenharmony_ci   {op3_cndge_int, ALU_OP3_CNDGE_INT},
1414bf215546Sopenharmony_ci   {op3_mul_lit, ALU_OP3_MUL_LIT},
1415bf215546Sopenharmony_ci};
1416bf215546Sopenharmony_ci
1417bf215546Sopenharmony_ciconst std::map<ESDOp, int> ds_opcode_map = {
1418bf215546Sopenharmony_ci   {DS_OP_ADD, FETCH_OP_GDS_ADD},
1419bf215546Sopenharmony_ci   {DS_OP_SUB, FETCH_OP_GDS_SUB},
1420bf215546Sopenharmony_ci   {DS_OP_RSUB, FETCH_OP_GDS_RSUB},
1421bf215546Sopenharmony_ci   {DS_OP_INC, FETCH_OP_GDS_INC},
1422bf215546Sopenharmony_ci   {DS_OP_DEC, FETCH_OP_GDS_DEC},
1423bf215546Sopenharmony_ci   {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT},
1424bf215546Sopenharmony_ci   {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT},
1425bf215546Sopenharmony_ci   {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT},
1426bf215546Sopenharmony_ci   {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT},
1427bf215546Sopenharmony_ci   {DS_OP_AND, FETCH_OP_GDS_AND},
1428bf215546Sopenharmony_ci   {DS_OP_OR, FETCH_OP_GDS_OR},
1429bf215546Sopenharmony_ci   {DS_OP_XOR, FETCH_OP_GDS_XOR},
1430bf215546Sopenharmony_ci   {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR},
1431bf215546Sopenharmony_ci   {DS_OP_WRITE, FETCH_OP_GDS_WRITE},
1432bf215546Sopenharmony_ci   {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL},
1433bf215546Sopenharmony_ci   {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2},
1434bf215546Sopenharmony_ci   {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE},
1435bf215546Sopenharmony_ci   {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF},
1436bf215546Sopenharmony_ci   {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE},
1437bf215546Sopenharmony_ci   {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE},
1438bf215546Sopenharmony_ci   {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET},
1439bf215546Sopenharmony_ci   {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET},
1440bf215546Sopenharmony_ci   {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET},
1441bf215546Sopenharmony_ci   {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET},
1442bf215546Sopenharmony_ci   {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET},
1443bf215546Sopenharmony_ci   {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET},
1444bf215546Sopenharmony_ci   {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET},
1445bf215546Sopenharmony_ci   {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET},
1446bf215546Sopenharmony_ci   {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET},
1447bf215546Sopenharmony_ci   {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET},
1448bf215546Sopenharmony_ci   {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET},
1449bf215546Sopenharmony_ci   {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET},
1450bf215546Sopenharmony_ci   {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET},
1451bf215546Sopenharmony_ci   {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET},
1452bf215546Sopenharmony_ci   {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET},
1453bf215546Sopenharmony_ci   {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET},
1454bf215546Sopenharmony_ci   {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET},
1455bf215546Sopenharmony_ci   {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET},
1456bf215546Sopenharmony_ci   {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET},
1457bf215546Sopenharmony_ci   {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET},
1458bf215546Sopenharmony_ci   {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET},
1459bf215546Sopenharmony_ci   {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET},
1460bf215546Sopenharmony_ci   {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET},
1461bf215546Sopenharmony_ci   {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET},
1462bf215546Sopenharmony_ci   {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET},
1463bf215546Sopenharmony_ci   {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET},
1464bf215546Sopenharmony_ci   {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC},
1465bf215546Sopenharmony_ci   {DS_OP_INVALID, 0},
1466bf215546Sopenharmony_ci};
1467bf215546Sopenharmony_ci
1468bf215546Sopenharmony_ci}
1469