1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2019 Valve Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "aco_builder.h"
26bf215546Sopenharmony_ci#include "aco_ir.h"
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include <algorithm>
29bf215546Sopenharmony_ci#include <bitset>
30bf215546Sopenharmony_ci#include <stack>
31bf215546Sopenharmony_ci#include <vector>
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_cinamespace aco {
34bf215546Sopenharmony_cinamespace {
35bf215546Sopenharmony_ci
/* Shared state passed to the hazard-search helpers while one block is being
 * processed. */
struct State {
   /* Program being processed; used to look up predecessor blocks by index. */
   Program* program;
   /* Block currently being processed. Its `instructions` vector is incomplete
    * while the block is being rebuilt. */
   Block* block;
   /* Instructions of the current block that are consulted when a backwards
    * search starts at the end of the block. An entry is null once the
    * instruction has been moved to block->instructions. */
   std::vector<aco_ptr<Instruction>> old_instructions;
};
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_cistruct NOP_ctx_gfx6 {
43bf215546Sopenharmony_ci   void join(const NOP_ctx_gfx6& other)
44bf215546Sopenharmony_ci   {
45bf215546Sopenharmony_ci      set_vskip_mode_then_vector =
46bf215546Sopenharmony_ci         MAX2(set_vskip_mode_then_vector, other.set_vskip_mode_then_vector);
47bf215546Sopenharmony_ci      valu_wr_vcc_then_vccz = MAX2(valu_wr_vcc_then_vccz, other.valu_wr_vcc_then_vccz);
48bf215546Sopenharmony_ci      valu_wr_exec_then_execz = MAX2(valu_wr_exec_then_execz, other.valu_wr_exec_then_execz);
49bf215546Sopenharmony_ci      valu_wr_vcc_then_div_fmas = MAX2(valu_wr_vcc_then_div_fmas, other.valu_wr_vcc_then_div_fmas);
50bf215546Sopenharmony_ci      salu_wr_m0_then_gds_msg_ttrace =
51bf215546Sopenharmony_ci         MAX2(salu_wr_m0_then_gds_msg_ttrace, other.salu_wr_m0_then_gds_msg_ttrace);
52bf215546Sopenharmony_ci      valu_wr_exec_then_dpp = MAX2(valu_wr_exec_then_dpp, other.valu_wr_exec_then_dpp);
53bf215546Sopenharmony_ci      salu_wr_m0_then_lds = MAX2(salu_wr_m0_then_lds, other.salu_wr_m0_then_lds);
54bf215546Sopenharmony_ci      salu_wr_m0_then_moverel = MAX2(salu_wr_m0_then_moverel, other.salu_wr_m0_then_moverel);
55bf215546Sopenharmony_ci      setreg_then_getsetreg = MAX2(setreg_then_getsetreg, other.setreg_then_getsetreg);
56bf215546Sopenharmony_ci      vmem_store_then_wr_data |= other.vmem_store_then_wr_data;
57bf215546Sopenharmony_ci      smem_clause |= other.smem_clause;
58bf215546Sopenharmony_ci      smem_write |= other.smem_write;
59bf215546Sopenharmony_ci      for (unsigned i = 0; i < BITSET_WORDS(128); i++) {
60bf215546Sopenharmony_ci         smem_clause_read_write[i] |= other.smem_clause_read_write[i];
61bf215546Sopenharmony_ci         smem_clause_write[i] |= other.smem_clause_write[i];
62bf215546Sopenharmony_ci      }
63bf215546Sopenharmony_ci   }
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci   bool operator==(const NOP_ctx_gfx6& other)
66bf215546Sopenharmony_ci   {
67bf215546Sopenharmony_ci      return set_vskip_mode_then_vector == other.set_vskip_mode_then_vector &&
68bf215546Sopenharmony_ci             valu_wr_vcc_then_vccz == other.valu_wr_vcc_then_vccz &&
69bf215546Sopenharmony_ci             valu_wr_exec_then_execz == other.valu_wr_exec_then_execz &&
70bf215546Sopenharmony_ci             valu_wr_vcc_then_div_fmas == other.valu_wr_vcc_then_div_fmas &&
71bf215546Sopenharmony_ci             vmem_store_then_wr_data == other.vmem_store_then_wr_data &&
72bf215546Sopenharmony_ci             salu_wr_m0_then_gds_msg_ttrace == other.salu_wr_m0_then_gds_msg_ttrace &&
73bf215546Sopenharmony_ci             valu_wr_exec_then_dpp == other.valu_wr_exec_then_dpp &&
74bf215546Sopenharmony_ci             salu_wr_m0_then_lds == other.salu_wr_m0_then_lds &&
75bf215546Sopenharmony_ci             salu_wr_m0_then_moverel == other.salu_wr_m0_then_moverel &&
76bf215546Sopenharmony_ci             setreg_then_getsetreg == other.setreg_then_getsetreg &&
77bf215546Sopenharmony_ci             smem_clause == other.smem_clause && smem_write == other.smem_write &&
78bf215546Sopenharmony_ci             BITSET_EQUAL(smem_clause_read_write, other.smem_clause_read_write) &&
79bf215546Sopenharmony_ci             BITSET_EQUAL(smem_clause_write, other.smem_clause_write);
80bf215546Sopenharmony_ci   }
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci   void add_wait_states(unsigned amount)
83bf215546Sopenharmony_ci   {
84bf215546Sopenharmony_ci      if ((set_vskip_mode_then_vector -= amount) < 0)
85bf215546Sopenharmony_ci         set_vskip_mode_then_vector = 0;
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ci      if ((valu_wr_vcc_then_vccz -= amount) < 0)
88bf215546Sopenharmony_ci         valu_wr_vcc_then_vccz = 0;
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci      if ((valu_wr_exec_then_execz -= amount) < 0)
91bf215546Sopenharmony_ci         valu_wr_exec_then_execz = 0;
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci      if ((valu_wr_vcc_then_div_fmas -= amount) < 0)
94bf215546Sopenharmony_ci         valu_wr_vcc_then_div_fmas = 0;
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci      if ((salu_wr_m0_then_gds_msg_ttrace -= amount) < 0)
97bf215546Sopenharmony_ci         salu_wr_m0_then_gds_msg_ttrace = 0;
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci      if ((valu_wr_exec_then_dpp -= amount) < 0)
100bf215546Sopenharmony_ci         valu_wr_exec_then_dpp = 0;
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_ci      if ((salu_wr_m0_then_lds -= amount) < 0)
103bf215546Sopenharmony_ci         salu_wr_m0_then_lds = 0;
104bf215546Sopenharmony_ci
105bf215546Sopenharmony_ci      if ((salu_wr_m0_then_moverel -= amount) < 0)
106bf215546Sopenharmony_ci         salu_wr_m0_then_moverel = 0;
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_ci      if ((setreg_then_getsetreg -= amount) < 0)
109bf215546Sopenharmony_ci         setreg_then_getsetreg = 0;
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_ci      vmem_store_then_wr_data.reset();
112bf215546Sopenharmony_ci   }
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci   /* setting MODE.vskip and then any vector op requires 2 wait states */
115bf215546Sopenharmony_ci   int8_t set_vskip_mode_then_vector = 0;
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci   /* VALU writing VCC/EXEC and then a VALU reading VCCZ/EXECZ requires 5 wait states */
118bf215546Sopenharmony_ci   int8_t valu_wr_vcc_then_vccz = 0;
119bf215546Sopenharmony_ci   int8_t valu_wr_exec_then_execz = 0;
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci   /* VALU writing VCC followed by v_div_fmas require 4 wait states */
122bf215546Sopenharmony_ci   int8_t valu_wr_vcc_then_div_fmas = 0;
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_ci   /* SALU writing M0 followed by GDS, s_sendmsg or s_ttrace_data requires 1 wait state */
125bf215546Sopenharmony_ci   int8_t salu_wr_m0_then_gds_msg_ttrace = 0;
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_ci   /* VALU writing EXEC followed by DPP requires 5 wait states */
128bf215546Sopenharmony_ci   int8_t valu_wr_exec_then_dpp = 0;
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_ci   /* SALU writing M0 followed by some LDS instructions requires 1 wait state on GFX10 */
131bf215546Sopenharmony_ci   int8_t salu_wr_m0_then_lds = 0;
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci   /* SALU writing M0 followed by s_moverel requires 1 wait state on GFX9 */
134bf215546Sopenharmony_ci   int8_t salu_wr_m0_then_moverel = 0;
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci   /* s_setreg followed by a s_getreg/s_setreg of the same register needs 2 wait states
137bf215546Sopenharmony_ci    * currently we don't look at the actual register */
138bf215546Sopenharmony_ci   int8_t setreg_then_getsetreg = 0;
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci   /* some memory instructions writing >64bit followed by a instructions
141bf215546Sopenharmony_ci    * writing the VGPRs holding the writedata requires 1 wait state */
142bf215546Sopenharmony_ci   std::bitset<256> vmem_store_then_wr_data;
143bf215546Sopenharmony_ci
144bf215546Sopenharmony_ci   /* we break up SMEM clauses that contain stores or overwrite an
145bf215546Sopenharmony_ci    * operand/definition of another instruction in the clause */
146bf215546Sopenharmony_ci   bool smem_clause = false;
147bf215546Sopenharmony_ci   bool smem_write = false;
148bf215546Sopenharmony_ci   BITSET_DECLARE(smem_clause_read_write, 128) = {0};
149bf215546Sopenharmony_ci   BITSET_DECLARE(smem_clause_write, 128) = {0};
150bf215546Sopenharmony_ci};
151bf215546Sopenharmony_ci
/* Hazard bookkeeping for the GFX10 code path: a set of "have we seen X" flags
 * plus the SGPRs (as 128-bit sets) read by recent VMEM, VMEM-store, DS and
 * SMEM instructions. The code that sets and consumes these fields is outside
 * this struct. */
struct NOP_ctx_gfx10 {
   bool has_VOPC = false;
   bool has_nonVALU_exec_read = false;
   bool has_VMEM = false;
   bool has_branch_after_VMEM = false;
   bool has_DS = false;
   bool has_branch_after_DS = false;
   bool has_NSA_MIMG = false;
   bool has_writelane = false;
   std::bitset<128> sgprs_read_by_VMEM;
   std::bitset<128> sgprs_read_by_VMEM_store;
   std::bitset<128> sgprs_read_by_DS;
   std::bitset<128> sgprs_read_by_SMEM;

   /* Merges the state of another control-flow path into this one: a flag is
    * set if it is set on either path and the register sets are united. */
   void join(const NOP_ctx_gfx10& other)
   {
      has_VOPC |= other.has_VOPC;
      has_nonVALU_exec_read |= other.has_nonVALU_exec_read;
      has_VMEM |= other.has_VMEM;
      has_branch_after_VMEM |= other.has_branch_after_VMEM;
      has_DS |= other.has_DS;
      has_branch_after_DS |= other.has_branch_after_DS;
      has_NSA_MIMG |= other.has_NSA_MIMG;
      has_writelane |= other.has_writelane;
      sgprs_read_by_VMEM |= other.sgprs_read_by_VMEM;
      sgprs_read_by_DS |= other.sgprs_read_by_DS;
      sgprs_read_by_VMEM_store |= other.sgprs_read_by_VMEM_store;
      sgprs_read_by_SMEM |= other.sgprs_read_by_SMEM;
   }

   /* Field-wise equality; const so that const contexts can be compared. */
   bool operator==(const NOP_ctx_gfx10& other) const
   {
      return has_VOPC == other.has_VOPC && has_nonVALU_exec_read == other.has_nonVALU_exec_read &&
             has_VMEM == other.has_VMEM && has_branch_after_VMEM == other.has_branch_after_VMEM &&
             has_DS == other.has_DS && has_branch_after_DS == other.has_branch_after_DS &&
             has_NSA_MIMG == other.has_NSA_MIMG && has_writelane == other.has_writelane &&
             sgprs_read_by_VMEM == other.sgprs_read_by_VMEM &&
             sgprs_read_by_DS == other.sgprs_read_by_DS &&
             sgprs_read_by_VMEM_store == other.sgprs_read_by_VMEM_store &&
             sgprs_read_by_SMEM == other.sgprs_read_by_SMEM;
   }
};
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ciint
196bf215546Sopenharmony_ciget_wait_states(aco_ptr<Instruction>& instr)
197bf215546Sopenharmony_ci{
198bf215546Sopenharmony_ci   if (instr->opcode == aco_opcode::s_nop)
199bf215546Sopenharmony_ci      return instr->sopp().imm + 1;
200bf215546Sopenharmony_ci   else if (instr->opcode == aco_opcode::p_constaddr)
201bf215546Sopenharmony_ci      return 3; /* lowered to 3 instructions in the assembler */
202bf215546Sopenharmony_ci   else
203bf215546Sopenharmony_ci      return 1;
204bf215546Sopenharmony_ci}
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_cibool
207bf215546Sopenharmony_ciregs_intersect(PhysReg a_reg, unsigned a_size, PhysReg b_reg, unsigned b_size)
208bf215546Sopenharmony_ci{
209bf215546Sopenharmony_ci   return a_reg > b_reg ? (a_reg - b_reg < b_size) : (b_reg - a_reg < a_size);
210bf215546Sopenharmony_ci}
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_citemplate <bool Valu, bool Vintrp, bool Salu>
213bf215546Sopenharmony_cibool
214bf215546Sopenharmony_cihandle_raw_hazard_instr(aco_ptr<Instruction>& pred, PhysReg reg, int* nops_needed, uint32_t* mask)
215bf215546Sopenharmony_ci{
216bf215546Sopenharmony_ci   unsigned mask_size = util_last_bit(*mask);
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_ci   uint32_t writemask = 0;
219bf215546Sopenharmony_ci   for (Definition& def : pred->definitions) {
220bf215546Sopenharmony_ci      if (regs_intersect(reg, mask_size, def.physReg(), def.size())) {
221bf215546Sopenharmony_ci         unsigned start = def.physReg() > reg ? def.physReg() - reg : 0;
222bf215546Sopenharmony_ci         unsigned end = MIN2(mask_size, start + def.size());
223bf215546Sopenharmony_ci         writemask |= u_bit_consecutive(start, end - start);
224bf215546Sopenharmony_ci      }
225bf215546Sopenharmony_ci   }
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci   bool is_hazard = writemask != 0 && ((pred->isVALU() && Valu) || (pred->isVINTRP() && Vintrp) ||
228bf215546Sopenharmony_ci                                       (pred->isSALU() && Salu));
229bf215546Sopenharmony_ci   if (is_hazard)
230bf215546Sopenharmony_ci      return true;
231bf215546Sopenharmony_ci
232bf215546Sopenharmony_ci   *mask &= ~writemask;
233bf215546Sopenharmony_ci   *nops_needed = MAX2(*nops_needed - get_wait_states(pred), 0);
234bf215546Sopenharmony_ci
235bf215546Sopenharmony_ci   if (*mask == 0)
236bf215546Sopenharmony_ci      *nops_needed = 0;
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_ci   return *nops_needed == 0;
239bf215546Sopenharmony_ci}
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_citemplate <bool Valu, bool Vintrp, bool Salu>
242bf215546Sopenharmony_ciint
243bf215546Sopenharmony_cihandle_raw_hazard_internal(State& state, Block* block, int nops_needed, PhysReg reg, uint32_t mask,
244bf215546Sopenharmony_ci                           bool start_at_end)
245bf215546Sopenharmony_ci{
246bf215546Sopenharmony_ci   if (block == state.block && start_at_end) {
247bf215546Sopenharmony_ci      /* If it's the current block, block->instructions is incomplete. */
248bf215546Sopenharmony_ci      for (int pred_idx = state.old_instructions.size() - 1; pred_idx >= 0; pred_idx--) {
249bf215546Sopenharmony_ci         aco_ptr<Instruction>& instr = state.old_instructions[pred_idx];
250bf215546Sopenharmony_ci         if (!instr)
251bf215546Sopenharmony_ci            break; /* Instruction has been moved to block->instructions. */
252bf215546Sopenharmony_ci         if (handle_raw_hazard_instr<Valu, Vintrp, Salu>(instr, reg, &nops_needed, &mask))
253bf215546Sopenharmony_ci            return nops_needed;
254bf215546Sopenharmony_ci      }
255bf215546Sopenharmony_ci   }
256bf215546Sopenharmony_ci   for (int pred_idx = block->instructions.size() - 1; pred_idx >= 0; pred_idx--) {
257bf215546Sopenharmony_ci      if (handle_raw_hazard_instr<Valu, Vintrp, Salu>(block->instructions[pred_idx], reg,
258bf215546Sopenharmony_ci                                                      &nops_needed, &mask))
259bf215546Sopenharmony_ci         return nops_needed;
260bf215546Sopenharmony_ci   }
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_ci   int res = 0;
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci   /* Loops require branch instructions, which count towards the wait
265bf215546Sopenharmony_ci    * states. So even with loops this should finish unless nops_needed is some
266bf215546Sopenharmony_ci    * huge value. */
267bf215546Sopenharmony_ci   for (unsigned lin_pred : block->linear_preds) {
268bf215546Sopenharmony_ci      res =
269bf215546Sopenharmony_ci         std::max(res, handle_raw_hazard_internal<Valu, Vintrp, Salu>(
270bf215546Sopenharmony_ci                          state, &state.program->blocks[lin_pred], nops_needed, reg, mask, true));
271bf215546Sopenharmony_ci   }
272bf215546Sopenharmony_ci   return res;
273bf215546Sopenharmony_ci}
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_citemplate <bool Valu, bool Vintrp, bool Salu>
276bf215546Sopenharmony_civoid
277bf215546Sopenharmony_cihandle_raw_hazard(State& state, int* NOPs, int min_states, Operand op)
278bf215546Sopenharmony_ci{
279bf215546Sopenharmony_ci   if (*NOPs >= min_states)
280bf215546Sopenharmony_ci      return;
281bf215546Sopenharmony_ci   int res = handle_raw_hazard_internal<Valu, Vintrp, Salu>(
282bf215546Sopenharmony_ci      state, state.block, min_states, op.physReg(), u_bit_consecutive(0, op.size()), false);
283bf215546Sopenharmony_ci   *NOPs = MAX2(*NOPs, res);
284bf215546Sopenharmony_ci}
285bf215546Sopenharmony_ci
286bf215546Sopenharmony_cistatic auto handle_valu_then_read_hazard = handle_raw_hazard<true, true, false>;
287bf215546Sopenharmony_cistatic auto handle_vintrp_then_read_hazard = handle_raw_hazard<false, true, false>;
288bf215546Sopenharmony_cistatic auto handle_valu_salu_then_read_hazard = handle_raw_hazard<true, true, true>;
289bf215546Sopenharmony_ci
290bf215546Sopenharmony_civoid
291bf215546Sopenharmony_ciset_bitset_range(BITSET_WORD* words, unsigned start, unsigned size)
292bf215546Sopenharmony_ci{
293bf215546Sopenharmony_ci   unsigned end = start + size - 1;
294bf215546Sopenharmony_ci   unsigned start_mod = start % BITSET_WORDBITS;
295bf215546Sopenharmony_ci   if (start_mod + size <= BITSET_WORDBITS) {
296bf215546Sopenharmony_ci      BITSET_SET_RANGE_INSIDE_WORD(words, start, end);
297bf215546Sopenharmony_ci   } else {
298bf215546Sopenharmony_ci      unsigned first_size = BITSET_WORDBITS - start_mod;
299bf215546Sopenharmony_ci      set_bitset_range(words, start, BITSET_WORDBITS - start_mod);
300bf215546Sopenharmony_ci      set_bitset_range(words, start + first_size, size - first_size);
301bf215546Sopenharmony_ci   }
302bf215546Sopenharmony_ci}
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_cibool
305bf215546Sopenharmony_citest_bitset_range(BITSET_WORD* words, unsigned start, unsigned size)
306bf215546Sopenharmony_ci{
307bf215546Sopenharmony_ci   unsigned end = start + size - 1;
308bf215546Sopenharmony_ci   unsigned start_mod = start % BITSET_WORDBITS;
309bf215546Sopenharmony_ci   if (start_mod + size <= BITSET_WORDBITS) {
310bf215546Sopenharmony_ci      return BITSET_TEST_RANGE(words, start, end);
311bf215546Sopenharmony_ci   } else {
312bf215546Sopenharmony_ci      unsigned first_size = BITSET_WORDBITS - start_mod;
313bf215546Sopenharmony_ci      return test_bitset_range(words, start, BITSET_WORDBITS - start_mod) ||
314bf215546Sopenharmony_ci             test_bitset_range(words, start + first_size, size - first_size);
315bf215546Sopenharmony_ci   }
316bf215546Sopenharmony_ci}
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci/* A SMEM clause is any group of consecutive SMEM instructions. The
319bf215546Sopenharmony_ci * instructions in this group may return out of order and/or may be replayed.
320bf215546Sopenharmony_ci *
321bf215546Sopenharmony_ci * To fix this potential hazard correctly, we have to make sure that when a
322bf215546Sopenharmony_ci * clause has more than one instruction, no instruction in the clause writes
323bf215546Sopenharmony_ci * to a register that is read by another instruction in the clause (including
324bf215546Sopenharmony_ci * itself). In this case, we have to break the SMEM clause by inserting non
325bf215546Sopenharmony_ci * SMEM instructions.
326bf215546Sopenharmony_ci *
327bf215546Sopenharmony_ci * SMEM clauses are only present on GFX8+, and only matter when XNACK is set.
328bf215546Sopenharmony_ci */
329bf215546Sopenharmony_civoid
330bf215546Sopenharmony_cihandle_smem_clause_hazards(Program* program, NOP_ctx_gfx6& ctx, aco_ptr<Instruction>& instr,
331bf215546Sopenharmony_ci                           int* NOPs)
332bf215546Sopenharmony_ci{
333bf215546Sopenharmony_ci   /* break off from previous SMEM clause if needed */
334bf215546Sopenharmony_ci   if (!*NOPs & (ctx.smem_clause || ctx.smem_write)) {
335bf215546Sopenharmony_ci      /* Don't allow clauses with store instructions since the clause's
336bf215546Sopenharmony_ci       * instructions may use the same address. */
337bf215546Sopenharmony_ci      if (ctx.smem_write || instr->definitions.empty() ||
338bf215546Sopenharmony_ci          instr_info.is_atomic[(unsigned)instr->opcode]) {
339bf215546Sopenharmony_ci         *NOPs = 1;
340bf215546Sopenharmony_ci      } else if (program->dev.xnack_enabled) {
341bf215546Sopenharmony_ci         for (Operand op : instr->operands) {
342bf215546Sopenharmony_ci            if (!op.isConstant() &&
343bf215546Sopenharmony_ci                test_bitset_range(ctx.smem_clause_write, op.physReg(), op.size())) {
344bf215546Sopenharmony_ci               *NOPs = 1;
345bf215546Sopenharmony_ci               break;
346bf215546Sopenharmony_ci            }
347bf215546Sopenharmony_ci         }
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_ci         Definition def = instr->definitions[0];
350bf215546Sopenharmony_ci         if (!*NOPs && test_bitset_range(ctx.smem_clause_read_write, def.physReg(), def.size()))
351bf215546Sopenharmony_ci            *NOPs = 1;
352bf215546Sopenharmony_ci      }
353bf215546Sopenharmony_ci   }
354bf215546Sopenharmony_ci}
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci/* TODO: we don't handle accessing VCC using the actual SGPR instead of using the alias */
357bf215546Sopenharmony_civoid
358bf215546Sopenharmony_cihandle_instruction_gfx6(State& state, NOP_ctx_gfx6& ctx, aco_ptr<Instruction>& instr,
359bf215546Sopenharmony_ci                        std::vector<aco_ptr<Instruction>>& new_instructions)
360bf215546Sopenharmony_ci{
361bf215546Sopenharmony_ci   /* check hazards */
362bf215546Sopenharmony_ci   int NOPs = 0;
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ci   if (instr->isSMEM()) {
365bf215546Sopenharmony_ci      if (state.program->gfx_level == GFX6) {
366bf215546Sopenharmony_ci         /* A read of an SGPR by SMRD instruction requires 4 wait states
367bf215546Sopenharmony_ci          * when the SGPR was written by a VALU instruction. According to LLVM,
368bf215546Sopenharmony_ci          * there is also an undocumented hardware behavior when the buffer
369bf215546Sopenharmony_ci          * descriptor is written by a SALU instruction */
370bf215546Sopenharmony_ci         for (unsigned i = 0; i < instr->operands.size(); i++) {
371bf215546Sopenharmony_ci            Operand op = instr->operands[i];
372bf215546Sopenharmony_ci            if (op.isConstant())
373bf215546Sopenharmony_ci               continue;
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ci            bool is_buffer_desc = i == 0 && op.size() > 2;
376bf215546Sopenharmony_ci            if (is_buffer_desc)
377bf215546Sopenharmony_ci               handle_valu_salu_then_read_hazard(state, &NOPs, 4, op);
378bf215546Sopenharmony_ci            else
379bf215546Sopenharmony_ci               handle_valu_then_read_hazard(state, &NOPs, 4, op);
380bf215546Sopenharmony_ci         }
381bf215546Sopenharmony_ci      }
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci      handle_smem_clause_hazards(state.program, ctx, instr, &NOPs);
384bf215546Sopenharmony_ci   } else if (instr->isSALU()) {
385bf215546Sopenharmony_ci      if (instr->opcode == aco_opcode::s_setreg_b32 ||
386bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_setreg_imm32_b32 ||
387bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_getreg_b32) {
388bf215546Sopenharmony_ci         NOPs = MAX2(NOPs, ctx.setreg_then_getsetreg);
389bf215546Sopenharmony_ci      }
390bf215546Sopenharmony_ci
391bf215546Sopenharmony_ci      if (state.program->gfx_level == GFX9) {
392bf215546Sopenharmony_ci         if (instr->opcode == aco_opcode::s_movrels_b32 ||
393bf215546Sopenharmony_ci             instr->opcode == aco_opcode::s_movrels_b64 ||
394bf215546Sopenharmony_ci             instr->opcode == aco_opcode::s_movreld_b32 ||
395bf215546Sopenharmony_ci             instr->opcode == aco_opcode::s_movreld_b64) {
396bf215546Sopenharmony_ci            NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_moverel);
397bf215546Sopenharmony_ci         }
398bf215546Sopenharmony_ci      }
399bf215546Sopenharmony_ci
400bf215546Sopenharmony_ci      if (instr->opcode == aco_opcode::s_sendmsg || instr->opcode == aco_opcode::s_ttracedata)
401bf215546Sopenharmony_ci         NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_gds_msg_ttrace);
402bf215546Sopenharmony_ci   } else if (instr->isDS() && instr->ds().gds) {
403bf215546Sopenharmony_ci      NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_gds_msg_ttrace);
404bf215546Sopenharmony_ci   } else if (instr->isVALU() || instr->isVINTRP()) {
405bf215546Sopenharmony_ci      for (Operand op : instr->operands) {
406bf215546Sopenharmony_ci         if (op.physReg() == vccz)
407bf215546Sopenharmony_ci            NOPs = MAX2(NOPs, ctx.valu_wr_vcc_then_vccz);
408bf215546Sopenharmony_ci         if (op.physReg() == execz)
409bf215546Sopenharmony_ci            NOPs = MAX2(NOPs, ctx.valu_wr_exec_then_execz);
410bf215546Sopenharmony_ci      }
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_ci      if (instr->isDPP()) {
413bf215546Sopenharmony_ci         NOPs = MAX2(NOPs, ctx.valu_wr_exec_then_dpp);
414bf215546Sopenharmony_ci         handle_valu_then_read_hazard(state, &NOPs, 2, instr->operands[0]);
415bf215546Sopenharmony_ci      }
416bf215546Sopenharmony_ci
417bf215546Sopenharmony_ci      for (Definition def : instr->definitions) {
418bf215546Sopenharmony_ci         if (def.regClass().type() != RegType::sgpr) {
419bf215546Sopenharmony_ci            for (unsigned i = 0; i < def.size(); i++)
420bf215546Sopenharmony_ci               NOPs = MAX2(NOPs, ctx.vmem_store_then_wr_data[(def.physReg() & 0xff) + i]);
421bf215546Sopenharmony_ci         }
422bf215546Sopenharmony_ci      }
423bf215546Sopenharmony_ci
424bf215546Sopenharmony_ci      if ((instr->opcode == aco_opcode::v_readlane_b32 ||
425bf215546Sopenharmony_ci           instr->opcode == aco_opcode::v_readlane_b32_e64 ||
426bf215546Sopenharmony_ci           instr->opcode == aco_opcode::v_writelane_b32 ||
427bf215546Sopenharmony_ci           instr->opcode == aco_opcode::v_writelane_b32_e64) &&
428bf215546Sopenharmony_ci          !instr->operands[1].isConstant()) {
429bf215546Sopenharmony_ci         handle_valu_then_read_hazard(state, &NOPs, 4, instr->operands[1]);
430bf215546Sopenharmony_ci      }
431bf215546Sopenharmony_ci
432bf215546Sopenharmony_ci      /* It's required to insert 1 wait state if the dst VGPR of any v_interp_*
433bf215546Sopenharmony_ci       * is followed by a read with v_readfirstlane or v_readlane to fix GPU
434bf215546Sopenharmony_ci       * hangs on GFX6. Note that v_writelane_* is apparently not affected.
435bf215546Sopenharmony_ci       * This hazard isn't documented anywhere but AMD confirmed that hazard.
436bf215546Sopenharmony_ci       */
437bf215546Sopenharmony_ci      if (state.program->gfx_level == GFX6 &&
438bf215546Sopenharmony_ci          (instr->opcode == aco_opcode::v_readlane_b32 || /* GFX6 doesn't have v_readlane_b32_e64 */
439bf215546Sopenharmony_ci           instr->opcode == aco_opcode::v_readfirstlane_b32)) {
440bf215546Sopenharmony_ci         handle_vintrp_then_read_hazard(state, &NOPs, 1, instr->operands[0]);
441bf215546Sopenharmony_ci      }
442bf215546Sopenharmony_ci
443bf215546Sopenharmony_ci      if (instr->opcode == aco_opcode::v_div_fmas_f32 ||
444bf215546Sopenharmony_ci          instr->opcode == aco_opcode::v_div_fmas_f64)
445bf215546Sopenharmony_ci         NOPs = MAX2(NOPs, ctx.valu_wr_vcc_then_div_fmas);
446bf215546Sopenharmony_ci   } else if (instr->isVMEM() || instr->isFlatLike()) {
447bf215546Sopenharmony_ci      /* If the VALU writes the SGPR that is used by a VMEM, the user must add five wait states. */
448bf215546Sopenharmony_ci      for (Operand op : instr->operands) {
449bf215546Sopenharmony_ci         if (!op.isConstant() && !op.isUndefined() && op.regClass().type() == RegType::sgpr)
450bf215546Sopenharmony_ci            handle_valu_then_read_hazard(state, &NOPs, 5, op);
451bf215546Sopenharmony_ci      }
452bf215546Sopenharmony_ci   }
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_ci   if (!instr->isSALU() && instr->format != Format::SMEM)
455bf215546Sopenharmony_ci      NOPs = MAX2(NOPs, ctx.set_vskip_mode_then_vector);
456bf215546Sopenharmony_ci
457bf215546Sopenharmony_ci   if (state.program->gfx_level == GFX9) {
458bf215546Sopenharmony_ci      bool lds_scratch_global = (instr->isScratch() || instr->isGlobal()) && instr->flatlike().lds;
459bf215546Sopenharmony_ci      if (instr->isVINTRP() || lds_scratch_global ||
460bf215546Sopenharmony_ci          instr->opcode == aco_opcode::ds_read_addtid_b32 ||
461bf215546Sopenharmony_ci          instr->opcode == aco_opcode::ds_write_addtid_b32 ||
462bf215546Sopenharmony_ci          instr->opcode == aco_opcode::buffer_store_lds_dword) {
463bf215546Sopenharmony_ci         NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_lds);
464bf215546Sopenharmony_ci      }
465bf215546Sopenharmony_ci   }
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci   ctx.add_wait_states(NOPs + get_wait_states(instr));
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_ci   // TODO: try to schedule the NOP-causing instruction up to reduce the number of stall cycles
470bf215546Sopenharmony_ci   if (NOPs) {
471bf215546Sopenharmony_ci      /* create NOP */
472bf215546Sopenharmony_ci      aco_ptr<SOPP_instruction> nop{
473bf215546Sopenharmony_ci         create_instruction<SOPP_instruction>(aco_opcode::s_nop, Format::SOPP, 0, 0)};
474bf215546Sopenharmony_ci      nop->imm = NOPs - 1;
475bf215546Sopenharmony_ci      nop->block = -1;
476bf215546Sopenharmony_ci      new_instructions.emplace_back(std::move(nop));
477bf215546Sopenharmony_ci   }
478bf215546Sopenharmony_ci
479bf215546Sopenharmony_ci   /* update information to check for later hazards */
480bf215546Sopenharmony_ci   if ((ctx.smem_clause || ctx.smem_write) && (NOPs || instr->format != Format::SMEM)) {
481bf215546Sopenharmony_ci      ctx.smem_clause = false;
482bf215546Sopenharmony_ci      ctx.smem_write = false;
483bf215546Sopenharmony_ci
484bf215546Sopenharmony_ci      if (state.program->dev.xnack_enabled) {
485bf215546Sopenharmony_ci         BITSET_ZERO(ctx.smem_clause_read_write);
486bf215546Sopenharmony_ci         BITSET_ZERO(ctx.smem_clause_write);
487bf215546Sopenharmony_ci      }
488bf215546Sopenharmony_ci   }
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci   if (instr->isSMEM()) {
491bf215546Sopenharmony_ci      if (instr->definitions.empty() || instr_info.is_atomic[(unsigned)instr->opcode]) {
492bf215546Sopenharmony_ci         ctx.smem_write = true;
493bf215546Sopenharmony_ci      } else {
494bf215546Sopenharmony_ci         ctx.smem_clause = true;
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_ci         if (state.program->dev.xnack_enabled) {
497bf215546Sopenharmony_ci            for (Operand op : instr->operands) {
498bf215546Sopenharmony_ci               if (!op.isConstant()) {
499bf215546Sopenharmony_ci                  set_bitset_range(ctx.smem_clause_read_write, op.physReg(), op.size());
500bf215546Sopenharmony_ci               }
501bf215546Sopenharmony_ci            }
502bf215546Sopenharmony_ci
503bf215546Sopenharmony_ci            Definition def = instr->definitions[0];
504bf215546Sopenharmony_ci            set_bitset_range(ctx.smem_clause_read_write, def.physReg(), def.size());
505bf215546Sopenharmony_ci            set_bitset_range(ctx.smem_clause_write, def.physReg(), def.size());
506bf215546Sopenharmony_ci         }
507bf215546Sopenharmony_ci      }
508bf215546Sopenharmony_ci   } else if (instr->isVALU()) {
509bf215546Sopenharmony_ci      for (Definition def : instr->definitions) {
510bf215546Sopenharmony_ci         if (def.regClass().type() == RegType::sgpr) {
511bf215546Sopenharmony_ci            if (def.physReg() == vcc || def.physReg() == vcc_hi) {
512bf215546Sopenharmony_ci               ctx.valu_wr_vcc_then_vccz = 5;
513bf215546Sopenharmony_ci               ctx.valu_wr_vcc_then_div_fmas = 4;
514bf215546Sopenharmony_ci            }
515bf215546Sopenharmony_ci            if (def.physReg() == exec || def.physReg() == exec_hi) {
516bf215546Sopenharmony_ci               ctx.valu_wr_exec_then_execz = 5;
517bf215546Sopenharmony_ci               ctx.valu_wr_exec_then_dpp = 5;
518bf215546Sopenharmony_ci            }
519bf215546Sopenharmony_ci         }
520bf215546Sopenharmony_ci      }
521bf215546Sopenharmony_ci   } else if (instr->isSALU() && !instr->definitions.empty()) {
522bf215546Sopenharmony_ci      if (!instr->definitions.empty()) {
523bf215546Sopenharmony_ci         /* all other definitions should be SCC */
524bf215546Sopenharmony_ci         Definition def = instr->definitions[0];
525bf215546Sopenharmony_ci         if (def.physReg() == m0) {
526bf215546Sopenharmony_ci            ctx.salu_wr_m0_then_gds_msg_ttrace = 1;
527bf215546Sopenharmony_ci            ctx.salu_wr_m0_then_lds = 1;
528bf215546Sopenharmony_ci            ctx.salu_wr_m0_then_moverel = 1;
529bf215546Sopenharmony_ci         }
530bf215546Sopenharmony_ci      } else if (instr->opcode == aco_opcode::s_setreg_b32 ||
531bf215546Sopenharmony_ci                 instr->opcode == aco_opcode::s_setreg_imm32_b32) {
532bf215546Sopenharmony_ci         SOPK_instruction& sopk = instr->sopk();
533bf215546Sopenharmony_ci         unsigned offset = (sopk.imm >> 6) & 0x1f;
534bf215546Sopenharmony_ci         unsigned size = ((sopk.imm >> 11) & 0x1f) + 1;
535bf215546Sopenharmony_ci         unsigned reg = sopk.imm & 0x3f;
536bf215546Sopenharmony_ci         ctx.setreg_then_getsetreg = 2;
537bf215546Sopenharmony_ci
538bf215546Sopenharmony_ci         if (reg == 1 && offset >= 28 && size > (28 - offset))
539bf215546Sopenharmony_ci            ctx.set_vskip_mode_then_vector = 2;
540bf215546Sopenharmony_ci      }
541bf215546Sopenharmony_ci   } else if (instr->isVMEM() || instr->isFlatLike()) {
542bf215546Sopenharmony_ci      /* >64-bit MUBUF/MTBUF store with a constant in SOFFSET */
543bf215546Sopenharmony_ci      bool consider_buf = (instr->isMUBUF() || instr->isMTBUF()) && instr->operands.size() == 4 &&
544bf215546Sopenharmony_ci                          instr->operands[3].size() > 2 && instr->operands[2].physReg() >= 128;
545bf215546Sopenharmony_ci      /* MIMG store with a 128-bit T# with more than two bits set in dmask (making it a >64-bit
546bf215546Sopenharmony_ci       * store) */
547bf215546Sopenharmony_ci      bool consider_mimg = instr->isMIMG() &&
548bf215546Sopenharmony_ci                           instr->operands[1].regClass().type() == RegType::vgpr &&
549bf215546Sopenharmony_ci                           instr->operands[1].size() > 2 && instr->operands[0].size() == 4;
550bf215546Sopenharmony_ci      /* FLAT/GLOBAL/SCRATCH store with >64-bit data */
551bf215546Sopenharmony_ci      bool consider_flat =
552bf215546Sopenharmony_ci         instr->isFlatLike() && instr->operands.size() == 3 && instr->operands[2].size() > 2;
553bf215546Sopenharmony_ci      if (consider_buf || consider_mimg || consider_flat) {
554bf215546Sopenharmony_ci         PhysReg wrdata = instr->operands[consider_flat ? 2 : 3].physReg();
555bf215546Sopenharmony_ci         unsigned size = instr->operands[consider_flat ? 2 : 3].size();
556bf215546Sopenharmony_ci         for (unsigned i = 0; i < size; i++)
557bf215546Sopenharmony_ci            ctx.vmem_store_then_wr_data[(wrdata & 0xff) + i] = 1;
558bf215546Sopenharmony_ci      }
559bf215546Sopenharmony_ci   }
560bf215546Sopenharmony_ci}
561bf215546Sopenharmony_ci
562bf215546Sopenharmony_citemplate <std::size_t N>
563bf215546Sopenharmony_cibool
564bf215546Sopenharmony_cicheck_written_regs(const aco_ptr<Instruction>& instr, const std::bitset<N>& check_regs)
565bf215546Sopenharmony_ci{
566bf215546Sopenharmony_ci   return std::any_of(instr->definitions.begin(), instr->definitions.end(),
567bf215546Sopenharmony_ci                      [&check_regs](const Definition& def) -> bool
568bf215546Sopenharmony_ci                      {
569bf215546Sopenharmony_ci                         bool writes_any = false;
570bf215546Sopenharmony_ci                         for (unsigned i = 0; i < def.size(); i++) {
571bf215546Sopenharmony_ci                            unsigned def_reg = def.physReg() + i;
572bf215546Sopenharmony_ci                            writes_any |= def_reg < check_regs.size() && check_regs[def_reg];
573bf215546Sopenharmony_ci                         }
574bf215546Sopenharmony_ci                         return writes_any;
575bf215546Sopenharmony_ci                      });
576bf215546Sopenharmony_ci}
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_citemplate <std::size_t N>
579bf215546Sopenharmony_civoid
580bf215546Sopenharmony_cimark_read_regs(const aco_ptr<Instruction>& instr, std::bitset<N>& reg_reads)
581bf215546Sopenharmony_ci{
582bf215546Sopenharmony_ci   for (const Operand& op : instr->operands) {
583bf215546Sopenharmony_ci      for (unsigned i = 0; i < op.size(); i++) {
584bf215546Sopenharmony_ci         unsigned reg = op.physReg() + i;
585bf215546Sopenharmony_ci         if (reg < reg_reads.size())
586bf215546Sopenharmony_ci            reg_reads.set(reg);
587bf215546Sopenharmony_ci      }
588bf215546Sopenharmony_ci   }
589bf215546Sopenharmony_ci}
590bf215546Sopenharmony_ci
591bf215546Sopenharmony_citemplate <std::size_t N>
592bf215546Sopenharmony_civoid
593bf215546Sopenharmony_cimark_read_regs_exec(State& state, const aco_ptr<Instruction>& instr, std::bitset<N>& reg_reads)
594bf215546Sopenharmony_ci{
595bf215546Sopenharmony_ci   mark_read_regs(instr, reg_reads);
596bf215546Sopenharmony_ci   reg_reads.set(exec);
597bf215546Sopenharmony_ci   if (state.program->wave_size == 64)
598bf215546Sopenharmony_ci      reg_reads.set(exec_hi);
599bf215546Sopenharmony_ci}
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_cibool
602bf215546Sopenharmony_ciVALU_writes_sgpr(aco_ptr<Instruction>& instr)
603bf215546Sopenharmony_ci{
604bf215546Sopenharmony_ci   if (instr->isVOPC())
605bf215546Sopenharmony_ci      return true;
606bf215546Sopenharmony_ci   if (instr->isVOP3() && instr->definitions.size() == 2)
607bf215546Sopenharmony_ci      return true;
608bf215546Sopenharmony_ci   if (instr->opcode == aco_opcode::v_readfirstlane_b32 ||
609bf215546Sopenharmony_ci       instr->opcode == aco_opcode::v_readlane_b32 ||
610bf215546Sopenharmony_ci       instr->opcode == aco_opcode::v_readlane_b32_e64)
611bf215546Sopenharmony_ci      return true;
612bf215546Sopenharmony_ci   return false;
613bf215546Sopenharmony_ci}
614bf215546Sopenharmony_ci
615bf215546Sopenharmony_cibool
616bf215546Sopenharmony_ciinstr_writes_exec(const aco_ptr<Instruction>& instr)
617bf215546Sopenharmony_ci{
618bf215546Sopenharmony_ci   return std::any_of(instr->definitions.begin(), instr->definitions.end(),
619bf215546Sopenharmony_ci                      [](const Definition& def) -> bool
620bf215546Sopenharmony_ci                      { return def.physReg() == exec_lo || def.physReg() == exec_hi; });
621bf215546Sopenharmony_ci}
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_cibool
624bf215546Sopenharmony_ciinstr_writes_sgpr(const aco_ptr<Instruction>& instr)
625bf215546Sopenharmony_ci{
626bf215546Sopenharmony_ci   return std::any_of(instr->definitions.begin(), instr->definitions.end(),
627bf215546Sopenharmony_ci                      [](const Definition& def) -> bool
628bf215546Sopenharmony_ci                      { return def.getTemp().type() == RegType::sgpr; });
629bf215546Sopenharmony_ci}
630bf215546Sopenharmony_ci
631bf215546Sopenharmony_ciinline bool
632bf215546Sopenharmony_ciinstr_is_branch(const aco_ptr<Instruction>& instr)
633bf215546Sopenharmony_ci{
634bf215546Sopenharmony_ci   return instr->opcode == aco_opcode::s_branch || instr->opcode == aco_opcode::s_cbranch_scc0 ||
635bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_cbranch_scc1 ||
636bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_cbranch_vccz ||
637bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_cbranch_vccnz ||
638bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_cbranch_execz ||
639bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_cbranch_execnz ||
640bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_cbranch_cdbgsys ||
641bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_cbranch_cdbguser ||
642bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_cbranch_cdbgsys_or_user ||
643bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_cbranch_cdbgsys_and_user ||
644bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_subvector_loop_begin ||
645bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_subvector_loop_end ||
646bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_setpc_b64 || instr->opcode == aco_opcode::s_swappc_b64 ||
647bf215546Sopenharmony_ci          instr->opcode == aco_opcode::s_getpc_b64 || instr->opcode == aco_opcode::s_call_b64;
648bf215546Sopenharmony_ci}
649bf215546Sopenharmony_ci
650bf215546Sopenharmony_civoid
651bf215546Sopenharmony_cihandle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>& instr,
652bf215546Sopenharmony_ci                         std::vector<aco_ptr<Instruction>>& new_instructions)
653bf215546Sopenharmony_ci{
654bf215546Sopenharmony_ci   // TODO: s_dcache_inv needs to be in it's own group on GFX10
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_ci   /* VMEMtoScalarWriteHazard
657bf215546Sopenharmony_ci    * Handle EXEC/M0/SGPR write following a VMEM/DS instruction without a VALU or "waitcnt vmcnt(0)"
658bf215546Sopenharmony_ci    * in-between.
659bf215546Sopenharmony_ci    */
660bf215546Sopenharmony_ci   if (instr->isVMEM() || instr->isFlatLike() || instr->isDS()) {
661bf215546Sopenharmony_ci      /* Remember all SGPRs that are read by the VMEM/DS instruction */
662bf215546Sopenharmony_ci      if (instr->isVMEM() || instr->isFlatLike())
663bf215546Sopenharmony_ci         mark_read_regs_exec(
664bf215546Sopenharmony_ci            state, instr,
665bf215546Sopenharmony_ci            instr->definitions.empty() ? ctx.sgprs_read_by_VMEM_store : ctx.sgprs_read_by_VMEM);
666bf215546Sopenharmony_ci      if (instr->isFlat() || instr->isDS())
667bf215546Sopenharmony_ci         mark_read_regs_exec(state, instr, ctx.sgprs_read_by_DS);
668bf215546Sopenharmony_ci   } else if (instr->isSALU() || instr->isSMEM()) {
669bf215546Sopenharmony_ci      if (instr->opcode == aco_opcode::s_waitcnt) {
670bf215546Sopenharmony_ci         wait_imm imm(state.program->gfx_level, instr->sopp().imm);
671bf215546Sopenharmony_ci         if (imm.vm == 0)
672bf215546Sopenharmony_ci            ctx.sgprs_read_by_VMEM.reset();
673bf215546Sopenharmony_ci      } else if (instr->opcode == aco_opcode::s_waitcnt_depctr && instr->sopp().imm == 0xffe3) {
674bf215546Sopenharmony_ci         /* Hazard is mitigated by a s_waitcnt_depctr with a magic imm */
675bf215546Sopenharmony_ci         ctx.sgprs_read_by_VMEM.reset();
676bf215546Sopenharmony_ci         ctx.sgprs_read_by_DS.reset();
677bf215546Sopenharmony_ci         ctx.sgprs_read_by_VMEM_store.reset();
678bf215546Sopenharmony_ci      }
679bf215546Sopenharmony_ci
680bf215546Sopenharmony_ci      /* Check if SALU writes an SGPR that was previously read by the VALU */
681bf215546Sopenharmony_ci      if (check_written_regs(instr, ctx.sgprs_read_by_VMEM) ||
682bf215546Sopenharmony_ci          check_written_regs(instr, ctx.sgprs_read_by_DS) ||
683bf215546Sopenharmony_ci          check_written_regs(instr, ctx.sgprs_read_by_VMEM_store)) {
684bf215546Sopenharmony_ci         ctx.sgprs_read_by_VMEM.reset();
685bf215546Sopenharmony_ci         ctx.sgprs_read_by_DS.reset();
686bf215546Sopenharmony_ci         ctx.sgprs_read_by_VMEM_store.reset();
687bf215546Sopenharmony_ci
688bf215546Sopenharmony_ci         /* Insert s_waitcnt_depctr instruction with magic imm to mitigate the problem */
689bf215546Sopenharmony_ci         aco_ptr<SOPP_instruction> depctr{
690bf215546Sopenharmony_ci            create_instruction<SOPP_instruction>(aco_opcode::s_waitcnt_depctr, Format::SOPP, 0, 0)};
691bf215546Sopenharmony_ci         depctr->imm = 0xffe3;
692bf215546Sopenharmony_ci         depctr->block = -1;
693bf215546Sopenharmony_ci         new_instructions.emplace_back(std::move(depctr));
694bf215546Sopenharmony_ci      }
695bf215546Sopenharmony_ci   } else if (instr->isVALU()) {
696bf215546Sopenharmony_ci      /* Hazard is mitigated by any VALU instruction */
697bf215546Sopenharmony_ci      ctx.sgprs_read_by_VMEM.reset();
698bf215546Sopenharmony_ci      ctx.sgprs_read_by_DS.reset();
699bf215546Sopenharmony_ci      ctx.sgprs_read_by_VMEM_store.reset();
700bf215546Sopenharmony_ci   }
701bf215546Sopenharmony_ci
702bf215546Sopenharmony_ci   /* VcmpxPermlaneHazard
703bf215546Sopenharmony_ci    * Handle any permlane following a VOPC instruction, insert v_mov between them.
704bf215546Sopenharmony_ci    */
705bf215546Sopenharmony_ci   if (instr->isVOPC()) {
706bf215546Sopenharmony_ci      ctx.has_VOPC = true;
707bf215546Sopenharmony_ci   } else if (ctx.has_VOPC && (instr->opcode == aco_opcode::v_permlane16_b32 ||
708bf215546Sopenharmony_ci                               instr->opcode == aco_opcode::v_permlanex16_b32)) {
709bf215546Sopenharmony_ci      ctx.has_VOPC = false;
710bf215546Sopenharmony_ci
711bf215546Sopenharmony_ci      /* v_nop would be discarded by SQ, so use v_mov with the first operand of the permlane */
712bf215546Sopenharmony_ci      aco_ptr<VOP1_instruction> v_mov{
713bf215546Sopenharmony_ci         create_instruction<VOP1_instruction>(aco_opcode::v_mov_b32, Format::VOP1, 1, 1)};
714bf215546Sopenharmony_ci      v_mov->definitions[0] = Definition(instr->operands[0].physReg(), v1);
715bf215546Sopenharmony_ci      v_mov->operands[0] = Operand(instr->operands[0].physReg(), v1);
716bf215546Sopenharmony_ci      new_instructions.emplace_back(std::move(v_mov));
717bf215546Sopenharmony_ci   } else if (instr->isVALU() && instr->opcode != aco_opcode::v_nop) {
718bf215546Sopenharmony_ci      ctx.has_VOPC = false;
719bf215546Sopenharmony_ci   }
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci   /* VcmpxExecWARHazard
722bf215546Sopenharmony_ci    * Handle any VALU instruction writing the exec mask after it was read by a non-VALU instruction.
723bf215546Sopenharmony_ci    */
724bf215546Sopenharmony_ci   if (!instr->isVALU() && instr->reads_exec()) {
725bf215546Sopenharmony_ci      ctx.has_nonVALU_exec_read = true;
726bf215546Sopenharmony_ci   } else if (instr->isVALU()) {
727bf215546Sopenharmony_ci      if (instr_writes_exec(instr)) {
728bf215546Sopenharmony_ci         ctx.has_nonVALU_exec_read = false;
729bf215546Sopenharmony_ci
730bf215546Sopenharmony_ci         /* Insert s_waitcnt_depctr instruction with magic imm to mitigate the problem */
731bf215546Sopenharmony_ci         aco_ptr<SOPP_instruction> depctr{
732bf215546Sopenharmony_ci            create_instruction<SOPP_instruction>(aco_opcode::s_waitcnt_depctr, Format::SOPP, 0, 0)};
733bf215546Sopenharmony_ci         depctr->imm = 0xfffe;
734bf215546Sopenharmony_ci         depctr->block = -1;
735bf215546Sopenharmony_ci         new_instructions.emplace_back(std::move(depctr));
736bf215546Sopenharmony_ci      } else if (instr_writes_sgpr(instr)) {
737bf215546Sopenharmony_ci         /* Any VALU instruction that writes an SGPR mitigates the problem */
738bf215546Sopenharmony_ci         ctx.has_nonVALU_exec_read = false;
739bf215546Sopenharmony_ci      }
740bf215546Sopenharmony_ci   } else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
741bf215546Sopenharmony_ci      /* s_waitcnt_depctr can mitigate the problem if it has a magic imm */
742bf215546Sopenharmony_ci      if ((instr->sopp().imm & 0xfffe) == 0xfffe)
743bf215546Sopenharmony_ci         ctx.has_nonVALU_exec_read = false;
744bf215546Sopenharmony_ci   }
745bf215546Sopenharmony_ci
746bf215546Sopenharmony_ci   /* SMEMtoVectorWriteHazard
747bf215546Sopenharmony_ci    * Handle any VALU instruction writing an SGPR after an SMEM reads it.
748bf215546Sopenharmony_ci    */
749bf215546Sopenharmony_ci   if (instr->isSMEM()) {
750bf215546Sopenharmony_ci      /* Remember all SGPRs that are read by the SMEM instruction */
751bf215546Sopenharmony_ci      mark_read_regs(instr, ctx.sgprs_read_by_SMEM);
752bf215546Sopenharmony_ci   } else if (VALU_writes_sgpr(instr)) {
753bf215546Sopenharmony_ci      /* Check if VALU writes an SGPR that was previously read by SMEM */
754bf215546Sopenharmony_ci      if (check_written_regs(instr, ctx.sgprs_read_by_SMEM)) {
755bf215546Sopenharmony_ci         ctx.sgprs_read_by_SMEM.reset();
756bf215546Sopenharmony_ci
757bf215546Sopenharmony_ci         /* Insert s_mov to mitigate the problem */
758bf215546Sopenharmony_ci         aco_ptr<SOP1_instruction> s_mov{
759bf215546Sopenharmony_ci            create_instruction<SOP1_instruction>(aco_opcode::s_mov_b32, Format::SOP1, 1, 1)};
760bf215546Sopenharmony_ci         s_mov->definitions[0] = Definition(sgpr_null, s1);
761bf215546Sopenharmony_ci         s_mov->operands[0] = Operand::zero();
762bf215546Sopenharmony_ci         new_instructions.emplace_back(std::move(s_mov));
763bf215546Sopenharmony_ci      }
764bf215546Sopenharmony_ci   } else if (instr->isSALU()) {
765bf215546Sopenharmony_ci      if (instr->format != Format::SOPP) {
766bf215546Sopenharmony_ci         /* SALU can mitigate the hazard */
767bf215546Sopenharmony_ci         ctx.sgprs_read_by_SMEM.reset();
768bf215546Sopenharmony_ci      } else {
769bf215546Sopenharmony_ci         /* Reducing lgkmcnt count to 0 always mitigates the hazard. */
770bf215546Sopenharmony_ci         const SOPP_instruction& sopp = instr->sopp();
771bf215546Sopenharmony_ci         if (sopp.opcode == aco_opcode::s_waitcnt_lgkmcnt) {
772bf215546Sopenharmony_ci            if (sopp.imm == 0 && sopp.definitions[0].physReg() == sgpr_null)
773bf215546Sopenharmony_ci               ctx.sgprs_read_by_SMEM.reset();
774bf215546Sopenharmony_ci         } else if (sopp.opcode == aco_opcode::s_waitcnt) {
775bf215546Sopenharmony_ci            unsigned lgkm = (sopp.imm >> 8) & 0x3f;
776bf215546Sopenharmony_ci            if (lgkm == 0)
777bf215546Sopenharmony_ci               ctx.sgprs_read_by_SMEM.reset();
778bf215546Sopenharmony_ci         }
779bf215546Sopenharmony_ci      }
780bf215546Sopenharmony_ci   }
781bf215546Sopenharmony_ci
782bf215546Sopenharmony_ci   /* LdsBranchVmemWARHazard
783bf215546Sopenharmony_ci    * Handle VMEM/GLOBAL/SCRATCH->branch->DS and DS->branch->VMEM/GLOBAL/SCRATCH patterns.
784bf215546Sopenharmony_ci    */
785bf215546Sopenharmony_ci   if (instr->isVMEM() || instr->isGlobal() || instr->isScratch()) {
786bf215546Sopenharmony_ci      ctx.has_VMEM = true;
787bf215546Sopenharmony_ci      ctx.has_branch_after_VMEM = false;
788bf215546Sopenharmony_ci      /* Mitigation for DS is needed only if there was already a branch after */
789bf215546Sopenharmony_ci      ctx.has_DS = ctx.has_branch_after_DS;
790bf215546Sopenharmony_ci   } else if (instr->isDS()) {
791bf215546Sopenharmony_ci      ctx.has_DS = true;
792bf215546Sopenharmony_ci      ctx.has_branch_after_DS = false;
793bf215546Sopenharmony_ci      /* Mitigation for VMEM is needed only if there was already a branch after */
794bf215546Sopenharmony_ci      ctx.has_VMEM = ctx.has_branch_after_VMEM;
795bf215546Sopenharmony_ci   } else if (instr_is_branch(instr)) {
796bf215546Sopenharmony_ci      ctx.has_branch_after_VMEM = ctx.has_VMEM;
797bf215546Sopenharmony_ci      ctx.has_branch_after_DS = ctx.has_DS;
798bf215546Sopenharmony_ci   } else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) {
799bf215546Sopenharmony_ci      /* Only s_waitcnt_vscnt can mitigate the hazard */
800bf215546Sopenharmony_ci      const SOPK_instruction& sopk = instr->sopk();
801bf215546Sopenharmony_ci      if (sopk.definitions[0].physReg() == sgpr_null && sopk.imm == 0)
802bf215546Sopenharmony_ci         ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false;
803bf215546Sopenharmony_ci   }
804bf215546Sopenharmony_ci   if ((ctx.has_VMEM && ctx.has_branch_after_DS) || (ctx.has_DS && ctx.has_branch_after_VMEM)) {
805bf215546Sopenharmony_ci      ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false;
806bf215546Sopenharmony_ci
807bf215546Sopenharmony_ci      /* Insert s_waitcnt_vscnt to mitigate the problem */
808bf215546Sopenharmony_ci      aco_ptr<SOPK_instruction> wait{
809bf215546Sopenharmony_ci         create_instruction<SOPK_instruction>(aco_opcode::s_waitcnt_vscnt, Format::SOPK, 0, 1)};
810bf215546Sopenharmony_ci      wait->definitions[0] = Definition(sgpr_null, s1);
811bf215546Sopenharmony_ci      wait->imm = 0;
812bf215546Sopenharmony_ci      new_instructions.emplace_back(std::move(wait));
813bf215546Sopenharmony_ci
814bf215546Sopenharmony_ci      ctx.has_VMEM = instr->isVMEM() || instr->isGlobal() || instr->isScratch();
815bf215546Sopenharmony_ci      ctx.has_DS = instr->isDS();
816bf215546Sopenharmony_ci   }
817bf215546Sopenharmony_ci
818bf215546Sopenharmony_ci   /* NSAToVMEMBug
819bf215546Sopenharmony_ci    * Handles NSA MIMG (4 or more dwords) immediately followed by MUBUF/MTBUF (with offset[2:1] !=
820bf215546Sopenharmony_ci    * 0).
821bf215546Sopenharmony_ci    */
822bf215546Sopenharmony_ci   if (instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 1) {
823bf215546Sopenharmony_ci      ctx.has_NSA_MIMG = true;
824bf215546Sopenharmony_ci   } else if (ctx.has_NSA_MIMG) {
825bf215546Sopenharmony_ci      ctx.has_NSA_MIMG = false;
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_ci      if (instr->isMUBUF() || instr->isMTBUF()) {
828bf215546Sopenharmony_ci         uint32_t offset = instr->isMUBUF() ? instr->mubuf().offset : instr->mtbuf().offset;
829bf215546Sopenharmony_ci         if (offset & 6)
830bf215546Sopenharmony_ci            Builder(state.program, &new_instructions).sopp(aco_opcode::s_nop, -1, 0);
831bf215546Sopenharmony_ci      }
832bf215546Sopenharmony_ci   }
833bf215546Sopenharmony_ci
834bf215546Sopenharmony_ci   /* waNsaCannotFollowWritelane
835bf215546Sopenharmony_ci    * Handles NSA MIMG immediately following a v_writelane_b32.
836bf215546Sopenharmony_ci    */
837bf215546Sopenharmony_ci   if (instr->opcode == aco_opcode::v_writelane_b32_e64) {
838bf215546Sopenharmony_ci      ctx.has_writelane = true;
839bf215546Sopenharmony_ci   } else if (ctx.has_writelane) {
840bf215546Sopenharmony_ci      ctx.has_writelane = false;
841bf215546Sopenharmony_ci      if (instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 0)
842bf215546Sopenharmony_ci         Builder(state.program, &new_instructions).sopp(aco_opcode::s_nop, -1, 0);
843bf215546Sopenharmony_ci   }
844bf215546Sopenharmony_ci}
845bf215546Sopenharmony_ci
846bf215546Sopenharmony_citemplate <typename Ctx>
847bf215546Sopenharmony_ciusing HandleInstr = void (*)(State& state, Ctx&, aco_ptr<Instruction>&,
848bf215546Sopenharmony_ci                             std::vector<aco_ptr<Instruction>>&);
849bf215546Sopenharmony_ci
850bf215546Sopenharmony_citemplate <typename Ctx, HandleInstr<Ctx> Handle>
851bf215546Sopenharmony_civoid
852bf215546Sopenharmony_cihandle_block(Program* program, Ctx& ctx, Block& block)
853bf215546Sopenharmony_ci{
854bf215546Sopenharmony_ci   if (block.instructions.empty())
855bf215546Sopenharmony_ci      return;
856bf215546Sopenharmony_ci
857bf215546Sopenharmony_ci   State state;
858bf215546Sopenharmony_ci   state.program = program;
859bf215546Sopenharmony_ci   state.block = &block;
860bf215546Sopenharmony_ci   state.old_instructions = std::move(block.instructions);
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci   block.instructions.clear(); // Silence clang-analyzer-cplusplus.Move warning
863bf215546Sopenharmony_ci   block.instructions.reserve(state.old_instructions.size());
864bf215546Sopenharmony_ci
865bf215546Sopenharmony_ci   for (aco_ptr<Instruction>& instr : state.old_instructions) {
866bf215546Sopenharmony_ci      Handle(state, ctx, instr, block.instructions);
867bf215546Sopenharmony_ci      block.instructions.emplace_back(std::move(instr));
868bf215546Sopenharmony_ci   }
869bf215546Sopenharmony_ci}
870bf215546Sopenharmony_ci
871bf215546Sopenharmony_citemplate <typename Ctx, HandleInstr<Ctx> Handle>
872bf215546Sopenharmony_civoid
873bf215546Sopenharmony_cimitigate_hazards(Program* program)
874bf215546Sopenharmony_ci{
875bf215546Sopenharmony_ci   std::vector<Ctx> all_ctx(program->blocks.size());
876bf215546Sopenharmony_ci   std::stack<unsigned, std::vector<unsigned>> loop_header_indices;
877bf215546Sopenharmony_ci
878bf215546Sopenharmony_ci   for (unsigned i = 0; i < program->blocks.size(); i++) {
879bf215546Sopenharmony_ci      Block& block = program->blocks[i];
880bf215546Sopenharmony_ci      Ctx& ctx = all_ctx[i];
881bf215546Sopenharmony_ci
882bf215546Sopenharmony_ci      if (block.kind & block_kind_loop_header) {
883bf215546Sopenharmony_ci         loop_header_indices.push(i);
884bf215546Sopenharmony_ci      } else if (block.kind & block_kind_loop_exit) {
885bf215546Sopenharmony_ci         /* Go through the whole loop again */
886bf215546Sopenharmony_ci         for (unsigned idx = loop_header_indices.top(); idx < i; idx++) {
887bf215546Sopenharmony_ci            Ctx loop_block_ctx;
888bf215546Sopenharmony_ci            for (unsigned b : program->blocks[idx].linear_preds)
889bf215546Sopenharmony_ci               loop_block_ctx.join(all_ctx[b]);
890bf215546Sopenharmony_ci
891bf215546Sopenharmony_ci            handle_block<Ctx, Handle>(program, loop_block_ctx, program->blocks[idx]);
892bf215546Sopenharmony_ci
893bf215546Sopenharmony_ci            /* We only need to continue if the loop header context changed */
894bf215546Sopenharmony_ci            if (idx == loop_header_indices.top() && loop_block_ctx == all_ctx[idx])
895bf215546Sopenharmony_ci               break;
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_ci            all_ctx[idx] = loop_block_ctx;
898bf215546Sopenharmony_ci         }
899bf215546Sopenharmony_ci
900bf215546Sopenharmony_ci         loop_header_indices.pop();
901bf215546Sopenharmony_ci      }
902bf215546Sopenharmony_ci
903bf215546Sopenharmony_ci      for (unsigned b : block.linear_preds)
904bf215546Sopenharmony_ci         ctx.join(all_ctx[b]);
905bf215546Sopenharmony_ci
906bf215546Sopenharmony_ci      handle_block<Ctx, Handle>(program, ctx, block);
907bf215546Sopenharmony_ci   }
908bf215546Sopenharmony_ci}
909bf215546Sopenharmony_ci
910bf215546Sopenharmony_ci} /* end namespace */
911bf215546Sopenharmony_ci
912bf215546Sopenharmony_civoid
913bf215546Sopenharmony_ciinsert_NOPs(Program* program)
914bf215546Sopenharmony_ci{
915bf215546Sopenharmony_ci   if (program->gfx_level >= GFX10_3)
916bf215546Sopenharmony_ci      ; /* no hazards/bugs to mitigate */
917bf215546Sopenharmony_ci   else if (program->gfx_level >= GFX10)
918bf215546Sopenharmony_ci      mitigate_hazards<NOP_ctx_gfx10, handle_instruction_gfx10>(program);
919bf215546Sopenharmony_ci   else
920bf215546Sopenharmony_ci      mitigate_hazards<NOP_ctx_gfx6, handle_instruction_gfx6>(program);
921bf215546Sopenharmony_ci}
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_ci} // namespace aco
924