1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2018 Valve Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "aco_ir.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "util/memstream.h"
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#include <array>
30bf215546Sopenharmony_ci#include <map>
31bf215546Sopenharmony_ci#include <set>
32bf215546Sopenharmony_ci#include <vector>
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_cinamespace aco {
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_cistatic void
37bf215546Sopenharmony_ciaco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix,
38bf215546Sopenharmony_ci        const char* file, unsigned line, const char* fmt, va_list args)
39bf215546Sopenharmony_ci{
40bf215546Sopenharmony_ci   char* msg;
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_ci   if (program->debug.shorten_messages) {
43bf215546Sopenharmony_ci      msg = ralloc_vasprintf(NULL, fmt, args);
44bf215546Sopenharmony_ci   } else {
45bf215546Sopenharmony_ci      msg = ralloc_strdup(NULL, prefix);
46bf215546Sopenharmony_ci      ralloc_asprintf_append(&msg, "    In file %s:%u\n", file, line);
47bf215546Sopenharmony_ci      ralloc_asprintf_append(&msg, "    ");
48bf215546Sopenharmony_ci      ralloc_vasprintf_append(&msg, fmt, args);
49bf215546Sopenharmony_ci   }
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci   if (program->debug.func)
52bf215546Sopenharmony_ci      program->debug.func(program->debug.private_data, level, msg);
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci   fprintf(program->debug.output, "%s\n", msg);
55bf215546Sopenharmony_ci
56bf215546Sopenharmony_ci   ralloc_free(msg);
57bf215546Sopenharmony_ci}
58bf215546Sopenharmony_ci
59bf215546Sopenharmony_civoid
60bf215546Sopenharmony_ci_aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...)
61bf215546Sopenharmony_ci{
62bf215546Sopenharmony_ci   va_list args;
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci   va_start(args, fmt);
65bf215546Sopenharmony_ci   aco_log(program, ACO_COMPILER_DEBUG_LEVEL_PERFWARN, "ACO PERFWARN:\n", file, line, fmt, args);
66bf215546Sopenharmony_ci   va_end(args);
67bf215546Sopenharmony_ci}
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_civoid
70bf215546Sopenharmony_ci_aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...)
71bf215546Sopenharmony_ci{
72bf215546Sopenharmony_ci   va_list args;
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_ci   va_start(args, fmt);
75bf215546Sopenharmony_ci   aco_log(program, ACO_COMPILER_DEBUG_LEVEL_ERROR, "ACO ERROR:\n", file, line, fmt, args);
76bf215546Sopenharmony_ci   va_end(args);
77bf215546Sopenharmony_ci}
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_cibool
80bf215546Sopenharmony_civalidate_ir(Program* program)
81bf215546Sopenharmony_ci{
82bf215546Sopenharmony_ci   bool is_valid = true;
83bf215546Sopenharmony_ci   auto check = [&program, &is_valid](bool success, const char* msg,
84bf215546Sopenharmony_ci                                      aco::Instruction* instr) -> void
85bf215546Sopenharmony_ci   {
86bf215546Sopenharmony_ci      if (!success) {
87bf215546Sopenharmony_ci         char* out;
88bf215546Sopenharmony_ci         size_t outsize;
89bf215546Sopenharmony_ci         struct u_memstream mem;
90bf215546Sopenharmony_ci         u_memstream_open(&mem, &out, &outsize);
91bf215546Sopenharmony_ci         FILE* const memf = u_memstream_get(&mem);
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci         fprintf(memf, "%s: ", msg);
94bf215546Sopenharmony_ci         aco_print_instr(instr, memf);
95bf215546Sopenharmony_ci         u_memstream_close(&mem);
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci         aco_err(program, "%s", out);
98bf215546Sopenharmony_ci         free(out);
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci         is_valid = false;
101bf215546Sopenharmony_ci      }
102bf215546Sopenharmony_ci   };
103bf215546Sopenharmony_ci
104bf215546Sopenharmony_ci   auto check_block = [&program, &is_valid](bool success, const char* msg,
105bf215546Sopenharmony_ci                                            aco::Block* block) -> void
106bf215546Sopenharmony_ci   {
107bf215546Sopenharmony_ci      if (!success) {
108bf215546Sopenharmony_ci         aco_err(program, "%s: BB%u", msg, block->index);
109bf215546Sopenharmony_ci         is_valid = false;
110bf215546Sopenharmony_ci      }
111bf215546Sopenharmony_ci   };
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci   for (Block& block : program->blocks) {
114bf215546Sopenharmony_ci      for (aco_ptr<Instruction>& instr : block.instructions) {
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_ci         /* check base format */
117bf215546Sopenharmony_ci         Format base_format = instr->format;
118bf215546Sopenharmony_ci         base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::SDWA);
119bf215546Sopenharmony_ci         base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP16);
120bf215546Sopenharmony_ci         base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP8);
121bf215546Sopenharmony_ci         if ((uint32_t)base_format & (uint32_t)Format::VOP1)
122bf215546Sopenharmony_ci            base_format = Format::VOP1;
123bf215546Sopenharmony_ci         else if ((uint32_t)base_format & (uint32_t)Format::VOP2)
124bf215546Sopenharmony_ci            base_format = Format::VOP2;
125bf215546Sopenharmony_ci         else if ((uint32_t)base_format & (uint32_t)Format::VOPC)
126bf215546Sopenharmony_ci            base_format = Format::VOPC;
127bf215546Sopenharmony_ci         else if ((uint32_t)base_format & (uint32_t)Format::VINTRP) {
128bf215546Sopenharmony_ci            if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
129bf215546Sopenharmony_ci                instr->opcode == aco_opcode::v_interp_p1lv_f16 ||
130bf215546Sopenharmony_ci                instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
131bf215546Sopenharmony_ci                instr->opcode == aco_opcode::v_interp_p2_f16) {
132bf215546Sopenharmony_ci               /* v_interp_*_fp16 are considered VINTRP by the compiler but
133bf215546Sopenharmony_ci                * they are emitted as VOP3.
134bf215546Sopenharmony_ci                */
135bf215546Sopenharmony_ci               base_format = Format::VOP3;
136bf215546Sopenharmony_ci            } else {
137bf215546Sopenharmony_ci               base_format = Format::VINTRP;
138bf215546Sopenharmony_ci            }
139bf215546Sopenharmony_ci         }
140bf215546Sopenharmony_ci         check(base_format == instr_info.format[(int)instr->opcode],
141bf215546Sopenharmony_ci               "Wrong base format for instruction", instr.get());
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci         /* check VOP3 modifiers */
144bf215546Sopenharmony_ci         if (instr->isVOP3() && instr->format != Format::VOP3) {
145bf215546Sopenharmony_ci            check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
146bf215546Sopenharmony_ci                     base_format == Format::VOPC || base_format == Format::VINTRP,
147bf215546Sopenharmony_ci                  "Format cannot have VOP3/VOP3B applied", instr.get());
148bf215546Sopenharmony_ci         }
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci         /* check SDWA */
151bf215546Sopenharmony_ci         if (instr->isSDWA()) {
152bf215546Sopenharmony_ci            check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
153bf215546Sopenharmony_ci                     base_format == Format::VOPC,
154bf215546Sopenharmony_ci                  "Format cannot have SDWA applied", instr.get());
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci            check(program->gfx_level >= GFX8, "SDWA is GFX8 to GFX10.3 only", instr.get());
157bf215546Sopenharmony_ci            check(program->gfx_level < GFX11, "SDWA is GFX8 to GFX10.3 only", instr.get());
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_ci            SDWA_instruction& sdwa = instr->sdwa();
160bf215546Sopenharmony_ci            check(sdwa.omod == 0 || program->gfx_level >= GFX9, "SDWA omod only supported on GFX9+",
161bf215546Sopenharmony_ci                  instr.get());
162bf215546Sopenharmony_ci            if (base_format == Format::VOPC) {
163bf215546Sopenharmony_ci               check(sdwa.clamp == false || program->gfx_level == GFX8,
164bf215546Sopenharmony_ci                     "SDWA VOPC clamp only supported on GFX8", instr.get());
165bf215546Sopenharmony_ci               check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
166bf215546Sopenharmony_ci                        program->gfx_level >= GFX9,
167bf215546Sopenharmony_ci                     "SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
168bf215546Sopenharmony_ci            } else {
169bf215546Sopenharmony_ci               const Definition& def = instr->definitions[0];
170bf215546Sopenharmony_ci               check(def.bytes() <= 4, "SDWA definitions must not be larger than 4 bytes",
171bf215546Sopenharmony_ci                     instr.get());
172bf215546Sopenharmony_ci               check(def.bytes() >= sdwa.dst_sel.size() + sdwa.dst_sel.offset(),
173bf215546Sopenharmony_ci                     "SDWA definition selection size must be at most definition size", instr.get());
174bf215546Sopenharmony_ci               check(
175bf215546Sopenharmony_ci                  sdwa.dst_sel.size() == 1 || sdwa.dst_sel.size() == 2 || sdwa.dst_sel.size() == 4,
176bf215546Sopenharmony_ci                  "SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
177bf215546Sopenharmony_ci               check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset",
178bf215546Sopenharmony_ci                     instr.get());
179bf215546Sopenharmony_ci               check(def.bytes() == 4 || def.bytes() == sdwa.dst_sel.size(),
180bf215546Sopenharmony_ci                     "SDWA dst_sel size must be definition size for subdword definitions",
181bf215546Sopenharmony_ci                     instr.get());
182bf215546Sopenharmony_ci               check(def.bytes() == 4 || sdwa.dst_sel.offset() == 0,
183bf215546Sopenharmony_ci                     "SDWA dst_sel offset must be 0 for subdword definitions", instr.get());
184bf215546Sopenharmony_ci            }
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci            for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
187bf215546Sopenharmony_ci               const Operand& op = instr->operands[i];
188bf215546Sopenharmony_ci               check(op.bytes() <= 4, "SDWA operands must not be larger than 4 bytes", instr.get());
189bf215546Sopenharmony_ci               check(op.bytes() >= sdwa.sel[i].size() + sdwa.sel[i].offset(),
190bf215546Sopenharmony_ci                     "SDWA operand selection size must be at most operand size", instr.get());
191bf215546Sopenharmony_ci               check(sdwa.sel[i].size() == 1 || sdwa.sel[i].size() == 2 || sdwa.sel[i].size() == 4,
192bf215546Sopenharmony_ci                     "SDWA operand selection size must be 1, 2 or 4 bytes", instr.get());
193bf215546Sopenharmony_ci               check(sdwa.sel[i].offset() % sdwa.sel[i].size() == 0, "Invalid selection offset",
194bf215546Sopenharmony_ci                     instr.get());
195bf215546Sopenharmony_ci            }
196bf215546Sopenharmony_ci            if (instr->operands.size() >= 3) {
197bf215546Sopenharmony_ci               check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,
198bf215546Sopenharmony_ci                     "3rd operand must be fixed to vcc with SDWA", instr.get());
199bf215546Sopenharmony_ci            }
200bf215546Sopenharmony_ci            if (instr->definitions.size() >= 2) {
201bf215546Sopenharmony_ci               check(instr->definitions[1].isFixed() && instr->definitions[1].physReg() == vcc,
202bf215546Sopenharmony_ci                     "2nd definition must be fixed to vcc with SDWA", instr.get());
203bf215546Sopenharmony_ci            }
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci            const bool sdwa_opcodes =
206bf215546Sopenharmony_ci               instr->opcode != aco_opcode::v_fmac_f32 && instr->opcode != aco_opcode::v_fmac_f16 &&
207bf215546Sopenharmony_ci               instr->opcode != aco_opcode::v_fmamk_f32 &&
208bf215546Sopenharmony_ci               instr->opcode != aco_opcode::v_fmaak_f32 &&
209bf215546Sopenharmony_ci               instr->opcode != aco_opcode::v_fmamk_f16 &&
210bf215546Sopenharmony_ci               instr->opcode != aco_opcode::v_fmaak_f16 &&
211bf215546Sopenharmony_ci               instr->opcode != aco_opcode::v_madmk_f32 &&
212bf215546Sopenharmony_ci               instr->opcode != aco_opcode::v_madak_f32 &&
213bf215546Sopenharmony_ci               instr->opcode != aco_opcode::v_madmk_f16 &&
214bf215546Sopenharmony_ci               instr->opcode != aco_opcode::v_madak_f16 &&
215bf215546Sopenharmony_ci               instr->opcode != aco_opcode::v_readfirstlane_b32 &&
216bf215546Sopenharmony_ci               instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32;
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_ci            const bool feature_mac =
219bf215546Sopenharmony_ci               program->gfx_level == GFX8 &&
220bf215546Sopenharmony_ci               (instr->opcode == aco_opcode::v_mac_f32 && instr->opcode == aco_opcode::v_mac_f16);
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_ci            check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());
223bf215546Sopenharmony_ci         }
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci         /* check opsel */
226bf215546Sopenharmony_ci         if (instr->isVOP3()) {
227bf215546Sopenharmony_ci            VOP3_instruction& vop3 = instr->vop3();
228bf215546Sopenharmony_ci            check(vop3.opsel == 0 || program->gfx_level >= GFX9, "Opsel is only supported on GFX9+",
229bf215546Sopenharmony_ci                  instr.get());
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_ci            for (unsigned i = 0; i < 3; i++) {
232bf215546Sopenharmony_ci               if (i >= instr->operands.size() ||
233bf215546Sopenharmony_ci                   (instr->operands[i].hasRegClass() &&
234bf215546Sopenharmony_ci                    instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))
235bf215546Sopenharmony_ci                  check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
236bf215546Sopenharmony_ci            }
237bf215546Sopenharmony_ci            if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
238bf215546Sopenharmony_ci               check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition",
239bf215546Sopenharmony_ci                     instr.get());
240bf215546Sopenharmony_ci         } else if (instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
241bf215546Sopenharmony_ci                    instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
242bf215546Sopenharmony_ci                    instr->opcode == aco_opcode::v_fma_mix_f32) {
243bf215546Sopenharmony_ci            check(instr->definitions[0].regClass() ==
244bf215546Sopenharmony_ci                     (instr->opcode == aco_opcode::v_fma_mix_f32 ? v1 : v2b),
245bf215546Sopenharmony_ci                  "v_fma_mix_f32/v_fma_mix_f16 must have v1/v2b definition", instr.get());
246bf215546Sopenharmony_ci         } else if (instr->isVOP3P()) {
247bf215546Sopenharmony_ci            VOP3P_instruction& vop3p = instr->vop3p();
248bf215546Sopenharmony_ci            for (unsigned i = 0; i < instr->operands.size(); i++) {
249bf215546Sopenharmony_ci               if (instr->operands[i].hasRegClass() &&
250bf215546Sopenharmony_ci                   instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())
251bf215546Sopenharmony_ci                  check((vop3p.opsel_lo & (1 << i)) == 0 && (vop3p.opsel_hi & (1 << i)) == 0,
252bf215546Sopenharmony_ci                        "Unexpected opsel for subdword operand", instr.get());
253bf215546Sopenharmony_ci            }
254bf215546Sopenharmony_ci            check(instr->definitions[0].regClass() == v1, "VOP3P must have v1 definition",
255bf215546Sopenharmony_ci                  instr.get());
256bf215546Sopenharmony_ci         }
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_ci         /* check for undefs */
259bf215546Sopenharmony_ci         for (unsigned i = 0; i < instr->operands.size(); i++) {
260bf215546Sopenharmony_ci            if (instr->operands[i].isUndefined()) {
261bf215546Sopenharmony_ci               bool flat = instr->isFlatLike();
262bf215546Sopenharmony_ci               bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
263bf215546Sopenharmony_ci                                   instr->opcode == aco_opcode::p_create_vector ||
264bf215546Sopenharmony_ci                                   instr->opcode == aco_opcode::p_jump_to_epilog ||
265bf215546Sopenharmony_ci                                   (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
266bf215546Sopenharmony_ci                                   ((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
267bf215546Sopenharmony_ci                                   (instr->isScratch() && i == 0);
268bf215546Sopenharmony_ci               check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
269bf215546Sopenharmony_ci            } else {
270bf215546Sopenharmony_ci               check(instr->operands[i].isFixed() || instr->operands[i].isTemp() ||
271bf215546Sopenharmony_ci                        instr->operands[i].isConstant(),
272bf215546Sopenharmony_ci                     "Uninitialized Operand", instr.get());
273bf215546Sopenharmony_ci            }
274bf215546Sopenharmony_ci         }
275bf215546Sopenharmony_ci
276bf215546Sopenharmony_ci         /* check subdword definitions */
277bf215546Sopenharmony_ci         for (unsigned i = 0; i < instr->definitions.size(); i++) {
278bf215546Sopenharmony_ci            if (instr->definitions[i].regClass().is_subdword())
279bf215546Sopenharmony_ci               check(instr->definitions[i].bytes() <= 4 || instr->isPseudo() || instr->isVMEM(),
280bf215546Sopenharmony_ci                     "Only Pseudo and VMEM instructions can write subdword registers > 4 bytes",
281bf215546Sopenharmony_ci                     instr.get());
282bf215546Sopenharmony_ci         }
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_ci         if (instr->isSALU() || instr->isVALU()) {
285bf215546Sopenharmony_ci            /* check literals */
286bf215546Sopenharmony_ci            Operand literal(s1);
287bf215546Sopenharmony_ci            for (unsigned i = 0; i < instr->operands.size(); i++) {
288bf215546Sopenharmony_ci               Operand op = instr->operands[i];
289bf215546Sopenharmony_ci               if (!op.isLiteral())
290bf215546Sopenharmony_ci                  continue;
291bf215546Sopenharmony_ci
292bf215546Sopenharmony_ci               check(!instr->isDPP() && !instr->isSDWA() &&
293bf215546Sopenharmony_ci                        (!instr->isVOP3() || program->gfx_level >= GFX10) &&
294bf215546Sopenharmony_ci                        (!instr->isVOP3P() || program->gfx_level >= GFX10),
295bf215546Sopenharmony_ci                     "Literal applied on wrong instruction format", instr.get());
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_ci               check(literal.isUndefined() || (literal.size() == op.size() &&
298bf215546Sopenharmony_ci                                               literal.constantValue() == op.constantValue()),
299bf215546Sopenharmony_ci                     "Only 1 Literal allowed", instr.get());
300bf215546Sopenharmony_ci               literal = op;
301bf215546Sopenharmony_ci               check(instr->isSALU() || instr->isVOP3() || instr->isVOP3P() || i == 0 || i == 2,
302bf215546Sopenharmony_ci                     "Wrong source position for Literal argument", instr.get());
303bf215546Sopenharmony_ci            }
304bf215546Sopenharmony_ci
305bf215546Sopenharmony_ci            /* check num sgprs for VALU */
306bf215546Sopenharmony_ci            if (instr->isVALU()) {
307bf215546Sopenharmony_ci               bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 ||
308bf215546Sopenharmony_ci                                 instr->opcode == aco_opcode::v_lshrrev_b64 ||
309bf215546Sopenharmony_ci                                 instr->opcode == aco_opcode::v_ashrrev_i64;
310bf215546Sopenharmony_ci               unsigned const_bus_limit = 1;
311bf215546Sopenharmony_ci               if (program->gfx_level >= GFX10 && !is_shift64)
312bf215546Sopenharmony_ci                  const_bus_limit = 2;
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_ci               uint32_t scalar_mask = instr->isVOP3() || instr->isVOP3P() ? 0x7 : 0x5;
315bf215546Sopenharmony_ci               if (instr->isSDWA())
316bf215546Sopenharmony_ci                  scalar_mask = program->gfx_level >= GFX9 ? 0x7 : 0x4;
317bf215546Sopenharmony_ci               else if (instr->isDPP())
318bf215546Sopenharmony_ci                  scalar_mask = 0x4;
319bf215546Sopenharmony_ci
320bf215546Sopenharmony_ci               if (instr->isVOPC() || instr->opcode == aco_opcode::v_readfirstlane_b32 ||
321bf215546Sopenharmony_ci                   instr->opcode == aco_opcode::v_readlane_b32 ||
322bf215546Sopenharmony_ci                   instr->opcode == aco_opcode::v_readlane_b32_e64) {
323bf215546Sopenharmony_ci                  check(instr->definitions[0].getTemp().type() == RegType::sgpr,
324bf215546Sopenharmony_ci                        "Wrong Definition type for VALU instruction", instr.get());
325bf215546Sopenharmony_ci               } else {
326bf215546Sopenharmony_ci                  check(instr->definitions[0].getTemp().type() == RegType::vgpr,
327bf215546Sopenharmony_ci                        "Wrong Definition type for VALU instruction", instr.get());
328bf215546Sopenharmony_ci               }
329bf215546Sopenharmony_ci
330bf215546Sopenharmony_ci               unsigned num_sgprs = 0;
331bf215546Sopenharmony_ci               unsigned sgpr[] = {0, 0};
332bf215546Sopenharmony_ci               for (unsigned i = 0; i < instr->operands.size(); i++) {
333bf215546Sopenharmony_ci                  Operand op = instr->operands[i];
334bf215546Sopenharmony_ci                  if (instr->opcode == aco_opcode::v_readfirstlane_b32 ||
335bf215546Sopenharmony_ci                      instr->opcode == aco_opcode::v_readlane_b32 ||
336bf215546Sopenharmony_ci                      instr->opcode == aco_opcode::v_readlane_b32_e64) {
337bf215546Sopenharmony_ci                     check(i != 1 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
338bf215546Sopenharmony_ci                              op.isConstant(),
339bf215546Sopenharmony_ci                           "Must be a SGPR or a constant", instr.get());
340bf215546Sopenharmony_ci                     check(i == 1 || (op.isTemp() && op.regClass().type() == RegType::vgpr &&
341bf215546Sopenharmony_ci                                      op.bytes() <= 4),
342bf215546Sopenharmony_ci                           "Wrong Operand type for VALU instruction", instr.get());
343bf215546Sopenharmony_ci                     continue;
344bf215546Sopenharmony_ci                  }
345bf215546Sopenharmony_ci                  if (instr->opcode == aco_opcode::v_permlane16_b32 ||
346bf215546Sopenharmony_ci                      instr->opcode == aco_opcode::v_permlanex16_b32) {
347bf215546Sopenharmony_ci                     check(i != 0 || (op.isTemp() && op.regClass().type() == RegType::vgpr),
348bf215546Sopenharmony_ci                           "Operand 0 of v_permlane must be VGPR", instr.get());
349bf215546Sopenharmony_ci                     check(i == 0 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
350bf215546Sopenharmony_ci                              op.isConstant(),
351bf215546Sopenharmony_ci                           "Lane select operands of v_permlane must be SGPR or constant",
352bf215546Sopenharmony_ci                           instr.get());
353bf215546Sopenharmony_ci                  }
354bf215546Sopenharmony_ci
355bf215546Sopenharmony_ci                  if (instr->opcode == aco_opcode::v_writelane_b32 ||
356bf215546Sopenharmony_ci                      instr->opcode == aco_opcode::v_writelane_b32_e64) {
357bf215546Sopenharmony_ci                     check(i != 2 || (op.isTemp() && op.regClass().type() == RegType::vgpr &&
358bf215546Sopenharmony_ci                                      op.bytes() <= 4),
359bf215546Sopenharmony_ci                           "Wrong Operand type for VALU instruction", instr.get());
360bf215546Sopenharmony_ci                     check(i == 2 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
361bf215546Sopenharmony_ci                              op.isConstant(),
362bf215546Sopenharmony_ci                           "Must be a SGPR or a constant", instr.get());
363bf215546Sopenharmony_ci                     continue;
364bf215546Sopenharmony_ci                  }
365bf215546Sopenharmony_ci                  if (op.isTemp() && instr->operands[i].regClass().type() == RegType::sgpr) {
366bf215546Sopenharmony_ci                     check(scalar_mask & (1 << i), "Wrong source position for SGPR argument",
367bf215546Sopenharmony_ci                           instr.get());
368bf215546Sopenharmony_ci
369bf215546Sopenharmony_ci                     if (op.tempId() != sgpr[0] && op.tempId() != sgpr[1]) {
370bf215546Sopenharmony_ci                        if (num_sgprs < 2)
371bf215546Sopenharmony_ci                           sgpr[num_sgprs++] = op.tempId();
372bf215546Sopenharmony_ci                     }
373bf215546Sopenharmony_ci                  }
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ci                  if (op.isConstant() && !op.isLiteral())
376bf215546Sopenharmony_ci                     check(scalar_mask & (1 << i), "Wrong source position for constant argument",
377bf215546Sopenharmony_ci                           instr.get());
378bf215546Sopenharmony_ci               }
379bf215546Sopenharmony_ci               check(num_sgprs + (literal.isUndefined() ? 0 : 1) <= const_bus_limit,
380bf215546Sopenharmony_ci                     "Too many SGPRs/literals", instr.get());
381bf215546Sopenharmony_ci            }
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci            if (instr->isSOP1() || instr->isSOP2()) {
384bf215546Sopenharmony_ci               if (!instr->definitions.empty())
385bf215546Sopenharmony_ci                  check(instr->definitions[0].getTemp().type() == RegType::sgpr,
386bf215546Sopenharmony_ci                        "Wrong Definition type for SALU instruction", instr.get());
387bf215546Sopenharmony_ci               for (const Operand& op : instr->operands) {
388bf215546Sopenharmony_ci                  check(op.isConstant() || op.regClass().type() <= RegType::sgpr,
389bf215546Sopenharmony_ci                        "Wrong Operand type for SALU instruction", instr.get());
390bf215546Sopenharmony_ci               }
391bf215546Sopenharmony_ci            }
392bf215546Sopenharmony_ci         }
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_ci         switch (instr->format) {
395bf215546Sopenharmony_ci         case Format::PSEUDO: {
396bf215546Sopenharmony_ci            if (instr->opcode == aco_opcode::p_create_vector) {
397bf215546Sopenharmony_ci               unsigned size = 0;
398bf215546Sopenharmony_ci               for (const Operand& op : instr->operands) {
399bf215546Sopenharmony_ci                  check(op.bytes() < 4 || size % 4 == 0, "Operand is not aligned", instr.get());
400bf215546Sopenharmony_ci                  size += op.bytes();
401bf215546Sopenharmony_ci               }
402bf215546Sopenharmony_ci               check(size == instr->definitions[0].bytes(),
403bf215546Sopenharmony_ci                     "Definition size does not match operand sizes", instr.get());
404bf215546Sopenharmony_ci               if (instr->definitions[0].getTemp().type() == RegType::sgpr) {
405bf215546Sopenharmony_ci                  for (const Operand& op : instr->operands) {
406bf215546Sopenharmony_ci                     check(op.isConstant() || op.regClass().type() == RegType::sgpr,
407bf215546Sopenharmony_ci                           "Wrong Operand type for scalar vector", instr.get());
408bf215546Sopenharmony_ci                  }
409bf215546Sopenharmony_ci               }
410bf215546Sopenharmony_ci            } else if (instr->opcode == aco_opcode::p_extract_vector) {
411bf215546Sopenharmony_ci               check((instr->operands[0].isTemp()) && instr->operands[1].isConstant(),
412bf215546Sopenharmony_ci                     "Wrong Operand types", instr.get());
413bf215546Sopenharmony_ci               check((instr->operands[1].constantValue() + 1) * instr->definitions[0].bytes() <=
414bf215546Sopenharmony_ci                        instr->operands[0].bytes(),
415bf215546Sopenharmony_ci                     "Index out of range", instr.get());
416bf215546Sopenharmony_ci               check(instr->definitions[0].getTemp().type() == RegType::vgpr ||
417bf215546Sopenharmony_ci                        instr->operands[0].regClass().type() == RegType::sgpr,
418bf215546Sopenharmony_ci                     "Cannot extract SGPR value from VGPR vector", instr.get());
419bf215546Sopenharmony_ci               check(program->gfx_level >= GFX9 ||
420bf215546Sopenharmony_ci                        !instr->definitions[0].regClass().is_subdword() ||
421bf215546Sopenharmony_ci                        instr->operands[0].regClass().type() == RegType::vgpr,
422bf215546Sopenharmony_ci                     "Cannot extract subdword from SGPR before GFX9+", instr.get());
423bf215546Sopenharmony_ci            } else if (instr->opcode == aco_opcode::p_split_vector) {
424bf215546Sopenharmony_ci               check(instr->operands[0].isTemp(), "Operand must be a temporary", instr.get());
425bf215546Sopenharmony_ci               unsigned size = 0;
426bf215546Sopenharmony_ci               for (const Definition& def : instr->definitions) {
427bf215546Sopenharmony_ci                  size += def.bytes();
428bf215546Sopenharmony_ci               }
429bf215546Sopenharmony_ci               check(size == instr->operands[0].bytes(),
430bf215546Sopenharmony_ci                     "Operand size does not match definition sizes", instr.get());
431bf215546Sopenharmony_ci               if (instr->operands[0].getTemp().type() == RegType::vgpr) {
432bf215546Sopenharmony_ci                  for (const Definition& def : instr->definitions)
433bf215546Sopenharmony_ci                     check(def.regClass().type() == RegType::vgpr,
434bf215546Sopenharmony_ci                           "Wrong Definition type for VGPR split_vector", instr.get());
435bf215546Sopenharmony_ci               } else {
436bf215546Sopenharmony_ci                  for (const Definition& def : instr->definitions)
437bf215546Sopenharmony_ci                     check(program->gfx_level >= GFX9 || !def.regClass().is_subdword(),
438bf215546Sopenharmony_ci                           "Cannot split SGPR into subdword VGPRs before GFX9+", instr.get());
439bf215546Sopenharmony_ci               }
440bf215546Sopenharmony_ci            } else if (instr->opcode == aco_opcode::p_parallelcopy) {
441bf215546Sopenharmony_ci               check(instr->definitions.size() == instr->operands.size(),
442bf215546Sopenharmony_ci                     "Number of Operands does not match number of Definitions", instr.get());
443bf215546Sopenharmony_ci               for (unsigned i = 0; i < instr->operands.size(); i++) {
444bf215546Sopenharmony_ci                  check(instr->definitions[i].bytes() == instr->operands[i].bytes(),
445bf215546Sopenharmony_ci                        "Operand and Definition size must match", instr.get());
446bf215546Sopenharmony_ci                  if (instr->operands[i].isTemp()) {
447bf215546Sopenharmony_ci                     check((instr->definitions[i].getTemp().type() ==
448bf215546Sopenharmony_ci                            instr->operands[i].regClass().type()) ||
449bf215546Sopenharmony_ci                              (instr->definitions[i].getTemp().type() == RegType::vgpr &&
450bf215546Sopenharmony_ci                               instr->operands[i].regClass().type() == RegType::sgpr),
451bf215546Sopenharmony_ci                           "Operand and Definition types do not match", instr.get());
452bf215546Sopenharmony_ci                     check(instr->definitions[i].regClass().is_linear_vgpr() ==
453bf215546Sopenharmony_ci                              instr->operands[i].regClass().is_linear_vgpr(),
454bf215546Sopenharmony_ci                           "Operand and Definition types do not match", instr.get());
455bf215546Sopenharmony_ci                  } else {
456bf215546Sopenharmony_ci                     check(!instr->definitions[i].regClass().is_linear_vgpr(),
457bf215546Sopenharmony_ci                           "Can only copy linear VGPRs into linear VGPRs, not constant/undef",
458bf215546Sopenharmony_ci                           instr.get());
459bf215546Sopenharmony_ci                  }
460bf215546Sopenharmony_ci               }
461bf215546Sopenharmony_ci            } else if (instr->opcode == aco_opcode::p_phi) {
462bf215546Sopenharmony_ci               check(instr->operands.size() == block.logical_preds.size(),
463bf215546Sopenharmony_ci                     "Number of Operands does not match number of predecessors", instr.get());
464bf215546Sopenharmony_ci               check(instr->definitions[0].getTemp().type() == RegType::vgpr,
465bf215546Sopenharmony_ci                     "Logical Phi Definition must be vgpr", instr.get());
466bf215546Sopenharmony_ci               for (const Operand& op : instr->operands)
467bf215546Sopenharmony_ci                  check(instr->definitions[0].size() == op.size(),
468bf215546Sopenharmony_ci                        "Operand sizes must match Definition size", instr.get());
469bf215546Sopenharmony_ci            } else if (instr->opcode == aco_opcode::p_linear_phi) {
470bf215546Sopenharmony_ci               for (const Operand& op : instr->operands) {
471bf215546Sopenharmony_ci                  check(!op.isTemp() || op.getTemp().is_linear(), "Wrong Operand type",
472bf215546Sopenharmony_ci                        instr.get());
473bf215546Sopenharmony_ci                  check(instr->definitions[0].size() == op.size(),
474bf215546Sopenharmony_ci                        "Operand sizes must match Definition size", instr.get());
475bf215546Sopenharmony_ci               }
476bf215546Sopenharmony_ci               check(instr->operands.size() == block.linear_preds.size(),
477bf215546Sopenharmony_ci                     "Number of Operands does not match number of predecessors", instr.get());
478bf215546Sopenharmony_ci            } else if (instr->opcode == aco_opcode::p_extract ||
479bf215546Sopenharmony_ci                       instr->opcode == aco_opcode::p_insert) {
480bf215546Sopenharmony_ci               check(instr->operands[0].isTemp(), "Data operand must be temporary", instr.get());
481bf215546Sopenharmony_ci               check(instr->operands[1].isConstant(), "Index must be constant", instr.get());
482bf215546Sopenharmony_ci               if (instr->opcode == aco_opcode::p_extract)
483bf215546Sopenharmony_ci                  check(instr->operands[3].isConstant(), "Sign-extend flag must be constant",
484bf215546Sopenharmony_ci                        instr.get());
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci               check(instr->definitions[0].getTemp().type() != RegType::sgpr ||
487bf215546Sopenharmony_ci                        instr->operands[0].getTemp().type() == RegType::sgpr,
488bf215546Sopenharmony_ci                     "Can't extract/insert VGPR to SGPR", instr.get());
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci               if (instr->opcode == aco_opcode::p_insert)
491bf215546Sopenharmony_ci                  check(instr->operands[0].bytes() == instr->definitions[0].bytes(),
492bf215546Sopenharmony_ci                        "Sizes of p_insert data operand and definition must match", instr.get());
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci               if (instr->definitions[0].getTemp().type() == RegType::sgpr)
495bf215546Sopenharmony_ci                  check(instr->definitions.size() >= 2 && instr->definitions[1].isFixed() &&
496bf215546Sopenharmony_ci                           instr->definitions[1].physReg() == scc,
497bf215546Sopenharmony_ci                        "SGPR extract/insert needs an SCC definition", instr.get());
498bf215546Sopenharmony_ci
499bf215546Sopenharmony_ci               unsigned data_bits = instr->operands[0].getTemp().bytes() * 8u;
500bf215546Sopenharmony_ci               unsigned op_bits = instr->operands[2].constantValue();
501bf215546Sopenharmony_ci
502bf215546Sopenharmony_ci               if (instr->opcode == aco_opcode::p_insert) {
503bf215546Sopenharmony_ci                  check(op_bits == 8 || op_bits == 16, "Size must be 8 or 16", instr.get());
504bf215546Sopenharmony_ci                  check(op_bits < data_bits, "Size must be smaller than source", instr.get());
505bf215546Sopenharmony_ci               } else if (instr->opcode == aco_opcode::p_extract) {
506bf215546Sopenharmony_ci                  check(op_bits == 8 || op_bits == 16 || op_bits == 32,
507bf215546Sopenharmony_ci                        "Size must be 8 or 16 or 32", instr.get());
508bf215546Sopenharmony_ci                  check(data_bits >= op_bits, "Can't extract more bits than what the data has.",
509bf215546Sopenharmony_ci                        instr.get());
510bf215546Sopenharmony_ci               }
511bf215546Sopenharmony_ci
512bf215546Sopenharmony_ci               unsigned comp = data_bits / MAX2(op_bits, 1);
513bf215546Sopenharmony_ci               check(instr->operands[1].constantValue() < comp, "Index must be in-bounds",
514bf215546Sopenharmony_ci                     instr.get());
515bf215546Sopenharmony_ci            } else if (instr->opcode == aco_opcode::p_jump_to_epilog) {
516bf215546Sopenharmony_ci               check(instr->definitions.size() == 0, "p_jump_to_epilog must have 0 definitions",
517bf215546Sopenharmony_ci                     instr.get());
518bf215546Sopenharmony_ci               check(instr->operands.size() > 0 &&
519bf215546Sopenharmony_ci                        instr->operands[0].getTemp().type() == RegType::sgpr &&
520bf215546Sopenharmony_ci                        instr->operands[0].getTemp().size() == 2,
521bf215546Sopenharmony_ci                     "First operand of p_jump_to_epilog must be a SGPR", instr.get());
522bf215546Sopenharmony_ci               for (unsigned i = 1; i < instr->operands.size(); i++) {
523bf215546Sopenharmony_ci                  check(instr->operands[i].getTemp().type() == RegType::vgpr ||
524bf215546Sopenharmony_ci                           instr->operands[i].isUndefined(),
525bf215546Sopenharmony_ci                        "Other operands of p_jump_to_epilog must be VGPRs or undef", instr.get());
526bf215546Sopenharmony_ci               }
527bf215546Sopenharmony_ci            }
528bf215546Sopenharmony_ci            break;
529bf215546Sopenharmony_ci         }
530bf215546Sopenharmony_ci         case Format::PSEUDO_REDUCTION: {
531bf215546Sopenharmony_ci            for (const Operand& op : instr->operands)
532bf215546Sopenharmony_ci               check(op.regClass().type() == RegType::vgpr,
533bf215546Sopenharmony_ci                     "All operands of PSEUDO_REDUCTION instructions must be in VGPRs.",
534bf215546Sopenharmony_ci                     instr.get());
535bf215546Sopenharmony_ci
536bf215546Sopenharmony_ci            if (instr->opcode == aco_opcode::p_reduce &&
537bf215546Sopenharmony_ci                instr->reduction().cluster_size == program->wave_size)
538bf215546Sopenharmony_ci               check(instr->definitions[0].regClass().type() == RegType::sgpr ||
539bf215546Sopenharmony_ci                        program->wave_size == 32,
540bf215546Sopenharmony_ci                     "The result of unclustered reductions must go into an SGPR.", instr.get());
541bf215546Sopenharmony_ci            else
542bf215546Sopenharmony_ci               check(instr->definitions[0].regClass().type() == RegType::vgpr,
543bf215546Sopenharmony_ci                     "The result of scans and clustered reductions must go into a VGPR.",
544bf215546Sopenharmony_ci                     instr.get());
545bf215546Sopenharmony_ci
546bf215546Sopenharmony_ci            break;
547bf215546Sopenharmony_ci         }
548bf215546Sopenharmony_ci         case Format::SMEM: {
549bf215546Sopenharmony_ci            if (instr->operands.size() >= 1)
550bf215546Sopenharmony_ci               check((instr->operands[0].isFixed() && !instr->operands[0].isConstant()) ||
551bf215546Sopenharmony_ci                        (instr->operands[0].isTemp() &&
552bf215546Sopenharmony_ci                         instr->operands[0].regClass().type() == RegType::sgpr),
553bf215546Sopenharmony_ci                     "SMEM operands must be sgpr", instr.get());
554bf215546Sopenharmony_ci            if (instr->operands.size() >= 2)
555bf215546Sopenharmony_ci               check(instr->operands[1].isConstant() ||
556bf215546Sopenharmony_ci                        (instr->operands[1].isTemp() &&
557bf215546Sopenharmony_ci                         instr->operands[1].regClass().type() == RegType::sgpr),
558bf215546Sopenharmony_ci                     "SMEM offset must be constant or sgpr", instr.get());
559bf215546Sopenharmony_ci            if (!instr->definitions.empty())
560bf215546Sopenharmony_ci               check(instr->definitions[0].getTemp().type() == RegType::sgpr,
561bf215546Sopenharmony_ci                     "SMEM result must be sgpr", instr.get());
562bf215546Sopenharmony_ci            break;
563bf215546Sopenharmony_ci         }
564bf215546Sopenharmony_ci         case Format::MTBUF:
565bf215546Sopenharmony_ci         case Format::MUBUF: {
566bf215546Sopenharmony_ci            check(instr->operands.size() > 1, "VMEM instructions must have at least one operand",
567bf215546Sopenharmony_ci                  instr.get());
568bf215546Sopenharmony_ci            check(instr->operands[1].hasRegClass() &&
569bf215546Sopenharmony_ci                     instr->operands[1].regClass().type() == RegType::vgpr,
570bf215546Sopenharmony_ci                  "VADDR must be in vgpr for VMEM instructions", instr.get());
571bf215546Sopenharmony_ci            check(
572bf215546Sopenharmony_ci               instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::sgpr,
573bf215546Sopenharmony_ci               "VMEM resource constant must be sgpr", instr.get());
574bf215546Sopenharmony_ci            check(instr->operands.size() < 4 ||
575bf215546Sopenharmony_ci                     (instr->operands[3].isTemp() &&
576bf215546Sopenharmony_ci                      instr->operands[3].regClass().type() == RegType::vgpr),
577bf215546Sopenharmony_ci                  "VMEM write data must be vgpr", instr.get());
578bf215546Sopenharmony_ci
579bf215546Sopenharmony_ci            const bool d16 = instr->opcode == aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
580bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_ubyte ||
581bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_sbyte ||
582bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_ushort ||
583bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_sshort ||
584bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_ubyte_d16 ||
585bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi ||
586bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_sbyte_d16 ||
587bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi ||
588bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_short_d16 ||
589bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_short_d16_hi ||
590bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_format_d16_x ||
591bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_format_d16_hi_x ||
592bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_format_d16_xy ||
593bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_format_d16_xyz ||
594bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::buffer_load_format_d16_xyzw ||
595bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::tbuffer_load_format_d16_x ||
596bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::tbuffer_load_format_d16_xy ||
597bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz ||
598bf215546Sopenharmony_ci                             instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw;
599bf215546Sopenharmony_ci            if (instr->definitions.size()) {
600bf215546Sopenharmony_ci               check(instr->definitions[0].isTemp() &&
601bf215546Sopenharmony_ci                        instr->definitions[0].regClass().type() == RegType::vgpr,
602bf215546Sopenharmony_ci                     "VMEM definitions[0] (VDATA) must be VGPR", instr.get());
603bf215546Sopenharmony_ci               check(d16 || !instr->definitions[0].regClass().is_subdword(),
604bf215546Sopenharmony_ci                     "Only D16 opcodes can load subdword values.", instr.get());
605bf215546Sopenharmony_ci               check(instr->definitions[0].bytes() <= 8 || !d16,
606bf215546Sopenharmony_ci                     "D16 opcodes can only load up to 8 bytes.", instr.get());
607bf215546Sopenharmony_ci            }
608bf215546Sopenharmony_ci            break;
609bf215546Sopenharmony_ci         }
610bf215546Sopenharmony_ci         case Format::MIMG: {
611bf215546Sopenharmony_ci            check(instr->operands.size() >= 4, "MIMG instructions must have at least 4 operands",
612bf215546Sopenharmony_ci                  instr.get());
613bf215546Sopenharmony_ci            check(instr->operands[0].hasRegClass() &&
614bf215546Sopenharmony_ci                     (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),
615bf215546Sopenharmony_ci                  "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());
616bf215546Sopenharmony_ci            if (instr->operands[1].hasRegClass())
617bf215546Sopenharmony_ci               check(instr->operands[1].regClass() == s4,
618bf215546Sopenharmony_ci                     "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());
619bf215546Sopenharmony_ci            if (!instr->operands[2].isUndefined()) {
620bf215546Sopenharmony_ci               bool is_cmpswap = instr->opcode == aco_opcode::image_atomic_cmpswap ||
621bf215546Sopenharmony_ci                                 instr->opcode == aco_opcode::image_atomic_fcmpswap;
622bf215546Sopenharmony_ci               check(instr->definitions.empty() ||
623bf215546Sopenharmony_ci                        (instr->definitions[0].regClass() == instr->operands[2].regClass() ||
624bf215546Sopenharmony_ci                         is_cmpswap),
625bf215546Sopenharmony_ci                     "MIMG operands[2] (VDATA) must be the same as definitions[0] for atomics and "
626bf215546Sopenharmony_ci                     "TFE/LWE loads",
627bf215546Sopenharmony_ci                     instr.get());
628bf215546Sopenharmony_ci            }
629bf215546Sopenharmony_ci            check(instr->operands.size() == 4 || program->gfx_level >= GFX10,
630bf215546Sopenharmony_ci                  "NSA is only supported on GFX10+", instr.get());
631bf215546Sopenharmony_ci            for (unsigned i = 3; i < instr->operands.size(); i++) {
632bf215546Sopenharmony_ci               if (instr->operands.size() == 4) {
633bf215546Sopenharmony_ci                  check(instr->operands[i].hasRegClass() &&
634bf215546Sopenharmony_ci                           instr->operands[i].regClass().type() == RegType::vgpr,
635bf215546Sopenharmony_ci                        "MIMG operands[3] (VADDR) must be VGPR", instr.get());
636bf215546Sopenharmony_ci               } else {
637bf215546Sopenharmony_ci                  check(instr->operands[i].regClass() == v1, "MIMG VADDR must be v1 if NSA is used",
638bf215546Sopenharmony_ci                        instr.get());
639bf215546Sopenharmony_ci               }
640bf215546Sopenharmony_ci            }
641bf215546Sopenharmony_ci
642bf215546Sopenharmony_ci            if (instr->definitions.size()) {
643bf215546Sopenharmony_ci               check(instr->definitions[0].isTemp() &&
644bf215546Sopenharmony_ci                        instr->definitions[0].regClass().type() == RegType::vgpr,
645bf215546Sopenharmony_ci                     "MIMG definitions[0] (VDATA) must be VGPR", instr.get());
646bf215546Sopenharmony_ci               check(instr->mimg().d16 || !instr->definitions[0].regClass().is_subdword(),
647bf215546Sopenharmony_ci                     "Only D16 MIMG instructions can load subdword values.", instr.get());
648bf215546Sopenharmony_ci               check(instr->definitions[0].bytes() <= 8 || !instr->mimg().d16,
649bf215546Sopenharmony_ci                     "D16 MIMG instructions can only load up to 8 bytes.", instr.get());
650bf215546Sopenharmony_ci            }
651bf215546Sopenharmony_ci            break;
652bf215546Sopenharmony_ci         }
653bf215546Sopenharmony_ci         case Format::DS: {
654bf215546Sopenharmony_ci            for (const Operand& op : instr->operands) {
655bf215546Sopenharmony_ci               check((op.isTemp() && op.regClass().type() == RegType::vgpr) || op.physReg() == m0,
656bf215546Sopenharmony_ci                     "Only VGPRs are valid DS instruction operands", instr.get());
657bf215546Sopenharmony_ci            }
658bf215546Sopenharmony_ci            if (!instr->definitions.empty())
659bf215546Sopenharmony_ci               check(instr->definitions[0].getTemp().type() == RegType::vgpr,
660bf215546Sopenharmony_ci                     "DS instruction must return VGPR", instr.get());
661bf215546Sopenharmony_ci            break;
662bf215546Sopenharmony_ci         }
663bf215546Sopenharmony_ci         case Format::EXP: {
664bf215546Sopenharmony_ci            for (unsigned i = 0; i < 4; i++)
665bf215546Sopenharmony_ci               check(instr->operands[i].hasRegClass() &&
666bf215546Sopenharmony_ci                        instr->operands[i].regClass().type() == RegType::vgpr,
667bf215546Sopenharmony_ci                     "Only VGPRs are valid Export arguments", instr.get());
668bf215546Sopenharmony_ci            break;
669bf215546Sopenharmony_ci         }
670bf215546Sopenharmony_ci         case Format::FLAT:
671bf215546Sopenharmony_ci            check(instr->operands[1].isUndefined(), "Flat instructions don't support SADDR",
672bf215546Sopenharmony_ci                  instr.get());
673bf215546Sopenharmony_ci            FALLTHROUGH;
674bf215546Sopenharmony_ci         case Format::GLOBAL:
675bf215546Sopenharmony_ci            check(
676bf215546Sopenharmony_ci               instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::vgpr,
677bf215546Sopenharmony_ci               "FLAT/GLOBAL address must be vgpr", instr.get());
678bf215546Sopenharmony_ci            FALLTHROUGH;
679bf215546Sopenharmony_ci         case Format::SCRATCH: {
680bf215546Sopenharmony_ci            check(instr->operands[0].hasRegClass() &&
681bf215546Sopenharmony_ci                     instr->operands[0].regClass().type() == RegType::vgpr,
682bf215546Sopenharmony_ci                  "FLAT/GLOBAL/SCRATCH address must be undefined or vgpr", instr.get());
683bf215546Sopenharmony_ci            check(instr->operands[1].hasRegClass() &&
684bf215546Sopenharmony_ci                     instr->operands[1].regClass().type() == RegType::sgpr,
685bf215546Sopenharmony_ci                  "FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr.get());
686bf215546Sopenharmony_ci            if (instr->format == Format::SCRATCH && program->gfx_level < GFX10_3)
687bf215546Sopenharmony_ci               check(instr->operands[0].isTemp() || instr->operands[1].isTemp(),
688bf215546Sopenharmony_ci                     "SCRATCH must have either SADDR or ADDR operand", instr.get());
689bf215546Sopenharmony_ci            if (!instr->definitions.empty())
690bf215546Sopenharmony_ci               check(instr->definitions[0].getTemp().type() == RegType::vgpr,
691bf215546Sopenharmony_ci                     "FLAT/GLOBAL/SCRATCH result must be vgpr", instr.get());
692bf215546Sopenharmony_ci            else
693bf215546Sopenharmony_ci               check(instr->operands[2].regClass().type() == RegType::vgpr,
694bf215546Sopenharmony_ci                     "FLAT/GLOBAL/SCRATCH data must be vgpr", instr.get());
695bf215546Sopenharmony_ci            break;
696bf215546Sopenharmony_ci         }
697bf215546Sopenharmony_ci         default: break;
698bf215546Sopenharmony_ci         }
699bf215546Sopenharmony_ci      }
700bf215546Sopenharmony_ci   }
701bf215546Sopenharmony_ci
702bf215546Sopenharmony_ci   /* validate CFG */
703bf215546Sopenharmony_ci   for (unsigned i = 0; i < program->blocks.size(); i++) {
704bf215546Sopenharmony_ci      Block& block = program->blocks[i];
705bf215546Sopenharmony_ci      check_block(block.index == i, "block.index must match actual index", &block);
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_ci      /* predecessors/successors should be sorted */
708bf215546Sopenharmony_ci      for (unsigned j = 0; j + 1 < block.linear_preds.size(); j++)
709bf215546Sopenharmony_ci         check_block(block.linear_preds[j] < block.linear_preds[j + 1],
710bf215546Sopenharmony_ci                     "linear predecessors must be sorted", &block);
711bf215546Sopenharmony_ci      for (unsigned j = 0; j + 1 < block.logical_preds.size(); j++)
712bf215546Sopenharmony_ci         check_block(block.logical_preds[j] < block.logical_preds[j + 1],
713bf215546Sopenharmony_ci                     "logical predecessors must be sorted", &block);
714bf215546Sopenharmony_ci      for (unsigned j = 0; j + 1 < block.linear_succs.size(); j++)
715bf215546Sopenharmony_ci         check_block(block.linear_succs[j] < block.linear_succs[j + 1],
716bf215546Sopenharmony_ci                     "linear successors must be sorted", &block);
717bf215546Sopenharmony_ci      for (unsigned j = 0; j + 1 < block.logical_succs.size(); j++)
718bf215546Sopenharmony_ci         check_block(block.logical_succs[j] < block.logical_succs[j + 1],
719bf215546Sopenharmony_ci                     "logical successors must be sorted", &block);
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_ci      /* critical edges are not allowed */
722bf215546Sopenharmony_ci      if (block.linear_preds.size() > 1) {
723bf215546Sopenharmony_ci         for (unsigned pred : block.linear_preds)
724bf215546Sopenharmony_ci            check_block(program->blocks[pred].linear_succs.size() == 1,
725bf215546Sopenharmony_ci                        "linear critical edges are not allowed", &program->blocks[pred]);
726bf215546Sopenharmony_ci         for (unsigned pred : block.logical_preds)
727bf215546Sopenharmony_ci            check_block(program->blocks[pred].logical_succs.size() == 1,
728bf215546Sopenharmony_ci                        "logical critical edges are not allowed", &program->blocks[pred]);
729bf215546Sopenharmony_ci      }
730bf215546Sopenharmony_ci   }
731bf215546Sopenharmony_ci
732bf215546Sopenharmony_ci   return is_valid;
733bf215546Sopenharmony_ci}
734bf215546Sopenharmony_ci
735bf215546Sopenharmony_ci/* RA validation */
736bf215546Sopenharmony_cinamespace {
737bf215546Sopenharmony_ci
738bf215546Sopenharmony_cistruct Location {
739bf215546Sopenharmony_ci   Location() : block(NULL), instr(NULL) {}
740bf215546Sopenharmony_ci
741bf215546Sopenharmony_ci   Block* block;
742bf215546Sopenharmony_ci   Instruction* instr; // NULL if it's the block's live-in
743bf215546Sopenharmony_ci};
744bf215546Sopenharmony_ci
745bf215546Sopenharmony_cistruct Assignment {
746bf215546Sopenharmony_ci   Location defloc;
747bf215546Sopenharmony_ci   Location firstloc;
748bf215546Sopenharmony_ci   PhysReg reg;
749bf215546Sopenharmony_ci   bool valid;
750bf215546Sopenharmony_ci};
751bf215546Sopenharmony_ci
752bf215546Sopenharmony_cibool
753bf215546Sopenharmony_cira_fail(Program* program, Location loc, Location loc2, const char* fmt, ...)
754bf215546Sopenharmony_ci{
755bf215546Sopenharmony_ci   va_list args;
756bf215546Sopenharmony_ci   va_start(args, fmt);
757bf215546Sopenharmony_ci   char msg[1024];
758bf215546Sopenharmony_ci   vsprintf(msg, fmt, args);
759bf215546Sopenharmony_ci   va_end(args);
760bf215546Sopenharmony_ci
761bf215546Sopenharmony_ci   char* out;
762bf215546Sopenharmony_ci   size_t outsize;
763bf215546Sopenharmony_ci   struct u_memstream mem;
764bf215546Sopenharmony_ci   u_memstream_open(&mem, &out, &outsize);
765bf215546Sopenharmony_ci   FILE* const memf = u_memstream_get(&mem);
766bf215546Sopenharmony_ci
767bf215546Sopenharmony_ci   fprintf(memf, "RA error found at instruction in BB%d:\n", loc.block->index);
768bf215546Sopenharmony_ci   if (loc.instr) {
769bf215546Sopenharmony_ci      aco_print_instr(loc.instr, memf);
770bf215546Sopenharmony_ci      fprintf(memf, "\n%s", msg);
771bf215546Sopenharmony_ci   } else {
772bf215546Sopenharmony_ci      fprintf(memf, "%s", msg);
773bf215546Sopenharmony_ci   }
774bf215546Sopenharmony_ci   if (loc2.block) {
775bf215546Sopenharmony_ci      fprintf(memf, " in BB%d:\n", loc2.block->index);
776bf215546Sopenharmony_ci      aco_print_instr(loc2.instr, memf);
777bf215546Sopenharmony_ci   }
778bf215546Sopenharmony_ci   fprintf(memf, "\n\n");
779bf215546Sopenharmony_ci   u_memstream_close(&mem);
780bf215546Sopenharmony_ci
781bf215546Sopenharmony_ci   aco_err(program, "%s", out);
782bf215546Sopenharmony_ci   free(out);
783bf215546Sopenharmony_ci
784bf215546Sopenharmony_ci   return true;
785bf215546Sopenharmony_ci}
786bf215546Sopenharmony_ci
787bf215546Sopenharmony_cibool
788bf215546Sopenharmony_civalidate_subdword_operand(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
789bf215546Sopenharmony_ci                          unsigned index)
790bf215546Sopenharmony_ci{
791bf215546Sopenharmony_ci   Operand op = instr->operands[index];
792bf215546Sopenharmony_ci   unsigned byte = op.physReg().byte();
793bf215546Sopenharmony_ci
794bf215546Sopenharmony_ci   if (instr->opcode == aco_opcode::p_as_uniform)
795bf215546Sopenharmony_ci      return byte == 0;
796bf215546Sopenharmony_ci   if (instr->isPseudo() && gfx_level >= GFX8)
797bf215546Sopenharmony_ci      return true;
798bf215546Sopenharmony_ci   if (instr->isSDWA())
799bf215546Sopenharmony_ci      return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 &&
800bf215546Sopenharmony_ci             byte % instr->sdwa().sel[index].size() == 0;
801bf215546Sopenharmony_ci   if (instr->isVOP3P()) {
802bf215546Sopenharmony_ci      bool fma_mix = instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
803bf215546Sopenharmony_ci                     instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
804bf215546Sopenharmony_ci                     instr->opcode == aco_opcode::v_fma_mix_f32;
805bf215546Sopenharmony_ci      return ((instr->vop3p().opsel_lo >> index) & 1) == (byte >> 1) &&
806bf215546Sopenharmony_ci             ((instr->vop3p().opsel_hi >> index) & 1) == (fma_mix || (byte >> 1));
807bf215546Sopenharmony_ci   }
808bf215546Sopenharmony_ci   if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, index))
809bf215546Sopenharmony_ci      return true;
810bf215546Sopenharmony_ci
811bf215546Sopenharmony_ci   switch (instr->opcode) {
812bf215546Sopenharmony_ci   case aco_opcode::v_cvt_f32_ubyte1:
813bf215546Sopenharmony_ci      if (byte == 1)
814bf215546Sopenharmony_ci         return true;
815bf215546Sopenharmony_ci      break;
816bf215546Sopenharmony_ci   case aco_opcode::v_cvt_f32_ubyte2:
817bf215546Sopenharmony_ci      if (byte == 2)
818bf215546Sopenharmony_ci         return true;
819bf215546Sopenharmony_ci      break;
820bf215546Sopenharmony_ci   case aco_opcode::v_cvt_f32_ubyte3:
821bf215546Sopenharmony_ci      if (byte == 3)
822bf215546Sopenharmony_ci         return true;
823bf215546Sopenharmony_ci      break;
824bf215546Sopenharmony_ci   case aco_opcode::ds_write_b8_d16_hi:
825bf215546Sopenharmony_ci   case aco_opcode::ds_write_b16_d16_hi:
826bf215546Sopenharmony_ci      if (byte == 2 && index == 1)
827bf215546Sopenharmony_ci         return true;
828bf215546Sopenharmony_ci      break;
829bf215546Sopenharmony_ci   case aco_opcode::buffer_store_byte_d16_hi:
830bf215546Sopenharmony_ci   case aco_opcode::buffer_store_short_d16_hi:
831bf215546Sopenharmony_ci   case aco_opcode::buffer_store_format_d16_hi_x:
832bf215546Sopenharmony_ci      if (byte == 2 && index == 3)
833bf215546Sopenharmony_ci         return true;
834bf215546Sopenharmony_ci      break;
835bf215546Sopenharmony_ci   case aco_opcode::flat_store_byte_d16_hi:
836bf215546Sopenharmony_ci   case aco_opcode::flat_store_short_d16_hi:
837bf215546Sopenharmony_ci   case aco_opcode::scratch_store_byte_d16_hi:
838bf215546Sopenharmony_ci   case aco_opcode::scratch_store_short_d16_hi:
839bf215546Sopenharmony_ci   case aco_opcode::global_store_byte_d16_hi:
840bf215546Sopenharmony_ci   case aco_opcode::global_store_short_d16_hi:
841bf215546Sopenharmony_ci      if (byte == 2 && index == 2)
842bf215546Sopenharmony_ci         return true;
843bf215546Sopenharmony_ci      break;
844bf215546Sopenharmony_ci   default: break;
845bf215546Sopenharmony_ci   }
846bf215546Sopenharmony_ci
847bf215546Sopenharmony_ci   return byte == 0;
848bf215546Sopenharmony_ci}
849bf215546Sopenharmony_ci
850bf215546Sopenharmony_cibool
851bf215546Sopenharmony_civalidate_subdword_definition(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr)
852bf215546Sopenharmony_ci{
853bf215546Sopenharmony_ci   Definition def = instr->definitions[0];
854bf215546Sopenharmony_ci   unsigned byte = def.physReg().byte();
855bf215546Sopenharmony_ci
856bf215546Sopenharmony_ci   if (instr->isPseudo() && gfx_level >= GFX8)
857bf215546Sopenharmony_ci      return true;
858bf215546Sopenharmony_ci   if (instr->isSDWA())
859bf215546Sopenharmony_ci      return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 &&
860bf215546Sopenharmony_ci             byte % instr->sdwa().dst_sel.size() == 0;
861bf215546Sopenharmony_ci   if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, -1))
862bf215546Sopenharmony_ci      return true;
863bf215546Sopenharmony_ci
864bf215546Sopenharmony_ci   switch (instr->opcode) {
865bf215546Sopenharmony_ci   case aco_opcode::v_fma_mixhi_f16:
866bf215546Sopenharmony_ci   case aco_opcode::buffer_load_ubyte_d16_hi:
867bf215546Sopenharmony_ci   case aco_opcode::buffer_load_sbyte_d16_hi:
868bf215546Sopenharmony_ci   case aco_opcode::buffer_load_short_d16_hi:
869bf215546Sopenharmony_ci   case aco_opcode::buffer_load_format_d16_hi_x:
870bf215546Sopenharmony_ci   case aco_opcode::flat_load_ubyte_d16_hi:
871bf215546Sopenharmony_ci   case aco_opcode::flat_load_short_d16_hi:
872bf215546Sopenharmony_ci   case aco_opcode::scratch_load_ubyte_d16_hi:
873bf215546Sopenharmony_ci   case aco_opcode::scratch_load_short_d16_hi:
874bf215546Sopenharmony_ci   case aco_opcode::global_load_ubyte_d16_hi:
875bf215546Sopenharmony_ci   case aco_opcode::global_load_short_d16_hi:
876bf215546Sopenharmony_ci   case aco_opcode::ds_read_u8_d16_hi:
877bf215546Sopenharmony_ci   case aco_opcode::ds_read_u16_d16_hi: return byte == 2;
878bf215546Sopenharmony_ci   default: break;
879bf215546Sopenharmony_ci   }
880bf215546Sopenharmony_ci
881bf215546Sopenharmony_ci   return byte == 0;
882bf215546Sopenharmony_ci}
883bf215546Sopenharmony_ci
884bf215546Sopenharmony_ciunsigned
885bf215546Sopenharmony_ciget_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, unsigned index)
886bf215546Sopenharmony_ci{
887bf215546Sopenharmony_ci   amd_gfx_level gfx_level = program->gfx_level;
888bf215546Sopenharmony_ci   Definition def = instr->definitions[index];
889bf215546Sopenharmony_ci
890bf215546Sopenharmony_ci   if (instr->isPseudo())
891bf215546Sopenharmony_ci      return gfx_level >= GFX8 ? def.bytes() : def.size() * 4u;
892bf215546Sopenharmony_ci   if (instr->isVALU()) {
893bf215546Sopenharmony_ci      assert(def.bytes() <= 2);
894bf215546Sopenharmony_ci      if (instr->isSDWA())
895bf215546Sopenharmony_ci         return instr->sdwa().dst_sel.size();
896bf215546Sopenharmony_ci
897bf215546Sopenharmony_ci      if (instr_is_16bit(gfx_level, instr->opcode))
898bf215546Sopenharmony_ci         return 2;
899bf215546Sopenharmony_ci
900bf215546Sopenharmony_ci      return 4;
901bf215546Sopenharmony_ci   }
902bf215546Sopenharmony_ci
903bf215546Sopenharmony_ci   if (instr->isMIMG()) {
904bf215546Sopenharmony_ci      assert(instr->mimg().d16);
905bf215546Sopenharmony_ci      return program->dev.sram_ecc_enabled ? def.size() * 4u : def.bytes();
906bf215546Sopenharmony_ci   }
907bf215546Sopenharmony_ci
908bf215546Sopenharmony_ci   switch (instr->opcode) {
909bf215546Sopenharmony_ci   case aco_opcode::buffer_load_ubyte_d16:
910bf215546Sopenharmony_ci   case aco_opcode::buffer_load_sbyte_d16:
911bf215546Sopenharmony_ci   case aco_opcode::buffer_load_short_d16:
912bf215546Sopenharmony_ci   case aco_opcode::buffer_load_format_d16_x:
913bf215546Sopenharmony_ci   case aco_opcode::tbuffer_load_format_d16_x:
914bf215546Sopenharmony_ci   case aco_opcode::flat_load_ubyte_d16:
915bf215546Sopenharmony_ci   case aco_opcode::flat_load_short_d16:
916bf215546Sopenharmony_ci   case aco_opcode::scratch_load_ubyte_d16:
917bf215546Sopenharmony_ci   case aco_opcode::scratch_load_short_d16:
918bf215546Sopenharmony_ci   case aco_opcode::global_load_ubyte_d16:
919bf215546Sopenharmony_ci   case aco_opcode::global_load_short_d16:
920bf215546Sopenharmony_ci   case aco_opcode::ds_read_u8_d16:
921bf215546Sopenharmony_ci   case aco_opcode::ds_read_u16_d16:
922bf215546Sopenharmony_ci   case aco_opcode::buffer_load_ubyte_d16_hi:
923bf215546Sopenharmony_ci   case aco_opcode::buffer_load_sbyte_d16_hi:
924bf215546Sopenharmony_ci   case aco_opcode::buffer_load_short_d16_hi:
925bf215546Sopenharmony_ci   case aco_opcode::buffer_load_format_d16_hi_x:
926bf215546Sopenharmony_ci   case aco_opcode::flat_load_ubyte_d16_hi:
927bf215546Sopenharmony_ci   case aco_opcode::flat_load_short_d16_hi:
928bf215546Sopenharmony_ci   case aco_opcode::scratch_load_ubyte_d16_hi:
929bf215546Sopenharmony_ci   case aco_opcode::scratch_load_short_d16_hi:
930bf215546Sopenharmony_ci   case aco_opcode::global_load_ubyte_d16_hi:
931bf215546Sopenharmony_ci   case aco_opcode::global_load_short_d16_hi:
932bf215546Sopenharmony_ci   case aco_opcode::ds_read_u8_d16_hi:
933bf215546Sopenharmony_ci   case aco_opcode::ds_read_u16_d16_hi: return program->dev.sram_ecc_enabled ? 4 : 2;
934bf215546Sopenharmony_ci   case aco_opcode::buffer_load_format_d16_xyz:
935bf215546Sopenharmony_ci   case aco_opcode::tbuffer_load_format_d16_xyz: return program->dev.sram_ecc_enabled ? 8 : 6;
936bf215546Sopenharmony_ci   default: return def.size() * 4;
937bf215546Sopenharmony_ci   }
938bf215546Sopenharmony_ci}
939bf215546Sopenharmony_ci
940bf215546Sopenharmony_cibool
941bf215546Sopenharmony_civalidate_instr_defs(Program* program, std::array<unsigned, 2048>& regs,
942bf215546Sopenharmony_ci                    const std::vector<Assignment>& assignments, const Location& loc,
943bf215546Sopenharmony_ci                    aco_ptr<Instruction>& instr)
944bf215546Sopenharmony_ci{
945bf215546Sopenharmony_ci   bool err = false;
946bf215546Sopenharmony_ci
947bf215546Sopenharmony_ci   for (unsigned i = 0; i < instr->definitions.size(); i++) {
948bf215546Sopenharmony_ci      Definition& def = instr->definitions[i];
949bf215546Sopenharmony_ci      if (!def.isTemp())
950bf215546Sopenharmony_ci         continue;
951bf215546Sopenharmony_ci      Temp tmp = def.getTemp();
952bf215546Sopenharmony_ci      PhysReg reg = assignments[tmp.id()].reg;
953bf215546Sopenharmony_ci      for (unsigned j = 0; j < tmp.bytes(); j++) {
954bf215546Sopenharmony_ci         if (regs[reg.reg_b + j])
955bf215546Sopenharmony_ci            err |=
956bf215546Sopenharmony_ci               ra_fail(program, loc, assignments[regs[reg.reg_b + j]].defloc,
957bf215546Sopenharmony_ci                       "Assignment of element %d of %%%d already taken by %%%d from instruction", i,
958bf215546Sopenharmony_ci                       tmp.id(), regs[reg.reg_b + j]);
959bf215546Sopenharmony_ci         regs[reg.reg_b + j] = tmp.id();
960bf215546Sopenharmony_ci      }
961bf215546Sopenharmony_ci      if (def.regClass().is_subdword() && def.bytes() < 4) {
962bf215546Sopenharmony_ci         unsigned written = get_subdword_bytes_written(program, instr, i);
963bf215546Sopenharmony_ci         /* If written=4, the instruction still might write the upper half. In that case, it's
964bf215546Sopenharmony_ci          * the lower half that isn't preserved */
965bf215546Sopenharmony_ci         for (unsigned j = reg.byte() & ~(written - 1); j < written; j++) {
966bf215546Sopenharmony_ci            unsigned written_reg = reg.reg() * 4u + j;
967bf215546Sopenharmony_ci            if (regs[written_reg] && regs[written_reg] != def.tempId())
968bf215546Sopenharmony_ci               err |= ra_fail(program, loc, assignments[regs[written_reg]].defloc,
969bf215546Sopenharmony_ci                              "Assignment of element %d of %%%d overwrites the full register "
970bf215546Sopenharmony_ci                              "taken by %%%d from instruction",
971bf215546Sopenharmony_ci                              i, tmp.id(), regs[written_reg]);
972bf215546Sopenharmony_ci         }
973bf215546Sopenharmony_ci      }
974bf215546Sopenharmony_ci   }
975bf215546Sopenharmony_ci
976bf215546Sopenharmony_ci   for (const Definition& def : instr->definitions) {
977bf215546Sopenharmony_ci      if (!def.isTemp())
978bf215546Sopenharmony_ci         continue;
979bf215546Sopenharmony_ci      if (def.isKill()) {
980bf215546Sopenharmony_ci         for (unsigned j = 0; j < def.getTemp().bytes(); j++)
981bf215546Sopenharmony_ci            regs[def.physReg().reg_b + j] = 0;
982bf215546Sopenharmony_ci      }
983bf215546Sopenharmony_ci   }
984bf215546Sopenharmony_ci
985bf215546Sopenharmony_ci   return err;
986bf215546Sopenharmony_ci}
987bf215546Sopenharmony_ci
988bf215546Sopenharmony_ci} /* end namespace */
989bf215546Sopenharmony_ci
990bf215546Sopenharmony_cibool
991bf215546Sopenharmony_civalidate_ra(Program* program)
992bf215546Sopenharmony_ci{
993bf215546Sopenharmony_ci   if (!(debug_flags & DEBUG_VALIDATE_RA))
994bf215546Sopenharmony_ci      return false;
995bf215546Sopenharmony_ci
996bf215546Sopenharmony_ci   bool err = false;
997bf215546Sopenharmony_ci   aco::live live_vars = aco::live_var_analysis(program);
998bf215546Sopenharmony_ci   std::vector<std::vector<Temp>> phi_sgpr_ops(program->blocks.size());
999bf215546Sopenharmony_ci   uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->num_waves);
1000bf215546Sopenharmony_ci
1001bf215546Sopenharmony_ci   std::vector<Assignment> assignments(program->peekAllocationId());
1002bf215546Sopenharmony_ci   for (Block& block : program->blocks) {
1003bf215546Sopenharmony_ci      Location loc;
1004bf215546Sopenharmony_ci      loc.block = &block;
1005bf215546Sopenharmony_ci      for (aco_ptr<Instruction>& instr : block.instructions) {
1006bf215546Sopenharmony_ci         if (instr->opcode == aco_opcode::p_phi) {
1007bf215546Sopenharmony_ci            for (unsigned i = 0; i < instr->operands.size(); i++) {
1008bf215546Sopenharmony_ci               if (instr->operands[i].isTemp() &&
1009bf215546Sopenharmony_ci                   instr->operands[i].getTemp().type() == RegType::sgpr &&
1010bf215546Sopenharmony_ci                   instr->operands[i].isFirstKill())
1011bf215546Sopenharmony_ci                  phi_sgpr_ops[block.logical_preds[i]].emplace_back(instr->operands[i].getTemp());
1012bf215546Sopenharmony_ci            }
1013bf215546Sopenharmony_ci         }
1014bf215546Sopenharmony_ci
1015bf215546Sopenharmony_ci         loc.instr = instr.get();
1016bf215546Sopenharmony_ci         for (unsigned i = 0; i < instr->operands.size(); i++) {
1017bf215546Sopenharmony_ci            Operand& op = instr->operands[i];
1018bf215546Sopenharmony_ci            if (!op.isTemp())
1019bf215546Sopenharmony_ci               continue;
1020bf215546Sopenharmony_ci            if (!op.isFixed())
1021bf215546Sopenharmony_ci               err |= ra_fail(program, loc, Location(), "Operand %d is not assigned a register", i);
1022bf215546Sopenharmony_ci            if (assignments[op.tempId()].valid && assignments[op.tempId()].reg != op.physReg())
1023bf215546Sopenharmony_ci               err |=
1024bf215546Sopenharmony_ci                  ra_fail(program, loc, assignments[op.tempId()].firstloc,
1025bf215546Sopenharmony_ci                          "Operand %d has an inconsistent register assignment with instruction", i);
1026bf215546Sopenharmony_ci            if ((op.getTemp().type() == RegType::vgpr &&
1027bf215546Sopenharmony_ci                 op.physReg().reg_b + op.bytes() > (256 + program->config->num_vgprs) * 4) ||
1028bf215546Sopenharmony_ci                (op.getTemp().type() == RegType::sgpr &&
1029bf215546Sopenharmony_ci                 op.physReg() + op.size() > program->config->num_sgprs &&
1030bf215546Sopenharmony_ci                 op.physReg() < sgpr_limit))
1031bf215546Sopenharmony_ci               err |= ra_fail(program, loc, assignments[op.tempId()].firstloc,
1032bf215546Sopenharmony_ci                              "Operand %d has an out-of-bounds register assignment", i);
1033bf215546Sopenharmony_ci            if (op.physReg() == vcc && !program->needs_vcc)
1034bf215546Sopenharmony_ci               err |= ra_fail(program, loc, Location(),
1035bf215546Sopenharmony_ci                              "Operand %d fixed to vcc but needs_vcc=false", i);
1036bf215546Sopenharmony_ci            if (op.regClass().is_subdword() &&
1037bf215546Sopenharmony_ci                !validate_subdword_operand(program->gfx_level, instr, i))
1038bf215546Sopenharmony_ci               err |= ra_fail(program, loc, Location(), "Operand %d not aligned correctly", i);
1039bf215546Sopenharmony_ci            if (!assignments[op.tempId()].firstloc.block)
1040bf215546Sopenharmony_ci               assignments[op.tempId()].firstloc = loc;
1041bf215546Sopenharmony_ci            if (!assignments[op.tempId()].defloc.block) {
1042bf215546Sopenharmony_ci               assignments[op.tempId()].reg = op.physReg();
1043bf215546Sopenharmony_ci               assignments[op.tempId()].valid = true;
1044bf215546Sopenharmony_ci            }
1045bf215546Sopenharmony_ci         }
1046bf215546Sopenharmony_ci
1047bf215546Sopenharmony_ci         for (unsigned i = 0; i < instr->definitions.size(); i++) {
1048bf215546Sopenharmony_ci            Definition& def = instr->definitions[i];
1049bf215546Sopenharmony_ci            if (!def.isTemp())
1050bf215546Sopenharmony_ci               continue;
1051bf215546Sopenharmony_ci            if (!def.isFixed())
1052bf215546Sopenharmony_ci               err |=
1053bf215546Sopenharmony_ci                  ra_fail(program, loc, Location(), "Definition %d is not assigned a register", i);
1054bf215546Sopenharmony_ci            if (assignments[def.tempId()].defloc.block)
1055bf215546Sopenharmony_ci               err |= ra_fail(program, loc, assignments[def.tempId()].defloc,
1056bf215546Sopenharmony_ci                              "Temporary %%%d also defined by instruction", def.tempId());
1057bf215546Sopenharmony_ci            if ((def.getTemp().type() == RegType::vgpr &&
1058bf215546Sopenharmony_ci                 def.physReg().reg_b + def.bytes() > (256 + program->config->num_vgprs) * 4) ||
1059bf215546Sopenharmony_ci                (def.getTemp().type() == RegType::sgpr &&
1060bf215546Sopenharmony_ci                 def.physReg() + def.size() > program->config->num_sgprs &&
1061bf215546Sopenharmony_ci                 def.physReg() < sgpr_limit))
1062bf215546Sopenharmony_ci               err |= ra_fail(program, loc, assignments[def.tempId()].firstloc,
1063bf215546Sopenharmony_ci                              "Definition %d has an out-of-bounds register assignment", i);
1064bf215546Sopenharmony_ci            if (def.physReg() == vcc && !program->needs_vcc)
1065bf215546Sopenharmony_ci               err |= ra_fail(program, loc, Location(),
1066bf215546Sopenharmony_ci                              "Definition %d fixed to vcc but needs_vcc=false", i);
1067bf215546Sopenharmony_ci            if (def.regClass().is_subdword() &&
1068bf215546Sopenharmony_ci                !validate_subdword_definition(program->gfx_level, instr))
1069bf215546Sopenharmony_ci               err |= ra_fail(program, loc, Location(), "Definition %d not aligned correctly", i);
1070bf215546Sopenharmony_ci            if (!assignments[def.tempId()].firstloc.block)
1071bf215546Sopenharmony_ci               assignments[def.tempId()].firstloc = loc;
1072bf215546Sopenharmony_ci            assignments[def.tempId()].defloc = loc;
1073bf215546Sopenharmony_ci            assignments[def.tempId()].reg = def.physReg();
1074bf215546Sopenharmony_ci            assignments[def.tempId()].valid = true;
1075bf215546Sopenharmony_ci         }
1076bf215546Sopenharmony_ci      }
1077bf215546Sopenharmony_ci   }
1078bf215546Sopenharmony_ci
1079bf215546Sopenharmony_ci   for (Block& block : program->blocks) {
1080bf215546Sopenharmony_ci      Location loc;
1081bf215546Sopenharmony_ci      loc.block = &block;
1082bf215546Sopenharmony_ci
1083bf215546Sopenharmony_ci      std::array<unsigned, 2048> regs; /* register file in bytes */
1084bf215546Sopenharmony_ci      regs.fill(0);
1085bf215546Sopenharmony_ci
1086bf215546Sopenharmony_ci      IDSet live = live_vars.live_out[block.index];
1087bf215546Sopenharmony_ci      /* remove killed p_phi sgpr operands */
1088bf215546Sopenharmony_ci      for (Temp tmp : phi_sgpr_ops[block.index])
1089bf215546Sopenharmony_ci         live.erase(tmp.id());
1090bf215546Sopenharmony_ci
1091bf215546Sopenharmony_ci      /* check live out */
1092bf215546Sopenharmony_ci      for (unsigned id : live) {
1093bf215546Sopenharmony_ci         Temp tmp(id, program->temp_rc[id]);
1094bf215546Sopenharmony_ci         PhysReg reg = assignments[id].reg;
1095bf215546Sopenharmony_ci         for (unsigned i = 0; i < tmp.bytes(); i++) {
1096bf215546Sopenharmony_ci            if (regs[reg.reg_b + i]) {
1097bf215546Sopenharmony_ci               err |= ra_fail(program, loc, Location(),
1098bf215546Sopenharmony_ci                              "Assignment of element %d of %%%d already taken by %%%d in live-out",
1099bf215546Sopenharmony_ci                              i, id, regs[reg.reg_b + i]);
1100bf215546Sopenharmony_ci            }
1101bf215546Sopenharmony_ci            regs[reg.reg_b + i] = id;
1102bf215546Sopenharmony_ci         }
1103bf215546Sopenharmony_ci      }
1104bf215546Sopenharmony_ci      regs.fill(0);
1105bf215546Sopenharmony_ci
1106bf215546Sopenharmony_ci      for (auto it = block.instructions.rbegin(); it != block.instructions.rend(); ++it) {
1107bf215546Sopenharmony_ci         aco_ptr<Instruction>& instr = *it;
1108bf215546Sopenharmony_ci
1109bf215546Sopenharmony_ci         /* check killed p_phi sgpr operands */
1110bf215546Sopenharmony_ci         if (instr->opcode == aco_opcode::p_logical_end) {
1111bf215546Sopenharmony_ci            for (Temp tmp : phi_sgpr_ops[block.index]) {
1112bf215546Sopenharmony_ci               PhysReg reg = assignments[tmp.id()].reg;
1113bf215546Sopenharmony_ci               for (unsigned i = 0; i < tmp.bytes(); i++) {
1114bf215546Sopenharmony_ci                  if (regs[reg.reg_b + i])
1115bf215546Sopenharmony_ci                     err |= ra_fail(
1116bf215546Sopenharmony_ci                        program, loc, Location(),
1117bf215546Sopenharmony_ci                        "Assignment of element %d of %%%d already taken by %%%d in live-out", i,
1118bf215546Sopenharmony_ci                        tmp.id(), regs[reg.reg_b + i]);
1119bf215546Sopenharmony_ci               }
1120bf215546Sopenharmony_ci               live.insert(tmp.id());
1121bf215546Sopenharmony_ci            }
1122bf215546Sopenharmony_ci         }
1123bf215546Sopenharmony_ci
1124bf215546Sopenharmony_ci         for (const Definition& def : instr->definitions) {
1125bf215546Sopenharmony_ci            if (!def.isTemp())
1126bf215546Sopenharmony_ci               continue;
1127bf215546Sopenharmony_ci            live.erase(def.tempId());
1128bf215546Sopenharmony_ci         }
1129bf215546Sopenharmony_ci
1130bf215546Sopenharmony_ci         /* don't count phi operands as live-in, since they are actually
1131bf215546Sopenharmony_ci          * killed when they are copied at the predecessor */
1132bf215546Sopenharmony_ci         if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
1133bf215546Sopenharmony_ci            for (const Operand& op : instr->operands) {
1134bf215546Sopenharmony_ci               if (!op.isTemp())
1135bf215546Sopenharmony_ci                  continue;
1136bf215546Sopenharmony_ci               live.insert(op.tempId());
1137bf215546Sopenharmony_ci            }
1138bf215546Sopenharmony_ci         }
1139bf215546Sopenharmony_ci      }
1140bf215546Sopenharmony_ci
1141bf215546Sopenharmony_ci      for (unsigned id : live) {
1142bf215546Sopenharmony_ci         Temp tmp(id, program->temp_rc[id]);
1143bf215546Sopenharmony_ci         PhysReg reg = assignments[id].reg;
1144bf215546Sopenharmony_ci         for (unsigned i = 0; i < tmp.bytes(); i++)
1145bf215546Sopenharmony_ci            regs[reg.reg_b + i] = id;
1146bf215546Sopenharmony_ci      }
1147bf215546Sopenharmony_ci
1148bf215546Sopenharmony_ci      for (aco_ptr<Instruction>& instr : block.instructions) {
1149bf215546Sopenharmony_ci         loc.instr = instr.get();
1150bf215546Sopenharmony_ci
1151bf215546Sopenharmony_ci         /* remove killed p_phi operands from regs */
1152bf215546Sopenharmony_ci         if (instr->opcode == aco_opcode::p_logical_end) {
1153bf215546Sopenharmony_ci            for (Temp tmp : phi_sgpr_ops[block.index]) {
1154bf215546Sopenharmony_ci               PhysReg reg = assignments[tmp.id()].reg;
1155bf215546Sopenharmony_ci               for (unsigned i = 0; i < tmp.bytes(); i++)
1156bf215546Sopenharmony_ci                  regs[reg.reg_b + i] = 0;
1157bf215546Sopenharmony_ci            }
1158bf215546Sopenharmony_ci         }
1159bf215546Sopenharmony_ci
1160bf215546Sopenharmony_ci         if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
1161bf215546Sopenharmony_ci            for (const Operand& op : instr->operands) {
1162bf215546Sopenharmony_ci               if (!op.isTemp())
1163bf215546Sopenharmony_ci                  continue;
1164bf215546Sopenharmony_ci               if (op.isFirstKillBeforeDef()) {
1165bf215546Sopenharmony_ci                  for (unsigned j = 0; j < op.getTemp().bytes(); j++)
1166bf215546Sopenharmony_ci                     regs[op.physReg().reg_b + j] = 0;
1167bf215546Sopenharmony_ci               }
1168bf215546Sopenharmony_ci            }
1169bf215546Sopenharmony_ci         }
1170bf215546Sopenharmony_ci
1171bf215546Sopenharmony_ci         if (!instr->isBranch() || block.linear_succs.size() != 1)
1172bf215546Sopenharmony_ci            err |= validate_instr_defs(program, regs, assignments, loc, instr);
1173bf215546Sopenharmony_ci
1174bf215546Sopenharmony_ci         if (!is_phi(instr)) {
1175bf215546Sopenharmony_ci            for (const Operand& op : instr->operands) {
1176bf215546Sopenharmony_ci               if (!op.isTemp())
1177bf215546Sopenharmony_ci                  continue;
1178bf215546Sopenharmony_ci               if (op.isLateKill() && op.isFirstKill()) {
1179bf215546Sopenharmony_ci                  for (unsigned j = 0; j < op.getTemp().bytes(); j++)
1180bf215546Sopenharmony_ci                     regs[op.physReg().reg_b + j] = 0;
1181bf215546Sopenharmony_ci               }
1182bf215546Sopenharmony_ci            }
1183bf215546Sopenharmony_ci         } else if (block.linear_preds.size() != 1 ||
1184bf215546Sopenharmony_ci                    program->blocks[block.linear_preds[0]].linear_succs.size() == 1) {
1185bf215546Sopenharmony_ci            for (unsigned pred : block.linear_preds) {
1186bf215546Sopenharmony_ci               aco_ptr<Instruction>& br = program->blocks[pred].instructions.back();
1187bf215546Sopenharmony_ci               assert(br->isBranch());
1188bf215546Sopenharmony_ci               err |= validate_instr_defs(program, regs, assignments, loc, br);
1189bf215546Sopenharmony_ci            }
1190bf215546Sopenharmony_ci         }
1191bf215546Sopenharmony_ci      }
1192bf215546Sopenharmony_ci   }
1193bf215546Sopenharmony_ci
1194bf215546Sopenharmony_ci   return err;
1195bf215546Sopenharmony_ci}
1196bf215546Sopenharmony_ci} // namespace aco
1197