1/*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include "aco_ir.h"
26
27#include "util/memstream.h"
28
29#include <array>
30#include <map>
31#include <set>
32#include <vector>
33
34namespace aco {
35
36static void
37aco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix,
38        const char* file, unsigned line, const char* fmt, va_list args)
39{
40   char* msg;
41
42   if (program->debug.shorten_messages) {
43      msg = ralloc_vasprintf(NULL, fmt, args);
44   } else {
45      msg = ralloc_strdup(NULL, prefix);
46      ralloc_asprintf_append(&msg, "    In file %s:%u\n", file, line);
47      ralloc_asprintf_append(&msg, "    ");
48      ralloc_vasprintf_append(&msg, fmt, args);
49   }
50
51   if (program->debug.func)
52      program->debug.func(program->debug.private_data, level, msg);
53
54   fprintf(program->debug.output, "%s\n", msg);
55
56   ralloc_free(msg);
57}
58
59void
60_aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...)
61{
62   va_list args;
63
64   va_start(args, fmt);
65   aco_log(program, ACO_COMPILER_DEBUG_LEVEL_PERFWARN, "ACO PERFWARN:\n", file, line, fmt, args);
66   va_end(args);
67}
68
69void
70_aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...)
71{
72   va_list args;
73
74   va_start(args, fmt);
75   aco_log(program, ACO_COMPILER_DEBUG_LEVEL_ERROR, "ACO ERROR:\n", file, line, fmt, args);
76   va_end(args);
77}
78
79bool
80validate_ir(Program* program)
81{
82   bool is_valid = true;
83   auto check = [&program, &is_valid](bool success, const char* msg,
84                                      aco::Instruction* instr) -> void
85   {
86      if (!success) {
87         char* out;
88         size_t outsize;
89         struct u_memstream mem;
90         u_memstream_open(&mem, &out, &outsize);
91         FILE* const memf = u_memstream_get(&mem);
92
93         fprintf(memf, "%s: ", msg);
94         aco_print_instr(instr, memf);
95         u_memstream_close(&mem);
96
97         aco_err(program, "%s", out);
98         free(out);
99
100         is_valid = false;
101      }
102   };
103
104   auto check_block = [&program, &is_valid](bool success, const char* msg,
105                                            aco::Block* block) -> void
106   {
107      if (!success) {
108         aco_err(program, "%s: BB%u", msg, block->index);
109         is_valid = false;
110      }
111   };
112
113   for (Block& block : program->blocks) {
114      for (aco_ptr<Instruction>& instr : block.instructions) {
115
116         /* check base format */
117         Format base_format = instr->format;
118         base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::SDWA);
119         base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP16);
120         base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP8);
121         if ((uint32_t)base_format & (uint32_t)Format::VOP1)
122            base_format = Format::VOP1;
123         else if ((uint32_t)base_format & (uint32_t)Format::VOP2)
124            base_format = Format::VOP2;
125         else if ((uint32_t)base_format & (uint32_t)Format::VOPC)
126            base_format = Format::VOPC;
127         else if ((uint32_t)base_format & (uint32_t)Format::VINTRP) {
128            if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
129                instr->opcode == aco_opcode::v_interp_p1lv_f16 ||
130                instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
131                instr->opcode == aco_opcode::v_interp_p2_f16) {
132               /* v_interp_*_fp16 are considered VINTRP by the compiler but
133                * they are emitted as VOP3.
134                */
135               base_format = Format::VOP3;
136            } else {
137               base_format = Format::VINTRP;
138            }
139         }
140         check(base_format == instr_info.format[(int)instr->opcode],
141               "Wrong base format for instruction", instr.get());
142
143         /* check VOP3 modifiers */
144         if (instr->isVOP3() && instr->format != Format::VOP3) {
145            check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
146                     base_format == Format::VOPC || base_format == Format::VINTRP,
147                  "Format cannot have VOP3/VOP3B applied", instr.get());
148         }
149
150         /* check SDWA */
151         if (instr->isSDWA()) {
152            check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
153                     base_format == Format::VOPC,
154                  "Format cannot have SDWA applied", instr.get());
155
156            check(program->gfx_level >= GFX8, "SDWA is GFX8 to GFX10.3 only", instr.get());
157            check(program->gfx_level < GFX11, "SDWA is GFX8 to GFX10.3 only", instr.get());
158
159            SDWA_instruction& sdwa = instr->sdwa();
160            check(sdwa.omod == 0 || program->gfx_level >= GFX9, "SDWA omod only supported on GFX9+",
161                  instr.get());
162            if (base_format == Format::VOPC) {
163               check(sdwa.clamp == false || program->gfx_level == GFX8,
164                     "SDWA VOPC clamp only supported on GFX8", instr.get());
165               check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
166                        program->gfx_level >= GFX9,
167                     "SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
168            } else {
169               const Definition& def = instr->definitions[0];
170               check(def.bytes() <= 4, "SDWA definitions must not be larger than 4 bytes",
171                     instr.get());
172               check(def.bytes() >= sdwa.dst_sel.size() + sdwa.dst_sel.offset(),
173                     "SDWA definition selection size must be at most definition size", instr.get());
174               check(
175                  sdwa.dst_sel.size() == 1 || sdwa.dst_sel.size() == 2 || sdwa.dst_sel.size() == 4,
176                  "SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
177               check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset",
178                     instr.get());
179               check(def.bytes() == 4 || def.bytes() == sdwa.dst_sel.size(),
180                     "SDWA dst_sel size must be definition size for subdword definitions",
181                     instr.get());
182               check(def.bytes() == 4 || sdwa.dst_sel.offset() == 0,
183                     "SDWA dst_sel offset must be 0 for subdword definitions", instr.get());
184            }
185
186            for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
187               const Operand& op = instr->operands[i];
188               check(op.bytes() <= 4, "SDWA operands must not be larger than 4 bytes", instr.get());
189               check(op.bytes() >= sdwa.sel[i].size() + sdwa.sel[i].offset(),
190                     "SDWA operand selection size must be at most operand size", instr.get());
191               check(sdwa.sel[i].size() == 1 || sdwa.sel[i].size() == 2 || sdwa.sel[i].size() == 4,
192                     "SDWA operand selection size must be 1, 2 or 4 bytes", instr.get());
193               check(sdwa.sel[i].offset() % sdwa.sel[i].size() == 0, "Invalid selection offset",
194                     instr.get());
195            }
196            if (instr->operands.size() >= 3) {
197               check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,
198                     "3rd operand must be fixed to vcc with SDWA", instr.get());
199            }
200            if (instr->definitions.size() >= 2) {
201               check(instr->definitions[1].isFixed() && instr->definitions[1].physReg() == vcc,
202                     "2nd definition must be fixed to vcc with SDWA", instr.get());
203            }
204
205            const bool sdwa_opcodes =
206               instr->opcode != aco_opcode::v_fmac_f32 && instr->opcode != aco_opcode::v_fmac_f16 &&
207               instr->opcode != aco_opcode::v_fmamk_f32 &&
208               instr->opcode != aco_opcode::v_fmaak_f32 &&
209               instr->opcode != aco_opcode::v_fmamk_f16 &&
210               instr->opcode != aco_opcode::v_fmaak_f16 &&
211               instr->opcode != aco_opcode::v_madmk_f32 &&
212               instr->opcode != aco_opcode::v_madak_f32 &&
213               instr->opcode != aco_opcode::v_madmk_f16 &&
214               instr->opcode != aco_opcode::v_madak_f16 &&
215               instr->opcode != aco_opcode::v_readfirstlane_b32 &&
216               instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32;
217
218            const bool feature_mac =
219               program->gfx_level == GFX8 &&
220               (instr->opcode == aco_opcode::v_mac_f32 && instr->opcode == aco_opcode::v_mac_f16);
221
222            check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());
223         }
224
225         /* check opsel */
226         if (instr->isVOP3()) {
227            VOP3_instruction& vop3 = instr->vop3();
228            check(vop3.opsel == 0 || program->gfx_level >= GFX9, "Opsel is only supported on GFX9+",
229                  instr.get());
230
231            for (unsigned i = 0; i < 3; i++) {
232               if (i >= instr->operands.size() ||
233                   (instr->operands[i].hasRegClass() &&
234                    instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))
235                  check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
236            }
237            if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
238               check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition",
239                     instr.get());
240         } else if (instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
241                    instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
242                    instr->opcode == aco_opcode::v_fma_mix_f32) {
243            check(instr->definitions[0].regClass() ==
244                     (instr->opcode == aco_opcode::v_fma_mix_f32 ? v1 : v2b),
245                  "v_fma_mix_f32/v_fma_mix_f16 must have v1/v2b definition", instr.get());
246         } else if (instr->isVOP3P()) {
247            VOP3P_instruction& vop3p = instr->vop3p();
248            for (unsigned i = 0; i < instr->operands.size(); i++) {
249               if (instr->operands[i].hasRegClass() &&
250                   instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())
251                  check((vop3p.opsel_lo & (1 << i)) == 0 && (vop3p.opsel_hi & (1 << i)) == 0,
252                        "Unexpected opsel for subdword operand", instr.get());
253            }
254            check(instr->definitions[0].regClass() == v1, "VOP3P must have v1 definition",
255                  instr.get());
256         }
257
258         /* check for undefs */
259         for (unsigned i = 0; i < instr->operands.size(); i++) {
260            if (instr->operands[i].isUndefined()) {
261               bool flat = instr->isFlatLike();
262               bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
263                                   instr->opcode == aco_opcode::p_create_vector ||
264                                   instr->opcode == aco_opcode::p_jump_to_epilog ||
265                                   (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
266                                   ((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
267                                   (instr->isScratch() && i == 0);
268               check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
269            } else {
270               check(instr->operands[i].isFixed() || instr->operands[i].isTemp() ||
271                        instr->operands[i].isConstant(),
272                     "Uninitialized Operand", instr.get());
273            }
274         }
275
276         /* check subdword definitions */
277         for (unsigned i = 0; i < instr->definitions.size(); i++) {
278            if (instr->definitions[i].regClass().is_subdword())
279               check(instr->definitions[i].bytes() <= 4 || instr->isPseudo() || instr->isVMEM(),
280                     "Only Pseudo and VMEM instructions can write subdword registers > 4 bytes",
281                     instr.get());
282         }
283
284         if (instr->isSALU() || instr->isVALU()) {
285            /* check literals */
286            Operand literal(s1);
287            for (unsigned i = 0; i < instr->operands.size(); i++) {
288               Operand op = instr->operands[i];
289               if (!op.isLiteral())
290                  continue;
291
292               check(!instr->isDPP() && !instr->isSDWA() &&
293                        (!instr->isVOP3() || program->gfx_level >= GFX10) &&
294                        (!instr->isVOP3P() || program->gfx_level >= GFX10),
295                     "Literal applied on wrong instruction format", instr.get());
296
297               check(literal.isUndefined() || (literal.size() == op.size() &&
298                                               literal.constantValue() == op.constantValue()),
299                     "Only 1 Literal allowed", instr.get());
300               literal = op;
301               check(instr->isSALU() || instr->isVOP3() || instr->isVOP3P() || i == 0 || i == 2,
302                     "Wrong source position for Literal argument", instr.get());
303            }
304
305            /* check num sgprs for VALU */
306            if (instr->isVALU()) {
307               bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 ||
308                                 instr->opcode == aco_opcode::v_lshrrev_b64 ||
309                                 instr->opcode == aco_opcode::v_ashrrev_i64;
310               unsigned const_bus_limit = 1;
311               if (program->gfx_level >= GFX10 && !is_shift64)
312                  const_bus_limit = 2;
313
314               uint32_t scalar_mask = instr->isVOP3() || instr->isVOP3P() ? 0x7 : 0x5;
315               if (instr->isSDWA())
316                  scalar_mask = program->gfx_level >= GFX9 ? 0x7 : 0x4;
317               else if (instr->isDPP())
318                  scalar_mask = 0x4;
319
320               if (instr->isVOPC() || instr->opcode == aco_opcode::v_readfirstlane_b32 ||
321                   instr->opcode == aco_opcode::v_readlane_b32 ||
322                   instr->opcode == aco_opcode::v_readlane_b32_e64) {
323                  check(instr->definitions[0].getTemp().type() == RegType::sgpr,
324                        "Wrong Definition type for VALU instruction", instr.get());
325               } else {
326                  check(instr->definitions[0].getTemp().type() == RegType::vgpr,
327                        "Wrong Definition type for VALU instruction", instr.get());
328               }
329
330               unsigned num_sgprs = 0;
331               unsigned sgpr[] = {0, 0};
332               for (unsigned i = 0; i < instr->operands.size(); i++) {
333                  Operand op = instr->operands[i];
334                  if (instr->opcode == aco_opcode::v_readfirstlane_b32 ||
335                      instr->opcode == aco_opcode::v_readlane_b32 ||
336                      instr->opcode == aco_opcode::v_readlane_b32_e64) {
337                     check(i != 1 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
338                              op.isConstant(),
339                           "Must be a SGPR or a constant", instr.get());
340                     check(i == 1 || (op.isTemp() && op.regClass().type() == RegType::vgpr &&
341                                      op.bytes() <= 4),
342                           "Wrong Operand type for VALU instruction", instr.get());
343                     continue;
344                  }
345                  if (instr->opcode == aco_opcode::v_permlane16_b32 ||
346                      instr->opcode == aco_opcode::v_permlanex16_b32) {
347                     check(i != 0 || (op.isTemp() && op.regClass().type() == RegType::vgpr),
348                           "Operand 0 of v_permlane must be VGPR", instr.get());
349                     check(i == 0 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
350                              op.isConstant(),
351                           "Lane select operands of v_permlane must be SGPR or constant",
352                           instr.get());
353                  }
354
355                  if (instr->opcode == aco_opcode::v_writelane_b32 ||
356                      instr->opcode == aco_opcode::v_writelane_b32_e64) {
357                     check(i != 2 || (op.isTemp() && op.regClass().type() == RegType::vgpr &&
358                                      op.bytes() <= 4),
359                           "Wrong Operand type for VALU instruction", instr.get());
360                     check(i == 2 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
361                              op.isConstant(),
362                           "Must be a SGPR or a constant", instr.get());
363                     continue;
364                  }
365                  if (op.isTemp() && instr->operands[i].regClass().type() == RegType::sgpr) {
366                     check(scalar_mask & (1 << i), "Wrong source position for SGPR argument",
367                           instr.get());
368
369                     if (op.tempId() != sgpr[0] && op.tempId() != sgpr[1]) {
370                        if (num_sgprs < 2)
371                           sgpr[num_sgprs++] = op.tempId();
372                     }
373                  }
374
375                  if (op.isConstant() && !op.isLiteral())
376                     check(scalar_mask & (1 << i), "Wrong source position for constant argument",
377                           instr.get());
378               }
379               check(num_sgprs + (literal.isUndefined() ? 0 : 1) <= const_bus_limit,
380                     "Too many SGPRs/literals", instr.get());
381            }
382
383            if (instr->isSOP1() || instr->isSOP2()) {
384               if (!instr->definitions.empty())
385                  check(instr->definitions[0].getTemp().type() == RegType::sgpr,
386                        "Wrong Definition type for SALU instruction", instr.get());
387               for (const Operand& op : instr->operands) {
388                  check(op.isConstant() || op.regClass().type() <= RegType::sgpr,
389                        "Wrong Operand type for SALU instruction", instr.get());
390               }
391            }
392         }
393
394         switch (instr->format) {
395         case Format::PSEUDO: {
396            if (instr->opcode == aco_opcode::p_create_vector) {
397               unsigned size = 0;
398               for (const Operand& op : instr->operands) {
399                  check(op.bytes() < 4 || size % 4 == 0, "Operand is not aligned", instr.get());
400                  size += op.bytes();
401               }
402               check(size == instr->definitions[0].bytes(),
403                     "Definition size does not match operand sizes", instr.get());
404               if (instr->definitions[0].getTemp().type() == RegType::sgpr) {
405                  for (const Operand& op : instr->operands) {
406                     check(op.isConstant() || op.regClass().type() == RegType::sgpr,
407                           "Wrong Operand type for scalar vector", instr.get());
408                  }
409               }
410            } else if (instr->opcode == aco_opcode::p_extract_vector) {
411               check((instr->operands[0].isTemp()) && instr->operands[1].isConstant(),
412                     "Wrong Operand types", instr.get());
413               check((instr->operands[1].constantValue() + 1) * instr->definitions[0].bytes() <=
414                        instr->operands[0].bytes(),
415                     "Index out of range", instr.get());
416               check(instr->definitions[0].getTemp().type() == RegType::vgpr ||
417                        instr->operands[0].regClass().type() == RegType::sgpr,
418                     "Cannot extract SGPR value from VGPR vector", instr.get());
419               check(program->gfx_level >= GFX9 ||
420                        !instr->definitions[0].regClass().is_subdword() ||
421                        instr->operands[0].regClass().type() == RegType::vgpr,
422                     "Cannot extract subdword from SGPR before GFX9+", instr.get());
423            } else if (instr->opcode == aco_opcode::p_split_vector) {
424               check(instr->operands[0].isTemp(), "Operand must be a temporary", instr.get());
425               unsigned size = 0;
426               for (const Definition& def : instr->definitions) {
427                  size += def.bytes();
428               }
429               check(size == instr->operands[0].bytes(),
430                     "Operand size does not match definition sizes", instr.get());
431               if (instr->operands[0].getTemp().type() == RegType::vgpr) {
432                  for (const Definition& def : instr->definitions)
433                     check(def.regClass().type() == RegType::vgpr,
434                           "Wrong Definition type for VGPR split_vector", instr.get());
435               } else {
436                  for (const Definition& def : instr->definitions)
437                     check(program->gfx_level >= GFX9 || !def.regClass().is_subdword(),
438                           "Cannot split SGPR into subdword VGPRs before GFX9+", instr.get());
439               }
440            } else if (instr->opcode == aco_opcode::p_parallelcopy) {
441               check(instr->definitions.size() == instr->operands.size(),
442                     "Number of Operands does not match number of Definitions", instr.get());
443               for (unsigned i = 0; i < instr->operands.size(); i++) {
444                  check(instr->definitions[i].bytes() == instr->operands[i].bytes(),
445                        "Operand and Definition size must match", instr.get());
446                  if (instr->operands[i].isTemp()) {
447                     check((instr->definitions[i].getTemp().type() ==
448                            instr->operands[i].regClass().type()) ||
449                              (instr->definitions[i].getTemp().type() == RegType::vgpr &&
450                               instr->operands[i].regClass().type() == RegType::sgpr),
451                           "Operand and Definition types do not match", instr.get());
452                     check(instr->definitions[i].regClass().is_linear_vgpr() ==
453                              instr->operands[i].regClass().is_linear_vgpr(),
454                           "Operand and Definition types do not match", instr.get());
455                  } else {
456                     check(!instr->definitions[i].regClass().is_linear_vgpr(),
457                           "Can only copy linear VGPRs into linear VGPRs, not constant/undef",
458                           instr.get());
459                  }
460               }
461            } else if (instr->opcode == aco_opcode::p_phi) {
462               check(instr->operands.size() == block.logical_preds.size(),
463                     "Number of Operands does not match number of predecessors", instr.get());
464               check(instr->definitions[0].getTemp().type() == RegType::vgpr,
465                     "Logical Phi Definition must be vgpr", instr.get());
466               for (const Operand& op : instr->operands)
467                  check(instr->definitions[0].size() == op.size(),
468                        "Operand sizes must match Definition size", instr.get());
469            } else if (instr->opcode == aco_opcode::p_linear_phi) {
470               for (const Operand& op : instr->operands) {
471                  check(!op.isTemp() || op.getTemp().is_linear(), "Wrong Operand type",
472                        instr.get());
473                  check(instr->definitions[0].size() == op.size(),
474                        "Operand sizes must match Definition size", instr.get());
475               }
476               check(instr->operands.size() == block.linear_preds.size(),
477                     "Number of Operands does not match number of predecessors", instr.get());
478            } else if (instr->opcode == aco_opcode::p_extract ||
479                       instr->opcode == aco_opcode::p_insert) {
480               check(instr->operands[0].isTemp(), "Data operand must be temporary", instr.get());
481               check(instr->operands[1].isConstant(), "Index must be constant", instr.get());
482               if (instr->opcode == aco_opcode::p_extract)
483                  check(instr->operands[3].isConstant(), "Sign-extend flag must be constant",
484                        instr.get());
485
486               check(instr->definitions[0].getTemp().type() != RegType::sgpr ||
487                        instr->operands[0].getTemp().type() == RegType::sgpr,
488                     "Can't extract/insert VGPR to SGPR", instr.get());
489
490               if (instr->opcode == aco_opcode::p_insert)
491                  check(instr->operands[0].bytes() == instr->definitions[0].bytes(),
492                        "Sizes of p_insert data operand and definition must match", instr.get());
493
494               if (instr->definitions[0].getTemp().type() == RegType::sgpr)
495                  check(instr->definitions.size() >= 2 && instr->definitions[1].isFixed() &&
496                           instr->definitions[1].physReg() == scc,
497                        "SGPR extract/insert needs an SCC definition", instr.get());
498
499               unsigned data_bits = instr->operands[0].getTemp().bytes() * 8u;
500               unsigned op_bits = instr->operands[2].constantValue();
501
502               if (instr->opcode == aco_opcode::p_insert) {
503                  check(op_bits == 8 || op_bits == 16, "Size must be 8 or 16", instr.get());
504                  check(op_bits < data_bits, "Size must be smaller than source", instr.get());
505               } else if (instr->opcode == aco_opcode::p_extract) {
506                  check(op_bits == 8 || op_bits == 16 || op_bits == 32,
507                        "Size must be 8 or 16 or 32", instr.get());
508                  check(data_bits >= op_bits, "Can't extract more bits than what the data has.",
509                        instr.get());
510               }
511
512               unsigned comp = data_bits / MAX2(op_bits, 1);
513               check(instr->operands[1].constantValue() < comp, "Index must be in-bounds",
514                     instr.get());
515            } else if (instr->opcode == aco_opcode::p_jump_to_epilog) {
516               check(instr->definitions.size() == 0, "p_jump_to_epilog must have 0 definitions",
517                     instr.get());
518               check(instr->operands.size() > 0 &&
519                        instr->operands[0].getTemp().type() == RegType::sgpr &&
520                        instr->operands[0].getTemp().size() == 2,
521                     "First operand of p_jump_to_epilog must be a SGPR", instr.get());
522               for (unsigned i = 1; i < instr->operands.size(); i++) {
523                  check(instr->operands[i].getTemp().type() == RegType::vgpr ||
524                           instr->operands[i].isUndefined(),
525                        "Other operands of p_jump_to_epilog must be VGPRs or undef", instr.get());
526               }
527            }
528            break;
529         }
530         case Format::PSEUDO_REDUCTION: {
531            for (const Operand& op : instr->operands)
532               check(op.regClass().type() == RegType::vgpr,
533                     "All operands of PSEUDO_REDUCTION instructions must be in VGPRs.",
534                     instr.get());
535
536            if (instr->opcode == aco_opcode::p_reduce &&
537                instr->reduction().cluster_size == program->wave_size)
538               check(instr->definitions[0].regClass().type() == RegType::sgpr ||
539                        program->wave_size == 32,
540                     "The result of unclustered reductions must go into an SGPR.", instr.get());
541            else
542               check(instr->definitions[0].regClass().type() == RegType::vgpr,
543                     "The result of scans and clustered reductions must go into a VGPR.",
544                     instr.get());
545
546            break;
547         }
548         case Format::SMEM: {
549            if (instr->operands.size() >= 1)
550               check((instr->operands[0].isFixed() && !instr->operands[0].isConstant()) ||
551                        (instr->operands[0].isTemp() &&
552                         instr->operands[0].regClass().type() == RegType::sgpr),
553                     "SMEM operands must be sgpr", instr.get());
554            if (instr->operands.size() >= 2)
555               check(instr->operands[1].isConstant() ||
556                        (instr->operands[1].isTemp() &&
557                         instr->operands[1].regClass().type() == RegType::sgpr),
558                     "SMEM offset must be constant or sgpr", instr.get());
559            if (!instr->definitions.empty())
560               check(instr->definitions[0].getTemp().type() == RegType::sgpr,
561                     "SMEM result must be sgpr", instr.get());
562            break;
563         }
564         case Format::MTBUF:
565         case Format::MUBUF: {
566            check(instr->operands.size() > 1, "VMEM instructions must have at least one operand",
567                  instr.get());
568            check(instr->operands[1].hasRegClass() &&
569                     instr->operands[1].regClass().type() == RegType::vgpr,
570                  "VADDR must be in vgpr for VMEM instructions", instr.get());
571            check(
572               instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::sgpr,
573               "VMEM resource constant must be sgpr", instr.get());
574            check(instr->operands.size() < 4 ||
575                     (instr->operands[3].isTemp() &&
576                      instr->operands[3].regClass().type() == RegType::vgpr),
577                  "VMEM write data must be vgpr", instr.get());
578
579            const bool d16 = instr->opcode == aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
580                             instr->opcode == aco_opcode::buffer_load_ubyte ||
581                             instr->opcode == aco_opcode::buffer_load_sbyte ||
582                             instr->opcode == aco_opcode::buffer_load_ushort ||
583                             instr->opcode == aco_opcode::buffer_load_sshort ||
584                             instr->opcode == aco_opcode::buffer_load_ubyte_d16 ||
585                             instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi ||
586                             instr->opcode == aco_opcode::buffer_load_sbyte_d16 ||
587                             instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi ||
588                             instr->opcode == aco_opcode::buffer_load_short_d16 ||
589                             instr->opcode == aco_opcode::buffer_load_short_d16_hi ||
590                             instr->opcode == aco_opcode::buffer_load_format_d16_x ||
591                             instr->opcode == aco_opcode::buffer_load_format_d16_hi_x ||
592                             instr->opcode == aco_opcode::buffer_load_format_d16_xy ||
593                             instr->opcode == aco_opcode::buffer_load_format_d16_xyz ||
594                             instr->opcode == aco_opcode::buffer_load_format_d16_xyzw ||
595                             instr->opcode == aco_opcode::tbuffer_load_format_d16_x ||
596                             instr->opcode == aco_opcode::tbuffer_load_format_d16_xy ||
597                             instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz ||
598                             instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw;
599            if (instr->definitions.size()) {
600               check(instr->definitions[0].isTemp() &&
601                        instr->definitions[0].regClass().type() == RegType::vgpr,
602                     "VMEM definitions[0] (VDATA) must be VGPR", instr.get());
603               check(d16 || !instr->definitions[0].regClass().is_subdword(),
604                     "Only D16 opcodes can load subdword values.", instr.get());
605               check(instr->definitions[0].bytes() <= 8 || !d16,
606                     "D16 opcodes can only load up to 8 bytes.", instr.get());
607            }
608            break;
609         }
610         case Format::MIMG: {
611            check(instr->operands.size() >= 4, "MIMG instructions must have at least 4 operands",
612                  instr.get());
613            check(instr->operands[0].hasRegClass() &&
614                     (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),
615                  "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());
616            if (instr->operands[1].hasRegClass())
617               check(instr->operands[1].regClass() == s4,
618                     "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());
619            if (!instr->operands[2].isUndefined()) {
620               bool is_cmpswap = instr->opcode == aco_opcode::image_atomic_cmpswap ||
621                                 instr->opcode == aco_opcode::image_atomic_fcmpswap;
622               check(instr->definitions.empty() ||
623                        (instr->definitions[0].regClass() == instr->operands[2].regClass() ||
624                         is_cmpswap),
625                     "MIMG operands[2] (VDATA) must be the same as definitions[0] for atomics and "
626                     "TFE/LWE loads",
627                     instr.get());
628            }
629            check(instr->operands.size() == 4 || program->gfx_level >= GFX10,
630                  "NSA is only supported on GFX10+", instr.get());
631            for (unsigned i = 3; i < instr->operands.size(); i++) {
632               if (instr->operands.size() == 4) {
633                  check(instr->operands[i].hasRegClass() &&
634                           instr->operands[i].regClass().type() == RegType::vgpr,
635                        "MIMG operands[3] (VADDR) must be VGPR", instr.get());
636               } else {
637                  check(instr->operands[i].regClass() == v1, "MIMG VADDR must be v1 if NSA is used",
638                        instr.get());
639               }
640            }
641
642            if (instr->definitions.size()) {
643               check(instr->definitions[0].isTemp() &&
644                        instr->definitions[0].regClass().type() == RegType::vgpr,
645                     "MIMG definitions[0] (VDATA) must be VGPR", instr.get());
646               check(instr->mimg().d16 || !instr->definitions[0].regClass().is_subdword(),
647                     "Only D16 MIMG instructions can load subdword values.", instr.get());
648               check(instr->definitions[0].bytes() <= 8 || !instr->mimg().d16,
649                     "D16 MIMG instructions can only load up to 8 bytes.", instr.get());
650            }
651            break;
652         }
653         case Format::DS: {
654            for (const Operand& op : instr->operands) {
655               check((op.isTemp() && op.regClass().type() == RegType::vgpr) || op.physReg() == m0,
656                     "Only VGPRs are valid DS instruction operands", instr.get());
657            }
658            if (!instr->definitions.empty())
659               check(instr->definitions[0].getTemp().type() == RegType::vgpr,
660                     "DS instruction must return VGPR", instr.get());
661            break;
662         }
663         case Format::EXP: {
664            for (unsigned i = 0; i < 4; i++)
665               check(instr->operands[i].hasRegClass() &&
666                        instr->operands[i].regClass().type() == RegType::vgpr,
667                     "Only VGPRs are valid Export arguments", instr.get());
668            break;
669         }
670         case Format::FLAT:
671            check(instr->operands[1].isUndefined(), "Flat instructions don't support SADDR",
672                  instr.get());
673            FALLTHROUGH;
674         case Format::GLOBAL:
675            check(
676               instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::vgpr,
677               "FLAT/GLOBAL address must be vgpr", instr.get());
678            FALLTHROUGH;
679         case Format::SCRATCH: {
680            check(instr->operands[0].hasRegClass() &&
681                     instr->operands[0].regClass().type() == RegType::vgpr,
682                  "FLAT/GLOBAL/SCRATCH address must be undefined or vgpr", instr.get());
683            check(instr->operands[1].hasRegClass() &&
684                     instr->operands[1].regClass().type() == RegType::sgpr,
685                  "FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr.get());
686            if (instr->format == Format::SCRATCH && program->gfx_level < GFX10_3)
687               check(instr->operands[0].isTemp() || instr->operands[1].isTemp(),
688                     "SCRATCH must have either SADDR or ADDR operand", instr.get());
689            if (!instr->definitions.empty())
690               check(instr->definitions[0].getTemp().type() == RegType::vgpr,
691                     "FLAT/GLOBAL/SCRATCH result must be vgpr", instr.get());
692            else
693               check(instr->operands[2].regClass().type() == RegType::vgpr,
694                     "FLAT/GLOBAL/SCRATCH data must be vgpr", instr.get());
695            break;
696         }
697         default: break;
698         }
699      }
700   }
701
702   /* validate CFG */
703   for (unsigned i = 0; i < program->blocks.size(); i++) {
704      Block& block = program->blocks[i];
705      check_block(block.index == i, "block.index must match actual index", &block);
706
707      /* predecessors/successors should be sorted */
708      for (unsigned j = 0; j + 1 < block.linear_preds.size(); j++)
709         check_block(block.linear_preds[j] < block.linear_preds[j + 1],
710                     "linear predecessors must be sorted", &block);
711      for (unsigned j = 0; j + 1 < block.logical_preds.size(); j++)
712         check_block(block.logical_preds[j] < block.logical_preds[j + 1],
713                     "logical predecessors must be sorted", &block);
714      for (unsigned j = 0; j + 1 < block.linear_succs.size(); j++)
715         check_block(block.linear_succs[j] < block.linear_succs[j + 1],
716                     "linear successors must be sorted", &block);
717      for (unsigned j = 0; j + 1 < block.logical_succs.size(); j++)
718         check_block(block.logical_succs[j] < block.logical_succs[j + 1],
719                     "logical successors must be sorted", &block);
720
721      /* critical edges are not allowed */
722      if (block.linear_preds.size() > 1) {
723         for (unsigned pred : block.linear_preds)
724            check_block(program->blocks[pred].linear_succs.size() == 1,
725                        "linear critical edges are not allowed", &program->blocks[pred]);
726         for (unsigned pred : block.logical_preds)
727            check_block(program->blocks[pred].logical_succs.size() == 1,
728                        "logical critical edges are not allowed", &program->blocks[pred]);
729      }
730   }
731
732   return is_valid;
733}
734
735/* RA validation */
736namespace {
737
738struct Location {
739   Location() : block(NULL), instr(NULL) {}
740
741   Block* block;
742   Instruction* instr; // NULL if it's the block's live-in
743};
744
745struct Assignment {
746   Location defloc;
747   Location firstloc;
748   PhysReg reg;
749   bool valid;
750};
751
752bool
753ra_fail(Program* program, Location loc, Location loc2, const char* fmt, ...)
754{
755   va_list args;
756   va_start(args, fmt);
757   char msg[1024];
758   vsprintf(msg, fmt, args);
759   va_end(args);
760
761   char* out;
762   size_t outsize;
763   struct u_memstream mem;
764   u_memstream_open(&mem, &out, &outsize);
765   FILE* const memf = u_memstream_get(&mem);
766
767   fprintf(memf, "RA error found at instruction in BB%d:\n", loc.block->index);
768   if (loc.instr) {
769      aco_print_instr(loc.instr, memf);
770      fprintf(memf, "\n%s", msg);
771   } else {
772      fprintf(memf, "%s", msg);
773   }
774   if (loc2.block) {
775      fprintf(memf, " in BB%d:\n", loc2.block->index);
776      aco_print_instr(loc2.instr, memf);
777   }
778   fprintf(memf, "\n\n");
779   u_memstream_close(&mem);
780
781   aco_err(program, "%s", out);
782   free(out);
783
784   return true;
785}
786
787bool
788validate_subdword_operand(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
789                          unsigned index)
790{
791   Operand op = instr->operands[index];
792   unsigned byte = op.physReg().byte();
793
794   if (instr->opcode == aco_opcode::p_as_uniform)
795      return byte == 0;
796   if (instr->isPseudo() && gfx_level >= GFX8)
797      return true;
798   if (instr->isSDWA())
799      return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 &&
800             byte % instr->sdwa().sel[index].size() == 0;
801   if (instr->isVOP3P()) {
802      bool fma_mix = instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
803                     instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
804                     instr->opcode == aco_opcode::v_fma_mix_f32;
805      return ((instr->vop3p().opsel_lo >> index) & 1) == (byte >> 1) &&
806             ((instr->vop3p().opsel_hi >> index) & 1) == (fma_mix || (byte >> 1));
807   }
808   if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, index))
809      return true;
810
811   switch (instr->opcode) {
812   case aco_opcode::v_cvt_f32_ubyte1:
813      if (byte == 1)
814         return true;
815      break;
816   case aco_opcode::v_cvt_f32_ubyte2:
817      if (byte == 2)
818         return true;
819      break;
820   case aco_opcode::v_cvt_f32_ubyte3:
821      if (byte == 3)
822         return true;
823      break;
824   case aco_opcode::ds_write_b8_d16_hi:
825   case aco_opcode::ds_write_b16_d16_hi:
826      if (byte == 2 && index == 1)
827         return true;
828      break;
829   case aco_opcode::buffer_store_byte_d16_hi:
830   case aco_opcode::buffer_store_short_d16_hi:
831   case aco_opcode::buffer_store_format_d16_hi_x:
832      if (byte == 2 && index == 3)
833         return true;
834      break;
835   case aco_opcode::flat_store_byte_d16_hi:
836   case aco_opcode::flat_store_short_d16_hi:
837   case aco_opcode::scratch_store_byte_d16_hi:
838   case aco_opcode::scratch_store_short_d16_hi:
839   case aco_opcode::global_store_byte_d16_hi:
840   case aco_opcode::global_store_short_d16_hi:
841      if (byte == 2 && index == 2)
842         return true;
843      break;
844   default: break;
845   }
846
847   return byte == 0;
848}
849
850bool
851validate_subdword_definition(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr)
852{
853   Definition def = instr->definitions[0];
854   unsigned byte = def.physReg().byte();
855
856   if (instr->isPseudo() && gfx_level >= GFX8)
857      return true;
858   if (instr->isSDWA())
859      return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 &&
860             byte % instr->sdwa().dst_sel.size() == 0;
861   if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, -1))
862      return true;
863
864   switch (instr->opcode) {
865   case aco_opcode::v_fma_mixhi_f16:
866   case aco_opcode::buffer_load_ubyte_d16_hi:
867   case aco_opcode::buffer_load_sbyte_d16_hi:
868   case aco_opcode::buffer_load_short_d16_hi:
869   case aco_opcode::buffer_load_format_d16_hi_x:
870   case aco_opcode::flat_load_ubyte_d16_hi:
871   case aco_opcode::flat_load_short_d16_hi:
872   case aco_opcode::scratch_load_ubyte_d16_hi:
873   case aco_opcode::scratch_load_short_d16_hi:
874   case aco_opcode::global_load_ubyte_d16_hi:
875   case aco_opcode::global_load_short_d16_hi:
876   case aco_opcode::ds_read_u8_d16_hi:
877   case aco_opcode::ds_read_u16_d16_hi: return byte == 2;
878   default: break;
879   }
880
881   return byte == 0;
882}
883
884unsigned
885get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, unsigned index)
886{
887   amd_gfx_level gfx_level = program->gfx_level;
888   Definition def = instr->definitions[index];
889
890   if (instr->isPseudo())
891      return gfx_level >= GFX8 ? def.bytes() : def.size() * 4u;
892   if (instr->isVALU()) {
893      assert(def.bytes() <= 2);
894      if (instr->isSDWA())
895         return instr->sdwa().dst_sel.size();
896
897      if (instr_is_16bit(gfx_level, instr->opcode))
898         return 2;
899
900      return 4;
901   }
902
903   if (instr->isMIMG()) {
904      assert(instr->mimg().d16);
905      return program->dev.sram_ecc_enabled ? def.size() * 4u : def.bytes();
906   }
907
908   switch (instr->opcode) {
909   case aco_opcode::buffer_load_ubyte_d16:
910   case aco_opcode::buffer_load_sbyte_d16:
911   case aco_opcode::buffer_load_short_d16:
912   case aco_opcode::buffer_load_format_d16_x:
913   case aco_opcode::tbuffer_load_format_d16_x:
914   case aco_opcode::flat_load_ubyte_d16:
915   case aco_opcode::flat_load_short_d16:
916   case aco_opcode::scratch_load_ubyte_d16:
917   case aco_opcode::scratch_load_short_d16:
918   case aco_opcode::global_load_ubyte_d16:
919   case aco_opcode::global_load_short_d16:
920   case aco_opcode::ds_read_u8_d16:
921   case aco_opcode::ds_read_u16_d16:
922   case aco_opcode::buffer_load_ubyte_d16_hi:
923   case aco_opcode::buffer_load_sbyte_d16_hi:
924   case aco_opcode::buffer_load_short_d16_hi:
925   case aco_opcode::buffer_load_format_d16_hi_x:
926   case aco_opcode::flat_load_ubyte_d16_hi:
927   case aco_opcode::flat_load_short_d16_hi:
928   case aco_opcode::scratch_load_ubyte_d16_hi:
929   case aco_opcode::scratch_load_short_d16_hi:
930   case aco_opcode::global_load_ubyte_d16_hi:
931   case aco_opcode::global_load_short_d16_hi:
932   case aco_opcode::ds_read_u8_d16_hi:
933   case aco_opcode::ds_read_u16_d16_hi: return program->dev.sram_ecc_enabled ? 4 : 2;
934   case aco_opcode::buffer_load_format_d16_xyz:
935   case aco_opcode::tbuffer_load_format_d16_xyz: return program->dev.sram_ecc_enabled ? 8 : 6;
936   default: return def.size() * 4;
937   }
938}
939
940bool
941validate_instr_defs(Program* program, std::array<unsigned, 2048>& regs,
942                    const std::vector<Assignment>& assignments, const Location& loc,
943                    aco_ptr<Instruction>& instr)
944{
945   bool err = false;
946
947   for (unsigned i = 0; i < instr->definitions.size(); i++) {
948      Definition& def = instr->definitions[i];
949      if (!def.isTemp())
950         continue;
951      Temp tmp = def.getTemp();
952      PhysReg reg = assignments[tmp.id()].reg;
953      for (unsigned j = 0; j < tmp.bytes(); j++) {
954         if (regs[reg.reg_b + j])
955            err |=
956               ra_fail(program, loc, assignments[regs[reg.reg_b + j]].defloc,
957                       "Assignment of element %d of %%%d already taken by %%%d from instruction", i,
958                       tmp.id(), regs[reg.reg_b + j]);
959         regs[reg.reg_b + j] = tmp.id();
960      }
961      if (def.regClass().is_subdword() && def.bytes() < 4) {
962         unsigned written = get_subdword_bytes_written(program, instr, i);
963         /* If written=4, the instruction still might write the upper half. In that case, it's
964          * the lower half that isn't preserved */
965         for (unsigned j = reg.byte() & ~(written - 1); j < written; j++) {
966            unsigned written_reg = reg.reg() * 4u + j;
967            if (regs[written_reg] && regs[written_reg] != def.tempId())
968               err |= ra_fail(program, loc, assignments[regs[written_reg]].defloc,
969                              "Assignment of element %d of %%%d overwrites the full register "
970                              "taken by %%%d from instruction",
971                              i, tmp.id(), regs[written_reg]);
972         }
973      }
974   }
975
976   for (const Definition& def : instr->definitions) {
977      if (!def.isTemp())
978         continue;
979      if (def.isKill()) {
980         for (unsigned j = 0; j < def.getTemp().bytes(); j++)
981            regs[def.physReg().reg_b + j] = 0;
982      }
983   }
984
985   return err;
986}
987
988} /* end namespace */
989
/* Validates the register assignment of the whole program.
 *
 * Does nothing and returns false unless DEBUG_VALIDATE_RA is set in
 * debug_flags. Otherwise every problem found is reported through ra_fail() and
 * the function returns true if at least one was reported.
 *
 * The work is done in two passes:
 *  1. A walk over every instruction that checks each temporary operand and
 *     definition in isolation (register assigned, consistent with what was
 *     recorded earlier, within bounds, vcc permitted, subdword placement) and
 *     records the register of every temporary in `assignments`.
 *  2. A per-block simulation of a byte-granular register file (`regs`) that
 *     looks for temporaries overlapping in the live-out set and for
 *     definitions landing on bytes that still belong to another temporary.
 */
bool
validate_ra(Program* program)
{
   if (!(debug_flags & DEBUG_VALIDATE_RA))
      return false;

   bool err = false;
   aco::live live_vars = aco::live_var_analysis(program);
   /* Per block index: sgpr temporaries that a p_phi uses as a first-kill
    * operand coming from that (logical predecessor) block. */
   std::vector<std::vector<Temp>> phi_sgpr_ops(program->blocks.size());
   /* Out-of-range sgprs are only reported when they lie below this limit.
    * NOTE(review): presumably registers at or above it are special registers -- confirm. */
   uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->num_waves);

   /* One record per temporary id. */
   std::vector<Assignment> assignments(program->peekAllocationId());
   for (Block& block : program->blocks) {
      Location loc;
      loc.block = &block;
      for (aco_ptr<Instruction>& instr : block.instructions) {
         if (instr->opcode == aco_opcode::p_phi) {
            for (unsigned i = 0; i < instr->operands.size(); i++) {
               if (instr->operands[i].isTemp() &&
                   instr->operands[i].getTemp().type() == RegType::sgpr &&
                   instr->operands[i].isFirstKill())
                  phi_sgpr_ops[block.logical_preds[i]].emplace_back(instr->operands[i].getTemp());
            }
         }

         loc.instr = instr.get();
         for (unsigned i = 0; i < instr->operands.size(); i++) {
            Operand& op = instr->operands[i];
            if (!op.isTemp())
               continue;
            if (!op.isFixed())
               err |= ra_fail(program, loc, Location(), "Operand %d is not assigned a register", i);
            /* A temporary must use the same register everywhere it appears. */
            if (assignments[op.tempId()].valid && assignments[op.tempId()].reg != op.physReg())
               err |=
                  ra_fail(program, loc, assignments[op.tempId()].firstloc,
                          "Operand %d has an inconsistent register assignment with instruction", i);
            if ((op.getTemp().type() == RegType::vgpr &&
                 op.physReg().reg_b + op.bytes() > (256 + program->config->num_vgprs) * 4) ||
                (op.getTemp().type() == RegType::sgpr &&
                 op.physReg() + op.size() > program->config->num_sgprs &&
                 op.physReg() < sgpr_limit))
               err |= ra_fail(program, loc, assignments[op.tempId()].firstloc,
                              "Operand %d has an out-of-bounds register assignment", i);
            if (op.physReg() == vcc && !program->needs_vcc)
               err |= ra_fail(program, loc, Location(),
                              "Operand %d fixed to vcc but needs_vcc=false", i);
            if (op.regClass().is_subdword() &&
                !validate_subdword_operand(program->gfx_level, instr, i))
               err |= ra_fail(program, loc, Location(), "Operand %d not aligned correctly", i);
            if (!assignments[op.tempId()].firstloc.block)
               assignments[op.tempId()].firstloc = loc;
            /* Only take the register from an operand while no definition of the
             * temporary has been recorded; a definition always overwrites it below. */
            if (!assignments[op.tempId()].defloc.block) {
               assignments[op.tempId()].reg = op.physReg();
               assignments[op.tempId()].valid = true;
            }
         }

         for (unsigned i = 0; i < instr->definitions.size(); i++) {
            Definition& def = instr->definitions[i];
            if (!def.isTemp())
               continue;
            if (!def.isFixed())
               err |=
                  ra_fail(program, loc, Location(), "Definition %d is not assigned a register", i);
            /* A second definition of the same temporary is an error. */
            if (assignments[def.tempId()].defloc.block)
               err |= ra_fail(program, loc, assignments[def.tempId()].defloc,
                              "Temporary %%%d also defined by instruction", def.tempId());
            if ((def.getTemp().type() == RegType::vgpr &&
                 def.physReg().reg_b + def.bytes() > (256 + program->config->num_vgprs) * 4) ||
                (def.getTemp().type() == RegType::sgpr &&
                 def.physReg() + def.size() > program->config->num_sgprs &&
                 def.physReg() < sgpr_limit))
               err |= ra_fail(program, loc, assignments[def.tempId()].firstloc,
                              "Definition %d has an out-of-bounds register assignment", i);
            if (def.physReg() == vcc && !program->needs_vcc)
               err |= ra_fail(program, loc, Location(),
                              "Definition %d fixed to vcc but needs_vcc=false", i);
            if (def.regClass().is_subdword() &&
                !validate_subdword_definition(program->gfx_level, instr))
               err |= ra_fail(program, loc, Location(), "Definition %d not aligned correctly", i);
            if (!assignments[def.tempId()].firstloc.block)
               assignments[def.tempId()].firstloc = loc;
            assignments[def.tempId()].defloc = loc;
            assignments[def.tempId()].reg = def.physReg();
            assignments[def.tempId()].valid = true;
         }
      }
   }

   /* Second pass: simulate register occupancy block by block. */
   for (Block& block : program->blocks) {
      Location loc;
      loc.block = &block;

      std::array<unsigned, 2048> regs; /* register file in bytes */
      regs.fill(0);

      IDSet live = live_vars.live_out[block.index];
      /* remove killed p_phi sgpr operands */
      for (Temp tmp : phi_sgpr_ops[block.index])
         live.erase(tmp.id());

      /* check live out */
      for (unsigned id : live) {
         Temp tmp(id, program->temp_rc[id]);
         PhysReg reg = assignments[id].reg;
         for (unsigned i = 0; i < tmp.bytes(); i++) {
            if (regs[reg.reg_b + i]) {
               err |= ra_fail(program, loc, Location(),
                              "Assignment of element %d of %%%d already taken by %%%d in live-out",
                              i, id, regs[reg.reg_b + i]);
            }
            regs[reg.reg_b + i] = id;
         }
      }
      /* The live-out overlap check is done; start from an empty register file again. */
      regs.fill(0);

      /* Walk the block backwards, turning the live-out set into the live-in set. */
      for (auto it = block.instructions.rbegin(); it != block.instructions.rend(); ++it) {
         aco_ptr<Instruction>& instr = *it;

         /* check killed p_phi sgpr operands */
         if (instr->opcode == aco_opcode::p_logical_end) {
            for (Temp tmp : phi_sgpr_ops[block.index]) {
               PhysReg reg = assignments[tmp.id()].reg;
               for (unsigned i = 0; i < tmp.bytes(); i++) {
                  if (regs[reg.reg_b + i])
                     err |= ra_fail(
                        program, loc, Location(),
                        "Assignment of element %d of %%%d already taken by %%%d in live-out", i,
                        tmp.id(), regs[reg.reg_b + i]);
               }
               live.insert(tmp.id());
            }
         }

         for (const Definition& def : instr->definitions) {
            if (!def.isTemp())
               continue;
            live.erase(def.tempId());
         }

         /* don't count phi operands as live-in, since they are actually
          * killed when they are copied at the predecessor */
         if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
            for (const Operand& op : instr->operands) {
               if (!op.isTemp())
                  continue;
               live.insert(op.tempId());
            }
         }
      }

      /* Seed the register file with everything that is live at the start of the block. */
      for (unsigned id : live) {
         Temp tmp(id, program->temp_rc[id]);
         PhysReg reg = assignments[id].reg;
         for (unsigned i = 0; i < tmp.bytes(); i++)
            regs[reg.reg_b + i] = id;
      }

      /* Walk the block forwards, freeing killed operands and claiming definitions. */
      for (aco_ptr<Instruction>& instr : block.instructions) {
         loc.instr = instr.get();

         /* remove killed p_phi operands from regs */
         if (instr->opcode == aco_opcode::p_logical_end) {
            for (Temp tmp : phi_sgpr_ops[block.index]) {
               PhysReg reg = assignments[tmp.id()].reg;
               for (unsigned i = 0; i < tmp.bytes(); i++)
                  regs[reg.reg_b + i] = 0;
            }
         }

         /* Operands killed before the definitions are written release their
          * bytes first, so a definition may reuse them. */
         if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
            for (const Operand& op : instr->operands) {
               if (!op.isTemp())
                  continue;
               if (op.isFirstKillBeforeDef()) {
                  for (unsigned j = 0; j < op.getTemp().bytes(); j++)
                     regs[op.physReg().reg_b + j] = 0;
               }
            }
         }

         /* Definitions of a branch in a block with exactly one linear successor
          * are skipped here; the phi handling further down validates the
          * branches of the linear predecessors instead. */
         if (!instr->isBranch() || block.linear_succs.size() != 1)
            err |= validate_instr_defs(program, regs, assignments, loc, instr);

         if (!is_phi(instr)) {
            /* Late-kill operands only release their bytes after the definitions were checked. */
            for (const Operand& op : instr->operands) {
               if (!op.isTemp())
                  continue;
               if (op.isLateKill() && op.isFirstKill()) {
                  for (unsigned j = 0; j < op.getTemp().bytes(); j++)
                     regs[op.physReg().reg_b + j] = 0;
               }
            }
         } else if (block.linear_preds.size() != 1 ||
                    program->blocks[block.linear_preds[0]].linear_succs.size() == 1) {
            for (unsigned pred : block.linear_preds) {
               aco_ptr<Instruction>& br = program->blocks[pred].instructions.back();
               assert(br->isBranch());
               err |= validate_instr_defs(program, regs, assignments, loc, br);
            }
         }
      }
   }

   return err;
}
1196} // namespace aco
1197