1/*
2 * Copyright (C) 2021 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "util/ralloc.h"
25#include "ir3_ra.h"
26#include "ir3_shader.h"
27
28/* This file implements a validation pass for register allocation. We check
29 * that the assignment of SSA values to registers is "valid", in the sense
30 * that each original definition reaches all of its uses without being
31 * clobbered by something else.
32 *
33 * The validation is a forward dataflow analysis. The state at each point
34 * consists of, for each physical register, the SSA value occupying it, or a
35 * few special values:
36 *
37 * - "unknown" is set initially, before the dataflow analysis assigns it a
38 *   value. This is the lattice bottom.
39 * - Values at the start get "undef", which acts like a special SSA value that
40 *   indicates it is never written.
41 * - "overdefined" registers are set to more than one value, depending on
42 *   which path you take to get to the spot. This is the lattice top.
43 *
44 * Overdefined is necessary to distinguish because in some programs, like this
45 * simple example, it's perfectly normal and allowed:
46 *
47 * if (...) {
48 *    mov.u32u32 ssa_1(r1.x), ...
49 *    ...
50 * } else {
51 *    mov.u32u32 ssa_2(r1.x), ...
52 *    ...
53 * }
54 * // r1.x is overdefined here!
55 *
56 * However, if an ssa value after the if is accidentally assigned to r1.x, we
57 * need to remember that it's invalid to catch the mistake. Overdef has to be
58 * distinguished from undef so that the state forms a valid lattice to
59 * guarantee that the analysis always terminates. We could avoid relying on
60 * overdef by using liveness analysis, but not relying on liveness has the
61 * benefit that we can catch bugs in liveness analysis too.
62 *
63 * One tricky thing we have to handle is the coalescing of splits/collects,
64 * which means that multiple SSA values can occupy a register at the same
65 * time. While we could use the same merge set indices that RA uses, again
66 * that would rely on the merge set calculation being correct which we don't
67 * want to. Instead we treat splits/collects as transfer instructions, similar
68 * to the parallelcopy instructions inserted by RA, and have them copy their
69 * sources to their destinations. This means that each physreg must carry the
70 * SSA def assigned to it plus an offset into that definition, and when
71 * validating sources we must look through splits/collects to find the
72 * "original" source for each subregister.
73 */
74
/* Sentinel "defs" for the dataflow lattice (see the file comment):
 * UNKNOWN is the lattice bottom (state not yet computed), UNDEF means the
 * register is never written before this point, and OVERDEF is the lattice
 * top (conflicting defs reach here along different paths).  They are
 * distinguished from real ir3_register pointers by their small values.
 */
#define UNKNOWN ((struct ir3_register *)NULL)
#define UNDEF   ((struct ir3_register *)(uintptr_t)1)
#define OVERDEF ((struct ir3_register *)(uintptr_t)2)

/* The value occupying one physical subregister: the SSA def written there
 * (or one of the sentinels above) plus the subregister offset into that def.
 */
struct reg_state {
   struct ir3_register *def;
   unsigned offset;
};

/* Map from physreg index to reaching value for one register file. */
struct file_state {
   struct reg_state regs[RA_MAX_FILE_SIZE];
};

/* Reaching state for all three register files at one program point. */
struct reaching_state {
   struct file_state half, full, shared;
};

struct ra_val_ctx {
   /* Instruction currently being validated, for error reporting. */
   struct ir3_instruction *current_instr;

   /* Scratch state, updated in place while walking a block. */
   struct reaching_state reaching;
   /* Per-block state at block entry, indexed by block->index. */
   struct reaching_state *block_reaching;
   unsigned block_count;

   /* Sizes (in subregisters) of the full/half files for this variant. */
   unsigned full_size, half_size;

   /* On merged-register GPUs, half regs alias full regs in one file. */
   bool merged_regs;

   bool failed;
};
105
106static void
107validate_error(struct ra_val_ctx *ctx, const char *condstr)
108{
109   fprintf(stderr, "ra validation fail: %s\n", condstr);
110   fprintf(stderr, "  -> for instruction: ");
111   ir3_print_instr(ctx->current_instr);
112   abort();
113}
114
/* Assert-style helper: when `cond` is false, report the stringified
 * condition via validate_error() (which aborts).
 */
#define validate_assert(ctx, cond)                                             \
   do {                                                                        \
      if (!(cond)) {                                                           \
         validate_error(ctx, #cond);                                           \
      }                                                                        \
   } while (0)
121
122static unsigned
123get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
124{
125   if (reg->flags & IR3_REG_SHARED)
126      return RA_SHARED_SIZE;
127   else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
128      return ctx->full_size;
129   else
130      return ctx->half_size;
131}
132
133/* Validate simple things, like the registers being in-bounds. This way we
134 * don't have to worry about out-of-bounds accesses later.
135 */
136
137static void
138validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
139{
140   ctx->current_instr = instr;
141   ra_foreach_dst (dst, instr) {
142      unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
143      validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
144      if (dst->tied)
145         validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied));
146   }
147
148   ra_foreach_src (src, instr) {
149      unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
150      validate_assert(ctx, src_max <= get_file_size(ctx, src));
151   }
152}
153
154/* This is the lattice operator. */
155static bool
156merge_reg(struct reg_state *dst, const struct reg_state *src)
157{
158   if (dst->def == UNKNOWN) {
159      *dst = *src;
160      return src->def != UNKNOWN;
161   } else if (dst->def == OVERDEF) {
162      return false;
163   } else {
164      if (src->def == UNKNOWN)
165         return false;
166      else if (src->def == OVERDEF) {
167         *dst = *src;
168         return true;
169      } else {
170         if (dst->def != src->def || dst->offset != src->offset) {
171            dst->def = OVERDEF;
172            dst->offset = 0;
173            return true;
174         } else {
175            return false;
176         }
177      }
178   }
179}
180
181static bool
182merge_file(struct file_state *dst, const struct file_state *src, unsigned size)
183{
184   bool progress = false;
185   for (unsigned i = 0; i < size; i++)
186      progress |= merge_reg(&dst->regs[i], &src->regs[i]);
187   return progress;
188}
189
190static bool
191merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
192            const struct reaching_state *src)
193{
194   bool progress = false;
195   progress |= merge_file(&dst->full, &src->full, ctx->full_size);
196   progress |= merge_file(&dst->half, &src->half, ctx->half_size);
197   return progress;
198}
199
200static bool
201merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,
202                     const struct reaching_state *src)
203{
204   return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
205}
206
207static struct file_state *
208ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
209{
210   if (reg->flags & IR3_REG_SHARED)
211      return &ctx->reaching.shared;
212   else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
213      return &ctx->reaching.full;
214   else
215      return &ctx->reaching.half;
216}
217
218static void
219propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
220{
221   ra_foreach_dst (dst, instr) {
222      struct file_state *file = ra_val_get_file(ctx, dst);
223      physreg_t physreg = ra_reg_get_physreg(dst);
224      for (unsigned i = 0; i < reg_size(dst); i++) {
225         file->regs[physreg + i] = (struct reg_state){
226            .def = dst,
227            .offset = i,
228         };
229      }
230   }
231}
232
/* Transfer function for a split: the destination takes on whatever reaching
 * values currently occupy the selected element of the source, so the split
 * acts as a copy rather than a new definition (matching RA's coalescing of
 * splits with their source).
 */
static void
propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)
{
   struct ir3_register *dst = split->dsts[0];
   struct ir3_register *src = split->srcs[0];
   physreg_t dst_physreg = ra_reg_get_physreg(dst);
   physreg_t src_physreg = ra_reg_get_physreg(src);
   struct file_state *file = ra_val_get_file(ctx, dst);

   /* split.off selects which element of the source vector is extracted. */
   unsigned offset = split->split.off * reg_elem_size(src);
   /* NOTE(review): this copies in place within the same file; assumes the
    * dst range does not partially overlap the selected src slice in a way
    * that forward element order would clobber — TODO confirm RA guarantees
    * this for splits.
    */
   for (unsigned i = 0; i < reg_elem_size(src); i++) {
      file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
   }
}
247
/* Transfer function for a collect (vector build): each source element's
 * current reaching values are copied into the corresponding slice of the
 * destination.  A temporary buffer is filled first so that a destination
 * write never clobbers a source slice that still has to be read (sources
 * and the destination may overlap after coalescing).
 */
static void
propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)
{
   struct ir3_register *dst = collect->dsts[0];
   physreg_t dst_physreg = ra_reg_get_physreg(dst);
   struct file_state *file = ra_val_get_file(ctx, dst);

   unsigned size = reg_size(dst);
   struct reg_state srcs[size];

   for (unsigned i = 0; i < collect->srcs_count; i++) {
      struct ir3_register *src = collect->srcs[i];
      unsigned dst_offset = i * reg_elem_size(dst);
      for (unsigned j = 0; j < reg_elem_size(dst); j++) {
         if (!ra_reg_is_src(src)) {
            /* Immediate/const elements have no SSA def to transfer; treat
             * the collect's own dst as the originating def for them.
             */
            srcs[dst_offset + j] = (struct reg_state){
               .def = dst,
               .offset = dst_offset + j,
            };
         } else {
            physreg_t src_physreg = ra_reg_get_physreg(src);
            srcs[dst_offset + j] = file->regs[src_physreg + j];
         }
      }
   }

   for (unsigned i = 0; i < size; i++)
      file->regs[dst_physreg + i] = srcs[i];
}
277
/* Transfer function for a parallelcopy.  All source states are snapshotted
 * into a temporary buffer before any destination is written, matching the
 * parallel (simultaneous) semantics: one copy's destination must not
 * clobber another copy's still-unread source.
 */
static void
propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
{
   unsigned size = 0;
   /* Each src is the same size as its paired dst, so summing the src sizes
    * (indexed by dsts_count, which equals srcs_count for a parallelcopy)
    * gives the total buffer size; the asserts below re-check this.
    */
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      size += reg_size(pcopy->srcs[i]);
   }

   struct reg_state srcs[size];

   /* Pass 1: snapshot the reaching state of every source subregister. */
   unsigned offset = 0;
   for (unsigned i = 0; i < pcopy->srcs_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];
      struct ir3_register *src = pcopy->srcs[i];
      struct file_state *file = ra_val_get_file(ctx, dst);

      for (unsigned j = 0; j < reg_size(dst); j++) {
         if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
            /* Immediates/consts have no def; treat the copy's own dst as
             * the originating def for this subregister.
             */
            srcs[offset + j] = (struct reg_state){
               .def = dst,
               .offset = j,
            };
         } else {
            physreg_t src_physreg = ra_reg_get_physreg(src);
            srcs[offset + j] = file->regs[src_physreg + j];
         }
      }

      offset += reg_size(dst);
   }
   assert(offset == size);

   /* Pass 2: write the snapshots out to the destinations. */
   offset = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];
      physreg_t dst_physreg = ra_reg_get_physreg(dst);
      struct file_state *file = ra_val_get_file(ctx, dst);

      for (unsigned j = 0; j < reg_size(dst); j++)
         file->regs[dst_physreg + j] = srcs[offset + j];

      offset += reg_size(dst);
   }
   assert(offset == size);
}
323
324static void
325propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
326{
327   if (instr->opc == OPC_META_SPLIT)
328      propagate_split(ctx, instr);
329   else if (instr->opc == OPC_META_COLLECT)
330      propagate_collect(ctx, instr);
331   else if (instr->opc == OPC_META_PARALLEL_COPY)
332      propagate_parallelcopy(ctx, instr);
333   else
334      propagate_normal_instr(ctx, instr);
335}
336
337static bool
338propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
339{
340   ctx->reaching = ctx->block_reaching[block->index];
341
342   foreach_instr (instr, &block->instr_list) {
343      propagate_instr(ctx, instr);
344   }
345
346   bool progress = false;
347   for (unsigned i = 0; i < 2; i++) {
348      struct ir3_block *succ = block->successors[i];
349      if (!succ)
350         continue;
351      progress |=
352         merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
353   }
354   for (unsigned i = 0; i < 2; i++) {
355      struct ir3_block *succ = block->physical_successors[i];
356      if (!succ)
357         continue;
358      progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
359                                       &ctx->reaching);
360   }
361   return progress;
362}
363
/* Resolve a reg_state to its "original" definition by walking backwards
 * through the copy-like meta instructions (split/collect/parallelcopy) —
 * the same set propagate_instr() treats as transfers.  Stops at a real
 * (non-copy) defining instruction, or bails when the chain reaches an
 * immed/const source, which has no def to chase.
 */
static void
chase_definition(struct reg_state *state)
{
   while (true) {
      struct ir3_instruction *instr = state->def->instr;
      switch (instr->opc) {
      case OPC_META_SPLIT: {
         /* A split extracts element split.off of its source, so the offset
          * into the source def grows by that element's subregister offset.
          */
         struct ir3_register *new_def = instr->srcs[0]->def;
         unsigned offset = instr->split.off * reg_elem_size(new_def);
         *state = (struct reg_state){
            .def = new_def,
            .offset = state->offset + offset,
         };
         break;
      }
      case OPC_META_COLLECT: {
         /* A collect concatenates its sources: map the current offset to a
          * source index plus the remaining offset within that element.
          */
         unsigned src_idx = state->offset / reg_elem_size(state->def);
         unsigned src_offset = state->offset % reg_elem_size(state->def);
         struct ir3_register *new_def = instr->srcs[src_idx]->def;
         if (new_def) {
            *state = (struct reg_state){
               .def = new_def,
               .offset = src_offset,
            };
         } else {
            /* Bail on immed/const */
            return;
         }
         break;
      }
      case OPC_META_PARALLEL_COPY: {
         /* Find which dst of the parallelcopy this def is, then continue
          * through the paired src.  The offset is unchanged because a
          * parallelcopy copies values whole.
          */
         unsigned dst_idx = ~0;
         for (unsigned i = 0; i < instr->dsts_count; i++) {
            if (instr->dsts[i] == state->def) {
               dst_idx = i;
               break;
            }
         }
         assert(dst_idx != ~0);

         struct ir3_register *new_def = instr->srcs[dst_idx]->def;
         if (new_def) {
            state->def = new_def;
         } else {
            /* Bail on immed/const */
            return;
         }
         break;
      }
      default:
         return;
      }
   }
}
418
419static void
420dump_reg_state(struct reg_state *state)
421{
422   if (state->def == UNDEF) {
423      fprintf(stderr, "no reaching definition");
424   } else if (state->def == OVERDEF) {
425      fprintf(stderr,
426              "more than one reaching definition or partial definition");
427   } else {
428      /* The analysis should always remove UNKNOWN eventually. */
429      assert(state->def != UNKNOWN);
430
431      fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", state->def->instr->serialno,
432              state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "",
433              state->def->num / 4, "xyzw"[state->def->num % 4],
434              state -> offset);
435   }
436}
437
/* Check that the definition reaching each subregister of `src` is exactly
 * the value the instruction expects to read: the source's own def, chased
 * back through any coalesced copies.  A mismatch is printed and recorded in
 * ctx->failed rather than aborting immediately, so every bad source in the
 * shader gets reported before check_reaching_defs() aborts.
 */
static void
check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,
                   struct ir3_register *src)
{
   struct file_state *file = ra_val_get_file(ctx, src);
   physreg_t physreg = ra_reg_get_physreg(src);
   for (unsigned i = 0; i < reg_size(src); i++) {
      /* What should be here: subregister i of the source's def... */
      struct reg_state expected = (struct reg_state){
         .def = src->def,
         .offset = i,
      };
      /* ...translated to the original def behind any split/collect/pcopy. */
      chase_definition(&expected);

      /* What the dataflow analysis says is actually here. */
      struct reg_state actual = file->regs[physreg + i];

      if (expected.def != actual.def || expected.offset != actual.offset) {
         fprintf(
            stderr,
            "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",
            src->def->instr->serialno, src->def->name, i);
         fprintf(stderr, "expected: ");
         dump_reg_state(&expected);
         fprintf(stderr, "\n");
         fprintf(stderr, "actual: ");
         dump_reg_state(&actual);
         fprintf(stderr, "\n");
         fprintf(stderr, "-> for instruction: ");
         ir3_print_instr(instr);
         ctx->failed = true;
      }
   }
}
470
471static void
472check_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
473{
474   if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT ||
475       instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) {
476      return;
477   }
478
479   ra_foreach_src (src, instr) {
480      check_reaching_src(ctx, instr, src);
481   }
482}
483
/* Validate every source in a block against the computed reaching state.
 * Phi sources cannot be checked at the phi itself (the state there is the
 * join of all predecessors), so each successor's phi source for this edge
 * is checked against this block's end-of-block state instead.
 */
static void
check_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
   ctx->reaching = ctx->block_reaching[block->index];

   foreach_instr (instr, &block->instr_list) {
      check_reaching_instr(ctx, instr);
      propagate_instr(ctx, instr);
   }

   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;

      /* Phis sit at the top of the successor; stop at the first non-phi.
       * pred_idx selects the phi operand that corresponds to this edge.
       */
      unsigned pred_idx = ir3_block_get_pred_index(succ, block);
      foreach_instr (instr, &succ->instr_list) {
         if (instr->opc != OPC_META_PHI)
            break;
         /* A NULL def means an undef phi source — nothing to check. */
         if (instr->srcs[pred_idx]->def)
            check_reaching_src(ctx, instr, instr->srcs[pred_idx]);
      }
   }
}
508
/* Driver for the reaching-definitions check: seed the start block with
 * all-UNDEF (other blocks start at UNKNOWN thanks to rzalloc's zero-init),
 * run propagate_block() to a fixpoint, then re-walk every block checking
 * each source.  Aborts, after printing the whole shader, if any check
 * failed.
 */
static void
check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)
{
   ctx->block_reaching =
      rzalloc_array(ctx, struct reaching_state, ctx->block_count);

   /* Registers at shader entry have no writer: mark them UNDEF (never
    * written) rather than leaving them UNKNOWN (not yet computed).
    */
   struct reaching_state *start = &ctx->block_reaching[0];
   for (unsigned i = 0; i < ctx->full_size; i++)
      start->full.regs[i].def = UNDEF;
   for (unsigned i = 0; i < ctx->half_size; i++)
      start->half.regs[i].def = UNDEF;
   for (unsigned i = 0; i < RA_SHARED_SIZE; i++)
      start->shared.regs[i].def = UNDEF;

   /* Forward dataflow fixpoint: the lattice has finite height, so this
    * loop terminates (see the file comment on UNKNOWN/OVERDEF).
    */
   bool progress;
   do {
      progress = false;
      foreach_block (block, &ir->block_list) {
         progress |= propagate_block(ctx, block);
      }
   } while (progress);

   foreach_block (block, &ir->block_list) {
      check_reaching_block(ctx, block);
   }

   if (ctx->failed) {
      fprintf(stderr, "failing shader:\n");
      ir3_print(ir);
      abort();
   }
}
541
/* Entry point: validate the register allocation of variant `v`.
 * full_size/half_size are the file sizes RA allocated within, and
 * block_count sizes the per-block state array.  In NDEBUG builds this
 * returns immediately; the body is still compiled (rather than #ifdef'd
 * out) so the validation code cannot silently bitrot in release builds.
 */
void
ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
                unsigned half_size, unsigned block_count)
{
#ifdef NDEBUG
#define VALIDATE 0
#else
#define VALIDATE 1
#endif

   if (!VALIDATE)
      return;

   struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);
   ctx->merged_regs = v->mergedregs;
   ctx->full_size = full_size;
   ctx->half_size = half_size;
   ctx->block_count = block_count;

   /* Bounds checks first, so the dataflow pass can index files safely. */
   foreach_block (block, &v->ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         validate_simple(ctx, instr);
      }
   }

   check_reaching_defs(ctx, v->ir);

   /* ctx owns block_reaching via ralloc, so one free releases everything. */
   ralloc_free(ctx);
}
571