1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2021 Valve Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "util/ralloc.h"
25bf215546Sopenharmony_ci#include "ir3_ra.h"
26bf215546Sopenharmony_ci#include "ir3_shader.h"
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci/* This file implements a validation pass for register allocation. We check
29bf215546Sopenharmony_ci * that the assignment of SSA values to registers is "valid", in the sense
30bf215546Sopenharmony_ci * that each original definition reaches all of its uses without being
31bf215546Sopenharmony_ci * clobbered by something else.
32bf215546Sopenharmony_ci *
33bf215546Sopenharmony_ci * The validation is a forward dataflow analysis. The state at each point
34bf215546Sopenharmony_ci * consists of, for each physical register, the SSA value occupying it, or a
35bf215546Sopenharmony_ci * few special values:
36bf215546Sopenharmony_ci *
37bf215546Sopenharmony_ci * - "unknown" is set initially, before the dataflow analysis assigns it a
38bf215546Sopenharmony_ci *   value. This is the lattice bottom.
39bf215546Sopenharmony_ci * - Values at the start get "undef", which acts like a special SSA value that
40bf215546Sopenharmony_ci *   indicates it is never written.
41bf215546Sopenharmony_ci * - "overdefined" registers are set to more than one value, depending on
42bf215546Sopenharmony_ci *   which path you take to get to the spot. This is the lattice top.
43bf215546Sopenharmony_ci *
44bf215546Sopenharmony_ci * Overdefined is necessary to distinguish because in some programs, like this
45bf215546Sopenharmony_ci * simple example, it's perfectly normal and allowed:
46bf215546Sopenharmony_ci *
47bf215546Sopenharmony_ci * if (...) {
48bf215546Sopenharmony_ci *    mov.u32u32 ssa_1(r1.x), ...
49bf215546Sopenharmony_ci *    ...
50bf215546Sopenharmony_ci * } else {
51bf215546Sopenharmony_ci *    mov.u32u32 ssa_2(r1.x), ...
52bf215546Sopenharmony_ci *    ...
53bf215546Sopenharmony_ci * }
54bf215546Sopenharmony_ci * // r1.x is overdefined here!
55bf215546Sopenharmony_ci *
56bf215546Sopenharmony_ci * However, if an ssa value after the if is accidentally assigned to r1.x, we
57bf215546Sopenharmony_ci * need to remember that it's invalid to catch the mistake. Overdef has to be
58bf215546Sopenharmony_ci * distinguished from undef so that the state forms a valid lattice to
59bf215546Sopenharmony_ci * guarantee that the analysis always terminates. We could avoid relying on
60bf215546Sopenharmony_ci * overdef by using liveness analysis, but not relying on liveness has the
61bf215546Sopenharmony_ci * benefit that we can catch bugs in liveness analysis too.
62bf215546Sopenharmony_ci *
63bf215546Sopenharmony_ci * One tricky thing we have to handle is the coalescing of splits/collects,
64bf215546Sopenharmony_ci * which means that multiple SSA values can occupy a register at the same
65bf215546Sopenharmony_ci * time. While we could use the same merge set indices that RA uses, again
66bf215546Sopenharmony_ci * that would rely on the merge set calculation being correct which we don't
67bf215546Sopenharmony_ci * want to. Instead we treat splits/collects as transfer instructions, similar
68bf215546Sopenharmony_ci * to the parallelcopy instructions inserted by RA, and have them copy their
69bf215546Sopenharmony_ci * sources to their destinations. This means that each physreg must carry the
70bf215546Sopenharmony_ci * SSA def assigned to it plus an offset into that definition, and when
71bf215546Sopenharmony_ci * validating sources we must look through splits/collects to find the
72bf215546Sopenharmony_ci * "original" source for each subregister.
73bf215546Sopenharmony_ci */
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci#define UNKNOWN ((struct ir3_register *)NULL)
76bf215546Sopenharmony_ci#define UNDEF   ((struct ir3_register *)(uintptr_t)1)
77bf215546Sopenharmony_ci#define OVERDEF ((struct ir3_register *)(uintptr_t)2)
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_cistruct reg_state {
80bf215546Sopenharmony_ci   struct ir3_register *def;
81bf215546Sopenharmony_ci   unsigned offset;
82bf215546Sopenharmony_ci};
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_cistruct file_state {
85bf215546Sopenharmony_ci   struct reg_state regs[RA_MAX_FILE_SIZE];
86bf215546Sopenharmony_ci};
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_cistruct reaching_state {
89bf215546Sopenharmony_ci   struct file_state half, full, shared;
90bf215546Sopenharmony_ci};
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_cistruct ra_val_ctx {
93bf215546Sopenharmony_ci   struct ir3_instruction *current_instr;
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci   struct reaching_state reaching;
96bf215546Sopenharmony_ci   struct reaching_state *block_reaching;
97bf215546Sopenharmony_ci   unsigned block_count;
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci   unsigned full_size, half_size;
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_ci   bool merged_regs;
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci   bool failed;
104bf215546Sopenharmony_ci};
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_cistatic void
107bf215546Sopenharmony_civalidate_error(struct ra_val_ctx *ctx, const char *condstr)
108bf215546Sopenharmony_ci{
109bf215546Sopenharmony_ci   fprintf(stderr, "ra validation fail: %s\n", condstr);
110bf215546Sopenharmony_ci   fprintf(stderr, "  -> for instruction: ");
111bf215546Sopenharmony_ci   ir3_print_instr(ctx->current_instr);
112bf215546Sopenharmony_ci   abort();
113bf215546Sopenharmony_ci}
114bf215546Sopenharmony_ci
/* Abort through validate_error() when "cond" does not hold. The condition is
 * stringified verbatim into the failure message.
 */
#define validate_assert(ctx, cond)                                             \
   do {                                                                        \
      if (cond)                                                                \
         break;                                                                \
      validate_error(ctx, #cond);                                              \
   } while (0)
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_cistatic unsigned
123bf215546Sopenharmony_ciget_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
124bf215546Sopenharmony_ci{
125bf215546Sopenharmony_ci   if (reg->flags & IR3_REG_SHARED)
126bf215546Sopenharmony_ci      return RA_SHARED_SIZE;
127bf215546Sopenharmony_ci   else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
128bf215546Sopenharmony_ci      return ctx->full_size;
129bf215546Sopenharmony_ci   else
130bf215546Sopenharmony_ci      return ctx->half_size;
131bf215546Sopenharmony_ci}
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci/* Validate simple things, like the registers being in-bounds. This way we
134bf215546Sopenharmony_ci * don't have to worry about out-of-bounds accesses later.
135bf215546Sopenharmony_ci */
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_cistatic void
138bf215546Sopenharmony_civalidate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
139bf215546Sopenharmony_ci{
140bf215546Sopenharmony_ci   ctx->current_instr = instr;
141bf215546Sopenharmony_ci   ra_foreach_dst (dst, instr) {
142bf215546Sopenharmony_ci      unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
143bf215546Sopenharmony_ci      validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
144bf215546Sopenharmony_ci      if (dst->tied)
145bf215546Sopenharmony_ci         validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied));
146bf215546Sopenharmony_ci   }
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci   ra_foreach_src (src, instr) {
149bf215546Sopenharmony_ci      unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
150bf215546Sopenharmony_ci      validate_assert(ctx, src_max <= get_file_size(ctx, src));
151bf215546Sopenharmony_ci   }
152bf215546Sopenharmony_ci}
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci/* This is the lattice operator. */
155bf215546Sopenharmony_cistatic bool
156bf215546Sopenharmony_cimerge_reg(struct reg_state *dst, const struct reg_state *src)
157bf215546Sopenharmony_ci{
158bf215546Sopenharmony_ci   if (dst->def == UNKNOWN) {
159bf215546Sopenharmony_ci      *dst = *src;
160bf215546Sopenharmony_ci      return src->def != UNKNOWN;
161bf215546Sopenharmony_ci   } else if (dst->def == OVERDEF) {
162bf215546Sopenharmony_ci      return false;
163bf215546Sopenharmony_ci   } else {
164bf215546Sopenharmony_ci      if (src->def == UNKNOWN)
165bf215546Sopenharmony_ci         return false;
166bf215546Sopenharmony_ci      else if (src->def == OVERDEF) {
167bf215546Sopenharmony_ci         *dst = *src;
168bf215546Sopenharmony_ci         return true;
169bf215546Sopenharmony_ci      } else {
170bf215546Sopenharmony_ci         if (dst->def != src->def || dst->offset != src->offset) {
171bf215546Sopenharmony_ci            dst->def = OVERDEF;
172bf215546Sopenharmony_ci            dst->offset = 0;
173bf215546Sopenharmony_ci            return true;
174bf215546Sopenharmony_ci         } else {
175bf215546Sopenharmony_ci            return false;
176bf215546Sopenharmony_ci         }
177bf215546Sopenharmony_ci      }
178bf215546Sopenharmony_ci   }
179bf215546Sopenharmony_ci}
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_cistatic bool
182bf215546Sopenharmony_cimerge_file(struct file_state *dst, const struct file_state *src, unsigned size)
183bf215546Sopenharmony_ci{
184bf215546Sopenharmony_ci   bool progress = false;
185bf215546Sopenharmony_ci   for (unsigned i = 0; i < size; i++)
186bf215546Sopenharmony_ci      progress |= merge_reg(&dst->regs[i], &src->regs[i]);
187bf215546Sopenharmony_ci   return progress;
188bf215546Sopenharmony_ci}
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_cistatic bool
191bf215546Sopenharmony_cimerge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
192bf215546Sopenharmony_ci            const struct reaching_state *src)
193bf215546Sopenharmony_ci{
194bf215546Sopenharmony_ci   bool progress = false;
195bf215546Sopenharmony_ci   progress |= merge_file(&dst->full, &src->full, ctx->full_size);
196bf215546Sopenharmony_ci   progress |= merge_file(&dst->half, &src->half, ctx->half_size);
197bf215546Sopenharmony_ci   return progress;
198bf215546Sopenharmony_ci}
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_cistatic bool
201bf215546Sopenharmony_cimerge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,
202bf215546Sopenharmony_ci                     const struct reaching_state *src)
203bf215546Sopenharmony_ci{
204bf215546Sopenharmony_ci   return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
205bf215546Sopenharmony_ci}
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_cistatic struct file_state *
208bf215546Sopenharmony_cira_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
209bf215546Sopenharmony_ci{
210bf215546Sopenharmony_ci   if (reg->flags & IR3_REG_SHARED)
211bf215546Sopenharmony_ci      return &ctx->reaching.shared;
212bf215546Sopenharmony_ci   else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
213bf215546Sopenharmony_ci      return &ctx->reaching.full;
214bf215546Sopenharmony_ci   else
215bf215546Sopenharmony_ci      return &ctx->reaching.half;
216bf215546Sopenharmony_ci}
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_cistatic void
219bf215546Sopenharmony_cipropagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
220bf215546Sopenharmony_ci{
221bf215546Sopenharmony_ci   ra_foreach_dst (dst, instr) {
222bf215546Sopenharmony_ci      struct file_state *file = ra_val_get_file(ctx, dst);
223bf215546Sopenharmony_ci      physreg_t physreg = ra_reg_get_physreg(dst);
224bf215546Sopenharmony_ci      for (unsigned i = 0; i < reg_size(dst); i++) {
225bf215546Sopenharmony_ci         file->regs[physreg + i] = (struct reg_state){
226bf215546Sopenharmony_ci            .def = dst,
227bf215546Sopenharmony_ci            .offset = i,
228bf215546Sopenharmony_ci         };
229bf215546Sopenharmony_ci      }
230bf215546Sopenharmony_ci   }
231bf215546Sopenharmony_ci}
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_cistatic void
234bf215546Sopenharmony_cipropagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)
235bf215546Sopenharmony_ci{
236bf215546Sopenharmony_ci   struct ir3_register *dst = split->dsts[0];
237bf215546Sopenharmony_ci   struct ir3_register *src = split->srcs[0];
238bf215546Sopenharmony_ci   physreg_t dst_physreg = ra_reg_get_physreg(dst);
239bf215546Sopenharmony_ci   physreg_t src_physreg = ra_reg_get_physreg(src);
240bf215546Sopenharmony_ci   struct file_state *file = ra_val_get_file(ctx, dst);
241bf215546Sopenharmony_ci
242bf215546Sopenharmony_ci   unsigned offset = split->split.off * reg_elem_size(src);
243bf215546Sopenharmony_ci   for (unsigned i = 0; i < reg_elem_size(src); i++) {
244bf215546Sopenharmony_ci      file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
245bf215546Sopenharmony_ci   }
246bf215546Sopenharmony_ci}
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_cistatic void
249bf215546Sopenharmony_cipropagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)
250bf215546Sopenharmony_ci{
251bf215546Sopenharmony_ci   struct ir3_register *dst = collect->dsts[0];
252bf215546Sopenharmony_ci   physreg_t dst_physreg = ra_reg_get_physreg(dst);
253bf215546Sopenharmony_ci   struct file_state *file = ra_val_get_file(ctx, dst);
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_ci   unsigned size = reg_size(dst);
256bf215546Sopenharmony_ci   struct reg_state srcs[size];
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_ci   for (unsigned i = 0; i < collect->srcs_count; i++) {
259bf215546Sopenharmony_ci      struct ir3_register *src = collect->srcs[i];
260bf215546Sopenharmony_ci      unsigned dst_offset = i * reg_elem_size(dst);
261bf215546Sopenharmony_ci      for (unsigned j = 0; j < reg_elem_size(dst); j++) {
262bf215546Sopenharmony_ci         if (!ra_reg_is_src(src)) {
263bf215546Sopenharmony_ci            srcs[dst_offset + j] = (struct reg_state){
264bf215546Sopenharmony_ci               .def = dst,
265bf215546Sopenharmony_ci               .offset = dst_offset + j,
266bf215546Sopenharmony_ci            };
267bf215546Sopenharmony_ci         } else {
268bf215546Sopenharmony_ci            physreg_t src_physreg = ra_reg_get_physreg(src);
269bf215546Sopenharmony_ci            srcs[dst_offset + j] = file->regs[src_physreg + j];
270bf215546Sopenharmony_ci         }
271bf215546Sopenharmony_ci      }
272bf215546Sopenharmony_ci   }
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci   for (unsigned i = 0; i < size; i++)
275bf215546Sopenharmony_ci      file->regs[dst_physreg + i] = srcs[i];
276bf215546Sopenharmony_ci}
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_cistatic void
279bf215546Sopenharmony_cipropagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
280bf215546Sopenharmony_ci{
281bf215546Sopenharmony_ci   unsigned size = 0;
282bf215546Sopenharmony_ci   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
283bf215546Sopenharmony_ci      size += reg_size(pcopy->srcs[i]);
284bf215546Sopenharmony_ci   }
285bf215546Sopenharmony_ci
286bf215546Sopenharmony_ci   struct reg_state srcs[size];
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_ci   unsigned offset = 0;
289bf215546Sopenharmony_ci   for (unsigned i = 0; i < pcopy->srcs_count; i++) {
290bf215546Sopenharmony_ci      struct ir3_register *dst = pcopy->dsts[i];
291bf215546Sopenharmony_ci      struct ir3_register *src = pcopy->srcs[i];
292bf215546Sopenharmony_ci      struct file_state *file = ra_val_get_file(ctx, dst);
293bf215546Sopenharmony_ci
294bf215546Sopenharmony_ci      for (unsigned j = 0; j < reg_size(dst); j++) {
295bf215546Sopenharmony_ci         if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
296bf215546Sopenharmony_ci            srcs[offset + j] = (struct reg_state){
297bf215546Sopenharmony_ci               .def = dst,
298bf215546Sopenharmony_ci               .offset = j,
299bf215546Sopenharmony_ci            };
300bf215546Sopenharmony_ci         } else {
301bf215546Sopenharmony_ci            physreg_t src_physreg = ra_reg_get_physreg(src);
302bf215546Sopenharmony_ci            srcs[offset + j] = file->regs[src_physreg + j];
303bf215546Sopenharmony_ci         }
304bf215546Sopenharmony_ci      }
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci      offset += reg_size(dst);
307bf215546Sopenharmony_ci   }
308bf215546Sopenharmony_ci   assert(offset == size);
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_ci   offset = 0;
311bf215546Sopenharmony_ci   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
312bf215546Sopenharmony_ci      struct ir3_register *dst = pcopy->dsts[i];
313bf215546Sopenharmony_ci      physreg_t dst_physreg = ra_reg_get_physreg(dst);
314bf215546Sopenharmony_ci      struct file_state *file = ra_val_get_file(ctx, dst);
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci      for (unsigned j = 0; j < reg_size(dst); j++)
317bf215546Sopenharmony_ci         file->regs[dst_physreg + j] = srcs[offset + j];
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci      offset += reg_size(dst);
320bf215546Sopenharmony_ci   }
321bf215546Sopenharmony_ci   assert(offset == size);
322bf215546Sopenharmony_ci}
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_cistatic void
325bf215546Sopenharmony_cipropagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
326bf215546Sopenharmony_ci{
327bf215546Sopenharmony_ci   if (instr->opc == OPC_META_SPLIT)
328bf215546Sopenharmony_ci      propagate_split(ctx, instr);
329bf215546Sopenharmony_ci   else if (instr->opc == OPC_META_COLLECT)
330bf215546Sopenharmony_ci      propagate_collect(ctx, instr);
331bf215546Sopenharmony_ci   else if (instr->opc == OPC_META_PARALLEL_COPY)
332bf215546Sopenharmony_ci      propagate_parallelcopy(ctx, instr);
333bf215546Sopenharmony_ci   else
334bf215546Sopenharmony_ci      propagate_normal_instr(ctx, instr);
335bf215546Sopenharmony_ci}
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_cistatic bool
338bf215546Sopenharmony_cipropagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
339bf215546Sopenharmony_ci{
340bf215546Sopenharmony_ci   ctx->reaching = ctx->block_reaching[block->index];
341bf215546Sopenharmony_ci
342bf215546Sopenharmony_ci   foreach_instr (instr, &block->instr_list) {
343bf215546Sopenharmony_ci      propagate_instr(ctx, instr);
344bf215546Sopenharmony_ci   }
345bf215546Sopenharmony_ci
346bf215546Sopenharmony_ci   bool progress = false;
347bf215546Sopenharmony_ci   for (unsigned i = 0; i < 2; i++) {
348bf215546Sopenharmony_ci      struct ir3_block *succ = block->successors[i];
349bf215546Sopenharmony_ci      if (!succ)
350bf215546Sopenharmony_ci         continue;
351bf215546Sopenharmony_ci      progress |=
352bf215546Sopenharmony_ci         merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
353bf215546Sopenharmony_ci   }
354bf215546Sopenharmony_ci   for (unsigned i = 0; i < 2; i++) {
355bf215546Sopenharmony_ci      struct ir3_block *succ = block->physical_successors[i];
356bf215546Sopenharmony_ci      if (!succ)
357bf215546Sopenharmony_ci         continue;
358bf215546Sopenharmony_ci      progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
359bf215546Sopenharmony_ci                                       &ctx->reaching);
360bf215546Sopenharmony_ci   }
361bf215546Sopenharmony_ci   return progress;
362bf215546Sopenharmony_ci}
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_cistatic void
365bf215546Sopenharmony_cichase_definition(struct reg_state *state)
366bf215546Sopenharmony_ci{
367bf215546Sopenharmony_ci   while (true) {
368bf215546Sopenharmony_ci      struct ir3_instruction *instr = state->def->instr;
369bf215546Sopenharmony_ci      switch (instr->opc) {
370bf215546Sopenharmony_ci      case OPC_META_SPLIT: {
371bf215546Sopenharmony_ci         struct ir3_register *new_def = instr->srcs[0]->def;
372bf215546Sopenharmony_ci         unsigned offset = instr->split.off * reg_elem_size(new_def);
373bf215546Sopenharmony_ci         *state = (struct reg_state){
374bf215546Sopenharmony_ci            .def = new_def,
375bf215546Sopenharmony_ci            .offset = state->offset + offset,
376bf215546Sopenharmony_ci         };
377bf215546Sopenharmony_ci         break;
378bf215546Sopenharmony_ci      }
379bf215546Sopenharmony_ci      case OPC_META_COLLECT: {
380bf215546Sopenharmony_ci         unsigned src_idx = state->offset / reg_elem_size(state->def);
381bf215546Sopenharmony_ci         unsigned src_offset = state->offset % reg_elem_size(state->def);
382bf215546Sopenharmony_ci         struct ir3_register *new_def = instr->srcs[src_idx]->def;
383bf215546Sopenharmony_ci         if (new_def) {
384bf215546Sopenharmony_ci            *state = (struct reg_state){
385bf215546Sopenharmony_ci               .def = new_def,
386bf215546Sopenharmony_ci               .offset = src_offset,
387bf215546Sopenharmony_ci            };
388bf215546Sopenharmony_ci         } else {
389bf215546Sopenharmony_ci            /* Bail on immed/const */
390bf215546Sopenharmony_ci            return;
391bf215546Sopenharmony_ci         }
392bf215546Sopenharmony_ci         break;
393bf215546Sopenharmony_ci      }
394bf215546Sopenharmony_ci      case OPC_META_PARALLEL_COPY: {
395bf215546Sopenharmony_ci         unsigned dst_idx = ~0;
396bf215546Sopenharmony_ci         for (unsigned i = 0; i < instr->dsts_count; i++) {
397bf215546Sopenharmony_ci            if (instr->dsts[i] == state->def) {
398bf215546Sopenharmony_ci               dst_idx = i;
399bf215546Sopenharmony_ci               break;
400bf215546Sopenharmony_ci            }
401bf215546Sopenharmony_ci         }
402bf215546Sopenharmony_ci         assert(dst_idx != ~0);
403bf215546Sopenharmony_ci
404bf215546Sopenharmony_ci         struct ir3_register *new_def = instr->srcs[dst_idx]->def;
405bf215546Sopenharmony_ci         if (new_def) {
406bf215546Sopenharmony_ci            state->def = new_def;
407bf215546Sopenharmony_ci         } else {
408bf215546Sopenharmony_ci            /* Bail on immed/const */
409bf215546Sopenharmony_ci            return;
410bf215546Sopenharmony_ci         }
411bf215546Sopenharmony_ci         break;
412bf215546Sopenharmony_ci      }
413bf215546Sopenharmony_ci      default:
414bf215546Sopenharmony_ci         return;
415bf215546Sopenharmony_ci      }
416bf215546Sopenharmony_ci   }
417bf215546Sopenharmony_ci}
418bf215546Sopenharmony_ci
419bf215546Sopenharmony_cistatic void
420bf215546Sopenharmony_cidump_reg_state(struct reg_state *state)
421bf215546Sopenharmony_ci{
422bf215546Sopenharmony_ci   if (state->def == UNDEF) {
423bf215546Sopenharmony_ci      fprintf(stderr, "no reaching definition");
424bf215546Sopenharmony_ci   } else if (state->def == OVERDEF) {
425bf215546Sopenharmony_ci      fprintf(stderr,
426bf215546Sopenharmony_ci              "more than one reaching definition or partial definition");
427bf215546Sopenharmony_ci   } else {
428bf215546Sopenharmony_ci      /* The analysis should always remove UNKNOWN eventually. */
429bf215546Sopenharmony_ci      assert(state->def != UNKNOWN);
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_ci      fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", state->def->instr->serialno,
432bf215546Sopenharmony_ci              state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "",
433bf215546Sopenharmony_ci              state->def->num / 4, "xyzw"[state->def->num % 4],
434bf215546Sopenharmony_ci              state -> offset);
435bf215546Sopenharmony_ci   }
436bf215546Sopenharmony_ci}
437bf215546Sopenharmony_ci
438bf215546Sopenharmony_cistatic void
439bf215546Sopenharmony_cicheck_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,
440bf215546Sopenharmony_ci                   struct ir3_register *src)
441bf215546Sopenharmony_ci{
442bf215546Sopenharmony_ci   struct file_state *file = ra_val_get_file(ctx, src);
443bf215546Sopenharmony_ci   physreg_t physreg = ra_reg_get_physreg(src);
444bf215546Sopenharmony_ci   for (unsigned i = 0; i < reg_size(src); i++) {
445bf215546Sopenharmony_ci      struct reg_state expected = (struct reg_state){
446bf215546Sopenharmony_ci         .def = src->def,
447bf215546Sopenharmony_ci         .offset = i,
448bf215546Sopenharmony_ci      };
449bf215546Sopenharmony_ci      chase_definition(&expected);
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci      struct reg_state actual = file->regs[physreg + i];
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_ci      if (expected.def != actual.def || expected.offset != actual.offset) {
454bf215546Sopenharmony_ci         fprintf(
455bf215546Sopenharmony_ci            stderr,
456bf215546Sopenharmony_ci            "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",
457bf215546Sopenharmony_ci            src->def->instr->serialno, src->def->name, i);
458bf215546Sopenharmony_ci         fprintf(stderr, "expected: ");
459bf215546Sopenharmony_ci         dump_reg_state(&expected);
460bf215546Sopenharmony_ci         fprintf(stderr, "\n");
461bf215546Sopenharmony_ci         fprintf(stderr, "actual: ");
462bf215546Sopenharmony_ci         dump_reg_state(&actual);
463bf215546Sopenharmony_ci         fprintf(stderr, "\n");
464bf215546Sopenharmony_ci         fprintf(stderr, "-> for instruction: ");
465bf215546Sopenharmony_ci         ir3_print_instr(instr);
466bf215546Sopenharmony_ci         ctx->failed = true;
467bf215546Sopenharmony_ci      }
468bf215546Sopenharmony_ci   }
469bf215546Sopenharmony_ci}
470bf215546Sopenharmony_ci
471bf215546Sopenharmony_cistatic void
472bf215546Sopenharmony_cicheck_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
473bf215546Sopenharmony_ci{
474bf215546Sopenharmony_ci   if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT ||
475bf215546Sopenharmony_ci       instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) {
476bf215546Sopenharmony_ci      return;
477bf215546Sopenharmony_ci   }
478bf215546Sopenharmony_ci
479bf215546Sopenharmony_ci   ra_foreach_src (src, instr) {
480bf215546Sopenharmony_ci      check_reaching_src(ctx, instr, src);
481bf215546Sopenharmony_ci   }
482bf215546Sopenharmony_ci}
483bf215546Sopenharmony_ci
484bf215546Sopenharmony_cistatic void
485bf215546Sopenharmony_cicheck_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block)
486bf215546Sopenharmony_ci{
487bf215546Sopenharmony_ci   ctx->reaching = ctx->block_reaching[block->index];
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_ci   foreach_instr (instr, &block->instr_list) {
490bf215546Sopenharmony_ci      check_reaching_instr(ctx, instr);
491bf215546Sopenharmony_ci      propagate_instr(ctx, instr);
492bf215546Sopenharmony_ci   }
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci   for (unsigned i = 0; i < 2; i++) {
495bf215546Sopenharmony_ci      struct ir3_block *succ = block->successors[i];
496bf215546Sopenharmony_ci      if (!succ)
497bf215546Sopenharmony_ci         continue;
498bf215546Sopenharmony_ci
499bf215546Sopenharmony_ci      unsigned pred_idx = ir3_block_get_pred_index(succ, block);
500bf215546Sopenharmony_ci      foreach_instr (instr, &succ->instr_list) {
501bf215546Sopenharmony_ci         if (instr->opc != OPC_META_PHI)
502bf215546Sopenharmony_ci            break;
503bf215546Sopenharmony_ci         if (instr->srcs[pred_idx]->def)
504bf215546Sopenharmony_ci            check_reaching_src(ctx, instr, instr->srcs[pred_idx]);
505bf215546Sopenharmony_ci      }
506bf215546Sopenharmony_ci   }
507bf215546Sopenharmony_ci}
508bf215546Sopenharmony_ci
509bf215546Sopenharmony_cistatic void
510bf215546Sopenharmony_cicheck_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)
511bf215546Sopenharmony_ci{
512bf215546Sopenharmony_ci   ctx->block_reaching =
513bf215546Sopenharmony_ci      rzalloc_array(ctx, struct reaching_state, ctx->block_count);
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_ci   struct reaching_state *start = &ctx->block_reaching[0];
516bf215546Sopenharmony_ci   for (unsigned i = 0; i < ctx->full_size; i++)
517bf215546Sopenharmony_ci      start->full.regs[i].def = UNDEF;
518bf215546Sopenharmony_ci   for (unsigned i = 0; i < ctx->half_size; i++)
519bf215546Sopenharmony_ci      start->half.regs[i].def = UNDEF;
520bf215546Sopenharmony_ci   for (unsigned i = 0; i < RA_SHARED_SIZE; i++)
521bf215546Sopenharmony_ci      start->shared.regs[i].def = UNDEF;
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci   bool progress;
524bf215546Sopenharmony_ci   do {
525bf215546Sopenharmony_ci      progress = false;
526bf215546Sopenharmony_ci      foreach_block (block, &ir->block_list) {
527bf215546Sopenharmony_ci         progress |= propagate_block(ctx, block);
528bf215546Sopenharmony_ci      }
529bf215546Sopenharmony_ci   } while (progress);
530bf215546Sopenharmony_ci
531bf215546Sopenharmony_ci   foreach_block (block, &ir->block_list) {
532bf215546Sopenharmony_ci      check_reaching_block(ctx, block);
533bf215546Sopenharmony_ci   }
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_ci   if (ctx->failed) {
536bf215546Sopenharmony_ci      fprintf(stderr, "failing shader:\n");
537bf215546Sopenharmony_ci      ir3_print(ir);
538bf215546Sopenharmony_ci      abort();
539bf215546Sopenharmony_ci   }
540bf215546Sopenharmony_ci}
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_civoid
543bf215546Sopenharmony_ciir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
544bf215546Sopenharmony_ci                unsigned half_size, unsigned block_count)
545bf215546Sopenharmony_ci{
546bf215546Sopenharmony_ci#ifdef NDEBUG
547bf215546Sopenharmony_ci#define VALIDATE 0
548bf215546Sopenharmony_ci#else
549bf215546Sopenharmony_ci#define VALIDATE 1
550bf215546Sopenharmony_ci#endif
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_ci   if (!VALIDATE)
553bf215546Sopenharmony_ci      return;
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_ci   struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);
556bf215546Sopenharmony_ci   ctx->merged_regs = v->mergedregs;
557bf215546Sopenharmony_ci   ctx->full_size = full_size;
558bf215546Sopenharmony_ci   ctx->half_size = half_size;
559bf215546Sopenharmony_ci   ctx->block_count = block_count;
560bf215546Sopenharmony_ci
561bf215546Sopenharmony_ci   foreach_block (block, &v->ir->block_list) {
562bf215546Sopenharmony_ci      foreach_instr (instr, &block->instr_list) {
563bf215546Sopenharmony_ci         validate_simple(ctx, instr);
564bf215546Sopenharmony_ci      }
565bf215546Sopenharmony_ci   }
566bf215546Sopenharmony_ci
567bf215546Sopenharmony_ci   check_reaching_defs(ctx, v->ir);
568bf215546Sopenharmony_ci
569bf215546Sopenharmony_ci   ralloc_free(ctx);
570bf215546Sopenharmony_ci}
571