1/*
2 * Copyright (C) 2021 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "util/rb_tree.h"
25#include "ir3_ra.h"
26#include "ir3_shader.h"
27
/*
 * This pass does two things:
 *
 * 1. Calculates the maximum register pressure. To do this, we need to use the
 *    exact same technique that RA uses for combining meta_split instructions
 *    with their sources, so that our calculation agrees with RA.
 * 2. Spills when the register pressure exceeds a limit calculated by RA.
 *    The implementation is based on "Register Spilling and Live-Range Splitting
 *    for SSA-Form Programs" by Braun and Hack, although again care has to be
 *    taken to handle combining split/collect instructions.
 */
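
/* The same walk over the program serves both purposes: with ctx->spilling
 * false we only accumulate cur_pressure into max_pressure, and with it true we
 * also track next-use distances and insert spills/reloads whenever
 * cur_pressure would exceed limit_pressure.
 */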
39
40struct reg_or_immed {
41   unsigned flags;
42   union {
43      struct ir3_register *def;
44      uint32_t uimm;
45      unsigned const_num;
46   };
47};
48
49struct ra_spill_interval {
50   struct ir3_reg_interval interval;
51
52   struct rb_node node;
53   struct rb_node half_node;
54
55   /* The current SSA value/const/immed this source is mapped to. */
56   struct reg_or_immed dst;
57
58   /* When computing use distances we use the distance relative to the start
59    * of the block. So, for example, a value that's defined in cycle 5 of the
60    * block and used 6 cycles later will always have a next_use_distance of 11
61    * until we reach that use.
62    */
63   unsigned next_use_distance;
64
65   /* Whether this value was reloaded and therefore doesn't need to be
66    * spilled again. Corresponds to the S set in the paper.
67    */
68   bool already_spilled;
69
70   /* We need to add sources early for accounting purposes, but we have to
71    * insert the reload code for them last. Keep track of whether this interval
72    * needs to be reloaded later.
73    */
74   bool needs_reload;
75
76   /* Keep track of whether this interval currently can't be spilled because:
77    * - It or one of its children is a source and we're making space for
78    *   sources.
79    * - It is a destination and we're making space for destinations.
80    */
81   bool cant_spill;
82
83   /* Whether this interval can be rematerialized. */
84   bool can_rematerialize;
85};
86
87struct ra_spill_block_state {
88   unsigned *next_use_end;
89   unsigned *next_use_start;
90
91   unsigned cycles;
92
93   /* Map from SSA def to reg_or_immed it is mapped to at the end of the block.
94    * This map only contains values which we didn't spill, so it also serves as
95    * a record of the new live-out set for this block.
96    */
97   struct hash_table *remap;
98
99   /* For blocks whose successors are visited first (i.e. loop backedges), which
100    * values should be live at the end.
101    */
102   BITSET_WORD *live_out;
103
104   bool visited;
105};
106
107struct ra_spill_ctx {
108   struct ir3_reg_ctx reg_ctx;
109
110   struct ra_spill_interval **intervals;
111   unsigned intervals_count;
112
   /* rb tree of live intervals that we can spill, ordered by next-use distance.
    * full_live_intervals contains the full+half intervals in the merged_regs
    * case. We use this list to determine what to spill.
    */
117   struct rb_tree full_live_intervals;
118   struct rb_tree half_live_intervals;
119
120   struct ir3_pressure cur_pressure, max_pressure;
121
122   struct ir3_pressure limit_pressure;
123
124   /* When spilling, we need to reserve a register to serve as the zero'd
125    * "base". For simplicity we reserve a register at the beginning so that it's
126    * always available.
127    */
128   struct ir3_register *base_reg;
129
130   /* Current pvtmem offset in bytes. */
131   unsigned spill_slot;
132
133   struct ir3_liveness *live;
134
135   const struct ir3_compiler *compiler;
136
137   struct ra_spill_block_state *blocks;
138
139   bool spilling;
140
141   bool merged_regs;
142};
143
144static void
145add_base_reg(struct ra_spill_ctx *ctx, struct ir3 *ir)
146{
147   struct ir3_block *start = ir3_start_block(ir);
148
149   /* We need to stick it after any meta instructions which need to be first. */
150   struct ir3_instruction *after = NULL;
151   foreach_instr (instr, &start->instr_list) {
152      if (instr->opc != OPC_META_INPUT &&
153          instr->opc != OPC_META_TEX_PREFETCH) {
154         after = instr;
155         break;
156      }
157   }
158
159   struct ir3_instruction *mov = create_immed(start, 0);
160
161   if (after)
162      ir3_instr_move_before(mov, after);
163
164   ctx->base_reg = mov->dsts[0];
165
166   /* We don't create an interval, etc. for the base reg, so just lower the
167    * register pressure limit to account for it. We assume it's always
168    * available for simplicity.
169    */
170   ctx->limit_pressure.full -= reg_size(ctx->base_reg);
171}
172
173
174/* Compute the number of cycles per instruction used for next-use-distance
175 * analysis. This is just approximate, obviously.
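 * For example, an ALU instruction with (rptN) counts as 1 + N cycles, meta
 * instructions count as 0, and parallel copies/collects only count the
 * elements that actually have to be copied (i.e. aren't coalesced into the
 * same merge set as their destination).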
176 */
177static unsigned
178instr_cycles(struct ir3_instruction *instr)
179{
180   if (instr->opc == OPC_META_PARALLEL_COPY) {
181      unsigned cycles = 0;
182      for (unsigned i = 0; i < instr->dsts_count; i++) {
183         if (!instr->srcs[i]->def ||
184             instr->srcs[i]->def->merge_set != instr->dsts[i]->merge_set) {
185            cycles += reg_elems(instr->srcs[i]);
186         }
187      }
188
189      return cycles;
190   }
191
192   if (instr->opc == OPC_META_COLLECT) {
193      unsigned cycles = 0;
194      for (unsigned i = 0; i < instr->srcs_count; i++) {
195         if (!instr->srcs[i]->def ||
196             instr->srcs[i]->def->merge_set != instr->dsts[0]->merge_set) {
197            cycles++;
198         }
199      }
200
201      return cycles;
202   }
203
204   if (is_meta(instr))
205      return 0;
206
207   return 1 + instr->repeat;
208}
209
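/* Do one backwards walk over a block: compute the next-use distance of every
 * source and destination, then propagate the block-start distances into each
 * predecessor's block-end distances, offset by the predecessor's cycle count
 * (plus a large penalty on loop-exit edges). Returns true if any predecessor's
 * distances changed, so the fixed-point loop in compute_next_distance() knows
 * to keep iterating.
 */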
210static bool
211compute_block_next_distance(struct ra_spill_ctx *ctx, struct ir3_block *block,
212                            unsigned *tmp_next_use)
213{
214   struct ra_spill_block_state *state = &ctx->blocks[block->index];
215   memcpy(tmp_next_use, state->next_use_end,
216          ctx->live->definitions_count * sizeof(*tmp_next_use));
217
218   unsigned cycle = state->cycles;
219   foreach_instr_rev (instr, &block->instr_list) {
220      ra_foreach_dst (dst, instr) {
221         dst->next_use = tmp_next_use[dst->name];
222      }
223
224      ra_foreach_src (src, instr) {
225         src->next_use = tmp_next_use[src->def->name];
226      }
227
228      cycle -= instr_cycles(instr);
229
230      if (instr->opc == OPC_META_PARALLEL_COPY) {
231         ra_foreach_src_n (src, i, instr) {
232            if (src->def->merge_set == instr->dsts[i]->merge_set &&
233                src->def->merge_set_offset == instr->dsts[i]->merge_set_offset) {
234               tmp_next_use[src->def->name] =
235                  tmp_next_use[instr->dsts[i]->name];
236            } else {
237               tmp_next_use[src->def->name] = cycle;
238            }
239         }
240      } else if (instr->opc != OPC_META_PHI) {
241         ra_foreach_src (src, instr) {
242            tmp_next_use[src->def->name] = cycle;
243         }
244      }
245
246      ra_foreach_dst (dst, instr) {
247         tmp_next_use[dst->name] = UINT_MAX;
248      }
249   }
250
251   memcpy(state->next_use_start, tmp_next_use,
252          ctx->live->definitions_count * sizeof(*tmp_next_use));
253
254   bool progress = false;
255   for (unsigned i = 0; i < block->predecessors_count; i++) {
256      const struct ir3_block *pred = block->predecessors[i];
257      struct ra_spill_block_state *pred_state = &ctx->blocks[pred->index];
258
259      /* Add a large-enough distance in front of edges exiting the loop so that
260       * variables that are live-through the loop but not used inside it are
261       * prioritized for spilling, as per the paper. This just needs to be
262       * larger than the longest path through the loop.
263       */
264      bool loop_exit = pred->loop_depth < block->loop_depth;
265      unsigned block_distance = pred_state->cycles + (loop_exit ? 100000 : 0);
266
267      for (unsigned j = 0; j < ctx->live->definitions_count; j++) {
268         if (state->next_use_start[j] < UINT_MAX &&
269             state->next_use_start[j] + block_distance <
270             pred_state->next_use_end[j]) {
271            pred_state->next_use_end[j] = state->next_use_start[j] +
272               block_distance;
273            progress = true;
274         }
275      }
276
277      foreach_instr (phi, &block->instr_list) {
278         if (phi->opc != OPC_META_PHI)
279            break;
280         if (!phi->srcs[i]->def)
281            continue;
282         unsigned src = phi->srcs[i]->def->name;
283         if (phi->dsts[0]->next_use < UINT_MAX &&
284             phi->dsts[0]->next_use + block_distance <
285             pred_state->next_use_end[src]) {
286            pred_state->next_use_end[src] = phi->dsts[0]->next_use +
287               block_distance;
288            progress = true;
289         }
290      }
291   }
292
293   return progress;
294}
295
296static void
297compute_next_distance(struct ra_spill_ctx *ctx, struct ir3 *ir)
298{
299   for (unsigned i = 0; i < ctx->live->block_count; i++) {
300      ctx->blocks[i].next_use_start =
301         ralloc_array(ctx, unsigned, ctx->live->definitions_count);
302      ctx->blocks[i].next_use_end =
303         ralloc_array(ctx, unsigned, ctx->live->definitions_count);
304
305      for (unsigned j = 0; j < ctx->live->definitions_count; j++) {
306         ctx->blocks[i].next_use_start[j] = UINT_MAX;
307         ctx->blocks[i].next_use_end[j] = UINT_MAX;
308      }
309   }
310
311   foreach_block (block, &ir->block_list) {
312      struct ra_spill_block_state *state = &ctx->blocks[block->index];
313      state->cycles = 0;
314      foreach_instr (instr, &block->instr_list) {
315         state->cycles += instr_cycles(instr);
316         foreach_dst (dst, instr) {
317            dst->spill_slot = ~0;
318         }
319      }
320   }
321
322   unsigned *tmp_next_use =
323      ralloc_array(ctx, unsigned, ctx->live->definitions_count);
324
325   bool progress = true;
326   while (progress) {
327      progress = false;
328      foreach_block_rev (block, &ir->block_list) {
329         progress |= compute_block_next_distance(ctx, block, tmp_next_use);
330      }
331   }
332}
333
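/* A value can be rematerialized if it is a simple mov of an immediate or a
 * (non-relative) const, in which case re-emitting the mov at each reload point
 * is cheaper than spilling it to pvtmem and loading it back.
 */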
334static bool
335can_rematerialize(struct ir3_register *reg)
336{
337   if (reg->flags & IR3_REG_ARRAY)
338      return false;
339   if (reg->instr->opc != OPC_MOV)
340      return false;
341   if (!(reg->instr->srcs[0]->flags & (IR3_REG_IMMED | IR3_REG_CONST)))
342      return false;
343   if (reg->instr->srcs[0]->flags & IR3_REG_RELATIV)
344      return false;
345   return true;
346}
347
348static struct ir3_register *
349rematerialize(struct ir3_register *reg, struct ir3_instruction *after,
350              struct ir3_block *block)
351{
352   d("rematerializing ssa_%u:%u", reg->instr->serialno, reg->name);
353
354   struct ir3_instruction *remat =
355      ir3_instr_create(block, reg->instr->opc, 1, reg->instr->srcs_count);
356   struct ir3_register *dst = __ssa_dst(remat);
357   dst->flags |= reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
358   for (unsigned i = 0; i < reg->instr->srcs_count; i++) {
359      struct ir3_register *src =
360         ir3_src_create(remat, INVALID_REG, reg->instr->srcs[i]->flags);
361      *src = *reg->instr->srcs[i];
362   }
363
364   remat->cat1 = reg->instr->cat1;
365
366   dst->merge_set = reg->merge_set;
367   dst->merge_set_offset = reg->merge_set_offset;
368   dst->interval_start = reg->interval_start;
369   dst->interval_end = reg->interval_end;
370
371   if (after)
372      ir3_instr_move_before(remat, after);
373
374   return dst;
375}
376
377static void
378ra_spill_interval_init(struct ra_spill_interval *interval,
379                       struct ir3_register *reg)
380{
381   ir3_reg_interval_init(&interval->interval, reg);
382   interval->dst.flags = reg->flags;
383   interval->dst.def = reg;
384   interval->already_spilled = false;
385   interval->needs_reload = false;
386   interval->cant_spill = false;
387   interval->can_rematerialize = can_rematerialize(reg);
388}
389
390static struct ra_spill_interval *
391ir3_reg_interval_to_interval(struct ir3_reg_interval *interval)
392{
393   return rb_node_data(struct ra_spill_interval, interval, interval);
394}
395
396static struct ra_spill_interval *
397ra_spill_interval_root(struct ra_spill_interval *interval)
398{
399   struct ir3_reg_interval *ir3_interval = &interval->interval;
400   while (ir3_interval->parent)
401      ir3_interval = ir3_interval->parent;
402   return ir3_reg_interval_to_interval(ir3_interval);
403}
404
405static struct ra_spill_ctx *
406ir3_reg_ctx_to_ctx(struct ir3_reg_ctx *ctx)
407{
408   return rb_node_data(struct ra_spill_ctx, ctx, reg_ctx);
409}
410
411static int
412spill_interval_cmp(const struct ra_spill_interval *a,
413                   const struct ra_spill_interval *b)
414{
415   /* Prioritize intervals that we can rematerialize. */
416   if (a->can_rematerialize && !b->can_rematerialize)
417      return 1;
418   if (!a->can_rematerialize && b->can_rematerialize)
419      return -1;
420
421   return a->next_use_distance - b->next_use_distance;
422}
423
424static int
425ra_spill_interval_cmp(const struct rb_node *_a, const struct rb_node *_b)
426{
427   const struct ra_spill_interval *a =
428      rb_node_data(const struct ra_spill_interval, _a, node);
429   const struct ra_spill_interval *b =
430      rb_node_data(const struct ra_spill_interval, _b, node);
431   return spill_interval_cmp(a, b);
432}
433
434static int
435ra_spill_interval_half_cmp(const struct rb_node *_a, const struct rb_node *_b)
436{
437   const struct ra_spill_interval *a =
438      rb_node_data(const struct ra_spill_interval, _a, half_node);
439   const struct ra_spill_interval *b =
440      rb_node_data(const struct ra_spill_interval, _b, half_node);
441   return spill_interval_cmp(a, b);
442}
443
444static void
445interval_add(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
446{
447   struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
448   struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);
449
450   unsigned size = reg_size(interval->interval.reg);
451   if (interval->interval.reg->flags & IR3_REG_SHARED) {
452      ctx->cur_pressure.shared += size;
453   } else {
454      if (interval->interval.reg->flags & IR3_REG_HALF) {
455         ctx->cur_pressure.half += size;
456         if (ctx->spilling) {
457            rb_tree_insert(&ctx->half_live_intervals, &interval->half_node,
458                           ra_spill_interval_half_cmp);
459         }
460      }
461      if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) {
462         ctx->cur_pressure.full += size;
463         if (ctx->spilling) {
464            rb_tree_insert(&ctx->full_live_intervals, &interval->node,
465                           ra_spill_interval_cmp);
466         }
467      }
468   }
469}
470
471static void
472interval_delete(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_interval)
473{
474   struct ra_spill_interval *interval = ir3_reg_interval_to_interval(_interval);
475   struct ra_spill_ctx *ctx = ir3_reg_ctx_to_ctx(_ctx);
476
477   unsigned size = reg_size(interval->interval.reg);
478   if (interval->interval.reg->flags & IR3_REG_SHARED) {
479      ctx->cur_pressure.shared -= size;
480   } else {
481      if (interval->interval.reg->flags & IR3_REG_HALF) {
482         ctx->cur_pressure.half -= size;
483         if (ctx->spilling) {
484            rb_tree_remove(&ctx->half_live_intervals, &interval->half_node);
485         }
486      }
487      if (ctx->merged_regs || !(interval->interval.reg->flags & IR3_REG_HALF)) {
488         ctx->cur_pressure.full -= size;
489         if (ctx->spilling) {
490            rb_tree_remove(&ctx->full_live_intervals, &interval->node);
491         }
492      }
493   }
494}
495
496static void
497interval_readd(struct ir3_reg_ctx *_ctx, struct ir3_reg_interval *_parent,
498               struct ir3_reg_interval *_child)
499{
500   interval_add(_ctx, _child);
501}
502
503static void
504spill_ctx_init(struct ra_spill_ctx *ctx, struct ir3_shader_variant *v,
505               struct ir3_liveness *live)
506{
507   ctx->live = live;
508   ctx->intervals = ralloc_array(ctx, struct ra_spill_interval *,
509                                 ctx->live->definitions_count);
510   struct ra_spill_interval *intervals =
511      rzalloc_array(ctx, struct ra_spill_interval,
512                    ctx->live->definitions_count);
513   for (unsigned i = 0; i < ctx->live->definitions_count; i++)
514      ctx->intervals[i] = &intervals[i];
515
516   ctx->intervals_count = ctx->live->definitions_count;
517   ctx->compiler = v->compiler;
518   ctx->merged_regs = v->mergedregs;
519
520   rb_tree_init(&ctx->reg_ctx.intervals);
521   ctx->reg_ctx.interval_add = interval_add;
522   ctx->reg_ctx.interval_delete = interval_delete;
523   ctx->reg_ctx.interval_readd = interval_readd;
524}
525
526static void
527ra_spill_ctx_insert(struct ra_spill_ctx *ctx,
528                    struct ra_spill_interval *interval)
529{
530   ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);
531}
532
533static void
534ra_spill_ctx_remove(struct ra_spill_ctx *ctx,
535                    struct ra_spill_interval *interval)
536{
537   ir3_reg_interval_remove(&ctx->reg_ctx, &interval->interval);
538}
539
540static void
541init_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
542{
543   struct ra_spill_interval *interval = ctx->intervals[dst->name];
544   ra_spill_interval_init(interval, dst);
545   if (ctx->spilling) {
546      interval->next_use_distance = dst->next_use;
547
548      /* We only need to keep track of used-ness if this value may be
549       * rematerialized. This also keeps us from nuking things that may be
550       * in the keeps list (e.g. atomics, input splits).
551       */
552      if (interval->can_rematerialize)
553         dst->instr->flags |= IR3_INSTR_UNUSED;
554   }
555}
556
557static void
558insert_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
559{
560   struct ra_spill_interval *interval = ctx->intervals[dst->name];
561   if (interval->interval.inserted)
562      return;
563
564   ra_spill_ctx_insert(ctx, interval);
565   interval->cant_spill = true;
566
567   /* For precolored inputs, make sure we leave enough registers to allow for
568    * holes in the inputs. It can happen that the binning shader has a lower
569    * register pressure than the main shader, but the main shader decided to
570    * add holes between the inputs which means that the binning shader has a
571    * higher register demand.
572    */
573   if (dst->instr->opc == OPC_META_INPUT && dst->num != INVALID_REG) {
574      physreg_t physreg = ra_reg_get_physreg(dst);
575      physreg_t max = physreg + reg_size(dst);
576
577      if (interval->interval.reg->flags & IR3_REG_SHARED)
578         ctx->max_pressure.shared = MAX2(ctx->max_pressure.shared, max);
579      else if (interval->interval.reg->flags & IR3_REG_HALF)
580         ctx->max_pressure.half = MAX2(ctx->max_pressure.half, max);
581      else
582         ctx->max_pressure.full = MAX2(ctx->max_pressure.full, max);
583   }
584}
585
586static void
587insert_src(struct ra_spill_ctx *ctx, struct ir3_register *src)
588{
589   struct ra_spill_interval *interval = ctx->intervals[src->def->name];
590
591   if (!interval->interval.inserted) {
592      ra_spill_ctx_insert(ctx, interval);
593      interval->needs_reload = true;
594      interval->already_spilled = true;
595   }
596
   ra_spill_interval_root(interval)->cant_spill = true;
}
600
601static void
602remove_src_early(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
603                 struct ir3_register *src)
604{
605   struct ra_spill_interval *interval = ctx->intervals[src->def->name];
606
607   if (!interval->interval.inserted || interval->interval.parent ||
608       !rb_tree_is_empty(&interval->interval.children))
609      return;
610
611   ra_spill_ctx_remove(ctx, interval);
612}
613
614static void
615remove_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
616           struct ir3_register *src)
617{
618   struct ra_spill_interval *interval = ctx->intervals[src->def->name];
619
620   if (!interval->interval.inserted)
621      return;
622
623   ra_spill_ctx_remove(ctx, interval);
624}
625
626static void
627finish_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
628{
629   struct ra_spill_interval *interval = ctx->intervals[dst->name];
630   interval->cant_spill = false;
631}
632
633static void
634remove_dst(struct ra_spill_ctx *ctx, struct ir3_register *dst)
635{
636   struct ra_spill_interval *interval = ctx->intervals[dst->name];
637
638   if (!interval->interval.inserted)
639      return;
640
641   ra_spill_ctx_remove(ctx, interval);
642}
643
644static void
645update_src_next_use(struct ra_spill_ctx *ctx, struct ir3_register *src)
646{
647   struct ra_spill_interval *interval = ctx->intervals[src->def->name];
648
649   assert(interval->interval.inserted);
650
651   interval->next_use_distance = src->next_use;
652
653   /* If this node is inserted in one of the trees, then it needs to be resorted
654    * as its key has changed.
655    */
656   if (!interval->interval.parent && !(src->flags & IR3_REG_SHARED)) {
657      if (src->flags & IR3_REG_HALF) {
658         rb_tree_remove(&ctx->half_live_intervals, &interval->half_node);
659         rb_tree_insert(&ctx->half_live_intervals, &interval->half_node,
660                        ra_spill_interval_half_cmp);
661      }
662      if (ctx->merged_regs || !(src->flags & IR3_REG_HALF)) {
663         rb_tree_remove(&ctx->full_live_intervals, &interval->node);
664         rb_tree_insert(&ctx->full_live_intervals, &interval->node,
665                        ra_spill_interval_cmp);
666      }
667   }
668}
669
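/* Allocate (or look up) the pvtmem byte offset used to spill a value. Values
 * in the same merge set share a single slot so that spills/reloads of
 * overlapping split/collect values all hit the same memory, with
 * merge_set_offset scaled by 2 to convert it to a byte offset.
 */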
670static unsigned
671get_spill_slot(struct ra_spill_ctx *ctx, struct ir3_register *reg)
672{
673   if (reg->merge_set) {
674      if (reg->merge_set->spill_slot == ~0) {
675         reg->merge_set->spill_slot = ALIGN_POT(ctx->spill_slot,
676                                                reg->merge_set->alignment);
677         ctx->spill_slot = reg->merge_set->spill_slot + reg->merge_set->size * 2;
678      }
679      return reg->merge_set->spill_slot + reg->merge_set_offset * 2;
680   } else {
681      if (reg->spill_slot == ~0) {
682         reg->spill_slot = ALIGN_POT(ctx->spill_slot, reg_elem_size(reg));
683         ctx->spill_slot = reg->spill_slot + reg_size(reg) * 2;
684      }
685      return reg->spill_slot;
686   }
687}
688
689static void
690set_src_val(struct ir3_register *src, const struct reg_or_immed *val)
691{
692   if (val->flags & IR3_REG_IMMED) {
693      src->flags = IR3_REG_IMMED | (val->flags & IR3_REG_HALF);
694      src->uim_val = val->uimm;
695      src->def = NULL;
696   } else if (val->flags & IR3_REG_CONST) {
697      src->flags = IR3_REG_CONST | (val->flags & IR3_REG_HALF);
698      src->num = val->const_num;
699      src->def = NULL;
700   } else {
701      src->def = val->def;
702      val->def->instr->flags &= ~IR3_INSTR_UNUSED;
703   }
704}
705
706static struct ir3_register *
707materialize_pcopy_src(const struct reg_or_immed *src,
708                      struct ir3_instruction *instr,
709                      struct ir3_block *block)
710{
711   struct ir3_instruction *mov = ir3_instr_create(block, OPC_MOV, 1, 1);
712   struct ir3_register *dst = __ssa_dst(mov);
713   dst->flags |= src->flags & IR3_REG_HALF;
714   struct ir3_register *mov_src = ir3_src_create(mov, INVALID_REG, src->flags);
715   set_src_val(mov_src, src);
716   mov->cat1.src_type = mov->cat1.dst_type =
717      (src->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
718
719   if (instr)
720      ir3_instr_move_before(mov, instr);
721   return dst;
722}
723
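/* Emit a spill of val to the given pvtmem slot. The spill is placed right
 * before instr, or appended to the end of block if instr is NULL (used when
 * spilling along block edges).
 */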
724static void
725spill(struct ra_spill_ctx *ctx, const struct reg_or_immed *val,
726      unsigned spill_slot, struct ir3_instruction *instr, struct ir3_block *block)
727{
728   struct ir3_register *reg;
729
730   /* If spilling an immed/const pcopy src, we need to actually materialize it
731    * first with a mov.
732    */
733   if (val->flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
734      reg = materialize_pcopy_src(val, instr, block);
735   } else {
736      reg = val->def;
737      reg->instr->flags &= ~IR3_INSTR_UNUSED;
738   }
739
740   d("spilling ssa_%u:%u to %u", reg->instr->serialno, reg->name,
741     spill_slot);
742
743   unsigned elems = reg_elems(reg);
744   struct ir3_instruction *spill =
745      ir3_instr_create(block, OPC_SPILL_MACRO, 0, 3);
746   ir3_src_create(spill, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg;
747   unsigned src_flags = reg->flags & (IR3_REG_HALF | IR3_REG_IMMED |
748                                      IR3_REG_CONST | IR3_REG_SSA |
749                                      IR3_REG_ARRAY);
750   struct ir3_register *src = ir3_src_create(spill, INVALID_REG, src_flags);
751   ir3_src_create(spill, INVALID_REG, IR3_REG_IMMED)->uim_val = elems;
752   spill->cat6.dst_offset = spill_slot;
753   spill->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
754
755   src->def = reg;
756   if (reg->flags & IR3_REG_ARRAY) {
757      src->size = reg->size;
758      src->array.id = reg->array.id;
759      src->array.offset = 0;
760   } else {
761      src->wrmask = reg->wrmask;
762   }
763
764   if (instr)
765      ir3_instr_move_before(spill, instr);
766}
767
768static void
769spill_interval(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval,
770               struct ir3_instruction *instr, struct ir3_block *block)
771{
772   if (interval->can_rematerialize && !interval->interval.reg->merge_set)
773      return;
774
775   spill(ctx, &interval->dst, get_spill_slot(ctx, interval->interval.reg),
776         instr, block);
777}
778
779/* This is similar to "limit" in the paper. */
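/* Remove live intervals from the tracked set, emitting spills as needed, until
 * the current half and full pressure fit back under limit_pressure. Intervals
 * marked cant_spill (current sources/destinations) are skipped, and intervals
 * that were already spilled are simply dropped without emitting another spill.
 */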
780static void
781limit(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
782{
783   if (ctx->cur_pressure.half > ctx->limit_pressure.half) {
784      d("cur half pressure %u exceeds %u", ctx->cur_pressure.half,
785        ctx->limit_pressure.half);
786      rb_tree_foreach_safe (struct ra_spill_interval, interval,
787                            &ctx->half_live_intervals, half_node) {
788         d("trying ssa_%u:%u", interval->interval.reg->instr->serialno,
789           interval->interval.reg->name);
790         if (!interval->cant_spill) {
791            if (!interval->already_spilled)
792               spill_interval(ctx, interval, instr, instr->block);
793            ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
794            if (ctx->cur_pressure.half <= ctx->limit_pressure.half)
795               break;
796         }
797      }
798
799      assert(ctx->cur_pressure.half <= ctx->limit_pressure.half);
800   }
801
802   if (ctx->cur_pressure.full > ctx->limit_pressure.full) {
803      d("cur full pressure %u exceeds %u", ctx->cur_pressure.full,
804        ctx->limit_pressure.full);
805      rb_tree_foreach_safe (struct ra_spill_interval, interval,
806                            &ctx->full_live_intervals, node) {
807         d("trying ssa_%u:%u", interval->interval.reg->instr->serialno,
808           interval->interval.reg->name);
809         if (!interval->cant_spill) {
810            if (!interval->already_spilled)
811               spill_interval(ctx, interval, instr, instr->block);
812            ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
813            if (ctx->cur_pressure.full <= ctx->limit_pressure.full)
814               break;
815         } else {
816            d("can't spill");
817         }
818      }
819
820      assert(ctx->cur_pressure.full <= ctx->limit_pressure.full);
821   }
822}
823
824/* There's a corner case where we reload a value which has overlapping live
825 * values already reloaded, either because it's the child of some other interval
826 * that was already reloaded or some of its children have already been
827 * reloaded. Because RA only expects overlapping source/dest intervals for meta
828 * instructions (split/collect), and we don't want to add register pressure by
829 * creating an entirely separate value, we need to add splits and collects to
830 * deal with this case. These splits/collects have to also have correct merge
831 * set information, so that it doesn't result in any actual code or register
832 * pressure in practice.
833 */
834
835static void
836add_to_merge_set(struct ir3_merge_set *set, struct ir3_register *def,
837                 unsigned offset)
838{
839   def->merge_set = set;
840   def->merge_set_offset = offset;
841   def->interval_start = set->interval_start + offset;
842   def->interval_end = set->interval_start + offset + reg_size(def);
843}
844
845static struct ir3_register *
846split(struct ir3_register *def, unsigned offset,
847      struct ir3_instruction *after, struct ir3_block *block)
848{
849   if (reg_elems(def) == 1) {
850      assert(offset == 0);
851      return def;
852   }
853
854   assert(!(def->flags & IR3_REG_ARRAY));
855   assert(def->merge_set);
856   struct ir3_instruction *split =
857      ir3_instr_create(block, OPC_META_SPLIT, 1, 1);
858   struct ir3_register *dst = __ssa_dst(split);
859   dst->flags |= def->flags & IR3_REG_HALF;
860   struct ir3_register *src = ir3_src_create(split, INVALID_REG, def->flags);
861   src->wrmask = def->wrmask;
862   src->def = def;
863   add_to_merge_set(def->merge_set, dst,
864                    def->merge_set_offset + offset * reg_elem_size(def));
865   if (after)
866      ir3_instr_move_before(split, after);
867   return dst;
868}
869
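/* Reconstruct an elems-wide slice of parent_def starting at offset (in
 * elements) by splitting out each element and collecting them back together.
 * The result is added to parent_def's merge set so that, as noted above, this
 * doesn't turn into any actual moves or extra register pressure.
 */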
870static struct ir3_register *
871extract(struct ir3_register *parent_def, unsigned offset, unsigned elems,
872        struct ir3_instruction *after, struct ir3_block *block)
873{
874   if (offset == 0 && elems == reg_elems(parent_def))
875      return parent_def;
876
877   struct ir3_register *srcs[elems];
878   for (unsigned i = 0; i < elems; i++) {
879      srcs[i] = split(parent_def, offset + i, after, block);
880   }
881
882   struct ir3_instruction *collect =
883      ir3_instr_create(block, OPC_META_COLLECT, 1, elems);
884   struct ir3_register *dst = __ssa_dst(collect);
885   dst->flags |= parent_def->flags & IR3_REG_HALF;
886   dst->wrmask = MASK(elems);
887   add_to_merge_set(parent_def->merge_set, dst, parent_def->merge_set_offset);
888
889   for (unsigned i = 0; i < elems; i++) {
890      ir3_src_create(collect, INVALID_REG, parent_def->flags)->def = srcs[i];
891   }
892
893   if (after)
894      ir3_instr_move_before(collect, after);
895   return dst;
896}
897
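/* Emit a reload of reg from its spill slot and return the newly created def.
 * The reload is placed right before "after", or appended to the end of block
 * if "after" is NULL.
 */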
898static struct ir3_register *
899reload(struct ra_spill_ctx *ctx, struct ir3_register *reg,
900       struct ir3_instruction *after, struct ir3_block *block)
901{
902   unsigned spill_slot = get_spill_slot(ctx, reg);
903
904   d("reloading ssa_%u:%u from %u", reg->instr->serialno, reg->name,
905     spill_slot);
906
907   unsigned elems = reg_elems(reg);
908   struct ir3_instruction *reload =
909      ir3_instr_create(block, OPC_RELOAD_MACRO, 1, 3);
910   struct ir3_register *dst = __ssa_dst(reload);
911   dst->flags |= reg->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
912   /* The reload may be split into multiple pieces, and if the destination
913    * overlaps with the base register then it could get clobbered before the
914    * last ldp in the sequence. Note that we always reserve space for the base
915    * register throughout the whole program, so effectively extending its live
916    * range past the end of the instruction isn't a problem for our pressure
917    * accounting.
918    */
919   dst->flags |= IR3_REG_EARLY_CLOBBER;
920   ir3_src_create(reload, INVALID_REG, ctx->base_reg->flags)->def = ctx->base_reg;
921   struct ir3_register *offset_reg =
922      ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED);
923   offset_reg->uim_val = spill_slot;
924   ir3_src_create(reload, INVALID_REG, IR3_REG_IMMED)->uim_val = elems;
925   reload->cat6.type = (reg->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
926
927   if (reg->flags & IR3_REG_ARRAY) {
928      dst->array.offset = 0;
929      dst->array.id = reg->array.id;
930      dst->size = reg->size;
931   } else {
932      dst->wrmask = MASK(elems);
933   }
934
935   dst->merge_set = reg->merge_set;
936   dst->merge_set_offset = reg->merge_set_offset;
937   dst->interval_start = reg->interval_start;
938   dst->interval_end = reg->interval_end;
939
940   if (after)
941      ir3_instr_move_before(reload, after);
942
943   return dst;
944}
945
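/* After reloading (or rematerializing) an interval, walk its child intervals
 * and rewrite each of them to a slice extract()'d from the new def, so that
 * any overlapping values which were reloaded separately stay consistent.
 */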
946static void
947rewrite_src_interval(struct ra_spill_ctx *ctx,
948                    struct ra_spill_interval *interval,
949                    struct ir3_register *def,
950                    struct ir3_instruction *instr,
951                    struct ir3_block *block)
952{
953   interval->dst.flags = def->flags;
954   interval->dst.def = def;
955   interval->needs_reload = false;
956
957   rb_tree_foreach (struct ra_spill_interval, child,
958                    &interval->interval.children, interval.node) {
959      struct ir3_register *child_reg = child->interval.reg;
960      struct ir3_register *child_def =
961         extract(def, (child_reg->interval_start -
962                       interval->interval.reg->interval_start) / reg_elem_size(def),
963                 reg_elems(child_reg), instr, block);
964      rewrite_src_interval(ctx, child, child_def, instr, block);
965   }
966}
967
968static void
969reload_def(struct ra_spill_ctx *ctx, struct ir3_register *def,
970           struct ir3_instruction *instr, struct ir3_block *block)
971{
972   unsigned elems = reg_elems(def);
973   struct ra_spill_interval *interval = ctx->intervals[def->name];
974
975   struct ir3_reg_interval *ir3_parent = interval->interval.parent;
976
977   if (ir3_parent) {
978      struct ra_spill_interval *parent =
979         ir3_reg_interval_to_interval(ir3_parent);
980      if (!parent->needs_reload) {
981         interval->dst.flags = def->flags;
982         interval->dst.def = extract(
983            parent->dst.def, (def->interval_start - parent->dst.def->interval_start) /
984            reg_elem_size(def), elems, instr, block);
985         return;
986      }
987   }
988
989   struct ir3_register *dst;
990   if (interval->can_rematerialize)
991      dst = rematerialize(def, instr, block);
992   else
993      dst = reload(ctx, def, instr, block);
994
995   rewrite_src_interval(ctx, interval, dst, instr, block);
996}
997
998static void
999reload_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
1000            struct ir3_register *src)
1001{
1002   struct ra_spill_interval *interval = ctx->intervals[src->def->name];
1003
1004   if (interval->needs_reload) {
1005      reload_def(ctx, src->def, instr, instr->block);
1006   }
1007
1008   ra_spill_interval_root(interval)->cant_spill = false;
1009}
1010
1011static void
1012rewrite_src(struct ra_spill_ctx *ctx, struct ir3_instruction *instr,
1013            struct ir3_register *src)
1014{
1015   struct ra_spill_interval *interval = ctx->intervals[src->def->name];
1016
1017   set_src_val(src, &interval->dst);
1018}
1019
1020static void
1021update_max_pressure(struct ra_spill_ctx *ctx)
1022{
1023   d("pressure:");
1024   d("\tfull: %u", ctx->cur_pressure.full);
1025   d("\thalf: %u", ctx->cur_pressure.half);
1026   d("\tshared: %u", ctx->cur_pressure.shared);
1027
1028   ctx->max_pressure.full =
1029      MAX2(ctx->max_pressure.full, ctx->cur_pressure.full);
1030   ctx->max_pressure.half =
1031      MAX2(ctx->max_pressure.half, ctx->cur_pressure.half);
1032   ctx->max_pressure.shared =
1033      MAX2(ctx->max_pressure.shared, ctx->cur_pressure.shared);
1034}
1035
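/* Process a single instruction: add its sources and destinations to the live
 * set, call limit() (or just update max_pressure when we're only measuring),
 * reload any sources that were spilled, and finally rewrite the sources to
 * point at their current locations.
 */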
1036static void
1037handle_instr(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
1038{
1039   ra_foreach_dst (dst, instr) {
1040      init_dst(ctx, dst);
1041   }
1042
1043   if (ctx->spilling) {
1044      ra_foreach_src (src, instr)
1045         insert_src(ctx, src);
1046   }
1047
   /* Handle tied and early-clobber destinations. If a destination is tied to
    * a source and that source is live-through, then we need to allocate a new
    * register for the destination which is live-through itself and cannot
    * overlap the sources. Similarly, early-clobber destinations cannot overlap
    * sources.
    */
1054
1055   ra_foreach_dst (dst, instr) {
1056      struct ir3_register *tied_src = dst->tied;
1057      if ((tied_src && !(tied_src->flags & IR3_REG_FIRST_KILL)) ||
1058          (dst->flags & IR3_REG_EARLY_CLOBBER))
1059         insert_dst(ctx, dst);
1060   }
1061
1062   if (ctx->spilling)
1063      limit(ctx, instr);
1064   else
1065      update_max_pressure(ctx);
1066
1067   if (ctx->spilling) {
1068      ra_foreach_src (src, instr) {
1069         reload_src(ctx, instr, src);
1070         update_src_next_use(ctx, src);
1071      }
1072   }
1073
1074   ra_foreach_src (src, instr) {
1075      if (src->flags & IR3_REG_FIRST_KILL)
1076         remove_src_early(ctx, instr, src);
1077   }
1078
1079   ra_foreach_dst (dst, instr) {
1080      insert_dst(ctx, dst);
1081   }
1082
1083   if (ctx->spilling)
1084      limit(ctx, instr);
1085   else
1086      update_max_pressure(ctx);
1087
1088   /* We have to remove sources before rewriting them so that we can lookup the
1089    * interval to remove before the source itself is changed.
1090    */
1091   ra_foreach_src (src, instr) {
1092      if (src->flags & IR3_REG_FIRST_KILL)
1093         remove_src(ctx, instr, src);
1094   }
1095
1096   if (ctx->spilling) {
1097      ra_foreach_src (src, instr) {
1098         rewrite_src(ctx, instr, src);
1099      }
1100   }
1101
1102   ra_foreach_dst (dst, instr) {
1103      finish_dst(ctx, dst);
1104   }
1105
1106   for (unsigned i = 0; i < instr->dsts_count; i++) {
1107      if (ra_reg_is_dst(instr->dsts[i]) &&
1108          (instr->dsts[i]->flags & IR3_REG_UNUSED))
1109         remove_dst(ctx, instr->dsts[i]);
1110   }
1111}
1112
1113static struct ra_spill_interval *
1114create_temp_interval(struct ra_spill_ctx *ctx, struct ir3_register *def)
1115{
1116   unsigned name = ctx->intervals_count++;
1117   unsigned offset = ctx->live->interval_offset;
1118
1119   /* This is kinda hacky, but we need to create a fake SSA def here that is
1120    * only used as part of the pcopy accounting. See below.
1121    */
1122   struct ir3_register *reg = rzalloc(ctx, struct ir3_register);
1123   *reg = *def;
1124   reg->name = name;
1125   reg->interval_start = offset;
1126   reg->interval_end = offset + reg_size(def);
1127   reg->merge_set = NULL;
1128
1129   ctx->intervals = reralloc(ctx, ctx->intervals, struct ra_spill_interval *,
1130                             ctx->intervals_count);
1131   struct ra_spill_interval *interval = rzalloc(ctx, struct ra_spill_interval);
1132   ra_spill_interval_init(interval, reg);
1133   ctx->intervals[name] = interval;
1134   ctx->live->interval_offset += reg_size(def);
1135   return interval;
1136}
1137
1138/* In the sequence of copies generated (see below), would this source be killed?
1139 */
1140static bool
1141is_last_pcopy_src(struct ir3_instruction *pcopy, unsigned src_n)
1142{
1143   struct ir3_register *src = pcopy->srcs[src_n];
1144   if (!(src->flags & IR3_REG_KILL))
1145      return false;
1146   for (unsigned j = src_n + 1; j < pcopy->srcs_count; j++) {
1147      if (pcopy->srcs[j]->def == src->def)
1148         return false;
1149   }
1150   return true;
1151}
1152
1153/* Parallel copies are different from normal instructions. The sources together
1154 * may be larger than the entire register file, so we cannot just reload every
1155 * source like normal, and indeed that probably wouldn't be a great idea.
1156 * Instead we essentially need to lower the parallel copy to "copies," just like
1157 * in the normal CSSA construction, although we implement the copies by
1158 * reloading and then possibly spilling values. We essentially just shuffle
1159 * around the sources until each source either (a) is live or (b) has the same
1160 * spill slot as its corresponding destination. We do this by decomposing the
1161 * copy into a series of copies, so:
1162 *
1163 * a, b, c = d, e, f
1164 *
1165 * becomes:
1166 *
1167 * d' = d
1168 * e' = e
1169 * f' = f
1170 * a = d'
1171 * b = e'
1172 * c = f'
1173 *
1174 * the temporary SSA values d', e', and f' never actually show up in the result.
1175 * They are only used for our internal accounting. They may, however, have their
1176 * own spill slot created for them. Similarly, we don't actually emit any copy
1177 * instructions, although we emit the spills/reloads that *would've* been
1178 * required if those copies were there.
1179 *
1180 * TODO: in order to reduce the number of temporaries and therefore spill slots,
1181 * we could instead do a more complicated analysis that considers the location
1182 * transfer graph.
1183 *
1184 * In addition, we actually remove the parallel copy and rewrite all its uses
1185 * (in the phi nodes) rather than rewrite its sources at the end. Recreating it
1186 * later turns out to be easier than keeping it up-to-date throughout this pass,
1187 * since we may have to remove entries for phi sources that are spilled and add
1188 * entries for live-outs that are spilled and reloaded, which can happen here
1189 * and then possibly be undone or done again when processing live-ins of the
1190 * successor block.
1191 */
1192
1193static void
1194handle_pcopy(struct ra_spill_ctx *ctx, struct ir3_instruction *pcopy)
1195{
1196   foreach_dst (dst, pcopy) {
1197      struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
1198      ra_spill_interval_init(dst_interval, dst);
1199   }
1200
1201   foreach_src_n (src, i, pcopy) {
1202      d("processing src %u", i);
1203      struct ir3_register *dst = pcopy->dsts[i];
1204
1205      /* Skip the intermediate copy for cases where the source is merged with
1206       * the destination. Crucially this means that we also don't reload/spill
1207       * it if it's been spilled, because it shares the same spill slot.
1208       */
1209      if (src->def && src->def->merge_set &&
1210          src->def->merge_set == dst->merge_set &&
1211          src->def->merge_set_offset == dst->merge_set_offset) {
1212         struct ra_spill_interval *src_interval = ctx->intervals[src->def->name];
1213         struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
1214         if (src_interval->interval.inserted) {
1215            update_src_next_use(ctx, src);
1216            if (is_last_pcopy_src(pcopy, i))
1217               ra_spill_ctx_remove(ctx, src_interval);
1218            dst_interval->cant_spill = true;
1219            ra_spill_ctx_insert(ctx, dst_interval);
1220            limit(ctx, pcopy);
1221            dst_interval->cant_spill = false;
1222            dst_interval->dst = src_interval->dst;
1223         }
1224      } else if (src->def) {
1225         struct ra_spill_interval *temp_interval =
1226            create_temp_interval(ctx, dst);
1227         struct ir3_register *temp = temp_interval->interval.reg;
1228         temp_interval->next_use_distance = src->next_use;
1229
1230         insert_src(ctx, src);
1231         limit(ctx, pcopy);
1232         reload_src(ctx, pcopy, src);
1233         update_src_next_use(ctx, src);
1234         if (is_last_pcopy_src(pcopy, i))
1235            remove_src(ctx, pcopy, src);
1236         struct ra_spill_interval *src_interval =
1237            ctx->intervals[src->def->name];
1238         temp_interval->dst = src_interval->dst;
1239
1240         temp_interval->cant_spill = true;
1241         ra_spill_ctx_insert(ctx, temp_interval);
1242         limit(ctx, pcopy);
1243         temp_interval->cant_spill = false;
1244
1245         src->flags = temp->flags;
1246         src->def = temp;
1247      }
1248   }
1249
1250   d("done with pcopy srcs");
1251
1252   foreach_src_n (src, i, pcopy) {
1253      struct ir3_register *dst = pcopy->dsts[i];
1254
1255      if (src->def && src->def->merge_set &&
1256          src->def->merge_set == dst->merge_set &&
1257          src->def->merge_set_offset == dst->merge_set_offset)
1258         continue;
1259
1260      struct ra_spill_interval *dst_interval = ctx->intervals[dst->name];
1261
1262      if (!src->def) {
1263         dst_interval->cant_spill = true;
1264         ra_spill_ctx_insert(ctx, dst_interval);
1265         limit(ctx, pcopy);
1266         dst_interval->cant_spill = false;
1267
1268         assert(src->flags & (IR3_REG_CONST | IR3_REG_IMMED));
1269         if (src->flags & IR3_REG_CONST) {
1270            dst_interval->dst.flags = src->flags;
1271            dst_interval->dst.const_num = src->num;
1272         } else {
1273            dst_interval->dst.flags = src->flags;
1274            dst_interval->dst.uimm = src->uim_val;
1275         }
1276      } else {
1277         struct ra_spill_interval *temp_interval = ctx->intervals[src->def->name];
1278
1279         insert_src(ctx, src);
1280         limit(ctx, pcopy);
1281         reload_src(ctx, pcopy, src);
1282         remove_src(ctx, pcopy, src);
1283
1284         dst_interval->dst = temp_interval->dst;
1285         ra_spill_ctx_insert(ctx, dst_interval);
1286      }
1287   }
1288
1289   pcopy->flags |= IR3_INSTR_UNUSED;
1290}
1291
1292static void
1293handle_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
1294{
1295   init_dst(ctx, instr->dsts[0]);
1296   insert_dst(ctx, instr->dsts[0]);
1297   finish_dst(ctx, instr->dsts[0]);
1298}
1299
1300static void
1301remove_input_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *instr)
1302{
1303   if (instr->opc == OPC_META_TEX_PREFETCH) {
1304      ra_foreach_src (src, instr)
1305         remove_src(ctx, instr, src);
1306   }
1307   if (instr->dsts[0]->flags & IR3_REG_UNUSED)
1308      remove_dst(ctx, instr->dsts[0]);
1309}
1310
1311static void
1312handle_live_in(struct ra_spill_ctx *ctx, struct ir3_block *block,
1313               struct ir3_register *def)
1314{
1315   struct ra_spill_interval *interval = ctx->intervals[def->name];
1316   ra_spill_interval_init(interval, def);
1317   if (ctx->spilling) {
1318      interval->next_use_distance =
1319         ctx->blocks[block->index].next_use_start[def->name];
1320   }
1321
1322   ra_spill_ctx_insert(ctx, interval);
1323}
1324
1325static bool
1326is_live_in_phi(struct ir3_register *def, struct ir3_block *block)
1327{
1328   return def->instr->opc == OPC_META_PHI && def->instr->block == block;
1329}
1330
1331static bool
1332is_live_in_pred(struct ra_spill_ctx *ctx, struct ir3_register *def,
1333                struct ir3_block *block, unsigned pred_idx)
1334{
1335   struct ir3_block *pred = block->predecessors[pred_idx];
1336   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
1337   if (is_live_in_phi(def, block)) {
1338      def = def->instr->srcs[pred_idx]->def;
1339      if (!def)
1340         return false;
1341   }
1342
1343   return _mesa_hash_table_search(state->remap, def);
1344}
1345
1346static bool
1347is_live_in_undef(struct ir3_register *def,
1348                 struct ir3_block *block, unsigned pred_idx)
1349{
1350   if (!is_live_in_phi(def, block))
1351      return false;
1352
1353   return !def->instr->srcs[pred_idx]->def;
1354}
1355
1356static struct reg_or_immed *
1357read_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
1358             struct ir3_block *block, unsigned pred_idx)
1359{
1360   struct ir3_block *pred = block->predecessors[pred_idx];
1361   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
1362
1363   if (is_live_in_phi(def, block)) {
1364      def = def->instr->srcs[pred_idx]->def;
1365      if (!def)
1366         return NULL;
1367   }
1368
1369   struct hash_entry *entry = _mesa_hash_table_search(state->remap, def);
1370   if (entry)
1371      return entry->data;
1372   else
1373      return NULL;
1374}
1375
1376static bool
1377is_live_in_all_preds(struct ra_spill_ctx *ctx, struct ir3_register *def,
1378                     struct ir3_block *block)
1379{
1380   for (unsigned i = 0; i < block->predecessors_count; i++) {
1381      if (!is_live_in_pred(ctx, def, block, i))
1382         return false;
1383   }
1384
1385   return true;
1386}
1387
1388static void
1389spill_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
1390              struct ir3_block *block)
1391{
1392   for (unsigned i = 0; i < block->predecessors_count; i++) {
1393      struct ir3_block *pred = block->predecessors[i];
1394      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
1395
1396      if (!state->visited)
1397         continue;
1398
1399      struct reg_or_immed *pred_def = read_live_in(ctx, def, block, i);
1400      if (pred_def) {
1401         spill(ctx, pred_def, get_spill_slot(ctx, def), NULL, pred);
1402      }
1403   }
1404}
1405
1406static void
1407spill_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block)
1408{
1409   bool all_preds_visited = true;
1410   for (unsigned i = 0; i < block->predecessors_count; i++) {
1411      struct ir3_block *pred = block->predecessors[i];
1412      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
1413      if (!state->visited) {
1414         all_preds_visited = false;
1415         break;
1416      }
1417   }
1418
1419   /* Note: in the paper they explicitly spill live-through values first, but we
1420    * should be doing that automatically by virtue of picking the largest
1421    * distance due to the extra distance added to edges out of loops.
1422    *
1423    * TODO: Keep track of pressure in each block and preemptively spill
1424    * live-through values as described in the paper to avoid spilling them
1425    * inside the loop.
1426    */
1427
1428   if (ctx->cur_pressure.half > ctx->limit_pressure.half) {
1429      rb_tree_foreach_safe (struct ra_spill_interval, interval,
1430                            &ctx->half_live_intervals, half_node) {
1431         if (all_preds_visited &&
1432             is_live_in_all_preds(ctx, interval->interval.reg, block))
1433            continue;
1434         if (interval->interval.reg->merge_set ||
1435             !interval->can_rematerialize)
1436            spill_live_in(ctx, interval->interval.reg, block);
1437         ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
1438         if (ctx->cur_pressure.half <= ctx->limit_pressure.half)
1439            break;
1440      }
1441   }
1442
1443   if (ctx->cur_pressure.full > ctx->limit_pressure.full) {
1444      rb_tree_foreach_safe (struct ra_spill_interval, interval,
1445                            &ctx->full_live_intervals, node) {
1446         if (all_preds_visited &&
1447             is_live_in_all_preds(ctx, interval->interval.reg, block))
1448            continue;
1449         spill_live_in(ctx, interval->interval.reg, block);
1450         ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
1451         if (ctx->cur_pressure.full <= ctx->limit_pressure.full)
1452            break;
1453      }
1454   }
1455}
1456
1457static void
1458live_in_rewrite(struct ra_spill_ctx *ctx,
1459                struct ra_spill_interval *interval,
1460                struct reg_or_immed *new_val,
1461                struct ir3_block *block, unsigned pred_idx)
1462{
1463   struct ir3_block *pred = block->predecessors[pred_idx];
1464   struct ra_spill_block_state *state = &ctx->blocks[pred->index];
1465   struct ir3_register *def = interval->interval.reg;
1466   if (is_live_in_phi(def, block)) {
1467      def = def->instr->srcs[pred_idx]->def;
1468   }
1469
1470   if (def)
1471      _mesa_hash_table_insert(state->remap, def, new_val);
1472
1473   rb_tree_foreach (struct ra_spill_interval, child,
1474                    &interval->interval.children, interval.node) {
1475      assert(new_val->flags & IR3_REG_SSA);
1476      struct ir3_register *child_def =
1477         extract(new_val->def,
1478                 (child->interval.reg->interval_start - def->interval_start) /
1479                 reg_elem_size(def), reg_elems(child->interval.reg),
1480                 NULL, pred);
1481      struct reg_or_immed *child_val = ralloc(ctx, struct reg_or_immed);
1482      child_val->def = child_def;
1483      child_val->flags = child_def->flags;
1484      live_in_rewrite(ctx, child, child_val, block, pred_idx);
1485   }
1486}
1487
1488static void
1489reload_live_in(struct ra_spill_ctx *ctx, struct ir3_register *def,
1490               struct ir3_block *block)
1491{
1492   struct ra_spill_interval *interval = ctx->intervals[def->name];
1493   for (unsigned i = 0; i < block->predecessors_count; i++) {
1494      struct ir3_block *pred = block->predecessors[i];
1495      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
1496      if (!state->visited)
1497         continue;
1498
1499      if (is_live_in_undef(def, block, i))
1500         continue;
1501
1502      struct reg_or_immed *new_val = read_live_in(ctx, def, block, i);
1503
1504      if (!new_val) {
1505         new_val = ralloc(ctx, struct reg_or_immed);
1506         if (interval->can_rematerialize)
1507            new_val->def = rematerialize(def, NULL, pred);
1508         else
1509            new_val->def = reload(ctx, def, NULL, pred);
1510         new_val->flags = new_val->def->flags;
1511      }
1512      live_in_rewrite(ctx, interval, new_val, block, i);
1513   }
1514}
1515
1516static void
1517reload_live_ins(struct ra_spill_ctx *ctx, struct ir3_block *block)
1518{
1519   rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals,
1520                    interval.node) {
1521      reload_live_in(ctx, interval->interval.reg, block);
1522   }
1523}
1524
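/* Make sure a live-in value that stayed live has a single def at the top of
 * the block, inserting a phi when the predecessors disagree on its location
 * (or when a predecessor hasn't been visited yet, i.e. a loop backedge).
 */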
static void
add_live_in_phi(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   if (!interval->interval.inserted)
      return;

   bool needs_phi = false;
   struct ir3_register *cur_def = NULL;
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited) {
         needs_phi = true;
         break;
      }

      struct hash_entry *entry =
         _mesa_hash_table_search(state->remap, def);
      assert(entry);
      struct reg_or_immed *pred_val = entry->data;
      if ((pred_val->flags & (IR3_REG_IMMED | IR3_REG_CONST)) ||
          !pred_val->def ||
          (cur_def && cur_def != pred_val->def)) {
         needs_phi = true;
         break;
      }
      cur_def = pred_val->def;
   }

   if (!needs_phi) {
      interval->dst.def = cur_def;
      interval->dst.flags = cur_def->flags;
      return;
   }

   struct ir3_instruction *phi =
      ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
   struct ir3_register *dst = __ssa_dst(phi);
   dst->flags |= def->flags & (IR3_REG_HALF | IR3_REG_ARRAY);
   dst->size = def->size;
   dst->wrmask = def->wrmask;

   dst->interval_start = def->interval_start;
   dst->interval_end = def->interval_end;
   dst->merge_set = def->merge_set;
   dst->merge_set_offset = def->merge_set_offset;

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      struct ir3_register *src = ir3_src_create(phi, INVALID_REG, dst->flags);
      src->size = def->size;
      src->wrmask = def->wrmask;

      if (state->visited) {
         struct hash_entry *entry =
            _mesa_hash_table_search(state->remap, def);
         assert(entry);
         struct reg_or_immed *new_val = entry->data;
         set_src_val(src, new_val);
      } else {
         src->def = def;
      }
   }

   interval->dst.def = dst;
   interval->dst.flags = dst->flags;

   ir3_instr_move_before_block(phi, block);
}

/* When spilling a block with a single predecessor, the predecessor may have
 * other successors, so we can't choose what's live in and we can't
 * spill/restore anything on the edge. Just make the inserted intervals exactly
 * match the predecessor's. If a value wasn't live in the predecessor then it
 * must've already been spilled. Also, there are no phi nodes to handle and no
 * live-ins to reload.
 */
static void
spill_single_pred_live_in(struct ra_spill_ctx *ctx,
                          struct ir3_block *block)
{
   unsigned name;
   BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                       ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      struct ra_spill_interval *interval = ctx->intervals[reg->name];
      struct reg_or_immed *val = read_live_in(ctx, reg, block, 0);
      if (val)
         interval->dst = *val;
      else
         ra_spill_ctx_remove(ctx, interval);
   }
}

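/* Rewrite the sources of an existing phi to whatever its operands were mapped
 * to at the end of each already-visited predecessor. If the interval for the
 * phi's destination isn't inserted (i.e. the value wasn't kept in registers),
 * the phi itself is dead and is just marked unused.
 */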
static void
rewrite_phi(struct ra_spill_ctx *ctx, struct ir3_instruction *phi,
            struct ir3_block *block)
{
   if (!ctx->intervals[phi->dsts[0]->name]->interval.inserted) {
      phi->flags |= IR3_INSTR_UNUSED;
      return;
   }

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];

      if (!state->visited)
         continue;

      struct ir3_register *src = phi->srcs[i];
      if (!src->def)
         continue;

      struct hash_entry *entry =
         _mesa_hash_table_search(state->remap, src->def);
      assert(entry);
      struct reg_or_immed *new_val = entry->data;
      set_src_val(src, new_val);
   }
}

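/* We're about to process the edge into an already-visited successor (a loop
 * header reached via a back edge), so the register state at the end of this
 * block has to match the state the header was processed with.
 * spill_live_out() drops a value the header doesn't expect to be live in
 * registers, spilling it unless it can simply be rematerialized later and
 * isn't part of a merge set.
 */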
static void
spill_live_out(struct ra_spill_ctx *ctx, struct ra_spill_interval *interval,
               struct ir3_block *block)
{
   struct ir3_register *def = interval->interval.reg;

   if (interval->interval.reg->merge_set ||
       !interval->can_rematerialize)
      spill(ctx, &interval->dst, get_spill_slot(ctx, def), NULL, block);
   ir3_reg_interval_remove_all(&ctx->reg_ctx, &interval->interval);
}

static void
spill_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   rb_tree_foreach_safe (struct ra_spill_interval, interval,
                         &ctx->reg_ctx.intervals, interval.node) {
      if (!BITSET_TEST(state->live_out, interval->interval.reg->name)) {
         spill_live_out(ctx, interval, block);
      }
   }
}

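/* Counterpart to spill_live_outs(): make sure everything the already-visited
 * successor expects to be in registers is actually live in registers at the
 * end of this block, reloading it if necessary.
 */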
static void
reload_live_out(struct ra_spill_ctx *ctx, struct ir3_register *def,
                struct ir3_block *block)
{
   struct ra_spill_interval *interval = ctx->intervals[def->name];
   ir3_reg_interval_insert(&ctx->reg_ctx, &interval->interval);

   reload_def(ctx, def, NULL, block);
}

static void
reload_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   unsigned name;
   BITSET_FOREACH_SET (name, state->live_out, ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      struct ra_spill_interval *interval = ctx->intervals[name];
      if (!interval->interval.inserted)
         reload_live_out(ctx, reg, block);
   }
}

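/* The single successor of this block has already been visited (i.e. this is a
 * loop back edge). Rewrite the successor's phi sources for this edge to the
 * values they are currently mapped to at the end of this block.
 */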
static void
update_live_out_phis(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   assert(!block->successors[1]);
   struct ir3_block *succ = block->successors[0];
   unsigned pred_idx = ir3_block_get_pred_index(succ, block);

   foreach_instr (instr, &succ->instr_list) {
      if (instr->opc != OPC_META_PHI)
         break;

      struct ir3_register *def = instr->srcs[pred_idx]->def;
      if (!def)
         continue;

      struct ra_spill_interval *interval = ctx->intervals[def->name];
      if (!interval->interval.inserted)
         continue;
      set_src_val(instr->srcs[pred_idx], &interval->dst);
   }
}

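/* When a predecessor of this block hasn't been visited yet (i.e. we're
 * processing a loop header and the predecessor is reached via a back edge),
 * record which values we decided to keep in registers, so that when we get to
 * that predecessor we know what has to be live at its end. Values defined by
 * live-in phis are translated to the corresponding phi source for that edge.
 */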
static void
record_pred_live_out(struct ra_spill_ctx *ctx,
                     struct ra_spill_interval *interval,
                     struct ir3_block *block, unsigned pred_idx)
{
   struct ir3_block *pred = block->predecessors[pred_idx];
   struct ra_spill_block_state *state = &ctx->blocks[pred->index];

   struct ir3_register *def = interval->interval.reg;
   if (is_live_in_phi(def, block)) {
      def = def->instr->srcs[pred_idx]->def;
   }
   BITSET_SET(state->live_out, def->name);

   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      record_pred_live_out(ctx, child, block, pred_idx);
   }
}

static void
record_pred_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_block *pred = block->predecessors[i];
      struct ra_spill_block_state *state = &ctx->blocks[pred->index];
      if (state->visited)
         continue;

      state->live_out = rzalloc_array(ctx, BITSET_WORD,
                                      BITSET_WORDS(ctx->live->definitions_count));

      rb_tree_foreach (struct ra_spill_interval, interval,
                       &ctx->reg_ctx.intervals, interval.node) {
         record_pred_live_out(ctx, interval, block, i);
      }
   }
}

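/* Record the reg_or_immed each live value is mapped to at the end of the
 * block, so that successors can look it up when rewriting their live-ins and
 * phi sources. SSA values whose def hasn't been materialized are skipped.
 */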
static void
record_live_out(struct ra_spill_ctx *ctx,
                struct ra_spill_block_state *state,
                struct ra_spill_interval *interval)
{
   if (!(interval->dst.flags & IR3_REG_SSA) ||
       interval->dst.def) {
      struct reg_or_immed *val = ralloc(ctx, struct reg_or_immed);
      *val = interval->dst;
      _mesa_hash_table_insert(state->remap, interval->interval.reg, val);
   }
   rb_tree_foreach (struct ra_spill_interval, child,
                    &interval->interval.children, interval.node) {
      record_live_out(ctx, state, child);
   }
}

static void
record_live_outs(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   struct ra_spill_block_state *state = &ctx->blocks[block->index];
   state->remap = _mesa_pointer_hash_table_create(ctx);

   rb_tree_foreach (struct ra_spill_interval, interval, &ctx->reg_ctx.intervals,
                    interval.node) {
      record_live_out(ctx, state, interval);
   }
}

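/* Process a single block: rebuild the interval state from the live-ins, walk
 * the instructions while either tracking pressure or spilling/reloading, fix
 * up the edge to an already-visited successor (a loop back edge), and finally
 * record the end-of-block state for our successors.
 */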
static void
handle_block(struct ra_spill_ctx *ctx, struct ir3_block *block)
{
   memset(&ctx->cur_pressure, 0, sizeof(ctx->cur_pressure));
   rb_tree_init(&ctx->reg_ctx.intervals);
   rb_tree_init(&ctx->full_live_intervals);
   rb_tree_init(&ctx->half_live_intervals);

   unsigned name;
   BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                       ctx->live->definitions_count) {
      struct ir3_register *reg = ctx->live->definitions[name];
      handle_live_in(ctx, block, reg);
   }

   foreach_instr (instr, &block->instr_list) {
      if (instr->opc != OPC_META_PHI && instr->opc != OPC_META_INPUT &&
          instr->opc != OPC_META_TEX_PREFETCH)
         break;
      handle_input_phi(ctx, instr);
   }

   if (ctx->spilling) {
      if (block->predecessors_count == 1) {
         spill_single_pred_live_in(ctx, block);
      } else {
         spill_live_ins(ctx, block);
         reload_live_ins(ctx, block);
         record_pred_live_outs(ctx, block);
         foreach_instr (instr, &block->instr_list) {
            if (instr->opc != OPC_META_PHI)
               break;
            rewrite_phi(ctx, instr, block);
         }
         BITSET_FOREACH_SET (name, ctx->live->live_in[block->index],
                             ctx->live->definitions_count) {
            struct ir3_register *reg = ctx->live->definitions[name];
            add_live_in_phi(ctx, reg, block);
         }
      }
   } else {
      update_max_pressure(ctx);
   }

   foreach_instr (instr, &block->instr_list) {
      di(instr, "processing");

      if (instr->opc == OPC_META_PHI || instr->opc == OPC_META_INPUT ||
          instr->opc == OPC_META_TEX_PREFETCH)
         remove_input_phi(ctx, instr);
      else if (ctx->spilling && instr->opc == OPC_META_PARALLEL_COPY)
         handle_pcopy(ctx, instr);
      else if (ctx->spilling && instr->opc == OPC_MOV &&
               instr->dsts[0] == ctx->base_reg)
         /* skip */;
      else
         handle_instr(ctx, instr);
   }

   if (ctx->spilling && block->successors[0]) {
      struct ra_spill_block_state *state =
         &ctx->blocks[block->successors[0]->index];
      if (state->visited) {
         assert(!block->successors[1]);

         spill_live_outs(ctx, block);
         reload_live_outs(ctx, block);
         update_live_out_phis(ctx, block);
      }
   }

   if (ctx->spilling) {
      record_live_outs(ctx, block);
      ctx->blocks[block->index].visited = true;
   }
}

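/* If every source of the phi, ignoring sources that point back at the phi
 * itself, is the same non-undef def, then the phi is redundant: record that
 * def in phi->data, mark the phi unused, and report progress so that users of
 * the phi get redirected via simplify_phi_def()/simplify_phi_srcs().
 */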
static bool
simplify_phi_node(struct ir3_instruction *phi)
{
   struct ir3_register *def = NULL;
   foreach_src (src, phi) {
      /* Ignore phi sources which point to the phi itself. */
      if (src->def == phi->dsts[0])
         continue;
      /* If it's undef or it doesn't match the previous sources, bail */
      if (!src->def || (def && def != src->def))
         return false;
      def = src->def;
   }

   phi->data = def;
   phi->flags |= IR3_INSTR_UNUSED;
   return true;
}

static struct ir3_register *
simplify_phi_def(struct ir3_register *def)
{
   if (def->instr->opc == OPC_META_PHI) {
      struct ir3_instruction *phi = def->instr;

      /* Note: this function is always called at least once after visiting the
       * phi, so either some phi has been simplified in the meantime, in which
       * case progress will be set to true and we will visit this definition
       * again, or phi->data already has the most up-to-date value. Therefore
       * we don't have to recursively follow phi->data.
       */
      if (phi->data)
         return phi->data;
   }

   return def;
}

static void
simplify_phi_srcs(struct ir3_instruction *instr)
{
   foreach_src (src, instr) {
      if (src->def)
         src->def = simplify_phi_def(src->def);
   }
}

/* We insert phi nodes for all live-ins of loops in case we need to split the
 * live range. This pass cleans that up for the case where the live range didn't
 * actually need to be split.
 */
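/* For example, if a value defined before a loop is only ever read inside it,
 * the inserted header phi ends up looking roughly like:
 *
 *    x = ...
 *    loop {
 *       y = phi(x, y)
 *       ... = use(y)
 *    }
 *
 * where the back-edge source is the phi itself. Such a phi is redundant, so
 * the code below rewrites every use of "y" to read "x" directly and marks the
 * phi as unused.
 */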
static void
simplify_phi_nodes(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc != OPC_META_PHI)
            break;
         instr->data = NULL;
      }
   }

   bool progress;
   do {
      progress = false;
      foreach_block (block, &ir->block_list) {
         foreach_instr (instr, &block->instr_list) {
            if (instr->opc == OPC_META_PHI || (instr->flags & IR3_INSTR_UNUSED))
               continue;

            simplify_phi_srcs(instr);
         }

         /* Visit phi nodes in the successors to make sure that phi sources are
          * always visited at least once after visiting the definition they
          * point to. See note in simplify_phi_def() for why this is necessary.
          */
         for (unsigned i = 0; i < 2; i++) {
            struct ir3_block *succ = block->successors[i];
            if (!succ)
               continue;
            foreach_instr (instr, &succ->instr_list) {
               if (instr->opc != OPC_META_PHI)
                  break;
               if (instr->flags & IR3_INSTR_UNUSED) {
                  if (instr->data)
                     instr->data = simplify_phi_def(instr->data);
               } else {
                  simplify_phi_srcs(instr);
                  progress |= simplify_phi_node(instr);
               }
            }
         }
      }
   } while (progress);
}

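/* We reuse IR3_INSTR_UNUSED to mark phis and parallel copies that become dead
 * during spilling, so clear any stale flags before we start.
 */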
static void
unmark_dead(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         instr->flags &= ~IR3_INSTR_UNUSED;
      }
   }
}

/* Simple pass to remove now-dead phi nodes and pcopy instructions. We mark
 * which ones are dead along the way, so there's nothing to compute here.
 */
static void
cleanup_dead(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->flags & IR3_INSTR_UNUSED)
            list_delinit(&instr->node);
      }
   }
}

/* Deal with merge sets after spilling. Spilling generally leaves the merge sets
 * in a mess, and even if we properly cleaned up after ourselves, we would want
 * to recompute the merge sets afterward anyway. That's because
 * spilling/reloading can "break up" phi webs and split/collect webs so that
 * allocating them to the same register no longer gives any benefit. For
 * example, imagine we have this:
 *
 * if (...) {
 *    foo = ...
 * } else {
 *    bar = ...
 * }
 * baz = phi(foo, bar)
 *
 * and we spill "baz":
 *
 * if (...) {
 *    foo = ...
 *    spill(foo)
 * } else {
 *    bar = ...
 *    spill(bar)
 * }
 * baz = reload()
 *
 * now foo, bar, and baz don't have to be allocated to the same register. How
 * exactly the merge sets change can be complicated, so it's easier just to
 * recompute them.
 *
 * However, there's a wrinkle in this: those same merge sets determine the
 * register pressure, due to multiple values inhabiting the same register! And
 * we assume that this sharing happens when spilling. Therefore we need a
 * three-step procedure:
 *
 * 1. Drop the original merge sets.
 * 2. Calculate which values *must* be merged, being careful to only use the
 *    interval information which isn't trashed by spilling, and forcibly merge
 *    them.
 * 3. Let ir3_merge_regs() finish the job, including recalculating the
 *    intervals.
 */

static void
fixup_merge_sets(struct ir3_liveness *live, struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         ra_foreach_dst (dst, instr) {
            dst->merge_set = NULL;
            dst->merge_set_offset = 0;
         }
      }
   }

   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc != OPC_META_SPLIT &&
             instr->opc != OPC_META_COLLECT)
            continue;

         struct ir3_register *dst = instr->dsts[0];
         ra_foreach_src (src, instr) {
            if (!(src->flags & IR3_REG_KILL) &&
                src->def->interval_start < dst->interval_end &&
                dst->interval_start < src->def->interval_end) {
               ir3_force_merge(dst, src->def,
                               src->def->interval_start - dst->interval_start);
            }
         }
      }
   }

   ir3_merge_regs(live, ir);
}

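/* Entry point for the pressure-calculation-only mode: walk every block with
 * ctx->spilling left false and record the maximum register pressure seen.
 */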
void
ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
                  struct ir3_pressure *max_pressure)
{
   struct ra_spill_ctx *ctx = rzalloc(NULL, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, live);

   foreach_block (block, &v->ir->block_list) {
      handle_block(ctx, block);
   }

   assert(ctx->cur_pressure.full == 0);
   assert(ctx->cur_pressure.half == 0);
   assert(ctx->cur_pressure.shared == 0);

   *max_pressure = ctx->max_pressure;
   ralloc_free(ctx);
}

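/* Entry point for the spilling mode: insert spills and reloads to private
 * memory to bring register pressure below limit_pressure, clean up phis that
 * turned out to be unnecessary, and then recompute liveness and the merge
 * sets, both of which spilling invalidates.
 */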
bool
ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
          struct ir3_liveness **live,
          const struct ir3_pressure *limit_pressure)
{
   void *mem_ctx = ralloc_parent(*live);
   struct ra_spill_ctx *ctx = rzalloc(mem_ctx, struct ra_spill_ctx);
   spill_ctx_init(ctx, v, *live);

   ctx->spilling = true;

   ctx->blocks = rzalloc_array(ctx, struct ra_spill_block_state,
                               ctx->live->block_count);
   rb_tree_init(&ctx->full_live_intervals);
   rb_tree_init(&ctx->half_live_intervals);

   ctx->limit_pressure = *limit_pressure;
   ctx->spill_slot = v->pvtmem_size;

   add_base_reg(ctx, ir);
   compute_next_distance(ctx, ir);

   unmark_dead(ir);

   foreach_block (block, &ir->block_list) {
      handle_block(ctx, block);
   }

   simplify_phi_nodes(ir);

   cleanup_dead(ir);

   ir3_create_parallel_copies(ir);

   /* After this point, we're done mutating the IR. Liveness has been trashed,
    * so recalculate it. We'll need it for recalculating the merge sets.
    */
   ralloc_free(ctx->live);
   *live = ir3_calc_liveness(mem_ctx, ir);

   fixup_merge_sets(*live, ir);

   v->pvtmem_size = ctx->spill_slot;
   ralloc_free(ctx);

   return true;
}
