1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (c) 2017 Lima Project
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sub license,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the
12bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions
13bf215546Sopenharmony_ci * of the Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "gpir.h"
26bf215546Sopenharmony_ci#include "util/u_dynarray.h"
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci/* Per-register information */
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_cistruct reg_info {
31bf215546Sopenharmony_ci   BITSET_WORD *conflicts;
32bf215546Sopenharmony_ci   struct util_dynarray conflict_list;
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_ci   unsigned num_conflicts;
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_ci   int assigned_color;
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_ci   bool visited;
39bf215546Sopenharmony_ci};
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_cistruct regalloc_ctx {
42bf215546Sopenharmony_ci   unsigned bitset_words;
43bf215546Sopenharmony_ci   struct reg_info *registers;
44bf215546Sopenharmony_ci
45bf215546Sopenharmony_ci   /* Reusable scratch liveness array */
46bf215546Sopenharmony_ci   BITSET_WORD *live;
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci   unsigned *worklist;
49bf215546Sopenharmony_ci   unsigned worklist_start, worklist_end;
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci   unsigned *stack;
52bf215546Sopenharmony_ci   unsigned stack_size;
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci   gpir_compiler *comp;
55bf215546Sopenharmony_ci   void *mem_ctx;
56bf215546Sopenharmony_ci};
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_ci/* Liveness analysis */
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_cistatic void propagate_liveness_node(gpir_node *node, BITSET_WORD *live,
61bf215546Sopenharmony_ci                                    gpir_compiler *comp)
62bf215546Sopenharmony_ci{
63bf215546Sopenharmony_ci   /* KILL */
64bf215546Sopenharmony_ci   if (node->type == gpir_node_type_store) {
65bf215546Sopenharmony_ci      if (node->op == gpir_op_store_reg) {
66bf215546Sopenharmony_ci         gpir_store_node *store = gpir_node_to_store(node);
67bf215546Sopenharmony_ci         BITSET_CLEAR(live, store->reg->index);
68bf215546Sopenharmony_ci      }
69bf215546Sopenharmony_ci   }
70bf215546Sopenharmony_ci
71bf215546Sopenharmony_ci   /* GEN */
72bf215546Sopenharmony_ci   if (node->type == gpir_node_type_load) {
73bf215546Sopenharmony_ci      if (node->op == gpir_op_load_reg) {
74bf215546Sopenharmony_ci         gpir_load_node *load = gpir_node_to_load(node);
75bf215546Sopenharmony_ci         BITSET_SET(live, load->reg->index);
76bf215546Sopenharmony_ci      }
77bf215546Sopenharmony_ci   }
78bf215546Sopenharmony_ci}
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_cistatic bool propagate_liveness_block(gpir_block *block, struct regalloc_ctx *ctx)
81bf215546Sopenharmony_ci{
82bf215546Sopenharmony_ci   for (unsigned i = 0; i < 2; i++) {
83bf215546Sopenharmony_ci      if (block->successors[i]) {
84bf215546Sopenharmony_ci         for (unsigned j = 0; j < ctx->bitset_words; j++)
85bf215546Sopenharmony_ci            block->live_out[j] |= block->successors[i]->live_in[j];
86bf215546Sopenharmony_ci      }
87bf215546Sopenharmony_ci   }
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci   memcpy(ctx->live, block->live_out, ctx->bitset_words * sizeof(BITSET_WORD));
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_ci   list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
92bf215546Sopenharmony_ci      propagate_liveness_node(node, ctx->live, block->comp);
93bf215546Sopenharmony_ci   }
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci   bool changed = false;
96bf215546Sopenharmony_ci   for (unsigned i = 0; i < ctx->bitset_words; i++) {
97bf215546Sopenharmony_ci      changed |= (block->live_in[i] != ctx->live[i]);
98bf215546Sopenharmony_ci      block->live_in[i] = ctx->live[i];
99bf215546Sopenharmony_ci   }
100bf215546Sopenharmony_ci   return changed;
101bf215546Sopenharmony_ci}
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_cistatic void calc_def_block(gpir_block *block)
104bf215546Sopenharmony_ci{
105bf215546Sopenharmony_ci   list_for_each_entry(gpir_node, node, &block->node_list, list) {
106bf215546Sopenharmony_ci      if (node->op == gpir_op_store_reg) {
107bf215546Sopenharmony_ci         gpir_store_node *store = gpir_node_to_store(node);
108bf215546Sopenharmony_ci         BITSET_SET(block->def_out, store->reg->index);
109bf215546Sopenharmony_ci      }
110bf215546Sopenharmony_ci   }
111bf215546Sopenharmony_ci}
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_cistatic void calc_liveness(struct regalloc_ctx *ctx)
114bf215546Sopenharmony_ci{
115bf215546Sopenharmony_ci   bool changed = true;
116bf215546Sopenharmony_ci   while (changed) {
117bf215546Sopenharmony_ci      changed = false;
118bf215546Sopenharmony_ci      list_for_each_entry_rev(gpir_block, block, &ctx->comp->block_list, list) {
119bf215546Sopenharmony_ci         changed |= propagate_liveness_block(block, ctx);
120bf215546Sopenharmony_ci      }
121bf215546Sopenharmony_ci   }
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_ci   list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
124bf215546Sopenharmony_ci      calc_def_block(block);
125bf215546Sopenharmony_ci   }
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_ci   changed = true;
128bf215546Sopenharmony_ci   while (changed) {
129bf215546Sopenharmony_ci      changed = false;
130bf215546Sopenharmony_ci      list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
131bf215546Sopenharmony_ci         for (unsigned i = 0; i < 2; i++) {
132bf215546Sopenharmony_ci            gpir_block *succ = block->successors[i];
133bf215546Sopenharmony_ci            if (!succ)
134bf215546Sopenharmony_ci               continue;
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci            for (unsigned j = 0; j < ctx->bitset_words; j++) {
137bf215546Sopenharmony_ci               BITSET_WORD new = block->def_out[j] & ~succ->def_out[j];
138bf215546Sopenharmony_ci               changed |= (new != 0);
139bf215546Sopenharmony_ci               succ->def_out[j] |= block->def_out[j];
140bf215546Sopenharmony_ci            }
141bf215546Sopenharmony_ci         }
142bf215546Sopenharmony_ci      }
143bf215546Sopenharmony_ci   }
144bf215546Sopenharmony_ci}
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci/* Interference calculation */
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_cistatic void add_interference(struct regalloc_ctx *ctx, unsigned i, unsigned j)
149bf215546Sopenharmony_ci{
150bf215546Sopenharmony_ci   if (i == j)
151bf215546Sopenharmony_ci      return;
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_ci   struct reg_info *a = &ctx->registers[i];
154bf215546Sopenharmony_ci   struct reg_info *b = &ctx->registers[j];
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci   if (BITSET_TEST(a->conflicts, j))
157bf215546Sopenharmony_ci      return;
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_ci   BITSET_SET(a->conflicts, j);
160bf215546Sopenharmony_ci   BITSET_SET(b->conflicts, i);
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci   a->num_conflicts++;
163bf215546Sopenharmony_ci   b->num_conflicts++;
164bf215546Sopenharmony_ci   util_dynarray_append(&a->conflict_list, unsigned, j);
165bf215546Sopenharmony_ci   util_dynarray_append(&b->conflict_list, unsigned, i);
166bf215546Sopenharmony_ci}
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci/* Make the register or node "i" interfere with all the other live registers
169bf215546Sopenharmony_ci * and nodes.
170bf215546Sopenharmony_ci */
171bf215546Sopenharmony_cistatic void add_all_interferences(struct regalloc_ctx *ctx,
172bf215546Sopenharmony_ci                                  unsigned i,
173bf215546Sopenharmony_ci                                  BITSET_WORD *live_regs)
174bf215546Sopenharmony_ci{
175bf215546Sopenharmony_ci   int live_reg;
176bf215546Sopenharmony_ci   BITSET_FOREACH_SET(live_reg, ctx->live, ctx->comp->cur_reg) {
177bf215546Sopenharmony_ci      add_interference(ctx, i, live_reg);
178bf215546Sopenharmony_ci   }
179bf215546Sopenharmony_ci
180bf215546Sopenharmony_ci}
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_cistatic void print_liveness(struct regalloc_ctx *ctx,
183bf215546Sopenharmony_ci                           BITSET_WORD *live_reg)
184bf215546Sopenharmony_ci{
185bf215546Sopenharmony_ci   if (!(lima_debug & LIMA_DEBUG_GP))
186bf215546Sopenharmony_ci      return;
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_ci   int live_idx;
189bf215546Sopenharmony_ci   BITSET_FOREACH_SET(live_idx, live_reg, ctx->comp->cur_reg) {
190bf215546Sopenharmony_ci      printf("reg%d ", live_idx);
191bf215546Sopenharmony_ci   }
192bf215546Sopenharmony_ci   printf("\n");
193bf215546Sopenharmony_ci}
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_cistatic void calc_interference(struct regalloc_ctx *ctx)
196bf215546Sopenharmony_ci{
197bf215546Sopenharmony_ci   list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
198bf215546Sopenharmony_ci      /* Initialize liveness at the end of the block, but exclude values that
199bf215546Sopenharmony_ci       * definitely aren't defined by the end. This helps out with
200bf215546Sopenharmony_ci       * partially-defined registers, like:
201bf215546Sopenharmony_ci       *
202bf215546Sopenharmony_ci       * if (condition) {
203bf215546Sopenharmony_ci       *    foo = ...;
204bf215546Sopenharmony_ci       * }
205bf215546Sopenharmony_ci       * if (condition) {
206bf215546Sopenharmony_ci       *    ... = foo;
207bf215546Sopenharmony_ci       * }
208bf215546Sopenharmony_ci       *
209bf215546Sopenharmony_ci       * If we naively propagated liveness backwards, foo would be live from
210bf215546Sopenharmony_ci       * the beginning of the program, but if we're not inside a loop then
211bf215546Sopenharmony_ci       * its value is undefined before the first if and we don't have to
212bf215546Sopenharmony_ci       * consider it live. Mask out registers like foo here.
213bf215546Sopenharmony_ci       */
214bf215546Sopenharmony_ci      for (unsigned i = 0; i < ctx->bitset_words; i++) {
215bf215546Sopenharmony_ci         ctx->live[i] = block->live_out[i] & block->def_out[i];
216bf215546Sopenharmony_ci      }
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_ci      list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
219bf215546Sopenharmony_ci         gpir_debug("processing node %d\n", node->index);
220bf215546Sopenharmony_ci         print_liveness(ctx, ctx->live);
221bf215546Sopenharmony_ci         if (node->op == gpir_op_store_reg) {
222bf215546Sopenharmony_ci            gpir_store_node *store = gpir_node_to_store(node);
223bf215546Sopenharmony_ci            add_all_interferences(ctx, store->reg->index, ctx->live);
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci            /* KILL */
226bf215546Sopenharmony_ci            BITSET_CLEAR(ctx->live, store->reg->index);
227bf215546Sopenharmony_ci         } else if (node->op == gpir_op_load_reg) {
228bf215546Sopenharmony_ci            /* GEN */
229bf215546Sopenharmony_ci            gpir_load_node *load = gpir_node_to_load(node);
230bf215546Sopenharmony_ci            BITSET_SET(ctx->live, load->reg->index);
231bf215546Sopenharmony_ci         }
232bf215546Sopenharmony_ci      }
233bf215546Sopenharmony_ci   }
234bf215546Sopenharmony_ci}
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci/* Register allocation */
237bf215546Sopenharmony_ci
238bf215546Sopenharmony_cistatic bool can_simplify(struct regalloc_ctx *ctx, unsigned i)
239bf215546Sopenharmony_ci{
240bf215546Sopenharmony_ci   struct reg_info *info = &ctx->registers[i];
241bf215546Sopenharmony_ci   return info->num_conflicts < GPIR_PHYSICAL_REG_NUM;
242bf215546Sopenharmony_ci}
243bf215546Sopenharmony_ci
244bf215546Sopenharmony_cistatic void push_stack(struct regalloc_ctx *ctx, unsigned i)
245bf215546Sopenharmony_ci{
246bf215546Sopenharmony_ci   ctx->stack[ctx->stack_size++] = i;
247bf215546Sopenharmony_ci   gpir_debug("pushing reg%u\n", i);
248bf215546Sopenharmony_ci
249bf215546Sopenharmony_ci   struct reg_info *info = &ctx->registers[i];
250bf215546Sopenharmony_ci   assert(info->visited);
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_ci   util_dynarray_foreach(&info->conflict_list, unsigned, conflict) {
253bf215546Sopenharmony_ci      struct reg_info *conflict_info = &ctx->registers[*conflict];
254bf215546Sopenharmony_ci      assert(conflict_info->num_conflicts > 0);
255bf215546Sopenharmony_ci      conflict_info->num_conflicts--;
256bf215546Sopenharmony_ci      if (!ctx->registers[*conflict].visited && can_simplify(ctx, *conflict)) {
257bf215546Sopenharmony_ci         ctx->worklist[ctx->worklist_end++] = *conflict;
258bf215546Sopenharmony_ci         ctx->registers[*conflict].visited = true;
259bf215546Sopenharmony_ci      }
260bf215546Sopenharmony_ci   }
261bf215546Sopenharmony_ci}
262bf215546Sopenharmony_ci
263bf215546Sopenharmony_cistatic bool do_regalloc(struct regalloc_ctx *ctx)
264bf215546Sopenharmony_ci{
265bf215546Sopenharmony_ci   ctx->worklist_start = 0;
266bf215546Sopenharmony_ci   ctx->worklist_end = 0;
267bf215546Sopenharmony_ci   ctx->stack_size = 0;
268bf215546Sopenharmony_ci
269bf215546Sopenharmony_ci   /* Step 1: find the initially simplifiable registers */
270bf215546Sopenharmony_ci   for (int i = 0; i < ctx->comp->cur_reg; i++) {
271bf215546Sopenharmony_ci      if (can_simplify(ctx, i)) {
272bf215546Sopenharmony_ci         ctx->worklist[ctx->worklist_end++] = i;
273bf215546Sopenharmony_ci         ctx->registers[i].visited = true;
274bf215546Sopenharmony_ci      }
275bf215546Sopenharmony_ci   }
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci   while (true) {
278bf215546Sopenharmony_ci      /* Step 2: push onto the stack whatever we can */
279bf215546Sopenharmony_ci      while (ctx->worklist_start != ctx->worklist_end) {
280bf215546Sopenharmony_ci         push_stack(ctx, ctx->worklist[ctx->worklist_start++]);
281bf215546Sopenharmony_ci      }
282bf215546Sopenharmony_ci
283bf215546Sopenharmony_ci      if (ctx->stack_size < ctx->comp->cur_reg) {
284bf215546Sopenharmony_ci         /* If there are still unsimplifiable nodes left, we need to
285bf215546Sopenharmony_ci          * optimistically push a node onto the stack.  Choose the one with
286bf215546Sopenharmony_ci          * the smallest number of current neighbors, since that's the most
287bf215546Sopenharmony_ci          * likely to succeed.
288bf215546Sopenharmony_ci          */
289bf215546Sopenharmony_ci         unsigned min_conflicts = UINT_MAX;
290bf215546Sopenharmony_ci         unsigned best_reg = 0;
291bf215546Sopenharmony_ci         for (int reg = 0; reg < ctx->comp->cur_reg; reg++) {
292bf215546Sopenharmony_ci            struct reg_info *info = &ctx->registers[reg];
293bf215546Sopenharmony_ci            if (info->visited)
294bf215546Sopenharmony_ci               continue;
295bf215546Sopenharmony_ci            if (info->num_conflicts < min_conflicts) {
296bf215546Sopenharmony_ci               best_reg = reg;
297bf215546Sopenharmony_ci               min_conflicts = info->num_conflicts;
298bf215546Sopenharmony_ci            }
299bf215546Sopenharmony_ci         }
300bf215546Sopenharmony_ci         gpir_debug("optimistic triggered\n");
301bf215546Sopenharmony_ci         ctx->registers[best_reg].visited = true;
302bf215546Sopenharmony_ci         push_stack(ctx, best_reg);
303bf215546Sopenharmony_ci      } else {
304bf215546Sopenharmony_ci         break;
305bf215546Sopenharmony_ci      }
306bf215546Sopenharmony_ci   }
307bf215546Sopenharmony_ci
308bf215546Sopenharmony_ci   /* Step 4: pop off the stack and assign colors */
309bf215546Sopenharmony_ci   for (int i = ctx->comp->cur_reg - 1; i >= 0; i--) {
310bf215546Sopenharmony_ci      unsigned idx = ctx->stack[i];
311bf215546Sopenharmony_ci      struct reg_info *reg = &ctx->registers[idx];
312bf215546Sopenharmony_ci
313bf215546Sopenharmony_ci      bool found = false;
314bf215546Sopenharmony_ci      unsigned start = i % GPIR_PHYSICAL_REG_NUM;
315bf215546Sopenharmony_ci      for (unsigned j = 0; j < GPIR_PHYSICAL_REG_NUM; j++) {
316bf215546Sopenharmony_ci         unsigned candidate = (j + start) % GPIR_PHYSICAL_REG_NUM;
317bf215546Sopenharmony_ci         bool available = true;
318bf215546Sopenharmony_ci         util_dynarray_foreach(&reg->conflict_list, unsigned, conflict_idx) {
319bf215546Sopenharmony_ci            struct reg_info *conflict = &ctx->registers[*conflict_idx];
320bf215546Sopenharmony_ci            if (conflict->assigned_color >= 0 &&
321bf215546Sopenharmony_ci                conflict->assigned_color == (int) candidate) {
322bf215546Sopenharmony_ci               available = false;
323bf215546Sopenharmony_ci               break;
324bf215546Sopenharmony_ci            }
325bf215546Sopenharmony_ci         }
326bf215546Sopenharmony_ci
327bf215546Sopenharmony_ci         if (available) {
328bf215546Sopenharmony_ci            reg->assigned_color = candidate;
329bf215546Sopenharmony_ci            found = true;
330bf215546Sopenharmony_ci            break;
331bf215546Sopenharmony_ci         }
332bf215546Sopenharmony_ci      }
333bf215546Sopenharmony_ci
334bf215546Sopenharmony_ci      /* TODO: spilling */
335bf215546Sopenharmony_ci      if (!found) {
336bf215546Sopenharmony_ci         gpir_error("Failed to allocate registers\n");
337bf215546Sopenharmony_ci         return false;
338bf215546Sopenharmony_ci      }
339bf215546Sopenharmony_ci   }
340bf215546Sopenharmony_ci
341bf215546Sopenharmony_ci   return true;
342bf215546Sopenharmony_ci}
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_cistatic void assign_regs(struct regalloc_ctx *ctx)
345bf215546Sopenharmony_ci{
346bf215546Sopenharmony_ci   list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
347bf215546Sopenharmony_ci      list_for_each_entry(gpir_node, node, &block->node_list, list) {
348bf215546Sopenharmony_ci         if (node->op == gpir_op_load_reg) {
349bf215546Sopenharmony_ci            gpir_load_node *load = gpir_node_to_load(node);
350bf215546Sopenharmony_ci            unsigned color = ctx->registers[load->reg->index].assigned_color;
351bf215546Sopenharmony_ci            load->index = color / 4;
352bf215546Sopenharmony_ci            load->component = color % 4;
353bf215546Sopenharmony_ci         }
354bf215546Sopenharmony_ci
355bf215546Sopenharmony_ci         if (node->op == gpir_op_store_reg) {
356bf215546Sopenharmony_ci            gpir_store_node *store = gpir_node_to_store(node);
357bf215546Sopenharmony_ci            unsigned color = ctx->registers[store->reg->index].assigned_color;
358bf215546Sopenharmony_ci            store->index = color / 4;
359bf215546Sopenharmony_ci            store->component = color % 4;
360bf215546Sopenharmony_ci            node->value_reg = color;
361bf215546Sopenharmony_ci         }
362bf215546Sopenharmony_ci      }
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ci      block->live_out_phys = 0;
365bf215546Sopenharmony_ci
366bf215546Sopenharmony_ci      int reg_idx;
367bf215546Sopenharmony_ci      BITSET_FOREACH_SET(reg_idx, block->live_out, ctx->comp->cur_reg) {
368bf215546Sopenharmony_ci         if (BITSET_TEST(block->def_out, reg_idx)) {
369bf215546Sopenharmony_ci            block->live_out_phys |= (1ull << ctx->registers[reg_idx].assigned_color);
370bf215546Sopenharmony_ci         }
371bf215546Sopenharmony_ci      }
372bf215546Sopenharmony_ci   }
373bf215546Sopenharmony_ci}
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ci/* Value register allocation */
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_ci/* Define a special token for when the register is occupied by a preallocated
378bf215546Sopenharmony_ci * physical register (i.e. load_reg/store_reg). Normally entries in the "live"
379bf215546Sopenharmony_ci * array points to the definition of the value, but there may be multiple
380bf215546Sopenharmony_ci * definitions in this case, and they will certainly come from other basic
381bf215546Sopenharmony_ci * blocks, so it doesn't make sense to do that here.
382bf215546Sopenharmony_ci */
383bf215546Sopenharmony_cistatic gpir_node __physreg_live;
384bf215546Sopenharmony_ci#define PHYSREG_LIVE (&__physreg_live)
385bf215546Sopenharmony_ci
386bf215546Sopenharmony_cistruct value_regalloc_ctx {
387bf215546Sopenharmony_ci   gpir_node *last_written[GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM];
388bf215546Sopenharmony_ci   gpir_node *complex1_last_written[GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM];
389bf215546Sopenharmony_ci   gpir_node *live[GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM];
390bf215546Sopenharmony_ci   gpir_node *last_complex1;
391bf215546Sopenharmony_ci   unsigned alloc_start;
392bf215546Sopenharmony_ci};
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_cistatic unsigned find_free_value_reg(struct value_regalloc_ctx *ctx)
395bf215546Sopenharmony_ci{
396bf215546Sopenharmony_ci   /* Implement round-robin allocation */
397bf215546Sopenharmony_ci   unsigned reg_offset = ctx->alloc_start++;
398bf215546Sopenharmony_ci   if (ctx->alloc_start == GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM)
399bf215546Sopenharmony_ci      ctx->alloc_start = 0;
400bf215546Sopenharmony_ci
401bf215546Sopenharmony_ci   unsigned reg = UINT_MAX;
402bf215546Sopenharmony_ci   for (unsigned reg_base = 0;
403bf215546Sopenharmony_ci        reg_base < GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM;
404bf215546Sopenharmony_ci        reg_base++) {
405bf215546Sopenharmony_ci      unsigned cur_reg = (reg_base + reg_offset) % (GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM);
406bf215546Sopenharmony_ci      if (!ctx->live[cur_reg]) {
407bf215546Sopenharmony_ci         reg = cur_reg;
408bf215546Sopenharmony_ci         break;
409bf215546Sopenharmony_ci      }
410bf215546Sopenharmony_ci   }
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_ci   return reg;
413bf215546Sopenharmony_ci}
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_cistatic void add_fake_dep(gpir_node *node, gpir_node *src,
416bf215546Sopenharmony_ci                         struct value_regalloc_ctx *ctx)
417bf215546Sopenharmony_ci{
418bf215546Sopenharmony_ci   assert(src->value_reg >= 0);
419bf215546Sopenharmony_ci   if (ctx->last_written[src->value_reg] &&
420bf215546Sopenharmony_ci       ctx->last_written[src->value_reg] != node) {
421bf215546Sopenharmony_ci      gpir_node_add_dep(ctx->last_written[src->value_reg], node,
422bf215546Sopenharmony_ci                        GPIR_DEP_WRITE_AFTER_READ);
423bf215546Sopenharmony_ci   }
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci   /* For a sequence of schedule_first nodes right before a complex1
426bf215546Sopenharmony_ci    * node, add any extra fake dependencies necessary so that the
427bf215546Sopenharmony_ci    * schedule_first nodes can be scheduled right after the complex1 is
428bf215546Sopenharmony_ci    * scheduled. We have to save the last_written before complex1 happens to
429bf215546Sopenharmony_ci    * avoid adding dependencies to children of the complex1 node which would
430bf215546Sopenharmony_ci    * create a cycle.
431bf215546Sopenharmony_ci    */
432bf215546Sopenharmony_ci
433bf215546Sopenharmony_ci   if (gpir_op_infos[node->op].schedule_first &&
434bf215546Sopenharmony_ci       ctx->last_complex1 &&
435bf215546Sopenharmony_ci       ctx->complex1_last_written[src->value_reg]) {
436bf215546Sopenharmony_ci      gpir_node_add_dep(ctx->complex1_last_written[src->value_reg],
437bf215546Sopenharmony_ci                        ctx->last_complex1,
438bf215546Sopenharmony_ci                        GPIR_DEP_WRITE_AFTER_READ);
439bf215546Sopenharmony_ci   }
440bf215546Sopenharmony_ci}
441bf215546Sopenharmony_ci
442bf215546Sopenharmony_cistatic bool handle_value_read(gpir_node *node, gpir_node *src,
443bf215546Sopenharmony_ci                              struct value_regalloc_ctx *ctx)
444bf215546Sopenharmony_ci{
445bf215546Sopenharmony_ci   /* If already allocated, don't allocate it */
446bf215546Sopenharmony_ci   if (src->value_reg < 0) {
447bf215546Sopenharmony_ci      unsigned reg = find_free_value_reg(ctx);
448bf215546Sopenharmony_ci      if (reg == UINT_MAX)
449bf215546Sopenharmony_ci         return false;
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci      src->value_reg = reg;
452bf215546Sopenharmony_ci      ctx->live[reg] = src;
453bf215546Sopenharmony_ci   }
454bf215546Sopenharmony_ci
455bf215546Sopenharmony_ci   /* Add any fake dependencies. Note: this is the actual result of value
456bf215546Sopenharmony_ci    * register allocation. We throw away node->value_reg afterwards, since
457bf215546Sopenharmony_ci    * it's really the fake dependencies which constrain the post-RA scheduler
458bf215546Sopenharmony_ci    * enough to make sure it never needs to spill to temporaries.
459bf215546Sopenharmony_ci    */
460bf215546Sopenharmony_ci   add_fake_dep(node, src, ctx);
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci   return true;
463bf215546Sopenharmony_ci}
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_cistatic bool handle_reg_read(gpir_load_node *load,
466bf215546Sopenharmony_ci                            struct value_regalloc_ctx *ctx)
467bf215546Sopenharmony_ci{
468bf215546Sopenharmony_ci   unsigned idx = load->index * 4 + load->component;
469bf215546Sopenharmony_ci   if (!ctx->live[idx]) {
470bf215546Sopenharmony_ci      ctx->live[idx] = PHYSREG_LIVE;
471bf215546Sopenharmony_ci   } else if (ctx->live[idx] != PHYSREG_LIVE) {
472bf215546Sopenharmony_ci      /* This slot is occupied by some other value register, so we need to
473bf215546Sopenharmony_ci       * evict it. This effectively splits the live range of the value
474bf215546Sopenharmony_ci       * register. NB: since we create fake dependencies on the fly, and the
475bf215546Sopenharmony_ci       * fake dependencies are the only output of this pass, we don't actually
476bf215546Sopenharmony_ci       * have to record where the split happened or that this value was
477bf215546Sopenharmony_ci       * assigned to two different registers. Any actual live range splitting
478bf215546Sopenharmony_ci       * happens in the post-RA scheduler, which moves the value to and from
479bf215546Sopenharmony_ci       * the register file. This will just cause some reads of the value
480bf215546Sopenharmony_ci       * register to have different fake dependencies.
481bf215546Sopenharmony_ci       */
482bf215546Sopenharmony_ci      unsigned new_reg = find_free_value_reg(ctx);
483bf215546Sopenharmony_ci      if (new_reg == UINT_MAX)
484bf215546Sopenharmony_ci         return false;
485bf215546Sopenharmony_ci      ctx->live[new_reg] = ctx->live[idx];
486bf215546Sopenharmony_ci      ctx->live[new_reg]->value_reg = new_reg;
487bf215546Sopenharmony_ci      ctx->live[idx] = PHYSREG_LIVE;
488bf215546Sopenharmony_ci   }
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci   if (ctx->last_written[idx]) {
491bf215546Sopenharmony_ci      gpir_node_add_dep(ctx->last_written[idx], &load->node,
492bf215546Sopenharmony_ci                        GPIR_DEP_WRITE_AFTER_READ);
493bf215546Sopenharmony_ci   }
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_ci   return true;
496bf215546Sopenharmony_ci}
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_cistatic void handle_reg_write(gpir_store_node *store,
499bf215546Sopenharmony_ci                             struct value_regalloc_ctx *ctx)
500bf215546Sopenharmony_ci{
501bf215546Sopenharmony_ci   unsigned idx = store->index * 4 + store->component;
502bf215546Sopenharmony_ci   store->node.value_reg = idx;
503bf215546Sopenharmony_ci   ctx->last_written[idx] = &store->node;
504bf215546Sopenharmony_ci   ctx->live[idx] = NULL;
505bf215546Sopenharmony_ci}
506bf215546Sopenharmony_ci
507bf215546Sopenharmony_cistatic void handle_value_write(gpir_node *node,
508bf215546Sopenharmony_ci                               struct value_regalloc_ctx *ctx)
509bf215546Sopenharmony_ci{
510bf215546Sopenharmony_ci   /* TODO: why does an uninitialized node->value_reg
511bf215546Sopenharmony_ci    * sometimes end up here? */
512bf215546Sopenharmony_ci   if (node->value_reg < 0)
513bf215546Sopenharmony_ci      return;
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_ci   ctx->last_written[node->value_reg] = node;
516bf215546Sopenharmony_ci   ctx->live[node->value_reg] = NULL;
517bf215546Sopenharmony_ci}
518bf215546Sopenharmony_ci
519bf215546Sopenharmony_cistatic bool regalloc_value_regs(gpir_block *block)
520bf215546Sopenharmony_ci{
521bf215546Sopenharmony_ci   struct value_regalloc_ctx ctx = { { 0 } };
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci   list_for_each_entry(gpir_node, node, &block->node_list, list) {
524bf215546Sopenharmony_ci      node->value_reg = -1;
525bf215546Sopenharmony_ci   }
526bf215546Sopenharmony_ci
527bf215546Sopenharmony_ci   list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
528bf215546Sopenharmony_ci      if (node->op == gpir_op_complex1) {
529bf215546Sopenharmony_ci         ctx.last_complex1 = node;
530bf215546Sopenharmony_ci         memcpy(ctx.complex1_last_written, ctx.last_written,
531bf215546Sopenharmony_ci                sizeof(ctx.complex1_last_written));
532bf215546Sopenharmony_ci      }
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_ci      if (node->type != gpir_node_type_store &&
535bf215546Sopenharmony_ci          node->type != gpir_node_type_branch) {
536bf215546Sopenharmony_ci         handle_value_write(node, &ctx);
537bf215546Sopenharmony_ci      } else if (node->op == gpir_op_store_reg) {
538bf215546Sopenharmony_ci         handle_reg_write(gpir_node_to_store(node), &ctx);
539bf215546Sopenharmony_ci      }
540bf215546Sopenharmony_ci
541bf215546Sopenharmony_ci      if (node->type == gpir_node_type_store) {
542bf215546Sopenharmony_ci         gpir_store_node *store = gpir_node_to_store(node);
543bf215546Sopenharmony_ci         if (!handle_value_read(&store->node, store->child, &ctx))
544bf215546Sopenharmony_ci            return false;
545bf215546Sopenharmony_ci      } else if (node->type == gpir_node_type_alu) {
546bf215546Sopenharmony_ci         gpir_alu_node *alu = gpir_node_to_alu(node);
547bf215546Sopenharmony_ci         for (int i = 0; i < alu->num_child; i++) {
548bf215546Sopenharmony_ci            if (!handle_value_read(&alu->node, alu->children[i], &ctx))
549bf215546Sopenharmony_ci               return false;
550bf215546Sopenharmony_ci         }
551bf215546Sopenharmony_ci      } else if (node->type == gpir_node_type_branch) {
552bf215546Sopenharmony_ci         /* At the end of a block the top 11 values are always free, so
553bf215546Sopenharmony_ci          * branches should always succeed.
554bf215546Sopenharmony_ci          */
555bf215546Sopenharmony_ci         gpir_branch_node *branch = gpir_node_to_branch(node);
556bf215546Sopenharmony_ci         ASSERTED bool result = handle_value_read(&branch->node,
557bf215546Sopenharmony_ci                                                  branch->cond, &ctx);
558bf215546Sopenharmony_ci         assert(result);
559bf215546Sopenharmony_ci      } else if (node->op == gpir_op_load_reg) {
560bf215546Sopenharmony_ci         gpir_load_node *load = gpir_node_to_load(node);
561bf215546Sopenharmony_ci         if (!handle_reg_read(load, &ctx))
562bf215546Sopenharmony_ci             return false;
563bf215546Sopenharmony_ci      }
564bf215546Sopenharmony_ci   }
565bf215546Sopenharmony_ci
566bf215546Sopenharmony_ci   return true;
567bf215546Sopenharmony_ci}
568bf215546Sopenharmony_ci
569bf215546Sopenharmony_cistatic void regalloc_print_result(gpir_compiler *comp)
570bf215546Sopenharmony_ci{
571bf215546Sopenharmony_ci   if (!(lima_debug & LIMA_DEBUG_GP))
572bf215546Sopenharmony_ci      return;
573bf215546Sopenharmony_ci
574bf215546Sopenharmony_ci   int index = 0;
575bf215546Sopenharmony_ci   printf("======== regalloc ========\n");
576bf215546Sopenharmony_ci   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
577bf215546Sopenharmony_ci      list_for_each_entry(gpir_node, node, &block->node_list, list) {
578bf215546Sopenharmony_ci         printf("%03d: %d/%d %s ", index++, node->index, node->value_reg,
579bf215546Sopenharmony_ci                gpir_op_infos[node->op].name);
580bf215546Sopenharmony_ci         gpir_node_foreach_pred(node, dep) {
581bf215546Sopenharmony_ci            gpir_node *pred = dep->pred;
582bf215546Sopenharmony_ci            printf(" %d/%d", pred->index, pred->value_reg);
583bf215546Sopenharmony_ci         }
584bf215546Sopenharmony_ci         if (node->op == gpir_op_load_reg) {
585bf215546Sopenharmony_ci            gpir_load_node *load = gpir_node_to_load(node);
586bf215546Sopenharmony_ci            printf(" -/%d", 4 * load->index + load->component);
587bf215546Sopenharmony_ci            printf(" (%d)", load->reg->index);
588bf215546Sopenharmony_ci         } else if (node->op == gpir_op_store_reg) {
589bf215546Sopenharmony_ci            gpir_store_node *store = gpir_node_to_store(node);
590bf215546Sopenharmony_ci            printf(" (%d)", store->reg->index);
591bf215546Sopenharmony_ci         }
592bf215546Sopenharmony_ci         printf("\n");
593bf215546Sopenharmony_ci      }
594bf215546Sopenharmony_ci      printf("----------------------------\n");
595bf215546Sopenharmony_ci   }
596bf215546Sopenharmony_ci}
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_cibool gpir_regalloc_prog(gpir_compiler *comp)
599bf215546Sopenharmony_ci{
600bf215546Sopenharmony_ci   struct regalloc_ctx ctx;
601bf215546Sopenharmony_ci
602bf215546Sopenharmony_ci   ctx.mem_ctx = ralloc_context(NULL);
603bf215546Sopenharmony_ci   ctx.bitset_words = BITSET_WORDS(comp->cur_reg);
604bf215546Sopenharmony_ci   ctx.live = ralloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
605bf215546Sopenharmony_ci   ctx.worklist = ralloc_array(ctx.mem_ctx, unsigned, comp->cur_reg);
606bf215546Sopenharmony_ci   ctx.stack = ralloc_array(ctx.mem_ctx, unsigned, comp->cur_reg);
607bf215546Sopenharmony_ci   ctx.comp = comp;
608bf215546Sopenharmony_ci
609bf215546Sopenharmony_ci   ctx.registers = rzalloc_array(ctx.mem_ctx, struct reg_info, comp->cur_reg);
610bf215546Sopenharmony_ci   for (int i = 0; i < comp->cur_reg; i++) {
611bf215546Sopenharmony_ci      ctx.registers[i].conflicts = rzalloc_array(ctx.mem_ctx, BITSET_WORD,
612bf215546Sopenharmony_ci                                                 ctx.bitset_words);
613bf215546Sopenharmony_ci      util_dynarray_init(&ctx.registers[i].conflict_list, ctx.mem_ctx);
614bf215546Sopenharmony_ci   }
615bf215546Sopenharmony_ci
616bf215546Sopenharmony_ci   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
617bf215546Sopenharmony_ci      block->live_out = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
618bf215546Sopenharmony_ci      block->live_in = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
619bf215546Sopenharmony_ci      block->def_out = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
620bf215546Sopenharmony_ci   }
621bf215546Sopenharmony_ci
622bf215546Sopenharmony_ci   calc_liveness(&ctx);
623bf215546Sopenharmony_ci   calc_interference(&ctx);
624bf215546Sopenharmony_ci   if (!do_regalloc(&ctx)) {
625bf215546Sopenharmony_ci      ralloc_free(ctx.mem_ctx);
626bf215546Sopenharmony_ci      return false;
627bf215546Sopenharmony_ci   }
628bf215546Sopenharmony_ci   assign_regs(&ctx);
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_ci   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
631bf215546Sopenharmony_ci      if (!regalloc_value_regs(block))
632bf215546Sopenharmony_ci         return false;
633bf215546Sopenharmony_ci   }
634bf215546Sopenharmony_ci
635bf215546Sopenharmony_ci   regalloc_print_result(comp);
636bf215546Sopenharmony_ci   ralloc_free(ctx.mem_ctx);
637bf215546Sopenharmony_ci   return true;
638bf215546Sopenharmony_ci}
639