1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (c) 2017 Lima Project
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sub license,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the
12bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions
13bf215546Sopenharmony_ci * of the Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "util/ralloc.h"
26bf215546Sopenharmony_ci#include "util/register_allocate.h"
27bf215546Sopenharmony_ci#include "util/u_debug.h"
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#include "ppir.h"
30bf215546Sopenharmony_ci#include "lima_context.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_ci#define PPIR_REG_COUNT  (6 * 4)
33bf215546Sopenharmony_ci
34bf215546Sopenharmony_cienum ppir_ra_reg_class {
35bf215546Sopenharmony_ci   ppir_ra_reg_class_vec1,
36bf215546Sopenharmony_ci   ppir_ra_reg_class_vec2,
37bf215546Sopenharmony_ci   ppir_ra_reg_class_vec3,
38bf215546Sopenharmony_ci   ppir_ra_reg_class_vec4,
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_ci   /* 4 reg class for load/store instr regs:
41bf215546Sopenharmony_ci    * load/store instr has no swizzle field, so the (virtual) register
42bf215546Sopenharmony_ci    * must be allocated at the beginning of a (physical) register,
43bf215546Sopenharmony_ci    */
44bf215546Sopenharmony_ci   ppir_ra_reg_class_head_vec1,
45bf215546Sopenharmony_ci   ppir_ra_reg_class_head_vec2,
46bf215546Sopenharmony_ci   ppir_ra_reg_class_head_vec3,
47bf215546Sopenharmony_ci   ppir_ra_reg_class_head_vec4,
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_ci   ppir_ra_reg_class_num,
50bf215546Sopenharmony_ci};
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_cistruct ra_regs *ppir_regalloc_init(void *mem_ctx)
53bf215546Sopenharmony_ci{
54bf215546Sopenharmony_ci   struct ra_regs *ret = ra_alloc_reg_set(mem_ctx, PPIR_REG_COUNT, false);
55bf215546Sopenharmony_ci   if (!ret)
56bf215546Sopenharmony_ci      return NULL;
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_ci   /* Classes for contiguous 1-4 channel groups anywhere within a register. */
59bf215546Sopenharmony_ci   struct ra_class *classes[ppir_ra_reg_class_num];
60bf215546Sopenharmony_ci   for (int i = 0; i < ppir_ra_reg_class_head_vec1; i++) {
61bf215546Sopenharmony_ci      classes[i] = ra_alloc_contig_reg_class(ret, i + 1);
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_ci      for (int j = 0; j < PPIR_REG_COUNT; j += 4) {
64bf215546Sopenharmony_ci         for (int swiz = 0; swiz < (4 - i); swiz++)
65bf215546Sopenharmony_ci            ra_class_add_reg(classes[i], j + swiz);
66bf215546Sopenharmony_ci      }
67bf215546Sopenharmony_ci   }
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_ci   /* Classes for contiguous 1-4 channels with a start channel of .x */
70bf215546Sopenharmony_ci   for (int i = ppir_ra_reg_class_head_vec1; i < ppir_ra_reg_class_num; i++) {
71bf215546Sopenharmony_ci      classes[i] = ra_alloc_contig_reg_class(ret, i - ppir_ra_reg_class_head_vec1 + 1);
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci      for (int j = 0; j < PPIR_REG_COUNT; j += 4)
74bf215546Sopenharmony_ci         ra_class_add_reg(classes[i], j);
75bf215546Sopenharmony_ci   }
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   ra_set_finalize(ret, NULL);
78bf215546Sopenharmony_ci   return ret;
79bf215546Sopenharmony_ci}
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_cistatic void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
82bf215546Sopenharmony_ci{
83bf215546Sopenharmony_ci   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
84bf215546Sopenharmony_ci      list_for_each_entry(ppir_node, node, &block->node_list, list) {
85bf215546Sopenharmony_ci         if (!node->instr || node->op == ppir_op_const)
86bf215546Sopenharmony_ci            continue;
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci         ppir_dest *dest = ppir_node_get_dest(node);
89bf215546Sopenharmony_ci         if (dest) {
90bf215546Sopenharmony_ci            ppir_reg *reg = NULL;
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_ci            if (dest->type == ppir_target_ssa) {
93bf215546Sopenharmony_ci               reg = &dest->ssa;
94bf215546Sopenharmony_ci               if (node->is_out)
95bf215546Sopenharmony_ci                  reg->out_reg = true;
96bf215546Sopenharmony_ci               list_addtail(&reg->list, &comp->reg_list);
97bf215546Sopenharmony_ci               comp->reg_num++;
98bf215546Sopenharmony_ci            }
99bf215546Sopenharmony_ci         }
100bf215546Sopenharmony_ci      }
101bf215546Sopenharmony_ci   }
102bf215546Sopenharmony_ci}
103bf215546Sopenharmony_ci
104bf215546Sopenharmony_cistatic void ppir_regalloc_print_result(ppir_compiler *comp)
105bf215546Sopenharmony_ci{
106bf215546Sopenharmony_ci   printf("======ppir regalloc result======\n");
107bf215546Sopenharmony_ci   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
108bf215546Sopenharmony_ci      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
109bf215546Sopenharmony_ci         printf("%03d:", instr->index);
110bf215546Sopenharmony_ci         for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
111bf215546Sopenharmony_ci            ppir_node *node = instr->slots[i];
112bf215546Sopenharmony_ci            if (!node)
113bf215546Sopenharmony_ci               continue;
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci            printf(" (%d|", node->index);
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci            ppir_dest *dest = ppir_node_get_dest(node);
118bf215546Sopenharmony_ci            if (dest)
119bf215546Sopenharmony_ci               printf("%d", ppir_target_get_dest_reg_index(dest));
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci            printf("|");
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_ci            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
124bf215546Sopenharmony_ci               if (i)
125bf215546Sopenharmony_ci                  printf(" ");
126bf215546Sopenharmony_ci               printf("%d", ppir_target_get_src_reg_index(ppir_node_get_src(node, i)));
127bf215546Sopenharmony_ci            }
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_ci            printf(")");
130bf215546Sopenharmony_ci         }
131bf215546Sopenharmony_ci         printf("\n");
132bf215546Sopenharmony_ci      }
133bf215546Sopenharmony_ci   }
134bf215546Sopenharmony_ci   printf("--------------------------\n");
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci   printf("======ppir output regs======\n");
137bf215546Sopenharmony_ci   for (int i = 0; i < ppir_output_num; i++) {
138bf215546Sopenharmony_ci      if (comp->out_type_to_reg[i] != -1)
139bf215546Sopenharmony_ci         printf("%s: $%d\n", ppir_output_type_to_str(i),
140bf215546Sopenharmony_ci                (int)comp->out_type_to_reg[i]);
141bf215546Sopenharmony_ci   }
142bf215546Sopenharmony_ci   printf("--------------------------\n");
143bf215546Sopenharmony_ci}
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_cistatic bool create_new_instr_after(ppir_block *block, ppir_instr *ref,
146bf215546Sopenharmony_ci                                   ppir_node *node)
147bf215546Sopenharmony_ci{
148bf215546Sopenharmony_ci   ppir_instr *newinstr = ppir_instr_create(block);
149bf215546Sopenharmony_ci   if (unlikely(!newinstr))
150bf215546Sopenharmony_ci      return false;
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci   list_del(&newinstr->list);
153bf215546Sopenharmony_ci   list_add(&newinstr->list, &ref->list);
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci   if (!ppir_instr_insert_node(newinstr, node))
156bf215546Sopenharmony_ci      return false;
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci   list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
159bf215546Sopenharmony_ci      instr->seq++;
160bf215546Sopenharmony_ci   }
161bf215546Sopenharmony_ci   newinstr->seq = ref->seq+1;
162bf215546Sopenharmony_ci   newinstr->scheduled = true;
163bf215546Sopenharmony_ci   return true;
164bf215546Sopenharmony_ci}
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_cistatic bool create_new_instr_before(ppir_block *block, ppir_instr *ref,
167bf215546Sopenharmony_ci                                    ppir_node *node)
168bf215546Sopenharmony_ci{
169bf215546Sopenharmony_ci   ppir_instr *newinstr = ppir_instr_create(block);
170bf215546Sopenharmony_ci   if (unlikely(!newinstr))
171bf215546Sopenharmony_ci      return false;
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_ci   list_del(&newinstr->list);
174bf215546Sopenharmony_ci   list_addtail(&newinstr->list, &ref->list);
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci   if (!ppir_instr_insert_node(newinstr, node))
177bf215546Sopenharmony_ci      return false;
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci   list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
180bf215546Sopenharmony_ci      instr->seq++;
181bf215546Sopenharmony_ci   }
182bf215546Sopenharmony_ci   newinstr->seq = ref->seq-1;
183bf215546Sopenharmony_ci   newinstr->scheduled = true;
184bf215546Sopenharmony_ci   return true;
185bf215546Sopenharmony_ci}
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_cistatic bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block,
188bf215546Sopenharmony_ci                                    ppir_node *node, ppir_src *src,
189bf215546Sopenharmony_ci                                    ppir_node **fill_node)
190bf215546Sopenharmony_ci{
191bf215546Sopenharmony_ci   /* nodes might have multiple references to the same value.
192bf215546Sopenharmony_ci    * avoid creating unnecessary loads for the same fill by
193bf215546Sopenharmony_ci    * saving the node resulting from the temporary load */
194bf215546Sopenharmony_ci   if (*fill_node)
195bf215546Sopenharmony_ci      goto update_src;
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci   int num_components = src->reg->num_components;
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_ci   /* alloc new node to load value */
200bf215546Sopenharmony_ci   ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
201bf215546Sopenharmony_ci   if (!load_node)
202bf215546Sopenharmony_ci      return false;
203bf215546Sopenharmony_ci   list_addtail(&load_node->list, &node->list);
204bf215546Sopenharmony_ci   comp->num_fills++;
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci   ppir_load_node *load = ppir_node_to_load(load_node);
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci   load->index = -comp->prog->state.stack_size; /* index sizes are negative */
209bf215546Sopenharmony_ci   load->num_components = num_components;
210bf215546Sopenharmony_ci
211bf215546Sopenharmony_ci   ppir_dest *ld_dest = &load->dest;
212bf215546Sopenharmony_ci   ld_dest->type = ppir_target_pipeline;
213bf215546Sopenharmony_ci   ld_dest->pipeline = ppir_pipeline_reg_uniform;
214bf215546Sopenharmony_ci   ld_dest->write_mask = u_bit_consecutive(0, num_components);
215bf215546Sopenharmony_ci
216bf215546Sopenharmony_ci   /* If the uniform slot is empty, we can insert the load_temp
217bf215546Sopenharmony_ci    * there and use it directly. Exceptionally, if the node is in the
218bf215546Sopenharmony_ci    * varying or texld slot, this doesn't work. */
219bf215546Sopenharmony_ci   if (!node->instr->slots[PPIR_INSTR_SLOT_UNIFORM] &&
220bf215546Sopenharmony_ci        node->instr_pos != PPIR_INSTR_SLOT_VARYING &&
221bf215546Sopenharmony_ci        node->instr_pos != PPIR_INSTR_SLOT_TEXLD) {
222bf215546Sopenharmony_ci      ppir_node_target_assign(src, load_node);
223bf215546Sopenharmony_ci      *fill_node = load_node;
224bf215546Sopenharmony_ci      return ppir_instr_insert_node(node->instr, load_node);
225bf215546Sopenharmony_ci   }
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci   /* Uniform slot was taken, so fall back to a new instruction with a mov */
228bf215546Sopenharmony_ci   if (!create_new_instr_before(block, node->instr, load_node))
229bf215546Sopenharmony_ci      return false;
230bf215546Sopenharmony_ci
231bf215546Sopenharmony_ci   /* Create move node */
232bf215546Sopenharmony_ci   ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1 , 0);
233bf215546Sopenharmony_ci   if (unlikely(!move_node))
234bf215546Sopenharmony_ci      return false;
235bf215546Sopenharmony_ci   list_addtail(&move_node->list, &node->list);
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci   ppir_alu_node *move_alu = ppir_node_to_alu(move_node);
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_ci   move_alu->num_src = 1;
240bf215546Sopenharmony_ci   move_alu->src->type = ppir_target_pipeline;
241bf215546Sopenharmony_ci   move_alu->src->pipeline = ppir_pipeline_reg_uniform;
242bf215546Sopenharmony_ci   for (int i = 0; i < 4; i++)
243bf215546Sopenharmony_ci      move_alu->src->swizzle[i] = i;
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ci   ppir_dest *alu_dest = &move_alu->dest;
246bf215546Sopenharmony_ci   alu_dest->type = ppir_target_ssa;
247bf215546Sopenharmony_ci   alu_dest->ssa.num_components = num_components;
248bf215546Sopenharmony_ci   alu_dest->ssa.spilled = true;
249bf215546Sopenharmony_ci   alu_dest->write_mask = u_bit_consecutive(0, num_components);
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci   list_addtail(&alu_dest->ssa.list, &comp->reg_list);
252bf215546Sopenharmony_ci   comp->reg_num++;
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci   if (!ppir_instr_insert_node(load_node->instr, move_node))
255bf215546Sopenharmony_ci      return false;
256bf215546Sopenharmony_ci
257bf215546Sopenharmony_ci   /* insert the new node as predecessor */
258bf215546Sopenharmony_ci   ppir_node_foreach_pred_safe(node, dep) {
259bf215546Sopenharmony_ci      ppir_node *pred = dep->pred;
260bf215546Sopenharmony_ci      ppir_node_remove_dep(dep);
261bf215546Sopenharmony_ci      ppir_node_add_dep(load_node, pred, ppir_dep_src);
262bf215546Sopenharmony_ci   }
263bf215546Sopenharmony_ci   ppir_node_add_dep(node, move_node, ppir_dep_src);
264bf215546Sopenharmony_ci   ppir_node_add_dep(move_node, load_node, ppir_dep_src);
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_ci   *fill_node = move_node;
267bf215546Sopenharmony_ci
268bf215546Sopenharmony_ciupdate_src:
269bf215546Sopenharmony_ci   /* switch node src to use the fill node dest */
270bf215546Sopenharmony_ci   ppir_node_target_assign(src, *fill_node);
271bf215546Sopenharmony_ci
272bf215546Sopenharmony_ci   return true;
273bf215546Sopenharmony_ci}
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_cistatic bool ppir_update_spilled_dest_load(ppir_compiler *comp, ppir_block *block,
276bf215546Sopenharmony_ci                                          ppir_node *node)
277bf215546Sopenharmony_ci{
278bf215546Sopenharmony_ci   ppir_dest *dest = ppir_node_get_dest(node);
279bf215546Sopenharmony_ci   assert(dest != NULL);
280bf215546Sopenharmony_ci   assert(dest->type == ppir_target_register);
281bf215546Sopenharmony_ci   ppir_reg *reg = dest->reg;
282bf215546Sopenharmony_ci   int num_components = reg->num_components;
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_ci   /* alloc new node to load value */
285bf215546Sopenharmony_ci   ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
286bf215546Sopenharmony_ci   if (!load_node)
287bf215546Sopenharmony_ci      return NULL;
288bf215546Sopenharmony_ci   list_addtail(&load_node->list, &node->list);
289bf215546Sopenharmony_ci   comp->num_fills++;
290bf215546Sopenharmony_ci
291bf215546Sopenharmony_ci   ppir_load_node *load = ppir_node_to_load(load_node);
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci   load->index = -comp->prog->state.stack_size; /* index sizes are negative */
294bf215546Sopenharmony_ci   load->num_components = num_components;
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_ci   load->dest.type = ppir_target_pipeline;
297bf215546Sopenharmony_ci   load->dest.pipeline = ppir_pipeline_reg_uniform;
298bf215546Sopenharmony_ci   load->dest.write_mask = u_bit_consecutive(0, num_components);
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci   /* New instruction is needed since we're updating a dest register
301bf215546Sopenharmony_ci    * and we can't write to the uniform pipeline reg */
302bf215546Sopenharmony_ci   if (!create_new_instr_before(block, node->instr, load_node))
303bf215546Sopenharmony_ci      return false;
304bf215546Sopenharmony_ci
305bf215546Sopenharmony_ci   /* Create move node */
306bf215546Sopenharmony_ci   ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1 , 0);
307bf215546Sopenharmony_ci   if (unlikely(!move_node))
308bf215546Sopenharmony_ci      return false;
309bf215546Sopenharmony_ci   list_addtail(&move_node->list, &node->list);
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci   ppir_alu_node *move_alu = ppir_node_to_alu(move_node);
312bf215546Sopenharmony_ci
313bf215546Sopenharmony_ci   move_alu->num_src = 1;
314bf215546Sopenharmony_ci   move_alu->src->type = ppir_target_pipeline;
315bf215546Sopenharmony_ci   move_alu->src->pipeline = ppir_pipeline_reg_uniform;
316bf215546Sopenharmony_ci   for (int i = 0; i < 4; i++)
317bf215546Sopenharmony_ci      move_alu->src->swizzle[i] = i;
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci   move_alu->dest.type = ppir_target_register;
320bf215546Sopenharmony_ci   move_alu->dest.reg = reg;
321bf215546Sopenharmony_ci   move_alu->dest.write_mask = u_bit_consecutive(0, num_components);
322bf215546Sopenharmony_ci
323bf215546Sopenharmony_ci   if (!ppir_instr_insert_node(load_node->instr, move_node))
324bf215546Sopenharmony_ci      return false;
325bf215546Sopenharmony_ci
326bf215546Sopenharmony_ci   ppir_node_foreach_pred_safe(node, dep) {
327bf215546Sopenharmony_ci      ppir_node *pred = dep->pred;
328bf215546Sopenharmony_ci      ppir_node_remove_dep(dep);
329bf215546Sopenharmony_ci      ppir_node_add_dep(load_node, pred, ppir_dep_src);
330bf215546Sopenharmony_ci   }
331bf215546Sopenharmony_ci   ppir_node_add_dep(node, move_node, ppir_dep_src);
332bf215546Sopenharmony_ci   ppir_node_add_dep(move_node, load_node, ppir_dep_src);
333bf215546Sopenharmony_ci
334bf215546Sopenharmony_ci   return true;
335bf215546Sopenharmony_ci}
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_cistatic bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block,
338bf215546Sopenharmony_ci                                     ppir_node *node)
339bf215546Sopenharmony_ci{
340bf215546Sopenharmony_ci   ppir_dest *dest = ppir_node_get_dest(node);
341bf215546Sopenharmony_ci   assert(dest != NULL);
342bf215546Sopenharmony_ci   ppir_reg *reg = ppir_dest_get_reg(dest);
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_ci   /* alloc new node to store value */
345bf215546Sopenharmony_ci   ppir_node *store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0);
346bf215546Sopenharmony_ci   if (!store_node)
347bf215546Sopenharmony_ci      return false;
348bf215546Sopenharmony_ci   list_addtail(&store_node->list, &node->list);
349bf215546Sopenharmony_ci   comp->num_spills++;
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_ci   ppir_store_node *store = ppir_node_to_store(store_node);
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ci   store->index = -comp->prog->state.stack_size; /* index sizes are negative */
354bf215546Sopenharmony_ci
355bf215546Sopenharmony_ci   ppir_node_target_assign(&store->src, node);
356bf215546Sopenharmony_ci   store->num_components = reg->num_components;
357bf215546Sopenharmony_ci
358bf215546Sopenharmony_ci   /* insert the new node as successor */
359bf215546Sopenharmony_ci   ppir_node_foreach_succ_safe(node, dep) {
360bf215546Sopenharmony_ci      ppir_node *succ = dep->succ;
361bf215546Sopenharmony_ci      ppir_node_remove_dep(dep);
362bf215546Sopenharmony_ci      ppir_node_add_dep(succ, store_node, ppir_dep_src);
363bf215546Sopenharmony_ci   }
364bf215546Sopenharmony_ci   ppir_node_add_dep(store_node, node, ppir_dep_src);
365bf215546Sopenharmony_ci
366bf215546Sopenharmony_ci   /* If the store temp slot is empty, we can insert the store_temp
367bf215546Sopenharmony_ci    * there and use it directly. Exceptionally, if the node is in the
368bf215546Sopenharmony_ci    * combine slot, this doesn't work. */
369bf215546Sopenharmony_ci   if (!node->instr->slots[PPIR_INSTR_SLOT_STORE_TEMP] &&
370bf215546Sopenharmony_ci        node->instr_pos != PPIR_INSTR_SLOT_ALU_COMBINE)
371bf215546Sopenharmony_ci      return ppir_instr_insert_node(node->instr, store_node);
372bf215546Sopenharmony_ci
373bf215546Sopenharmony_ci   /* Not possible to merge store, so fall back to a new instruction */
374bf215546Sopenharmony_ci   return create_new_instr_after(block, node->instr, store_node);
375bf215546Sopenharmony_ci}
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_cistatic bool ppir_regalloc_spill_reg(ppir_compiler *comp, ppir_reg *chosen)
378bf215546Sopenharmony_ci{
379bf215546Sopenharmony_ci   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
380bf215546Sopenharmony_ci      list_for_each_entry(ppir_node, node, &block->node_list, list) {
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_ci         ppir_dest *dest = ppir_node_get_dest(node);
383bf215546Sopenharmony_ci         if (dest && ppir_dest_get_reg(dest) == chosen) {
384bf215546Sopenharmony_ci            /* If dest is a register, it might be updating only some its
385bf215546Sopenharmony_ci             * components, so need to load the existing value first */
386bf215546Sopenharmony_ci            if (dest->type == ppir_target_register) {
387bf215546Sopenharmony_ci               if (!ppir_update_spilled_dest_load(comp, block, node))
388bf215546Sopenharmony_ci                  return false;
389bf215546Sopenharmony_ci            }
390bf215546Sopenharmony_ci            if (!ppir_update_spilled_dest(comp, block, node))
391bf215546Sopenharmony_ci               return false;
392bf215546Sopenharmony_ci         }
393bf215546Sopenharmony_ci
394bf215546Sopenharmony_ci         ppir_node *fill_node = NULL;
395bf215546Sopenharmony_ci         /* nodes might have multiple references to the same value.
396bf215546Sopenharmony_ci          * avoid creating unnecessary loads for the same fill by
397bf215546Sopenharmony_ci          * saving the node resulting from the temporary load */
398bf215546Sopenharmony_ci         for (int i = 0; i < ppir_node_get_src_num(node); i++) {
399bf215546Sopenharmony_ci            ppir_src *src = ppir_node_get_src(node, i);
400bf215546Sopenharmony_ci            ppir_reg *reg = ppir_src_get_reg(src);
401bf215546Sopenharmony_ci            if (reg == chosen) {
402bf215546Sopenharmony_ci               if (!ppir_update_spilled_src(comp, block, node, src, &fill_node))
403bf215546Sopenharmony_ci                  return false;
404bf215546Sopenharmony_ci            }
405bf215546Sopenharmony_ci         }
406bf215546Sopenharmony_ci      }
407bf215546Sopenharmony_ci   }
408bf215546Sopenharmony_ci
409bf215546Sopenharmony_ci   return true;
410bf215546Sopenharmony_ci}
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_cistatic ppir_reg *ppir_regalloc_choose_spill_node(ppir_compiler *comp,
413bf215546Sopenharmony_ci                                                 struct ra_graph *g)
414bf215546Sopenharmony_ci{
415bf215546Sopenharmony_ci   float spill_costs[comp->reg_num];
416bf215546Sopenharmony_ci   /* experimentally determined, it seems to be worth scaling cost of
417bf215546Sopenharmony_ci    * regs in instructions that have used uniform/store_temp slots,
418bf215546Sopenharmony_ci    * but not too much as to offset the num_components base cost. */
419bf215546Sopenharmony_ci   const float slot_scale = 1.1f;
420bf215546Sopenharmony_ci
421bf215546Sopenharmony_ci   memset(spill_costs, 0, sizeof(spill_costs[0]) * comp->reg_num);
422bf215546Sopenharmony_ci   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
423bf215546Sopenharmony_ci      if (reg->spilled) {
424bf215546Sopenharmony_ci         /* not considered for spilling */
425bf215546Sopenharmony_ci         spill_costs[reg->regalloc_index] = 0.0f;
426bf215546Sopenharmony_ci         continue;
427bf215546Sopenharmony_ci      }
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_ci      /* It is beneficial to spill registers with higher component number,
430bf215546Sopenharmony_ci       * so increase the cost of spilling registers with few components */
431bf215546Sopenharmony_ci      float spill_cost = 4.0f / (float)reg->num_components;
432bf215546Sopenharmony_ci      spill_costs[reg->regalloc_index] = spill_cost;
433bf215546Sopenharmony_ci   }
434bf215546Sopenharmony_ci
435bf215546Sopenharmony_ci   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
436bf215546Sopenharmony_ci      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
437bf215546Sopenharmony_ci         if (instr->slots[PPIR_INSTR_SLOT_UNIFORM]) {
438bf215546Sopenharmony_ci            for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
439bf215546Sopenharmony_ci               ppir_node *node = instr->slots[i];
440bf215546Sopenharmony_ci               if (!node)
441bf215546Sopenharmony_ci                  continue;
442bf215546Sopenharmony_ci               for (int j = 0; j < ppir_node_get_src_num(node); j++) {
443bf215546Sopenharmony_ci                  ppir_src *src = ppir_node_get_src(node, j);
444bf215546Sopenharmony_ci                  if (!src)
445bf215546Sopenharmony_ci                     continue;
446bf215546Sopenharmony_ci                  ppir_reg *reg = ppir_src_get_reg(src);
447bf215546Sopenharmony_ci                  if (!reg)
448bf215546Sopenharmony_ci                     continue;
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_ci                  spill_costs[reg->regalloc_index] *= slot_scale;
451bf215546Sopenharmony_ci               }
452bf215546Sopenharmony_ci            }
453bf215546Sopenharmony_ci         }
454bf215546Sopenharmony_ci         if (instr->slots[PPIR_INSTR_SLOT_STORE_TEMP]) {
455bf215546Sopenharmony_ci            for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
456bf215546Sopenharmony_ci               ppir_node *node = instr->slots[i];
457bf215546Sopenharmony_ci               if (!node)
458bf215546Sopenharmony_ci                  continue;
459bf215546Sopenharmony_ci               ppir_dest *dest = ppir_node_get_dest(node);
460bf215546Sopenharmony_ci               if (!dest)
461bf215546Sopenharmony_ci                  continue;
462bf215546Sopenharmony_ci               ppir_reg *reg = ppir_dest_get_reg(dest);
463bf215546Sopenharmony_ci               if (!reg)
464bf215546Sopenharmony_ci                  continue;
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_ci               spill_costs[reg->regalloc_index] *= slot_scale;
467bf215546Sopenharmony_ci            }
468bf215546Sopenharmony_ci         }
469bf215546Sopenharmony_ci      }
470bf215546Sopenharmony_ci   }
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_ci   for (int i = 0; i < comp->reg_num; i++)
473bf215546Sopenharmony_ci      ra_set_node_spill_cost(g, i, spill_costs[i]);
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci   int r = ra_get_best_spill_node(g);
476bf215546Sopenharmony_ci   if (r == -1)
477bf215546Sopenharmony_ci      return NULL;
478bf215546Sopenharmony_ci
479bf215546Sopenharmony_ci   ppir_reg *chosen = NULL;
480bf215546Sopenharmony_ci   int i = 0;
481bf215546Sopenharmony_ci   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
482bf215546Sopenharmony_ci      if (i++ == r) {
483bf215546Sopenharmony_ci         chosen = reg;
484bf215546Sopenharmony_ci         break;
485bf215546Sopenharmony_ci      }
486bf215546Sopenharmony_ci   }
487bf215546Sopenharmony_ci   assert(chosen);
488bf215546Sopenharmony_ci   chosen->spilled = true;
489bf215546Sopenharmony_ci   chosen->is_head = true; /* store_temp unable to do swizzle */
490bf215546Sopenharmony_ci
491bf215546Sopenharmony_ci   return chosen;
492bf215546Sopenharmony_ci}
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_cistatic void ppir_regalloc_reset_liveness_info(ppir_compiler *comp)
495bf215546Sopenharmony_ci{
496bf215546Sopenharmony_ci   int idx = 0;
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_ci   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
499bf215546Sopenharmony_ci      reg->regalloc_index = idx++;
500bf215546Sopenharmony_ci   }
501bf215546Sopenharmony_ci
502bf215546Sopenharmony_ci   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
503bf215546Sopenharmony_ci      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
504bf215546Sopenharmony_ci
505bf215546Sopenharmony_ci         if (instr->live_mask)
506bf215546Sopenharmony_ci            ralloc_free(instr->live_mask);
507bf215546Sopenharmony_ci         instr->live_mask = rzalloc_array(comp, uint8_t,
508bf215546Sopenharmony_ci                                          reg_mask_size(comp->reg_num));
509bf215546Sopenharmony_ci
510bf215546Sopenharmony_ci         if (instr->live_set)
511bf215546Sopenharmony_ci            ralloc_free(instr->live_set);
512bf215546Sopenharmony_ci         instr->live_set = rzalloc_array(comp, BITSET_WORD, comp->reg_num);
513bf215546Sopenharmony_ci
514bf215546Sopenharmony_ci         if (instr->live_internal)
515bf215546Sopenharmony_ci            ralloc_free(instr->live_internal);
516bf215546Sopenharmony_ci         instr->live_internal = rzalloc_array(comp, BITSET_WORD, comp->reg_num);
517bf215546Sopenharmony_ci      }
518bf215546Sopenharmony_ci   }
519bf215546Sopenharmony_ci}
520bf215546Sopenharmony_ci
521bf215546Sopenharmony_cistatic void ppir_all_interference(ppir_compiler *comp, struct ra_graph *g,
522bf215546Sopenharmony_ci                                  BITSET_WORD *liveness)
523bf215546Sopenharmony_ci{
524bf215546Sopenharmony_ci   int i, j;
525bf215546Sopenharmony_ci   BITSET_FOREACH_SET(i, liveness, comp->reg_num) {
526bf215546Sopenharmony_ci      BITSET_FOREACH_SET(j, liveness, comp->reg_num) {
527bf215546Sopenharmony_ci         ra_add_node_interference(g, i, j);
528bf215546Sopenharmony_ci      }
529bf215546Sopenharmony_ci      BITSET_CLEAR(liveness, i);
530bf215546Sopenharmony_ci   }
531bf215546Sopenharmony_ci}
532bf215546Sopenharmony_ci
533bf215546Sopenharmony_ciint lima_ppir_force_spilling = 0;
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_cistatic bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled)
536bf215546Sopenharmony_ci{
537bf215546Sopenharmony_ci   ppir_regalloc_reset_liveness_info(comp);
538bf215546Sopenharmony_ci
539bf215546Sopenharmony_ci   struct ra_graph *g = ra_alloc_interference_graph(
540bf215546Sopenharmony_ci      comp->ra, comp->reg_num);
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci   int n = 0;
543bf215546Sopenharmony_ci   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
544bf215546Sopenharmony_ci      int c = ppir_ra_reg_class_vec1 + (reg->num_components - 1);
545bf215546Sopenharmony_ci      if (reg->is_head)
546bf215546Sopenharmony_ci         c += 4;
547bf215546Sopenharmony_ci      ra_set_node_class(g, n++, ra_get_class_from_index(comp->ra, c));
548bf215546Sopenharmony_ci   }
549bf215546Sopenharmony_ci
550bf215546Sopenharmony_ci   ppir_liveness_analysis(comp);
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_ci   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
553bf215546Sopenharmony_ci      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
554bf215546Sopenharmony_ci         int i;
555bf215546Sopenharmony_ci         BITSET_FOREACH_SET(i, instr->live_internal, comp->reg_num) {
556bf215546Sopenharmony_ci            BITSET_SET(instr->live_set, i);
557bf215546Sopenharmony_ci         }
558bf215546Sopenharmony_ci         ppir_all_interference(comp, g, instr->live_set);
559bf215546Sopenharmony_ci      }
560bf215546Sopenharmony_ci   }
561bf215546Sopenharmony_ci
562bf215546Sopenharmony_ci   *spilled = false;
563bf215546Sopenharmony_ci   bool ok = ra_allocate(g);
564bf215546Sopenharmony_ci   if (!ok || (comp->force_spilling-- > 0)) {
565bf215546Sopenharmony_ci      ppir_reg *chosen = ppir_regalloc_choose_spill_node(comp, g);
566bf215546Sopenharmony_ci      if (chosen) {
567bf215546Sopenharmony_ci         /* stack_size will be used to assemble the frame reg in lima_draw.
568bf215546Sopenharmony_ci          * It is also be used in the spilling code, as negative indices
569bf215546Sopenharmony_ci          * starting from -1, to create stack addresses. */
570bf215546Sopenharmony_ci         comp->prog->state.stack_size++;
571bf215546Sopenharmony_ci         if (!ppir_regalloc_spill_reg(comp, chosen))
572bf215546Sopenharmony_ci            goto err_out;
573bf215546Sopenharmony_ci         /* Ask the outer loop to call back in. */
574bf215546Sopenharmony_ci         *spilled = true;
575bf215546Sopenharmony_ci
576bf215546Sopenharmony_ci         ppir_debug("spilled register %d/%d, num_components: %d\n",
577bf215546Sopenharmony_ci                    chosen->regalloc_index, comp->reg_num,
578bf215546Sopenharmony_ci                    chosen->num_components);
579bf215546Sopenharmony_ci         goto err_out;
580bf215546Sopenharmony_ci      }
581bf215546Sopenharmony_ci
582bf215546Sopenharmony_ci      ppir_error("regalloc fail\n");
583bf215546Sopenharmony_ci      goto err_out;
584bf215546Sopenharmony_ci   }
585bf215546Sopenharmony_ci
586bf215546Sopenharmony_ci   n = 0;
587bf215546Sopenharmony_ci   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
588bf215546Sopenharmony_ci      reg->index = ra_get_node_reg(g, n++);
589bf215546Sopenharmony_ci      if (reg->out_reg) {
590bf215546Sopenharmony_ci         /* We need actual reg number, we don't have swizzle for output regs */
591bf215546Sopenharmony_ci         assert(!(reg->index & 0x3) && "ppir: output regs don't have swizzle");
592bf215546Sopenharmony_ci         comp->out_type_to_reg[reg->out_type] = reg->index / 4;
593bf215546Sopenharmony_ci      }
594bf215546Sopenharmony_ci   }
595bf215546Sopenharmony_ci
596bf215546Sopenharmony_ci   ralloc_free(g);
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci   if (lima_debug & LIMA_DEBUG_PP)
599bf215546Sopenharmony_ci      ppir_regalloc_print_result(comp);
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_ci   return true;
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_cierr_out:
604bf215546Sopenharmony_ci   ralloc_free(g);
605bf215546Sopenharmony_ci   return false;
606bf215546Sopenharmony_ci}
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_cibool ppir_regalloc_prog(ppir_compiler *comp)
609bf215546Sopenharmony_ci{
610bf215546Sopenharmony_ci   bool spilled = false;
611bf215546Sopenharmony_ci   comp->prog->state.stack_size = 0;
612bf215546Sopenharmony_ci
613bf215546Sopenharmony_ci   /* Set from an environment variable to force spilling
614bf215546Sopenharmony_ci    * for debugging purposes, see lima_screen.c */
615bf215546Sopenharmony_ci   comp->force_spilling = lima_ppir_force_spilling;
616bf215546Sopenharmony_ci
617bf215546Sopenharmony_ci   ppir_regalloc_update_reglist_ssa(comp);
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_ci   /* No registers? Probably shader consists of discard instruction */
620bf215546Sopenharmony_ci   if (list_is_empty(&comp->reg_list)) {
621bf215546Sopenharmony_ci      comp->prog->state.frag_color0_reg = 0;
622bf215546Sopenharmony_ci      comp->prog->state.frag_color1_reg = -1;
623bf215546Sopenharmony_ci      comp->prog->state.frag_depth_reg = -1;
624bf215546Sopenharmony_ci      return true;
625bf215546Sopenharmony_ci   }
626bf215546Sopenharmony_ci
627bf215546Sopenharmony_ci   /* this will most likely succeed in the first
628bf215546Sopenharmony_ci    * try, except for very complicated shaders */
629bf215546Sopenharmony_ci   while (!ppir_regalloc_prog_try(comp, &spilled))
630bf215546Sopenharmony_ci      if (!spilled)
631bf215546Sopenharmony_ci         return false;
632bf215546Sopenharmony_ci
633bf215546Sopenharmony_ci   comp->prog->state.frag_color0_reg =
634bf215546Sopenharmony_ci      comp->out_type_to_reg[ppir_output_color0];
635bf215546Sopenharmony_ci   comp->prog->state.frag_color1_reg =
636bf215546Sopenharmony_ci      comp->out_type_to_reg[ppir_output_color1];
637bf215546Sopenharmony_ci   comp->prog->state.frag_depth_reg =
638bf215546Sopenharmony_ci      comp->out_type_to_reg[ppir_output_depth];
639bf215546Sopenharmony_ci
640bf215546Sopenharmony_ci   return true;
641bf215546Sopenharmony_ci}
642