1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (c) 2017 Lima Project
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sub license,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the
12bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions
13bf215546Sopenharmony_ci * of the Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include <string.h>
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "util/ralloc.h"
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#include "gpir.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_cigpir_instr *gpir_instr_create(gpir_block *block)
32bf215546Sopenharmony_ci{
33bf215546Sopenharmony_ci   gpir_instr *instr = rzalloc(block, gpir_instr);
34bf215546Sopenharmony_ci   if (unlikely(!instr))
35bf215546Sopenharmony_ci      return NULL;
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci   block->comp->num_instr++;
38bf215546Sopenharmony_ci   if (block->comp->num_instr > 512) {
39bf215546Sopenharmony_ci      gpir_error("shader exceeds limit of 512 instructions\n");
40bf215546Sopenharmony_ci      return NULL;
41bf215546Sopenharmony_ci   }
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci   instr->index = block->sched.instr_index++;
44bf215546Sopenharmony_ci   instr->alu_num_slot_free = 6;
45bf215546Sopenharmony_ci   instr->alu_non_cplx_slot_free = 5;
46bf215546Sopenharmony_ci   instr->alu_max_allowed_next_max = 5;
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci   list_add(&instr->list, &block->instr_list);
49bf215546Sopenharmony_ci   return instr;
50bf215546Sopenharmony_ci}
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_cistatic gpir_node *gpir_instr_get_the_other_acc_node(gpir_instr *instr, int slot)
53bf215546Sopenharmony_ci{
54bf215546Sopenharmony_ci   if (slot == GPIR_INSTR_SLOT_ADD0)
55bf215546Sopenharmony_ci      return instr->slots[GPIR_INSTR_SLOT_ADD1];
56bf215546Sopenharmony_ci   else if (slot == GPIR_INSTR_SLOT_ADD1)
57bf215546Sopenharmony_ci      return instr->slots[GPIR_INSTR_SLOT_ADD0];
58bf215546Sopenharmony_ci
59bf215546Sopenharmony_ci   return NULL;
60bf215546Sopenharmony_ci}
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_cistatic bool gpir_instr_check_acc_same_op(gpir_instr *instr, gpir_node *node, int slot)
63bf215546Sopenharmony_ci{
64bf215546Sopenharmony_ci   /* two ACC slots must share the same op code */
65bf215546Sopenharmony_ci   gpir_node *acc_node = gpir_instr_get_the_other_acc_node(instr, slot);
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_ci   /* spill move case may get acc_node == node */
68bf215546Sopenharmony_ci   if (acc_node && acc_node != node &&
69bf215546Sopenharmony_ci       !gpir_codegen_acc_same_op(node->op, acc_node->op))
70bf215546Sopenharmony_ci      return false;
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci   return true;
73bf215546Sopenharmony_ci}
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_cistatic int gpir_instr_get_consume_slot(gpir_instr *instr, gpir_node *node)
76bf215546Sopenharmony_ci{
77bf215546Sopenharmony_ci   if (gpir_op_infos[node->op].may_consume_two_slots) {
78bf215546Sopenharmony_ci      gpir_node *acc_node = gpir_instr_get_the_other_acc_node(instr, node->sched.pos);
79bf215546Sopenharmony_ci      if (acc_node)
80bf215546Sopenharmony_ci         /* at this point node must have the same acc op with acc_node,
81bf215546Sopenharmony_ci          * so it just consumes the extra slot acc_node consumed */
82bf215546Sopenharmony_ci         return 0;
83bf215546Sopenharmony_ci      else
84bf215546Sopenharmony_ci         return 2;
85bf215546Sopenharmony_ci   }
86bf215546Sopenharmony_ci   else
87bf215546Sopenharmony_ci      return 1;
88bf215546Sopenharmony_ci}
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_cistatic bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
91bf215546Sopenharmony_ci{
92bf215546Sopenharmony_ci   if (!gpir_instr_check_acc_same_op(instr, node, node->sched.pos))
93bf215546Sopenharmony_ci      return false;
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci   if (node->sched.next_max_node && !node->sched.complex_allowed &&
96bf215546Sopenharmony_ci       node->sched.pos == GPIR_INSTR_SLOT_COMPLEX)
97bf215546Sopenharmony_ci      return false;
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci   int consume_slot = gpir_instr_get_consume_slot(instr, node);
100bf215546Sopenharmony_ci   int non_cplx_consume_slot =
101bf215546Sopenharmony_ci      node->sched.pos == GPIR_INSTR_SLOT_COMPLEX ? 0 : consume_slot;
102bf215546Sopenharmony_ci   int store_reduce_slot = 0;
103bf215546Sopenharmony_ci   int non_cplx_store_reduce_slot = 0;
104bf215546Sopenharmony_ci   int max_reduce_slot = node->sched.max_node ? 1 : 0;
105bf215546Sopenharmony_ci   int next_max_reduce_slot = node->sched.next_max_node ? 1 : 0;
106bf215546Sopenharmony_ci   int alu_new_max_allowed_next_max =
107bf215546Sopenharmony_ci      node->op == gpir_op_complex1 ? 4 : instr->alu_max_allowed_next_max;
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_ci   /* check if this node is child of one store node.
110bf215546Sopenharmony_ci    * complex1 won't be any of this instr's store node's child,
111bf215546Sopenharmony_ci    * because it has two instr latency before store can use it.
112bf215546Sopenharmony_ci    */
113bf215546Sopenharmony_ci   for (int i = GPIR_INSTR_SLOT_STORE0; i <= GPIR_INSTR_SLOT_STORE3; i++) {
114bf215546Sopenharmony_ci      gpir_store_node *s = gpir_node_to_store(instr->slots[i]);
115bf215546Sopenharmony_ci      if (s && s->child == node) {
116bf215546Sopenharmony_ci         store_reduce_slot = 1;
117bf215546Sopenharmony_ci         if (node->sched.next_max_node && !node->sched.complex_allowed)
118bf215546Sopenharmony_ci            non_cplx_store_reduce_slot = 1;
119bf215546Sopenharmony_ci         break;
120bf215546Sopenharmony_ci      }
121bf215546Sopenharmony_ci   }
122bf215546Sopenharmony_ci
123bf215546Sopenharmony_ci   /* Check that the invariants will be maintained after we adjust everything
124bf215546Sopenharmony_ci    */
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci   int slot_difference =
127bf215546Sopenharmony_ci       instr->alu_num_slot_needed_by_store - store_reduce_slot +
128bf215546Sopenharmony_ci       instr->alu_num_slot_needed_by_max - max_reduce_slot +
129bf215546Sopenharmony_ci       MAX2(instr->alu_num_unscheduled_next_max - next_max_reduce_slot -
130bf215546Sopenharmony_ci            alu_new_max_allowed_next_max, 0) -
131bf215546Sopenharmony_ci      (instr->alu_num_slot_free - consume_slot);
132bf215546Sopenharmony_ci   if (slot_difference > 0) {
133bf215546Sopenharmony_ci      gpir_debug("failed %d because of alu slot\n", node->index);
134bf215546Sopenharmony_ci      instr->slot_difference = slot_difference;
135bf215546Sopenharmony_ci   }
136bf215546Sopenharmony_ci
137bf215546Sopenharmony_ci   int non_cplx_slot_difference =
138bf215546Sopenharmony_ci       instr->alu_num_slot_needed_by_max - max_reduce_slot +
139bf215546Sopenharmony_ci       instr->alu_num_slot_needed_by_non_cplx_store - non_cplx_store_reduce_slot -
140bf215546Sopenharmony_ci       (instr->alu_non_cplx_slot_free - non_cplx_consume_slot);
141bf215546Sopenharmony_ci   if (non_cplx_slot_difference > 0) {
142bf215546Sopenharmony_ci      gpir_debug("failed %d because of alu slot\n", node->index);
143bf215546Sopenharmony_ci      instr->non_cplx_slot_difference = non_cplx_slot_difference;
144bf215546Sopenharmony_ci   }
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci   if (slot_difference > 0 || non_cplx_slot_difference > 0)
147bf215546Sopenharmony_ci      return false;
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ci   instr->alu_num_slot_free -= consume_slot;
150bf215546Sopenharmony_ci   instr->alu_non_cplx_slot_free -= non_cplx_consume_slot;
151bf215546Sopenharmony_ci   instr->alu_num_slot_needed_by_store -= store_reduce_slot;
152bf215546Sopenharmony_ci   instr->alu_num_slot_needed_by_non_cplx_store -= non_cplx_store_reduce_slot;
153bf215546Sopenharmony_ci   instr->alu_num_slot_needed_by_max -= max_reduce_slot;
154bf215546Sopenharmony_ci   instr->alu_num_unscheduled_next_max -= next_max_reduce_slot;
155bf215546Sopenharmony_ci   instr->alu_max_allowed_next_max = alu_new_max_allowed_next_max;
156bf215546Sopenharmony_ci   return true;
157bf215546Sopenharmony_ci}
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_cistatic void gpir_instr_remove_alu(gpir_instr *instr, gpir_node *node)
160bf215546Sopenharmony_ci{
161bf215546Sopenharmony_ci   int consume_slot = gpir_instr_get_consume_slot(instr, node);
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci   for (int i = GPIR_INSTR_SLOT_STORE0; i <= GPIR_INSTR_SLOT_STORE3; i++) {
164bf215546Sopenharmony_ci      gpir_store_node *s = gpir_node_to_store(instr->slots[i]);
165bf215546Sopenharmony_ci      if (s && s->child == node) {
166bf215546Sopenharmony_ci         instr->alu_num_slot_needed_by_store++;
167bf215546Sopenharmony_ci         if (node->sched.next_max_node && !node->sched.complex_allowed)
168bf215546Sopenharmony_ci            instr->alu_num_slot_needed_by_non_cplx_store++;
169bf215546Sopenharmony_ci         break;
170bf215546Sopenharmony_ci      }
171bf215546Sopenharmony_ci   }
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_ci   instr->alu_num_slot_free += consume_slot;
174bf215546Sopenharmony_ci   if (node->sched.pos != GPIR_INSTR_SLOT_COMPLEX)
175bf215546Sopenharmony_ci      instr->alu_non_cplx_slot_free += consume_slot;
176bf215546Sopenharmony_ci   if (node->sched.max_node)
177bf215546Sopenharmony_ci      instr->alu_num_slot_needed_by_max++;
178bf215546Sopenharmony_ci   if (node->sched.next_max_node)
179bf215546Sopenharmony_ci      instr->alu_num_unscheduled_next_max++;
180bf215546Sopenharmony_ci   if (node->op == gpir_op_complex1)
181bf215546Sopenharmony_ci      instr->alu_max_allowed_next_max = 5;
182bf215546Sopenharmony_ci}
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_cistatic bool gpir_instr_insert_reg0_check(gpir_instr *instr, gpir_node *node)
185bf215546Sopenharmony_ci{
186bf215546Sopenharmony_ci   gpir_load_node *load = gpir_node_to_load(node);
187bf215546Sopenharmony_ci   int i = node->sched.pos - GPIR_INSTR_SLOT_REG0_LOAD0;
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   if (load->component != i)
190bf215546Sopenharmony_ci      return false;
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci   if (instr->reg0_is_attr && node->op != gpir_op_load_attribute)
193bf215546Sopenharmony_ci      return false;
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci   if (instr->reg0_use_count) {
196bf215546Sopenharmony_ci       if (instr->reg0_index != load->index)
197bf215546Sopenharmony_ci          return false;
198bf215546Sopenharmony_ci   }
199bf215546Sopenharmony_ci   else {
200bf215546Sopenharmony_ci      instr->reg0_is_attr = node->op == gpir_op_load_attribute;
201bf215546Sopenharmony_ci      instr->reg0_index = load->index;
202bf215546Sopenharmony_ci   }
203bf215546Sopenharmony_ci
204bf215546Sopenharmony_ci   instr->reg0_use_count++;
205bf215546Sopenharmony_ci   return true;
206bf215546Sopenharmony_ci}
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_cistatic void gpir_instr_remove_reg0(gpir_instr *instr, gpir_node *node)
209bf215546Sopenharmony_ci{
210bf215546Sopenharmony_ci   instr->reg0_use_count--;
211bf215546Sopenharmony_ci   if (!instr->reg0_use_count)
212bf215546Sopenharmony_ci      instr->reg0_is_attr = false;
213bf215546Sopenharmony_ci}
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_cistatic bool gpir_instr_insert_reg1_check(gpir_instr *instr, gpir_node *node)
216bf215546Sopenharmony_ci{
217bf215546Sopenharmony_ci   gpir_load_node *load = gpir_node_to_load(node);
218bf215546Sopenharmony_ci   int i = node->sched.pos - GPIR_INSTR_SLOT_REG1_LOAD0;
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ci   if (load->component != i)
221bf215546Sopenharmony_ci      return false;
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_ci   if (instr->reg1_use_count) {
224bf215546Sopenharmony_ci       if (instr->reg1_index != load->index)
225bf215546Sopenharmony_ci          return false;
226bf215546Sopenharmony_ci   }
227bf215546Sopenharmony_ci   else
228bf215546Sopenharmony_ci      instr->reg1_index = load->index;
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci   instr->reg1_use_count++;
231bf215546Sopenharmony_ci   return true;
232bf215546Sopenharmony_ci}
233bf215546Sopenharmony_ci
234bf215546Sopenharmony_cistatic void gpir_instr_remove_reg1(gpir_instr *instr, gpir_node *node)
235bf215546Sopenharmony_ci{
236bf215546Sopenharmony_ci   instr->reg1_use_count--;
237bf215546Sopenharmony_ci}
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_cistatic bool gpir_instr_insert_mem_check(gpir_instr *instr, gpir_node *node)
240bf215546Sopenharmony_ci{
241bf215546Sopenharmony_ci   gpir_load_node *load = gpir_node_to_load(node);
242bf215546Sopenharmony_ci   int i = node->sched.pos - GPIR_INSTR_SLOT_MEM_LOAD0;
243bf215546Sopenharmony_ci
244bf215546Sopenharmony_ci   if (load->component != i)
245bf215546Sopenharmony_ci      return false;
246bf215546Sopenharmony_ci
247bf215546Sopenharmony_ci   if (instr->mem_is_temp && node->op != gpir_op_load_temp)
248bf215546Sopenharmony_ci      return false;
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci   if (instr->mem_use_count) {
251bf215546Sopenharmony_ci       if (instr->mem_index != load->index)
252bf215546Sopenharmony_ci          return false;
253bf215546Sopenharmony_ci   }
254bf215546Sopenharmony_ci   else {
255bf215546Sopenharmony_ci      instr->mem_is_temp = node->op == gpir_op_load_temp;
256bf215546Sopenharmony_ci      instr->mem_index = load->index;
257bf215546Sopenharmony_ci   }
258bf215546Sopenharmony_ci
259bf215546Sopenharmony_ci   instr->mem_use_count++;
260bf215546Sopenharmony_ci   return true;
261bf215546Sopenharmony_ci}
262bf215546Sopenharmony_ci
263bf215546Sopenharmony_cistatic void gpir_instr_remove_mem(gpir_instr *instr, gpir_node *node)
264bf215546Sopenharmony_ci{
265bf215546Sopenharmony_ci   instr->mem_use_count--;
266bf215546Sopenharmony_ci   if (!instr->mem_use_count)
267bf215546Sopenharmony_ci      instr->mem_is_temp = false;
268bf215546Sopenharmony_ci}
269bf215546Sopenharmony_ci
270bf215546Sopenharmony_cistatic bool gpir_instr_insert_store_check(gpir_instr *instr, gpir_node *node)
271bf215546Sopenharmony_ci{
272bf215546Sopenharmony_ci   gpir_store_node *store = gpir_node_to_store(node);
273bf215546Sopenharmony_ci   int i = node->sched.pos - GPIR_INSTR_SLOT_STORE0;
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_ci   if (store->component != i)
276bf215546Sopenharmony_ci      return false;
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci   i >>= 1;
279bf215546Sopenharmony_ci   switch (instr->store_content[i]) {
280bf215546Sopenharmony_ci   case GPIR_INSTR_STORE_NONE:
281bf215546Sopenharmony_ci      /* store temp has only one address reg for two store unit */
282bf215546Sopenharmony_ci      if (node->op == gpir_op_store_temp &&
283bf215546Sopenharmony_ci          instr->store_content[!i] == GPIR_INSTR_STORE_TEMP &&
284bf215546Sopenharmony_ci          instr->store_index[!i] != store->index)
285bf215546Sopenharmony_ci         return false;
286bf215546Sopenharmony_ci      break;
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_ci   case GPIR_INSTR_STORE_VARYING:
289bf215546Sopenharmony_ci      if (node->op != gpir_op_store_varying ||
290bf215546Sopenharmony_ci          instr->store_index[i] != store->index)
291bf215546Sopenharmony_ci         return false;
292bf215546Sopenharmony_ci      break;
293bf215546Sopenharmony_ci
294bf215546Sopenharmony_ci   case GPIR_INSTR_STORE_REG:
295bf215546Sopenharmony_ci      if (node->op != gpir_op_store_reg ||
296bf215546Sopenharmony_ci          instr->store_index[i] != store->index)
297bf215546Sopenharmony_ci         return false;
298bf215546Sopenharmony_ci      break;
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci   case GPIR_INSTR_STORE_TEMP:
301bf215546Sopenharmony_ci      if (node->op != gpir_op_store_temp ||
302bf215546Sopenharmony_ci          instr->store_index[i] != store->index)
303bf215546Sopenharmony_ci         return false;
304bf215546Sopenharmony_ci      break;
305bf215546Sopenharmony_ci   }
306bf215546Sopenharmony_ci
307bf215546Sopenharmony_ci   /* check if any store node has the same child as this node */
308bf215546Sopenharmony_ci   for (int j = GPIR_INSTR_SLOT_STORE0; j <= GPIR_INSTR_SLOT_STORE3; j++) {
309bf215546Sopenharmony_ci      gpir_store_node *s = gpir_node_to_store(instr->slots[j]);
310bf215546Sopenharmony_ci      if (s && s->child == store->child)
311bf215546Sopenharmony_ci         goto out;
312bf215546Sopenharmony_ci   }
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_ci   /* check if the child is already in this instr's alu slot,
315bf215546Sopenharmony_ci    * this may happen when store an scheduled alu node to reg
316bf215546Sopenharmony_ci    */
317bf215546Sopenharmony_ci   for (int j = GPIR_INSTR_SLOT_ALU_BEGIN; j <= GPIR_INSTR_SLOT_ALU_END; j++) {
318bf215546Sopenharmony_ci      if (store->child == instr->slots[j])
319bf215546Sopenharmony_ci         goto out;
320bf215546Sopenharmony_ci   }
321bf215546Sopenharmony_ci
322bf215546Sopenharmony_ci   /* Check the invariants documented in gpir.h, similar to the ALU case.
323bf215546Sopenharmony_ci    * When the only thing that changes is alu_num_slot_needed_by_store, we
324bf215546Sopenharmony_ci    * can get away with just checking the first one.
325bf215546Sopenharmony_ci    */
326bf215546Sopenharmony_ci   int slot_difference = instr->alu_num_slot_needed_by_store + 1
327bf215546Sopenharmony_ci      + instr->alu_num_slot_needed_by_max +
328bf215546Sopenharmony_ci      MAX2(instr->alu_num_unscheduled_next_max - instr->alu_max_allowed_next_max, 0) -
329bf215546Sopenharmony_ci      instr->alu_num_slot_free;
330bf215546Sopenharmony_ci   if (slot_difference > 0) {
331bf215546Sopenharmony_ci      instr->slot_difference = slot_difference;
332bf215546Sopenharmony_ci      return false;
333bf215546Sopenharmony_ci   }
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci   if (store->child->sched.next_max_node &&
336bf215546Sopenharmony_ci       !store->child->sched.complex_allowed) {
337bf215546Sopenharmony_ci      /* The child of the store is already partially ready, and has a use one
338bf215546Sopenharmony_ci       * cycle ago that disqualifies it (or a move replacing it) from being
339bf215546Sopenharmony_ci       * put in the complex slot. Therefore we have to check the non-complex
340bf215546Sopenharmony_ci       * invariant.
341bf215546Sopenharmony_ci       */
342bf215546Sopenharmony_ci      int non_cplx_slot_difference =
343bf215546Sopenharmony_ci          instr->alu_num_slot_needed_by_max +
344bf215546Sopenharmony_ci          instr->alu_num_slot_needed_by_non_cplx_store + 1 -
345bf215546Sopenharmony_ci          instr->alu_non_cplx_slot_free;
346bf215546Sopenharmony_ci      if (non_cplx_slot_difference > 0) {
347bf215546Sopenharmony_ci         instr->non_cplx_slot_difference = non_cplx_slot_difference;
348bf215546Sopenharmony_ci         return false;
349bf215546Sopenharmony_ci      }
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_ci      instr->alu_num_slot_needed_by_non_cplx_store++;
352bf215546Sopenharmony_ci   }
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci   instr->alu_num_slot_needed_by_store++;
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ciout:
357bf215546Sopenharmony_ci   if (instr->store_content[i] == GPIR_INSTR_STORE_NONE) {
358bf215546Sopenharmony_ci      if (node->op == gpir_op_store_varying)
359bf215546Sopenharmony_ci         instr->store_content[i] = GPIR_INSTR_STORE_VARYING;
360bf215546Sopenharmony_ci      else if (node->op == gpir_op_store_reg)
361bf215546Sopenharmony_ci         instr->store_content[i] = GPIR_INSTR_STORE_REG;
362bf215546Sopenharmony_ci      else
363bf215546Sopenharmony_ci         instr->store_content[i] = GPIR_INSTR_STORE_TEMP;
364bf215546Sopenharmony_ci
365bf215546Sopenharmony_ci      instr->store_index[i] = store->index;
366bf215546Sopenharmony_ci   }
367bf215546Sopenharmony_ci   return true;
368bf215546Sopenharmony_ci}
369bf215546Sopenharmony_ci
370bf215546Sopenharmony_cistatic void gpir_instr_remove_store(gpir_instr *instr, gpir_node *node)
371bf215546Sopenharmony_ci{
372bf215546Sopenharmony_ci   gpir_store_node *store = gpir_node_to_store(node);
373bf215546Sopenharmony_ci   int component = node->sched.pos - GPIR_INSTR_SLOT_STORE0;
374bf215546Sopenharmony_ci   int other_slot = GPIR_INSTR_SLOT_STORE0 + (component ^ 1);
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_ci   for (int j = GPIR_INSTR_SLOT_STORE0; j <= GPIR_INSTR_SLOT_STORE3; j++) {
377bf215546Sopenharmony_ci      if (j == node->sched.pos)
378bf215546Sopenharmony_ci         continue;
379bf215546Sopenharmony_ci
380bf215546Sopenharmony_ci      gpir_store_node *s = gpir_node_to_store(instr->slots[j]);
381bf215546Sopenharmony_ci      if (s && s->child == store->child)
382bf215546Sopenharmony_ci         goto out;
383bf215546Sopenharmony_ci   }
384bf215546Sopenharmony_ci
385bf215546Sopenharmony_ci   for (int j = GPIR_INSTR_SLOT_ALU_BEGIN; j <= GPIR_INSTR_SLOT_ALU_END; j++) {
386bf215546Sopenharmony_ci      if (store->child == instr->slots[j])
387bf215546Sopenharmony_ci         goto out;
388bf215546Sopenharmony_ci   }
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci   instr->alu_num_slot_needed_by_store--;
391bf215546Sopenharmony_ci
392bf215546Sopenharmony_ci   if (store->child->sched.next_max_node &&
393bf215546Sopenharmony_ci       !store->child->sched.complex_allowed) {
394bf215546Sopenharmony_ci      instr->alu_num_slot_needed_by_non_cplx_store--;
395bf215546Sopenharmony_ci   }
396bf215546Sopenharmony_ci
397bf215546Sopenharmony_ciout:
398bf215546Sopenharmony_ci   if (!instr->slots[other_slot])
399bf215546Sopenharmony_ci      instr->store_content[component >> 1] = GPIR_INSTR_STORE_NONE;
400bf215546Sopenharmony_ci}
401bf215546Sopenharmony_ci
402bf215546Sopenharmony_cistatic bool gpir_instr_spill_move(gpir_instr *instr, int slot, int spill_to_start)
403bf215546Sopenharmony_ci{
404bf215546Sopenharmony_ci   gpir_node *node = instr->slots[slot];
405bf215546Sopenharmony_ci   if (!node)
406bf215546Sopenharmony_ci      return true;
407bf215546Sopenharmony_ci
408bf215546Sopenharmony_ci   if (node->op != gpir_op_mov)
409bf215546Sopenharmony_ci      return false;
410bf215546Sopenharmony_ci
411bf215546Sopenharmony_ci   for (int i = spill_to_start; i <= GPIR_INSTR_SLOT_DIST_TWO_END; i++) {
412bf215546Sopenharmony_ci      if (i != slot && !instr->slots[i] &&
413bf215546Sopenharmony_ci          gpir_instr_check_acc_same_op(instr, node, i)) {
414bf215546Sopenharmony_ci         instr->slots[i] = node;
415bf215546Sopenharmony_ci         instr->slots[slot] = NULL;
416bf215546Sopenharmony_ci         node->sched.pos = i;
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_ci         gpir_debug("instr %d spill move %d from slot %d to %d\n",
419bf215546Sopenharmony_ci                    instr->index, node->index, slot, i);
420bf215546Sopenharmony_ci         return true;
421bf215546Sopenharmony_ci      }
422bf215546Sopenharmony_ci   }
423bf215546Sopenharmony_ci
424bf215546Sopenharmony_ci   return false;
425bf215546Sopenharmony_ci}
426bf215546Sopenharmony_ci
427bf215546Sopenharmony_cistatic bool gpir_instr_slot_free(gpir_instr *instr, gpir_node *node)
428bf215546Sopenharmony_ci{
429bf215546Sopenharmony_ci   if (node->op == gpir_op_mov ||
430bf215546Sopenharmony_ci       node->sched.pos > GPIR_INSTR_SLOT_DIST_TWO_END) {
431bf215546Sopenharmony_ci      if (instr->slots[node->sched.pos])
432bf215546Sopenharmony_ci         return false;
433bf215546Sopenharmony_ci   }
434bf215546Sopenharmony_ci   else {
435bf215546Sopenharmony_ci      /* for node needs dist two slot, if the slot has a move, we can
436bf215546Sopenharmony_ci       * spill it to other dist two slot without any side effect */
437bf215546Sopenharmony_ci      int spill_to_start = GPIR_INSTR_SLOT_MUL0;
438bf215546Sopenharmony_ci      if (node->op == gpir_op_complex1 || node->op == gpir_op_select)
439bf215546Sopenharmony_ci         spill_to_start = GPIR_INSTR_SLOT_ADD0;
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_ci      if (!gpir_instr_spill_move(instr, node->sched.pos, spill_to_start))
442bf215546Sopenharmony_ci         return false;
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci      if (node->op == gpir_op_complex1 || node->op == gpir_op_select) {
445bf215546Sopenharmony_ci         if (!gpir_instr_spill_move(instr, GPIR_INSTR_SLOT_MUL1, spill_to_start))
446bf215546Sopenharmony_ci            return false;
447bf215546Sopenharmony_ci      }
448bf215546Sopenharmony_ci   }
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_ci   return true;
451bf215546Sopenharmony_ci}
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_cibool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node)
454bf215546Sopenharmony_ci{
455bf215546Sopenharmony_ci   instr->slot_difference = 0;
456bf215546Sopenharmony_ci   instr->non_cplx_slot_difference = 0;
457bf215546Sopenharmony_ci
458bf215546Sopenharmony_ci   if (!gpir_instr_slot_free(instr, node))
459bf215546Sopenharmony_ci      return false;
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci   if (node->sched.pos >= GPIR_INSTR_SLOT_ALU_BEGIN &&
462bf215546Sopenharmony_ci       node->sched.pos <= GPIR_INSTR_SLOT_ALU_END) {
463bf215546Sopenharmony_ci      if (!gpir_instr_insert_alu_check(instr, node))
464bf215546Sopenharmony_ci         return false;
465bf215546Sopenharmony_ci   }
466bf215546Sopenharmony_ci   else if (node->sched.pos >= GPIR_INSTR_SLOT_REG0_LOAD0 &&
467bf215546Sopenharmony_ci            node->sched.pos <= GPIR_INSTR_SLOT_REG0_LOAD3) {
468bf215546Sopenharmony_ci      if (!gpir_instr_insert_reg0_check(instr, node))
469bf215546Sopenharmony_ci         return false;
470bf215546Sopenharmony_ci   }
471bf215546Sopenharmony_ci   else if (node->sched.pos >= GPIR_INSTR_SLOT_REG1_LOAD0 &&
472bf215546Sopenharmony_ci            node->sched.pos <= GPIR_INSTR_SLOT_REG1_LOAD3) {
473bf215546Sopenharmony_ci      if (!gpir_instr_insert_reg1_check(instr, node))
474bf215546Sopenharmony_ci         return false;
475bf215546Sopenharmony_ci   }
476bf215546Sopenharmony_ci   else if (node->sched.pos >= GPIR_INSTR_SLOT_MEM_LOAD0 &&
477bf215546Sopenharmony_ci            node->sched.pos <= GPIR_INSTR_SLOT_MEM_LOAD3) {
478bf215546Sopenharmony_ci      if (!gpir_instr_insert_mem_check(instr, node))
479bf215546Sopenharmony_ci         return false;
480bf215546Sopenharmony_ci   }
481bf215546Sopenharmony_ci   else if (node->sched.pos >= GPIR_INSTR_SLOT_STORE0 &&
482bf215546Sopenharmony_ci            node->sched.pos <= GPIR_INSTR_SLOT_STORE3) {
483bf215546Sopenharmony_ci      if (!gpir_instr_insert_store_check(instr, node))
484bf215546Sopenharmony_ci         return false;
485bf215546Sopenharmony_ci   }
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci   instr->slots[node->sched.pos] = node;
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_ci   if (node->op == gpir_op_complex1 || node->op == gpir_op_select)
490bf215546Sopenharmony_ci      instr->slots[GPIR_INSTR_SLOT_MUL1] = node;
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci   return true;
493bf215546Sopenharmony_ci}
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_civoid gpir_instr_remove_node(gpir_instr *instr, gpir_node *node)
496bf215546Sopenharmony_ci{
497bf215546Sopenharmony_ci   assert(node->sched.pos >= 0);
498bf215546Sopenharmony_ci
499bf215546Sopenharmony_ci   /* This can happen if we merge duplicate loads in the scheduler. */
500bf215546Sopenharmony_ci   if (instr->slots[node->sched.pos] != node) {
501bf215546Sopenharmony_ci      node->sched.pos = -1;
502bf215546Sopenharmony_ci      node->sched.instr = NULL;
503bf215546Sopenharmony_ci      return;
504bf215546Sopenharmony_ci   }
505bf215546Sopenharmony_ci
506bf215546Sopenharmony_ci   if (node->sched.pos >= GPIR_INSTR_SLOT_ALU_BEGIN &&
507bf215546Sopenharmony_ci       node->sched.pos <= GPIR_INSTR_SLOT_ALU_END)
508bf215546Sopenharmony_ci      gpir_instr_remove_alu(instr, node);
509bf215546Sopenharmony_ci   else if (node->sched.pos >= GPIR_INSTR_SLOT_REG0_LOAD0 &&
510bf215546Sopenharmony_ci            node->sched.pos <= GPIR_INSTR_SLOT_REG0_LOAD3)
511bf215546Sopenharmony_ci      gpir_instr_remove_reg0(instr, node);
512bf215546Sopenharmony_ci   else if (node->sched.pos >= GPIR_INSTR_SLOT_REG1_LOAD0 &&
513bf215546Sopenharmony_ci            node->sched.pos <= GPIR_INSTR_SLOT_REG1_LOAD3)
514bf215546Sopenharmony_ci      gpir_instr_remove_reg1(instr, node);
515bf215546Sopenharmony_ci   else if (node->sched.pos >= GPIR_INSTR_SLOT_MEM_LOAD0 &&
516bf215546Sopenharmony_ci            node->sched.pos <= GPIR_INSTR_SLOT_MEM_LOAD3)
517bf215546Sopenharmony_ci      gpir_instr_remove_mem(instr, node);
518bf215546Sopenharmony_ci   else if (node->sched.pos >= GPIR_INSTR_SLOT_STORE0 &&
519bf215546Sopenharmony_ci            node->sched.pos <= GPIR_INSTR_SLOT_STORE3)
520bf215546Sopenharmony_ci      gpir_instr_remove_store(instr, node);
521bf215546Sopenharmony_ci
522bf215546Sopenharmony_ci   instr->slots[node->sched.pos] = NULL;
523bf215546Sopenharmony_ci
524bf215546Sopenharmony_ci   if (node->op == gpir_op_complex1 || node->op == gpir_op_select)
525bf215546Sopenharmony_ci      instr->slots[GPIR_INSTR_SLOT_MUL1] = NULL;
526bf215546Sopenharmony_ci
527bf215546Sopenharmony_ci   node->sched.pos = -1;
528bf215546Sopenharmony_ci   node->sched.instr = NULL;
529bf215546Sopenharmony_ci}
530bf215546Sopenharmony_ci
531bf215546Sopenharmony_civoid gpir_instr_print_prog(gpir_compiler *comp)
532bf215546Sopenharmony_ci{
533bf215546Sopenharmony_ci   struct {
534bf215546Sopenharmony_ci      int len;
535bf215546Sopenharmony_ci      char *name;
536bf215546Sopenharmony_ci   } fields[] = {
537bf215546Sopenharmony_ci      [GPIR_INSTR_SLOT_MUL0] = { 4, "mul0" },
538bf215546Sopenharmony_ci      [GPIR_INSTR_SLOT_MUL1] = { 4, "mul1" },
539bf215546Sopenharmony_ci      [GPIR_INSTR_SLOT_ADD0] = { 4, "add0" },
540bf215546Sopenharmony_ci      [GPIR_INSTR_SLOT_ADD1] = { 4, "add1" },
541bf215546Sopenharmony_ci      [GPIR_INSTR_SLOT_REG0_LOAD3] = { 15, "load0" },
542bf215546Sopenharmony_ci      [GPIR_INSTR_SLOT_REG1_LOAD3] = { 15, "load1" },
543bf215546Sopenharmony_ci      [GPIR_INSTR_SLOT_MEM_LOAD3] = { 15, "load2" },
544bf215546Sopenharmony_ci      [GPIR_INSTR_SLOT_STORE3] = { 15, "store" },
545bf215546Sopenharmony_ci      [GPIR_INSTR_SLOT_COMPLEX] = { 4, "cmpl" },
546bf215546Sopenharmony_ci      [GPIR_INSTR_SLOT_PASS] = { 4, "pass" },
547bf215546Sopenharmony_ci   };
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci   printf("========prog instr========\n");
550bf215546Sopenharmony_ci   printf("     ");
551bf215546Sopenharmony_ci   for (int i = 0; i < GPIR_INSTR_SLOT_NUM; i++) {
552bf215546Sopenharmony_ci      if (fields[i].len)
553bf215546Sopenharmony_ci         printf("%-*s ", fields[i].len, fields[i].name);
554bf215546Sopenharmony_ci   }
555bf215546Sopenharmony_ci   printf("\n");
556bf215546Sopenharmony_ci
557bf215546Sopenharmony_ci   int index = 0;
558bf215546Sopenharmony_ci   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
559bf215546Sopenharmony_ci      list_for_each_entry(gpir_instr, instr, &block->instr_list, list) {
560bf215546Sopenharmony_ci         printf("%03d: ", index++);
561bf215546Sopenharmony_ci
562bf215546Sopenharmony_ci         char buff[16] = "null";
563bf215546Sopenharmony_ci         int start = 0;
564bf215546Sopenharmony_ci         for (int j = 0; j < GPIR_INSTR_SLOT_NUM; j++) {
565bf215546Sopenharmony_ci            gpir_node *node = instr->slots[j];
566bf215546Sopenharmony_ci            if (fields[j].len) {
567bf215546Sopenharmony_ci               if (node)
568bf215546Sopenharmony_ci                  snprintf(buff + start, sizeof(buff) - start, "%d", node->index);
569bf215546Sopenharmony_ci               printf("%-*s ", fields[j].len, buff);
570bf215546Sopenharmony_ci
571bf215546Sopenharmony_ci               strcpy(buff, "null");
572bf215546Sopenharmony_ci               start = 0;
573bf215546Sopenharmony_ci            }
574bf215546Sopenharmony_ci            else {
575bf215546Sopenharmony_ci               if (node)
576bf215546Sopenharmony_ci                  start += snprintf(buff + start, sizeof(buff) - start, "%d", node->index);
577bf215546Sopenharmony_ci               start += snprintf(buff + start, sizeof(buff) - start, "|");
578bf215546Sopenharmony_ci            }
579bf215546Sopenharmony_ci         }
580bf215546Sopenharmony_ci         printf("\n");
581bf215546Sopenharmony_ci      }
582bf215546Sopenharmony_ci      printf("-----------------------\n");
583bf215546Sopenharmony_ci   }
584bf215546Sopenharmony_ci   printf("==========================\n");
585bf215546Sopenharmony_ci}
586