1/*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25#include "util/ralloc.h"
26
27#include "gpir.h"
28#include "lima_context.h"
29
/* Move all constant values out of the IR and into the program's uniform-style
 * constant buffer, replacing each used const node with a load_uniform node.
 *
 * Pass 1 deletes unused (root) const nodes and counts the rest; pass 2
 * allocates the constant array, copies each value into it, and rewrites the
 * const node's successors to read from a load_uniform instead.
 *
 * Returns false on allocation failure, true otherwise.
 */
static bool gpir_lower_const(gpir_compiler *comp)
{
   /* Pass 1: drop dead consts, count live ones. */
   int num_constant = 0;
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->op == gpir_op_const) {
            if (gpir_node_is_root(node))
               gpir_node_delete(node);
            else
               num_constant++;
         }
      }
   }

   if (num_constant) {
      /* Constant storage lives as long as the program (ralloc'ed off it). */
      union fi *constant = ralloc_array(comp->prog, union fi, num_constant);
      if (!constant)
         return false;

      comp->prog->constant = constant;
      comp->prog->state.constant_size = num_constant * sizeof(union fi);

      /* Pass 2: replace each live const with a load_uniform reading the
       * slot where its value is stored.  Constants are packed four scalars
       * per uniform vector, hence index >> 2 for the vector index and
       * index % 4 for the component within it.
       */
      int index = 0;
      list_for_each_entry(gpir_block, block, &comp->block_list, list) {
         list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
            if (node->op == gpir_op_const) {
               gpir_const_node *c = gpir_node_to_const(node);

               if (!gpir_node_is_root(node)) {
                  gpir_load_node *load = gpir_node_create(block, gpir_op_load_uniform);
                  if (unlikely(!load))
                     return false;

                  load->index = comp->constant_base + (index >> 2);
                  load->component = index % 4;
                  constant[index++] = c->value;

                  /* Point every user of the const at the new load. */
                  gpir_node_replace_succ(&load->node, node);

                  list_addtail(&load->node.list, &node->list);

                  gpir_debug("lower const create uniform %d for const %d\n",
                             load->node.index, node->index);
               }

               gpir_node_delete(node);
            }
         }
      }
   }

   return true;
}
83
84/* duplicate load to all its successors */
85static bool gpir_lower_load(gpir_compiler *comp)
86{
87   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
88      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
89         if (node->type == gpir_node_type_load) {
90            gpir_load_node *load = gpir_node_to_load(node);
91
92            bool first = true;
93            gpir_node_foreach_succ_safe(node, dep) {
94               gpir_node *succ = dep->succ;
95
96               if (first) {
97                  first = false;
98                  continue;
99               }
100
101               gpir_node *new = gpir_node_create(succ->block, node->op);
102               if (unlikely(!new))
103                  return false;
104               list_addtail(&new->list, &succ->list);
105
106               gpir_debug("lower load create %d from %d for succ %d\n",
107                          new->index, node->index, succ->index);
108
109               gpir_load_node *nload = gpir_node_to_load(new);
110               nload->index = load->index;
111               nload->component = load->component;
112               nload->reg = load->reg;
113
114               gpir_node_replace_pred(dep, new);
115               gpir_node_replace_child(succ, node, new);
116            }
117         }
118      }
119   }
120
121   return true;
122}
123
124static bool gpir_lower_neg(gpir_block *block, gpir_node *node)
125{
126   gpir_alu_node *neg = gpir_node_to_alu(node);
127   gpir_node *child = neg->children[0];
128
129   /* check if child can dest negate */
130   if (child->type == gpir_node_type_alu) {
131      /* negate must be its only successor */
132      if (list_is_singular(&child->succ_list) &&
133          gpir_op_infos[child->op].dest_neg) {
134         gpir_alu_node *alu = gpir_node_to_alu(child);
135         alu->dest_negate = !alu->dest_negate;
136
137         gpir_node_replace_succ(child, node);
138         gpir_node_delete(node);
139         return true;
140      }
141   }
142
143   /* check if child can src negate */
144   gpir_node_foreach_succ_safe(node, dep) {
145      gpir_node *succ = dep->succ;
146      if (succ->type != gpir_node_type_alu)
147         continue;
148
149      bool success = true;
150      gpir_alu_node *alu = gpir_node_to_alu(dep->succ);
151      for (int i = 0; i < alu->num_child; i++) {
152         if (alu->children[i] == node) {
153            if (gpir_op_infos[succ->op].src_neg[i]) {
154               alu->children_negate[i] = !alu->children_negate[i];
155               alu->children[i] = child;
156            }
157            else
158               success = false;
159         }
160      }
161
162      if (success)
163         gpir_node_replace_pred(dep, child);
164   }
165
166   if (gpir_node_is_root(node))
167      gpir_node_delete(node);
168
169   return true;
170}
171
172static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
173{
174   gpir_alu_node *alu = gpir_node_to_alu(node);
175   gpir_node *child = alu->children[0];
176
177   if (node->op == gpir_op_exp2) {
178      gpir_alu_node *preexp2 = gpir_node_create(block, gpir_op_preexp2);
179      if (unlikely(!preexp2))
180         return false;
181
182      preexp2->children[0] = child;
183      preexp2->num_child = 1;
184      gpir_node_add_dep(&preexp2->node, child, GPIR_DEP_INPUT);
185      list_addtail(&preexp2->node.list, &node->list);
186
187      child = &preexp2->node;
188   }
189
190   gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2);
191   if (unlikely(!complex2))
192      return false;
193
194   complex2->children[0] = child;
195   complex2->num_child = 1;
196   gpir_node_add_dep(&complex2->node, child, GPIR_DEP_INPUT);
197   list_addtail(&complex2->node.list, &node->list);
198
199   int impl_op = 0;
200   switch (node->op) {
201   case gpir_op_rcp:
202      impl_op = gpir_op_rcp_impl;
203      break;
204   case gpir_op_rsqrt:
205      impl_op = gpir_op_rsqrt_impl;
206      break;
207   case gpir_op_exp2:
208      impl_op = gpir_op_exp2_impl;
209      break;
210   case gpir_op_log2:
211      impl_op = gpir_op_log2_impl;
212      break;
213   default:
214      assert(0);
215   }
216
217   gpir_alu_node *impl = gpir_node_create(block, impl_op);
218   if (unlikely(!impl))
219      return false;
220
221   impl->children[0] = child;
222   impl->num_child = 1;
223   gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT);
224   list_addtail(&impl->node.list, &node->list);
225
226   gpir_alu_node *complex1 = gpir_node_create(block, gpir_op_complex1);
227   complex1->children[0] = &impl->node;
228   complex1->children[1] = &complex2->node;
229   complex1->children[2] = child;
230   complex1->num_child = 3;
231   gpir_node_add_dep(&complex1->node, child, GPIR_DEP_INPUT);
232   gpir_node_add_dep(&complex1->node, &impl->node, GPIR_DEP_INPUT);
233   gpir_node_add_dep(&complex1->node, &complex2->node, GPIR_DEP_INPUT);
234   list_addtail(&complex1->node.list, &node->list);
235
236   gpir_node *result = &complex1->node;
237
238   if (node->op == gpir_op_log2) {
239      gpir_alu_node *postlog2 = gpir_node_create(block, gpir_op_postlog2);
240      if (unlikely(!postlog2))
241         return false;
242
243      postlog2->children[0] = result;
244      postlog2->num_child = 1;
245      gpir_node_add_dep(&postlog2->node, result, GPIR_DEP_INPUT);
246      list_addtail(&postlog2->node.list, &node->list);
247
248      result = &postlog2->node;
249   }
250
251   gpir_node_replace_succ(result, node);
252   gpir_node_delete(node);
253
254   return true;
255}
256
257static bool gpir_lower_node_may_consume_two_slots(gpir_compiler *comp)
258{
259   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
260      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
261         if (gpir_op_infos[node->op].may_consume_two_slots) {
262            /* dummy_f/m are auxiliary nodes for value reg alloc:
263             * 1. before reg alloc, create fake nodes dummy_f, dummy_m,
264             *    so the tree become: (dummy_m (node dummy_f))
265             *    dummy_m can be spilled, but other nodes in the tree can't
266             *    be spilled.
267             * 2. After reg allocation and fake dep add, merge all deps of
268             *    dummy_m and dummy_f to node and remove dummy_m & dummy_f
269             *
270             * We may also not use dummy_f/m, but alloc two value reg for
271             * node. But that means we need to make sure there're 2 free
272             * slot after the node successors, but we just need one slot
273             * after to be able to schedule it because we can use one move for
274             * the two slot node. It's also not easy to handle the spill case
275             * for the alloc 2 value method.
276             *
277             * With the dummy_f/m method, there's no such requirement, the
278             * node can be scheduled only when there's two slots for it,
279             * otherwise a move. And the node can be spilled with one reg.
280             */
281            gpir_node *dummy_m = gpir_node_create(block, gpir_op_dummy_m);
282            if (unlikely(!dummy_m))
283               return false;
284            list_add(&dummy_m->list, &node->list);
285
286            gpir_node *dummy_f = gpir_node_create(block, gpir_op_dummy_f);
287            if (unlikely(!dummy_f))
288               return false;
289            list_add(&dummy_f->list, &node->list);
290
291            gpir_alu_node *alu = gpir_node_to_alu(dummy_m);
292            alu->children[0] = node;
293            alu->children[1] = dummy_f;
294            alu->num_child = 2;
295
296            gpir_node_replace_succ(dummy_m, node);
297            gpir_node_add_dep(dummy_m, node, GPIR_DEP_INPUT);
298            gpir_node_add_dep(dummy_m, dummy_f, GPIR_DEP_INPUT);
299
300         }
301      }
302   }
303
304   return true;
305}
306
307/*
308 * There are no 'equal' or 'not-equal' opcodes.
309 * eq (a == b) is lowered to and(a >= b, b >= a)
310 * ne (a != b) is lowered to or(a < b, b < a)
311 */
312static bool gpir_lower_eq_ne(gpir_block *block, gpir_node *node)
313{
314   gpir_op cmp_node_op;
315   gpir_op node_new_op;
316   switch (node->op) {
317      case gpir_op_eq:
318         cmp_node_op = gpir_op_ge;
319         node_new_op = gpir_op_min; /* and */
320         break;
321      case gpir_op_ne:
322         cmp_node_op = gpir_op_lt;
323         node_new_op = gpir_op_max; /* or */
324         break;
325      default:
326         unreachable("bad node op");
327   }
328
329   gpir_alu_node *e = gpir_node_to_alu(node);
330
331   gpir_alu_node *cmp1 = gpir_node_create(block, cmp_node_op);
332   list_addtail(&cmp1->node.list, &node->list);
333   gpir_alu_node *cmp2 = gpir_node_create(block, cmp_node_op);
334   list_addtail(&cmp2->node.list, &node->list);
335
336   cmp1->children[0] = e->children[0];
337   cmp1->children[1] = e->children[1];
338   cmp1->num_child = 2;
339
340   cmp2->children[0] = e->children[1];
341   cmp2->children[1] = e->children[0];
342   cmp2->num_child = 2;
343
344   gpir_node_add_dep(&cmp1->node, e->children[0], GPIR_DEP_INPUT);
345   gpir_node_add_dep(&cmp1->node, e->children[1], GPIR_DEP_INPUT);
346
347   gpir_node_add_dep(&cmp2->node, e->children[0], GPIR_DEP_INPUT);
348   gpir_node_add_dep(&cmp2->node, e->children[1], GPIR_DEP_INPUT);
349
350   gpir_node_foreach_pred_safe(node, dep) {
351      gpir_node_remove_dep(node, dep->pred);
352   }
353
354   gpir_node_add_dep(node, &cmp1->node, GPIR_DEP_INPUT);
355   gpir_node_add_dep(node, &cmp2->node, GPIR_DEP_INPUT);
356
357   node->op = node_new_op;
358   e->children[0] = &cmp1->node;
359   e->children[1] = &cmp2->node;
360   e->num_child = 2;
361
362   return true;
363}
364
365/*
366 * There is no 'abs' opcode.
367 * abs(a) is lowered to max(a, -a)
368 */
369static bool gpir_lower_abs(gpir_block *block, gpir_node *node)
370{
371   gpir_alu_node *alu = gpir_node_to_alu(node);
372
373   assert(node->op == gpir_op_abs);
374
375   node->op = gpir_op_max;
376
377   alu->children[1] = alu->children[0];
378   alu->children_negate[1] = true;
379   alu->num_child = 2;
380
381   return true;
382}
383
384/*
385 * There is no 'not' opcode.
386 * not(a) is lowered to add(1, -a)
387 */
388static bool gpir_lower_not(gpir_block *block, gpir_node *node)
389{
390   gpir_alu_node *alu = gpir_node_to_alu(node);
391
392   assert(alu->node.op == gpir_op_not);
393
394   node->op = gpir_op_add;
395
396   gpir_node *node_const = gpir_node_create(block, gpir_op_const);
397   gpir_const_node *c = gpir_node_to_const(node_const);
398
399   assert(c->node.op == gpir_op_const);
400
401   list_addtail(&c->node.list, &node->list);
402   c->value.f = 1.0f;
403   gpir_node_add_dep(&alu->node, &c->node, GPIR_DEP_INPUT);
404
405   alu->children_negate[1] = !alu->children_negate[0];
406   alu->children[1] = alu->children[0];
407   alu->children[0] = &c->node;
408   alu->num_child = 2;
409
410   return true;
411}
412
413/* There is no unconditional branch instruction, so we have to lower it to a
414 * conditional branch with a condition of 1.0.
415 */
416
417static bool gpir_lower_branch_uncond(gpir_block *block, gpir_node *node)
418{
419   gpir_branch_node *branch = gpir_node_to_branch(node);
420
421   gpir_node *node_const = gpir_node_create(block, gpir_op_const);
422   gpir_const_node *c = gpir_node_to_const(node_const);
423
424   list_addtail(&c->node.list, &node->list);
425   c->value.f = 1.0f;
426   gpir_node_add_dep(&branch->node, &c->node, GPIR_DEP_INPUT);
427
428   branch->node.op = gpir_op_branch_cond;
429   branch->cond = node_const;
430
431   return true;
432}
433
/* Per-opcode lowering dispatch table, indexed by gpir_op and run by
 * gpir_pre_rsched_lower_prog().  Opcodes with a NULL entry need no
 * per-node lowering before register scheduling. */
static bool (*gpir_pre_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = {
   [gpir_op_not] = gpir_lower_not,
   [gpir_op_neg] = gpir_lower_neg,
   [gpir_op_rcp] = gpir_lower_complex,
   [gpir_op_rsqrt] = gpir_lower_complex,
   [gpir_op_exp2] = gpir_lower_complex,
   [gpir_op_log2] = gpir_lower_complex,
   [gpir_op_eq] = gpir_lower_eq_ne,
   [gpir_op_ne] = gpir_lower_eq_ne,
   [gpir_op_abs] = gpir_lower_abs,
   [gpir_op_branch_uncond] = gpir_lower_branch_uncond,
};
446
447bool gpir_pre_rsched_lower_prog(gpir_compiler *comp)
448{
449   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
450      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
451         if (gpir_pre_rsched_lower_funcs[node->op] &&
452             !gpir_pre_rsched_lower_funcs[node->op](block, node))
453            return false;
454      }
455   }
456
457   if (!gpir_lower_const(comp))
458      return false;
459
460   if (!gpir_lower_load(comp))
461      return false;
462
463   if (!gpir_lower_node_may_consume_two_slots(comp))
464      return false;
465
466   gpir_debug("pre rsched lower prog\n");
467   gpir_node_print_prog_seq(comp);
468   return true;
469}
470
471