1/*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25#include "util/u_math.h"
26#include "util/ralloc.h"
27
28#include "gpir.h"
29
30const gpir_op_info gpir_op_infos[] = {
31   [gpir_op_unsupported] = {
32      .name = "unsupported",
33   },
34   [gpir_op_mov] = {
35      .name = "mov",
36      .slots = (int []) {
37         GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_MUL1,
38         GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_MUL0,
39         GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_COMPLEX,
40         GPIR_INSTR_SLOT_END
41      },
42   },
43   [gpir_op_mul] = {
44      .name = "mul",
45      .dest_neg = true,
46      .slots = (int []) { GPIR_INSTR_SLOT_MUL1, GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
47   },
48   [gpir_op_select] = {
49      .name = "select",
50      .dest_neg = true,
51      .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
52      .may_consume_two_slots = true,
53   },
54   [gpir_op_complex1] = {
55      .name = "complex1",
56      .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
57      .spillless = true,
58      .may_consume_two_slots = true,
59   },
60   [gpir_op_complex2] = {
61      .name = "complex2",
62      .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
63      .spillless = true,
64      .schedule_first = true,
65   },
66   [gpir_op_add] = {
67      .name = "add",
68      .src_neg = {true, true, false, false},
69      .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
70   },
71   [gpir_op_floor] = {
72      .name = "floor",
73      .src_neg = {true, false, false, false},
74      .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
75      .spillless = true,
76      .may_consume_two_slots = true,
77   },
78   [gpir_op_sign] = {
79      .name = "sign",
80      .src_neg = {true, false, false, false},
81      .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
82      .spillless = true,
83      .may_consume_two_slots = true,
84   },
85   [gpir_op_ge] = {
86      .name = "ge",
87      .src_neg = {true, true, false, false},
88      .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
89      .spillless = true,
90      .may_consume_two_slots = true,
91   },
92   [gpir_op_lt] = {
93      .name = "lt",
94      .src_neg = {true, true, false, false},
95      .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
96      .spillless = true,
97      .may_consume_two_slots = true,
98   },
99   [gpir_op_min] = {
100      .name = "min",
101      .src_neg = {true, true, false, false},
102      .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
103      .spillless = true,
104      .may_consume_two_slots = true,
105   },
106   [gpir_op_max] = {
107      .name = "max",
108      .src_neg = {true, true, false, false},
109      .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
110      .spillless = true,
111      .may_consume_two_slots = true,
112   },
113   [gpir_op_abs] = {
114      .name = "abs",
115      .src_neg = {true, true, false, false},
116   },
117   [gpir_op_neg] = {
118      .name = "neg",
119      .slots = (int []) {
120         GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_MUL1,
121         GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_MUL0,
122         GPIR_INSTR_SLOT_END
123      },
124   },
125   [gpir_op_not] = {
126      .name = "not",
127      .src_neg = {true, true, false, false},
128      .slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },
129   },
130   [gpir_op_eq] = {
131      .name = "eq",
132      .slots = (int []) {
133         GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END
134      },
135   },
136   [gpir_op_ne] = {
137      .name = "ne",
138      .slots = (int []) {
139         GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END
140      },
141   },
142   [gpir_op_clamp_const] = {
143      .name = "clamp_const",
144   },
145   [gpir_op_preexp2] = {
146      .name = "preexp2",
147      .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
148      .spillless = true,
149      .schedule_first = true,
150   },
151   [gpir_op_postlog2] = {
152      .name = "postlog2",
153      .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
154   },
155   [gpir_op_exp2_impl] = {
156      .name = "exp2_impl",
157      .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
158      .spillless = true,
159      .schedule_first = true,
160   },
161   [gpir_op_log2_impl] = {
162      .name = "log2_impl",
163      .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
164      .spillless = true,
165      .schedule_first = true,
166   },
167   [gpir_op_rcp_impl] = {
168      .name = "rcp_impl",
169      .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
170      .spillless = true,
171      .schedule_first = true,
172   },
173   [gpir_op_rsqrt_impl] = {
174      .name = "rsqrt_impl",
175      .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
176      .spillless = true,
177      .schedule_first = true,
178   },
179   [gpir_op_load_uniform] = {
180      .name = "ld_uni",
181      .slots = (int []) {
182         GPIR_INSTR_SLOT_MEM_LOAD0, GPIR_INSTR_SLOT_MEM_LOAD1,
183         GPIR_INSTR_SLOT_MEM_LOAD2, GPIR_INSTR_SLOT_MEM_LOAD3,
184         GPIR_INSTR_SLOT_END
185      },
186      .type = gpir_node_type_load,
187   },
188   [gpir_op_load_temp] = {
189      .name = "ld_tmp",
190      .type = gpir_node_type_load,
191   },
192   [gpir_op_load_attribute] = {
193      .name = "ld_att",
194      .slots = (int []) {
195         GPIR_INSTR_SLOT_REG0_LOAD0, GPIR_INSTR_SLOT_REG0_LOAD1,
196         GPIR_INSTR_SLOT_REG0_LOAD2, GPIR_INSTR_SLOT_REG0_LOAD3,
197         GPIR_INSTR_SLOT_END
198      },
199      .type = gpir_node_type_load,
200   },
201   [gpir_op_load_reg] = {
202      .name = "ld_reg",
203      .slots = (int []) {
204         GPIR_INSTR_SLOT_REG1_LOAD0, GPIR_INSTR_SLOT_REG1_LOAD1,
205         GPIR_INSTR_SLOT_REG1_LOAD2, GPIR_INSTR_SLOT_REG1_LOAD3,
206         GPIR_INSTR_SLOT_REG0_LOAD0, GPIR_INSTR_SLOT_REG0_LOAD1,
207         GPIR_INSTR_SLOT_REG0_LOAD2, GPIR_INSTR_SLOT_REG0_LOAD3,
208         GPIR_INSTR_SLOT_END
209      },
210      .type = gpir_node_type_load,
211      .spillless = true,
212   },
213   [gpir_op_store_temp] = {
214      .name = "st_tmp",
215      .type = gpir_node_type_store,
216   },
217   [gpir_op_store_reg] = {
218      .name = "st_reg",
219      .slots = (int []) {
220         GPIR_INSTR_SLOT_STORE0, GPIR_INSTR_SLOT_STORE1,
221         GPIR_INSTR_SLOT_STORE2, GPIR_INSTR_SLOT_STORE3,
222         GPIR_INSTR_SLOT_END
223      },
224      .type = gpir_node_type_store,
225      .spillless = true,
226   },
227   [gpir_op_store_varying] = {
228      .name = "st_var",
229      .slots = (int []) {
230         GPIR_INSTR_SLOT_STORE0, GPIR_INSTR_SLOT_STORE1,
231         GPIR_INSTR_SLOT_STORE2, GPIR_INSTR_SLOT_STORE3,
232         GPIR_INSTR_SLOT_END
233      },
234      .type = gpir_node_type_store,
235      .spillless = true,
236   },
237   [gpir_op_store_temp_load_off0] = {
238      .name = "st_of0",
239      .type = gpir_node_type_store,
240   },
241   [gpir_op_store_temp_load_off1] = {
242      .name = "st_of1",
243      .type = gpir_node_type_store,
244   },
245   [gpir_op_store_temp_load_off2] = {
246      .name = "st_of2",
247      .type = gpir_node_type_store,
248   },
249   [gpir_op_branch_cond] = {
250      .name = "branch_cond",
251      .type = gpir_node_type_branch,
252      .schedule_first = true,
253      .slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
254   },
255   [gpir_op_const] = {
256      .name = "const",
257      .type = gpir_node_type_const,
258   },
259   [gpir_op_exp2] = {
260      .name = "exp2",
261   },
262   [gpir_op_log2] = {
263      .name = "log2",
264   },
265   [gpir_op_rcp] = {
266      .name = "rcp",
267   },
268   [gpir_op_rsqrt] = {
269      .name = "rsqrt",
270   },
271   [gpir_op_ceil] = {
272      .name = "ceil",
273   },
274   [gpir_op_exp] = {
275      .name = "exp",
276   },
277   [gpir_op_log] = {
278      .name = "log",
279   },
280   [gpir_op_sin] = {
281      .name = "sin",
282   },
283   [gpir_op_cos] = {
284      .name = "cos",
285   },
286   [gpir_op_tan] = {
287      .name = "tan",
288   },
289   [gpir_op_dummy_f] = {
290      .name = "dummy_f",
291      .type = gpir_node_type_alu,
292      .spillless = true,
293   },
294   [gpir_op_dummy_m] = {
295      .name = "dummy_m",
296      .type = gpir_node_type_alu,
297   },
298   [gpir_op_branch_uncond] = {
299      .name = "branch_uncond",
300      .type = gpir_node_type_branch,
301   },
302};
303
304void *gpir_node_create(gpir_block *block, gpir_op op)
305{
306   static const int node_size[] = {
307      [gpir_node_type_alu] = sizeof(gpir_alu_node),
308      [gpir_node_type_const] = sizeof(gpir_const_node),
309      [gpir_node_type_load] = sizeof(gpir_load_node),
310      [gpir_node_type_store] = sizeof(gpir_store_node),
311      [gpir_node_type_branch] = sizeof(gpir_branch_node),
312   };
313
314   gpir_node_type type = gpir_op_infos[op].type;
315   int size = node_size[type];
316   gpir_node *node = rzalloc_size(block, size);
317   if (unlikely(!node))
318      return NULL;
319
320   snprintf(node->name, sizeof(node->name), "new");
321
322   list_inithead(&node->succ_list);
323   list_inithead(&node->pred_list);
324
325   node->op = op;
326   node->type = type;
327   node->index = block->comp->cur_index++;
328   node->block = block;
329
330   return node;
331}
332
333gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type)
334{
335   /* don't add dep for two nodes from different block */
336   if (succ->block != pred->block)
337      return NULL;
338
339   /* don't add self loop dep */
340   if (succ == pred)
341      return NULL;
342
343   /* don't add duplicated dep */
344   gpir_node_foreach_pred(succ, dep) {
345      if (dep->pred == pred) {
346         /* use stronger dependency */
347         if (dep->type > type)
348            dep->type = type;
349         return dep;
350      }
351   }
352
353   gpir_dep *dep = ralloc(succ, gpir_dep);
354   dep->type = type;
355   dep->pred = pred;
356   dep->succ = succ;
357   list_addtail(&dep->pred_link, &succ->pred_list);
358   list_addtail(&dep->succ_link, &pred->succ_list);
359   return dep;
360}
361
362void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred)
363{
364   gpir_node_foreach_pred(succ, dep) {
365      if (dep->pred == pred) {
366         list_del(&dep->succ_link);
367         list_del(&dep->pred_link);
368         ralloc_free(dep);
369         return;
370      }
371   }
372}
373
374void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child,
375                             gpir_node *new_child)
376{
377   if (parent->type == gpir_node_type_alu) {
378      gpir_alu_node *alu = gpir_node_to_alu(parent);
379      for (int i = 0; i < alu->num_child; i++) {
380         if (alu->children[i] == old_child)
381            alu->children[i] = new_child;
382      }
383   }
384   else if (parent->type == gpir_node_type_store) {
385      gpir_store_node *store = gpir_node_to_store(parent);
386      if (store->child == old_child)
387         store->child = new_child;
388   } else if (parent->type == gpir_node_type_branch) {
389      gpir_branch_node *branch = gpir_node_to_branch(parent);
390      if (branch->cond == old_child)
391         branch->cond = new_child;
392   }
393}
394
395void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred)
396{
397   list_del(&dep->succ_link);
398   dep->pred = new_pred;
399   list_addtail(&dep->succ_link, &new_pred->succ_list);
400}
401
402void gpir_node_replace_succ(gpir_node *dst, gpir_node *src)
403{
404   gpir_node_foreach_succ_safe(src, dep) {
405      if (dep->type != GPIR_DEP_INPUT)
406         continue;
407
408      gpir_node_replace_pred(dep, dst);
409      gpir_node_replace_child(dep->succ, src, dst);
410   }
411}
412
413void gpir_node_insert_child(gpir_node *parent, gpir_node *child,
414                            gpir_node *insert_child)
415{
416   gpir_node_foreach_pred(parent, dep) {
417      if (dep->pred == child) {
418         gpir_node_replace_pred(dep, insert_child);
419         gpir_node_replace_child(parent, child, insert_child);
420         break;
421      }
422   }
423}
424
425void gpir_node_delete(gpir_node *node)
426{
427   gpir_node_foreach_succ_safe(node, dep) {
428      list_del(&dep->succ_link);
429      list_del(&dep->pred_link);
430      ralloc_free(dep);
431   }
432
433   gpir_node_foreach_pred_safe(node, dep) {
434      list_del(&dep->succ_link);
435      list_del(&dep->pred_link);
436      ralloc_free(dep);
437   }
438
439   list_del(&node->list);
440   ralloc_free(node);
441}
442
443static void gpir_node_print_node(gpir_node *node, int type, int space)
444{
445   static char *dep_name[] = {
446      [GPIR_DEP_INPUT] = "input",
447      [GPIR_DEP_OFFSET] = "offset",
448      [GPIR_DEP_READ_AFTER_WRITE] = "RaW",
449      [GPIR_DEP_WRITE_AFTER_READ] = "WaR",
450   };
451
452   for (int i = 0; i < space; i++)
453      printf(" ");
454   printf("%s%s %d %s %s\n", node->printed && !gpir_node_is_leaf(node) ? "+" : "",
455          gpir_op_infos[node->op].name, node->index, node->name, dep_name[type]);
456
457   if (!node->printed) {
458      gpir_node_foreach_pred(node, dep) {
459         gpir_node_print_node(dep->pred, dep->type, space + 2);
460      }
461
462      node->printed = true;
463   }
464}
465
466void gpir_node_print_prog_dep(gpir_compiler *comp)
467{
468   if (!(lima_debug & LIMA_DEBUG_GP))
469      return;
470
471   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
472      list_for_each_entry(gpir_node, node, &block->node_list, list) {
473         node->printed = false;
474      }
475   }
476
477   printf("======== node prog dep ========\n");
478   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
479      list_for_each_entry(gpir_node, node, &block->node_list, list) {
480         if (gpir_node_is_root(node))
481            gpir_node_print_node(node, GPIR_DEP_INPUT, 0);
482      }
483      printf("----------------------------\n");
484   }
485}
486
487void gpir_node_print_prog_seq(gpir_compiler *comp)
488{
489   if (!(lima_debug & LIMA_DEBUG_GP))
490      return;
491
492   int index = 0;
493   printf("======== node prog seq ========\n");
494   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
495      list_for_each_entry(gpir_node, node, &block->node_list, list) {
496         printf("%03d: %s %d %s pred", index++, gpir_op_infos[node->op].name,
497                node->index, node->name);
498         gpir_node_foreach_pred(node, dep) {
499            printf(" %d", dep->pred->index);
500         }
501         printf(" succ");
502         gpir_node_foreach_succ(node, dep) {
503            printf(" %d", dep->succ->index);
504         }
505         printf("\n");
506      }
507      printf("----------------------------\n");
508   }
509}
510