1/*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25#include "util/u_math.h"
26#include "util/ralloc.h"
27#include "util/bitscan.h"
28
29#include "ppir.h"
30
31const ppir_op_info ppir_op_infos[] = {
32   [ppir_op_unsupported] = {
33      .name = "unsupported",
34   },
35   [ppir_op_mov] = {
36      .name = "mov",
37      .slots = (int []) {
38         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
39         PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
40         PPIR_INSTR_SLOT_END
41      },
42   },
43   [ppir_op_abs] = {
44      .name = "abs",
45   },
46   [ppir_op_neg] = {
47      .name = "neg",
48   },
49   [ppir_op_sat] = {
50      .name = "sat",
51   },
52   [ppir_op_mul] = {
53      .name = "mul",
54      .slots = (int []) {
55         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
56         PPIR_INSTR_SLOT_END
57      },
58   },
59   [ppir_op_add] = {
60      .name = "add",
61      .slots = (int []) {
62         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
63         PPIR_INSTR_SLOT_END
64      },
65   },
66   [ppir_op_sum3] = {
67      .name = "sum3",
68      .slots = (int []) {
69         PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END
70      },
71   },
72   [ppir_op_sum4] = {
73      .name = "sum4",
74      .slots = (int []) {
75         PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END
76      },
77   },
78   [ppir_op_rsqrt] = {
79      .name = "rsqrt",
80      .slots = (int []) {
81         PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
82      },
83   },
84   [ppir_op_log2] = {
85      .name = "log2",
86      .slots = (int []) {
87         PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
88      },
89   },
90   [ppir_op_exp2] = {
91      .name = "exp2",
92      .slots = (int []) {
93         PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
94      },
95   },
96   [ppir_op_sqrt] = {
97      .name = "sqrt",
98      .slots = (int []) {
99         PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
100      },
101   },
102   [ppir_op_sin] = {
103      .name = "sin",
104      .slots = (int []) {
105         PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
106      },
107   },
108   [ppir_op_cos] = {
109      .name = "cos",
110      .slots = (int []) {
111         PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
112      },
113   },
114   [ppir_op_max] = {
115      .name = "max",
116      .slots = (int []) {
117         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
118         PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
119         PPIR_INSTR_SLOT_END
120      },
121   },
122   [ppir_op_min] = {
123      .name = "min",
124      .slots = (int []) {
125         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,
126         PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
127         PPIR_INSTR_SLOT_END
128      },
129   },
130   [ppir_op_floor] = {
131      .name = "floor",
132      .slots = (int []) {
133         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
134         PPIR_INSTR_SLOT_END
135      },
136   },
137   [ppir_op_ceil] = {
138      .name = "ceil",
139      .slots = (int []) {
140         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
141         PPIR_INSTR_SLOT_END
142      },
143   },
144   [ppir_op_fract] = {
145      .name = "fract",
146      .slots = (int []) {
147         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
148         PPIR_INSTR_SLOT_END
149      },
150   },
151   [ppir_op_ddx] = {
152      .name = "ddx",
153      .slots = (int []) {
154         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
155         PPIR_INSTR_SLOT_END
156      },
157   },
158   [ppir_op_ddy] = {
159      .name = "ddy",
160      .slots = (int []) {
161         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
162         PPIR_INSTR_SLOT_END
163      },
164   },
165   [ppir_op_and] = {
166      .name = "and",
167      .slots = (int []) {
168         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
169         PPIR_INSTR_SLOT_END
170      },
171   },
172   [ppir_op_or] = {
173      .name = "or",
174      .slots = (int []) {
175         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
176         PPIR_INSTR_SLOT_END
177      },
178   },
179   [ppir_op_xor] = {
180      .name = "xor",
181      .slots = (int []) {
182         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
183         PPIR_INSTR_SLOT_END
184      },
185   },
186   [ppir_op_not] = {
187      .name = "not",
188      .slots = (int []) {
189         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,
190         PPIR_INSTR_SLOT_END
191      },
192   },
193   [ppir_op_lt] = {
194      .name = "lt",
195   },
196   [ppir_op_le] = {
197      .name = "le",
198   },
199   [ppir_op_gt] = {
200      .name = "gt",
201      .slots = (int []) {
202         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
203         PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
204         PPIR_INSTR_SLOT_END
205      },
206   },
207   [ppir_op_ge] = {
208      .name = "ge",
209      .slots = (int []) {
210         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
211         PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
212         PPIR_INSTR_SLOT_END
213      },
214   },
215   [ppir_op_eq] = {
216      .name = "eq",
217      .slots = (int []) {
218         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
219         PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
220         PPIR_INSTR_SLOT_END
221      },
222   },
223   [ppir_op_ne] = {
224      .name = "ne",
225      .slots = (int []) {
226         PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,
227         PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,
228         PPIR_INSTR_SLOT_END
229      },
230   },
231   [ppir_op_select] = {
232      .name = "select",
233      .slots = (int []) {
234         PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,
235         PPIR_INSTR_SLOT_END
236      },
237   },
238   [ppir_op_rcp] = {
239      .name = "rcp",
240      .slots = (int []) {
241         PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END
242      },
243   },
244   [ppir_op_load_varying] = {
245      .name = "ld_var",
246      .type = ppir_node_type_load,
247      .slots = (int []) {
248         PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
249      },
250   },
251   [ppir_op_load_coords] = {
252      .name = "ld_coords",
253      .type = ppir_node_type_load,
254      .slots = (int []) {
255         PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
256      },
257   },
258   [ppir_op_load_coords_reg] = {
259      .name = "ld_coords_reg",
260      .type = ppir_node_type_load,
261      .slots = (int []) {
262         PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
263      },
264   },
265   [ppir_op_load_fragcoord] = {
266      .name = "ld_fragcoord",
267      .type = ppir_node_type_load,
268      .slots = (int []) {
269         PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
270      },
271   },
272   [ppir_op_load_pointcoord] = {
273      .name = "ld_pointcoord",
274      .type = ppir_node_type_load,
275      .slots = (int []) {
276         PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
277      },
278   },
279   [ppir_op_load_frontface] = {
280      .name = "ld_frontface",
281      .type = ppir_node_type_load,
282      .slots = (int []) {
283         PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END
284      },
285   },
286   [ppir_op_load_uniform] = {
287      .name = "ld_uni",
288      .type = ppir_node_type_load,
289      .slots = (int []) {
290         PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END
291      },
292   },
293   [ppir_op_load_texture] = {
294      .name = "ld_tex",
295      .type = ppir_node_type_load_texture,
296      .slots = (int []) {
297         PPIR_INSTR_SLOT_TEXLD, PPIR_INSTR_SLOT_END
298      },
299   },
300   [ppir_op_load_temp] = {
301      .name = "ld_temp",
302      .type = ppir_node_type_load,
303      .slots = (int []) {
304         PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END
305      },
306   },
307   [ppir_op_const] = {
308      .name = "const",
309      .type = ppir_node_type_const,
310   },
311   [ppir_op_store_temp] = {
312      .name = "st_temp",
313      .type = ppir_node_type_store,
314      .slots = (int []) {
315         PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_END
316      },
317   },
318   [ppir_op_discard] = {
319      .name = "discard",
320      .type = ppir_node_type_discard,
321      .slots = (int []) {
322         PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
323      },
324   },
325   [ppir_op_branch] = {
326      .name = "branch",
327      .type = ppir_node_type_branch,
328      .slots = (int []) {
329         PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
330      },
331   },
332   [ppir_op_undef] = {
333      .name = "undef",
334      .type = ppir_node_type_alu,
335      .slots = (int []) {
336         PPIR_INSTR_SLOT_END
337      },
338   },
339   [ppir_op_dummy] = {
340      .name = "dummy",
341      .type = ppir_node_type_alu,
342      .slots = (int []) {
343         PPIR_INSTR_SLOT_END
344      },
345   },
346};
347
348void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)
349{
350   ppir_compiler *comp = block->comp;
351   static const int node_size[] = {
352      [ppir_node_type_alu] = sizeof(ppir_alu_node),
353      [ppir_node_type_const] = sizeof(ppir_const_node),
354      [ppir_node_type_load] = sizeof(ppir_load_node),
355      [ppir_node_type_store] = sizeof(ppir_store_node),
356      [ppir_node_type_load_texture] = sizeof(ppir_load_texture_node),
357      [ppir_node_type_discard] = sizeof(ppir_discard_node),
358      [ppir_node_type_branch] = sizeof(ppir_branch_node),
359   };
360
361   ppir_node_type type = ppir_op_infos[op].type;
362   int size = node_size[type];
363   ppir_node *node = rzalloc_size(block, size);
364   if (!node)
365      return NULL;
366
367   list_inithead(&node->succ_list);
368   list_inithead(&node->pred_list);
369
370   if (index >= 0) {
371      if (mask) {
372         /* reg has 4 slots for each component write node */
373         while (mask)
374            comp->var_nodes[(index << 2) + comp->reg_base + u_bit_scan(&mask)] = node;
375         snprintf(node->name, sizeof(node->name), "reg%d", index);
376      } else {
377         comp->var_nodes[index] = node;
378         snprintf(node->name, sizeof(node->name), "ssa%d", index);
379      }
380   }
381   else
382      snprintf(node->name, sizeof(node->name), "new");
383
384   node->op = op;
385   node->type = type;
386   node->index = comp->cur_index++;
387   node->block = block;
388
389   return node;
390}
391
392void ppir_node_add_dep(ppir_node *succ, ppir_node *pred,
393                       ppir_dep_type type)
394{
395   /* don't add dep for two nodes from different block */
396   if (succ->block != pred->block) {
397      pred->succ_different_block = true;
398      return;
399   }
400
401   /* don't add duplicated dep */
402   ppir_node_foreach_pred(succ, dep) {
403      if (dep->pred == pred)
404         return;
405   }
406
407   ppir_dep *dep = ralloc(succ, ppir_dep);
408   dep->pred = pred;
409   dep->succ = succ;
410   dep->type = type;
411   list_addtail(&dep->pred_link, &succ->pred_list);
412   list_addtail(&dep->succ_link, &pred->succ_list);
413}
414
415void ppir_node_remove_dep(ppir_dep *dep)
416{
417   list_del(&dep->succ_link);
418   list_del(&dep->pred_link);
419   ralloc_free(dep);
420}
421
422static void _ppir_node_replace_child(ppir_src *src, ppir_node *old_child, ppir_node *new_child)
423{
424   ppir_dest *od = ppir_node_get_dest(old_child);
425   if (ppir_node_target_equal(src, od)) {
426      ppir_node_target_assign(src, new_child);
427   }
428}
429
430void ppir_node_replace_child(ppir_node *parent, ppir_node *old_child, ppir_node *new_child)
431{
432   switch (parent->type) {
433   case ppir_node_type_alu:
434   {
435      ppir_alu_node *alu = ppir_node_to_alu(parent);
436      for (int i = 0; i < alu->num_src; i++)
437         _ppir_node_replace_child(alu->src + i, old_child, new_child);
438      break;
439   }
440   case ppir_node_type_branch:
441   {
442      ppir_branch_node *branch = ppir_node_to_branch(parent);
443      for (int i = 0; i < 2; i++)
444         _ppir_node_replace_child(branch->src + i, old_child, new_child);
445      break;
446   }
447   case ppir_node_type_load:
448   {
449      ppir_load_node *load = ppir_node_to_load(parent);
450      _ppir_node_replace_child(&load->src, old_child, new_child);
451      break;
452   }
453   case ppir_node_type_load_texture:
454   {
455      ppir_load_texture_node *load_texture = ppir_node_to_load_texture(parent);
456      for (int i = 0; i < load_texture->num_src; i++)
457         _ppir_node_replace_child(ppir_node_get_src(parent, i), old_child, new_child);
458      break;
459   }
460   case ppir_node_type_store:
461   {
462      ppir_store_node *store = ppir_node_to_store(parent);
463      _ppir_node_replace_child(&store->src, old_child, new_child);
464      break;
465   }
466   default:
467      ppir_debug("unknown node type in %s\n", __func__);
468      break;
469   }
470}
471
472void ppir_node_replace_pred(ppir_dep *dep, ppir_node *new_pred)
473{
474   list_del(&dep->succ_link);
475   dep->pred = new_pred;
476   list_addtail(&dep->succ_link, &new_pred->succ_list);
477}
478
479ppir_dep *ppir_dep_for_pred(ppir_node *node, ppir_node *pred)
480{
481   if (!pred)
482      return NULL;
483
484   if (node->block != pred->block)
485      return NULL;
486
487   ppir_node_foreach_pred(node, dep) {
488      if (dep->pred == pred)
489         return dep;
490   }
491   return NULL;
492}
493
494void ppir_node_replace_all_succ(ppir_node *dst, ppir_node *src)
495{
496   ppir_node_foreach_succ_safe(src, dep) {
497      ppir_node_replace_pred(dep, dst);
498      ppir_node_replace_child(dep->succ, src, dst);
499   }
500}
501
502void ppir_node_delete(ppir_node *node)
503{
504   ppir_node_foreach_succ_safe(node, dep)
505      ppir_node_remove_dep(dep);
506
507   ppir_node_foreach_pred_safe(node, dep)
508      ppir_node_remove_dep(dep);
509
510   list_del(&node->list);
511   ralloc_free(node);
512}
513
514static void ppir_node_print_dest(ppir_dest *dest)
515{
516   switch (dest->type) {
517   case ppir_target_ssa:
518      printf("ssa%d", dest->ssa.index);
519      break;
520   case ppir_target_pipeline:
521      printf("pipeline %d", dest->pipeline);
522      break;
523   case ppir_target_register:
524      printf("reg %d", dest->reg->index);
525      break;
526   }
527}
528
529static void ppir_node_print_src(ppir_src *src)
530{
531   switch (src->type) {
532   case ppir_target_ssa: {
533      if (src->node)
534         printf("ssa node %d", src->node->index);
535      else
536         printf("ssa idx %d", src->ssa ? src->ssa->index : -1);
537      break;
538   }
539   case ppir_target_pipeline:
540      if (src->node)
541         printf("pipeline %d node %d", src->pipeline, src->node->index);
542      else
543         printf("pipeline %d", src->pipeline);
544      break;
545   case ppir_target_register:
546      printf("reg %d", src->reg->index);
547      break;
548   }
549}
550
551static void ppir_node_print_node(ppir_node *node, int space)
552{
553   for (int i = 0; i < space; i++)
554      printf(" ");
555
556   printf("%s%d: %s %s: ", node->printed && !ppir_node_is_leaf(node) ? "+" : "",
557          node->index, ppir_op_infos[node->op].name, node->name);
558
559   ppir_dest *dest = ppir_node_get_dest(node);
560   if (dest) {
561      printf("dest: ");
562      ppir_node_print_dest(dest);
563   }
564
565   if (ppir_node_get_src_num(node) > 0) {
566      printf(" src: ");
567   }
568   for (int i = 0; i < ppir_node_get_src_num(node); i++) {
569      ppir_node_print_src(ppir_node_get_src(node, i));
570      if (i != (ppir_node_get_src_num(node) - 1))
571         printf(", ");
572   }
573   printf("\n");
574
575   if (!node->printed) {
576      ppir_node_foreach_pred(node, dep) {
577         ppir_node *pred = dep->pred;
578         ppir_node_print_node(pred, space + 2);
579      }
580
581      node->printed = true;
582   }
583}
584
585void ppir_node_print_prog(ppir_compiler *comp)
586{
587   if (!(lima_debug & LIMA_DEBUG_PP))
588      return;
589
590   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
591      list_for_each_entry(ppir_node, node, &block->node_list, list) {
592         node->printed = false;
593      }
594   }
595
596   printf("========prog========\n");
597   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
598      printf("-------block %3d-------\n", block->index);
599      list_for_each_entry(ppir_node, node, &block->node_list, list) {
600         if (ppir_node_is_root(node))
601            ppir_node_print_node(node, 0);
602      }
603   }
604   printf("====================\n");
605}
606
607static ppir_node *ppir_node_insert_mov_local(ppir_node *node)
608{
609   ppir_node *move = ppir_node_create(node->block, ppir_op_mov, -1, 0);
610   if (unlikely(!move))
611      return NULL;
612
613   ppir_dest *dest = ppir_node_get_dest(node);
614   ppir_alu_node *alu = ppir_node_to_alu(move);
615   alu->dest = *dest;
616   alu->num_src = 1;
617   ppir_node_target_assign(alu->src, node);
618
619   for (int s = 0; s < 4; s++)
620      alu->src->swizzle[s] = s;
621
622   ppir_node_replace_all_succ(move, node);
623   ppir_node_add_dep(move, node, ppir_dep_src);
624   list_addtail(&move->list, &node->list);
625
626   if (node->is_out) {
627      node->is_out = false;
628      move->is_out = true;
629   }
630
631   return move;
632}
633
634ppir_node *ppir_node_insert_mov(ppir_node *old)
635{
636   ppir_node *move = ppir_node_insert_mov_local(old);
637   ppir_compiler *comp = old->block->comp;
638
639   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
640      if (old->block == block)
641         continue;
642      list_for_each_entry_safe(ppir_node, node, &block->node_list, list) {
643         for (int i = 0; i < ppir_node_get_src_num(node); i++){
644            ppir_src *src = ppir_node_get_src(node, i);
645            if (!src)
646               continue;
647            if (src->node == old)
648               ppir_node_target_assign(src, move);
649         }
650      }
651   }
652
653   return move;
654}
655
656bool ppir_node_has_single_src_succ(ppir_node *node)
657{
658   if (ppir_node_has_single_succ(node) &&
659       list_first_entry(&node->succ_list,
660                        ppir_dep, succ_link)->type == ppir_dep_src)
661      return true;
662
663   int cnt = 0;
664   ppir_node_foreach_succ(node, dep) {
665      if (dep->type != ppir_dep_src)
666         continue;
667      cnt++;
668   }
669
670   return cnt == 1;
671}
672