1/*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25#include <string.h>
26
27#include "util/hash_table.h"
28#include "util/ralloc.h"
29#include "util/bitscan.h"
30#include "compiler/nir/nir.h"
31#include "pipe/p_state.h"
32
33
34#include "ppir.h"
35
36static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
37{
38   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
39   if (!node)
40      return NULL;
41
42   ppir_dest *dest = ppir_node_get_dest(node);
43   dest->type = ppir_target_ssa;
44   dest->ssa.num_components = ssa->num_components;
45   dest->write_mask = u_bit_consecutive(0, ssa->num_components);
46
47   if (node->type == ppir_node_type_load ||
48       node->type == ppir_node_type_store)
49      dest->ssa.is_head = true;
50
51   return node;
52}
53
54static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
55                                  nir_register *reg, unsigned mask)
56{
57   ppir_node *node = ppir_node_create(block, op, reg->index, mask);
58   if (!node)
59      return NULL;
60
61   ppir_dest *dest = ppir_node_get_dest(node);
62
63   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
64      if (r->index == reg->index) {
65         dest->reg = r;
66         break;
67      }
68   }
69
70   dest->type = ppir_target_register;
71   dest->write_mask = mask;
72
73   if (node->type == ppir_node_type_load ||
74       node->type == ppir_node_type_store)
75      dest->reg->is_head = true;
76
77   return node;
78}
79
80static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
81                                   nir_dest *dest, unsigned mask)
82{
83   unsigned index = -1;
84
85   if (dest) {
86      if (dest->is_ssa)
87         return ppir_node_create_ssa(block, op, &dest->ssa);
88      else
89         return ppir_node_create_reg(block, op, dest->reg.reg, mask);
90   }
91
92   return ppir_node_create(block, op, index, 0);
93}
94
/* Resolve the producer node of NIR source *ns, add a src dependency
 * edge from "node" to it, and assign it as the target of ppir src *ps.
 * For register sources, "mask" selects which components are read (it
 * is destructively scanned by u_bit_scan); it is unused for SSA. */
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      /* SSA def: a single producer, looked up by SSA index. No dep
       * edge is added for undef producers. */
      child = comp->var_nodes[ns->ssa->index];
      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   }
   else {
      /* Register: var_nodes holds one slot per register component,
       * starting at comp->reg_base (see ppir_compiler_create), so each
       * read component may have a distinct producer. */
      nir_register *reg = ns->reg.reg;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
         /* Reg is read before it was written, create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_dummy, reg,
               u_bit_consecutive(0, 4));
            comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_dummy)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   /* child holds the producer of the last component scanned; it becomes
    * the target recorded in ps. */
   assert(child);
   ppir_node_target_assign(ps, child);
}
125
/* NIR ALU opcode -> ppir opcode translation table. Entries not listed
 * default to 0 (ppir_op_unsupported) and are rejected in ppir_emit_alu. */
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};
157
158static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
159{
160   nir_alu_instr *instr = nir_instr_as_alu(ni);
161   int op = nir_to_ppir_opcodes[instr->op];
162
163   if (op == ppir_op_unsupported) {
164      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
165      return false;
166   }
167
168   ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
169                                               instr->dest.write_mask);
170   if (!node)
171      return false;
172
173   ppir_dest *pd = &node->dest;
174   nir_alu_dest *nd = &instr->dest;
175   if (nd->saturate)
176      pd->modifier = ppir_outmod_clamp_fraction;
177
178   unsigned src_mask;
179   switch (op) {
180   case ppir_op_sum3:
181      src_mask = 0b0111;
182      break;
183   case ppir_op_sum4:
184      src_mask = 0b1111;
185      break;
186   default:
187      src_mask = pd->write_mask;
188      break;
189   }
190
191   unsigned num_child = nir_op_infos[instr->op].num_inputs;
192   node->num_src = num_child;
193
194   for (int i = 0; i < num_child; i++) {
195      nir_alu_src *ns = instr->src + i;
196      ppir_src *ps = node->src + i;
197      memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
198      ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);
199
200      ps->absolute = ns->abs;
201      ps->negate = ns->negate;
202   }
203
204   list_addtail(&node->node.list, &block->node_list);
205   return true;
206}
207
208static ppir_block *ppir_block_create(ppir_compiler *comp);
209
210static bool ppir_emit_discard_block(ppir_compiler *comp)
211{
212   ppir_block *block = ppir_block_create(comp);
213   ppir_discard_node *discard;
214   if (!block)
215      return false;
216
217   comp->discard_block = block;
218   block->comp  = comp;
219
220   discard = ppir_node_create(block, ppir_op_discard, -1, 0);
221   if (discard)
222      list_addtail(&discard->node.list, &block->node_list);
223   else
224      return false;
225
226   return true;
227}
228
/* Emit a conditional branch to the shared discard block for
 * nir_intrinsic_discard_if. Returns the branch node, or NULL on
 * allocation failure. The caller is responsible for adding the node
 * to the block's node list. */
static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;

   /* The discard block is created once and reused by every discard_if. */
   if (!comp->discard_block && !ppir_emit_discard_block(comp))
      return NULL;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(block->comp, node, &branch->src[0],
                     &instr->src[0], u_bit_consecutive(0, instr->num_components));
   branch->num_src = 1;
   branch->target = comp->discard_block;

   return node;
}
252
253static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
254{
255   ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);
256
257   return node;
258}
259
260static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
261{
262   ppir_node *node;
263   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
264   unsigned mask = 0;
265   ppir_load_node *lnode;
266   ppir_alu_node *alu_node;
267
268   switch (instr->intrinsic) {
269   case nir_intrinsic_load_input:
270      if (!instr->dest.is_ssa)
271         mask = u_bit_consecutive(0, instr->num_components);
272
273      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
274      if (!lnode)
275         return false;
276
277      lnode->num_components = instr->num_components;
278      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
279      if (nir_src_is_const(instr->src[0]))
280         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
281      else {
282         lnode->num_src = 1;
283         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
284      }
285      list_addtail(&lnode->node.list, &block->node_list);
286      return true;
287
288   case nir_intrinsic_load_frag_coord:
289   case nir_intrinsic_load_point_coord:
290   case nir_intrinsic_load_front_face:
291      if (!instr->dest.is_ssa)
292         mask = u_bit_consecutive(0, instr->num_components);
293
294      ppir_op op;
295      switch (instr->intrinsic) {
296      case nir_intrinsic_load_frag_coord:
297         op = ppir_op_load_fragcoord;
298         break;
299      case nir_intrinsic_load_point_coord:
300         op = ppir_op_load_pointcoord;
301         break;
302      case nir_intrinsic_load_front_face:
303         op = ppir_op_load_frontface;
304         break;
305      default:
306         unreachable("bad intrinsic");
307         break;
308      }
309
310      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
311      if (!lnode)
312         return false;
313
314      lnode->num_components = instr->num_components;
315      list_addtail(&lnode->node.list, &block->node_list);
316      return true;
317
318   case nir_intrinsic_load_uniform:
319      if (!instr->dest.is_ssa)
320         mask = u_bit_consecutive(0, instr->num_components);
321
322      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
323      if (!lnode)
324         return false;
325
326      lnode->num_components = instr->num_components;
327      lnode->index = nir_intrinsic_base(instr);
328      if (nir_src_is_const(instr->src[0]))
329         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
330      else {
331         lnode->num_src = 1;
332         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
333      }
334
335      list_addtail(&lnode->node.list, &block->node_list);
336      return true;
337
338   case nir_intrinsic_store_output: {
339      /* In simple cases where the store_output is ssa, that register
340       * can be directly marked as the output.
341       * If discard is used or the source is not ssa, things can get a
342       * lot more complicated, so don't try to optimize those and fall
343       * back to inserting a mov at the end.
344       * If the source node will only be able to output to pipeline
345       * registers, fall back to the mov as well. */
346      assert(nir_src_is_const(instr->src[1]) &&
347             "lima doesn't support indirect outputs");
348
349      nir_io_semantics io = nir_intrinsic_io_semantics(instr);
350      unsigned offset = nir_src_as_uint(instr->src[1]);
351      unsigned slot = io.location + offset;
352      ppir_output_type out_type = ppir_nir_output_to_ppir(slot,
353         block->comp->dual_source_blend ? io.dual_source_blend_index : 0);
354      if (out_type == ppir_output_invalid) {
355         ppir_debug("Unsupported output type: %d\n", slot);
356         return false;
357      }
358
359      if (!block->comp->uses_discard && instr->src->is_ssa) {
360         node = block->comp->var_nodes[instr->src->ssa->index];
361         switch (node->op) {
362         case ppir_op_load_uniform:
363         case ppir_op_load_texture:
364         case ppir_op_const:
365            break;
366         default: {
367            ppir_dest *dest = ppir_node_get_dest(node);
368            dest->ssa.out_type = out_type;
369            node->is_out = 1;
370            return true;
371            }
372         }
373      }
374
375      alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
376      if (!alu_node)
377         return false;
378
379      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
380      dest->type = ppir_target_ssa;
381      dest->ssa.num_components = instr->num_components;
382      dest->ssa.index = 0;
383      dest->write_mask = u_bit_consecutive(0, instr->num_components);
384      dest->ssa.out_type = out_type;
385
386      alu_node->num_src = 1;
387
388      for (int i = 0; i < instr->num_components; i++)
389         alu_node->src[0].swizzle[i] = i;
390
391      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
392                        u_bit_consecutive(0, instr->num_components));
393
394      alu_node->node.is_out = 1;
395
396      list_addtail(&alu_node->node.list, &block->node_list);
397      return true;
398   }
399
400   case nir_intrinsic_discard:
401      node = ppir_emit_discard(block, ni);
402      list_addtail(&node->list, &block->node_list);
403      return true;
404
405   case nir_intrinsic_discard_if:
406      node = ppir_emit_discard_if(block, ni);
407      list_addtail(&node->list, &block->node_list);
408      return true;
409
410   default:
411      ppir_error("unsupported nir_intrinsic_instr %s\n",
412                 nir_intrinsic_infos[instr->intrinsic].name);
413      return false;
414   }
415}
416
417static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni)
418{
419   nir_load_const_instr *instr = nir_instr_as_load_const(ni);
420   ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
421   if (!node)
422      return false;
423
424   assert(instr->def.bit_size == 32);
425
426   for (int i = 0; i < instr->def.num_components; i++)
427      node->constant.value[i].i = instr->value[i].i32;
428   node->constant.num = instr->def.num_components;
429
430   list_addtail(&node->node.list, &block->node_list);
431   return true;
432}
433
434static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
435{
436   nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni);
437   ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
438   if (!node)
439      return false;
440   ppir_alu_node *alu = ppir_node_to_alu(node);
441
442   ppir_dest *dest = &alu->dest;
443   dest->ssa.undef = true;
444
445   list_addtail(&node->list, &block->node_list);
446   return true;
447}
448
/* Translate a NIR texture instruction into a ld_tex node plus a
 * load-coords node feeding it through a pipeline register. Returns
 * false for unsupported ops/dims/sources or allocation failure. */
static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      ppir_error("unsupported texop %d\n", instr->op);
      return false;
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return false;
   }

   /* emit ld_tex node */

   /* Register dests need an explicit mask; SSA dests derive it in
    * ppir_node_create_ssa. */
   unsigned mask = 0;
   if (!instr->dest.is_ssa)
      mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
   if (!node)
      return false;

   node->sampler = instr->texture_index;
   node->sampler_dim = instr->sampler_dim;

   /* Identity swizzle for the coordinate source. */
   for (int i = 0; i < instr->coord_components; i++)
         node->src[0].swizzle[i] = i;

   /* backend1 coords carry perspective-divide information. */
   bool perspective = false;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_backend1:
         perspective = true;
         FALLTHROUGH;
      case nir_tex_src_coord: {
         nir_src *ns = &instr->src[i].src;
         if (ns->is_ssa) {
            ppir_node *child = block->comp->var_nodes[ns->ssa->index];
            if (child->op == ppir_op_load_varying) {
               /* If the successor is load_texture, promote it to load_coords */
               nir_tex_src *nts = (nir_tex_src *)ns;
               if (nts->src_type == nir_tex_src_coord ||
                   nts->src_type == nir_tex_src_backend1)
                  child->op = ppir_op_load_coords;
            }
         }

         /* src[0] is not used by the ld_tex instruction but ensures
          * correct scheduling due to the pipeline dependency */
         ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         node->num_src++;
         break;
      }
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         node->lod_bias_en = true;
         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
         ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1);
         node->num_src++;
         break;
      default:
         ppir_error("unsupported texture source type\n");
         return false;
      }
   }

   list_addtail(&node->node.list, &block->node_list);

   /* validate load coords node */

   ppir_node *src_coords = ppir_node_get_src(&node->node, 0)->node;
   ppir_load_node *load = NULL;

   /* Reuse the coords producer only if it feeds this ld_tex alone;
    * otherwise emit a separate load_coords_reg node. */
   if (src_coords && ppir_node_has_single_src_succ(src_coords) &&
       (src_coords->op == ppir_op_load_coords))
      load = ppir_node_to_load(src_coords);
   else {
      /* Create load_coords node */
      load = ppir_node_create(block, ppir_op_load_coords_reg, -1, 0);
      if (!load)
         return false;
      list_addtail(&load->node.list, &block->node_list);

      load->src = node->src[0];
      load->num_src = 1;
      load->num_components = instr->coord_components;

      ppir_debug("%s create load_coords node %d for %d\n",
                 __FUNCTION__, load->index, node->node.index);

      /* Re-parent the ld_tex's predecessors onto the new load node so
       * the coords are produced before it. */
      ppir_node_foreach_pred_safe((&node->node), dep) {
         ppir_node *pred = dep->pred;
         ppir_node_remove_dep(dep);
         ppir_node_add_dep(&load->node, pred, ppir_dep_src);
      }
      ppir_node_add_dep(&node->node, &load->node, ppir_dep_src);
   }

   assert(load);

   if (perspective) {
      if (instr->coord_components == 3)
         load->perspective = ppir_perspective_z;
      else
         load->perspective = ppir_perspective_w;
   }

   load->sampler_dim = instr->sampler_dim;
   /* Route the coords through a pipeline register rather than a real
    * register file slot. */
   node->src[0].type = load->dest.type = ppir_target_pipeline;
   node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard;

   return true;
}
580
581static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
582{
583   ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uintptr_t)nblock);
584
585   return block;
586}
587
588static bool ppir_emit_jump(ppir_block *block, nir_instr *ni)
589{
590   ppir_node *node;
591   ppir_compiler *comp = block->comp;
592   ppir_branch_node *branch;
593   ppir_block *jump_block;
594   nir_jump_instr *jump = nir_instr_as_jump(ni);
595
596   switch (jump->type) {
597   case nir_jump_break: {
598      assert(comp->current_block->successors[0]);
599      assert(!comp->current_block->successors[1]);
600      jump_block = comp->current_block->successors[0];
601   }
602   break;
603   case nir_jump_continue:
604      jump_block = comp->loop_cont_block;
605   break;
606   default:
607      ppir_error("nir_jump_instr not support\n");
608      return false;
609   }
610
611   assert(jump_block != NULL);
612
613   node = ppir_node_create(block, ppir_op_branch, -1, 0);
614   if (!node)
615      return false;
616   branch = ppir_node_to_branch(node);
617
618   /* Unconditional */
619   branch->num_src = 0;
620   branch->target = jump_block;
621
622   list_addtail(&node->list, &block->node_list);
623   return true;
624}
625
/* Per-instruction-type emit handlers; phis and later types are not
 * supported (ppir_emit_block asserts type < nir_instr_type_phi). */
static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu]        = ppir_emit_alu,
   [nir_instr_type_intrinsic]  = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_ssa_undef]  = ppir_emit_ssa_undef,
   [nir_instr_type_tex]        = ppir_emit_tex,
   [nir_instr_type_jump]       = ppir_emit_jump,
};
634
635static ppir_block *ppir_block_create(ppir_compiler *comp)
636{
637   ppir_block *block = rzalloc(comp, ppir_block);
638   if (!block)
639      return NULL;
640
641   list_inithead(&block->node_list);
642   list_inithead(&block->instr_list);
643
644   block->comp = comp;
645
646   return block;
647}
648
649static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
650{
651   ppir_block *block = ppir_get_block(comp, nblock);
652
653   comp->current_block = block;
654
655   list_addtail(&block->list, &comp->block_list);
656
657   nir_foreach_instr(instr, nblock) {
658      assert(instr->type < nir_instr_type_phi);
659      if (!ppir_emit_instr[instr->type](block, instr))
660         return false;
661   }
662
663   return true;
664}
665
666static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);
667
/* Emit a NIR if as ppir branches: a negated conditional branch over
 * the then-list, and (when the else-list is non-empty) an unconditional
 * branch at the end of the then-list whose target is fixed up later. */
static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   /* An else-list consisting of one instruction-less block counts as
    * empty and needs no after-branch. */
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
      exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &if_stmt->condition, 1);
   else_branch->num_src = 1;
   /* Negate condition to minimize branching. We're generating following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or if else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->then_list))
      return false;

   if (empty_else_block) {
      /* Branch straight past the empty else to the merge block. */
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   /* The then-list ends with an unconditional jump over the else-list. */
   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block, will fixup later */
   list_addtail(&after_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->else_list))
      return false;

   return true;
}
733
734static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
735{
736   ppir_block *save_loop_cont_block = comp->loop_cont_block;
737   ppir_block *block;
738   ppir_branch_node *loop_branch;
739   nir_block *loop_last_block;
740   ppir_node *node;
741
742   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));
743
744   if (!ppir_emit_cf_list(comp, &nloop->body))
745      return false;
746
747   loop_last_block = nir_loop_last_block(nloop);
748   block = ppir_get_block(comp, loop_last_block);
749   node = ppir_node_create(block, ppir_op_branch, -1, 0);
750   if (!node)
751      return false;
752   loop_branch = ppir_node_to_branch(node);
753   /* Unconditional */
754   loop_branch->num_src = 0;
755   loop_branch->target = comp->loop_cont_block;
756   list_addtail(&loop_branch->node.list, &block->node_list);
757
758   comp->loop_cont_block = save_loop_cont_block;
759
760   comp->num_loops++;
761
762   return true;
763}
764
/* Function cf-nodes are rejected — presumably all calls are inlined
 * before this backend runs; confirm against the NIR lowering passes
 * the driver schedules. */
static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   ppir_error("function nir_cf_node not support\n");
   return false;
}
770
771static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
772{
773   foreach_list_typed(nir_cf_node, node, node, list) {
774      bool ret;
775
776      switch (node->type) {
777      case nir_cf_node_block:
778         ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
779         break;
780      case nir_cf_node_if:
781         ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
782         break;
783      case nir_cf_node_loop:
784         ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
785         break;
786      case nir_cf_node_function:
787         ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
788         break;
789      default:
790         ppir_error("unknown NIR node type %d\n", node->type);
791         return false;
792      }
793
794      if (!ret)
795         return false;
796   }
797
798   return true;
799}
800
801static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
802{
803   ppir_compiler *comp = rzalloc_size(
804      prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
805   if (!comp)
806      return NULL;
807
808   list_inithead(&comp->block_list);
809   list_inithead(&comp->reg_list);
810   comp->reg_num = 0;
811   comp->blocks = _mesa_hash_table_u64_create(prog);
812
813   comp->var_nodes = (ppir_node **)(comp + 1);
814   comp->reg_base = num_ssa;
815   comp->prog = prog;
816
817   return comp;
818}
819
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instructions order. Consider discard_if and the is_end node as
    * example. If we don't add fake dependency of discard_if to is_end,
    * scheduler may put the is_end first and since is_end terminates
    * shader on Utgard PP, rest of it will never be executed.
    * Add fake dependencies for discard/branch/store to preserve
    * instruction order.
    *
    * TODO: scheduler should schedule discard_if as early as possible otherwise
    * we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since dependencies can
    * be scheduled in any order it can result in code like this:
    *
    * instr1: s3 = s1 < s3
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      /* Walk nodes back-to-front; prev_node is the most recently seen
       * ordering-sensitive node (i.e. the one later in program order). */
      ppir_node *prev_node = NULL;
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         /* Tie free-floating roots (except consts) before the next
          * ordering-sensitive node with a sequence dep. */
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
         }
         if (node->is_out ||
             node->op == ppir_op_discard ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}
861
862static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
863                                 struct util_debug_callback *debug)
864{
865   const struct shader_info *info = &nir->info;
866   char *shaderdb;
867   ASSERTED int ret = asprintf(&shaderdb,
868                               "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
869                               gl_shader_stage_name(info->stage),
870                               comp->cur_instr_index,
871                               comp->num_loops,
872                               comp->num_spills,
873                               comp->num_fills);
874   assert(ret >= 0);
875
876   if (lima_debug & LIMA_DEBUG_SHADERDB)
877      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
878
879   util_debug_message(debug, SHADER_INFO, "%s", shaderdb);
880   free(shaderdb);
881}
882
/* For every ppir register, walk each block back-to-front and add a
 * write-after-read dependency from the latest writer to every earlier
 * reader, so the scheduler cannot move a write above a read of the
 * old value. */
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         /* Most recently seen writer of "reg", i.e. the write that is
          * later in program order than the nodes still to be visited. */
         ppir_node *write = NULL;
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write) {
                  ppir_debug("Adding dep %d for write %d\n", node->index, write->index);
                  ppir_node_add_dep(write, node, ppir_dep_write_after_read);
               }
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}
906
907bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *nir,
908                      struct ra_regs *ra,
909                      struct util_debug_callback *debug)
910{
911   nir_function_impl *func = nir_shader_get_entrypoint(nir);
912   ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
913   if (!comp)
914      return false;
915
916   comp->ra = ra;
917   comp->uses_discard = nir->info.fs.uses_discard;
918   comp->dual_source_blend = nir->info.fs.color_is_dual_source;
919
920   /* 1st pass: create ppir blocks */
921   nir_foreach_function(function, nir) {
922      if (!function->impl)
923         continue;
924
925      nir_foreach_block(nblock, function->impl) {
926         ppir_block *block = ppir_block_create(comp);
927         if (!block)
928            return false;
929         block->index = nblock->index;
930         _mesa_hash_table_u64_insert(comp->blocks, (uintptr_t)nblock, block);
931      }
932   }
933
934   /* 2nd pass: populate successors */
935   nir_foreach_function(function, nir) {
936      if (!function->impl)
937         continue;
938
939      nir_foreach_block(nblock, function->impl) {
940         ppir_block *block = ppir_get_block(comp, nblock);
941         assert(block);
942
943         for (int i = 0; i < 2; i++) {
944            if (nblock->successors[i])
945               block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
946         }
947      }
948   }
949
950   comp->out_type_to_reg = rzalloc_size(comp, sizeof(int) * ppir_output_num);
951
952   /* -1 means reg is not written by the shader */
953   for (int i = 0; i < ppir_output_num; i++)
954      comp->out_type_to_reg[i] = -1;
955
956   foreach_list_typed(nir_register, reg, node, &func->registers) {
957      ppir_reg *r = rzalloc(comp, ppir_reg);
958      if (!r)
959         return false;
960
961      r->index = reg->index;
962      r->num_components = reg->num_components;
963      r->is_head = false;
964      list_addtail(&r->list, &comp->reg_list);
965      comp->reg_num++;
966   }
967
968   if (!ppir_emit_cf_list(comp, &func->body))
969      goto err_out0;
970
971   /* If we have discard block add it to the very end */
972   if (comp->discard_block)
973      list_addtail(&comp->discard_block->list, &comp->block_list);
974
975   ppir_node_print_prog(comp);
976
977   if (!ppir_lower_prog(comp))
978      goto err_out0;
979
980   ppir_add_ordering_deps(comp);
981   ppir_add_write_after_read_deps(comp);
982
983   ppir_node_print_prog(comp);
984
985   if (!ppir_node_to_instr(comp))
986      goto err_out0;
987
988   if (!ppir_schedule_prog(comp))
989      goto err_out0;
990
991   if (!ppir_regalloc_prog(comp))
992      goto err_out0;
993
994   if (!ppir_codegen_prog(comp))
995      goto err_out0;
996
997   ppir_print_shader_db(nir, comp, debug);
998
999   _mesa_hash_table_u64_destroy(comp->blocks);
1000   ralloc_free(comp);
1001   return true;
1002
1003err_out0:
1004   _mesa_hash_table_u64_destroy(comp->blocks);
1005   ralloc_free(comp);
1006   return false;
1007}
1008
1009