1/*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25#include "util/ralloc.h"
26#include "util/half_float.h"
27#include "util/bitscan.h"
28
29#include "ppir.h"
30#include "codegen.h"
31#include "lima_context.h"
32
/*
 * Pack four swizzle selectors into a bit field, two bits per selector.
 *
 * Selector i is (swizzle[i] + shift) reduced modulo 4 and is placed at bit
 * position 2 * (i + dest_shift) of the result.
 */
static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift)
{
   unsigned packed = 0;
   int bit_pos = dest_shift * 2;

   for (int comp = 0; comp < 4; comp++, bit_pos += 2) {
      int selector = (swizzle[comp] + shift) & 0x3;
      packed |= selector << bit_pos;
   }

   return packed;
}
40
41static int get_scl_reg_index(ppir_src *src, int component)
42{
43   int ret = ppir_target_get_src_reg_index(src);
44   ret += src->swizzle[component];
45   return ret;
46}
47
48static void ppir_codegen_encode_varying(ppir_node *node, void *code)
49{
50   ppir_codegen_field_varying *f = code;
51   ppir_load_node *load = ppir_node_to_load(node);
52   ppir_dest *dest = &load->dest;
53   int index = ppir_target_get_dest_reg_index(dest);
54   int num_components = load->num_components;
55
56   if (node->op != ppir_op_load_coords_reg) {
57      assert(node->op == ppir_op_load_varying ||
58             node->op == ppir_op_load_coords ||
59             node->op == ppir_op_load_fragcoord ||
60             node->op == ppir_op_load_pointcoord ||
61             node->op == ppir_op_load_frontface);
62
63      f->imm.dest = index >> 2;
64      f->imm.mask = dest->write_mask << (index & 0x3);
65
66      int alignment = num_components == 3 ? 3 : num_components - 1;
67      f->imm.alignment = alignment;
68
69      if (load->num_src) {
70         index = ppir_target_get_src_reg_index(&load->src);
71         f->imm.offset_vector = index >> 2;
72         f->imm.offset_scalar = index & 0x3;
73      } else
74         f->imm.offset_vector = 0xf;
75
76      if (alignment == 3)
77         f->imm.index = load->index >> 2;
78      else
79         f->imm.index = load->index >> alignment;
80
81      switch (node->op) {
82         case ppir_op_load_fragcoord:
83            f->imm.source_type = 2;
84            f->imm.perspective = 3;
85            break;
86         case ppir_op_load_pointcoord:
87            f->imm.source_type = 3;
88            break;
89         case ppir_op_load_frontface:
90            f->imm.source_type = 3;
91            f->imm.perspective = 1;
92            break;
93         case ppir_op_load_coords:
94            if (load->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
95               f->imm.source_type = 2;
96
97            switch (load->perspective) {
98            case ppir_perspective_none:
99               f->imm.perspective = 0;
100               break;
101            case ppir_perspective_z:
102               f->imm.perspective = 2;
103               break;
104            case ppir_perspective_w:
105               f->imm.perspective = 3;
106               break;
107            }
108            break;
109         default:
110            break;
111      }
112   }
113   else {  /* node->op == ppir_op_load_coords_reg */
114      f->reg.dest = index >> 2;
115      f->reg.mask = dest->write_mask << (index & 0x3);
116
117      if (load->num_src) {
118         if (load->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
119            f->reg.source_type = 2;
120            f->reg.perspective = 1;
121         } else {
122            f->reg.source_type = 1;
123            switch (load->perspective) {
124            case ppir_perspective_none:
125               f->reg.perspective = 0;
126               break;
127            case ppir_perspective_z:
128               f->reg.perspective = 2;
129               break;
130            case ppir_perspective_w:
131               f->reg.perspective = 3;
132               break;
133            }
134         }
135         ppir_src *src = &load->src;
136         index = ppir_target_get_src_reg_index(src);
137         f->reg.source = index >> 2;
138         f->reg.negate = src->negate;
139         f->reg.absolute = src->absolute;
140         f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0);
141      }
142   }
143}
144
145static void ppir_codegen_encode_texld(ppir_node *node, void *code)
146{
147   ppir_codegen_field_sampler *f = code;
148   ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node);
149
150   f->index = ldtex->sampler;
151
152   f->lod_bias_en = ldtex->lod_bias_en;
153   f->explicit_lod = ldtex->explicit_lod;
154   if (ldtex->lod_bias_en)
155      f->lod_bias = ppir_target_get_src_reg_index(&ldtex->src[1]);
156
157   switch (ldtex->sampler_dim) {
158   case GLSL_SAMPLER_DIM_2D:
159   case GLSL_SAMPLER_DIM_3D:
160   case GLSL_SAMPLER_DIM_RECT:
161   case GLSL_SAMPLER_DIM_EXTERNAL:
162      f->type = ppir_codegen_sampler_type_generic;
163      break;
164   case GLSL_SAMPLER_DIM_CUBE:
165      f->type = ppir_codegen_sampler_type_cube;
166      break;
167   default:
168      break;
169   }
170
171   f->offset_en = 0;
172   f->unknown_2 = 0x39001;
173}
174
175static void ppir_codegen_encode_uniform(ppir_node *node, void *code)
176{
177   ppir_codegen_field_uniform *f = code;
178   ppir_load_node *load = ppir_node_to_load(node);
179
180   switch (node->op) {
181      case ppir_op_load_uniform:
182         f->source = ppir_codegen_uniform_src_uniform;
183         break;
184      case ppir_op_load_temp:
185         f->source = ppir_codegen_uniform_src_temporary;
186         break;
187      default:
188         assert(0);
189   }
190
191   /* Uniforms are always aligned to vec4 boundary */
192   f->alignment = 2;
193   f->index = load->index;
194
195   if (load->num_src) {
196      f->offset_en = 1;
197      f->offset_reg = ppir_target_get_src_reg_index(&load->src);
198   }
199}
200
201static ppir_codegen_outmod ppir_codegen_get_outmod(ppir_outmod outmod)
202{
203   switch (outmod) {
204      case ppir_outmod_none:
205         return ppir_codegen_outmod_none;
206      case ppir_outmod_clamp_fraction:
207         return ppir_codegen_outmod_clamp_fraction;
208      case ppir_outmod_clamp_positive:
209         return ppir_codegen_outmod_clamp_positive;
210      case ppir_outmod_round:
211         return ppir_codegen_outmod_round;
212      default:
213         unreachable("invalid ppir_outmod");
214   }
215}
216
/*
 * Map a shift amount in [-3, 3] to an op value: non-negative shifts map to
 * themselves, negative shifts map to shift + 8 (so -1 -> 7, -3 -> 5).
 */
static unsigned shift_to_op(int shift)
{
   assert(shift >= -3 && shift <= 3);

   if (shift >= 0)
      return shift;

   return shift + 8;
}
222
223static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
224{
225   ppir_codegen_field_vec4_mul *f = code;
226   ppir_alu_node *alu = ppir_node_to_alu(node);
227
228   ppir_dest *dest = &alu->dest;
229   int dest_shift = 0;
230   if (dest->type != ppir_target_pipeline) {
231      int index = ppir_target_get_dest_reg_index(dest);
232      dest_shift = index & 0x3;
233      f->dest = index >> 2;
234      f->mask = dest->write_mask << dest_shift;
235   }
236   f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);
237
238   switch (node->op) {
239   case ppir_op_mul:
240      f->op = shift_to_op(alu->shift);
241      break;
242   case ppir_op_mov:
243      f->op = ppir_codegen_vec4_mul_op_mov;
244      break;
245   case ppir_op_max:
246      f->op = ppir_codegen_vec4_mul_op_max;
247      break;
248   case ppir_op_min:
249      f->op = ppir_codegen_vec4_mul_op_min;
250      break;
251   case ppir_op_and:
252      f->op = ppir_codegen_vec4_mul_op_and;
253      break;
254   case ppir_op_or:
255      f->op = ppir_codegen_vec4_mul_op_or;
256      break;
257   case ppir_op_xor:
258      f->op = ppir_codegen_vec4_mul_op_xor;
259      break;
260   case ppir_op_gt:
261      f->op = ppir_codegen_vec4_mul_op_gt;
262      break;
263   case ppir_op_ge:
264      f->op = ppir_codegen_vec4_mul_op_ge;
265      break;
266   case ppir_op_eq:
267      f->op = ppir_codegen_vec4_mul_op_eq;
268      break;
269   case ppir_op_ne:
270      f->op = ppir_codegen_vec4_mul_op_ne;
271      break;
272   case ppir_op_not:
273      f->op = ppir_codegen_vec4_mul_op_not;
274      break;
275   default:
276      break;
277   }
278
279   ppir_src *src = alu->src;
280   int index = ppir_target_get_src_reg_index(src);
281   f->arg0_source = index >> 2;
282   f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
283   f->arg0_absolute = src->absolute;
284   f->arg0_negate = src->negate;
285
286   if (alu->num_src == 2) {
287      src = alu->src + 1;
288      index = ppir_target_get_src_reg_index(src);
289      f->arg1_source = index >> 2;
290      f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
291      f->arg1_absolute = src->absolute;
292      f->arg1_negate = src->negate;
293   }
294}
295
296static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code)
297{
298   ppir_codegen_field_float_mul *f = code;
299   ppir_alu_node *alu = ppir_node_to_alu(node);
300
301   ppir_dest *dest = &alu->dest;
302   int dest_component = ffs(dest->write_mask) - 1;
303   assert(dest_component >= 0);
304
305   if (dest->type != ppir_target_pipeline) {
306      f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
307      f->output_en = true;
308   }
309   f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);
310
311   switch (node->op) {
312   case ppir_op_mul:
313      f->op = shift_to_op(alu->shift);
314      break;
315   case ppir_op_mov:
316      f->op = ppir_codegen_float_mul_op_mov;
317      break;
318   case ppir_op_max:
319      f->op = ppir_codegen_float_mul_op_max;
320      break;
321   case ppir_op_min:
322      f->op = ppir_codegen_float_mul_op_min;
323      break;
324   case ppir_op_and:
325      f->op = ppir_codegen_float_mul_op_and;
326      break;
327   case ppir_op_or:
328      f->op = ppir_codegen_float_mul_op_or;
329      break;
330   case ppir_op_xor:
331      f->op = ppir_codegen_float_mul_op_xor;
332      break;
333   case ppir_op_gt:
334      f->op = ppir_codegen_float_mul_op_gt;
335      break;
336   case ppir_op_ge:
337      f->op = ppir_codegen_float_mul_op_ge;
338      break;
339   case ppir_op_eq:
340      f->op = ppir_codegen_float_mul_op_eq;
341      break;
342   case ppir_op_ne:
343      f->op = ppir_codegen_float_mul_op_ne;
344      break;
345   case ppir_op_not:
346      f->op = ppir_codegen_float_mul_op_not;
347      break;
348   default:
349      break;
350   }
351
352   ppir_src *src = alu->src;
353   f->arg0_source = get_scl_reg_index(src, dest_component);
354   f->arg0_absolute = src->absolute;
355   f->arg0_negate = src->negate;
356
357   if (alu->num_src == 2) {
358      src = alu->src + 1;
359      f->arg1_source = get_scl_reg_index(src, dest_component);
360      f->arg1_absolute = src->absolute;
361      f->arg1_negate = src->negate;
362   }
363}
364
365static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
366{
367   ppir_codegen_field_vec4_acc *f = code;
368   ppir_alu_node *alu = ppir_node_to_alu(node);
369
370   ppir_dest *dest = &alu->dest;
371   int index = ppir_target_get_dest_reg_index(dest);
372   int dest_shift = index & 0x3;
373   f->dest = index >> 2;
374   f->mask = dest->write_mask << dest_shift;
375   f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);
376
377   switch (node->op) {
378   case ppir_op_add:
379      f->op = ppir_codegen_vec4_acc_op_add;
380      break;
381   case ppir_op_mov:
382      f->op = ppir_codegen_vec4_acc_op_mov;
383      break;
384   case ppir_op_sum3:
385      f->op = ppir_codegen_vec4_acc_op_sum3;
386      dest_shift = 0;
387      break;
388   case ppir_op_sum4:
389      f->op = ppir_codegen_vec4_acc_op_sum4;
390      dest_shift = 0;
391      break;
392   case ppir_op_floor:
393      f->op = ppir_codegen_vec4_acc_op_floor;
394      break;
395   case ppir_op_ceil:
396      f->op = ppir_codegen_vec4_acc_op_ceil;
397      break;
398   case ppir_op_fract:
399      f->op = ppir_codegen_vec4_acc_op_fract;
400      break;
401   case ppir_op_gt:
402      f->op = ppir_codegen_vec4_acc_op_gt;
403      break;
404   case ppir_op_ge:
405      f->op = ppir_codegen_vec4_acc_op_ge;
406      break;
407   case ppir_op_eq:
408      f->op = ppir_codegen_vec4_acc_op_eq;
409      break;
410   case ppir_op_ne:
411      f->op = ppir_codegen_vec4_acc_op_ne;
412      break;
413   case ppir_op_select:
414      f->op = ppir_codegen_vec4_acc_op_sel;
415      break;
416   case ppir_op_max:
417      f->op = ppir_codegen_vec4_acc_op_max;
418      break;
419   case ppir_op_min:
420      f->op = ppir_codegen_vec4_acc_op_min;
421      break;
422   case ppir_op_ddx:
423      f->op = ppir_codegen_vec4_acc_op_dFdx;
424      break;
425   case ppir_op_ddy:
426      f->op = ppir_codegen_vec4_acc_op_dFdy;
427      break;
428   default:
429      break;
430   }
431
432   ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src;
433   index = ppir_target_get_src_reg_index(src);
434
435   if (src->type == ppir_target_pipeline &&
436       src->pipeline == ppir_pipeline_reg_vmul)
437      f->mul_in = true;
438   else
439      f->arg0_source = index >> 2;
440
441   f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
442   f->arg0_absolute = src->absolute;
443   f->arg0_negate = src->negate;
444
445   if (++src < alu->src + alu->num_src) {
446      index = ppir_target_get_src_reg_index(src);
447      f->arg1_source = index >> 2;
448      f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift);
449      f->arg1_absolute = src->absolute;
450      f->arg1_negate = src->negate;
451   }
452}
453
454static void ppir_codegen_encode_scl_add(ppir_node *node, void *code)
455{
456   ppir_codegen_field_float_acc *f = code;
457   ppir_alu_node *alu = ppir_node_to_alu(node);
458
459   ppir_dest *dest = &alu->dest;
460   int dest_component = ffs(dest->write_mask) - 1;
461   assert(dest_component >= 0);
462
463   f->dest = ppir_target_get_dest_reg_index(dest) + dest_component;
464   f->output_en = true;
465   f->dest_modifier = ppir_codegen_get_outmod(dest->modifier);
466
467   switch (node->op) {
468   case ppir_op_add:
469      f->op = shift_to_op(alu->shift);
470      break;
471   case ppir_op_mov:
472      f->op = ppir_codegen_float_acc_op_mov;
473      break;
474   case ppir_op_max:
475      f->op = ppir_codegen_float_acc_op_max;
476      break;
477   case ppir_op_min:
478      f->op = ppir_codegen_float_acc_op_min;
479      break;
480   case ppir_op_floor:
481      f->op = ppir_codegen_float_acc_op_floor;
482      break;
483   case ppir_op_ceil:
484      f->op = ppir_codegen_float_acc_op_ceil;
485      break;
486   case ppir_op_fract:
487      f->op = ppir_codegen_float_acc_op_fract;
488      break;
489   case ppir_op_gt:
490      f->op = ppir_codegen_float_acc_op_gt;
491      break;
492   case ppir_op_ge:
493      f->op = ppir_codegen_float_acc_op_ge;
494      break;
495   case ppir_op_eq:
496      f->op = ppir_codegen_float_acc_op_eq;
497      break;
498   case ppir_op_ne:
499      f->op = ppir_codegen_float_acc_op_ne;
500      break;
501   case ppir_op_select:
502      f->op = ppir_codegen_float_acc_op_sel;
503      break;
504   case ppir_op_ddx:
505      f->op = ppir_codegen_float_acc_op_dFdx;
506      break;
507   case ppir_op_ddy:
508      f->op = ppir_codegen_float_acc_op_dFdy;
509      break;
510   default:
511      break;
512   }
513
514   ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src;
515   if (src->type == ppir_target_pipeline &&
516       src->pipeline == ppir_pipeline_reg_fmul)
517      f->mul_in = true;
518   else
519      f->arg0_source = get_scl_reg_index(src, dest_component);
520   f->arg0_absolute = src->absolute;
521   f->arg0_negate = src->negate;
522
523   if (++src < alu->src + alu->num_src) {
524      f->arg1_source = get_scl_reg_index(src, dest_component);
525      f->arg1_absolute = src->absolute;
526      f->arg1_negate = src->negate;
527   }
528}
529
530static void ppir_codegen_encode_combine(ppir_node *node, void *code)
531{
532   ppir_codegen_field_combine *f = code;
533   ppir_alu_node *alu = ppir_node_to_alu(node);
534
535   switch (node->op) {
536   case ppir_op_rsqrt:
537   case ppir_op_log2:
538   case ppir_op_exp2:
539   case ppir_op_rcp:
540   case ppir_op_sqrt:
541   case ppir_op_sin:
542   case ppir_op_cos:
543   {
544      f->scalar.dest_vec = false;
545      f->scalar.arg1_en = false;
546
547      ppir_dest *dest = &alu->dest;
548      int dest_component = ffs(dest->write_mask) - 1;
549      assert(dest_component >= 0);
550      f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component;
551      f->scalar.dest_modifier = ppir_codegen_get_outmod(dest->modifier);
552
553      ppir_src *src = alu->src;
554      f->scalar.arg0_src = get_scl_reg_index(src, dest_component);
555      f->scalar.arg0_absolute = src->absolute;
556      f->scalar.arg0_negate = src->negate;
557
558      switch (node->op) {
559      case ppir_op_rsqrt:
560         f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt;
561         break;
562      case ppir_op_log2:
563         f->scalar.op = ppir_codegen_combine_scalar_op_log2;
564         break;
565      case ppir_op_exp2:
566         f->scalar.op = ppir_codegen_combine_scalar_op_exp2;
567         break;
568      case ppir_op_rcp:
569         f->scalar.op = ppir_codegen_combine_scalar_op_rcp;
570         break;
571      case ppir_op_sqrt:
572         f->scalar.op = ppir_codegen_combine_scalar_op_sqrt;
573         break;
574      case ppir_op_sin:
575         f->scalar.op = ppir_codegen_combine_scalar_op_sin;
576         break;
577      case ppir_op_cos:
578         f->scalar.op = ppir_codegen_combine_scalar_op_cos;
579         break;
580      default:
581         break;
582      }
583      break;
584   }
585   default:
586      break;
587   }
588}
589
590static void ppir_codegen_encode_store_temp(ppir_node *node, void *code)
591{
592   assert(node->op == ppir_op_store_temp);
593
594   ppir_codegen_field_temp_write *f = code;
595   ppir_store_node *snode = ppir_node_to_store(node);
596   int num_components = snode->num_components;
597
598   f->temp_write.dest = 0x03; // 11 - temporary
599   f->temp_write.source = snode->src.reg->index;
600
601   int alignment = num_components == 4 ? 2 : num_components - 1;
602   f->temp_write.alignment = alignment;
603   f->temp_write.index = snode->index << (2 - alignment);
604
605   f->temp_write.offset_reg = snode->index >> 2;
606}
607
608static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
609{
610   for (int i = 0; i < constant->num; i++)
611      code[i] = _mesa_float_to_half(constant->value[i].f);
612}
613
614static void ppir_codegen_encode_discard(ppir_node *node, void *code)
615{
616   ppir_codegen_field_branch *b = code;
617   assert(node->op == ppir_op_discard);
618
619   b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
620   b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
621   b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
622}
623
624static void ppir_codegen_encode_branch(ppir_node *node, void *code)
625{
626   ppir_codegen_field_branch *b = code;
627   ppir_branch_node *branch;
628   ppir_instr *target_instr;
629   ppir_block *target;
630   if (node->op == ppir_op_discard) {
631      ppir_codegen_encode_discard(node, code);
632      return;
633   }
634
635   assert(node->op == ppir_op_branch);
636   branch = ppir_node_to_branch(node);
637
638   b->branch.unknown_0 = 0x0;
639   b->branch.unknown_1 = 0x0;
640
641   if (branch->num_src == 2) {
642      b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0);
643      b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0);
644      b->branch.cond_gt = branch->cond_gt;
645      b->branch.cond_eq = branch->cond_eq;
646      b->branch.cond_lt = branch->cond_lt;
647   } else if (branch->num_src == 0) {
648      /* Unconditional branch */
649      b->branch.arg0_source = 0;
650      b->branch.arg1_source = 0;
651      b->branch.cond_gt = true;
652      b->branch.cond_eq = true;
653      b->branch.cond_lt = true;
654   } else {
655      assert(false);
656   }
657
658   target = branch->target;
659   while (list_is_empty(&target->instr_list)) {
660      if (!target->list.next)
661         break;
662      target = list_entry(target->list.next, ppir_block, list);
663   }
664
665   assert(!list_is_empty(&target->instr_list));
666
667   target_instr = list_first_entry(&target->instr_list, ppir_instr, list);
668   b->branch.target = target_instr->offset - node->instr->offset;
669   b->branch.next_count = target_instr->encode_size;
670}
671
/* Signature shared by all slot encoders: the node occupying the slot and a
 * zero-initialised scratch buffer that receives the encoded field. */
typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);

/* Encoder for each instruction slot, indexed by PPIR_INSTR_SLOT_*.
 * encode_instr() calls the entry for every occupied slot, so any slot that
 * can be occupied needs an entry here; slots not listed are NULL. */
static const ppir_codegen_instr_slot_encode_func
ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
   [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying,
   [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld,
   [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform,
   [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul,
   [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul,
   [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add,
   [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
   [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
   [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
   [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
};
687
/* Size in bits of each slot's encoded field, indexed by slot number.
 * NOTE(review): the order presumably follows the PPIR_INSTR_SLOT_* enum
 * (varying, texld, uniform, vec mul, scl mul, vec add, scl add, combine,
 * store temp, branch) - confirm against the enum definition. */
static const int ppir_codegen_field_size[] = {
   34, 62, 41, 43, 30, 44, 31, 30, 41, 73
};
691
/*
 * Number of 32-bit words needed to hold `size` bits (size rounded up to a
 * multiple of 32, then divided by 32).
 */
static inline int align_to_word(int size)
{
   const int word_shift = 5;                       /* 32 bits per word */
   const int round_up = (1 << word_shift) - 1;     /* 0x1f */

   return (size + round_up) >> word_shift;
}
696
697static int get_instr_encode_size(ppir_instr *instr)
698{
699   int size = 0;
700
701   for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
702      if (instr->slots[i])
703         size += ppir_codegen_field_size[i];
704   }
705
706   for (int i = 0; i < 2; i++) {
707      if (instr->constant[i].num)
708         size += 64;
709   }
710
711   return align_to_word(size) + 1;
712}
713
/*
 * Copy `src_size` bits from the start of `src` into `dst`, beginning at bit
 * offset `dst_offset` of `dst`.
 *
 * When the destination offset is not byte-aligned the source bits are OR-ed
 * into the destination, so the destination bits must be zero beforehand.
 * When it is byte-aligned the bytes are copied with memcpy.
 *
 * Both paths touch whole bytes: the bits of the last source byte beyond
 * `src_size` are transferred too, so callers must keep them zero.
 */
static void bitcopy(void *dst, int dst_offset, void *src, int src_size)
{
   unsigned char *cpy_dst = dst, *cpy_src = src;
   int off1 = dst_offset & 0x07;

   cpy_dst += (dst_offset >> 3);

   if (off1) {
      int off2 = 0x08 - off1;
      int cpy_size = 0;
      while (1) {
         /* Low part of the source byte fills the rest of this dst byte. */
         *cpy_dst |= *cpy_src << off1;
         cpy_dst++;

         cpy_size += off2;
         if (cpy_size >= src_size)
            break;

         /* High part of the source byte starts the next dst byte. */
         *cpy_dst |= *cpy_src >> off2;
         cpy_src++;

         cpy_size += off1;
         if (cpy_size >= src_size)
            break;
      }
   }
   else {
      /* The destination is only byte-aligned here, so copy just the bytes
       * that hold the field. Rounding the length up to a 32-bit word could
       * write up to three bytes past the space reserved for the field. */
      memcpy(cpy_dst, cpy_src, (src_size + 7) >> 3);
   }
}
743
744static int encode_instr(ppir_instr *instr, void *code, void *last_code)
745{
746   int size = 0;
747   ppir_codegen_ctrl *ctrl = code;
748
749   for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
750      if (instr->slots[i]) {
751         /* max field size (73), align to dword */
752         uint8_t output[12] = {0};
753
754         ppir_codegen_encode_slot[i](instr->slots[i], output);
755         bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]);
756
757         size += ppir_codegen_field_size[i];
758         ctrl->fields |= 1 << i;
759      }
760   }
761
762   if (instr->slots[PPIR_INSTR_SLOT_TEXLD])
763      ctrl->sync = true;
764
765   if (instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD]) {
766      ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD];
767      if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
768         ctrl->sync = true;
769   }
770
771   if (instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD]) {
772      ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD];
773      if (node->op == ppir_op_ddx || node->op == ppir_op_ddy)
774         ctrl->sync = true;
775   }
776
777   for (int i = 0; i < 2; i++) {
778      if (instr->constant[i].num) {
779         uint16_t output[4] = {0};
780
781         ppir_codegen_encode_const(instr->constant + i, output);
782         bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16);
783
784         size += 64;
785         ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i);
786      }
787   }
788
789   size = align_to_word(size) + 1;
790
791   ctrl->count = size;
792   if (instr->stop)
793      ctrl->stop = true;
794
795   if (last_code) {
796      ppir_codegen_ctrl *last_ctrl = last_code;
797      last_ctrl->next_count = size;
798      last_ctrl->prefetch = true;
799   }
800
801   return size;
802}
803
804static void ppir_codegen_print_prog(ppir_compiler *comp)
805{
806   uint32_t *prog = comp->prog->shader;
807   unsigned offset = 0;
808
809   printf("========ppir codegen========\n");
810   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
811      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
812         printf("%03d (@%6d): ", instr->index, instr->offset);
813         int n = prog[0] & 0x1f;
814         for (int i = 0; i < n; i++) {
815            if (i && i % 6 == 0)
816               printf("\n    ");
817            printf("%08x ", prog[i]);
818         }
819         printf("\n");
820         ppir_disassemble_instr(prog, offset, stdout);
821         prog += n;
822         offset += n;
823      }
824   }
825   printf("-----------------------\n");
826}
827
828bool ppir_codegen_prog(ppir_compiler *comp)
829{
830   int size = 0;
831   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
832      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
833         instr->offset = size;
834         instr->encode_size = get_instr_encode_size(instr);
835         size += instr->encode_size;
836      }
837      /* Set stop flag for the last instruction if block has stop flag */
838      if (block->stop) {
839         ppir_instr *instr = list_last_entry(&block->instr_list, ppir_instr, list);
840         instr->stop = true;
841      }
842   }
843
844   uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t));
845   if (!prog)
846      return false;
847
848   uint32_t *code = prog, *last_code = NULL;
849   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
850      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
851         int offset = encode_instr(instr, code, last_code);
852         last_code = code;
853         code += offset;
854      }
855   }
856
857   if (comp->prog->shader)
858      ralloc_free(comp->prog->shader);
859
860   comp->prog->shader = prog;
861   comp->prog->state.shader_size = size * sizeof(uint32_t);
862
863   if (lima_debug & LIMA_DEBUG_PP)
864      ppir_codegen_print_prog(comp);
865
866   return true;
867}
868