1/*
2 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#ifndef __AGX_COMPILER_H
26#define __AGX_COMPILER_H
27
28#include "compiler/nir/nir.h"
29#include "util/u_math.h"
30#include "util/half_float.h"
31#include "util/u_dynarray.h"
32#include "util/u_worklist.h"
33#include "agx_compile.h"
34#include "agx_opcodes.h"
35#include "agx_minifloat.h"
36
37#ifdef __cplusplus
38extern "C" {
39#endif
40
/* Debug flags, stored bitwise in the global agx_debug. Presumably parsed from
 * a driver debug environment variable -- set externally to this file. */
enum agx_dbg {
   AGX_DBG_MSGS        = BITFIELD_BIT(0), /* print compiler debug messages */
   AGX_DBG_SHADERS     = BITFIELD_BIT(1), /* dump shaders during compile */
   AGX_DBG_SHADERDB    = BITFIELD_BIT(2), /* report shader-db style stats */
   AGX_DBG_VERBOSE     = BITFIELD_BIT(3), /* extra-verbose IR printing */
   AGX_DBG_INTERNAL    = BITFIELD_BIT(4), /* include internal shaders */
   AGX_DBG_NOVALIDATE  = BITFIELD_BIT(5), /* skip agx_validate IR checks */
};
49
50extern int agx_debug;
51
52/* r0-r127 inclusive, as pairs of 16-bits, gives 256 registers */
53#define AGX_NUM_REGS (256)
54
/* What an agx_index operand refers to (stored in agx_index::type) */
enum agx_index_type {
   AGX_INDEX_NULL = 0,      /* no value referenced */
   AGX_INDEX_NORMAL = 1,    /* virtual (SSA) value, prior to RA */
   AGX_INDEX_IMMEDIATE = 2, /* inline immediate, fits in 16 bits */
   AGX_INDEX_UNIFORM = 3,   /* uniform file, indexed in half-words */
   AGX_INDEX_REGISTER = 4,  /* physical register, indexed in half-words */
};
62
/* Bit size class of a value. The register file is addressed in 16-bit units,
 * so each class occupies 1, 2 or 4 units (see agx_size_align_16). */
enum agx_size {
   AGX_SIZE_16 = 0,
   AGX_SIZE_32 = 1,
   AGX_SIZE_64 = 2
};
68
69static inline unsigned
70agx_size_align_16(enum agx_size size)
71{
72   switch (size) {
73   case AGX_SIZE_16: return 1;
74   case AGX_SIZE_32: return 2;
75   case AGX_SIZE_64: return 4;
76   }
77
78   unreachable("Invalid size");
79}
80
/* Handle naming an operand (source or destination) of an instruction.
 * Deliberately packed into 32 bits so instructions stay small. */
typedef struct {
   /* Sufficient for as many SSA values as we need. Immediates and uniforms fit in 16-bits */
   unsigned value : 22;

   /* Indicates that this source kills the referenced value (because it is the
    * last use in a block and the source is not live after the block). Set by
    * liveness analysis. */
   bool kill : 1;

   /* Cache hints */
   bool cache : 1;
   bool discard : 1;

   /* src - float modifiers */
   bool abs : 1;
   bool neg : 1;

   /* Size class of the referenced value (enum agx_size) */
   enum agx_size size : 2;

   /* Interpretation of the value field (enum agx_index_type) */
   enum agx_index_type type : 3;
} agx_index;
101
102static inline agx_index
103agx_get_index(unsigned value, enum agx_size size)
104{
105   return (agx_index) {
106      .value = value,
107      .size = size,
108      .type = AGX_INDEX_NORMAL,
109   };
110}
111
112static inline agx_index
113agx_immediate(uint16_t imm)
114{
115   return (agx_index) {
116      .value = imm,
117      .size = AGX_SIZE_32,
118      .type = AGX_INDEX_IMMEDIATE,
119   };
120}
121
122static inline agx_index
123agx_immediate_f(float f)
124{
125   assert(agx_minifloat_exact(f));
126   return agx_immediate(agx_minifloat_encode(f));
127}
128
129/* in half-words, specify r0h as 1, r1 as 2... */
130static inline agx_index
131agx_register(uint8_t imm, enum agx_size size)
132{
133   return (agx_index) {
134      .value = imm,
135      .size = size,
136      .type = AGX_INDEX_REGISTER,
137   };
138}
139
140/* Also in half-words */
141static inline agx_index
142agx_uniform(uint8_t imm, enum agx_size size)
143{
144   return (agx_index) {
145      .value = imm,
146      .size = size,
147      .type = AGX_INDEX_UNIFORM,
148   };
149}
150
151static inline agx_index
152agx_null()
153{
154   return (agx_index) { .type = AGX_INDEX_NULL };
155}
156
157static inline agx_index
158agx_zero()
159{
160   return agx_immediate(0);
161}
162
163/* IEEE 754 additive identity -0.0, stored as an 8-bit AGX minifloat: mantissa
164 * = exponent = 0, sign bit set */
165
166static inline agx_index
167agx_negzero()
168{
169   return agx_immediate(0x80);
170}
171
172static inline agx_index
173agx_abs(agx_index idx)
174{
175   idx.abs = true;
176   idx.neg = false;
177   return idx;
178}
179
180static inline agx_index
181agx_neg(agx_index idx)
182{
183   idx.neg ^= true;
184   return idx;
185}
186
187/* Replaces an index, preserving any modifiers */
188
189static inline agx_index
190agx_replace_index(agx_index old, agx_index replacement)
191{
192   replacement.abs = old.abs;
193   replacement.neg = old.neg;
194   return replacement;
195}
196
197static inline bool
198agx_is_null(agx_index idx)
199{
200   return idx.type == AGX_INDEX_NULL;
201}
202
203/* Compares equivalence as references */
204
205static inline bool
206agx_is_equiv(agx_index left, agx_index right)
207{
208   return (left.type == right.type) && (left.value == right.value);
209}
210
211#define AGX_MAX_DESTS 4
212#define AGX_MAX_SRCS 5
213
/* Integer comparison conditions, hardware encoding. U* presumably compare
 * unsigned and S* signed (per naming) -- encodings 3 and 7 are unknown. */
enum agx_icond {
   AGX_ICOND_UEQ = 0,
   AGX_ICOND_ULT = 1,
   AGX_ICOND_UGT = 2,
   /* unknown */
   AGX_ICOND_SEQ = 4,
   AGX_ICOND_SLT = 5,
   AGX_ICOND_SGT = 6,
   /* unknown */
};
224
/* Floating-point comparison conditions, hardware encoding. NOTE(review): the
 * *N variants presumably differ in NaN treatment -- confirm against the
 * hardware documentation. Encoding 4 is unknown. */
enum agx_fcond {
   AGX_FCOND_EQ = 0,
   AGX_FCOND_LT = 1,
   AGX_FCOND_GT = 2,
   AGX_FCOND_LTN = 3,
   /* unknown */
   AGX_FCOND_GE = 5,
   AGX_FCOND_LE = 6,
   AGX_FCOND_GTN = 7,
};
235
/* Rounding mode for conversions: round-to-zero or round-to-nearest-even */
enum agx_round {
   AGX_ROUND_RTZ = 0,
   AGX_ROUND_RTE = 1,
};
240
/* Hardware encodings for the convert operation, selecting the integer
 * width/signedness converted to or from float. Encodings 2 and 3 are not
 * listed here. */
enum agx_convert {
   AGX_CONVERT_U8_TO_F = 0,
   AGX_CONVERT_S8_TO_F = 1,
   AGX_CONVERT_F_TO_U16 = 4,
   AGX_CONVERT_F_TO_S16 = 5,
   AGX_CONVERT_U16_TO_F = 6,
   AGX_CONVERT_S16_TO_F = 7,
   AGX_CONVERT_F_TO_U32 = 8,
   AGX_CONVERT_F_TO_S32 = 9,
   AGX_CONVERT_U32_TO_F = 10,
   AGX_CONVERT_S32_TO_F = 11
};
253
/* Level-of-detail selection mode for texture instructions (hardware
 * encoding). Note the last two constants lack the _MODE_ infix; kept as-is
 * since callers depend on these names. */
enum agx_lod_mode {
   AGX_LOD_MODE_AUTO_LOD = 0,
   AGX_LOD_MODE_AUTO_LOD_BIAS = 5,
   AGX_LOD_MODE_LOD_MIN = 6,
   AGX_LOD_GRAD = 8,
   AGX_LOD_GRAD_MIN = 12
};
261
/* Texture dimensionality, hardware encoding (see agx_instr::dim) */
enum agx_dim {
   AGX_DIM_TEX_1D = 0,
   AGX_DIM_TEX_1D_ARRAY = 1,
   AGX_DIM_TEX_2D = 2,
   AGX_DIM_TEX_2D_ARRAY = 3,
   AGX_DIM_TEX_2D_MS = 4,
   AGX_DIM_TEX_3D = 5,
   AGX_DIM_TEX_CUBE = 6,
   AGX_DIM_TEX_CUBE_ARRAY = 7
};
272
/* Forward declare for branch target */
struct agx_block;

/* A single AGX IR instruction, kept in an intrusive linked list within its
 * block. Immediate state is packed into bitfields to keep the struct small. */
typedef struct {
   /* Must be first */
   struct list_head link;

   /* The sources list.
    *
    * As a special case to workaround ordering issues when translating phis, if
    * nr_srcs == 0 and the opcode is PHI, holds a pointer to the NIR phi node.
    */
   union {
      agx_index *src;
      nir_phi_instr *phi;
   };

   /* Operation performed (see agx_opcodes.h) */
   enum agx_opcode op;

   /* Data flow */
   agx_index dest[AGX_MAX_DESTS];

   /* Number of entries in the src array */
   unsigned nr_srcs;

   /* Opcode-specific immediate payload; which member is meaningful depends
    * on op */
   union {
      uint32_t imm;
      uint32_t writeout;
      uint32_t truth_table;
      uint32_t component;
      uint32_t channels;
      uint32_t bfi_mask;
      enum agx_sr sr;
      enum agx_icond icond;
      enum agx_fcond fcond;
      enum agx_format format;
      enum agx_round round;
      enum agx_lod_mode lod_mode;
      struct agx_block *target;
   };

   /* For load varying */
   bool perspective : 1;

   /* Invert icond/fcond */
   bool invert_cond : 1;

   /* TODO: Handle tex ops more efficient */
   enum agx_dim dim : 3;

   /* Final st_vary op */
   bool last : 1;

   /* Shift for a bitwise or memory op (conflicts with format for memory ops) */
   unsigned shift : 4;

   /* Scoreboard index, 0 or 1. Leave as 0 for instructions that do not require
    * scoreboarding (everything but memory load/store and texturing). */
   unsigned scoreboard : 1;

   /* Number of nested control flow layers to jump by */
   unsigned nest : 2;

   /* Output modifiers */
   bool saturate : 1;
   unsigned mask : 4;
} agx_instr;
339
struct agx_block;

/* A basic block of AGX IR: a straight-line run of instructions, with control
 * flow only via the successor links at the end. */
typedef struct agx_block {
   /* Link to next block. Must be first */
   struct list_head link;

   /* List of instructions emitted for the current block */
   struct list_head instructions;

   /* Index of the block in source order */
   unsigned index;

   /* Control flow graph: up to two successors (NULL if unused), and a
    * dynamic array of agx_block * predecessors */
   struct agx_block *successors[2];
   struct util_dynarray predecessors;
   bool unconditional_jumps;

   /* Liveness analysis results */
   BITSET_WORD *live_in;
   BITSET_WORD *live_out;

   /* Register allocation. NOTE(review): presumably the set of registers live
    * at the end of the block -- confirm against the RA pass. */
   BITSET_DECLARE(regs_out, AGX_NUM_REGS);

   /* Offset of the block in the emitted binary */
   off_t offset;

   /** Available for passes to use for metadata */
   uint8_t pass_flags;
} agx_block;
370
/* Per-shader compilation context, threaded through every compiler pass from
 * NIR translation to binary packing. */
typedef struct {
   /* Source NIR shader being compiled */
   nir_shader *nir;
   gl_shader_stage stage;
   struct list_head blocks; /* list of agx_block */
   struct agx_shader_info *out; /* results reported back to the driver */
   struct agx_shader_key *key;  /* compile-time options from the driver */

   /* Remapping table for varyings indexed by driver_location */
   unsigned varyings[AGX_MAX_VARYINGS];

   /* Place to start pushing new values */
   unsigned push_base;

   /* Maximum block index */
   unsigned num_blocks;

   /* For creating temporaries: next unused virtual value index */
   unsigned alloc;

   /* I don't really understand how writeout ops work yet */
   bool did_writeout;

   /* Has r0l been zeroed yet due to control flow? */
   bool any_cf;

   /* Number of nested control flow structures within the innermost loop. Since
    * NIR is just loop and if-else, this is the number of nested if-else
    * statements in the loop */
   unsigned loop_nesting;

   /* During instruction selection, for inserting control flow */
   agx_block *current_block;
   agx_block *continue_block;
   agx_block *break_block;
   agx_block *after_block;
   agx_block **indexed_nir_blocks;

   /* During instruction selection, map from vector agx_index to its scalar
    * components, populated by a split. */
   struct hash_table_u64 *allocated_vec;

   /* Stats for shader-db */
   unsigned loop_count;
   unsigned spills;
   unsigned fills;
} agx_context;
417
/* Unlink an instruction from its block. Does not free the instruction. */
static inline void
agx_remove_instruction(agx_instr *ins)
{
   list_del(&ins->link);
}
423
424static inline agx_index
425agx_temp(agx_context *ctx, enum agx_size size)
426{
427   return agx_get_index(ctx->alloc++, size);
428}
429
430static enum agx_size
431agx_size_for_bits(unsigned bits)
432{
433   switch (bits) {
434   case 1:
435   case 16: return AGX_SIZE_16;
436   case 32: return AGX_SIZE_32;
437   case 64: return AGX_SIZE_64;
438   default: unreachable("Invalid bitsize");
439   }
440}
441
442static inline agx_index
443agx_src_index(nir_src *src)
444{
445   assert(src->is_ssa);
446
447   return agx_get_index(src->ssa->index,
448         agx_size_for_bits(nir_src_bit_size(*src)));
449}
450
451static inline agx_index
452agx_dest_index(nir_dest *dst)
453{
454   assert(dst->is_ssa);
455
456   return agx_get_index(dst->ssa.index,
457         agx_size_for_bits(nir_dest_bit_size(*dst)));
458}
459
460static inline agx_index
461agx_vec_for_dest(agx_context *ctx, nir_dest *dest)
462{
463   return agx_temp(ctx, agx_size_for_bits(nir_dest_bit_size(*dest)));
464}
465
/* Allocate a temporary sized to match the destination of a NIR intrinsic. */
static inline agx_index
agx_vec_for_intr(agx_context *ctx, nir_intrinsic_instr *instr)
{
   return agx_vec_for_dest(ctx, &instr->dest);
}
471
/* Iterators for AGX IR */

#define agx_foreach_block(ctx, v) \
   list_for_each_entry(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_rev(ctx, v) \
   list_for_each_entry_rev(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_from(ctx, from, v) \
   list_for_each_entry_from(agx_block, v, from, &ctx->blocks, link)

#define agx_foreach_block_from_rev(ctx, from, v) \
   list_for_each_entry_from_rev(agx_block, v, from, &ctx->blocks, link)

/* Per-block instruction iterators. _safe variants tolerate removal of the
 * current instruction; _from variants begin at a given instruction. */
#define agx_foreach_instr_in_block(block, v) \
   list_for_each_entry(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_rev(block, v) \
   list_for_each_entry_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe(block, v) \
   list_for_each_entry_safe(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe_rev(block, v) \
   list_for_each_entry_safe_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from(block, v, from) \
   list_for_each_entry_from(agx_instr, v, from, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from_rev(block, v, from) \
   list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions, link)

/* Whole-shader instruction iterators. NOTE: these introduce an implicit
 * v_block loop variable into the enclosing scope. */
#define agx_foreach_instr_global(ctx, v) \
   agx_foreach_block(ctx, v_block) \
      agx_foreach_instr_in_block(v_block, v)

#define agx_foreach_instr_global_rev(ctx, v) \
   agx_foreach_block_rev(ctx, v_block) \
      agx_foreach_instr_in_block_rev(v_block, v)

#define agx_foreach_instr_global_safe(ctx, v) \
   agx_foreach_block(ctx, v_block) \
      agx_foreach_instr_in_block_safe(v_block, v)

#define agx_foreach_instr_global_safe_rev(ctx, v) \
   agx_foreach_block_rev(ctx, v_block) \
      agx_foreach_instr_in_block_safe_rev(v_block, v)

/* Based on set_foreach, expanded with automatic type casts */

/* NOTE(review): declares v and _v directly in the enclosing scope, so this
 * may be used at most once per scope. Iteration stops at the first NULL
 * successor. */
#define agx_foreach_successor(blk, v) \
   agx_block *v; \
   agx_block **_v; \
   for (_v = (agx_block **) &blk->successors[0], \
         v = *_v; \
         v != NULL && _v < (agx_block **) &blk->successors[2]; \
         _v++, v = *_v) \

#define agx_foreach_predecessor(blk, v) \
   util_dynarray_foreach(&blk->predecessors, agx_block *, v)

/* Iterate operand slots by index. agx_foreach_dest visits every slot of the
 * fixed-size dest array, whether or not it is used. */
#define agx_foreach_src(ins, v) \
   for (unsigned v = 0; v < ins->nr_srcs; ++v)

#define agx_foreach_dest(ins, v) \
   for (unsigned v = 0; v < ARRAY_SIZE(ins->dest); ++v)
538
539/*
540 * Find the index of a predecessor, used as the implicit order of phi sources.
541 */
542static inline unsigned
543agx_predecessor_index(agx_block *succ, agx_block *pred)
544{
545   unsigned index = 0;
546
547   agx_foreach_predecessor(succ, x) {
548      if (*x == pred) return index;
549
550      index++;
551   }
552
553   unreachable("Invalid predecessor");
554}
555
/* Instruction immediately preceding ins in its block. NOTE(review): if ins is
 * the first instruction, this yields the list sentinel cast to agx_instr,
 * not a real instruction -- callers must ensure a predecessor exists. */
static inline agx_instr *
agx_prev_op(agx_instr *ins)
{
   return list_last_entry(&(ins->link), agx_instr, link);
}
561
/* Instruction immediately following ins in its block. NOTE(review): if ins is
 * the last instruction, this yields the list sentinel cast to agx_instr --
 * callers must ensure a successor exists. */
static inline agx_instr *
agx_next_op(agx_instr *ins)
{
   return list_first_entry(&(ins->link), agx_instr, link);
}
567
/* Block following the given block in source order. NOTE(review): for the last
 * block this yields the list sentinel cast to agx_block -- callers must
 * ensure a following block exists. */
static inline agx_block *
agx_next_block(agx_block *block)
{
   return list_first_entry(&(block->link), agx_block, link);
}
573
/* Final block of the shader in source order, asserted to be the unique exit
 * (no successors). */
static inline agx_block *
agx_exit_block(agx_context *ctx)
{
   agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);
   assert(!last->successors[0] && !last->successors[1]);
   return last;
}
581
/* Block worklists (e.g. for data-flow analysis), keyed by agx_block::index */
#define agx_worklist_init(ctx, w) u_worklist_init(w, ctx->num_blocks, ctx)
#define agx_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define agx_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
#define agx_worklist_peek_head(w) u_worklist_peek_head(w, agx_block, index)
#define agx_worklist_pop_head(w)  u_worklist_pop_head( w, agx_block, index)
#define agx_worklist_peek_tail(w) u_worklist_peek_tail(w, agx_block, index)
#define agx_worklist_pop_tail(w)  u_worklist_pop_tail( w, agx_block, index)
589
/* Like in NIR, for use with the builder */

enum agx_cursor_option {
   agx_cursor_after_block,  /* insert at the physical end of a block */
   agx_cursor_before_instr, /* insert immediately before an instruction */
   agx_cursor_after_instr   /* insert immediately after an instruction */
};

/* Insertion point for the IR builder */
typedef struct {
   enum agx_cursor_option option;

   /* block is valid for agx_cursor_after_block, instr otherwise */
   union {
      agx_block *block;
      agx_instr *instr;
   };
} agx_cursor;
606
607static inline agx_cursor
608agx_after_block(agx_block *block)
609{
610   return (agx_cursor) {
611      .option = agx_cursor_after_block,
612      .block = block
613   };
614}
615
616static inline agx_cursor
617agx_before_instr(agx_instr *instr)
618{
619   return (agx_cursor) {
620      .option = agx_cursor_before_instr,
621      .instr = instr
622   };
623}
624
625static inline agx_cursor
626agx_after_instr(agx_instr *instr)
627{
628   return (agx_cursor) {
629      .option = agx_cursor_after_instr,
630      .instr = instr
631   };
632}
633
634/*
635 * Get a cursor inserting at the logical end of the block. In particular, this
636 * is before branches or control flow instructions, which occur after the
637 * logical end but before the physical end.
638 */
639static inline agx_cursor
640agx_after_block_logical(agx_block *block)
641{
642   /* Search for a p_logical_end */
643   agx_foreach_instr_in_block_rev(block, I) {
644      if (I->op == AGX_OPCODE_P_LOGICAL_END)
645         return agx_before_instr(I);
646   }
647
648   /* If there's no p_logical_end, use the physical end */
649   return agx_after_block(block);
650}
651
/* IR builder in terms of cursor infrastructure */

typedef struct {
   agx_context *shader; /* shader the builder emits into */
   agx_cursor cursor;   /* current insertion point */
} agx_builder;
658
659static inline agx_builder
660agx_init_builder(agx_context *ctx, agx_cursor cursor)
661{
662   return (agx_builder) {
663      .shader = ctx,
664      .cursor = cursor
665   };
666}
667
668/* Insert an instruction at the cursor and move the cursor */
669
670static inline void
671agx_builder_insert(agx_cursor *cursor, agx_instr *I)
672{
673   switch (cursor->option) {
674   case agx_cursor_after_instr:
675      list_add(&I->link, &cursor->instr->link);
676      cursor->instr = I;
677      return;
678
679   case agx_cursor_after_block:
680      list_addtail(&I->link, &cursor->block->instructions);
681      cursor->option = agx_cursor_after_instr;
682      cursor->instr = I;
683      return;
684
685   case agx_cursor_before_instr:
686      list_addtail(&I->link, &cursor->instr->link);
687      cursor->option = agx_cursor_after_instr;
688      cursor->instr = I;
689      return;
690   }
691
692   unreachable("Invalid cursor option");
693}
694
695/* Uniform file management */
696
697agx_index
698agx_indexed_sysval(agx_context *ctx, enum agx_push_type type, enum agx_size size,
699      unsigned index, unsigned length);
700
701/* Routines defined for AIR */
702
703void agx_print_instr(agx_instr *I, FILE *fp);
704void agx_print_block(agx_block *block, FILE *fp);
705void agx_print_shader(agx_context *ctx, FILE *fp);
706void agx_optimizer(agx_context *ctx);
707void agx_lower_pseudo(agx_context *ctx);
708void agx_dce(agx_context *ctx);
709void agx_ra(agx_context *ctx);
710void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission);
711
#ifndef NDEBUG
void agx_validate(agx_context *ctx, const char *after_str);
#else
/* Validation is compiled out entirely in release builds */
static inline void agx_validate(UNUSED agx_context *ctx, UNUSED const char *after_str) { return; }
#endif
717
718unsigned agx_write_registers(agx_instr *I, unsigned d);
719
/* One register-to-register copy, consumed by agx_emit_parallel_copies */
struct agx_copy {
   /* Base register destination of the copy */
   unsigned dest;

   /* Base register source of the copy */
   unsigned src;

   /* Size of the copy */
   enum agx_size size;

   /* Whether the copy has been handled. Callers must leave to false. */
   bool done;
};
733
734void
735agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies, unsigned n);
736
737void agx_compute_liveness(agx_context *ctx);
738void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);
739
740#ifdef __cplusplus
741} /* extern C */
742#endif
743
744#endif
745