/*
 * Copyright (C) 2019-2020 Collabora, Ltd.
 * Copyright (C) 2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef _MDG_COMPILER_H
#define _MDG_COMPILER_H

#include "midgard.h"
#include "helpers.h"
#include "midgard_compile.h"
#include "midgard_ops.h"

#include "util/hash_table.h"
#include "util/u_dynarray.h"
#include "util/set.h"
#include "util/list.h"
#include "util/u_math.h"

#include "compiler/nir_types.h"
#include "compiler/nir/nir.h"
#include "panfrost/util/pan_ir.h"
#include "panfrost/util/lcra.h"

/* Forward declare */
struct midgard_block;

/* Target types. Defaults to TARGET_GOTO (the type corresponding directly to
 * the hardware), hence why that must be zero. TARGET_DISCARD signals this
 * instruction is actually a discard op. */

#define TARGET_GOTO 0
#define TARGET_BREAK 1
#define TARGET_CONTINUE 2
#define TARGET_DISCARD 3
#define TARGET_TILEBUF_WAIT 4

typedef struct midgard_branch {
        /* If conditional, the condition is specified in r31.w */
        bool conditional;

        /* For conditionals, if this is true, we branch on FALSE. If false,
         * we branch on TRUE. */
        bool invert_conditional;

        /* Branch targets: the start of a block, the start of a loop
         * (continue), the end of a loop (break). Value is one of TARGET_ */
        unsigned target_type;

        /* The actual target */
        union {
                int target_block;
                int target_break;
                int target_continue;
        };
} midgard_branch;

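/* For example, a conditional break out of a loop could be described as
 * follows (a sketch with illustrative values; real targets are assigned
 * when loops are emitted):
 *
 *     midgard_branch br = {
 *             .conditional = true,
 *             .invert_conditional = false,
 *             .target_type = TARGET_BREAK,
 *             .target_break = 0,
 *     };
 */
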
/* Generic in-memory data type representing a single logical instruction,
 * rather than a single instruction group. This is the preferred form for
 * code gen. Multiple midgard_instructions will later be combined during
 * scheduling, though this is not represented in this structure. Its format
 * bridges the low-level binary representation with the higher-level semantic
 * meaning.
 *
 * Notably, it allows registers to be specified as block local SSA, for code
 * emitted before the register allocation pass.
 */

#define MIR_SRC_COUNT 4
#define MIR_VEC_COMPONENTS 16

typedef struct midgard_instruction {
        /* Must be first for casting */
        struct list_head link;

        unsigned type; /* ALU, load/store, texture */

        /* Instruction arguments represented as block-local SSA
         * indices, rather than registers. ~0 means unused. */
        unsigned src[MIR_SRC_COUNT];
        unsigned dest;

        /* vec16 swizzle, unpacked, per source */
        unsigned swizzle[MIR_SRC_COUNT][MIR_VEC_COMPONENTS];

        /* Types! */
        nir_alu_type src_types[MIR_SRC_COUNT];
        nir_alu_type dest_type;

        /* Packing ops have non-32-bit dest types even though they
         * functionally work at the 32-bit level; use this as a signal to
         * disable copyprop. We may need synthetic pack ops instead. */
        bool is_pack;

        /* Modifiers, depending on type */
        union {
                struct {
                        bool src_abs[MIR_SRC_COUNT];
                        bool src_neg[MIR_SRC_COUNT];
                };

                struct {
                        bool src_shift[MIR_SRC_COUNT];
                };
        };

        /* Out of the union for csel (could maybe be fixed...) */
        bool src_invert[MIR_SRC_COUNT];

        /* If the op supports it */
        enum midgard_roundmode roundmode;

        /* For textures: should helpers execute this instruction (instead of
         * just helping with derivatives)? Should helpers terminate after? */
        bool helper_terminate;
        bool helper_execute;

        /* I.e. (1 << alu_bit) */
        int unit;

        bool has_constants;
        midgard_constants constants;
        uint16_t inline_constant;
        bool has_inline_constant;

        bool compact_branch;
        uint8_t writeout;
        bool last_writeout;

        /* Masks in a saneish format. One bit per channel, not packed fancy.
         * Use this instead of the op-specific ones, and switch over at emit
         * time */

        uint16_t mask;

        /* Hint for the register allocator not to spill the destination
         * written from this instruction (because it is a spill/unspill node
         * itself). Bitmask of spilled classes */

        unsigned no_spill;

        /* Generic hint for intra-pass use */
        bool hint;

        /* During scheduling, the backwards dependency graph
         * (DAG). nr_dependencies is the number of unscheduled
         * instructions that must still be scheduled after
         * (before) this instruction. dependents are which
         * instructions need to be scheduled before (after) this
         * instruction. */

        unsigned nr_dependencies;
        BITSET_WORD *dependents;

        /* Use this in conjunction with `type` */
        unsigned op;

        /* This refers to midgard_outmod_float or midgard_outmod_int.
         * For an ALU op, use midgard_is_integer_out_op() to know which
         * one is used.
         * If it's a texture op, it's always midgard_outmod_float. */
        unsigned outmod;

        union {
                midgard_load_store_word load_store;
                midgard_texture_word texture;

                midgard_branch branch;
        };

        unsigned bundle_id;
} midgard_instruction;

typedef struct midgard_block {
        pan_block base;

        bool scheduled;

        /* List of midgard_bundles emitted (after the scheduler has run) */
        struct util_dynarray bundles;

        /* Number of quadwords _actually_ emitted, as determined after scheduling */
        unsigned quadword_count;

        /* Indicates this is a fixed-function fragment epilogue block */
        bool epilogue;

        /* Are helper invocations required by this block? */
        bool helpers_in;
} midgard_block;

typedef struct midgard_bundle {
        /* Tag for the overall bundle */
        int tag;

        /* Instructions contained by the bundle. instruction_count <= 6 (vmul,
         * sadd, vadd, smul, vlut, branch) */
        int instruction_count;
        midgard_instruction *instructions[6];

        /* Bundle-wide ALU configuration */
        int padding;
        int control;
        bool has_embedded_constants;
        midgard_constants constants;
        bool last_writeout;
} midgard_bundle;

enum midgard_rt_id {
        MIDGARD_COLOR_RT0 = 0,
        MIDGARD_COLOR_RT1,
        MIDGARD_COLOR_RT2,
        MIDGARD_COLOR_RT3,
        MIDGARD_COLOR_RT4,
        MIDGARD_COLOR_RT5,
        MIDGARD_COLOR_RT6,
        MIDGARD_COLOR_RT7,
        MIDGARD_ZS_RT,
        MIDGARD_NUM_RTS,
};

#define MIDGARD_MAX_SAMPLE_ITER 16

typedef struct compiler_context {
        const struct panfrost_compile_inputs *inputs;
        nir_shader *nir;
        struct pan_shader_info *info;
        gl_shader_stage stage;

        /* Number of samples for a keyed blend shader. Depends on is_blend */
        unsigned blend_sample_iterations;

        /* Index to precolour to r0 for an input blend colour */
        unsigned blend_input;

        /* Index to precolour to r2 for a dual-source blend colour */
        unsigned blend_src1;

        /* Count of spills and fills for shaderdb */
        unsigned spills;
        unsigned fills;

        /* Current NIR function */
        nir_function *func;

        /* Allocated compiler temporary counter */
        unsigned temp_alloc;

        /* Unordered list of midgard_blocks */
        int block_count;
        struct list_head blocks;

        /* TODO merge with block_count? */
        unsigned block_source_count;

        /* List of midgard_instructions emitted for the current block */
        midgard_block *current_block;

        /* If there is a preset after-block, use it; otherwise, emit_block
         * will create one if this is NULL */
        midgard_block *after_block;

        /* The current "depth" of the loop, for disambiguating breaks/continues
         * when using nested loops */
        int current_loop_depth;

        /* Total number of loops for shader-db */
        unsigned loop_count;

        /* Constants which have been loaded, for later inlining */
        struct hash_table_u64 *ssa_constants;

        int temp_count;
        int max_hash;

        /* Set of NIR indices that were already emitted as outmods */
        BITSET_WORD *already_emitted;

        /* Count of instructions emitted from NIR overall, across all blocks */
        int instruction_count;

        unsigned quadword_count;

        /* Bitmask of valid metadata */
        unsigned metadata;

        /* Model-specific quirk set */
        uint32_t quirks;

        /* Writeout instructions for each render target */
        midgard_instruction *writeout_branch[MIDGARD_NUM_RTS][MIDGARD_MAX_SAMPLE_ITER];

        struct hash_table_u64 *sysval_to_id;

        /* Mask of UBOs that need to be uploaded */
        uint32_t ubo_mask;
} compiler_context;
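
/* A sketch of consuming the writeout table above: look up the branch for
 * the first colour render target, first sample iteration (illustrative):
 *
 *     midgard_instruction *br = ctx->writeout_branch[MIDGARD_COLOR_RT0][0];
 *
 *     if (br != NULL) {
 *             // a writeout for this RT/sample has already been emitted
 *     }
 */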

/* Per-block live_in/live_out */
#define MIDGARD_METADATA_LIVENESS (1 << 0)

/* Helpers for manipulating the above structures (forming the driver IR) */

/* Copy a stack-allocated instruction to the heap so it can be linked into a
 * block's instruction list */

static inline midgard_instruction *
mir_upload_ins(struct compiler_context *ctx, struct midgard_instruction ins)
{
        midgard_instruction *heap = ralloc(ctx, struct midgard_instruction);
        memcpy(heap, &ins, sizeof(ins));
        return heap;
}

/* Append an instruction to the end of the current block */

static inline midgard_instruction *
emit_mir_instruction(struct compiler_context *ctx, struct midgard_instruction ins)
{
        midgard_instruction *u = mir_upload_ins(ctx, ins);
        list_addtail(&u->link, &ctx->current_block->base.instructions);
        return u;
}

static inline struct midgard_instruction *
mir_insert_instruction_before(struct compiler_context *ctx,
                              struct midgard_instruction *tag,
                              struct midgard_instruction ins)
{
        struct midgard_instruction *u = mir_upload_ins(ctx, ins);
        list_addtail(&u->link, &tag->link);
        return u;
}

static inline void
mir_remove_instruction(struct midgard_instruction *ins)
{
        list_del(&ins->link);
}

static inline midgard_instruction *
mir_prev_op(struct midgard_instruction *ins)
{
        return list_last_entry(&(ins->link), midgard_instruction, link);
}

static inline midgard_instruction *
mir_next_op(struct midgard_instruction *ins)
{
        return list_first_entry(&(ins->link), midgard_instruction, link);
}

#define mir_foreach_block(ctx, v) \
        list_for_each_entry(pan_block, v, &ctx->blocks, link)

#define mir_foreach_block_from(ctx, from, v) \
        list_for_each_entry_from(pan_block, v, &from->base, &ctx->blocks, link)

#define mir_foreach_instr_in_block(block, v) \
        list_for_each_entry(struct midgard_instruction, v, &block->base.instructions, link)

#define mir_foreach_instr_in_block_rev(block, v) \
        list_for_each_entry_rev(struct midgard_instruction, v, &block->base.instructions, link)

#define mir_foreach_instr_in_block_safe(block, v) \
        list_for_each_entry_safe(struct midgard_instruction, v, &block->base.instructions, link)

#define mir_foreach_instr_in_block_safe_rev(block, v) \
        list_for_each_entry_safe_rev(struct midgard_instruction, v, &block->base.instructions, link)

#define mir_foreach_instr_in_block_from(block, v, from) \
        list_for_each_entry_from(struct midgard_instruction, v, from, &block->base.instructions, link)

#define mir_foreach_instr_in_block_from_rev(block, v, from) \
        list_for_each_entry_from_rev(struct midgard_instruction, v, from, &block->base.instructions, link)

#define mir_foreach_bundle_in_block(block, v) \
        util_dynarray_foreach(&block->bundles, midgard_bundle, v)

#define mir_foreach_bundle_in_block_rev(block, v) \
        util_dynarray_foreach_reverse(&block->bundles, midgard_bundle, v)

/* Iterates over scheduled instructions in reverse order. Note this declares
 * `v` and `i` at the expansion site, so it can only appear once per scope */

#define mir_foreach_instr_in_block_scheduled_rev(block, v) \
        midgard_instruction* v; \
        signed i = 0; \
        mir_foreach_bundle_in_block_rev(block, _bundle) \
                for (i = (_bundle->instruction_count - 1), v = _bundle->instructions[i]; \
                                i >= 0; \
                                --i, v = (i >= 0) ? _bundle->instructions[i] : NULL)

#define mir_foreach_instr_global(ctx, v) \
        mir_foreach_block(ctx, v_block) \
                mir_foreach_instr_in_block(((midgard_block *) v_block), v)

#define mir_foreach_instr_global_safe(ctx, v) \
        mir_foreach_block(ctx, v_block) \
                mir_foreach_instr_in_block_safe(((midgard_block *) v_block), v)

/* Based on set_foreach, expanded with automatic type casts */

#define mir_foreach_predecessor(blk, v) \
        struct set_entry *_entry_##v; \
        struct midgard_block *v; \
        for (_entry_##v = _mesa_set_next_entry(blk->base.predecessors, NULL), \
                v = (struct midgard_block *) (_entry_##v ? _entry_##v->key : NULL);  \
                _entry_##v != NULL; \
                _entry_##v = _mesa_set_next_entry(blk->base.predecessors, _entry_##v), \
                v = (struct midgard_block *) (_entry_##v ? _entry_##v->key : NULL))

#define mir_foreach_src(ins, v) \
        for (unsigned v = 0; v < ARRAY_SIZE(ins->src); ++v)
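
/* Usage sketch for the iterators above (illustrative): walk every
 * instruction in the shader and inspect its sources:
 *
 *     mir_foreach_instr_global(ctx, ins) {
 *             mir_foreach_src(ins, s) {
 *                     if (ins->src[s] == ~0)
 *                             continue; // unused source
 *             }
 *     }
 *
 * Use the _safe variants when removing instructions mid-iteration. */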

static inline midgard_instruction *
mir_last_in_block(struct midgard_block *block)
{
        return list_last_entry(&block->base.instructions, struct midgard_instruction, link);
}

/* Look up a block by index, walking the block list from the head */

static inline midgard_block *
mir_get_block(compiler_context *ctx, int idx)
{
        struct list_head *lst = &ctx->blocks;

        while ((idx--) + 1)
                lst = lst->next;

        return (struct midgard_block *) lst;
}

static inline bool
mir_is_alu_bundle(midgard_bundle *bundle)
{
        return IS_ALU(bundle->tag);
}

/* Allocate fresh indices for compiler temporaries, starting past the end of
 * the NIR namespaces so they never collide with NIR-derived indices */

static inline unsigned
make_compiler_temp(compiler_context *ctx)
{
        return (ctx->func->impl->ssa_alloc + ctx->temp_alloc++) << 1;
}

static inline unsigned
make_compiler_temp_reg(compiler_context *ctx)
{
        return ((ctx->func->impl->reg_alloc + ctx->temp_alloc++) << 1) | PAN_IS_REG;
}

static inline unsigned
nir_ssa_index(nir_ssa_def *ssa)
{
        return (ssa->index << 1) | 0;
}

static inline unsigned
nir_src_index(compiler_context *ctx, nir_src *src)
{
        if (src->is_ssa)
                return nir_ssa_index(src->ssa);
        else {
                assert(!src->reg.indirect);
                return (src->reg.reg->index << 1) | PAN_IS_REG;
        }
}

static inline unsigned
nir_dest_index(nir_dest *dst)
{
        if (dst->is_ssa)
                return (dst->ssa.index << 1) | 0;
        else {
                assert(!dst->reg.indirect);
                return (dst->reg.reg->index << 1) | PAN_IS_REG;
        }
}
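
/* These helpers pack a NIR index into the upper bits, with a "register, not
 * SSA" flag in bit 0 (PAN_IS_REG), so SSA values, NIR registers, and
 * compiler temporaries share one flat index namespace. A decoding sketch:
 *
 *     unsigned idx    = nir_dest_index(&dest);
 *     bool is_reg     = idx & PAN_IS_REG;
 *     unsigned nir_ix = idx >> 1;
 */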

/* MIR manipulation */

void mir_rewrite_index(compiler_context *ctx, unsigned old, unsigned new);
void mir_rewrite_index_src(compiler_context *ctx, unsigned old, unsigned new);
void mir_rewrite_index_dst(compiler_context *ctx, unsigned old, unsigned new);
void mir_rewrite_index_dst_single(midgard_instruction *ins, unsigned old, unsigned new);
void mir_rewrite_index_src_single(midgard_instruction *ins, unsigned old, unsigned new);
void mir_rewrite_index_src_swizzle(compiler_context *ctx, unsigned old, unsigned new, unsigned *swizzle);
bool mir_single_use(compiler_context *ctx, unsigned value);
unsigned mir_use_count(compiler_context *ctx, unsigned value);
uint16_t mir_bytemask_of_read_components(midgard_instruction *ins, unsigned node);
uint16_t mir_bytemask_of_read_components_index(midgard_instruction *ins, unsigned i);
uint16_t mir_from_bytemask(uint16_t bytemask, unsigned bits);
uint16_t mir_bytemask(midgard_instruction *ins);
uint16_t mir_round_bytemask_up(uint16_t mask, unsigned bits);
void mir_set_bytemask(midgard_instruction *ins, uint16_t bytemask);
signed mir_upper_override(midgard_instruction *ins, unsigned inst_size);
unsigned mir_components_for_type(nir_alu_type T);
unsigned max_bitsize_for_alu(midgard_instruction *ins);
midgard_reg_mode reg_mode_for_bitsize(unsigned bitsize);

/* MIR printing */

void mir_print_instruction(midgard_instruction *ins);
void mir_print_bundle(midgard_bundle *ctx);
void mir_print_block(midgard_block *block);
void mir_print_shader(compiler_context *ctx);
bool mir_nontrivial_mod(midgard_instruction *ins, unsigned i, bool check_swizzle);
bool mir_nontrivial_outmod(midgard_instruction *ins);

midgard_instruction *mir_insert_instruction_before_scheduled(compiler_context *ctx, midgard_block *block, midgard_instruction *tag, midgard_instruction ins);
midgard_instruction *mir_insert_instruction_after_scheduled(compiler_context *ctx, midgard_block *block, midgard_instruction *tag, midgard_instruction ins);
void mir_flip(midgard_instruction *ins);
void mir_compute_temp_count(compiler_context *ctx);

#define LDST_GLOBAL (REGISTER_LDST_ZERO << 2)
#define LDST_SHARED ((REGISTER_LDST_LOCAL_STORAGE_PTR << 2) | COMPONENT_Z)
#define LDST_SCRATCH ((REGISTER_LDST_PC_SP << 2) | COMPONENT_Z)

void mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, unsigned seg);
void mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias);
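
/* Usage sketch (illustrative): pass one of the segment defines above as the
 * `seg` argument when lowering a memory access with a NIR offset source:
 *
 *     mir_set_offset(ctx, &ins, &instr->src[0], LDST_GLOBAL);
 */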

/* 'Intrinsic' move for aliasing */

static inline midgard_instruction
v_mov(unsigned src, unsigned dest)
{
        midgard_instruction ins = {
                .type = TAG_ALU_4,
                .mask = 0xF,
                .src = { ~0, src, ~0, ~0 },
                .src_types = { 0, nir_type_uint32 },
                .swizzle = SWIZZLE_IDENTITY,
                .dest = dest,
                .dest_type = nir_type_uint32,
                .op = midgard_alu_op_imov,
                .outmod = midgard_outmod_keeplo
        };

        return ins;
}
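
/* Usage sketch (illustrative): copy a value into a fresh compiler temporary:
 *
 *     unsigned tmp = make_compiler_temp(ctx);
 *     emit_mir_instruction(ctx, v_mov(src, tmp));
 */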

/* Broad types of register classes so we can handle special
 * registers */

#define REG_CLASS_WORK          0
#define REG_CLASS_LDST          1
#define REG_CLASS_TEXR          3
#define REG_CLASS_TEXW          4

/* Like a move, but to thread local storage! */

static inline midgard_instruction
v_load_store_scratch(
                unsigned srcdest,
                unsigned index,
                bool is_store,
                unsigned mask)
{
        /* We index by 32-bit vec4s */
        unsigned byte = (index * 4 * 4);

        midgard_instruction ins = {
                .type = TAG_LOAD_STORE_4,
                .mask = mask,
                .dest_type = nir_type_uint32,
                .dest = ~0,
                .src = { ~0, ~0, ~0, ~0 },
                .swizzle = SWIZZLE_IDENTITY_4,
                .op = is_store ? midgard_op_st_128 : midgard_op_ld_128,
                .load_store = {
                        /* For register spilling - to thread local storage */
                        .arg_reg = REGISTER_LDST_LOCAL_STORAGE_PTR,
                        .arg_comp = COMPONENT_X,
                        .bitsize_toggle = true,
                        .index_format = midgard_index_address_u32,
                        .index_reg = REGISTER_LDST_ZERO,
                },

                /* If we spill an unspill, RA goes into an infinite loop */
                .no_spill = (1 << REG_CLASS_WORK)
        };

        ins.constants.u32[0] = byte;

        if (is_store) {
                ins.src[0] = srcdest;
                ins.src_types[0] = nir_type_uint32;

                /* Ensure we are tightly swizzled so liveness analysis is
                 * correct */

                for (unsigned i = 0; i < 4; ++i) {
                        if (!(mask & (1 << i)))
                                ins.swizzle[0][i] = COMPONENT_X;
                }
        } else
                ins.dest = srcdest;

        return ins;
}
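
/* Usage sketch for spilling (illustrative): store a 32-bit vec4 to scratch
 * slot `slot`, then read it back later to fill:
 *
 *     midgard_instruction st = v_load_store_scratch(node, slot, true, 0xF);
 *     midgard_instruction ld = v_load_store_scratch(node, slot, false, 0xF);
 *
 * no_spill is preset on the result so RA does not try to spill the spill
 * itself. */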

static inline bool
mir_has_arg(midgard_instruction *ins, unsigned arg)
{
        if (!ins)
                return false;

        mir_foreach_src(ins, i) {
                if (ins->src[i] == arg)
                        return true;
        }

        return false;
}

/* Scheduling */

void midgard_schedule_program(compiler_context *ctx);

void mir_ra(compiler_context *ctx);
void mir_squeeze_index(compiler_context *ctx);
void mir_lower_special_reads(compiler_context *ctx);
void mir_liveness_ins_update(uint16_t *live, midgard_instruction *ins, unsigned max);
void mir_compute_liveness(compiler_context *ctx);
void mir_invalidate_liveness(compiler_context *ctx);
bool mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src);

void mir_create_pipeline_registers(compiler_context *ctx);
void midgard_promote_uniforms(compiler_context *ctx);

void midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr);
void midgard_lower_derivatives(compiler_context *ctx, midgard_block *block);

bool mir_op_computes_derivatives(gl_shader_stage stage, unsigned op);

void mir_analyze_helper_terminate(compiler_context *ctx);
void mir_analyze_helper_requirements(compiler_context *ctx);

/* Final emission */

void emit_binary_bundle(
        compiler_context *ctx,
        midgard_block *block,
        midgard_bundle *bundle,
        struct util_dynarray *emission,
        int next_tag);

bool nir_fuse_io_16(nir_shader *shader);

bool midgard_nir_lod_errata(nir_shader *shader);

unsigned midgard_get_first_tag_from_block(compiler_context *ctx, unsigned block_idx);

/* Optimizations */

bool midgard_opt_copy_prop(compiler_context *ctx, midgard_block *block);
bool midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block);
bool midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block);
bool midgard_opt_dead_code_eliminate(compiler_context *ctx);
bool midgard_opt_dead_move_eliminate(compiler_context *ctx, midgard_block *block);

#endif