1/* 2 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io> 3 * Copyright (C) 2020 Collabora Ltd. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25#ifndef __AGX_COMPILER_H 26#define __AGX_COMPILER_H 27 28#include "compiler/nir/nir.h" 29#include "util/u_math.h" 30#include "util/half_float.h" 31#include "util/u_dynarray.h" 32#include "util/u_worklist.h" 33#include "agx_compile.h" 34#include "agx_opcodes.h" 35#include "agx_minifloat.h" 36 37#ifdef __cplusplus 38extern "C" { 39#endif 40 41enum agx_dbg { 42 AGX_DBG_MSGS = BITFIELD_BIT(0), 43 AGX_DBG_SHADERS = BITFIELD_BIT(1), 44 AGX_DBG_SHADERDB = BITFIELD_BIT(2), 45 AGX_DBG_VERBOSE = BITFIELD_BIT(3), 46 AGX_DBG_INTERNAL = BITFIELD_BIT(4), 47 AGX_DBG_NOVALIDATE = BITFIELD_BIT(5), 48}; 49 50extern int agx_debug; 51 52/* r0-r127 inclusive, as pairs of 16-bits, gives 256 registers */ 53#define AGX_NUM_REGS (256) 54 55enum agx_index_type { 56 AGX_INDEX_NULL = 0, 57 AGX_INDEX_NORMAL = 1, 58 AGX_INDEX_IMMEDIATE = 2, 59 AGX_INDEX_UNIFORM = 3, 60 AGX_INDEX_REGISTER = 4, 61}; 62 63enum agx_size { 64 AGX_SIZE_16 = 0, 65 AGX_SIZE_32 = 1, 66 AGX_SIZE_64 = 2 67}; 68 69static inline unsigned 70agx_size_align_16(enum agx_size size) 71{ 72 switch (size) { 73 case AGX_SIZE_16: return 1; 74 case AGX_SIZE_32: return 2; 75 case AGX_SIZE_64: return 4; 76 } 77 78 unreachable("Invalid size"); 79} 80 81typedef struct { 82 /* Sufficient for as many SSA values as we need. Immediates and uniforms fit in 16-bits */ 83 unsigned value : 22; 84 85 /* Indicates that this source kills the referenced value (because it is the 86 * last use in a block and the source is not live after the block). Set by 87 * liveness analysis. */ 88 bool kill : 1; 89 90 /* Cache hints */ 91 bool cache : 1; 92 bool discard : 1; 93 94 /* src - float modifiers */ 95 bool abs : 1; 96 bool neg : 1; 97 98 enum agx_size size : 2; 99 enum agx_index_type type : 3; 100} agx_index; 101 102static inline agx_index 103agx_get_index(unsigned value, enum agx_size size) 104{ 105 return (agx_index) { 106 .value = value, 107 .size = size, 108 .type = AGX_INDEX_NORMAL, 109 }; 110} 111 112static inline agx_index 113agx_immediate(uint16_t imm) 114{ 115 return (agx_index) { 116 .value = imm, 117 .size = AGX_SIZE_32, 118 .type = AGX_INDEX_IMMEDIATE, 119 }; 120} 121 122static inline agx_index 123agx_immediate_f(float f) 124{ 125 assert(agx_minifloat_exact(f)); 126 return agx_immediate(agx_minifloat_encode(f)); 127} 128 129/* in half-words, specify r0h as 1, r1 as 2... */ 130static inline agx_index 131agx_register(uint8_t imm, enum agx_size size) 132{ 133 return (agx_index) { 134 .value = imm, 135 .size = size, 136 .type = AGX_INDEX_REGISTER, 137 }; 138} 139 140/* Also in half-words */ 141static inline agx_index 142agx_uniform(uint8_t imm, enum agx_size size) 143{ 144 return (agx_index) { 145 .value = imm, 146 .size = size, 147 .type = AGX_INDEX_UNIFORM, 148 }; 149} 150 151static inline agx_index 152agx_null() 153{ 154 return (agx_index) { .type = AGX_INDEX_NULL }; 155} 156 157static inline agx_index 158agx_zero() 159{ 160 return agx_immediate(0); 161} 162 163/* IEEE 754 additive identity -0.0, stored as an 8-bit AGX minifloat: mantissa 164 * = exponent = 0, sign bit set */ 165 166static inline agx_index 167agx_negzero() 168{ 169 return agx_immediate(0x80); 170} 171 172static inline agx_index 173agx_abs(agx_index idx) 174{ 175 idx.abs = true; 176 idx.neg = false; 177 return idx; 178} 179 180static inline agx_index 181agx_neg(agx_index idx) 182{ 183 idx.neg ^= true; 184 return idx; 185} 186 187/* Replaces an index, preserving any modifiers */ 188 189static inline agx_index 190agx_replace_index(agx_index old, agx_index replacement) 191{ 192 replacement.abs = old.abs; 193 replacement.neg = old.neg; 194 return replacement; 195} 196 197static inline bool 198agx_is_null(agx_index idx) 199{ 200 return idx.type == AGX_INDEX_NULL; 201} 202 203/* Compares equivalence as references */ 204 205static inline bool 206agx_is_equiv(agx_index left, agx_index right) 207{ 208 return (left.type == right.type) && (left.value == right.value); 209} 210 211#define AGX_MAX_DESTS 4 212#define AGX_MAX_SRCS 5 213 214enum agx_icond { 215 AGX_ICOND_UEQ = 0, 216 AGX_ICOND_ULT = 1, 217 AGX_ICOND_UGT = 2, 218 /* unknown */ 219 AGX_ICOND_SEQ = 4, 220 AGX_ICOND_SLT = 5, 221 AGX_ICOND_SGT = 6, 222 /* unknown */ 223}; 224 225enum agx_fcond { 226 AGX_FCOND_EQ = 0, 227 AGX_FCOND_LT = 1, 228 AGX_FCOND_GT = 2, 229 AGX_FCOND_LTN = 3, 230 /* unknown */ 231 AGX_FCOND_GE = 5, 232 AGX_FCOND_LE = 6, 233 AGX_FCOND_GTN = 7, 234}; 235 236enum agx_round { 237 AGX_ROUND_RTZ = 0, 238 AGX_ROUND_RTE = 1, 239}; 240 241enum agx_convert { 242 AGX_CONVERT_U8_TO_F = 0, 243 AGX_CONVERT_S8_TO_F = 1, 244 AGX_CONVERT_F_TO_U16 = 4, 245 AGX_CONVERT_F_TO_S16 = 5, 246 AGX_CONVERT_U16_TO_F = 6, 247 AGX_CONVERT_S16_TO_F = 7, 248 AGX_CONVERT_F_TO_U32 = 8, 249 AGX_CONVERT_F_TO_S32 = 9, 250 AGX_CONVERT_U32_TO_F = 10, 251 AGX_CONVERT_S32_TO_F = 11 252}; 253 254enum agx_lod_mode { 255 AGX_LOD_MODE_AUTO_LOD = 0, 256 AGX_LOD_MODE_AUTO_LOD_BIAS = 5, 257 AGX_LOD_MODE_LOD_MIN = 6, 258 AGX_LOD_GRAD = 8, 259 AGX_LOD_GRAD_MIN = 12 260}; 261 262enum agx_dim { 263 AGX_DIM_TEX_1D = 0, 264 AGX_DIM_TEX_1D_ARRAY = 1, 265 AGX_DIM_TEX_2D = 2, 266 AGX_DIM_TEX_2D_ARRAY = 3, 267 AGX_DIM_TEX_2D_MS = 4, 268 AGX_DIM_TEX_3D = 5, 269 AGX_DIM_TEX_CUBE = 6, 270 AGX_DIM_TEX_CUBE_ARRAY = 7 271}; 272 273/* Forward declare for branch target */ 274struct agx_block; 275 276typedef struct { 277 /* Must be first */ 278 struct list_head link; 279 280 /* The sources list. 281 * 282 * As a special case to workaround ordering issues when translating phis, if 283 * nr_srcs == 0 and the opcode is PHI, holds a pointer to the NIR phi node. 284 */ 285 union { 286 agx_index *src; 287 nir_phi_instr *phi; 288 }; 289 290 enum agx_opcode op; 291 292 /* Data flow */ 293 agx_index dest[AGX_MAX_DESTS]; 294 295 unsigned nr_srcs; 296 297 union { 298 uint32_t imm; 299 uint32_t writeout; 300 uint32_t truth_table; 301 uint32_t component; 302 uint32_t channels; 303 uint32_t bfi_mask; 304 enum agx_sr sr; 305 enum agx_icond icond; 306 enum agx_fcond fcond; 307 enum agx_format format; 308 enum agx_round round; 309 enum agx_lod_mode lod_mode; 310 struct agx_block *target; 311 }; 312 313 /* For load varying */ 314 bool perspective : 1; 315 316 /* Invert icond/fcond */ 317 bool invert_cond : 1; 318 319 /* TODO: Handle tex ops more efficient */ 320 enum agx_dim dim : 3; 321 322 /* Final st_vary op */ 323 bool last : 1; 324 325 /* Shift for a bitwise or memory op (conflicts with format for memory ops) */ 326 unsigned shift : 4; 327 328 /* Scoreboard index, 0 or 1. Leave as 0 for instructions that do not require 329 * scoreboarding (everything but memory load/store and texturing). */ 330 unsigned scoreboard : 1; 331 332 /* Number of nested control flow layers to jump by */ 333 unsigned nest : 2; 334 335 /* Output modifiers */ 336 bool saturate : 1; 337 unsigned mask : 4; 338} agx_instr; 339 340struct agx_block; 341 342typedef struct agx_block { 343 /* Link to next block. Must be first */ 344 struct list_head link; 345 346 /* List of instructions emitted for the current block */ 347 struct list_head instructions; 348 349 /* Index of the block in source order */ 350 unsigned index; 351 352 /* Control flow graph */ 353 struct agx_block *successors[2]; 354 struct util_dynarray predecessors; 355 bool unconditional_jumps; 356 357 /* Liveness analysis results */ 358 BITSET_WORD *live_in; 359 BITSET_WORD *live_out; 360 361 /* Register allocation */ 362 BITSET_DECLARE(regs_out, AGX_NUM_REGS); 363 364 /* Offset of the block in the emitted binary */ 365 off_t offset; 366 367 /** Available for passes to use for metadata */ 368 uint8_t pass_flags; 369} agx_block; 370 371typedef struct { 372 nir_shader *nir; 373 gl_shader_stage stage; 374 struct list_head blocks; /* list of agx_block */ 375 struct agx_shader_info *out; 376 struct agx_shader_key *key; 377 378 /* Remapping table for varyings indexed by driver_location */ 379 unsigned varyings[AGX_MAX_VARYINGS]; 380 381 /* Place to start pushing new values */ 382 unsigned push_base; 383 384 /* Maximum block index */ 385 unsigned num_blocks; 386 387 /* For creating temporaries */ 388 unsigned alloc; 389 390 /* I don't really understand how writeout ops work yet */ 391 bool did_writeout; 392 393 /* Has r0l been zeroed yet due to control flow? */ 394 bool any_cf; 395 396 /* Number of nested control flow structures within the innermost loop. Since 397 * NIR is just loop and if-else, this is the number of nested if-else 398 * statements in the loop */ 399 unsigned loop_nesting; 400 401 /* During instruction selection, for inserting control flow */ 402 agx_block *current_block; 403 agx_block *continue_block; 404 agx_block *break_block; 405 agx_block *after_block; 406 agx_block **indexed_nir_blocks; 407 408 /* During instruction selection, map from vector agx_index to its scalar 409 * components, populated by a split. */ 410 struct hash_table_u64 *allocated_vec; 411 412 /* Stats for shader-db */ 413 unsigned loop_count; 414 unsigned spills; 415 unsigned fills; 416} agx_context; 417 418static inline void 419agx_remove_instruction(agx_instr *ins) 420{ 421 list_del(&ins->link); 422} 423 424static inline agx_index 425agx_temp(agx_context *ctx, enum agx_size size) 426{ 427 return agx_get_index(ctx->alloc++, size); 428} 429 430static enum agx_size 431agx_size_for_bits(unsigned bits) 432{ 433 switch (bits) { 434 case 1: 435 case 16: return AGX_SIZE_16; 436 case 32: return AGX_SIZE_32; 437 case 64: return AGX_SIZE_64; 438 default: unreachable("Invalid bitsize"); 439 } 440} 441 442static inline agx_index 443agx_src_index(nir_src *src) 444{ 445 assert(src->is_ssa); 446 447 return agx_get_index(src->ssa->index, 448 agx_size_for_bits(nir_src_bit_size(*src))); 449} 450 451static inline agx_index 452agx_dest_index(nir_dest *dst) 453{ 454 assert(dst->is_ssa); 455 456 return agx_get_index(dst->ssa.index, 457 agx_size_for_bits(nir_dest_bit_size(*dst))); 458} 459 460static inline agx_index 461agx_vec_for_dest(agx_context *ctx, nir_dest *dest) 462{ 463 return agx_temp(ctx, agx_size_for_bits(nir_dest_bit_size(*dest))); 464} 465 466static inline agx_index 467agx_vec_for_intr(agx_context *ctx, nir_intrinsic_instr *instr) 468{ 469 return agx_vec_for_dest(ctx, &instr->dest); 470} 471 472/* Iterators for AGX IR */ 473 474#define agx_foreach_block(ctx, v) \ 475 list_for_each_entry(agx_block, v, &ctx->blocks, link) 476 477#define agx_foreach_block_rev(ctx, v) \ 478 list_for_each_entry_rev(agx_block, v, &ctx->blocks, link) 479 480#define agx_foreach_block_from(ctx, from, v) \ 481 list_for_each_entry_from(agx_block, v, from, &ctx->blocks, link) 482 483#define agx_foreach_block_from_rev(ctx, from, v) \ 484 list_for_each_entry_from_rev(agx_block, v, from, &ctx->blocks, link) 485 486#define agx_foreach_instr_in_block(block, v) \ 487 list_for_each_entry(agx_instr, v, &(block)->instructions, link) 488 489#define agx_foreach_instr_in_block_rev(block, v) \ 490 list_for_each_entry_rev(agx_instr, v, &(block)->instructions, link) 491 492#define agx_foreach_instr_in_block_safe(block, v) \ 493 list_for_each_entry_safe(agx_instr, v, &(block)->instructions, link) 494 495#define agx_foreach_instr_in_block_safe_rev(block, v) \ 496 list_for_each_entry_safe_rev(agx_instr, v, &(block)->instructions, link) 497 498#define agx_foreach_instr_in_block_from(block, v, from) \ 499 list_for_each_entry_from(agx_instr, v, from, &(block)->instructions, link) 500 501#define agx_foreach_instr_in_block_from_rev(block, v, from) \ 502 list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions, link) 503 504#define agx_foreach_instr_global(ctx, v) \ 505 agx_foreach_block(ctx, v_block) \ 506 agx_foreach_instr_in_block(v_block, v) 507 508#define agx_foreach_instr_global_rev(ctx, v) \ 509 agx_foreach_block_rev(ctx, v_block) \ 510 agx_foreach_instr_in_block_rev(v_block, v) 511 512#define agx_foreach_instr_global_safe(ctx, v) \ 513 agx_foreach_block(ctx, v_block) \ 514 agx_foreach_instr_in_block_safe(v_block, v) 515 516#define agx_foreach_instr_global_safe_rev(ctx, v) \ 517 agx_foreach_block_rev(ctx, v_block) \ 518 agx_foreach_instr_in_block_safe_rev(v_block, v) 519 520/* Based on set_foreach, expanded with automatic type casts */ 521 522#define agx_foreach_successor(blk, v) \ 523 agx_block *v; \ 524 agx_block **_v; \ 525 for (_v = (agx_block **) &blk->successors[0], \ 526 v = *_v; \ 527 v != NULL && _v < (agx_block **) &blk->successors[2]; \ 528 _v++, v = *_v) \ 529 530#define agx_foreach_predecessor(blk, v) \ 531 util_dynarray_foreach(&blk->predecessors, agx_block *, v) 532 533#define agx_foreach_src(ins, v) \ 534 for (unsigned v = 0; v < ins->nr_srcs; ++v) 535 536#define agx_foreach_dest(ins, v) \ 537 for (unsigned v = 0; v < ARRAY_SIZE(ins->dest); ++v) 538 539/* 540 * Find the index of a predecessor, used as the implicit order of phi sources. 541 */ 542static inline unsigned 543agx_predecessor_index(agx_block *succ, agx_block *pred) 544{ 545 unsigned index = 0; 546 547 agx_foreach_predecessor(succ, x) { 548 if (*x == pred) return index; 549 550 index++; 551 } 552 553 unreachable("Invalid predecessor"); 554} 555 556static inline agx_instr * 557agx_prev_op(agx_instr *ins) 558{ 559 return list_last_entry(&(ins->link), agx_instr, link); 560} 561 562static inline agx_instr * 563agx_next_op(agx_instr *ins) 564{ 565 return list_first_entry(&(ins->link), agx_instr, link); 566} 567 568static inline agx_block * 569agx_next_block(agx_block *block) 570{ 571 return list_first_entry(&(block->link), agx_block, link); 572} 573 574static inline agx_block * 575agx_exit_block(agx_context *ctx) 576{ 577 agx_block *last = list_last_entry(&ctx->blocks, agx_block, link); 578 assert(!last->successors[0] && !last->successors[1]); 579 return last; 580} 581 582#define agx_worklist_init(ctx, w) u_worklist_init(w, ctx->num_blocks, ctx) 583#define agx_worklist_push_head(w, block) u_worklist_push_head(w, block, index) 584#define agx_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index) 585#define agx_worklist_peek_head(w) u_worklist_peek_head(w, agx_block, index) 586#define agx_worklist_pop_head(w) u_worklist_pop_head( w, agx_block, index) 587#define agx_worklist_peek_tail(w) u_worklist_peek_tail(w, agx_block, index) 588#define agx_worklist_pop_tail(w) u_worklist_pop_tail( w, agx_block, index) 589 590/* Like in NIR, for use with the builder */ 591 592enum agx_cursor_option { 593 agx_cursor_after_block, 594 agx_cursor_before_instr, 595 agx_cursor_after_instr 596}; 597 598typedef struct { 599 enum agx_cursor_option option; 600 601 union { 602 agx_block *block; 603 agx_instr *instr; 604 }; 605} agx_cursor; 606 607static inline agx_cursor 608agx_after_block(agx_block *block) 609{ 610 return (agx_cursor) { 611 .option = agx_cursor_after_block, 612 .block = block 613 }; 614} 615 616static inline agx_cursor 617agx_before_instr(agx_instr *instr) 618{ 619 return (agx_cursor) { 620 .option = agx_cursor_before_instr, 621 .instr = instr 622 }; 623} 624 625static inline agx_cursor 626agx_after_instr(agx_instr *instr) 627{ 628 return (agx_cursor) { 629 .option = agx_cursor_after_instr, 630 .instr = instr 631 }; 632} 633 634/* 635 * Get a cursor inserting at the logical end of the block. In particular, this 636 * is before branches or control flow instructions, which occur after the 637 * logical end but before the physical end. 638 */ 639static inline agx_cursor 640agx_after_block_logical(agx_block *block) 641{ 642 /* Search for a p_logical_end */ 643 agx_foreach_instr_in_block_rev(block, I) { 644 if (I->op == AGX_OPCODE_P_LOGICAL_END) 645 return agx_before_instr(I); 646 } 647 648 /* If there's no p_logical_end, use the physical end */ 649 return agx_after_block(block); 650} 651 652/* IR builder in terms of cursor infrastructure */ 653 654typedef struct { 655 agx_context *shader; 656 agx_cursor cursor; 657} agx_builder; 658 659static inline agx_builder 660agx_init_builder(agx_context *ctx, agx_cursor cursor) 661{ 662 return (agx_builder) { 663 .shader = ctx, 664 .cursor = cursor 665 }; 666} 667 668/* Insert an instruction at the cursor and move the cursor */ 669 670static inline void 671agx_builder_insert(agx_cursor *cursor, agx_instr *I) 672{ 673 switch (cursor->option) { 674 case agx_cursor_after_instr: 675 list_add(&I->link, &cursor->instr->link); 676 cursor->instr = I; 677 return; 678 679 case agx_cursor_after_block: 680 list_addtail(&I->link, &cursor->block->instructions); 681 cursor->option = agx_cursor_after_instr; 682 cursor->instr = I; 683 return; 684 685 case agx_cursor_before_instr: 686 list_addtail(&I->link, &cursor->instr->link); 687 cursor->option = agx_cursor_after_instr; 688 cursor->instr = I; 689 return; 690 } 691 692 unreachable("Invalid cursor option"); 693} 694 695/* Uniform file management */ 696 697agx_index 698agx_indexed_sysval(agx_context *ctx, enum agx_push_type type, enum agx_size size, 699 unsigned index, unsigned length); 700 701/* Routines defined for AIR */ 702 703void agx_print_instr(agx_instr *I, FILE *fp); 704void agx_print_block(agx_block *block, FILE *fp); 705void agx_print_shader(agx_context *ctx, FILE *fp); 706void agx_optimizer(agx_context *ctx); 707void agx_lower_pseudo(agx_context *ctx); 708void agx_dce(agx_context *ctx); 709void agx_ra(agx_context *ctx); 710void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission); 711 712#ifndef NDEBUG 713void agx_validate(agx_context *ctx, const char *after_str); 714#else 715static inline void agx_validate(UNUSED agx_context *ctx, UNUSED const char *after_str) { return; } 716#endif 717 718unsigned agx_write_registers(agx_instr *I, unsigned d); 719 720struct agx_copy { 721 /* Base register destination of the copy */ 722 unsigned dest; 723 724 /* Base register source of the copy */ 725 unsigned src; 726 727 /* Size of the copy */ 728 enum agx_size size; 729 730 /* Whether the copy has been handled. Callers must leave to false. */ 731 bool done; 732}; 733 734void 735agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies, unsigned n); 736 737void agx_compute_liveness(agx_context *ctx); 738void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I); 739 740#ifdef __cplusplus 741} /* extern C */ 742#endif 743 744#endif 745