/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef VC4_QIR_H
#define VC4_QIR_H

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#include "util/macros.h"
#include "compiler/nir/nir.h"
#include "util/list.h"
#include "util/u_math.h"

#include "vc4_screen.h"
#include "vc4_qpu_defines.h"
#include "vc4_qpu.h"
#include "kernel/vc4_packet.h"
#include "pipe/p_state.h"

struct nir_builder;

/* Register files a qreg can refer to.  Most of these name either a
 * virtual temporary (QFILE_TEMP) or a special read/write destination
 * that maps to QPU hardware at qpu_emit time.
 */
enum qfile {
        QFILE_NULL,
        QFILE_TEMP,
        QFILE_VARY,
        QFILE_UNIF,
        QFILE_VPM,
        QFILE_TLB_COLOR_WRITE,
        QFILE_TLB_COLOR_WRITE_MS,
        QFILE_TLB_Z_WRITE,
        QFILE_TLB_STENCIL_SETUP,

        /* If tex_s is written on its own without preceding t/r/b setup, it's
         * a direct memory access using the input value, without the sideband
         * uniform load.  We represent these in QIR as a separate write
         * destination so we can tell if the sideband uniform is present.
         */
        QFILE_TEX_S_DIRECT,

        QFILE_TEX_S,
        QFILE_TEX_T,
        QFILE_TEX_R,
        QFILE_TEX_B,

        /* Payload registers that aren't in the physical register file, so we
         * can just use the corresponding qpu_reg at qpu_emit time.
         */
        QFILE_FRAG_X,
        QFILE_FRAG_Y,
        QFILE_FRAG_REV_FLAG,
        QFILE_QPU_ELEMENT,

        /**
         * Stores an immediate value in the index field that will be used
         * directly by qpu_load_imm().
         */
        QFILE_LOAD_IMM,

        /**
         * Stores an immediate value in the index field that can be turned
         * into a small immediate field by qpu_encode_small_immediate().
         */
        QFILE_SMALL_IMM,
};

/* A reference to a value: a register file plus an index within that file.
 *
 * For QFILE_LOAD_IMM/QFILE_SMALL_IMM, "index" holds the immediate value
 * itself.  "pack" is a QPU pack/unpack mode applied to the value
 * (QPU_PACK_* when used as a destination, QPU_UNPACK_* when used as a
 * source -- see the qir_UNPACK_*/qir_PACK_* helpers below); 0 means no
 * packing.
 */
struct qreg {
        enum qfile file;
        uint32_t index;
        int pack;
};

/* Builds a qreg with no pack mode (the compound literal zero-initializes
 * the omitted "pack" member).
 */
static inline struct qreg qir_reg(enum qfile file, uint32_t index)
{
        return (struct qreg){file, index};
}

/* QIR opcodes.  Most correspond closely to QPU ALU operations; the tail
 * of the enum holds pseudo-ops for signals, immediates and control flow.
 */
enum qop {
        QOP_UNDEF,
        QOP_MOV,
        QOP_FMOV,
        QOP_MMOV,
        QOP_FADD,
        QOP_FSUB,
        QOP_FMUL,
        QOP_V8MULD,
        QOP_V8MIN,
        QOP_V8MAX,
        QOP_V8ADDS,
        QOP_V8SUBS,
        QOP_MUL24,
        QOP_FMIN,
        QOP_FMAX,
        QOP_FMINABS,
        QOP_FMAXABS,
        QOP_ADD,
        QOP_SUB,
        QOP_SHL,
        QOP_SHR,
        QOP_ASR,
        QOP_MIN,
        QOP_MIN_NOIMM,
        QOP_MAX,
        QOP_AND,
        QOP_OR,
        QOP_XOR,
        QOP_NOT,

        QOP_FTOI,
        QOP_ITOF,
        QOP_RCP,
        QOP_RSQ,
        QOP_EXP2,
        QOP_LOG2,
        QOP_VW_SETUP,
        QOP_VR_SETUP,
        QOP_TLB_COLOR_READ,
        QOP_MS_MASK,
        QOP_VARY_ADD_C,

        QOP_FRAG_Z,
        QOP_FRAG_W,

        /**
         * Signal of texture read being necessary and then reading r4 into
         * the destination
         */
        QOP_TEX_RESULT,

        /**
         * Insert the signal for switching threads in a threaded fragment
         * shader.  No value can be live in an accumulator across a thrsw.
         *
         * At the QPU level, this will have several delay slots before the
         * switch happens.  Those slots are the responsibility of the
         * scheduler.
         */
        QOP_THRSW,

        /* 32-bit immediate loaded to each SIMD channel */
        QOP_LOAD_IMM,

        /* 32-bit immediate divided into 16 2-bit unsigned int values and
         * loaded to each corresponding SIMD channel.
         */
        QOP_LOAD_IMM_U2,
        /* 32-bit immediate divided into 16 2-bit signed int values and
         * loaded to each corresponding SIMD channel.
         */
        QOP_LOAD_IMM_I2,

        QOP_ROT_MUL,

        /* Jumps to block->successor[0] if the qinst->cond (as a
         * QPU_COND_BRANCH_*) passes, or block->successor[1] if not.  Note
         * that block->successor[1] may be unset if the condition is ALWAYS.
         */
        QOP_BRANCH,

        /* Emits an ADD from src[0] to src[1], where src[0] must be a
         * QOP_LOAD_IMM result and src[1] is a QUNIFORM_UNIFORMS_ADDRESS,
         * required by the kernel as part of its branch validation.
         */
        QOP_UNIFORMS_RESET,
};

/* List node wrapping a single encoded 64-bit QPU instruction. */
struct queued_qpu_inst {
        struct list_head link;
        uint64_t inst;
};

/* One QIR instruction: an opcode, a destination, and up to 3 sources. */
struct qinst {
        struct list_head link;

        enum qop op;
        struct qreg dst;
        struct qreg src[3];
        /* NOTE(review): presumably "set flags" -- requests a QPU flags
         * update from this instruction (see qir_SF()); confirm against
         * vc4_qpu_emit.c.
         */
        bool sf;
        bool cond_is_exec_mask;
        /* Condition code under which the instruction executes (or, for
         * QOP_BRANCH, a QPU_COND_BRANCH_* -- see QOP_BRANCH above).
         */
        uint8_t cond;
};

enum qstage {
        /**
         * Coordinate shader, runs during binning, before the VS, and just
         * outputs position.
         */
        QSTAGE_COORD,
        QSTAGE_VERT,
        QSTAGE_FRAG,
};

/* What each entry in the shader's uniform stream means.  The (contents,
 * data) pairs built at compile time are turned into actual 32-bit values
 * at draw time.
 */
enum quniform_contents {
        /**
         * Indicates that a constant 32-bit value is copied from the program's
         * uniform contents.
         */
        QUNIFORM_CONSTANT,
        /**
         * Indicates that the program's uniform contents are used as an index
         * into the GL uniform storage.
         */
        QUNIFORM_UNIFORM,

        /** @{
         * Scaling factors from clip coordinates to relative to the viewport
         * center.
         *
         * This is used by the coordinate and vertex shaders to produce the
         * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed
         * point offsets from the viewport center.
         */
        QUNIFORM_VIEWPORT_X_SCALE,
        QUNIFORM_VIEWPORT_Y_SCALE,
        /** @} */

        QUNIFORM_VIEWPORT_Z_OFFSET,
        QUNIFORM_VIEWPORT_Z_SCALE,

        QUNIFORM_USER_CLIP_PLANE,

        /**
         * A reference to a texture config parameter 0 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture type, miplevels, and such.  It will be found as a
         * parameter to the first QOP_TEX_[STRB] instruction in a sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P0,

        /**
         * A reference to a texture config parameter 1 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture width, height, filters, and wrap modes.  It will be
         * found as a parameter to the second QOP_TEX_[STRB] instruction in a
         * sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P1,

        /** A reference to a texture config parameter 2 cubemap stride uniform */
        QUNIFORM_TEXTURE_CONFIG_P2,

        QUNIFORM_TEXTURE_FIRST_LEVEL,

        QUNIFORM_TEXTURE_MSAA_ADDR,

        QUNIFORM_UBO0_ADDR,
        QUNIFORM_UBO1_ADDR,

        QUNIFORM_TEXRECT_SCALE_X,
        QUNIFORM_TEXRECT_SCALE_Y,

        QUNIFORM_TEXTURE_BORDER_COLOR,

        QUNIFORM_BLEND_CONST_COLOR_X,
        QUNIFORM_BLEND_CONST_COLOR_Y,
        QUNIFORM_BLEND_CONST_COLOR_Z,
        QUNIFORM_BLEND_CONST_COLOR_W,
        QUNIFORM_BLEND_CONST_COLOR_RGBA,
        QUNIFORM_BLEND_CONST_COLOR_AAAA,

        QUNIFORM_STENCIL,

        QUNIFORM_SAMPLE_MASK,

        /* Placeholder uniform that will be updated by the kernel when used by
         * an instruction writing to QPU_W_UNIFORMS_ADDRESS.
         */
        QUNIFORM_UNIFORMS_ADDRESS,
};

/* Describes one scalar FS input: the VARYING_SLOT_* it comes from, plus
 * which component of that slot ("swizzle" -- NOTE(review): presumably a
 * component index; confirm against the users of input_slots[]).
 */
struct vc4_varying_slot {
        uint8_t slot;
        uint8_t swizzle;
};

/* Shader key fields shared between the FS and VS keys: the source shader
 * plus per-sampler and clip-plane state that affects compilation.
 */
struct vc4_key {
        struct vc4_uncompiled_shader *shader_state;
        struct {
                enum pipe_format format;
                uint8_t swizzle[4];
                union {
                        struct {
                                unsigned compare_mode:1;
                                unsigned compare_func:3;
                                unsigned wrap_s:3;
                                unsigned wrap_t:3;
                                bool force_first_level:1;
                        };
                        struct {
                                uint16_t msaa_width, msaa_height;
                        };
                };
        } tex[VC4_MAX_TEXTURE_SAMPLERS];
        uint8_t ucp_enables;
};

/* Fragment shader compile key: non-shader GL state baked into the FS. */
struct vc4_fs_key {
        struct vc4_key base;
        enum pipe_format color_format;
        bool depth_enabled;
        bool stencil_enabled;
        bool stencil_twoside;
        bool stencil_full_writemasks;
        bool is_points;
        bool is_lines;
        bool point_coord_upper_left;
        bool msaa;
        bool sample_coverage;
        bool sample_alpha_to_coverage;
        bool sample_alpha_to_one;
        uint8_t logicop_func;
        uint32_t point_sprite_mask;
        uint32_t ubo_1_size;

        struct pipe_rt_blend_state blend;
};

/* Vertex/coordinate shader compile key. */
struct vc4_vs_key {
        struct vc4_key base;

        const struct vc4_fs_inputs *fs_inputs;
        enum pipe_format attr_formats[8];
        bool is_coord;
        bool per_vertex_point_size;
};

/** A basic block of QIR instructions. */
struct qblock {
        struct list_head link;

        struct list_head instructions;
        struct list_head qpu_inst_list;

        struct set *predecessors;
        struct qblock *successors[2];

        int index;

        /* Instruction IPs for the first and last instruction of the block.
         * Set by vc4_qpu_schedule.c.
         */
        uint32_t start_qpu_ip;
        uint32_t end_qpu_ip;

        /* Instruction IP for the branch instruction of the block.  Set by
         * vc4_qpu_schedule.c.
         */
        uint32_t branch_qpu_ip;

        /** @{ used by vc4_qir_live_variables.c */
        BITSET_WORD *def;
        BITSET_WORD *use;
        BITSET_WORD *live_in;
        BITSET_WORD *live_out;
        int start_ip, end_ip;
        /** @} */
};

/* State for a single shader compile, from NIR input through QIR to the
 * final QPU instruction stream.
 */
struct vc4_compile {
        struct vc4_context *vc4;
        nir_shader *s;
        nir_function_impl *impl;
        struct exec_list *cf_node_list;

        /**
         * Mapping from nir_register * or nir_ssa_def * to array of struct
         * qreg for the values.
         */
        struct hash_table *def_ht;

        /* For each temp, the instruction generating its value. */
        struct qinst **defs;
        uint32_t defs_array_size;

        /**
         * Inputs to the shader, arranged by TGSI declaration order.
         *
         * Not all fragment shader QFILE_VARY reads are present in this array.
         */
        struct qreg *inputs;
        struct qreg *outputs;
        bool msaa_per_sample_output;
        struct qreg color_reads[VC4_MAX_SAMPLES];
        struct qreg sample_colors[VC4_MAX_SAMPLES];
        uint32_t inputs_array_size;
        uint32_t outputs_array_size;
        uint32_t uniforms_array_size;

        /* State for whether we're executing on each channel currently.  0 if
         * yes, otherwise a block number + 1 that the channel jumped to.
         */
        struct qreg execute;

        struct qreg line_x, point_x, point_y;
        /** boolean (~0 -> true) if the fragment has been discarded. */
        struct qreg discard;
        struct qreg payload_FRAG_Z;
        struct qreg payload_FRAG_W;

        uint8_t vattr_sizes[8];

        /**
         * Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.
         *
         * This includes those that aren't part of the VPM varyings, like
         * point/line coordinates.
         */
        struct vc4_varying_slot *input_slots;
        uint32_t num_input_slots;
        uint32_t input_slots_array_size;

        /**
         * An entry per outputs[] in the VS indicating what the VARYING_SLOT_*
         * of the output is.  Used to emit from the VS in the order that the
         * FS needs.
         */
        struct vc4_varying_slot *output_slots;

        struct pipe_shader_state *shader_state;
        struct vc4_key *key;
        struct vc4_fs_key *fs_key;
        struct vc4_vs_key *vs_key;

        /* Live ranges of temps. */
        int *temp_start, *temp_end;

        uint32_t *uniform_data;
        enum quniform_contents *uniform_contents;
        uint32_t uniform_array_size;
        uint32_t num_uniforms;
        uint32_t num_outputs;
        uint32_t num_texture_samples;
        uint32_t output_position_index;
        uint32_t output_color_index;
        uint32_t output_point_size_index;
        uint32_t output_sample_mask_index;

        struct qreg undef;
        enum qstage stage;
        uint32_t num_temps;

        struct list_head blocks;
        int next_block_index;
        struct qblock *cur_block;
        struct qblock *loop_cont_block;
        struct qblock *loop_break_block;
        struct qblock *last_top_block;

        struct list_head qpu_inst_list;

        /* Pre-QPU-scheduled instruction containing the last THRSW */
        uint64_t *last_thrsw;

        uint64_t *qpu_insts;
        uint32_t qpu_inst_count;
        uint32_t qpu_inst_size;
        uint32_t num_inputs;

        /**
         * Number of inputs from num_inputs remaining to be queued to the read
         * FIFO in the VS/CS.
         */
        uint32_t num_inputs_remaining;

        /* Number of inputs currently in the read FIFO for the VS/CS */
        uint32_t num_inputs_in_fifo;

        /** Next offset in the VPM to read from in the VS/CS */
        uint32_t vpm_read_offset;

        uint32_t program_id;
        uint32_t variant_id;

        /* Set to compile program in threaded FS mode, where SIG_THREAD_SWITCH
         * is used to hide texturing latency at the cost of limiting ourselves
         * to the bottom half of physical reg space.
         */
        bool fs_threaded;

        bool last_thrsw_at_top_level;

        bool failed;
};

/* Special nir_load_input intrinsic index for loading the current TLB
 * destination color.
 */
#define VC4_NIR_TLB_COLOR_READ_INPUT		2000000000

/* NOTE(review): shares the value of VC4_NIR_TLB_COLOR_READ_INPUT; input
 * and output indices live in separate namespaces, so this looks
 * intentional -- confirm in vc4_nir_lower_blend.c before changing either.
 */
#define VC4_NIR_MS_MASK_OUTPUT			2000000000

/* Compile lifecycle and IR construction. */
struct vc4_compile *qir_compile_init(void);
void qir_compile_destroy(struct vc4_compile *c);
struct qblock *qir_new_block(struct vc4_compile *c);
void qir_set_emit_block(struct vc4_compile *c, struct qblock *block);
void qir_link_blocks(struct qblock *predecessor, struct qblock *successor);
struct qblock *qir_entry_block(struct vc4_compile *c);
struct qblock *qir_exit_block(struct vc4_compile *c);
struct qinst *qir_inst(enum qop op, struct qreg dst,
                       struct qreg src0, struct qreg src1);
void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst);
struct qreg qir_uniform(struct vc4_compile *c,
                        enum quniform_contents contents,
                        uint32_t data);
void qir_schedule_instructions(struct vc4_compile *c);
void qir_reorder_uniforms(struct vc4_compile *c);
void qir_emit_uniform_stream_resets(struct vc4_compile *c);

struct qreg qir_emit_def(struct vc4_compile *c, struct qinst *inst);
struct qinst *qir_emit_nondef(struct vc4_compile *c, struct qinst *inst);

/* Queries over instructions, used by optimization and scheduling passes. */
struct qreg qir_get_temp(struct vc4_compile *c);
void qir_calculate_live_intervals(struct vc4_compile *c);
int qir_get_nsrc(struct qinst *inst);
int qir_get_non_sideband_nsrc(struct qinst *inst);
int qir_get_tex_uniform_src(struct qinst *inst);
bool qir_reg_equals(struct qreg a, struct qreg b);
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
bool qir_has_uniform_read(struct qinst *inst);
bool qir_is_mul(struct qinst *inst);
bool qir_is_raw_mov(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
bool qir_has_implicit_tex_uniform(struct qinst *inst);
bool qir_is_float_input(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);
uint8_t qir_channels_written(struct qinst *inst);

/* Debug dumping and validation. */
void qir_dump(struct vc4_compile *c);
void qir_dump_inst(struct vc4_compile *c, struct qinst *inst);
char *qir_describe_uniform(enum quniform_contents contents, uint32_t data,
                           const uint32_t *uniforms);
const char *qir_get_stage_name(enum qstage stage);

void qir_validate(struct vc4_compile *c);

/* Optimization passes; the bool-returning ones report whether progress
 * was made.
 */
void qir_optimize(struct vc4_compile *c);
bool qir_opt_algebraic(struct vc4_compile *c);
bool qir_opt_coalesce_ff_writes(struct vc4_compile *c);
bool qir_opt_constant_folding(struct vc4_compile *c);
bool qir_opt_copy_propagation(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
bool qir_opt_peephole_sf(struct vc4_compile *c);
bool qir_opt_small_immediates(struct vc4_compile *c);
bool qir_opt_vpm(struct vc4_compile *c);
void vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c);
void vc4_nir_lower_io(nir_shader *s, struct vc4_compile *c);
nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,
                                          nir_ssa_def **srcs, int swiz);
void vc4_nir_lower_txf_ms(nir_shader *s, struct vc4_compile *c);
void qir_lower_uniforms(struct vc4_compile *c);

uint32_t qpu_schedule_instructions(struct vc4_compile *c);

void qir_SF(struct vc4_compile *c, struct qreg src);

/** Returns a uniform slot holding the constant 32-bit value "ui". */
static inline struct qreg
qir_uniform_ui(struct vc4_compile *c, uint32_t ui)
{
        return qir_uniform(c, QUNIFORM_CONSTANT, ui);
}

/** Returns a uniform slot holding the constant float "f" (as its bits). */
static inline struct qreg
qir_uniform_f(struct vc4_compile *c, float f)
{
        return qir_uniform(c, QUNIFORM_CONSTANT, fui(f));
}

/* Declares qir_<name>() (allocating a fresh temp for the result) and
 * qir_<name>_dest() (writing a caller-provided destination) for a
 * zero-source op.
 */
#define QIR_ALU0(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c)                                        \
{                                                                        \
        return qir_emit_def(c, qir_inst(QOP_##name, c->undef,            \
                                        c->undef, c->undef));            \
}                                                                        \
static inline struct qinst *                                             \
qir_##name##_dest(struct vc4_compile *c, struct qreg dest)               \
{                                                                        \
        return qir_emit_nondef(c, qir_inst(QOP_##name, dest,             \
                                           c->undef, c->undef));         \
}

/* Same as QIR_ALU0, but for one-source ops. */
#define QIR_ALU1(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a)                         \
{                                                                        \
        return qir_emit_def(c, qir_inst(QOP_##name, c->undef,            \
                                        a, c->undef));                   \
}                                                                        \
static inline struct qinst *                                             \
qir_##name##_dest(struct vc4_compile *c, struct qreg dest,               \
                  struct qreg a)                                         \
{                                                                        \
        return qir_emit_nondef(c, qir_inst(QOP_##name, dest, a,          \
                                           c->undef));                   \
}

/* Same as QIR_ALU0, but for two-source ops. */
#define QIR_ALU2(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        return qir_emit_def(c, qir_inst(QOP_##name, c->undef, a, b));    \
}                                                                        \
static inline struct qinst *                                             \
qir_##name##_dest(struct vc4_compile *c, struct qreg dest,               \
                  struct qreg a, struct qreg b)                          \
{                                                                        \
        return qir_emit_nondef(c, qir_inst(QOP_##name, dest, a, b));     \
}

/* Emitter for a one-source op with no meaningful destination. */
#define QIR_NODST_1(name)                                                \
static inline struct qinst *                                             \
qir_##name(struct vc4_compile *c, struct qreg a)                         \
{                                                                        \
        return qir_emit_nondef(c, qir_inst(QOP_##name, c->undef,         \
                                           a, c->undef));                \
}

/* Emitter for a two-source op with no meaningful destination. */
#define QIR_NODST_2(name)                                                \
static inline struct qinst *                                             \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        return qir_emit_nondef(c, qir_inst(QOP_##name, c->undef,         \
                                           a, b));                       \
}

/* Lazily materializes a payload value (FRAG_Z/FRAG_W): the first request
 * allocates a temp and inserts its defining instruction at the top of the
 * entry block; later requests return the cached temp.
 */
#define QIR_PAYLOAD(name)                                                \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c)                                        \
{                                                                        \
        struct qreg *payload = &c->payload_##name;                       \
        if (payload->file != QFILE_NULL)                                 \
                return *payload;                                         \
        *payload = qir_get_temp(c);                                      \
        struct qinst *inst = qir_inst(QOP_##name, *payload,              \
                                      c->undef, c->undef);               \
        struct qblock *entry = qir_entry_block(c);                       \
        list_add(&inst->link, &entry->instructions);                     \
        c->defs[payload->index] = inst;                                  \
        return *payload;                                                 \
}

QIR_ALU1(MOV)
QIR_ALU1(FMOV)
QIR_ALU1(MMOV)
QIR_ALU2(FADD)
QIR_ALU2(FSUB)
QIR_ALU2(FMUL)
QIR_ALU2(V8MULD)
QIR_ALU2(V8MIN)
QIR_ALU2(V8MAX)
QIR_ALU2(V8ADDS)
QIR_ALU2(V8SUBS)
QIR_ALU2(MUL24)
QIR_ALU2(FMIN)
QIR_ALU2(FMAX)
QIR_ALU2(FMINABS)
QIR_ALU2(FMAXABS)
QIR_ALU1(FTOI)
QIR_ALU1(ITOF)

QIR_ALU2(ADD)
QIR_ALU2(SUB)
QIR_ALU2(SHL)
QIR_ALU2(SHR)
QIR_ALU2(ASR)
QIR_ALU2(MIN)
QIR_ALU2(MIN_NOIMM)
QIR_ALU2(MAX)
QIR_ALU2(AND)
QIR_ALU2(OR)
QIR_ALU2(XOR)
QIR_ALU1(NOT)

QIR_ALU1(RCP)
QIR_ALU1(RSQ)
QIR_ALU1(EXP2)
QIR_ALU1(LOG2)
QIR_ALU1(VARY_ADD_C)
QIR_PAYLOAD(FRAG_Z)
QIR_PAYLOAD(FRAG_W)
QIR_ALU0(TEX_RESULT)
QIR_ALU0(TLB_COLOR_READ)
QIR_NODST_1(MS_MASK)

/* Conditional select: returns a temp holding src0 where "cond" passes and
 * src1 elsewhere, built as an unconditional MOV of src1 followed by a
 * conditional MOV of src0 into the same temp.
 */
static inline struct qreg
qir_SEL(struct vc4_compile *c, uint8_t cond, struct qreg src0, struct qreg src1)
{
        struct qreg t = qir_get_temp(c);
        qir_MOV_dest(c, t, src1);
        qir_MOV_dest(c, t, src0)->cond = cond;
        return t;
}

/* Unpacks 8-bit channel "i" of src as a float, by tagging the source of
 * an FMOV with QPU_UNPACK_8A + i.
 */
static inline struct qreg
qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
{
        struct qreg t = qir_FMOV(c, src);
        c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i;
        return t;
}

/* Integer variant of qir_UNPACK_8_F (MOV instead of FMOV). */
static inline struct qreg
qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i)
{
        struct qreg t = qir_MOV(c, src);
        c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i;
        return t;
}

/* Unpacks 16-bit half "i" (0 or 1) of src as a float (QPU_UNPACK_16A + i). */
static inline struct qreg
qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i)
{
        struct qreg t = qir_FMOV(c, src);
        c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i;
        return t;
}

/* Integer variant of qir_UNPACK_16_F (MOV instead of FMOV). */
static inline struct qreg
qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
{
        struct qreg t = qir_MOV(c, src);
        c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i;
        return t;
}

/* Writes "val" into 8-bit channel "chan" of dest using the MUL-pipeline
 * pack mode (QPU_PACK_MUL_8A + chan) on an MMOV.  dest must not already
 * carry a pack mode.
 */
static inline void
qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan)
{
        assert(!dest.pack);
        dest.pack = QPU_PACK_MUL_8A + chan;
        qir_emit_nondef(c, qir_inst(QOP_MMOV, dest, val, c->undef));
}

/* Replicates "val" across all four 8-bit channels of a new temp via the
 * QPU_PACK_MUL_8888 pack mode.
 */
static inline struct qreg
qir_PACK_8888_F(struct vc4_compile *c, struct qreg val)
{
        struct qreg dest = qir_MMOV(c, val);
        c->defs[dest.index]->dst.pack = QPU_PACK_MUL_8888;
        return dest;
}

/* Computes x**y as EXP2(y * LOG2(x)). */
static inline struct qreg
qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
{
        return qir_EXP2(c, qir_FMUL(c,
                                    y,
                                    qir_LOG2(c, x)));
}

/* Emits a write of "val" to the VPM. */
static inline void
qir_VPM_WRITE(struct vc4_compile *c, struct qreg val)
{
        qir_MOV_dest(c, qir_reg(QFILE_VPM, 0), val);
}

/* Loads the 32-bit immediate "val" into each SIMD channel. */
static inline struct qreg
qir_LOAD_IMM(struct vc4_compile *c, uint32_t val)
{
        return qir_emit_def(c, qir_inst(QOP_LOAD_IMM, c->undef,
                                        qir_reg(QFILE_LOAD_IMM, val), c->undef));
}

/* Per-channel 2-bit unsigned immediate load (see QOP_LOAD_IMM_U2). */
static inline struct qreg
qir_LOAD_IMM_U2(struct vc4_compile *c, uint32_t val)
{
        return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_U2, c->undef,
                                        qir_reg(QFILE_LOAD_IMM, val),
                                        c->undef));
}

/* Per-channel 2-bit signed immediate load (see QOP_LOAD_IMM_I2). */
static inline struct qreg
qir_LOAD_IMM_I2(struct vc4_compile *c, uint32_t val)
{
        return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_I2, c->undef,
                                        qir_reg(QFILE_LOAD_IMM, val),
                                        c->undef));
}

/** Shifts the multiply output to the right by rot channels */
static inline struct qreg
qir_ROT_MUL(struct vc4_compile *c, struct qreg val, uint32_t rot)
{
        return qir_emit_def(c, qir_inst(QOP_ROT_MUL, c->undef,
                                        val,
                                        qir_reg(QFILE_LOAD_IMM,
                                                QPU_SMALL_IMM_MUL_ROT + rot)));
}

/* Emits a MOV of src into dest that only executes where "cond" passes. */
static inline struct qinst *
qir_MOV_cond(struct vc4_compile *c, uint8_t cond,
             struct qreg dest, struct qreg src)
{
        struct qinst *mov = qir_MOV_dest(c, dest, src);
        mov->cond = cond;
        return mov;
}

/* Emits a branch with the given QPU_COND_BRANCH_* condition; its targets
 * are the current block's successors (see QOP_BRANCH).
 */
static inline struct qinst *
qir_BRANCH(struct vc4_compile *c, uint8_t cond)
{
        struct qinst *inst = qir_inst(QOP_BRANCH, c->undef, c->undef, c->undef);
        inst->cond = cond;
        qir_emit_nondef(c, inst);
        return inst;
}

/* Iterates the compile's basic blocks in (reverse) emission order. */
#define qir_for_each_block(block, c)                                     \
        list_for_each_entry(struct qblock, block, &c->blocks, link)

#define qir_for_each_block_rev(block, c)                                 \
        list_for_each_entry_rev(struct qblock, block, &c->blocks, link)

/* Loop over the non-NULL members of the successors array. */
#define qir_for_each_successor(succ, block)                              \
        for (struct qblock *succ = block->successors[0];                 \
             succ != NULL;                                               \
             succ = (succ == block->successors[1] ? NULL :               \
                     block->successors[1]))

/* Iterates a block's instructions (forward/reverse/removal-safe). */
#define qir_for_each_inst(inst, block)                                   \
        list_for_each_entry(struct qinst, inst, &block->instructions, link)

#define qir_for_each_inst_rev(inst, block)                               \
        list_for_each_entry_rev(struct qinst, inst, &block->instructions, link)

#define qir_for_each_inst_safe(inst, block)                              \
        list_for_each_entry_safe(struct qinst, inst, &block->instructions, link)

/* Iterates every instruction in the program, block by block. */
#define qir_for_each_inst_inorder(inst, c)                               \
        qir_for_each_block(_block, c)                                    \
                qir_for_each_inst_safe(inst, _block)

#endif /* VC4_QIR_H */