/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#ifndef BRW_EU_H
#define BRW_EU_H

#include <stdbool.h>
#include <stdio.h>
#include "brw_inst.h"
#include "brw_compiler.h"
#include "brw_eu_defines.h"
#include "brw_isa_info.h"
#include "brw_reg.h"
#include "brw_disasm_info.h"

#include "util/bitset.h"

#ifdef __cplusplus
extern "C" {
#endif

#define BRW_EU_MAX_INSN_STACK 5

struct brw_insn_state {
   /* One of BRW_EXECUTE_* */
   unsigned exec_size:3;

   /* Group in units of channels */
   unsigned group:5;

   /* Compression control on gfx4-5 */
   bool compressed:1;

   /* One of BRW_MASK_* */
   unsigned mask_control:1;

   /* Scheduling info for Gfx12+ */
   struct tgl_swsb swsb;

   bool saturate:1;

   /* One of BRW_ALIGN_* */
   unsigned access_mode:1;

   /* One of BRW_PREDICATE_* */
   enum brw_predicate predicate:4;

   bool pred_inv:1;

   /* Flag subreg.  Bottom bit is subreg, top bit is reg */
   unsigned flag_subreg:2;

   bool acc_wr_control:1;
};


/* A helper for accessing the last instruction emitted.  This makes it easy
 * to set various bits on an instruction without having to create a temporary
 * variable and assign the emitted instruction to it.
 */
#define brw_last_inst (&p->store[p->nr_insn - 1])

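/* Illustrative sketch (not part of this header's API): a common pattern is to
 * emit an instruction with one of the brw_* emitters declared below and then
 * tweak the result through brw_last_inst, bracketing any default-state changes
 * with brw_push_insn_state()/brw_pop_insn_state().  The register operands are
 * placeholders built with helpers from brw_reg.h, and per-field accessors such
 * as brw_inst_set_no_dd_clear() are assumed to come from brw_inst.h.
 *
 *    brw_push_insn_state(p);
 *    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 *    brw_MOV(p, dst, src);
 *    brw_inst_set_no_dd_clear(p->devinfo, brw_last_inst, true);
 *    brw_pop_insn_state(p);
 */
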
struct brw_codegen {
   brw_inst *store;
   int store_size;
   unsigned nr_insn;
   unsigned int next_insn_offset;

   void *mem_ctx;

   /* Allow clients to push/pop instruction state:
    */
   struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
   struct brw_insn_state *current;

   /** Whether or not the user wants automatic exec sizes
    *
    * If true, codegen will try to automatically infer the exec size of an
    * instruction from the width of the destination register.  If false, it
    * will take whatever is set by brw_set_default_exec_size verbatim.
    *
    * This is set to true by default in brw_init_codegen.
    */
   bool automatic_exec_sizes;

   bool single_program_flow;
   const struct brw_isa_info *isa;
   const struct intel_device_info *devinfo;

   /* Control flow stacks:
    * - if_stack contains IF and ELSE instructions which must be patched
    *   (and popped) once the matching ENDIF instruction is encountered.
    *
    *   Just store the instruction pointer (an index).
    */
   int *if_stack;
   int if_stack_depth;
   int if_stack_array_size;

   /**
    * loop_stack contains the instruction pointers of the starts of loops
    * which must be patched (and popped) once the matching WHILE instruction
    * is encountered.
    */
   int *loop_stack;
   /**
    * Pre-gfx6, the BREAK and CONT instructions had to tell how many IF/ENDIF
    * blocks they were popping out of, to fix up the mask stack.  This tracks
    * the IF/ENDIF nesting in each current nested loop level.
    */
   int *if_depth_in_loop;
   int loop_stack_depth;
   int loop_stack_array_size;

   struct brw_shader_reloc *relocs;
   int num_relocs;
   int reloc_array_size;
};

struct brw_label {
   int offset;
   int number;
   struct brw_label *next;
};

void brw_pop_insn_state( struct brw_codegen *p );
void brw_push_insn_state( struct brw_codegen *p );
unsigned brw_get_default_exec_size(struct brw_codegen *p);
unsigned brw_get_default_group(struct brw_codegen *p);
unsigned brw_get_default_access_mode(struct brw_codegen *p);
struct tgl_swsb brw_get_default_swsb(struct brw_codegen *p);
void brw_set_default_exec_size(struct brw_codegen *p, unsigned value);
void brw_set_default_mask_control( struct brw_codegen *p, unsigned value );
void brw_set_default_saturate( struct brw_codegen *p, bool enable );
void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode );
void brw_inst_set_compression(const struct intel_device_info *devinfo,
                              brw_inst *inst, bool on);
void brw_set_default_compression(struct brw_codegen *p, bool on);
void brw_inst_set_group(const struct intel_device_info *devinfo,
                        brw_inst *inst, unsigned group);
void brw_set_default_group(struct brw_codegen *p, unsigned group);
void brw_set_default_compression_control(struct brw_codegen *p, enum brw_compression c);
void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc);
void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse);
void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg);
void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value);
void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value);

void brw_init_codegen(const struct brw_isa_info *isa,
                      struct brw_codegen *p, void *mem_ctx);

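/* Illustrative sketch of the usual brw_codegen life cycle (mem_ctx is assumed
 * to be a ralloc context): initialize against an ISA description, emit
 * instructions through the helpers in this header, then fetch and optionally
 * disassemble the finished binary.
 *
 *    struct brw_codegen p;
 *    brw_init_codegen(isa, &p, mem_ctx);
 *
 *    ... emit instructions, e.g. brw_MOV(&p, dst, src) ...
 *
 *    unsigned sz;
 *    const unsigned *program = brw_get_program(&p, &sz);
 *    brw_disassemble_with_labels(isa, program, 0, sz, stderr);
 */
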
bool brw_has_jip(const struct intel_device_info *devinfo, enum opcode opcode);
bool brw_has_uip(const struct intel_device_info *devinfo, enum opcode opcode);
const struct brw_label *brw_find_label(const struct brw_label *root, int offset);
void brw_create_label(struct brw_label **labels, int offset, void *mem_ctx);
int brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa,
                         const struct brw_inst *inst, bool is_compacted,
                         int offset, const struct brw_label *root_label);
const struct
brw_label *brw_label_assembly(const struct brw_isa_info *isa,
                              const void *assembly, int start, int end,
                              void *mem_ctx);
void brw_disassemble_with_labels(const struct brw_isa_info *isa,
                                 const void *assembly,
                                 int start, int end, FILE *out);
void brw_disassemble(const struct brw_isa_info *isa,
                     const void *assembly, int start, int end,
                     const struct brw_label *root_label, FILE *out);
const struct brw_shader_reloc *brw_get_shader_relocs(struct brw_codegen *p,
                                                     unsigned *num_relocs);
const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz );

bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
                               const char *identifier);

void brw_realign(struct brw_codegen *p, unsigned align);
int brw_append_data(struct brw_codegen *p, void *data,
                    unsigned size, unsigned align);
brw_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode);
void brw_add_reloc(struct brw_codegen *p, uint32_t id,
                   enum brw_shader_reloc_type type,
                   uint32_t offset, uint32_t delta);
void brw_set_dest(struct brw_codegen *p, brw_inst *insn, struct brw_reg dest);
void brw_set_src0(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);

void gfx6_resolve_implied_move(struct brw_codegen *p,
                               struct brw_reg *src,
                               unsigned msg_reg_nr);

/* Helpers for regular instructions:
 */
#define ALU1(OP)                                  \
brw_inst *brw_##OP(struct brw_codegen *p,         \
                   struct brw_reg dest,           \
                   struct brw_reg src0);

#define ALU2(OP)                                  \
brw_inst *brw_##OP(struct brw_codegen *p,         \
                   struct brw_reg dest,           \
                   struct brw_reg src0,           \
                   struct brw_reg src1);

#define ALU3(OP)                                  \
brw_inst *brw_##OP(struct brw_codegen *p,         \
                   struct brw_reg dest,           \
                   struct brw_reg src0,           \
                   struct brw_reg src1,           \
                   struct brw_reg src2);

ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU3(ADD3)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

#undef ALU1
#undef ALU2
#undef ALU3


/* Helpers for SEND instruction:
 */

/**
 * Construct a message descriptor immediate with the specified common
 * descriptor controls.
 */
static inline uint32_t
brw_message_desc(const struct intel_device_info *devinfo,
                 unsigned msg_length,
                 unsigned response_length,
                 bool header_present)
{
   if (devinfo->ver >= 5) {
      return (SET_BITS(msg_length, 28, 25) |
              SET_BITS(response_length, 24, 20) |
              SET_BITS(header_present, 19, 19));
   } else {
      return (SET_BITS(msg_length, 23, 20) |
              SET_BITS(response_length, 19, 16));
   }
}

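/* Worked example (bit positions follow directly from the function above): on
 * Gfx9, brw_message_desc(devinfo, 4, 2, true) packs
 *
 *    message length  = 4 -> bits 28:25
 *    response length = 2 -> bits 24:20
 *    header present  = 1 -> bit  19
 *
 * i.e. (4 << 25) | (2 << 20) | (1 << 19) = 0x08280000.
 */
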
static inline unsigned
brw_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 5)
      return GET_BITS(desc, 28, 25);
   else
      return GET_BITS(desc, 23, 20);
}

static inline unsigned
brw_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 5)
      return GET_BITS(desc, 24, 20);
   else
      return GET_BITS(desc, 19, 16);
}

static inline bool
brw_message_desc_header_present(ASSERTED
                                const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   assert(devinfo->ver >= 5);
   return GET_BITS(desc, 19, 19);
}

static inline unsigned
brw_message_ex_desc(UNUSED const struct intel_device_info *devinfo,
                    unsigned ex_msg_length)
{
   return SET_BITS(ex_msg_length, 9, 6);
}

static inline unsigned
brw_message_ex_desc_ex_mlen(UNUSED const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   return GET_BITS(ex_desc, 9, 6);
}

static inline uint32_t
brw_urb_desc(const struct intel_device_info *devinfo,
             unsigned msg_type,
             bool per_slot_offset_present,
             bool channel_mask_present,
             unsigned global_offset)
{
   if (devinfo->ver >= 8) {
      return (SET_BITS(per_slot_offset_present, 17, 17) |
              SET_BITS(channel_mask_present, 15, 15) |
              SET_BITS(global_offset, 14, 4) |
              SET_BITS(msg_type, 3, 0));
   } else if (devinfo->ver >= 7) {
      assert(!channel_mask_present);
      return (SET_BITS(per_slot_offset_present, 16, 16) |
              SET_BITS(global_offset, 13, 3) |
              SET_BITS(msg_type, 3, 0));
   } else {
      unreachable("unhandled URB write generation");
   }
}

static inline uint32_t
brw_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->ver >= 7);
   return GET_BITS(desc, 3, 0);
}

static inline uint32_t
brw_urb_fence_desc(const struct intel_device_info *devinfo)
{
   assert(devinfo->has_lsc);
   return brw_urb_desc(devinfo, GFX125_URB_OPCODE_FENCE, false, false, 0);
}

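/* Illustrative sketch (not a definitive recipe): a complete URB SEND
 * descriptor is normally the OR of the common fields from brw_message_desc()
 * and the function-specific fields from brw_urb_desc(), e.g. for a simple
 * URB write with a header, one payload register and no response:
 *
 *    const uint32_t desc =
 *       brw_message_desc(devinfo, 2, 0, true) |
 *       brw_urb_desc(devinfo, GFX8_URB_OPCODE_SIMD8_WRITE, false, false, 0);
 *
 * GFX8_URB_OPCODE_SIMD8_WRITE is an assumed opcode name from brw_eu_defines.h.
 */
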
/**
 * Construct a message descriptor immediate with the specified sampler
 * function controls.
 */
static inline uint32_t
brw_sampler_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned sampler,
                 unsigned msg_type,
                 unsigned simd_mode,
                 unsigned return_format)
{
   const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
                          SET_BITS(sampler, 11, 8));

   /* From the CHV Bspec: Shared Functions - Message Descriptor -
    * Sampling Engine:
    *
    *   SIMD Mode[2]  29    This field is the upper bit of the 3-bit
    *                       SIMD Mode field.
    */
   if (devinfo->ver >= 8)
      return desc | SET_BITS(msg_type, 16, 12) |
             SET_BITS(simd_mode & 0x3, 18, 17) |
             SET_BITS(simd_mode >> 2, 29, 29) |
             SET_BITS(return_format, 30, 30);
   if (devinfo->ver >= 7)
      return (desc | SET_BITS(msg_type, 16, 12) |
              SET_BITS(simd_mode, 18, 17));
   else if (devinfo->ver >= 5)
      return (desc | SET_BITS(msg_type, 15, 12) |
              SET_BITS(simd_mode, 17, 16));
   else if (devinfo->verx10 >= 45)
      return desc | SET_BITS(msg_type, 15, 12);
   else
      return (desc | SET_BITS(return_format, 13, 12) |
              SET_BITS(msg_type, 15, 14));
}

static inline unsigned
brw_sampler_desc_binding_table_index(UNUSED
                                     const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline unsigned
brw_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   return GET_BITS(desc, 11, 8);
}

static inline unsigned
brw_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 16, 12);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 15, 12);
   else
      return GET_BITS(desc, 15, 14);
}

static inline unsigned
brw_sampler_desc_simd_mode(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   assert(devinfo->ver >= 5);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 18, 17) | GET_BITS(desc, 29, 29) << 2;
   else if (devinfo->ver >= 7)
      return GET_BITS(desc, 18, 17);
   else
      return GET_BITS(desc, 17, 16);
}

static inline unsigned
brw_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->verx10 == 40 || devinfo->ver >= 8);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 30, 30);
   else
      return GET_BITS(desc, 13, 12);
}

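/* Illustrative sketch (message-type and SIMD-mode enum names are assumptions
 * from brw_eu_defines.h): a sampler descriptor is typically combined with the
 * common fields and handed to brw_send_indirect_message(), declared further
 * down in this header:
 *
 *    const uint32_t desc =
 *       brw_message_desc(devinfo, mlen, rlen, true) |
 *       brw_sampler_desc(devinfo, surface_bti, sampler_index,
 *                        GFX5_SAMPLER_MESSAGE_SAMPLE,
 *                        BRW_SAMPLER_SIMD_MODE_SIMD16, 0);
 *    brw_send_indirect_message(p, BRW_SFID_SAMPLER, dst, payload,
 *                              brw_imm_ud(0), desc, false);
 */
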
/**
 * Construct a message descriptor for the dataport
 */
static inline uint32_t
brw_dp_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   /* Prior to gfx6, things are too inconsistent; use the dp_read/write_desc
    * helpers instead.
    */
   assert(devinfo->ver >= 6);
   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
   if (devinfo->ver >= 8) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 18, 14));
   } else if (devinfo->ver >= 7) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 17, 14));
   } else {
      return (desc | SET_BITS(msg_control, 12, 8) |
              SET_BITS(msg_type, 16, 13));
   }
}

static inline unsigned
brw_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline unsigned
brw_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 18, 14);
   else if (devinfo->ver >= 7)
      return GET_BITS(desc, 17, 14);
   else
      return GET_BITS(desc, 16, 13);
}

static inline unsigned
brw_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 13, 8);
   else
      return GET_BITS(desc, 12, 8);
}

/**
 * Construct a message descriptor immediate with the specified dataport read
 * function controls.
 */
static inline uint32_t
brw_dp_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned msg_type,
                 unsigned target_cache)
{
   if (devinfo->ver >= 6)
      return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
   else if (devinfo->verx10 >= 45)
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 10, 8) |
              SET_BITS(msg_type, 13, 11) |
              SET_BITS(target_cache, 15, 14));
   else
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(msg_type, 13, 12) |
              SET_BITS(target_cache, 15, 14));
}

static inline unsigned
brw_dp_read_desc_msg_type(const struct intel_device_info *devinfo,
                          uint32_t desc)
{
   if (devinfo->ver >= 6)
      return brw_dp_desc_msg_type(devinfo, desc);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 13, 11);
   else
      return GET_BITS(desc, 13, 12);
}

static inline unsigned
brw_dp_read_desc_msg_control(const struct intel_device_info *devinfo,
                             uint32_t desc)
{
   if (devinfo->ver >= 6)
      return brw_dp_desc_msg_control(devinfo, desc);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 10, 8);
   else
      return GET_BITS(desc, 11, 8);
}

/**
 * Construct a message descriptor immediate with the specified dataport write
 * function controls.
 */
static inline uint32_t
brw_dp_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  unsigned msg_type,
                  unsigned send_commit_msg)
{
   assert(devinfo->ver <= 6 || !send_commit_msg);
   if (devinfo->ver >= 6) {
      return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
             SET_BITS(send_commit_msg, 17, 17);
   } else {
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(msg_type, 14, 12) |
              SET_BITS(send_commit_msg, 15, 15));
   }
}

static inline unsigned
brw_dp_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   if (devinfo->ver >= 6)
      return brw_dp_desc_msg_type(devinfo, desc);
   else
      return GET_BITS(desc, 14, 12);
}

static inline unsigned
brw_dp_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   if (devinfo->ver >= 6)
      return brw_dp_desc_msg_control(devinfo, desc);
   else
      return GET_BITS(desc, 11, 8);
}

static inline bool
brw_dp_write_desc_write_commit(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver <= 6);
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 17, 17);
   else
      return GET_BITS(desc, 15, 15);
}

/**
 * Construct a message descriptor immediate with the specified dataport
 * surface function controls.
 */
static inline uint32_t
brw_dp_surface_desc(const struct intel_device_info *devinfo,
                    unsigned msg_type,
                    unsigned msg_control)
{
   assert(devinfo->ver >= 7);
   /* We'll OR in the binding table index later */
   return brw_dp_desc(devinfo, 0, msg_type, msg_control);
}

static inline uint32_t
brw_dp_untyped_atomic_desc(const struct intel_device_info *devinfo,
                           unsigned exec_size, /**< 0 for SIMD4x2 */
                           unsigned atomic_op,
                           bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (devinfo->verx10 >= 75) {
      if (exec_size > 0) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
      } else {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
      }
   } else {
      msg_type = GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
   }

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                 unsigned exec_size,
                                 unsigned atomic_op,
                                 bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 9);

   assert(exec_size > 0);
   const unsigned msg_type = GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline unsigned
brw_mdc_cmask(unsigned num_channels)
{
   /* See also MDC_CMASK in the SKL PRM Vol 2d. */
   return 0xf & (0xf << num_channels);
}

static inline unsigned
lsc_cmask(unsigned num_channels)
{
   assert(num_channels > 0 && num_channels <= 4);
   return BITSET_MASK(num_channels);
}

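/* Worked example (derived from the two helpers above): the legacy dataport
 * channel mask encodes the *disabled* channels, while the LSC one encodes the
 * *enabled* channels, so for num_channels == 2:
 *
 *    brw_mdc_cmask(2) = 0xf & (0xf << 2) = 0xc   (red/green enabled)
 *    lsc_cmask(2)     = BITSET_MASK(2)   = 0x3   (red/green enabled)
 */
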
static inline uint32_t
brw_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned num_channels,
                               bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_WRITE;
      }
   } else {
      /* Read */
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_READ;
      }
   }

   /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
   if (write && devinfo->verx10 == 70 && exec_size == 0)
      exec_size = 8;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

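/* Illustrative sketch (surface index and lengths are placeholders): since
 * brw_dp_surface_desc() leaves the binding table index at zero, callers are
 * expected to OR it in together with the common descriptor fields, e.g. for a
 * SIMD16 four-channel untyped read from binding table entry 3:
 *
 *    const uint32_t desc =
 *       brw_message_desc(devinfo, mlen, rlen, true) |
 *       brw_dp_untyped_surface_rw_desc(devinfo, 16, 4, false) |
 *       SET_BITS(3, 7, 0);
 */
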
static inline unsigned
brw_mdc_ds(unsigned bit_size)
{
   switch (bit_size) {
   case 8:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
   case 16:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
   case 32:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
   default:
      unreachable("Unsupported bit_size for byte scattered messages");
   }
}

static inline uint32_t
brw_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                              unsigned exec_size,
                              unsigned bit_size,
                              bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   assert(devinfo->verx10 >= 75);
   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
              HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;

   assert(exec_size > 0);
   const unsigned msg_control =
      SET_BITS(exec_size == 16, 0, 0) |
      SET_BITS(brw_mdc_ds(bit_size), 3, 2);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size,
                               bool write)
{
   assert(exec_size == 8 || exec_size == 16);

   unsigned msg_type;
   if (write) {
      if (devinfo->ver >= 6) {
         msg_type = GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
      } else {
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
      }
   } else {
      if (devinfo->ver >= 7) {
         msg_type = GFX7_DATAPORT_DC_DWORD_SCATTERED_READ;
      } else if (devinfo->verx10 >= 45) {
         msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
      } else {
         msg_type = BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
      }
   }

   const unsigned msg_control =
      SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
      SET_BITS(exec_size == 16, 0, 0);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_oword_block_rw_desc(const struct intel_device_info *devinfo,
                           bool align_16B,
                           unsigned num_dwords,
                           bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   const unsigned msg_type =
      write ?     GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE :
      align_16B ? GFX7_DATAPORT_DC_OWORD_BLOCK_READ :
                  GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;

   const unsigned msg_control =
      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                                   unsigned exec_size, /**< 0 for SIMD4x2 */
                                   unsigned num_channels,
                                   bool write)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 8);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
              GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
brw_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo,
                               bool align_16B,
                               unsigned num_dwords,
                               bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   unsigned msg_type =
      write ? GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;

   unsigned msg_control =
      SET_BITS(!align_16B, 4, 3) |
      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

/**
 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
 * Skylake PRM).
 */
static inline uint32_t
brw_mdc_a64_ds(unsigned elems)
{
   switch (elems) {
   case 1:  return 0;
   case 2:  return 1;
   case 4:  return 2;
   case 8:  return 3;
   default:
      unreachable("Unsupported element count for A64 scattered message");
   }
}

static inline uint32_t
brw_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                                  unsigned exec_size, /**< 0 for SIMD4x2 */
                                  unsigned bit_size,
                                  bool write)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 8);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;

   const unsigned msg_control =
      SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
      SET_BITS(brw_mdc_a64_ds(bit_size / 8), 3, 2) |
      SET_BITS(exec_size == 16, 4, 4);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
brw_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo,
                               ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned bit_size,
                               unsigned atomic_op,
                               bool response_expected)
{
   assert(exec_size == 8);
   assert(devinfo->ver >= 8);
   assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
   assert(devinfo->ver >= 12 || bit_size >= 32);

   const unsigned msg_type = bit_size == 16 ?
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP :
      GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(bit_size == 64, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
brw_dp_a64_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                     ASSERTED unsigned exec_size,
                                     unsigned bit_size,
                                     unsigned atomic_op,
                                     bool response_expected)
{
   assert(exec_size == 8);
   assert(devinfo->ver >= 9);
   assert(bit_size == 16 || bit_size == 32);
   assert(devinfo->ver >= 12 || bit_size == 32);

   assert(exec_size > 0);
   const unsigned msg_type = bit_size == 32 ?
      GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP :
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
brw_dp_typed_atomic_desc(const struct intel_device_info *devinfo,
                         unsigned exec_size,
                         unsigned exec_group,
                         unsigned atomic_op,
                         bool response_expected)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   unsigned msg_type;
   if (devinfo->verx10 >= 75) {
      if (exec_size == 0) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2;
      } else {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;
      }
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      msg_type = GFX7_DATAPORT_RC_TYPED_ATOMIC_OP;
   }

   const bool high_sample_mask = (exec_group / 8) % 2 == 1;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(high_sample_mask, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
                             unsigned exec_size,
                             unsigned exec_group,
                             unsigned num_channels,
                             bool write)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   /* Typed surface reads and writes don't support SIMD16 */
   assert(exec_size <= 8);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE;
      }
   } else {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_READ;
      }
   }

   /* See also MDC_SG3 in the SKL PRM Vol 2d. */
   unsigned msg_control;
   if (devinfo->verx10 >= 75) {
      /* See also MDC_SG3 in the SKL PRM Vol 2d. */
      const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
                                  1 + ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 4);
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      const unsigned slot_group = ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 5);
   }

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_fb_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   /* Prior to gen6, things are too inconsistent; use the fb_(read|write)_desc
    * helpers instead.
    */
   assert(devinfo->ver >= 6);
   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
   if (devinfo->ver >= 7) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 17, 14));
   } else {
      return (desc | SET_BITS(msg_control, 12, 8) |
              SET_BITS(msg_type, 16, 13));
   }
}

static inline unsigned
brw_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline uint32_t
brw_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 13, 8);
   else
      return GET_BITS(desc, 12, 8);
}

static inline unsigned
brw_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 17, 14);
   else
      return GET_BITS(desc, 16, 13);
}

static inline uint32_t
brw_fb_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned exec_size,
                 bool per_sample)
{
   assert(devinfo->ver >= 9);
   assert(exec_size == 8 || exec_size == 16);

   return brw_fb_desc(devinfo, binding_table_index,
                      GFX9_DATAPORT_RC_RENDER_TARGET_READ, msg_control) |
          SET_BITS(per_sample, 13, 13) |
          SET_BITS(exec_size == 8, 8, 8) /* Render Target Message Subtype */;
}

static inline uint32_t
brw_fb_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  bool last_render_target,
                  bool coarse_write)
{
   const unsigned msg_type =
      devinfo->ver >= 6 ?
      GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE :
      BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;

   assert(devinfo->ver >= 10 || !coarse_write);

   if (devinfo->ver >= 6) {
      return brw_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
             SET_BITS(last_render_target, 12, 12) |
             SET_BITS(coarse_write, 18, 18);
   } else {
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(last_render_target, 11, 11) |
              SET_BITS(msg_type, 14, 12));
   }
}

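/* Illustrative sketch (lengths and control value are placeholders): a render
 * target write descriptor for the last RT at binding table entry 0 is
 * typically built as
 *
 *    const uint32_t desc =
 *       brw_message_desc(devinfo, mlen, 0, header_present) |
 *       brw_fb_write_desc(devinfo, 0, msg_control,
 *                         true,    // last render target
 *                         false);  // no coarse write
 *
 * with msg_control taken from the render-target-write controls in
 * brw_eu_defines.h (e.g. BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
 * an assumed name).
 */
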
static inline unsigned
brw_fb_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   if (devinfo->ver >= 6)
      return brw_fb_desc_msg_type(devinfo, desc);
   else
      return GET_BITS(desc, 14, 12);
}

static inline unsigned
brw_fb_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   if (devinfo->ver >= 6)
      return brw_fb_desc_msg_control(devinfo, desc);
   else
      return GET_BITS(desc, 11, 8);
}

static inline bool
brw_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 12, 12);
   else
      return GET_BITS(desc, 11, 11);
}

static inline bool
brw_fb_write_desc_write_commit(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver <= 6);
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 17, 17);
   else
      return GET_BITS(desc, 15, 15);
}

static inline bool
brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver >= 10);
   return GET_BITS(desc, 18, 18);
}

static inline bool
lsc_opcode_has_cmask(enum lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
}

static inline bool
lsc_opcode_has_transpose(enum lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE;
}

static inline uint32_t
lsc_data_size_bytes(enum lsc_data_size data_size)
{
   switch (data_size) {
   case LSC_DATA_SIZE_D8:
      return 1;
   case LSC_DATA_SIZE_D16:
      return 2;
   case LSC_DATA_SIZE_D32:
   case LSC_DATA_SIZE_D8U32:
   case LSC_DATA_SIZE_D16U32:
   case LSC_DATA_SIZE_D16BF32:
      return 4;
   case LSC_DATA_SIZE_D64:
      return 8;
   default:
      unreachable("Unsupported data payload size.");
   }
}

static inline uint32_t
lsc_addr_size_bytes(enum lsc_addr_size addr_size)
{
   switch (addr_size) {
   case LSC_ADDR_SIZE_A16: return 2;
   case LSC_ADDR_SIZE_A32: return 4;
   case LSC_ADDR_SIZE_A64: return 8;
   default:
      unreachable("Unsupported address size.");
   }
}

static inline uint32_t
lsc_vector_length(enum lsc_vect_size vect_size)
{
   switch (vect_size) {
   case LSC_VECT_SIZE_V1: return 1;
   case LSC_VECT_SIZE_V2: return 2;
   case LSC_VECT_SIZE_V3: return 3;
   case LSC_VECT_SIZE_V4: return 4;
   case LSC_VECT_SIZE_V8: return 8;
   case LSC_VECT_SIZE_V16: return 16;
   case LSC_VECT_SIZE_V32: return 32;
   case LSC_VECT_SIZE_V64: return 64;
   default:
      unreachable("Unsupported size of vector");
   }
}

static inline enum lsc_vect_size
lsc_vect_size(unsigned vect_size)
{
   switch(vect_size) {
   case 1:  return LSC_VECT_SIZE_V1;
   case 2:  return LSC_VECT_SIZE_V2;
   case 3:  return LSC_VECT_SIZE_V3;
   case 4:  return LSC_VECT_SIZE_V4;
   case 8:  return LSC_VECT_SIZE_V8;
   case 16: return LSC_VECT_SIZE_V16;
   case 32: return LSC_VECT_SIZE_V32;
   case 64: return LSC_VECT_SIZE_V64;
   default:
      unreachable("Unsupported vector size for dataport");
   }
}

static inline uint32_t
lsc_msg_desc(UNUSED const struct intel_device_info *devinfo,
             enum lsc_opcode opcode, unsigned simd_size,
             enum lsc_addr_surface_type addr_type,
             enum lsc_addr_size addr_sz, unsigned num_coordinates,
             enum lsc_data_size data_sz, unsigned num_channels,
             bool transpose, unsigned cache_ctrl, bool has_dest)
{
   assert(devinfo->has_lsc);

   unsigned dest_length = !has_dest ? 0 :
      DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * num_channels * simd_size,
                   REG_SIZE);

   unsigned src0_length =
      DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * num_coordinates * simd_size,
                   REG_SIZE);

   assert(!transpose || lsc_opcode_has_transpose(opcode));

   unsigned msg_desc =
      SET_BITS(opcode, 5, 0) |
      SET_BITS(addr_sz, 8, 7) |
      SET_BITS(data_sz, 11, 9) |
      SET_BITS(transpose, 15, 15) |
      SET_BITS(cache_ctrl, 19, 17) |
      SET_BITS(dest_length, 24, 20) |
      SET_BITS(src0_length, 28, 25) |
      SET_BITS(addr_type, 30, 29);

   if (lsc_opcode_has_cmask(opcode))
      msg_desc |= SET_BITS(lsc_cmask(num_channels), 15, 12);
   else
      msg_desc |= SET_BITS(lsc_vect_size(num_channels), 14, 12);

   return msg_desc;
}

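/* Illustrative sketch (argument values are hypothetical, and the cache-control
 * enum is assumed to come from brw_eu_defines.h): a SIMD16 LSC load of one
 * dword per lane through a flat A64 address would be described as
 *
 *    const uint32_t desc =
 *       lsc_msg_desc(devinfo, LSC_OP_LOAD, 16,
 *                    LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
 *                    1,                  // num_coordinates
 *                    LSC_DATA_SIZE_D32,
 *                    1,                  // num_channels
 *                    false,              // transpose
 *                    LSC_CACHE_LOAD_L1STATE_L3MOCS,
 *                    true);              // has_dest
 */
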
static inline enum lsc_opcode
lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
                    uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_opcode) GET_BITS(desc, 5, 0);
}

static inline enum lsc_addr_size
lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
}

static inline enum lsc_data_size
lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_data_size) GET_BITS(desc, 11, 9);
}

static inline enum lsc_vect_size
lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(!lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
}

static inline enum lsc_cmask
lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
                   uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_cmask) GET_BITS(desc, 15, 12);
}

static inline bool
lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 15, 15);
}

static inline unsigned
lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 19, 17);
}

static inline unsigned
lsc_msg_desc_dest_len(const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 24, 20);
}

static inline unsigned
lsc_msg_desc_src0_len(const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 28, 25);
}

static inline enum lsc_addr_surface_type
lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
}

static inline uint32_t
lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
                   enum lsc_fence_scope scope,
                   enum lsc_flush_type flush_type,
                   bool route_to_lsc)
{
   assert(devinfo->has_lsc);
   return SET_BITS(LSC_OP_FENCE, 5, 0) |
          SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
          SET_BITS(scope, 11, 9) |
          SET_BITS(flush_type, 14, 12) |
          SET_BITS(route_to_lsc, 18, 18) |
          SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
}

static inline enum lsc_fence_scope
lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
}

static inline enum lsc_flush_type
lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
}

static inline enum lsc_backup_fence_routing
lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
                                  uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
}

static inline uint32_t
lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
{
   assert(devinfo->has_lsc);
   return SET_BITS(bti, 31, 24) |
          SET_BITS(0, 23, 12);  /* base offset */
}

static inline unsigned
lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 23, 12);
}

static inline unsigned
lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 24);
}

static inline unsigned
lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
                             uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 12);
}

static inline uint32_t
lsc_bss_ex_desc(const struct intel_device_info *devinfo,
                unsigned surface_state_index)
{
   assert(devinfo->has_lsc);
   return SET_BITS(surface_state_index, 31, 6);
}

static inline unsigned
lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 6);
}

static inline uint32_t
brw_mdc_sm2(unsigned exec_size)
{
   assert(exec_size == 8 || exec_size == 16);
   return exec_size > 8;
}

static inline uint32_t
brw_mdc_sm2_exec_size(uint32_t sm2)
{
   assert(sm2 <= 1);
   return 8 << sm2;
}

static inline uint32_t
brw_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo,
                   unsigned exec_size, unsigned msg_type)
{
   assert(devinfo->has_ray_tracing);

   return SET_BITS(0, 19, 19) |   /* No header */
          SET_BITS(msg_type, 17, 14) |
          SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
}

static inline uint32_t
brw_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   return GET_BITS(desc, 17, 14);
}

static inline uint32_t
brw_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
}

static inline uint32_t
brw_rt_trace_ray_desc(ASSERTED const struct intel_device_info *devinfo,
                      unsigned exec_size)
{
   assert(devinfo->has_ray_tracing);

   return SET_BITS(0, 19, 19) |   /* No header */
          SET_BITS(0, 17, 14) |   /* Message type */
          SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
}

static inline uint32_t
brw_rt_trace_ray_desc_exec_size(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
}

/**
 * Construct a message descriptor immediate with the specified pixel
 * interpolator function controls.
 */
static inline uint32_t
brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
                      unsigned msg_type,
                      bool noperspective,
                      bool coarse_pixel_rate,
                      unsigned simd_mode,
                      unsigned slot_group)
{
   assert(devinfo->ver >= 10 || !coarse_pixel_rate);
   return (SET_BITS(slot_group, 11, 11) |
           SET_BITS(msg_type, 13, 12) |
           SET_BITS(!!noperspective, 14, 14) |
           SET_BITS(coarse_pixel_rate, 15, 15) |
           SET_BITS(simd_mode, 16, 16));
}

void brw_urb_WRITE(struct brw_codegen *p,
                   struct brw_reg dest,
                   unsigned msg_reg_nr,
                   struct brw_reg src0,
                   enum brw_urb_write_flags flags,
                   unsigned msg_length,
                   unsigned response_length,
                   unsigned offset,
                   unsigned swizzle);

/**
 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
 * desc.  If \p desc is not an immediate it will be transparently loaded to an
 * address register using an OR instruction.
 */
void
brw_send_indirect_message(struct brw_codegen *p,
                          unsigned sfid,
                          struct brw_reg dst,
                          struct brw_reg payload,
                          struct brw_reg desc,
                          unsigned desc_imm,
                          bool eot);

void
brw_send_indirect_split_message(struct brw_codegen *p,
                                unsigned sfid,
                                struct brw_reg dst,
                                struct brw_reg payload0,
                                struct brw_reg payload1,
                                struct brw_reg desc,
                                unsigned desc_imm,
                                struct brw_reg ex_desc,
                                unsigned ex_desc_imm,
                                bool eot);

void brw_ff_sync(struct brw_codegen *p,
                 struct brw_reg dest,
                 unsigned msg_reg_nr,
                 struct brw_reg src0,
                 bool allocate,
                 unsigned response_length,
                 bool eot);

void brw_svb_write(struct brw_codegen *p,
                   struct brw_reg dest,
                   unsigned msg_reg_nr,
                   struct brw_reg src0,
                   unsigned binding_table_index,
                   bool send_commit_msg);

brw_inst *brw_fb_WRITE(struct brw_codegen *p,
                       struct brw_reg payload,
                       struct brw_reg implied_header,
                       unsigned msg_control,
                       unsigned binding_table_index,
                       unsigned msg_length,
                       unsigned response_length,
                       bool eot,
                       bool last_render_target,
                       bool header_present);

brw_inst *gfx9_fb_READ(struct brw_codegen *p,
                       struct brw_reg dst,
                       struct brw_reg payload,
                       unsigned binding_table_index,
                       unsigned msg_length,
                       unsigned response_length,
                       bool per_sample);

void brw_SAMPLE(struct brw_codegen *p,
                struct brw_reg dest,
                unsigned msg_reg_nr,
                struct brw_reg src0,
                unsigned binding_table_index,
                unsigned sampler,
                unsigned msg_type,
                unsigned response_length,
                unsigned msg_length,
                unsigned header_present,
                unsigned simd_mode,
                unsigned return_format);

void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
                                      struct brw_reg header,
                                      struct brw_reg sampler_index);

void gfx4_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               unsigned msg_reg_nr,
               struct brw_reg src,
               unsigned precision );

void gfx6_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               struct brw_reg src0,
               struct brw_reg src1);

void brw_oword_block_read(struct brw_codegen *p,
                          struct brw_reg dest,
                          struct brw_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index);

unsigned brw_scratch_surface_idx(const struct brw_codegen *p);

void brw_oword_block_read_scratch(struct brw_codegen *p,
                                  struct brw_reg dest,
                                  struct brw_reg mrf,
                                  int num_regs,
                                  unsigned offset);

void brw_oword_block_write_scratch(struct brw_codegen *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   unsigned offset);

void gfx7_block_read_scratch(struct brw_codegen *p,
                             struct brw_reg dest,
                             int num_regs,
                             unsigned offset);

/**
 * Return the generation-specific jump distance scaling factor.
 *
 * Given the number of instructions to jump, we need to scale by
 * some number to obtain the actual jump distance to program in an
 * instruction.
 */
static inline unsigned
brw_jump_scale(const struct intel_device_info *devinfo)
{
   /* Broadwell measures jump targets in bytes. */
   if (devinfo->ver >= 8)
      return 16;

   /* Ironlake and later measure jump targets in 64-bit data chunks (in order
    * to support compaction), so each 128-bit instruction requires 2 chunks.
    */
   if (devinfo->ver >= 5)
      return 2;

   /* Gfx4 simply uses the number of 128-bit instructions. */
   return 1;
}

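/* Worked example (values follow directly from the function above): to jump
 * over 3 full-size instructions, the branch offset programmed into the
 * instruction is 3 * brw_jump_scale(devinfo), i.e. 48 on Gfx8+ (bytes),
 * 6 on Gfx5-7.5 (64-bit chunks) and 3 on Gfx4 (whole instructions).
 */
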
void brw_barrier(struct brw_codegen *p, struct brw_reg src);

/* If/else/endif.  Works by manipulating the execution flags on each
 * channel.
 */
brw_inst *brw_IF(struct brw_codegen *p, unsigned execute_size);
brw_inst *gfx6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
                  struct brw_reg src0, struct brw_reg src1);

void brw_ELSE(struct brw_codegen *p);
void brw_ENDIF(struct brw_codegen *p);

/* DO/WHILE loops:
 */
brw_inst *brw_DO(struct brw_codegen *p, unsigned execute_size);

brw_inst *brw_WHILE(struct brw_codegen *p);

brw_inst *brw_BREAK(struct brw_codegen *p);
brw_inst *brw_CONT(struct brw_codegen *p);
brw_inst *brw_HALT(struct brw_codegen *p);

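/* Illustrative sketch: the structured control-flow helpers above are emitted
 * in matching pairs; the codegen object records the IF/ELSE instructions on
 * if_stack and patches their jump targets when the closing instruction is
 * emitted, e.g.
 *
 *    brw_IF(p, BRW_EXECUTE_8);
 *    ... "then" instructions ...
 *    brw_ELSE(p);
 *    ... "else" instructions ...
 *    brw_ENDIF(p);
 */
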
/* Forward jumps:
 */
void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx);

brw_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index,
                   unsigned predicate_control);

void brw_NOP(struct brw_codegen *p);

void brw_WAIT(struct brw_codegen *p);

void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func);

/* Special case: there is never a destination, execution size will be
 * taken from src0:
 */
void brw_CMP(struct brw_codegen *p,
             struct brw_reg dest,
             unsigned conditional,
             struct brw_reg src0,
             struct brw_reg src1);

void brw_CMPN(struct brw_codegen *p,
              struct brw_reg dest,
              unsigned conditional,
              struct brw_reg src0,
              struct brw_reg src1);

void
brw_untyped_atomic(struct brw_codegen *p,
                   struct brw_reg dst,
                   struct brw_reg payload,
                   struct brw_reg surface,
                   unsigned atomic_op,
                   unsigned msg_length,
                   bool response_expected,
                   bool header_present);

void
brw_untyped_surface_read(struct brw_codegen *p,
                         struct brw_reg dst,
                         struct brw_reg payload,
                         struct brw_reg surface,
                         unsigned msg_length,
                         unsigned num_channels);

void
brw_untyped_surface_write(struct brw_codegen *p,
                          struct brw_reg payload,
                          struct brw_reg surface,
                          unsigned msg_length,
                          unsigned num_channels,
                          bool header_present);

void
brw_memory_fence(struct brw_codegen *p,
                 struct brw_reg dst,
                 struct brw_reg src,
                 enum opcode send_op,
                 enum brw_message_target sfid,
                 uint32_t desc,
                 bool commit_enable,
                 unsigned bti);

void
brw_pixel_interpolator_query(struct brw_codegen *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             bool noperspective,
                             bool coarse_pixel_rate,
                             unsigned mode,
                             struct brw_reg data,
                             unsigned msg_length,
                             unsigned response_length);

void
brw_find_live_channel(struct brw_codegen *p,
                      struct brw_reg dst,
                      bool last);

void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx);

void
brw_float_controls_mode(struct brw_codegen *p,
                        unsigned mode, unsigned mask);

void
brw_update_reloc_imm(const struct brw_isa_info *isa,
                     brw_inst *inst,
                     uint32_t value);

void
brw_MOV_reloc_imm(struct brw_codegen *p,
                  struct brw_reg dst,
                  enum brw_reg_type src_type,
                  uint32_t id);

/***********************************************************************
 * brw_eu_util.c:
 */

void brw_copy_indirect_to_indirect(struct brw_codegen *p,
                                   struct brw_indirect dst_ptr,
                                   struct brw_indirect src_ptr,
                                   unsigned count);

void brw_copy_from_indirect(struct brw_codegen *p,
                            struct brw_reg dst,
                            struct brw_indirect ptr,
                            unsigned count);

void brw_copy4(struct brw_codegen *p,
               struct brw_reg dst,
               struct brw_reg src,
               unsigned count);

void brw_copy8(struct brw_codegen *p,
               struct brw_reg dst,
               struct brw_reg src,
               unsigned count);

void brw_math_invert( struct brw_codegen *p,
                      struct brw_reg dst,
                      struct brw_reg src);

void brw_set_src1(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);

void brw_set_desc_ex(struct brw_codegen *p, brw_inst *insn,
                     unsigned desc, unsigned ex_desc);

static inline void
brw_set_desc(struct brw_codegen *p, brw_inst *insn, unsigned desc)
{
   brw_set_desc_ex(p, insn, desc, 0);
}

void brw_set_uip_jip(struct brw_codegen *p, int start_offset);

enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod);
enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod);

/* brw_eu_compact.c */
void brw_compact_instructions(struct brw_codegen *p, int start_offset,
                              struct disasm_info *disasm);
void brw_uncompact_instruction(const struct brw_isa_info *isa,
                               brw_inst *dst, brw_compact_inst *src);
bool brw_try_compact_instruction(const struct brw_isa_info *isa,
                                 brw_compact_inst *dst, const brw_inst *src);

void brw_debug_compact_uncompact(const struct brw_isa_info *isa,
                                 brw_inst *orig, brw_inst *uncompacted);

/* brw_eu_validate.c */
bool brw_validate_instruction(const struct brw_isa_info *isa,
                              const brw_inst *inst, int offset,
                              unsigned inst_size,
                              struct disasm_info *disasm);
bool brw_validate_instructions(const struct brw_isa_info *isa,
                               const void *assembly, int start_offset,
                               int end_offset, struct disasm_info *disasm);

static inline int
next_offset(const struct intel_device_info *devinfo, void *store, int offset)
{
   brw_inst *insn = (brw_inst *)((char *)store + offset);

   if (brw_inst_cmpt_control(devinfo, insn))
      return offset + 8;
   else
      return offset + 16;
}

/** Maximum SEND message length */
#define BRW_MAX_MSG_LENGTH 15

/** First MRF register used by spills */
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)

/** First MRF register used by pull loads */
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)

#ifdef __cplusplus
}
#endif

#endif