1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "brw_cfg.h" 25#include "brw_eu.h" 26#include "brw_fs.h" 27#include "brw_nir.h" 28#include "brw_vec4_tes.h" 29#include "dev/intel_debug.h" 30#include "main/uniforms.h" 31#include "util/macros.h" 32 33enum brw_reg_type 34brw_type_for_base_type(const struct glsl_type *type) 35{ 36 switch (type->base_type) { 37 case GLSL_TYPE_FLOAT16: 38 return BRW_REGISTER_TYPE_HF; 39 case GLSL_TYPE_FLOAT: 40 return BRW_REGISTER_TYPE_F; 41 case GLSL_TYPE_INT: 42 case GLSL_TYPE_BOOL: 43 case GLSL_TYPE_SUBROUTINE: 44 return BRW_REGISTER_TYPE_D; 45 case GLSL_TYPE_INT16: 46 return BRW_REGISTER_TYPE_W; 47 case GLSL_TYPE_INT8: 48 return BRW_REGISTER_TYPE_B; 49 case GLSL_TYPE_UINT: 50 return BRW_REGISTER_TYPE_UD; 51 case GLSL_TYPE_UINT16: 52 return BRW_REGISTER_TYPE_UW; 53 case GLSL_TYPE_UINT8: 54 return BRW_REGISTER_TYPE_UB; 55 case GLSL_TYPE_ARRAY: 56 return brw_type_for_base_type(type->fields.array); 57 case GLSL_TYPE_STRUCT: 58 case GLSL_TYPE_INTERFACE: 59 case GLSL_TYPE_SAMPLER: 60 case GLSL_TYPE_TEXTURE: 61 case GLSL_TYPE_ATOMIC_UINT: 62 /* These should be overridden with the type of the member when 63 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely 64 * way to trip up if we don't. 65 */ 66 return BRW_REGISTER_TYPE_UD; 67 case GLSL_TYPE_IMAGE: 68 return BRW_REGISTER_TYPE_UD; 69 case GLSL_TYPE_DOUBLE: 70 return BRW_REGISTER_TYPE_DF; 71 case GLSL_TYPE_UINT64: 72 return BRW_REGISTER_TYPE_UQ; 73 case GLSL_TYPE_INT64: 74 return BRW_REGISTER_TYPE_Q; 75 case GLSL_TYPE_VOID: 76 case GLSL_TYPE_ERROR: 77 case GLSL_TYPE_FUNCTION: 78 unreachable("not reached"); 79 } 80 81 return BRW_REGISTER_TYPE_F; 82} 83 84enum brw_conditional_mod 85brw_conditional_for_comparison(unsigned int op) 86{ 87 switch (op) { 88 case ir_binop_less: 89 return BRW_CONDITIONAL_L; 90 case ir_binop_gequal: 91 return BRW_CONDITIONAL_GE; 92 case ir_binop_equal: 93 case ir_binop_all_equal: /* same as equal for scalars */ 94 return BRW_CONDITIONAL_Z; 95 case ir_binop_nequal: 96 case ir_binop_any_nequal: /* same as nequal for scalars */ 97 return BRW_CONDITIONAL_NZ; 98 default: 99 unreachable("not reached: bad operation for comparison"); 100 } 101} 102 103uint32_t 104brw_math_function(enum opcode op) 105{ 106 switch (op) { 107 case SHADER_OPCODE_RCP: 108 return BRW_MATH_FUNCTION_INV; 109 case SHADER_OPCODE_RSQ: 110 return BRW_MATH_FUNCTION_RSQ; 111 case SHADER_OPCODE_SQRT: 112 return BRW_MATH_FUNCTION_SQRT; 113 case SHADER_OPCODE_EXP2: 114 return BRW_MATH_FUNCTION_EXP; 115 case SHADER_OPCODE_LOG2: 116 return BRW_MATH_FUNCTION_LOG; 117 case SHADER_OPCODE_POW: 118 return BRW_MATH_FUNCTION_POW; 119 case SHADER_OPCODE_SIN: 120 return BRW_MATH_FUNCTION_SIN; 121 case SHADER_OPCODE_COS: 122 return BRW_MATH_FUNCTION_COS; 123 case SHADER_OPCODE_INT_QUOTIENT: 124 return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT; 125 case SHADER_OPCODE_INT_REMAINDER: 126 return BRW_MATH_FUNCTION_INT_DIV_REMAINDER; 127 default: 128 unreachable("not reached: unknown math function"); 129 } 130} 131 132bool 133brw_texture_offset(const nir_tex_instr *tex, unsigned src, 134 uint32_t *offset_bits_out) 135{ 136 if (!nir_src_is_const(tex->src[src].src)) 137 return false; 138 139 const unsigned num_components = nir_tex_instr_src_size(tex, src); 140 141 /* Combine all three offsets into a single unsigned dword: 142 * 143 * bits 11:8 - U Offset (X component) 144 * bits 7:4 - V Offset (Y component) 145 * bits 3:0 - R Offset (Z component) 146 */ 147 uint32_t offset_bits = 0; 148 for (unsigned i = 0; i < num_components; i++) { 149 int offset = nir_src_comp_as_int(tex->src[src].src, i); 150 151 /* offset out of bounds; caller will handle it. */ 152 if (offset > 7 || offset < -8) 153 return false; 154 155 const unsigned shift = 4 * (2 - i); 156 offset_bits |= (offset << shift) & (0xF << shift); 157 } 158 159 *offset_bits_out = offset_bits; 160 161 return true; 162} 163 164const char * 165brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) 166{ 167 const struct intel_device_info *devinfo = isa->devinfo; 168 169 switch (op) { 170 case 0 ... NUM_BRW_OPCODES - 1: 171 /* The DO instruction doesn't exist on Gfx6+, but we use it to mark the 172 * start of a loop in the IR. 173 */ 174 if (devinfo->ver >= 6 && op == BRW_OPCODE_DO) 175 return "do"; 176 177 /* The following conversion opcodes doesn't exist on Gfx8+, but we use 178 * then to mark that we want to do the conversion. 179 */ 180 if (devinfo->ver > 7 && op == BRW_OPCODE_F32TO16) 181 return "f32to16"; 182 183 if (devinfo->ver > 7 && op == BRW_OPCODE_F16TO32) 184 return "f16to32"; 185 186 assert(brw_opcode_desc(isa, op)->name); 187 return brw_opcode_desc(isa, op)->name; 188 case FS_OPCODE_FB_WRITE: 189 return "fb_write"; 190 case FS_OPCODE_FB_WRITE_LOGICAL: 191 return "fb_write_logical"; 192 case FS_OPCODE_REP_FB_WRITE: 193 return "rep_fb_write"; 194 case FS_OPCODE_FB_READ: 195 return "fb_read"; 196 case FS_OPCODE_FB_READ_LOGICAL: 197 return "fb_read_logical"; 198 199 case SHADER_OPCODE_RCP: 200 return "rcp"; 201 case SHADER_OPCODE_RSQ: 202 return "rsq"; 203 case SHADER_OPCODE_SQRT: 204 return "sqrt"; 205 case SHADER_OPCODE_EXP2: 206 return "exp2"; 207 case SHADER_OPCODE_LOG2: 208 return "log2"; 209 case SHADER_OPCODE_POW: 210 return "pow"; 211 case SHADER_OPCODE_INT_QUOTIENT: 212 return "int_quot"; 213 case SHADER_OPCODE_INT_REMAINDER: 214 return "int_rem"; 215 case SHADER_OPCODE_SIN: 216 return "sin"; 217 case SHADER_OPCODE_COS: 218 return "cos"; 219 220 case SHADER_OPCODE_SEND: 221 return "send"; 222 223 case SHADER_OPCODE_UNDEF: 224 return "undef"; 225 226 case SHADER_OPCODE_TEX: 227 return "tex"; 228 case SHADER_OPCODE_TEX_LOGICAL: 229 return "tex_logical"; 230 case SHADER_OPCODE_TXD: 231 return "txd"; 232 case SHADER_OPCODE_TXD_LOGICAL: 233 return "txd_logical"; 234 case SHADER_OPCODE_TXF: 235 return "txf"; 236 case SHADER_OPCODE_TXF_LOGICAL: 237 return "txf_logical"; 238 case SHADER_OPCODE_TXF_LZ: 239 return "txf_lz"; 240 case SHADER_OPCODE_TXL: 241 return "txl"; 242 case SHADER_OPCODE_TXL_LOGICAL: 243 return "txl_logical"; 244 case SHADER_OPCODE_TXL_LZ: 245 return "txl_lz"; 246 case SHADER_OPCODE_TXS: 247 return "txs"; 248 case SHADER_OPCODE_TXS_LOGICAL: 249 return "txs_logical"; 250 case FS_OPCODE_TXB: 251 return "txb"; 252 case FS_OPCODE_TXB_LOGICAL: 253 return "txb_logical"; 254 case SHADER_OPCODE_TXF_CMS: 255 return "txf_cms"; 256 case SHADER_OPCODE_TXF_CMS_LOGICAL: 257 return "txf_cms_logical"; 258 case SHADER_OPCODE_TXF_CMS_W: 259 return "txf_cms_w"; 260 case SHADER_OPCODE_TXF_CMS_W_LOGICAL: 261 return "txf_cms_w_logical"; 262 case SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL: 263 return "txf_cms_w_gfx12_logical"; 264 case SHADER_OPCODE_TXF_UMS: 265 return "txf_ums"; 266 case SHADER_OPCODE_TXF_UMS_LOGICAL: 267 return "txf_ums_logical"; 268 case SHADER_OPCODE_TXF_MCS: 269 return "txf_mcs"; 270 case SHADER_OPCODE_TXF_MCS_LOGICAL: 271 return "txf_mcs_logical"; 272 case SHADER_OPCODE_LOD: 273 return "lod"; 274 case SHADER_OPCODE_LOD_LOGICAL: 275 return "lod_logical"; 276 case SHADER_OPCODE_TG4: 277 return "tg4"; 278 case SHADER_OPCODE_TG4_LOGICAL: 279 return "tg4_logical"; 280 case SHADER_OPCODE_TG4_OFFSET: 281 return "tg4_offset"; 282 case SHADER_OPCODE_TG4_OFFSET_LOGICAL: 283 return "tg4_offset_logical"; 284 case SHADER_OPCODE_SAMPLEINFO: 285 return "sampleinfo"; 286 case SHADER_OPCODE_SAMPLEINFO_LOGICAL: 287 return "sampleinfo_logical"; 288 289 case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: 290 return "image_size_logical"; 291 292 case VEC4_OPCODE_UNTYPED_ATOMIC: 293 return "untyped_atomic"; 294 case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: 295 return "untyped_atomic_logical"; 296 case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: 297 return "untyped_atomic_float_logical"; 298 case VEC4_OPCODE_UNTYPED_SURFACE_READ: 299 return "untyped_surface_read"; 300 case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: 301 return "untyped_surface_read_logical"; 302 case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: 303 return "untyped_surface_write"; 304 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: 305 return "untyped_surface_write_logical"; 306 case SHADER_OPCODE_OWORD_BLOCK_READ_LOGICAL: 307 return "oword_block_read_logical"; 308 case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: 309 return "unaligned_oword_block_read_logical"; 310 case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL: 311 return "oword_block_write_logical"; 312 case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: 313 return "a64_untyped_read_logical"; 314 case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL: 315 return "a64_oword_block_read_logical"; 316 case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: 317 return "a64_unaligned_oword_block_read_logical"; 318 case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: 319 return "a64_oword_block_write_logical"; 320 case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: 321 return "a64_untyped_write_logical"; 322 case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: 323 return "a64_byte_scattered_read_logical"; 324 case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: 325 return "a64_byte_scattered_write_logical"; 326 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: 327 return "a64_untyped_atomic_logical"; 328 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL: 329 return "a64_untyped_atomic_int16_logical"; 330 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: 331 return "a64_untyped_atomic_int64_logical"; 332 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: 333 return "a64_untyped_atomic_float16_logical"; 334 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: 335 return "a64_untyped_atomic_float32_logical"; 336 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL: 337 return "a64_untyped_atomic_float64_logical"; 338 case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: 339 return "typed_atomic_logical"; 340 case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: 341 return "typed_surface_read_logical"; 342 case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: 343 return "typed_surface_write_logical"; 344 case SHADER_OPCODE_MEMORY_FENCE: 345 return "memory_fence"; 346 case FS_OPCODE_SCHEDULING_FENCE: 347 return "scheduling_fence"; 348 case SHADER_OPCODE_INTERLOCK: 349 /* For an interlock we actually issue a memory fence via sendc. */ 350 return "interlock"; 351 352 case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: 353 return "byte_scattered_read_logical"; 354 case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: 355 return "byte_scattered_write_logical"; 356 case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: 357 return "dword_scattered_read_logical"; 358 case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: 359 return "dword_scattered_write_logical"; 360 361 case SHADER_OPCODE_LOAD_PAYLOAD: 362 return "load_payload"; 363 case FS_OPCODE_PACK: 364 return "pack"; 365 366 case SHADER_OPCODE_GFX4_SCRATCH_READ: 367 return "gfx4_scratch_read"; 368 case SHADER_OPCODE_GFX4_SCRATCH_WRITE: 369 return "gfx4_scratch_write"; 370 case SHADER_OPCODE_GFX7_SCRATCH_READ: 371 return "gfx7_scratch_read"; 372 case SHADER_OPCODE_SCRATCH_HEADER: 373 return "scratch_header"; 374 375 case SHADER_OPCODE_URB_WRITE_LOGICAL: 376 return "urb_write_logical"; 377 case SHADER_OPCODE_URB_READ_LOGICAL: 378 return "urb_read_logical"; 379 380 case SHADER_OPCODE_FIND_LIVE_CHANNEL: 381 return "find_live_channel"; 382 case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: 383 return "find_last_live_channel"; 384 case FS_OPCODE_LOAD_LIVE_CHANNELS: 385 return "load_live_channels"; 386 387 case SHADER_OPCODE_BROADCAST: 388 return "broadcast"; 389 case SHADER_OPCODE_SHUFFLE: 390 return "shuffle"; 391 case SHADER_OPCODE_SEL_EXEC: 392 return "sel_exec"; 393 case SHADER_OPCODE_QUAD_SWIZZLE: 394 return "quad_swizzle"; 395 case SHADER_OPCODE_CLUSTER_BROADCAST: 396 return "cluster_broadcast"; 397 398 case SHADER_OPCODE_GET_BUFFER_SIZE: 399 return "get_buffer_size"; 400 401 case VEC4_OPCODE_MOV_BYTES: 402 return "mov_bytes"; 403 case VEC4_OPCODE_PACK_BYTES: 404 return "pack_bytes"; 405 case VEC4_OPCODE_UNPACK_UNIFORM: 406 return "unpack_uniform"; 407 case VEC4_OPCODE_DOUBLE_TO_F32: 408 return "double_to_f32"; 409 case VEC4_OPCODE_DOUBLE_TO_D32: 410 return "double_to_d32"; 411 case VEC4_OPCODE_DOUBLE_TO_U32: 412 return "double_to_u32"; 413 case VEC4_OPCODE_TO_DOUBLE: 414 return "single_to_double"; 415 case VEC4_OPCODE_PICK_LOW_32BIT: 416 return "pick_low_32bit"; 417 case VEC4_OPCODE_PICK_HIGH_32BIT: 418 return "pick_high_32bit"; 419 case VEC4_OPCODE_SET_LOW_32BIT: 420 return "set_low_32bit"; 421 case VEC4_OPCODE_SET_HIGH_32BIT: 422 return "set_high_32bit"; 423 case VEC4_OPCODE_MOV_FOR_SCRATCH: 424 return "mov_for_scratch"; 425 case VEC4_OPCODE_ZERO_OOB_PUSH_REGS: 426 return "zero_oob_push_regs"; 427 428 case FS_OPCODE_DDX_COARSE: 429 return "ddx_coarse"; 430 case FS_OPCODE_DDX_FINE: 431 return "ddx_fine"; 432 case FS_OPCODE_DDY_COARSE: 433 return "ddy_coarse"; 434 case FS_OPCODE_DDY_FINE: 435 return "ddy_fine"; 436 437 case FS_OPCODE_LINTERP: 438 return "linterp"; 439 440 case FS_OPCODE_PIXEL_X: 441 return "pixel_x"; 442 case FS_OPCODE_PIXEL_Y: 443 return "pixel_y"; 444 445 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: 446 return "uniform_pull_const"; 447 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GFX7: 448 return "uniform_pull_const_gfx7"; 449 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4: 450 return "varying_pull_const_gfx4"; 451 case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: 452 return "varying_pull_const_logical"; 453 454 case FS_OPCODE_SET_SAMPLE_ID: 455 return "set_sample_id"; 456 457 case FS_OPCODE_PACK_HALF_2x16_SPLIT: 458 return "pack_half_2x16_split"; 459 460 case SHADER_OPCODE_HALT_TARGET: 461 return "halt_target"; 462 463 case FS_OPCODE_INTERPOLATE_AT_SAMPLE: 464 return "interp_sample"; 465 case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: 466 return "interp_shared_offset"; 467 case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: 468 return "interp_per_slot_offset"; 469 470 case VEC4_VS_OPCODE_URB_WRITE: 471 return "vs_urb_write"; 472 case VS_OPCODE_PULL_CONSTANT_LOAD: 473 return "pull_constant_load"; 474 case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7: 475 return "pull_constant_load_gfx7"; 476 477 case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: 478 return "unpack_flags_simd4x2"; 479 480 case VEC4_GS_OPCODE_URB_WRITE: 481 return "gs_urb_write"; 482 case VEC4_GS_OPCODE_URB_WRITE_ALLOCATE: 483 return "gs_urb_write_allocate"; 484 case GS_OPCODE_THREAD_END: 485 return "gs_thread_end"; 486 case GS_OPCODE_SET_WRITE_OFFSET: 487 return "set_write_offset"; 488 case GS_OPCODE_SET_VERTEX_COUNT: 489 return "set_vertex_count"; 490 case GS_OPCODE_SET_DWORD_2: 491 return "set_dword_2"; 492 case GS_OPCODE_PREPARE_CHANNEL_MASKS: 493 return "prepare_channel_masks"; 494 case GS_OPCODE_SET_CHANNEL_MASKS: 495 return "set_channel_masks"; 496 case GS_OPCODE_GET_INSTANCE_ID: 497 return "get_instance_id"; 498 case GS_OPCODE_FF_SYNC: 499 return "ff_sync"; 500 case GS_OPCODE_SET_PRIMITIVE_ID: 501 return "set_primitive_id"; 502 case GS_OPCODE_SVB_WRITE: 503 return "gs_svb_write"; 504 case GS_OPCODE_SVB_SET_DST_INDEX: 505 return "gs_svb_set_dst_index"; 506 case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: 507 return "gs_ff_sync_set_primitives"; 508 case CS_OPCODE_CS_TERMINATE: 509 return "cs_terminate"; 510 case SHADER_OPCODE_BARRIER: 511 return "barrier"; 512 case SHADER_OPCODE_MULH: 513 return "mulh"; 514 case SHADER_OPCODE_ISUB_SAT: 515 return "isub_sat"; 516 case SHADER_OPCODE_USUB_SAT: 517 return "usub_sat"; 518 case SHADER_OPCODE_MOV_INDIRECT: 519 return "mov_indirect"; 520 case SHADER_OPCODE_MOV_RELOC_IMM: 521 return "mov_reloc_imm"; 522 523 case VEC4_OPCODE_URB_READ: 524 return "urb_read"; 525 case TCS_OPCODE_GET_INSTANCE_ID: 526 return "tcs_get_instance_id"; 527 case VEC4_TCS_OPCODE_URB_WRITE: 528 return "tcs_urb_write"; 529 case VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS: 530 return "tcs_set_input_urb_offsets"; 531 case VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS: 532 return "tcs_set_output_urb_offsets"; 533 case TCS_OPCODE_GET_PRIMITIVE_ID: 534 return "tcs_get_primitive_id"; 535 case TCS_OPCODE_CREATE_BARRIER_HEADER: 536 return "tcs_create_barrier_header"; 537 case TCS_OPCODE_SRC0_010_IS_ZERO: 538 return "tcs_src0<0,1,0>_is_zero"; 539 case TCS_OPCODE_RELEASE_INPUT: 540 return "tcs_release_input"; 541 case TCS_OPCODE_THREAD_END: 542 return "tcs_thread_end"; 543 case TES_OPCODE_CREATE_INPUT_READ_HEADER: 544 return "tes_create_input_read_header"; 545 case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: 546 return "tes_add_indirect_urb_offset"; 547 case TES_OPCODE_GET_PRIMITIVE_ID: 548 return "tes_get_primitive_id"; 549 550 case RT_OPCODE_TRACE_RAY_LOGICAL: 551 return "rt_trace_ray_logical"; 552 553 case SHADER_OPCODE_RND_MODE: 554 return "rnd_mode"; 555 case SHADER_OPCODE_FLOAT_CONTROL_MODE: 556 return "float_control_mode"; 557 case SHADER_OPCODE_BTD_SPAWN_LOGICAL: 558 return "btd_spawn_logical"; 559 case SHADER_OPCODE_BTD_RETIRE_LOGICAL: 560 return "btd_retire_logical"; 561 case SHADER_OPCODE_READ_SR_REG: 562 return "read_sr_reg"; 563 } 564 565 unreachable("not reached"); 566} 567 568bool 569brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg) 570{ 571 union { 572 unsigned ud; 573 int d; 574 float f; 575 double df; 576 } imm, sat_imm = { 0 }; 577 578 const unsigned size = type_sz(type); 579 580 /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise 581 * irrelevant, so just check the size of the type and copy from/to an 582 * appropriately sized field. 583 */ 584 if (size < 8) 585 imm.ud = reg->ud; 586 else 587 imm.df = reg->df; 588 589 switch (type) { 590 case BRW_REGISTER_TYPE_UD: 591 case BRW_REGISTER_TYPE_D: 592 case BRW_REGISTER_TYPE_UW: 593 case BRW_REGISTER_TYPE_W: 594 case BRW_REGISTER_TYPE_UQ: 595 case BRW_REGISTER_TYPE_Q: 596 /* Nothing to do. */ 597 return false; 598 case BRW_REGISTER_TYPE_F: 599 sat_imm.f = SATURATE(imm.f); 600 break; 601 case BRW_REGISTER_TYPE_DF: 602 sat_imm.df = SATURATE(imm.df); 603 break; 604 case BRW_REGISTER_TYPE_UB: 605 case BRW_REGISTER_TYPE_B: 606 unreachable("no UB/B immediates"); 607 case BRW_REGISTER_TYPE_V: 608 case BRW_REGISTER_TYPE_UV: 609 case BRW_REGISTER_TYPE_VF: 610 unreachable("unimplemented: saturate vector immediate"); 611 case BRW_REGISTER_TYPE_HF: 612 unreachable("unimplemented: saturate HF immediate"); 613 case BRW_REGISTER_TYPE_NF: 614 unreachable("no NF immediates"); 615 } 616 617 if (size < 8) { 618 if (imm.ud != sat_imm.ud) { 619 reg->ud = sat_imm.ud; 620 return true; 621 } 622 } else { 623 if (imm.df != sat_imm.df) { 624 reg->df = sat_imm.df; 625 return true; 626 } 627 } 628 return false; 629} 630 631bool 632brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg) 633{ 634 switch (type) { 635 case BRW_REGISTER_TYPE_D: 636 case BRW_REGISTER_TYPE_UD: 637 reg->d = -reg->d; 638 return true; 639 case BRW_REGISTER_TYPE_W: 640 case BRW_REGISTER_TYPE_UW: { 641 uint16_t value = -(int16_t)reg->ud; 642 reg->ud = value | (uint32_t)value << 16; 643 return true; 644 } 645 case BRW_REGISTER_TYPE_F: 646 reg->f = -reg->f; 647 return true; 648 case BRW_REGISTER_TYPE_VF: 649 reg->ud ^= 0x80808080; 650 return true; 651 case BRW_REGISTER_TYPE_DF: 652 reg->df = -reg->df; 653 return true; 654 case BRW_REGISTER_TYPE_UQ: 655 case BRW_REGISTER_TYPE_Q: 656 reg->d64 = -reg->d64; 657 return true; 658 case BRW_REGISTER_TYPE_UB: 659 case BRW_REGISTER_TYPE_B: 660 unreachable("no UB/B immediates"); 661 case BRW_REGISTER_TYPE_UV: 662 case BRW_REGISTER_TYPE_V: 663 assert(!"unimplemented: negate UV/V immediate"); 664 case BRW_REGISTER_TYPE_HF: 665 reg->ud ^= 0x80008000; 666 return true; 667 case BRW_REGISTER_TYPE_NF: 668 unreachable("no NF immediates"); 669 } 670 671 return false; 672} 673 674bool 675brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) 676{ 677 switch (type) { 678 case BRW_REGISTER_TYPE_D: 679 reg->d = abs(reg->d); 680 return true; 681 case BRW_REGISTER_TYPE_W: { 682 uint16_t value = abs((int16_t)reg->ud); 683 reg->ud = value | (uint32_t)value << 16; 684 return true; 685 } 686 case BRW_REGISTER_TYPE_F: 687 reg->f = fabsf(reg->f); 688 return true; 689 case BRW_REGISTER_TYPE_DF: 690 reg->df = fabs(reg->df); 691 return true; 692 case BRW_REGISTER_TYPE_VF: 693 reg->ud &= ~0x80808080; 694 return true; 695 case BRW_REGISTER_TYPE_Q: 696 reg->d64 = imaxabs(reg->d64); 697 return true; 698 case BRW_REGISTER_TYPE_UB: 699 case BRW_REGISTER_TYPE_B: 700 unreachable("no UB/B immediates"); 701 case BRW_REGISTER_TYPE_UQ: 702 case BRW_REGISTER_TYPE_UD: 703 case BRW_REGISTER_TYPE_UW: 704 case BRW_REGISTER_TYPE_UV: 705 /* Presumably the absolute value modifier on an unsigned source is a 706 * nop, but it would be nice to confirm. 707 */ 708 assert(!"unimplemented: abs unsigned immediate"); 709 case BRW_REGISTER_TYPE_V: 710 assert(!"unimplemented: abs V immediate"); 711 case BRW_REGISTER_TYPE_HF: 712 reg->ud &= ~0x80008000; 713 return true; 714 case BRW_REGISTER_TYPE_NF: 715 unreachable("no NF immediates"); 716 } 717 718 return false; 719} 720 721backend_shader::backend_shader(const struct brw_compiler *compiler, 722 void *log_data, 723 void *mem_ctx, 724 const nir_shader *shader, 725 struct brw_stage_prog_data *stage_prog_data, 726 bool debug_enabled) 727 : compiler(compiler), 728 log_data(log_data), 729 devinfo(compiler->devinfo), 730 nir(shader), 731 stage_prog_data(stage_prog_data), 732 mem_ctx(mem_ctx), 733 cfg(NULL), idom_analysis(this), 734 stage(shader->info.stage), 735 debug_enabled(debug_enabled) 736{ 737 stage_name = _mesa_shader_stage_to_string(stage); 738 stage_abbrev = _mesa_shader_stage_to_abbrev(stage); 739} 740 741backend_shader::~backend_shader() 742{ 743} 744 745bool 746backend_reg::equals(const backend_reg &r) const 747{ 748 return brw_regs_equal(this, &r) && offset == r.offset; 749} 750 751bool 752backend_reg::negative_equals(const backend_reg &r) const 753{ 754 return brw_regs_negative_equal(this, &r) && offset == r.offset; 755} 756 757bool 758backend_reg::is_zero() const 759{ 760 if (file != IMM) 761 return false; 762 763 assert(type_sz(type) > 1); 764 765 switch (type) { 766 case BRW_REGISTER_TYPE_HF: 767 assert((d & 0xffff) == ((d >> 16) & 0xffff)); 768 return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000; 769 case BRW_REGISTER_TYPE_F: 770 return f == 0; 771 case BRW_REGISTER_TYPE_DF: 772 return df == 0; 773 case BRW_REGISTER_TYPE_W: 774 case BRW_REGISTER_TYPE_UW: 775 assert((d & 0xffff) == ((d >> 16) & 0xffff)); 776 return (d & 0xffff) == 0; 777 case BRW_REGISTER_TYPE_D: 778 case BRW_REGISTER_TYPE_UD: 779 return d == 0; 780 case BRW_REGISTER_TYPE_UQ: 781 case BRW_REGISTER_TYPE_Q: 782 return u64 == 0; 783 default: 784 return false; 785 } 786} 787 788bool 789backend_reg::is_one() const 790{ 791 if (file != IMM) 792 return false; 793 794 assert(type_sz(type) > 1); 795 796 switch (type) { 797 case BRW_REGISTER_TYPE_HF: 798 assert((d & 0xffff) == ((d >> 16) & 0xffff)); 799 return (d & 0xffff) == 0x3c00; 800 case BRW_REGISTER_TYPE_F: 801 return f == 1.0f; 802 case BRW_REGISTER_TYPE_DF: 803 return df == 1.0; 804 case BRW_REGISTER_TYPE_W: 805 case BRW_REGISTER_TYPE_UW: 806 assert((d & 0xffff) == ((d >> 16) & 0xffff)); 807 return (d & 0xffff) == 1; 808 case BRW_REGISTER_TYPE_D: 809 case BRW_REGISTER_TYPE_UD: 810 return d == 1; 811 case BRW_REGISTER_TYPE_UQ: 812 case BRW_REGISTER_TYPE_Q: 813 return u64 == 1; 814 default: 815 return false; 816 } 817} 818 819bool 820backend_reg::is_negative_one() const 821{ 822 if (file != IMM) 823 return false; 824 825 assert(type_sz(type) > 1); 826 827 switch (type) { 828 case BRW_REGISTER_TYPE_HF: 829 assert((d & 0xffff) == ((d >> 16) & 0xffff)); 830 return (d & 0xffff) == 0xbc00; 831 case BRW_REGISTER_TYPE_F: 832 return f == -1.0; 833 case BRW_REGISTER_TYPE_DF: 834 return df == -1.0; 835 case BRW_REGISTER_TYPE_W: 836 assert((d & 0xffff) == ((d >> 16) & 0xffff)); 837 return (d & 0xffff) == 0xffff; 838 case BRW_REGISTER_TYPE_D: 839 return d == -1; 840 case BRW_REGISTER_TYPE_Q: 841 return d64 == -1; 842 default: 843 return false; 844 } 845} 846 847bool 848backend_reg::is_null() const 849{ 850 return file == ARF && nr == BRW_ARF_NULL; 851} 852 853 854bool 855backend_reg::is_accumulator() const 856{ 857 return file == ARF && nr == BRW_ARF_ACCUMULATOR; 858} 859 860bool 861backend_instruction::is_commutative() const 862{ 863 switch (opcode) { 864 case BRW_OPCODE_AND: 865 case BRW_OPCODE_OR: 866 case BRW_OPCODE_XOR: 867 case BRW_OPCODE_ADD: 868 case BRW_OPCODE_ADD3: 869 case BRW_OPCODE_MUL: 870 case SHADER_OPCODE_MULH: 871 return true; 872 case BRW_OPCODE_SEL: 873 /* MIN and MAX are commutative. */ 874 if (conditional_mod == BRW_CONDITIONAL_GE || 875 conditional_mod == BRW_CONDITIONAL_L) { 876 return true; 877 } 878 FALLTHROUGH; 879 default: 880 return false; 881 } 882} 883 884bool 885backend_instruction::is_3src(const struct brw_compiler *compiler) const 886{ 887 return ::is_3src(&compiler->isa, opcode); 888} 889 890bool 891backend_instruction::is_tex() const 892{ 893 return (opcode == SHADER_OPCODE_TEX || 894 opcode == FS_OPCODE_TXB || 895 opcode == SHADER_OPCODE_TXD || 896 opcode == SHADER_OPCODE_TXF || 897 opcode == SHADER_OPCODE_TXF_LZ || 898 opcode == SHADER_OPCODE_TXF_CMS || 899 opcode == SHADER_OPCODE_TXF_CMS_W || 900 opcode == SHADER_OPCODE_TXF_UMS || 901 opcode == SHADER_OPCODE_TXF_MCS || 902 opcode == SHADER_OPCODE_TXL || 903 opcode == SHADER_OPCODE_TXL_LZ || 904 opcode == SHADER_OPCODE_TXS || 905 opcode == SHADER_OPCODE_LOD || 906 opcode == SHADER_OPCODE_TG4 || 907 opcode == SHADER_OPCODE_TG4_OFFSET || 908 opcode == SHADER_OPCODE_SAMPLEINFO); 909} 910 911bool 912backend_instruction::is_math() const 913{ 914 return (opcode == SHADER_OPCODE_RCP || 915 opcode == SHADER_OPCODE_RSQ || 916 opcode == SHADER_OPCODE_SQRT || 917 opcode == SHADER_OPCODE_EXP2 || 918 opcode == SHADER_OPCODE_LOG2 || 919 opcode == SHADER_OPCODE_SIN || 920 opcode == SHADER_OPCODE_COS || 921 opcode == SHADER_OPCODE_INT_QUOTIENT || 922 opcode == SHADER_OPCODE_INT_REMAINDER || 923 opcode == SHADER_OPCODE_POW); 924} 925 926bool 927backend_instruction::is_control_flow() const 928{ 929 switch (opcode) { 930 case BRW_OPCODE_DO: 931 case BRW_OPCODE_WHILE: 932 case BRW_OPCODE_IF: 933 case BRW_OPCODE_ELSE: 934 case BRW_OPCODE_ENDIF: 935 case BRW_OPCODE_BREAK: 936 case BRW_OPCODE_CONTINUE: 937 return true; 938 default: 939 return false; 940 } 941} 942 943bool 944backend_instruction::uses_indirect_addressing() const 945{ 946 switch (opcode) { 947 case SHADER_OPCODE_BROADCAST: 948 case SHADER_OPCODE_CLUSTER_BROADCAST: 949 case SHADER_OPCODE_MOV_INDIRECT: 950 return true; 951 default: 952 return false; 953 } 954} 955 956bool 957backend_instruction::can_do_source_mods() const 958{ 959 switch (opcode) { 960 case BRW_OPCODE_ADDC: 961 case BRW_OPCODE_BFE: 962 case BRW_OPCODE_BFI1: 963 case BRW_OPCODE_BFI2: 964 case BRW_OPCODE_BFREV: 965 case BRW_OPCODE_CBIT: 966 case BRW_OPCODE_FBH: 967 case BRW_OPCODE_FBL: 968 case BRW_OPCODE_ROL: 969 case BRW_OPCODE_ROR: 970 case BRW_OPCODE_SUBB: 971 case BRW_OPCODE_DP4A: 972 case SHADER_OPCODE_BROADCAST: 973 case SHADER_OPCODE_CLUSTER_BROADCAST: 974 case SHADER_OPCODE_MOV_INDIRECT: 975 case SHADER_OPCODE_SHUFFLE: 976 case SHADER_OPCODE_INT_QUOTIENT: 977 case SHADER_OPCODE_INT_REMAINDER: 978 return false; 979 default: 980 return true; 981 } 982} 983 984bool 985backend_instruction::can_do_saturate() const 986{ 987 switch (opcode) { 988 case BRW_OPCODE_ADD: 989 case BRW_OPCODE_ADD3: 990 case BRW_OPCODE_ASR: 991 case BRW_OPCODE_AVG: 992 case BRW_OPCODE_CSEL: 993 case BRW_OPCODE_DP2: 994 case BRW_OPCODE_DP3: 995 case BRW_OPCODE_DP4: 996 case BRW_OPCODE_DPH: 997 case BRW_OPCODE_DP4A: 998 case BRW_OPCODE_F16TO32: 999 case BRW_OPCODE_F32TO16: 1000 case BRW_OPCODE_LINE: 1001 case BRW_OPCODE_LRP: 1002 case BRW_OPCODE_MAC: 1003 case BRW_OPCODE_MAD: 1004 case BRW_OPCODE_MATH: 1005 case BRW_OPCODE_MOV: 1006 case BRW_OPCODE_MUL: 1007 case SHADER_OPCODE_MULH: 1008 case BRW_OPCODE_PLN: 1009 case BRW_OPCODE_RNDD: 1010 case BRW_OPCODE_RNDE: 1011 case BRW_OPCODE_RNDU: 1012 case BRW_OPCODE_RNDZ: 1013 case BRW_OPCODE_SEL: 1014 case BRW_OPCODE_SHL: 1015 case BRW_OPCODE_SHR: 1016 case FS_OPCODE_LINTERP: 1017 case SHADER_OPCODE_COS: 1018 case SHADER_OPCODE_EXP2: 1019 case SHADER_OPCODE_LOG2: 1020 case SHADER_OPCODE_POW: 1021 case SHADER_OPCODE_RCP: 1022 case SHADER_OPCODE_RSQ: 1023 case SHADER_OPCODE_SIN: 1024 case SHADER_OPCODE_SQRT: 1025 return true; 1026 default: 1027 return false; 1028 } 1029} 1030 1031bool 1032backend_instruction::can_do_cmod() const 1033{ 1034 switch (opcode) { 1035 case BRW_OPCODE_ADD: 1036 case BRW_OPCODE_ADD3: 1037 case BRW_OPCODE_ADDC: 1038 case BRW_OPCODE_AND: 1039 case BRW_OPCODE_ASR: 1040 case BRW_OPCODE_AVG: 1041 case BRW_OPCODE_CMP: 1042 case BRW_OPCODE_CMPN: 1043 case BRW_OPCODE_DP2: 1044 case BRW_OPCODE_DP3: 1045 case BRW_OPCODE_DP4: 1046 case BRW_OPCODE_DPH: 1047 case BRW_OPCODE_F16TO32: 1048 case BRW_OPCODE_F32TO16: 1049 case BRW_OPCODE_FRC: 1050 case BRW_OPCODE_LINE: 1051 case BRW_OPCODE_LRP: 1052 case BRW_OPCODE_LZD: 1053 case BRW_OPCODE_MAC: 1054 case BRW_OPCODE_MACH: 1055 case BRW_OPCODE_MAD: 1056 case BRW_OPCODE_MOV: 1057 case BRW_OPCODE_MUL: 1058 case BRW_OPCODE_NOT: 1059 case BRW_OPCODE_OR: 1060 case BRW_OPCODE_PLN: 1061 case BRW_OPCODE_RNDD: 1062 case BRW_OPCODE_RNDE: 1063 case BRW_OPCODE_RNDU: 1064 case BRW_OPCODE_RNDZ: 1065 case BRW_OPCODE_SAD2: 1066 case BRW_OPCODE_SADA2: 1067 case BRW_OPCODE_SHL: 1068 case BRW_OPCODE_SHR: 1069 case BRW_OPCODE_SUBB: 1070 case BRW_OPCODE_XOR: 1071 case FS_OPCODE_LINTERP: 1072 return true; 1073 default: 1074 return false; 1075 } 1076} 1077 1078bool 1079backend_instruction::reads_accumulator_implicitly() const 1080{ 1081 switch (opcode) { 1082 case BRW_OPCODE_MAC: 1083 case BRW_OPCODE_MACH: 1084 case BRW_OPCODE_SADA2: 1085 return true; 1086 default: 1087 return false; 1088 } 1089} 1090 1091bool 1092backend_instruction::writes_accumulator_implicitly(const struct intel_device_info *devinfo) const 1093{ 1094 return writes_accumulator || 1095 (devinfo->ver < 6 && 1096 ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) || 1097 (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP))) || 1098 (opcode == FS_OPCODE_LINTERP && 1099 (!devinfo->has_pln || devinfo->ver <= 6)) || 1100 (eot && devinfo->ver >= 12); /* See Wa_14010017096. */ 1101} 1102 1103bool 1104backend_instruction::has_side_effects() const 1105{ 1106 switch (opcode) { 1107 case SHADER_OPCODE_SEND: 1108 return send_has_side_effects; 1109 1110 case BRW_OPCODE_SYNC: 1111 case VEC4_OPCODE_UNTYPED_ATOMIC: 1112 case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: 1113 case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: 1114 case SHADER_OPCODE_GFX4_SCRATCH_WRITE: 1115 case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: 1116 case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: 1117 case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: 1118 case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: 1119 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: 1120 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL: 1121 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: 1122 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: 1123 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: 1124 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL: 1125 case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: 1126 case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: 1127 case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: 1128 case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: 1129 case SHADER_OPCODE_MEMORY_FENCE: 1130 case SHADER_OPCODE_INTERLOCK: 1131 case SHADER_OPCODE_URB_WRITE_LOGICAL: 1132 case FS_OPCODE_FB_WRITE: 1133 case FS_OPCODE_FB_WRITE_LOGICAL: 1134 case FS_OPCODE_REP_FB_WRITE: 1135 case SHADER_OPCODE_BARRIER: 1136 case VEC4_TCS_OPCODE_URB_WRITE: 1137 case TCS_OPCODE_RELEASE_INPUT: 1138 case SHADER_OPCODE_RND_MODE: 1139 case SHADER_OPCODE_FLOAT_CONTROL_MODE: 1140 case FS_OPCODE_SCHEDULING_FENCE: 1141 case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL: 1142 case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: 1143 case SHADER_OPCODE_BTD_SPAWN_LOGICAL: 1144 case SHADER_OPCODE_BTD_RETIRE_LOGICAL: 1145 case RT_OPCODE_TRACE_RAY_LOGICAL: 1146 case VEC4_OPCODE_ZERO_OOB_PUSH_REGS: 1147 return true; 1148 default: 1149 return eot; 1150 } 1151} 1152 1153bool 1154backend_instruction::is_volatile() const 1155{ 1156 switch (opcode) { 1157 case SHADER_OPCODE_SEND: 1158 return send_is_volatile; 1159 1160 case VEC4_OPCODE_UNTYPED_SURFACE_READ: 1161 case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: 1162 case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: 1163 case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: 1164 case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: 1165 case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: 1166 case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: 1167 case VEC4_OPCODE_URB_READ: 1168 return true; 1169 default: 1170 return false; 1171 } 1172} 1173 1174#ifndef NDEBUG 1175static bool 1176inst_is_in_block(const bblock_t *block, const backend_instruction *inst) 1177{ 1178 foreach_inst_in_block (backend_instruction, i, block) { 1179 if (inst == i) 1180 return true; 1181 } 1182 return false; 1183} 1184#endif 1185 1186static void 1187adjust_later_block_ips(bblock_t *start_block, int ip_adjustment) 1188{ 1189 for (bblock_t *block_iter = start_block->next(); 1190 block_iter; 1191 block_iter = block_iter->next()) { 1192 block_iter->start_ip += ip_adjustment; 1193 block_iter->end_ip += ip_adjustment; 1194 } 1195} 1196 1197void 1198backend_instruction::insert_after(bblock_t *block, backend_instruction *inst) 1199{ 1200 assert(this != inst); 1201 assert(block->end_ip_delta == 0); 1202 1203 if (!this->is_head_sentinel()) 1204 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1205 1206 block->end_ip++; 1207 1208 adjust_later_block_ips(block, 1); 1209 1210 exec_node::insert_after(inst); 1211} 1212 1213void 1214backend_instruction::insert_before(bblock_t *block, backend_instruction *inst) 1215{ 1216 assert(this != inst); 1217 assert(block->end_ip_delta == 0); 1218 1219 if (!this->is_tail_sentinel()) 1220 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1221 1222 block->end_ip++; 1223 1224 adjust_later_block_ips(block, 1); 1225 1226 exec_node::insert_before(inst); 1227} 1228 1229void 1230backend_instruction::insert_before(bblock_t *block, exec_list *list) 1231{ 1232 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1233 assert(block->end_ip_delta == 0); 1234 1235 unsigned num_inst = list->length(); 1236 1237 block->end_ip += num_inst; 1238 1239 adjust_later_block_ips(block, num_inst); 1240 1241 exec_node::insert_before(list); 1242} 1243 1244void 1245backend_instruction::remove(bblock_t *block, bool defer_later_block_ip_updates) 1246{ 1247 assert(inst_is_in_block(block, this) || !"Instruction not in block"); 1248 1249 if (defer_later_block_ip_updates) { 1250 block->end_ip_delta--; 1251 } else { 1252 assert(block->end_ip_delta == 0); 1253 adjust_later_block_ips(block, -1); 1254 } 1255 1256 if (block->start_ip == block->end_ip) { 1257 if (block->end_ip_delta != 0) { 1258 adjust_later_block_ips(block, block->end_ip_delta); 1259 block->end_ip_delta = 0; 1260 } 1261 1262 block->cfg->remove_block(block); 1263 } else { 1264 block->end_ip--; 1265 } 1266 1267 exec_node::remove(); 1268} 1269 1270void 1271backend_shader::dump_instructions() const 1272{ 1273 dump_instructions(NULL); 1274} 1275 1276void 1277backend_shader::dump_instructions(const char *name) const 1278{ 1279 FILE *file = stderr; 1280 if (name && geteuid() != 0) { 1281 file = fopen(name, "w"); 1282 if (!file) 1283 file = stderr; 1284 } 1285 1286 if (cfg) { 1287 int ip = 0; 1288 foreach_block_and_inst(block, backend_instruction, inst, cfg) { 1289 if (!INTEL_DEBUG(DEBUG_OPTIMIZER)) 1290 fprintf(file, "%4d: ", ip++); 1291 dump_instruction(inst, file); 1292 } 1293 } else { 1294 int ip = 0; 1295 foreach_in_list(backend_instruction, inst, &instructions) { 1296 if (!INTEL_DEBUG(DEBUG_OPTIMIZER)) 1297 fprintf(file, "%4d: ", ip++); 1298 dump_instruction(inst, file); 1299 } 1300 } 1301 1302 if (file != stderr) { 1303 fclose(file); 1304 } 1305} 1306 1307void 1308backend_shader::calculate_cfg() 1309{ 1310 if (this->cfg) 1311 return; 1312 cfg = new(mem_ctx) cfg_t(this, &this->instructions); 1313} 1314 1315void 1316backend_shader::invalidate_analysis(brw::analysis_dependency_class c) 1317{ 1318 idom_analysis.invalidate(c); 1319} 1320 1321extern "C" const unsigned * 1322brw_compile_tes(const struct brw_compiler *compiler, 1323 void *mem_ctx, 1324 brw_compile_tes_params *params) 1325{ 1326 const struct intel_device_info *devinfo = compiler->devinfo; 1327 nir_shader *nir = params->nir; 1328 const struct brw_tes_prog_key *key = params->key; 1329 const struct brw_vue_map *input_vue_map = params->input_vue_map; 1330 struct brw_tes_prog_data *prog_data = params->prog_data; 1331 1332 const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; 1333 const bool debug_enabled = INTEL_DEBUG(DEBUG_TES); 1334 const unsigned *assembly; 1335 1336 prog_data->base.base.stage = MESA_SHADER_TESS_EVAL; 1337 prog_data->base.base.ray_queries = nir->info.ray_queries; 1338 1339 nir->info.inputs_read = key->inputs_read; 1340 nir->info.patch_inputs_read = key->patch_inputs_read; 1341 1342 brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar); 1343 brw_nir_lower_tes_inputs(nir, input_vue_map); 1344 brw_nir_lower_vue_outputs(nir); 1345 brw_postprocess_nir(nir, compiler, is_scalar, debug_enabled, 1346 key->base.robust_buffer_access); 1347 1348 brw_compute_vue_map(devinfo, &prog_data->base.vue_map, 1349 nir->info.outputs_written, 1350 nir->info.separate_shader, 1); 1351 1352 unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4; 1353 1354 assert(output_size_bytes >= 1); 1355 if (output_size_bytes > GFX7_MAX_DS_URB_ENTRY_SIZE_BYTES) { 1356 params->error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size"); 1357 return NULL; 1358 } 1359 1360 prog_data->base.clip_distance_mask = 1361 ((1 << nir->info.clip_distance_array_size) - 1); 1362 prog_data->base.cull_distance_mask = 1363 ((1 << nir->info.cull_distance_array_size) - 1) << 1364 nir->info.clip_distance_array_size; 1365 1366 prog_data->include_primitive_id = 1367 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID); 1368 1369 /* URB entry sizes are stored as a multiple of 64 bytes. */ 1370 prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; 1371 1372 prog_data->base.urb_read_length = 0; 1373 1374 STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1); 1375 STATIC_ASSERT(BRW_TESS_PARTITIONING_ODD_FRACTIONAL == 1376 TESS_SPACING_FRACTIONAL_ODD - 1); 1377 STATIC_ASSERT(BRW_TESS_PARTITIONING_EVEN_FRACTIONAL == 1378 TESS_SPACING_FRACTIONAL_EVEN - 1); 1379 1380 prog_data->partitioning = 1381 (enum brw_tess_partitioning) (nir->info.tess.spacing - 1); 1382 1383 switch (nir->info.tess._primitive_mode) { 1384 case TESS_PRIMITIVE_QUADS: 1385 prog_data->domain = BRW_TESS_DOMAIN_QUAD; 1386 break; 1387 case TESS_PRIMITIVE_TRIANGLES: 1388 prog_data->domain = BRW_TESS_DOMAIN_TRI; 1389 break; 1390 case TESS_PRIMITIVE_ISOLINES: 1391 prog_data->domain = BRW_TESS_DOMAIN_ISOLINE; 1392 break; 1393 default: 1394 unreachable("invalid domain shader primitive mode"); 1395 } 1396 1397 if (nir->info.tess.point_mode) { 1398 prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT; 1399 } else if (nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) { 1400 prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE; 1401 } else { 1402 /* Hardware winding order is backwards from OpenGL */ 1403 prog_data->output_topology = 1404 nir->info.tess.ccw ? BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW 1405 : BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW; 1406 } 1407 1408 if (unlikely(debug_enabled)) { 1409 fprintf(stderr, "TES Input "); 1410 brw_print_vue_map(stderr, input_vue_map, MESA_SHADER_TESS_EVAL); 1411 fprintf(stderr, "TES Output "); 1412 brw_print_vue_map(stderr, &prog_data->base.vue_map, 1413 MESA_SHADER_TESS_EVAL); 1414 } 1415 1416 if (is_scalar) { 1417 fs_visitor v(compiler, params->log_data, mem_ctx, &key->base, 1418 &prog_data->base.base, nir, 8, 1419 debug_enabled); 1420 if (!v.run_tes()) { 1421 params->error_str = ralloc_strdup(mem_ctx, v.fail_msg); 1422 return NULL; 1423 } 1424 1425 prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; 1426 prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; 1427 1428 fs_generator g(compiler, params->log_data, mem_ctx, 1429 &prog_data->base.base, false, MESA_SHADER_TESS_EVAL); 1430 if (unlikely(debug_enabled)) { 1431 g.enable_debug(ralloc_asprintf(mem_ctx, 1432 "%s tessellation evaluation shader %s", 1433 nir->info.label ? nir->info.label 1434 : "unnamed", 1435 nir->info.name)); 1436 } 1437 1438 g.generate_code(v.cfg, 8, v.shader_stats, 1439 v.performance_analysis.require(), params->stats); 1440 1441 g.add_const_data(nir->constant_data, nir->constant_data_size); 1442 1443 assembly = g.get_assembly(); 1444 } else { 1445 brw::vec4_tes_visitor v(compiler, params->log_data, key, prog_data, 1446 nir, mem_ctx, debug_enabled); 1447 if (!v.run()) { 1448 params->error_str = ralloc_strdup(mem_ctx, v.fail_msg); 1449 return NULL; 1450 } 1451 1452 if (unlikely(debug_enabled)) 1453 v.dump_instructions(); 1454 1455 assembly = brw_vec4_generate_assembly(compiler, params->log_data, mem_ctx, nir, 1456 &prog_data->base, v.cfg, 1457 v.performance_analysis.require(), 1458 params->stats, debug_enabled); 1459 } 1460 1461 return assembly; 1462} 1463