/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "float64_glsl.h"
#include "glsl_to_nir.h"
#include "ir_visitor.h"
#include "ir_hierarchical_visitor.h"
#include "ir.h"
#include "ir_optimization.h"
#include "program.h"
#include "compiler/nir/nir_control_flow.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_builtin_builder.h"
#include "compiler/nir/nir_deref.h"
#include "main/errors.h"
#include "main/mtypes.h"
#include "main/shaderobj.h"
#include "main/context.h"
#include "util/u_math.h"

/*
 * pass to lower GLSL IR to NIR
 *
 * This will lower variable dereferences to loads/stores of corresponding
 * variables in NIR - the variables will be converted to registers in a later
 * pass.
 */

namespace {

class nir_visitor : public ir_visitor
{
public:
   nir_visitor(const struct gl_constants *consts, nir_shader *shader);
   ~nir_visitor();

   virtual void visit(ir_variable *);
   virtual void visit(ir_function *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_if *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_demote *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_return *);
   virtual void visit(ir_call *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_dereference_variable *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_barrier *);

   void create_function(ir_function_signature *ir);

   /* True if we have any output rvalues */
   bool has_output_rvalue;

private:
   void add_instr(nir_instr *instr, unsigned num_components, unsigned bit_size);
   nir_ssa_def *evaluate_rvalue(ir_rvalue *ir);

   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
                       nir_ssa_def *src2);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
                       nir_ssa_def *src2, nir_ssa_def *src3);

   bool supports_std430;

   nir_shader *shader;
   nir_function_impl *impl;
   nir_builder b;
   nir_ssa_def *result; /* result of the expression tree last visited */

   nir_deref_instr *evaluate_deref(ir_instruction *ir);

   nir_constant *constant_copy(ir_constant *ir, void *mem_ctx);

   /* most recent deref instruction created */
   nir_deref_instr *deref;

   /* whether the IR we're operating on is per-function or global */
   bool is_global;

   ir_function_signature *sig;

   /* map of ir_variable -> nir_variable */
   struct hash_table *var_table;

   /* map of ir_function_signature -> nir_function_overload */
   struct hash_table *overload_table;

   /* set of nir_variables that hold a sparse result */
   struct set *sparse_variable_set;

   void adjust_sparse_variable(nir_deref_instr *var_deref, const glsl_type *type,
                               nir_ssa_def *dest);
};

/*
 * This visitor runs before the main visitor, calling create_function() for
 * each function so that the main visitor can resolve forward references in
 * calls.
 */

class nir_function_visitor : public ir_hierarchical_visitor
{
public:
   nir_function_visitor(nir_visitor *v) : visitor(v)
   {
   }
   virtual ir_visitor_status visit_enter(ir_function *);

private:
   nir_visitor *visitor;
};

/* glsl_to_nir can only handle converting certain function parameters
 * to NIR. This visitor checks for parameters it can't currently handle.
 */
class ir_function_param_visitor : public ir_hierarchical_visitor
{
public:
   ir_function_param_visitor()
      : unsupported(false)
   {
   }

   virtual ir_visitor_status visit_enter(ir_function_signature *ir)
   {
      if (ir->is_intrinsic())
         return visit_continue;

      foreach_in_list(ir_variable, param, &ir->parameters) {
         if (!param->type->is_vector() && !param->type->is_scalar()) {
            unsupported = true;
            return visit_stop;
         }

         if (param->data.mode == ir_var_function_inout) {
            unsupported = true;
            return visit_stop;
         }
      }

      if (!glsl_type_is_vector_or_scalar(ir->return_type) &&
          !ir->return_type->is_void()) {
         unsupported = true;
         return visit_stop;
      }

      return visit_continue;
   }

   bool unsupported;
};

} /* end of anonymous namespace */


static bool
has_unsupported_function_param(exec_list *ir)
{
   ir_function_param_visitor visitor;
   visit_list_elements(&visitor, ir);
   return visitor.unsupported;
}

nir_shader *
glsl_to_nir(const struct gl_constants *consts,
            const struct gl_shader_program *shader_prog,
            gl_shader_stage stage,
            const nir_shader_compiler_options *options)
{
   struct gl_linked_shader *sh = shader_prog->_LinkedShaders[stage];

   const struct gl_shader_compiler_options *gl_options =
      &consts->ShaderCompilerOptions[stage];

   /* glsl_to_nir can only handle converting certain function parameters
    * to NIR. If we find something we can't handle then we get the GLSL IR
    * opts to remove it before we continue on.
    *
    * TODO: add missing GLSL IR to NIR support and remove this loop.
    */
   while (has_unsupported_function_param(sh->ir)) {
      do_common_optimization(sh->ir, true, gl_options, consts->NativeIntegers);
   }

   nir_shader *shader = nir_shader_create(NULL, stage, options,
                                          &sh->Program->info);

   nir_visitor v1(consts, shader);
   nir_function_visitor v2(&v1);
   v2.run(sh->ir);
   visit_exec_list(sh->ir, &v1);

   /* The GLSL IR won't be needed anymore. */
   ralloc_free(sh->ir);
   sh->ir = NULL;

   nir_validate_shader(shader, "after glsl to nir, before function inline");

   /* We have to lower away local constant initializers right before we
    * inline functions. That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   nir_lower_variable_initializers(shader, nir_var_all);
   nir_lower_returns(shader);
   nir_inline_functions(shader);
   nir_opt_deref(shader);

   nir_validate_shader(shader, "after function inlining and return lowering");

   /* Now that we have inlined everything remove all of the functions except
    * main().
    */
   foreach_list_typed_safe(nir_function, function, node, &shader->functions) {
      if (strcmp("main", function->name) != 0) {
         exec_node_remove(&function->node);
      }
   }

   shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
   if (shader_prog->Label)
      shader->info.label = ralloc_strdup(shader, shader_prog->Label);

   shader->info.subgroup_size = SUBGROUP_SIZE_UNIFORM;

   if (shader->info.stage == MESA_SHADER_FRAGMENT) {
      shader->info.fs.pixel_center_integer = sh->Program->info.fs.pixel_center_integer;
      shader->info.fs.origin_upper_left = sh->Program->info.fs.origin_upper_left;
      shader->info.fs.advanced_blend_modes = sh->Program->info.fs.advanced_blend_modes;

      nir_foreach_variable_with_modes(var, shader,
                                      nir_var_shader_in |
                                      nir_var_system_value) {
         if (var->data.mode == nir_var_system_value &&
             (var->data.location == SYSTEM_VALUE_SAMPLE_ID ||
              var->data.location == SYSTEM_VALUE_SAMPLE_POS))
            shader->info.fs.uses_sample_shading = true;

         if (var->data.mode == nir_var_shader_in && var->data.sample)
            shader->info.fs.uses_sample_shading = true;
      }

      if (v1.has_output_rvalue)
         shader->info.fs.uses_sample_shading = true;
   }

   return shader;
}

nir_visitor::nir_visitor(const struct gl_constants *consts, nir_shader *shader)
{
   this->supports_std430 = consts->UseSTD430AsDefaultPacking;
   this->shader = shader;
   this->is_global = true;
   this->has_output_rvalue = false;
   this->var_table = _mesa_pointer_hash_table_create(NULL);
   this->overload_table = _mesa_pointer_hash_table_create(NULL);
   this->sparse_variable_set = _mesa_pointer_set_create(NULL);
   this->result = NULL;
   this->impl = NULL;
   this->deref = NULL;
   this->sig = NULL;
   memset(&this->b, 0, sizeof(this->b));
}

nir_visitor::~nir_visitor()
{
   _mesa_hash_table_destroy(this->var_table, NULL);
   _mesa_hash_table_destroy(this->overload_table, NULL);
   _mesa_set_destroy(this->sparse_variable_set, NULL);
}

nir_deref_instr *
nir_visitor::evaluate_deref(ir_instruction *ir)
{
   ir->accept(this);
   return this->deref;
}

nir_constant *
nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx)
{
   if (ir == NULL)
      return NULL;

   nir_constant *ret = rzalloc(mem_ctx, nir_constant);

   const unsigned rows = ir->type->vector_elements;
   const unsigned cols = ir->type->matrix_columns;
   unsigned i;

   ret->num_elements = 0;
   switch (ir->type->base_type) {
   case GLSL_TYPE_UINT:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u32 = ir->value.u[r];

      break;

   case GLSL_TYPE_UINT16:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u16 = ir->value.u16[r];
      break;

   case GLSL_TYPE_INT:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i32 = ir->value.i[r];

      break;

   case GLSL_TYPE_INT16:
      /* Only float base types can be matrices.
       */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i16 = ir->value.i16[r];
      break;

   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_FLOAT16:
   case GLSL_TYPE_DOUBLE:
      if (cols > 1) {
         ret->elements = ralloc_array(mem_ctx, nir_constant *, cols);
         ret->num_elements = cols;
         for (unsigned c = 0; c < cols; c++) {
            nir_constant *col_const = rzalloc(mem_ctx, nir_constant);
            col_const->num_elements = 0;
            switch (ir->type->base_type) {
            case GLSL_TYPE_FLOAT:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].f32 = ir->value.f[c * rows + r];
               break;

            case GLSL_TYPE_FLOAT16:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].u16 = ir->value.f16[c * rows + r];
               break;

            case GLSL_TYPE_DOUBLE:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].f64 = ir->value.d[c * rows + r];
               break;

            default:
               unreachable("Cannot get here from the first level switch");
            }
            ret->elements[c] = col_const;
         }
      } else {
         switch (ir->type->base_type) {
         case GLSL_TYPE_FLOAT:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].f32 = ir->value.f[r];
            break;

         case GLSL_TYPE_FLOAT16:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].u16 = ir->value.f16[r];
            break;

         case GLSL_TYPE_DOUBLE:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].f64 = ir->value.d[r];
            break;

         default:
            unreachable("Cannot get here from the first level switch");
         }
      }
      break;

   case GLSL_TYPE_UINT64:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u64 = ir->value.u64[r];
      break;

   case GLSL_TYPE_INT64:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i64 = ir->value.i64[r];
      break;

   case GLSL_TYPE_BOOL:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].b = ir->value.b[r];

      break;

   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_ARRAY:
      ret->elements = ralloc_array(mem_ctx, nir_constant *,
                                   ir->type->length);
      ret->num_elements = ir->type->length;

      for (i = 0; i < ir->type->length; i++)
         ret->elements[i] = constant_copy(ir->const_elements[i], mem_ctx);
      break;

   default:
      unreachable("not reached");
   }

   return ret;
}

void
nir_visitor::adjust_sparse_variable(nir_deref_instr *var_deref, const glsl_type *type,
                                    nir_ssa_def *dest)
{
   const glsl_type *texel_type = type->field_type("texel");
   assert(texel_type);

   assert(var_deref->deref_type == nir_deref_type_var);
   nir_variable *var = var_deref->var;

   /* Adjust the nir_variable type to match the sparse NIR instructions:
    * the nir_variable is created with the struct type from the ir_variable,
    * but sparse NIR instructions write a plain vector destination.
    */
   var->type = glsl_type::get_instance(texel_type->get_base_type()->base_type,
                                       dest->num_components, 1);

   var_deref->type = var->type;

   /* Record the adjusted variable.
    */
   _mesa_set_add(this->sparse_variable_set, var);
}

static unsigned
get_nir_how_declared(unsigned how_declared)
{
   if (how_declared == ir_var_hidden)
      return nir_var_hidden;

   return nir_var_declared_normally;
}

void
nir_visitor::visit(ir_variable *ir)
{
   /* FINISHME: inout parameters */
   assert(ir->data.mode != ir_var_function_inout);

   if (ir->data.mode == ir_var_function_out)
      return;

   nir_variable *var = rzalloc(shader, nir_variable);
   var->type = ir->type;
   var->name = ralloc_strdup(var, ir->name);

   var->data.assigned = ir->data.assigned;
   var->data.always_active_io = ir->data.always_active_io;
   var->data.read_only = ir->data.read_only;
   var->data.centroid = ir->data.centroid;
   var->data.sample = ir->data.sample;
   var->data.patch = ir->data.patch;
   var->data.how_declared = get_nir_how_declared(ir->data.how_declared);
   var->data.invariant = ir->data.invariant;
   var->data.location = ir->data.location;
   var->data.must_be_shader_input = ir->data.must_be_shader_input;
   var->data.stream = ir->data.stream;
   if (ir->data.stream & (1u << 31))
      var->data.stream |= NIR_STREAM_PACKED;

   var->data.precision = ir->data.precision;
   var->data.explicit_location = ir->data.explicit_location;
   var->data.matrix_layout = ir->data.matrix_layout;
   var->data.from_named_ifc_block = ir->data.from_named_ifc_block;
   var->data.compact = false;

   switch (ir->data.mode) {
   case ir_var_auto:
   case ir_var_temporary:
      if (is_global)
         var->data.mode = nir_var_shader_temp;
      else
         var->data.mode = nir_var_function_temp;
      break;

   case ir_var_function_in:
   case ir_var_const_in:
      var->data.mode = nir_var_function_temp;
      break;

   case ir_var_shader_in:
      if (shader->info.stage == MESA_SHADER_GEOMETRY &&
          ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
         /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
         var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
         var->data.mode = nir_var_system_value;
      } else {
         var->data.mode = nir_var_shader_in;

         if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
             (ir->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
              ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
            var->data.compact = ir->type->without_array()->is_scalar();
         }

         if (shader->info.stage > MESA_SHADER_VERTEX &&
             ir->data.location >= VARYING_SLOT_CLIP_DIST0 &&
             ir->data.location <= VARYING_SLOT_CULL_DIST1) {
            var->data.compact = ir->type->without_array()->is_scalar();
         }
      }
      break;

   case ir_var_shader_out:
      var->data.mode = nir_var_shader_out;
      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          (ir->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
           ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
         var->data.compact = ir->type->without_array()->is_scalar();
      }

      if (shader->info.stage <= MESA_SHADER_GEOMETRY &&
          ir->data.location >= VARYING_SLOT_CLIP_DIST0 &&
          ir->data.location <= VARYING_SLOT_CULL_DIST1) {
         var->data.compact = ir->type->without_array()->is_scalar();
      }
      break;

   case ir_var_uniform:
      if (ir->get_interface_type())
         var->data.mode = nir_var_mem_ubo;
      else if (ir->type->contains_image() && !ir->data.bindless)
         var->data.mode = nir_var_image;
      else
         var->data.mode = nir_var_uniform;
      break;

   case ir_var_shader_storage:
      var->data.mode = nir_var_mem_ssbo;
      break;

   case ir_var_system_value:
      var->data.mode = nir_var_system_value;
      break;

   case ir_var_shader_shared:
      var->data.mode = nir_var_mem_shared;
      break;

   default:
      unreachable("not reached");
   }

   unsigned mem_access = 0;
   if (ir->data.memory_read_only)
      mem_access |= ACCESS_NON_WRITEABLE;
   if (ir->data.memory_write_only)
      mem_access |= ACCESS_NON_READABLE;
   if (ir->data.memory_coherent)
      mem_access |= ACCESS_COHERENT;
   if (ir->data.memory_volatile)
      mem_access |= ACCESS_VOLATILE;
   if (ir->data.memory_restrict)
      mem_access |= ACCESS_RESTRICT;

   var->interface_type = ir->get_interface_type();

   /* For UBO and SSBO variables, we need explicit types */
   if (var->data.mode & (nir_var_mem_ubo | nir_var_mem_ssbo)) {
      const glsl_type *explicit_ifc_type =
         ir->get_interface_type()->get_explicit_interface_type(supports_std430);

      var->interface_type = explicit_ifc_type;

      if (ir->type->without_array()->is_interface()) {
         /* If the type contains the interface, wrap the explicit type in the
          * right number of arrays.
          */
         var->type = glsl_type_wrap_in_arrays(explicit_ifc_type, ir->type);
      } else {
         /* Otherwise, this variable is one entry in the interface */
         UNUSED bool found = false;
         for (unsigned i = 0; i < explicit_ifc_type->length; i++) {
            const glsl_struct_field *field =
               &explicit_ifc_type->fields.structure[i];
            if (strcmp(ir->name, field->name) != 0)
               continue;

            var->type = field->type;
            if (field->memory_read_only)
               mem_access |= ACCESS_NON_WRITEABLE;
            if (field->memory_write_only)
               mem_access |= ACCESS_NON_READABLE;
            if (field->memory_coherent)
               mem_access |= ACCESS_COHERENT;
            if (field->memory_volatile)
               mem_access |= ACCESS_VOLATILE;
            if (field->memory_restrict)
               mem_access |= ACCESS_RESTRICT;

            found = true;
            break;
         }
         assert(found);
      }
   }

   var->data.interpolation = ir->data.interpolation;
   var->data.location_frac = ir->data.location_frac;

   switch (ir->data.depth_layout) {
   case ir_depth_layout_none:
      var->data.depth_layout = nir_depth_layout_none;
      break;
   case ir_depth_layout_any:
      var->data.depth_layout = nir_depth_layout_any;
      break;
   case ir_depth_layout_greater:
      var->data.depth_layout = nir_depth_layout_greater;
      break;
   case ir_depth_layout_less:
      var->data.depth_layout = nir_depth_layout_less;
      break;
   case ir_depth_layout_unchanged:
      var->data.depth_layout = nir_depth_layout_unchanged;
      break;
   default:
      unreachable("not reached");
   }

   var->data.index = ir->data.index;
   var->data.descriptor_set = 0;
   var->data.binding = ir->data.binding;
   var->data.explicit_binding = ir->data.explicit_binding;
   var->data.explicit_offset = ir->data.explicit_xfb_offset;
   var->data.bindless = ir->data.bindless;
   var->data.offset = ir->data.offset;
   var->data.access = (gl_access_qualifier)mem_access;

   if (var->type->without_array()->is_image()) {
      var->data.image.format = ir->data.image_format;
   } else if (var->data.mode == nir_var_shader_out) {
      var->data.xfb.buffer = ir->data.xfb_buffer;
      var->data.xfb.stride = ir->data.xfb_stride;
   }

   var->data.fb_fetch_output = ir->data.fb_fetch_output;
   var->data.explicit_xfb_buffer = ir->data.explicit_xfb_buffer;
   var->data.explicit_xfb_stride = ir->data.explicit_xfb_stride;

   var->num_state_slots = ir->get_num_state_slots();
   if (var->num_state_slots > 0) {
      var->state_slots = rzalloc_array(var, nir_state_slot,
                                       var->num_state_slots);

      ir_state_slot *state_slots = ir->get_state_slots();
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         for (unsigned j = 0; j < 4; j++)
            var->state_slots[i].tokens[j] = state_slots[i].tokens[j];
         var->state_slots[i].swizzle = state_slots[i].swizzle;
      }
   } else {
      var->state_slots = NULL;
   }

   var->constant_initializer = constant_copy(ir->constant_initializer, var);

   if (var->data.mode == nir_var_function_temp)
      nir_function_impl_add_variable(impl, var);
   else
      nir_shader_add_variable(shader, var);

   _mesa_hash_table_insert(var_table, ir, var);
}

ir_visitor_status
nir_function_visitor::visit_enter(ir_function *ir)
{
   foreach_in_list(ir_function_signature, sig, &ir->signatures) {
      visitor->create_function(sig);
   }
   return visit_continue_with_parent;
}

void
nir_visitor::create_function(ir_function_signature *ir)
{
   if (ir->is_intrinsic())
      return;

   nir_function *func = nir_function_create(shader, ir->function_name());
   if (strcmp(ir->function_name(), "main") == 0)
      func->is_entrypoint = true;

   func->num_params = ir->parameters.length() +
                      (ir->return_type != glsl_type::void_type);
   func->params = ralloc_array(shader, nir_parameter, func->num_params);

   unsigned np = 0;

   if (ir->return_type != glsl_type::void_type) {
      /* The return value is a variable deref (basically an out parameter) */
      func->params[np].num_components = 1;
      func->params[np].bit_size = 32;
      np++;
   }

   foreach_in_list(ir_variable, param, &ir->parameters) {
      /* FINISHME: pass arrays, structs, etc by reference? */
      assert(param->type->is_vector() || param->type->is_scalar());

      if (param->data.mode == ir_var_function_in) {
         func->params[np].num_components = param->type->vector_elements;
         func->params[np].bit_size = glsl_get_bit_size(param->type);
      } else {
         func->params[np].num_components = 1;
         func->params[np].bit_size = 32;
      }
      np++;
   }
   assert(np == func->num_params);

   _mesa_hash_table_insert(this->overload_table, ir, func);
}

void
nir_visitor::visit(ir_function *ir)
{
   foreach_in_list(ir_function_signature, sig, &ir->signatures)
      sig->accept(this);
}

void
nir_visitor::visit(ir_function_signature *ir)
{
   if (ir->is_intrinsic())
      return;

   this->sig = ir;

   struct hash_entry *entry =
      _mesa_hash_table_search(this->overload_table, ir);

   assert(entry);
   nir_function *func = (nir_function *) entry->data;

   if (ir->is_defined) {
      nir_function_impl *impl = nir_function_impl_create(func);
      this->impl = impl;

      this->is_global = false;

      nir_builder_init(&b, impl);
      b.cursor = nir_after_cf_list(&impl->body);

      unsigned i = (ir->return_type != glsl_type::void_type) ?
         1 : 0;

      foreach_in_list(ir_variable, param, &ir->parameters) {
         nir_variable *var =
            nir_local_variable_create(impl, param->type, param->name);

         if (param->data.mode == ir_var_function_in) {
            nir_store_var(&b, var, nir_load_param(&b, i), ~0);
         }

         _mesa_hash_table_insert(var_table, param, var);
         i++;
      }

      visit_exec_list(&ir->body, this);

      this->is_global = true;
   } else {
      func->impl = NULL;
   }
}

void
nir_visitor::visit(ir_loop *ir)
{
   nir_push_loop(&b);
   visit_exec_list(&ir->body_instructions, this);
   nir_pop_loop(&b, NULL);
}

void
nir_visitor::visit(ir_if *ir)
{
   nir_push_if(&b, evaluate_rvalue(ir->condition));
   visit_exec_list(&ir->then_instructions, this);
   nir_push_else(&b, NULL);
   visit_exec_list(&ir->else_instructions, this);
   nir_pop_if(&b, NULL);
}

void
nir_visitor::visit(ir_discard *ir)
{
   /*
    * discards aren't treated as control flow, because before we lower them
    * they can appear anywhere in the shader and the stuff after them may still
    * be executed (yay, crazy GLSL rules!). However, after lowering, all the
    * discards will be immediately followed by a return.
    */

   if (ir->condition)
      nir_discard_if(&b, evaluate_rvalue(ir->condition));
   else
      nir_discard(&b);
}

void
nir_visitor::visit(ir_demote *ir)
{
   nir_demote(&b);
}

void
nir_visitor::visit(ir_emit_vertex *ir)
{
   nir_emit_vertex(&b, (unsigned)ir->stream_id());
}

void
nir_visitor::visit(ir_end_primitive *ir)
{
   nir_end_primitive(&b, (unsigned)ir->stream_id());
}

void
nir_visitor::visit(ir_loop_jump *ir)
{
   nir_jump_type type;
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      type = nir_jump_break;
      break;
   case ir_loop_jump::jump_continue:
      type = nir_jump_continue;
      break;
   default:
      unreachable("not reached");
   }

   nir_jump_instr *instr = nir_jump_instr_create(this->shader, type);
   nir_builder_instr_insert(&b, &instr->instr);
}

void
nir_visitor::visit(ir_return *ir)
{
   if (ir->value != NULL) {
      nir_deref_instr *ret_deref =
         nir_build_deref_cast(&b, nir_load_param(&b, 0),
                              nir_var_function_temp, ir->value->type, 0);

      nir_ssa_def *val = evaluate_rvalue(ir->value);
      nir_store_deref(&b, ret_deref, val, ~0);
   }

   nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
   nir_builder_instr_insert(&b, &instr->instr);
}

static void
intrinsic_set_std430_align(nir_intrinsic_instr *intrin, const glsl_type *type)
{
   unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type);
   unsigned pow2_components = util_next_power_of_two(type->vector_elements);
   nir_intrinsic_set_align(intrin, (bit_size / 8) * pow2_components, 0);
}

/* Accumulate any qualifiers along the deref chain to get the actual
 * load/store qualifier.
922 */ 923 924static enum gl_access_qualifier 925deref_get_qualifier(nir_deref_instr *deref) 926{ 927 nir_deref_path path; 928 nir_deref_path_init(&path, deref, NULL); 929 930 unsigned qualifiers = path.path[0]->var->data.access; 931 932 const glsl_type *parent_type = path.path[0]->type; 933 for (nir_deref_instr **cur_ptr = &path.path[1]; *cur_ptr; cur_ptr++) { 934 nir_deref_instr *cur = *cur_ptr; 935 936 if (parent_type->is_interface()) { 937 const struct glsl_struct_field *field = 938 &parent_type->fields.structure[cur->strct.index]; 939 if (field->memory_read_only) 940 qualifiers |= ACCESS_NON_WRITEABLE; 941 if (field->memory_write_only) 942 qualifiers |= ACCESS_NON_READABLE; 943 if (field->memory_coherent) 944 qualifiers |= ACCESS_COHERENT; 945 if (field->memory_volatile) 946 qualifiers |= ACCESS_VOLATILE; 947 if (field->memory_restrict) 948 qualifiers |= ACCESS_RESTRICT; 949 } 950 951 parent_type = cur->type; 952 } 953 954 nir_deref_path_finish(&path); 955 956 return (gl_access_qualifier) qualifiers; 957} 958 959void 960nir_visitor::visit(ir_call *ir) 961{ 962 if (ir->callee->is_intrinsic()) { 963 nir_intrinsic_op op; 964 965 switch (ir->callee->intrinsic_id) { 966 case ir_intrinsic_generic_atomic_add: 967 op = ir->return_deref->type->is_integer_32_64() 968 ? nir_intrinsic_deref_atomic_add : nir_intrinsic_deref_atomic_fadd; 969 break; 970 case ir_intrinsic_generic_atomic_and: 971 op = nir_intrinsic_deref_atomic_and; 972 break; 973 case ir_intrinsic_generic_atomic_or: 974 op = nir_intrinsic_deref_atomic_or; 975 break; 976 case ir_intrinsic_generic_atomic_xor: 977 op = nir_intrinsic_deref_atomic_xor; 978 break; 979 case ir_intrinsic_generic_atomic_min: 980 assert(ir->return_deref); 981 if (ir->return_deref->type == glsl_type::int_type || 982 ir->return_deref->type == glsl_type::int64_t_type) 983 op = nir_intrinsic_deref_atomic_imin; 984 else if (ir->return_deref->type == glsl_type::uint_type || 985 ir->return_deref->type == glsl_type::uint64_t_type) 986 op = nir_intrinsic_deref_atomic_umin; 987 else if (ir->return_deref->type == glsl_type::float_type) 988 op = nir_intrinsic_deref_atomic_fmin; 989 else 990 unreachable("Invalid type"); 991 break; 992 case ir_intrinsic_generic_atomic_max: 993 assert(ir->return_deref); 994 if (ir->return_deref->type == glsl_type::int_type || 995 ir->return_deref->type == glsl_type::int64_t_type) 996 op = nir_intrinsic_deref_atomic_imax; 997 else if (ir->return_deref->type == glsl_type::uint_type || 998 ir->return_deref->type == glsl_type::uint64_t_type) 999 op = nir_intrinsic_deref_atomic_umax; 1000 else if (ir->return_deref->type == glsl_type::float_type) 1001 op = nir_intrinsic_deref_atomic_fmax; 1002 else 1003 unreachable("Invalid type"); 1004 break; 1005 case ir_intrinsic_generic_atomic_exchange: 1006 op = nir_intrinsic_deref_atomic_exchange; 1007 break; 1008 case ir_intrinsic_generic_atomic_comp_swap: 1009 op = ir->return_deref->type->is_integer_32_64() 1010 ? 
nir_intrinsic_deref_atomic_comp_swap 1011 : nir_intrinsic_deref_atomic_fcomp_swap; 1012 break; 1013 case ir_intrinsic_atomic_counter_read: 1014 op = nir_intrinsic_atomic_counter_read_deref; 1015 break; 1016 case ir_intrinsic_atomic_counter_increment: 1017 op = nir_intrinsic_atomic_counter_inc_deref; 1018 break; 1019 case ir_intrinsic_atomic_counter_predecrement: 1020 op = nir_intrinsic_atomic_counter_pre_dec_deref; 1021 break; 1022 case ir_intrinsic_atomic_counter_add: 1023 op = nir_intrinsic_atomic_counter_add_deref; 1024 break; 1025 case ir_intrinsic_atomic_counter_and: 1026 op = nir_intrinsic_atomic_counter_and_deref; 1027 break; 1028 case ir_intrinsic_atomic_counter_or: 1029 op = nir_intrinsic_atomic_counter_or_deref; 1030 break; 1031 case ir_intrinsic_atomic_counter_xor: 1032 op = nir_intrinsic_atomic_counter_xor_deref; 1033 break; 1034 case ir_intrinsic_atomic_counter_min: 1035 op = nir_intrinsic_atomic_counter_min_deref; 1036 break; 1037 case ir_intrinsic_atomic_counter_max: 1038 op = nir_intrinsic_atomic_counter_max_deref; 1039 break; 1040 case ir_intrinsic_atomic_counter_exchange: 1041 op = nir_intrinsic_atomic_counter_exchange_deref; 1042 break; 1043 case ir_intrinsic_atomic_counter_comp_swap: 1044 op = nir_intrinsic_atomic_counter_comp_swap_deref; 1045 break; 1046 case ir_intrinsic_image_load: 1047 op = nir_intrinsic_image_deref_load; 1048 break; 1049 case ir_intrinsic_image_store: 1050 op = nir_intrinsic_image_deref_store; 1051 break; 1052 case ir_intrinsic_image_atomic_add: 1053 op = ir->return_deref->type->is_integer_32_64() 1054 ? nir_intrinsic_image_deref_atomic_add 1055 : nir_intrinsic_image_deref_atomic_fadd; 1056 break; 1057 case ir_intrinsic_image_atomic_min: 1058 if (ir->return_deref->type == glsl_type::int_type) 1059 op = nir_intrinsic_image_deref_atomic_imin; 1060 else if (ir->return_deref->type == glsl_type::uint_type) 1061 op = nir_intrinsic_image_deref_atomic_umin; 1062 else 1063 unreachable("Invalid type"); 1064 break; 1065 case ir_intrinsic_image_atomic_max: 1066 if (ir->return_deref->type == glsl_type::int_type) 1067 op = nir_intrinsic_image_deref_atomic_imax; 1068 else if (ir->return_deref->type == glsl_type::uint_type) 1069 op = nir_intrinsic_image_deref_atomic_umax; 1070 else 1071 unreachable("Invalid type"); 1072 break; 1073 case ir_intrinsic_image_atomic_and: 1074 op = nir_intrinsic_image_deref_atomic_and; 1075 break; 1076 case ir_intrinsic_image_atomic_or: 1077 op = nir_intrinsic_image_deref_atomic_or; 1078 break; 1079 case ir_intrinsic_image_atomic_xor: 1080 op = nir_intrinsic_image_deref_atomic_xor; 1081 break; 1082 case ir_intrinsic_image_atomic_exchange: 1083 op = nir_intrinsic_image_deref_atomic_exchange; 1084 break; 1085 case ir_intrinsic_image_atomic_comp_swap: 1086 op = nir_intrinsic_image_deref_atomic_comp_swap; 1087 break; 1088 case ir_intrinsic_image_atomic_inc_wrap: 1089 op = nir_intrinsic_image_deref_atomic_inc_wrap; 1090 break; 1091 case ir_intrinsic_image_atomic_dec_wrap: 1092 op = nir_intrinsic_image_deref_atomic_dec_wrap; 1093 break; 1094 case ir_intrinsic_memory_barrier: 1095 op = nir_intrinsic_memory_barrier; 1096 break; 1097 case ir_intrinsic_image_size: 1098 op = nir_intrinsic_image_deref_size; 1099 break; 1100 case ir_intrinsic_image_samples: 1101 op = nir_intrinsic_image_deref_samples; 1102 break; 1103 case ir_intrinsic_image_sparse_load: 1104 op = nir_intrinsic_image_deref_sparse_load; 1105 break; 1106 case ir_intrinsic_ssbo_store: 1107 case ir_intrinsic_ssbo_load: 1108 case ir_intrinsic_ssbo_atomic_add: 1109 case 
ir_intrinsic_ssbo_atomic_and: 1110 case ir_intrinsic_ssbo_atomic_or: 1111 case ir_intrinsic_ssbo_atomic_xor: 1112 case ir_intrinsic_ssbo_atomic_min: 1113 case ir_intrinsic_ssbo_atomic_max: 1114 case ir_intrinsic_ssbo_atomic_exchange: 1115 case ir_intrinsic_ssbo_atomic_comp_swap: 1116 /* SSBO store/loads should only have been lowered in GLSL IR for 1117 * non-nir drivers, NIR drivers make use of gl_nir_lower_buffers() 1118 * instead. 1119 */ 1120 unreachable("Invalid operation nir doesn't want lowered ssbo " 1121 "store/loads"); 1122 case ir_intrinsic_shader_clock: 1123 op = nir_intrinsic_shader_clock; 1124 break; 1125 case ir_intrinsic_begin_invocation_interlock: 1126 op = nir_intrinsic_begin_invocation_interlock; 1127 break; 1128 case ir_intrinsic_end_invocation_interlock: 1129 op = nir_intrinsic_end_invocation_interlock; 1130 break; 1131 case ir_intrinsic_group_memory_barrier: 1132 op = nir_intrinsic_group_memory_barrier; 1133 break; 1134 case ir_intrinsic_memory_barrier_atomic_counter: 1135 op = nir_intrinsic_memory_barrier_atomic_counter; 1136 break; 1137 case ir_intrinsic_memory_barrier_buffer: 1138 op = nir_intrinsic_memory_barrier_buffer; 1139 break; 1140 case ir_intrinsic_memory_barrier_image: 1141 op = nir_intrinsic_memory_barrier_image; 1142 break; 1143 case ir_intrinsic_memory_barrier_shared: 1144 op = nir_intrinsic_memory_barrier_shared; 1145 break; 1146 case ir_intrinsic_shared_load: 1147 op = nir_intrinsic_load_shared; 1148 break; 1149 case ir_intrinsic_shared_store: 1150 op = nir_intrinsic_store_shared; 1151 break; 1152 case ir_intrinsic_shared_atomic_add: 1153 op = ir->return_deref->type->is_integer_32_64() 1154 ? nir_intrinsic_shared_atomic_add 1155 : nir_intrinsic_shared_atomic_fadd; 1156 break; 1157 case ir_intrinsic_shared_atomic_and: 1158 op = nir_intrinsic_shared_atomic_and; 1159 break; 1160 case ir_intrinsic_shared_atomic_or: 1161 op = nir_intrinsic_shared_atomic_or; 1162 break; 1163 case ir_intrinsic_shared_atomic_xor: 1164 op = nir_intrinsic_shared_atomic_xor; 1165 break; 1166 case ir_intrinsic_shared_atomic_min: 1167 assert(ir->return_deref); 1168 if (ir->return_deref->type == glsl_type::int_type || 1169 ir->return_deref->type == glsl_type::int64_t_type) 1170 op = nir_intrinsic_shared_atomic_imin; 1171 else if (ir->return_deref->type == glsl_type::uint_type || 1172 ir->return_deref->type == glsl_type::uint64_t_type) 1173 op = nir_intrinsic_shared_atomic_umin; 1174 else if (ir->return_deref->type == glsl_type::float_type) 1175 op = nir_intrinsic_shared_atomic_fmin; 1176 else 1177 unreachable("Invalid type"); 1178 break; 1179 case ir_intrinsic_shared_atomic_max: 1180 assert(ir->return_deref); 1181 if (ir->return_deref->type == glsl_type::int_type || 1182 ir->return_deref->type == glsl_type::int64_t_type) 1183 op = nir_intrinsic_shared_atomic_imax; 1184 else if (ir->return_deref->type == glsl_type::uint_type || 1185 ir->return_deref->type == glsl_type::uint64_t_type) 1186 op = nir_intrinsic_shared_atomic_umax; 1187 else if (ir->return_deref->type == glsl_type::float_type) 1188 op = nir_intrinsic_shared_atomic_fmax; 1189 else 1190 unreachable("Invalid type"); 1191 break; 1192 case ir_intrinsic_shared_atomic_exchange: 1193 op = nir_intrinsic_shared_atomic_exchange; 1194 break; 1195 case ir_intrinsic_shared_atomic_comp_swap: 1196 op = ir->return_deref->type->is_integer_32_64() 1197 ? 
nir_intrinsic_shared_atomic_comp_swap 1198 : nir_intrinsic_shared_atomic_fcomp_swap; 1199 break; 1200 case ir_intrinsic_vote_any: 1201 op = nir_intrinsic_vote_any; 1202 break; 1203 case ir_intrinsic_vote_all: 1204 op = nir_intrinsic_vote_all; 1205 break; 1206 case ir_intrinsic_vote_eq: 1207 op = nir_intrinsic_vote_ieq; 1208 break; 1209 case ir_intrinsic_ballot: 1210 op = nir_intrinsic_ballot; 1211 break; 1212 case ir_intrinsic_read_invocation: 1213 op = nir_intrinsic_read_invocation; 1214 break; 1215 case ir_intrinsic_read_first_invocation: 1216 op = nir_intrinsic_read_first_invocation; 1217 break; 1218 case ir_intrinsic_helper_invocation: 1219 op = nir_intrinsic_is_helper_invocation; 1220 break; 1221 case ir_intrinsic_is_sparse_texels_resident: 1222 op = nir_intrinsic_is_sparse_texels_resident; 1223 break; 1224 default: 1225 unreachable("not reached"); 1226 } 1227 1228 nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op); 1229 nir_ssa_def *ret = &instr->dest.ssa; 1230 1231 switch (op) { 1232 case nir_intrinsic_deref_atomic_add: 1233 case nir_intrinsic_deref_atomic_imin: 1234 case nir_intrinsic_deref_atomic_umin: 1235 case nir_intrinsic_deref_atomic_imax: 1236 case nir_intrinsic_deref_atomic_umax: 1237 case nir_intrinsic_deref_atomic_and: 1238 case nir_intrinsic_deref_atomic_or: 1239 case nir_intrinsic_deref_atomic_xor: 1240 case nir_intrinsic_deref_atomic_exchange: 1241 case nir_intrinsic_deref_atomic_comp_swap: 1242 case nir_intrinsic_deref_atomic_fadd: 1243 case nir_intrinsic_deref_atomic_fmin: 1244 case nir_intrinsic_deref_atomic_fmax: 1245 case nir_intrinsic_deref_atomic_fcomp_swap: { 1246 int param_count = ir->actual_parameters.length(); 1247 assert(param_count == 2 || param_count == 3); 1248 1249 /* Deref */ 1250 exec_node *param = ir->actual_parameters.get_head(); 1251 ir_rvalue *rvalue = (ir_rvalue *) param; 1252 ir_dereference *deref = rvalue->as_dereference(); 1253 ir_swizzle *swizzle = NULL; 1254 if (!deref) { 1255 /* We may have a swizzle to pick off a single vec4 component */ 1256 swizzle = rvalue->as_swizzle(); 1257 assert(swizzle && swizzle->type->vector_elements == 1); 1258 deref = swizzle->val->as_dereference(); 1259 assert(deref); 1260 } 1261 nir_deref_instr *nir_deref = evaluate_deref(deref); 1262 if (swizzle) { 1263 nir_deref = nir_build_deref_array_imm(&b, nir_deref, 1264 swizzle->mask.x); 1265 } 1266 instr->src[0] = nir_src_for_ssa(&nir_deref->dest.ssa); 1267 1268 nir_intrinsic_set_access(instr, deref_get_qualifier(nir_deref)); 1269 1270 /* data1 parameter (this is always present) */ 1271 param = param->get_next(); 1272 ir_instruction *inst = (ir_instruction *) param; 1273 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1274 1275 /* data2 parameter (only with atomic_comp_swap) */ 1276 if (param_count == 3) { 1277 assert(op == nir_intrinsic_deref_atomic_comp_swap || 1278 op == nir_intrinsic_deref_atomic_fcomp_swap); 1279 param = param->get_next(); 1280 inst = (ir_instruction *) param; 1281 instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1282 } 1283 1284 /* Atomic result */ 1285 assert(ir->return_deref); 1286 if (ir->return_deref->type->is_integer_64()) { 1287 nir_ssa_dest_init(&instr->instr, &instr->dest, 1288 ir->return_deref->type->vector_elements, 64, NULL); 1289 } else { 1290 nir_ssa_dest_init(&instr->instr, &instr->dest, 1291 ir->return_deref->type->vector_elements, 32, NULL); 1292 } 1293 nir_builder_instr_insert(&b, &instr->instr); 1294 break; 1295 } 1296 case nir_intrinsic_atomic_counter_read_deref: 1297 
case nir_intrinsic_atomic_counter_inc_deref: 1298 case nir_intrinsic_atomic_counter_pre_dec_deref: 1299 case nir_intrinsic_atomic_counter_add_deref: 1300 case nir_intrinsic_atomic_counter_min_deref: 1301 case nir_intrinsic_atomic_counter_max_deref: 1302 case nir_intrinsic_atomic_counter_and_deref: 1303 case nir_intrinsic_atomic_counter_or_deref: 1304 case nir_intrinsic_atomic_counter_xor_deref: 1305 case nir_intrinsic_atomic_counter_exchange_deref: 1306 case nir_intrinsic_atomic_counter_comp_swap_deref: { 1307 /* Set the counter variable dereference. */ 1308 exec_node *param = ir->actual_parameters.get_head(); 1309 ir_dereference *counter = (ir_dereference *)param; 1310 1311 instr->src[0] = nir_src_for_ssa(&evaluate_deref(counter)->dest.ssa); 1312 param = param->get_next(); 1313 1314 /* Set the intrinsic destination. */ 1315 if (ir->return_deref) { 1316 nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); 1317 } 1318 1319 /* Set the intrinsic parameters. */ 1320 if (!param->is_tail_sentinel()) { 1321 instr->src[1] = 1322 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 1323 param = param->get_next(); 1324 } 1325 1326 if (!param->is_tail_sentinel()) { 1327 instr->src[2] = 1328 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 1329 param = param->get_next(); 1330 } 1331 1332 nir_builder_instr_insert(&b, &instr->instr); 1333 break; 1334 } 1335 case nir_intrinsic_image_deref_load: 1336 case nir_intrinsic_image_deref_store: 1337 case nir_intrinsic_image_deref_atomic_add: 1338 case nir_intrinsic_image_deref_atomic_imin: 1339 case nir_intrinsic_image_deref_atomic_umin: 1340 case nir_intrinsic_image_deref_atomic_imax: 1341 case nir_intrinsic_image_deref_atomic_umax: 1342 case nir_intrinsic_image_deref_atomic_and: 1343 case nir_intrinsic_image_deref_atomic_or: 1344 case nir_intrinsic_image_deref_atomic_xor: 1345 case nir_intrinsic_image_deref_atomic_exchange: 1346 case nir_intrinsic_image_deref_atomic_comp_swap: 1347 case nir_intrinsic_image_deref_atomic_fadd: 1348 case nir_intrinsic_image_deref_samples: 1349 case nir_intrinsic_image_deref_size: 1350 case nir_intrinsic_image_deref_atomic_inc_wrap: 1351 case nir_intrinsic_image_deref_atomic_dec_wrap: 1352 case nir_intrinsic_image_deref_sparse_load: { 1353 /* Set the image variable dereference. */ 1354 exec_node *param = ir->actual_parameters.get_head(); 1355 ir_dereference *image = (ir_dereference *)param; 1356 nir_deref_instr *deref = evaluate_deref(image); 1357 const glsl_type *type = deref->type; 1358 1359 nir_intrinsic_set_access(instr, deref_get_qualifier(deref)); 1360 1361 instr->src[0] = nir_src_for_ssa(&deref->dest.ssa); 1362 param = param->get_next(); 1363 nir_intrinsic_set_image_dim(instr, 1364 (glsl_sampler_dim)type->sampler_dimensionality); 1365 nir_intrinsic_set_image_array(instr, type->sampler_array); 1366 1367 /* Set the intrinsic destination. */ 1368 if (ir->return_deref) { 1369 unsigned num_components; 1370 if (op == nir_intrinsic_image_deref_sparse_load) { 1371 const glsl_type *dest_type = 1372 ir->return_deref->type->field_type("texel"); 1373 /* One extra component to hold residency code. 
*/ 1374 num_components = dest_type->vector_elements + 1; 1375 } else 1376 num_components = ir->return_deref->type->vector_elements; 1377 1378 nir_ssa_dest_init(&instr->instr, &instr->dest, 1379 num_components, 32, NULL); 1380 } 1381 1382 if (op == nir_intrinsic_image_deref_size) { 1383 instr->num_components = instr->dest.ssa.num_components; 1384 } else if (op == nir_intrinsic_image_deref_load || 1385 op == nir_intrinsic_image_deref_sparse_load) { 1386 instr->num_components = instr->dest.ssa.num_components; 1387 nir_intrinsic_set_dest_type(instr, 1388 nir_get_nir_type_for_glsl_base_type(type->sampled_type)); 1389 } else if (op == nir_intrinsic_image_deref_store) { 1390 instr->num_components = 4; 1391 nir_intrinsic_set_src_type(instr, 1392 nir_get_nir_type_for_glsl_base_type(type->sampled_type)); 1393 } 1394 1395 if (op == nir_intrinsic_image_deref_size || 1396 op == nir_intrinsic_image_deref_samples) { 1397 /* image_deref_size takes an LOD parameter which is always 0 1398 * coming from GLSL. 1399 */ 1400 if (op == nir_intrinsic_image_deref_size) 1401 instr->src[1] = nir_src_for_ssa(nir_imm_int(&b, 0)); 1402 nir_builder_instr_insert(&b, &instr->instr); 1403 break; 1404 } 1405 1406 /* Set the address argument, extending the coordinate vector to four 1407 * components. 1408 */ 1409 nir_ssa_def *src_addr = 1410 evaluate_rvalue((ir_dereference *)param); 1411 nir_ssa_def *srcs[4]; 1412 1413 for (int i = 0; i < 4; i++) { 1414 if (i < type->coordinate_components()) 1415 srcs[i] = nir_channel(&b, src_addr, i); 1416 else 1417 srcs[i] = nir_ssa_undef(&b, 1, 32); 1418 } 1419 1420 instr->src[1] = nir_src_for_ssa(nir_vec(&b, srcs, 4)); 1421 param = param->get_next(); 1422 1423 /* Set the sample argument, which is undefined for single-sample 1424 * images. 1425 */ 1426 if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { 1427 instr->src[2] = 1428 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 1429 param = param->get_next(); 1430 } else { 1431 instr->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); 1432 } 1433 1434 /* Set the intrinsic parameters. 
*/ 1435 if (!param->is_tail_sentinel()) { 1436 instr->src[3] = 1437 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 1438 param = param->get_next(); 1439 } else if (op == nir_intrinsic_image_deref_load || 1440 op == nir_intrinsic_image_deref_sparse_load) { 1441 instr->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0)); /* LOD */ 1442 } 1443 1444 if (!param->is_tail_sentinel()) { 1445 instr->src[4] = 1446 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 1447 param = param->get_next(); 1448 } else if (op == nir_intrinsic_image_deref_store) { 1449 instr->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0)); /* LOD */ 1450 } 1451 1452 nir_builder_instr_insert(&b, &instr->instr); 1453 break; 1454 } 1455 case nir_intrinsic_memory_barrier: 1456 case nir_intrinsic_group_memory_barrier: 1457 case nir_intrinsic_memory_barrier_atomic_counter: 1458 case nir_intrinsic_memory_barrier_buffer: 1459 case nir_intrinsic_memory_barrier_image: 1460 case nir_intrinsic_memory_barrier_shared: 1461 nir_builder_instr_insert(&b, &instr->instr); 1462 break; 1463 case nir_intrinsic_shader_clock: 1464 nir_ssa_dest_init(&instr->instr, &instr->dest, 2, 32, NULL); 1465 nir_intrinsic_set_memory_scope(instr, NIR_SCOPE_SUBGROUP); 1466 nir_builder_instr_insert(&b, &instr->instr); 1467 break; 1468 case nir_intrinsic_begin_invocation_interlock: 1469 nir_builder_instr_insert(&b, &instr->instr); 1470 break; 1471 case nir_intrinsic_end_invocation_interlock: 1472 nir_builder_instr_insert(&b, &instr->instr); 1473 break; 1474 case nir_intrinsic_store_ssbo: { 1475 exec_node *param = ir->actual_parameters.get_head(); 1476 ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); 1477 1478 param = param->get_next(); 1479 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 1480 1481 param = param->get_next(); 1482 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 1483 1484 param = param->get_next(); 1485 ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); 1486 assert(write_mask); 1487 1488 nir_ssa_def *nir_val = evaluate_rvalue(val); 1489 if (val->type->is_boolean()) 1490 nir_val = nir_b2i32(&b, nir_val); 1491 1492 instr->src[0] = nir_src_for_ssa(nir_val); 1493 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); 1494 instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); 1495 intrinsic_set_std430_align(instr, val->type); 1496 nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]); 1497 instr->num_components = val->type->vector_elements; 1498 1499 nir_builder_instr_insert(&b, &instr->instr); 1500 break; 1501 } 1502 case nir_intrinsic_load_shared: { 1503 exec_node *param = ir->actual_parameters.get_head(); 1504 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 1505 1506 nir_intrinsic_set_base(instr, 0); 1507 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset)); 1508 1509 const glsl_type *type = ir->return_deref->var->type; 1510 instr->num_components = type->vector_elements; 1511 intrinsic_set_std430_align(instr, type); 1512 1513 /* Setup destination register */ 1514 unsigned bit_size = type->is_boolean() ? 
32 : glsl_get_bit_size(type); 1515 nir_ssa_dest_init(&instr->instr, &instr->dest, 1516 type->vector_elements, bit_size, NULL); 1517 1518 nir_builder_instr_insert(&b, &instr->instr); 1519 1520 /* The value in shared memory is a 32-bit value */ 1521 if (type->is_boolean()) 1522 ret = nir_b2b1(&b, &instr->dest.ssa); 1523 break; 1524 } 1525 case nir_intrinsic_store_shared: { 1526 exec_node *param = ir->actual_parameters.get_head(); 1527 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 1528 1529 param = param->get_next(); 1530 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 1531 1532 param = param->get_next(); 1533 ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); 1534 assert(write_mask); 1535 1536 nir_intrinsic_set_base(instr, 0); 1537 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); 1538 1539 nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]); 1540 1541 nir_ssa_def *nir_val = evaluate_rvalue(val); 1542 /* The value in shared memory is a 32-bit value */ 1543 if (val->type->is_boolean()) 1544 nir_val = nir_b2b32(&b, nir_val); 1545 1546 instr->src[0] = nir_src_for_ssa(nir_val); 1547 instr->num_components = val->type->vector_elements; 1548 intrinsic_set_std430_align(instr, val->type); 1549 1550 nir_builder_instr_insert(&b, &instr->instr); 1551 break; 1552 } 1553 case nir_intrinsic_shared_atomic_add: 1554 case nir_intrinsic_shared_atomic_imin: 1555 case nir_intrinsic_shared_atomic_umin: 1556 case nir_intrinsic_shared_atomic_imax: 1557 case nir_intrinsic_shared_atomic_umax: 1558 case nir_intrinsic_shared_atomic_and: 1559 case nir_intrinsic_shared_atomic_or: 1560 case nir_intrinsic_shared_atomic_xor: 1561 case nir_intrinsic_shared_atomic_exchange: 1562 case nir_intrinsic_shared_atomic_comp_swap: 1563 case nir_intrinsic_shared_atomic_fadd: 1564 case nir_intrinsic_shared_atomic_fmin: 1565 case nir_intrinsic_shared_atomic_fmax: 1566 case nir_intrinsic_shared_atomic_fcomp_swap: { 1567 int param_count = ir->actual_parameters.length(); 1568 assert(param_count == 2 || param_count == 3); 1569 1570 /* Offset */ 1571 exec_node *param = ir->actual_parameters.get_head(); 1572 ir_instruction *inst = (ir_instruction *) param; 1573 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1574 1575 /* data1 parameter (this is always present) */ 1576 param = param->get_next(); 1577 inst = (ir_instruction *) param; 1578 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1579 1580 /* data2 parameter (only with atomic_comp_swap) */ 1581 if (param_count == 3) { 1582 assert(op == nir_intrinsic_shared_atomic_comp_swap || 1583 op == nir_intrinsic_shared_atomic_fcomp_swap); 1584 param = param->get_next(); 1585 inst = (ir_instruction *) param; 1586 instr->src[2] = 1587 nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1588 } 1589 1590 /* Atomic result */ 1591 assert(ir->return_deref); 1592 unsigned bit_size = glsl_get_bit_size(ir->return_deref->type); 1593 nir_ssa_dest_init(&instr->instr, &instr->dest, 1594 ir->return_deref->type->vector_elements, 1595 bit_size, NULL); 1596 nir_builder_instr_insert(&b, &instr->instr); 1597 break; 1598 } 1599 case nir_intrinsic_vote_ieq: 1600 instr->num_components = 1; 1601 FALLTHROUGH; 1602 case nir_intrinsic_vote_any: 1603 case nir_intrinsic_vote_all: { 1604 nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL); 1605 1606 ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); 1607 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value)); 1608 1609 nir_builder_instr_insert(&b, 
&instr->instr); 1610 break; 1611 } 1612 1613 case nir_intrinsic_ballot: { 1614 nir_ssa_dest_init(&instr->instr, &instr->dest, 1615 ir->return_deref->type->vector_elements, 64, NULL); 1616 instr->num_components = ir->return_deref->type->vector_elements; 1617 1618 ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); 1619 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value)); 1620 1621 nir_builder_instr_insert(&b, &instr->instr); 1622 break; 1623 } 1624 case nir_intrinsic_read_invocation: { 1625 nir_ssa_dest_init(&instr->instr, &instr->dest, 1626 ir->return_deref->type->vector_elements, 32, NULL); 1627 instr->num_components = ir->return_deref->type->vector_elements; 1628 1629 ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); 1630 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value)); 1631 1632 ir_rvalue *invocation = (ir_rvalue *) ir->actual_parameters.get_head()->next; 1633 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(invocation)); 1634 1635 nir_builder_instr_insert(&b, &instr->instr); 1636 break; 1637 } 1638 case nir_intrinsic_read_first_invocation: { 1639 nir_ssa_dest_init(&instr->instr, &instr->dest, 1640 ir->return_deref->type->vector_elements, 32, NULL); 1641 instr->num_components = ir->return_deref->type->vector_elements; 1642 1643 ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); 1644 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value)); 1645 1646 nir_builder_instr_insert(&b, &instr->instr); 1647 break; 1648 } 1649 case nir_intrinsic_is_helper_invocation: { 1650 nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL); 1651 nir_builder_instr_insert(&b, &instr->instr); 1652 break; 1653 } 1654 case nir_intrinsic_is_sparse_texels_resident: { 1655 nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL); 1656 1657 ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); 1658 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value)); 1659 1660 nir_builder_instr_insert(&b, &instr->instr); 1661 break; 1662 } 1663 default: 1664 unreachable("not reached"); 1665 } 1666 1667 if (ir->return_deref) { 1668 nir_deref_instr *ret_deref = evaluate_deref(ir->return_deref); 1669 1670 if (op == nir_intrinsic_image_deref_sparse_load) 1671 adjust_sparse_variable(ret_deref, ir->return_deref->type, ret); 1672 1673 nir_store_deref(&b, ret_deref, ret, ~0); 1674 } 1675 1676 return; 1677 } 1678 1679 struct hash_entry *entry = 1680 _mesa_hash_table_search(this->overload_table, ir->callee); 1681 assert(entry); 1682 nir_function *callee = (nir_function *) entry->data; 1683 1684 nir_call_instr *call = nir_call_instr_create(this->shader, callee); 1685 1686 unsigned i = 0; 1687 nir_deref_instr *ret_deref = NULL; 1688 if (ir->return_deref) { 1689 nir_variable *ret_tmp = 1690 nir_local_variable_create(this->impl, ir->return_deref->type, 1691 "return_tmp"); 1692 ret_deref = nir_build_deref_var(&b, ret_tmp); 1693 call->params[i++] = nir_src_for_ssa(&ret_deref->dest.ssa); 1694 } 1695 1696 foreach_two_lists(formal_node, &ir->callee->parameters, 1697 actual_node, &ir->actual_parameters) { 1698 ir_rvalue *param_rvalue = (ir_rvalue *) actual_node; 1699 ir_variable *sig_param = (ir_variable *) formal_node; 1700 1701 if (sig_param->data.mode == ir_var_function_out) { 1702 nir_deref_instr *out_deref = evaluate_deref(param_rvalue); 1703 call->params[i] = nir_src_for_ssa(&out_deref->dest.ssa); 1704 } else if (sig_param->data.mode == ir_var_function_in) { 1705 nir_ssa_def *val = evaluate_rvalue(param_rvalue); 1706 nir_src src = nir_src_for_ssa(val); 1707 1708 
nir_src_copy(&call->params[i], &src); 1709 } else if (sig_param->data.mode == ir_var_function_inout) { 1710 unreachable("unimplemented: inout parameters"); 1711 } 1712 1713 i++; 1714 } 1715 1716 nir_builder_instr_insert(&b, &call->instr); 1717 1718 if (ir->return_deref) 1719 nir_store_deref(&b, evaluate_deref(ir->return_deref), nir_load_deref(&b, ret_deref), ~0); 1720} 1721 1722void 1723nir_visitor::visit(ir_assignment *ir) 1724{ 1725 unsigned num_components = ir->lhs->type->vector_elements; 1726 unsigned write_mask = ir->write_mask; 1727 1728 b.exact = ir->lhs->variable_referenced()->data.invariant || 1729 ir->lhs->variable_referenced()->data.precise; 1730 1731 if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) && 1732 (write_mask == BITFIELD_MASK(num_components) || write_mask == 0)) { 1733 nir_deref_instr *lhs = evaluate_deref(ir->lhs); 1734 nir_deref_instr *rhs = evaluate_deref(ir->rhs); 1735 enum gl_access_qualifier lhs_qualifiers = deref_get_qualifier(lhs); 1736 enum gl_access_qualifier rhs_qualifiers = deref_get_qualifier(rhs); 1737 1738 nir_copy_deref_with_access(&b, lhs, rhs, lhs_qualifiers, 1739 rhs_qualifiers); 1740 return; 1741 } 1742 1743 ir_texture *tex = ir->rhs->as_texture(); 1744 bool is_sparse = tex && tex->is_sparse; 1745 1746 if (!is_sparse) 1747 assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector()); 1748 1749 ir->lhs->accept(this); 1750 nir_deref_instr *lhs_deref = this->deref; 1751 nir_ssa_def *src = evaluate_rvalue(ir->rhs); 1752 1753 if (is_sparse) { 1754 adjust_sparse_variable(lhs_deref, tex->type, src); 1755 1756 /* correct component and mask because they are 0 for struct */ 1757 num_components = src->num_components; 1758 write_mask = BITFIELD_MASK(num_components); 1759 } 1760 1761 if (write_mask != BITFIELD_MASK(num_components) && write_mask != 0) { 1762 /* GLSL IR will give us the input to the write-masked assignment in a 1763 * single packed vector. So, for example, if the writemask is xzw, then 1764 * we have to swizzle x -> x, y -> z, and z -> w and get the y component 1765 * from the load. 1766 */ 1767 unsigned swiz[4]; 1768 unsigned component = 0; 1769 for (unsigned i = 0; i < 4; i++) { 1770 swiz[i] = write_mask & (1 << i) ? component++ : 0; 1771 } 1772 src = nir_swizzle(&b, src, swiz, num_components); 1773 } 1774 1775 enum gl_access_qualifier qualifiers = deref_get_qualifier(lhs_deref); 1776 1777 nir_store_deref_with_access(&b, lhs_deref, src, write_mask, 1778 qualifiers); 1779} 1780 1781/* 1782 * Given an instruction, returns a pointer to its destination or NULL if there 1783 * is no destination. 1784 * 1785 * Note that this only handles instructions we generate at this level. 
1786 */ 1787static nir_dest * 1788get_instr_dest(nir_instr *instr) 1789{ 1790 nir_alu_instr *alu_instr; 1791 nir_intrinsic_instr *intrinsic_instr; 1792 nir_tex_instr *tex_instr; 1793 1794 switch (instr->type) { 1795 case nir_instr_type_alu: 1796 alu_instr = nir_instr_as_alu(instr); 1797 return &alu_instr->dest.dest; 1798 1799 case nir_instr_type_intrinsic: 1800 intrinsic_instr = nir_instr_as_intrinsic(instr); 1801 if (nir_intrinsic_infos[intrinsic_instr->intrinsic].has_dest) 1802 return &intrinsic_instr->dest; 1803 else 1804 return NULL; 1805 1806 case nir_instr_type_tex: 1807 tex_instr = nir_instr_as_tex(instr); 1808 return &tex_instr->dest; 1809 1810 default: 1811 unreachable("not reached"); 1812 } 1813 1814 return NULL; 1815} 1816 1817void 1818nir_visitor::add_instr(nir_instr *instr, unsigned num_components, 1819 unsigned bit_size) 1820{ 1821 nir_dest *dest = get_instr_dest(instr); 1822 1823 if (dest) 1824 nir_ssa_dest_init(instr, dest, num_components, bit_size, NULL); 1825 1826 nir_builder_instr_insert(&b, instr); 1827 1828 if (dest) { 1829 assert(dest->is_ssa); 1830 this->result = &dest->ssa; 1831 } 1832} 1833 1834nir_ssa_def * 1835nir_visitor::evaluate_rvalue(ir_rvalue* ir) 1836{ 1837 ir->accept(this); 1838 if (ir->as_dereference() || ir->as_constant()) { 1839 /* 1840 * A dereference is being used on the right hand side, which means we 1841 * must emit a variable load. 1842 */ 1843 1844 enum gl_access_qualifier access = deref_get_qualifier(this->deref); 1845 this->result = nir_load_deref_with_access(&b, this->deref, access); 1846 1847 if (nir_deref_mode_is(this->deref, nir_var_shader_out)) 1848 this->has_output_rvalue = true; 1849 } 1850 1851 return this->result; 1852} 1853 1854static bool 1855type_is_float(glsl_base_type type) 1856{ 1857 return type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_DOUBLE || 1858 type == GLSL_TYPE_FLOAT16; 1859} 1860 1861static bool 1862type_is_signed(glsl_base_type type) 1863{ 1864 return type == GLSL_TYPE_INT || type == GLSL_TYPE_INT64 || 1865 type == GLSL_TYPE_INT16; 1866} 1867 1868void 1869nir_visitor::visit(ir_expression *ir) 1870{ 1871 /* Some special cases */ 1872 switch (ir->operation) { 1873 case ir_unop_interpolate_at_centroid: 1874 case ir_binop_interpolate_at_offset: 1875 case ir_binop_interpolate_at_sample: { 1876 ir_dereference *deref = ir->operands[0]->as_dereference(); 1877 ir_swizzle *swizzle = NULL; 1878 if (!deref) { 1879 /* the api does not allow a swizzle here, but the varying packing code 1880 * may have pushed one into here. 1881 */ 1882 swizzle = ir->operands[0]->as_swizzle(); 1883 assert(swizzle); 1884 deref = swizzle->val->as_dereference(); 1885 assert(deref); 1886 } 1887 1888 deref->accept(this); 1889 1890 nir_intrinsic_op op; 1891 if (nir_deref_mode_is(this->deref, nir_var_shader_in)) { 1892 switch (ir->operation) { 1893 case ir_unop_interpolate_at_centroid: 1894 op = nir_intrinsic_interp_deref_at_centroid; 1895 break; 1896 case ir_binop_interpolate_at_offset: 1897 op = nir_intrinsic_interp_deref_at_offset; 1898 break; 1899 case ir_binop_interpolate_at_sample: 1900 op = nir_intrinsic_interp_deref_at_sample; 1901 break; 1902 default: 1903 unreachable("Invalid interpolation intrinsic"); 1904 } 1905 } else { 1906 /* This case can happen if the vertex shader does not write the 1907 * given varying. In this case, the linker will lower it to a 1908 * global variable. Since interpolating a variable makes no 1909 * sense, we'll just turn it into a load which will probably 1910 * eventually end up as an SSA definition. 
1911 */ 1912 assert(nir_deref_mode_is(this->deref, nir_var_shader_temp)); 1913 op = nir_intrinsic_load_deref; 1914 } 1915 1916 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op); 1917 intrin->num_components = deref->type->vector_elements; 1918 intrin->src[0] = nir_src_for_ssa(&this->deref->dest.ssa); 1919 1920 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_offset || 1921 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample) 1922 intrin->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); 1923 1924 unsigned bit_size = glsl_get_bit_size(deref->type); 1925 add_instr(&intrin->instr, deref->type->vector_elements, bit_size); 1926 1927 if (swizzle) { 1928 unsigned swiz[4] = { 1929 swizzle->mask.x, swizzle->mask.y, swizzle->mask.z, swizzle->mask.w 1930 }; 1931 1932 result = nir_swizzle(&b, result, swiz, 1933 swizzle->type->vector_elements); 1934 } 1935 1936 return; 1937 } 1938 1939 case ir_unop_ssbo_unsized_array_length: { 1940 nir_intrinsic_instr *intrin = 1941 nir_intrinsic_instr_create(b.shader, 1942 nir_intrinsic_deref_buffer_array_length); 1943 1944 ir_dereference *deref = ir->operands[0]->as_dereference(); 1945 intrin->src[0] = nir_src_for_ssa(&evaluate_deref(deref)->dest.ssa); 1946 1947 add_instr(&intrin->instr, 1, 32); 1948 return; 1949 } 1950 1951 case ir_binop_ubo_load: 1952 /* UBO loads should only have been lowered in GLSL IR for non-nir drivers, 1953 * NIR drivers make use of gl_nir_lower_buffers() instead. 1954 */ 1955 unreachable("Invalid operation nir doesn't want lowered ubo loads"); 1956 default: 1957 break; 1958 } 1959 1960 nir_ssa_def *srcs[4]; 1961 for (unsigned i = 0; i < ir->num_operands; i++) 1962 srcs[i] = evaluate_rvalue(ir->operands[i]); 1963 1964 glsl_base_type types[4]; 1965 for (unsigned i = 0; i < ir->num_operands; i++) 1966 types[i] = ir->operands[i]->type->base_type; 1967 1968 glsl_base_type out_type = ir->type->base_type; 1969 1970 switch (ir->operation) { 1971 case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break; 1972 case ir_unop_logic_not: 1973 result = nir_inot(&b, srcs[0]); 1974 break; 1975 case ir_unop_neg: 1976 result = type_is_float(types[0]) ? nir_fneg(&b, srcs[0]) 1977 : nir_ineg(&b, srcs[0]); 1978 break; 1979 case ir_unop_abs: 1980 result = type_is_float(types[0]) ? nir_fabs(&b, srcs[0]) 1981 : nir_iabs(&b, srcs[0]); 1982 break; 1983 case ir_unop_clz: 1984 result = nir_uclz(&b, srcs[0]); 1985 break; 1986 case ir_unop_saturate: 1987 assert(type_is_float(types[0])); 1988 result = nir_fsat(&b, srcs[0]); 1989 break; 1990 case ir_unop_sign: 1991 result = type_is_float(types[0]) ? 
nir_fsign(&b, srcs[0]) 1992 : nir_isign(&b, srcs[0]); 1993 break; 1994 case ir_unop_rcp: result = nir_frcp(&b, srcs[0]); break; 1995 case ir_unop_rsq: result = nir_frsq(&b, srcs[0]); break; 1996 case ir_unop_sqrt: result = nir_fsqrt(&b, srcs[0]); break; 1997 case ir_unop_exp: result = nir_fexp2(&b, nir_fmul_imm(&b, srcs[0], M_LOG2E)); break; 1998 case ir_unop_log: result = nir_fmul_imm(&b, nir_flog2(&b, srcs[0]), 1.0 / M_LOG2E); break; 1999 case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break; 2000 case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break; 2001 case ir_unop_i2f: 2002 case ir_unop_u2f: 2003 case ir_unop_b2f: 2004 case ir_unop_f2i: 2005 case ir_unop_f2u: 2006 case ir_unop_f2b: 2007 case ir_unop_i2b: 2008 case ir_unop_b2i: 2009 case ir_unop_b2i64: 2010 case ir_unop_d2f: 2011 case ir_unop_f2d: 2012 case ir_unop_f162f: 2013 case ir_unop_f2f16: 2014 case ir_unop_f162b: 2015 case ir_unop_b2f16: 2016 case ir_unop_i2i: 2017 case ir_unop_u2u: 2018 case ir_unop_d2i: 2019 case ir_unop_d2u: 2020 case ir_unop_d2b: 2021 case ir_unop_i2d: 2022 case ir_unop_u2d: 2023 case ir_unop_i642i: 2024 case ir_unop_i642u: 2025 case ir_unop_i642f: 2026 case ir_unop_i642b: 2027 case ir_unop_i642d: 2028 case ir_unop_u642i: 2029 case ir_unop_u642u: 2030 case ir_unop_u642f: 2031 case ir_unop_u642d: 2032 case ir_unop_i2i64: 2033 case ir_unop_u2i64: 2034 case ir_unop_f2i64: 2035 case ir_unop_d2i64: 2036 case ir_unop_i2u64: 2037 case ir_unop_u2u64: 2038 case ir_unop_f2u64: 2039 case ir_unop_d2u64: 2040 case ir_unop_i2u: 2041 case ir_unop_u2i: 2042 case ir_unop_i642u64: 2043 case ir_unop_u642i64: { 2044 nir_alu_type src_type = nir_get_nir_type_for_glsl_base_type(types[0]); 2045 nir_alu_type dst_type = nir_get_nir_type_for_glsl_base_type(out_type); 2046 result = nir_build_alu(&b, nir_type_conversion_op(src_type, dst_type, 2047 nir_rounding_mode_undef), 2048 srcs[0], NULL, NULL, NULL); 2049 /* b2i and b2f don't have fixed bit-size versions so the builder will 2050 * just assume 32 and we have to fix it up here. 
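 * For example, an ir_unop_b2i64 conversion needs a 64-bit result, so the
 * assignment below widens the destination from the assumed 32 bits.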
2051 */ 2052 result->bit_size = nir_alu_type_get_type_size(dst_type); 2053 break; 2054 } 2055 2056 case ir_unop_f2fmp: { 2057 result = nir_build_alu(&b, nir_op_f2fmp, srcs[0], NULL, NULL, NULL); 2058 break; 2059 } 2060 2061 case ir_unop_i2imp: { 2062 result = nir_build_alu(&b, nir_op_i2imp, srcs[0], NULL, NULL, NULL); 2063 break; 2064 } 2065 2066 case ir_unop_u2ump: { 2067 result = nir_build_alu(&b, nir_op_i2imp, srcs[0], NULL, NULL, NULL); 2068 break; 2069 } 2070 2071 case ir_unop_bitcast_i2f: 2072 case ir_unop_bitcast_f2i: 2073 case ir_unop_bitcast_u2f: 2074 case ir_unop_bitcast_f2u: 2075 case ir_unop_bitcast_i642d: 2076 case ir_unop_bitcast_d2i64: 2077 case ir_unop_bitcast_u642d: 2078 case ir_unop_bitcast_d2u64: 2079 case ir_unop_subroutine_to_int: 2080 /* no-op */ 2081 result = nir_mov(&b, srcs[0]); 2082 break; 2083 case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break; 2084 case ir_unop_ceil: result = nir_fceil(&b, srcs[0]); break; 2085 case ir_unop_floor: result = nir_ffloor(&b, srcs[0]); break; 2086 case ir_unop_fract: result = nir_ffract(&b, srcs[0]); break; 2087 case ir_unop_frexp_exp: result = nir_frexp_exp(&b, srcs[0]); break; 2088 case ir_unop_frexp_sig: result = nir_frexp_sig(&b, srcs[0]); break; 2089 case ir_unop_round_even: result = nir_fround_even(&b, srcs[0]); break; 2090 case ir_unop_sin: result = nir_fsin(&b, srcs[0]); break; 2091 case ir_unop_cos: result = nir_fcos(&b, srcs[0]); break; 2092 case ir_unop_dFdx: result = nir_fddx(&b, srcs[0]); break; 2093 case ir_unop_dFdy: result = nir_fddy(&b, srcs[0]); break; 2094 case ir_unop_dFdx_fine: result = nir_fddx_fine(&b, srcs[0]); break; 2095 case ir_unop_dFdy_fine: result = nir_fddy_fine(&b, srcs[0]); break; 2096 case ir_unop_dFdx_coarse: result = nir_fddx_coarse(&b, srcs[0]); break; 2097 case ir_unop_dFdy_coarse: result = nir_fddy_coarse(&b, srcs[0]); break; 2098 case ir_unop_pack_snorm_2x16: 2099 result = nir_pack_snorm_2x16(&b, srcs[0]); 2100 break; 2101 case ir_unop_pack_snorm_4x8: 2102 result = nir_pack_snorm_4x8(&b, srcs[0]); 2103 break; 2104 case ir_unop_pack_unorm_2x16: 2105 result = nir_pack_unorm_2x16(&b, srcs[0]); 2106 break; 2107 case ir_unop_pack_unorm_4x8: 2108 result = nir_pack_unorm_4x8(&b, srcs[0]); 2109 break; 2110 case ir_unop_pack_half_2x16: 2111 result = nir_pack_half_2x16(&b, srcs[0]); 2112 break; 2113 case ir_unop_unpack_snorm_2x16: 2114 result = nir_unpack_snorm_2x16(&b, srcs[0]); 2115 break; 2116 case ir_unop_unpack_snorm_4x8: 2117 result = nir_unpack_snorm_4x8(&b, srcs[0]); 2118 break; 2119 case ir_unop_unpack_unorm_2x16: 2120 result = nir_unpack_unorm_2x16(&b, srcs[0]); 2121 break; 2122 case ir_unop_unpack_unorm_4x8: 2123 result = nir_unpack_unorm_4x8(&b, srcs[0]); 2124 break; 2125 case ir_unop_unpack_half_2x16: 2126 result = nir_unpack_half_2x16(&b, srcs[0]); 2127 break; 2128 case ir_unop_pack_sampler_2x32: 2129 case ir_unop_pack_image_2x32: 2130 case ir_unop_pack_double_2x32: 2131 case ir_unop_pack_int_2x32: 2132 case ir_unop_pack_uint_2x32: 2133 result = nir_pack_64_2x32(&b, srcs[0]); 2134 break; 2135 case ir_unop_unpack_sampler_2x32: 2136 case ir_unop_unpack_image_2x32: 2137 case ir_unop_unpack_double_2x32: 2138 case ir_unop_unpack_int_2x32: 2139 case ir_unop_unpack_uint_2x32: 2140 result = nir_unpack_64_2x32(&b, srcs[0]); 2141 break; 2142 case ir_unop_bitfield_reverse: 2143 result = nir_bitfield_reverse(&b, srcs[0]); 2144 break; 2145 case ir_unop_bit_count: 2146 result = nir_bit_count(&b, srcs[0]); 2147 break; 2148 case ir_unop_find_msb: 2149 switch (types[0]) { 2150 case GLSL_TYPE_UINT: 
2151 result = nir_ufind_msb(&b, srcs[0]); 2152 break; 2153 case GLSL_TYPE_INT: 2154 result = nir_ifind_msb(&b, srcs[0]); 2155 break; 2156 default: 2157 unreachable("Invalid type for findMSB()"); 2158 } 2159 break; 2160 case ir_unop_find_lsb: 2161 result = nir_find_lsb(&b, srcs[0]); 2162 break; 2163 2164 case ir_unop_get_buffer_size: { 2165 nir_intrinsic_instr *load = nir_intrinsic_instr_create( 2166 this->shader, 2167 nir_intrinsic_get_ssbo_size); 2168 load->num_components = ir->type->vector_elements; 2169 load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); 2170 unsigned bit_size = glsl_get_bit_size(ir->type); 2171 add_instr(&load->instr, ir->type->vector_elements, bit_size); 2172 return; 2173 } 2174 2175 case ir_unop_atan: 2176 result = nir_atan(&b, srcs[0]); 2177 break; 2178 2179 case ir_binop_add: 2180 result = type_is_float(out_type) ? nir_fadd(&b, srcs[0], srcs[1]) 2181 : nir_iadd(&b, srcs[0], srcs[1]); 2182 break; 2183 case ir_binop_add_sat: 2184 result = type_is_signed(out_type) ? nir_iadd_sat(&b, srcs[0], srcs[1]) 2185 : nir_uadd_sat(&b, srcs[0], srcs[1]); 2186 break; 2187 case ir_binop_sub: 2188 result = type_is_float(out_type) ? nir_fsub(&b, srcs[0], srcs[1]) 2189 : nir_isub(&b, srcs[0], srcs[1]); 2190 break; 2191 case ir_binop_sub_sat: 2192 result = type_is_signed(out_type) ? nir_isub_sat(&b, srcs[0], srcs[1]) 2193 : nir_usub_sat(&b, srcs[0], srcs[1]); 2194 break; 2195 case ir_binop_abs_sub: 2196 /* out_type is always unsigned for ir_binop_abs_sub, so we have to key 2197 * on the type of the sources. 2198 */ 2199 result = type_is_signed(types[0]) ? nir_uabs_isub(&b, srcs[0], srcs[1]) 2200 : nir_uabs_usub(&b, srcs[0], srcs[1]); 2201 break; 2202 case ir_binop_avg: 2203 result = type_is_signed(out_type) ? nir_ihadd(&b, srcs[0], srcs[1]) 2204 : nir_uhadd(&b, srcs[0], srcs[1]); 2205 break; 2206 case ir_binop_avg_round: 2207 result = type_is_signed(out_type) ? nir_irhadd(&b, srcs[0], srcs[1]) 2208 : nir_urhadd(&b, srcs[0], srcs[1]); 2209 break; 2210 case ir_binop_mul_32x16: 2211 result = type_is_signed(out_type) ? nir_imul_32x16(&b, srcs[0], srcs[1]) 2212 : nir_umul_32x16(&b, srcs[0], srcs[1]); 2213 break; 2214 case ir_binop_mul: 2215 if (type_is_float(out_type)) 2216 result = nir_fmul(&b, srcs[0], srcs[1]); 2217 else if (out_type == GLSL_TYPE_INT64 && 2218 (ir->operands[0]->type->base_type == GLSL_TYPE_INT || 2219 ir->operands[1]->type->base_type == GLSL_TYPE_INT)) 2220 result = nir_imul_2x32_64(&b, srcs[0], srcs[1]); 2221 else if (out_type == GLSL_TYPE_UINT64 && 2222 (ir->operands[0]->type->base_type == GLSL_TYPE_UINT || 2223 ir->operands[1]->type->base_type == GLSL_TYPE_UINT)) 2224 result = nir_umul_2x32_64(&b, srcs[0], srcs[1]); 2225 else 2226 result = nir_imul(&b, srcs[0], srcs[1]); 2227 break; 2228 case ir_binop_div: 2229 if (type_is_float(out_type)) 2230 result = nir_fdiv(&b, srcs[0], srcs[1]); 2231 else if (type_is_signed(out_type)) 2232 result = nir_idiv(&b, srcs[0], srcs[1]); 2233 else 2234 result = nir_udiv(&b, srcs[0], srcs[1]); 2235 break; 2236 case ir_binop_mod: 2237 result = type_is_float(out_type) ? 
nir_fmod(&b, srcs[0], srcs[1]) 2238 : nir_umod(&b, srcs[0], srcs[1]); 2239 break; 2240 case ir_binop_min: 2241 if (type_is_float(out_type)) 2242 result = nir_fmin(&b, srcs[0], srcs[1]); 2243 else if (type_is_signed(out_type)) 2244 result = nir_imin(&b, srcs[0], srcs[1]); 2245 else 2246 result = nir_umin(&b, srcs[0], srcs[1]); 2247 break; 2248 case ir_binop_max: 2249 if (type_is_float(out_type)) 2250 result = nir_fmax(&b, srcs[0], srcs[1]); 2251 else if (type_is_signed(out_type)) 2252 result = nir_imax(&b, srcs[0], srcs[1]); 2253 else 2254 result = nir_umax(&b, srcs[0], srcs[1]); 2255 break; 2256 case ir_binop_pow: result = nir_fpow(&b, srcs[0], srcs[1]); break; 2257 case ir_binop_bit_and: result = nir_iand(&b, srcs[0], srcs[1]); break; 2258 case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break; 2259 case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break; 2260 case ir_binop_logic_and: 2261 result = nir_iand(&b, srcs[0], srcs[1]); 2262 break; 2263 case ir_binop_logic_or: 2264 result = nir_ior(&b, srcs[0], srcs[1]); 2265 break; 2266 case ir_binop_logic_xor: 2267 result = nir_ixor(&b, srcs[0], srcs[1]); 2268 break; 2269 case ir_binop_lshift: result = nir_ishl(&b, srcs[0], nir_u2u32(&b, srcs[1])); break; 2270 case ir_binop_rshift: 2271 result = (type_is_signed(out_type)) ? nir_ishr(&b, srcs[0], nir_u2u32(&b, srcs[1])) 2272 : nir_ushr(&b, srcs[0], nir_u2u32(&b, srcs[1])); 2273 break; 2274 case ir_binop_imul_high: 2275 result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1]) 2276 : nir_umul_high(&b, srcs[0], srcs[1]); 2277 break; 2278 case ir_binop_carry: result = nir_uadd_carry(&b, srcs[0], srcs[1]); break; 2279 case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break; 2280 case ir_binop_less: 2281 if (type_is_float(types[0])) 2282 result = nir_flt(&b, srcs[0], srcs[1]); 2283 else if (type_is_signed(types[0])) 2284 result = nir_ilt(&b, srcs[0], srcs[1]); 2285 else 2286 result = nir_ult(&b, srcs[0], srcs[1]); 2287 break; 2288 case ir_binop_gequal: 2289 if (type_is_float(types[0])) 2290 result = nir_fge(&b, srcs[0], srcs[1]); 2291 else if (type_is_signed(types[0])) 2292 result = nir_ige(&b, srcs[0], srcs[1]); 2293 else 2294 result = nir_uge(&b, srcs[0], srcs[1]); 2295 break; 2296 case ir_binop_equal: 2297 if (type_is_float(types[0])) 2298 result = nir_feq(&b, srcs[0], srcs[1]); 2299 else 2300 result = nir_ieq(&b, srcs[0], srcs[1]); 2301 break; 2302 case ir_binop_nequal: 2303 if (type_is_float(types[0])) 2304 result = nir_fneu(&b, srcs[0], srcs[1]); 2305 else 2306 result = nir_ine(&b, srcs[0], srcs[1]); 2307 break; 2308 case ir_binop_all_equal: 2309 if (type_is_float(types[0])) { 2310 switch (ir->operands[0]->type->vector_elements) { 2311 case 1: result = nir_feq(&b, srcs[0], srcs[1]); break; 2312 case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break; 2313 case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break; 2314 case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break; 2315 default: 2316 unreachable("not reached"); 2317 } 2318 } else { 2319 switch (ir->operands[0]->type->vector_elements) { 2320 case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break; 2321 case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break; 2322 case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break; 2323 case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break; 2324 default: 2325 unreachable("not reached"); 2326 } 2327 } 2328 break; 2329 case ir_binop_any_nequal: 2330 if (type_is_float(types[0])) { 2331 switch 
(ir->operands[0]->type->vector_elements) { 2332 case 1: result = nir_fneu(&b, srcs[0], srcs[1]); break; 2333 case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break; 2334 case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break; 2335 case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break; 2336 default: 2337 unreachable("not reached"); 2338 } 2339 } else { 2340 switch (ir->operands[0]->type->vector_elements) { 2341 case 1: result = nir_ine(&b, srcs[0], srcs[1]); break; 2342 case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break; 2343 case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break; 2344 case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break; 2345 default: 2346 unreachable("not reached"); 2347 } 2348 } 2349 break; 2350 case ir_binop_dot: 2351 result = nir_fdot(&b, srcs[0], srcs[1]); 2352 break; 2353 case ir_binop_vector_extract: 2354 result = nir_vector_extract(&b, srcs[0], srcs[1]); 2355 break; 2356 2357 case ir_binop_atan2: 2358 result = nir_atan2(&b, srcs[0], srcs[1]); 2359 break; 2360 2361 case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break; 2362 case ir_triop_fma: 2363 result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]); 2364 break; 2365 case ir_triop_lrp: 2366 result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]); 2367 break; 2368 case ir_triop_csel: 2369 result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]); 2370 break; 2371 case ir_triop_bitfield_extract: 2372 result = ir->type->is_int_16_32() ? 2373 nir_ibitfield_extract(&b, nir_i2i32(&b, srcs[0]), nir_i2i32(&b, srcs[1]), nir_i2i32(&b, srcs[2])) : 2374 nir_ubitfield_extract(&b, nir_u2u32(&b, srcs[0]), nir_i2i32(&b, srcs[1]), nir_i2i32(&b, srcs[2])); 2375 break; 2376 case ir_quadop_bitfield_insert: 2377 result = nir_bitfield_insert(&b, 2378 nir_u2u32(&b, srcs[0]), nir_u2u32(&b, srcs[1]), 2379 nir_i2i32(&b, srcs[2]), nir_i2i32(&b, srcs[3])); 2380 break; 2381 case ir_quadop_vector: 2382 result = nir_vec(&b, srcs, ir->type->vector_elements); 2383 break; 2384 2385 default: 2386 unreachable("not reached"); 2387 } 2388} 2389 2390void 2391nir_visitor::visit(ir_swizzle *ir) 2392{ 2393 unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w }; 2394 result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle, 2395 ir->type->vector_elements); 2396} 2397 2398void 2399nir_visitor::visit(ir_texture *ir) 2400{ 2401 unsigned num_srcs; 2402 nir_texop op; 2403 switch (ir->op) { 2404 case ir_tex: 2405 op = nir_texop_tex; 2406 num_srcs = 1; /* coordinate */ 2407 break; 2408 2409 case ir_txb: 2410 case ir_txl: 2411 op = (ir->op == ir_txb) ? 
nir_texop_txb : nir_texop_txl; 2412 num_srcs = 2; /* coordinate, bias/lod */ 2413 break; 2414 2415 case ir_txd: 2416 op = nir_texop_txd; /* coordinate, dPdx, dPdy */ 2417 num_srcs = 3; 2418 break; 2419 2420 case ir_txf: 2421 op = nir_texop_txf; 2422 if (ir->lod_info.lod != NULL) 2423 num_srcs = 2; /* coordinate, lod */ 2424 else 2425 num_srcs = 1; /* coordinate */ 2426 break; 2427 2428 case ir_txf_ms: 2429 op = nir_texop_txf_ms; 2430 num_srcs = 2; /* coordinate, sample_index */ 2431 break; 2432 2433 case ir_txs: 2434 op = nir_texop_txs; 2435 if (ir->lod_info.lod != NULL) 2436 num_srcs = 1; /* lod */ 2437 else 2438 num_srcs = 0; 2439 break; 2440 2441 case ir_lod: 2442 op = nir_texop_lod; 2443 num_srcs = 1; /* coordinate */ 2444 break; 2445 2446 case ir_tg4: 2447 op = nir_texop_tg4; 2448 num_srcs = 1; /* coordinate */ 2449 break; 2450 2451 case ir_query_levels: 2452 op = nir_texop_query_levels; 2453 num_srcs = 0; 2454 break; 2455 2456 case ir_texture_samples: 2457 op = nir_texop_texture_samples; 2458 num_srcs = 0; 2459 break; 2460 2461 case ir_samples_identical: 2462 op = nir_texop_samples_identical; 2463 num_srcs = 1; /* coordinate */ 2464 break; 2465 2466 default: 2467 unreachable("not reached"); 2468 } 2469 2470 if (ir->projector != NULL) 2471 num_srcs++; 2472 if (ir->shadow_comparator != NULL) 2473 num_srcs++; 2474 /* offsets are constants we store inside nir_tex_intrs.offsets */ 2475 if (ir->offset != NULL && !ir->offset->type->is_array()) 2476 num_srcs++; 2477 if (ir->clamp != NULL) 2478 num_srcs++; 2479 2480 /* Add one for the texture deref */ 2481 num_srcs += 2; 2482 2483 nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); 2484 2485 instr->op = op; 2486 instr->sampler_dim = 2487 (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality; 2488 instr->is_array = ir->sampler->type->sampler_array; 2489 instr->is_shadow = ir->sampler->type->sampler_shadow; 2490 2491 const glsl_type *dest_type 2492 = ir->is_sparse ? 
ir->type->field_type("texel") : ir->type; 2493 assert(dest_type != glsl_type::error_type); 2494 if (instr->is_shadow) 2495 instr->is_new_style_shadow = (dest_type->vector_elements == 1); 2496 instr->dest_type = nir_get_nir_type_for_glsl_type(dest_type); 2497 instr->is_sparse = ir->is_sparse; 2498 2499 nir_deref_instr *sampler_deref = evaluate_deref(ir->sampler); 2500 2501 /* check for bindless handles */ 2502 if (!nir_deref_mode_is(sampler_deref, nir_var_uniform) || 2503 nir_deref_instr_get_variable(sampler_deref)->data.bindless) { 2504 nir_ssa_def *load = nir_load_deref(&b, sampler_deref); 2505 instr->src[0].src = nir_src_for_ssa(load); 2506 instr->src[0].src_type = nir_tex_src_texture_handle; 2507 instr->src[1].src = nir_src_for_ssa(load); 2508 instr->src[1].src_type = nir_tex_src_sampler_handle; 2509 } else { 2510 instr->src[0].src = nir_src_for_ssa(&sampler_deref->dest.ssa); 2511 instr->src[0].src_type = nir_tex_src_texture_deref; 2512 instr->src[1].src = nir_src_for_ssa(&sampler_deref->dest.ssa); 2513 instr->src[1].src_type = nir_tex_src_sampler_deref; 2514 } 2515 2516 unsigned src_number = 2; 2517 2518 if (ir->coordinate != NULL) { 2519 instr->coord_components = ir->coordinate->type->vector_elements; 2520 instr->src[src_number].src = 2521 nir_src_for_ssa(evaluate_rvalue(ir->coordinate)); 2522 instr->src[src_number].src_type = nir_tex_src_coord; 2523 src_number++; 2524 } 2525 2526 if (ir->projector != NULL) { 2527 instr->src[src_number].src = 2528 nir_src_for_ssa(evaluate_rvalue(ir->projector)); 2529 instr->src[src_number].src_type = nir_tex_src_projector; 2530 src_number++; 2531 } 2532 2533 if (ir->shadow_comparator != NULL) { 2534 instr->src[src_number].src = 2535 nir_src_for_ssa(evaluate_rvalue(ir->shadow_comparator)); 2536 instr->src[src_number].src_type = nir_tex_src_comparator; 2537 src_number++; 2538 } 2539 2540 if (ir->offset != NULL) { 2541 if (ir->offset->type->is_array()) { 2542 for (int i = 0; i < ir->offset->type->array_size(); i++) { 2543 const ir_constant *c = 2544 ir->offset->as_constant()->get_array_element(i); 2545 2546 for (unsigned j = 0; j < 2; ++j) { 2547 int val = c->get_int_component(j); 2548 instr->tg4_offsets[i][j] = val; 2549 } 2550 } 2551 } else { 2552 assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar()); 2553 2554 instr->src[src_number].src = 2555 nir_src_for_ssa(evaluate_rvalue(ir->offset)); 2556 instr->src[src_number].src_type = nir_tex_src_offset; 2557 src_number++; 2558 } 2559 } 2560 2561 if (ir->clamp) { 2562 instr->src[src_number].src = 2563 nir_src_for_ssa(evaluate_rvalue(ir->clamp)); 2564 instr->src[src_number].src_type = nir_tex_src_min_lod; 2565 src_number++; 2566 } 2567 2568 switch (ir->op) { 2569 case ir_txb: 2570 instr->src[src_number].src = 2571 nir_src_for_ssa(evaluate_rvalue(ir->lod_info.bias)); 2572 instr->src[src_number].src_type = nir_tex_src_bias; 2573 src_number++; 2574 break; 2575 2576 case ir_txl: 2577 case ir_txf: 2578 case ir_txs: 2579 if (ir->lod_info.lod != NULL) { 2580 instr->src[src_number].src = 2581 nir_src_for_ssa(evaluate_rvalue(ir->lod_info.lod)); 2582 instr->src[src_number].src_type = nir_tex_src_lod; 2583 src_number++; 2584 } 2585 break; 2586 2587 case ir_txd: 2588 instr->src[src_number].src = 2589 nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdx)); 2590 instr->src[src_number].src_type = nir_tex_src_ddx; 2591 src_number++; 2592 instr->src[src_number].src = 2593 nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdy)); 2594 instr->src[src_number].src_type = nir_tex_src_ddy; 2595 src_number++; 2596 
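      /* ir_txd counted three sources up front (coordinate, dPdx, dPdy), and
       * both derivative vectors were added above, so the
       * src_number == num_srcs assertion below still holds.
       */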
break;
2597
2598    case ir_txf_ms:
2599       instr->src[src_number].src =
2600          nir_src_for_ssa(evaluate_rvalue(ir->lod_info.sample_index));
2601       instr->src[src_number].src_type = nir_tex_src_ms_index;
2602       src_number++;
2603       break;
2604
2605    case ir_tg4:
2606       instr->component = ir->lod_info.component->as_constant()->value.u[0];
2607       break;
2608
2609    default:
2610       break;
2611    }
2612
2613    assert(src_number == num_srcs);
2614
2615    unsigned bit_size = glsl_get_bit_size(dest_type);
2616    add_instr(&instr->instr, nir_tex_instr_dest_size(instr), bit_size);
2617 }
2618
2619 void
2620 nir_visitor::visit(ir_constant *ir)
2621 {
2622    /*
2623     * We don't know if this variable is an array or struct that gets
2624     * dereferenced, so do the safe thing and make it a variable with a
2625     * constant initializer and return a dereference.
2626     */
2627
2628    nir_variable *var =
2629       nir_local_variable_create(this->impl, ir->type, "const_temp");
2630    var->data.read_only = true;
2631    var->constant_initializer = constant_copy(ir, var);
2632
2633    this->deref = nir_build_deref_var(&b, var);
2634 }
2635
2636 void
2637 nir_visitor::visit(ir_dereference_variable *ir)
2638 {
2639    if (ir->variable_referenced()->data.mode == ir_var_function_out) {
2640       unsigned i = (sig->return_type != glsl_type::void_type) ? 1 : 0;
2641
2642       foreach_in_list(ir_variable, param, &sig->parameters) {
2643          if (param == ir->variable_referenced()) {
2644             break;
2645          }
2646          i++;
2647       }
2648
2649       this->deref = nir_build_deref_cast(&b, nir_load_param(&b, i),
2650                                          nir_var_function_temp, ir->type, 0);
2651       return;
2652    }
2653
2654    assert(ir->variable_referenced()->data.mode != ir_var_function_inout);
2655
2656    struct hash_entry *entry =
2657       _mesa_hash_table_search(this->var_table, ir->var);
2658    assert(entry);
2659    nir_variable *var = (nir_variable *) entry->data;
2660
2661    this->deref = nir_build_deref_var(&b, var);
2662 }
2663
2664 void
2665 nir_visitor::visit(ir_dereference_record *ir)
2666 {
2667    ir->record->accept(this);
2668
2669    int field_index = ir->field_idx;
2670    assert(field_index >= 0);
2671
2672    /* sparse texture variable is a struct for ir_variable, but it has been
2673     * converted to a vector for nir_variable.
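     * The texel data lives in the leading components and the residency code
     * is packed into the last component (see adjust_sparse_variable()), which
     * is why the field accesses below become channel selects.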
2674 */ 2675 if (this->deref->deref_type == nir_deref_type_var && 2676 _mesa_set_search(this->sparse_variable_set, this->deref->var)) { 2677 nir_ssa_def *load = nir_load_deref(&b, this->deref); 2678 assert(load->num_components >= 2); 2679 2680 nir_ssa_def *ssa; 2681 const glsl_type *type = ir->record->type; 2682 if (field_index == type->field_index("code")) { 2683 /* last channel holds residency code */ 2684 ssa = nir_channel(&b, load, load->num_components - 1); 2685 } else { 2686 assert(field_index == type->field_index("texel")); 2687 2688 unsigned mask = BITFIELD_MASK(load->num_components - 1); 2689 ssa = nir_channels(&b, load, mask); 2690 } 2691 2692 /* still need to create a deref for return */ 2693 nir_variable *tmp = 2694 nir_local_variable_create(this->impl, ir->type, "deref_tmp"); 2695 this->deref = nir_build_deref_var(&b, tmp); 2696 nir_store_deref(&b, this->deref, ssa, ~0); 2697 } else 2698 this->deref = nir_build_deref_struct(&b, this->deref, field_index); 2699} 2700 2701void 2702nir_visitor::visit(ir_dereference_array *ir) 2703{ 2704 nir_ssa_def *index = evaluate_rvalue(ir->array_index); 2705 2706 ir->array->accept(this); 2707 2708 this->deref = nir_build_deref_array(&b, this->deref, index); 2709} 2710 2711void 2712nir_visitor::visit(ir_barrier *) 2713{ 2714 if (shader->info.stage == MESA_SHADER_COMPUTE) 2715 nir_memory_barrier_shared(&b); 2716 else if (shader->info.stage == MESA_SHADER_TESS_CTRL) 2717 nir_memory_barrier_tcs_patch(&b); 2718 2719 nir_control_barrier(&b); 2720} 2721 2722nir_shader * 2723glsl_float64_funcs_to_nir(struct gl_context *ctx, 2724 const nir_shader_compiler_options *options) 2725{ 2726 /* It's not possible to use float64 on GLSL ES, so don't bother trying to 2727 * build the support code. The support code depends on higher versions of 2728 * desktop GLSL, so it will fail to compile (below) anyway. 2729 */ 2730 if (!_mesa_is_desktop_gl(ctx) || ctx->Const.GLSLVersion < 400) 2731 return NULL; 2732 2733 /* We pretend it's a vertex shader. Ultimately, the stage shouldn't 2734 * matter because we're not optimizing anything here. 2735 */ 2736 struct gl_shader *sh = _mesa_new_shader(-1, MESA_SHADER_VERTEX); 2737 sh->Source = float64_source; 2738 sh->CompileStatus = COMPILE_FAILURE; 2739 _mesa_glsl_compile_shader(ctx, sh, false, false, true); 2740 2741 if (!sh->CompileStatus) { 2742 if (sh->InfoLog) { 2743 _mesa_problem(ctx, 2744 "fp64 software impl compile failed:\n%s\nsource:\n%s\n", 2745 sh->InfoLog, float64_source); 2746 } 2747 return NULL; 2748 } 2749 2750 nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_VERTEX, options, NULL); 2751 2752 nir_visitor v1(&ctx->Const, nir); 2753 nir_function_visitor v2(&v1); 2754 v2.run(sh->ir); 2755 visit_exec_list(sh->ir, &v1); 2756 2757 /* _mesa_delete_shader will try to free sh->Source but it's static const */ 2758 sh->Source = NULL; 2759 _mesa_delete_shader(ctx, sh); 2760 2761 nir_validate_shader(nir, "float64_funcs_to_nir"); 2762 2763 NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); 2764 NIR_PASS_V(nir, nir_lower_returns); 2765 NIR_PASS_V(nir, nir_inline_functions); 2766 NIR_PASS_V(nir, nir_opt_deref); 2767 2768 /* Do some optimizations to clean up the shader now. By optimizing the 2769 * functions in the library, we avoid having to re-do that work every 2770 * time we inline a copy of a function. Reducing basic blocks also helps 2771 * with compile times. 
2772 */ 2773 NIR_PASS_V(nir, nir_lower_vars_to_ssa); 2774 NIR_PASS_V(nir, nir_copy_prop); 2775 NIR_PASS_V(nir, nir_opt_dce); 2776 NIR_PASS_V(nir, nir_opt_cse); 2777 NIR_PASS_V(nir, nir_opt_gcm, true); 2778 NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false); 2779 NIR_PASS_V(nir, nir_opt_dce); 2780 2781 return nir; 2782} 2783