1/* 2 * Copyright © 2019 Google, Inc 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24/** 25 * \file lower_precision.cpp 26 */ 27 28#include "main/macros.h" 29#include "main/consts_exts.h" 30#include "compiler/glsl_types.h" 31#include "ir.h" 32#include "ir_builder.h" 33#include "ir_optimization.h" 34#include "ir_rvalue_visitor.h" 35#include "util/half_float.h" 36#include "util/set.h" 37#include "util/hash_table.h" 38#include <vector> 39 40namespace { 41 42class find_precision_visitor : public ir_rvalue_enter_visitor { 43public: 44 find_precision_visitor(const struct gl_shader_compiler_options *options); 45 ~find_precision_visitor(); 46 47 virtual void handle_rvalue(ir_rvalue **rvalue); 48 virtual ir_visitor_status visit_enter(ir_call *ir); 49 50 ir_function_signature *map_builtin(ir_function_signature *sig); 51 52 /* Set of rvalues that can be lowered. This will be filled in by 53 * find_lowerable_rvalues_visitor. Only the root node of a lowerable section 54 * will be added to this set. 55 */ 56 struct set *lowerable_rvalues; 57 58 /** 59 * A mapping of builtin signature functions to lowered versions. This is 60 * filled in lazily when a lowered version is needed. 61 */ 62 struct hash_table *lowered_builtins; 63 /** 64 * A temporary hash table only used in order to clone functions. 65 */ 66 struct hash_table *clone_ht; 67 68 void *lowered_builtin_mem_ctx; 69 70 const struct gl_shader_compiler_options *options; 71}; 72 73class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor { 74public: 75 enum can_lower_state { 76 UNKNOWN, 77 CANT_LOWER, 78 SHOULD_LOWER, 79 }; 80 81 enum parent_relation { 82 /* The parent performs a further operation involving the result from the 83 * child and can be lowered along with it. 84 */ 85 COMBINED_OPERATION, 86 /* The parent instruction’s operation is independent of the child type so 87 * the child should be lowered separately. 88 */ 89 INDEPENDENT_OPERATION, 90 }; 91 92 struct stack_entry { 93 ir_instruction *instr; 94 enum can_lower_state state; 95 /* List of child rvalues that can be lowered. When this stack entry is 96 * popped, if this node itself can’t be lowered than all of the children 97 * are root nodes to lower so we will add them to lowerable_rvalues. 98 * Otherwise if this node can also be lowered then we won’t add the 99 * children because we only want to add the topmost lowerable nodes to 100 * lowerable_rvalues and the children will be lowered as part of lowering 101 * this node. 102 */ 103 std::vector<ir_instruction *> lowerable_children; 104 }; 105 106 find_lowerable_rvalues_visitor(struct set *result, 107 const struct gl_shader_compiler_options *options); 108 109 static void stack_enter(class ir_instruction *ir, void *data); 110 static void stack_leave(class ir_instruction *ir, void *data); 111 112 virtual ir_visitor_status visit(ir_constant *ir); 113 virtual ir_visitor_status visit(ir_dereference_variable *ir); 114 115 virtual ir_visitor_status visit_enter(ir_dereference_record *ir); 116 virtual ir_visitor_status visit_enter(ir_dereference_array *ir); 117 virtual ir_visitor_status visit_enter(ir_texture *ir); 118 virtual ir_visitor_status visit_enter(ir_expression *ir); 119 120 virtual ir_visitor_status visit_leave(ir_assignment *ir); 121 virtual ir_visitor_status visit_leave(ir_call *ir); 122 123 can_lower_state handle_precision(const glsl_type *type, 124 int precision) const; 125 126 static parent_relation get_parent_relation(ir_instruction *parent, 127 ir_instruction *child); 128 129 std::vector<stack_entry> stack; 130 struct set *lowerable_rvalues; 131 const struct gl_shader_compiler_options *options; 132 133 void pop_stack_entry(); 134 void add_lowerable_children(const stack_entry &entry); 135}; 136 137class lower_precision_visitor : public ir_rvalue_visitor { 138public: 139 virtual void handle_rvalue(ir_rvalue **rvalue); 140 virtual ir_visitor_status visit_enter(ir_dereference_array *); 141 virtual ir_visitor_status visit_enter(ir_dereference_record *); 142 virtual ir_visitor_status visit_enter(ir_call *ir); 143 virtual ir_visitor_status visit_enter(ir_texture *ir); 144 virtual ir_visitor_status visit_leave(ir_expression *); 145}; 146 147static bool 148can_lower_type(const struct gl_shader_compiler_options *options, 149 const glsl_type *type) 150{ 151 /* Don’t lower any expressions involving non-float types except bool and 152 * texture samplers. This will rule out operations that change the type such 153 * as conversion to ints. Instead it will end up lowering the arguments 154 * instead and adding a final conversion to float32. We want to handle 155 * boolean types so that it will do comparisons as 16-bit. 156 */ 157 158 switch (type->without_array()->base_type) { 159 /* TODO: should we do anything for these two with regard to Int16 vs FP16 160 * support? 161 */ 162 case GLSL_TYPE_BOOL: 163 case GLSL_TYPE_SAMPLER: 164 case GLSL_TYPE_IMAGE: 165 return true; 166 167 case GLSL_TYPE_FLOAT: 168 return options->LowerPrecisionFloat16; 169 170 case GLSL_TYPE_UINT: 171 case GLSL_TYPE_INT: 172 return options->LowerPrecisionInt16; 173 174 default: 175 return false; 176 } 177} 178 179find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res, 180 const struct gl_shader_compiler_options *opts) 181{ 182 lowerable_rvalues = res; 183 options = opts; 184 callback_enter = stack_enter; 185 callback_leave = stack_leave; 186 data_enter = this; 187 data_leave = this; 188} 189 190void 191find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir, 192 void *data) 193{ 194 find_lowerable_rvalues_visitor *state = 195 (find_lowerable_rvalues_visitor *) data; 196 197 /* Add a new stack entry for this instruction */ 198 stack_entry entry; 199 200 entry.instr = ir; 201 entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN; 202 203 state->stack.push_back(entry); 204} 205 206void 207find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry) 208{ 209 /* We can’t lower this node so if there were any pending children then they 210 * are all root lowerable nodes and we should add them to the set. 211 */ 212 for (auto &it : entry.lowerable_children) 213 _mesa_set_add(lowerable_rvalues, it); 214} 215 216void 217find_lowerable_rvalues_visitor::pop_stack_entry() 218{ 219 const stack_entry &entry = stack.back(); 220 221 if (stack.size() >= 2) { 222 /* Combine this state into the parent state, unless the parent operation 223 * doesn’t have any relation to the child operations 224 */ 225 stack_entry &parent = stack.end()[-2]; 226 parent_relation rel = get_parent_relation(parent.instr, entry.instr); 227 228 if (rel == COMBINED_OPERATION) { 229 switch (entry.state) { 230 case CANT_LOWER: 231 parent.state = CANT_LOWER; 232 break; 233 case SHOULD_LOWER: 234 if (parent.state == UNKNOWN) 235 parent.state = SHOULD_LOWER; 236 break; 237 case UNKNOWN: 238 break; 239 } 240 } 241 } 242 243 if (entry.state == SHOULD_LOWER) { 244 ir_rvalue *rv = entry.instr->as_rvalue(); 245 246 if (rv == NULL) { 247 add_lowerable_children(entry); 248 } else if (stack.size() >= 2) { 249 stack_entry &parent = stack.end()[-2]; 250 251 switch (get_parent_relation(parent.instr, rv)) { 252 case COMBINED_OPERATION: 253 /* We only want to add the toplevel lowerable instructions to the 254 * lowerable set. Therefore if there is a parent then instead of 255 * adding this instruction to the set we will queue depending on 256 * the result of the parent instruction. 257 */ 258 parent.lowerable_children.push_back(entry.instr); 259 break; 260 case INDEPENDENT_OPERATION: 261 _mesa_set_add(lowerable_rvalues, rv); 262 break; 263 } 264 } else { 265 /* This is a toplevel node so add it directly to the lowerable 266 * set. 267 */ 268 _mesa_set_add(lowerable_rvalues, rv); 269 } 270 } else if (entry.state == CANT_LOWER) { 271 add_lowerable_children(entry); 272 } 273 274 stack.pop_back(); 275} 276 277void 278find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir, 279 void *data) 280{ 281 find_lowerable_rvalues_visitor *state = 282 (find_lowerable_rvalues_visitor *) data; 283 284 state->pop_stack_entry(); 285} 286 287enum find_lowerable_rvalues_visitor::can_lower_state 288find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type, 289 int precision) const 290{ 291 if (!can_lower_type(options, type)) 292 return CANT_LOWER; 293 294 switch (precision) { 295 case GLSL_PRECISION_NONE: 296 return UNKNOWN; 297 case GLSL_PRECISION_HIGH: 298 return CANT_LOWER; 299 case GLSL_PRECISION_MEDIUM: 300 case GLSL_PRECISION_LOW: 301 return SHOULD_LOWER; 302 } 303 304 return CANT_LOWER; 305} 306 307enum find_lowerable_rvalues_visitor::parent_relation 308find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent, 309 ir_instruction *child) 310{ 311 /* If the parent is a dereference instruction then the only child could be 312 * for example an array dereference and that should be lowered independently 313 * of the parent. 314 */ 315 if (parent->as_dereference()) 316 return INDEPENDENT_OPERATION; 317 318 /* The precision of texture sampling depend on the precision of the sampler. 319 * The rest of the arguments don’t matter so we can treat it as an 320 * independent operation. 321 */ 322 if (parent->as_texture()) 323 return INDEPENDENT_OPERATION; 324 325 return COMBINED_OPERATION; 326} 327 328ir_visitor_status 329find_lowerable_rvalues_visitor::visit(ir_constant *ir) 330{ 331 stack_enter(ir, this); 332 333 if (!can_lower_type(options, ir->type)) 334 stack.back().state = CANT_LOWER; 335 336 stack_leave(ir, this); 337 338 return visit_continue; 339} 340 341ir_visitor_status 342find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir) 343{ 344 stack_enter(ir, this); 345 346 if (stack.back().state == UNKNOWN) 347 stack.back().state = handle_precision(ir->type, ir->precision()); 348 349 stack_leave(ir, this); 350 351 return visit_continue; 352} 353 354ir_visitor_status 355find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir) 356{ 357 ir_hierarchical_visitor::visit_enter(ir); 358 359 if (stack.back().state == UNKNOWN) 360 stack.back().state = handle_precision(ir->type, ir->precision()); 361 362 return visit_continue; 363} 364 365ir_visitor_status 366find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir) 367{ 368 ir_hierarchical_visitor::visit_enter(ir); 369 370 if (stack.back().state == UNKNOWN) 371 stack.back().state = handle_precision(ir->type, ir->precision()); 372 373 return visit_continue; 374} 375 376ir_visitor_status 377find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir) 378{ 379 ir_hierarchical_visitor::visit_enter(ir); 380 381 /* The precision of the sample value depends on the precision of the 382 * sampler. 383 */ 384 stack.back().state = handle_precision(ir->type, 385 ir->sampler->precision()); 386 return visit_continue; 387} 388 389ir_visitor_status 390find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir) 391{ 392 ir_hierarchical_visitor::visit_enter(ir); 393 394 if (!can_lower_type(options, ir->type)) 395 stack.back().state = CANT_LOWER; 396 397 /* Don't lower precision for derivative calculations */ 398 if (!options->LowerPrecisionDerivatives && 399 (ir->operation == ir_unop_dFdx || 400 ir->operation == ir_unop_dFdx_coarse || 401 ir->operation == ir_unop_dFdx_fine || 402 ir->operation == ir_unop_dFdy || 403 ir->operation == ir_unop_dFdy_coarse || 404 ir->operation == ir_unop_dFdy_fine)) { 405 stack.back().state = CANT_LOWER; 406 } 407 408 return visit_continue; 409} 410 411static bool 412function_always_returns_mediump_or_lowp(const char *name) 413{ 414 return !strcmp(name, "bitCount") || 415 !strcmp(name, "findLSB") || 416 !strcmp(name, "findMSB") || 417 !strcmp(name, "unpackHalf2x16") || 418 !strcmp(name, "unpackUnorm4x8") || 419 !strcmp(name, "unpackSnorm4x8"); 420} 421 422static unsigned 423handle_call(ir_call *ir, const struct set *lowerable_rvalues) 424{ 425 /* The intrinsic call is inside the wrapper imageLoad function that will 426 * be inlined. We have to handle both of them. 427 */ 428 if (ir->callee->intrinsic_id == ir_intrinsic_image_load || 429 (ir->callee->is_builtin() && 430 !strcmp(ir->callee_name(), "imageLoad"))) { 431 ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head(); 432 ir_variable *resource = param->variable_referenced(); 433 434 assert(ir->callee->return_precision == GLSL_PRECISION_NONE); 435 assert(resource->type->without_array()->is_image()); 436 437 /* GLSL ES 3.20 requires that images have a precision modifier, but if 438 * you set one, it doesn't do anything, because all intrinsics are 439 * defined with highp. This seems to be a spec bug. 440 * 441 * In theory we could set the return value to mediump if the image 442 * format has a lower precision. This appears to be the most sensible 443 * thing to do. 444 */ 445 const struct util_format_description *desc = 446 util_format_description(resource->data.image_format); 447 int i = 448 util_format_get_first_non_void_channel(resource->data.image_format); 449 bool mediump; 450 451 assert(i >= 0); 452 453 if (desc->channel[i].pure_integer || 454 desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) 455 mediump = desc->channel[i].size <= 16; 456 else 457 mediump = desc->channel[i].size <= 10; /* unorm/snorm */ 458 459 return mediump ? GLSL_PRECISION_MEDIUM : GLSL_PRECISION_HIGH; 460 } 461 462 /* Return the declared precision for user-defined functions. */ 463 if (!ir->callee->is_builtin()) 464 return ir->callee->return_precision; 465 466 /* Handle special calls. */ 467 if (ir->callee->is_builtin() && ir->actual_parameters.length()) { 468 ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head(); 469 ir_variable *var = param->variable_referenced(); 470 471 /* Handle builtin wrappers around ir_texture opcodes. These wrappers will 472 * be inlined by lower_precision() if we return true here, so that we can 473 * get to ir_texture later and do proper lowering. 474 * 475 * We should lower the type of the return value if the sampler type 476 * uses lower precision. The function parameters don't matter. 477 */ 478 if (var && var->type->without_array()->is_sampler()) { 479 /* textureSize always returns highp. */ 480 if (!strcmp(ir->callee_name(), "textureSize")) 481 return GLSL_PRECISION_HIGH; 482 483 /* textureGatherOffsets always takes a highp array of constants. As 484 * per the discussion https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16547#note_1393704 485 * trying to lower the precision results in segfault later on 486 * in the compiler as textureGatherOffsets will end up being passed 487 * a temp when its expecting a constant as required by the spec. 488 */ 489 if (!strcmp(ir->callee_name(), "textureGatherOffsets")) 490 return GLSL_PRECISION_HIGH; 491 492 return var->data.precision; 493 } 494 } 495 496 if (/* Parameters are always highp: */ 497 !strcmp(ir->callee_name(), "floatBitsToInt") || 498 !strcmp(ir->callee_name(), "floatBitsToUint") || 499 !strcmp(ir->callee_name(), "intBitsToFloat") || 500 !strcmp(ir->callee_name(), "uintBitsToFloat") || 501 !strcmp(ir->callee_name(), "bitfieldReverse") || 502 !strcmp(ir->callee_name(), "frexp") || 503 !strcmp(ir->callee_name(), "ldexp") || 504 /* Parameters and outputs are always highp: */ 505 /* TODO: The operations are highp, but carry and borrow outputs are lowp. */ 506 !strcmp(ir->callee_name(), "uaddCarry") || 507 !strcmp(ir->callee_name(), "usubBorrow") || 508 !strcmp(ir->callee_name(), "imulExtended") || 509 !strcmp(ir->callee_name(), "umulExtended") || 510 !strcmp(ir->callee_name(), "unpackUnorm2x16") || 511 !strcmp(ir->callee_name(), "unpackSnorm2x16") || 512 /* Outputs are highp: */ 513 !strcmp(ir->callee_name(), "packUnorm2x16") || 514 !strcmp(ir->callee_name(), "packSnorm2x16") || 515 /* Parameters are mediump and outputs are highp. The parameters should 516 * be optimized in NIR, not here, e.g: 517 * - packHalf2x16 can just be a bitcast from f16vec2 to uint32 518 * - Other opcodes don't have to convert parameters to highp if the hw 519 * has f16 versions. Optimize in NIR accordingly. 520 */ 521 !strcmp(ir->callee_name(), "packHalf2x16") || 522 !strcmp(ir->callee_name(), "packUnorm4x8") || 523 !strcmp(ir->callee_name(), "packSnorm4x8") || 524 /* Atomic functions are not lowered. */ 525 strstr(ir->callee_name(), "atomic") == ir->callee_name()) 526 return GLSL_PRECISION_HIGH; 527 528 assert(ir->callee->return_precision == GLSL_PRECISION_NONE); 529 530 /* Number of parameters to check if they are lowerable. */ 531 unsigned check_parameters = ir->actual_parameters.length(); 532 533 /* Interpolation functions only consider the precision of the interpolant. */ 534 /* Bitfield functions ignore the precision of "offset" and "bits". */ 535 if (!strcmp(ir->callee_name(), "interpolateAtOffset") || 536 !strcmp(ir->callee_name(), "interpolateAtSample") || 537 !strcmp(ir->callee_name(), "bitfieldExtract")) { 538 check_parameters = 1; 539 } else if (!strcmp(ir->callee_name(), "bitfieldInsert")) { 540 check_parameters = 2; 541 } if (function_always_returns_mediump_or_lowp(ir->callee_name())) { 542 /* These only lower the return value. Parameters keep their precision, 543 * which is preserved in map_builtin. 544 */ 545 check_parameters = 0; 546 } 547 548 /* If the call is to a builtin, then the function won’t have a return 549 * precision and we should determine it from the precision of the arguments. 550 */ 551 foreach_in_list(ir_rvalue, param, &ir->actual_parameters) { 552 if (!check_parameters) 553 break; 554 555 if (!param->as_constant() && 556 _mesa_set_search(lowerable_rvalues, param) == NULL) 557 return GLSL_PRECISION_HIGH; 558 559 --check_parameters; 560 } 561 562 return GLSL_PRECISION_MEDIUM; 563} 564 565ir_visitor_status 566find_lowerable_rvalues_visitor::visit_leave(ir_call *ir) 567{ 568 ir_hierarchical_visitor::visit_leave(ir); 569 570 /* Special case for handling temporary variables generated by the compiler 571 * for function calls. If we assign to one of these using a function call 572 * that has a lowerable return type then we can assume the temporary 573 * variable should have a medium precision too. 574 */ 575 576 /* Do nothing if the return type is void. */ 577 if (!ir->return_deref) 578 return visit_continue; 579 580 ir_variable *var = ir->return_deref->variable_referenced(); 581 582 assert(var->data.mode == ir_var_temporary); 583 584 unsigned return_precision = handle_call(ir, lowerable_rvalues); 585 586 can_lower_state lower_state = 587 handle_precision(var->type, return_precision); 588 589 if (lower_state == SHOULD_LOWER) { 590 /* There probably shouldn’t be any situations where multiple ir_call 591 * instructions write to the same temporary? 592 */ 593 assert(var->data.precision == GLSL_PRECISION_NONE); 594 var->data.precision = GLSL_PRECISION_MEDIUM; 595 } else { 596 var->data.precision = GLSL_PRECISION_HIGH; 597 } 598 599 return visit_continue; 600} 601 602ir_visitor_status 603find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir) 604{ 605 ir_hierarchical_visitor::visit_leave(ir); 606 607 /* Special case for handling temporary variables generated by the compiler. 608 * If we assign to one of these using a lowered precision then we can assume 609 * the temporary variable should have a medium precision too. 610 */ 611 ir_variable *var = ir->lhs->variable_referenced(); 612 613 if (var->data.mode == ir_var_temporary) { 614 if (_mesa_set_search(lowerable_rvalues, ir->rhs)) { 615 /* Only override the precision if this is the first assignment. For 616 * temporaries such as the ones generated for the ?: operator there 617 * can be multiple assignments with different precisions. This way we 618 * get the highest precision of all of the assignments. 619 */ 620 if (var->data.precision == GLSL_PRECISION_NONE) 621 var->data.precision = GLSL_PRECISION_MEDIUM; 622 } else if (!ir->rhs->as_constant()) { 623 var->data.precision = GLSL_PRECISION_HIGH; 624 } 625 } 626 627 return visit_continue; 628} 629 630void 631find_lowerable_rvalues(const struct gl_shader_compiler_options *options, 632 exec_list *instructions, 633 struct set *result) 634{ 635 find_lowerable_rvalues_visitor v(result, options); 636 637 visit_list_elements(&v, instructions); 638 639 assert(v.stack.empty()); 640} 641 642static const glsl_type * 643convert_type(bool up, const glsl_type *type) 644{ 645 if (type->is_array()) { 646 return glsl_type::get_array_instance(convert_type(up, type->fields.array), 647 type->array_size(), 648 type->explicit_stride); 649 } 650 651 glsl_base_type new_base_type; 652 653 if (up) { 654 switch (type->base_type) { 655 case GLSL_TYPE_FLOAT16: 656 new_base_type = GLSL_TYPE_FLOAT; 657 break; 658 case GLSL_TYPE_INT16: 659 new_base_type = GLSL_TYPE_INT; 660 break; 661 case GLSL_TYPE_UINT16: 662 new_base_type = GLSL_TYPE_UINT; 663 break; 664 default: 665 unreachable("invalid type"); 666 return NULL; 667 } 668 } else { 669 switch (type->base_type) { 670 case GLSL_TYPE_FLOAT: 671 new_base_type = GLSL_TYPE_FLOAT16; 672 break; 673 case GLSL_TYPE_INT: 674 new_base_type = GLSL_TYPE_INT16; 675 break; 676 case GLSL_TYPE_UINT: 677 new_base_type = GLSL_TYPE_UINT16; 678 break; 679 default: 680 unreachable("invalid type"); 681 return NULL; 682 } 683 } 684 685 return glsl_type::get_instance(new_base_type, 686 type->vector_elements, 687 type->matrix_columns, 688 type->explicit_stride, 689 type->interface_row_major); 690} 691 692static const glsl_type * 693lower_glsl_type(const glsl_type *type) 694{ 695 return convert_type(false, type); 696} 697 698static ir_rvalue * 699convert_precision(bool up, ir_rvalue *ir) 700{ 701 unsigned op; 702 703 if (up) { 704 switch (ir->type->base_type) { 705 case GLSL_TYPE_FLOAT16: 706 op = ir_unop_f162f; 707 break; 708 case GLSL_TYPE_INT16: 709 op = ir_unop_i2i; 710 break; 711 case GLSL_TYPE_UINT16: 712 op = ir_unop_u2u; 713 break; 714 default: 715 unreachable("invalid type"); 716 return NULL; 717 } 718 } else { 719 switch (ir->type->base_type) { 720 case GLSL_TYPE_FLOAT: 721 op = ir_unop_f2fmp; 722 break; 723 case GLSL_TYPE_INT: 724 op = ir_unop_i2imp; 725 break; 726 case GLSL_TYPE_UINT: 727 op = ir_unop_u2ump; 728 break; 729 default: 730 unreachable("invalid type"); 731 return NULL; 732 } 733 } 734 735 const glsl_type *desired_type = convert_type(up, ir->type); 736 void *mem_ctx = ralloc_parent(ir); 737 return new(mem_ctx) ir_expression(op, desired_type, ir, NULL); 738} 739 740void 741lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue) 742{ 743 ir_rvalue *ir = *rvalue; 744 745 if (ir == NULL) 746 return; 747 748 if (ir->as_dereference()) { 749 if (!ir->type->is_boolean()) 750 *rvalue = convert_precision(false, ir); 751 } else if (ir->type->is_32bit()) { 752 ir->type = lower_glsl_type(ir->type); 753 754 ir_constant *const_ir = ir->as_constant(); 755 756 if (const_ir) { 757 ir_constant_data value; 758 759 if (ir->type->base_type == GLSL_TYPE_FLOAT16) { 760 for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++) 761 value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]); 762 } else if (ir->type->base_type == GLSL_TYPE_INT16) { 763 for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++) 764 value.i16[i] = const_ir->value.i[i]; 765 } else if (ir->type->base_type == GLSL_TYPE_UINT16) { 766 for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++) 767 value.u16[i] = const_ir->value.u[i]; 768 } else { 769 unreachable("invalid type"); 770 } 771 772 const_ir->value = value; 773 } 774 } 775} 776 777ir_visitor_status 778lower_precision_visitor::visit_enter(ir_dereference_record *ir) 779{ 780 /* We don’t want to lower the variable */ 781 return visit_continue_with_parent; 782} 783 784ir_visitor_status 785lower_precision_visitor::visit_enter(ir_dereference_array *ir) 786{ 787 /* We don’t want to convert the array index or the variable. If the array 788 * index itself is lowerable that will be handled separately. 789 */ 790 return visit_continue_with_parent; 791} 792 793ir_visitor_status 794lower_precision_visitor::visit_enter(ir_call *ir) 795{ 796 /* We don’t want to convert the arguments. These will be handled separately. 797 */ 798 return visit_continue_with_parent; 799} 800 801ir_visitor_status 802lower_precision_visitor::visit_enter(ir_texture *ir) 803{ 804 /* We don’t want to convert the arguments. These will be handled separately. 805 */ 806 return visit_continue_with_parent; 807} 808 809ir_visitor_status 810lower_precision_visitor::visit_leave(ir_expression *ir) 811{ 812 ir_rvalue_visitor::visit_leave(ir); 813 814 /* If the expression is a conversion operation to or from bool then fix the 815 * operation. 816 */ 817 switch (ir->operation) { 818 case ir_unop_b2f: 819 ir->operation = ir_unop_b2f16; 820 break; 821 case ir_unop_f2b: 822 ir->operation = ir_unop_f162b; 823 break; 824 case ir_unop_b2i: 825 case ir_unop_i2b: 826 /* Nothing to do - they both support int16. */ 827 break; 828 default: 829 break; 830 } 831 832 return visit_continue; 833} 834 835void 836find_precision_visitor::handle_rvalue(ir_rvalue **rvalue) 837{ 838 /* Checking the precision of rvalue can be lowered first throughout 839 * find_lowerable_rvalues_visitor. 840 * Once it found the precision of rvalue can be lowered, then we can 841 * add conversion f2fmp, etc. through lower_precision_visitor. 842 */ 843 if (*rvalue == NULL) 844 return; 845 846 struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue); 847 848 if (!entry) 849 return; 850 851 _mesa_set_remove(lowerable_rvalues, entry); 852 853 /* If the entire expression is just a variable dereference then trying to 854 * lower it will just directly add pointless to and from conversions without 855 * any actual operation in-between. Although these will eventually get 856 * optimised out, avoiding generating them here also avoids breaking inout 857 * parameters to functions. 858 */ 859 if ((*rvalue)->as_dereference()) 860 return; 861 862 lower_precision_visitor v; 863 864 (*rvalue)->accept(&v); 865 v.handle_rvalue(rvalue); 866 867 /* We don’t need to add the final conversion if the final type has been 868 * converted to bool 869 */ 870 if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL) { 871 *rvalue = convert_precision(true, *rvalue); 872 } 873} 874 875ir_visitor_status 876find_precision_visitor::visit_enter(ir_call *ir) 877{ 878 ir_rvalue_enter_visitor::visit_enter(ir); 879 880 ir_variable *return_var = 881 ir->return_deref ? ir->return_deref->variable_referenced() : NULL; 882 883 /* Don't do anything for image_load here. We have only changed the return 884 * value to mediump/lowp, so that following instructions can use reduced 885 * precision. 886 * 887 * The return value type of the intrinsic itself isn't changed here, but 888 * can be changed in NIR if all users use the *2*mp opcode. 889 */ 890 if (ir->callee->intrinsic_id == ir_intrinsic_image_load) 891 return visit_continue; 892 893 /* If this is a call to a builtin and the find_lowerable_rvalues_visitor 894 * overrode the precision of the temporary return variable, then we can 895 * replace the builtin implementation with a lowered version. 896 */ 897 898 if (!ir->callee->is_builtin() || 899 ir->callee->is_intrinsic() || 900 return_var == NULL || 901 (return_var->data.precision != GLSL_PRECISION_MEDIUM && 902 return_var->data.precision != GLSL_PRECISION_LOW)) 903 return visit_continue; 904 905 ir->callee = map_builtin(ir->callee); 906 ir->generate_inline(ir); 907 ir->remove(); 908 909 return visit_continue_with_parent; 910} 911 912ir_function_signature * 913find_precision_visitor::map_builtin(ir_function_signature *sig) 914{ 915 if (lowered_builtins == NULL) { 916 lowered_builtins = _mesa_pointer_hash_table_create(NULL); 917 clone_ht =_mesa_pointer_hash_table_create(NULL); 918 lowered_builtin_mem_ctx = ralloc_context(NULL); 919 } else { 920 struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig); 921 if (entry) 922 return (ir_function_signature *) entry->data; 923 } 924 925 ir_function_signature *lowered_sig = 926 sig->clone(lowered_builtin_mem_ctx, clone_ht); 927 928 /* Functions that always return mediump or lowp should keep their 929 * parameters intact, because they can be highp. NIR can lower 930 * the up-conversion for parameters if needed. 931 */ 932 if (!function_always_returns_mediump_or_lowp(sig->function_name())) { 933 foreach_in_list(ir_variable, param, &lowered_sig->parameters) { 934 param->data.precision = GLSL_PRECISION_MEDIUM; 935 } 936 } 937 938 lower_precision(options, &lowered_sig->body); 939 940 _mesa_hash_table_clear(clone_ht, NULL); 941 942 _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig); 943 944 return lowered_sig; 945} 946 947find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options) 948 : lowerable_rvalues(_mesa_pointer_set_create(NULL)), 949 lowered_builtins(NULL), 950 clone_ht(NULL), 951 lowered_builtin_mem_ctx(NULL), 952 options(options) 953{ 954} 955 956find_precision_visitor::~find_precision_visitor() 957{ 958 _mesa_set_destroy(lowerable_rvalues, NULL); 959 960 if (lowered_builtins) { 961 _mesa_hash_table_destroy(lowered_builtins, NULL); 962 _mesa_hash_table_destroy(clone_ht, NULL); 963 ralloc_free(lowered_builtin_mem_ctx); 964 } 965} 966 967/* Lowering opcodes to 16 bits is not enough for programs with control flow 968 * (and the ?: operator, which is represented by if-then-else in the IR), 969 * because temporary variables, which are used for passing values between 970 * code blocks, are not lowered, resulting in 32-bit phis in NIR. 971 * 972 * First change the variable types to 16 bits, then change all ir_dereference 973 * types to 16 bits. 974 */ 975class lower_variables_visitor : public ir_rvalue_enter_visitor { 976public: 977 lower_variables_visitor(const struct gl_shader_compiler_options *options) 978 : options(options) { 979 lower_vars = _mesa_pointer_set_create(NULL); 980 } 981 982 virtual ~lower_variables_visitor() 983 { 984 _mesa_set_destroy(lower_vars, NULL); 985 } 986 987 virtual ir_visitor_status visit(ir_variable *var); 988 virtual ir_visitor_status visit_enter(ir_assignment *ir); 989 virtual ir_visitor_status visit_enter(ir_return *ir); 990 virtual ir_visitor_status visit_enter(ir_call *ir); 991 virtual void handle_rvalue(ir_rvalue **rvalue); 992 993 void fix_types_in_deref_chain(ir_dereference *ir); 994 void convert_split_assignment(ir_dereference *lhs, ir_rvalue *rhs, 995 bool insert_before); 996 997 const struct gl_shader_compiler_options *options; 998 set *lower_vars; 999}; 1000 1001static void 1002lower_constant(ir_constant *ir) 1003{ 1004 if (ir->type->is_array()) { 1005 for (int i = 0; i < ir->type->array_size(); i++) 1006 lower_constant(ir->get_array_element(i)); 1007 1008 ir->type = lower_glsl_type(ir->type); 1009 return; 1010 } 1011 1012 ir->type = lower_glsl_type(ir->type); 1013 ir_constant_data value; 1014 1015 if (ir->type->base_type == GLSL_TYPE_FLOAT16) { 1016 for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++) 1017 value.f16[i] = _mesa_float_to_half(ir->value.f[i]); 1018 } else if (ir->type->base_type == GLSL_TYPE_INT16) { 1019 for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++) 1020 value.i16[i] = ir->value.i[i]; 1021 } else if (ir->type->base_type == GLSL_TYPE_UINT16) { 1022 for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++) 1023 value.u16[i] = ir->value.u[i]; 1024 } else { 1025 unreachable("invalid type"); 1026 } 1027 1028 ir->value = value; 1029} 1030 1031ir_visitor_status 1032lower_variables_visitor::visit(ir_variable *var) 1033{ 1034 if ((var->data.mode != ir_var_temporary && 1035 var->data.mode != ir_var_auto && 1036 /* Lower uniforms but not UBOs. */ 1037 (var->data.mode != ir_var_uniform || 1038 var->is_in_buffer_block() || 1039 !(options->LowerPrecisionFloat16Uniforms && 1040 var->type->without_array()->base_type == GLSL_TYPE_FLOAT))) || 1041 !var->type->without_array()->is_32bit() || 1042 (var->data.precision != GLSL_PRECISION_MEDIUM && 1043 var->data.precision != GLSL_PRECISION_LOW) || 1044 !can_lower_type(options, var->type)) 1045 return visit_continue; 1046 1047 /* Lower constant initializers. */ 1048 if (var->constant_value && 1049 var->type == var->constant_value->type) { 1050 if (!options->LowerPrecisionConstants) 1051 return visit_continue; 1052 var->constant_value = 1053 var->constant_value->clone(ralloc_parent(var), NULL); 1054 lower_constant(var->constant_value); 1055 } 1056 1057 if (var->constant_initializer && 1058 var->type == var->constant_initializer->type) { 1059 if (!options->LowerPrecisionConstants) 1060 return visit_continue; 1061 var->constant_initializer = 1062 var->constant_initializer->clone(ralloc_parent(var), NULL); 1063 lower_constant(var->constant_initializer); 1064 } 1065 1066 var->type = lower_glsl_type(var->type); 1067 _mesa_set_add(lower_vars, var); 1068 1069 return visit_continue; 1070} 1071 1072void 1073lower_variables_visitor::fix_types_in_deref_chain(ir_dereference *ir) 1074{ 1075 assert(ir->type->without_array()->is_32bit()); 1076 assert(_mesa_set_search(lower_vars, ir->variable_referenced())); 1077 1078 /* Fix the type in the dereference node. */ 1079 ir->type = lower_glsl_type(ir->type); 1080 1081 /* If it's an array, fix the types in the whole dereference chain. */ 1082 for (ir_dereference_array *deref_array = ir->as_dereference_array(); 1083 deref_array; 1084 deref_array = deref_array->array->as_dereference_array()) { 1085 assert(deref_array->array->type->without_array()->is_32bit()); 1086 deref_array->array->type = lower_glsl_type(deref_array->array->type); 1087 } 1088} 1089 1090void 1091lower_variables_visitor::convert_split_assignment(ir_dereference *lhs, 1092 ir_rvalue *rhs, 1093 bool insert_before) 1094{ 1095 void *mem_ctx = ralloc_parent(lhs); 1096 1097 if (lhs->type->is_array()) { 1098 for (unsigned i = 0; i < lhs->type->length; i++) { 1099 ir_dereference *l, *r; 1100 1101 l = new(mem_ctx) ir_dereference_array(lhs->clone(mem_ctx, NULL), 1102 new(mem_ctx) ir_constant(i)); 1103 r = new(mem_ctx) ir_dereference_array(rhs->clone(mem_ctx, NULL), 1104 new(mem_ctx) ir_constant(i)); 1105 convert_split_assignment(l, r, insert_before); 1106 } 1107 return; 1108 } 1109 1110 assert(lhs->type->is_16bit() || lhs->type->is_32bit()); 1111 assert(rhs->type->is_16bit() || rhs->type->is_32bit()); 1112 assert(lhs->type->is_16bit() != rhs->type->is_16bit()); 1113 1114 ir_assignment *assign = 1115 new(mem_ctx) ir_assignment(lhs, convert_precision(lhs->type->is_32bit(), rhs)); 1116 1117 if (insert_before) 1118 base_ir->insert_before(assign); 1119 else 1120 base_ir->insert_after(assign); 1121} 1122 1123ir_visitor_status 1124lower_variables_visitor::visit_enter(ir_assignment *ir) 1125{ 1126 ir_dereference *lhs = ir->lhs; 1127 ir_variable *var = lhs->variable_referenced(); 1128 ir_dereference *rhs_deref = ir->rhs->as_dereference(); 1129 ir_variable *rhs_var = rhs_deref ? rhs_deref->variable_referenced() : NULL; 1130 ir_constant *rhs_const = ir->rhs->as_constant(); 1131 1132 /* Legalize array assignments between lowered and non-lowered variables. */ 1133 if (lhs->type->is_array() && 1134 (rhs_var || rhs_const) && 1135 (!rhs_var || 1136 (var && 1137 var->type->without_array()->is_16bit() != 1138 rhs_var->type->without_array()->is_16bit())) && 1139 (!rhs_const || 1140 (var && 1141 var->type->without_array()->is_16bit() && 1142 rhs_const->type->without_array()->is_32bit()))) { 1143 assert(ir->rhs->type->is_array()); 1144 1145 /* Fix array assignments from lowered to non-lowered. */ 1146 if (rhs_var && _mesa_set_search(lower_vars, rhs_var)) { 1147 fix_types_in_deref_chain(rhs_deref); 1148 /* Convert to 32 bits for LHS. */ 1149 convert_split_assignment(lhs, rhs_deref, true); 1150 ir->remove(); 1151 return visit_continue; 1152 } 1153 1154 /* Fix array assignments from non-lowered to lowered. */ 1155 if (var && 1156 _mesa_set_search(lower_vars, var) && 1157 ir->rhs->type->without_array()->is_32bit()) { 1158 fix_types_in_deref_chain(lhs); 1159 /* Convert to 16 bits for LHS. */ 1160 convert_split_assignment(lhs, ir->rhs, true); 1161 ir->remove(); 1162 return visit_continue; 1163 } 1164 } 1165 1166 /* Fix assignment types. */ 1167 if (var && 1168 _mesa_set_search(lower_vars, var)) { 1169 /* Fix the LHS type. */ 1170 if (lhs->type->without_array()->is_32bit()) 1171 fix_types_in_deref_chain(lhs); 1172 1173 /* Fix the RHS type if it's a lowered variable. */ 1174 if (rhs_var && 1175 _mesa_set_search(lower_vars, rhs_var) && 1176 rhs_deref->type->without_array()->is_32bit()) 1177 fix_types_in_deref_chain(rhs_deref); 1178 1179 /* Fix the RHS type if it's a non-array expression. */ 1180 if (ir->rhs->type->is_32bit()) { 1181 ir_expression *expr = ir->rhs->as_expression(); 1182 1183 /* Convert the RHS to the LHS type. */ 1184 if (expr && 1185 (expr->operation == ir_unop_f162f || 1186 expr->operation == ir_unop_i2i || 1187 expr->operation == ir_unop_u2u) && 1188 expr->operands[0]->type->is_16bit()) { 1189 /* If there is an "up" conversion, just remove it. 1190 * This is optional. We could as well execute the else statement and 1191 * let NIR eliminate the up+down conversions. 1192 */ 1193 ir->rhs = expr->operands[0]; 1194 } else { 1195 /* Add a "down" conversion operation to fix the type of RHS. */ 1196 ir->rhs = convert_precision(false, ir->rhs); 1197 } 1198 } 1199 } 1200 1201 return ir_rvalue_enter_visitor::visit_enter(ir); 1202} 1203 1204ir_visitor_status 1205lower_variables_visitor::visit_enter(ir_return *ir) 1206{ 1207 void *mem_ctx = ralloc_parent(ir); 1208 1209 ir_dereference *deref = ir->value ? ir->value->as_dereference() : NULL; 1210 if (deref) { 1211 ir_variable *var = deref->variable_referenced(); 1212 1213 /* Fix the type of the return value. */ 1214 if (var && 1215 _mesa_set_search(lower_vars, var) && 1216 deref->type->without_array()->is_32bit()) { 1217 /* Create a 32-bit temporary variable. */ 1218 ir_variable *new_var = 1219 new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary); 1220 base_ir->insert_before(new_var); 1221 1222 /* Fix types in dereferences. */ 1223 fix_types_in_deref_chain(deref); 1224 1225 /* Convert to 32 bits for the return value. */ 1226 convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var), 1227 deref, true); 1228 ir->value = new(mem_ctx) ir_dereference_variable(new_var); 1229 } 1230 } 1231 1232 return ir_rvalue_enter_visitor::visit_enter(ir); 1233} 1234 1235void lower_variables_visitor::handle_rvalue(ir_rvalue **rvalue) 1236{ 1237 ir_rvalue *ir = *rvalue; 1238 1239 if (in_assignee || ir == NULL) 1240 return; 1241 1242 ir_expression *expr = ir->as_expression(); 1243 ir_dereference *expr_op0_deref = expr ? expr->operands[0]->as_dereference() : NULL; 1244 1245 /* Remove f2fmp(float16). Same for int16 and uint16. */ 1246 if (expr && 1247 expr_op0_deref && 1248 (expr->operation == ir_unop_f2fmp || 1249 expr->operation == ir_unop_i2imp || 1250 expr->operation == ir_unop_u2ump || 1251 expr->operation == ir_unop_f2f16 || 1252 expr->operation == ir_unop_i2i || 1253 expr->operation == ir_unop_u2u) && 1254 expr->type->without_array()->is_16bit() && 1255 expr_op0_deref->type->without_array()->is_32bit() && 1256 expr_op0_deref->variable_referenced() && 1257 _mesa_set_search(lower_vars, expr_op0_deref->variable_referenced())) { 1258 fix_types_in_deref_chain(expr_op0_deref); 1259 1260 /* Remove f2fmp/i2imp/u2ump. */ 1261 *rvalue = expr_op0_deref; 1262 return; 1263 } 1264 1265 ir_dereference *deref = ir->as_dereference(); 1266 1267 if (deref) { 1268 ir_variable *var = deref->variable_referenced(); 1269 1270 /* var can be NULL if we are dereferencing ir_constant. */ 1271 if (var && 1272 _mesa_set_search(lower_vars, var) && 1273 deref->type->without_array()->is_32bit()) { 1274 void *mem_ctx = ralloc_parent(ir); 1275 1276 /* Create a 32-bit temporary variable. */ 1277 ir_variable *new_var = 1278 new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary); 1279 base_ir->insert_before(new_var); 1280 1281 /* Fix types in dereferences. */ 1282 fix_types_in_deref_chain(deref); 1283 1284 /* Convert to 32 bits for the rvalue. */ 1285 convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var), 1286 deref, true); 1287 *rvalue = new(mem_ctx) ir_dereference_variable(new_var); 1288 } 1289 } 1290} 1291 1292ir_visitor_status 1293lower_variables_visitor::visit_enter(ir_call *ir) 1294{ 1295 void *mem_ctx = ralloc_parent(ir); 1296 1297 /* We can't pass 16-bit variables as 32-bit inout/out parameters. */ 1298 foreach_two_lists(formal_node, &ir->callee->parameters, 1299 actual_node, &ir->actual_parameters) { 1300 ir_dereference *param_deref = 1301 ((ir_rvalue *)actual_node)->as_dereference(); 1302 ir_variable *param = (ir_variable *)formal_node; 1303 1304 if (!param_deref) 1305 continue; 1306 1307 ir_variable *var = param_deref->variable_referenced(); 1308 1309 /* var can be NULL if we are dereferencing ir_constant. */ 1310 if (var && 1311 _mesa_set_search(lower_vars, var) && 1312 param->type->without_array()->is_32bit()) { 1313 fix_types_in_deref_chain(param_deref); 1314 1315 /* Create a 32-bit temporary variable for the parameter. */ 1316 ir_variable *new_var = 1317 new(mem_ctx) ir_variable(param->type, "lowerp", ir_var_temporary); 1318 base_ir->insert_before(new_var); 1319 1320 /* Replace the parameter. */ 1321 actual_node->replace_with(new(mem_ctx) ir_dereference_variable(new_var)); 1322 1323 if (param->data.mode == ir_var_function_in || 1324 param->data.mode == ir_var_function_inout) { 1325 /* Convert to 32 bits for passing in. */ 1326 convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var), 1327 param_deref->clone(mem_ctx, NULL), true); 1328 } 1329 if (param->data.mode == ir_var_function_out || 1330 param->data.mode == ir_var_function_inout) { 1331 /* Convert to 16 bits after returning. */ 1332 convert_split_assignment(param_deref, 1333 new(mem_ctx) ir_dereference_variable(new_var), 1334 false); 1335 } 1336 } 1337 } 1338 1339 /* Fix the type of return value dereferencies. */ 1340 ir_dereference_variable *ret_deref = ir->return_deref; 1341 ir_variable *ret_var = ret_deref ? ret_deref->variable_referenced() : NULL; 1342 1343 if (ret_var && 1344 _mesa_set_search(lower_vars, ret_var) && 1345 ret_deref->type->without_array()->is_32bit()) { 1346 /* Create a 32-bit temporary variable. */ 1347 ir_variable *new_var = 1348 new(mem_ctx) ir_variable(ir->callee->return_type, "lowerp", 1349 ir_var_temporary); 1350 base_ir->insert_before(new_var); 1351 1352 /* Replace the return variable. */ 1353 ret_deref->var = new_var; 1354 1355 /* Convert to 16 bits after returning. */ 1356 convert_split_assignment(new(mem_ctx) ir_dereference_variable(ret_var), 1357 new(mem_ctx) ir_dereference_variable(new_var), 1358 false); 1359 } 1360 1361 return ir_rvalue_enter_visitor::visit_enter(ir); 1362} 1363 1364} 1365 1366void 1367lower_precision(const struct gl_shader_compiler_options *options, 1368 exec_list *instructions) 1369{ 1370 find_precision_visitor v(options); 1371 find_lowerable_rvalues(options, instructions, v.lowerable_rvalues); 1372 visit_list_elements(&v, instructions); 1373 1374 lower_variables_visitor vars(options); 1375 visit_list_elements(&vars, instructions); 1376} 1377