1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24/** 25 * \file opt_copy_propagation_elements.cpp 26 * 27 * Replaces usage of recently-copied components of variables with the 28 * previous copy of the variable. 29 * 30 * This should reduce the number of MOV instructions in the generated 31 * programs and help triggering other optimizations that live in GLSL 32 * level. 33 */ 34 35#include "ir.h" 36#include "ir_rvalue_visitor.h" 37#include "ir_basic_block.h" 38#include "ir_optimization.h" 39#include "compiler/glsl_types.h" 40#include "util/hash_table.h" 41#include "util/set.h" 42 43static bool debug = false; 44 45namespace { 46 47class acp_entry 48{ 49public: 50 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(acp_entry) 51 52 /* If set, rhs_full indicates that this ACP entry represents a 53 * whole-variable copy. The rhs_element[] array will still be filled, 54 * to allow the swizzling from its components in case the variable 55 * was a vector (and to simplify some of the erase() and write_vector() 56 * logic). 57 */ 58 59 ir_variable *rhs_full; 60 ir_variable *rhs_element[4]; 61 unsigned rhs_channel[4]; 62 63 /* Set of variables that use the variable associated with this acp_entry as 64 * RHS. This has the "reverse references" of rhs_full/rhs_element. It is 65 * used to speed up invalidating those references when the acp_entry 66 * changes. 67 */ 68 set *dsts; 69}; 70 71class copy_propagation_state { 72public: 73 DECLARE_RZALLOC_CXX_OPERATORS(copy_propagation_state); 74 75 static 76 copy_propagation_state* create(void *mem_ctx) 77 { 78 return new (mem_ctx) copy_propagation_state(NULL); 79 } 80 81 copy_propagation_state* clone() 82 { 83 return new (ralloc_parent(this)) copy_propagation_state(this); 84 } 85 86 void erase_all() 87 { 88 /* Individual elements were allocated from a linear allocator, so will 89 * be destroyed when the state is destroyed. 90 */ 91 _mesa_hash_table_clear(acp, NULL); 92 fallback = NULL; 93 } 94 95 void erase(ir_variable *var, unsigned write_mask) 96 { 97 acp_entry *entry = pull_acp(var); 98 entry->rhs_full = NULL; 99 100 for (int i = 0; i < 4; i++) { 101 if (!entry->rhs_element[i]) 102 continue; 103 if ((write_mask & (1 << i)) == 0) 104 continue; 105 106 ir_variable *to_remove = entry->rhs_element[i]; 107 entry->rhs_element[i] = NULL; 108 remove_unused_var_from_dsts(entry, var, to_remove); 109 } 110 111 /* TODO: Check write mask, and possibly not clear everything. */ 112 113 /* For any usage of our variable on the RHS, clear it out. */ 114 set_foreach(entry->dsts, set_entry) { 115 ir_variable *dst_var = (ir_variable *)set_entry->key; 116 acp_entry *dst_entry = pull_acp(dst_var); 117 for (int i = 0; i < 4; i++) { 118 if (dst_entry->rhs_element[i] == var) 119 dst_entry->rhs_element[i] = NULL; 120 } 121 if (dst_entry->rhs_full == var) 122 dst_entry->rhs_full = NULL; 123 _mesa_set_remove(entry->dsts, set_entry); 124 } 125 } 126 127 acp_entry *read(ir_variable *var) 128 { 129 for (copy_propagation_state *s = this; s != NULL; s = s->fallback) { 130 hash_entry *ht_entry = _mesa_hash_table_search(s->acp, var); 131 if (ht_entry) 132 return (acp_entry *) ht_entry->data; 133 } 134 return NULL; 135 } 136 137 void write_elements(ir_variable *lhs, ir_variable *rhs, unsigned write_mask, int swizzle[4]) 138 { 139 acp_entry *lhs_entry = pull_acp(lhs); 140 lhs_entry->rhs_full = NULL; 141 142 for (int i = 0; i < 4; i++) { 143 if ((write_mask & (1 << i)) == 0) 144 continue; 145 ir_variable *to_remove = lhs_entry->rhs_element[i]; 146 lhs_entry->rhs_element[i] = rhs; 147 lhs_entry->rhs_channel[i] = swizzle[i]; 148 149 remove_unused_var_from_dsts(lhs_entry, lhs, to_remove); 150 } 151 152 acp_entry *rhs_entry = pull_acp(rhs); 153 _mesa_set_add(rhs_entry->dsts, lhs); 154 } 155 156 void write_full(ir_variable *lhs, ir_variable *rhs) 157 { 158 acp_entry *lhs_entry = pull_acp(lhs); 159 if (lhs_entry->rhs_full == rhs) 160 return; 161 162 if (lhs_entry->rhs_full) { 163 remove_from_dsts(lhs_entry->rhs_full, lhs); 164 } else if (lhs->type->is_vector()) { 165 for (int i = 0; i < 4; i++) { 166 if (lhs_entry->rhs_element[i]) 167 remove_from_dsts(lhs_entry->rhs_element[i], lhs); 168 } 169 } 170 171 lhs_entry->rhs_full = rhs; 172 acp_entry *rhs_entry = pull_acp(rhs); 173 _mesa_set_add(rhs_entry->dsts, lhs); 174 175 if (lhs->type->is_vector()) { 176 for (int i = 0; i < 4; i++) { 177 lhs_entry->rhs_element[i] = rhs; 178 lhs_entry->rhs_channel[i] = i; 179 } 180 } 181 } 182 183 void remove_unused_var_from_dsts(acp_entry *lhs_entry, ir_variable *lhs, ir_variable *var) 184 { 185 if (!var) 186 return; 187 188 /* If lhs still uses var, don't remove anything. */ 189 for (int j = 0; j < 4; j++) { 190 if (lhs_entry->rhs_element[j] == var) 191 return; 192 } 193 194 acp_entry *element = pull_acp(var); 195 assert(element); 196 _mesa_set_remove_key(element->dsts, lhs); 197 } 198 199private: 200 explicit copy_propagation_state(copy_propagation_state *fallback) 201 { 202 this->fallback = fallback; 203 /* Use 'this' as context for the table, no explicit destruction 204 * needed later. 205 */ 206 acp = _mesa_pointer_hash_table_create(this); 207 lin_ctx = linear_alloc_parent(this, 0); 208 } 209 210 acp_entry *pull_acp(ir_variable *var) 211 { 212 hash_entry *ht_entry = _mesa_hash_table_search(acp, var); 213 if (ht_entry) 214 return (acp_entry *) ht_entry->data; 215 216 /* If not found, create one and copy data from fallback if available. */ 217 acp_entry *entry = new(lin_ctx) acp_entry(); 218 _mesa_hash_table_insert(acp, var, entry); 219 220 bool found = false; 221 for (copy_propagation_state *s = fallback; s != NULL; s = s->fallback) { 222 hash_entry *fallback_ht_entry = _mesa_hash_table_search(s->acp, var); 223 if (fallback_ht_entry) { 224 acp_entry *fallback_entry = (acp_entry *) fallback_ht_entry->data; 225 *entry = *fallback_entry; 226 entry->dsts = _mesa_set_clone(fallback_entry->dsts, this); 227 found = true; 228 break; 229 } 230 } 231 232 if (!found) { 233 entry->dsts = _mesa_pointer_set_create(this); 234 } 235 236 return entry; 237 } 238 239 void 240 remove_from_dsts(ir_variable *var, ir_variable *to_remove) 241 { 242 acp_entry *entry = pull_acp(var); 243 assert(entry); 244 _mesa_set_remove_key(entry->dsts, to_remove); 245 } 246 247 /** Available Copy to Propagate table, from variable to the entry 248 * containing the current sources that can be used. */ 249 hash_table *acp; 250 251 /** When a state is cloned, entries are copied on demand from fallback. */ 252 copy_propagation_state *fallback; 253 254 void *lin_ctx; 255}; 256 257class kill_entry : public exec_node 258{ 259public: 260 /* override operator new from exec_node */ 261 DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(kill_entry) 262 263 kill_entry(ir_variable *var, int write_mask) 264 { 265 this->var = var; 266 this->write_mask = write_mask; 267 } 268 269 ir_variable *var; 270 unsigned int write_mask; 271}; 272 273class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor { 274public: 275 ir_copy_propagation_elements_visitor() 276 { 277 this->progress = false; 278 this->killed_all = false; 279 this->mem_ctx = ralloc_context(NULL); 280 this->lin_ctx = linear_alloc_parent(this->mem_ctx, 0); 281 this->shader_mem_ctx = NULL; 282 this->kills = new(mem_ctx) exec_list; 283 this->state = copy_propagation_state::create(mem_ctx); 284 } 285 ~ir_copy_propagation_elements_visitor() 286 { 287 ralloc_free(mem_ctx); 288 } 289 290 virtual ir_visitor_status visit(ir_dereference_variable *); 291 292 void handle_loop(ir_loop *, bool keep_acp); 293 virtual ir_visitor_status visit_enter(class ir_loop *); 294 virtual ir_visitor_status visit_enter(class ir_function_signature *); 295 virtual ir_visitor_status visit_leave(class ir_assignment *); 296 virtual ir_visitor_status visit_enter(class ir_call *); 297 virtual ir_visitor_status visit_enter(class ir_if *); 298 virtual ir_visitor_status visit_leave(class ir_swizzle *); 299 300 void handle_rvalue(ir_rvalue **rvalue); 301 302 void add_copy(ir_assignment *ir); 303 void kill(kill_entry *k); 304 void handle_if_block(exec_list *instructions, exec_list *kills, bool *killed_all); 305 306 copy_propagation_state *state; 307 308 /** 309 * List of kill_entry: The variables whose values were killed in this 310 * block. 311 */ 312 exec_list *kills; 313 314 bool progress; 315 316 bool killed_all; 317 318 /* Context for our local data structures. */ 319 void *mem_ctx; 320 void *lin_ctx; 321 /* Context for allocating new shader nodes. */ 322 void *shader_mem_ctx; 323}; 324 325} /* unnamed namespace */ 326 327ir_visitor_status 328ir_copy_propagation_elements_visitor::visit(ir_dereference_variable *ir) 329{ 330 if (this->in_assignee) 331 return visit_continue; 332 333 const acp_entry *entry = state->read(ir->var); 334 if (entry && entry->rhs_full) { 335 ir->var = (ir_variable *) entry->rhs_full; 336 progress = true; 337 } 338 339 return visit_continue; 340} 341 342ir_visitor_status 343ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir) 344{ 345 /* Treat entry into a function signature as a completely separate 346 * block. Any instructions at global scope will be shuffled into 347 * main() at link time, so they're irrelevant to us. 348 */ 349 exec_list *orig_kills = this->kills; 350 bool orig_killed_all = this->killed_all; 351 352 this->kills = new(mem_ctx) exec_list; 353 this->killed_all = false; 354 355 copy_propagation_state *orig_state = state; 356 this->state = copy_propagation_state::create(mem_ctx); 357 358 visit_list_elements(this, &ir->body); 359 360 delete this->state; 361 this->state = orig_state; 362 363 ralloc_free(this->kills); 364 this->kills = orig_kills; 365 this->killed_all = orig_killed_all; 366 367 return visit_continue_with_parent; 368} 369 370ir_visitor_status 371ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir) 372{ 373 ir_dereference_variable *lhs = ir->lhs->as_dereference_variable(); 374 ir_variable *var = ir->lhs->variable_referenced(); 375 376 kill_entry *k; 377 378 if (lhs && var->type->is_vector()) 379 k = new(this->lin_ctx) kill_entry(var, ir->write_mask); 380 else 381 k = new(this->lin_ctx) kill_entry(var, ~0); 382 383 kill(k); 384 385 add_copy(ir); 386 387 return visit_continue; 388} 389 390ir_visitor_status 391ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *) 392{ 393 /* Don't visit the values of swizzles since they are handled while 394 * visiting the swizzle itself. 395 */ 396 return visit_continue; 397} 398 399/** 400 * Replaces dereferences of ACP RHS variables with ACP LHS variables. 401 * 402 * This is where the actual copy propagation occurs. Note that the 403 * rewriting of ir_dereference means that the ir_dereference instance 404 * must not be shared by multiple IR operations! 405 */ 406void 407ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir) 408{ 409 int swizzle_chan[4]; 410 ir_dereference_variable *deref_var; 411 ir_variable *source[4] = {NULL, NULL, NULL, NULL}; 412 int source_chan[4] = {0, 0, 0, 0}; 413 int chans; 414 bool noop_swizzle = true; 415 416 if (!*ir) 417 return; 418 419 ir_swizzle *swizzle = (*ir)->as_swizzle(); 420 if (swizzle) { 421 deref_var = swizzle->val->as_dereference_variable(); 422 if (!deref_var) 423 return; 424 425 swizzle_chan[0] = swizzle->mask.x; 426 swizzle_chan[1] = swizzle->mask.y; 427 swizzle_chan[2] = swizzle->mask.z; 428 swizzle_chan[3] = swizzle->mask.w; 429 chans = swizzle->type->vector_elements; 430 } else { 431 deref_var = (*ir)->as_dereference_variable(); 432 if (!deref_var) 433 return; 434 435 swizzle_chan[0] = 0; 436 swizzle_chan[1] = 1; 437 swizzle_chan[2] = 2; 438 swizzle_chan[3] = 3; 439 chans = deref_var->type->vector_elements; 440 } 441 442 if (this->in_assignee) 443 return; 444 445 ir_variable *var = deref_var->var; 446 447 /* Try to find ACP entries covering swizzle_chan[], hoping they're 448 * the same source variable. 449 */ 450 451 const acp_entry *entry = state->read(var); 452 if (entry) { 453 for (int c = 0; c < chans; c++) { 454 unsigned index = swizzle_chan[c]; 455 ir_variable *src = entry->rhs_element[index]; 456 if (!src) 457 continue; 458 source[c] = src; 459 source_chan[c] = entry->rhs_channel[index]; 460 if (source_chan[c] != swizzle_chan[c]) 461 noop_swizzle = false; 462 } 463 } 464 465 /* Make sure all channels are copying from the same source variable. */ 466 if (!source[0]) 467 return; 468 for (int c = 1; c < chans; c++) { 469 if (source[c] != source[0]) 470 return; 471 } 472 473 if (!shader_mem_ctx) 474 shader_mem_ctx = ralloc_parent(deref_var); 475 476 /* Don't pointlessly replace the rvalue with itself (or a noop swizzle 477 * of itself, which would just be deleted by opt_noop_swizzle). 478 */ 479 if (source[0] == var && noop_swizzle) 480 return; 481 482 if (debug) { 483 printf("Copy propagation from:\n"); 484 (*ir)->print(); 485 } 486 487 deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]); 488 *ir = new(shader_mem_ctx) ir_swizzle(deref_var, 489 source_chan[0], 490 source_chan[1], 491 source_chan[2], 492 source_chan[3], 493 chans); 494 progress = true; 495 496 if (debug) { 497 printf("to:\n"); 498 (*ir)->print(); 499 printf("\n"); 500 } 501} 502 503 504ir_visitor_status 505ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir) 506{ 507 /* Do copy propagation on call parameters, but skip any out params */ 508 foreach_two_lists(formal_node, &ir->callee->parameters, 509 actual_node, &ir->actual_parameters) { 510 ir_variable *sig_param = (ir_variable *) formal_node; 511 ir_rvalue *ir = (ir_rvalue *) actual_node; 512 if (sig_param->data.mode != ir_var_function_out 513 && sig_param->data.mode != ir_var_function_inout) { 514 ir->accept(this); 515 } 516 } 517 518 if (!ir->callee->is_intrinsic()) { 519 state->erase_all(); 520 this->killed_all = true; 521 } else { 522 if (ir->return_deref) { 523 kill(new(this->lin_ctx) kill_entry(ir->return_deref->var, ~0)); 524 } 525 526 foreach_two_lists(formal_node, &ir->callee->parameters, 527 actual_node, &ir->actual_parameters) { 528 ir_variable *sig_param = (ir_variable *) formal_node; 529 if (sig_param->data.mode == ir_var_function_out || 530 sig_param->data.mode == ir_var_function_inout) { 531 ir_rvalue *ir = (ir_rvalue *) actual_node; 532 ir_variable *var = ir->variable_referenced(); 533 kill(new(this->lin_ctx) kill_entry(var, ~0)); 534 } 535 } 536 } 537 538 return visit_continue_with_parent; 539} 540 541void 542ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions, exec_list *kills, bool *killed_all) 543{ 544 exec_list *orig_kills = this->kills; 545 bool orig_killed_all = this->killed_all; 546 547 this->kills = kills; 548 this->killed_all = false; 549 550 /* Populate the initial acp with a copy of the original */ 551 copy_propagation_state *orig_state = state; 552 this->state = orig_state->clone(); 553 554 visit_list_elements(this, instructions); 555 556 delete this->state; 557 this->state = orig_state; 558 559 *killed_all = this->killed_all; 560 this->kills = orig_kills; 561 this->killed_all = orig_killed_all; 562} 563 564ir_visitor_status 565ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir) 566{ 567 ir->condition->accept(this); 568 569 exec_list *new_kills = new(mem_ctx) exec_list; 570 bool then_killed_all = false; 571 bool else_killed_all = false; 572 573 handle_if_block(&ir->then_instructions, new_kills, &then_killed_all); 574 handle_if_block(&ir->else_instructions, new_kills, &else_killed_all); 575 576 if (then_killed_all || else_killed_all) { 577 state->erase_all(); 578 killed_all = true; 579 } else { 580 foreach_in_list_safe(kill_entry, k, new_kills) 581 kill(k); 582 } 583 584 ralloc_free(new_kills); 585 586 /* handle_if_block() already descended into the children. */ 587 return visit_continue_with_parent; 588} 589 590void 591ir_copy_propagation_elements_visitor::handle_loop(ir_loop *ir, bool keep_acp) 592{ 593 exec_list *orig_kills = this->kills; 594 bool orig_killed_all = this->killed_all; 595 596 this->kills = new(mem_ctx) exec_list; 597 this->killed_all = false; 598 599 copy_propagation_state *orig_state = state; 600 601 if (keep_acp) { 602 /* Populate the initial acp with a copy of the original */ 603 this->state = orig_state->clone(); 604 } else { 605 this->state = copy_propagation_state::create(mem_ctx); 606 } 607 608 visit_list_elements(this, &ir->body_instructions); 609 610 delete this->state; 611 this->state = orig_state; 612 613 if (this->killed_all) 614 this->state->erase_all(); 615 616 exec_list *new_kills = this->kills; 617 this->kills = orig_kills; 618 this->killed_all = this->killed_all || orig_killed_all; 619 620 foreach_in_list_safe(kill_entry, k, new_kills) { 621 kill(k); 622 } 623 624 ralloc_free(new_kills); 625} 626 627ir_visitor_status 628ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir) 629{ 630 handle_loop(ir, false); 631 handle_loop(ir, true); 632 633 /* already descended into the children. */ 634 return visit_continue_with_parent; 635} 636 637/* Remove any entries currently in the ACP for this kill. */ 638void 639ir_copy_propagation_elements_visitor::kill(kill_entry *k) 640{ 641 state->erase(k->var, k->write_mask); 642 643 /* If we were on a list, remove ourselves before inserting */ 644 if (k->next) 645 k->remove(); 646 647 this->kills->push_tail(k); 648} 649 650/** 651 * Adds directly-copied channels between vector variables to the available 652 * copy propagation list. 653 */ 654void 655ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir) 656{ 657 { 658 ir_variable *lhs_var = ir->whole_variable_written(); 659 ir_dereference_variable *rhs = ir->rhs->as_dereference_variable(); 660 661 if (lhs_var != NULL && rhs && rhs->var != NULL && lhs_var != rhs->var) { 662 if (lhs_var->data.mode == ir_var_shader_storage || 663 lhs_var->data.mode == ir_var_shader_shared || 664 rhs->var->data.mode == ir_var_shader_storage || 665 rhs->var->data.mode == ir_var_shader_shared || 666 lhs_var->data.precise != rhs->var->data.precise) { 667 return; 668 } 669 state->write_full(lhs_var, rhs->var); 670 return; 671 } 672 } 673 674 int orig_swizzle[4] = {0, 1, 2, 3}; 675 int swizzle[4]; 676 677 ir_dereference_variable *lhs = ir->lhs->as_dereference_variable(); 678 if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector())) 679 return; 680 681 if (lhs->var->data.mode == ir_var_shader_storage || 682 lhs->var->data.mode == ir_var_shader_shared) 683 return; 684 685 ir_dereference_variable *rhs = ir->rhs->as_dereference_variable(); 686 if (!rhs) { 687 ir_swizzle *swiz = ir->rhs->as_swizzle(); 688 if (!swiz) 689 return; 690 691 rhs = swiz->val->as_dereference_variable(); 692 if (!rhs) 693 return; 694 695 orig_swizzle[0] = swiz->mask.x; 696 orig_swizzle[1] = swiz->mask.y; 697 orig_swizzle[2] = swiz->mask.z; 698 orig_swizzle[3] = swiz->mask.w; 699 } 700 701 if (rhs->var->data.mode == ir_var_shader_storage || 702 rhs->var->data.mode == ir_var_shader_shared) 703 return; 704 705 /* Move the swizzle channels out to the positions they match in the 706 * destination. We don't want to have to rewrite the swizzle[] 707 * array every time we clear a bit of the write_mask. 708 */ 709 int j = 0; 710 for (int i = 0; i < 4; i++) { 711 if (ir->write_mask & (1 << i)) 712 swizzle[i] = orig_swizzle[j++]; 713 } 714 715 int write_mask = ir->write_mask; 716 if (lhs->var == rhs->var) { 717 /* If this is a copy from the variable to itself, then we need 718 * to be sure not to include the updated channels from this 719 * instruction in the set of new source channels to be 720 * copy-propagated from. 721 */ 722 for (int i = 0; i < 4; i++) { 723 if (ir->write_mask & (1 << orig_swizzle[i])) 724 write_mask &= ~(1 << i); 725 } 726 } 727 728 if (lhs->var->data.precise != rhs->var->data.precise) 729 return; 730 731 state->write_elements(lhs->var, rhs->var, write_mask, swizzle); 732} 733 734bool 735do_copy_propagation_elements(exec_list *instructions) 736{ 737 ir_copy_propagation_elements_visitor v; 738 739 visit_list_elements(&v, instructions); 740 741 return v.progress; 742} 743