1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24/** 25 * \file opt_array_splitting.cpp 26 * 27 * If an array is always dereferenced with a constant index, then 28 * split it apart into its elements, making it more amenable to other 29 * optimization passes. 30 * 31 * This skips uniform/varying arrays, which would need careful 32 * handling due to their ir->location fields tying them to the GL API 33 * and other shader stages. 34 */ 35 36#include "ir.h" 37#include "ir_visitor.h" 38#include "ir_rvalue_visitor.h" 39#include "compiler/glsl_types.h" 40 41static bool debug = false; 42 43namespace { 44 45namespace opt_array_splitting { 46 47class variable_entry : public exec_node 48{ 49public: 50 variable_entry(ir_variable *var) 51 { 52 this->var = var; 53 this->split = true; 54 this->declaration = false; 55 this->components = NULL; 56 this->mem_ctx = NULL; 57 if (var->type->is_array()) 58 this->size = var->type->length; 59 else 60 this->size = var->type->matrix_columns; 61 } 62 63 ir_variable *var; /* The key: the variable's pointer. */ 64 unsigned size; /* array length or matrix columns */ 65 66 /** Whether this array should be split or not. */ 67 bool split; 68 69 /* If the variable had a decl we can work with in the instruction 70 * stream. We can't do splitting on function arguments, which 71 * don't get this variable set. 72 */ 73 bool declaration; 74 75 ir_variable **components; 76 77 /** ralloc_parent(this->var) -- the shader's talloc context. */ 78 void *mem_ctx; 79}; 80 81} /* namespace */ 82 83using namespace opt_array_splitting; 84 85/** 86 * This class does a walk over the tree, coming up with the set of 87 * variables that could be split by looking to see if they are arrays 88 * that are only ever constant-index dereferenced. 89 */ 90class ir_array_reference_visitor : public ir_hierarchical_visitor { 91public: 92 ir_array_reference_visitor(void) 93 { 94 this->mem_ctx = ralloc_context(NULL); 95 this->variable_list.make_empty(); 96 this->in_whole_array_copy = false; 97 } 98 99 ~ir_array_reference_visitor(void) 100 { 101 ralloc_free(mem_ctx); 102 } 103 104 bool get_split_list(exec_list *instructions, bool linked); 105 106 virtual ir_visitor_status visit(ir_variable *); 107 virtual ir_visitor_status visit(ir_dereference_variable *); 108 virtual ir_visitor_status visit_enter(ir_assignment *); 109 virtual ir_visitor_status visit_leave(ir_assignment *); 110 virtual ir_visitor_status visit_enter(ir_dereference_array *); 111 virtual ir_visitor_status visit_enter(ir_function_signature *); 112 113 variable_entry *get_variable_entry(ir_variable *var); 114 115 /* List of variable_entry */ 116 exec_list variable_list; 117 118 void *mem_ctx; 119 120 bool in_whole_array_copy; 121}; 122 123} /* namespace */ 124 125variable_entry * 126ir_array_reference_visitor::get_variable_entry(ir_variable *var) 127{ 128 assert(var); 129 130 if (var->data.mode != ir_var_auto && 131 var->data.mode != ir_var_temporary) 132 return NULL; 133 134 if (!(var->type->is_array() || var->type->is_matrix())) 135 return NULL; 136 137 /* If the array hasn't been sized yet, we can't split it. After 138 * linking, this should be resolved. 139 */ 140 if (var->type->is_unsized_array()) 141 return NULL; 142 143 /* FIXME: arrays of arrays are not handled correctly by this pass so we 144 * skip it for now. While the pass will create functioning code it actually 145 * produces worse code. 146 * 147 * For example the array: 148 * 149 * int[3][2] a; 150 * 151 * ends up being split up into: 152 * 153 * int[3][2] a_0; 154 * int[3][2] a_1; 155 * int[3][2] a_2; 156 * 157 * And we end up referencing each of these new arrays for example: 158 * 159 * a[0][1] will be turned into a_0[0][1] 160 * a[1][0] will be turned into a_1[1][0] 161 * a[2][0] will be turned into a_2[2][0] 162 */ 163 if (var->type->is_array() && var->type->fields.array->is_array()) 164 return NULL; 165 166 foreach_in_list(variable_entry, entry, &this->variable_list) { 167 if (entry->var == var) 168 return entry; 169 } 170 171 variable_entry *entry = new(mem_ctx) variable_entry(var); 172 this->variable_list.push_tail(entry); 173 return entry; 174} 175 176 177ir_visitor_status 178ir_array_reference_visitor::visit(ir_variable *ir) 179{ 180 variable_entry *entry = this->get_variable_entry(ir); 181 182 if (entry) 183 entry->declaration = true; 184 185 return visit_continue; 186} 187 188ir_visitor_status 189ir_array_reference_visitor::visit_enter(ir_assignment *ir) 190{ 191 in_whole_array_copy = 192 ir->lhs->type->is_array() && ir->whole_variable_written(); 193 194 return visit_continue; 195} 196 197ir_visitor_status 198ir_array_reference_visitor::visit_leave(ir_assignment *) 199{ 200 in_whole_array_copy = false; 201 202 return visit_continue; 203} 204 205ir_visitor_status 206ir_array_reference_visitor::visit(ir_dereference_variable *ir) 207{ 208 variable_entry *entry = this->get_variable_entry(ir->var); 209 210 /* Allow whole-array assignments on the LHS. We can split those 211 * by "unrolling" the assignment into component-wise assignments. 212 */ 213 if (in_assignee && in_whole_array_copy) 214 return visit_continue; 215 216 /* If we made it to here without seeing an ir_dereference_array, 217 * then the dereference of this array didn't have a constant index 218 * (see the visit_continue_with_parent below), so we can't split 219 * the variable. 220 */ 221 if (entry) 222 entry->split = false; 223 224 return visit_continue; 225} 226 227ir_visitor_status 228ir_array_reference_visitor::visit_enter(ir_dereference_array *ir) 229{ 230 ir_dereference_variable *deref = ir->array->as_dereference_variable(); 231 if (!deref) 232 return visit_continue; 233 234 variable_entry *entry = this->get_variable_entry(deref->var); 235 236 /* If the access to the array has a variable index, we wouldn't 237 * know which split variable this dereference should go to. 238 */ 239 if (!ir->array_index->as_constant()) { 240 if (entry) 241 entry->split = false; 242 /* This variable indexing could come from a different array dereference 243 * that also has variable indexing, that is, something like a[b[a[b[0]]]]. 244 * If we return visit_continue_with_parent here for the first appearence 245 * of a, then we can miss that b also has indirect indexing (if this is 246 * the only place in the program where such indirect indexing into b 247 * happens), so keep going. 248 */ 249 return visit_continue; 250 } 251 252 /* If the index is also array dereference, visit index. */ 253 if (ir->array_index->as_dereference_array()) 254 visit_enter(ir->array_index->as_dereference_array()); 255 256 return visit_continue_with_parent; 257} 258 259ir_visitor_status 260ir_array_reference_visitor::visit_enter(ir_function_signature *ir) 261{ 262 /* We don't have logic for array-splitting function arguments, 263 * so just look at the body instructions and not the parameter 264 * declarations. 265 */ 266 visit_list_elements(this, &ir->body); 267 return visit_continue_with_parent; 268} 269 270bool 271ir_array_reference_visitor::get_split_list(exec_list *instructions, 272 bool linked) 273{ 274 visit_list_elements(this, instructions); 275 276 /* If the shaders aren't linked yet, we can't mess with global 277 * declarations, which need to be matched by name across shaders. 278 */ 279 if (!linked) { 280 foreach_in_list(ir_instruction, node, instructions) { 281 ir_variable *var = node->as_variable(); 282 if (var) { 283 variable_entry *entry = get_variable_entry(var); 284 if (entry) 285 entry->remove(); 286 } 287 } 288 } 289 290 /* Trim out variables we found that we can't split. */ 291 foreach_in_list_safe(variable_entry, entry, &variable_list) { 292 if (debug) { 293 printf("array %s@%p: decl %d, split %d\n", 294 entry->var->name, (void *) entry->var, entry->declaration, 295 entry->split); 296 } 297 298 if (!(entry->declaration && entry->split)) { 299 entry->remove(); 300 } 301 } 302 303 return !variable_list.is_empty(); 304} 305 306/** 307 * This class rewrites the dereferences of arrays that have been split 308 * to use the newly created ir_variables for each component. 309 */ 310class ir_array_splitting_visitor : public ir_rvalue_visitor { 311public: 312 ir_array_splitting_visitor(exec_list *vars) 313 { 314 this->variable_list = vars; 315 } 316 317 virtual ~ir_array_splitting_visitor() 318 { 319 } 320 321 virtual ir_visitor_status visit_leave(ir_assignment *); 322 323 void split_deref(ir_dereference **deref); 324 void handle_rvalue(ir_rvalue **rvalue); 325 variable_entry *get_splitting_entry(ir_variable *var); 326 327 exec_list *variable_list; 328}; 329 330variable_entry * 331ir_array_splitting_visitor::get_splitting_entry(ir_variable *var) 332{ 333 assert(var); 334 335 foreach_in_list(variable_entry, entry, this->variable_list) { 336 if (entry->var == var) { 337 return entry; 338 } 339 } 340 341 return NULL; 342} 343 344void 345ir_array_splitting_visitor::split_deref(ir_dereference **deref) 346{ 347 ir_dereference_array *deref_array = (*deref)->as_dereference_array(); 348 if (!deref_array) 349 return; 350 351 ir_dereference_variable *deref_var = deref_array->array->as_dereference_variable(); 352 if (!deref_var) 353 return; 354 ir_variable *var = deref_var->var; 355 356 variable_entry *entry = get_splitting_entry(var); 357 if (!entry) 358 return; 359 360 ir_constant *constant = deref_array->array_index->as_constant(); 361 assert(constant); 362 363 if (constant->value.i[0] >= 0 && constant->value.i[0] < (int)entry->size) { 364 *deref = new(entry->mem_ctx) 365 ir_dereference_variable(entry->components[constant->value.i[0]]); 366 } else { 367 /* There was a constant array access beyond the end of the 368 * array. This might have happened due to constant folding 369 * after the initial parse. This produces an undefined value, 370 * but shouldn't crash. Just give them an uninitialized 371 * variable. 372 */ 373 ir_variable *temp = new(entry->mem_ctx) ir_variable(deref_array->type, 374 "undef", 375 ir_var_temporary); 376 entry->components[0]->insert_before(temp); 377 *deref = new(entry->mem_ctx) ir_dereference_variable(temp); 378 } 379} 380 381void 382ir_array_splitting_visitor::handle_rvalue(ir_rvalue **rvalue) 383{ 384 if (!*rvalue) 385 return; 386 387 ir_dereference *deref = (*rvalue)->as_dereference(); 388 389 if (!deref) 390 return; 391 392 split_deref(&deref); 393 *rvalue = deref; 394} 395 396ir_visitor_status 397ir_array_splitting_visitor::visit_leave(ir_assignment *ir) 398{ 399 /* The normal rvalue visitor skips the LHS of assignments, but we 400 * need to process those just the same. 401 */ 402 ir_rvalue *lhs = ir->lhs; 403 404 /* "Unroll" any whole array assignments, creating assignments for 405 * each array element. Then, do splitting on each new assignment. 406 */ 407 if (lhs->type->is_array() && ir->whole_variable_written() && 408 get_splitting_entry(ir->whole_variable_written())) { 409 void *mem_ctx = ralloc_parent(ir); 410 411 for (unsigned i = 0; i < lhs->type->length; i++) { 412 ir_rvalue *lhs_i = 413 new(mem_ctx) ir_dereference_array(ir->lhs->clone(mem_ctx, NULL), 414 new(mem_ctx) ir_constant(i)); 415 ir_rvalue *rhs_i = 416 new(mem_ctx) ir_dereference_array(ir->rhs->clone(mem_ctx, NULL), 417 new(mem_ctx) ir_constant(i)); 418 419 ir_assignment *assign_i = new(mem_ctx) ir_assignment(lhs_i, rhs_i); 420 421 ir->insert_before(assign_i); 422 assign_i->accept(this); 423 } 424 ir->remove(); 425 return visit_continue; 426 } 427 428 handle_rvalue(&lhs); 429 ir->lhs = lhs->as_dereference(); 430 431 ir->lhs->accept(this); 432 433 handle_rvalue(&ir->rhs); 434 ir->rhs->accept(this); 435 436 return visit_continue; 437} 438 439bool 440optimize_split_arrays(exec_list *instructions, bool linked) 441{ 442 ir_array_reference_visitor refs; 443 if (!refs.get_split_list(instructions, linked)) 444 return false; 445 446 void *mem_ctx = ralloc_context(NULL); 447 448 /* Replace the decls of the arrays to be split with their split 449 * components. 450 */ 451 foreach_in_list(variable_entry, entry, &refs.variable_list) { 452 const struct glsl_type *type = entry->var->type; 453 const struct glsl_type *subtype; 454 455 if (type->is_matrix()) 456 subtype = type->column_type(); 457 else 458 subtype = type->fields.array; 459 460 entry->mem_ctx = ralloc_parent(entry->var); 461 462 entry->components = ralloc_array(mem_ctx, ir_variable *, entry->size); 463 464 for (unsigned int i = 0; i < entry->size; i++) { 465 const char *name = ralloc_asprintf(mem_ctx, "%s_%d", 466 entry->var->name, i); 467 ir_variable *new_var = 468 new(entry->mem_ctx) ir_variable(subtype, name, ir_var_temporary); 469 new_var->data.invariant = entry->var->data.invariant; 470 new_var->data.precise = entry->var->data.precise; 471 472 /* Do not lose memory/format qualifiers when arrays of images are 473 * split. 474 */ 475 new_var->data.memory_read_only = entry->var->data.memory_read_only; 476 new_var->data.memory_write_only = entry->var->data.memory_write_only; 477 new_var->data.memory_coherent = entry->var->data.memory_coherent; 478 new_var->data.memory_volatile = entry->var->data.memory_volatile; 479 new_var->data.memory_restrict = entry->var->data.memory_restrict; 480 new_var->data.image_format = entry->var->data.image_format; 481 482 entry->components[i] = new_var; 483 entry->var->insert_before(entry->components[i]); 484 } 485 486 entry->var->remove(); 487 } 488 489 ir_array_splitting_visitor split(&refs.variable_list); 490 visit_list_elements(&split, instructions); 491 492 if (debug) 493 _mesa_print_ir(stdout, instructions, NULL); 494 495 ralloc_free(mem_ctx); 496 497 return true; 498 499} 500