1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file opt_array_splitting.cpp
26 *
27 * If an array is always dereferenced with a constant index, then
28 * split it apart into its elements, making it more amenable to other
29 * optimization passes.
30 *
31 * This skips uniform/varying arrays, which would need careful
32 * handling due to their ir->location fields tying them to the GL API
33 * and other shader stages.
34 */
35
36#include "ir.h"
37#include "ir_visitor.h"
38#include "ir_rvalue_visitor.h"
39#include "compiler/glsl_types.h"
40
41static bool debug = false;
42
43namespace {
44
45namespace opt_array_splitting {
46
47class variable_entry : public exec_node
48{
49public:
50   variable_entry(ir_variable *var)
51   {
52      this->var = var;
53      this->split = true;
54      this->declaration = false;
55      this->components = NULL;
56      this->mem_ctx = NULL;
57      if (var->type->is_array())
58         this->size = var->type->length;
59      else
60         this->size = var->type->matrix_columns;
61   }
62
63   ir_variable *var; /* The key: the variable's pointer. */
64   unsigned size; /* array length or matrix columns */
65
66   /** Whether this array should be split or not. */
67   bool split;
68
69   /* If the variable had a decl we can work with in the instruction
70    * stream.  We can't do splitting on function arguments, which
71    * don't get this variable set.
72    */
73   bool declaration;
74
75   ir_variable **components;
76
77   /** ralloc_parent(this->var) -- the shader's talloc context. */
78   void *mem_ctx;
79};
80
81} /* namespace */
82
83using namespace opt_array_splitting;
84
85/**
86 * This class does a walk over the tree, coming up with the set of
87 * variables that could be split by looking to see if they are arrays
88 * that are only ever constant-index dereferenced.
89 */
90class ir_array_reference_visitor : public ir_hierarchical_visitor {
91public:
92   ir_array_reference_visitor(void)
93   {
94      this->mem_ctx = ralloc_context(NULL);
95      this->variable_list.make_empty();
96      this->in_whole_array_copy = false;
97   }
98
99   ~ir_array_reference_visitor(void)
100   {
101      ralloc_free(mem_ctx);
102   }
103
104   bool get_split_list(exec_list *instructions, bool linked);
105
106   virtual ir_visitor_status visit(ir_variable *);
107   virtual ir_visitor_status visit(ir_dereference_variable *);
108   virtual ir_visitor_status visit_enter(ir_assignment *);
109   virtual ir_visitor_status visit_leave(ir_assignment *);
110   virtual ir_visitor_status visit_enter(ir_dereference_array *);
111   virtual ir_visitor_status visit_enter(ir_function_signature *);
112
113   variable_entry *get_variable_entry(ir_variable *var);
114
115   /* List of variable_entry */
116   exec_list variable_list;
117
118   void *mem_ctx;
119
120   bool in_whole_array_copy;
121};
122
123} /* namespace */
124
125variable_entry *
126ir_array_reference_visitor::get_variable_entry(ir_variable *var)
127{
128   assert(var);
129
130   if (var->data.mode != ir_var_auto &&
131       var->data.mode != ir_var_temporary)
132      return NULL;
133
134   if (!(var->type->is_array() || var->type->is_matrix()))
135      return NULL;
136
137   /* If the array hasn't been sized yet, we can't split it.  After
138    * linking, this should be resolved.
139    */
140   if (var->type->is_unsized_array())
141      return NULL;
142
143   /* FIXME: arrays of arrays are not handled correctly by this pass so we
144    * skip it for now. While the pass will create functioning code it actually
145    * produces worse code.
146    *
147    * For example the array:
148    *
149    *    int[3][2] a;
150    *
151    * ends up being split up into:
152    *
153    *    int[3][2] a_0;
154    *    int[3][2] a_1;
155    *    int[3][2] a_2;
156    *
157    * And we end up referencing each of these new arrays for example:
158    *
159    *    a[0][1] will be turned into a_0[0][1]
160    *    a[1][0] will be turned into a_1[1][0]
161    *    a[2][0] will be turned into a_2[2][0]
162    */
163   if (var->type->is_array() && var->type->fields.array->is_array())
164      return NULL;
165
166   foreach_in_list(variable_entry, entry, &this->variable_list) {
167      if (entry->var == var)
168         return entry;
169   }
170
171   variable_entry *entry = new(mem_ctx) variable_entry(var);
172   this->variable_list.push_tail(entry);
173   return entry;
174}
175
176
177ir_visitor_status
178ir_array_reference_visitor::visit(ir_variable *ir)
179{
180   variable_entry *entry = this->get_variable_entry(ir);
181
182   if (entry)
183      entry->declaration = true;
184
185   return visit_continue;
186}
187
188ir_visitor_status
189ir_array_reference_visitor::visit_enter(ir_assignment *ir)
190{
191   in_whole_array_copy =
192      ir->lhs->type->is_array() && ir->whole_variable_written();
193
194   return visit_continue;
195}
196
197ir_visitor_status
198ir_array_reference_visitor::visit_leave(ir_assignment *)
199{
200   in_whole_array_copy = false;
201
202   return visit_continue;
203}
204
205ir_visitor_status
206ir_array_reference_visitor::visit(ir_dereference_variable *ir)
207{
208   variable_entry *entry = this->get_variable_entry(ir->var);
209
210   /* Allow whole-array assignments on the LHS.  We can split those
211    * by "unrolling" the assignment into component-wise assignments.
212    */
213   if (in_assignee && in_whole_array_copy)
214      return visit_continue;
215
216   /* If we made it to here without seeing an ir_dereference_array,
217    * then the dereference of this array didn't have a constant index
218    * (see the visit_continue_with_parent below), so we can't split
219    * the variable.
220    */
221   if (entry)
222      entry->split = false;
223
224   return visit_continue;
225}
226
227ir_visitor_status
228ir_array_reference_visitor::visit_enter(ir_dereference_array *ir)
229{
230   ir_dereference_variable *deref = ir->array->as_dereference_variable();
231   if (!deref)
232      return visit_continue;
233
234   variable_entry *entry = this->get_variable_entry(deref->var);
235
236   /* If the access to the array has a variable index, we wouldn't
237    * know which split variable this dereference should go to.
238    */
239   if (!ir->array_index->as_constant()) {
240      if (entry)
241         entry->split = false;
242      /* This variable indexing could come from a different array dereference
243       * that also has variable indexing, that is, something like a[b[a[b[0]]]].
244       * If we return visit_continue_with_parent here for the first appearence
245       * of a, then we can miss that b also has indirect indexing (if this is
246       * the only place in the program where such indirect indexing into b
247       * happens), so keep going.
248       */
249      return visit_continue;
250   }
251
252   /* If the index is also array dereference, visit index. */
253   if (ir->array_index->as_dereference_array())
254      visit_enter(ir->array_index->as_dereference_array());
255
256   return visit_continue_with_parent;
257}
258
259ir_visitor_status
260ir_array_reference_visitor::visit_enter(ir_function_signature *ir)
261{
262   /* We don't have logic for array-splitting function arguments,
263    * so just look at the body instructions and not the parameter
264    * declarations.
265    */
266   visit_list_elements(this, &ir->body);
267   return visit_continue_with_parent;
268}
269
270bool
271ir_array_reference_visitor::get_split_list(exec_list *instructions,
272                                           bool linked)
273{
274   visit_list_elements(this, instructions);
275
276   /* If the shaders aren't linked yet, we can't mess with global
277    * declarations, which need to be matched by name across shaders.
278    */
279   if (!linked) {
280      foreach_in_list(ir_instruction, node, instructions) {
281         ir_variable *var = node->as_variable();
282         if (var) {
283            variable_entry *entry = get_variable_entry(var);
284            if (entry)
285               entry->remove();
286         }
287      }
288   }
289
290   /* Trim out variables we found that we can't split. */
291   foreach_in_list_safe(variable_entry, entry, &variable_list) {
292      if (debug) {
293         printf("array %s@%p: decl %d, split %d\n",
294                entry->var->name, (void *) entry->var, entry->declaration,
295                entry->split);
296      }
297
298      if (!(entry->declaration && entry->split)) {
299         entry->remove();
300      }
301   }
302
303   return !variable_list.is_empty();
304}
305
306/**
307 * This class rewrites the dereferences of arrays that have been split
308 * to use the newly created ir_variables for each component.
309 */
310class ir_array_splitting_visitor : public ir_rvalue_visitor {
311public:
312   ir_array_splitting_visitor(exec_list *vars)
313   {
314      this->variable_list = vars;
315   }
316
317   virtual ~ir_array_splitting_visitor()
318   {
319   }
320
321   virtual ir_visitor_status visit_leave(ir_assignment *);
322
323   void split_deref(ir_dereference **deref);
324   void handle_rvalue(ir_rvalue **rvalue);
325   variable_entry *get_splitting_entry(ir_variable *var);
326
327   exec_list *variable_list;
328};
329
330variable_entry *
331ir_array_splitting_visitor::get_splitting_entry(ir_variable *var)
332{
333   assert(var);
334
335   foreach_in_list(variable_entry, entry, this->variable_list) {
336      if (entry->var == var) {
337         return entry;
338      }
339   }
340
341   return NULL;
342}
343
344void
345ir_array_splitting_visitor::split_deref(ir_dereference **deref)
346{
347   ir_dereference_array *deref_array = (*deref)->as_dereference_array();
348   if (!deref_array)
349      return;
350
351   ir_dereference_variable *deref_var = deref_array->array->as_dereference_variable();
352   if (!deref_var)
353      return;
354   ir_variable *var = deref_var->var;
355
356   variable_entry *entry = get_splitting_entry(var);
357   if (!entry)
358      return;
359
360   ir_constant *constant = deref_array->array_index->as_constant();
361   assert(constant);
362
363   if (constant->value.i[0] >= 0 && constant->value.i[0] < (int)entry->size) {
364      *deref = new(entry->mem_ctx)
365               ir_dereference_variable(entry->components[constant->value.i[0]]);
366   } else {
367      /* There was a constant array access beyond the end of the
368       * array.  This might have happened due to constant folding
369       * after the initial parse.  This produces an undefined value,
370       * but shouldn't crash.  Just give them an uninitialized
371       * variable.
372       */
373      ir_variable *temp = new(entry->mem_ctx) ir_variable(deref_array->type,
374                                                          "undef",
375                                                          ir_var_temporary);
376      entry->components[0]->insert_before(temp);
377      *deref = new(entry->mem_ctx) ir_dereference_variable(temp);
378   }
379}
380
381void
382ir_array_splitting_visitor::handle_rvalue(ir_rvalue **rvalue)
383{
384   if (!*rvalue)
385      return;
386
387   ir_dereference *deref = (*rvalue)->as_dereference();
388
389   if (!deref)
390      return;
391
392   split_deref(&deref);
393   *rvalue = deref;
394}
395
396ir_visitor_status
397ir_array_splitting_visitor::visit_leave(ir_assignment *ir)
398{
399   /* The normal rvalue visitor skips the LHS of assignments, but we
400    * need to process those just the same.
401    */
402   ir_rvalue *lhs = ir->lhs;
403
404   /* "Unroll" any whole array assignments, creating assignments for
405    * each array element.  Then, do splitting on each new assignment.
406    */
407   if (lhs->type->is_array() && ir->whole_variable_written() &&
408       get_splitting_entry(ir->whole_variable_written())) {
409      void *mem_ctx = ralloc_parent(ir);
410
411      for (unsigned i = 0; i < lhs->type->length; i++) {
412         ir_rvalue *lhs_i =
413            new(mem_ctx) ir_dereference_array(ir->lhs->clone(mem_ctx, NULL),
414                                              new(mem_ctx) ir_constant(i));
415         ir_rvalue *rhs_i =
416            new(mem_ctx) ir_dereference_array(ir->rhs->clone(mem_ctx, NULL),
417                                              new(mem_ctx) ir_constant(i));
418
419         ir_assignment *assign_i = new(mem_ctx) ir_assignment(lhs_i, rhs_i);
420
421         ir->insert_before(assign_i);
422         assign_i->accept(this);
423      }
424      ir->remove();
425      return visit_continue;
426   }
427
428   handle_rvalue(&lhs);
429   ir->lhs = lhs->as_dereference();
430
431   ir->lhs->accept(this);
432
433   handle_rvalue(&ir->rhs);
434   ir->rhs->accept(this);
435
436   return visit_continue;
437}
438
439bool
440optimize_split_arrays(exec_list *instructions, bool linked)
441{
442   ir_array_reference_visitor refs;
443   if (!refs.get_split_list(instructions, linked))
444      return false;
445
446   void *mem_ctx = ralloc_context(NULL);
447
448   /* Replace the decls of the arrays to be split with their split
449    * components.
450    */
451   foreach_in_list(variable_entry, entry, &refs.variable_list) {
452      const struct glsl_type *type = entry->var->type;
453      const struct glsl_type *subtype;
454
455      if (type->is_matrix())
456         subtype = type->column_type();
457      else
458         subtype = type->fields.array;
459
460      entry->mem_ctx = ralloc_parent(entry->var);
461
462      entry->components = ralloc_array(mem_ctx, ir_variable *, entry->size);
463
464      for (unsigned int i = 0; i < entry->size; i++) {
465         const char *name = ralloc_asprintf(mem_ctx, "%s_%d",
466                                            entry->var->name, i);
467         ir_variable *new_var =
468            new(entry->mem_ctx) ir_variable(subtype, name, ir_var_temporary);
469         new_var->data.invariant = entry->var->data.invariant;
470         new_var->data.precise = entry->var->data.precise;
471
472         /* Do not lose memory/format qualifiers when arrays of images are
473          * split.
474          */
475         new_var->data.memory_read_only = entry->var->data.memory_read_only;
476         new_var->data.memory_write_only = entry->var->data.memory_write_only;
477         new_var->data.memory_coherent = entry->var->data.memory_coherent;
478         new_var->data.memory_volatile = entry->var->data.memory_volatile;
479         new_var->data.memory_restrict = entry->var->data.memory_restrict;
480         new_var->data.image_format = entry->var->data.image_format;
481
482         entry->components[i] = new_var;
483         entry->var->insert_before(entry->components[i]);
484      }
485
486      entry->var->remove();
487   }
488
489   ir_array_splitting_visitor split(&refs.variable_list);
490   visit_list_elements(&split, instructions);
491
492   if (debug)
493      _mesa_print_ir(stdout, instructions, NULL);
494
495   ralloc_free(mem_ctx);
496
497   return true;
498
499}
500