1/*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file opt_rebalance_tree.cpp
26 *
27 * Rebalances a reduction expression tree.
28 *
29 * For reduction operations (e.g., x + y + z + w) we generate an expression
30 * tree like
31 *
32 *        +
33 *       / \
34 *      +   w
35 *     / \
36 *    +   z
37 *   / \
38 *  x   y
39 *
40 * which we can rebalance into
41 *
42 *       +
43 *      / \
44 *     /   \
45 *    +     +
46 *   / \   / \
47 *  x   y z   w
48 *
49 * to get a better instruction scheduling.
50 *
51 * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout
52 * and Bette L. Warren.
53 *
54 * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
55 * explanation of the of the tree_to_vine() (rightward rotation) and
56 * vine_to_tree() (leftward rotation) algorithms.
57 */
58
59#include "ir.h"
60#include "ir_visitor.h"
61#include "ir_rvalue_visitor.h"
62#include "ir_optimization.h"
63#include "main/macros.h" /* for MAX2 */
64
65/* The DSW algorithm generates a degenerate tree (really, a linked list) in
66 * tree_to_vine(). We'd rather not leave a binary expression with only one
67 * operand, so trivial modifications (the ternary operators below) are needed
68 * to ensure that we only rotate around the ir_expression nodes of the tree.
69 */
70static unsigned
71tree_to_vine(ir_expression *root)
72{
73   unsigned size = 0;
74   ir_rvalue *vine_tail = root;
75   ir_rvalue *remainder = root->operands[1];
76
77   while (remainder != NULL) {
78      ir_expression *remainder_temp = remainder->as_expression();
79      ir_expression *remainder_left = remainder_temp ?
80         remainder_temp->operands[0]->as_expression() : NULL;
81
82      if (remainder_left == NULL) {
83         /* move vine_tail down one */
84         vine_tail = remainder;
85         remainder = remainder->as_expression() ?
86            ((ir_expression *)remainder)->operands[1] : NULL;
87         size++;
88      } else {
89         /* rotate */
90         ir_expression *tempptr = remainder_left;
91         ((ir_expression *)remainder)->operands[0] = tempptr->operands[1];
92         tempptr->operands[1] = remainder;
93         remainder = tempptr;
94         ((ir_expression *)vine_tail)->operands[1] = tempptr;
95      }
96   }
97
98   return size;
99}
100
101static void
102compression(ir_expression *root, unsigned count)
103{
104   ir_expression *scanner = root;
105
106   for (unsigned i = 0; i < count; i++) {
107      ir_expression *child = (ir_expression *)scanner->operands[1];
108      scanner->operands[1] = child->operands[1];
109      scanner = (ir_expression *)scanner->operands[1];
110      child->operands[1] = scanner->operands[0];
111      scanner->operands[0] = child;
112   }
113}
114
115static void
116vine_to_tree(ir_expression *root, unsigned size)
117{
118   int n = size - 1;
119   for (int m = n / 2; m > 0; m = n / 2) {
120      compression(root, m);
121      n -= m + 1;
122   }
123}
124
125namespace {
126
127class ir_rebalance_visitor : public ir_rvalue_enter_visitor {
128public:
129   ir_rebalance_visitor()
130   {
131      progress = false;
132   }
133
134   virtual ir_visitor_status visit_enter(ir_assignment *ir);
135
136   void handle_rvalue(ir_rvalue **rvalue);
137
138   bool progress;
139};
140
141struct is_reduction_data {
142   ir_expression_operation operation;
143   const glsl_type *type;
144   unsigned num_expr;
145   bool is_reduction;
146   bool contains_constant;
147};
148
149} /* anonymous namespace */
150
151ir_visitor_status
152ir_rebalance_visitor::visit_enter(ir_assignment *ir)
153{
154   ir_variable *var = ir->lhs->variable_referenced();
155   if (var->data.invariant || var->data.precise) {
156      /* If we're assigning to an invariant variable, just bail.  Tree
157       * rebalancing (reassociation) isn't precision-safe.
158       */
159      return visit_continue_with_parent;
160   } else {
161      return visit_continue;
162   }
163}
164
165static bool
166is_reduction_operation(ir_expression_operation operation)
167{
168   switch (operation) {
169   case ir_binop_add:
170   case ir_binop_mul:
171   case ir_binop_bit_and:
172   case ir_binop_bit_xor:
173   case ir_binop_bit_or:
174   case ir_binop_logic_and:
175   case ir_binop_logic_xor:
176   case ir_binop_logic_or:
177   case ir_binop_min:
178   case ir_binop_max:
179      return true;
180   default:
181      return false;
182   }
183}
184
185/* Note that this function does not attempt to recognize that reduction trees
186 * are already balanced.
187 *
188 * We return false from this function for a number of reasons other than an
189 * expression tree not being a mathematical reduction. Namely,
190 *
191 *    - if the tree contains multiple constants that we may be able to combine.
192 *    - if the tree contains matrices:
193 *       - they might contain vec4's with many constant components that we can
194 *         simplify after splitting.
195 *       - applying the matrix chain ordering optimization is more than just
196 *         balancing an expression tree.
197 *    - if the tree contains operations on multiple types.
198 *    - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c
199 *      would trick the visiting pass.
200 */
201static void
202is_reduction(ir_instruction *ir, void *data)
203{
204   struct is_reduction_data *ird = (struct is_reduction_data *)data;
205   if (!ird->is_reduction)
206      return;
207
208   /* We don't want to balance a tree that contains multiple constants, since
209    * we'll be able to constant fold them if they're not in separate subtrees.
210    */
211   if (ir->as_constant()) {
212      if (ird->contains_constant) {
213         ird->is_reduction = false;
214      }
215      ird->contains_constant = true;
216      return;
217   }
218
219   /* Array/record dereferences have subtrees that are not part of the expr
220    * tree we're balancing. Skip trees containing them.
221    */
222   if (ir->ir_type == ir_type_dereference_array ||
223       ir->ir_type == ir_type_dereference_record) {
224      ird->is_reduction = false;
225      return;
226   }
227
228   ir_expression *expr = ir->as_expression();
229   if (!expr)
230      return;
231
232   /* Non-constant matrices might still contain constant vec4 that we can
233    * constant fold once split up. Handling matrices will need some more
234    * work.
235    */
236   if (expr->type->is_matrix() ||
237       expr->operands[0]->type->is_matrix() ||
238       (expr->operands[1] && expr->operands[1]->type->is_matrix())) {
239      ird->is_reduction = false;
240      return;
241   }
242
243   if (ird->type != NULL && ird->type != expr->type) {
244      ird->is_reduction = false;
245      return;
246   }
247   ird->type = expr->type;
248
249   ird->num_expr++;
250   if (is_reduction_operation(expr->operation)) {
251      if (ird->operation != 0 && ird->operation != expr->operation)
252         ird->is_reduction = false;
253      ird->operation = expr->operation;
254   } else {
255      ird->is_reduction = false;
256   }
257}
258
259static ir_rvalue *
260handle_expression(ir_expression *expr)
261{
262   struct is_reduction_data ird;
263   ird.operation = (ir_expression_operation)0;
264   ird.type = NULL;
265   ird.num_expr = 0;
266   ird.is_reduction = true;
267   ird.contains_constant = false;
268
269   visit_tree(expr, is_reduction, (void *)&ird);
270
271   if (ird.is_reduction && ird.num_expr > 2) {
272      ir_constant z = ir_constant(0.0f);
273      ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr);
274
275      unsigned size = tree_to_vine(&pseudo_root);
276      vine_to_tree(&pseudo_root, size);
277
278      expr = (ir_expression *)pseudo_root.operands[1];
279   }
280   return expr;
281}
282
283static void
284update_types(ir_instruction *ir, void *)
285{
286   ir_expression *expr = ir->as_expression();
287   if (!expr)
288      return;
289
290   const glsl_type *const new_type =
291      glsl_type::get_instance(expr->type->base_type,
292                              MAX2(expr->operands[0]->type->vector_elements,
293                                   expr->operands[1]->type->vector_elements),
294                              1);
295   assert(new_type != glsl_type::error_type);
296   expr->type = new_type;
297}
298
299void
300ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue)
301{
302   if (!*rvalue)
303      return;
304
305   ir_expression *expr = (*rvalue)->as_expression();
306   if (!expr || !is_reduction_operation(expr->operation))
307      return;
308
309   ir_rvalue *new_rvalue = handle_expression(expr);
310
311   /* If we failed to rebalance the tree (e.g., because it wasn't a reduction,
312    * or some other set of cases) new_rvalue will point to the same root as
313    * before.
314    *
315    * Similarly, if the tree rooted at *rvalue was a reduction and was already
316    * balanced, the algorithm will rearrange the tree but will ultimately
317    * return an identical tree, so this check will handle that as well and
318    * will not set progress = true.
319    */
320   if (new_rvalue == *rvalue)
321      return;
322
323   visit_tree(new_rvalue, NULL, NULL, update_types);
324
325   *rvalue = new_rvalue;
326   this->progress = true;
327}
328
329bool
330do_rebalance_tree(exec_list *instructions)
331{
332   ir_rebalance_visitor v;
333
334   v.run(instructions);
335
336   return v.progress;
337}
338