1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file opt_tree_grafting.cpp
26 *
27 * Takes assignments to variables that are dereferenced only once and
28 * pastes the RHS expression into where the variable is dereferenced.
29 *
30 * In the process of various operations like function inlining and
31 * tertiary op handling, we'll end up with our expression trees having
32 * been chopped up into a series of assignments of short expressions
33 * to temps.  Other passes like ir_algebraic.cpp would prefer to see
34 * the deepest expression trees they can to try to optimize them.
35 *
36 * This is a lot like copy propagaton.  In comparison, copy
37 * propagation only acts on plain copies, not arbitrary expressions on
38 * the RHS.  Generally, we wouldn't want to go pasting some
39 * complicated expression everywhere it got used, though, so we don't
40 * handle expressions in that pass.
41 *
42 * The hard part is making sure we don't move an expression across
43 * some other assignments that would change the value of the
44 * expression.  So we split this into two passes: First, find the
45 * variables in our scope which are written to once and read once, and
46 * then go through basic blocks seeing if we find an opportunity to
47 * move those expressions safely.
48 */
49
50#include "ir.h"
51#include "ir_visitor.h"
52#include "ir_variable_refcount.h"
53#include "ir_basic_block.h"
54#include "ir_optimization.h"
55#include "compiler/glsl_types.h"
56
57namespace {
58
59static bool debug = false;
60
61class ir_tree_grafting_visitor : public ir_hierarchical_visitor {
62public:
63   ir_tree_grafting_visitor(ir_assignment *graft_assign,
64			    ir_variable *graft_var)
65   {
66      this->progress = false;
67      this->graft_assign = graft_assign;
68      this->graft_var = graft_var;
69   }
70
71   virtual ir_visitor_status visit_leave(class ir_assignment *);
72   virtual ir_visitor_status visit_enter(class ir_call *);
73   virtual ir_visitor_status visit_enter(class ir_expression *);
74   virtual ir_visitor_status visit_enter(class ir_function *);
75   virtual ir_visitor_status visit_enter(class ir_function_signature *);
76   virtual ir_visitor_status visit_enter(class ir_if *);
77   virtual ir_visitor_status visit_enter(class ir_loop *);
78   virtual ir_visitor_status visit_enter(class ir_swizzle *);
79   virtual ir_visitor_status visit_enter(class ir_texture *);
80
81   ir_visitor_status check_graft(ir_instruction *ir, ir_variable *var);
82
83   bool do_graft(ir_rvalue **rvalue);
84
85   bool progress;
86   ir_variable *graft_var;
87   ir_assignment *graft_assign;
88};
89
90struct find_deref_info {
91   ir_variable *var;
92   bool found;
93};
94
95void
96dereferences_variable_callback(ir_instruction *ir, void *data)
97{
98   struct find_deref_info *info = (struct find_deref_info *)data;
99   ir_dereference_variable *deref = ir->as_dereference_variable();
100
101   if (deref && deref->var == info->var)
102      info->found = true;
103}
104
105static bool
106dereferences_variable(ir_instruction *ir, ir_variable *var)
107{
108   struct find_deref_info info;
109
110   info.var = var;
111   info.found = false;
112
113   visit_tree(ir, dereferences_variable_callback, &info);
114
115   return info.found;
116}
117
118bool
119ir_tree_grafting_visitor::do_graft(ir_rvalue **rvalue)
120{
121   if (!*rvalue)
122      return false;
123
124   ir_dereference_variable *deref = (*rvalue)->as_dereference_variable();
125
126   if (!deref || deref->var != this->graft_var)
127      return false;
128
129   if (debug) {
130      fprintf(stderr, "GRAFTING:\n");
131      this->graft_assign->fprint(stderr);
132      fprintf(stderr, "\n");
133      fprintf(stderr, "TO:\n");
134      (*rvalue)->fprint(stderr);
135      fprintf(stderr, "\n");
136   }
137
138   this->graft_assign->remove();
139   *rvalue = this->graft_assign->rhs;
140
141   this->progress = true;
142   return true;
143}
144
145ir_visitor_status
146ir_tree_grafting_visitor::visit_enter(ir_loop *ir)
147{
148   (void)ir;
149   /* Do not traverse into the body of the loop since that is a
150    * different basic block.
151    */
152   return visit_stop;
153}
154
155/**
156 * Check if we can continue grafting after writing to a variable.  If the
157 * expression we're trying to graft references the variable, we must stop.
158 *
159 * \param ir   An instruction that writes to a variable.
160 * \param var  The variable being updated.
161 */
162ir_visitor_status
163ir_tree_grafting_visitor::check_graft(ir_instruction *ir, ir_variable *var)
164{
165   if (dereferences_variable(this->graft_assign->rhs, var)) {
166      if (debug) {
167	 fprintf(stderr, "graft killed by: ");
168	 ir->fprint(stderr);
169	 fprintf(stderr, "\n");
170      }
171      return visit_stop;
172   }
173
174   return visit_continue;
175}
176
177ir_visitor_status
178ir_tree_grafting_visitor::visit_leave(ir_assignment *ir)
179{
180   if (do_graft(&ir->rhs))
181      return visit_stop;
182
183   /* If this assignment updates a variable used in the assignment
184    * we're trying to graft, then we're done.
185    */
186   return check_graft(ir, ir->lhs->variable_referenced());
187}
188
189ir_visitor_status
190ir_tree_grafting_visitor::visit_enter(ir_function *ir)
191{
192   (void) ir;
193   return visit_continue_with_parent;
194}
195
196ir_visitor_status
197ir_tree_grafting_visitor::visit_enter(ir_function_signature *ir)
198{
199   (void)ir;
200   return visit_continue_with_parent;
201}
202
203ir_visitor_status
204ir_tree_grafting_visitor::visit_enter(ir_call *ir)
205{
206   foreach_two_lists(formal_node, &ir->callee->parameters,
207                     actual_node, &ir->actual_parameters) {
208      ir_variable *sig_param = (ir_variable *) formal_node;
209      ir_rvalue *ir = (ir_rvalue *) actual_node;
210      ir_rvalue *new_ir = ir;
211
212      if (sig_param->data.mode != ir_var_function_in
213          && sig_param->data.mode != ir_var_const_in) {
214	 if (check_graft(ir, sig_param) == visit_stop)
215	    return visit_stop;
216	 continue;
217      }
218
219      if (do_graft(&new_ir)) {
220	 ir->replace_with(new_ir);
221	 return visit_stop;
222      }
223   }
224
225   if (ir->return_deref && check_graft(ir, ir->return_deref->var) == visit_stop)
226      return visit_stop;
227
228   return visit_continue;
229}
230
231ir_visitor_status
232ir_tree_grafting_visitor::visit_enter(ir_expression *ir)
233{
234   for (unsigned int i = 0; i < ir->num_operands; i++) {
235      if (do_graft(&ir->operands[i]))
236	 return visit_stop;
237   }
238
239   return visit_continue;
240}
241
242ir_visitor_status
243ir_tree_grafting_visitor::visit_enter(ir_if *ir)
244{
245   if (do_graft(&ir->condition))
246      return visit_stop;
247
248   /* Do not traverse into the body of the if-statement since that is a
249    * different basic block.
250    */
251   return visit_continue_with_parent;
252}
253
254ir_visitor_status
255ir_tree_grafting_visitor::visit_enter(ir_swizzle *ir)
256{
257   if (do_graft(&ir->val))
258      return visit_stop;
259
260   return visit_continue;
261}
262
263ir_visitor_status
264ir_tree_grafting_visitor::visit_enter(ir_texture *ir)
265{
266   if (do_graft(&ir->coordinate) ||
267       do_graft(&ir->projector) ||
268       do_graft(&ir->offset) ||
269       do_graft(&ir->shadow_comparator) ||
270       do_graft(&ir->clamp))
271	 return visit_stop;
272
273   switch (ir->op) {
274   case ir_tex:
275   case ir_lod:
276   case ir_query_levels:
277   case ir_texture_samples:
278   case ir_samples_identical:
279      break;
280   case ir_txb:
281      if (do_graft(&ir->lod_info.bias))
282	 return visit_stop;
283      break;
284   case ir_txf:
285   case ir_txl:
286   case ir_txs:
287      if (do_graft(&ir->lod_info.lod))
288	 return visit_stop;
289      break;
290   case ir_txf_ms:
291      if (do_graft(&ir->lod_info.sample_index))
292         return visit_stop;
293      break;
294   case ir_txd:
295      if (do_graft(&ir->lod_info.grad.dPdx) ||
296	  do_graft(&ir->lod_info.grad.dPdy))
297	 return visit_stop;
298      break;
299   case ir_tg4:
300      if (do_graft(&ir->lod_info.component))
301         return visit_stop;
302      break;
303   }
304
305   return visit_continue;
306}
307
308struct tree_grafting_info {
309   ir_variable_refcount_visitor *refs;
310   bool progress;
311};
312
313static bool
314try_tree_grafting(ir_assignment *start,
315		  ir_variable *lhs_var,
316		  ir_instruction *bb_last)
317{
318   ir_tree_grafting_visitor v(start, lhs_var);
319
320   if (debug) {
321      fprintf(stderr, "trying to graft: ");
322      lhs_var->fprint(stderr);
323      fprintf(stderr, "\n");
324   }
325
326   for (ir_instruction *ir = (ir_instruction *)start->next;
327	ir != bb_last->next;
328	ir = (ir_instruction *)ir->next) {
329
330      if (debug) {
331	 fprintf(stderr, "- ");
332	 ir->fprint(stderr);
333	 fprintf(stderr, "\n");
334      }
335
336      ir_visitor_status s = ir->accept(&v);
337      if (s == visit_stop)
338	 return v.progress;
339   }
340
341   return false;
342}
343
344static void
345tree_grafting_basic_block(ir_instruction *bb_first,
346			  ir_instruction *bb_last,
347			  void *data)
348{
349   struct tree_grafting_info *info = (struct tree_grafting_info *)data;
350   ir_instruction *ir, *next;
351
352   for (ir = bb_first, next = (ir_instruction *)ir->next;
353	ir != bb_last->next;
354	ir = next, next = (ir_instruction *)ir->next) {
355      ir_assignment *assign = ir->as_assignment();
356
357      if (!assign)
358	 continue;
359
360      ir_variable *lhs_var = assign->whole_variable_written();
361      if (!lhs_var)
362	 continue;
363
364      if (lhs_var->data.mode == ir_var_function_out ||
365          lhs_var->data.mode == ir_var_function_inout ||
366          lhs_var->data.mode == ir_var_shader_out ||
367          lhs_var->data.mode == ir_var_shader_storage ||
368          lhs_var->data.mode == ir_var_shader_shared)
369         continue;
370
371      if (lhs_var->data.precise)
372         continue;
373
374      /* Do not graft sampler and image variables. This is a workaround to
375       * st/glsl_to_tgsi being unable to handle expression parameters to image
376       * intrinsics.
377       *
378       * Note that if this is ever fixed, we still need to skip grafting when
379       * any image layout qualifiers (including the image format) are set,
380       * since we must not lose those.
381       */
382      if (lhs_var->type->is_sampler() || lhs_var->type->is_image())
383         continue;
384
385      ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);
386
387      if (!entry->declaration ||
388	  entry->assigned_count != 1 ||
389	  entry->referenced_count != 2)
390	 continue;
391
392      /* Found a possibly graftable assignment.  Now, walk through the
393       * rest of the BB seeing if the deref is here, and if nothing interfered with
394       * pasting its expression's values in between.
395       */
396      info->progress |= try_tree_grafting(assign, lhs_var, bb_last);
397   }
398}
399
400} /* unnamed namespace */
401
402/**
403 * Does a copy propagation pass on the code present in the instruction stream.
404 */
405bool
406do_tree_grafting(exec_list *instructions)
407{
408   ir_variable_refcount_visitor refs;
409   struct tree_grafting_info info;
410
411   info.progress = false;
412   info.refs = &refs;
413
414   visit_list_elements(info.refs, instructions);
415
416   call_for_basic_blocks(instructions, tree_grafting_basic_block, &info);
417
418   return info.progress;
419}
420