1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file opt_copy_propagation_elements.cpp
26 *
27 * Replaces usage of recently-copied components of variables with the
28 * previous copy of the variable.
29 *
30 * This should reduce the number of MOV instructions in the generated
31 * programs and help triggering other optimizations that live in GLSL
32 * level.
33 */
34
35#include "ir.h"
36#include "ir_rvalue_visitor.h"
37#include "ir_basic_block.h"
38#include "ir_optimization.h"
39#include "compiler/glsl_types.h"
40#include "util/hash_table.h"
41#include "util/set.h"
42
43static bool debug = false;
44
45namespace {
46
47class acp_entry
48{
49public:
50   DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(acp_entry)
51
52   /* If set, rhs_full indicates that this ACP entry represents a
53    * whole-variable copy.  The rhs_element[] array will still be filled,
54    * to allow the swizzling from its components in case the variable
55    * was a vector (and to simplify some of the erase() and write_vector()
56    * logic).
57    */
58
59   ir_variable *rhs_full;
60   ir_variable *rhs_element[4];
61   unsigned rhs_channel[4];
62
63   /* Set of variables that use the variable associated with this acp_entry as
64    * RHS.  This has the "reverse references" of rhs_full/rhs_element.  It is
65    * used to speed up invalidating those references when the acp_entry
66    * changes.
67    */
68   set *dsts;
69};
70
71class copy_propagation_state {
72public:
73   DECLARE_RZALLOC_CXX_OPERATORS(copy_propagation_state);
74
75   static
76   copy_propagation_state* create(void *mem_ctx)
77   {
78      return new (mem_ctx) copy_propagation_state(NULL);
79   }
80
81   copy_propagation_state* clone()
82   {
83      return new (ralloc_parent(this)) copy_propagation_state(this);
84   }
85
86   void erase_all()
87   {
88      /* Individual elements were allocated from a linear allocator, so will
89       * be destroyed when the state is destroyed.
90       */
91      _mesa_hash_table_clear(acp, NULL);
92      fallback = NULL;
93   }
94
95   void erase(ir_variable *var, unsigned write_mask)
96   {
97      acp_entry *entry = pull_acp(var);
98      entry->rhs_full = NULL;
99
100      for (int i = 0; i < 4; i++) {
101         if (!entry->rhs_element[i])
102            continue;
103         if ((write_mask & (1 << i)) == 0)
104            continue;
105
106         ir_variable *to_remove = entry->rhs_element[i];
107         entry->rhs_element[i] = NULL;
108         remove_unused_var_from_dsts(entry, var, to_remove);
109      }
110
111      /* TODO: Check write mask, and possibly not clear everything. */
112
113      /* For any usage of our variable on the RHS, clear it out. */
114      set_foreach(entry->dsts, set_entry) {
115         ir_variable *dst_var = (ir_variable *)set_entry->key;
116         acp_entry *dst_entry = pull_acp(dst_var);
117         for (int i = 0; i < 4; i++) {
118            if (dst_entry->rhs_element[i] == var)
119               dst_entry->rhs_element[i] = NULL;
120         }
121         if (dst_entry->rhs_full == var)
122            dst_entry->rhs_full = NULL;
123         _mesa_set_remove(entry->dsts, set_entry);
124      }
125   }
126
127   acp_entry *read(ir_variable *var)
128   {
129      for (copy_propagation_state *s = this; s != NULL; s = s->fallback) {
130         hash_entry *ht_entry = _mesa_hash_table_search(s->acp, var);
131         if (ht_entry)
132            return (acp_entry *) ht_entry->data;
133      }
134      return NULL;
135   }
136
137   void write_elements(ir_variable *lhs, ir_variable *rhs, unsigned write_mask, int swizzle[4])
138   {
139      acp_entry *lhs_entry = pull_acp(lhs);
140      lhs_entry->rhs_full = NULL;
141
142      for (int i = 0; i < 4; i++) {
143         if ((write_mask & (1 << i)) == 0)
144            continue;
145         ir_variable *to_remove = lhs_entry->rhs_element[i];
146         lhs_entry->rhs_element[i] = rhs;
147         lhs_entry->rhs_channel[i] = swizzle[i];
148
149         remove_unused_var_from_dsts(lhs_entry, lhs, to_remove);
150      }
151
152      acp_entry *rhs_entry = pull_acp(rhs);
153      _mesa_set_add(rhs_entry->dsts, lhs);
154   }
155
156   void write_full(ir_variable *lhs, ir_variable *rhs)
157   {
158      acp_entry *lhs_entry = pull_acp(lhs);
159      if (lhs_entry->rhs_full == rhs)
160         return;
161
162      if (lhs_entry->rhs_full) {
163         remove_from_dsts(lhs_entry->rhs_full, lhs);
164      } else if (lhs->type->is_vector()) {
165         for (int i = 0; i < 4; i++) {
166            if (lhs_entry->rhs_element[i])
167               remove_from_dsts(lhs_entry->rhs_element[i], lhs);
168         }
169      }
170
171      lhs_entry->rhs_full = rhs;
172      acp_entry *rhs_entry = pull_acp(rhs);
173      _mesa_set_add(rhs_entry->dsts, lhs);
174
175      if (lhs->type->is_vector()) {
176         for (int i = 0; i < 4; i++) {
177            lhs_entry->rhs_element[i] = rhs;
178            lhs_entry->rhs_channel[i] = i;
179         }
180      }
181   }
182
183   void remove_unused_var_from_dsts(acp_entry *lhs_entry, ir_variable *lhs, ir_variable *var)
184   {
185      if (!var)
186         return;
187
188      /* If lhs still uses var, don't remove anything. */
189      for (int j = 0; j < 4; j++) {
190         if (lhs_entry->rhs_element[j] == var)
191            return;
192      }
193
194      acp_entry *element = pull_acp(var);
195      assert(element);
196      _mesa_set_remove_key(element->dsts, lhs);
197   }
198
199private:
200   explicit copy_propagation_state(copy_propagation_state *fallback)
201   {
202      this->fallback = fallback;
203      /* Use 'this' as context for the table, no explicit destruction
204       * needed later.
205       */
206      acp = _mesa_pointer_hash_table_create(this);
207      lin_ctx = linear_alloc_parent(this, 0);
208   }
209
210   acp_entry *pull_acp(ir_variable *var)
211   {
212      hash_entry *ht_entry = _mesa_hash_table_search(acp, var);
213      if (ht_entry)
214         return (acp_entry *) ht_entry->data;
215
216      /* If not found, create one and copy data from fallback if available. */
217      acp_entry *entry = new(lin_ctx) acp_entry();
218      _mesa_hash_table_insert(acp, var, entry);
219
220      bool found = false;
221      for (copy_propagation_state *s = fallback; s != NULL; s = s->fallback) {
222         hash_entry *fallback_ht_entry = _mesa_hash_table_search(s->acp, var);
223         if (fallback_ht_entry) {
224            acp_entry *fallback_entry = (acp_entry *) fallback_ht_entry->data;
225            *entry = *fallback_entry;
226            entry->dsts = _mesa_set_clone(fallback_entry->dsts, this);
227            found = true;
228            break;
229         }
230      }
231
232      if (!found) {
233         entry->dsts = _mesa_pointer_set_create(this);
234      }
235
236      return entry;
237   }
238
239   void
240   remove_from_dsts(ir_variable *var, ir_variable *to_remove)
241   {
242      acp_entry *entry = pull_acp(var);
243      assert(entry);
244      _mesa_set_remove_key(entry->dsts, to_remove);
245   }
246
247   /** Available Copy to Propagate table, from variable to the entry
248    *  containing the current sources that can be used. */
249   hash_table *acp;
250
251   /** When a state is cloned, entries are copied on demand from fallback. */
252   copy_propagation_state *fallback;
253
254   void *lin_ctx;
255};
256
257class kill_entry : public exec_node
258{
259public:
260   /* override operator new from exec_node */
261   DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(kill_entry)
262
263   kill_entry(ir_variable *var, int write_mask)
264   {
265      this->var = var;
266      this->write_mask = write_mask;
267   }
268
269   ir_variable *var;
270   unsigned int write_mask;
271};
272
273class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor {
274public:
275   ir_copy_propagation_elements_visitor()
276   {
277      this->progress = false;
278      this->killed_all = false;
279      this->mem_ctx = ralloc_context(NULL);
280      this->lin_ctx = linear_alloc_parent(this->mem_ctx, 0);
281      this->shader_mem_ctx = NULL;
282      this->kills = new(mem_ctx) exec_list;
283      this->state = copy_propagation_state::create(mem_ctx);
284   }
285   ~ir_copy_propagation_elements_visitor()
286   {
287      ralloc_free(mem_ctx);
288   }
289
290   virtual ir_visitor_status visit(ir_dereference_variable *);
291
292   void handle_loop(ir_loop *, bool keep_acp);
293   virtual ir_visitor_status visit_enter(class ir_loop *);
294   virtual ir_visitor_status visit_enter(class ir_function_signature *);
295   virtual ir_visitor_status visit_leave(class ir_assignment *);
296   virtual ir_visitor_status visit_enter(class ir_call *);
297   virtual ir_visitor_status visit_enter(class ir_if *);
298   virtual ir_visitor_status visit_leave(class ir_swizzle *);
299
300   void handle_rvalue(ir_rvalue **rvalue);
301
302   void add_copy(ir_assignment *ir);
303   void kill(kill_entry *k);
304   void handle_if_block(exec_list *instructions, exec_list *kills, bool *killed_all);
305
306   copy_propagation_state *state;
307
308   /**
309    * List of kill_entry: The variables whose values were killed in this
310    * block.
311    */
312   exec_list *kills;
313
314   bool progress;
315
316   bool killed_all;
317
318   /* Context for our local data structures. */
319   void *mem_ctx;
320   void *lin_ctx;
321   /* Context for allocating new shader nodes. */
322   void *shader_mem_ctx;
323};
324
325} /* unnamed namespace */
326
327ir_visitor_status
328ir_copy_propagation_elements_visitor::visit(ir_dereference_variable *ir)
329{
330   if (this->in_assignee)
331      return visit_continue;
332
333   const acp_entry *entry = state->read(ir->var);
334   if (entry && entry->rhs_full) {
335      ir->var = (ir_variable *) entry->rhs_full;
336      progress = true;
337   }
338
339   return visit_continue;
340}
341
342ir_visitor_status
343ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
344{
345   /* Treat entry into a function signature as a completely separate
346    * block.  Any instructions at global scope will be shuffled into
347    * main() at link time, so they're irrelevant to us.
348    */
349   exec_list *orig_kills = this->kills;
350   bool orig_killed_all = this->killed_all;
351
352   this->kills = new(mem_ctx) exec_list;
353   this->killed_all = false;
354
355   copy_propagation_state *orig_state = state;
356   this->state = copy_propagation_state::create(mem_ctx);
357
358   visit_list_elements(this, &ir->body);
359
360   delete this->state;
361   this->state = orig_state;
362
363   ralloc_free(this->kills);
364   this->kills = orig_kills;
365   this->killed_all = orig_killed_all;
366
367   return visit_continue_with_parent;
368}
369
370ir_visitor_status
371ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
372{
373   ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
374   ir_variable *var = ir->lhs->variable_referenced();
375
376   kill_entry *k;
377
378   if (lhs && var->type->is_vector())
379      k = new(this->lin_ctx) kill_entry(var, ir->write_mask);
380   else
381      k = new(this->lin_ctx) kill_entry(var, ~0);
382
383   kill(k);
384
385   add_copy(ir);
386
387   return visit_continue;
388}
389
390ir_visitor_status
391ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *)
392{
393   /* Don't visit the values of swizzles since they are handled while
394    * visiting the swizzle itself.
395    */
396   return visit_continue;
397}
398
399/**
400 * Replaces dereferences of ACP RHS variables with ACP LHS variables.
401 *
402 * This is where the actual copy propagation occurs.  Note that the
403 * rewriting of ir_dereference means that the ir_dereference instance
404 * must not be shared by multiple IR operations!
405 */
406void
407ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
408{
409   int swizzle_chan[4];
410   ir_dereference_variable *deref_var;
411   ir_variable *source[4] = {NULL, NULL, NULL, NULL};
412   int source_chan[4] = {0, 0, 0, 0};
413   int chans;
414   bool noop_swizzle = true;
415
416   if (!*ir)
417      return;
418
419   ir_swizzle *swizzle = (*ir)->as_swizzle();
420   if (swizzle) {
421      deref_var = swizzle->val->as_dereference_variable();
422      if (!deref_var)
423	 return;
424
425      swizzle_chan[0] = swizzle->mask.x;
426      swizzle_chan[1] = swizzle->mask.y;
427      swizzle_chan[2] = swizzle->mask.z;
428      swizzle_chan[3] = swizzle->mask.w;
429      chans = swizzle->type->vector_elements;
430   } else {
431      deref_var = (*ir)->as_dereference_variable();
432      if (!deref_var)
433	 return;
434
435      swizzle_chan[0] = 0;
436      swizzle_chan[1] = 1;
437      swizzle_chan[2] = 2;
438      swizzle_chan[3] = 3;
439      chans = deref_var->type->vector_elements;
440   }
441
442   if (this->in_assignee)
443      return;
444
445   ir_variable *var = deref_var->var;
446
447   /* Try to find ACP entries covering swizzle_chan[], hoping they're
448    * the same source variable.
449    */
450
451   const acp_entry *entry = state->read(var);
452   if (entry) {
453      for (int c = 0; c < chans; c++) {
454         unsigned index = swizzle_chan[c];
455         ir_variable *src = entry->rhs_element[index];
456         if (!src)
457            continue;
458         source[c] = src;
459         source_chan[c] = entry->rhs_channel[index];
460         if (source_chan[c] != swizzle_chan[c])
461            noop_swizzle = false;
462      }
463   }
464
465   /* Make sure all channels are copying from the same source variable. */
466   if (!source[0])
467      return;
468   for (int c = 1; c < chans; c++) {
469      if (source[c] != source[0])
470	 return;
471   }
472
473   if (!shader_mem_ctx)
474      shader_mem_ctx = ralloc_parent(deref_var);
475
476   /* Don't pointlessly replace the rvalue with itself (or a noop swizzle
477    * of itself, which would just be deleted by opt_noop_swizzle).
478    */
479   if (source[0] == var && noop_swizzle)
480      return;
481
482   if (debug) {
483      printf("Copy propagation from:\n");
484      (*ir)->print();
485   }
486
487   deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]);
488   *ir = new(shader_mem_ctx) ir_swizzle(deref_var,
489					source_chan[0],
490					source_chan[1],
491					source_chan[2],
492					source_chan[3],
493					chans);
494   progress = true;
495
496   if (debug) {
497      printf("to:\n");
498      (*ir)->print();
499      printf("\n");
500   }
501}
502
503
504ir_visitor_status
505ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
506{
507   /* Do copy propagation on call parameters, but skip any out params */
508   foreach_two_lists(formal_node, &ir->callee->parameters,
509                     actual_node, &ir->actual_parameters) {
510      ir_variable *sig_param = (ir_variable *) formal_node;
511      ir_rvalue *ir = (ir_rvalue *) actual_node;
512      if (sig_param->data.mode != ir_var_function_out
513          && sig_param->data.mode != ir_var_function_inout) {
514         ir->accept(this);
515      }
516   }
517
518   if (!ir->callee->is_intrinsic()) {
519      state->erase_all();
520      this->killed_all = true;
521   } else {
522      if (ir->return_deref) {
523         kill(new(this->lin_ctx) kill_entry(ir->return_deref->var, ~0));
524      }
525
526      foreach_two_lists(formal_node, &ir->callee->parameters,
527                        actual_node, &ir->actual_parameters) {
528         ir_variable *sig_param = (ir_variable *) formal_node;
529         if (sig_param->data.mode == ir_var_function_out ||
530             sig_param->data.mode == ir_var_function_inout) {
531            ir_rvalue *ir = (ir_rvalue *) actual_node;
532            ir_variable *var = ir->variable_referenced();
533            kill(new(this->lin_ctx) kill_entry(var, ~0));
534         }
535      }
536   }
537
538   return visit_continue_with_parent;
539}
540
541void
542ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions, exec_list *kills, bool *killed_all)
543{
544   exec_list *orig_kills = this->kills;
545   bool orig_killed_all = this->killed_all;
546
547   this->kills = kills;
548   this->killed_all = false;
549
550   /* Populate the initial acp with a copy of the original */
551   copy_propagation_state *orig_state = state;
552   this->state = orig_state->clone();
553
554   visit_list_elements(this, instructions);
555
556   delete this->state;
557   this->state = orig_state;
558
559   *killed_all = this->killed_all;
560   this->kills = orig_kills;
561   this->killed_all = orig_killed_all;
562}
563
564ir_visitor_status
565ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir)
566{
567   ir->condition->accept(this);
568
569   exec_list *new_kills = new(mem_ctx) exec_list;
570   bool then_killed_all = false;
571   bool else_killed_all = false;
572
573   handle_if_block(&ir->then_instructions, new_kills, &then_killed_all);
574   handle_if_block(&ir->else_instructions, new_kills, &else_killed_all);
575
576   if (then_killed_all || else_killed_all) {
577      state->erase_all();
578      killed_all = true;
579   } else {
580      foreach_in_list_safe(kill_entry, k, new_kills)
581         kill(k);
582   }
583
584   ralloc_free(new_kills);
585
586   /* handle_if_block() already descended into the children. */
587   return visit_continue_with_parent;
588}
589
590void
591ir_copy_propagation_elements_visitor::handle_loop(ir_loop *ir, bool keep_acp)
592{
593   exec_list *orig_kills = this->kills;
594   bool orig_killed_all = this->killed_all;
595
596   this->kills = new(mem_ctx) exec_list;
597   this->killed_all = false;
598
599   copy_propagation_state *orig_state = state;
600
601   if (keep_acp) {
602      /* Populate the initial acp with a copy of the original */
603      this->state = orig_state->clone();
604   } else {
605      this->state = copy_propagation_state::create(mem_ctx);
606   }
607
608   visit_list_elements(this, &ir->body_instructions);
609
610   delete this->state;
611   this->state = orig_state;
612
613   if (this->killed_all)
614      this->state->erase_all();
615
616   exec_list *new_kills = this->kills;
617   this->kills = orig_kills;
618   this->killed_all = this->killed_all || orig_killed_all;
619
620   foreach_in_list_safe(kill_entry, k, new_kills) {
621      kill(k);
622   }
623
624   ralloc_free(new_kills);
625}
626
627ir_visitor_status
628ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
629{
630   handle_loop(ir, false);
631   handle_loop(ir, true);
632
633   /* already descended into the children. */
634   return visit_continue_with_parent;
635}
636
637/* Remove any entries currently in the ACP for this kill. */
638void
639ir_copy_propagation_elements_visitor::kill(kill_entry *k)
640{
641   state->erase(k->var, k->write_mask);
642
643   /* If we were on a list, remove ourselves before inserting */
644   if (k->next)
645      k->remove();
646
647   this->kills->push_tail(k);
648}
649
650/**
651 * Adds directly-copied channels between vector variables to the available
652 * copy propagation list.
653 */
654void
655ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
656{
657   {
658      ir_variable *lhs_var = ir->whole_variable_written();
659      ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
660
661      if (lhs_var != NULL && rhs && rhs->var != NULL && lhs_var != rhs->var) {
662         if (lhs_var->data.mode == ir_var_shader_storage ||
663             lhs_var->data.mode == ir_var_shader_shared ||
664             rhs->var->data.mode == ir_var_shader_storage ||
665             rhs->var->data.mode == ir_var_shader_shared ||
666             lhs_var->data.precise != rhs->var->data.precise) {
667            return;
668         }
669         state->write_full(lhs_var, rhs->var);
670         return;
671      }
672   }
673
674   int orig_swizzle[4] = {0, 1, 2, 3};
675   int swizzle[4];
676
677   ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
678   if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector()))
679      return;
680
681   if (lhs->var->data.mode == ir_var_shader_storage ||
682       lhs->var->data.mode == ir_var_shader_shared)
683      return;
684
685   ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
686   if (!rhs) {
687      ir_swizzle *swiz = ir->rhs->as_swizzle();
688      if (!swiz)
689	 return;
690
691      rhs = swiz->val->as_dereference_variable();
692      if (!rhs)
693	 return;
694
695      orig_swizzle[0] = swiz->mask.x;
696      orig_swizzle[1] = swiz->mask.y;
697      orig_swizzle[2] = swiz->mask.z;
698      orig_swizzle[3] = swiz->mask.w;
699   }
700
701   if (rhs->var->data.mode == ir_var_shader_storage ||
702       rhs->var->data.mode == ir_var_shader_shared)
703      return;
704
705   /* Move the swizzle channels out to the positions they match in the
706    * destination.  We don't want to have to rewrite the swizzle[]
707    * array every time we clear a bit of the write_mask.
708    */
709   int j = 0;
710   for (int i = 0; i < 4; i++) {
711      if (ir->write_mask & (1 << i))
712	 swizzle[i] = orig_swizzle[j++];
713   }
714
715   int write_mask = ir->write_mask;
716   if (lhs->var == rhs->var) {
717      /* If this is a copy from the variable to itself, then we need
718       * to be sure not to include the updated channels from this
719       * instruction in the set of new source channels to be
720       * copy-propagated from.
721       */
722      for (int i = 0; i < 4; i++) {
723	 if (ir->write_mask & (1 << orig_swizzle[i]))
724	    write_mask &= ~(1 << i);
725      }
726   }
727
728   if (lhs->var->data.precise != rhs->var->data.precise)
729      return;
730
731   state->write_elements(lhs->var, rhs->var, write_mask, swizzle);
732}
733
734bool
735do_copy_propagation_elements(exec_list *instructions)
736{
737   ir_copy_propagation_elements_visitor v;
738
739   visit_list_elements(&v, instructions);
740
741   return v.progress;
742}
743