1/*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *
26 */
27
28#include "brw_vec4.h"
29#include "brw_vec4_live_variables.h"
30
31using namespace brw;
32
33#define MAX_INSTRUCTION (1 << 30)
34
35/** @file brw_vec4_live_variables.cpp
36 *
37 * Support for computing at the basic block level which variables
38 * (virtual GRFs in our case) are live at entry and exit.
39 *
40 * See Muchnick's Advanced Compiler Design and Implementation, section
41 * 14.1 (p444).
42 */
43
44/**
45 * Sets up the use/def arrays and block-local approximation of the live ranges.
46 *
47 * The basic-block-level live variable analysis needs to know which
48 * variables get used before they're completely defined, and which
49 * variables are completely defined before they're used.
50 *
51 * We independently track each channel of a vec4.  This is because we need to
52 * be able to recognize a sequence like:
53 *
54 * ...
55 * DP4 tmp.x a b;
56 * DP4 tmp.y c d;
57 * MUL result.xy tmp.xy e.xy
58 * ...
59 *
60 * as having tmp live only across that sequence (assuming it's used nowhere
61 * else), because it's a common pattern.  A more conservative approach that
62 * doesn't get tmp marked a deffed in this block will tend to result in
63 * spilling.
64 */
65void
66vec4_live_variables::setup_def_use()
67{
68   int ip = 0;
69
70   foreach_block (block, cfg) {
71      assert(ip == block->start_ip);
72      if (block->num > 0)
73	 assert(cfg->blocks[block->num - 1]->end_ip == ip - 1);
74
75      foreach_inst_in_block(vec4_instruction, inst, block) {
76         struct block_data *bd = &block_data[block->num];
77
78         /* Set up the instruction uses. */
79	 for (unsigned int i = 0; i < 3; i++) {
80	    if (inst->src[i].file == VGRF) {
81               for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) {
82                  for (int c = 0; c < 4; c++) {
83                     const unsigned v = var_from_reg(alloc, inst->src[i], c, j);
84
85                     start[v] = MIN2(start[v], ip);
86                     end[v] = ip;
87
88                     if (!BITSET_TEST(bd->def, v))
89                        BITSET_SET(bd->use, v);
90                  }
91               }
92	    }
93	 }
94         for (unsigned c = 0; c < 4; c++) {
95            if (inst->reads_flag(c) &&
96                !BITSET_TEST(bd->flag_def, c)) {
97               BITSET_SET(bd->flag_use, c);
98            }
99         }
100
101         /* Set up the instruction defs. */
102         if (inst->dst.file == VGRF) {
103            for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
104               for (int c = 0; c < 4; c++) {
105                  if (inst->dst.writemask & (1 << c)) {
106                     const unsigned v = var_from_reg(alloc, inst->dst, c, i);
107
108                     start[v] = MIN2(start[v], ip);
109                     end[v] = ip;
110
111                     /* Check for unconditional register writes, these are the
112                      * things that screen off preceding definitions of a
113                      * variable, and thus qualify for being in def[].
114                      */
115                     if ((!inst->predicate || inst->opcode == BRW_OPCODE_SEL) &&
116                         !BITSET_TEST(bd->use, v))
117                        BITSET_SET(bd->def, v);
118                  }
119               }
120            }
121         }
122         if (inst->writes_flag(devinfo)) {
123            for (unsigned c = 0; c < 4; c++) {
124               if ((inst->dst.writemask & (1 << c)) &&
125                   !BITSET_TEST(bd->flag_use, c)) {
126                  BITSET_SET(bd->flag_def, c);
127               }
128            }
129         }
130
131	 ip++;
132      }
133   }
134}
135
136/**
137 * The algorithm incrementally sets bits in liveout and livein,
138 * propagating it through control flow.  It will eventually terminate
139 * because it only ever adds bits, and stops when no bits are added in
140 * a pass.
141 */
142void
143vec4_live_variables::compute_live_variables()
144{
145   bool cont = true;
146
147   while (cont) {
148      cont = false;
149
150      foreach_block_reverse (block, cfg) {
151         struct block_data *bd = &block_data[block->num];
152
153	 /* Update liveout */
154	 foreach_list_typed(bblock_link, child_link, link, &block->children) {
155       struct block_data *child_bd = &block_data[child_link->block->num];
156
157	    for (int i = 0; i < bitset_words; i++) {
158               BITSET_WORD new_liveout = (child_bd->livein[i] &
159                                          ~bd->liveout[i]);
160               if (new_liveout) {
161                  bd->liveout[i] |= new_liveout;
162		  cont = true;
163	       }
164	    }
165            BITSET_WORD new_liveout = (child_bd->flag_livein[0] &
166                                       ~bd->flag_liveout[0]);
167            if (new_liveout) {
168               bd->flag_liveout[0] |= new_liveout;
169               cont = true;
170            }
171	 }
172
173         /* Update livein */
174         for (int i = 0; i < bitset_words; i++) {
175            BITSET_WORD new_livein = (bd->use[i] |
176                                      (bd->liveout[i] &
177                                       ~bd->def[i]));
178            if (new_livein & ~bd->livein[i]) {
179               bd->livein[i] |= new_livein;
180               cont = true;
181            }
182         }
183         BITSET_WORD new_livein = (bd->flag_use[0] |
184                                   (bd->flag_liveout[0] &
185                                    ~bd->flag_def[0]));
186         if (new_livein & ~bd->flag_livein[0]) {
187            bd->flag_livein[0] |= new_livein;
188            cont = true;
189         }
190      }
191   }
192}
193
194/**
195 * Extend the start/end ranges for each variable to account for the
196 * new information calculated from control flow.
197 */
198void
199vec4_live_variables::compute_start_end()
200{
201   foreach_block (block, cfg) {
202      const struct block_data &bd = block_data[block->num];
203
204      for (int i = 0; i < num_vars; i++) {
205         if (BITSET_TEST(bd.livein, i)) {
206            start[i] = MIN2(start[i], block->start_ip);
207            end[i] = MAX2(end[i], block->start_ip);
208         }
209
210         if (BITSET_TEST(bd.liveout, i)) {
211            start[i] = MIN2(start[i], block->end_ip);
212            end[i] = MAX2(end[i], block->end_ip);
213         }
214      }
215   }
216}
217
218vec4_live_variables::vec4_live_variables(const backend_shader *s)
219   : alloc(s->alloc), cfg(s->cfg)
220{
221   mem_ctx = ralloc_context(NULL);
222
223   num_vars = alloc.total_size * 8;
224   start = ralloc_array(mem_ctx, int, num_vars);
225   end = ralloc_array(mem_ctx, int, num_vars);
226
227   for (int i = 0; i < num_vars; i++) {
228      start[i] = MAX_INSTRUCTION;
229      end[i] = -1;
230   }
231
232   devinfo = s->compiler->devinfo;
233
234   block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
235
236   bitset_words = BITSET_WORDS(num_vars);
237   for (int i = 0; i < cfg->num_blocks; i++) {
238      block_data[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
239      block_data[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
240      block_data[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
241      block_data[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
242
243      block_data[i].flag_def[0] = 0;
244      block_data[i].flag_use[0] = 0;
245      block_data[i].flag_livein[0] = 0;
246      block_data[i].flag_liveout[0] = 0;
247   }
248
249   setup_def_use();
250   compute_live_variables();
251   compute_start_end();
252}
253
254vec4_live_variables::~vec4_live_variables()
255{
256   ralloc_free(mem_ctx);
257}
258
259static bool
260check_register_live_range(const vec4_live_variables *live, int ip,
261                          unsigned var, unsigned n)
262{
263   for (unsigned j = 0; j < n; j += 4) {
264      if (var + j >= unsigned(live->num_vars) ||
265          live->start[var + j] > ip || live->end[var + j] < ip)
266         return false;
267   }
268
269   return true;
270}
271
272bool
273vec4_live_variables::validate(const backend_shader *s) const
274{
275   unsigned ip = 0;
276
277   foreach_block_and_inst(block, vec4_instruction, inst, s->cfg) {
278      for (unsigned c = 0; c < 4; c++) {
279         if (inst->dst.writemask & (1 << c)) {
280            for (unsigned i = 0; i < 3; i++) {
281               if (inst->src[i].file == VGRF &&
282                   !check_register_live_range(this, ip,
283                                              var_from_reg(alloc, inst->src[i], c),
284                                              regs_read(inst, i)))
285                  return false;
286            }
287
288            if (inst->dst.file == VGRF &&
289                !check_register_live_range(this, ip,
290                                           var_from_reg(alloc, inst->dst, c),
291                                           regs_written(inst)))
292               return false;
293         }
294      }
295
296      ip++;
297   }
298
299   return true;
300}
301
302int
303vec4_live_variables::var_range_start(unsigned v, unsigned n) const
304{
305   int ip = INT_MAX;
306
307   for (unsigned i = 0; i < n; i++)
308      ip = MIN2(ip, start[v + i]);
309
310   return ip;
311}
312
313int
314vec4_live_variables::var_range_end(unsigned v, unsigned n) const
315{
316   int ip = INT_MIN;
317
318   for (unsigned i = 0; i < n; i++)
319      ip = MAX2(ip, end[v + i]);
320
321   return ip;
322}
323
324bool
325vec4_live_variables::vgrfs_interfere(int a, int b) const
326{
327   return !((var_range_end(8 * alloc.offsets[a], 8 * alloc.sizes[a]) <=
328             var_range_start(8 * alloc.offsets[b], 8 * alloc.sizes[b])) ||
329            (var_range_end(8 * alloc.offsets[b], 8 * alloc.sizes[b]) <=
330             var_range_start(8 * alloc.offsets[a], 8 * alloc.sizes[a])));
331}
332