1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2012 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Eric Anholt <eric@anholt.net> 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci */ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include "brw_vec4.h" 29bf215546Sopenharmony_ci#include "brw_vec4_live_variables.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ciusing namespace brw; 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci#define MAX_INSTRUCTION (1 << 30) 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_ci/** @file brw_vec4_live_variables.cpp 36bf215546Sopenharmony_ci * 37bf215546Sopenharmony_ci * Support for computing at the basic block level which variables 38bf215546Sopenharmony_ci * (virtual GRFs in our case) are live at entry and exit. 39bf215546Sopenharmony_ci * 40bf215546Sopenharmony_ci * See Muchnick's Advanced Compiler Design and Implementation, section 41bf215546Sopenharmony_ci * 14.1 (p444). 42bf215546Sopenharmony_ci */ 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci/** 45bf215546Sopenharmony_ci * Sets up the use/def arrays and block-local approximation of the live ranges. 46bf215546Sopenharmony_ci * 47bf215546Sopenharmony_ci * The basic-block-level live variable analysis needs to know which 48bf215546Sopenharmony_ci * variables get used before they're completely defined, and which 49bf215546Sopenharmony_ci * variables are completely defined before they're used. 50bf215546Sopenharmony_ci * 51bf215546Sopenharmony_ci * We independently track each channel of a vec4. This is because we need to 52bf215546Sopenharmony_ci * be able to recognize a sequence like: 53bf215546Sopenharmony_ci * 54bf215546Sopenharmony_ci * ... 55bf215546Sopenharmony_ci * DP4 tmp.x a b; 56bf215546Sopenharmony_ci * DP4 tmp.y c d; 57bf215546Sopenharmony_ci * MUL result.xy tmp.xy e.xy 58bf215546Sopenharmony_ci * ... 59bf215546Sopenharmony_ci * 60bf215546Sopenharmony_ci * as having tmp live only across that sequence (assuming it's used nowhere 61bf215546Sopenharmony_ci * else), because it's a common pattern. A more conservative approach that 62bf215546Sopenharmony_ci * doesn't get tmp marked a deffed in this block will tend to result in 63bf215546Sopenharmony_ci * spilling. 64bf215546Sopenharmony_ci */ 65bf215546Sopenharmony_civoid 66bf215546Sopenharmony_civec4_live_variables::setup_def_use() 67bf215546Sopenharmony_ci{ 68bf215546Sopenharmony_ci int ip = 0; 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci foreach_block (block, cfg) { 71bf215546Sopenharmony_ci assert(ip == block->start_ip); 72bf215546Sopenharmony_ci if (block->num > 0) 73bf215546Sopenharmony_ci assert(cfg->blocks[block->num - 1]->end_ip == ip - 1); 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci foreach_inst_in_block(vec4_instruction, inst, block) { 76bf215546Sopenharmony_ci struct block_data *bd = &block_data[block->num]; 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci /* Set up the instruction uses. */ 79bf215546Sopenharmony_ci for (unsigned int i = 0; i < 3; i++) { 80bf215546Sopenharmony_ci if (inst->src[i].file == VGRF) { 81bf215546Sopenharmony_ci for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) { 82bf215546Sopenharmony_ci for (int c = 0; c < 4; c++) { 83bf215546Sopenharmony_ci const unsigned v = var_from_reg(alloc, inst->src[i], c, j); 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci start[v] = MIN2(start[v], ip); 86bf215546Sopenharmony_ci end[v] = ip; 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci if (!BITSET_TEST(bd->def, v)) 89bf215546Sopenharmony_ci BITSET_SET(bd->use, v); 90bf215546Sopenharmony_ci } 91bf215546Sopenharmony_ci } 92bf215546Sopenharmony_ci } 93bf215546Sopenharmony_ci } 94bf215546Sopenharmony_ci for (unsigned c = 0; c < 4; c++) { 95bf215546Sopenharmony_ci if (inst->reads_flag(c) && 96bf215546Sopenharmony_ci !BITSET_TEST(bd->flag_def, c)) { 97bf215546Sopenharmony_ci BITSET_SET(bd->flag_use, c); 98bf215546Sopenharmony_ci } 99bf215546Sopenharmony_ci } 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci /* Set up the instruction defs. */ 102bf215546Sopenharmony_ci if (inst->dst.file == VGRF) { 103bf215546Sopenharmony_ci for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) { 104bf215546Sopenharmony_ci for (int c = 0; c < 4; c++) { 105bf215546Sopenharmony_ci if (inst->dst.writemask & (1 << c)) { 106bf215546Sopenharmony_ci const unsigned v = var_from_reg(alloc, inst->dst, c, i); 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci start[v] = MIN2(start[v], ip); 109bf215546Sopenharmony_ci end[v] = ip; 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci /* Check for unconditional register writes, these are the 112bf215546Sopenharmony_ci * things that screen off preceding definitions of a 113bf215546Sopenharmony_ci * variable, and thus qualify for being in def[]. 114bf215546Sopenharmony_ci */ 115bf215546Sopenharmony_ci if ((!inst->predicate || inst->opcode == BRW_OPCODE_SEL) && 116bf215546Sopenharmony_ci !BITSET_TEST(bd->use, v)) 117bf215546Sopenharmony_ci BITSET_SET(bd->def, v); 118bf215546Sopenharmony_ci } 119bf215546Sopenharmony_ci } 120bf215546Sopenharmony_ci } 121bf215546Sopenharmony_ci } 122bf215546Sopenharmony_ci if (inst->writes_flag(devinfo)) { 123bf215546Sopenharmony_ci for (unsigned c = 0; c < 4; c++) { 124bf215546Sopenharmony_ci if ((inst->dst.writemask & (1 << c)) && 125bf215546Sopenharmony_ci !BITSET_TEST(bd->flag_use, c)) { 126bf215546Sopenharmony_ci BITSET_SET(bd->flag_def, c); 127bf215546Sopenharmony_ci } 128bf215546Sopenharmony_ci } 129bf215546Sopenharmony_ci } 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci ip++; 132bf215546Sopenharmony_ci } 133bf215546Sopenharmony_ci } 134bf215546Sopenharmony_ci} 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci/** 137bf215546Sopenharmony_ci * The algorithm incrementally sets bits in liveout and livein, 138bf215546Sopenharmony_ci * propagating it through control flow. It will eventually terminate 139bf215546Sopenharmony_ci * because it only ever adds bits, and stops when no bits are added in 140bf215546Sopenharmony_ci * a pass. 141bf215546Sopenharmony_ci */ 142bf215546Sopenharmony_civoid 143bf215546Sopenharmony_civec4_live_variables::compute_live_variables() 144bf215546Sopenharmony_ci{ 145bf215546Sopenharmony_ci bool cont = true; 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci while (cont) { 148bf215546Sopenharmony_ci cont = false; 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci foreach_block_reverse (block, cfg) { 151bf215546Sopenharmony_ci struct block_data *bd = &block_data[block->num]; 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci /* Update liveout */ 154bf215546Sopenharmony_ci foreach_list_typed(bblock_link, child_link, link, &block->children) { 155bf215546Sopenharmony_ci struct block_data *child_bd = &block_data[child_link->block->num]; 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci for (int i = 0; i < bitset_words; i++) { 158bf215546Sopenharmony_ci BITSET_WORD new_liveout = (child_bd->livein[i] & 159bf215546Sopenharmony_ci ~bd->liveout[i]); 160bf215546Sopenharmony_ci if (new_liveout) { 161bf215546Sopenharmony_ci bd->liveout[i] |= new_liveout; 162bf215546Sopenharmony_ci cont = true; 163bf215546Sopenharmony_ci } 164bf215546Sopenharmony_ci } 165bf215546Sopenharmony_ci BITSET_WORD new_liveout = (child_bd->flag_livein[0] & 166bf215546Sopenharmony_ci ~bd->flag_liveout[0]); 167bf215546Sopenharmony_ci if (new_liveout) { 168bf215546Sopenharmony_ci bd->flag_liveout[0] |= new_liveout; 169bf215546Sopenharmony_ci cont = true; 170bf215546Sopenharmony_ci } 171bf215546Sopenharmony_ci } 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci /* Update livein */ 174bf215546Sopenharmony_ci for (int i = 0; i < bitset_words; i++) { 175bf215546Sopenharmony_ci BITSET_WORD new_livein = (bd->use[i] | 176bf215546Sopenharmony_ci (bd->liveout[i] & 177bf215546Sopenharmony_ci ~bd->def[i])); 178bf215546Sopenharmony_ci if (new_livein & ~bd->livein[i]) { 179bf215546Sopenharmony_ci bd->livein[i] |= new_livein; 180bf215546Sopenharmony_ci cont = true; 181bf215546Sopenharmony_ci } 182bf215546Sopenharmony_ci } 183bf215546Sopenharmony_ci BITSET_WORD new_livein = (bd->flag_use[0] | 184bf215546Sopenharmony_ci (bd->flag_liveout[0] & 185bf215546Sopenharmony_ci ~bd->flag_def[0])); 186bf215546Sopenharmony_ci if (new_livein & ~bd->flag_livein[0]) { 187bf215546Sopenharmony_ci bd->flag_livein[0] |= new_livein; 188bf215546Sopenharmony_ci cont = true; 189bf215546Sopenharmony_ci } 190bf215546Sopenharmony_ci } 191bf215546Sopenharmony_ci } 192bf215546Sopenharmony_ci} 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci/** 195bf215546Sopenharmony_ci * Extend the start/end ranges for each variable to account for the 196bf215546Sopenharmony_ci * new information calculated from control flow. 197bf215546Sopenharmony_ci */ 198bf215546Sopenharmony_civoid 199bf215546Sopenharmony_civec4_live_variables::compute_start_end() 200bf215546Sopenharmony_ci{ 201bf215546Sopenharmony_ci foreach_block (block, cfg) { 202bf215546Sopenharmony_ci const struct block_data &bd = block_data[block->num]; 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci for (int i = 0; i < num_vars; i++) { 205bf215546Sopenharmony_ci if (BITSET_TEST(bd.livein, i)) { 206bf215546Sopenharmony_ci start[i] = MIN2(start[i], block->start_ip); 207bf215546Sopenharmony_ci end[i] = MAX2(end[i], block->start_ip); 208bf215546Sopenharmony_ci } 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci if (BITSET_TEST(bd.liveout, i)) { 211bf215546Sopenharmony_ci start[i] = MIN2(start[i], block->end_ip); 212bf215546Sopenharmony_ci end[i] = MAX2(end[i], block->end_ip); 213bf215546Sopenharmony_ci } 214bf215546Sopenharmony_ci } 215bf215546Sopenharmony_ci } 216bf215546Sopenharmony_ci} 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_civec4_live_variables::vec4_live_variables(const backend_shader *s) 219bf215546Sopenharmony_ci : alloc(s->alloc), cfg(s->cfg) 220bf215546Sopenharmony_ci{ 221bf215546Sopenharmony_ci mem_ctx = ralloc_context(NULL); 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci num_vars = alloc.total_size * 8; 224bf215546Sopenharmony_ci start = ralloc_array(mem_ctx, int, num_vars); 225bf215546Sopenharmony_ci end = ralloc_array(mem_ctx, int, num_vars); 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci for (int i = 0; i < num_vars; i++) { 228bf215546Sopenharmony_ci start[i] = MAX_INSTRUCTION; 229bf215546Sopenharmony_ci end[i] = -1; 230bf215546Sopenharmony_ci } 231bf215546Sopenharmony_ci 232bf215546Sopenharmony_ci devinfo = s->compiler->devinfo; 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks); 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci bitset_words = BITSET_WORDS(num_vars); 237bf215546Sopenharmony_ci for (int i = 0; i < cfg->num_blocks; i++) { 238bf215546Sopenharmony_ci block_data[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); 239bf215546Sopenharmony_ci block_data[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); 240bf215546Sopenharmony_ci block_data[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); 241bf215546Sopenharmony_ci block_data[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci block_data[i].flag_def[0] = 0; 244bf215546Sopenharmony_ci block_data[i].flag_use[0] = 0; 245bf215546Sopenharmony_ci block_data[i].flag_livein[0] = 0; 246bf215546Sopenharmony_ci block_data[i].flag_liveout[0] = 0; 247bf215546Sopenharmony_ci } 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci setup_def_use(); 250bf215546Sopenharmony_ci compute_live_variables(); 251bf215546Sopenharmony_ci compute_start_end(); 252bf215546Sopenharmony_ci} 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_civec4_live_variables::~vec4_live_variables() 255bf215546Sopenharmony_ci{ 256bf215546Sopenharmony_ci ralloc_free(mem_ctx); 257bf215546Sopenharmony_ci} 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_cistatic bool 260bf215546Sopenharmony_cicheck_register_live_range(const vec4_live_variables *live, int ip, 261bf215546Sopenharmony_ci unsigned var, unsigned n) 262bf215546Sopenharmony_ci{ 263bf215546Sopenharmony_ci for (unsigned j = 0; j < n; j += 4) { 264bf215546Sopenharmony_ci if (var + j >= unsigned(live->num_vars) || 265bf215546Sopenharmony_ci live->start[var + j] > ip || live->end[var + j] < ip) 266bf215546Sopenharmony_ci return false; 267bf215546Sopenharmony_ci } 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci return true; 270bf215546Sopenharmony_ci} 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_cibool 273bf215546Sopenharmony_civec4_live_variables::validate(const backend_shader *s) const 274bf215546Sopenharmony_ci{ 275bf215546Sopenharmony_ci unsigned ip = 0; 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci foreach_block_and_inst(block, vec4_instruction, inst, s->cfg) { 278bf215546Sopenharmony_ci for (unsigned c = 0; c < 4; c++) { 279bf215546Sopenharmony_ci if (inst->dst.writemask & (1 << c)) { 280bf215546Sopenharmony_ci for (unsigned i = 0; i < 3; i++) { 281bf215546Sopenharmony_ci if (inst->src[i].file == VGRF && 282bf215546Sopenharmony_ci !check_register_live_range(this, ip, 283bf215546Sopenharmony_ci var_from_reg(alloc, inst->src[i], c), 284bf215546Sopenharmony_ci regs_read(inst, i))) 285bf215546Sopenharmony_ci return false; 286bf215546Sopenharmony_ci } 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci if (inst->dst.file == VGRF && 289bf215546Sopenharmony_ci !check_register_live_range(this, ip, 290bf215546Sopenharmony_ci var_from_reg(alloc, inst->dst, c), 291bf215546Sopenharmony_ci regs_written(inst))) 292bf215546Sopenharmony_ci return false; 293bf215546Sopenharmony_ci } 294bf215546Sopenharmony_ci } 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci ip++; 297bf215546Sopenharmony_ci } 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_ci return true; 300bf215546Sopenharmony_ci} 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ciint 303bf215546Sopenharmony_civec4_live_variables::var_range_start(unsigned v, unsigned n) const 304bf215546Sopenharmony_ci{ 305bf215546Sopenharmony_ci int ip = INT_MAX; 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci for (unsigned i = 0; i < n; i++) 308bf215546Sopenharmony_ci ip = MIN2(ip, start[v + i]); 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci return ip; 311bf215546Sopenharmony_ci} 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ciint 314bf215546Sopenharmony_civec4_live_variables::var_range_end(unsigned v, unsigned n) const 315bf215546Sopenharmony_ci{ 316bf215546Sopenharmony_ci int ip = INT_MIN; 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci for (unsigned i = 0; i < n; i++) 319bf215546Sopenharmony_ci ip = MAX2(ip, end[v + i]); 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci return ip; 322bf215546Sopenharmony_ci} 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_cibool 325bf215546Sopenharmony_civec4_live_variables::vgrfs_interfere(int a, int b) const 326bf215546Sopenharmony_ci{ 327bf215546Sopenharmony_ci return !((var_range_end(8 * alloc.offsets[a], 8 * alloc.sizes[a]) <= 328bf215546Sopenharmony_ci var_range_start(8 * alloc.offsets[b], 8 * alloc.sizes[b])) || 329bf215546Sopenharmony_ci (var_range_end(8 * alloc.offsets[b], 8 * alloc.sizes[b]) <= 330bf215546Sopenharmony_ci var_range_start(8 * alloc.offsets[a], 8 * alloc.sizes[a]))); 331bf215546Sopenharmony_ci} 332