1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2012, 2013, 2014 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "brw_vec4.h" 25bf215546Sopenharmony_ci#include "brw_vec4_live_variables.h" 26bf215546Sopenharmony_ci#include "brw_cfg.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ciusing namespace brw; 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci/** @file brw_vec4_cse.cpp 31bf215546Sopenharmony_ci * 32bf215546Sopenharmony_ci * Support for local common subexpression elimination. 33bf215546Sopenharmony_ci * 34bf215546Sopenharmony_ci * See Muchnick's Advanced Compiler Design and Implementation, section 35bf215546Sopenharmony_ci * 13.1 (p378). 36bf215546Sopenharmony_ci */ 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_cinamespace { 39bf215546Sopenharmony_cistruct aeb_entry : public exec_node { 40bf215546Sopenharmony_ci /** The instruction that generates the expression value. */ 41bf215546Sopenharmony_ci vec4_instruction *generator; 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci /** The temporary where the value is stored. */ 44bf215546Sopenharmony_ci src_reg tmp; 45bf215546Sopenharmony_ci}; 46bf215546Sopenharmony_ci} 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_cistatic bool 49bf215546Sopenharmony_ciis_expression(const vec4_instruction *const inst) 50bf215546Sopenharmony_ci{ 51bf215546Sopenharmony_ci switch (inst->opcode) { 52bf215546Sopenharmony_ci case BRW_OPCODE_MOV: 53bf215546Sopenharmony_ci case BRW_OPCODE_SEL: 54bf215546Sopenharmony_ci case BRW_OPCODE_NOT: 55bf215546Sopenharmony_ci case BRW_OPCODE_AND: 56bf215546Sopenharmony_ci case BRW_OPCODE_OR: 57bf215546Sopenharmony_ci case BRW_OPCODE_XOR: 58bf215546Sopenharmony_ci case BRW_OPCODE_SHR: 59bf215546Sopenharmony_ci case BRW_OPCODE_SHL: 60bf215546Sopenharmony_ci case BRW_OPCODE_ASR: 61bf215546Sopenharmony_ci case BRW_OPCODE_CMP: 62bf215546Sopenharmony_ci case BRW_OPCODE_CMPN: 63bf215546Sopenharmony_ci case BRW_OPCODE_ADD: 64bf215546Sopenharmony_ci case BRW_OPCODE_MUL: 65bf215546Sopenharmony_ci case SHADER_OPCODE_MULH: 66bf215546Sopenharmony_ci case BRW_OPCODE_FRC: 67bf215546Sopenharmony_ci case BRW_OPCODE_RNDU: 68bf215546Sopenharmony_ci case BRW_OPCODE_RNDD: 69bf215546Sopenharmony_ci case BRW_OPCODE_RNDE: 70bf215546Sopenharmony_ci case BRW_OPCODE_RNDZ: 71bf215546Sopenharmony_ci case BRW_OPCODE_LINE: 72bf215546Sopenharmony_ci case BRW_OPCODE_PLN: 73bf215546Sopenharmony_ci case BRW_OPCODE_MAD: 74bf215546Sopenharmony_ci case BRW_OPCODE_LRP: 75bf215546Sopenharmony_ci case VEC4_OPCODE_UNPACK_UNIFORM: 76bf215546Sopenharmony_ci case SHADER_OPCODE_FIND_LIVE_CHANNEL: 77bf215546Sopenharmony_ci case SHADER_OPCODE_BROADCAST: 78bf215546Sopenharmony_ci case VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS: 79bf215546Sopenharmony_ci case VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS: 80bf215546Sopenharmony_ci return true; 81bf215546Sopenharmony_ci case SHADER_OPCODE_RCP: 82bf215546Sopenharmony_ci case SHADER_OPCODE_RSQ: 83bf215546Sopenharmony_ci case SHADER_OPCODE_SQRT: 84bf215546Sopenharmony_ci case SHADER_OPCODE_EXP2: 85bf215546Sopenharmony_ci case SHADER_OPCODE_LOG2: 86bf215546Sopenharmony_ci case SHADER_OPCODE_POW: 87bf215546Sopenharmony_ci case SHADER_OPCODE_INT_QUOTIENT: 88bf215546Sopenharmony_ci case SHADER_OPCODE_INT_REMAINDER: 89bf215546Sopenharmony_ci case SHADER_OPCODE_SIN: 90bf215546Sopenharmony_ci case SHADER_OPCODE_COS: 91bf215546Sopenharmony_ci return inst->mlen == 0; 92bf215546Sopenharmony_ci default: 93bf215546Sopenharmony_ci return false; 94bf215546Sopenharmony_ci } 95bf215546Sopenharmony_ci} 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_cistatic bool 98bf215546Sopenharmony_cioperands_match(const vec4_instruction *a, const vec4_instruction *b) 99bf215546Sopenharmony_ci{ 100bf215546Sopenharmony_ci const src_reg *xs = a->src; 101bf215546Sopenharmony_ci const src_reg *ys = b->src; 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci if (a->opcode == BRW_OPCODE_MAD) { 104bf215546Sopenharmony_ci return xs[0].equals(ys[0]) && 105bf215546Sopenharmony_ci ((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) || 106bf215546Sopenharmony_ci (xs[2].equals(ys[1]) && xs[1].equals(ys[2]))); 107bf215546Sopenharmony_ci } else if (a->opcode == BRW_OPCODE_MOV && 108bf215546Sopenharmony_ci xs[0].file == IMM && 109bf215546Sopenharmony_ci xs[0].type == BRW_REGISTER_TYPE_VF) { 110bf215546Sopenharmony_ci src_reg tmp_x = xs[0]; 111bf215546Sopenharmony_ci src_reg tmp_y = ys[0]; 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci /* Smash out the values that are not part of the writemask. Otherwise 114bf215546Sopenharmony_ci * the equals operator will fail due to mismatches in unused components. 115bf215546Sopenharmony_ci */ 116bf215546Sopenharmony_ci const unsigned ab_writemask = a->dst.writemask & b->dst.writemask; 117bf215546Sopenharmony_ci const uint32_t mask = ((ab_writemask & WRITEMASK_X) ? 0x000000ff : 0) | 118bf215546Sopenharmony_ci ((ab_writemask & WRITEMASK_Y) ? 0x0000ff00 : 0) | 119bf215546Sopenharmony_ci ((ab_writemask & WRITEMASK_Z) ? 0x00ff0000 : 0) | 120bf215546Sopenharmony_ci ((ab_writemask & WRITEMASK_W) ? 0xff000000 : 0); 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci tmp_x.ud &= mask; 123bf215546Sopenharmony_ci tmp_y.ud &= mask; 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci return tmp_x.equals(tmp_y); 126bf215546Sopenharmony_ci } else if (!a->is_commutative()) { 127bf215546Sopenharmony_ci return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]); 128bf215546Sopenharmony_ci } else { 129bf215546Sopenharmony_ci return (xs[0].equals(ys[0]) && xs[1].equals(ys[1])) || 130bf215546Sopenharmony_ci (xs[1].equals(ys[0]) && xs[0].equals(ys[1])); 131bf215546Sopenharmony_ci } 132bf215546Sopenharmony_ci} 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci/** 135bf215546Sopenharmony_ci * Checks if instructions match, exactly for sources, but loosely for 136bf215546Sopenharmony_ci * destination writemasks. 137bf215546Sopenharmony_ci * 138bf215546Sopenharmony_ci * \param 'a' is the generating expression from the AEB entry. 139bf215546Sopenharmony_ci * \param 'b' is the second occurrence of the expression that we're 140bf215546Sopenharmony_ci * considering eliminating. 141bf215546Sopenharmony_ci */ 142bf215546Sopenharmony_cistatic bool 143bf215546Sopenharmony_ciinstructions_match(vec4_instruction *a, vec4_instruction *b) 144bf215546Sopenharmony_ci{ 145bf215546Sopenharmony_ci return a->opcode == b->opcode && 146bf215546Sopenharmony_ci a->saturate == b->saturate && 147bf215546Sopenharmony_ci a->predicate == b->predicate && 148bf215546Sopenharmony_ci a->predicate_inverse == b->predicate_inverse && 149bf215546Sopenharmony_ci a->conditional_mod == b->conditional_mod && 150bf215546Sopenharmony_ci a->flag_subreg == b->flag_subreg && 151bf215546Sopenharmony_ci a->dst.type == b->dst.type && 152bf215546Sopenharmony_ci a->offset == b->offset && 153bf215546Sopenharmony_ci a->mlen == b->mlen && 154bf215546Sopenharmony_ci a->base_mrf == b->base_mrf && 155bf215546Sopenharmony_ci a->header_size == b->header_size && 156bf215546Sopenharmony_ci a->shadow_compare == b->shadow_compare && 157bf215546Sopenharmony_ci ((a->dst.writemask & b->dst.writemask) == a->dst.writemask) && 158bf215546Sopenharmony_ci a->force_writemask_all == b->force_writemask_all && 159bf215546Sopenharmony_ci a->size_written == b->size_written && 160bf215546Sopenharmony_ci a->exec_size == b->exec_size && 161bf215546Sopenharmony_ci a->group == b->group && 162bf215546Sopenharmony_ci operands_match(a, b); 163bf215546Sopenharmony_ci} 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_cibool 166bf215546Sopenharmony_civec4_visitor::opt_cse_local(bblock_t *block, const vec4_live_variables &live) 167bf215546Sopenharmony_ci{ 168bf215546Sopenharmony_ci bool progress = false; 169bf215546Sopenharmony_ci exec_list aeb; 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci void *cse_ctx = ralloc_context(NULL); 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci int ip = block->start_ip; 174bf215546Sopenharmony_ci foreach_inst_in_block (vec4_instruction, inst, block) { 175bf215546Sopenharmony_ci /* Skip some cases. */ 176bf215546Sopenharmony_ci if (is_expression(inst) && !inst->predicate && inst->mlen == 0 && 177bf215546Sopenharmony_ci ((inst->dst.file != ARF && inst->dst.file != FIXED_GRF) || 178bf215546Sopenharmony_ci inst->dst.is_null())) 179bf215546Sopenharmony_ci { 180bf215546Sopenharmony_ci bool found = false; 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci foreach_in_list_use_after(aeb_entry, entry, &aeb) { 183bf215546Sopenharmony_ci /* Match current instruction's expression against those in AEB. */ 184bf215546Sopenharmony_ci if (!(entry->generator->dst.is_null() && !inst->dst.is_null()) && 185bf215546Sopenharmony_ci instructions_match(inst, entry->generator)) { 186bf215546Sopenharmony_ci found = true; 187bf215546Sopenharmony_ci progress = true; 188bf215546Sopenharmony_ci break; 189bf215546Sopenharmony_ci } 190bf215546Sopenharmony_ci } 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci if (!found) { 193bf215546Sopenharmony_ci if (inst->opcode != BRW_OPCODE_MOV || 194bf215546Sopenharmony_ci (inst->opcode == BRW_OPCODE_MOV && 195bf215546Sopenharmony_ci inst->src[0].file == IMM && 196bf215546Sopenharmony_ci inst->src[0].type == BRW_REGISTER_TYPE_VF)) { 197bf215546Sopenharmony_ci /* Our first sighting of this expression. Create an entry. */ 198bf215546Sopenharmony_ci aeb_entry *entry = ralloc(cse_ctx, aeb_entry); 199bf215546Sopenharmony_ci entry->tmp = src_reg(); /* file will be BAD_FILE */ 200bf215546Sopenharmony_ci entry->generator = inst; 201bf215546Sopenharmony_ci aeb.push_tail(entry); 202bf215546Sopenharmony_ci } 203bf215546Sopenharmony_ci } else { 204bf215546Sopenharmony_ci /* This is at least our second sighting of this expression. 205bf215546Sopenharmony_ci * If we don't have a temporary already, make one. 206bf215546Sopenharmony_ci */ 207bf215546Sopenharmony_ci bool no_existing_temp = entry->tmp.file == BAD_FILE; 208bf215546Sopenharmony_ci if (no_existing_temp && !entry->generator->dst.is_null()) { 209bf215546Sopenharmony_ci entry->tmp = retype(src_reg(VGRF, alloc.allocate( 210bf215546Sopenharmony_ci regs_written(entry->generator)), 211bf215546Sopenharmony_ci NULL), inst->dst.type); 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci const unsigned width = entry->generator->exec_size; 214bf215546Sopenharmony_ci unsigned component_size = width * type_sz(entry->tmp.type); 215bf215546Sopenharmony_ci unsigned num_copy_movs = 216bf215546Sopenharmony_ci DIV_ROUND_UP(entry->generator->size_written, component_size); 217bf215546Sopenharmony_ci for (unsigned i = 0; i < num_copy_movs; ++i) { 218bf215546Sopenharmony_ci vec4_instruction *copy = 219bf215546Sopenharmony_ci MOV(offset(entry->generator->dst, width, i), 220bf215546Sopenharmony_ci offset(entry->tmp, width, i)); 221bf215546Sopenharmony_ci copy->exec_size = width; 222bf215546Sopenharmony_ci copy->group = entry->generator->group; 223bf215546Sopenharmony_ci copy->force_writemask_all = 224bf215546Sopenharmony_ci entry->generator->force_writemask_all; 225bf215546Sopenharmony_ci entry->generator->insert_after(block, copy); 226bf215546Sopenharmony_ci } 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci entry->generator->dst = dst_reg(entry->tmp); 229bf215546Sopenharmony_ci } 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci /* dest <- temp */ 232bf215546Sopenharmony_ci if (!inst->dst.is_null()) { 233bf215546Sopenharmony_ci assert(inst->dst.type == entry->tmp.type); 234bf215546Sopenharmony_ci const unsigned width = inst->exec_size; 235bf215546Sopenharmony_ci unsigned component_size = width * type_sz(inst->dst.type); 236bf215546Sopenharmony_ci unsigned num_copy_movs = 237bf215546Sopenharmony_ci DIV_ROUND_UP(inst->size_written, component_size); 238bf215546Sopenharmony_ci for (unsigned i = 0; i < num_copy_movs; ++i) { 239bf215546Sopenharmony_ci vec4_instruction *copy = 240bf215546Sopenharmony_ci MOV(offset(inst->dst, width, i), 241bf215546Sopenharmony_ci offset(entry->tmp, width, i)); 242bf215546Sopenharmony_ci copy->exec_size = inst->exec_size; 243bf215546Sopenharmony_ci copy->group = inst->group; 244bf215546Sopenharmony_ci copy->force_writemask_all = inst->force_writemask_all; 245bf215546Sopenharmony_ci inst->insert_before(block, copy); 246bf215546Sopenharmony_ci } 247bf215546Sopenharmony_ci } 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci /* Set our iterator so that next time through the loop inst->next 250bf215546Sopenharmony_ci * will get the instruction in the basic block after the one we've 251bf215546Sopenharmony_ci * removed. 252bf215546Sopenharmony_ci */ 253bf215546Sopenharmony_ci vec4_instruction *prev = (vec4_instruction *)inst->prev; 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci inst->remove(block); 256bf215546Sopenharmony_ci inst = prev; 257bf215546Sopenharmony_ci } 258bf215546Sopenharmony_ci } 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci foreach_in_list_safe(aeb_entry, entry, &aeb) { 261bf215546Sopenharmony_ci /* Kill all AEB entries that write a different value to or read from 262bf215546Sopenharmony_ci * the flag register if we just wrote it. 263bf215546Sopenharmony_ci */ 264bf215546Sopenharmony_ci if (inst->writes_flag(devinfo)) { 265bf215546Sopenharmony_ci if (entry->generator->reads_flag() || 266bf215546Sopenharmony_ci (entry->generator->writes_flag(devinfo) && 267bf215546Sopenharmony_ci !instructions_match(inst, entry->generator))) { 268bf215546Sopenharmony_ci entry->remove(); 269bf215546Sopenharmony_ci ralloc_free(entry); 270bf215546Sopenharmony_ci continue; 271bf215546Sopenharmony_ci } 272bf215546Sopenharmony_ci } 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci for (int i = 0; i < 3; i++) { 275bf215546Sopenharmony_ci src_reg *src = &entry->generator->src[i]; 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci /* Kill all AEB entries that use the destination we just 278bf215546Sopenharmony_ci * overwrote. 279bf215546Sopenharmony_ci */ 280bf215546Sopenharmony_ci if (inst->dst.file == entry->generator->src[i].file && 281bf215546Sopenharmony_ci inst->dst.nr == entry->generator->src[i].nr) { 282bf215546Sopenharmony_ci entry->remove(); 283bf215546Sopenharmony_ci ralloc_free(entry); 284bf215546Sopenharmony_ci break; 285bf215546Sopenharmony_ci } 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci /* Kill any AEB entries using registers that don't get reused any 288bf215546Sopenharmony_ci * more -- a sure sign they'll fail operands_match(). 289bf215546Sopenharmony_ci */ 290bf215546Sopenharmony_ci if (src->file == VGRF) { 291bf215546Sopenharmony_ci if (live.var_range_end(var_from_reg(alloc, dst_reg(*src)), 8) < ip) { 292bf215546Sopenharmony_ci entry->remove(); 293bf215546Sopenharmony_ci ralloc_free(entry); 294bf215546Sopenharmony_ci break; 295bf215546Sopenharmony_ci } 296bf215546Sopenharmony_ci } 297bf215546Sopenharmony_ci } 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci ip++; 301bf215546Sopenharmony_ci } 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci ralloc_free(cse_ctx); 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci return progress; 306bf215546Sopenharmony_ci} 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_cibool 309bf215546Sopenharmony_civec4_visitor::opt_cse() 310bf215546Sopenharmony_ci{ 311bf215546Sopenharmony_ci bool progress = false; 312bf215546Sopenharmony_ci const vec4_live_variables &live = live_analysis.require(); 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci foreach_block (block, cfg) { 315bf215546Sopenharmony_ci progress = opt_cse_local(block, live) || progress; 316bf215546Sopenharmony_ci } 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci if (progress) 319bf215546Sopenharmony_ci invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES); 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci return progress; 322bf215546Sopenharmony_ci} 323