1/* 2 * Copyright © 2013 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "brw_fs.h" 25#include "brw_fs_live_variables.h" 26#include "brw_cfg.h" 27 28using namespace brw; 29 30/** @file brw_fs_saturate_propagation.cpp 31 * 32 * Implements a pass that propagates the SAT modifier from a MOV.SAT into the 33 * instruction that produced the source of the MOV.SAT, thereby allowing the 34 * MOV's src and dst to be coalesced and the MOV removed. 35 * 36 * For instance, 37 * 38 * ADD tmp, src0, src1 39 * MOV.SAT dst, tmp 40 * 41 * would be transformed into 42 * 43 * ADD.SAT tmp, src0, src1 44 * MOV dst, tmp 45 */ 46 47static bool 48opt_saturate_propagation_local(const fs_live_variables &live, bblock_t *block) 49{ 50 bool progress = false; 51 int ip = block->end_ip + 1; 52 53 foreach_inst_in_block_reverse(fs_inst, inst, block) { 54 ip--; 55 56 if (inst->opcode != BRW_OPCODE_MOV || 57 !inst->saturate || 58 inst->dst.file != VGRF || 59 inst->dst.type != inst->src[0].type || 60 inst->src[0].file != VGRF || 61 inst->src[0].abs) 62 continue; 63 64 int src_var = live.var_from_reg(inst->src[0]); 65 int src_end_ip = live.end[src_var]; 66 67 bool interfered = false; 68 foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { 69 if (scan_inst->exec_size == inst->exec_size && 70 regions_overlap(scan_inst->dst, scan_inst->size_written, 71 inst->src[0], inst->size_read(0))) { 72 if (scan_inst->is_partial_write() || 73 (scan_inst->dst.type != inst->dst.type && 74 !scan_inst->can_change_types())) 75 break; 76 77 if (scan_inst->saturate) { 78 inst->saturate = false; 79 progress = true; 80 } else if (src_end_ip == ip || inst->dst.equals(inst->src[0])) { 81 if (scan_inst->can_do_saturate()) { 82 if (scan_inst->dst.type != inst->dst.type) { 83 scan_inst->dst.type = inst->dst.type; 84 for (int i = 0; i < scan_inst->sources; i++) { 85 scan_inst->src[i].type = inst->dst.type; 86 } 87 } 88 89 if (inst->src[0].negate) { 90 if (scan_inst->opcode == BRW_OPCODE_MUL) { 91 scan_inst->src[0].negate = !scan_inst->src[0].negate; 92 inst->src[0].negate = false; 93 } else if (scan_inst->opcode == BRW_OPCODE_MAD) { 94 for (int i = 0; i < 2; i++) { 95 if (scan_inst->src[i].file == IMM) { 96 brw_negate_immediate(scan_inst->src[i].type, 97 &scan_inst->src[i].as_brw_reg()); 98 } else { 99 scan_inst->src[i].negate = !scan_inst->src[i].negate; 100 } 101 } 102 inst->src[0].negate = false; 103 } else if (scan_inst->opcode == BRW_OPCODE_ADD) { 104 if (scan_inst->src[1].file == IMM) { 105 if (!brw_negate_immediate(scan_inst->src[1].type, 106 &scan_inst->src[1].as_brw_reg())) { 107 break; 108 } 109 } else { 110 scan_inst->src[1].negate = !scan_inst->src[1].negate; 111 } 112 scan_inst->src[0].negate = !scan_inst->src[0].negate; 113 inst->src[0].negate = false; 114 } else { 115 break; 116 } 117 } 118 119 scan_inst->saturate = true; 120 inst->saturate = false; 121 progress = true; 122 } 123 } 124 break; 125 } 126 for (int i = 0; i < scan_inst->sources; i++) { 127 if (scan_inst->src[i].file == VGRF && 128 scan_inst->src[i].nr == inst->src[0].nr && 129 scan_inst->src[i].offset / REG_SIZE == 130 inst->src[0].offset / REG_SIZE) { 131 if (scan_inst->opcode != BRW_OPCODE_MOV || 132 !scan_inst->saturate || 133 scan_inst->src[0].abs || 134 scan_inst->src[0].negate || 135 scan_inst->src[0].abs != inst->src[0].abs || 136 scan_inst->src[0].negate != inst->src[0].negate) { 137 interfered = true; 138 break; 139 } 140 } 141 } 142 143 if (interfered) 144 break; 145 } 146 } 147 148 return progress; 149} 150 151bool 152fs_visitor::opt_saturate_propagation() 153{ 154 const fs_live_variables &live = live_analysis.require(); 155 bool progress = false; 156 157 foreach_block (block, cfg) { 158 progress = opt_saturate_propagation_local(live, block) || progress; 159 } 160 161 /* Live intervals are still valid. */ 162 163 return progress; 164} 165