1/* 2 * Copyright © 2014 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** 25 * @file v3d_opt_copy_propagation.c 26 * 27 * This implements simple copy propagation for VIR without control flow. 28 * 29 * For each temp, it keeps a qreg of which source it was MOVed from, if it 30 * was. If we see that used later, we can just reuse the source value, since 31 * we know we don't have control flow, and we have SSA for our values so 32 * there's no killing to worry about. 33 */ 34 35#include "v3d_compiler.h" 36 37static bool 38is_copy_mov(struct qinst *inst) 39{ 40 if (!inst) 41 return false; 42 43 if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU || 44 (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV && 45 inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) { 46 return false; 47 } 48 49 if (inst->dst.file != QFILE_TEMP) 50 return false; 51 52 if (inst->src[0].file != QFILE_TEMP) 53 return false; 54 55 if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE || 56 inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) { 57 return false; 58 } 59 60 if (inst->qpu.flags.ac != V3D_QPU_COND_NONE || 61 inst->qpu.flags.mc != V3D_QPU_COND_NONE) { 62 return false; 63 } 64 65 switch (inst->src[0].file) { 66 case QFILE_MAGIC: 67 /* No copy propagating from R3/R4/R5 -- the MOVs from those 68 * are there to register allocate values produced into R3/4/5 69 * to other regs (though hopefully r3/4/5). 70 */ 71 switch (inst->src[0].index) { 72 case V3D_QPU_WADDR_R3: 73 case V3D_QPU_WADDR_R4: 74 case V3D_QPU_WADDR_R5: 75 return false; 76 default: 77 break; 78 } 79 break; 80 81 case QFILE_REG: 82 switch (inst->src[0].index) { 83 case 0: 84 case 1: 85 case 2: 86 /* MOVs from rf0/1/2 are only to track the live 87 * intervals for W/centroid W/Z. 88 */ 89 return false; 90 } 91 break; 92 93 default: 94 break; 95 } 96 97 return true; 98} 99 100static bool 101vir_has_unpack(struct qinst *inst, int chan) 102{ 103 assert(chan == 0 || chan == 1); 104 105 if (vir_is_add(inst)) { 106 if (chan == 0) 107 return inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE; 108 else 109 return inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE; 110 } else { 111 if (chan == 0) 112 return inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE; 113 else 114 return inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE; 115 } 116} 117 118static bool 119try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs) 120{ 121 bool debug = false; 122 bool progress = false; 123 124 for (int i = 0; i < vir_get_nsrc(inst); i++) { 125 if (inst->src[i].file != QFILE_TEMP) 126 continue; 127 128 /* We have two ways of finding MOVs we can copy propagate 129 * from. One is if it's an SSA def: then we can reuse it from 130 * any block in the program, as long as its source is also an 131 * SSA def. Alternatively, if it's in the "movs" array 132 * tracked within the block, then we know the sources for it 133 * haven't been changed since we saw the instruction within 134 * our block. 135 */ 136 struct qinst *mov = movs[inst->src[i].index]; 137 if (!mov) { 138 if (!is_copy_mov(c->defs[inst->src[i].index])) 139 continue; 140 mov = c->defs[inst->src[i].index]; 141 142 if (mov->src[0].file == QFILE_TEMP && 143 !c->defs[mov->src[0].index]) 144 continue; 145 } 146 147 if (vir_has_unpack(mov, 0)) { 148 /* Make sure that the meaning of the unpack 149 * would be the same between the two 150 * instructions. 151 */ 152 if (v3d_qpu_unpacks_f32(&inst->qpu) != 153 v3d_qpu_unpacks_f32(&mov->qpu) || 154 v3d_qpu_unpacks_f16(&inst->qpu) != 155 v3d_qpu_unpacks_f16(&mov->qpu)) { 156 continue; 157 } 158 159 /* No composing the unpacks. */ 160 if (vir_has_unpack(inst, i)) 161 continue; 162 163 /* these ops can't represent abs. */ 164 if (mov->qpu.alu.mul.a_unpack == V3D_QPU_UNPACK_ABS) { 165 switch (inst->qpu.alu.add.op) { 166 case V3D_QPU_A_VFPACK: 167 case V3D_QPU_A_FROUND: 168 case V3D_QPU_A_FTRUNC: 169 case V3D_QPU_A_FFLOOR: 170 case V3D_QPU_A_FCEIL: 171 case V3D_QPU_A_FDX: 172 case V3D_QPU_A_FDY: 173 case V3D_QPU_A_FTOIN: 174 case V3D_QPU_A_FTOIZ: 175 case V3D_QPU_A_FTOUZ: 176 case V3D_QPU_A_FTOC: 177 continue; 178 default: 179 break; 180 } 181 } 182 } 183 184 if (debug) { 185 fprintf(stderr, "Copy propagate: "); 186 vir_dump_inst(c, inst); 187 fprintf(stderr, "\n"); 188 } 189 190 inst->src[i] = mov->src[0]; 191 if (vir_has_unpack(mov, 0)) { 192 enum v3d_qpu_input_unpack unpack = mov->qpu.alu.mul.a_unpack; 193 194 vir_set_unpack(inst, i, unpack); 195 } 196 197 if (debug) { 198 fprintf(stderr, "to: "); 199 vir_dump_inst(c, inst); 200 fprintf(stderr, "\n"); 201 } 202 203 progress = true; 204 } 205 206 return progress; 207} 208 209static void 210apply_kills(struct v3d_compile *c, struct qinst **movs, struct qinst *inst) 211{ 212 if (inst->dst.file != QFILE_TEMP) 213 return; 214 215 for (int i = 0; i < c->num_temps; i++) { 216 if (movs[i] && 217 (movs[i]->dst.index == inst->dst.index || 218 (movs[i]->src[0].file == QFILE_TEMP && 219 movs[i]->src[0].index == inst->dst.index))) { 220 movs[i] = NULL; 221 } 222 } 223} 224 225bool 226vir_opt_copy_propagate(struct v3d_compile *c) 227{ 228 bool progress = false; 229 struct qinst **movs; 230 231 movs = ralloc_array(c, struct qinst *, c->num_temps); 232 if (!movs) 233 return false; 234 235 vir_for_each_block(block, c) { 236 /* The MOVs array tracks only available movs within the 237 * block. 238 */ 239 memset(movs, 0, sizeof(struct qinst *) * c->num_temps); 240 241 c->cur_block = block; 242 vir_for_each_inst(inst, block) { 243 244 progress = try_copy_prop(c, inst, movs) || progress; 245 246 apply_kills(c, movs, inst); 247 248 if (is_copy_mov(inst)) 249 movs[inst->dst.index] = inst; 250 } 251 } 252 253 ralloc_free(movs); 254 255 return progress; 256} 257