/*
 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
22 * 23 * Authors: 24 * Jonathan Marek <jonathan@marek.ca> 25 */ 26 27#include "ir2_private.h" 28 29static bool 30is_mov(struct ir2_instr *instr) 31{ 32 return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv && 33 instr->src_count == 1; 34} 35 36static void 37src_combine(struct ir2_src *src, struct ir2_src b) 38{ 39 src->num = b.num; 40 src->type = b.type; 41 src->swizzle = swiz_merge(b.swizzle, src->swizzle); 42 if (!src->abs) /* if we have abs we don't care about previous negate */ 43 src->negate ^= b.negate; 44 src->abs |= b.abs; 45} 46 47/* cp_src: replace src regs when they refer to a mov instruction 48 * example: 49 * ALU: MAXv R7 = C7, C7 50 * ALU: MULADDv R7 = R7, R10, R0.xxxx 51 * becomes: 52 * ALU: MULADDv R7 = C7, R10, R0.xxxx 53 */ 54void 55cp_src(struct ir2_context *ctx) 56{ 57 struct ir2_instr *p; 58 59 ir2_foreach_instr (instr, ctx) { 60 ir2_foreach_src (src, instr) { 61 /* loop to replace recursively */ 62 do { 63 if (src->type != IR2_SRC_SSA) 64 break; 65 66 p = &ctx->instr[src->num]; 67 /* don't work across blocks to avoid possible issues */ 68 if (p->block_idx != instr->block_idx) 69 break; 70 71 if (!is_mov(p)) 72 break; 73 74 if (p->alu.saturate) 75 break; 76 77 /* cant apply abs to const src, const src only for alu */ 78 if (p->src[0].type == IR2_SRC_CONST && 79 (src->abs || instr->type != IR2_ALU)) 80 break; 81 82 src_combine(src, p->src[0]); 83 } while (1); 84 } 85 } 86} 87 88/* cp_export: replace mov to export when possible 89 * in the cp_src pass we bypass any mov instructions related 90 * to the src registers, but for exports for need something different 91 * example: 92 * ALU: MAXv R3.x___ = C9.x???, C9.x??? 93 * ALU: MAXv R3._y__ = R0.?x??, C8.?x?? 94 * ALU: MAXv export0 = R3.yyyx, R3.yyyx 95 * becomes: 96 * ALU: MAXv export0.___w = C9.???x, C9.???x 97 * ALU: MAXv export0.xyz_ = R0.xxx?, C8.xxx? 
98 * 99 */ 100void 101cp_export(struct ir2_context *ctx) 102{ 103 struct ir2_instr *c[4], *ins[4]; 104 struct ir2_src *src; 105 struct ir2_reg *reg; 106 unsigned ncomp; 107 108 ir2_foreach_instr (instr, ctx) { 109 if (!is_export(instr)) /* TODO */ 110 continue; 111 112 if (!is_mov(instr)) 113 continue; 114 115 src = &instr->src[0]; 116 117 if (src->negate || src->abs) /* TODO handle these cases */ 118 continue; 119 120 if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST) 121 continue; 122 123 reg = get_reg_src(ctx, src); 124 ncomp = dst_ncomp(instr); 125 126 unsigned reswiz[4] = {}; 127 unsigned num_instr = 0; 128 129 /* fill array c with pointers to instrs that write each component */ 130 if (src->type == IR2_SRC_SSA) { 131 struct ir2_instr *instr = &ctx->instr[src->num]; 132 133 if (instr->type != IR2_ALU) 134 continue; 135 136 for (int i = 0; i < ncomp; i++) 137 c[i] = instr; 138 139 ins[num_instr++] = instr; 140 reswiz[0] = src->swizzle; 141 } else { 142 bool ok = true; 143 unsigned write_mask = 0; 144 145 ir2_foreach_instr (instr, ctx) { 146 if (instr->is_ssa || instr->reg != reg) 147 continue; 148 149 /* set by non-ALU */ 150 if (instr->type != IR2_ALU) { 151 ok = false; 152 break; 153 } 154 155 /* component written more than once */ 156 if (write_mask & instr->alu.write_mask) { 157 ok = false; 158 break; 159 } 160 161 write_mask |= instr->alu.write_mask; 162 163 /* src pointers for components */ 164 for (int i = 0, j = 0; i < 4; i++) { 165 unsigned k = swiz_get(src->swizzle, i); 166 if (instr->alu.write_mask & 1 << k) { 167 c[i] = instr; 168 169 /* reswiz = compressed src->swizzle */ 170 unsigned x = 0; 171 for (int i = 0; i < k; i++) 172 x += !!(instr->alu.write_mask & 1 << i); 173 174 assert(src->swizzle || x == j); 175 reswiz[num_instr] |= swiz_set(x, j++); 176 } 177 } 178 ins[num_instr++] = instr; 179 } 180 if (!ok) 181 continue; 182 } 183 184 bool redirect = true; 185 186 /* must all be in same block */ 187 for (int i = 0; i < ncomp; i++) 188 
redirect &= (c[i]->block_idx == instr->block_idx); 189 190 /* no other instr using the value */ 191 ir2_foreach_instr (p, ctx) { 192 if (p == instr) 193 continue; 194 ir2_foreach_src (src, p) 195 redirect &= reg != get_reg_src(ctx, src); 196 } 197 198 if (!redirect) 199 continue; 200 201 /* redirect the instructions writing to the register */ 202 for (int i = 0; i < num_instr; i++) { 203 struct ir2_instr *p = ins[i]; 204 205 p->alu.export = instr->alu.export; 206 p->alu.write_mask = 0; 207 p->is_ssa = true; 208 p->ssa.ncomp = 0; 209 memset(p->ssa.comp, 0, sizeof(p->ssa.comp)); 210 p->alu.saturate |= instr->alu.saturate; 211 212 switch (p->alu.vector_opc) { 213 case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv: 214 case DOT2ADDv: 215 case DOT3v: 216 case DOT4v: 217 case CUBEv: 218 continue; 219 default: 220 break; 221 } 222 ir2_foreach_src (s, p) 223 swiz_merge_p(&s->swizzle, reswiz[i]); 224 } 225 226 for (int i = 0; i < ncomp; i++) { 227 c[i]->alu.write_mask |= (1 << i); 228 c[i]->ssa.ncomp++; 229 } 230 instr->type = IR2_NONE; 231 instr->need_emit = false; 232 } 233} 234