1/* 2 * Copyright (C) 2020 Collabora Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors (Collabora): 24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 25 */ 26 27#include "compiler.h" 28 29bool 30bi_has_arg(const bi_instr *ins, bi_index arg) 31{ 32 if (!ins) 33 return false; 34 35 bi_foreach_src(ins, s) { 36 if (bi_is_equiv(ins->src[s], arg)) 37 return true; 38 } 39 40 return false; 41} 42 43/* Precondition: valid 16-bit or 32-bit register format. Returns whether it is 44 * 32-bit. Note auto reads to 32-bit registers even if the memory format is 45 * 16-bit, so is considered as such here */ 46 47bool 48bi_is_regfmt_16(enum bi_register_format fmt) 49{ 50 switch (fmt) { 51 case BI_REGISTER_FORMAT_F16: 52 case BI_REGISTER_FORMAT_S16: 53 case BI_REGISTER_FORMAT_U16: 54 return true; 55 case BI_REGISTER_FORMAT_F32: 56 case BI_REGISTER_FORMAT_S32: 57 case BI_REGISTER_FORMAT_U32: 58 case BI_REGISTER_FORMAT_AUTO: 59 return false; 60 default: 61 unreachable("Invalid register format"); 62 } 63} 64 65static unsigned 66bi_count_staging_registers(const bi_instr *ins) 67{ 68 enum bi_sr_count count = bi_opcode_props[ins->op].sr_count; 69 unsigned vecsize = ins->vecsize + 1; /* XXX: off-by-one */ 70 71 switch (count) { 72 case BI_SR_COUNT_0 ... BI_SR_COUNT_4: 73 return count; 74 case BI_SR_COUNT_FORMAT: 75 return bi_is_regfmt_16(ins->register_format) ? 76 DIV_ROUND_UP(vecsize, 2) : vecsize; 77 case BI_SR_COUNT_VECSIZE: 78 return vecsize; 79 case BI_SR_COUNT_SR_COUNT: 80 return ins->sr_count; 81 } 82 83 unreachable("Invalid sr_count"); 84} 85 86unsigned 87bi_count_read_registers(const bi_instr *ins, unsigned s) 88{ 89 /* ATOM reads 1 but writes 2. Exception for ACMPXCHG */ 90 if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32) 91 return (ins->atom_opc == BI_ATOM_OPC_ACMPXCHG) ? 2 : 1; 92 else if (s == 0 && bi_opcode_props[ins->op].sr_read) 93 return bi_count_staging_registers(ins); 94 else if (s == 4 && ins->op == BI_OPCODE_BLEND) 95 return ins->sr_count_2; /* Dual source blending */ 96 else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32) 97 return ins->nr_dests; 98 else 99 return 1; 100} 101 102unsigned 103bi_count_write_registers(const bi_instr *ins, unsigned d) 104{ 105 if (d == 0 && bi_opcode_props[ins->op].sr_write) { 106 switch (ins->op) { 107 case BI_OPCODE_TEXC: 108 if (ins->sr_count_2) 109 return ins->sr_count; 110 else 111 return bi_is_regfmt_16(ins->register_format) ? 2 : 4; 112 113 case BI_OPCODE_TEX_SINGLE: 114 case BI_OPCODE_TEX_FETCH: 115 case BI_OPCODE_TEX_GATHER: { 116 unsigned chans = util_bitcount(ins->write_mask); 117 118 return bi_is_regfmt_16(ins->register_format) ? 119 DIV_ROUND_UP(chans, 2) : chans; 120 } 121 122 case BI_OPCODE_ACMPXCHG_I32: 123 /* Reads 2 but writes 1 */ 124 return 1; 125 126 case BI_OPCODE_ATOM1_RETURN_I32: 127 /* Allow omitting the destination for plain ATOM1 */ 128 return bi_is_null(ins->dest[0]) ? 0 : ins->sr_count; 129 default: 130 return bi_count_staging_registers(ins); 131 } 132 } else if (ins->op == BI_OPCODE_SEG_ADD_I64) { 133 return 2; 134 } else if (ins->op == BI_OPCODE_TEXC && d == 1) { 135 return ins->sr_count_2; 136 } else if (ins->op == BI_OPCODE_COLLECT_I32 && d == 0) { 137 return ins->nr_srcs; 138 } 139 140 return 1; 141} 142 143unsigned 144bi_writemask(const bi_instr *ins, unsigned d) 145{ 146 unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d)); 147 unsigned shift = ins->dest[d].offset; 148 return (mask << shift); 149} 150 151bi_clause * 152bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause) 153{ 154 if (!block && !clause) 155 return NULL; 156 157 /* Try the first clause in this block if we're starting from scratch */ 158 if (!clause && !list_is_empty(&block->clauses)) 159 return list_first_entry(&block->clauses, bi_clause, link); 160 161 /* Try the next clause in this block */ 162 if (clause && clause->link.next != &block->clauses) 163 return list_first_entry(&(clause->link), bi_clause, link); 164 165 /* Try the next block, or the one after that if it's empty, etc .*/ 166 bi_block *next_block = bi_next_block(block); 167 168 bi_foreach_block_from(ctx, next_block, block) { 169 if (!list_is_empty(&block->clauses)) 170 return list_first_entry(&block->clauses, bi_clause, link); 171 } 172 173 return NULL; 174} 175 176/* Does an instruction have a side effect not captured by its register 177 * destination? Applies to certain message-passing instructions, +DISCARD, and 178 * branching only, used in dead code elimation. Branches are characterized by 179 * `last` which applies to them and some atomics, +BARRIER, +BLEND which 180 * implies no loss of generality */ 181 182bool 183bi_side_effects(const bi_instr *I) 184{ 185 if (bi_opcode_props[I->op].last) 186 return true; 187 188 switch (I->op) { 189 case BI_OPCODE_DISCARD_F32: 190 case BI_OPCODE_DISCARD_B32: 191 return true; 192 default: 193 break; 194 } 195 196 switch (bi_opcode_props[I->op].message) { 197 case BIFROST_MESSAGE_NONE: 198 case BIFROST_MESSAGE_VARYING: 199 case BIFROST_MESSAGE_ATTRIBUTE: 200 case BIFROST_MESSAGE_TEX: 201 case BIFROST_MESSAGE_VARTEX: 202 case BIFROST_MESSAGE_LOAD: 203 case BIFROST_MESSAGE_64BIT: 204 return false; 205 206 case BIFROST_MESSAGE_STORE: 207 case BIFROST_MESSAGE_ATOMIC: 208 case BIFROST_MESSAGE_BARRIER: 209 case BIFROST_MESSAGE_BLEND: 210 case BIFROST_MESSAGE_Z_STENCIL: 211 case BIFROST_MESSAGE_ATEST: 212 case BIFROST_MESSAGE_JOB: 213 return true; 214 215 case BIFROST_MESSAGE_TILE: 216 return (I->op != BI_OPCODE_LD_TILE); 217 } 218 219 unreachable("Invalid message type"); 220} 221 222/* Branch reconvergence is required when the execution mask may change 223 * between adjacent instructions (clauses). This occurs for conditional 224 * branches and for the last instruction (clause) in a block whose 225 * fallthrough successor has multiple predecessors. 226 */ 227 228bool 229bi_reconverge_branches(bi_block *block) 230{ 231 /* Last block of a program */ 232 if (!block->successors[0]) { 233 assert(!block->successors[1]); 234 return true; 235 } 236 237 /* Multiple successors? We're branching */ 238 if (block->successors[1]) 239 return true; 240 241 /* Must have at least one successor */ 242 struct bi_block *succ = block->successors[0]; 243 244 /* Reconverge if the successor has multiple predecessors */ 245 return bi_num_predecessors(succ) > 1; 246} 247 248/* 249 * When MUX.i32 or MUX.v2i16 is used to multiplex entire sources, they can be 250 * replaced by CSEL as follows: 251 * 252 * MUX.neg(x, y, b) -> CSEL.s.lt(b, 0, x, y) 253 * MUX.int_zero(x, y, b) -> CSEL.i.eq(b, 0, x, y) 254 * MUX.fp_zero(x, y, b) -> CSEL.f.eq(b, 0, x, y) 255 * 256 * MUX.bit cannot be transformed like this. 257 * 258 * Note that MUX.v2i16 has partial support for swizzles, which CSEL.v2i16 lacks. 259 * So we must check the swizzles too. 260 */ 261bool 262bi_can_replace_with_csel(bi_instr *I) 263{ 264 return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) && 265 (I->mux != BI_MUX_BIT) && 266 (I->src[0].swizzle == BI_SWIZZLE_H01) && 267 (I->src[1].swizzle == BI_SWIZZLE_H01) && 268 (I->src[2].swizzle == BI_SWIZZLE_H01); 269} 270 271static enum bi_opcode 272bi_csel_for_mux(bool must_sign, bool b32, enum bi_mux mux) 273{ 274 switch (mux) { 275 case BI_MUX_INT_ZERO: 276 if (must_sign) 277 return b32 ? BI_OPCODE_CSEL_U32 : BI_OPCODE_CSEL_V2U16; 278 else 279 return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16; 280 case BI_MUX_NEG: 281 return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16; 282 case BI_MUX_FP_ZERO: 283 return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16; 284 default: 285 unreachable("No CSEL for MUX.bit"); 286 } 287} 288 289void 290bi_replace_mux_with_csel(bi_instr *I, bool must_sign) 291{ 292 assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16); 293 I->op = bi_csel_for_mux(must_sign, I->op == BI_OPCODE_MUX_I32, I->mux); 294 I->cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ; 295 I->mux = 0; 296 297 bi_index vTrue = I->src[0], vFalse = I->src[1], cond = I->src[2]; 298 299 I->src[0] = cond; 300 I->src[1] = bi_zero(); 301 I->src[2] = vTrue; 302 I->src[3] = vFalse; 303} 304