/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/v3d_compiler.h"
#include "qpu/qpu_instr.h"
#include "qpu/qpu_disasm.h"

static inline struct qpu_reg
qpu_reg(int index)
{
        struct qpu_reg reg = {
                .magic = false,
                .index = index,
        };
        return reg;
}

static inline struct qpu_reg
qpu_magic(enum v3d_qpu_waddr waddr)
{
        struct qpu_reg reg = {
                .magic = true,
                .index = waddr,
        };
        return reg;
}

struct v3d_qpu_instr
v3d_qpu_nop(void)
{
        struct v3d_qpu_instr instr = {
                .type = V3D_QPU_INSTR_TYPE_ALU,
                .alu = {
                        .add = {
                                .op = V3D_QPU_A_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                        .mul = {
                                .op = V3D_QPU_M_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                }
        };

        return instr;
}

static struct qinst *
vir_nop(void)
{
        struct qreg undef = vir_nop_reg();
        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);

        return qinst;
}

static struct qinst *
new_qpu_nop_before(struct qinst *inst)
{
        struct qinst *q = vir_nop();

        list_addtail(&q->link, &inst->link);

        return q;
}

/**
 * Allocates the src register (accumulator or register file) into the RADDR
 * fields of the instruction.
 */
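/* An accumulator source (r0-r5) maps directly onto a mux value, but a
 * register-file source has to share the instruction's single
 * raddr_a/raddr_b pair with every other ALU input: we take raddr_a if no
 * input uses mux A yet, reuse it when the index already matches, and
 * otherwise fall back to raddr_b.  E.g. for a hypothetical
 * "add rf12, rf3, rf4" (illustrative, not from this file), rf3 would land
 * in raddr_a and rf4 in raddr_b.
 */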
static void
set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
{
        if (src.smimm) {
                assert(instr->sig.small_imm);
                *mux = V3D_QPU_MUX_B;
                return;
        }

        if (src.magic) {
                assert(src.index >= V3D_QPU_WADDR_R0 &&
                       src.index <= V3D_QPU_WADDR_R5);
                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
                return;
        }

        if (instr->alu.add.a != V3D_QPU_MUX_A &&
            instr->alu.add.b != V3D_QPU_MUX_A &&
            instr->alu.mul.a != V3D_QPU_MUX_A &&
            instr->alu.mul.b != V3D_QPU_MUX_A) {
                instr->raddr_a = src.index;
                *mux = V3D_QPU_MUX_A;
        } else {
                if (instr->raddr_a == src.index) {
                        *mux = V3D_QPU_MUX_A;
                } else {
                        assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
                                 instr->alu.add.b == V3D_QPU_MUX_B &&
                                 instr->alu.mul.a == V3D_QPU_MUX_B &&
                                 instr->alu.mul.b == V3D_QPU_MUX_B) ||
                               src.index == instr->raddr_b);

                        instr->raddr_b = src.index;
                        *mux = V3D_QPU_MUX_B;
                }
        }
}

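/**
 * Returns true if this is a lone MUL-ALU MOV from a register back to itself
 * with no signals, no pack/unpack, and no condition or flag updates: an
 * instruction with no visible effect that can simply be deleted.
 */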
static bool
is_no_op_mov(struct qinst *qinst)
{
        static const struct v3d_qpu_sig no_sig = {0};

        /* Make sure it's just a lone MOV. */
        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
                return false;
        }

        /* Check if it's a MOV from a register to itself. */
        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
        if (qinst->qpu.alu.mul.magic_write) {
                if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
                        return false;

                if (qinst->qpu.alu.mul.a !=
                    V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
                        return false;
                }
        } else {
                int raddr;

                switch (qinst->qpu.alu.mul.a) {
                case V3D_QPU_MUX_A:
                        raddr = qinst->qpu.raddr_a;
                        break;
                case V3D_QPU_MUX_B:
                        raddr = qinst->qpu.raddr_b;
                        break;
                default:
                        return false;
                }
                if (raddr != waddr)
                        return false;
        }

        /* No packing or flag updates allowed, or we would still need to
         * execute the instruction.
         */
        if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
            qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
            qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
            qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
            qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
                return false;
        }

        return true;
}

static void
v3d_generate_code_block(struct v3d_compile *c,
                        struct qblock *block,
                        struct qpu_reg *temp_registers)
{
        int last_vpm_read_index = -1;

        vir_for_each_inst_safe(qinst, block) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                vir_dump_inst(c, qinst);
                fprintf(stderr, "\n");
#endif

                struct qinst *temp;

                if (vir_has_uniform(qinst))
                        c->num_uniforms++;

                int nsrc = vir_get_nsrc(qinst);
                struct qpu_reg src[ARRAY_SIZE(qinst->src)];
                for (int i = 0; i < nsrc; i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_REG:
                                src[i] = qpu_reg(qinst->src[i].index);
                                break;
                        case QFILE_MAGIC:
                                src[i] = qpu_magic(qinst->src[i].index);
                                break;
                        case QFILE_NULL:
                                /* QFILE_NULL is an undef, so we can load
                                 * anything.  Using reg 0.
                                 */
                                src[i] = qpu_reg(0);
                                break;
                        case QFILE_LOAD_IMM:
                                assert(!"not reached");
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_SMALL_IMM:
                                src[i].smimm = true;
                                break;

                        case QFILE_VPM:
                                assert((int)qinst->src[i].index >=
                                       last_vpm_read_index);
                                (void)last_vpm_read_index;
                                last_vpm_read_index = qinst->src[i].index;

                                temp = new_qpu_nop_before(qinst);
                                temp->qpu.sig.ldvpm = true;

                                src[i] = qpu_magic(V3D_QPU_WADDR_R3);
                                break;
                        }
                }

                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_magic(V3D_QPU_WADDR_NOP);
                        break;

                case QFILE_REG:
                        dst = qpu_reg(qinst->dst.index);
                        break;

                case QFILE_MAGIC:
                        dst = qpu_magic(qinst->dst.index);
                        break;

                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;

                case QFILE_VPM:
                        dst = qpu_magic(V3D_QPU_WADDR_VPM);
                        break;

                case QFILE_SMALL_IMM:
                case QFILE_LOAD_IMM:
                        assert(!"not reached");
                        break;
                }

                if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                        if (qinst->qpu.sig.ldunif || qinst->qpu.sig.ldunifa) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                if (!dst.magic ||
                                    dst.index != V3D_QPU_WADDR_R5) {
                                        /* The plain signals implicitly load
                                         * to r5; writing anywhere else needs
                                         * the RF variants, which only exist
                                         * on 4.x.
                                         */
                                        assert(c->devinfo->ver >= 40);

                                        if (qinst->qpu.sig.ldunif) {
                                                qinst->qpu.sig.ldunif = false;
                                                qinst->qpu.sig.ldunifrf = true;
                                        } else {
                                                qinst->qpu.sig.ldunifa = false;
                                                qinst->qpu.sig.ldunifarf = true;
                                        }
                                        qinst->qpu.sig_addr = dst.index;
                                        qinst->qpu.sig_magic = dst.magic;
                                }
                        } else if (v3d_qpu_sig_writes_address(c->devinfo,
                                                              &qinst->qpu.sig)) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                qinst->qpu.sig_addr = dst.index;
                                qinst->qpu.sig_magic = dst.magic;
                        } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.b, src[1]);
                                }

                                qinst->qpu.alu.add.waddr = dst.index;
                                qinst->qpu.alu.add.magic_write = dst.magic;
                        } else {
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.b, src[1]);
                                }

                                qinst->qpu.alu.mul.waddr = dst.index;
                                qinst->qpu.alu.mul.magic_write = dst.magic;

                                if (is_no_op_mov(qinst)) {
                                        vir_remove_instruction(c, qinst);
                                        continue;
                                }
                        }
                } else {
                        assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
                }
        }
}

static bool
reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
{
        struct v3d_qpu_instr qpu;
        ASSERTED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
        assert(ok);

        if (qpu.sig.ldunif ||
            qpu.sig.ldunifrf ||
            qpu.sig.ldtlbu ||
            qpu.sig.wrtmuc) {
                return true;
        }

        if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
                return true;

        if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                if (qpu.alu.add.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
                        return true;
                }

                if (qpu.alu.mul.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
                        return true;
                }
        }

        return false;
}

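/**
 * Dumps each packed QPU instruction as raw hex alongside its disassembly.
 * On 4.x, instructions that read a uniform are also annotated with the
 * uniform's contents, and the running count is checked against
 * c->num_uniforms at the end.
 */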
static void
v3d_dump_qpu(struct v3d_compile *c)
{
        fprintf(stderr, "%s prog %d/%d QPU:\n",
                vir_get_stage_name(c),
                c->program_id, c->variant_id);

        int next_uniform = 0;
        for (int i = 0; i < c->qpu_inst_count; i++) {
                const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
                fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);

                /* We can only do this on 4.x, because we're not tracking TMU
                 * implicit uniforms here on 3.x.
                 */
                if (c->devinfo->ver >= 40 &&
                    reads_uniform(c->devinfo, c->qpu_insts[i])) {
                        fprintf(stderr, " (");
                        vir_dump_uniform(c->uniform_contents[next_uniform],
                                         c->uniform_data[next_uniform]);
                        fprintf(stderr, ")");
                        next_uniform++;
                }
                fprintf(stderr, "\n");
                ralloc_free((void *)str);
        }

        /* Make sure our dumping lined up. */
        if (c->devinfo->ver >= 40)
                assert(next_uniform == c->num_uniforms);

        fprintf(stderr, "\n");
}

void
v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
{
        /* Reset the uniform count to how many will actually be loaded by
         * the generated QPU code.
         */
        c->num_uniforms = 0;

        vir_for_each_block(block, c)
                v3d_generate_code_block(c, block, temp_registers);

        v3d_qpu_schedule_instructions(c);

        c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
        int i = 0;
        vir_for_each_inst_inorder(inst, c) {
                bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
                                             &c->qpu_insts[i++]);
                if (!ok) {
                        fprintf(stderr, "Failed to pack instruction %d:\n", i);
                        vir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                        c->compilation_result = V3D_COMPILATION_FAILED;
                        return;
                }

                if (v3d_qpu_is_nop(&inst->qpu))
                        c->nop_count++;
        }
        assert(i == c->qpu_inst_count);

        if (V3D_DEBUG & (V3D_DEBUG_QPU |
                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
                v3d_dump_qpu(c);
        }

        qpu_validate(c);

        free(temp_registers);
}