1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2014 Broadcom 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci/** 25bf215546Sopenharmony_ci * @file 26bf215546Sopenharmony_ci * 27bf215546Sopenharmony_ci * Validates the QPU instruction sequence after register allocation and 28bf215546Sopenharmony_ci * scheduling. 29bf215546Sopenharmony_ci */ 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include <assert.h> 32bf215546Sopenharmony_ci#include <stdio.h> 33bf215546Sopenharmony_ci#include <stdlib.h> 34bf215546Sopenharmony_ci#include "v3d_compiler.h" 35bf215546Sopenharmony_ci#include "qpu/qpu_disasm.h" 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_cistruct v3d_qpu_validate_state { 38bf215546Sopenharmony_ci struct v3d_compile *c; 39bf215546Sopenharmony_ci const struct v3d_qpu_instr *last; 40bf215546Sopenharmony_ci int ip; 41bf215546Sopenharmony_ci int last_sfu_write; 42bf215546Sopenharmony_ci int last_branch_ip; 43bf215546Sopenharmony_ci int last_thrsw_ip; 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci /* Set when we've found the last-THRSW signal, or if we were started 46bf215546Sopenharmony_ci * in single-segment mode. 47bf215546Sopenharmony_ci */ 48bf215546Sopenharmony_ci bool last_thrsw_found; 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci /* Set when we've found the THRSW after the last THRSW */ 51bf215546Sopenharmony_ci bool thrend_found; 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci int thrsw_count; 54bf215546Sopenharmony_ci}; 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_cistatic void 57bf215546Sopenharmony_cifail_instr(struct v3d_qpu_validate_state *state, const char *msg) 58bf215546Sopenharmony_ci{ 59bf215546Sopenharmony_ci struct v3d_compile *c = state->c; 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg); 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci int dump_ip = 0; 64bf215546Sopenharmony_ci vir_for_each_inst_inorder(inst, c) { 65bf215546Sopenharmony_ci v3d_qpu_dump(c->devinfo, &inst->qpu); 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci if (dump_ip++ == state->ip) 68bf215546Sopenharmony_ci fprintf(stderr, " *** ERROR ***"); 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci fprintf(stderr, "\n"); 71bf215546Sopenharmony_ci } 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci fprintf(stderr, "\n"); 74bf215546Sopenharmony_ci abort(); 75bf215546Sopenharmony_ci} 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_cistatic bool 78bf215546Sopenharmony_ciin_branch_delay_slots(struct v3d_qpu_validate_state *state) 79bf215546Sopenharmony_ci{ 80bf215546Sopenharmony_ci return (state->ip - state->last_branch_ip) < 3; 81bf215546Sopenharmony_ci} 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_cistatic bool 84bf215546Sopenharmony_ciin_thrsw_delay_slots(struct v3d_qpu_validate_state *state) 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci return (state->ip - state->last_thrsw_ip) < 3; 87bf215546Sopenharmony_ci} 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_cistatic bool 90bf215546Sopenharmony_ciqpu_magic_waddr_matches(const struct v3d_qpu_instr *inst, 91bf215546Sopenharmony_ci bool (*predicate)(enum v3d_qpu_waddr waddr)) 92bf215546Sopenharmony_ci{ 93bf215546Sopenharmony_ci if (inst->type == V3D_QPU_INSTR_TYPE_ALU) 94bf215546Sopenharmony_ci return false; 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci if (inst->alu.add.op != V3D_QPU_A_NOP && 97bf215546Sopenharmony_ci inst->alu.add.magic_write && 98bf215546Sopenharmony_ci predicate(inst->alu.add.waddr)) 99bf215546Sopenharmony_ci return true; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci if (inst->alu.mul.op != V3D_QPU_M_NOP && 102bf215546Sopenharmony_ci inst->alu.mul.magic_write && 103bf215546Sopenharmony_ci predicate(inst->alu.mul.waddr)) 104bf215546Sopenharmony_ci return true; 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci return false; 107bf215546Sopenharmony_ci} 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_cistatic void 110bf215546Sopenharmony_ciqpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) 111bf215546Sopenharmony_ci{ 112bf215546Sopenharmony_ci const struct v3d_device_info *devinfo = state->c->devinfo; 113bf215546Sopenharmony_ci const struct v3d_qpu_instr *inst = &qinst->qpu; 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci if (inst->type != V3D_QPU_INSTR_TYPE_ALU) 116bf215546Sopenharmony_ci return; 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci /* LDVARY writes r5 two instructions later and LDUNIF writes 119bf215546Sopenharmony_ci * r5 one instruction later, which is illegal to have 120bf215546Sopenharmony_ci * together. 121bf215546Sopenharmony_ci */ 122bf215546Sopenharmony_ci if (state->last && state->last->sig.ldvary && 123bf215546Sopenharmony_ci (inst->sig.ldunif || inst->sig.ldunifa)) { 124bf215546Sopenharmony_ci fail_instr(state, "LDUNIF after a LDVARY"); 125bf215546Sopenharmony_ci } 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci /* GFXH-1633 (fixed since V3D 4.2.14, which is Rpi4) 128bf215546Sopenharmony_ci * 129bf215546Sopenharmony_ci * FIXME: This would not check correctly for V3D 4.2 versions lower 130bf215546Sopenharmony_ci * than V3D 4.2.14, but that is not a real issue because the simulator 131bf215546Sopenharmony_ci * will still catch this, and we are not really targetting any such 132bf215546Sopenharmony_ci * versions anyway. 133bf215546Sopenharmony_ci */ 134bf215546Sopenharmony_ci if (state->c->devinfo->ver < 42) { 135bf215546Sopenharmony_ci bool last_reads_ldunif = (state->last && (state->last->sig.ldunif || 136bf215546Sopenharmony_ci state->last->sig.ldunifrf)); 137bf215546Sopenharmony_ci bool last_reads_ldunifa = (state->last && (state->last->sig.ldunifa || 138bf215546Sopenharmony_ci state->last->sig.ldunifarf)); 139bf215546Sopenharmony_ci bool reads_ldunif = inst->sig.ldunif || inst->sig.ldunifrf; 140bf215546Sopenharmony_ci bool reads_ldunifa = inst->sig.ldunifa || inst->sig.ldunifarf; 141bf215546Sopenharmony_ci if ((last_reads_ldunif && reads_ldunifa) || 142bf215546Sopenharmony_ci (last_reads_ldunifa && reads_ldunif)) { 143bf215546Sopenharmony_ci fail_instr(state, 144bf215546Sopenharmony_ci "LDUNIF and LDUNIFA can't be next to each other"); 145bf215546Sopenharmony_ci } 146bf215546Sopenharmony_ci } 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci int tmu_writes = 0; 149bf215546Sopenharmony_ci int sfu_writes = 0; 150bf215546Sopenharmony_ci int vpm_writes = 0; 151bf215546Sopenharmony_ci int tlb_writes = 0; 152bf215546Sopenharmony_ci int tsy_writes = 0; 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci if (inst->alu.add.op != V3D_QPU_A_NOP) { 155bf215546Sopenharmony_ci if (inst->alu.add.magic_write) { 156bf215546Sopenharmony_ci if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo, 157bf215546Sopenharmony_ci inst->alu.add.waddr)) { 158bf215546Sopenharmony_ci tmu_writes++; 159bf215546Sopenharmony_ci } 160bf215546Sopenharmony_ci if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) 161bf215546Sopenharmony_ci sfu_writes++; 162bf215546Sopenharmony_ci if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) 163bf215546Sopenharmony_ci vpm_writes++; 164bf215546Sopenharmony_ci if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) 165bf215546Sopenharmony_ci tlb_writes++; 166bf215546Sopenharmony_ci if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr)) 167bf215546Sopenharmony_ci tsy_writes++; 168bf215546Sopenharmony_ci } 169bf215546Sopenharmony_ci } 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci if (inst->alu.mul.op != V3D_QPU_M_NOP) { 172bf215546Sopenharmony_ci if (inst->alu.mul.magic_write) { 173bf215546Sopenharmony_ci if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo, 174bf215546Sopenharmony_ci inst->alu.mul.waddr)) { 175bf215546Sopenharmony_ci tmu_writes++; 176bf215546Sopenharmony_ci } 177bf215546Sopenharmony_ci if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) 178bf215546Sopenharmony_ci sfu_writes++; 179bf215546Sopenharmony_ci if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) 180bf215546Sopenharmony_ci vpm_writes++; 181bf215546Sopenharmony_ci if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) 182bf215546Sopenharmony_ci tlb_writes++; 183bf215546Sopenharmony_ci if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr)) 184bf215546Sopenharmony_ci tsy_writes++; 185bf215546Sopenharmony_ci } 186bf215546Sopenharmony_ci } 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci if (in_thrsw_delay_slots(state)) { 189bf215546Sopenharmony_ci /* There's no way you want to start SFU during the THRSW delay 190bf215546Sopenharmony_ci * slots, since the result would land in the other thread. 191bf215546Sopenharmony_ci */ 192bf215546Sopenharmony_ci if (sfu_writes) { 193bf215546Sopenharmony_ci fail_instr(state, 194bf215546Sopenharmony_ci "SFU write started during THRSW delay slots "); 195bf215546Sopenharmony_ci } 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci if (inst->sig.ldvary) 198bf215546Sopenharmony_ci fail_instr(state, "LDVARY during THRSW delay slots"); 199bf215546Sopenharmony_ci } 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci (void)qpu_magic_waddr_matches; /* XXX */ 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci /* SFU r4 results come back two instructions later. No doing 204bf215546Sopenharmony_ci * r4 read/writes or other SFU lookups until it's done. 205bf215546Sopenharmony_ci */ 206bf215546Sopenharmony_ci if (state->ip - state->last_sfu_write < 2) { 207bf215546Sopenharmony_ci if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4)) 208bf215546Sopenharmony_ci fail_instr(state, "R4 read too soon after SFU"); 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci if (v3d_qpu_writes_r4(devinfo, inst)) 211bf215546Sopenharmony_ci fail_instr(state, "R4 write too soon after SFU"); 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci if (sfu_writes) 214bf215546Sopenharmony_ci fail_instr(state, "SFU write too soon after SFU"); 215bf215546Sopenharmony_ci } 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci /* XXX: The docs say VPM can happen with the others, but the simulator 218bf215546Sopenharmony_ci * disagrees. 219bf215546Sopenharmony_ci */ 220bf215546Sopenharmony_ci if (tmu_writes + 221bf215546Sopenharmony_ci sfu_writes + 222bf215546Sopenharmony_ci vpm_writes + 223bf215546Sopenharmony_ci tlb_writes + 224bf215546Sopenharmony_ci tsy_writes + 225bf215546Sopenharmony_ci inst->sig.ldtmu + 226bf215546Sopenharmony_ci inst->sig.ldtlb + 227bf215546Sopenharmony_ci inst->sig.ldvpm + 228bf215546Sopenharmony_ci inst->sig.ldtlbu > 1) { 229bf215546Sopenharmony_ci fail_instr(state, 230bf215546Sopenharmony_ci "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed"); 231bf215546Sopenharmony_ci } 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci if (sfu_writes) 234bf215546Sopenharmony_ci state->last_sfu_write = state->ip; 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci if (inst->sig.thrsw) { 237bf215546Sopenharmony_ci if (in_branch_delay_slots(state)) 238bf215546Sopenharmony_ci fail_instr(state, "THRSW in a branch delay slot."); 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci if (state->last_thrsw_found) 241bf215546Sopenharmony_ci state->thrend_found = true; 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci if (state->last_thrsw_ip == state->ip - 1) { 244bf215546Sopenharmony_ci /* If it's the second THRSW in a row, then it's just a 245bf215546Sopenharmony_ci * last-thrsw signal. 246bf215546Sopenharmony_ci */ 247bf215546Sopenharmony_ci if (state->last_thrsw_found) 248bf215546Sopenharmony_ci fail_instr(state, "Two last-THRSW signals"); 249bf215546Sopenharmony_ci state->last_thrsw_found = true; 250bf215546Sopenharmony_ci } else { 251bf215546Sopenharmony_ci if (in_thrsw_delay_slots(state)) { 252bf215546Sopenharmony_ci fail_instr(state, 253bf215546Sopenharmony_ci "THRSW too close to another THRSW."); 254bf215546Sopenharmony_ci } 255bf215546Sopenharmony_ci state->thrsw_count++; 256bf215546Sopenharmony_ci state->last_thrsw_ip = state->ip; 257bf215546Sopenharmony_ci } 258bf215546Sopenharmony_ci } 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci if (state->thrend_found && 261bf215546Sopenharmony_ci state->last_thrsw_ip - state->ip <= 2 && 262bf215546Sopenharmony_ci inst->type == V3D_QPU_INSTR_TYPE_ALU) { 263bf215546Sopenharmony_ci if ((inst->alu.add.op != V3D_QPU_A_NOP && 264bf215546Sopenharmony_ci !inst->alu.add.magic_write)) { 265bf215546Sopenharmony_ci fail_instr(state, "RF write after THREND"); 266bf215546Sopenharmony_ci } 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_ci if ((inst->alu.mul.op != V3D_QPU_M_NOP && 269bf215546Sopenharmony_ci !inst->alu.mul.magic_write)) { 270bf215546Sopenharmony_ci fail_instr(state, "RF write after THREND"); 271bf215546Sopenharmony_ci } 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ci if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && 274bf215546Sopenharmony_ci !inst->sig_magic) { 275bf215546Sopenharmony_ci fail_instr(state, "RF write after THREND"); 276bf215546Sopenharmony_ci } 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci /* GFXH-1625: No TMUWT in the last instruction */ 279bf215546Sopenharmony_ci if (state->last_thrsw_ip - state->ip == 2 && 280bf215546Sopenharmony_ci inst->alu.add.op == V3D_QPU_A_TMUWT) 281bf215546Sopenharmony_ci fail_instr(state, "TMUWT in last instruction"); 282bf215546Sopenharmony_ci } 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) { 285bf215546Sopenharmony_ci if (in_branch_delay_slots(state)) 286bf215546Sopenharmony_ci fail_instr(state, "branch in a branch delay slot."); 287bf215546Sopenharmony_ci if (in_thrsw_delay_slots(state)) 288bf215546Sopenharmony_ci fail_instr(state, "branch in a THRSW delay slot."); 289bf215546Sopenharmony_ci state->last_branch_ip = state->ip; 290bf215546Sopenharmony_ci } 291bf215546Sopenharmony_ci} 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_cistatic void 294bf215546Sopenharmony_ciqpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block) 295bf215546Sopenharmony_ci{ 296bf215546Sopenharmony_ci vir_for_each_inst(qinst, block) { 297bf215546Sopenharmony_ci qpu_validate_inst(state, qinst); 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_ci state->last = &qinst->qpu; 300bf215546Sopenharmony_ci state->ip++; 301bf215546Sopenharmony_ci } 302bf215546Sopenharmony_ci} 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_ci/** 305bf215546Sopenharmony_ci * Checks for the instruction restrictions from page 37 ("Summary of 306bf215546Sopenharmony_ci * Instruction Restrictions"). 307bf215546Sopenharmony_ci */ 308bf215546Sopenharmony_civoid 309bf215546Sopenharmony_ciqpu_validate(struct v3d_compile *c) 310bf215546Sopenharmony_ci{ 311bf215546Sopenharmony_ci /* We don't want to do validation in release builds, but we want to 312bf215546Sopenharmony_ci * keep compiling the validation code to make sure it doesn't get 313bf215546Sopenharmony_ci * broken. 314bf215546Sopenharmony_ci */ 315bf215546Sopenharmony_ci#ifndef DEBUG 316bf215546Sopenharmony_ci return; 317bf215546Sopenharmony_ci#endif 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci struct v3d_qpu_validate_state state = { 320bf215546Sopenharmony_ci .c = c, 321bf215546Sopenharmony_ci .last_sfu_write = -10, 322bf215546Sopenharmony_ci .last_thrsw_ip = -10, 323bf215546Sopenharmony_ci .last_branch_ip = -10, 324bf215546Sopenharmony_ci .ip = 0, 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci .last_thrsw_found = !c->last_thrsw, 327bf215546Sopenharmony_ci }; 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci vir_for_each_block(block, c) { 330bf215546Sopenharmony_ci qpu_validate_block(&state, block); 331bf215546Sopenharmony_ci } 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_ci if (state.thrsw_count > 1 && !state.last_thrsw_found) { 334bf215546Sopenharmony_ci fail_instr(&state, 335bf215546Sopenharmony_ci "thread switch found without last-THRSW in program"); 336bf215546Sopenharmony_ci } 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci if (!state.thrend_found) 339bf215546Sopenharmony_ci fail_instr(&state, "No program-end THRSW found"); 340bf215546Sopenharmony_ci} 341