/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file
 *
 * Validates the QPU instruction sequence after register allocation and
 * scheduling.
 */

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include "v3d_compiler.h"
#include "qpu/qpu_disasm.h"

struct v3d_qpu_validate_state {
        struct v3d_compile *c;
        const struct v3d_qpu_instr *last;
        int ip;
        int last_sfu_write;
        int last_branch_ip;
        int last_thrsw_ip;

        /* Set when we've found the last-THRSW signal, or if we were started
         * in single-segment mode.
         */
        bool last_thrsw_found;

        /* Set when we've found the THRSW after the last THRSW */
        bool thrend_found;

        int thrsw_count;
};

static void
fail_instr(struct v3d_qpu_validate_state *state, const char *msg)
{
        struct v3d_compile *c = state->c;

        fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg);

        int dump_ip = 0;
        vir_for_each_inst_inorder(inst, c) {
                v3d_qpu_dump(c->devinfo, &inst->qpu);

                if (dump_ip++ == state->ip)
                        fprintf(stderr, " *** ERROR ***");

                fprintf(stderr, "\n");
        }

        fprintf(stderr, "\n");
        abort();
}

static bool
in_branch_delay_slots(struct v3d_qpu_validate_state *state)
{
        return (state->ip - state->last_branch_ip) < 3;
}

static bool
in_thrsw_delay_slots(struct v3d_qpu_validate_state *state)
{
        return (state->ip - state->last_thrsw_ip) < 3;
}

static bool
qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
                        bool (*predicate)(enum v3d_qpu_waddr waddr))
{
        /* Only ALU instructions have add/mul writes to inspect. */
        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
                return false;

        if (inst->alu.add.op != V3D_QPU_A_NOP &&
            inst->alu.add.magic_write &&
            predicate(inst->alu.add.waddr))
                return true;

        if (inst->alu.mul.op != V3D_QPU_M_NOP &&
            inst->alu.mul.magic_write &&
            predicate(inst->alu.mul.waddr))
                return true;

        return false;
}
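/* A rough sketch of the program tail that the THRSW bookkeeping in
 * qpu_validate_inst() expects (illustrative only, derived from how the
 * checks below count instructions, not from the hardware documentation):
 *
 *     thrsw            <- an ordinary thread switch
 *     thrsw            <- back-to-back with the previous one: the
 *                         last-THRSW signal (last_thrsw_found)
 *     ...              <- at least one instruction in between
 *     thrsw            <- the THRSW after the last-THRSW signal is the
 *                         thread end (thrend_found)
 *     <delay slot 1>   <- no register-file writes allowed
 *     <delay slot 2>   <- no register-file writes, no TMUWT (GFXH-1625)
 *
 * When the compile has no c->last_thrsw, last_thrsw_found starts out true
 * and the first THRSW seen is treated as the thread end.
 */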
static void
qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
{
        const struct v3d_device_info *devinfo = state->c->devinfo;
        const struct v3d_qpu_instr *inst = &qinst->qpu;

        /* Branches are not ALU instructions, so check them before the
         * early return below.
         */
        if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
                if (in_branch_delay_slots(state))
                        fail_instr(state, "branch in a branch delay slot.");
                if (in_thrsw_delay_slots(state))
                        fail_instr(state, "branch in a THRSW delay slot.");
                state->last_branch_ip = state->ip;
        }

        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
                return;

        /* LDVARY writes r5 two instructions later and LDUNIF writes
         * r5 one instruction later, which is illegal to have
         * together.
         */
        if (state->last && state->last->sig.ldvary &&
            (inst->sig.ldunif || inst->sig.ldunifa)) {
                fail_instr(state, "LDUNIF after a LDVARY");
        }

        /* GFXH-1633 (fixed since V3D 4.2.14, which is Rpi4)
         *
         * FIXME: This would not check correctly for V3D 4.2 versions lower
         * than V3D 4.2.14, but that is not a real issue because the simulator
         * will still catch this, and we are not really targeting any such
         * versions anyway.
         */
        if (state->c->devinfo->ver < 42) {
                bool last_reads_ldunif = (state->last && (state->last->sig.ldunif ||
                                                          state->last->sig.ldunifrf));
                bool last_reads_ldunifa = (state->last && (state->last->sig.ldunifa ||
                                                           state->last->sig.ldunifarf));
                bool reads_ldunif = inst->sig.ldunif || inst->sig.ldunifrf;
                bool reads_ldunifa = inst->sig.ldunifa || inst->sig.ldunifarf;
                if ((last_reads_ldunif && reads_ldunifa) ||
                    (last_reads_ldunifa && reads_ldunif)) {
                        fail_instr(state,
                                   "LDUNIF and LDUNIFA can't be next to each other");
                }
        }

        /* Count the peripheral accesses made by the add and mul ALU writes. */
        int tmu_writes = 0;
        int sfu_writes = 0;
        int vpm_writes = 0;
        int tlb_writes = 0;
        int tsy_writes = 0;

        if (inst->alu.add.op != V3D_QPU_A_NOP) {
                if (inst->alu.add.magic_write) {
                        if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
                                                       inst->alu.add.waddr)) {
                                tmu_writes++;
                        }
                        if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))
                                sfu_writes++;
                        if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))
                                vpm_writes++;
                        if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr))
                                tlb_writes++;
                        if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr))
                                tsy_writes++;
                }
        }

        if (inst->alu.mul.op != V3D_QPU_M_NOP) {
                if (inst->alu.mul.magic_write) {
                        if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
                                                       inst->alu.mul.waddr)) {
                                tmu_writes++;
                        }
                        if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))
                                sfu_writes++;
                        if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))
                                vpm_writes++;
                        if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr))
                                tlb_writes++;
                        if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr))
                                tsy_writes++;
                }
        }

        if (in_thrsw_delay_slots(state)) {
                /* There's no way you want to start SFU during the THRSW delay
                 * slots, since the result would land in the other thread.
                 */
                if (sfu_writes) {
                        fail_instr(state,
                                   "SFU write started during THRSW delay slots");
                }

                if (inst->sig.ldvary)
                        fail_instr(state, "LDVARY during THRSW delay slots");
        }

        (void)qpu_magic_waddr_matches; /* XXX */

        /* SFU r4 results come back two instructions later.  Don't do r4
         * reads/writes or other SFU lookups until it's done.
         */
        if (state->ip - state->last_sfu_write < 2) {
                if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
                        fail_instr(state, "R4 read too soon after SFU");

                if (v3d_qpu_writes_r4(devinfo, inst))
                        fail_instr(state, "R4 write too soon after SFU");

                if (sfu_writes)
                        fail_instr(state, "SFU write too soon after SFU");
        }
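        /* The check below allows at most one peripheral access per
         * instruction, counting both the magic-register writes tallied above
         * and the load signals.  For instance, an instruction whose add ALU
         * writes a TMU magic register while also raising the ldtmu signal
         * counts as two accesses and is rejected.
         */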
        /* XXX: The docs say VPM can happen with the others, but the simulator
         * disagrees.
         */
        if (tmu_writes +
            sfu_writes +
            vpm_writes +
            tlb_writes +
            tsy_writes +
            inst->sig.ldtmu +
            inst->sig.ldtlb +
            inst->sig.ldvpm +
            inst->sig.ldtlbu > 1) {
                fail_instr(state,
                           "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
        }

        if (sfu_writes)
                state->last_sfu_write = state->ip;

        if (inst->sig.thrsw) {
                if (in_branch_delay_slots(state))
                        fail_instr(state, "THRSW in a branch delay slot.");

                if (state->last_thrsw_found)
                        state->thrend_found = true;

                if (state->last_thrsw_ip == state->ip - 1) {
                        /* If it's the second THRSW in a row, then it's just a
                         * last-thrsw signal.
                         */
                        if (state->last_thrsw_found)
                                fail_instr(state, "Two last-THRSW signals");
                        state->last_thrsw_found = true;
                } else {
                        if (in_thrsw_delay_slots(state)) {
                                fail_instr(state,
                                           "THRSW too close to another THRSW.");
                        }
                        state->thrsw_count++;
                        state->last_thrsw_ip = state->ip;
                }
        }

        /* The thread-end THRSW and its two delay slots must not write the
         * register file.
         */
        if (state->thrend_found &&
            state->ip - state->last_thrsw_ip <= 2 &&
            inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if ((inst->alu.add.op != V3D_QPU_A_NOP &&
                     !inst->alu.add.magic_write)) {
                        fail_instr(state, "RF write after THREND");
                }

                if ((inst->alu.mul.op != V3D_QPU_M_NOP &&
                     !inst->alu.mul.magic_write)) {
                        fail_instr(state, "RF write after THREND");
                }

                if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
                    !inst->sig_magic) {
                        fail_instr(state, "RF write after THREND");
                }

                /* GFXH-1625: No TMUWT in the last instruction */
                if (state->ip - state->last_thrsw_ip == 2 &&
                    inst->alu.add.op == V3D_QPU_A_TMUWT)
                        fail_instr(state, "TMUWT in last instruction");
        }
}

static void
qpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block)
{
        vir_for_each_inst(qinst, block) {
                qpu_validate_inst(state, qinst);

                state->last = &qinst->qpu;
                state->ip++;
        }
}

/**
 * Checks for the instruction restrictions from page 37 ("Summary of
 * Instruction Restrictions").
 */
void
qpu_validate(struct v3d_compile *c)
{
        /* We don't want to do validation in release builds, but we want to
         * keep compiling the validation code to make sure it doesn't get
         * broken.
         */
#ifndef DEBUG
        return;
#endif

        struct v3d_qpu_validate_state state = {
                .c = c,
                .last_sfu_write = -10,
                .last_thrsw_ip = -10,
                .last_branch_ip = -10,
                .ip = 0,

                .last_thrsw_found = !c->last_thrsw,
        };

        vir_for_each_block(block, c) {
                qpu_validate_block(&state, block);
        }

        if (state.thrsw_count > 1 && !state.last_thrsw_found) {
                fail_instr(&state,
                           "thread switch found without last-THRSW in program");
        }

        if (!state.thrend_found)
                fail_instr(&state, "No program-end THRSW found");
}