1bf215546Sopenharmony_ci 2bf215546Sopenharmony_ci/* 3bf215546Sopenharmony_ci * Copyright © 2014 Broadcom 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22bf215546Sopenharmony_ci * IN THE SOFTWARE. 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include <stdlib.h> 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "vc4_qpu.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_cistatic void 30bf215546Sopenharmony_cifail_instr(uint64_t inst, const char *msg) 31bf215546Sopenharmony_ci{ 32bf215546Sopenharmony_ci fprintf(stderr, "vc4_qpu_validate: %s: ", msg); 33bf215546Sopenharmony_ci vc4_qpu_disasm(&inst, 1); 34bf215546Sopenharmony_ci fprintf(stderr, "\n"); 35bf215546Sopenharmony_ci abort(); 36bf215546Sopenharmony_ci} 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_cistatic bool 39bf215546Sopenharmony_ciwrites_reg(uint64_t inst, uint32_t w) 40bf215546Sopenharmony_ci{ 41bf215546Sopenharmony_ci return (QPU_GET_FIELD(inst, QPU_WADDR_ADD) == w || 42bf215546Sopenharmony_ci QPU_GET_FIELD(inst, QPU_WADDR_MUL) == w); 43bf215546Sopenharmony_ci} 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_cistatic bool 46bf215546Sopenharmony_ci_reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b) 47bf215546Sopenharmony_ci{ 48bf215546Sopenharmony_ci struct { 49bf215546Sopenharmony_ci uint32_t mux, addr; 50bf215546Sopenharmony_ci } src_regs[] = { 51bf215546Sopenharmony_ci { QPU_GET_FIELD(inst, QPU_ADD_A) }, 52bf215546Sopenharmony_ci { QPU_GET_FIELD(inst, QPU_ADD_B) }, 53bf215546Sopenharmony_ci { QPU_GET_FIELD(inst, QPU_MUL_A) }, 54bf215546Sopenharmony_ci { QPU_GET_FIELD(inst, QPU_MUL_B) }, 55bf215546Sopenharmony_ci }; 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci /* Branches only reference raddr_a (no mux), and we don't use that 58bf215546Sopenharmony_ci * feature of branching. 59bf215546Sopenharmony_ci */ 60bf215546Sopenharmony_ci if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_BRANCH) 61bf215546Sopenharmony_ci return false; 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci /* Load immediates don't read any registers. */ 64bf215546Sopenharmony_ci if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_LOAD_IMM) 65bf215546Sopenharmony_ci return false; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci for (int i = 0; i < ARRAY_SIZE(src_regs); i++) { 68bf215546Sopenharmony_ci if (!ignore_a && 69bf215546Sopenharmony_ci src_regs[i].mux == QPU_MUX_A && 70bf215546Sopenharmony_ci (QPU_GET_FIELD(inst, QPU_RADDR_A) == r)) 71bf215546Sopenharmony_ci return true; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci if (!ignore_b && 74bf215546Sopenharmony_ci QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM && 75bf215546Sopenharmony_ci src_regs[i].mux == QPU_MUX_B && 76bf215546Sopenharmony_ci (QPU_GET_FIELD(inst, QPU_RADDR_B) == r)) 77bf215546Sopenharmony_ci return true; 78bf215546Sopenharmony_ci } 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci return false; 81bf215546Sopenharmony_ci} 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_cistatic bool 84bf215546Sopenharmony_cireads_reg(uint64_t inst, uint32_t r) 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci return _reads_reg(inst, r, false, false); 87bf215546Sopenharmony_ci} 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_cistatic bool 90bf215546Sopenharmony_cireads_a_reg(uint64_t inst, uint32_t r) 91bf215546Sopenharmony_ci{ 92bf215546Sopenharmony_ci return _reads_reg(inst, r, false, true); 93bf215546Sopenharmony_ci} 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_cistatic bool 96bf215546Sopenharmony_cireads_b_reg(uint64_t inst, uint32_t r) 97bf215546Sopenharmony_ci{ 98bf215546Sopenharmony_ci return _reads_reg(inst, r, true, false); 99bf215546Sopenharmony_ci} 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_cistatic bool 102bf215546Sopenharmony_ciwrites_sfu(uint64_t inst) 103bf215546Sopenharmony_ci{ 104bf215546Sopenharmony_ci return (writes_reg(inst, QPU_W_SFU_RECIP) || 105bf215546Sopenharmony_ci writes_reg(inst, QPU_W_SFU_RECIPSQRT) || 106bf215546Sopenharmony_ci writes_reg(inst, QPU_W_SFU_EXP) || 107bf215546Sopenharmony_ci writes_reg(inst, QPU_W_SFU_LOG)); 108bf215546Sopenharmony_ci} 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci/** 111bf215546Sopenharmony_ci * Checks for the instruction restrictions from page 37 ("Summary of 112bf215546Sopenharmony_ci * Instruction Restrictions"). 113bf215546Sopenharmony_ci */ 114bf215546Sopenharmony_civoid 115bf215546Sopenharmony_civc4_qpu_validate(uint64_t *insts, uint32_t num_inst) 116bf215546Sopenharmony_ci{ 117bf215546Sopenharmony_ci bool scoreboard_locked = false; 118bf215546Sopenharmony_ci bool threaded = false; 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci /* We don't want to do validation in release builds, but we want to 121bf215546Sopenharmony_ci * keep compiling the validation code to make sure it doesn't get 122bf215546Sopenharmony_ci * broken. 123bf215546Sopenharmony_ci */ 124bf215546Sopenharmony_ci#ifndef DEBUG 125bf215546Sopenharmony_ci return; 126bf215546Sopenharmony_ci#endif 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci for (int i = 0; i < num_inst; i++) { 129bf215546Sopenharmony_ci uint64_t inst = insts[i]; 130bf215546Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci if (sig != QPU_SIG_PROG_END) { 133bf215546Sopenharmony_ci if (qpu_inst_is_tlb(inst)) 134bf215546Sopenharmony_ci scoreboard_locked = true; 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci if (sig == QPU_SIG_THREAD_SWITCH || 137bf215546Sopenharmony_ci sig == QPU_SIG_LAST_THREAD_SWITCH) { 138bf215546Sopenharmony_ci threaded = true; 139bf215546Sopenharmony_ci } 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci continue; 142bf215546Sopenharmony_ci } 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci /* "The Thread End instruction must not write to either physical 145bf215546Sopenharmony_ci * regfile A or B." 146bf215546Sopenharmony_ci */ 147bf215546Sopenharmony_ci if (QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32 || 148bf215546Sopenharmony_ci QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32) { 149bf215546Sopenharmony_ci fail_instr(inst, "write to phys reg in thread end"); 150bf215546Sopenharmony_ci } 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci /* Can't trigger an implicit wait on scoreboard in the program 153bf215546Sopenharmony_ci * end instruction. 154bf215546Sopenharmony_ci */ 155bf215546Sopenharmony_ci if (qpu_inst_is_tlb(inst) && !scoreboard_locked) 156bf215546Sopenharmony_ci fail_instr(inst, "implicit sb wait in program end"); 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci /* Two delay slots will be executed. */ 159bf215546Sopenharmony_ci assert(i + 2 <= num_inst); 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci for (int j = i; j < i + 2; j++) { 162bf215546Sopenharmony_ci /* "The last three instructions of any program 163bf215546Sopenharmony_ci * (Thread End plus the following two delay-slot 164bf215546Sopenharmony_ci * instructions) must not do varyings read, uniforms 165bf215546Sopenharmony_ci * read or any kind of VPM, VDR, or VDW read or 166bf215546Sopenharmony_ci * write." 167bf215546Sopenharmony_ci */ 168bf215546Sopenharmony_ci if (writes_reg(insts[j], QPU_W_VPM) || 169bf215546Sopenharmony_ci reads_reg(insts[j], QPU_R_VARY) || 170bf215546Sopenharmony_ci reads_reg(insts[j], QPU_R_UNIF) || 171bf215546Sopenharmony_ci reads_reg(insts[j], QPU_R_VPM)) { 172bf215546Sopenharmony_ci fail_instr(insts[j], "last 3 instructions " 173bf215546Sopenharmony_ci "using fixed functions"); 174bf215546Sopenharmony_ci } 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci /* "The Thread End instruction and the following two 177bf215546Sopenharmony_ci * delay slot instructions must not write or read 178bf215546Sopenharmony_ci * address 14 in either regfile A or B." 179bf215546Sopenharmony_ci */ 180bf215546Sopenharmony_ci if (writes_reg(insts[j], 14) || 181bf215546Sopenharmony_ci reads_reg(insts[j], 14)) { 182bf215546Sopenharmony_ci fail_instr(insts[j], "last 3 instructions " 183bf215546Sopenharmony_ci "must not use r14"); 184bf215546Sopenharmony_ci } 185bf215546Sopenharmony_ci } 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci /* "The final program instruction (the second delay slot 188bf215546Sopenharmony_ci * instruction) must not do a TLB Z write." 189bf215546Sopenharmony_ci */ 190bf215546Sopenharmony_ci if (writes_reg(insts[i + 2], QPU_W_TLB_Z)) { 191bf215546Sopenharmony_ci fail_instr(insts[i + 2], "final instruction doing " 192bf215546Sopenharmony_ci "Z write"); 193bf215546Sopenharmony_ci } 194bf215546Sopenharmony_ci } 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci /* "A scoreboard wait must not occur in the first two instructions of 197bf215546Sopenharmony_ci * a fragment shader. This is either the explicit Wait for Scoreboard 198bf215546Sopenharmony_ci * signal or an implicit wait with the first tile-buffer read or 199bf215546Sopenharmony_ci * write instruction." 200bf215546Sopenharmony_ci */ 201bf215546Sopenharmony_ci for (int i = 0; i < 2; i++) { 202bf215546Sopenharmony_ci uint64_t inst = insts[i]; 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci if (qpu_inst_is_tlb(inst)) 205bf215546Sopenharmony_ci fail_instr(inst, "sb wait in first two insts"); 206bf215546Sopenharmony_ci } 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci /* "If TMU_NOSWAP is written, the write must be three instructions 209bf215546Sopenharmony_ci * before the first TMU write instruction. For example, if 210bf215546Sopenharmony_ci * TMU_NOSWAP is written in the first shader instruction, the first 211bf215546Sopenharmony_ci * TMU write cannot occur before the 4th shader instruction." 212bf215546Sopenharmony_ci */ 213bf215546Sopenharmony_ci int last_tmu_noswap = -10; 214bf215546Sopenharmony_ci for (int i = 0; i < num_inst; i++) { 215bf215546Sopenharmony_ci uint64_t inst = insts[i]; 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci if ((i - last_tmu_noswap) <= 3 && 218bf215546Sopenharmony_ci (writes_reg(inst, QPU_W_TMU0_S) || 219bf215546Sopenharmony_ci writes_reg(inst, QPU_W_TMU1_S))) { 220bf215546Sopenharmony_ci fail_instr(inst, "TMU write too soon after TMU_NOSWAP"); 221bf215546Sopenharmony_ci } 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci if (writes_reg(inst, QPU_W_TMU_NOSWAP)) 224bf215546Sopenharmony_ci last_tmu_noswap = i; 225bf215546Sopenharmony_ci } 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci /* "An instruction must not read from a location in physical regfile A 228bf215546Sopenharmony_ci * or B that was written to by the previous instruction." 229bf215546Sopenharmony_ci */ 230bf215546Sopenharmony_ci for (int i = 0; i < num_inst - 1; i++) { 231bf215546Sopenharmony_ci uint64_t inst = insts[i]; 232bf215546Sopenharmony_ci uint32_t add_waddr = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 233bf215546Sopenharmony_ci uint32_t mul_waddr = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 234bf215546Sopenharmony_ci uint32_t waddr_a, waddr_b; 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci if (inst & QPU_WS) { 237bf215546Sopenharmony_ci waddr_b = add_waddr; 238bf215546Sopenharmony_ci waddr_a = mul_waddr; 239bf215546Sopenharmony_ci } else { 240bf215546Sopenharmony_ci waddr_a = add_waddr; 241bf215546Sopenharmony_ci waddr_b = mul_waddr; 242bf215546Sopenharmony_ci } 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci if ((waddr_a < 32 && reads_a_reg(insts[i + 1], waddr_a)) || 245bf215546Sopenharmony_ci (waddr_b < 32 && reads_b_reg(insts[i + 1], waddr_b))) { 246bf215546Sopenharmony_ci fail_instr(insts[i + 1], 247bf215546Sopenharmony_ci "Reads physical reg too soon after write"); 248bf215546Sopenharmony_ci } 249bf215546Sopenharmony_ci } 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci /* "After an SFU lookup instruction, accumulator r4 must not be read 252bf215546Sopenharmony_ci * in the following two instructions. Any other instruction that 253bf215546Sopenharmony_ci * results in r4 being written (that is, TMU read, TLB read, SFU 254bf215546Sopenharmony_ci * lookup) cannot occur in the two instructions following an SFU 255bf215546Sopenharmony_ci * lookup." 256bf215546Sopenharmony_ci */ 257bf215546Sopenharmony_ci int last_sfu_inst = -10; 258bf215546Sopenharmony_ci for (int i = 0; i < num_inst - 1; i++) { 259bf215546Sopenharmony_ci uint64_t inst = insts[i]; 260bf215546Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci if (i - last_sfu_inst <= 2 && 263bf215546Sopenharmony_ci (writes_sfu(inst) || 264bf215546Sopenharmony_ci sig == QPU_SIG_LOAD_TMU0 || 265bf215546Sopenharmony_ci sig == QPU_SIG_LOAD_TMU1 || 266bf215546Sopenharmony_ci sig == QPU_SIG_COLOR_LOAD)) { 267bf215546Sopenharmony_ci fail_instr(inst, "R4 write too soon after SFU write"); 268bf215546Sopenharmony_ci } 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci if (writes_sfu(inst)) 271bf215546Sopenharmony_ci last_sfu_inst = i; 272bf215546Sopenharmony_ci } 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci for (int i = 0; i < num_inst - 1; i++) { 275bf215546Sopenharmony_ci uint64_t inst = insts[i]; 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM && 278bf215546Sopenharmony_ci QPU_GET_FIELD(inst, QPU_SMALL_IMM) >= 279bf215546Sopenharmony_ci QPU_SMALL_IMM_MUL_ROT) { 280bf215546Sopenharmony_ci uint32_t mux_a = QPU_GET_FIELD(inst, QPU_MUL_A); 281bf215546Sopenharmony_ci uint32_t mux_b = QPU_GET_FIELD(inst, QPU_MUL_B); 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci /* "The full horizontal vector rotate is only 284bf215546Sopenharmony_ci * available when both of the mul ALU input arguments 285bf215546Sopenharmony_ci * are taken from accumulators r0-r3." 286bf215546Sopenharmony_ci */ 287bf215546Sopenharmony_ci if (mux_a > QPU_MUX_R3 || mux_b > QPU_MUX_R3) { 288bf215546Sopenharmony_ci fail_instr(inst, 289bf215546Sopenharmony_ci "MUL rotate using non-accumulator " 290bf215546Sopenharmony_ci "input"); 291bf215546Sopenharmony_ci } 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci if (QPU_GET_FIELD(inst, QPU_SMALL_IMM) == 294bf215546Sopenharmony_ci QPU_SMALL_IMM_MUL_ROT) { 295bf215546Sopenharmony_ci /* "An instruction that does a vector rotate 296bf215546Sopenharmony_ci * by r5 must not immediately follow an 297bf215546Sopenharmony_ci * instruction that writes to r5." 298bf215546Sopenharmony_ci */ 299bf215546Sopenharmony_ci if (writes_reg(insts[i - 1], QPU_W_ACC5)) { 300bf215546Sopenharmony_ci fail_instr(inst, 301bf215546Sopenharmony_ci "vector rotate by r5 " 302bf215546Sopenharmony_ci "immediately after r5 write"); 303bf215546Sopenharmony_ci } 304bf215546Sopenharmony_ci } 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci /* "An instruction that does a vector rotate must not 307bf215546Sopenharmony_ci * immediately follow an instruction that writes to the 308bf215546Sopenharmony_ci * accumulator that is being rotated." 309bf215546Sopenharmony_ci */ 310bf215546Sopenharmony_ci if (writes_reg(insts[i - 1], QPU_W_ACC0 + mux_a) || 311bf215546Sopenharmony_ci writes_reg(insts[i - 1], QPU_W_ACC0 + mux_b)) { 312bf215546Sopenharmony_ci fail_instr(inst, 313bf215546Sopenharmony_ci "vector rotate of value " 314bf215546Sopenharmony_ci "written in previous instruction"); 315bf215546Sopenharmony_ci } 316bf215546Sopenharmony_ci } 317bf215546Sopenharmony_ci } 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci /* "An instruction that does a vector rotate must not immediately 320bf215546Sopenharmony_ci * follow an instruction that writes to the accumulator that is being 321bf215546Sopenharmony_ci * rotated. 322bf215546Sopenharmony_ci * 323bf215546Sopenharmony_ci * XXX: TODO. 324bf215546Sopenharmony_ci */ 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci /* "After an instruction that does a TLB Z write, the multisample mask 327bf215546Sopenharmony_ci * must not be read as an instruction input argument in the following 328bf215546Sopenharmony_ci * two instruction. The TLB Z write instruction can, however, be 329bf215546Sopenharmony_ci * followed immediately by a TLB color write." 330bf215546Sopenharmony_ci */ 331bf215546Sopenharmony_ci for (int i = 0; i < num_inst - 1; i++) { 332bf215546Sopenharmony_ci uint64_t inst = insts[i]; 333bf215546Sopenharmony_ci if (writes_reg(inst, QPU_W_TLB_Z) && 334bf215546Sopenharmony_ci (reads_a_reg(insts[i + 1], QPU_R_MS_REV_FLAGS) || 335bf215546Sopenharmony_ci reads_a_reg(insts[i + 2], QPU_R_MS_REV_FLAGS))) { 336bf215546Sopenharmony_ci fail_instr(inst, "TLB Z write followed by MS mask read"); 337bf215546Sopenharmony_ci } 338bf215546Sopenharmony_ci } 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci /* 341bf215546Sopenharmony_ci * "A single instruction can only perform a maximum of one of the 342bf215546Sopenharmony_ci * following closely coupled peripheral accesses in a single 343bf215546Sopenharmony_ci * instruction: TMU write, TMU read, TLB write, TLB read, TLB 344bf215546Sopenharmony_ci * combined color read and write, SFU write, Mutex read or Semaphore 345bf215546Sopenharmony_ci * access." 346bf215546Sopenharmony_ci */ 347bf215546Sopenharmony_ci for (int i = 0; i < num_inst - 1; i++) { 348bf215546Sopenharmony_ci uint64_t inst = insts[i]; 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ci if (qpu_num_sf_accesses(inst) > 1) 351bf215546Sopenharmony_ci fail_instr(inst, "Single instruction writes SFU twice"); 352bf215546Sopenharmony_ci } 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci /* "The uniform base pointer can be written (from SIMD element 0) by 355bf215546Sopenharmony_ci * the processor to reset the stream, there must be at least two 356bf215546Sopenharmony_ci * nonuniform-accessing instructions following a pointer change 357bf215546Sopenharmony_ci * before uniforms can be accessed once more." 358bf215546Sopenharmony_ci */ 359bf215546Sopenharmony_ci int last_unif_pointer_update = -3; 360bf215546Sopenharmony_ci for (int i = 0; i < num_inst; i++) { 361bf215546Sopenharmony_ci uint64_t inst = insts[i]; 362bf215546Sopenharmony_ci uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 363bf215546Sopenharmony_ci uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_ci if (reads_reg(inst, QPU_R_UNIF) && 366bf215546Sopenharmony_ci i - last_unif_pointer_update <= 2) { 367bf215546Sopenharmony_ci fail_instr(inst, 368bf215546Sopenharmony_ci "uniform read too soon after pointer update"); 369bf215546Sopenharmony_ci } 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci if (waddr_add == QPU_W_UNIFORMS_ADDRESS || 372bf215546Sopenharmony_ci waddr_mul == QPU_W_UNIFORMS_ADDRESS) 373bf215546Sopenharmony_ci last_unif_pointer_update = i; 374bf215546Sopenharmony_ci } 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci if (threaded) { 377bf215546Sopenharmony_ci bool last_thrsw_found = false; 378bf215546Sopenharmony_ci bool scoreboard_locked = false; 379bf215546Sopenharmony_ci int tex_samples_outstanding = 0; 380bf215546Sopenharmony_ci int last_tex_samples_outstanding = 0; 381bf215546Sopenharmony_ci int thrsw_ip = -1; 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci for (int i = 0; i < num_inst; i++) { 384bf215546Sopenharmony_ci uint64_t inst = insts[i]; 385bf215546Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 386bf215546Sopenharmony_ci 387bf215546Sopenharmony_ci if (i == thrsw_ip) { 388bf215546Sopenharmony_ci /* In order to get texture results back in the 389bf215546Sopenharmony_ci * correct order, before a new thrsw we have 390bf215546Sopenharmony_ci * to read all the texture results from before 391bf215546Sopenharmony_ci * the previous thrsw. 392bf215546Sopenharmony_ci * 393bf215546Sopenharmony_ci * FIXME: Is collecting the remaining results 394bf215546Sopenharmony_ci * during the delay slots OK, or should we do 395bf215546Sopenharmony_ci * this at THRSW signal time? 396bf215546Sopenharmony_ci */ 397bf215546Sopenharmony_ci if (last_tex_samples_outstanding != 0) { 398bf215546Sopenharmony_ci fail_instr(inst, "THRSW with texture " 399bf215546Sopenharmony_ci "results from the previous " 400bf215546Sopenharmony_ci "THRSW still in the FIFO."); 401bf215546Sopenharmony_ci } 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci last_tex_samples_outstanding = 404bf215546Sopenharmony_ci tex_samples_outstanding; 405bf215546Sopenharmony_ci tex_samples_outstanding = 0; 406bf215546Sopenharmony_ci } 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci if (qpu_inst_is_tlb(inst)) 409bf215546Sopenharmony_ci scoreboard_locked = true; 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci switch (sig) { 412bf215546Sopenharmony_ci case QPU_SIG_THREAD_SWITCH: 413bf215546Sopenharmony_ci case QPU_SIG_LAST_THREAD_SWITCH: 414bf215546Sopenharmony_ci /* No thread switching with the scoreboard 415bf215546Sopenharmony_ci * locked. Doing so means we may deadlock 416bf215546Sopenharmony_ci * when the other thread tries to lock 417bf215546Sopenharmony_ci * scoreboard. 418bf215546Sopenharmony_ci */ 419bf215546Sopenharmony_ci if (scoreboard_locked) { 420bf215546Sopenharmony_ci fail_instr(inst, "THRSW with the " 421bf215546Sopenharmony_ci "scoreboard locked."); 422bf215546Sopenharmony_ci } 423bf215546Sopenharmony_ci 424bf215546Sopenharmony_ci /* No thread switching after lthrsw, since 425bf215546Sopenharmony_ci * lthrsw means that we get delayed until the 426bf215546Sopenharmony_ci * other shader is ready for us to terminate. 427bf215546Sopenharmony_ci */ 428bf215546Sopenharmony_ci if (last_thrsw_found) { 429bf215546Sopenharmony_ci fail_instr(inst, "THRSW after a " 430bf215546Sopenharmony_ci "previous LTHRSW"); 431bf215546Sopenharmony_ci } 432bf215546Sopenharmony_ci 433bf215546Sopenharmony_ci if (sig == QPU_SIG_LAST_THREAD_SWITCH) 434bf215546Sopenharmony_ci last_thrsw_found = true; 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci /* No THRSW while we already have a THRSW 437bf215546Sopenharmony_ci * queued. 438bf215546Sopenharmony_ci */ 439bf215546Sopenharmony_ci if (i < thrsw_ip) { 440bf215546Sopenharmony_ci fail_instr(inst, 441bf215546Sopenharmony_ci "THRSW with a THRSW queued."); 442bf215546Sopenharmony_ci } 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci thrsw_ip = i + 3; 445bf215546Sopenharmony_ci break; 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci case QPU_SIG_LOAD_TMU0: 448bf215546Sopenharmony_ci case QPU_SIG_LOAD_TMU1: 449bf215546Sopenharmony_ci if (last_tex_samples_outstanding == 0) { 450bf215546Sopenharmony_ci fail_instr(inst, "TMU load with nothing " 451bf215546Sopenharmony_ci "in the results fifo from " 452bf215546Sopenharmony_ci "the previous THRSW."); 453bf215546Sopenharmony_ci } 454bf215546Sopenharmony_ci 455bf215546Sopenharmony_ci last_tex_samples_outstanding--; 456bf215546Sopenharmony_ci break; 457bf215546Sopenharmony_ci } 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 460bf215546Sopenharmony_ci uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 461bf215546Sopenharmony_ci if (waddr_add == QPU_W_TMU0_S || 462bf215546Sopenharmony_ci waddr_add == QPU_W_TMU1_S || 463bf215546Sopenharmony_ci waddr_mul == QPU_W_TMU0_S || 464bf215546Sopenharmony_ci waddr_mul == QPU_W_TMU1_S) { 465bf215546Sopenharmony_ci tex_samples_outstanding++; 466bf215546Sopenharmony_ci } 467bf215546Sopenharmony_ci } 468bf215546Sopenharmony_ci } 469bf215546Sopenharmony_ci} 470