Lines Matching defs:inst
49 struct qinst *inst;
131 qpu_inst_is_tlb(const struct v3d_qpu_instr *inst)
133 if (inst->sig.ldtlb || inst->sig.ldtlbu)
136 if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
139 if (inst->alu.add.magic_write &&
140 (inst->alu.add.waddr == V3D_QPU_WADDR_TLB ||
141 inst->alu.add.waddr == V3D_QPU_WADDR_TLBU))
144 if (inst->alu.mul.magic_write &&
145 (inst->alu.mul.waddr == V3D_QPU_WADDR_TLB ||
146 inst->alu.mul.waddr == V3D_QPU_WADDR_TLBU))
158 add_read_dep(state, state->last_rf[n->inst->qpu.raddr_a], n);
161 if (!n->inst->qpu.sig.small_imm) {
163 state->last_rf[n->inst->qpu.raddr_b], n);
281 struct qinst *qinst = n->inst;
282 struct v3d_qpu_instr *inst = &qinst->qpu;
289 if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
290 if (inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS)
302 assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
306 if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0)
307 process_mux_deps(state, n, inst->alu.add.a);
308 if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1)
309 process_mux_deps(state, n, inst->alu.add.b);
311 if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0)
312 process_mux_deps(state, n, inst->alu.mul.a);
313 if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1)
314 process_mux_deps(state, n, inst->alu.mul.b);
316 switch (inst->alu.add.op) {
354 switch (inst->alu.mul.op) {
368 if (inst->alu.add.op != V3D_QPU_A_NOP) {
369 process_waddr_deps(state, n, inst->alu.add.waddr,
370 inst->alu.add.magic_write);
372 if (inst->alu.mul.op != V3D_QPU_M_NOP) {
373 process_waddr_deps(state, n, inst->alu.mul.waddr,
374 inst->alu.mul.magic_write);
376 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
377 process_waddr_deps(state, n, inst->sig_addr,
378 inst->sig_magic);
381 if (v3d_qpu_writes_r3(devinfo, inst))
383 if (v3d_qpu_writes_r4(devinfo, inst))
385 if (v3d_qpu_writes_r5(devinfo, inst))
391 if (inst->sig.thrsw) {
409 if (v3d_qpu_waits_on_tmu(inst)) {
421 if (inst->sig.wrtmuc)
424 if (inst->sig.ldtlb | inst->sig.ldtlbu)
427 if (inst->sig.ldvpm) {
437 /* inst->sig.ldunif or sideband uniform read */
442 if (inst->sig.ldunifa || inst->sig.ldunifarf)
445 if (v3d_qpu_reads_flags(inst))
447 if (v3d_qpu_writes_flags(inst))
503 const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
526 const struct v3d_qpu_instr *inst = &qinst->qpu;
529 if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH)
532 assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
534 if (inst->alu.add.op != V3D_QPU_A_NOP) {
535 if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 0 &&
536 mux_reads_too_soon(scoreboard, inst, inst->alu.add.a)) {
539 if (v3d_qpu_add_op_num_src(inst->alu.add.op) > 1 &&
540 mux_reads_too_soon(scoreboard, inst, inst->alu.add.b)) {
545 if (inst->alu.mul.op != V3D_QPU_M_NOP) {
546 if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 0 &&
547 mux_reads_too_soon(scoreboard, inst, inst->alu.mul.a)) {
550 if (v3d_qpu_mul_op_num_src(inst->alu.mul.op) > 1 &&
551 mux_reads_too_soon(scoreboard, inst, inst->alu.mul.b)) {
566 const struct v3d_qpu_instr *inst = &qinst->qpu;
573 v3d_qpu_writes_r4(devinfo, inst))
595 const struct v3d_qpu_instr *inst)
597 return qpu_inst_is_tlb(inst) &&
603 qpu_instruction_uses_rf(const struct v3d_qpu_instr *inst,
606 if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
609 if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A) &&
610 inst->raddr_a == waddr)
613 if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
614 !inst->sig.small_imm && (inst->raddr_b == waddr))
622 const struct v3d_qpu_instr *inst)
625 qpu_instruction_uses_rf(inst,
636 const struct v3d_qpu_instr *inst)
644 if (qpu_inst_is_tlb(inst))
659 if (v3d_qpu_waits_on_tmu(inst))
670 if (v3d_qpu_writes_tmu(devinfo, inst))
696 const struct v3d_qpu_instr *inst)
699 if (v3d_qpu_reads_vpm(inst))
701 if (v3d_qpu_writes_vpm(inst))
703 if (v3d_qpu_waits_vpm(inst))
706 if (v3d_qpu_writes_tmu(devinfo, inst))
708 if (inst->sig.ldtmu)
710 if (inst->sig.wrtmuc)
713 if (v3d_qpu_uses_sfu(inst))
716 if (v3d_qpu_uses_tlb(inst))
719 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
720 if (inst->alu.add.op != V3D_QPU_A_NOP &&
721 inst->alu.add.magic_write &&
722 v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr)) {
726 if (inst->alu.add.op == V3D_QPU_A_TMUWT)
908 qpu_convert_add_to_mul(struct v3d_qpu_instr *inst)
910 STATIC_ASSERT(sizeof(inst->alu.mul) == sizeof(inst->alu.add));
911 assert(inst->alu.add.op != V3D_QPU_A_NOP);
912 assert(inst->alu.mul.op == V3D_QPU_M_NOP);
914 memcpy(&inst->alu.mul, &inst->alu.add, sizeof(inst->alu.mul));
915 inst->alu.mul.op = add_op_as_mul_op(inst->alu.add.op);
916 inst->alu.add.op = V3D_QPU_A_NOP;
918 inst->flags.mc = inst->flags.ac;
919 inst->flags.mpf = inst->flags.apf;
920 inst->flags.muf = inst->flags.auf;
921 inst->flags.ac = V3D_QPU_COND_NONE;
922 inst->flags.apf = V3D_QPU_PF_NONE;
923 inst->flags.auf = V3D_QPU_UF_NONE;
925 inst->alu.mul.output_pack = inst->alu.add.output_pack;
926 inst->alu.mul.a_unpack = inst->alu.add.a_unpack;
927 inst->alu.mul.b_unpack = inst->alu.add.b_unpack;
928 inst->alu.add.output_pack = V3D_QPU_PACK_NONE;
929 inst->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
930 inst->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
1048 try_skip_for_ldvary_pipelining(const struct v3d_qpu_instr *inst)
1050 return inst->sig.ldunif || inst->sig.ldunifrf;
1070 if (prev_inst->inst->qpu.sig.thrsw)
1080 const struct v3d_qpu_instr *inst = &n->inst->qpu;
1082 if (ldvary_pipelining && try_skip_for_ldvary_pipelining(inst)) {
1091 if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH &&
1099 if ((inst->sig.ldunifa || inst->sig.ldunifarf) &&
1107 if (reads_too_soon_after_write(scoreboard, n->inst))
1110 if (writes_too_soon_after_write(c->devinfo, scoreboard, n->inst))
1118 if (pixel_scoreboard_too_soon(c, scoreboard, inst))
1126 if ((inst->sig.ldunif || inst->sig.ldunifa) &&
1136 n->inst)) {
1140 if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
1153 inst->branch.msfign != V3D_QPU_MSFIGN_NONE &&
1154 inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS &&
1155 inst->branch.cond != V3D_QPU_BRANCH_COND_A0 &&
1156 inst->branch.cond != V3D_QPU_BRANCH_COND_NA0) {
1168 if (inst->sig.thrsw)
1171 if (prev_inst->inst->uniform != -1 &&
1172 n->inst->uniform != -1)
1180 if (vir_has_uniform(prev_inst->inst) &&
1181 (inst->sig.ldunifa || inst->sig.ldunifarf)) {
1185 if ((prev_inst->inst->qpu.sig.ldunifa ||
1186 prev_inst->inst->qpu.sig.ldunifarf) &&
1187 vir_has_uniform(n->inst)) {
1194 if (pixel_scoreboard_too_soon(c, scoreboard, inst))
1205 if (inst->sig.ldvary &&
1212 &prev_inst->inst->qpu, inst)) {
1217 int prio = get_instruction_priority(c->devinfo, inst);
1219 if (mux_read_stalls(scoreboard, inst)) {
1264 if (chosen && chosen->inst->qpu.sig.ldvary) {
1289 const struct v3d_qpu_instr *inst)
1291 if (v3d_qpu_instr_is_sfu(inst)) {
1292 scoreboard->last_stallable_sfu_reg = inst->alu.add.waddr;
1299 const struct v3d_qpu_instr *inst,
1302 if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH)
1305 assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
1307 if (inst->alu.add.op != V3D_QPU_A_NOP) {
1308 if (inst->alu.add.magic_write) {
1310 inst->alu.add.waddr,
1314 inst);
1317 if (inst->alu.add.op == V3D_QPU_A_SETMSF)
1321 if (inst->alu.mul.op != V3D_QPU_M_NOP) {
1322 if (inst->alu.mul.magic_write) {
1324 inst->alu.mul.waddr,
1329 if (inst->sig.ldvary)
1338 v3d_qpu_dump(devinfo, &n->inst->qpu);
1348 v3d_qpu_dump(devinfo, &child->inst->qpu);
1399 const struct v3d_qpu_instr *before_inst = &before->inst->qpu;
1400 const struct v3d_qpu_instr *after_inst = &after->inst->qpu;
1488 struct qinst *inst)
1490 list_addtail(&inst->link, &block->instructions);
1492 update_scoreboard_for_chosen(scoreboard, &inst->qpu, c->devinfo);
1517 const struct v3d_qpu_instr *inst = &qinst->qpu;
1525 if (v3d_qpu_waits_vpm(inst))
1528 if (inst->sig.ldvary)
1531 if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
1533 if (slot == 2 && inst->alu.add.op == V3D_QPU_A_TMUWT)
1537 if (!inst->alu.add.magic_write ||
1538 !inst->alu.mul.magic_write) {
1542 if (v3d_qpu_sig_writes_address(c->devinfo, &inst->sig) &&
1543 !inst->sig_magic) {
1547 if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF)
1553 if (inst->raddr_a < 3 &&
1554 (inst->alu.add.a == V3D_QPU_MUX_A ||
1555 inst->alu.add.b == V3D_QPU_MUX_A ||
1556 inst->alu.mul.a == V3D_QPU_MUX_A ||
1557 inst->alu.mul.b == V3D_QPU_MUX_A)) {
1561 if (inst->raddr_b < 3 &&
1562 !inst->sig.small_imm &&
1563 (inst->alu.add.a == V3D_QPU_MUX_B ||
1564 inst->alu.add.b == V3D_QPU_MUX_B ||
1565 inst->alu.mul.a == V3D_QPU_MUX_B ||
1566 inst->alu.mul.b == V3D_QPU_MUX_B)) {
1753 struct qinst *inst,
1758 /* There should be nothing in a thrsw inst being scheduled other than
1761 assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU);
1762 assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP);
1763 assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP);
1806 if (inst->is_last_thrsw && invalid_sig_count > 0 &&
1837 insert_scheduled_instruction(c, block, scoreboard, inst);
1840 merge_inst = inst;
1848 if (inst->is_last_thrsw) {
1871 free(inst);
1877 qpu_inst_valid_in_branch_delay_slot(struct v3d_compile *c, struct qinst *inst)
1879 if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
1882 if (inst->qpu.sig.thrsw)
1885 if (v3d_qpu_writes_unifa(c->devinfo, &inst->qpu))
1888 if (vir_has_uniform(inst))
1898 struct qinst *inst)
1900 assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
1914 inst->qpu.branch.msfign == V3D_QPU_MSFIGN_NONE ||
1915 inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_ALWAYS ||
1916 inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_A0 ||
1917 inst->qpu.branch.cond == V3D_QPU_BRANCH_COND_NA0;
1922 insert_scheduled_instruction(c, block, scoreboard, inst);
1928 while (slots_filled < 3 && block->instructions.next != &inst->link) {
1929 struct qinst *prev_inst = (struct qinst *) inst->link.prev;
1962 inst->qpu.branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
1979 list_add(&prev_inst->link, &inst->link);
1997 alu_reads_register(struct v3d_qpu_instr *inst,
2004 num_src = v3d_qpu_add_op_num_src(inst->alu.add.op);
2005 mux_a = inst->alu.add.a;
2006 mux_b = inst->alu.add.b;
2008 num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
2009 mux_a = inst->alu.mul.a;
2010 mux_b = inst->alu.mul.b;
2021 inst->raddr_a == index) {
2025 inst->raddr_b == index) {
2029 inst->raddr_a == index) {
2033 inst->raddr_b == index) {
2043 * This takes and ldvary signal merged into 'inst' and tries to move it up to
2049 * fadd rf13, r0, r5 ; nop; ; ldvary.r1 <-- inst
2055 * fadd rf13, r0, r5 ; nop; ; <-- inst
2059 * we will be able to pick up to merge into 'inst', leading to code like this:
2063 * fadd rf13, r0, r5 ; fmul r2, r1, rf0 ; <-- inst
2069 struct v3d_qpu_instr *inst)
2074 assert(inst->type == V3D_QPU_INSTR_TYPE_ALU);
2075 assert(inst->sig.ldvary);
2076 uint32_t ldvary_magic = inst->sig_magic;
2077 uint32_t ldvary_index = inst->sig_addr;
2083 if (alu_reads_register(inst, true, ldvary_magic, ldvary_index))
2085 if (alu_reads_register(inst, false, ldvary_magic, ldvary_index))
2096 if (inst->sig.ldunif)
2152 inst->sig.ldvary = false;
2153 inst->sig_magic = false;
2154 inst->sig_addr = 0;
2163 assert(!v3d_qpu_writes_r5(c->devinfo, inst));
2187 struct qinst *qinst = chosen ? chosen->inst : vir_nop();
2188 struct v3d_qpu_instr *inst = &qinst->qpu;
2195 v3d_qpu_dump(devinfo, inst);
2199 /* We can't mark_instruction_scheduled() the chosen inst until
2219 (void)qpu_merge_inst(devinfo, inst,
2220 inst, &merge->inst->qpu);
2221 if (merge->inst->uniform != -1) {
2222 chosen->inst->uniform =
2223 merge->inst->uniform;
2229 v3d_qpu_dump(devinfo, &merge->inst->qpu);
2232 v3d_qpu_dump(devinfo, inst);
2238 if (fixup_pipelined_ldvary(c, scoreboard, block, inst)) {
2251 if (mux_read_stalls(scoreboard, inst))
2260 if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH)
2288 free(merge->inst);
2291 if (inst->sig.thrsw) {
2293 } else if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
2325 n->inst = qinst;
2373 struct qinst *inst =
2377 if (!v3d_qpu_is_nop(&inst->qpu))
2380 delay_slots_start = inst;
2383 if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH) {
2384 branch = inst;
2409 * uniform_data[inst->uniform] may be shared