Lines Matching refs:instr

196 get_wait_states(aco_ptr<Instruction>& instr)
198 if (instr->opcode == aco_opcode::s_nop)
199 return instr->sopp().imm + 1;
200 else if (instr->opcode == aco_opcode::p_constaddr)
249 aco_ptr<Instruction>& instr = state.old_instructions[pred_idx];
250 if (!instr)
252 if (handle_raw_hazard_instr<Valu, Vintrp, Salu>(instr, reg, &nops_needed, &mask))
330 handle_smem_clause_hazards(Program* program, NOP_ctx_gfx6& ctx, aco_ptr<Instruction>& instr,
337 if (ctx.smem_write || instr->definitions.empty() ||
338 instr_info.is_atomic[(unsigned)instr->opcode]) {
341 for (Operand op : instr->operands) {
349 Definition def = instr->definitions[0];
358 handle_instruction_gfx6(State& state, NOP_ctx_gfx6& ctx, aco_ptr<Instruction>& instr,
364 if (instr->isSMEM()) {
370 for (unsigned i = 0; i < instr->operands.size(); i++) {
371 Operand op = instr->operands[i];
383 handle_smem_clause_hazards(state.program, ctx, instr, &NOPs);
384 } else if (instr->isSALU()) {
385 if (instr->opcode == aco_opcode::s_setreg_b32 ||
386 instr->opcode == aco_opcode::s_setreg_imm32_b32 ||
387 instr->opcode == aco_opcode::s_getreg_b32) {
392 if (instr->opcode == aco_opcode::s_movrels_b32 ||
393 instr->opcode == aco_opcode::s_movrels_b64 ||
394 instr->opcode == aco_opcode::s_movreld_b32 ||
395 instr->opcode == aco_opcode::s_movreld_b64) {
400 if (instr->opcode == aco_opcode::s_sendmsg || instr->opcode == aco_opcode::s_ttracedata)
402 } else if (instr->isDS() && instr->ds().gds) {
404 } else if (instr->isVALU() || instr->isVINTRP()) {
405 for (Operand op : instr->operands) {
412 if (instr->isDPP()) {
414 handle_valu_then_read_hazard(state, &NOPs, 2, instr->operands[0]);
417 for (Definition def : instr->definitions) {
424 if ((instr->opcode == aco_opcode::v_readlane_b32 ||
425 instr->opcode == aco_opcode::v_readlane_b32_e64 ||
426 instr->opcode == aco_opcode::v_writelane_b32 ||
427 instr->opcode == aco_opcode::v_writelane_b32_e64) &&
428 !instr->operands[1].isConstant()) {
429 handle_valu_then_read_hazard(state, &NOPs, 4, instr->operands[1]);
438 (instr->opcode == aco_opcode::v_readlane_b32 || /* GFX6 doesn't have v_readlane_b32_e64 */
439 instr->opcode == aco_opcode::v_readfirstlane_b32)) {
440 handle_vintrp_then_read_hazard(state, &NOPs, 1, instr->operands[0]);
443 if (instr->opcode == aco_opcode::v_div_fmas_f32 ||
444 instr->opcode == aco_opcode::v_div_fmas_f64)
446 } else if (instr->isVMEM() || instr->isFlatLike()) {
448 for (Operand op : instr->operands) {
454 if (!instr->isSALU() && instr->format != Format::SMEM)
458 bool lds_scratch_global = (instr->isScratch() || instr->isGlobal()) && instr->flatlike().lds;
459 if (instr->isVINTRP() || lds_scratch_global ||
460 instr->opcode == aco_opcode::ds_read_addtid_b32 ||
461 instr->opcode == aco_opcode::ds_write_addtid_b32 ||
462 instr->opcode == aco_opcode::buffer_store_lds_dword) {
467 ctx.add_wait_states(NOPs + get_wait_states(instr));
480 if ((ctx.smem_clause || ctx.smem_write) && (NOPs || instr->format != Format::SMEM)) {
490 if (instr->isSMEM()) {
491 if (instr->definitions.empty() || instr_info.is_atomic[(unsigned)instr->opcode]) {
497 for (Operand op : instr->operands) {
503 Definition def = instr->definitions[0];
508 } else if (instr->isVALU()) {
509 for (Definition def : instr->definitions) {
521 } else if (instr->isSALU() && !instr->definitions.empty()) {
522 if (!instr->definitions.empty()) {
524 Definition def = instr->definitions[0];
530 } else if (instr->opcode == aco_opcode::s_setreg_b32 ||
531 instr->opcode == aco_opcode::s_setreg_imm32_b32) {
532 SOPK_instruction& sopk = instr->sopk();
541 } else if (instr->isVMEM() || instr->isFlatLike()) {
543 bool consider_buf = (instr->isMUBUF() || instr->isMTBUF()) && instr->operands.size() == 4 &&
544 instr->operands[3].size() > 2 && instr->operands[2].physReg() >= 128;
547 bool consider_mimg = instr->isMIMG() &&
548 instr->operands[1].regClass().type() == RegType::vgpr &&
549 instr->operands[1].size() > 2 && instr->operands[0].size() == 4;
552 instr->isFlatLike() && instr->operands.size() == 3 && instr->operands[2].size() > 2;
554 PhysReg wrdata = instr->operands[consider_flat ? 2 : 3].physReg();
555 unsigned size = instr->operands[consider_flat ? 2 : 3].size();
564 check_written_regs(const aco_ptr<Instruction>& instr, const std::bitset<N>& check_regs)
566 return std::any_of(instr->definitions.begin(), instr->definitions.end(),
580 mark_read_regs(const aco_ptr<Instruction>& instr, std::bitset<N>& reg_reads)
582 for (const Operand& op : instr->operands) {
593 mark_read_regs_exec(State& state, const aco_ptr<Instruction>& instr, std::bitset<N>& reg_reads)
595 mark_read_regs(instr, reg_reads);
602 VALU_writes_sgpr(aco_ptr<Instruction>& instr)
604 if (instr->isVOPC())
606 if (instr->isVOP3() && instr->definitions.size() == 2)
608 if (instr->opcode == aco_opcode::v_readfirstlane_b32 ||
609 instr->opcode == aco_opcode::v_readlane_b32 ||
610 instr->opcode == aco_opcode::v_readlane_b32_e64)
616 instr_writes_exec(const aco_ptr<Instruction>& instr)
618 return std::any_of(instr->definitions.begin(), instr->definitions.end(),
624 instr_writes_sgpr(const aco_ptr<Instruction>& instr)
626 return std::any_of(instr->definitions.begin(), instr->definitions.end(),
632 instr_is_branch(const aco_ptr<Instruction>& instr)
634 return instr->opcode == aco_opcode::s_branch || instr->opcode == aco_opcode::s_cbranch_scc0 ||
635 instr->opcode == aco_opcode::s_cbranch_scc1 ||
636 instr->opcode == aco_opcode::s_cbranch_vccz ||
637 instr->opcode == aco_opcode::s_cbranch_vccnz ||
638 instr->opcode == aco_opcode::s_cbranch_execz ||
639 instr->opcode == aco_opcode::s_cbranch_execnz ||
640 instr->opcode == aco_opcode::s_cbranch_cdbgsys ||
641 instr->opcode == aco_opcode::s_cbranch_cdbguser ||
642 instr->opcode == aco_opcode::s_cbranch_cdbgsys_or_user ||
643 instr->opcode == aco_opcode::s_cbranch_cdbgsys_and_user ||
644 instr->opcode == aco_opcode::s_subvector_loop_begin ||
645 instr->opcode == aco_opcode::s_subvector_loop_end ||
646 instr->opcode == aco_opcode::s_setpc_b64 || instr->opcode == aco_opcode::s_swappc_b64 ||
647 instr->opcode == aco_opcode::s_getpc_b64 || instr->opcode == aco_opcode::s_call_b64;
651 handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>& instr,
660 if (instr->isVMEM() || instr->isFlatLike() || instr->isDS()) {
662 if (instr->isVMEM() || instr->isFlatLike())
664 state, instr,
665 instr->definitions.empty() ? ctx.sgprs_read_by_VMEM_store : ctx.sgprs_read_by_VMEM);
666 if (instr->isFlat() || instr->isDS())
667 mark_read_regs_exec(state, instr, ctx.sgprs_read_by_DS);
668 } else if (instr->isSALU() || instr->isSMEM()) {
669 if (instr->opcode == aco_opcode::s_waitcnt) {
670 wait_imm imm(state.program->gfx_level, instr->sopp().imm);
673 } else if (instr->opcode == aco_opcode::s_waitcnt_depctr && instr->sopp().imm == 0xffe3) {
681 if (check_written_regs(instr, ctx.sgprs_read_by_VMEM) ||
682 check_written_regs(instr, ctx.sgprs_read_by_DS) ||
683 check_written_regs(instr, ctx.sgprs_read_by_VMEM_store)) {
695 } else if (instr->isVALU()) {
705 if (instr->isVOPC()) {
707 } else if (ctx.has_VOPC && (instr->opcode == aco_opcode::v_permlane16_b32 ||
708 instr->opcode == aco_opcode::v_permlanex16_b32)) {
714 v_mov->definitions[0] = Definition(instr->operands[0].physReg(), v1);
715 v_mov->operands[0] = Operand(instr->operands[0].physReg(), v1);
717 } else if (instr->isVALU() && instr->opcode != aco_opcode::v_nop) {
724 if (!instr->isVALU() && instr->reads_exec()) {
726 } else if (instr->isVALU()) {
727 if (instr_writes_exec(instr)) {
736 } else if (instr_writes_sgpr(instr)) {
740 } else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
742 if ((instr->sopp().imm & 0xfffe) == 0xfffe)
749 if (instr->isSMEM()) {
751 mark_read_regs(instr, ctx.sgprs_read_by_SMEM);
752 } else if (VALU_writes_sgpr(instr)) {
754 if (check_written_regs(instr, ctx.sgprs_read_by_SMEM)) {
764 } else if (instr->isSALU()) {
765 if (instr->format != Format::SOPP) {
770 const SOPP_instruction& sopp = instr->sopp();
785 if (instr->isVMEM() || instr->isGlobal() || instr->isScratch()) {
790 } else if (instr->isDS()) {
795 } else if (instr_is_branch(instr)) {
798 } else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) {
800 const SOPK_instruction& sopk = instr->sopk();
814 ctx.has_VMEM = instr->isVMEM() || instr->isGlobal() || instr->isScratch();
815 ctx.has_DS = instr->isDS();
822 if (instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 1) {
827 if (instr->isMUBUF() || instr->isMTBUF()) {
828 uint32_t offset = instr->isMUBUF() ? instr->mubuf().offset : instr->mtbuf().offset;
837 if (instr->opcode == aco_opcode::v_writelane_b32_e64) {
841 if (instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 0)
865 for (aco_ptr<Instruction>& instr : state.old_instructions) {
866 Handle(state, ctx, instr, block.instructions);
867 block.instructions.emplace_back(std::move(instr));