/* -*- mesa-c++ -*- * * Copyright (c) 2022 Collabora LTD * * Author: Gert Wollny * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * on the rights to use, copy, modify, merge, publish, distribute, sub * license, and/or sell copies of the Software, and to permit persons to whom * the Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "sfn_instr_alugroup.h" #include "sfn_debug.h" #include namespace r600 { AluGroup::AluGroup() { std::fill(m_slots.begin(), m_slots.end(), nullptr); } static bool is_kill(EAluOp op) { switch (op) { case op2_kille: case op2_kille_int: case op2_killne: case op2_killne_int: case op2_killge: case op2_killge_int: case op2_killge_uint: case op2_killgt: case op2_killgt_int: case op2_killgt_uint: return true; default: return false; } } bool AluGroup::add_instruction(AluInstr *instr) { /* we can only schedule one op that accesses LDS or the LDS read queue */ if (m_has_lds_op && instr->has_lds_access()) return false; if (instr->has_alu_flag(alu_is_trans)) { auto opinfo = alu_ops.find(instr->opcode()); assert(opinfo->second.can_channel(AluOp::t, s_chip_class)); if (add_trans_instructions(instr)) { if (is_kill(instr->opcode())) m_has_kill_op = true; return true; } } if (add_vec_instructions(instr) && !instr->has_alu_flag(alu_is_trans)) { instr->set_parent_group(this); if (!instr->has_alu_flag(alu_is_lds) && is_kill(instr->opcode())) m_has_kill_op = true; return true; } auto opinfo = alu_ops.find(instr->opcode()); assert(opinfo != alu_ops.end()); if (s_max_slots > 4 && opinfo->second.can_channel(AluOp::t, s_chip_class) && add_trans_instructions(instr)) { instr->set_parent_group(this); if (is_kill(instr->opcode())) m_has_kill_op = true; return true; } return false; } bool AluGroup::add_trans_instructions(AluInstr *instr) { if (m_slots[4] || s_max_slots < 5) return false; if (!update_indirect_access(instr)) return false; /* LDS instructions have to be scheduled in X */ if (instr->has_alu_flag(alu_is_lds)) return false; auto opinfo = alu_ops.find(instr->opcode()); assert(opinfo != alu_ops.end()); if (!opinfo->second.can_channel(AluOp::t, s_chip_class)) return false; /* if we schedule a non-trans instr into the trans slot, we have to make * sure that the corresponding vector slot is already occupied, otherwise * the hardware will schedule it as vector op and the bank-swizzle as * checked here (and in r600_asm.c) will not catch conflicts. */ if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()]) { if (instr->dest() && instr->dest()->pin() == pin_free) { int used_slot = 3; while (!m_slots[used_slot] && used_slot >= 0) --used_slot; // if we schedule a non-trans instr into the trans slot, // there should always be some slot that is already used assert(used_slot >= 0); instr->dest()->set_chan(used_slot); } } for (AluBankSwizzle i = sq_alu_scl_201; i != sq_alu_scl_unknown ; ++i) { AluReadportReservation readports_evaluator = m_readports_evaluator; if (readports_evaluator.schedule_trans_instruction(*instr, i)) { m_readports_evaluator = readports_evaluator; m_slots[4] = instr; instr->pin_sources_to_chan(); sfn_log << SfnLog::schedule << "T: " << *instr << "\n"; /* We added a vector op in the trans channel, so we have to * make sure the corresponding vector channel is used */ if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()]) m_slots[instr->dest_chan()] = new AluInstr(op0_nop, instr->dest_chan()); return true; } } return false; } int AluGroup::free_slots() const { int free_mask = 0; for(int i = 0; i < s_max_slots; ++i) { if (!m_slots[i]) free_mask |= 1 << i; } return free_mask; } class AluAllowSlotSwitch : public AluInstrVisitor { public: using AluInstrVisitor::visit; void visit(AluInstr *alu) { yes = (alu->alu_slots() == 1 || alu->has_alu_flag(alu_is_cayman_trans)); } bool yes{false}; }; bool AluGroup::add_vec_instructions(AluInstr *instr) { if (!update_indirect_access(instr)) return false; int param_src = -1; for (auto& s : instr->sources()) { auto is = s->as_inline_const(); if (is) param_src = is->sel() - ALU_SRC_PARAM_BASE; } if (param_src >= 0) { if (m_param_used < 0) m_param_used = param_src; else if (m_param_used != param_src) return false; } if (m_has_lds_op && instr->has_lds_access()) return false; int preferred_chan = instr->dest_chan(); if (!m_slots[preferred_chan]) { if (instr->bank_swizzle() != alu_vec_unknown) { if (try_readport(instr, instr->bank_swizzle())) return true; } else { for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) { if (try_readport(instr, i)) return true; } } } else { auto dest = instr->dest(); if (dest && dest->pin() == pin_free) { for (auto u : dest->uses()) { AluAllowSlotSwitch swich_allowed; u->accept(swich_allowed); if (!swich_allowed.yes) return false; } int free_chan = 0; while (m_slots[free_chan] && free_chan < 4) free_chan++; if (!m_slots[free_chan] && free_chan < 4) { sfn_log << SfnLog::schedule << "V: Try force channel " << free_chan << "\n"; dest->set_chan(free_chan); if (instr->bank_swizzle() != alu_vec_unknown) { if (try_readport(instr, instr->bank_swizzle())) return true; } else { for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) { if (try_readport(instr, i)) return true; } } } } } return false; } bool AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle) { int preferred_chan = instr->dest_chan(); AluReadportReservation readports_evaluator = m_readports_evaluator; if (readports_evaluator.schedule_vec_instruction(*instr, cycle)) { m_readports_evaluator = readports_evaluator; m_slots[preferred_chan] = instr; m_has_lds_op |= instr->has_lds_access(); sfn_log << SfnLog::schedule << "V: " << *instr << "\n"; auto dest = instr->dest(); if (dest && dest->pin() == pin_free) dest->set_pin(pin_chan); instr->pin_sources_to_chan(); return true; } return false; } bool AluGroup::update_indirect_access(AluInstr *instr) { auto [indirect_addr, for_src, is_index ] = instr->indirect_addr(); if (indirect_addr) { if (!m_addr_used) { m_addr_used = indirect_addr; m_addr_for_src = for_src; m_addr_is_index = is_index; } else if (!indirect_addr->equal_to(*m_addr_used)) { return false; } } return true; } void AluGroup::accept(ConstInstrVisitor& visitor) const { visitor.visit(*this); } void AluGroup::accept(InstrVisitor& visitor) { visitor.visit(this); } void AluGroup::set_scheduled() { for (int i = 0; i < s_max_slots; ++i) { if (m_slots[i]) m_slots[i]->set_scheduled(); } } void AluGroup::fix_last_flag() { bool last_seen = false; for (int i = s_max_slots - 1; i >= 0; --i) { if (m_slots[i]) { if (!last_seen) { m_slots[i]->set_alu_flag(alu_last_instr); last_seen = true; } else { m_slots[i]->reset_alu_flag(alu_last_instr); } } } } bool AluGroup::is_equal_to(const AluGroup& other) const { for (int i = 0; i < s_max_slots; ++i) { if (!other.m_slots[i]) { if (!m_slots[i]) continue; else return false; } if (m_slots[i]) { if (!other.m_slots[i]) return false; else if (!m_slots[i]->is_equal_to(*other.m_slots[i])) return false; } } return true; } bool AluGroup::has_lds_group_end() const { for (int i = 0; i < s_max_slots; ++i) { if (m_slots[i] && m_slots[i]->has_alu_flag(alu_lds_group_end)) return true; } return false; } bool AluGroup::do_ready() const { for (int i = 0; i < s_max_slots; ++i) { if (m_slots[i] && !m_slots[i]->ready()) return false; } return true; } void AluGroup::forward_set_blockid(int id, int index) { for (int i = 0; i < s_max_slots; ++i) { if (m_slots[i]) { m_slots[i]->set_blockid(id, index); } } } uint32_t AluGroup::slots() const { uint32_t result = (m_readports_evaluator.m_nliterals + 1) >> 1; for (int i = 0; i < s_max_slots; ++i) { if (m_slots[i]) ++result; } if (m_addr_used) { ++result; if (m_addr_is_index) ++result; } return result; } void AluGroup::do_print(std::ostream& os) const { const char slotname[] = "xyzwt"; os << "ALU_GROUP_BEGIN\n"; for (int i = 0; i < s_max_slots; ++i) { if (m_slots[i]) { for (int j = 0; j < 2 * m_nesting_depth + 4; ++j) os << ' '; os << slotname[i] << ": "; m_slots[i]->print(os); os << "\n"; } } for (int i = 0; i < 2 * m_nesting_depth + 2; ++i) os << ' '; os << "ALU_GROUP_END"; } AluInstr::SrcValues AluGroup::get_kconsts() const { AluInstr::SrcValues result; for (int i = 0; i < s_max_slots; ++i) { if (m_slots[i]) { for (auto s : m_slots[i]->sources()) if (s->as_uniform()) result.push_back(s); } } return result; } void AluGroup::set_chipclass(r600_chip_class chip_class) { s_chip_class = chip_class; s_max_slots = chip_class == ISA_CC_CAYMAN ? 4 : 5; } int AluGroup::s_max_slots = 5; r600_chip_class AluGroup::s_chip_class = ISA_CC_EVERGREEN; }