1bf215546Sopenharmony_ci/* -*- mesa-c++ -*- 2bf215546Sopenharmony_ci * 3bf215546Sopenharmony_ci * Copyright (c) 2022 Collabora LTD 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Author: Gert Wollny <gert.wollny@collabora.com> 6bf215546Sopenharmony_ci * 7bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 8bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 9bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 10bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 11bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 12bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 15bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 16bf215546Sopenharmony_ci * Software. 17bf215546Sopenharmony_ci * 18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 21bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 22bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 23bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 24bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "sfn_scheduler.h" 28bf215546Sopenharmony_ci#include "sfn_instr_alugroup.h" 29bf215546Sopenharmony_ci#include "sfn_instr_controlflow.h" 30bf215546Sopenharmony_ci#include "sfn_instr_export.h" 31bf215546Sopenharmony_ci#include "sfn_instr_fetch.h" 32bf215546Sopenharmony_ci#include "sfn_instr_mem.h" 33bf215546Sopenharmony_ci#include "sfn_instr_lds.h" 34bf215546Sopenharmony_ci#include "sfn_instr_tex.h" 35bf215546Sopenharmony_ci#include "sfn_debug.h" 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci#include <algorithm> 38bf215546Sopenharmony_ci#include <sstream> 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_cinamespace r600 { 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ciclass CollectInstructions : public InstrVisitor { 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_cipublic: 45bf215546Sopenharmony_ci CollectInstructions(ValueFactory& vf): 46bf215546Sopenharmony_ci m_value_factory(vf) {} 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci void visit(AluInstr *instr) override { 49bf215546Sopenharmony_ci if (instr->has_alu_flag(alu_is_trans)) 50bf215546Sopenharmony_ci alu_trans.push_back(instr); 51bf215546Sopenharmony_ci else { 52bf215546Sopenharmony_ci if (instr->alu_slots() == 1) 53bf215546Sopenharmony_ci alu_vec.push_back(instr); 54bf215546Sopenharmony_ci else 55bf215546Sopenharmony_ci alu_groups.push_back(instr->split(m_value_factory)); 56bf215546Sopenharmony_ci } 57bf215546Sopenharmony_ci } 58bf215546Sopenharmony_ci void visit(AluGroup *instr) override { 59bf215546Sopenharmony_ci alu_groups.push_back(instr); 60bf215546Sopenharmony_ci } 61bf215546Sopenharmony_ci void visit(TexInstr *instr) override { 62bf215546Sopenharmony_ci tex.push_back(instr); 63bf215546Sopenharmony_ci } 64bf215546Sopenharmony_ci void visit(ExportInstr *instr) override { 65bf215546Sopenharmony_ci exports.push_back(instr); 66bf215546Sopenharmony_ci } 67bf215546Sopenharmony_ci void visit(FetchInstr *instr) override { 68bf215546Sopenharmony_ci fetches.push_back(instr); 69bf215546Sopenharmony_ci } 70bf215546Sopenharmony_ci void visit(Block *instr) override { 71bf215546Sopenharmony_ci for (auto& i: *instr) 72bf215546Sopenharmony_ci i->accept(*this); 73bf215546Sopenharmony_ci } 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci void visit(ControlFlowInstr *instr) override { 76bf215546Sopenharmony_ci assert(!m_cf_instr); 77bf215546Sopenharmony_ci m_cf_instr = instr; 78bf215546Sopenharmony_ci } 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci void visit(IfInstr *instr) override { 81bf215546Sopenharmony_ci assert(!m_cf_instr); 82bf215546Sopenharmony_ci m_cf_instr = instr; 83bf215546Sopenharmony_ci } 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci void visit(EmitVertexInstr *instr) override { 86bf215546Sopenharmony_ci assert(!m_cf_instr); 87bf215546Sopenharmony_ci m_cf_instr = instr; 88bf215546Sopenharmony_ci } 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci void visit(ScratchIOInstr *instr) override { 91bf215546Sopenharmony_ci mem_write_instr.push_back(instr); 92bf215546Sopenharmony_ci } 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci void visit(StreamOutInstr *instr) override { 95bf215546Sopenharmony_ci mem_write_instr.push_back(instr); 96bf215546Sopenharmony_ci } 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci void visit(MemRingOutInstr *instr) override { 99bf215546Sopenharmony_ci mem_ring_writes.push_back(instr); 100bf215546Sopenharmony_ci } 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci void visit(GDSInstr *instr) override { 103bf215546Sopenharmony_ci gds_op.push_back(instr); 104bf215546Sopenharmony_ci } 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci void visit(WriteTFInstr *instr) override { 107bf215546Sopenharmony_ci write_tf.push_back(instr); 108bf215546Sopenharmony_ci } 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci void visit(LDSReadInstr *instr) override { 111bf215546Sopenharmony_ci std::vector<AluInstr*> buffer; 112bf215546Sopenharmony_ci m_last_lds_instr = instr->split(buffer, m_last_lds_instr); 113bf215546Sopenharmony_ci for (auto& i: buffer) { 114bf215546Sopenharmony_ci i->accept(*this); 115bf215546Sopenharmony_ci } 116bf215546Sopenharmony_ci } 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci void visit(LDSAtomicInstr *instr) override { 119bf215546Sopenharmony_ci std::vector<AluInstr*> buffer; 120bf215546Sopenharmony_ci m_last_lds_instr = instr->split(buffer, m_last_lds_instr); 121bf215546Sopenharmony_ci for (auto& i: buffer) { 122bf215546Sopenharmony_ci i->accept(*this); 123bf215546Sopenharmony_ci } 124bf215546Sopenharmony_ci } 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci void visit(RatInstr *instr) override { 127bf215546Sopenharmony_ci rat_instr.push_back(instr); 128bf215546Sopenharmony_ci } 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci std::list<AluInstr *> alu_trans; 132bf215546Sopenharmony_ci std::list<AluInstr *> alu_vec; 133bf215546Sopenharmony_ci std::list<TexInstr *> tex; 134bf215546Sopenharmony_ci std::list<AluGroup *> alu_groups; 135bf215546Sopenharmony_ci std::list<ExportInstr *> exports; 136bf215546Sopenharmony_ci std::list<FetchInstr *> fetches; 137bf215546Sopenharmony_ci std::list<WriteOutInstr *> mem_write_instr; 138bf215546Sopenharmony_ci std::list<MemRingOutInstr *> mem_ring_writes; 139bf215546Sopenharmony_ci std::list<GDSInstr *> gds_op; 140bf215546Sopenharmony_ci std::list<WriteTFInstr *> write_tf; 141bf215546Sopenharmony_ci std::list<RatInstr *> rat_instr; 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci Instr *m_cf_instr{nullptr}; 144bf215546Sopenharmony_ci ValueFactory& m_value_factory; 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci AluInstr *m_last_lds_instr{nullptr}; 147bf215546Sopenharmony_ci}; 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ciclass BlockSheduler { 150bf215546Sopenharmony_cipublic: 151bf215546Sopenharmony_ci BlockSheduler(r600_chip_class chip_class); 152bf215546Sopenharmony_ci void run(Shader *shader); 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci void finalize(); 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ciprivate: 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci void schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf); 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ci bool collect_ready(CollectInstructions &available); 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci template <typename T> 163bf215546Sopenharmony_ci bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig); 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci bool collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available); 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci bool schedule_tex(Shader::ShaderBlocks& out_blocks); 168bf215546Sopenharmony_ci bool schedule_vtx(Shader::ShaderBlocks& out_blocks); 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci template <typename I> 171bf215546Sopenharmony_ci bool schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list); 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci template <typename I> 174bf215546Sopenharmony_ci bool schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list); 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci bool schedule_alu(Shader::ShaderBlocks& out_blocks); 177bf215546Sopenharmony_ci void start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type); 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci bool schedule_alu_to_group_vec(AluGroup *group); 180bf215546Sopenharmony_ci bool schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist); 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci bool schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list); 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci template <typename I> 185bf215546Sopenharmony_ci bool schedule(std::list<I *>& ready_list); 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci template <typename I> 188bf215546Sopenharmony_ci bool schedule_block(std::list<I *>& ready_list); 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci std::list<AluInstr *> alu_vec_ready; 191bf215546Sopenharmony_ci std::list<AluInstr *> alu_trans_ready; 192bf215546Sopenharmony_ci std::list<AluGroup *> alu_groups_ready; 193bf215546Sopenharmony_ci std::list<TexInstr *> tex_ready; 194bf215546Sopenharmony_ci std::list<ExportInstr *> exports_ready; 195bf215546Sopenharmony_ci std::list<FetchInstr *> fetches_ready; 196bf215546Sopenharmony_ci std::list<WriteOutInstr *> memops_ready; 197bf215546Sopenharmony_ci std::list<MemRingOutInstr *> mem_ring_writes_ready; 198bf215546Sopenharmony_ci std::list<GDSInstr *> gds_ready; 199bf215546Sopenharmony_ci std::list<WriteTFInstr *> write_tf_ready; 200bf215546Sopenharmony_ci std::list<RatInstr *> rat_instr_ready; 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci enum { 203bf215546Sopenharmony_ci sched_alu, 204bf215546Sopenharmony_ci sched_tex, 205bf215546Sopenharmony_ci sched_fetch, 206bf215546Sopenharmony_ci sched_free, 207bf215546Sopenharmony_ci sched_mem_ring, 208bf215546Sopenharmony_ci sched_gds, 209bf215546Sopenharmony_ci sched_write_tf, 210bf215546Sopenharmony_ci sched_rat, 211bf215546Sopenharmony_ci } current_shed; 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci ExportInstr *m_last_pos; 214bf215546Sopenharmony_ci ExportInstr *m_last_pixel; 215bf215546Sopenharmony_ci ExportInstr *m_last_param; 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci Block *m_current_block; 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci int m_lds_addr_count{0}; 220bf215546Sopenharmony_ci int m_alu_groups_schduled{0}; 221bf215546Sopenharmony_ci r600_chip_class m_chip_class; 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci}; 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ciShader *schedule(Shader *original) 226bf215546Sopenharmony_ci{ 227bf215546Sopenharmony_ci Block::set_chipclass(original->chip_class()); 228bf215546Sopenharmony_ci AluGroup::set_chipclass(original->chip_class()); 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Original shader\n"; 231bf215546Sopenharmony_ci if (sfn_log.has_debug_flag(SfnLog::schedule)) { 232bf215546Sopenharmony_ci std::stringstream ss; 233bf215546Sopenharmony_ci original->print(ss); 234bf215546Sopenharmony_ci sfn_log << ss.str() << "\n\n"; 235bf215546Sopenharmony_ci } 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci // TODO later it might be necessary to clone the shader 238bf215546Sopenharmony_ci // to be able to re-start scheduling 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci auto scheduled_shader = original; 241bf215546Sopenharmony_ci BlockSheduler s(original->chip_class()); 242bf215546Sopenharmony_ci s.run(scheduled_shader); 243bf215546Sopenharmony_ci s.finalize(); 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Scheduled shader\n"; 246bf215546Sopenharmony_ci if (sfn_log.has_debug_flag(SfnLog::schedule)) { 247bf215546Sopenharmony_ci std::stringstream ss; 248bf215546Sopenharmony_ci scheduled_shader->print(ss); 249bf215546Sopenharmony_ci sfn_log << ss.str() << "\n\n"; 250bf215546Sopenharmony_ci } 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci return scheduled_shader; 253bf215546Sopenharmony_ci} 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ciBlockSheduler::BlockSheduler(r600_chip_class chip_class): 256bf215546Sopenharmony_ci current_shed(sched_alu), 257bf215546Sopenharmony_ci m_last_pos(nullptr), 258bf215546Sopenharmony_ci m_last_pixel(nullptr), 259bf215546Sopenharmony_ci m_last_param(nullptr), 260bf215546Sopenharmony_ci m_current_block(nullptr), 261bf215546Sopenharmony_ci m_chip_class(chip_class) 262bf215546Sopenharmony_ci{ 263bf215546Sopenharmony_ci} 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_civoid BlockSheduler::run( Shader *shader) 266bf215546Sopenharmony_ci{ 267bf215546Sopenharmony_ci Shader::ShaderBlocks scheduled_blocks; 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci for (auto& block : shader->func()) { 270bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Process block " << block->id() <<"\n"; 271bf215546Sopenharmony_ci if (sfn_log.has_debug_flag(SfnLog::schedule)) { 272bf215546Sopenharmony_ci std::stringstream ss; 273bf215546Sopenharmony_ci block->print(ss); 274bf215546Sopenharmony_ci sfn_log << ss.str() << "\n"; 275bf215546Sopenharmony_ci } 276bf215546Sopenharmony_ci schedule_block(*block, scheduled_blocks, shader->value_factory()); 277bf215546Sopenharmony_ci } 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci shader->reset_function(scheduled_blocks); 280bf215546Sopenharmony_ci} 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_civoid BlockSheduler::schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf) 283bf215546Sopenharmony_ci{ 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci assert(in_block.id() >= 0); 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci current_shed = sched_fetch; 289bf215546Sopenharmony_ci auto last_shed = sched_fetch; 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci CollectInstructions cir(vf); 292bf215546Sopenharmony_ci in_block.accept(cir); 293bf215546Sopenharmony_ci 294bf215546Sopenharmony_ci bool have_instr = collect_ready(cir); 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci m_current_block = new Block(in_block.nesting_depth(), in_block.id()); 297bf215546Sopenharmony_ci assert(m_current_block->id() >= 0); 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_ci while (have_instr) { 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Have ready instructions\n"; 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci if (alu_vec_ready.size()) 304bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << " ALU V:" << alu_vec_ready.size() << "\n"; 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci if (alu_trans_ready.size()) 307bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << " ALU T:" << alu_trans_ready.size() << "\n"; 308bf215546Sopenharmony_ci 309bf215546Sopenharmony_ci if (alu_groups_ready.size()) 310bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << " ALU G:" << alu_groups_ready.size() << "\n"; 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci if (exports_ready.size()) 313bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << " EXP:" << exports_ready.size() 314bf215546Sopenharmony_ci << "\n"; 315bf215546Sopenharmony_ci if (tex_ready.size()) 316bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << " TEX:" << tex_ready.size() 317bf215546Sopenharmony_ci << "\n"; 318bf215546Sopenharmony_ci if (fetches_ready.size()) 319bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << " FETCH:" << fetches_ready.size() 320bf215546Sopenharmony_ci << "\n"; 321bf215546Sopenharmony_ci if (mem_ring_writes_ready.size()) 322bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << " MEM_RING:" << mem_ring_writes_ready.size() 323bf215546Sopenharmony_ci << "\n"; 324bf215546Sopenharmony_ci if (memops_ready.size()) 325bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << " MEM_OPS:" << mem_ring_writes_ready.size() 326bf215546Sopenharmony_ci << "\n"; 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci if (!m_current_block->lds_group_active()) { 329bf215546Sopenharmony_ci if (last_shed != sched_free && memops_ready.size() > 8) 330bf215546Sopenharmony_ci current_shed = sched_free; 331bf215546Sopenharmony_ci else if (mem_ring_writes_ready.size() > 15) 332bf215546Sopenharmony_ci current_shed = sched_mem_ring; 333bf215546Sopenharmony_ci else if (rat_instr_ready.size() > 3) 334bf215546Sopenharmony_ci current_shed = sched_rat; 335bf215546Sopenharmony_ci else if (tex_ready.size() > 3) 336bf215546Sopenharmony_ci current_shed = sched_tex; 337bf215546Sopenharmony_ci } 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci switch (current_shed) { 340bf215546Sopenharmony_ci case sched_alu: 341bf215546Sopenharmony_ci if (!schedule_alu(out_blocks)) { 342bf215546Sopenharmony_ci assert(!m_current_block->lds_group_active()); 343bf215546Sopenharmony_ci current_shed = sched_tex; 344bf215546Sopenharmony_ci continue; 345bf215546Sopenharmony_ci } 346bf215546Sopenharmony_ci last_shed = current_shed; 347bf215546Sopenharmony_ci break; 348bf215546Sopenharmony_ci case sched_tex: 349bf215546Sopenharmony_ci if (tex_ready.empty() || !schedule_tex(out_blocks)) { 350bf215546Sopenharmony_ci current_shed = sched_fetch; 351bf215546Sopenharmony_ci continue; 352bf215546Sopenharmony_ci } 353bf215546Sopenharmony_ci last_shed = current_shed; 354bf215546Sopenharmony_ci break; 355bf215546Sopenharmony_ci case sched_fetch: 356bf215546Sopenharmony_ci if (!fetches_ready.empty()) { 357bf215546Sopenharmony_ci schedule_vtx(out_blocks); 358bf215546Sopenharmony_ci last_shed = current_shed; 359bf215546Sopenharmony_ci } 360bf215546Sopenharmony_ci current_shed = sched_gds; 361bf215546Sopenharmony_ci continue; 362bf215546Sopenharmony_ci case sched_gds: 363bf215546Sopenharmony_ci if (!gds_ready.empty()) { 364bf215546Sopenharmony_ci schedule_gds(out_blocks, gds_ready); 365bf215546Sopenharmony_ci last_shed = current_shed; 366bf215546Sopenharmony_ci } 367bf215546Sopenharmony_ci current_shed = sched_mem_ring; 368bf215546Sopenharmony_ci continue; 369bf215546Sopenharmony_ci case sched_mem_ring: 370bf215546Sopenharmony_ci if (mem_ring_writes_ready.empty() || !schedule_cf(out_blocks, mem_ring_writes_ready)) { 371bf215546Sopenharmony_ci current_shed = sched_write_tf; 372bf215546Sopenharmony_ci continue; 373bf215546Sopenharmony_ci } 374bf215546Sopenharmony_ci last_shed = current_shed; 375bf215546Sopenharmony_ci break; 376bf215546Sopenharmony_ci case sched_write_tf: 377bf215546Sopenharmony_ci if (write_tf_ready.empty() || !schedule_gds(out_blocks, write_tf_ready)) { 378bf215546Sopenharmony_ci current_shed = sched_rat; 379bf215546Sopenharmony_ci continue; 380bf215546Sopenharmony_ci } 381bf215546Sopenharmony_ci last_shed = current_shed; 382bf215546Sopenharmony_ci break; 383bf215546Sopenharmony_ci case sched_rat: 384bf215546Sopenharmony_ci if (rat_instr_ready.empty() || !schedule_cf(out_blocks, rat_instr_ready)) { 385bf215546Sopenharmony_ci current_shed = sched_free; 386bf215546Sopenharmony_ci continue; 387bf215546Sopenharmony_ci } 388bf215546Sopenharmony_ci last_shed = current_shed; 389bf215546Sopenharmony_ci break; 390bf215546Sopenharmony_ci case sched_free: 391bf215546Sopenharmony_ci if (memops_ready.empty() || !schedule_cf(out_blocks, memops_ready)) { 392bf215546Sopenharmony_ci current_shed = sched_alu; 393bf215546Sopenharmony_ci break; 394bf215546Sopenharmony_ci } 395bf215546Sopenharmony_ci last_shed = current_shed; 396bf215546Sopenharmony_ci } 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci have_instr = collect_ready(cir); 399bf215546Sopenharmony_ci } 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci /* Emit exports always at end of a block */ 402bf215546Sopenharmony_ci while (collect_ready_type(exports_ready, cir.exports)) 403bf215546Sopenharmony_ci schedule_exports(out_blocks, exports_ready); 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_ci bool fail = false; 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci if (!cir.alu_groups.empty()) { 408bf215546Sopenharmony_ci std::cerr << "Unscheduled ALU groups:\n"; 409bf215546Sopenharmony_ci for (auto& a : cir.alu_groups) { 410bf215546Sopenharmony_ci std::cerr << " " << *a << "\n"; 411bf215546Sopenharmony_ci } 412bf215546Sopenharmony_ci fail = true; 413bf215546Sopenharmony_ci } 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci if (!cir.alu_vec.empty()){ 416bf215546Sopenharmony_ci std::cerr << "Unscheduled ALU vec ops:\n"; 417bf215546Sopenharmony_ci for (auto& a : cir.alu_vec) { 418bf215546Sopenharmony_ci std::cerr << " " << *a << "\n"; 419bf215546Sopenharmony_ci } 420bf215546Sopenharmony_ci fail = true; 421bf215546Sopenharmony_ci } 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci if (!cir.alu_trans.empty()){ 424bf215546Sopenharmony_ci std::cerr << "Unscheduled ALU trans ops:\n"; 425bf215546Sopenharmony_ci for (auto& a : cir.alu_trans) { 426bf215546Sopenharmony_ci std::cerr << " " << *a << "\n"; 427bf215546Sopenharmony_ci } 428bf215546Sopenharmony_ci fail = true; 429bf215546Sopenharmony_ci } 430bf215546Sopenharmony_ci if (!cir.mem_write_instr.empty()){ 431bf215546Sopenharmony_ci std::cerr << "Unscheduled MEM ops:\n"; 432bf215546Sopenharmony_ci for (auto& a : cir.mem_write_instr) { 433bf215546Sopenharmony_ci std::cerr << " " << *a << "\n"; 434bf215546Sopenharmony_ci } 435bf215546Sopenharmony_ci fail = true; 436bf215546Sopenharmony_ci } 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci if (!cir.fetches.empty()){ 439bf215546Sopenharmony_ci std::cerr << "Unscheduled Fetch ops:\n"; 440bf215546Sopenharmony_ci for (auto& a : cir.fetches) { 441bf215546Sopenharmony_ci std::cerr << " " << *a << "\n"; 442bf215546Sopenharmony_ci } 443bf215546Sopenharmony_ci fail = true; 444bf215546Sopenharmony_ci } 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci if (!cir.tex.empty()){ 447bf215546Sopenharmony_ci std::cerr << "Unscheduled Tex ops:\n"; 448bf215546Sopenharmony_ci for (auto& a : cir.tex) { 449bf215546Sopenharmony_ci std::cerr << " " << *a << "\n"; 450bf215546Sopenharmony_ci } 451bf215546Sopenharmony_ci fail = true; 452bf215546Sopenharmony_ci } 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci assert(cir.tex.empty()); 455bf215546Sopenharmony_ci assert(cir.exports.empty()); 456bf215546Sopenharmony_ci assert(cir.fetches.empty()); 457bf215546Sopenharmony_ci assert(cir.alu_vec.empty()); 458bf215546Sopenharmony_ci assert(cir.mem_write_instr.empty()); 459bf215546Sopenharmony_ci assert(cir.mem_ring_writes.empty()); 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci assert (!fail); 462bf215546Sopenharmony_ci 463bf215546Sopenharmony_ci if (cir.m_cf_instr) { 464bf215546Sopenharmony_ci // Assert that if condition is ready 465bf215546Sopenharmony_ci m_current_block->push_back(cir.m_cf_instr); 466bf215546Sopenharmony_ci cir.m_cf_instr->set_scheduled(); 467bf215546Sopenharmony_ci } 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci out_blocks.push_back(m_current_block); 470bf215546Sopenharmony_ci} 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_civoid BlockSheduler::finalize() 473bf215546Sopenharmony_ci{ 474bf215546Sopenharmony_ci if (m_last_pos) 475bf215546Sopenharmony_ci m_last_pos->set_is_last_export(true); 476bf215546Sopenharmony_ci if (m_last_pixel) 477bf215546Sopenharmony_ci m_last_pixel->set_is_last_export(true); 478bf215546Sopenharmony_ci if (m_last_param) 479bf215546Sopenharmony_ci m_last_param->set_is_last_export(true); 480bf215546Sopenharmony_ci} 481bf215546Sopenharmony_ci 482bf215546Sopenharmony_cibool BlockSheduler::schedule_alu(Shader::ShaderBlocks& out_blocks) 483bf215546Sopenharmony_ci{ 484bf215546Sopenharmony_ci bool success = false; 485bf215546Sopenharmony_ci AluGroup *group = nullptr; 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci bool has_alu_ready = !alu_vec_ready.empty() || !alu_trans_ready.empty(); 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci bool has_lds_ready = !alu_vec_ready.empty() && 490bf215546Sopenharmony_ci (*alu_vec_ready.begin())->has_lds_access(); 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci /* If we have ready ALU instructions we have to start a new ALU block */ 493bf215546Sopenharmony_ci if (has_alu_ready || !alu_groups_ready.empty()) { 494bf215546Sopenharmony_ci if (m_current_block->type() != Block::alu) { 495bf215546Sopenharmony_ci start_new_block(out_blocks, Block::alu); 496bf215546Sopenharmony_ci m_alu_groups_schduled = 0; 497bf215546Sopenharmony_ci } 498bf215546Sopenharmony_ci } 499bf215546Sopenharmony_ci 500bf215546Sopenharmony_ci /* Schedule groups first. unless we have a pending LDS instuction 501bf215546Sopenharmony_ci * We don't want the LDS instructions to be too far apart because the 502bf215546Sopenharmony_ci * fetch + read from queue has to be in the same ALU CF block */ 503bf215546Sopenharmony_ci if (!alu_groups_ready.empty() && !has_lds_ready) { 504bf215546Sopenharmony_ci group = *alu_groups_ready.begin(); 505bf215546Sopenharmony_ci if (!m_current_block->try_reserve_kcache(*group)) { 506bf215546Sopenharmony_ci start_new_block(out_blocks, Block::alu); 507bf215546Sopenharmony_ci m_current_block->set_instr_flag(Instr::force_cf); 508bf215546Sopenharmony_ci } 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_ci if (!m_current_block->try_reserve_kcache(*group)) 511bf215546Sopenharmony_ci unreachable("Scheduling a group in a new block should always succeed"); 512bf215546Sopenharmony_ci alu_groups_ready.erase(alu_groups_ready.begin()); 513bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Schedule ALU group\n"; 514bf215546Sopenharmony_ci success = true; 515bf215546Sopenharmony_ci } else if (has_alu_ready) { 516bf215546Sopenharmony_ci group = new AluGroup(); 517bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "START new ALU group\n"; 518bf215546Sopenharmony_ci } else { 519bf215546Sopenharmony_ci return false; 520bf215546Sopenharmony_ci } 521bf215546Sopenharmony_ci 522bf215546Sopenharmony_ci assert(group); 523bf215546Sopenharmony_ci 524bf215546Sopenharmony_ci int free_slots = group->free_slots(); 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci while (free_slots && has_alu_ready) { 527bf215546Sopenharmony_ci if (!alu_vec_ready.empty()) 528bf215546Sopenharmony_ci success |= schedule_alu_to_group_vec(group); 529bf215546Sopenharmony_ci 530bf215546Sopenharmony_ci /* Apparently one can't schedule a t-slot if there is already 531bf215546Sopenharmony_ci * and LDS instruction scheduled. 532bf215546Sopenharmony_ci * TODO: check whether this is only relevant for actual LDS instructions 533bf215546Sopenharmony_ci * or also for instructions that read from the LDS return value queue */ 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci if (free_slots & 0x10 && !has_lds_ready) { 536bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n"; 537bf215546Sopenharmony_ci if (!alu_trans_ready.empty()) 538bf215546Sopenharmony_ci success |= schedule_alu_to_group_trans(group, alu_trans_ready); 539bf215546Sopenharmony_ci if (!alu_vec_ready.empty()) 540bf215546Sopenharmony_ci success |= schedule_alu_to_group_trans(group, alu_vec_ready); 541bf215546Sopenharmony_ci } 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci if (success) { 544bf215546Sopenharmony_ci ++m_alu_groups_schduled; 545bf215546Sopenharmony_ci break; 546bf215546Sopenharmony_ci } else if (m_current_block->kcache_reservation_failed()) { 547bf215546Sopenharmony_ci // LDS read groups should not lead to impossible 548bf215546Sopenharmony_ci // kcache constellations 549bf215546Sopenharmony_ci assert(!m_current_block->lds_group_active()); 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci // kcache reservation failed, so we have to start a new CF 552bf215546Sopenharmony_ci start_new_block(out_blocks, Block::alu); 553bf215546Sopenharmony_ci m_current_block->set_instr_flag(Instr::force_cf); 554bf215546Sopenharmony_ci } else { 555bf215546Sopenharmony_ci return false; 556bf215546Sopenharmony_ci } 557bf215546Sopenharmony_ci } 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Finalize ALU group\n"; 560bf215546Sopenharmony_ci group->set_scheduled(); 561bf215546Sopenharmony_ci group->fix_last_flag(); 562bf215546Sopenharmony_ci group->set_nesting_depth(m_current_block->nesting_depth()); 563bf215546Sopenharmony_ci m_current_block->push_back(group); 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_ci if (group->has_lds_group_start()) 566bf215546Sopenharmony_ci m_current_block->lds_group_start(*group->begin()); 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_ci if (group->has_lds_group_end()) 569bf215546Sopenharmony_ci m_current_block->lds_group_end(); 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci if (group->has_kill_op()) { 572bf215546Sopenharmony_ci assert(!group->has_lds_group_start()); 573bf215546Sopenharmony_ci start_new_block(out_blocks, Block::alu); 574bf215546Sopenharmony_ci m_current_block->set_instr_flag(Instr::force_cf); 575bf215546Sopenharmony_ci } 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci return success; 579bf215546Sopenharmony_ci} 580bf215546Sopenharmony_ci 581bf215546Sopenharmony_cibool BlockSheduler::schedule_tex(Shader::ShaderBlocks& out_blocks) 582bf215546Sopenharmony_ci{ 583bf215546Sopenharmony_ci if (m_current_block->type() != Block::tex || m_current_block->remaining_slots() == 0) { 584bf215546Sopenharmony_ci start_new_block(out_blocks, Block::tex); 585bf215546Sopenharmony_ci m_current_block->set_instr_flag(Instr::force_cf); 586bf215546Sopenharmony_ci } 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_ci 589bf215546Sopenharmony_ci if (!tex_ready.empty() && m_current_block->remaining_slots() > 0) { 590bf215546Sopenharmony_ci auto ii = tex_ready.begin(); 591bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n"; 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size()) 594bf215546Sopenharmony_ci start_new_block(out_blocks, Block::tex); 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci for (auto prep : (*ii)->prepare_instr()) { 597bf215546Sopenharmony_ci prep->set_scheduled(); 598bf215546Sopenharmony_ci m_current_block->push_back(prep); 599bf215546Sopenharmony_ci } 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci (*ii)->set_scheduled(); 602bf215546Sopenharmony_ci m_current_block->push_back(*ii); 603bf215546Sopenharmony_ci tex_ready.erase(ii); 604bf215546Sopenharmony_ci return true; 605bf215546Sopenharmony_ci } 606bf215546Sopenharmony_ci return false; 607bf215546Sopenharmony_ci} 608bf215546Sopenharmony_ci 609bf215546Sopenharmony_cibool BlockSheduler::schedule_vtx(Shader::ShaderBlocks& out_blocks) 610bf215546Sopenharmony_ci{ 611bf215546Sopenharmony_ci if (m_current_block->type() != Block::vtx || m_current_block->remaining_slots() == 0) { 612bf215546Sopenharmony_ci start_new_block(out_blocks, Block::vtx); 613bf215546Sopenharmony_ci m_current_block->set_instr_flag(Instr::force_cf); 614bf215546Sopenharmony_ci } 615bf215546Sopenharmony_ci return schedule_block(fetches_ready); 616bf215546Sopenharmony_ci} 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_citemplate <typename I> 619bf215546Sopenharmony_cibool BlockSheduler::schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list) 620bf215546Sopenharmony_ci{ 621bf215546Sopenharmony_ci bool was_full = m_current_block->remaining_slots() == 0; 622bf215546Sopenharmony_ci if (m_current_block->type() != Block::gds || was_full) { 623bf215546Sopenharmony_ci start_new_block(out_blocks, Block::gds); 624bf215546Sopenharmony_ci if (was_full) 625bf215546Sopenharmony_ci m_current_block->set_instr_flag(Instr::force_cf); 626bf215546Sopenharmony_ci } 627bf215546Sopenharmony_ci return schedule_block(ready_list); 628bf215546Sopenharmony_ci} 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_civoid BlockSheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type) 632bf215546Sopenharmony_ci{ 633bf215546Sopenharmony_ci if (!m_current_block->empty()) { 634bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Start new block\n"; 635bf215546Sopenharmony_ci assert(!m_current_block->lds_group_active()); 636bf215546Sopenharmony_ci out_blocks.push_back(m_current_block); 637bf215546Sopenharmony_ci m_current_block = new Block(m_current_block->nesting_depth(), m_current_block->id()); 638bf215546Sopenharmony_ci } 639bf215546Sopenharmony_ci m_current_block->set_type(type); 640bf215546Sopenharmony_ci} 641bf215546Sopenharmony_ci 642bf215546Sopenharmony_citemplate <typename I> 643bf215546Sopenharmony_cibool BlockSheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list) 644bf215546Sopenharmony_ci{ 645bf215546Sopenharmony_ci if (ready_list.empty()) 646bf215546Sopenharmony_ci return false; 647bf215546Sopenharmony_ci if (m_current_block->type() != Block::cf) 648bf215546Sopenharmony_ci start_new_block(out_blocks, Block::cf); 649bf215546Sopenharmony_ci return schedule(ready_list); 650bf215546Sopenharmony_ci} 651bf215546Sopenharmony_ci 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_cibool BlockSheduler::schedule_alu_to_group_vec(AluGroup *group) 654bf215546Sopenharmony_ci{ 655bf215546Sopenharmony_ci assert(group); 656bf215546Sopenharmony_ci assert(!alu_vec_ready.empty()); 657bf215546Sopenharmony_ci 658bf215546Sopenharmony_ci bool success = false; 659bf215546Sopenharmony_ci auto i = alu_vec_ready.begin(); 660bf215546Sopenharmony_ci auto e = alu_vec_ready.end(); 661bf215546Sopenharmony_ci while (i != e) { 662bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Try schedule to vec " << **i; 663bf215546Sopenharmony_ci 664bf215546Sopenharmony_ci if (!m_current_block->try_reserve_kcache(**i)) { 665bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << " failed (kcache)\n"; 666bf215546Sopenharmony_ci ++i; 667bf215546Sopenharmony_ci continue; 668bf215546Sopenharmony_ci } 669bf215546Sopenharmony_ci 670bf215546Sopenharmony_ci if (group->add_vec_instructions(*i)) { 671bf215546Sopenharmony_ci auto old_i = i; 672bf215546Sopenharmony_ci ++i; 673bf215546Sopenharmony_ci if ((*old_i)->has_alu_flag(alu_is_lds)) { 674bf215546Sopenharmony_ci --m_lds_addr_count; 675bf215546Sopenharmony_ci } 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci alu_vec_ready.erase(old_i); 678bf215546Sopenharmony_ci success = true; 679bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << " success\n"; 680bf215546Sopenharmony_ci } else { 681bf215546Sopenharmony_ci ++i; 682bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << " failed\n"; 683bf215546Sopenharmony_ci } 684bf215546Sopenharmony_ci } 685bf215546Sopenharmony_ci return success; 686bf215546Sopenharmony_ci} 687bf215546Sopenharmony_ci 688bf215546Sopenharmony_cibool BlockSheduler::schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist) 689bf215546Sopenharmony_ci{ 690bf215546Sopenharmony_ci assert(group); 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_ci bool success = false; 693bf215546Sopenharmony_ci auto i = readylist.begin(); 694bf215546Sopenharmony_ci auto e = readylist.end(); 695bf215546Sopenharmony_ci while (i != e) { 696bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Try schedule to trans " << **i; 697bf215546Sopenharmony_ci if (!m_current_block->try_reserve_kcache(**i)) { 698bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << " failed (kcache)\n"; 699bf215546Sopenharmony_ci ++i; 700bf215546Sopenharmony_ci continue; 701bf215546Sopenharmony_ci } 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci if (group->add_trans_instructions(*i)) { 704bf215546Sopenharmony_ci auto old_i = i; 705bf215546Sopenharmony_ci ++i; 706bf215546Sopenharmony_ci readylist.erase(old_i); 707bf215546Sopenharmony_ci success = true; 708bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << " sucess\n"; 709bf215546Sopenharmony_ci break; 710bf215546Sopenharmony_ci } else { 711bf215546Sopenharmony_ci ++i; 712bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << " failed\n"; 713bf215546Sopenharmony_ci } 714bf215546Sopenharmony_ci } 715bf215546Sopenharmony_ci return success; 716bf215546Sopenharmony_ci} 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_citemplate <typename I> 719bf215546Sopenharmony_cibool BlockSheduler::schedule(std::list<I *>& ready_list) 720bf215546Sopenharmony_ci{ 721bf215546Sopenharmony_ci if (!ready_list.empty() && m_current_block->remaining_slots() > 0) { 722bf215546Sopenharmony_ci auto ii = ready_list.begin(); 723bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n"; 724bf215546Sopenharmony_ci (*ii)->set_scheduled(); 725bf215546Sopenharmony_ci m_current_block->push_back(*ii); 726bf215546Sopenharmony_ci ready_list.erase(ii); 727bf215546Sopenharmony_ci return true; 728bf215546Sopenharmony_ci } 729bf215546Sopenharmony_ci return false; 730bf215546Sopenharmony_ci} 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_citemplate <typename I> 733bf215546Sopenharmony_cibool BlockSheduler::schedule_block(std::list<I *>& ready_list) 734bf215546Sopenharmony_ci{ 735bf215546Sopenharmony_ci bool success = false; 736bf215546Sopenharmony_ci while (!ready_list.empty() && m_current_block->remaining_slots() > 0) { 737bf215546Sopenharmony_ci auto ii = ready_list.begin(); 738bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Schedule: " << **ii << " " 739bf215546Sopenharmony_ci << m_current_block->remaining_slots() << "\n"; 740bf215546Sopenharmony_ci (*ii)->set_scheduled(); 741bf215546Sopenharmony_ci m_current_block->push_back(*ii); 742bf215546Sopenharmony_ci ready_list.erase(ii); 743bf215546Sopenharmony_ci success = true; 744bf215546Sopenharmony_ci } 745bf215546Sopenharmony_ci return success; 746bf215546Sopenharmony_ci} 747bf215546Sopenharmony_ci 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_cibool BlockSheduler::schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list) 750bf215546Sopenharmony_ci{ 751bf215546Sopenharmony_ci if (m_current_block->type() != Block::cf) 752bf215546Sopenharmony_ci start_new_block(out_blocks, Block::cf); 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ci if (!ready_list.empty()) { 755bf215546Sopenharmony_ci auto ii = ready_list.begin(); 756bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n"; 757bf215546Sopenharmony_ci (*ii)->set_scheduled(); 758bf215546Sopenharmony_ci m_current_block->push_back(*ii); 759bf215546Sopenharmony_ci switch ((*ii)->export_type()) { 760bf215546Sopenharmony_ci case ExportInstr::pos: m_last_pos = *ii; break; 761bf215546Sopenharmony_ci case ExportInstr::param: m_last_param = *ii; break; 762bf215546Sopenharmony_ci case ExportInstr::pixel: m_last_pixel = *ii; break; 763bf215546Sopenharmony_ci } 764bf215546Sopenharmony_ci (*ii)->set_is_last_export(false); 765bf215546Sopenharmony_ci ready_list.erase(ii); 766bf215546Sopenharmony_ci return true; 767bf215546Sopenharmony_ci } 768bf215546Sopenharmony_ci return false; 769bf215546Sopenharmony_ci} 770bf215546Sopenharmony_ci 771bf215546Sopenharmony_cibool BlockSheduler::collect_ready(CollectInstructions &available) 772bf215546Sopenharmony_ci{ 773bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "Ready instructions\n"; 774bf215546Sopenharmony_ci bool result = false; 775bf215546Sopenharmony_ci result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec); 776bf215546Sopenharmony_ci result |= collect_ready_type(alu_trans_ready, available.alu_trans); 777bf215546Sopenharmony_ci result |= collect_ready_type(alu_groups_ready, available.alu_groups); 778bf215546Sopenharmony_ci result |= collect_ready_type(gds_ready, available.gds_op); 779bf215546Sopenharmony_ci result |= collect_ready_type(tex_ready, available.tex); 780bf215546Sopenharmony_ci result |= collect_ready_type(fetches_ready, available.fetches); 781bf215546Sopenharmony_ci result |= collect_ready_type(memops_ready, available.mem_write_instr); 782bf215546Sopenharmony_ci result |= collect_ready_type(mem_ring_writes_ready, available.mem_ring_writes); 783bf215546Sopenharmony_ci result |= collect_ready_type(write_tf_ready, available.write_tf); 784bf215546Sopenharmony_ci result |= collect_ready_type(rat_instr_ready, available.rat_instr); 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "\n"; 787bf215546Sopenharmony_ci return result; 788bf215546Sopenharmony_ci} 789bf215546Sopenharmony_ci 790bf215546Sopenharmony_cibool BlockSheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available) 791bf215546Sopenharmony_ci{ 792bf215546Sopenharmony_ci auto i = available.begin(); 793bf215546Sopenharmony_ci auto e = available.end(); 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_ci for (auto alu : ready) { 796bf215546Sopenharmony_ci alu->add_priority(100 * alu->register_priority()); 797bf215546Sopenharmony_ci } 798bf215546Sopenharmony_ci 799bf215546Sopenharmony_ci int max_check = 0; 800bf215546Sopenharmony_ci while (i != e && max_check++ < 32) { 801bf215546Sopenharmony_ci if (ready.size() < 32 && (*i)->ready()) { 802bf215546Sopenharmony_ci 803bf215546Sopenharmony_ci int priority = 0; 804bf215546Sopenharmony_ci /* LDS fetches that use static offsets are usually ready ery fast, 805bf215546Sopenharmony_ci * so that they would get schedules early, and this leaves the problem 806bf215546Sopenharmony_ci * that we allocate too many registers with just constant values, 807bf215546Sopenharmony_ci * and this will make problems wih RA. So limit the number of LDS 808bf215546Sopenharmony_ci * address registers. 809bf215546Sopenharmony_ci */ 810bf215546Sopenharmony_ci if ((*i)->has_alu_flag(alu_lds_address)) { 811bf215546Sopenharmony_ci if (m_lds_addr_count > 64) { 812bf215546Sopenharmony_ci ++i; 813bf215546Sopenharmony_ci continue; 814bf215546Sopenharmony_ci } else { 815bf215546Sopenharmony_ci ++m_lds_addr_count; 816bf215546Sopenharmony_ci } 817bf215546Sopenharmony_ci } 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_ci /* LDS instructions are scheduled with high priority. 820bf215546Sopenharmony_ci * instractions that can go into the t slot and don't have 821bf215546Sopenharmony_ci * indirect access are put in last, so that they don't block 822bf215546Sopenharmony_ci * vec-only instructions when scheduling to the vector slots 823bf215546Sopenharmony_ci * for everything else we look at the register use */ 824bf215546Sopenharmony_ci 825bf215546Sopenharmony_ci if ((*i)->has_lds_access()) 826bf215546Sopenharmony_ci priority = 100000; 827bf215546Sopenharmony_ci else if (AluGroup::has_t()) { 828bf215546Sopenharmony_ci auto opinfo = alu_ops.find((*i)->opcode()); 829bf215546Sopenharmony_ci assert(opinfo != alu_ops.end()); 830bf215546Sopenharmony_ci if (opinfo->second.can_channel(AluOp::t, m_chip_class) && 831bf215546Sopenharmony_ci !std::get<0>((*i)->indirect_addr())) 832bf215546Sopenharmony_ci priority = -1; 833bf215546Sopenharmony_ci } 834bf215546Sopenharmony_ci 835bf215546Sopenharmony_ci priority += 100 * (*i)->register_priority(); 836bf215546Sopenharmony_ci 837bf215546Sopenharmony_ci (*i)->add_priority(priority); 838bf215546Sopenharmony_ci ready.push_back(*i); 839bf215546Sopenharmony_ci 840bf215546Sopenharmony_ci auto old_i = i; 841bf215546Sopenharmony_ci ++i; 842bf215546Sopenharmony_ci available.erase(old_i); 843bf215546Sopenharmony_ci } else 844bf215546Sopenharmony_ci ++i; 845bf215546Sopenharmony_ci } 846bf215546Sopenharmony_ci 847bf215546Sopenharmony_ci for (auto& i: ready) 848bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "V: " << *i << "\n"; 849bf215546Sopenharmony_ci 850bf215546Sopenharmony_ci ready.sort([](const AluInstr *lhs, const AluInstr *rhs) { 851bf215546Sopenharmony_ci return lhs->priority() > rhs->priority();}); 852bf215546Sopenharmony_ci 853bf215546Sopenharmony_ci for (auto& i: ready) 854bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << "V (S): " << *i << "\n"; 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci return !ready.empty(); 857bf215546Sopenharmony_ci} 858bf215546Sopenharmony_ci 859bf215546Sopenharmony_citemplate <typename T> 860bf215546Sopenharmony_cistruct type_char { 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci}; 863bf215546Sopenharmony_ci 864bf215546Sopenharmony_ci 865bf215546Sopenharmony_citemplate <> 866bf215546Sopenharmony_cistruct type_char<AluInstr> { 867bf215546Sopenharmony_ci static constexpr const char value = 'A'; 868bf215546Sopenharmony_ci}; 869bf215546Sopenharmony_ci 870bf215546Sopenharmony_citemplate <> 871bf215546Sopenharmony_cistruct type_char<AluGroup> { 872bf215546Sopenharmony_ci static constexpr const char value = 'G'; 873bf215546Sopenharmony_ci}; 874bf215546Sopenharmony_ci 875bf215546Sopenharmony_citemplate <> 876bf215546Sopenharmony_cistruct type_char<ExportInstr> { 877bf215546Sopenharmony_ci static constexpr const char value = 'E'; 878bf215546Sopenharmony_ci}; 879bf215546Sopenharmony_ci 880bf215546Sopenharmony_citemplate <> 881bf215546Sopenharmony_cistruct type_char<TexInstr> { 882bf215546Sopenharmony_ci static constexpr const char value = 'T'; 883bf215546Sopenharmony_ci}; 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_citemplate <> 886bf215546Sopenharmony_cistruct type_char<FetchInstr> { 887bf215546Sopenharmony_ci static constexpr const char value = 'F'; 888bf215546Sopenharmony_ci}; 889bf215546Sopenharmony_ci 890bf215546Sopenharmony_citemplate <> 891bf215546Sopenharmony_cistruct type_char<WriteOutInstr> { 892bf215546Sopenharmony_ci static constexpr const char value = 'M'; 893bf215546Sopenharmony_ci}; 894bf215546Sopenharmony_ci 895bf215546Sopenharmony_citemplate <> 896bf215546Sopenharmony_cistruct type_char<MemRingOutInstr> { 897bf215546Sopenharmony_ci static constexpr const char value = 'R'; 898bf215546Sopenharmony_ci}; 899bf215546Sopenharmony_ci 900bf215546Sopenharmony_citemplate <> 901bf215546Sopenharmony_cistruct type_char<WriteTFInstr> { 902bf215546Sopenharmony_ci static constexpr const char value = 'X'; 903bf215546Sopenharmony_ci}; 904bf215546Sopenharmony_ci 905bf215546Sopenharmony_citemplate <> 906bf215546Sopenharmony_cistruct type_char<GDSInstr> { 907bf215546Sopenharmony_ci static constexpr const char value = 'S'; 908bf215546Sopenharmony_ci}; 909bf215546Sopenharmony_ci 910bf215546Sopenharmony_citemplate <> 911bf215546Sopenharmony_cistruct type_char<RatInstr> { 912bf215546Sopenharmony_ci static constexpr const char value = 'I'; 913bf215546Sopenharmony_ci}; 914bf215546Sopenharmony_ci 915bf215546Sopenharmony_ci 916bf215546Sopenharmony_citemplate <typename T> 917bf215546Sopenharmony_cibool BlockSheduler::collect_ready_type(std::list<T *>& ready, std::list<T *>& available) 918bf215546Sopenharmony_ci{ 919bf215546Sopenharmony_ci auto i = available.begin(); 920bf215546Sopenharmony_ci auto e = available.end(); 921bf215546Sopenharmony_ci 922bf215546Sopenharmony_ci int lookahead = 16; 923bf215546Sopenharmony_ci while (i != e && ready.size() < 16 && lookahead-- > 0) { 924bf215546Sopenharmony_ci if ((*i)->ready()) { 925bf215546Sopenharmony_ci ready.push_back(*i); 926bf215546Sopenharmony_ci auto old_i = i; 927bf215546Sopenharmony_ci ++i; 928bf215546Sopenharmony_ci available.erase(old_i); 929bf215546Sopenharmony_ci } else 930bf215546Sopenharmony_ci ++i; 931bf215546Sopenharmony_ci } 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_ci for (auto& i: ready) 934bf215546Sopenharmony_ci sfn_log << SfnLog::schedule << type_char<T>::value << "; " << *i << "\n"; 935bf215546Sopenharmony_ci 936bf215546Sopenharmony_ci return !ready.empty(); 937bf215546Sopenharmony_ci} 938bf215546Sopenharmony_ci 939bf215546Sopenharmony_ci} 940