1/* -*- mesa-c++ -*- 2 * 3 * Copyright (c) 2022 Collabora LTD 4 * 5 * Author: Gert Wollny <gert.wollny@collabora.com> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * on the rights to use, copy, modify, merge, publish, distribute, sub 11 * license, and/or sell copies of the Software, and to permit persons to whom 12 * the Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 24 * USE OR OTHER DEALINGS IN THE SOFTWARE. 25 */ 26 27#include "sfn_scheduler.h" 28#include "sfn_instr_alugroup.h" 29#include "sfn_instr_controlflow.h" 30#include "sfn_instr_export.h" 31#include "sfn_instr_fetch.h" 32#include "sfn_instr_mem.h" 33#include "sfn_instr_lds.h" 34#include "sfn_instr_tex.h" 35#include "sfn_debug.h" 36 37#include <algorithm> 38#include <sstream> 39 40namespace r600 { 41 42class CollectInstructions : public InstrVisitor { 43 44public: 45 CollectInstructions(ValueFactory& vf): 46 m_value_factory(vf) {} 47 48 void visit(AluInstr *instr) override { 49 if (instr->has_alu_flag(alu_is_trans)) 50 alu_trans.push_back(instr); 51 else { 52 if (instr->alu_slots() == 1) 53 alu_vec.push_back(instr); 54 else 55 alu_groups.push_back(instr->split(m_value_factory)); 56 } 57 } 58 void visit(AluGroup *instr) override { 59 alu_groups.push_back(instr); 60 } 61 void visit(TexInstr *instr) override { 62 tex.push_back(instr); 63 } 64 void visit(ExportInstr *instr) override { 65 exports.push_back(instr); 66 } 67 void visit(FetchInstr *instr) override { 68 fetches.push_back(instr); 69 } 70 void visit(Block *instr) override { 71 for (auto& i: *instr) 72 i->accept(*this); 73 } 74 75 void visit(ControlFlowInstr *instr) override { 76 assert(!m_cf_instr); 77 m_cf_instr = instr; 78 } 79 80 void visit(IfInstr *instr) override { 81 assert(!m_cf_instr); 82 m_cf_instr = instr; 83 } 84 85 void visit(EmitVertexInstr *instr) override { 86 assert(!m_cf_instr); 87 m_cf_instr = instr; 88 } 89 90 void visit(ScratchIOInstr *instr) override { 91 mem_write_instr.push_back(instr); 92 } 93 94 void visit(StreamOutInstr *instr) override { 95 mem_write_instr.push_back(instr); 96 } 97 98 void visit(MemRingOutInstr *instr) override { 99 mem_ring_writes.push_back(instr); 100 } 101 102 void visit(GDSInstr *instr) override { 103 gds_op.push_back(instr); 104 } 105 106 void visit(WriteTFInstr *instr) override { 107 write_tf.push_back(instr); 108 } 109 110 void visit(LDSReadInstr *instr) override { 111 std::vector<AluInstr*> buffer; 112 m_last_lds_instr = instr->split(buffer, m_last_lds_instr); 113 for (auto& i: buffer) { 114 i->accept(*this); 115 } 116 } 117 118 void visit(LDSAtomicInstr *instr) override { 119 std::vector<AluInstr*> buffer; 120 m_last_lds_instr = instr->split(buffer, m_last_lds_instr); 121 for (auto& i: buffer) { 122 i->accept(*this); 123 } 124 } 125 126 void visit(RatInstr *instr) override { 127 rat_instr.push_back(instr); 128 } 129 130 131 std::list<AluInstr *> alu_trans; 132 std::list<AluInstr *> alu_vec; 133 std::list<TexInstr *> tex; 134 std::list<AluGroup *> alu_groups; 135 std::list<ExportInstr *> exports; 136 std::list<FetchInstr *> fetches; 137 std::list<WriteOutInstr *> mem_write_instr; 138 std::list<MemRingOutInstr *> mem_ring_writes; 139 std::list<GDSInstr *> gds_op; 140 std::list<WriteTFInstr *> write_tf; 141 std::list<RatInstr *> rat_instr; 142 143 Instr *m_cf_instr{nullptr}; 144 ValueFactory& m_value_factory; 145 146 AluInstr *m_last_lds_instr{nullptr}; 147}; 148 149class BlockSheduler { 150public: 151 BlockSheduler(r600_chip_class chip_class); 152 void run(Shader *shader); 153 154 void finalize(); 155 156private: 157 158 void schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf); 159 160 bool collect_ready(CollectInstructions &available); 161 162 template <typename T> 163 bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig); 164 165 bool collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available); 166 167 bool schedule_tex(Shader::ShaderBlocks& out_blocks); 168 bool schedule_vtx(Shader::ShaderBlocks& out_blocks); 169 170 template <typename I> 171 bool schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list); 172 173 template <typename I> 174 bool schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list); 175 176 bool schedule_alu(Shader::ShaderBlocks& out_blocks); 177 void start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type); 178 179 bool schedule_alu_to_group_vec(AluGroup *group); 180 bool schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist); 181 182 bool schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list); 183 184 template <typename I> 185 bool schedule(std::list<I *>& ready_list); 186 187 template <typename I> 188 bool schedule_block(std::list<I *>& ready_list); 189 190 std::list<AluInstr *> alu_vec_ready; 191 std::list<AluInstr *> alu_trans_ready; 192 std::list<AluGroup *> alu_groups_ready; 193 std::list<TexInstr *> tex_ready; 194 std::list<ExportInstr *> exports_ready; 195 std::list<FetchInstr *> fetches_ready; 196 std::list<WriteOutInstr *> memops_ready; 197 std::list<MemRingOutInstr *> mem_ring_writes_ready; 198 std::list<GDSInstr *> gds_ready; 199 std::list<WriteTFInstr *> write_tf_ready; 200 std::list<RatInstr *> rat_instr_ready; 201 202 enum { 203 sched_alu, 204 sched_tex, 205 sched_fetch, 206 sched_free, 207 sched_mem_ring, 208 sched_gds, 209 sched_write_tf, 210 sched_rat, 211 } current_shed; 212 213 ExportInstr *m_last_pos; 214 ExportInstr *m_last_pixel; 215 ExportInstr *m_last_param; 216 217 Block *m_current_block; 218 219 int m_lds_addr_count{0}; 220 int m_alu_groups_schduled{0}; 221 r600_chip_class m_chip_class; 222 223}; 224 225Shader *schedule(Shader *original) 226{ 227 Block::set_chipclass(original->chip_class()); 228 AluGroup::set_chipclass(original->chip_class()); 229 230 sfn_log << SfnLog::schedule << "Original shader\n"; 231 if (sfn_log.has_debug_flag(SfnLog::schedule)) { 232 std::stringstream ss; 233 original->print(ss); 234 sfn_log << ss.str() << "\n\n"; 235 } 236 237 // TODO later it might be necessary to clone the shader 238 // to be able to re-start scheduling 239 240 auto scheduled_shader = original; 241 BlockSheduler s(original->chip_class()); 242 s.run(scheduled_shader); 243 s.finalize(); 244 245 sfn_log << SfnLog::schedule << "Scheduled shader\n"; 246 if (sfn_log.has_debug_flag(SfnLog::schedule)) { 247 std::stringstream ss; 248 scheduled_shader->print(ss); 249 sfn_log << ss.str() << "\n\n"; 250 } 251 252 return scheduled_shader; 253} 254 255BlockSheduler::BlockSheduler(r600_chip_class chip_class): 256 current_shed(sched_alu), 257 m_last_pos(nullptr), 258 m_last_pixel(nullptr), 259 m_last_param(nullptr), 260 m_current_block(nullptr), 261 m_chip_class(chip_class) 262{ 263} 264 265void BlockSheduler::run( Shader *shader) 266{ 267 Shader::ShaderBlocks scheduled_blocks; 268 269 for (auto& block : shader->func()) { 270 sfn_log << SfnLog::schedule << "Process block " << block->id() <<"\n"; 271 if (sfn_log.has_debug_flag(SfnLog::schedule)) { 272 std::stringstream ss; 273 block->print(ss); 274 sfn_log << ss.str() << "\n"; 275 } 276 schedule_block(*block, scheduled_blocks, shader->value_factory()); 277 } 278 279 shader->reset_function(scheduled_blocks); 280} 281 282void BlockSheduler::schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf) 283{ 284 285 assert(in_block.id() >= 0); 286 287 288 current_shed = sched_fetch; 289 auto last_shed = sched_fetch; 290 291 CollectInstructions cir(vf); 292 in_block.accept(cir); 293 294 bool have_instr = collect_ready(cir); 295 296 m_current_block = new Block(in_block.nesting_depth(), in_block.id()); 297 assert(m_current_block->id() >= 0); 298 299 while (have_instr) { 300 301 sfn_log << SfnLog::schedule << "Have ready instructions\n"; 302 303 if (alu_vec_ready.size()) 304 sfn_log << SfnLog::schedule << " ALU V:" << alu_vec_ready.size() << "\n"; 305 306 if (alu_trans_ready.size()) 307 sfn_log << SfnLog::schedule << " ALU T:" << alu_trans_ready.size() << "\n"; 308 309 if (alu_groups_ready.size()) 310 sfn_log << SfnLog::schedule << " ALU G:" << alu_groups_ready.size() << "\n"; 311 312 if (exports_ready.size()) 313 sfn_log << SfnLog::schedule << " EXP:" << exports_ready.size() 314 << "\n"; 315 if (tex_ready.size()) 316 sfn_log << SfnLog::schedule << " TEX:" << tex_ready.size() 317 << "\n"; 318 if (fetches_ready.size()) 319 sfn_log << SfnLog::schedule << " FETCH:" << fetches_ready.size() 320 << "\n"; 321 if (mem_ring_writes_ready.size()) 322 sfn_log << SfnLog::schedule << " MEM_RING:" << mem_ring_writes_ready.size() 323 << "\n"; 324 if (memops_ready.size()) 325 sfn_log << SfnLog::schedule << " MEM_OPS:" << mem_ring_writes_ready.size() 326 << "\n"; 327 328 if (!m_current_block->lds_group_active()) { 329 if (last_shed != sched_free && memops_ready.size() > 8) 330 current_shed = sched_free; 331 else if (mem_ring_writes_ready.size() > 15) 332 current_shed = sched_mem_ring; 333 else if (rat_instr_ready.size() > 3) 334 current_shed = sched_rat; 335 else if (tex_ready.size() > 3) 336 current_shed = sched_tex; 337 } 338 339 switch (current_shed) { 340 case sched_alu: 341 if (!schedule_alu(out_blocks)) { 342 assert(!m_current_block->lds_group_active()); 343 current_shed = sched_tex; 344 continue; 345 } 346 last_shed = current_shed; 347 break; 348 case sched_tex: 349 if (tex_ready.empty() || !schedule_tex(out_blocks)) { 350 current_shed = sched_fetch; 351 continue; 352 } 353 last_shed = current_shed; 354 break; 355 case sched_fetch: 356 if (!fetches_ready.empty()) { 357 schedule_vtx(out_blocks); 358 last_shed = current_shed; 359 } 360 current_shed = sched_gds; 361 continue; 362 case sched_gds: 363 if (!gds_ready.empty()) { 364 schedule_gds(out_blocks, gds_ready); 365 last_shed = current_shed; 366 } 367 current_shed = sched_mem_ring; 368 continue; 369 case sched_mem_ring: 370 if (mem_ring_writes_ready.empty() || !schedule_cf(out_blocks, mem_ring_writes_ready)) { 371 current_shed = sched_write_tf; 372 continue; 373 } 374 last_shed = current_shed; 375 break; 376 case sched_write_tf: 377 if (write_tf_ready.empty() || !schedule_gds(out_blocks, write_tf_ready)) { 378 current_shed = sched_rat; 379 continue; 380 } 381 last_shed = current_shed; 382 break; 383 case sched_rat: 384 if (rat_instr_ready.empty() || !schedule_cf(out_blocks, rat_instr_ready)) { 385 current_shed = sched_free; 386 continue; 387 } 388 last_shed = current_shed; 389 break; 390 case sched_free: 391 if (memops_ready.empty() || !schedule_cf(out_blocks, memops_ready)) { 392 current_shed = sched_alu; 393 break; 394 } 395 last_shed = current_shed; 396 } 397 398 have_instr = collect_ready(cir); 399 } 400 401 /* Emit exports always at end of a block */ 402 while (collect_ready_type(exports_ready, cir.exports)) 403 schedule_exports(out_blocks, exports_ready); 404 405 bool fail = false; 406 407 if (!cir.alu_groups.empty()) { 408 std::cerr << "Unscheduled ALU groups:\n"; 409 for (auto& a : cir.alu_groups) { 410 std::cerr << " " << *a << "\n"; 411 } 412 fail = true; 413 } 414 415 if (!cir.alu_vec.empty()){ 416 std::cerr << "Unscheduled ALU vec ops:\n"; 417 for (auto& a : cir.alu_vec) { 418 std::cerr << " " << *a << "\n"; 419 } 420 fail = true; 421 } 422 423 if (!cir.alu_trans.empty()){ 424 std::cerr << "Unscheduled ALU trans ops:\n"; 425 for (auto& a : cir.alu_trans) { 426 std::cerr << " " << *a << "\n"; 427 } 428 fail = true; 429 } 430 if (!cir.mem_write_instr.empty()){ 431 std::cerr << "Unscheduled MEM ops:\n"; 432 for (auto& a : cir.mem_write_instr) { 433 std::cerr << " " << *a << "\n"; 434 } 435 fail = true; 436 } 437 438 if (!cir.fetches.empty()){ 439 std::cerr << "Unscheduled Fetch ops:\n"; 440 for (auto& a : cir.fetches) { 441 std::cerr << " " << *a << "\n"; 442 } 443 fail = true; 444 } 445 446 if (!cir.tex.empty()){ 447 std::cerr << "Unscheduled Tex ops:\n"; 448 for (auto& a : cir.tex) { 449 std::cerr << " " << *a << "\n"; 450 } 451 fail = true; 452 } 453 454 assert(cir.tex.empty()); 455 assert(cir.exports.empty()); 456 assert(cir.fetches.empty()); 457 assert(cir.alu_vec.empty()); 458 assert(cir.mem_write_instr.empty()); 459 assert(cir.mem_ring_writes.empty()); 460 461 assert (!fail); 462 463 if (cir.m_cf_instr) { 464 // Assert that if condition is ready 465 m_current_block->push_back(cir.m_cf_instr); 466 cir.m_cf_instr->set_scheduled(); 467 } 468 469 out_blocks.push_back(m_current_block); 470} 471 472void BlockSheduler::finalize() 473{ 474 if (m_last_pos) 475 m_last_pos->set_is_last_export(true); 476 if (m_last_pixel) 477 m_last_pixel->set_is_last_export(true); 478 if (m_last_param) 479 m_last_param->set_is_last_export(true); 480} 481 482bool BlockSheduler::schedule_alu(Shader::ShaderBlocks& out_blocks) 483{ 484 bool success = false; 485 AluGroup *group = nullptr; 486 487 bool has_alu_ready = !alu_vec_ready.empty() || !alu_trans_ready.empty(); 488 489 bool has_lds_ready = !alu_vec_ready.empty() && 490 (*alu_vec_ready.begin())->has_lds_access(); 491 492 /* If we have ready ALU instructions we have to start a new ALU block */ 493 if (has_alu_ready || !alu_groups_ready.empty()) { 494 if (m_current_block->type() != Block::alu) { 495 start_new_block(out_blocks, Block::alu); 496 m_alu_groups_schduled = 0; 497 } 498 } 499 500 /* Schedule groups first. unless we have a pending LDS instuction 501 * We don't want the LDS instructions to be too far apart because the 502 * fetch + read from queue has to be in the same ALU CF block */ 503 if (!alu_groups_ready.empty() && !has_lds_ready) { 504 group = *alu_groups_ready.begin(); 505 if (!m_current_block->try_reserve_kcache(*group)) { 506 start_new_block(out_blocks, Block::alu); 507 m_current_block->set_instr_flag(Instr::force_cf); 508 } 509 510 if (!m_current_block->try_reserve_kcache(*group)) 511 unreachable("Scheduling a group in a new block should always succeed"); 512 alu_groups_ready.erase(alu_groups_ready.begin()); 513 sfn_log << SfnLog::schedule << "Schedule ALU group\n"; 514 success = true; 515 } else if (has_alu_ready) { 516 group = new AluGroup(); 517 sfn_log << SfnLog::schedule << "START new ALU group\n"; 518 } else { 519 return false; 520 } 521 522 assert(group); 523 524 int free_slots = group->free_slots(); 525 526 while (free_slots && has_alu_ready) { 527 if (!alu_vec_ready.empty()) 528 success |= schedule_alu_to_group_vec(group); 529 530 /* Apparently one can't schedule a t-slot if there is already 531 * and LDS instruction scheduled. 532 * TODO: check whether this is only relevant for actual LDS instructions 533 * or also for instructions that read from the LDS return value queue */ 534 535 if (free_slots & 0x10 && !has_lds_ready) { 536 sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n"; 537 if (!alu_trans_ready.empty()) 538 success |= schedule_alu_to_group_trans(group, alu_trans_ready); 539 if (!alu_vec_ready.empty()) 540 success |= schedule_alu_to_group_trans(group, alu_vec_ready); 541 } 542 543 if (success) { 544 ++m_alu_groups_schduled; 545 break; 546 } else if (m_current_block->kcache_reservation_failed()) { 547 // LDS read groups should not lead to impossible 548 // kcache constellations 549 assert(!m_current_block->lds_group_active()); 550 551 // kcache reservation failed, so we have to start a new CF 552 start_new_block(out_blocks, Block::alu); 553 m_current_block->set_instr_flag(Instr::force_cf); 554 } else { 555 return false; 556 } 557 } 558 559 sfn_log << SfnLog::schedule << "Finalize ALU group\n"; 560 group->set_scheduled(); 561 group->fix_last_flag(); 562 group->set_nesting_depth(m_current_block->nesting_depth()); 563 m_current_block->push_back(group); 564 565 if (group->has_lds_group_start()) 566 m_current_block->lds_group_start(*group->begin()); 567 568 if (group->has_lds_group_end()) 569 m_current_block->lds_group_end(); 570 571 if (group->has_kill_op()) { 572 assert(!group->has_lds_group_start()); 573 start_new_block(out_blocks, Block::alu); 574 m_current_block->set_instr_flag(Instr::force_cf); 575 } 576 577 578 return success; 579} 580 581bool BlockSheduler::schedule_tex(Shader::ShaderBlocks& out_blocks) 582{ 583 if (m_current_block->type() != Block::tex || m_current_block->remaining_slots() == 0) { 584 start_new_block(out_blocks, Block::tex); 585 m_current_block->set_instr_flag(Instr::force_cf); 586 } 587 588 589 if (!tex_ready.empty() && m_current_block->remaining_slots() > 0) { 590 auto ii = tex_ready.begin(); 591 sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n"; 592 593 if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size()) 594 start_new_block(out_blocks, Block::tex); 595 596 for (auto prep : (*ii)->prepare_instr()) { 597 prep->set_scheduled(); 598 m_current_block->push_back(prep); 599 } 600 601 (*ii)->set_scheduled(); 602 m_current_block->push_back(*ii); 603 tex_ready.erase(ii); 604 return true; 605 } 606 return false; 607} 608 609bool BlockSheduler::schedule_vtx(Shader::ShaderBlocks& out_blocks) 610{ 611 if (m_current_block->type() != Block::vtx || m_current_block->remaining_slots() == 0) { 612 start_new_block(out_blocks, Block::vtx); 613 m_current_block->set_instr_flag(Instr::force_cf); 614 } 615 return schedule_block(fetches_ready); 616} 617 618template <typename I> 619bool BlockSheduler::schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list) 620{ 621 bool was_full = m_current_block->remaining_slots() == 0; 622 if (m_current_block->type() != Block::gds || was_full) { 623 start_new_block(out_blocks, Block::gds); 624 if (was_full) 625 m_current_block->set_instr_flag(Instr::force_cf); 626 } 627 return schedule_block(ready_list); 628} 629 630 631void BlockSheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type) 632{ 633 if (!m_current_block->empty()) { 634 sfn_log << SfnLog::schedule << "Start new block\n"; 635 assert(!m_current_block->lds_group_active()); 636 out_blocks.push_back(m_current_block); 637 m_current_block = new Block(m_current_block->nesting_depth(), m_current_block->id()); 638 } 639 m_current_block->set_type(type); 640} 641 642template <typename I> 643bool BlockSheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list) 644{ 645 if (ready_list.empty()) 646 return false; 647 if (m_current_block->type() != Block::cf) 648 start_new_block(out_blocks, Block::cf); 649 return schedule(ready_list); 650} 651 652 653bool BlockSheduler::schedule_alu_to_group_vec(AluGroup *group) 654{ 655 assert(group); 656 assert(!alu_vec_ready.empty()); 657 658 bool success = false; 659 auto i = alu_vec_ready.begin(); 660 auto e = alu_vec_ready.end(); 661 while (i != e) { 662 sfn_log << SfnLog::schedule << "Try schedule to vec " << **i; 663 664 if (!m_current_block->try_reserve_kcache(**i)) { 665 sfn_log << SfnLog::schedule << " failed (kcache)\n"; 666 ++i; 667 continue; 668 } 669 670 if (group->add_vec_instructions(*i)) { 671 auto old_i = i; 672 ++i; 673 if ((*old_i)->has_alu_flag(alu_is_lds)) { 674 --m_lds_addr_count; 675 } 676 677 alu_vec_ready.erase(old_i); 678 success = true; 679 sfn_log << SfnLog::schedule << " success\n"; 680 } else { 681 ++i; 682 sfn_log << SfnLog::schedule << " failed\n"; 683 } 684 } 685 return success; 686} 687 688bool BlockSheduler::schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist) 689{ 690 assert(group); 691 692 bool success = false; 693 auto i = readylist.begin(); 694 auto e = readylist.end(); 695 while (i != e) { 696 sfn_log << SfnLog::schedule << "Try schedule to trans " << **i; 697 if (!m_current_block->try_reserve_kcache(**i)) { 698 sfn_log << SfnLog::schedule << " failed (kcache)\n"; 699 ++i; 700 continue; 701 } 702 703 if (group->add_trans_instructions(*i)) { 704 auto old_i = i; 705 ++i; 706 readylist.erase(old_i); 707 success = true; 708 sfn_log << SfnLog::schedule << " sucess\n"; 709 break; 710 } else { 711 ++i; 712 sfn_log << SfnLog::schedule << " failed\n"; 713 } 714 } 715 return success; 716} 717 718template <typename I> 719bool BlockSheduler::schedule(std::list<I *>& ready_list) 720{ 721 if (!ready_list.empty() && m_current_block->remaining_slots() > 0) { 722 auto ii = ready_list.begin(); 723 sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n"; 724 (*ii)->set_scheduled(); 725 m_current_block->push_back(*ii); 726 ready_list.erase(ii); 727 return true; 728 } 729 return false; 730} 731 732template <typename I> 733bool BlockSheduler::schedule_block(std::list<I *>& ready_list) 734{ 735 bool success = false; 736 while (!ready_list.empty() && m_current_block->remaining_slots() > 0) { 737 auto ii = ready_list.begin(); 738 sfn_log << SfnLog::schedule << "Schedule: " << **ii << " " 739 << m_current_block->remaining_slots() << "\n"; 740 (*ii)->set_scheduled(); 741 m_current_block->push_back(*ii); 742 ready_list.erase(ii); 743 success = true; 744 } 745 return success; 746} 747 748 749bool BlockSheduler::schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list) 750{ 751 if (m_current_block->type() != Block::cf) 752 start_new_block(out_blocks, Block::cf); 753 754 if (!ready_list.empty()) { 755 auto ii = ready_list.begin(); 756 sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n"; 757 (*ii)->set_scheduled(); 758 m_current_block->push_back(*ii); 759 switch ((*ii)->export_type()) { 760 case ExportInstr::pos: m_last_pos = *ii; break; 761 case ExportInstr::param: m_last_param = *ii; break; 762 case ExportInstr::pixel: m_last_pixel = *ii; break; 763 } 764 (*ii)->set_is_last_export(false); 765 ready_list.erase(ii); 766 return true; 767 } 768 return false; 769} 770 771bool BlockSheduler::collect_ready(CollectInstructions &available) 772{ 773 sfn_log << SfnLog::schedule << "Ready instructions\n"; 774 bool result = false; 775 result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec); 776 result |= collect_ready_type(alu_trans_ready, available.alu_trans); 777 result |= collect_ready_type(alu_groups_ready, available.alu_groups); 778 result |= collect_ready_type(gds_ready, available.gds_op); 779 result |= collect_ready_type(tex_ready, available.tex); 780 result |= collect_ready_type(fetches_ready, available.fetches); 781 result |= collect_ready_type(memops_ready, available.mem_write_instr); 782 result |= collect_ready_type(mem_ring_writes_ready, available.mem_ring_writes); 783 result |= collect_ready_type(write_tf_ready, available.write_tf); 784 result |= collect_ready_type(rat_instr_ready, available.rat_instr); 785 786 sfn_log << SfnLog::schedule << "\n"; 787 return result; 788} 789 790bool BlockSheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available) 791{ 792 auto i = available.begin(); 793 auto e = available.end(); 794 795 for (auto alu : ready) { 796 alu->add_priority(100 * alu->register_priority()); 797 } 798 799 int max_check = 0; 800 while (i != e && max_check++ < 32) { 801 if (ready.size() < 32 && (*i)->ready()) { 802 803 int priority = 0; 804 /* LDS fetches that use static offsets are usually ready ery fast, 805 * so that they would get schedules early, and this leaves the problem 806 * that we allocate too many registers with just constant values, 807 * and this will make problems wih RA. So limit the number of LDS 808 * address registers. 809 */ 810 if ((*i)->has_alu_flag(alu_lds_address)) { 811 if (m_lds_addr_count > 64) { 812 ++i; 813 continue; 814 } else { 815 ++m_lds_addr_count; 816 } 817 } 818 819 /* LDS instructions are scheduled with high priority. 820 * instractions that can go into the t slot and don't have 821 * indirect access are put in last, so that they don't block 822 * vec-only instructions when scheduling to the vector slots 823 * for everything else we look at the register use */ 824 825 if ((*i)->has_lds_access()) 826 priority = 100000; 827 else if (AluGroup::has_t()) { 828 auto opinfo = alu_ops.find((*i)->opcode()); 829 assert(opinfo != alu_ops.end()); 830 if (opinfo->second.can_channel(AluOp::t, m_chip_class) && 831 !std::get<0>((*i)->indirect_addr())) 832 priority = -1; 833 } 834 835 priority += 100 * (*i)->register_priority(); 836 837 (*i)->add_priority(priority); 838 ready.push_back(*i); 839 840 auto old_i = i; 841 ++i; 842 available.erase(old_i); 843 } else 844 ++i; 845 } 846 847 for (auto& i: ready) 848 sfn_log << SfnLog::schedule << "V: " << *i << "\n"; 849 850 ready.sort([](const AluInstr *lhs, const AluInstr *rhs) { 851 return lhs->priority() > rhs->priority();}); 852 853 for (auto& i: ready) 854 sfn_log << SfnLog::schedule << "V (S): " << *i << "\n"; 855 856 return !ready.empty(); 857} 858 859template <typename T> 860struct type_char { 861 862}; 863 864 865template <> 866struct type_char<AluInstr> { 867 static constexpr const char value = 'A'; 868}; 869 870template <> 871struct type_char<AluGroup> { 872 static constexpr const char value = 'G'; 873}; 874 875template <> 876struct type_char<ExportInstr> { 877 static constexpr const char value = 'E'; 878}; 879 880template <> 881struct type_char<TexInstr> { 882 static constexpr const char value = 'T'; 883}; 884 885template <> 886struct type_char<FetchInstr> { 887 static constexpr const char value = 'F'; 888}; 889 890template <> 891struct type_char<WriteOutInstr> { 892 static constexpr const char value = 'M'; 893}; 894 895template <> 896struct type_char<MemRingOutInstr> { 897 static constexpr const char value = 'R'; 898}; 899 900template <> 901struct type_char<WriteTFInstr> { 902 static constexpr const char value = 'X'; 903}; 904 905template <> 906struct type_char<GDSInstr> { 907 static constexpr const char value = 'S'; 908}; 909 910template <> 911struct type_char<RatInstr> { 912 static constexpr const char value = 'I'; 913}; 914 915 916template <typename T> 917bool BlockSheduler::collect_ready_type(std::list<T *>& ready, std::list<T *>& available) 918{ 919 auto i = available.begin(); 920 auto e = available.end(); 921 922 int lookahead = 16; 923 while (i != e && ready.size() < 16 && lookahead-- > 0) { 924 if ((*i)->ready()) { 925 ready.push_back(*i); 926 auto old_i = i; 927 ++i; 928 available.erase(old_i); 929 } else 930 ++i; 931 } 932 933 for (auto& i: ready) 934 sfn_log << SfnLog::schedule << type_char<T>::value << "; " << *i << "\n"; 935 936 return !ready.empty(); 937} 938 939} 940