/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2022 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_scheduler.h"
#include "sfn_instr_alugroup.h"
#include "sfn_instr_controlflow.h"
#include "sfn_instr_export.h"
#include "sfn_instr_fetch.h"
#include "sfn_instr_mem.h"
#include "sfn_instr_lds.h"
#include "sfn_instr_tex.h"
#include "sfn_debug.h"

#include <algorithm>
#include <sstream>

namespace r600 {
class CollectInstructions : public InstrVisitor {

public:
   CollectInstructions(ValueFactory& vf):
      m_value_factory(vf) {}

   void visit(AluInstr *instr) override {
      if (instr->has_alu_flag(alu_is_trans))
         alu_trans.push_back(instr);
      else {
         if (instr->alu_slots() == 1)
            alu_vec.push_back(instr);
         else
            alu_groups.push_back(instr->split(m_value_factory));
      }
   }
   void visit(AluGroup *instr) override {
      alu_groups.push_back(instr);
   }
   void visit(TexInstr *instr) override {
      tex.push_back(instr);
   }
   void visit(ExportInstr *instr) override {
      exports.push_back(instr);
   }
   void visit(FetchInstr *instr) override {
      fetches.push_back(instr);
   }
   void visit(Block *instr) override {
      for (auto& i: *instr)
         i->accept(*this);
   }

   void visit(ControlFlowInstr *instr) override {
      assert(!m_cf_instr);
      m_cf_instr = instr;
   }

   void visit(IfInstr *instr) override {
      assert(!m_cf_instr);
      m_cf_instr = instr;
   }

   void visit(EmitVertexInstr *instr) override {
      assert(!m_cf_instr);
      m_cf_instr = instr;
   }

   void visit(ScratchIOInstr *instr) override {
      mem_write_instr.push_back(instr);
   }

   void visit(StreamOutInstr *instr) override {
      mem_write_instr.push_back(instr);
   }

   void visit(MemRingOutInstr *instr) override {
      mem_ring_writes.push_back(instr);
   }

   void visit(GDSInstr *instr) override {
      gds_op.push_back(instr);
   }

   void visit(WriteTFInstr *instr) override {
      write_tf.push_back(instr);
   }

   void visit(LDSReadInstr *instr) override {
      std::vector<AluInstr*> buffer;
      m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
      for (auto& i: buffer) {
         i->accept(*this);
      }
   }

   void visit(LDSAtomicInstr *instr) override {
      std::vector<AluInstr*> buffer;
      m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
      for (auto& i: buffer) {
         i->accept(*this);
      }
   }

   void visit(RatInstr *instr) override {
      rat_instr.push_back(instr);
   }

   std::list<AluInstr *> alu_trans;
   std::list<AluInstr *> alu_vec;
   std::list<TexInstr *> tex;
   std::list<AluGroup *> alu_groups;
   std::list<ExportInstr *> exports;
   std::list<FetchInstr *> fetches;
   std::list<WriteOutInstr *> mem_write_instr;
   std::list<MemRingOutInstr *> mem_ring_writes;
   std::list<GDSInstr *> gds_op;
   std::list<WriteTFInstr *> write_tf;
   std::list<RatInstr *> rat_instr;

   Instr *m_cf_instr{nullptr};
   ValueFactory& m_value_factory;

   AluInstr *m_last_lds_instr{nullptr};
};

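/* Schedules one input block at a time: instructions are pulled from the
 * ready lists and emitted into a chain of new output blocks, with a new
 * block started whenever the CF instruction type changes (ALU, TEX,
 * VTX, GDS, plain CF). */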
class BlockSheduler {
public:
   BlockSheduler(r600_chip_class chip_class);
   void run(Shader *shader);

   void finalize();

private:
   void schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf);

   bool collect_ready(CollectInstructions& available);

   template <typename T>
   bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig);

   bool collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available);

   bool schedule_tex(Shader::ShaderBlocks& out_blocks);
   bool schedule_vtx(Shader::ShaderBlocks& out_blocks);

   template <typename I>
   bool schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);

   template <typename I>
   bool schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);

   bool schedule_alu(Shader::ShaderBlocks& out_blocks);
   void start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type);

   bool schedule_alu_to_group_vec(AluGroup *group);
   bool schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist);

   bool schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list);

   template <typename I>
   bool schedule(std::list<I *>& ready_list);

   template <typename I>
   bool schedule_block(std::list<I *>& ready_list);

   std::list<AluInstr *> alu_vec_ready;
   std::list<AluInstr *> alu_trans_ready;
   std::list<AluGroup *> alu_groups_ready;
   std::list<TexInstr *> tex_ready;
   std::list<ExportInstr *> exports_ready;
   std::list<FetchInstr *> fetches_ready;
   std::list<WriteOutInstr *> memops_ready;
   std::list<MemRingOutInstr *> mem_ring_writes_ready;
   std::list<GDSInstr *> gds_ready;
   std::list<WriteTFInstr *> write_tf_ready;
   std::list<RatInstr *> rat_instr_ready;

   enum {
      sched_alu,
      sched_tex,
      sched_fetch,
      sched_free,
      sched_mem_ring,
      sched_gds,
      sched_write_tf,
      sched_rat,
   } current_shed;

   ExportInstr *m_last_pos;
   ExportInstr *m_last_pixel;
   ExportInstr *m_last_param;

   Block *m_current_block;

   int m_lds_addr_count{0};
   int m_alu_groups_schduled{0};
   r600_chip_class m_chip_class;
};

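/* Public entry point of the scheduler pass. The shader is currently
 * rewritten in place (see the TODO below about cloning); a typical
 * invocation after instruction selection is simply
 *
 *    shader = schedule(shader);
 */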
Shader *schedule(Shader *original)
{
   Block::set_chipclass(original->chip_class());
   AluGroup::set_chipclass(original->chip_class());

   sfn_log << SfnLog::schedule << "Original shader\n";
   if (sfn_log.has_debug_flag(SfnLog::schedule)) {
      std::stringstream ss;
      original->print(ss);
      sfn_log << ss.str() << "\n\n";
   }

   // TODO: later it might be necessary to clone the shader
   // to be able to re-start scheduling

   auto scheduled_shader = original;
   BlockSheduler s(original->chip_class());
   s.run(scheduled_shader);
   s.finalize();

   sfn_log << SfnLog::schedule << "Scheduled shader\n";
   if (sfn_log.has_debug_flag(SfnLog::schedule)) {
      std::stringstream ss;
      scheduled_shader->print(ss);
      sfn_log << ss.str() << "\n\n";
   }

   return scheduled_shader;
}

BlockSheduler::BlockSheduler(r600_chip_class chip_class):
   current_shed(sched_alu),
   m_last_pos(nullptr),
   m_last_pixel(nullptr),
   m_last_param(nullptr),
   m_current_block(nullptr),
   m_chip_class(chip_class)
{
}

void BlockSheduler::run(Shader *shader)
{
   Shader::ShaderBlocks scheduled_blocks;

   for (auto& block : shader->func()) {
      sfn_log << SfnLog::schedule << "Process block " << block->id() << "\n";
      if (sfn_log.has_debug_flag(SfnLog::schedule)) {
         std::stringstream ss;
         block->print(ss);
         sfn_log << ss.str() << "\n";
      }
      schedule_block(*block, scheduled_blocks, shader->value_factory());
   }

   shader->reset_function(scheduled_blocks);
}

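/* Core scheduling loop for one input block: collect the ready
 * instructions, then keep emitting them into the current output block,
 * switching the scheduling mode (and with it the block type) when a
 * ready list runs dry or grows too long. Exports are always emitted at
 * the end, and any instructions left unscheduled indicate a bug. */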
void BlockSheduler::schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf)
{
   assert(in_block.id() >= 0);

   current_shed = sched_fetch;
   auto last_shed = sched_fetch;

   CollectInstructions cir(vf);
   in_block.accept(cir);

   bool have_instr = collect_ready(cir);

   m_current_block = new Block(in_block.nesting_depth(), in_block.id());
   assert(m_current_block->id() >= 0);

   while (have_instr) {

      sfn_log << SfnLog::schedule << "Have ready instructions\n";

      if (alu_vec_ready.size())
         sfn_log << SfnLog::schedule << " ALU V:" << alu_vec_ready.size() << "\n";

      if (alu_trans_ready.size())
         sfn_log << SfnLog::schedule << " ALU T:" << alu_trans_ready.size() << "\n";

      if (alu_groups_ready.size())
         sfn_log << SfnLog::schedule << " ALU G:" << alu_groups_ready.size() << "\n";

      if (exports_ready.size())
         sfn_log << SfnLog::schedule << " EXP:" << exports_ready.size() << "\n";
      if (tex_ready.size())
         sfn_log << SfnLog::schedule << " TEX:" << tex_ready.size() << "\n";
      if (fetches_ready.size())
         sfn_log << SfnLog::schedule << " FETCH:" << fetches_ready.size() << "\n";
      if (mem_ring_writes_ready.size())
         sfn_log << SfnLog::schedule << " MEM_RING:" << mem_ring_writes_ready.size() << "\n";
      if (memops_ready.size())
         sfn_log << SfnLog::schedule << " MEM_OPS:" << memops_ready.size() << "\n";

      if (!m_current_block->lds_group_active()) {
         if (last_shed != sched_free && memops_ready.size() > 8)
            current_shed = sched_free;
         else if (mem_ring_writes_ready.size() > 15)
            current_shed = sched_mem_ring;
         else if (rat_instr_ready.size() > 3)
            current_shed = sched_rat;
         else if (tex_ready.size() > 3)
            current_shed = sched_tex;
      }

      switch (current_shed) {
      case sched_alu:
         if (!schedule_alu(out_blocks)) {
            assert(!m_current_block->lds_group_active());
            current_shed = sched_tex;
            continue;
         }
         last_shed = current_shed;
         break;
      case sched_tex:
         if (tex_ready.empty() || !schedule_tex(out_blocks)) {
            current_shed = sched_fetch;
            continue;
         }
         last_shed = current_shed;
         break;
      case sched_fetch:
         if (!fetches_ready.empty()) {
            schedule_vtx(out_blocks);
            last_shed = current_shed;
         }
         current_shed = sched_gds;
         continue;
      case sched_gds:
         if (!gds_ready.empty()) {
            schedule_gds(out_blocks, gds_ready);
            last_shed = current_shed;
         }
         current_shed = sched_mem_ring;
         continue;
      case sched_mem_ring:
         if (mem_ring_writes_ready.empty() || !schedule_cf(out_blocks, mem_ring_writes_ready)) {
            current_shed = sched_write_tf;
            continue;
         }
         last_shed = current_shed;
         break;
      case sched_write_tf:
         if (write_tf_ready.empty() || !schedule_gds(out_blocks, write_tf_ready)) {
            current_shed = sched_rat;
            continue;
         }
         last_shed = current_shed;
         break;
      case sched_rat:
         if (rat_instr_ready.empty() || !schedule_cf(out_blocks, rat_instr_ready)) {
            current_shed = sched_free;
            continue;
         }
         last_shed = current_shed;
         break;
      case sched_free:
         if (memops_ready.empty() || !schedule_cf(out_blocks, memops_ready)) {
            current_shed = sched_alu;
            break;
         }
         last_shed = current_shed;
      }

      have_instr = collect_ready(cir);
   }

   /* Emit exports always at end of a block */
   while (collect_ready_type(exports_ready, cir.exports))
      schedule_exports(out_blocks, exports_ready);

   bool fail = false;

   if (!cir.alu_groups.empty()) {
      std::cerr << "Unscheduled ALU groups:\n";
      for (auto& a : cir.alu_groups) {
         std::cerr << " " << *a << "\n";
      }
      fail = true;
   }

   if (!cir.alu_vec.empty()) {
      std::cerr << "Unscheduled ALU vec ops:\n";
      for (auto& a : cir.alu_vec) {
         std::cerr << " " << *a << "\n";
      }
      fail = true;
   }

   if (!cir.alu_trans.empty()) {
      std::cerr << "Unscheduled ALU trans ops:\n";
      for (auto& a : cir.alu_trans) {
         std::cerr << " " << *a << "\n";
      }
      fail = true;
   }

   if (!cir.mem_write_instr.empty()) {
      std::cerr << "Unscheduled MEM ops:\n";
      for (auto& a : cir.mem_write_instr) {
         std::cerr << " " << *a << "\n";
      }
      fail = true;
   }

   if (!cir.fetches.empty()) {
      std::cerr << "Unscheduled Fetch ops:\n";
      for (auto& a : cir.fetches) {
         std::cerr << " " << *a << "\n";
      }
      fail = true;
   }

   if (!cir.tex.empty()) {
      std::cerr << "Unscheduled Tex ops:\n";
      for (auto& a : cir.tex) {
         std::cerr << " " << *a << "\n";
      }
      fail = true;
   }

   assert(cir.tex.empty());
   assert(cir.exports.empty());
   assert(cir.fetches.empty());
   assert(cir.alu_vec.empty());
   assert(cir.mem_write_instr.empty());
   assert(cir.mem_ring_writes.empty());

   assert(!fail);

   if (cir.m_cf_instr) {
      // TODO: assert that the 'if' condition is ready
      m_current_block->push_back(cir.m_cf_instr);
      cir.m_cf_instr->set_scheduled();
   }

   out_blocks.push_back(m_current_block);
}

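/* Mark the last emitted export of each type. Done as a separate pass,
 * because only after all blocks are scheduled do we know which export
 * of each type came last. */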
void BlockSheduler::finalize()
{
   if (m_last_pos)
      m_last_pos->set_is_last_export(true);
   if (m_last_pixel)
      m_last_pixel->set_is_last_export(true);
   if (m_last_param)
      m_last_param->set_is_last_export(true);
}

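/* Schedule one ALU group: either take a pre-built group from the ready
 * list or assemble a new one from the ready vec and trans instructions,
 * honoring the kcache limits and the LDS grouping rules. Returns false
 * if nothing could be scheduled. */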
bool BlockSheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
{
   bool success = false;
   AluGroup *group = nullptr;

   bool has_alu_ready = !alu_vec_ready.empty() || !alu_trans_ready.empty();

   bool has_lds_ready = !alu_vec_ready.empty() &&
                        (*alu_vec_ready.begin())->has_lds_access();

   /* If we have ready ALU instructions we have to start a new ALU block */
   if (has_alu_ready || !alu_groups_ready.empty()) {
      if (m_current_block->type() != Block::alu) {
         start_new_block(out_blocks, Block::alu);
         m_alu_groups_schduled = 0;
      }
   }

   /* Schedule groups first, unless we have a pending LDS instruction.
    * We don't want the LDS instructions to be too far apart because the
    * fetch + read from queue has to be in the same ALU CF block. */
   if (!alu_groups_ready.empty() && !has_lds_ready) {
      group = *alu_groups_ready.begin();
      if (!m_current_block->try_reserve_kcache(*group)) {
         start_new_block(out_blocks, Block::alu);
         m_current_block->set_instr_flag(Instr::force_cf);
      }

      if (!m_current_block->try_reserve_kcache(*group))
         unreachable("Scheduling a group in a new block should always succeed");
      alu_groups_ready.erase(alu_groups_ready.begin());
      sfn_log << SfnLog::schedule << "Schedule ALU group\n";
      success = true;
   } else if (has_alu_ready) {
      group = new AluGroup();
      sfn_log << SfnLog::schedule << "START new ALU group\n";
   } else {
      return false;
   }

   assert(group);

   int free_slots = group->free_slots();

   while (free_slots && has_alu_ready) {
      if (!alu_vec_ready.empty())
         success |= schedule_alu_to_group_vec(group);

      /* Apparently one can't schedule a t-slot if there is already
       * an LDS instruction scheduled.
       * TODO: check whether this is only relevant for actual LDS instructions
       * or also for instructions that read from the LDS return value queue */

      if (free_slots & 0x10 && !has_lds_ready) {
         sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
         if (!alu_trans_ready.empty())
            success |= schedule_alu_to_group_trans(group, alu_trans_ready);
         if (!alu_vec_ready.empty())
            success |= schedule_alu_to_group_trans(group, alu_vec_ready);
      }

      if (success) {
         ++m_alu_groups_schduled;
         break;
      } else if (m_current_block->kcache_reservation_failed()) {
         // LDS read groups should not lead to impossible
         // kcache constellations
         assert(!m_current_block->lds_group_active());

         // kcache reservation failed, so we have to start a new CF
         start_new_block(out_blocks, Block::alu);
         m_current_block->set_instr_flag(Instr::force_cf);
      } else {
         return false;
      }
   }

   sfn_log << SfnLog::schedule << "Finalize ALU group\n";
   group->set_scheduled();
   group->fix_last_flag();
   group->set_nesting_depth(m_current_block->nesting_depth());
   m_current_block->push_back(group);

   if (group->has_lds_group_start())
      m_current_block->lds_group_start(*group->begin());

   if (group->has_lds_group_end())
      m_current_block->lds_group_end();

   if (group->has_kill_op()) {
      assert(!group->has_lds_group_start());
      start_new_block(out_blocks, Block::alu);
      m_current_block->set_instr_flag(Instr::force_cf);
   }

   return success;
}

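/* Emit one TEX instruction (plus its prepare instructions) into a TEX
 * block, starting a new block when the type changes or the current one
 * has no room left. */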
bool BlockSheduler::schedule_tex(Shader::ShaderBlocks& out_blocks)
{
   if (m_current_block->type() != Block::tex || m_current_block->remaining_slots() == 0) {
      start_new_block(out_blocks, Block::tex);
      m_current_block->set_instr_flag(Instr::force_cf);
   }

   if (!tex_ready.empty() && m_current_block->remaining_slots() > 0) {
      auto ii = tex_ready.begin();
      sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";

      if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
         start_new_block(out_blocks, Block::tex);

      for (auto prep : (*ii)->prepare_instr()) {
         prep->set_scheduled();
         m_current_block->push_back(prep);
      }

      (*ii)->set_scheduled();
      m_current_block->push_back(*ii);
      tex_ready.erase(ii);
      return true;
   }
   return false;
}

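/* Drain the ready vertex fetches into a VTX block. */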
bool BlockSheduler::schedule_vtx(Shader::ShaderBlocks& out_blocks)
{
   if (m_current_block->type() != Block::vtx || m_current_block->remaining_slots() == 0) {
      start_new_block(out_blocks, Block::vtx);
      m_current_block->set_instr_flag(Instr::force_cf);
   }
   return schedule_block(fetches_ready);
}

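/* Drain a ready list into a GDS block; this is also used for the TF
 * writes, which are emitted in GDS-type blocks as well. */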
template <typename I>
bool BlockSheduler::schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
{
   bool was_full = m_current_block->remaining_slots() == 0;
   if (m_current_block->type() != Block::gds || was_full) {
      start_new_block(out_blocks, Block::gds);
      if (was_full)
         m_current_block->set_instr_flag(Instr::force_cf);
   }
   return schedule_block(ready_list);
}

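/* Close the current output block (if it contains anything) and open a
 * new one of the given type at the same nesting depth. */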
void BlockSheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type)
{
   if (!m_current_block->empty()) {
      sfn_log << SfnLog::schedule << "Start new block\n";
      assert(!m_current_block->lds_group_active());
      out_blocks.push_back(m_current_block);
      m_current_block = new Block(m_current_block->nesting_depth(), m_current_block->id());
   }
   m_current_block->set_type(type);
}

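/* Emit one instruction from the ready list into a plain CF block; used
 * for memory ring writes, RAT ops, and scratch/stream-out writes. */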
template <typename I>
bool BlockSheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
{
   if (ready_list.empty())
      return false;
   if (m_current_block->type() != Block::cf)
      start_new_block(out_blocks, Block::cf);
   return schedule(ready_list);
}

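/* Try to place ready vec instructions into the vector slots of the
 * group; a failed kcache reservation skips a candidate without removing
 * it from the ready list. */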
bool BlockSheduler::schedule_alu_to_group_vec(AluGroup *group)
{
   assert(group);
   assert(!alu_vec_ready.empty());

   bool success = false;
   auto i = alu_vec_ready.begin();
   auto e = alu_vec_ready.end();
   while (i != e) {
      sfn_log << SfnLog::schedule << "Try schedule to vec " << **i;

      if (!m_current_block->try_reserve_kcache(**i)) {
         sfn_log << SfnLog::schedule << " failed (kcache)\n";
         ++i;
         continue;
      }

      if (group->add_vec_instructions(*i)) {
         auto old_i = i;
         ++i;
         if ((*old_i)->has_alu_flag(alu_is_lds)) {
            --m_lds_addr_count;
         }

         alu_vec_ready.erase(old_i);
         success = true;
         sfn_log << SfnLog::schedule << " success\n";
      } else {
         ++i;
         sfn_log << SfnLog::schedule << " failed\n";
      }
   }
   return success;
}

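/* Try to place one instruction from the ready list into the trans slot
 * of the group; stops after the first success since there is only one
 * t slot per group. */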
bool BlockSheduler::schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist)
{
   assert(group);

   bool success = false;
   auto i = readylist.begin();
   auto e = readylist.end();
   while (i != e) {
      sfn_log << SfnLog::schedule << "Try schedule to trans " << **i;
      if (!m_current_block->try_reserve_kcache(**i)) {
         sfn_log << SfnLog::schedule << " failed (kcache)\n";
         ++i;
         continue;
      }

      if (group->add_trans_instructions(*i)) {
         auto old_i = i;
         ++i;
         readylist.erase(old_i);
         success = true;
         sfn_log << SfnLog::schedule << " success\n";
         break;
      } else {
         ++i;
         sfn_log << SfnLog::schedule << " failed\n";
      }
   }
   return success;
}

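/* Emit a single instruction from the ready list into the current block,
 * if there is room. */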
template <typename I>
bool BlockSheduler::schedule(std::list<I *>& ready_list)
{
   if (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
      auto ii = ready_list.begin();
      sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
      (*ii)->set_scheduled();
      m_current_block->push_back(*ii);
      ready_list.erase(ii);
      return true;
   }
   return false;
}

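/* Emit instructions from the ready list until the list is empty or the
 * current block runs out of slots. */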
template <typename I>
bool BlockSheduler::schedule_block(std::list<I *>& ready_list)
{
   bool success = false;
   while (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
      auto ii = ready_list.begin();
      sfn_log << SfnLog::schedule << "Schedule: " << **ii << " "
              << m_current_block->remaining_slots() << "\n";
      (*ii)->set_scheduled();
      m_current_block->push_back(*ii);
      ready_list.erase(ii);
      success = true;
   }
   return success;
}

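/* Emit one export and record the last export of each type, so that
 * finalize() can set the "last export" flag on the right instruction. */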
bool BlockSheduler::schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list)
{
   if (m_current_block->type() != Block::cf)
      start_new_block(out_blocks, Block::cf);

   if (!ready_list.empty()) {
      auto ii = ready_list.begin();
      sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
      (*ii)->set_scheduled();
      m_current_block->push_back(*ii);
      switch ((*ii)->export_type()) {
      case ExportInstr::pos: m_last_pos = *ii; break;
      case ExportInstr::param: m_last_param = *ii; break;
      case ExportInstr::pixel: m_last_pixel = *ii; break;
      }
      (*ii)->set_is_last_export(false);
      ready_list.erase(ii);
      return true;
   }
   return false;
}

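/* Refill all ready lists from the collected instructions; returns true
 * if anything is ready to be scheduled. */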
bool BlockSheduler::collect_ready(CollectInstructions& available)
{
   sfn_log << SfnLog::schedule << "Ready instructions\n";
   bool result = false;
   result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec);
   result |= collect_ready_type(alu_trans_ready, available.alu_trans);
   result |= collect_ready_type(alu_groups_ready, available.alu_groups);
   result |= collect_ready_type(gds_ready, available.gds_op);
   result |= collect_ready_type(tex_ready, available.tex);
   result |= collect_ready_type(fetches_ready, available.fetches);
   result |= collect_ready_type(memops_ready, available.mem_write_instr);
   result |= collect_ready_type(mem_ring_writes_ready, available.mem_ring_writes);
   result |= collect_ready_type(write_tf_ready, available.write_tf);
   result |= collect_ready_type(rat_instr_ready, available.rat_instr);

   sfn_log << SfnLog::schedule << "\n";
   return result;
}

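/* Move ready ALU vec instructions into the ready list and assign
 * scheduling priorities: LDS access first, trans-capable instructions
 * last, register pressure in between. The list is then sorted by
 * descending priority. */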
bool BlockSheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available)
{
   auto i = available.begin();
   auto e = available.end();

   for (auto alu : ready) {
      alu->add_priority(100 * alu->register_priority());
   }

   int max_check = 0;
   while (i != e && max_check++ < 32) {
      if (ready.size() < 32 && (*i)->ready()) {

         int priority = 0;
         /* LDS fetches that use static offsets are usually ready very
          * fast, so they would get scheduled early. This leaves us with
          * the problem that we allocate too many registers holding just
          * constant values, which causes trouble with RA, so limit the
          * number of LDS address registers.
          */
         if ((*i)->has_alu_flag(alu_lds_address)) {
            if (m_lds_addr_count > 64) {
               ++i;
               continue;
            } else {
               ++m_lds_addr_count;
            }
         }

         /* LDS instructions are scheduled with high priority.
          * Instructions that can go into the t slot and don't have
          * indirect access are put last, so that they don't block
          * vec-only instructions when scheduling to the vector slots.
          * For everything else we look at the register use. */

         if ((*i)->has_lds_access())
            priority = 100000;
         else if (AluGroup::has_t()) {
            auto opinfo = alu_ops.find((*i)->opcode());
            assert(opinfo != alu_ops.end());
            if (opinfo->second.can_channel(AluOp::t, m_chip_class) &&
                !std::get<0>((*i)->indirect_addr()))
               priority = -1;
         }

         priority += 100 * (*i)->register_priority();

         (*i)->add_priority(priority);
         ready.push_back(*i);

         auto old_i = i;
         ++i;
         available.erase(old_i);
      } else
         ++i;
   }

   for (auto& i: ready)
      sfn_log << SfnLog::schedule << "V: " << *i << "\n";

   ready.sort([](const AluInstr *lhs, const AluInstr *rhs) {
      return lhs->priority() > rhs->priority();});

   for (auto& i: ready)
      sfn_log << SfnLog::schedule << "V (S): " << *i << "\n";

   return !ready.empty();
}

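/* Single-character tags used to label instruction types in the
 * scheduler debug output. */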
template <typename T>
struct type_char {
};

template <>
struct type_char<AluInstr> {
   static constexpr const char value = 'A';
};

template <>
struct type_char<AluGroup> {
   static constexpr const char value = 'G';
};

template <>
struct type_char<ExportInstr> {
   static constexpr const char value = 'E';
};

template <>
struct type_char<TexInstr> {
   static constexpr const char value = 'T';
};

template <>
struct type_char<FetchInstr> {
   static constexpr const char value = 'F';
};

template <>
struct type_char<WriteOutInstr> {
   static constexpr const char value = 'M';
};

template <>
struct type_char<MemRingOutInstr> {
   static constexpr const char value = 'R';
};

template <>
struct type_char<WriteTFInstr> {
   static constexpr const char value = 'X';
};

template <>
struct type_char<GDSInstr> {
   static constexpr const char value = 'S';
};

template <>
struct type_char<RatInstr> {
   static constexpr const char value = 'I';
};

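/* Generic ready-list refill: move up to 16 ready instructions over from
 * the available list, scanning at most 16 candidates per call. */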
template <typename T>
bool BlockSheduler::collect_ready_type(std::list<T *>& ready, std::list<T *>& available)
{
   auto i = available.begin();
   auto e = available.end();

   int lookahead = 16;
   while (i != e && ready.size() < 16 && lookahead-- > 0) {
      if ((*i)->ready()) {
         ready.push_back(*i);
         auto old_i = i;
         ++i;
         available.erase(old_i);
      } else
         ++i;
   }

   for (auto& i: ready)
      sfn_log << SfnLog::schedule << type_char<T>::value << "; " << *i << "\n";

   return !ready.empty();
}

} // namespace r600