1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_scheduler.h"
28 #include "sfn_instr_alugroup.h"
29 #include "sfn_instr_controlflow.h"
30 #include "sfn_instr_export.h"
31 #include "sfn_instr_fetch.h"
32 #include "sfn_instr_mem.h"
33 #include "sfn_instr_lds.h"
34 #include "sfn_instr_tex.h"
35 #include "sfn_debug.h"
36 
37 #include <algorithm>
38 #include <sstream>
39 
40 namespace r600 {
41 
/* Visitor that sorts the instructions of one block into per-type work
 * lists that the scheduler picks from. ALU instructions are additionally
 * separated by slot type (vector vs. trans), multi-slot ALU instructions
 * are pre-split into complete ALU groups, and LDS read/atomic pseudo
 * instructions are expanded into their ALU components. */
class CollectInstructions : public InstrVisitor {

public:
   CollectInstructions(ValueFactory& vf):
      m_value_factory(vf)  {}

   void visit(AluInstr *instr) override {
      /* Instructions flagged for the trans (t) slot get their own list */
      if (instr->has_alu_flag(alu_is_trans))
         alu_trans.push_back(instr);
      else {
         if (instr->alu_slots() == 1)
            alu_vec.push_back(instr);
         else
            /* Multi-slot instructions are split into a full group here */
            alu_groups.push_back(instr->split(m_value_factory));
      }
   }
   void visit(AluGroup *instr) override {
      alu_groups.push_back(instr);
   }
   void visit(TexInstr *instr) override {
      tex.push_back(instr);
   }
   void visit(ExportInstr *instr) override {
      exports.push_back(instr);
   }
   void visit(FetchInstr *instr)  override {
      fetches.push_back(instr);
   }
   void visit(Block *instr) override {
      /* Recurse into the block and collect its instructions */
      for (auto& i: *instr)
         i->accept(*this);
   }

   /* Only one trailing control flow instruction per block is expected */
   void visit(ControlFlowInstr *instr) override {
      assert(!m_cf_instr);
      m_cf_instr = instr;
   }

   void visit(IfInstr *instr) override {
      assert(!m_cf_instr);
      m_cf_instr = instr;
   }

   void visit(EmitVertexInstr *instr) override {
      assert(!m_cf_instr);
      m_cf_instr = instr;
   }

   void visit(ScratchIOInstr *instr) override {
      mem_write_instr.push_back(instr);
   }

   void visit(StreamOutInstr *instr) override {
      mem_write_instr.push_back(instr);
   }

   void visit(MemRingOutInstr *instr) override {
      mem_ring_writes.push_back(instr);
   }

   void visit(GDSInstr *instr) override {
      gds_op.push_back(instr);
   }

   void visit(WriteTFInstr *instr) override {
      write_tf.push_back(instr);
   }

   /* LDS pseudo instructions are split into their ALU parts, which are
    * then re-dispatched through this visitor. m_last_lds_instr chains the
    * split instructions so their order is preserved. */
   void visit(LDSReadInstr *instr) override {
      std::vector<AluInstr*> buffer;
      m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
      for (auto& i: buffer) {
         i->accept(*this);
      }
   }

   void visit(LDSAtomicInstr *instr) override {
      std::vector<AluInstr*> buffer;
      m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
      for (auto& i: buffer) {
         i->accept(*this);
      }
   }

   void visit(RatInstr *instr) override {
      rat_instr.push_back(instr);
   }


   /* Per-type work lists filled by the visit methods above */
   std::list<AluInstr *> alu_trans;
   std::list<AluInstr *> alu_vec;
   std::list<TexInstr *> tex;
   std::list<AluGroup *> alu_groups;
   std::list<ExportInstr *> exports;
   std::list<FetchInstr *> fetches;
   std::list<WriteOutInstr *> mem_write_instr;
   std::list<MemRingOutInstr *> mem_ring_writes;
   std::list<GDSInstr *> gds_op;
   std::list<WriteTFInstr *> write_tf;
   std::list<RatInstr *> rat_instr;

   /* Trailing control flow instruction of the block (if any) */
   Instr *m_cf_instr{nullptr};
   ValueFactory& m_value_factory;

   /* Last instruction produced by an LDS split, used to chain LDS ops */
   AluInstr *m_last_lds_instr{nullptr};
};
148 
/* Schedules the instructions of a shader into machine blocks of uniform
 * type (alu, tex, vtx, gds, cf).
 * NOTE(review): the class name carries a typo ("Sheduler"); it is kept
 * because it is referenced throughout this file. */
class BlockSheduler {
public:
   BlockSheduler(r600_chip_class chip_class);
   void run(Shader *shader);

   /* Flag the last scheduled export of each type (pos/pixel/param) */
   void finalize();

private:

   void schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf);

   /* Move instructions whose dependencies are satisfied from the
    * collected lists to the *_ready lists below */
   bool collect_ready(CollectInstructions &available);

   template <typename T>
   bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig);

   bool collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available);

   bool schedule_tex(Shader::ShaderBlocks& out_blocks);
   bool schedule_vtx(Shader::ShaderBlocks& out_blocks);

   template <typename I>
   bool schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);

   template <typename I>
   bool schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);

   bool schedule_alu(Shader::ShaderBlocks& out_blocks);
   void start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type);

   bool schedule_alu_to_group_vec(AluGroup *group);
   bool schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist);

   bool schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list);

   /* Emit at most one instruction from the ready list */
   template <typename I>
   bool schedule(std::list<I *>& ready_list);

   /* Emit as many instructions as fit into the current block */
   template <typename I>
   bool schedule_block(std::list<I *>& ready_list);

   /* Per-type lists of instructions that are ready to be scheduled */
   std::list<AluInstr *> alu_vec_ready;
   std::list<AluInstr *> alu_trans_ready;
   std::list<AluGroup *> alu_groups_ready;
   std::list<TexInstr *> tex_ready;
   std::list<ExportInstr *> exports_ready;
   std::list<FetchInstr *> fetches_ready;
   std::list<WriteOutInstr *> memops_ready;
   std::list<MemRingOutInstr *> mem_ring_writes_ready;
   std::list<GDSInstr *> gds_ready;
   std::list<WriteTFInstr *> write_tf_ready;
   std::list<RatInstr *> rat_instr_ready;

   /* State of the scheduling loop in schedule_block; each state falls
    * through to the next when it has no work */
   enum {
      sched_alu,
      sched_tex,
      sched_fetch,
      sched_free,
      sched_mem_ring,
      sched_gds,
      sched_write_tf,
      sched_rat,
   } current_shed;

   /* Most recently scheduled export of each kind; finalize() marks these
    * as the last export of their type */
   ExportInstr *m_last_pos;
   ExportInstr *m_last_pixel;
   ExportInstr *m_last_param;

   /* Block currently being filled */
   Block *m_current_block;

   /* Number of outstanding LDS address registers; bounded in
    * collect_ready_alu_vec to limit register pressure */
   int m_lds_addr_count{0};
   int m_alu_groups_schduled{0};
   r600_chip_class m_chip_class;

};
224 
schedule(Shader *original)225 Shader *schedule(Shader *original)
226 {
227    Block::set_chipclass(original->chip_class());
228    AluGroup::set_chipclass(original->chip_class());
229 
230    sfn_log << SfnLog::schedule << "Original shader\n";
231    if (sfn_log.has_debug_flag(SfnLog::schedule)) {
232       std::stringstream ss;
233       original->print(ss);
234       sfn_log << ss.str() << "\n\n";
235    }
236 
237    // TODO later it might be necessary to clone the shader
238    // to be able to re-start scheduling
239 
240    auto scheduled_shader = original;
241    BlockSheduler s(original->chip_class());
242    s.run(scheduled_shader);
243    s.finalize();
244 
245    sfn_log << SfnLog::schedule << "Scheduled shader\n";
246    if (sfn_log.has_debug_flag(SfnLog::schedule)) {
247       std::stringstream ss;
248       scheduled_shader->print(ss);
249       sfn_log << ss.str() << "\n\n";
250    }
251 
252    return scheduled_shader;
253 }
254 
/* Initialize the scheduler: start in the ALU scheduling state with no
 * pending last-exports and no block under construction. */
BlockSheduler::BlockSheduler(r600_chip_class chip_class):
   current_shed(sched_alu),
   m_last_pos(nullptr),
   m_last_pixel(nullptr),
   m_last_param(nullptr),
   m_current_block(nullptr),
   m_chip_class(chip_class)
{
}
264 
run( Shader *shader)265 void BlockSheduler::run( Shader *shader)
266 {
267    Shader::ShaderBlocks scheduled_blocks;
268 
269    for (auto& block : shader->func()) {
270       sfn_log << SfnLog::schedule  << "Process block " << block->id() <<"\n";
271       if (sfn_log.has_debug_flag(SfnLog::schedule)) {
272          std::stringstream ss;
273          block->print(ss);
274          sfn_log << ss.str() << "\n";
275       }
276       schedule_block(*block, scheduled_blocks, shader->value_factory());
277    }
278 
279    shader->reset_function(scheduled_blocks);
280 }
281 
schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf)282 void BlockSheduler::schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf)
283 {
284 
285    assert(in_block.id() >= 0);
286 
287 
288    current_shed = sched_fetch;
289    auto last_shed = sched_fetch;
290 
291    CollectInstructions cir(vf);
292    in_block.accept(cir);
293 
294    bool have_instr = collect_ready(cir);
295 
296    m_current_block = new Block(in_block.nesting_depth(), in_block.id());
297    assert(m_current_block->id() >= 0);
298 
299    while (have_instr) {
300 
301       sfn_log << SfnLog::schedule << "Have ready instructions\n";
302 
303       if (alu_vec_ready.size())
304          sfn_log << SfnLog::schedule << "  ALU V:" << alu_vec_ready.size() << "\n";
305 
306       if (alu_trans_ready.size())
307          sfn_log << SfnLog::schedule <<  "  ALU T:" << alu_trans_ready.size() << "\n";
308 
309       if (alu_groups_ready.size())
310          sfn_log << SfnLog::schedule << "  ALU G:" << alu_groups_ready.size() << "\n";
311 
312       if (exports_ready.size())
313          sfn_log << SfnLog::schedule << "  EXP:" << exports_ready.size()
314                  << "\n";
315       if (tex_ready.size())
316          sfn_log << SfnLog::schedule << "  TEX:" << tex_ready.size()
317                  << "\n";
318       if (fetches_ready.size())
319          sfn_log << SfnLog::schedule << "  FETCH:" << fetches_ready.size()
320                  << "\n";
321       if (mem_ring_writes_ready.size())
322          sfn_log << SfnLog::schedule << "  MEM_RING:" << mem_ring_writes_ready.size()
323                  << "\n";
324       if (memops_ready.size())
325          sfn_log << SfnLog::schedule << "  MEM_OPS:" << mem_ring_writes_ready.size()
326                  << "\n";
327 
328       if (!m_current_block->lds_group_active()) {
329          if (last_shed != sched_free && memops_ready.size() > 8)
330             current_shed = sched_free;
331          else if (mem_ring_writes_ready.size() > 15)
332             current_shed = sched_mem_ring;
333          else if (rat_instr_ready.size() > 3)
334             current_shed = sched_rat;
335          else if (tex_ready.size() > 3)
336             current_shed = sched_tex;
337       }
338 
339       switch (current_shed) {
340       case sched_alu:
341          if (!schedule_alu(out_blocks)) {
342             assert(!m_current_block->lds_group_active());
343             current_shed = sched_tex;
344             continue;
345          }
346          last_shed = current_shed;
347          break;
348       case sched_tex:
349          if (tex_ready.empty() || !schedule_tex(out_blocks)) {
350             current_shed = sched_fetch;
351             continue;
352          }
353          last_shed = current_shed;
354          break;
355       case sched_fetch:
356          if (!fetches_ready.empty()) {
357             schedule_vtx(out_blocks);
358             last_shed = current_shed;
359          }
360          current_shed = sched_gds;
361          continue;
362       case sched_gds:
363          if (!gds_ready.empty()) {
364             schedule_gds(out_blocks, gds_ready);
365             last_shed = current_shed;
366          }
367          current_shed = sched_mem_ring;
368          continue;
369       case sched_mem_ring:
370          if (mem_ring_writes_ready.empty() || !schedule_cf(out_blocks, mem_ring_writes_ready)) {
371             current_shed = sched_write_tf;
372             continue;
373          }
374          last_shed = current_shed;
375          break;
376       case sched_write_tf:
377          if (write_tf_ready.empty() || !schedule_gds(out_blocks, write_tf_ready)) {
378             current_shed = sched_rat;
379             continue;
380          }
381          last_shed = current_shed;
382          break;
383       case sched_rat:
384          if (rat_instr_ready.empty() || !schedule_cf(out_blocks, rat_instr_ready)) {
385              current_shed = sched_free;
386              continue;
387           }
388          last_shed = current_shed;
389          break;
390       case sched_free:
391          if (memops_ready.empty() || !schedule_cf(out_blocks, memops_ready)) {
392             current_shed = sched_alu;
393             break;
394          }
395          last_shed = current_shed;
396       }
397 
398       have_instr = collect_ready(cir);
399    }
400 
401    /* Emit exports always at end of a block */
402    while (collect_ready_type(exports_ready, cir.exports))
403       schedule_exports(out_blocks, exports_ready);
404 
405    bool fail = false;
406 
407    if (!cir.alu_groups.empty()) {
408       std::cerr << "Unscheduled ALU groups:\n";
409       for (auto& a : cir.alu_groups) {
410           std::cerr << "   " << *a << "\n";
411       }
412       fail = true;
413    }
414 
415    if (!cir.alu_vec.empty()){
416       std::cerr << "Unscheduled ALU vec ops:\n";
417       for (auto& a : cir.alu_vec) {
418           std::cerr << "   " << *a << "\n";
419       }
420       fail = true;
421    }
422 
423    if (!cir.alu_trans.empty()){
424       std::cerr << "Unscheduled ALU trans ops:\n";
425       for (auto& a : cir.alu_trans) {
426           std::cerr << "   " << *a << "\n";
427       }
428       fail = true;
429    }
430    if (!cir.mem_write_instr.empty()){
431       std::cerr << "Unscheduled MEM ops:\n";
432       for (auto& a : cir.mem_write_instr) {
433           std::cerr << "   " << *a << "\n";
434       }
435       fail = true;
436    }
437 
438    if (!cir.fetches.empty()){
439       std::cerr << "Unscheduled Fetch ops:\n";
440       for (auto& a : cir.fetches) {
441           std::cerr << "   " << *a << "\n";
442       }
443       fail = true;
444    }
445 
446    if (!cir.tex.empty()){
447       std::cerr << "Unscheduled Tex ops:\n";
448       for (auto& a : cir.tex) {
449           std::cerr << "   " << *a << "\n";
450       }
451       fail = true;
452    }
453 
454    assert(cir.tex.empty());
455    assert(cir.exports.empty());
456    assert(cir.fetches.empty());
457    assert(cir.alu_vec.empty());
458    assert(cir.mem_write_instr.empty());
459    assert(cir.mem_ring_writes.empty());
460 
461    assert (!fail);
462 
463    if (cir.m_cf_instr) {
464       // Assert that if condition is ready
465       m_current_block->push_back(cir.m_cf_instr);
466       cir.m_cf_instr->set_scheduled();
467    }
468 
469    out_blocks.push_back(m_current_block);
470 }
471 
finalize()472 void BlockSheduler::finalize()
473 {
474    if (m_last_pos)
475       m_last_pos->set_is_last_export(true);
476    if (m_last_pixel)
477       m_last_pixel->set_is_last_export(true);
478    if (m_last_param)
479       m_last_param->set_is_last_export(true);
480 }
481 
/* Schedule one ALU group into the current block.
 *
 * Either takes a pre-built group from alu_groups_ready or builds a new
 * group from the vec/trans ready lists. Returns false if no ALU work
 * could be scheduled. */
bool BlockSheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
{
   bool success = false;
   AluGroup *group = nullptr;

   bool has_alu_ready = !alu_vec_ready.empty() || !alu_trans_ready.empty();

   /* alu_vec_ready is sorted with LDS access at highest priority (see
    * collect_ready_alu_vec), so checking the front entry suffices */
   bool has_lds_ready = !alu_vec_ready.empty() &&
                        (*alu_vec_ready.begin())->has_lds_access();

   /* If we have ready ALU instructions we have to start a new ALU block */
   if (has_alu_ready ||  !alu_groups_ready.empty()) {
      if (m_current_block->type() != Block::alu) {
         start_new_block(out_blocks, Block::alu);
         m_alu_groups_schduled = 0;
      }
   }

   /* Schedule groups first, unless we have a pending LDS instruction.
    * We don't want the LDS instructions to be too far apart because the
    * fetch + read from queue has to be in the same ALU CF block */
   if (!alu_groups_ready.empty() && !has_lds_ready) {
      group = *alu_groups_ready.begin();
      /* A failed kcache reservation forces a new CF block; the retry in
       * the fresh block must then succeed */
      if (!m_current_block->try_reserve_kcache(*group)) {
         start_new_block(out_blocks, Block::alu);
         m_current_block->set_instr_flag(Instr::force_cf);
      }

      if (!m_current_block->try_reserve_kcache(*group))
         unreachable("Scheduling a group in a new block should always succeed");
      alu_groups_ready.erase(alu_groups_ready.begin());
      sfn_log << SfnLog::schedule << "Schedule ALU group\n";
      success = true;
   } else if (has_alu_ready) {
      group = new AluGroup();
      sfn_log << SfnLog::schedule << "START new ALU group\n";
   } else {
      return false;
   }

   assert(group);

   int free_slots = group->free_slots();

   while (free_slots && has_alu_ready) {
      if (!alu_vec_ready.empty())
         success |= schedule_alu_to_group_vec(group);

      /* Apparently one can't schedule a t-slot if there is already
       * an LDS instruction scheduled.
       * TODO: check whether this is only relevant for actual LDS instructions
       * or also for instructions that read from the LDS return value queue */

      if (free_slots & 0x10 && !has_lds_ready) {
         sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
         if (!alu_trans_ready.empty())
            success |= schedule_alu_to_group_trans(group, alu_trans_ready);
         if (!alu_vec_ready.empty())
            success |= schedule_alu_to_group_trans(group, alu_vec_ready);
      }

      if (success) {
         ++m_alu_groups_schduled;
         break;
      } else if (m_current_block->kcache_reservation_failed()) {
         // LDS read groups should not lead to impossible
         // kcache constellations
         assert(!m_current_block->lds_group_active());

         // kcache reservation failed, so we have to start a new CF
         start_new_block(out_blocks, Block::alu);
         m_current_block->set_instr_flag(Instr::force_cf);
      } else {
         return false;
      }
   }

   sfn_log << SfnLog::schedule << "Finalize ALU group\n";
   group->set_scheduled();
   group->fix_last_flag();
   group->set_nesting_depth(m_current_block->nesting_depth());
   m_current_block->push_back(group);

   /* Track open/close of an LDS group so that block splits are blocked
    * while it is active */
   if (group->has_lds_group_start())
      m_current_block->lds_group_start(*group->begin());

   if (group->has_lds_group_end())
      m_current_block->lds_group_end();

   /* A kill op forces a new CF block after this group */
   if (group->has_kill_op()) {
      assert(!group->has_lds_group_start());
      start_new_block(out_blocks, Block::alu);
      m_current_block->set_instr_flag(Instr::force_cf);
   }


   return success;
}
580 
/* Schedule one texture instruction together with its prepare
 * instructions. Opens a new tex block when the current block has a
 * different type or is full. Returns true if an instruction was emitted. */
bool BlockSheduler::schedule_tex(Shader::ShaderBlocks& out_blocks)
{
   if (m_current_block->type() != Block::tex || m_current_block->remaining_slots() ==  0) {
      start_new_block(out_blocks, Block::tex);
      m_current_block->set_instr_flag(Instr::force_cf);
   }


   if (!tex_ready.empty() && m_current_block->remaining_slots() > 0) {
      auto ii = tex_ready.begin();
      sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";

      /* The prepare instructions have to land in the same block as the
       * tex instruction itself, so split early if they don't all fit */
      if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
         start_new_block(out_blocks, Block::tex);

      for (auto prep : (*ii)->prepare_instr()) {
         prep->set_scheduled();
         m_current_block->push_back(prep);
      }

      (*ii)->set_scheduled();
      m_current_block->push_back(*ii);
      tex_ready.erase(ii);
      return true;
   }
   return false;
}
608 
schedule_vtx(Shader::ShaderBlocks& out_blocks)609 bool BlockSheduler::schedule_vtx(Shader::ShaderBlocks& out_blocks)
610 {
611    if (m_current_block->type() != Block::vtx || m_current_block->remaining_slots() == 0) {
612       start_new_block(out_blocks, Block::vtx);
613       m_current_block->set_instr_flag(Instr::force_cf);
614    }
615    return schedule_block(fetches_ready);
616 }
617 
618 template <typename I>
schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)619 bool BlockSheduler::schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
620 {
621    bool was_full = m_current_block->remaining_slots() == 0;
622    if (m_current_block->type() != Block::gds || was_full) {
623       start_new_block(out_blocks, Block::gds);
624       if (was_full)
625          m_current_block->set_instr_flag(Instr::force_cf);
626    }
627    return schedule_block(ready_list);
628 }
629 
630 
start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type)631 void BlockSheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type)
632 {
633    if (!m_current_block->empty()) {
634       sfn_log << SfnLog::schedule << "Start new block\n";
635       assert(!m_current_block->lds_group_active());
636       out_blocks.push_back(m_current_block);
637       m_current_block = new Block(m_current_block->nesting_depth(), m_current_block->id());
638    }
639    m_current_block->set_type(type);
640 }
641 
642 template <typename I>
schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)643 bool BlockSheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
644 {
645    if (ready_list.empty())
646       return false;
647    if (m_current_block->type() != Block::cf)
648       start_new_block(out_blocks, Block::cf);
649    return schedule(ready_list);
650 }
651 
652 
schedule_alu_to_group_vec(AluGroup *group)653 bool BlockSheduler::schedule_alu_to_group_vec(AluGroup *group)
654 {
655    assert(group);
656    assert(!alu_vec_ready.empty());
657 
658    bool success =  false;
659    auto i = alu_vec_ready.begin();
660    auto e = alu_vec_ready.end();
661    while (i != e) {
662       sfn_log << SfnLog::schedule << "Try schedule to vec " << **i;
663 
664       if (!m_current_block->try_reserve_kcache(**i)) {
665            sfn_log << SfnLog::schedule << " failed (kcache)\n";
666          ++i;
667          continue;
668       }
669 
670       if (group->add_vec_instructions(*i)) {
671          auto old_i = i;
672          ++i;
673          if ((*old_i)->has_alu_flag(alu_is_lds)) {
674             --m_lds_addr_count;
675          }
676 
677          alu_vec_ready.erase(old_i);
678          success = true;
679          sfn_log << SfnLog::schedule << " success\n";
680       } else {
681          ++i;
682          sfn_log << SfnLog::schedule << " failed\n";
683       }
684    }
685    return success;
686 }
687 
schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist)688 bool BlockSheduler::schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist)
689 {
690    assert(group);
691 
692    bool success =  false;
693    auto i = readylist.begin();
694    auto e = readylist.end();
695    while (i != e) {
696       sfn_log << SfnLog::schedule << "Try schedule to trans " << **i;
697       if (!m_current_block->try_reserve_kcache(**i)) {
698            sfn_log << SfnLog::schedule << " failed (kcache)\n";
699          ++i;
700          continue;
701       }
702 
703       if (group->add_trans_instructions(*i)) {
704          auto old_i = i;
705          ++i;
706          readylist.erase(old_i);
707          success = true;
708          sfn_log << SfnLog::schedule << " sucess\n";
709          break;
710       } else {
711          ++i;
712          sfn_log << SfnLog::schedule << " failed\n";
713       }
714    }
715    return success;
716 }
717 
718 template <typename I>
schedule(std::list<I *>& ready_list)719 bool BlockSheduler::schedule(std::list<I *>& ready_list)
720 {
721    if (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
722       auto ii = ready_list.begin();
723       sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
724       (*ii)->set_scheduled();
725       m_current_block->push_back(*ii);
726       ready_list.erase(ii);
727       return true;
728    }
729    return false;
730 }
731 
732 template <typename I>
schedule_block(std::list<I *>& ready_list)733 bool BlockSheduler::schedule_block(std::list<I *>& ready_list)
734 {
735    bool success = false;
736    while (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
737       auto ii = ready_list.begin();
738       sfn_log << SfnLog::schedule << "Schedule: " << **ii << " "
739               << m_current_block->remaining_slots() << "\n";
740       (*ii)->set_scheduled();
741       m_current_block->push_back(*ii);
742       ready_list.erase(ii);
743       success = true;
744    }
745    return success;
746 }
747 
748 
schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list)749 bool BlockSheduler::schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list)
750 {
751    if (m_current_block->type() != Block::cf)
752       start_new_block(out_blocks, Block::cf);
753 
754    if (!ready_list.empty()) {
755       auto ii = ready_list.begin();
756       sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
757       (*ii)->set_scheduled();
758       m_current_block->push_back(*ii);
759       switch ((*ii)->export_type()) {
760       case ExportInstr::pos: m_last_pos = *ii; break;
761       case ExportInstr::param: m_last_param = *ii; break;
762       case ExportInstr::pixel: m_last_pixel = *ii; break;
763       }
764       (*ii)->set_is_last_export(false);
765       ready_list.erase(ii);
766       return true;
767    }
768    return false;
769 }
770 
collect_ready(CollectInstructions &available)771 bool BlockSheduler::collect_ready(CollectInstructions &available)
772 {
773    sfn_log << SfnLog::schedule << "Ready instructions\n";
774    bool result = false;
775    result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec);
776    result |= collect_ready_type(alu_trans_ready, available.alu_trans);
777    result |= collect_ready_type(alu_groups_ready, available.alu_groups);
778    result |= collect_ready_type(gds_ready, available.gds_op);
779    result |= collect_ready_type(tex_ready, available.tex);
780    result |= collect_ready_type(fetches_ready, available.fetches);
781    result |= collect_ready_type(memops_ready, available.mem_write_instr);
782    result |= collect_ready_type(mem_ring_writes_ready, available.mem_ring_writes);
783    result |= collect_ready_type(write_tf_ready, available.write_tf);
784    result |= collect_ready_type(rat_instr_ready, available.rat_instr);
785 
786    sfn_log << SfnLog::schedule << "\n";
787    return result;
788 }
789 
/* Move ready vector-slot ALU instructions from available to ready and
 * keep the ready list sorted by descending priority. Returns true if
 * the ready list is non-empty afterwards. */
bool BlockSheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available)
{
   auto i = available.begin();
   auto e = available.end();

   /* Bump the priority of instructions that are already on the ready
    * list according to their register use */
   for (auto alu : ready) {
      alu->add_priority(100 * alu->register_priority());
   }

   /* Look at no more than 32 entries and keep at most 32 on the list */
   int max_check = 0;
   while (i != e && max_check++ < 32) {
      if (ready.size() < 32 && (*i)->ready()) {

         int priority = 0;
         /* LDS fetches that use static offsets are usually ready very fast,
          * so that they would get scheduled early, and this leaves the problem
          * that we allocate too many registers with just constant values,
          * and this will make problems with RA. So limit the number of LDS
          * address registers.
          */
         if ((*i)->has_alu_flag(alu_lds_address)) {
            if (m_lds_addr_count > 64) {
               ++i;
               continue;
            } else {
               ++m_lds_addr_count;
            }
         }

         /* LDS instructions are scheduled with high priority.
          * Instructions that can go into the t slot and don't have
          * indirect access are put in last, so that they don't block
          * vec-only instructions when scheduling to the vector slots
          * for everything else we look at the register use */

         if ((*i)->has_lds_access())
             priority = 100000;
         else if (AluGroup::has_t()) {
            auto opinfo = alu_ops.find((*i)->opcode());
            assert(opinfo != alu_ops.end());
            if (opinfo->second.can_channel(AluOp::t, m_chip_class) &&
                !std::get<0>((*i)->indirect_addr()))
               priority = -1;
         }

         priority += 100 * (*i)->register_priority();

         (*i)->add_priority(priority);
         ready.push_back(*i);

         auto old_i = i;
         ++i;
         available.erase(old_i);
      } else
         ++i;
   }

   for (auto& i: ready)
      sfn_log << SfnLog::schedule << "V:  " << *i << "\n";

   /* Highest priority first */
   ready.sort([](const AluInstr *lhs, const AluInstr *rhs) {
                 return lhs->priority() > rhs->priority();});

   for (auto& i: ready)
      sfn_log << SfnLog::schedule << "V (S):  " << *i << "\n";

   return !ready.empty();
}
858 
/* Maps an instruction type to the one-character tag printed in the
 * scheduler debug output (see collect_ready_type). The primary template
 * is deliberately left without a value so that instantiating it for an
 * unsupported type fails to compile. */
template <typename T>
struct type_char {

};


template <>
struct type_char<AluInstr> {
   static constexpr const char value = 'A';
};

template <>
struct type_char<AluGroup>  {
   static constexpr const char value = 'G';
};

template <>
struct type_char<ExportInstr>  {
   static constexpr const char value = 'E';
};

template <>
struct type_char<TexInstr>  {
   static constexpr const char value = 'T';
};

template <>
struct type_char<FetchInstr>  {
   static constexpr const char value = 'F';
};

template <>
struct type_char<WriteOutInstr>  {
   static constexpr const char value = 'M';
};

template <>
struct type_char<MemRingOutInstr>  {
   static constexpr const char value = 'R';
};

template <>
struct type_char<WriteTFInstr>  {
   static constexpr const char value = 'X';
};

template <>
struct type_char<GDSInstr>  {
   static constexpr const char value = 'S';
};

template <>
struct type_char<RatInstr>  {
   static constexpr const char value = 'I';
};
914 
915 
916 template <typename T>
collect_ready_type(std::list<T *>& ready, std::list<T *>& available)917 bool BlockSheduler::collect_ready_type(std::list<T *>& ready, std::list<T *>& available)
918 {
919    auto i = available.begin();
920    auto e = available.end();
921 
922    int lookahead = 16;
923    while (i != e && ready.size() < 16 && lookahead-- > 0) {
924       if ((*i)->ready()) {
925          ready.push_back(*i);
926          auto old_i = i;
927          ++i;
928          available.erase(old_i);
929       } else
930          ++i;
931    }
932 
933    for (auto& i: ready)
934       sfn_log << SfnLog::schedule << type_char<T>::value << ";  " << *i << "\n";
935 
936    return !ready.empty();
937 }
938 
939 }
940