/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2022 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_scheduler.h"
#include "sfn_instr_alugroup.h"
#include "sfn_instr_controlflow.h"
#include "sfn_instr_export.h"
#include "sfn_instr_fetch.h"
#include "sfn_instr_lds.h"
#include "sfn_instr_mem.h"
#include "sfn_instr_tex.h"
#include "sfn_debug.h"

#include <algorithm>
#include <sstream>

namespace r600 {

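/* Visitor that distributes the instructions of a block into per-type
 * lists for the scheduler to drain. Multi-slot ALU instructions are
 * split into groups immediately, and LDS reads/atomics are lowered to
 * the ALU instructions that issue the access and fetch the result. */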
class CollectInstructions : public InstrVisitor {

public:
   CollectInstructions(ValueFactory& vf):
      m_value_factory(vf) {}

   void visit(AluInstr *instr) override {
      if (instr->has_alu_flag(alu_is_trans))
         alu_trans.push_back(instr);
      else {
         if (instr->alu_slots() == 1)
            alu_vec.push_back(instr);
         else
            alu_groups.push_back(instr->split(m_value_factory));
      }
   }

   void visit(AluGroup *instr) override {
      alu_groups.push_back(instr);
   }

   void visit(TexInstr *instr) override {
      tex.push_back(instr);
   }

   void visit(ExportInstr *instr) override {
      exports.push_back(instr);
   }

   void visit(FetchInstr *instr) override {
      fetches.push_back(instr);
   }

   void visit(Block *instr) override {
      for (auto& i: *instr)
         i->accept(*this);
   }

   void visit(ControlFlowInstr *instr) override {
      assert(!m_cf_instr);
      m_cf_instr = instr;
   }

   void visit(IfInstr *instr) override {
      assert(!m_cf_instr);
      m_cf_instr = instr;
   }

   void visit(EmitVertexInstr *instr) override {
      assert(!m_cf_instr);
      m_cf_instr = instr;
   }

   void visit(ScratchIOInstr *instr) override {
      mem_write_instr.push_back(instr);
   }

   void visit(StreamOutInstr *instr) override {
      mem_write_instr.push_back(instr);
   }

   void visit(MemRingOutInstr *instr) override {
      mem_ring_writes.push_back(instr);
   }

   void visit(GDSInstr *instr) override {
      gds_op.push_back(instr);
   }

   void visit(WriteTFInstr *instr) override {
      write_tf.push_back(instr);
   }

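   /* LDS reads and atomics are not scheduled as such: splitting them
    * yields the ALU instructions that enqueue the LDS operation and
    * later read the result from the LDS return value queue. */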
   void visit(LDSReadInstr *instr) override {
      std::vector<AluInstr*> buffer;
      m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
      for (auto& i: buffer) {
         i->accept(*this);
      }
   }

   void visit(LDSAtomicInstr *instr) override {
      std::vector<AluInstr*> buffer;
      m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
      for (auto& i: buffer) {
         i->accept(*this);
      }
   }

   void visit(RatInstr *instr) override {
      rat_instr.push_back(instr);
   }

   std::list<AluInstr *> alu_trans;
   std::list<AluInstr *> alu_vec;
   std::list<TexInstr *> tex;
   std::list<AluGroup *> alu_groups;
   std::list<ExportInstr *> exports;
   std::list<FetchInstr *> fetches;
   std::list<WriteOutInstr *> mem_write_instr;
   std::list<MemRingOutInstr *> mem_ring_writes;
   std::list<GDSInstr *> gds_op;
   std::list<WriteTFInstr *> write_tf;
   std::list<RatInstr *> rat_instr;

   Instr *m_cf_instr{nullptr};
   ValueFactory& m_value_factory;
   AluInstr *m_last_lds_instr{nullptr};
};

class BlockScheduler {
public:
   BlockScheduler(r600_chip_class chip_class);

   void run(Shader *shader);

   void finalize();

private:
   void schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf);

   bool collect_ready(CollectInstructions &available);

   template <typename T>
   bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig);

   bool collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available);

   bool schedule_tex(Shader::ShaderBlocks& out_blocks);
   bool schedule_vtx(Shader::ShaderBlocks& out_blocks);

   template <typename I>
   bool schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);

   template <typename I>
   bool schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);

   bool schedule_alu(Shader::ShaderBlocks& out_blocks);
   void start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type);

   bool schedule_alu_to_group_vec(AluGroup *group);
   bool schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist);

   bool schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list);

   template <typename I>
   bool schedule(std::list<I *>& ready_list);

   template <typename I>
   bool schedule_block(std::list<I *>& ready_list);

   std::list<AluInstr *> alu_vec_ready;
   std::list<AluInstr *> alu_trans_ready;
   std::list<AluGroup *> alu_groups_ready;
   std::list<TexInstr *> tex_ready;
   std::list<ExportInstr *> exports_ready;
   std::list<FetchInstr *> fetches_ready;
   std::list<WriteOutInstr *> memops_ready;
   std::list<MemRingOutInstr *> mem_ring_writes_ready;
   std::list<GDSInstr *> gds_ready;
   std::list<WriteTFInstr *> write_tf_ready;
   std::list<RatInstr *> rat_instr_ready;

   enum {
      sched_alu,
      sched_tex,
      sched_fetch,
      sched_free,
      sched_mem_ring,
      sched_gds,
      sched_write_tf,
      sched_rat,
   } current_sched;

   ExportInstr *m_last_pos;
   ExportInstr *m_last_pixel;
   ExportInstr *m_last_param;

   Block *m_current_block;

   int m_lds_addr_count{0};
   int m_alu_groups_scheduled{0};
   r600_chip_class m_chip_class;
};

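/* Entry point of this pass: schedule all blocks of a shader. A minimal
 * usage sketch; the surrounding pass order is an assumption and not
 * mandated by this file:
 *
 *    Shader *sh = ...;          // shader after instruction selection
 *    sh = r600::schedule(sh);   // form ALU groups and CF clauses
 *    // ... register allocation and assembly would follow
 */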
Shader *schedule(Shader *original)
{
   Block::set_chipclass(original->chip_class());
   AluGroup::set_chipclass(original->chip_class());

   sfn_log << SfnLog::schedule << "Original shader\n";
   if (sfn_log.has_debug_flag(SfnLog::schedule)) {
      std::stringstream ss;
      original->print(ss);
      sfn_log << ss.str() << "\n\n";
   }

   // TODO: later it might be necessary to clone the shader
   // to be able to re-start scheduling

   auto scheduled_shader = original;
   BlockScheduler s(original->chip_class());
   s.run(scheduled_shader);
   s.finalize();

   sfn_log << SfnLog::schedule << "Scheduled shader\n";
   if (sfn_log.has_debug_flag(SfnLog::schedule)) {
      std::stringstream ss;
      scheduled_shader->print(ss);
      sfn_log << ss.str() << "\n\n";
   }

   return scheduled_shader;
}

BlockScheduler::BlockScheduler(r600_chip_class chip_class):
   current_sched(sched_alu),
   m_last_pos(nullptr),
   m_last_pixel(nullptr),
   m_last_param(nullptr),
   m_current_block(nullptr),
   m_chip_class(chip_class)
{
}

void BlockScheduler::run(Shader *shader)
{
   Shader::ShaderBlocks scheduled_blocks;

   for (auto& block : shader->func()) {
      sfn_log << SfnLog::schedule << "Process block " << block->id() << "\n";
      if (sfn_log.has_debug_flag(SfnLog::schedule)) {
         std::stringstream ss;
         block->print(ss);
         sfn_log << ss.str() << "\n";
      }
      schedule_block(*block, scheduled_blocks, shader->value_factory());
   }

   shader->reset_function(scheduled_blocks);
}

void BlockScheduler::schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf)
{
   assert(in_block.id() >= 0);

   current_sched = sched_fetch;
   auto last_sched = sched_fetch;

   CollectInstructions cir(vf);
   in_block.accept(cir);

   bool have_instr = collect_ready(cir);

   m_current_block = new Block(in_block.nesting_depth(), in_block.id());
   assert(m_current_block->id() >= 0);

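   /* Main scheduling loop: ALU work is preferred, but when one of the
    * other ready queues grows beyond a threshold we switch the clause
    * type so that the queued instructions (and the registers they keep
    * live) are drained first. */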
   while (have_instr) {

      sfn_log << SfnLog::schedule << "Have ready instructions\n";

      if (alu_vec_ready.size())
         sfn_log << SfnLog::schedule << "  ALU V:" << alu_vec_ready.size() << "\n";

      if (alu_trans_ready.size())
         sfn_log << SfnLog::schedule << "  ALU T:" << alu_trans_ready.size() << "\n";

      if (alu_groups_ready.size())
         sfn_log << SfnLog::schedule << "  ALU G:" << alu_groups_ready.size() << "\n";

      if (exports_ready.size())
         sfn_log << SfnLog::schedule << "  EXP:" << exports_ready.size() << "\n";
      if (tex_ready.size())
         sfn_log << SfnLog::schedule << "  TEX:" << tex_ready.size() << "\n";
      if (fetches_ready.size())
         sfn_log << SfnLog::schedule << "  FETCH:" << fetches_ready.size() << "\n";
      if (mem_ring_writes_ready.size())
         sfn_log << SfnLog::schedule << "  MEM_RING:" << mem_ring_writes_ready.size() << "\n";
      if (memops_ready.size())
         sfn_log << SfnLog::schedule << "  MEM_OPS:" << memops_ready.size() << "\n";

      if (!m_current_block->lds_group_active()) {
         if (last_sched != sched_free && memops_ready.size() > 8)
            current_sched = sched_free;
         else if (mem_ring_writes_ready.size() > 15)
            current_sched = sched_mem_ring;
         else if (rat_instr_ready.size() > 3)
            current_sched = sched_rat;
         else if (tex_ready.size() > 3)
            current_sched = sched_tex;
      }

      switch (current_sched) {
      case sched_alu:
         if (!schedule_alu(out_blocks)) {
            assert(!m_current_block->lds_group_active());
            current_sched = sched_tex;
            continue;
         }
         last_sched = current_sched;
         break;
      case sched_tex:
         if (tex_ready.empty() || !schedule_tex(out_blocks)) {
            current_sched = sched_fetch;
            continue;
         }
         last_sched = current_sched;
         break;
      case sched_fetch:
         if (!fetches_ready.empty()) {
            schedule_vtx(out_blocks);
            last_sched = current_sched;
         }
         current_sched = sched_gds;
         continue;
      case sched_gds:
         if (!gds_ready.empty()) {
            schedule_gds(out_blocks, gds_ready);
            last_sched = current_sched;
         }
         current_sched = sched_mem_ring;
         continue;
      case sched_mem_ring:
         if (mem_ring_writes_ready.empty() || !schedule_cf(out_blocks, mem_ring_writes_ready)) {
            current_sched = sched_write_tf;
            continue;
         }
         last_sched = current_sched;
         break;
      case sched_write_tf:
         if (write_tf_ready.empty() || !schedule_gds(out_blocks, write_tf_ready)) {
            current_sched = sched_rat;
            continue;
         }
         last_sched = current_sched;
         break;
      case sched_rat:
         if (rat_instr_ready.empty() || !schedule_cf(out_blocks, rat_instr_ready)) {
            current_sched = sched_free;
            continue;
         }
         last_sched = current_sched;
         break;
      case sched_free:
         if (memops_ready.empty() || !schedule_cf(out_blocks, memops_ready)) {
            current_sched = sched_alu;
            break;
         }
         last_sched = current_sched;
      }

      have_instr = collect_ready(cir);
   }

   /* Always emit exports at the end of a block */
   while (collect_ready_type(exports_ready, cir.exports))
      schedule_exports(out_blocks, exports_ready);

   bool fail = false;

   if (!cir.alu_groups.empty()) {
      std::cerr << "Unscheduled ALU groups:\n";
      for (auto& a : cir.alu_groups) {
         std::cerr << "   " << *a << "\n";
      }
      fail = true;
   }

   if (!cir.alu_vec.empty()) {
      std::cerr << "Unscheduled ALU vec ops:\n";
      for (auto& a : cir.alu_vec) {
         std::cerr << "   " << *a << "\n";
      }
      fail = true;
   }

   if (!cir.alu_trans.empty()) {
      std::cerr << "Unscheduled ALU trans ops:\n";
      for (auto& a : cir.alu_trans) {
         std::cerr << "   " << *a << "\n";
      }
      fail = true;
   }

   if (!cir.mem_write_instr.empty()) {
      std::cerr << "Unscheduled MEM ops:\n";
      for (auto& a : cir.mem_write_instr) {
         std::cerr << "   " << *a << "\n";
      }
      fail = true;
   }

   if (!cir.fetches.empty()) {
      std::cerr << "Unscheduled Fetch ops:\n";
      for (auto& a : cir.fetches) {
         std::cerr << "   " << *a << "\n";
      }
      fail = true;
   }

   if (!cir.tex.empty()) {
      std::cerr << "Unscheduled Tex ops:\n";
      for (auto& a : cir.tex) {
         std::cerr << "   " << *a << "\n";
      }
      fail = true;
   }

   assert(cir.tex.empty());
   assert(cir.exports.empty());
   assert(cir.fetches.empty());
   assert(cir.alu_vec.empty());
   assert(cir.mem_write_instr.empty());
   assert(cir.mem_ring_writes.empty());

   assert(!fail);

   if (cir.m_cf_instr) {
      /* The control flow instruction must be the last one in the block */
      m_current_block->push_back(cir.m_cf_instr);
      cir.m_cf_instr->set_scheduled();
   }

   out_blocks.push_back(m_current_block);
}

void BlockScheduler::finalize()
{
   if (m_last_pos)
      m_last_pos->set_is_last_export(true);
   if (m_last_pixel)
      m_last_pixel->set_is_last_export(true);
   if (m_last_param)
      m_last_param->set_is_last_export(true);
}

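/* Schedule one ALU group: either take a pre-formed group from the ready
 * list or build a new one from the ready vec/trans instructions. A failed
 * kcache reservation forces a new CF block, and an LDS fetch plus the
 * read from the return queue must stay within the same ALU clause. */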
bool BlockScheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
{
   bool success = false;
   AluGroup *group = nullptr;

   bool has_alu_ready = !alu_vec_ready.empty() || !alu_trans_ready.empty();

   bool has_lds_ready = !alu_vec_ready.empty() &&
                        (*alu_vec_ready.begin())->has_lds_access();

   /* If we have ready ALU instructions we have to start a new ALU block */
   if (has_alu_ready || !alu_groups_ready.empty()) {
      if (m_current_block->type() != Block::alu) {
         start_new_block(out_blocks, Block::alu);
         m_alu_groups_scheduled = 0;
      }
   }

   /* Schedule groups first, unless we have a pending LDS instruction.
    * We don't want the LDS instructions to be too far apart, because the
    * fetch + read from queue have to be in the same ALU CF block. */
   if (!alu_groups_ready.empty() && !has_lds_ready) {
      group = *alu_groups_ready.begin();
      if (!m_current_block->try_reserve_kcache(*group)) {
         start_new_block(out_blocks, Block::alu);
         m_current_block->set_instr_flag(Instr::force_cf);
      }

      if (!m_current_block->try_reserve_kcache(*group))
         unreachable("Scheduling a group in a new block should always succeed");
      alu_groups_ready.erase(alu_groups_ready.begin());
      sfn_log << SfnLog::schedule << "Schedule ALU group\n";
      success = true;
   } else if (has_alu_ready) {
      group = new AluGroup();
      sfn_log << SfnLog::schedule << "START new ALU group\n";
   } else {
      return false;
   }

   assert(group);

   int free_slots = group->free_slots();

   while (free_slots && has_alu_ready) {
      if (!alu_vec_ready.empty())
         success |= schedule_alu_to_group_vec(group);

      /* Apparently one can't schedule a t-slot if there is already
       * an LDS instruction scheduled.
       * TODO: check whether this is only relevant for actual LDS instructions
       * or also for instructions that read from the LDS return value queue */

      if (free_slots & 0x10 && !has_lds_ready) {
         sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
         if (!alu_trans_ready.empty())
            success |= schedule_alu_to_group_trans(group, alu_trans_ready);
         if (!alu_vec_ready.empty())
            success |= schedule_alu_to_group_trans(group, alu_vec_ready);
      }

      if (success) {
         ++m_alu_groups_scheduled;
         break;
      } else if (m_current_block->kcache_reservation_failed()) {
         // LDS read groups should not lead to impossible
         // kcache constellations
         assert(!m_current_block->lds_group_active());

         // kcache reservation failed, so we have to start a new CF
         start_new_block(out_blocks, Block::alu);
         m_current_block->set_instr_flag(Instr::force_cf);
      } else {
         return false;
      }
   }

   sfn_log << SfnLog::schedule << "Finalize ALU group\n";
   group->set_scheduled();
   group->fix_last_flag();
   group->set_nesting_depth(m_current_block->nesting_depth());
   m_current_block->push_back(group);

   if (group->has_lds_group_start())
      m_current_block->lds_group_start(*group->begin());

   if (group->has_lds_group_end())
      m_current_block->lds_group_end();

   if (group->has_kill_op()) {
      assert(!group->has_lds_group_start());
      start_new_block(out_blocks, Block::alu);
      m_current_block->set_instr_flag(Instr::force_cf);
   }

   return success;
}

bool BlockScheduler::schedule_tex(Shader::ShaderBlocks& out_blocks)
{
   if (m_current_block->type() != Block::tex || m_current_block->remaining_slots() == 0) {
      start_new_block(out_blocks, Block::tex);
      m_current_block->set_instr_flag(Instr::force_cf);
   }

   if (!tex_ready.empty() && m_current_block->remaining_slots() > 0) {
      auto ii = tex_ready.begin();
      sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";

      if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
         start_new_block(out_blocks, Block::tex);

      for (auto prep : (*ii)->prepare_instr()) {
         prep->set_scheduled();
         m_current_block->push_back(prep);
      }

      (*ii)->set_scheduled();
      m_current_block->push_back(*ii);
      tex_ready.erase(ii);
      return true;
   }
   return false;
}

bool BlockScheduler::schedule_vtx(Shader::ShaderBlocks& out_blocks)
{
   if (m_current_block->type() != Block::vtx || m_current_block->remaining_slots() == 0) {
      start_new_block(out_blocks, Block::vtx);
      m_current_block->set_instr_flag(Instr::force_cf);
   }
   return schedule_block(fetches_ready);
}

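/* Tessellation factor writes are scheduled like GDS instructions: this
 * helper serves both the gds_ready and the write_tf_ready lists. */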
template <typename I>
bool BlockScheduler::schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
{
   bool was_full = m_current_block->remaining_slots() == 0;
   if (m_current_block->type() != Block::gds || was_full) {
      start_new_block(out_blocks, Block::gds);
      if (was_full)
         m_current_block->set_instr_flag(Instr::force_cf);
   }
   return schedule_block(ready_list);
}

void BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type)
{
   if (!m_current_block->empty()) {
      sfn_log << SfnLog::schedule << "Start new block\n";
      assert(!m_current_block->lds_group_active());
      out_blocks.push_back(m_current_block);
      m_current_block = new Block(m_current_block->nesting_depth(), m_current_block->id());
   }
   m_current_block->set_type(type);
}

template <typename I>
bool BlockScheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
{
   if (ready_list.empty())
      return false;
   if (m_current_block->type() != Block::cf)
      start_new_block(out_blocks, Block::cf);
   return schedule(ready_list);
}

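/* Try to fill the vector slots of a group from the ready list. Each
 * candidate must first fit into the kcache reservation of the current
 * block, otherwise it is skipped and retried with a later group. */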
bool BlockScheduler::schedule_alu_to_group_vec(AluGroup *group)
{
   assert(group);
   assert(!alu_vec_ready.empty());

   bool success = false;
   auto i = alu_vec_ready.begin();
   auto e = alu_vec_ready.end();
   while (i != e) {
      sfn_log << SfnLog::schedule << "Try schedule to vec " << **i;

      if (!m_current_block->try_reserve_kcache(**i)) {
         sfn_log << SfnLog::schedule << " failed (kcache)\n";
         ++i;
         continue;
      }

      if (group->add_vec_instructions(*i)) {
         auto old_i = i;
         ++i;
         if ((*old_i)->has_alu_flag(alu_is_lds)) {
            --m_lds_addr_count;
         }

         alu_vec_ready.erase(old_i);
         success = true;
         sfn_log << SfnLog::schedule << " success\n";
      } else {
         ++i;
         sfn_log << SfnLog::schedule << " failed\n";
      }
   }
   return success;
}

bool BlockScheduler::schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist)
{
   assert(group);

   bool success = false;
   auto i = readylist.begin();
   auto e = readylist.end();
   while (i != e) {
      sfn_log << SfnLog::schedule << "Try schedule to trans " << **i;
      if (!m_current_block->try_reserve_kcache(**i)) {
         sfn_log << SfnLog::schedule << " failed (kcache)\n";
         ++i;
         continue;
      }

      if (group->add_trans_instructions(*i)) {
         auto old_i = i;
         ++i;
         readylist.erase(old_i);
         success = true;
         sfn_log << SfnLog::schedule << " success\n";
         break;
      } else {
         ++i;
         sfn_log << SfnLog::schedule << " failed\n";
      }
   }
   return success;
}

template <typename I>
bool BlockScheduler::schedule(std::list<I *>& ready_list)
{
   if (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
      auto ii = ready_list.begin();
      sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
      (*ii)->set_scheduled();
      m_current_block->push_back(*ii);
      ready_list.erase(ii);
      return true;
   }
   return false;
}

template <typename I>
bool BlockScheduler::schedule_block(std::list<I *>& ready_list)
{
   bool success = false;
   while (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
      auto ii = ready_list.begin();
      sfn_log << SfnLog::schedule << "Schedule: " << **ii << " "
              << m_current_block->remaining_slots() << "\n";
      (*ii)->set_scheduled();
      m_current_block->push_back(*ii);
      ready_list.erase(ii);
      success = true;
   }
   return success;
}

bool BlockScheduler::schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list)
{
   if (m_current_block->type() != Block::cf)
      start_new_block(out_blocks, Block::cf);

   if (!ready_list.empty()) {
      auto ii = ready_list.begin();
      sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
      (*ii)->set_scheduled();
      m_current_block->push_back(*ii);
      switch ((*ii)->export_type()) {
      case ExportInstr::pos: m_last_pos = *ii; break;
      case ExportInstr::param: m_last_param = *ii; break;
      case ExportInstr::pixel: m_last_pixel = *ii; break;
      }
      (*ii)->set_is_last_export(false);
      ready_list.erase(ii);
      return true;
   }
   return false;
}

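/* Move instructions whose dependencies are satisfied from the collected
 * per-type lists into the corresponding ready lists. Returns true if at
 * least one ready list is non-empty afterwards. */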
bool BlockScheduler::collect_ready(CollectInstructions &available)
{
   sfn_log << SfnLog::schedule << "Ready instructions\n";
   bool result = false;
   result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec);
   result |= collect_ready_type(alu_trans_ready, available.alu_trans);
   result |= collect_ready_type(alu_groups_ready, available.alu_groups);
   result |= collect_ready_type(gds_ready, available.gds_op);
   result |= collect_ready_type(tex_ready, available.tex);
   result |= collect_ready_type(fetches_ready, available.fetches);
   result |= collect_ready_type(memops_ready, available.mem_write_instr);
   result |= collect_ready_type(mem_ring_writes_ready, available.mem_ring_writes);
   result |= collect_ready_type(write_tf_ready, available.write_tf);
   result |= collect_ready_type(rat_instr_ready, available.rat_instr);

   sfn_log << SfnLog::schedule << "\n";
   return result;
}

bool BlockScheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available)
{
   auto i = available.begin();
   auto e = available.end();

   for (auto alu : ready) {
      alu->add_priority(100 * alu->register_priority());
   }

   int max_check = 0;
   while (i != e && max_check++ < 32) {
      if (ready.size() < 32 && (*i)->ready()) {

         int priority = 0;
         /* LDS fetches that use static offsets are usually ready very fast,
          * so they would get scheduled early, and this leaves the problem
          * that we allocate too many registers holding just constant values,
          * which creates problems for RA. So limit the number of LDS
          * address registers.
          */
         if ((*i)->has_alu_flag(alu_lds_address)) {
            if (m_lds_addr_count > 64) {
               ++i;
               continue;
            } else {
               ++m_lds_addr_count;
            }
         }

         /* LDS instructions are scheduled with high priority.
          * Instructions that can go into the t-slot and don't have
          * indirect access are put in last, so that they don't block
          * vec-only instructions when scheduling to the vector slots.
          * For everything else we look at the register use. */

         if ((*i)->has_lds_access())
            priority = 100000;
         else if (AluGroup::has_t()) {
            auto opinfo = alu_ops.find((*i)->opcode());
            assert(opinfo != alu_ops.end());
            if (opinfo->second.can_channel(AluOp::t, m_chip_class) &&
                !std::get<0>((*i)->indirect_addr()))
               priority = -1;
         }

         priority += 100 * (*i)->register_priority();

         (*i)->add_priority(priority);
         ready.push_back(*i);

         auto old_i = i;
         ++i;
         available.erase(old_i);
      } else
         ++i;
   }

   for (auto& i: ready)
      sfn_log << SfnLog::schedule << "V:  " << *i << "\n";

   ready.sort([](const AluInstr *lhs, const AluInstr *rhs) {
      return lhs->priority() > rhs->priority();
   });

   for (auto& i: ready)
      sfn_log << SfnLog::schedule << "V (S):  " << *i << "\n";

   return !ready.empty();
}

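/* Single-character tags used to identify the instruction type in the
 * debug output of collect_ready_type below. */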
template <typename T>
struct type_char {
};

template <>
struct type_char<AluInstr> {
   static constexpr char value = 'A';
};

template <>
struct type_char<AluGroup> {
   static constexpr char value = 'G';
};

template <>
struct type_char<ExportInstr> {
   static constexpr char value = 'E';
};

template <>
struct type_char<TexInstr> {
   static constexpr char value = 'T';
};

template <>
struct type_char<FetchInstr> {
   static constexpr char value = 'F';
};

template <>
struct type_char<WriteOutInstr> {
   static constexpr char value = 'M';
};

template <>
struct type_char<MemRingOutInstr> {
   static constexpr char value = 'R';
};

template <>
struct type_char<WriteTFInstr> {
   static constexpr char value = 'X';
};

template <>
struct type_char<GDSInstr> {
   static constexpr char value = 'S';
};

template <>
struct type_char<RatInstr> {
   static constexpr char value = 'I';
};

template <typename T>
bool BlockScheduler::collect_ready_type(std::list<T *>& ready, std::list<T *>& available)
{
   auto i = available.begin();
   auto e = available.end();

   int lookahead = 16;
   while (i != e && ready.size() < 16 && lookahead-- > 0) {
      if ((*i)->ready()) {
         ready.push_back(*i);
         auto old_i = i;
         ++i;
         available.erase(old_i);
      } else
         ++i;
   }

   for (auto& i: ready)
      sfn_log << SfnLog::schedule << type_char<T>::value << ";  " << *i << "\n";

   return !ready.empty();
}

} // namespace r600