/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2022 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "sfn_scheduler.h"
28bf215546Sopenharmony_ci#include "sfn_instr_alugroup.h"
29bf215546Sopenharmony_ci#include "sfn_instr_controlflow.h"
30bf215546Sopenharmony_ci#include "sfn_instr_export.h"
31bf215546Sopenharmony_ci#include "sfn_instr_fetch.h"
32bf215546Sopenharmony_ci#include "sfn_instr_mem.h"
33bf215546Sopenharmony_ci#include "sfn_instr_lds.h"
34bf215546Sopenharmony_ci#include "sfn_instr_tex.h"
35bf215546Sopenharmony_ci#include "sfn_debug.h"
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci#include <algorithm>
38bf215546Sopenharmony_ci#include <sstream>
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_cinamespace r600 {
41bf215546Sopenharmony_ci
42bf215546Sopenharmony_ciclass CollectInstructions : public InstrVisitor {
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_cipublic:
45bf215546Sopenharmony_ci   CollectInstructions(ValueFactory& vf):
46bf215546Sopenharmony_ci      m_value_factory(vf)  {}
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci   void visit(AluInstr *instr) override {
49bf215546Sopenharmony_ci      if (instr->has_alu_flag(alu_is_trans))
50bf215546Sopenharmony_ci         alu_trans.push_back(instr);
51bf215546Sopenharmony_ci      else {
52bf215546Sopenharmony_ci         if (instr->alu_slots() == 1)
53bf215546Sopenharmony_ci            alu_vec.push_back(instr);
54bf215546Sopenharmony_ci         else
55bf215546Sopenharmony_ci            alu_groups.push_back(instr->split(m_value_factory));
56bf215546Sopenharmony_ci      }
57bf215546Sopenharmony_ci   }
58bf215546Sopenharmony_ci   void visit(AluGroup *instr) override {
59bf215546Sopenharmony_ci      alu_groups.push_back(instr);
60bf215546Sopenharmony_ci   }
61bf215546Sopenharmony_ci   void visit(TexInstr *instr) override {
62bf215546Sopenharmony_ci      tex.push_back(instr);
63bf215546Sopenharmony_ci   }
64bf215546Sopenharmony_ci   void visit(ExportInstr *instr) override {
65bf215546Sopenharmony_ci      exports.push_back(instr);
66bf215546Sopenharmony_ci   }
67bf215546Sopenharmony_ci   void visit(FetchInstr *instr)  override {
68bf215546Sopenharmony_ci      fetches.push_back(instr);
69bf215546Sopenharmony_ci   }
70bf215546Sopenharmony_ci   void visit(Block *instr) override {
71bf215546Sopenharmony_ci      for (auto& i: *instr)
72bf215546Sopenharmony_ci         i->accept(*this);
73bf215546Sopenharmony_ci   }
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci   void visit(ControlFlowInstr *instr) override {
76bf215546Sopenharmony_ci      assert(!m_cf_instr);
77bf215546Sopenharmony_ci      m_cf_instr = instr;
78bf215546Sopenharmony_ci   }
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci   void visit(IfInstr *instr) override {
81bf215546Sopenharmony_ci      assert(!m_cf_instr);
82bf215546Sopenharmony_ci      m_cf_instr = instr;
83bf215546Sopenharmony_ci   }
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci   void visit(EmitVertexInstr *instr) override {
86bf215546Sopenharmony_ci      assert(!m_cf_instr);
87bf215546Sopenharmony_ci      m_cf_instr = instr;
88bf215546Sopenharmony_ci   }
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci   void visit(ScratchIOInstr *instr) override {
91bf215546Sopenharmony_ci      mem_write_instr.push_back(instr);
92bf215546Sopenharmony_ci   }
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_ci   void visit(StreamOutInstr *instr) override {
95bf215546Sopenharmony_ci      mem_write_instr.push_back(instr);
96bf215546Sopenharmony_ci   }
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci   void visit(MemRingOutInstr *instr) override {
99bf215546Sopenharmony_ci      mem_ring_writes.push_back(instr);
100bf215546Sopenharmony_ci   }
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_ci   void visit(GDSInstr *instr) override {
103bf215546Sopenharmony_ci      gds_op.push_back(instr);
104bf215546Sopenharmony_ci   }
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci   void visit(WriteTFInstr *instr) override {
107bf215546Sopenharmony_ci      write_tf.push_back(instr);
108bf215546Sopenharmony_ci   }
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_ci   void visit(LDSReadInstr *instr) override {
111bf215546Sopenharmony_ci      std::vector<AluInstr*> buffer;
112bf215546Sopenharmony_ci      m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
113bf215546Sopenharmony_ci      for (auto& i: buffer) {
114bf215546Sopenharmony_ci         i->accept(*this);
115bf215546Sopenharmony_ci      }
116bf215546Sopenharmony_ci   }
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci   void visit(LDSAtomicInstr *instr) override {
119bf215546Sopenharmony_ci      std::vector<AluInstr*> buffer;
120bf215546Sopenharmony_ci      m_last_lds_instr = instr->split(buffer, m_last_lds_instr);
121bf215546Sopenharmony_ci      for (auto& i: buffer) {
122bf215546Sopenharmony_ci         i->accept(*this);
123bf215546Sopenharmony_ci      }
124bf215546Sopenharmony_ci   }
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci   void visit(RatInstr *instr) override {
127bf215546Sopenharmony_ci      rat_instr.push_back(instr);
128bf215546Sopenharmony_ci   }
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci   std::list<AluInstr *> alu_trans;
132bf215546Sopenharmony_ci   std::list<AluInstr *> alu_vec;
133bf215546Sopenharmony_ci   std::list<TexInstr *> tex;
134bf215546Sopenharmony_ci   std::list<AluGroup *> alu_groups;
135bf215546Sopenharmony_ci   std::list<ExportInstr *> exports;
136bf215546Sopenharmony_ci   std::list<FetchInstr *> fetches;
137bf215546Sopenharmony_ci   std::list<WriteOutInstr *> mem_write_instr;
138bf215546Sopenharmony_ci   std::list<MemRingOutInstr *> mem_ring_writes;
139bf215546Sopenharmony_ci   std::list<GDSInstr *> gds_op;
140bf215546Sopenharmony_ci   std::list<WriteTFInstr *> write_tf;
141bf215546Sopenharmony_ci   std::list<RatInstr *> rat_instr;
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci   Instr *m_cf_instr{nullptr};
144bf215546Sopenharmony_ci   ValueFactory& m_value_factory;
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci   AluInstr *m_last_lds_instr{nullptr};
147bf215546Sopenharmony_ci};
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ciclass BlockSheduler {
150bf215546Sopenharmony_cipublic:
151bf215546Sopenharmony_ci   BlockSheduler(r600_chip_class chip_class);
152bf215546Sopenharmony_ci   void run(Shader *shader);
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci   void finalize();
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ciprivate:
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci   void schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf);
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_ci   bool collect_ready(CollectInstructions &available);
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci   template <typename T>
163bf215546Sopenharmony_ci   bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig);
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci   bool collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available);
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci   bool schedule_tex(Shader::ShaderBlocks& out_blocks);
168bf215546Sopenharmony_ci   bool schedule_vtx(Shader::ShaderBlocks& out_blocks);
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_ci   template <typename I>
171bf215546Sopenharmony_ci   bool schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);
172bf215546Sopenharmony_ci
173bf215546Sopenharmony_ci   template <typename I>
174bf215546Sopenharmony_ci   bool schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list);
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci   bool schedule_alu(Shader::ShaderBlocks& out_blocks);
177bf215546Sopenharmony_ci   void start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type);
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci   bool schedule_alu_to_group_vec(AluGroup *group);
180bf215546Sopenharmony_ci   bool schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist);
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci   bool schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list);
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci   template <typename I>
185bf215546Sopenharmony_ci   bool schedule(std::list<I *>& ready_list);
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci   template <typename I>
188bf215546Sopenharmony_ci   bool schedule_block(std::list<I *>& ready_list);
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   std::list<AluInstr *> alu_vec_ready;
191bf215546Sopenharmony_ci   std::list<AluInstr *> alu_trans_ready;
192bf215546Sopenharmony_ci   std::list<AluGroup *> alu_groups_ready;
193bf215546Sopenharmony_ci   std::list<TexInstr *> tex_ready;
194bf215546Sopenharmony_ci   std::list<ExportInstr *> exports_ready;
195bf215546Sopenharmony_ci   std::list<FetchInstr *> fetches_ready;
196bf215546Sopenharmony_ci   std::list<WriteOutInstr *> memops_ready;
197bf215546Sopenharmony_ci   std::list<MemRingOutInstr *> mem_ring_writes_ready;
198bf215546Sopenharmony_ci   std::list<GDSInstr *> gds_ready;
199bf215546Sopenharmony_ci   std::list<WriteTFInstr *> write_tf_ready;
200bf215546Sopenharmony_ci   std::list<RatInstr *> rat_instr_ready;
201bf215546Sopenharmony_ci
202bf215546Sopenharmony_ci   enum {
203bf215546Sopenharmony_ci      sched_alu,
204bf215546Sopenharmony_ci      sched_tex,
205bf215546Sopenharmony_ci      sched_fetch,
206bf215546Sopenharmony_ci      sched_free,
207bf215546Sopenharmony_ci      sched_mem_ring,
208bf215546Sopenharmony_ci      sched_gds,
209bf215546Sopenharmony_ci      sched_write_tf,
210bf215546Sopenharmony_ci      sched_rat,
211bf215546Sopenharmony_ci   } current_shed;
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci   ExportInstr *m_last_pos;
214bf215546Sopenharmony_ci   ExportInstr *m_last_pixel;
215bf215546Sopenharmony_ci   ExportInstr *m_last_param;
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci   Block *m_current_block;
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci   int m_lds_addr_count{0};
220bf215546Sopenharmony_ci   int m_alu_groups_schduled{0};
221bf215546Sopenharmony_ci   r600_chip_class m_chip_class;
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_ci};
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ciShader *schedule(Shader *original)
226bf215546Sopenharmony_ci{
227bf215546Sopenharmony_ci   Block::set_chipclass(original->chip_class());
228bf215546Sopenharmony_ci   AluGroup::set_chipclass(original->chip_class());
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci   sfn_log << SfnLog::schedule << "Original shader\n";
231bf215546Sopenharmony_ci   if (sfn_log.has_debug_flag(SfnLog::schedule)) {
232bf215546Sopenharmony_ci      std::stringstream ss;
233bf215546Sopenharmony_ci      original->print(ss);
234bf215546Sopenharmony_ci      sfn_log << ss.str() << "\n\n";
235bf215546Sopenharmony_ci   }
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci   // TODO later it might be necessary to clone the shader
238bf215546Sopenharmony_ci   // to be able to re-start scheduling
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci   auto scheduled_shader = original;
241bf215546Sopenharmony_ci   BlockSheduler s(original->chip_class());
242bf215546Sopenharmony_ci   s.run(scheduled_shader);
243bf215546Sopenharmony_ci   s.finalize();
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ci   sfn_log << SfnLog::schedule << "Scheduled shader\n";
246bf215546Sopenharmony_ci   if (sfn_log.has_debug_flag(SfnLog::schedule)) {
247bf215546Sopenharmony_ci      std::stringstream ss;
248bf215546Sopenharmony_ci      scheduled_shader->print(ss);
249bf215546Sopenharmony_ci      sfn_log << ss.str() << "\n\n";
250bf215546Sopenharmony_ci   }
251bf215546Sopenharmony_ci
252bf215546Sopenharmony_ci   return scheduled_shader;
253bf215546Sopenharmony_ci}
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_ciBlockSheduler::BlockSheduler(r600_chip_class chip_class):
256bf215546Sopenharmony_ci   current_shed(sched_alu),
257bf215546Sopenharmony_ci   m_last_pos(nullptr),
258bf215546Sopenharmony_ci   m_last_pixel(nullptr),
259bf215546Sopenharmony_ci   m_last_param(nullptr),
260bf215546Sopenharmony_ci   m_current_block(nullptr),
261bf215546Sopenharmony_ci   m_chip_class(chip_class)
262bf215546Sopenharmony_ci{
263bf215546Sopenharmony_ci}
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_civoid BlockSheduler::run( Shader *shader)
266bf215546Sopenharmony_ci{
267bf215546Sopenharmony_ci   Shader::ShaderBlocks scheduled_blocks;
268bf215546Sopenharmony_ci
269bf215546Sopenharmony_ci   for (auto& block : shader->func()) {
270bf215546Sopenharmony_ci      sfn_log << SfnLog::schedule  << "Process block " << block->id() <<"\n";
271bf215546Sopenharmony_ci      if (sfn_log.has_debug_flag(SfnLog::schedule)) {
272bf215546Sopenharmony_ci         std::stringstream ss;
273bf215546Sopenharmony_ci         block->print(ss);
274bf215546Sopenharmony_ci         sfn_log << ss.str() << "\n";
275bf215546Sopenharmony_ci      }
276bf215546Sopenharmony_ci      schedule_block(*block, scheduled_blocks, shader->value_factory());
277bf215546Sopenharmony_ci   }
278bf215546Sopenharmony_ci
279bf215546Sopenharmony_ci   shader->reset_function(scheduled_blocks);
280bf215546Sopenharmony_ci}
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_civoid BlockSheduler::schedule_block(Block& in_block, Shader::ShaderBlocks& out_blocks, ValueFactory& vf)
283bf215546Sopenharmony_ci{
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci   assert(in_block.id() >= 0);
286bf215546Sopenharmony_ci
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_ci   current_shed = sched_fetch;
289bf215546Sopenharmony_ci   auto last_shed = sched_fetch;
290bf215546Sopenharmony_ci
291bf215546Sopenharmony_ci   CollectInstructions cir(vf);
292bf215546Sopenharmony_ci   in_block.accept(cir);
293bf215546Sopenharmony_ci
294bf215546Sopenharmony_ci   bool have_instr = collect_ready(cir);
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_ci   m_current_block = new Block(in_block.nesting_depth(), in_block.id());
297bf215546Sopenharmony_ci   assert(m_current_block->id() >= 0);
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_ci   while (have_instr) {
300bf215546Sopenharmony_ci
301bf215546Sopenharmony_ci      sfn_log << SfnLog::schedule << "Have ready instructions\n";
302bf215546Sopenharmony_ci
303bf215546Sopenharmony_ci      if (alu_vec_ready.size())
304bf215546Sopenharmony_ci         sfn_log << SfnLog::schedule << "  ALU V:" << alu_vec_ready.size() << "\n";
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci      if (alu_trans_ready.size())
307bf215546Sopenharmony_ci         sfn_log << SfnLog::schedule <<  "  ALU T:" << alu_trans_ready.size() << "\n";
308bf215546Sopenharmony_ci
309bf215546Sopenharmony_ci      if (alu_groups_ready.size())
310bf215546Sopenharmony_ci         sfn_log << SfnLog::schedule << "  ALU G:" << alu_groups_ready.size() << "\n";
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci      if (exports_ready.size())
313bf215546Sopenharmony_ci         sfn_log << SfnLog::schedule << "  EXP:" << exports_ready.size()
314bf215546Sopenharmony_ci                 << "\n";
315bf215546Sopenharmony_ci      if (tex_ready.size())
316bf215546Sopenharmony_ci         sfn_log << SfnLog::schedule << "  TEX:" << tex_ready.size()
317bf215546Sopenharmony_ci                 << "\n";
318bf215546Sopenharmony_ci      if (fetches_ready.size())
319bf215546Sopenharmony_ci         sfn_log << SfnLog::schedule << "  FETCH:" << fetches_ready.size()
320bf215546Sopenharmony_ci                 << "\n";
321bf215546Sopenharmony_ci      if (mem_ring_writes_ready.size())
322bf215546Sopenharmony_ci         sfn_log << SfnLog::schedule << "  MEM_RING:" << mem_ring_writes_ready.size()
323bf215546Sopenharmony_ci                 << "\n";
324bf215546Sopenharmony_ci      if (memops_ready.size())
325bf215546Sopenharmony_ci         sfn_log << SfnLog::schedule << "  MEM_OPS:" << mem_ring_writes_ready.size()
326bf215546Sopenharmony_ci                 << "\n";
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_ci      if (!m_current_block->lds_group_active()) {
329bf215546Sopenharmony_ci         if (last_shed != sched_free && memops_ready.size() > 8)
330bf215546Sopenharmony_ci            current_shed = sched_free;
331bf215546Sopenharmony_ci         else if (mem_ring_writes_ready.size() > 15)
332bf215546Sopenharmony_ci            current_shed = sched_mem_ring;
333bf215546Sopenharmony_ci         else if (rat_instr_ready.size() > 3)
334bf215546Sopenharmony_ci            current_shed = sched_rat;
335bf215546Sopenharmony_ci         else if (tex_ready.size() > 3)
336bf215546Sopenharmony_ci            current_shed = sched_tex;
337bf215546Sopenharmony_ci      }
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_ci      switch (current_shed) {
340bf215546Sopenharmony_ci      case sched_alu:
341bf215546Sopenharmony_ci         if (!schedule_alu(out_blocks)) {
342bf215546Sopenharmony_ci            assert(!m_current_block->lds_group_active());
343bf215546Sopenharmony_ci            current_shed = sched_tex;
344bf215546Sopenharmony_ci            continue;
345bf215546Sopenharmony_ci         }
346bf215546Sopenharmony_ci         last_shed = current_shed;
347bf215546Sopenharmony_ci         break;
348bf215546Sopenharmony_ci      case sched_tex:
349bf215546Sopenharmony_ci         if (tex_ready.empty() || !schedule_tex(out_blocks)) {
350bf215546Sopenharmony_ci            current_shed = sched_fetch;
351bf215546Sopenharmony_ci            continue;
352bf215546Sopenharmony_ci         }
353bf215546Sopenharmony_ci         last_shed = current_shed;
354bf215546Sopenharmony_ci         break;
355bf215546Sopenharmony_ci      case sched_fetch:
356bf215546Sopenharmony_ci         if (!fetches_ready.empty()) {
357bf215546Sopenharmony_ci            schedule_vtx(out_blocks);
358bf215546Sopenharmony_ci            last_shed = current_shed;
359bf215546Sopenharmony_ci         }
360bf215546Sopenharmony_ci         current_shed = sched_gds;
361bf215546Sopenharmony_ci         continue;
362bf215546Sopenharmony_ci      case sched_gds:
363bf215546Sopenharmony_ci         if (!gds_ready.empty()) {
364bf215546Sopenharmony_ci            schedule_gds(out_blocks, gds_ready);
365bf215546Sopenharmony_ci            last_shed = current_shed;
366bf215546Sopenharmony_ci         }
367bf215546Sopenharmony_ci         current_shed = sched_mem_ring;
368bf215546Sopenharmony_ci         continue;
369bf215546Sopenharmony_ci      case sched_mem_ring:
370bf215546Sopenharmony_ci         if (mem_ring_writes_ready.empty() || !schedule_cf(out_blocks, mem_ring_writes_ready)) {
371bf215546Sopenharmony_ci            current_shed = sched_write_tf;
372bf215546Sopenharmony_ci            continue;
373bf215546Sopenharmony_ci         }
374bf215546Sopenharmony_ci         last_shed = current_shed;
375bf215546Sopenharmony_ci         break;
376bf215546Sopenharmony_ci      case sched_write_tf:
377bf215546Sopenharmony_ci         if (write_tf_ready.empty() || !schedule_gds(out_blocks, write_tf_ready)) {
378bf215546Sopenharmony_ci            current_shed = sched_rat;
379bf215546Sopenharmony_ci            continue;
380bf215546Sopenharmony_ci         }
381bf215546Sopenharmony_ci         last_shed = current_shed;
382bf215546Sopenharmony_ci         break;
383bf215546Sopenharmony_ci      case sched_rat:
384bf215546Sopenharmony_ci         if (rat_instr_ready.empty() || !schedule_cf(out_blocks, rat_instr_ready)) {
385bf215546Sopenharmony_ci             current_shed = sched_free;
386bf215546Sopenharmony_ci             continue;
387bf215546Sopenharmony_ci          }
388bf215546Sopenharmony_ci         last_shed = current_shed;
389bf215546Sopenharmony_ci         break;
390bf215546Sopenharmony_ci      case sched_free:
391bf215546Sopenharmony_ci         if (memops_ready.empty() || !schedule_cf(out_blocks, memops_ready)) {
392bf215546Sopenharmony_ci            current_shed = sched_alu;
393bf215546Sopenharmony_ci            break;
394bf215546Sopenharmony_ci         }
395bf215546Sopenharmony_ci         last_shed = current_shed;
396bf215546Sopenharmony_ci      }
397bf215546Sopenharmony_ci
398bf215546Sopenharmony_ci      have_instr = collect_ready(cir);
399bf215546Sopenharmony_ci   }
400bf215546Sopenharmony_ci
401bf215546Sopenharmony_ci   /* Emit exports always at end of a block */
402bf215546Sopenharmony_ci   while (collect_ready_type(exports_ready, cir.exports))
403bf215546Sopenharmony_ci      schedule_exports(out_blocks, exports_ready);
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_ci   bool fail = false;
406bf215546Sopenharmony_ci
407bf215546Sopenharmony_ci   if (!cir.alu_groups.empty()) {
408bf215546Sopenharmony_ci      std::cerr << "Unscheduled ALU groups:\n";
409bf215546Sopenharmony_ci      for (auto& a : cir.alu_groups) {
410bf215546Sopenharmony_ci          std::cerr << "   " << *a << "\n";
411bf215546Sopenharmony_ci      }
412bf215546Sopenharmony_ci      fail = true;
413bf215546Sopenharmony_ci   }
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_ci   if (!cir.alu_vec.empty()){
416bf215546Sopenharmony_ci      std::cerr << "Unscheduled ALU vec ops:\n";
417bf215546Sopenharmony_ci      for (auto& a : cir.alu_vec) {
418bf215546Sopenharmony_ci          std::cerr << "   " << *a << "\n";
419bf215546Sopenharmony_ci      }
420bf215546Sopenharmony_ci      fail = true;
421bf215546Sopenharmony_ci   }
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_ci   if (!cir.alu_trans.empty()){
424bf215546Sopenharmony_ci      std::cerr << "Unscheduled ALU trans ops:\n";
425bf215546Sopenharmony_ci      for (auto& a : cir.alu_trans) {
426bf215546Sopenharmony_ci          std::cerr << "   " << *a << "\n";
427bf215546Sopenharmony_ci      }
428bf215546Sopenharmony_ci      fail = true;
429bf215546Sopenharmony_ci   }
430bf215546Sopenharmony_ci   if (!cir.mem_write_instr.empty()){
431bf215546Sopenharmony_ci      std::cerr << "Unscheduled MEM ops:\n";
432bf215546Sopenharmony_ci      for (auto& a : cir.mem_write_instr) {
433bf215546Sopenharmony_ci          std::cerr << "   " << *a << "\n";
434bf215546Sopenharmony_ci      }
435bf215546Sopenharmony_ci      fail = true;
436bf215546Sopenharmony_ci   }
437bf215546Sopenharmony_ci
438bf215546Sopenharmony_ci   if (!cir.fetches.empty()){
439bf215546Sopenharmony_ci      std::cerr << "Unscheduled Fetch ops:\n";
440bf215546Sopenharmony_ci      for (auto& a : cir.fetches) {
441bf215546Sopenharmony_ci          std::cerr << "   " << *a << "\n";
442bf215546Sopenharmony_ci      }
443bf215546Sopenharmony_ci      fail = true;
444bf215546Sopenharmony_ci   }
445bf215546Sopenharmony_ci
446bf215546Sopenharmony_ci   if (!cir.tex.empty()){
447bf215546Sopenharmony_ci      std::cerr << "Unscheduled Tex ops:\n";
448bf215546Sopenharmony_ci      for (auto& a : cir.tex) {
449bf215546Sopenharmony_ci          std::cerr << "   " << *a << "\n";
450bf215546Sopenharmony_ci      }
451bf215546Sopenharmony_ci      fail = true;
452bf215546Sopenharmony_ci   }
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_ci   assert(cir.tex.empty());
455bf215546Sopenharmony_ci   assert(cir.exports.empty());
456bf215546Sopenharmony_ci   assert(cir.fetches.empty());
457bf215546Sopenharmony_ci   assert(cir.alu_vec.empty());
458bf215546Sopenharmony_ci   assert(cir.mem_write_instr.empty());
459bf215546Sopenharmony_ci   assert(cir.mem_ring_writes.empty());
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci   assert (!fail);
462bf215546Sopenharmony_ci
463bf215546Sopenharmony_ci   if (cir.m_cf_instr) {
464bf215546Sopenharmony_ci      // Assert that if condition is ready
465bf215546Sopenharmony_ci      m_current_block->push_back(cir.m_cf_instr);
466bf215546Sopenharmony_ci      cir.m_cf_instr->set_scheduled();
467bf215546Sopenharmony_ci   }
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_ci   out_blocks.push_back(m_current_block);
470bf215546Sopenharmony_ci}
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_civoid BlockSheduler::finalize()
473bf215546Sopenharmony_ci{
474bf215546Sopenharmony_ci   if (m_last_pos)
475bf215546Sopenharmony_ci      m_last_pos->set_is_last_export(true);
476bf215546Sopenharmony_ci   if (m_last_pixel)
477bf215546Sopenharmony_ci      m_last_pixel->set_is_last_export(true);
478bf215546Sopenharmony_ci   if (m_last_param)
479bf215546Sopenharmony_ci      m_last_param->set_is_last_export(true);
480bf215546Sopenharmony_ci}
481bf215546Sopenharmony_ci
482bf215546Sopenharmony_cibool BlockSheduler::schedule_alu(Shader::ShaderBlocks& out_blocks)
483bf215546Sopenharmony_ci{
484bf215546Sopenharmony_ci   bool success = false;
485bf215546Sopenharmony_ci   AluGroup *group = nullptr;
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci   bool has_alu_ready = !alu_vec_ready.empty() || !alu_trans_ready.empty();
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_ci   bool has_lds_ready = !alu_vec_ready.empty() &&
490bf215546Sopenharmony_ci                        (*alu_vec_ready.begin())->has_lds_access();
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci   /* If we have ready ALU instructions we have to start a new ALU block */
493bf215546Sopenharmony_ci   if (has_alu_ready ||  !alu_groups_ready.empty()) {
494bf215546Sopenharmony_ci      if (m_current_block->type() != Block::alu) {
495bf215546Sopenharmony_ci         start_new_block(out_blocks, Block::alu);
496bf215546Sopenharmony_ci         m_alu_groups_schduled = 0;
497bf215546Sopenharmony_ci      }
498bf215546Sopenharmony_ci   }
499bf215546Sopenharmony_ci
500bf215546Sopenharmony_ci   /* Schedule groups first. unless we have a pending LDS instuction
501bf215546Sopenharmony_ci    * We don't want the LDS instructions to be too far apart because the
502bf215546Sopenharmony_ci    * fetch + read from queue has to be in the same ALU CF block */
503bf215546Sopenharmony_ci   if (!alu_groups_ready.empty() && !has_lds_ready) {
504bf215546Sopenharmony_ci      group = *alu_groups_ready.begin();
505bf215546Sopenharmony_ci      if (!m_current_block->try_reserve_kcache(*group)) {
506bf215546Sopenharmony_ci         start_new_block(out_blocks, Block::alu);
507bf215546Sopenharmony_ci         m_current_block->set_instr_flag(Instr::force_cf);
508bf215546Sopenharmony_ci      }
509bf215546Sopenharmony_ci
510bf215546Sopenharmony_ci      if (!m_current_block->try_reserve_kcache(*group))
511bf215546Sopenharmony_ci         unreachable("Scheduling a group in a new block should always succeed");
512bf215546Sopenharmony_ci      alu_groups_ready.erase(alu_groups_ready.begin());
513bf215546Sopenharmony_ci      sfn_log << SfnLog::schedule << "Schedule ALU group\n";
514bf215546Sopenharmony_ci      success = true;
515bf215546Sopenharmony_ci   } else if (has_alu_ready) {
516bf215546Sopenharmony_ci      group = new AluGroup();
517bf215546Sopenharmony_ci      sfn_log << SfnLog::schedule << "START new ALU group\n";
518bf215546Sopenharmony_ci   } else {
519bf215546Sopenharmony_ci      return false;
520bf215546Sopenharmony_ci   }
521bf215546Sopenharmony_ci
522bf215546Sopenharmony_ci   assert(group);
523bf215546Sopenharmony_ci
524bf215546Sopenharmony_ci   int free_slots = group->free_slots();
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_ci   while (free_slots && has_alu_ready) {
527bf215546Sopenharmony_ci      if (!alu_vec_ready.empty())
528bf215546Sopenharmony_ci         success |= schedule_alu_to_group_vec(group);
529bf215546Sopenharmony_ci
530bf215546Sopenharmony_ci      /* Apparently one can't schedule a t-slot if there is already
531bf215546Sopenharmony_ci       * and LDS instruction scheduled.
532bf215546Sopenharmony_ci       * TODO: check whether this is only relevant for actual LDS instructions
533bf215546Sopenharmony_ci       * or also for instructions that read from the LDS return value queue */
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_ci      if (free_slots & 0x10 && !has_lds_ready) {
536bf215546Sopenharmony_ci         sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
537bf215546Sopenharmony_ci         if (!alu_trans_ready.empty())
538bf215546Sopenharmony_ci            success |= schedule_alu_to_group_trans(group, alu_trans_ready);
539bf215546Sopenharmony_ci         if (!alu_vec_ready.empty())
540bf215546Sopenharmony_ci            success |= schedule_alu_to_group_trans(group, alu_vec_ready);
541bf215546Sopenharmony_ci      }
542bf215546Sopenharmony_ci
543bf215546Sopenharmony_ci      if (success) {
544bf215546Sopenharmony_ci         ++m_alu_groups_schduled;
545bf215546Sopenharmony_ci         break;
546bf215546Sopenharmony_ci      } else if (m_current_block->kcache_reservation_failed()) {
547bf215546Sopenharmony_ci         // LDS read groups should not lead to impossible
548bf215546Sopenharmony_ci         // kcache constellations
549bf215546Sopenharmony_ci         assert(!m_current_block->lds_group_active());
550bf215546Sopenharmony_ci
551bf215546Sopenharmony_ci         // kcache reservation failed, so we have to start a new CF
552bf215546Sopenharmony_ci         start_new_block(out_blocks, Block::alu);
553bf215546Sopenharmony_ci         m_current_block->set_instr_flag(Instr::force_cf);
554bf215546Sopenharmony_ci      } else {
555bf215546Sopenharmony_ci         return false;
556bf215546Sopenharmony_ci      }
557bf215546Sopenharmony_ci   }
558bf215546Sopenharmony_ci
559bf215546Sopenharmony_ci   sfn_log << SfnLog::schedule << "Finalize ALU group\n";
560bf215546Sopenharmony_ci   group->set_scheduled();
561bf215546Sopenharmony_ci   group->fix_last_flag();
562bf215546Sopenharmony_ci   group->set_nesting_depth(m_current_block->nesting_depth());
563bf215546Sopenharmony_ci   m_current_block->push_back(group);
564bf215546Sopenharmony_ci
565bf215546Sopenharmony_ci   if (group->has_lds_group_start())
566bf215546Sopenharmony_ci      m_current_block->lds_group_start(*group->begin());
567bf215546Sopenharmony_ci
568bf215546Sopenharmony_ci   if (group->has_lds_group_end())
569bf215546Sopenharmony_ci      m_current_block->lds_group_end();
570bf215546Sopenharmony_ci
571bf215546Sopenharmony_ci   if (group->has_kill_op()) {
572bf215546Sopenharmony_ci      assert(!group->has_lds_group_start());
573bf215546Sopenharmony_ci      start_new_block(out_blocks, Block::alu);
574bf215546Sopenharmony_ci      m_current_block->set_instr_flag(Instr::force_cf);
575bf215546Sopenharmony_ci   }
576bf215546Sopenharmony_ci
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_ci   return success;
579bf215546Sopenharmony_ci}
580bf215546Sopenharmony_ci
581bf215546Sopenharmony_cibool BlockSheduler::schedule_tex(Shader::ShaderBlocks& out_blocks)
582bf215546Sopenharmony_ci{
583bf215546Sopenharmony_ci   if (m_current_block->type() != Block::tex || m_current_block->remaining_slots() ==  0) {
584bf215546Sopenharmony_ci      start_new_block(out_blocks, Block::tex);
585bf215546Sopenharmony_ci      m_current_block->set_instr_flag(Instr::force_cf);
586bf215546Sopenharmony_ci   }
587bf215546Sopenharmony_ci
588bf215546Sopenharmony_ci
589bf215546Sopenharmony_ci   if (!tex_ready.empty() && m_current_block->remaining_slots() > 0) {
590bf215546Sopenharmony_ci      auto ii = tex_ready.begin();
591bf215546Sopenharmony_ci      sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
592bf215546Sopenharmony_ci
593bf215546Sopenharmony_ci      if (m_current_block->remaining_slots() < 1 + (*ii)->prepare_instr().size())
594bf215546Sopenharmony_ci         start_new_block(out_blocks, Block::tex);
595bf215546Sopenharmony_ci
596bf215546Sopenharmony_ci      for (auto prep : (*ii)->prepare_instr()) {
597bf215546Sopenharmony_ci         prep->set_scheduled();
598bf215546Sopenharmony_ci         m_current_block->push_back(prep);
599bf215546Sopenharmony_ci      }
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_ci      (*ii)->set_scheduled();
602bf215546Sopenharmony_ci      m_current_block->push_back(*ii);
603bf215546Sopenharmony_ci      tex_ready.erase(ii);
604bf215546Sopenharmony_ci      return true;
605bf215546Sopenharmony_ci   }
606bf215546Sopenharmony_ci   return false;
607bf215546Sopenharmony_ci}
608bf215546Sopenharmony_ci
609bf215546Sopenharmony_cibool BlockSheduler::schedule_vtx(Shader::ShaderBlocks& out_blocks)
610bf215546Sopenharmony_ci{
611bf215546Sopenharmony_ci   if (m_current_block->type() != Block::vtx || m_current_block->remaining_slots() == 0) {
612bf215546Sopenharmony_ci      start_new_block(out_blocks, Block::vtx);
613bf215546Sopenharmony_ci      m_current_block->set_instr_flag(Instr::force_cf);
614bf215546Sopenharmony_ci   }
615bf215546Sopenharmony_ci   return schedule_block(fetches_ready);
616bf215546Sopenharmony_ci}
617bf215546Sopenharmony_ci
618bf215546Sopenharmony_citemplate <typename I>
619bf215546Sopenharmony_cibool BlockSheduler::schedule_gds(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
620bf215546Sopenharmony_ci{
621bf215546Sopenharmony_ci   bool was_full = m_current_block->remaining_slots() == 0;
622bf215546Sopenharmony_ci   if (m_current_block->type() != Block::gds || was_full) {
623bf215546Sopenharmony_ci      start_new_block(out_blocks, Block::gds);
624bf215546Sopenharmony_ci      if (was_full)
625bf215546Sopenharmony_ci         m_current_block->set_instr_flag(Instr::force_cf);
626bf215546Sopenharmony_ci   }
627bf215546Sopenharmony_ci   return schedule_block(ready_list);
628bf215546Sopenharmony_ci}
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_ci
631bf215546Sopenharmony_civoid BlockSheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type type)
632bf215546Sopenharmony_ci{
633bf215546Sopenharmony_ci   if (!m_current_block->empty()) {
634bf215546Sopenharmony_ci      sfn_log << SfnLog::schedule << "Start new block\n";
635bf215546Sopenharmony_ci      assert(!m_current_block->lds_group_active());
636bf215546Sopenharmony_ci      out_blocks.push_back(m_current_block);
637bf215546Sopenharmony_ci      m_current_block = new Block(m_current_block->nesting_depth(), m_current_block->id());
638bf215546Sopenharmony_ci   }
639bf215546Sopenharmony_ci   m_current_block->set_type(type);
640bf215546Sopenharmony_ci}
641bf215546Sopenharmony_ci
642bf215546Sopenharmony_citemplate <typename I>
643bf215546Sopenharmony_cibool BlockSheduler::schedule_cf(Shader::ShaderBlocks& out_blocks, std::list<I *>& ready_list)
644bf215546Sopenharmony_ci{
645bf215546Sopenharmony_ci   if (ready_list.empty())
646bf215546Sopenharmony_ci      return false;
647bf215546Sopenharmony_ci   if (m_current_block->type() != Block::cf)
648bf215546Sopenharmony_ci      start_new_block(out_blocks, Block::cf);
649bf215546Sopenharmony_ci   return schedule(ready_list);
650bf215546Sopenharmony_ci}
651bf215546Sopenharmony_ci
652bf215546Sopenharmony_ci
653bf215546Sopenharmony_cibool BlockSheduler::schedule_alu_to_group_vec(AluGroup *group)
654bf215546Sopenharmony_ci{
655bf215546Sopenharmony_ci   assert(group);
656bf215546Sopenharmony_ci   assert(!alu_vec_ready.empty());
657bf215546Sopenharmony_ci
658bf215546Sopenharmony_ci   bool success =  false;
659bf215546Sopenharmony_ci   auto i = alu_vec_ready.begin();
660bf215546Sopenharmony_ci   auto e = alu_vec_ready.end();
661bf215546Sopenharmony_ci   while (i != e) {
662bf215546Sopenharmony_ci      sfn_log << SfnLog::schedule << "Try schedule to vec " << **i;
663bf215546Sopenharmony_ci
664bf215546Sopenharmony_ci      if (!m_current_block->try_reserve_kcache(**i)) {
665bf215546Sopenharmony_ci           sfn_log << SfnLog::schedule << " failed (kcache)\n";
666bf215546Sopenharmony_ci         ++i;
667bf215546Sopenharmony_ci         continue;
668bf215546Sopenharmony_ci      }
669bf215546Sopenharmony_ci
670bf215546Sopenharmony_ci      if (group->add_vec_instructions(*i)) {
671bf215546Sopenharmony_ci         auto old_i = i;
672bf215546Sopenharmony_ci         ++i;
673bf215546Sopenharmony_ci         if ((*old_i)->has_alu_flag(alu_is_lds)) {
674bf215546Sopenharmony_ci            --m_lds_addr_count;
675bf215546Sopenharmony_ci         }
676bf215546Sopenharmony_ci
677bf215546Sopenharmony_ci         alu_vec_ready.erase(old_i);
678bf215546Sopenharmony_ci         success = true;
679bf215546Sopenharmony_ci         sfn_log << SfnLog::schedule << " success\n";
680bf215546Sopenharmony_ci      } else {
681bf215546Sopenharmony_ci         ++i;
682bf215546Sopenharmony_ci         sfn_log << SfnLog::schedule << " failed\n";
683bf215546Sopenharmony_ci      }
684bf215546Sopenharmony_ci   }
685bf215546Sopenharmony_ci   return success;
686bf215546Sopenharmony_ci}
687bf215546Sopenharmony_ci
688bf215546Sopenharmony_cibool BlockSheduler::schedule_alu_to_group_trans(AluGroup *group, std::list<AluInstr *>& readylist)
689bf215546Sopenharmony_ci{
690bf215546Sopenharmony_ci   assert(group);
691bf215546Sopenharmony_ci
692bf215546Sopenharmony_ci   bool success =  false;
693bf215546Sopenharmony_ci   auto i = readylist.begin();
694bf215546Sopenharmony_ci   auto e = readylist.end();
695bf215546Sopenharmony_ci   while (i != e) {
696bf215546Sopenharmony_ci      sfn_log << SfnLog::schedule << "Try schedule to trans " << **i;
697bf215546Sopenharmony_ci      if (!m_current_block->try_reserve_kcache(**i)) {
698bf215546Sopenharmony_ci           sfn_log << SfnLog::schedule << " failed (kcache)\n";
699bf215546Sopenharmony_ci         ++i;
700bf215546Sopenharmony_ci         continue;
701bf215546Sopenharmony_ci      }
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci      if (group->add_trans_instructions(*i)) {
704bf215546Sopenharmony_ci         auto old_i = i;
705bf215546Sopenharmony_ci         ++i;
706bf215546Sopenharmony_ci         readylist.erase(old_i);
707bf215546Sopenharmony_ci         success = true;
708bf215546Sopenharmony_ci         sfn_log << SfnLog::schedule << " sucess\n";
709bf215546Sopenharmony_ci         break;
710bf215546Sopenharmony_ci      } else {
711bf215546Sopenharmony_ci         ++i;
712bf215546Sopenharmony_ci         sfn_log << SfnLog::schedule << " failed\n";
713bf215546Sopenharmony_ci      }
714bf215546Sopenharmony_ci   }
715bf215546Sopenharmony_ci   return success;
716bf215546Sopenharmony_ci}
717bf215546Sopenharmony_ci
718bf215546Sopenharmony_citemplate <typename I>
719bf215546Sopenharmony_cibool BlockSheduler::schedule(std::list<I *>& ready_list)
720bf215546Sopenharmony_ci{
721bf215546Sopenharmony_ci   if (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
722bf215546Sopenharmony_ci      auto ii = ready_list.begin();
723bf215546Sopenharmony_ci      sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
724bf215546Sopenharmony_ci      (*ii)->set_scheduled();
725bf215546Sopenharmony_ci      m_current_block->push_back(*ii);
726bf215546Sopenharmony_ci      ready_list.erase(ii);
727bf215546Sopenharmony_ci      return true;
728bf215546Sopenharmony_ci   }
729bf215546Sopenharmony_ci   return false;
730bf215546Sopenharmony_ci}
731bf215546Sopenharmony_ci
732bf215546Sopenharmony_citemplate <typename I>
733bf215546Sopenharmony_cibool BlockSheduler::schedule_block(std::list<I *>& ready_list)
734bf215546Sopenharmony_ci{
735bf215546Sopenharmony_ci   bool success = false;
736bf215546Sopenharmony_ci   while (!ready_list.empty() && m_current_block->remaining_slots() > 0) {
737bf215546Sopenharmony_ci      auto ii = ready_list.begin();
738bf215546Sopenharmony_ci      sfn_log << SfnLog::schedule << "Schedule: " << **ii << " "
739bf215546Sopenharmony_ci              << m_current_block->remaining_slots() << "\n";
740bf215546Sopenharmony_ci      (*ii)->set_scheduled();
741bf215546Sopenharmony_ci      m_current_block->push_back(*ii);
742bf215546Sopenharmony_ci      ready_list.erase(ii);
743bf215546Sopenharmony_ci      success = true;
744bf215546Sopenharmony_ci   }
745bf215546Sopenharmony_ci   return success;
746bf215546Sopenharmony_ci}
747bf215546Sopenharmony_ci
748bf215546Sopenharmony_ci
749bf215546Sopenharmony_cibool BlockSheduler::schedule_exports(Shader::ShaderBlocks& out_blocks, std::list<ExportInstr *>& ready_list)
750bf215546Sopenharmony_ci{
751bf215546Sopenharmony_ci   if (m_current_block->type() != Block::cf)
752bf215546Sopenharmony_ci      start_new_block(out_blocks, Block::cf);
753bf215546Sopenharmony_ci
754bf215546Sopenharmony_ci   if (!ready_list.empty()) {
755bf215546Sopenharmony_ci      auto ii = ready_list.begin();
756bf215546Sopenharmony_ci      sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n";
757bf215546Sopenharmony_ci      (*ii)->set_scheduled();
758bf215546Sopenharmony_ci      m_current_block->push_back(*ii);
759bf215546Sopenharmony_ci      switch ((*ii)->export_type()) {
760bf215546Sopenharmony_ci      case ExportInstr::pos: m_last_pos = *ii; break;
761bf215546Sopenharmony_ci      case ExportInstr::param: m_last_param = *ii; break;
762bf215546Sopenharmony_ci      case ExportInstr::pixel: m_last_pixel = *ii; break;
763bf215546Sopenharmony_ci      }
764bf215546Sopenharmony_ci      (*ii)->set_is_last_export(false);
765bf215546Sopenharmony_ci      ready_list.erase(ii);
766bf215546Sopenharmony_ci      return true;
767bf215546Sopenharmony_ci   }
768bf215546Sopenharmony_ci   return false;
769bf215546Sopenharmony_ci}
770bf215546Sopenharmony_ci
771bf215546Sopenharmony_cibool BlockSheduler::collect_ready(CollectInstructions &available)
772bf215546Sopenharmony_ci{
773bf215546Sopenharmony_ci   sfn_log << SfnLog::schedule << "Ready instructions\n";
774bf215546Sopenharmony_ci   bool result = false;
775bf215546Sopenharmony_ci   result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec);
776bf215546Sopenharmony_ci   result |= collect_ready_type(alu_trans_ready, available.alu_trans);
777bf215546Sopenharmony_ci   result |= collect_ready_type(alu_groups_ready, available.alu_groups);
778bf215546Sopenharmony_ci   result |= collect_ready_type(gds_ready, available.gds_op);
779bf215546Sopenharmony_ci   result |= collect_ready_type(tex_ready, available.tex);
780bf215546Sopenharmony_ci   result |= collect_ready_type(fetches_ready, available.fetches);
781bf215546Sopenharmony_ci   result |= collect_ready_type(memops_ready, available.mem_write_instr);
782bf215546Sopenharmony_ci   result |= collect_ready_type(mem_ring_writes_ready, available.mem_ring_writes);
783bf215546Sopenharmony_ci   result |= collect_ready_type(write_tf_ready, available.write_tf);
784bf215546Sopenharmony_ci   result |= collect_ready_type(rat_instr_ready, available.rat_instr);
785bf215546Sopenharmony_ci
786bf215546Sopenharmony_ci   sfn_log << SfnLog::schedule << "\n";
787bf215546Sopenharmony_ci   return result;
788bf215546Sopenharmony_ci}
789bf215546Sopenharmony_ci
790bf215546Sopenharmony_cibool BlockSheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready, std::list<AluInstr *>& available)
791bf215546Sopenharmony_ci{
792bf215546Sopenharmony_ci   auto i = available.begin();
793bf215546Sopenharmony_ci   auto e = available.end();
794bf215546Sopenharmony_ci
795bf215546Sopenharmony_ci   for (auto alu : ready) {
796bf215546Sopenharmony_ci      alu->add_priority(100 * alu->register_priority());
797bf215546Sopenharmony_ci   }
798bf215546Sopenharmony_ci
799bf215546Sopenharmony_ci   int max_check = 0;
800bf215546Sopenharmony_ci   while (i != e && max_check++ < 32) {
801bf215546Sopenharmony_ci      if (ready.size() < 32 && (*i)->ready()) {
802bf215546Sopenharmony_ci
803bf215546Sopenharmony_ci         int priority = 0;
804bf215546Sopenharmony_ci         /* LDS fetches that use static offsets are usually ready ery fast,
805bf215546Sopenharmony_ci          * so that they would get schedules early, and this leaves the problem
806bf215546Sopenharmony_ci          * that we allocate too many registers with just constant values,
807bf215546Sopenharmony_ci          * and this will make problems wih RA. So limit the number of LDS
808bf215546Sopenharmony_ci          * address registers.
809bf215546Sopenharmony_ci          */
810bf215546Sopenharmony_ci         if ((*i)->has_alu_flag(alu_lds_address)) {
811bf215546Sopenharmony_ci            if (m_lds_addr_count > 64) {
812bf215546Sopenharmony_ci               ++i;
813bf215546Sopenharmony_ci               continue;
814bf215546Sopenharmony_ci            } else {
815bf215546Sopenharmony_ci               ++m_lds_addr_count;
816bf215546Sopenharmony_ci            }
817bf215546Sopenharmony_ci         }
818bf215546Sopenharmony_ci
819bf215546Sopenharmony_ci         /* LDS instructions are scheduled with high priority.
820bf215546Sopenharmony_ci          * instractions that can go into the t slot and don't have
821bf215546Sopenharmony_ci          * indirect access are put in last, so that they don't block
822bf215546Sopenharmony_ci          * vec-only instructions when scheduling to the vector slots
823bf215546Sopenharmony_ci          * for everything else we look at the register use */
824bf215546Sopenharmony_ci
825bf215546Sopenharmony_ci         if ((*i)->has_lds_access())
826bf215546Sopenharmony_ci             priority = 100000;
827bf215546Sopenharmony_ci         else if (AluGroup::has_t()) {
828bf215546Sopenharmony_ci            auto opinfo = alu_ops.find((*i)->opcode());
829bf215546Sopenharmony_ci            assert(opinfo != alu_ops.end());
830bf215546Sopenharmony_ci            if (opinfo->second.can_channel(AluOp::t, m_chip_class) &&
831bf215546Sopenharmony_ci                !std::get<0>((*i)->indirect_addr()))
832bf215546Sopenharmony_ci               priority = -1;
833bf215546Sopenharmony_ci         }
834bf215546Sopenharmony_ci
835bf215546Sopenharmony_ci         priority += 100 * (*i)->register_priority();
836bf215546Sopenharmony_ci
837bf215546Sopenharmony_ci         (*i)->add_priority(priority);
838bf215546Sopenharmony_ci         ready.push_back(*i);
839bf215546Sopenharmony_ci
840bf215546Sopenharmony_ci         auto old_i = i;
841bf215546Sopenharmony_ci         ++i;
842bf215546Sopenharmony_ci         available.erase(old_i);
843bf215546Sopenharmony_ci      } else
844bf215546Sopenharmony_ci         ++i;
845bf215546Sopenharmony_ci   }
846bf215546Sopenharmony_ci
847bf215546Sopenharmony_ci   for (auto& i: ready)
848bf215546Sopenharmony_ci      sfn_log << SfnLog::schedule << "V:  " << *i << "\n";
849bf215546Sopenharmony_ci
850bf215546Sopenharmony_ci   ready.sort([](const AluInstr *lhs, const AluInstr *rhs) {
851bf215546Sopenharmony_ci                 return lhs->priority() > rhs->priority();});
852bf215546Sopenharmony_ci
853bf215546Sopenharmony_ci   for (auto& i: ready)
854bf215546Sopenharmony_ci      sfn_log << SfnLog::schedule << "V (S):  " << *i << "\n";
855bf215546Sopenharmony_ci
856bf215546Sopenharmony_ci   return !ready.empty();
857bf215546Sopenharmony_ci}
858bf215546Sopenharmony_ci
859bf215546Sopenharmony_citemplate <typename T>
860bf215546Sopenharmony_cistruct type_char {
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci};
863bf215546Sopenharmony_ci
864bf215546Sopenharmony_ci
865bf215546Sopenharmony_citemplate <>
866bf215546Sopenharmony_cistruct type_char<AluInstr> {
867bf215546Sopenharmony_ci   static constexpr const char value = 'A';
868bf215546Sopenharmony_ci};
869bf215546Sopenharmony_ci
870bf215546Sopenharmony_citemplate <>
871bf215546Sopenharmony_cistruct type_char<AluGroup>  {
872bf215546Sopenharmony_ci   static constexpr const char value = 'G';
873bf215546Sopenharmony_ci};
874bf215546Sopenharmony_ci
875bf215546Sopenharmony_citemplate <>
876bf215546Sopenharmony_cistruct type_char<ExportInstr>  {
877bf215546Sopenharmony_ci   static constexpr const char value = 'E';
878bf215546Sopenharmony_ci};
879bf215546Sopenharmony_ci
880bf215546Sopenharmony_citemplate <>
881bf215546Sopenharmony_cistruct type_char<TexInstr>  {
882bf215546Sopenharmony_ci   static constexpr const char value = 'T';
883bf215546Sopenharmony_ci};
884bf215546Sopenharmony_ci
885bf215546Sopenharmony_citemplate <>
886bf215546Sopenharmony_cistruct type_char<FetchInstr>  {
887bf215546Sopenharmony_ci   static constexpr const char value = 'F';
888bf215546Sopenharmony_ci};
889bf215546Sopenharmony_ci
890bf215546Sopenharmony_citemplate <>
891bf215546Sopenharmony_cistruct type_char<WriteOutInstr>  {
892bf215546Sopenharmony_ci   static constexpr const char value = 'M';
893bf215546Sopenharmony_ci};
894bf215546Sopenharmony_ci
895bf215546Sopenharmony_citemplate <>
896bf215546Sopenharmony_cistruct type_char<MemRingOutInstr>  {
897bf215546Sopenharmony_ci   static constexpr const char value = 'R';
898bf215546Sopenharmony_ci};
899bf215546Sopenharmony_ci
900bf215546Sopenharmony_citemplate <>
901bf215546Sopenharmony_cistruct type_char<WriteTFInstr>  {
902bf215546Sopenharmony_ci   static constexpr const char value = 'X';
903bf215546Sopenharmony_ci};
904bf215546Sopenharmony_ci
905bf215546Sopenharmony_citemplate <>
906bf215546Sopenharmony_cistruct type_char<GDSInstr>  {
907bf215546Sopenharmony_ci   static constexpr const char value = 'S';
908bf215546Sopenharmony_ci};
909bf215546Sopenharmony_ci
910bf215546Sopenharmony_citemplate <>
911bf215546Sopenharmony_cistruct type_char<RatInstr>  {
912bf215546Sopenharmony_ci   static constexpr const char value = 'I';
913bf215546Sopenharmony_ci};
914bf215546Sopenharmony_ci
915bf215546Sopenharmony_ci
916bf215546Sopenharmony_citemplate <typename T>
917bf215546Sopenharmony_cibool BlockSheduler::collect_ready_type(std::list<T *>& ready, std::list<T *>& available)
918bf215546Sopenharmony_ci{
919bf215546Sopenharmony_ci   auto i = available.begin();
920bf215546Sopenharmony_ci   auto e = available.end();
921bf215546Sopenharmony_ci
922bf215546Sopenharmony_ci   int lookahead = 16;
923bf215546Sopenharmony_ci   while (i != e && ready.size() < 16 && lookahead-- > 0) {
924bf215546Sopenharmony_ci      if ((*i)->ready()) {
925bf215546Sopenharmony_ci         ready.push_back(*i);
926bf215546Sopenharmony_ci         auto old_i = i;
927bf215546Sopenharmony_ci         ++i;
928bf215546Sopenharmony_ci         available.erase(old_i);
929bf215546Sopenharmony_ci      } else
930bf215546Sopenharmony_ci         ++i;
931bf215546Sopenharmony_ci   }
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_ci   for (auto& i: ready)
934bf215546Sopenharmony_ci      sfn_log << SfnLog::schedule << type_char<T>::value << ";  " << *i << "\n";
935bf215546Sopenharmony_ci
936bf215546Sopenharmony_ci   return !ready.empty();
937bf215546Sopenharmony_ci}
938bf215546Sopenharmony_ci
939bf215546Sopenharmony_ci}
940