/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2022 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_shader.h"
#include "sfn_instr.h"
#include "sfn_instr_alugroup.h"
#include "sfn_instr_export.h"
#include "sfn_instr_fetch.h"
#include "sfn_instr_lds.h"
#include "sfn_instr_mem.h"

#include "sfn_instr_controlflow.h"
#include "sfn_liverangeevaluator.h"

#include "sfn_shader_cs.h"
#include "sfn_shader_fs.h"
#include "sfn_shader_vs.h"
#include "sfn_shader_gs.h"
#include "sfn_shader_tess.h"

#include "sfn_debug.h"
#include "gallium/drivers/r600/r600_shader.h"

#include "tgsi/tgsi_from_mesa.h"

#include "nir.h"

#include <numeric>
#include <sstream>

namespace r600 {

using std::string;

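/* Map a gl_varying_slot to the TGSI semantic name/index pair used by the
 * r600 backend; generic and point-coord varyings get the driver specific
 * index adjustments applied below. */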
std::pair<unsigned, unsigned>
r600_get_varying_semantic(unsigned varying_location)
{
   std::pair<unsigned, unsigned> result;
   tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>(varying_location),
                                true, &result.first, &result.second);

   if (result.first == TGSI_SEMANTIC_GENERIC) {
      result.second += 9;
   } else if (result.first == TGSI_SEMANTIC_PCOORD) {
      result.second = 8;
   }
   return result;
}

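/* Set the semantic ID and derive the SPI semantic ID that is programmed
 * into the hardware; position-like outputs don't use one. */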
void ShaderIO::set_sid(int sid)
{
   m_sid = sid;
   switch (m_name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      m_spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_TEXCOORD:
   case TGSI_SEMANTIC_PCOORD:
      m_spi_sid = m_sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      m_spi_sid = (0x80 | (m_name << 3) | m_sid) + 1;
   }
}

void ShaderIO::override_spi_sid(int spi)
{
   m_spi_sid = spi;
}

void ShaderIO::print(std::ostream& os) const
{
   os << m_type << " LOC:" << m_location << " NAME:" << m_name;
   do_print(os);

   if (m_sid > 0) {
      os << " SID:" << m_sid << " SPI_SID:" << m_spi_sid;
   }
}

ShaderIO::ShaderIO(const char *type, int loc, int name):
   m_type(type),
   m_location(loc),
   m_name(name)
{
}

ShaderOutput::ShaderOutput():
   ShaderIO("OUTPUT", -1, -1)
{
}

ShaderOutput::ShaderOutput(int location, int name, int writemask):
   ShaderIO("OUTPUT", location, name),
   m_writemask(writemask)
{
}

void ShaderOutput::do_print(std::ostream& os) const
{
   os << " MASK:" << m_writemask;
}

ShaderInput::ShaderInput(int location, int name):
   ShaderIO("INPUT", location, name)
{
}

ShaderInput::ShaderInput():
   ShaderInput(-1, -1)
{
}

void ShaderInput::do_print(std::ostream& os) const
{
   if (m_interpolator)
      os << " INTERP:" << m_interpolator;
   if (m_interpolate_loc)
      os << " ILOC:" << m_interpolate_loc;
   if (m_uses_interpolate_at_centroid)
      os << " USE_CENTROID";
}

void ShaderInput::set_interpolator(int interp, int interp_loc, bool uses_interpolate_at_centroid)
{
   m_interpolator = interp;
   m_interpolate_loc = interp_loc;
   m_uses_interpolate_at_centroid = uses_interpolate_at_centroid;
}

void ShaderInput::set_uses_interpolate_at_centroid()
{
   m_uses_interpolate_at_centroid = true;
}

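/* A shader always starts out with one empty block at nesting depth 0. */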
Shader::Shader(const char *type_id):
   m_current_block(nullptr),
   m_type_id(type_id),
   m_chip_class(ISA_CC_R600),
   m_next_block(0)
{
   m_instr_factory = new InstrFactory();
   m_chain_instr.this_shader = this;
   start_new_block(0);
}

void Shader::set_input_gpr(int driver_location, int gpr)
{
   auto i = m_inputs.find(driver_location);
   assert(i != m_inputs.end());
   i->second.set_gpr(gpr);
}

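/* Parse one line of the textual shader representation (e.g. as used by the
 * tests and debug helpers that work on the string form). */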
bool Shader::add_info_from_string(std::istream& is)
{
   std::string type;
   is >> type;

   if (type == "CHIPCLASS")
      return read_chipclass(is);
   if (type == "OUTPUT")
      return read_output(is);
   if (type == "INPUT")
      return read_input(is);
   if (type == "PROP")
      return read_prop(is);
   if (type == "SYSVALUES")
      return allocate_registers_from_string(is, pin_fully);
   if (type == "REGISTERS")
      return allocate_registers_from_string(is, pin_free);
   if (type == "ARRAYS")
      return allocate_arrays_from_string(is);

   return false;
}

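/* Create one instruction from its string representation and append it to the
 * current block; BLOCK_START/BLOCK_END markers only update the block structure. */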
void Shader::emit_instruction_from_string(const std::string& s)
{
   sfn_log << SfnLog::instr << "Create Instr from '" << s << "'\n";
   if (s == "BLOCK_START") {
      if (!m_current_block->empty()) {
         start_new_block(m_current_block->nesting_offset());
         sfn_log << SfnLog::instr << "   Emit start block\n";
      }
      return;
   }

   if (s == "BLOCK_END") {
      return;
   }

   auto ir = m_instr_factory->from_string(s, m_current_block->nesting_depth());
   if (ir) {
      emit_instruction(ir);
      if (ir->end_block())
         start_new_block(ir->nesting_offset());
      sfn_log << SfnLog::instr << "   " << *ir << "\n";
   }
}

bool Shader::read_output(std::istream& is)
{
   string value;
   is >> value;
   int pos = int_from_string_with_prefix(value, "LOC:");
   is >> value;
   int name = int_from_string_with_prefix(value, "NAME:");
   is >> value;
   int mask = int_from_string_with_prefix(value, "MASK:");
   ShaderOutput output(pos, name, mask);

   value.clear();
   is >> value;
   if (!value.empty()) {
      int sid = int_from_string_with_prefix(value, "SID:");
      output.set_sid(sid);
      is >> value;
      int spi_sid = int_from_string_with_prefix(value, "SPI_SID:");
      assert(spi_sid == output.spi_sid());
   }

   add_output(output);
   return true;
}

bool Shader::read_input(std::istream& is)
{
   string value;
   is >> value;
   int pos = int_from_string_with_prefix(value, "LOC:");
   is >> value;
   int name = int_from_string_with_prefix(value, "NAME:");

   value.clear();

   ShaderInput input(pos, name);

   int interp = 0;
   int interp_loc = 0;
   bool use_centroid = false;

   is >> value;
   while (!value.empty()) {
      if (value.substr(0, 4) == "SID:") {
         int sid = int_from_string_with_prefix(value, "SID:");
         input.set_sid(sid);
      } else if (value.substr(0, 8) == "SPI_SID:") {
         int spi_sid = int_from_string_with_prefix(value, "SPI_SID:");
         assert(spi_sid == input.spi_sid());
      } else if (value.substr(0, 7) == "INTERP:") {
         interp = int_from_string_with_prefix(value, "INTERP:");
      } else if (value.substr(0, 5) == "ILOC:") {
         interp_loc = int_from_string_with_prefix(value, "ILOC:");
      } else if (value == "USE_CENTROID") {
         use_centroid = true;
      } else {
         std::cerr << "Unknown parse value '" << value << "'";
         assert(!value.c_str());
      }
      value.clear();
      is >> value;
   }

   input.set_interpolator(interp, interp_loc, use_centroid);

   add_input(input);
   return true;
}

bool Shader::allocate_registers_from_string(std::istream& is, Pin pin)
{
   std::string line;
   if (!std::getline(is, line))
      return false;

   std::istringstream iline(line);

   while (!iline.eof()) {
      string reg_str;
      iline >> reg_str;

      if (reg_str.empty())
         break;

      if (strchr(reg_str.c_str(), '@')) {
         value_factory().dest_from_string(reg_str);
      } else {
         RegisterVec4::Swizzle swz = {0, 1, 2, 3};
         auto regs = value_factory().dest_vec4_from_string(reg_str, swz, pin);
         for (int i = 0; i < 4; ++i) {
            if (swz[i] < 4 && pin == pin_fully) {
               regs[i]->pin_live_range(true, false);
            }
         }
      }
   }
   return true;
}

bool Shader::allocate_arrays_from_string(std::istream& is)
{
   std::string line;
   if (!std::getline(is, line))
      return false;

   std::istringstream iline(line);

   while (!iline.eof()) {
      string reg_str;
      iline >> reg_str;

      if (reg_str.empty())
         break;

      value_factory().array_from_string(reg_str);
   }
   return true;
}

bool Shader::read_chipclass(std::istream& is)
{
   string name;
   is >> name;
   if (name == "R600")
      m_chip_class = ISA_CC_R600;
   else if (name == "R700")
      m_chip_class = ISA_CC_R700;
   else if (name == "EVERGREEN")
      m_chip_class = ISA_CC_EVERGREEN;
   else if (name == "CAYMAN")
      m_chip_class = ISA_CC_CAYMAN;
   else
      return false;
   return true;
}

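/* Reserve the registers that must live at fixed locations and emit the helper
 * code needed for hardware atomics and for the SSBO/RAT return address. */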
void Shader::allocate_reserved_registers()
{
   m_instr_factory->value_factory().set_virtual_register_base(0);
   auto reserved_registers_end = do_allocate_reserved_registers();
   m_instr_factory->value_factory().set_virtual_register_base(reserved_registers_end);
   if (!m_atomics.empty()) {
      m_atomic_update = value_factory().temp_register();
      auto alu = new AluInstr(op1_mov, m_atomic_update,
                              value_factory().one_i(),
                              AluInstr::last_write);
      alu->set_alu_flag(alu_no_schedule_bias);
      emit_instruction(alu);
   }

   if (m_flags.test(sh_needs_sbo_ret_address)) {
      m_rat_return_address = value_factory().temp_register(0);
      auto temp0 = value_factory().temp_register(0);
      auto temp1 = value_factory().temp_register(1);
      auto temp2 = value_factory().temp_register(2);

      auto group = new AluGroup();
      group->add_instruction(new AluInstr(op1_mbcnt_32lo_accum_prev_int, temp0,
                                          value_factory().literal(-1), {alu_write}));
      group->add_instruction(new AluInstr(op1_mbcnt_32hi_int, temp1,
                                          value_factory().literal(-1), {alu_write}));
      emit_instruction(group);
      emit_instruction(new AluInstr(op3_muladd_uint24, temp2,
                                    value_factory().inline_const(ALU_SRC_SE_ID, 0),
                                    value_factory().literal(256),
                                    value_factory().inline_const(ALU_SRC_HW_WAVE_ID, 0),
                                    {alu_write, alu_last_instr}));
      emit_instruction(new AluInstr(op3_muladd_uint24, m_rat_return_address,
                                    temp2, value_factory().literal(0x40), temp0,
                                    {alu_write, alu_last_instr}));
   }
}

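/* Factory entry point: pick the stage specific shader implementation and run
 * the NIR to backend IR translation on it. */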
Shader *Shader::translate_from_nir(nir_shader *nir, const pipe_stream_output_info *so_info,
                                   struct r600_shader* gs_shader,
                                   r600_shader_key& key, r600_chip_class chip_class)
{
   Shader *shader = nullptr;

   switch (nir->info.stage) {
   case MESA_SHADER_FRAGMENT:
      if (chip_class >= ISA_CC_EVERGREEN)
         shader = new FragmentShaderEG(key);
      else
         shader = new FragmentShaderR600(key);
      break;
   case MESA_SHADER_VERTEX:
      shader = new VertexShader(so_info, gs_shader, key);
      break;
   case MESA_SHADER_GEOMETRY:
      shader = new GeometryShader(key);
      break;
   case MESA_SHADER_TESS_CTRL:
      shader = new TCSShader(key);
      break;
   case MESA_SHADER_TESS_EVAL:
      shader = new TESShader(so_info, gs_shader, key);
      break;
   case MESA_SHADER_COMPUTE:
      shader = new ComputeShader(key);
      break;
   default:
      return nullptr;
   }

   shader->set_info(nir);

   shader->set_chip_class(chip_class);
   if (!shader->process(nir))
      return nullptr;

   return shader;
}

void Shader::set_info(nir_shader *nir)
{
   m_scratch_size = nir->scratch_size;
}

ValueFactory& Shader::value_factory()
{
   return m_instr_factory->value_factory();
}

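/* Translate the (already fully inlined) NIR shader: scan it for the resources
 * it needs, allocate registers, and emit code for the control flow tree. */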
bool Shader::process(nir_shader *nir)
{
   m_ssbo_image_offset = nir->info.num_images;

   if (nir->info.use_legacy_math_rules)
      set_flag(sh_legacy_math_rules);

   nir_foreach_uniform_variable(var, nir)
      scan_uniforms(var);

   // at this point all functions should be inlined
   const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&nir->functions));

   if (!scan_shader(func))
      return false;

   allocate_reserved_registers();

   allocate_local_registers(&func->impl->registers);

   sfn_log << SfnLog::trans << "Process shader \n";
   foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
      if (!process_cf_node(node))
         return false;
   }

   finalize();

   return true;
}

void Shader::allocate_local_registers(const exec_list *registers)
{
   if (value_factory().allocate_registers(registers))
      m_indirect_files |= 1 << TGSI_FILE_TEMPORARY;
}

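/* Scan all instructions to collect I/O and sysvalue usage, then assign LDS
 * positions to the inputs that need them and parameter slots to the outputs. */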
bool Shader::scan_shader(const nir_function *func)
{
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr(instr, block) {
         if (!scan_instruction(instr)) {
            fprintf(stderr, "Unhandled sysvalue access ");
            nir_print_instr(instr, stderr);
            fprintf(stderr, "\n");
            return false;
         }
      }
   }

   int lds_pos = 0;
   for (auto& [index, input] : m_inputs) {
      if (input.need_lds_pos()) {
         if (chip_class() < ISA_CC_EVERGREEN)
            input.set_gpr(lds_pos);
         input.set_lds_pos(lds_pos++);
      }
   }

   int param_id = 0;
   for (auto& [index, out] : m_outputs) {
      if (out.is_param())
         out.set_pos(param_id++);
   }

   return true;
}

bool Shader::scan_uniforms(nir_variable *uniform)
{
   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      m_nhwatomic += natomics;

      if (uniform->type->is_array())
         m_indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      m_flags.set(sh_uses_atomics);

      r600_shader_atomic atom = {0};

      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;

      atom.start = uniform->data.offset >> 2;
      atom.end = atom.start + natomics - 1;

      if (m_atomic_base_map.find(uniform->data.binding) ==
          m_atomic_base_map.end())
         m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc;

      m_next_hwatomic_loc += natomics;

      m_atomic_file_count += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_atomic_file_count << "\n";

      m_atomics.push_back(atom);
   }

   auto type = uniform->type->is_array() ? uniform->type->without_array() : uniform->type;
   if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      m_flags.set(sh_uses_images);
      if (uniform->type->is_array() && !(uniform->data.mode == nir_var_mem_ssbo))
         m_indirect_files |= 1 << TGSI_FILE_IMAGE;
   }

   return true;
}

bool Shader::scan_instruction(nir_instr *instr)
{
   if (do_scan_instruction(instr))
      return true;

   if (instr->type != nir_instr_type_intrinsic)
      return true;

   auto intr = nir_instr_as_intrinsic(instr);

   // handle intrinsics that were not handled by the stage specific scan
   switch (intr->intrinsic) {
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_imax:
      m_flags.set(sh_needs_sbo_ret_address);
      FALLTHROUGH;
   case nir_intrinsic_image_store:
   case nir_intrinsic_store_ssbo:
      m_flags.set(sh_writes_memory);
      m_flags.set(sh_uses_images);
      break;
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_group_memory_barrier:
      m_chain_instr.prepare_mem_barrier = true;
      break;
   default:
      ;
   }
   return true;
}

bool Shader::process_cf_node(nir_cf_node *node)
{
   SFN_TRACE_FUNC(SfnLog::flow, "CF");

   switch (node->type) {
   case nir_cf_node_block:
      return process_block(nir_cf_node_as_block(node));
   case nir_cf_node_if:
      return process_if(nir_cf_node_as_if(node));
   case nir_cf_node_loop:
      return process_loop(nir_cf_node_as_loop(node));
   default:
      return false;
   }
}

static bool
child_block_empty(const exec_list& list)
{
   if (list.is_empty())
      return true;

   bool result = true;

   foreach_list_typed(nir_cf_node, n, node, &list) {
      if (n->type == nir_cf_node_block) {
         if (!nir_cf_node_as_block(n)->instr_list.is_empty())
            return false;
      }
      if (n->type == nir_cf_node_if)
         return false;
   }
   return result;
}

bool Shader::process_if(nir_if *if_stmt)
{
   SFN_TRACE_FUNC(SfnLog::flow, "IF");

   if (!emit_if_start(if_stmt))
      return false;

   foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list) {
      SFN_TRACE_FUNC(SfnLog::flow, "IF-then");
      if (!process_cf_node(n))
         return false;
   }

   if (!child_block_empty(if_stmt->else_list)) {
      if (!emit_control_flow(ControlFlowInstr::cf_else))
         return false;
      foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
         if (!process_cf_node(n)) return false;
   }

   if (!emit_control_flow(ControlFlowInstr::cf_endif))
      return false;

   return true;
}

bool Shader::emit_if_start(nir_if *if_stmt)
{
   auto value = value_factory().src(if_stmt->condition, 0);
   AluInstr *pred = new AluInstr(op2_pred_setne_int, value_factory().temp_register(),
                                 value, value_factory().zero(), AluInstr::last);
   pred->set_alu_flag(alu_update_exec);
   pred->set_alu_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   IfInstr *ir = new IfInstr(pred);
   emit_instruction(ir);
   start_new_block(1);
   return true;
}

bool Shader::emit_control_flow(ControlFlowInstr::CFType type)
{
   auto ir = new ControlFlowInstr(type);
   emit_instruction(ir);
   int depth = 0;
   switch (type) {
   case ControlFlowInstr::cf_loop_begin:
      m_loops.push_back(ir);
      m_nloops++;
      depth = 1;
      break;
   case ControlFlowInstr::cf_loop_end:
      m_loops.pop_back();
      FALLTHROUGH;
   case ControlFlowInstr::cf_endif:
      depth = -1;
      break;
   default:
      ;
   }

   start_new_block(depth);
   return true;
}

bool Shader::process_loop(nir_loop *node)
{
   SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
   if (!emit_control_flow(ControlFlowInstr::cf_loop_begin))
      return false;

   foreach_list_typed(nir_cf_node, n, node, &node->body)
      if (!process_cf_node(n)) return false;

   if (!emit_control_flow(ControlFlowInstr::cf_loop_end))
      return false;

   return true;
}

bool Shader::process_block(nir_block *block)
{
   SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");

   nir_foreach_instr(instr, block) {
      sfn_log << SfnLog::instr << "FROM:" << *instr << "\n";
      bool r = process_instr(instr);
      if (!r) {
         sfn_log << SfnLog::err << "R600: Unsupported instruction: "
                 << *instr << "\n";
         return false;
      }
   }
   return true;
}

bool Shader::process_instr(nir_instr *instr)
{
   return m_instr_factory->from_nir(instr, *this);
}

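/* Handle intrinsics that are common to all shader stages; the stage specific
 * intrinsics are tried first via process_stage_intrinsic. */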
bool Shader::process_intrinsic(nir_intrinsic_instr *intr)
{
   if (process_stage_intrinsic(intr))
      return true;

   if (GDSInstr::emit_atomic_counter(intr, *this)) {
      set_flag(sh_writes_memory);
      return true;
   }

   if (RatInstr::emit(intr, *this))
      return true;

   switch (intr->intrinsic) {
   case nir_intrinsic_store_output: return store_output(intr);
   case nir_intrinsic_load_input: return load_input(intr);
   case nir_intrinsic_load_uniform: return load_uniform(intr);
   case nir_intrinsic_load_ubo_vec4: return load_ubo(intr);
   case nir_intrinsic_store_scratch: return emit_store_scratch(intr);
   case nir_intrinsic_load_scratch: return emit_load_scratch(intr);
   case nir_intrinsic_store_local_shared_r600: return emit_local_store(intr);
   case nir_intrinsic_load_local_shared_r600: return emit_local_load(intr);
   case nir_intrinsic_load_tcs_in_param_base_r600: return emit_load_tcs_param_base(intr, 0);
   case nir_intrinsic_load_tcs_out_param_base_r600: return emit_load_tcs_param_base(intr, 16);
      // We only emit the workgroup-local barrier; barriers across work groups
      // are not yet implemented
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_memory_barrier_tcs_patch:
   case nir_intrinsic_memory_barrier_shared:
      return emit_barrier(intr);
   case nir_intrinsic_memory_barrier_atomic_counter:
      return true;
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier:
      return emit_wait_ack();

   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      return emit_atomic_local_shared(intr);
   case nir_intrinsic_shader_clock:
      return emit_shader_clock(intr);

   default:
      return false;
   }
}

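/* Map a NIR shared-memory atomic to the corresponding LDS opcode; the *_RET
 * variants are used when the result is actually consumed. */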
static ESDOp
lds_op_from_intrinsic(nir_intrinsic_op op, bool ret)
{
   switch (op) {
   case nir_intrinsic_shared_atomic_add:
      return ret ? LDS_ADD_RET : LDS_ADD;
   case nir_intrinsic_shared_atomic_and:
      return ret ? LDS_AND_RET : LDS_AND;
   case nir_intrinsic_shared_atomic_or:
      return ret ? LDS_OR_RET : LDS_OR;
   case nir_intrinsic_shared_atomic_imax:
      return ret ? LDS_MAX_INT_RET : LDS_MAX_INT;
   case nir_intrinsic_shared_atomic_umax:
      return ret ? LDS_MAX_UINT_RET : LDS_MAX_UINT;
   case nir_intrinsic_shared_atomic_imin:
      return ret ? LDS_MIN_INT_RET : LDS_MIN_INT;
   case nir_intrinsic_shared_atomic_umin:
      return ret ? LDS_MIN_UINT_RET : LDS_MIN_UINT;
   case nir_intrinsic_shared_atomic_xor:
      return ret ? LDS_XOR_RET : LDS_XOR;
   case nir_intrinsic_shared_atomic_exchange:
      return LDS_XCHG_RET;
   case nir_intrinsic_shared_atomic_comp_swap:
      return LDS_CMP_XCHG_RET;
   default:
      unreachable("Unsupported shared atomic opcode");
   }
}

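/* Make sure a value lives in a register; emit a MOV to a temporary if it doesn't. */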
PRegister Shader::emit_load_to_register(PVirtualValue src)
{
   assert(src);
   PRegister dest = src->as_register();

   if (!dest) {
      dest = value_factory().temp_register();
      emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::last_write));
   }
   return dest;
}

bool Shader::emit_atomic_local_shared(nir_intrinsic_instr* instr)
{
   bool uses_retval = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);

   auto& vf = value_factory();

   auto dest_value = uses_retval ? vf.dest(instr->dest, 0, pin_free) : nullptr;

   auto op = lds_op_from_intrinsic(instr->intrinsic, uses_retval);

   auto address = vf.src(instr->src[0], 0);

   AluInstr::SrcValues src;
   src.push_back(vf.src(instr->src[1], 0));

   if (unlikely(instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap))
      src.push_back(vf.src(instr->src[2], 0));
   emit_instruction(new LDSAtomicInstr(op, dest_value, address, src));
   return true;
}

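/* Evaluate the resource (UAV) index of an image or SSBO access: return a
 * constant offset if it is known at compile time, otherwise a register that
 * holds the index. */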
auto Shader::evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id) -> std::pair<int, PRegister>
{
   auto& vf = value_factory();

   PRegister uav_id{nullptr};
   int offset = 0;

   auto uav_id_const = nir_src_as_const_value(instr->src[src_id]);
   if (uav_id_const) {
      offset += uav_id_const->u32;
   } else {
      auto uav_id_val = vf.src(instr->src[src_id], 0);
      if (uav_id_val->as_register()) {
         uav_id = uav_id_val->as_register();
      } else {
         uav_id = vf.temp_register();
         emit_instruction(new AluInstr(op1_mov, uav_id, uav_id_val,
                                       AluInstr::last_write));
      }
   }
   return std::make_pair(offset, uav_id);
}

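/* Scratch stores go through a temporary vec4 that is written according to the
 * write mask and then spilled either to a constant offset or to an address
 * held in a register. */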
bool Shader::emit_store_scratch(nir_intrinsic_instr *intr)
{
   auto& vf = m_instr_factory->value_factory();

   int writemask = nir_intrinsic_write_mask(intr);

   RegisterVec4::Swizzle swz = {7, 7, 7, 7};

   for (unsigned i = 0; i < intr->num_components; ++i)
      swz[i] = (1 << i) & writemask ? i : 7;

   auto value = vf.temp_vec4(pin_group, swz);
   AluInstr *ir = nullptr;
   for (unsigned i = 0; i < intr->num_components; ++i) {
      if (value[i]->chan() < 4) {
         ir = new AluInstr(op1_mov, value[i], vf.src(intr->src[0], i), AluInstr::write);
         ir->set_alu_flag(alu_no_schedule_bias);
         emit_instruction(ir);
      }
   }
   if (!ir)
      return true;

   ir->set_alu_flag(alu_last_instr);

   auto address = vf.src(intr->src[1], 0);

   int align = nir_intrinsic_align_mul(intr);
   int align_offset = nir_intrinsic_align_offset(intr);

   ScratchIOInstr *ws_ir = nullptr;

   int offset = -1;
   if (address->as_literal()) {
      offset = address->as_literal()->value();
   } else if (address->as_inline_const()) {
      auto il = address->as_inline_const();
      if (il->sel() == ALU_SRC_0)
         offset = 0;
      else if (il->sel() == ALU_SRC_1_INT)
         offset = 1;
   }

   if (offset >= 0) {
      ws_ir = new ScratchIOInstr(value, offset, align, align_offset, writemask);
   } else {
      auto addr_temp = vf.temp_register(0);
      auto load_addr = new AluInstr(op1_mov, addr_temp, address, AluInstr::last_write);
      load_addr->set_alu_flag(alu_no_schedule_bias);
      emit_instruction(load_addr);

      ws_ir = new ScratchIOInstr(value, addr_temp, align, align_offset, writemask, m_scratch_size);
   }
   emit_instruction(ws_ir);

   m_flags.set(sh_needs_scratch_space);
   return true;
}

bool Shader::emit_load_scratch(nir_intrinsic_instr *intr)
{
   auto addr = value_factory().src(intr->src[0], 0);
   auto dest = value_factory().dest_vec4(intr->dest, pin_group);

   if (chip_class() >= ISA_CC_R700) {
      RegisterVec4::Swizzle dest_swz = {7, 7, 7, 7};

      for (unsigned i = 0; i < intr->num_components; ++i)
         dest_swz[i] = i;

      auto *ir = new LoadFromScratch(dest, dest_swz, addr, m_scratch_size);
      emit_instruction(ir);
      chain_scratch_read(ir);
   } else {
      int align = nir_intrinsic_align_mul(intr);
      int align_offset = nir_intrinsic_align_offset(intr);

      int offset = -1;
      if (addr->as_literal()) {
         offset = addr->as_literal()->value();
      } else if (addr->as_inline_const()) {
         auto il = addr->as_inline_const();
         if (il->sel() == ALU_SRC_0)
            offset = 0;
         else if (il->sel() == ALU_SRC_1_INT)
            offset = 1;
      }

      ScratchIOInstr *ir = nullptr;
      if (offset >= 0) {
         ir = new ScratchIOInstr(dest, offset, align, align_offset, 0xf, true);
      } else {
         auto addr_temp = value_factory().temp_register(0);
         auto load_addr = new AluInstr(op1_mov, addr_temp, addr, AluInstr::last_write);
         load_addr->set_alu_flag(alu_no_schedule_bias);
         emit_instruction(load_addr);

         ir = new ScratchIOInstr(dest, addr_temp, align, align_offset, 0xf,
                                 m_scratch_size, true);
      }
      emit_instruction(ir);
   }

   m_flags.set(sh_needs_scratch_space);

   return true;
}

bool Shader::emit_local_store(nir_intrinsic_instr *instr)
{
   unsigned write_mask = nir_intrinsic_write_mask(instr);

   auto address = value_factory().src(instr->src[1], 0);
   int swizzle_base = 0;
   unsigned w = write_mask;
   while (!(w & 1)) {
      ++swizzle_base;
      w >>= 1;
   }
   write_mask = write_mask >> swizzle_base;

   if ((write_mask & 3) != 3) {
      auto value = value_factory().src(instr->src[0], swizzle_base);
      emit_instruction(new LDSAtomicInstr(LDS_WRITE, nullptr, address, {value}));
   } else {
      auto value = value_factory().src(instr->src[0], swizzle_base);
      auto value1 = value_factory().src(instr->src[0], swizzle_base + 1);
      emit_instruction(new LDSAtomicInstr(LDS_WRITE_REL, nullptr, address, {value, value1}));
   }
   return true;
}

bool Shader::emit_local_load(nir_intrinsic_instr* instr)
{
   auto address = value_factory().src_vec(instr->src[0], instr->num_components);
   auto dest_value = value_factory().dest_vec(instr->dest, instr->num_components);
   emit_instruction(new LDSReadInstr(dest_value, address));
   return true;
}

void Shader::chain_scratch_read(Instr *instr)
{
   m_chain_instr.apply(instr, &m_chain_instr.last_scratch_instr);
}

void Shader::chain_ssbo_read(Instr *instr)
{
   m_chain_instr.apply(instr, &m_chain_instr.last_ssbo_instr);
}

bool Shader::emit_wait_ack()
{
   start_new_block(0);
   emit_instruction(new ControlFlowInstr(ControlFlowInstr::cf_wait_ack));
   start_new_block(0);
   return true;
}

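/* The instruction chain records ordering dependencies between memory accesses
 * of the same kind so that later scheduling keeps them in emission order. */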
void Shader::InstructionChain::visit(ScratchIOInstr *instr)
{
   apply(instr, &last_scratch_instr);
}

void Shader::InstructionChain::visit(GDSInstr *instr)
{
   apply(instr, &last_gds_instr);
   for (auto& loop : this_shader->m_loops) {
      loop->set_instr_flag(Instr::vpm);
   }
}

void Shader::InstructionChain::visit(RatInstr *instr)
{
   apply(instr, &last_ssbo_instr);
   for (auto& loop : this_shader->m_loops) {
      loop->set_instr_flag(Instr::vpm);
   }

   if (prepare_mem_barrier)
      instr->set_ack();

   if (this_shader->m_current_block->inc_rat_emitted() > 15)
      this_shader->start_new_block(0);
}

void Shader::InstructionChain::apply(Instr *current, Instr **last)
{
   if (*last)
      current->add_required_instr(*last);
   *last = current;
}

void Shader::emit_instruction(PInst instr)
{
   sfn_log << SfnLog::instr << "   " << *instr << "\n";
   instr->accept(m_chain_instr);
   m_current_block->push_back(instr);
}

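/* Uniform loads with a constant offset become MOVs from the constant file;
 * otherwise an indirect fetch from the constant buffer is emitted. */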
bool Shader::load_uniform(nir_intrinsic_instr *intr)
{
   auto literal = nir_src_as_const_value(intr->src[0]);

   if (literal) {
      AluInstr *ir = nullptr;
      auto pin = intr->dest.is_ssa && nir_dest_num_components(intr->dest) == 1 ?
                    pin_free : pin_none;
      for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
         sfn_log << SfnLog::io << "uniform "
                 << intr->dest.ssa.index << " const[" << i << "]: " << intr->const_index[i] << "\n";

         auto uniform = value_factory().uniform(intr, i);
         ir = new AluInstr(op1_mov, value_factory().dest(intr->dest, i, pin),
                           uniform, {alu_write});
         emit_instruction(ir);
      }
      if (ir)
         ir->set_alu_flag(alu_last_instr);
      return true;
   } else {
      auto addr = value_factory().src(intr->src[0], 0);
      return load_uniform_indirect(intr, addr, 16 * nir_intrinsic_base(intr), 0);
   }
}

bool Shader::load_uniform_indirect(nir_intrinsic_instr *intr, PVirtualValue addr,
                                   int offset, int buffer_id)
{
   auto addr_reg = addr->as_register();
   if (!addr_reg) {
      auto tmp = value_factory().temp_register();
      emit_instruction(new AluInstr(op1_mov, tmp, addr, AluInstr::last_write));
      addr_reg = tmp;
   }

   RegisterVec4 dest = value_factory().dest_vec4(intr->dest, pin_group);

   auto ir = new LoadFromBuffer(dest, {0,1,2,3}, addr_reg, offset, buffer_id,
                                nullptr, fmt_32_32_32_32_float);
   emit_instruction(ir);
   m_flags.set(sh_indirect_const_file);
   return true;
}

bool Shader::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
{
   auto src = value_factory().temp_register();
   emit_instruction(new AluInstr(op1_mov, src, value_factory().zero(),
                                 AluInstr::last_write));

   auto dest = value_factory().dest_vec4(instr->dest, pin_group);
   auto fetch = new LoadFromBuffer(dest, {0,1,2,3}, src, offset,
                                   R600_LDS_INFO_CONST_BUFFER, nullptr,
                                   fmt_32_32_32_32);

   fetch->set_fetch_flag(LoadFromBuffer::srf_mode);
   emit_instruction(fetch);

   return true;
}

bool Shader::emit_shader_clock(nir_intrinsic_instr* instr)
{
   auto& vf = value_factory();
   auto group = new AluGroup();
   group->add_instruction(new AluInstr(op1_mov, vf.dest(instr->dest, 0, pin_chan),
                                       vf.inline_const(ALU_SRC_TIME_LO, 0), AluInstr::write));
   group->add_instruction(new AluInstr(op1_mov, vf.dest(instr->dest, 1, pin_chan),
                                       vf.inline_const(ALU_SRC_TIME_HI, 0), AluInstr::last_write));
   emit_instruction(group);
   return true;
}

bool Shader::emit_barrier(nir_intrinsic_instr* intr)
{
   (void)intr;
   /* Put the barrier into its own block, so that optimizers and the
    * scheduler don't move code across it */
   start_new_block(0);
   auto op = new AluInstr(op0_group_barrier, 0);
   op->set_alu_flag(alu_last_instr);
   emit_instruction(op);
   start_new_block(0);
   return true;
}

bool Shader::load_ubo(nir_intrinsic_instr *instr)
{
   auto bufid = nir_src_as_const_value(instr->src[0]);
   auto buf_offset = nir_src_as_const_value(instr->src[1]);

   if (!buf_offset) {
      /* TODO: if bufid is constant then this can also be solved by using the
       * CF index on the ALU block, and this would probably make sense when
       * there is more than one load with the same buffer ID. */

      auto addr = value_factory().src(instr->src[1], 0)->as_register();
      RegisterVec4::Swizzle dest_swz {7,7,7,7};
      auto dest = value_factory().dest_vec4(instr->dest, pin_group);

      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         dest_swz[i] = i + nir_intrinsic_component(instr);
      }

      LoadFromBuffer *ir;
      if (bufid) {
         ir = new LoadFromBuffer(dest, dest_swz, addr, 0, 1 + bufid->u32,
                                 nullptr, fmt_32_32_32_32_float);
      } else {
         auto buffer_id = emit_load_to_register(value_factory().src(instr->src[0], 0));
         ir = new LoadFromBuffer(dest, dest_swz, addr, 0, 1, buffer_id,
                                 fmt_32_32_32_32_float);
      }
      emit_instruction(ir);
      return true;
   }

   /* direct load using the constant cache */
   if (bufid) {
      int buf_cmp = nir_intrinsic_component(instr);

      AluInstr *ir = nullptr;
      auto pin = instr->dest.is_ssa && nir_dest_num_components(instr->dest) == 1 ?
                    pin_free : pin_none;
      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         sfn_log << SfnLog::io << "UBO[" << bufid->u32 << "] "
                 << instr->dest.ssa.index << " const[" << i << "]: " << instr->const_index[i] << "\n";

         auto uniform = value_factory().uniform(512 + buf_offset->u32, i + buf_cmp, bufid->u32 + 1);
         ir = new AluInstr(op1_mov, value_factory().dest(instr->dest, i, pin),
                           uniform, {alu_write});
         emit_instruction(ir);
      }
      if (ir)
         ir->set_alu_flag(alu_last_instr);
      return true;
   } else {
      int buf_cmp = nir_intrinsic_component(instr);
      AluInstr *ir = nullptr;
      auto kc_id = value_factory().src(instr->src[0], 0);

      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         int cmp = buf_cmp + i;
         auto u = new UniformValue(512 + buf_offset->u32, cmp, kc_id);
         auto dest = value_factory().dest(instr->dest, i, pin_none);
         ir = new AluInstr(op1_mov, dest, u, AluInstr::write);
         emit_instruction(ir);
      }
      if (ir)
         ir->set_alu_flag(alu_last_instr);
      m_indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }
}

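/* Close the current block and open a new one, adjusting the nesting depth by
 * the given offset. */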
void Shader::start_new_block(int depth)
{
   int depth_offset = m_current_block ? m_current_block->nesting_depth() : 0;
   m_current_block = new Block(depth + depth_offset, m_next_block++);
   m_root.push_back(m_current_block);
}

bool Shader::emit_simple_mov(nir_dest& dest, int chan, PVirtualValue src, Pin pin)
{
   auto dst = value_factory().dest(dest, chan, pin);
   emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::last_write));
   return true;
}

void Shader::print(std::ostream& os) const
{
   print_header(os);

   for (auto& [dummy, i]: m_inputs) {
      i.print(os);
      os << "\n";
   }

   for (auto& [dummy, o]: m_outputs) {
      o.print(os);
      os << "\n";
   }

   os << "SHADER\n";
   for (auto& b : m_root)
      b->print(os);
}

const char *chip_class_names[] = {
   "R600",
   "R700",
   "EVERGREEN",
   "CAYMAN"
};

void Shader::print_header(std::ostream& os) const
{
   assert(m_chip_class <= ISA_CC_CAYMAN);
   os << m_type_id << "\n";
   os << "CHIPCLASS " << chip_class_names[m_chip_class] << "\n";
   print_properties(os);
}

void Shader::print_properties(std::ostream& os) const
{
   do_print_properties(os);
}

bool Shader::equal_to(const Shader& other) const
{
   if (m_root.size() != other.m_root.size())
      return false;
   return std::inner_product(m_root.begin(), m_root.end(),
                             other.m_root.begin(),
                             true,
                             [](bool lhs, bool rhs) { return lhs & rhs; },
                             [](const Block::Pointer lhs, const Block::Pointer rhs) -> bool {
                                return lhs->is_equal_to(*rhs);
                             });
}

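/* Fill the r600_shader structure that is consumed by the rest of the r600
 * driver with the I/O, atomic, and resource usage info collected here. */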
void Shader::get_shader_info(r600_shader *sh_info)
{
   sh_info->ninput = m_inputs.size();
   int lds_pos = 0;
   int input_array_array_loc = 0;
   for (auto& [index, info] : m_inputs) {
      r600_shader_io& io = sh_info->input[input_array_array_loc++];

      io.sid = info.sid();
      io.gpr = info.gpr();
      io.spi_sid = info.spi_sid();
      io.ij_index = info.ij_index();
      io.name = info.name();
      io.interpolate = info.interpolator();
      io.interpolate_location = info.interpolate_loc();
      if (info.need_lds_pos())
         io.lds_pos = lds_pos++;
      else
         io.lds_pos = 0;

      io.ring_offset = info.ring_offset();
      io.uses_interpolate_at_centroid = info.uses_interpolate_at_centroid();

      sfn_log << SfnLog::io << "Emit Input [" << index << "] sid:" << io.sid << " spi_sid:" << io.spi_sid << "\n";
      assert(io.spi_sid >= 0);
   }

   sh_info->nlds = lds_pos;
   sh_info->noutput = m_outputs.size();
   sh_info->num_loops = m_nloops;
   int output_array_array_loc = 0;

   for (auto& [index, info] : m_outputs) {
      r600_shader_io& io = sh_info->output[output_array_array_loc++];
      io.sid = info.sid();
      io.gpr = info.gpr();
      io.spi_sid = info.spi_sid();
      io.name = info.name();
      io.write_mask = info.writemask();

      sfn_log << SfnLog::io << "Emit output[" << index << "] sid:" << io.sid << " spi_sid:" << io.spi_sid << "\n";
      assert(io.spi_sid >= 0);
   }

   sh_info->nhwatomic = m_nhwatomic;
   sh_info->atomic_base = m_atomic_base;
   sh_info->nhwatomic_ranges = m_atomics.size();
   for (unsigned i = 0; i < m_atomics.size(); ++i)
      sh_info->atomics[i] = m_atomics[i];

   if (m_flags.test(sh_indirect_const_file))
      sh_info->indirect_files |= 1 << TGSI_FILE_CONSTANT;

   if (m_flags.test(sh_indirect_atomic))
      sh_info->indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

   sh_info->uses_tex_buffers = m_flags.test(sh_uses_tex_buffer);

   value_factory().get_shader_info(sh_info);

   sh_info->needs_scratch_space = m_flags.test(sh_needs_scratch_space);
   sh_info->uses_images = m_flags.test(sh_uses_images);
   sh_info->uses_atomics = m_flags.test(sh_uses_atomics);
   sh_info->has_txq_cube_array_z_comp = m_flags.test(sh_txs_cube_array_comp);
   sh_info->indirect_files = m_indirect_files;
   do_get_shader_info(sh_info);
}

PRegister Shader::atomic_update()
{
   assert(m_atomic_update);
   return m_atomic_update;
}

int Shader::remap_atomic_base(int base)
{
   return m_atomic_base_map[base];
}

void Shader::do_get_shader_info(r600_shader *sh_info)
{
   sh_info->uses_atomics = m_nhwatomic > 0;
}

const ShaderInput& Shader::input(int base) const
{
   auto io = m_inputs.find(base);
   assert(io != m_inputs.end());
   return io->second;
}

const ShaderOutput& Shader::output(int base) const
{
   auto io = m_outputs.find(base);
   assert(io != m_outputs.end());
   return io->second;
}

LiveRangeMap Shader::prepare_live_range_map()
{
   return m_instr_factory->value_factory().prepare_live_range_map();
}

void Shader::reset_function(ShaderBlocks& new_root)
{
   std::swap(m_root, new_root);
}

void Shader::finalize()
{
   do_finalize();
}

void Shader::do_finalize()
{
}

}
