/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2022 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
26 
27 #include "sfn_shader_gs.h"
28 #include "sfn_instr_fetch.h"
29 #include "sfn_debug.h"
30 
31 namespace r600 {
32 
GeometryShader(const r600_shader_key& key)33 GeometryShader::GeometryShader(const r600_shader_key& key):
34    Shader("GS"),
35    m_tri_strip_adj_fix(key.gs.tri_strip_adj_fix)
36 {
37 
38 }
39 
do_scan_instruction(nir_instr *instr)40 bool GeometryShader::do_scan_instruction(nir_instr *instr)
41 {
42    if (instr->type != nir_instr_type_intrinsic)
43       return false;
44 
45    nir_intrinsic_instr *ii =  nir_instr_as_intrinsic(instr);
46 
47    switch (ii->intrinsic) {
48    case nir_intrinsic_store_output:
49       return process_store_output(ii);
50    case nir_intrinsic_load_per_vertex_input:
51       return process_load_input(ii);
52    default:
53       return false;
54    }
55 }
56 
process_store_output(nir_intrinsic_instr *instr)57 bool GeometryShader::process_store_output(nir_intrinsic_instr *instr)
58 {
59    auto location = nir_intrinsic_io_semantics(instr).location;
60    auto index = nir_src_as_const_value(instr->src[1]);
61    assert(index);
62 
63    auto driver_location = nir_intrinsic_base(instr) + index->u32;
64 
65    if (location == VARYING_SLOT_COL0 ||
66        location == VARYING_SLOT_COL1 ||
67        (location >= VARYING_SLOT_VAR0 &&
68        location <= VARYING_SLOT_VAR31) ||
69        (location >= VARYING_SLOT_TEX0 &&
70        location <= VARYING_SLOT_TEX7) ||
71        location == VARYING_SLOT_BFC0 ||
72        location == VARYING_SLOT_BFC1 ||
73        location == VARYING_SLOT_PNTC ||
74        location == VARYING_SLOT_CLIP_VERTEX ||
75        location == VARYING_SLOT_CLIP_DIST0 ||
76        location == VARYING_SLOT_CLIP_DIST1 ||
77        location == VARYING_SLOT_PRIMITIVE_ID ||
78        location == VARYING_SLOT_POS ||
79        location == VARYING_SLOT_PSIZ ||
80        location == VARYING_SLOT_LAYER ||
81        location == VARYING_SLOT_VIEWPORT ||
82        location == VARYING_SLOT_FOGC) {
83 
84       auto semantic = r600_get_varying_semantic(location);
85       tgsi_semantic name = (tgsi_semantic)semantic.first;
86       auto write_mask = nir_intrinsic_write_mask(instr);
87       ShaderOutput output(driver_location, name, write_mask);
88 
89       if (!nir_intrinsic_io_semantics(instr).no_varying)
90          output.set_sid(semantic.second);
91       if (nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX)
92          add_output(output);
93 
94       if (location == VARYING_SLOT_VIEWPORT) {
95          m_out_viewport = true;
96          m_out_misc_write = true;
97       }
98 
99       if (location == VARYING_SLOT_CLIP_DIST0 ||
100           location == VARYING_SLOT_CLIP_DIST1)   {
101          auto write_mask = nir_intrinsic_write_mask(instr);
102          m_cc_dist_mask |= write_mask << (4 * (location - VARYING_SLOT_CLIP_DIST0));
103          m_clip_dist_write |= write_mask <<  (4 * (location - VARYING_SLOT_CLIP_DIST0));
104       }
105 
106       if (m_noutputs <= driver_location &&
107           nir_intrinsic_io_semantics(instr).location != VARYING_SLOT_CLIP_VERTEX)
108          m_noutputs = driver_location + 1;
109 
110       return true;
111    }
112    return false;
113 }
114 
process_load_input(nir_intrinsic_instr *instr)115 bool GeometryShader::process_load_input(nir_intrinsic_instr *instr)
116 {
117    auto location = nir_intrinsic_io_semantics(instr).location;
118    auto index = nir_src_as_const_value(instr->src[1]);
119    assert(index);
120 
121    auto driver_location = nir_intrinsic_base(instr) + index->u32;
122 
123    if (location == VARYING_SLOT_POS ||
124        location == VARYING_SLOT_PSIZ ||
125        location == VARYING_SLOT_FOGC ||
126        location == VARYING_SLOT_CLIP_VERTEX ||
127        location == VARYING_SLOT_CLIP_DIST0 ||
128        location == VARYING_SLOT_CLIP_DIST1 ||
129        location == VARYING_SLOT_COL0 ||
130        location == VARYING_SLOT_COL1 ||
131        location == VARYING_SLOT_BFC0 ||
132        location == VARYING_SLOT_BFC1 ||
133        location == VARYING_SLOT_PNTC ||
134        (location >= VARYING_SLOT_VAR0 &&
135         location <= VARYING_SLOT_VAR31) ||
136        (location >= VARYING_SLOT_TEX0 &&
137        location <= VARYING_SLOT_TEX7)) {
138 
139       uint64_t bit = 1ull << location;
140       if (!(bit & m_input_mask)) {
141          auto semantic = r600_get_varying_semantic(location);
142          ShaderInput input(driver_location, semantic.first);
143          input.set_sid(semantic.second);
144          input.set_ring_offset(16 * driver_location);
145          add_input(input);
146          m_next_input_ring_offset += 16;
147          m_input_mask |= bit;
148       }
149       return true;
150    }
151    return false;
152 }
153 
/* Reserve the registers the hardware pre-loads for a GS invocation and
 * set up the per-stream ring export base registers.
 *
 * Returns the first free virtual register index.
 *
 * NOTE(review): the sel/chan pairs below pin the six per-vertex fetch
 * offsets to R0.{x,y,w} and R1.{x,y,z}, the primitive id to (0,2) and the
 * invocation id to (1,3) — presumably matching the hardware input register
 * layout; confirm against the r600 ISA docs. */
int GeometryShader::do_allocate_reserved_registers()
{
   const int sel[6] = {0, 0 ,0, 1, 1, 1};
   const int chan[6] = {0, 1 ,3, 0, 1, 2};

   /* Reserve registers used by the shaders (should check how many
    * components are actually used */
   for (int i = 0; i < 6; ++i) {
      m_per_vertex_offsets[i] = value_factory().allocate_pinned_register(sel[i], chan[i]);
      m_per_vertex_offsets[i]->pin_live_range(true);
   }

   m_primitive_id = value_factory().allocate_pinned_register(0, 2);
   m_primitive_id->pin_live_range(true);
   m_invocation_id = value_factory().allocate_pinned_register(1, 3);
   m_invocation_id->pin_live_range(true);

   /* Registers 0 and 1 are taken by the pinned values above. */
   value_factory().set_virtual_register_base(2);

   auto zero = value_factory().inline_const(ALU_SRC_0, 0);

   /* One ring export base address per stream, all initialized to zero;
    * emit_vertex() advances them by m_noutputs per emitted vertex. */
   for (int i = 0; i < 4; ++i) {
      m_export_base[i] = value_factory().temp_register(0, false);
      emit_instruction(new AluInstr(op1_mov, m_export_base[i], zero, AluInstr::last_write));
   }

   m_ring_item_sizes[0] = m_next_input_ring_offset;


   /* GS thread with no output workaround - emit a cut at start of GS */
   if (chip_class() == ISA_CC_R600) {
      emit_instruction(new EmitVertexInstr(0, true));
      start_new_block(0);
   }



   if (m_tri_strip_adj_fix)
      emit_adj_fix();

   return value_factory().next_register_index();
}
196 
process_stage_intrinsic(nir_intrinsic_instr *intr)197 bool GeometryShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
198 {
199    switch (intr->intrinsic) {
200    case nir_intrinsic_emit_vertex:
201       return emit_vertex(intr, false);
202    case nir_intrinsic_end_primitive:
203       return emit_vertex(intr, true);
204    case nir_intrinsic_load_primitive_id:
205       return emit_simple_mov(intr->dest, 0, m_primitive_id);
206    case nir_intrinsic_load_invocation_id:
207       return emit_simple_mov(intr->dest, 0, m_invocation_id);
208    case nir_intrinsic_load_per_vertex_input:
209       return emit_load_per_vertex_input(intr);
210    default:
211       ;
212    }
213    return false;
214 }
215 
emit_vertex(nir_intrinsic_instr* instr, bool cut)216 bool GeometryShader::emit_vertex(nir_intrinsic_instr* instr, bool cut)
217 {
218    int stream = nir_intrinsic_stream_id(instr);
219    assert(stream < 4);
220 
221    auto cut_instr = new EmitVertexInstr(stream, cut);
222 
223    for(auto v: m_streamout_data) {
224       if (stream == 0 || v.first != VARYING_SLOT_POS) {
225          v.second->patch_ring(stream, m_export_base[stream]);
226          cut_instr->add_required_instr(v.second);
227          emit_instruction(v.second);
228       } else
229          delete v.second;
230    }
231    m_streamout_data.clear();
232 
233    emit_instruction(cut_instr);
234    start_new_block(0);
235 
236    if (!cut) {
237       auto ir = new AluInstr(op2_add_int, m_export_base[stream], m_export_base[stream],
238                              value_factory().literal(m_noutputs),
239                              AluInstr::last_write);
240       emit_instruction(ir);
241    }
242 
243    return true;
244 }
245 
246 bool GeometryShader::store_output(nir_intrinsic_instr* instr)
247 {
248    if (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_CLIP_VERTEX)
249       return true;
250 
251    auto location = nir_intrinsic_io_semantics(instr).location;
252    auto index = nir_src_as_const_value(instr->src[1]);
253    assert(index);
254    auto driver_location = nir_intrinsic_base(instr) + index->u32;
255 
256    uint32_t write_mask = nir_intrinsic_write_mask(instr);
257    uint32_t shift = nir_intrinsic_component(instr);
258 
259    RegisterVec4::Swizzle src_swz {7,7,7,7};
260    for (unsigned i = shift; i < 4; ++i) {
261       src_swz[i] = (1 << i) & (write_mask << shift) ? i  - shift: 7;
262    }
263 
264    auto out_value = value_factory().src_vec4(instr->src[0], pin_group, src_swz);
265 
266    AluInstr *ir = nullptr;
267    if (m_streamout_data[location]) {
268       const auto& value = m_streamout_data[location]->value();
269       auto tmp = value_factory().temp_vec4(pin_group);
270       for (unsigned i = 0; i < 4 - shift; ++i) {
271          if (!(write_mask & (1 << i)))
272             continue;
273          if (out_value[i + shift]->chan() < 4) {
274             ir = new AluInstr(op1_mov, tmp[i + shift], out_value[i  + shift], AluInstr::write);
275          } else if (value[i]->chan() < 4) {
276             ir = new AluInstr(op1_mov, tmp[i + shift], value[i], AluInstr::write);
277          } else
278             continue;
279          emit_instruction(ir);
280       }
281       ir->set_alu_flag(alu_last_instr);
282       m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring, MemRingOutInstr::mem_write_ind, tmp,
283                                                        4 * driver_location,
284                                                        instr->num_components, m_export_base[0]);
285    } else {
286 
287       sfn_log << SfnLog::io << "None-streamout ";
288       bool need_copy =  shift != 0;
289       if (!need_copy) {
290          for (int i = 0; i < 4; ++i) {
291             if ((write_mask & (1 << i)) && (out_value[i]->chan() != i)) {
292                need_copy = true;
293                break;
294             }
295          }
296       }
297 
298       if (need_copy) {
299          auto tmp = value_factory().temp_vec4(pin_group);
300          for (unsigned i = 0; i < 4 - shift; ++i) {
301             if (out_value[i]->chan() < 4) {
302                ir = new AluInstr(op1_mov, tmp[i], out_value[i], AluInstr::write);
303                emit_instruction(ir);
304             }
305          }
306          ir->set_alu_flag(alu_last_instr);
307          m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring, MemRingOutInstr::mem_write_ind, tmp,
308                                                           4 * driver_location,
309                                                           instr->num_components, m_export_base[0]);
310       } else {
311          m_streamout_data[location] = new MemRingOutInstr(cf_mem_ring, MemRingOutInstr::mem_write_ind, out_value,
312                                                           4 * driver_location,
313                                                           instr->num_components, m_export_base[0]);
314       }
315    }
316 
317    return true;
318 }
319 
320 bool GeometryShader::emit_load_per_vertex_input(nir_intrinsic_instr* instr)
321 {
322    auto dest = value_factory().dest_vec4(instr->dest, pin_group);
323 
324    RegisterVec4::Swizzle dest_swz{7,7,7,7};
325    for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
326       dest_swz[i] = i + nir_intrinsic_component(instr);
327    }
328 
329    auto literal_index = nir_src_as_const_value(instr->src[0]);
330 
331    if (!literal_index) {
332       sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n";
333       return false;
334    }
335    assert(literal_index->u32 < 6);
336    assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
337 
338 
339    EVTXDataFormat fmt = chip_class() >= ISA_CC_EVERGREEN ?
340                            fmt_invalid:
341                            fmt_32_32_32_32_float;
342 
343 
344    auto addr = m_per_vertex_offsets[literal_index->u32];
345    auto fetch = new LoadFromBuffer(dest, dest_swz, addr,
346                                    16 * nir_intrinsic_base(instr),
347                                    R600_GS_RING_CONST_BUFFER, nullptr,
348                                    fmt);
349 
350    if (chip_class() >= ISA_CC_EVERGREEN)
351       fetch->set_fetch_flag(FetchInstr::use_const_field);
352 
353    fetch->set_num_format(vtx_nf_norm);
354    fetch->reset_fetch_flag(FetchInstr::format_comp_signed);
355 
356    emit_instruction(fetch);
357    return true;
358 }
359 
360 void GeometryShader::do_finalize()
361 {
362 
363 }
364 
365 void GeometryShader::do_get_shader_info(r600_shader *sh_info)
366 {
367    sh_info->processor_type = PIPE_SHADER_GEOMETRY;
368    sh_info->ring_item_sizes[0] =  m_ring_item_sizes[0];
369    sh_info->cc_dist_mask = m_cc_dist_mask;
370    sh_info->clip_dist_write = m_clip_dist_write;
371 }
372 
373 bool GeometryShader::read_prop(std::istream& is)
374 {
375    (void)is;
376    return true;
377 }
378 
379 void GeometryShader::do_print_properties(std::ostream& os) const
380 {
381    (void)os;
382 }
383 
384 void GeometryShader::emit_adj_fix()
385 {
386    auto adjhelp0 = value_factory().temp_register();
387 
388    emit_instruction(new AluInstr(op2_and_int, adjhelp0,
389                                  m_primitive_id, value_factory().one_i(),
390                                  AluInstr::last_write));
391 
392    int reg_indices[6];
393    int rotate_indices[6] = {4, 5, 0, 1, 2, 3};
394 
395    reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel();
396    reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel();
397 
398    std::array<PRegister, 6> adjhelp;
399 
400    AluInstr *ir = nullptr;
401    for (int i = 0; i < 6; i++) {
402       adjhelp[i] = value_factory().temp_register();
403       ir = new AluInstr(op3_cnde_int, adjhelp[i],
404                         adjhelp0, m_per_vertex_offsets[i],
405                         m_per_vertex_offsets[rotate_indices[i]],
406                         AluInstr::write);
407 
408       emit_instruction(ir);
409    }
410    ir->set_alu_flag(alu_last_instr);
411 
412    for (int i = 0; i < 6; i++)
413       m_per_vertex_offsets[i] = adjhelp[i];
414 }
415 
416 
417 }
418