1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2022 Imagination Technologies Ltd.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a copy
5bf215546Sopenharmony_ci * of this software and associated documentation files (the "Software"), to deal
6bf215546Sopenharmony_ci * in the Software without restriction, including without limitation the rights
7bf215546Sopenharmony_ci * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8bf215546Sopenharmony_ci * copies of the Software, and to permit persons to whom the Software is
9bf215546Sopenharmony_ci * furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18bf215546Sopenharmony_ci * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include <assert.h>
25bf215546Sopenharmony_ci#include <stdbool.h>
26bf215546Sopenharmony_ci#include <stddef.h>
27bf215546Sopenharmony_ci#include <stdint.h>
28bf215546Sopenharmony_ci#include <stdlib.h>
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "compiler/shader_enums.h"
31bf215546Sopenharmony_ci#include "nir/nir.h"
32bf215546Sopenharmony_ci#include "rogue_build_data.h"
33bf215546Sopenharmony_ci#include "rogue_nir_helpers.h"
34bf215546Sopenharmony_ci#include "rogue_operand.h"
35bf215546Sopenharmony_ci#include "util/macros.h"
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci#define __pvr_address_type uint64_t
38bf215546Sopenharmony_ci#define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr)
39bf215546Sopenharmony_ci#define __pvr_make_address(addr_u64) (addr_u64)
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_ci#include "csbgen/rogue_pds.h"
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci#undef __pvr_make_address
44bf215546Sopenharmony_ci#undef __pvr_get_address
45bf215546Sopenharmony_ci#undef __pvr_address_type
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ci/**
48bf215546Sopenharmony_ci * \brief Allocates the coefficient registers that will contain the iterator
49bf215546Sopenharmony_ci * data for the fragment shader input varyings.
50bf215546Sopenharmony_ci *
51bf215546Sopenharmony_ci * \param[in] args The iterator argument data.
52bf215546Sopenharmony_ci * \return The total number of coefficient registers required by the iterators.
53bf215546Sopenharmony_ci */
54bf215546Sopenharmony_cistatic size_t alloc_iterator_regs(struct rogue_iterator_args *args)
55bf215546Sopenharmony_ci{
56bf215546Sopenharmony_ci   size_t coeffs = 0;
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_ci   for (size_t u = 0; u < args->num_fpu_iterators; ++u) {
59bf215546Sopenharmony_ci      /* Ensure there aren't any gaps. */
60bf215546Sopenharmony_ci      assert(args->base[u] == ~0);
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_ci      args->base[u] = coeffs;
63bf215546Sopenharmony_ci      coeffs += ROGUE_COEFF_ALIGN * args->components[u];
64bf215546Sopenharmony_ci   }
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_ci   return coeffs;
67bf215546Sopenharmony_ci}
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_ci/**
70bf215546Sopenharmony_ci * \brief Reserves an iterator for a fragment shader input varying,
71bf215546Sopenharmony_ci * and calculates its setup data.
72bf215546Sopenharmony_ci *
73bf215546Sopenharmony_ci * \param[in] args The iterator argument data.
74bf215546Sopenharmony_ci * \param[in] i The iterator index.
75bf215546Sopenharmony_ci * \param[in] type The interpolation type of the varying.
76bf215546Sopenharmony_ci * \param[in] f16 Whether the data type is F16 or F32.
77bf215546Sopenharmony_ci * \param[in] components The number of components in the varying.
78bf215546Sopenharmony_ci */
79bf215546Sopenharmony_cistatic void reserve_iterator(struct rogue_iterator_args *args,
80bf215546Sopenharmony_ci                             size_t i,
81bf215546Sopenharmony_ci                             enum glsl_interp_mode type,
82bf215546Sopenharmony_ci                             bool f16,
83bf215546Sopenharmony_ci                             size_t components)
84bf215546Sopenharmony_ci{
85bf215546Sopenharmony_ci   struct ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC data = { 0 };
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ci   assert(components >= 1 && components <= 4);
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci   /* The first iterator (W) *must* be INTERP_MODE_NOPERSPECTIVE. */
90bf215546Sopenharmony_ci   assert(i > 0 || type == INTERP_MODE_NOPERSPECTIVE);
91bf215546Sopenharmony_ci   assert(i < ARRAY_SIZE(args->fpu_iterators));
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci   switch (type) {
94bf215546Sopenharmony_ci   /* Default interpolation is smooth. */
95bf215546Sopenharmony_ci   case INTERP_MODE_NONE:
96bf215546Sopenharmony_ci      data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
97bf215546Sopenharmony_ci      data.perspective = true;
98bf215546Sopenharmony_ci      break;
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci   case INTERP_MODE_NOPERSPECTIVE:
101bf215546Sopenharmony_ci      data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
102bf215546Sopenharmony_ci      data.perspective = false;
103bf215546Sopenharmony_ci      break;
104bf215546Sopenharmony_ci
105bf215546Sopenharmony_ci   default:
106bf215546Sopenharmony_ci      unreachable("Unimplemented interpolation type.");
107bf215546Sopenharmony_ci   }
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_ci   /* Number of components in this varying
110bf215546Sopenharmony_ci    * (corresponds to ROGUE_PDSINST_DOUTI_SIZE_1..4D).
111bf215546Sopenharmony_ci    */
112bf215546Sopenharmony_ci   data.size = (components - 1);
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci   /* TODO: Investigate F16 support. */
115bf215546Sopenharmony_ci   assert(!f16);
116bf215546Sopenharmony_ci   data.f16 = f16;
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci   /* Offsets within the vertex. */
119bf215546Sopenharmony_ci   data.f32_offset = 2 * i;
120bf215546Sopenharmony_ci   data.f16_offset = data.f32_offset;
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ci   ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_pack(&args->fpu_iterators[i], &data);
123bf215546Sopenharmony_ci   args->destination[i] = i;
124bf215546Sopenharmony_ci   args->base[i] = ~0;
125bf215546Sopenharmony_ci   args->components[i] = components;
126bf215546Sopenharmony_ci   ++args->num_fpu_iterators;
127bf215546Sopenharmony_ci}
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_ci/**
130bf215546Sopenharmony_ci * \brief Collects the fragment shader I/O data to feed-back to the driver.
131bf215546Sopenharmony_ci *
132bf215546Sopenharmony_ci * \sa #collect_io_data()
133bf215546Sopenharmony_ci *
134bf215546Sopenharmony_ci * \param[in] common_data Common build data.
135bf215546Sopenharmony_ci * \param[in] fs_data Fragment-specific build data.
136bf215546Sopenharmony_ci * \param[in] nir NIR fragment shader.
137bf215546Sopenharmony_ci * \return true if successful, otherwise false.
138bf215546Sopenharmony_ci */
139bf215546Sopenharmony_cistatic bool collect_io_data_fs(struct rogue_common_build_data *common_data,
140bf215546Sopenharmony_ci                               struct rogue_fs_build_data *fs_data,
141bf215546Sopenharmony_ci                               nir_shader *nir)
142bf215546Sopenharmony_ci{
143bf215546Sopenharmony_ci   size_t num_inputs = nir_count_variables_with_modes(nir, nir_var_shader_in);
144bf215546Sopenharmony_ci   assert(num_inputs < (ARRAY_SIZE(fs_data->iterator_args.fpu_iterators) - 1));
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci   /* Process inputs (if present). */
147bf215546Sopenharmony_ci   if (num_inputs) {
148bf215546Sopenharmony_ci      /* If the fragment shader has inputs, the first iterator
149bf215546Sopenharmony_ci       * must be used for the W component.
150bf215546Sopenharmony_ci       */
151bf215546Sopenharmony_ci      reserve_iterator(&fs_data->iterator_args,
152bf215546Sopenharmony_ci                       0,
153bf215546Sopenharmony_ci                       INTERP_MODE_NOPERSPECTIVE,
154bf215546Sopenharmony_ci                       false,
155bf215546Sopenharmony_ci                       1);
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci      nir_foreach_shader_in_variable (var, nir) {
158bf215546Sopenharmony_ci         size_t i = (var->data.location - VARYING_SLOT_VAR0) + 1;
159bf215546Sopenharmony_ci         size_t components = glsl_get_components(var->type);
160bf215546Sopenharmony_ci         enum glsl_interp_mode interp = var->data.interpolation;
161bf215546Sopenharmony_ci         bool f16 = glsl_type_is_16bit(var->type);
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci         /* Check that arguments are either F16 or F32. */
164bf215546Sopenharmony_ci         assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
165bf215546Sopenharmony_ci         assert(f16 || glsl_type_is_32bit(var->type));
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci         /* Check input location. */
168bf215546Sopenharmony_ci         assert(var->data.location >= VARYING_SLOT_VAR0 &&
169bf215546Sopenharmony_ci                var->data.location <= VARYING_SLOT_VAR31);
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci         reserve_iterator(&fs_data->iterator_args, i, interp, f16, components);
172bf215546Sopenharmony_ci      }
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci      common_data->coeffs = alloc_iterator_regs(&fs_data->iterator_args);
175bf215546Sopenharmony_ci      assert(common_data->coeffs);
176bf215546Sopenharmony_ci      assert(common_data->coeffs < ROGUE_MAX_REG_COEFF);
177bf215546Sopenharmony_ci   }
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci   /* TODO: Process outputs. */
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci   return true;
182bf215546Sopenharmony_ci}
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci/**
185bf215546Sopenharmony_ci * \brief Allocates the vertex shader input registers.
186bf215546Sopenharmony_ci *
187bf215546Sopenharmony_ci * \param[in] inputs The vertex shader input data.
188bf215546Sopenharmony_ci * \return The total number of vertex input registers required.
189bf215546Sopenharmony_ci */
190bf215546Sopenharmony_cistatic size_t alloc_vs_inputs(struct rogue_vertex_inputs *inputs)
191bf215546Sopenharmony_ci{
192bf215546Sopenharmony_ci   size_t vs_inputs = 0;
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_ci   for (size_t u = 0; u < inputs->num_input_vars; ++u) {
195bf215546Sopenharmony_ci      /* Ensure there aren't any gaps. */
196bf215546Sopenharmony_ci      assert(inputs->base[u] == ~0);
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci      inputs->base[u] = vs_inputs;
199bf215546Sopenharmony_ci      vs_inputs += inputs->components[u];
200bf215546Sopenharmony_ci   }
201bf215546Sopenharmony_ci
202bf215546Sopenharmony_ci   return vs_inputs;
203bf215546Sopenharmony_ci}
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci/**
206bf215546Sopenharmony_ci * \brief Allocates the vertex shader outputs.
207bf215546Sopenharmony_ci *
208bf215546Sopenharmony_ci * \param[in] outputs The vertex shader output data.
209bf215546Sopenharmony_ci * \return The total number of vertex outputs required.
210bf215546Sopenharmony_ci */
211bf215546Sopenharmony_cistatic size_t alloc_vs_outputs(struct rogue_vertex_outputs *outputs)
212bf215546Sopenharmony_ci{
213bf215546Sopenharmony_ci   size_t vs_outputs = 0;
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci   for (size_t u = 0; u < outputs->num_output_vars; ++u) {
216bf215546Sopenharmony_ci      /* Ensure there aren't any gaps. */
217bf215546Sopenharmony_ci      assert(outputs->base[u] == ~0);
218bf215546Sopenharmony_ci
219bf215546Sopenharmony_ci      outputs->base[u] = vs_outputs;
220bf215546Sopenharmony_ci      vs_outputs += outputs->components[u];
221bf215546Sopenharmony_ci   }
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_ci   return vs_outputs;
224bf215546Sopenharmony_ci}
225bf215546Sopenharmony_ci
226bf215546Sopenharmony_ci/**
227bf215546Sopenharmony_ci * \brief Counts the varyings used by the vertex shader.
228bf215546Sopenharmony_ci *
229bf215546Sopenharmony_ci * \param[in] outputs The vertex shader output data.
230bf215546Sopenharmony_ci * \return The number of varyings used.
231bf215546Sopenharmony_ci */
232bf215546Sopenharmony_cistatic size_t count_vs_varyings(struct rogue_vertex_outputs *outputs)
233bf215546Sopenharmony_ci{
234bf215546Sopenharmony_ci   size_t varyings = 0;
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci   /* Skip the position. */
237bf215546Sopenharmony_ci   for (size_t u = 1; u < outputs->num_output_vars; ++u)
238bf215546Sopenharmony_ci      varyings += outputs->components[u];
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci   return varyings;
241bf215546Sopenharmony_ci}
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci/**
244bf215546Sopenharmony_ci * \brief Reserves space for a vertex shader input.
245bf215546Sopenharmony_ci *
246bf215546Sopenharmony_ci * \param[in] inputs The vertex input data.
247bf215546Sopenharmony_ci * \param[in] i The vertex input index.
248bf215546Sopenharmony_ci * \param[in] components The number of components in the input.
249bf215546Sopenharmony_ci */
250bf215546Sopenharmony_cistatic void reserve_vs_input(struct rogue_vertex_inputs *inputs,
251bf215546Sopenharmony_ci                             size_t i,
252bf215546Sopenharmony_ci                             size_t components)
253bf215546Sopenharmony_ci{
254bf215546Sopenharmony_ci   assert(components >= 1 && components <= 4);
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_ci   assert(i < ARRAY_SIZE(inputs->base));
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_ci   inputs->base[i] = ~0;
259bf215546Sopenharmony_ci   inputs->components[i] = components;
260bf215546Sopenharmony_ci   ++inputs->num_input_vars;
261bf215546Sopenharmony_ci}
262bf215546Sopenharmony_ci
263bf215546Sopenharmony_ci/**
264bf215546Sopenharmony_ci * \brief Reserves space for a vertex shader output.
265bf215546Sopenharmony_ci *
266bf215546Sopenharmony_ci * \param[in] outputs The vertex output data.
267bf215546Sopenharmony_ci * \param[in] i The vertex output index.
268bf215546Sopenharmony_ci * \param[in] components The number of components in the output.
269bf215546Sopenharmony_ci */
270bf215546Sopenharmony_cistatic void reserve_vs_output(struct rogue_vertex_outputs *outputs,
271bf215546Sopenharmony_ci                              size_t i,
272bf215546Sopenharmony_ci                              size_t components)
273bf215546Sopenharmony_ci{
274bf215546Sopenharmony_ci   assert(components >= 1 && components <= 4);
275bf215546Sopenharmony_ci
276bf215546Sopenharmony_ci   assert(i < ARRAY_SIZE(outputs->base));
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci   outputs->base[i] = ~0;
279bf215546Sopenharmony_ci   outputs->components[i] = components;
280bf215546Sopenharmony_ci   ++outputs->num_output_vars;
281bf215546Sopenharmony_ci}
282bf215546Sopenharmony_ci
283bf215546Sopenharmony_ci/**
284bf215546Sopenharmony_ci * \brief Collects the vertex shader I/O data to feed-back to the driver.
285bf215546Sopenharmony_ci *
286bf215546Sopenharmony_ci * \sa #collect_io_data()
287bf215546Sopenharmony_ci *
288bf215546Sopenharmony_ci * \param[in] common_data Common build data.
289bf215546Sopenharmony_ci * \param[in] vs_data Vertex-specific build data.
290bf215546Sopenharmony_ci * \param[in] nir NIR vertex shader.
291bf215546Sopenharmony_ci * \return true if successful, otherwise false.
292bf215546Sopenharmony_ci */
293bf215546Sopenharmony_cistatic bool collect_io_data_vs(struct rogue_common_build_data *common_data,
294bf215546Sopenharmony_ci                               struct rogue_vs_build_data *vs_data,
295bf215546Sopenharmony_ci                               nir_shader *nir)
296bf215546Sopenharmony_ci{
297bf215546Sopenharmony_ci   ASSERTED bool out_pos_present = false;
298bf215546Sopenharmony_ci   ASSERTED size_t num_outputs =
299bf215546Sopenharmony_ci      nir_count_variables_with_modes(nir, nir_var_shader_out);
300bf215546Sopenharmony_ci
301bf215546Sopenharmony_ci   /* Process inputs. */
302bf215546Sopenharmony_ci   nir_foreach_shader_in_variable (var, nir) {
303bf215546Sopenharmony_ci      size_t components = glsl_get_components(var->type);
304bf215546Sopenharmony_ci      size_t i = var->data.location - VERT_ATTRIB_GENERIC0;
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci      /* Check that inputs are F32. */
307bf215546Sopenharmony_ci      /* TODO: Support other types. */
308bf215546Sopenharmony_ci      assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
309bf215546Sopenharmony_ci      assert(glsl_type_is_32bit(var->type));
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci      /* Check input location. */
312bf215546Sopenharmony_ci      assert(var->data.location >= VERT_ATTRIB_GENERIC0 &&
313bf215546Sopenharmony_ci             var->data.location <= VERT_ATTRIB_GENERIC15);
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci      reserve_vs_input(&vs_data->inputs, i, components);
316bf215546Sopenharmony_ci   }
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci   vs_data->num_vertex_input_regs = alloc_vs_inputs(&vs_data->inputs);
319bf215546Sopenharmony_ci   assert(vs_data->num_vertex_input_regs);
320bf215546Sopenharmony_ci   assert(vs_data->num_vertex_input_regs < ROGUE_MAX_REG_VERTEX_IN);
321bf215546Sopenharmony_ci
322bf215546Sopenharmony_ci   /* Process outputs. */
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci   /* We should always have at least a position variable. */
325bf215546Sopenharmony_ci   assert(num_outputs > 0 && "Invalid number of vertex shader outputs.");
326bf215546Sopenharmony_ci
327bf215546Sopenharmony_ci   nir_foreach_shader_out_variable (var, nir) {
328bf215546Sopenharmony_ci      size_t components = glsl_get_components(var->type);
329bf215546Sopenharmony_ci
330bf215546Sopenharmony_ci      /* Check that outputs are F32. */
331bf215546Sopenharmony_ci      /* TODO: Support other types. */
332bf215546Sopenharmony_ci      assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
333bf215546Sopenharmony_ci      assert(glsl_type_is_32bit(var->type));
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci      if (var->data.location == VARYING_SLOT_POS) {
336bf215546Sopenharmony_ci         assert(components == 4);
337bf215546Sopenharmony_ci         out_pos_present = true;
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_ci         reserve_vs_output(&vs_data->outputs, 0, components);
340bf215546Sopenharmony_ci      } else if ((var->data.location >= VARYING_SLOT_VAR0) &&
341bf215546Sopenharmony_ci                 (var->data.location <= VARYING_SLOT_VAR31)) {
342bf215546Sopenharmony_ci         size_t i = (var->data.location - VARYING_SLOT_VAR0) + 1;
343bf215546Sopenharmony_ci         reserve_vs_output(&vs_data->outputs, i, components);
344bf215546Sopenharmony_ci      } else {
345bf215546Sopenharmony_ci         unreachable("Unsupported vertex output type.");
346bf215546Sopenharmony_ci      }
347bf215546Sopenharmony_ci   }
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_ci   /* Always need the output position to be present. */
350bf215546Sopenharmony_ci   assert(out_pos_present);
351bf215546Sopenharmony_ci
352bf215546Sopenharmony_ci   vs_data->num_vertex_outputs = alloc_vs_outputs(&vs_data->outputs);
353bf215546Sopenharmony_ci   assert(vs_data->num_vertex_outputs);
354bf215546Sopenharmony_ci   assert(vs_data->num_vertex_outputs < ROGUE_MAX_VERTEX_OUTPUTS);
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci   vs_data->num_varyings = count_vs_varyings(&vs_data->outputs);
357bf215546Sopenharmony_ci
358bf215546Sopenharmony_ci   return true;
359bf215546Sopenharmony_ci}
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_ci/**
362bf215546Sopenharmony_ci * \brief Allocates the shared registers that will contain the UBOs.
363bf215546Sopenharmony_ci *
364bf215546Sopenharmony_ci * \param[in] ubo_data The UBO data.
365bf215546Sopenharmony_ci * \return The total number of coefficient registers required by the iterators.
366bf215546Sopenharmony_ci */
367bf215546Sopenharmony_cistatic size_t alloc_ubos(struct rogue_ubo_data *ubo_data)
368bf215546Sopenharmony_ci{
369bf215546Sopenharmony_ci   size_t shareds = 0;
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ci   for (size_t u = 0; u < ubo_data->num_ubo_entries; ++u) {
372bf215546Sopenharmony_ci      /* Ensure there aren't any gaps. */
373bf215546Sopenharmony_ci      assert(ubo_data->dest[u] == ~0);
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ci      ubo_data->dest[u] = shareds;
376bf215546Sopenharmony_ci      shareds += ubo_data->size[u];
377bf215546Sopenharmony_ci   }
378bf215546Sopenharmony_ci
379bf215546Sopenharmony_ci   return shareds;
380bf215546Sopenharmony_ci}
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_ci/**
383bf215546Sopenharmony_ci * \brief Reserves a UBO and calculates its data.
384bf215546Sopenharmony_ci *
385bf215546Sopenharmony_ci * \param[in] ubo_data The UBO data.
386bf215546Sopenharmony_ci * \param[in] desc_set The UBO descriptor set.
387bf215546Sopenharmony_ci * \param[in] binding The UBO binding.
388bf215546Sopenharmony_ci * \param[in] size The size required by the UBO (in dwords).
389bf215546Sopenharmony_ci */
390bf215546Sopenharmony_cistatic void reserve_ubo(struct rogue_ubo_data *ubo_data,
391bf215546Sopenharmony_ci                        size_t desc_set,
392bf215546Sopenharmony_ci                        size_t binding,
393bf215546Sopenharmony_ci                        size_t size)
394bf215546Sopenharmony_ci{
395bf215546Sopenharmony_ci   size_t i = ubo_data->num_ubo_entries;
396bf215546Sopenharmony_ci   assert(i < ARRAY_SIZE(ubo_data->desc_set));
397bf215546Sopenharmony_ci
398bf215546Sopenharmony_ci   ubo_data->desc_set[i] = desc_set;
399bf215546Sopenharmony_ci   ubo_data->binding[i] = binding;
400bf215546Sopenharmony_ci   ubo_data->dest[i] = ~0;
401bf215546Sopenharmony_ci   ubo_data->size[i] = size;
402bf215546Sopenharmony_ci   ++ubo_data->num_ubo_entries;
403bf215546Sopenharmony_ci}
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_ci/**
406bf215546Sopenharmony_ci * \brief Collects UBO data to feed-back to the driver.
407bf215546Sopenharmony_ci *
408bf215546Sopenharmony_ci * \param[in] common_data Common build data.
409bf215546Sopenharmony_ci * \param[in] nir NIR shader.
410bf215546Sopenharmony_ci * \return true if successful, otherwise false.
411bf215546Sopenharmony_ci */
412bf215546Sopenharmony_cistatic bool collect_ubo_data(struct rogue_common_build_data *common_data,
413bf215546Sopenharmony_ci                             nir_shader *nir)
414bf215546Sopenharmony_ci{
415bf215546Sopenharmony_ci   /* Iterate over each UBO. */
416bf215546Sopenharmony_ci   nir_foreach_variable_with_modes (var, nir, nir_var_mem_ubo) {
417bf215546Sopenharmony_ci      size_t desc_set = var->data.driver_location;
418bf215546Sopenharmony_ci      size_t binding = var->data.binding;
419bf215546Sopenharmony_ci      size_t ubo_size_regs = 0;
420bf215546Sopenharmony_ci
421bf215546Sopenharmony_ci      nir_function_impl *entry = nir_shader_get_entrypoint(nir);
422bf215546Sopenharmony_ci      /* Iterate over each load_ubo that uses this UBO. */
423bf215546Sopenharmony_ci      nir_foreach_block (block, entry) {
424bf215546Sopenharmony_ci         nir_foreach_instr (instr, block) {
425bf215546Sopenharmony_ci            if (instr->type != nir_instr_type_intrinsic)
426bf215546Sopenharmony_ci               continue;
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
429bf215546Sopenharmony_ci            if (intr->intrinsic != nir_intrinsic_load_ubo)
430bf215546Sopenharmony_ci               continue;
431bf215546Sopenharmony_ci
432bf215546Sopenharmony_ci            assert(nir_src_num_components(intr->src[0]) == 2);
433bf215546Sopenharmony_ci            assert(nir_intr_src_is_const(intr, 0));
434bf215546Sopenharmony_ci
435bf215546Sopenharmony_ci            size_t load_desc_set = nir_intr_src_comp_const(intr, 0, 0);
436bf215546Sopenharmony_ci            size_t load_binding = nir_intr_src_comp_const(intr, 0, 1);
437bf215546Sopenharmony_ci
438bf215546Sopenharmony_ci            if (load_desc_set != desc_set || load_binding != binding)
439bf215546Sopenharmony_ci               continue;
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_ci            ASSERTED size_t size_bytes = nir_intrinsic_range(intr);
442bf215546Sopenharmony_ci            assert(size_bytes == ROGUE_REG_SIZE_BYTES);
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci            size_t offset_bytes = nir_intrinsic_range_base(intr);
445bf215546Sopenharmony_ci            assert(!(offset_bytes % ROGUE_REG_SIZE_BYTES));
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_ci            size_t offset_regs = offset_bytes / ROGUE_REG_SIZE_BYTES;
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_ci            /* TODO: Put offsets in a BITSET_DECLARE and check for gaps. */
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci            /* Find the largest load offset. */
452bf215546Sopenharmony_ci            ubo_size_regs = MAX2(ubo_size_regs, offset_regs);
453bf215546Sopenharmony_ci         }
454bf215546Sopenharmony_ci      }
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_ci      /* UBO size = largest offset + 1. */
457bf215546Sopenharmony_ci      ++ubo_size_regs;
458bf215546Sopenharmony_ci
459bf215546Sopenharmony_ci      reserve_ubo(&common_data->ubo_data, desc_set, binding, ubo_size_regs);
460bf215546Sopenharmony_ci   }
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci   common_data->shareds = alloc_ubos(&common_data->ubo_data);
463bf215546Sopenharmony_ci   assert(common_data->shareds < ROGUE_MAX_REG_SHARED);
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci   return true;
466bf215546Sopenharmony_ci}
467bf215546Sopenharmony_ci
468bf215546Sopenharmony_ci/**
469bf215546Sopenharmony_ci * \brief Collects I/O data to feed-back to the driver.
470bf215546Sopenharmony_ci *
471bf215546Sopenharmony_ci * Collects the inputs/outputs/memory required, and feeds that back to the
472bf215546Sopenharmony_ci * driver. Done at this stage rather than at the start of rogue_to_binary, so
473bf215546Sopenharmony_ci * that all the I/O of all the shader stages is known before backend
474bf215546Sopenharmony_ci * compilation, which would let us do things like cull unused inputs.
475bf215546Sopenharmony_ci *
476bf215546Sopenharmony_ci * \param[in] ctx Shared multi-stage build context.
477bf215546Sopenharmony_ci * \param[in] nir NIR shader.
478bf215546Sopenharmony_ci * \return true if successful, otherwise false.
479bf215546Sopenharmony_ci */
480bf215546Sopenharmony_cibool rogue_collect_io_data(struct rogue_build_ctx *ctx, nir_shader *nir)
481bf215546Sopenharmony_ci{
482bf215546Sopenharmony_ci   gl_shader_stage stage = nir->info.stage;
483bf215546Sopenharmony_ci   struct rogue_common_build_data *common_data = &ctx->common_data[stage];
484bf215546Sopenharmony_ci
485bf215546Sopenharmony_ci   /* Collect stage-agnostic data. */
486bf215546Sopenharmony_ci   if (!collect_ubo_data(common_data, nir))
487bf215546Sopenharmony_ci      return false;
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_ci   /* Collect stage-specific data. */
490bf215546Sopenharmony_ci   switch (stage) {
491bf215546Sopenharmony_ci   case MESA_SHADER_FRAGMENT:
492bf215546Sopenharmony_ci      return collect_io_data_fs(common_data, &ctx->stage_data.fs, nir);
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci   case MESA_SHADER_VERTEX:
495bf215546Sopenharmony_ci      return collect_io_data_vs(common_data, &ctx->stage_data.vs, nir);
496bf215546Sopenharmony_ci
497bf215546Sopenharmony_ci   default:
498bf215546Sopenharmony_ci      break;
499bf215546Sopenharmony_ci   }
500bf215546Sopenharmony_ci
501bf215546Sopenharmony_ci   return false;
502bf215546Sopenharmony_ci}
503bf215546Sopenharmony_ci
504bf215546Sopenharmony_ci/**
505bf215546Sopenharmony_ci * \brief Returns the allocated coefficient register index for a component of an
506bf215546Sopenharmony_ci * input varying location.
507bf215546Sopenharmony_ci *
508bf215546Sopenharmony_ci * \param[in] args The allocated iterator argument data.
509bf215546Sopenharmony_ci * \param[in] location The input varying location, or ~0 for the W coefficient.
510bf215546Sopenharmony_ci * \param[in] component The requested component.
511bf215546Sopenharmony_ci * \return The coefficient register index.
512bf215546Sopenharmony_ci */
513bf215546Sopenharmony_cisize_t rogue_coeff_index_fs(struct rogue_iterator_args *args,
514bf215546Sopenharmony_ci                            gl_varying_slot location,
515bf215546Sopenharmony_ci                            size_t component)
516bf215546Sopenharmony_ci{
517bf215546Sopenharmony_ci   size_t i;
518bf215546Sopenharmony_ci
519bf215546Sopenharmony_ci   /* Special case: W coefficient. */
520bf215546Sopenharmony_ci   if (location == ~0) {
521bf215546Sopenharmony_ci      /* The W component shouldn't be the only one. */
522bf215546Sopenharmony_ci      assert(args->num_fpu_iterators > 1);
523bf215546Sopenharmony_ci      assert(args->destination[0] == 0);
524bf215546Sopenharmony_ci      return 0;
525bf215546Sopenharmony_ci   }
526bf215546Sopenharmony_ci
527bf215546Sopenharmony_ci   i = (location - VARYING_SLOT_VAR0) + 1;
528bf215546Sopenharmony_ci   assert(location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31);
529bf215546Sopenharmony_ci   assert(i < args->num_fpu_iterators);
530bf215546Sopenharmony_ci   assert(component < args->components[i]);
531bf215546Sopenharmony_ci   assert(args->base[i] != ~0);
532bf215546Sopenharmony_ci
533bf215546Sopenharmony_ci   return args->base[i] + (ROGUE_COEFF_ALIGN * component);
534bf215546Sopenharmony_ci}
535bf215546Sopenharmony_ci
536bf215546Sopenharmony_ci/**
537bf215546Sopenharmony_ci * \brief Returns the allocated vertex output index for a component of an input
538bf215546Sopenharmony_ci * varying location.
539bf215546Sopenharmony_ci *
540bf215546Sopenharmony_ci * \param[in] outputs The vertex output data.
541bf215546Sopenharmony_ci * \param[in] location The output varying location.
542bf215546Sopenharmony_ci * \param[in] component The requested component.
543bf215546Sopenharmony_ci * \return The vertex output index.
544bf215546Sopenharmony_ci */
545bf215546Sopenharmony_cisize_t rogue_output_index_vs(struct rogue_vertex_outputs *outputs,
546bf215546Sopenharmony_ci                             gl_varying_slot location,
547bf215546Sopenharmony_ci                             size_t component)
548bf215546Sopenharmony_ci{
549bf215546Sopenharmony_ci   size_t i;
550bf215546Sopenharmony_ci
551bf215546Sopenharmony_ci   if (location == VARYING_SLOT_POS) {
552bf215546Sopenharmony_ci      /* Always at location 0. */
553bf215546Sopenharmony_ci      assert(outputs->base[0] == 0);
554bf215546Sopenharmony_ci      i = 0;
555bf215546Sopenharmony_ci   } else if ((location >= VARYING_SLOT_VAR0) &&
556bf215546Sopenharmony_ci              (location <= VARYING_SLOT_VAR31)) {
557bf215546Sopenharmony_ci      i = (location - VARYING_SLOT_VAR0) + 1;
558bf215546Sopenharmony_ci   } else {
559bf215546Sopenharmony_ci      unreachable("Unsupported vertex output type.");
560bf215546Sopenharmony_ci   }
561bf215546Sopenharmony_ci
562bf215546Sopenharmony_ci   assert(i < outputs->num_output_vars);
563bf215546Sopenharmony_ci   assert(component < outputs->components[i]);
564bf215546Sopenharmony_ci   assert(outputs->base[i] != ~0);
565bf215546Sopenharmony_ci
566bf215546Sopenharmony_ci   return outputs->base[i] + component;
567bf215546Sopenharmony_ci}
568bf215546Sopenharmony_ci
569bf215546Sopenharmony_ci/**
570bf215546Sopenharmony_ci * \brief Returns the allocated shared register index for a given UBO offset.
571bf215546Sopenharmony_ci *
572bf215546Sopenharmony_ci * \param[in] ubo_data The UBO data.
573bf215546Sopenharmony_ci * \param[in] desc_set The UBO descriptor set.
574bf215546Sopenharmony_ci * \param[in] binding The UBO binding.
575bf215546Sopenharmony_ci * \param[in] offset_bytes The UBO offset in bytes.
576bf215546Sopenharmony_ci * \return The UBO offset shared register index.
577bf215546Sopenharmony_ci */
578bf215546Sopenharmony_cisize_t rogue_ubo_reg(struct rogue_ubo_data *ubo_data,
579bf215546Sopenharmony_ci                     size_t desc_set,
580bf215546Sopenharmony_ci                     size_t binding,
581bf215546Sopenharmony_ci                     size_t offset_bytes)
582bf215546Sopenharmony_ci{
583bf215546Sopenharmony_ci   size_t ubo_index = ~0;
584bf215546Sopenharmony_ci   size_t offset_regs;
585bf215546Sopenharmony_ci
586bf215546Sopenharmony_ci   /* Find UBO located at (desc_set, binding). */
587bf215546Sopenharmony_ci   for (size_t u = 0; u < ubo_data->num_ubo_entries; ++u) {
588bf215546Sopenharmony_ci      if (ubo_data->dest[u] == ~0)
589bf215546Sopenharmony_ci         continue;
590bf215546Sopenharmony_ci
591bf215546Sopenharmony_ci      if (ubo_data->desc_set[u] != desc_set || ubo_data->binding[u] != binding)
592bf215546Sopenharmony_ci         continue;
593bf215546Sopenharmony_ci
594bf215546Sopenharmony_ci      ubo_index = u;
595bf215546Sopenharmony_ci      break;
596bf215546Sopenharmony_ci   }
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci   assert(ubo_index != ~0);
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_ci   assert(!(offset_bytes % ROGUE_REG_SIZE_BYTES));
601bf215546Sopenharmony_ci   offset_regs = offset_bytes / ROGUE_REG_SIZE_BYTES;
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_ci   return ubo_data->dest[ubo_index] + offset_regs;
604bf215546Sopenharmony_ci}
605