1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2022 Imagination Technologies Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a copy 5bf215546Sopenharmony_ci * of this software and associated documentation files (the "Software"), to deal 6bf215546Sopenharmony_ci * in the Software without restriction, including without limitation the rights 7bf215546Sopenharmony_ci * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8bf215546Sopenharmony_ci * copies of the Software, and to permit persons to whom the Software is 9bf215546Sopenharmony_ci * furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18bf215546Sopenharmony_ci * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include <assert.h> 25bf215546Sopenharmony_ci#include <stdbool.h> 26bf215546Sopenharmony_ci#include <stddef.h> 27bf215546Sopenharmony_ci#include <stdint.h> 28bf215546Sopenharmony_ci#include <stdlib.h> 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "compiler/shader_enums.h" 31bf215546Sopenharmony_ci#include "nir/nir.h" 32bf215546Sopenharmony_ci#include "rogue_build_data.h" 33bf215546Sopenharmony_ci#include "rogue_nir_helpers.h" 34bf215546Sopenharmony_ci#include "rogue_operand.h" 35bf215546Sopenharmony_ci#include "util/macros.h" 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci#define __pvr_address_type uint64_t 38bf215546Sopenharmony_ci#define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr) 39bf215546Sopenharmony_ci#define __pvr_make_address(addr_u64) (addr_u64) 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_ci#include "csbgen/rogue_pds.h" 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci#undef __pvr_make_address 44bf215546Sopenharmony_ci#undef __pvr_get_address 45bf215546Sopenharmony_ci#undef __pvr_address_type 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ci/** 48bf215546Sopenharmony_ci * \brief Allocates the coefficient registers that will contain the iterator 49bf215546Sopenharmony_ci * data for the fragment shader input varyings. 50bf215546Sopenharmony_ci * 51bf215546Sopenharmony_ci * \param[in] args The iterator argument data. 52bf215546Sopenharmony_ci * \return The total number of coefficient registers required by the iterators. 53bf215546Sopenharmony_ci */ 54bf215546Sopenharmony_cistatic size_t alloc_iterator_regs(struct rogue_iterator_args *args) 55bf215546Sopenharmony_ci{ 56bf215546Sopenharmony_ci size_t coeffs = 0; 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci for (size_t u = 0; u < args->num_fpu_iterators; ++u) { 59bf215546Sopenharmony_ci /* Ensure there aren't any gaps. */ 60bf215546Sopenharmony_ci assert(args->base[u] == ~0); 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci args->base[u] = coeffs; 63bf215546Sopenharmony_ci coeffs += ROGUE_COEFF_ALIGN * args->components[u]; 64bf215546Sopenharmony_ci } 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci return coeffs; 67bf215546Sopenharmony_ci} 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci/** 70bf215546Sopenharmony_ci * \brief Reserves an iterator for a fragment shader input varying, 71bf215546Sopenharmony_ci * and calculates its setup data. 72bf215546Sopenharmony_ci * 73bf215546Sopenharmony_ci * \param[in] args The iterator argument data. 74bf215546Sopenharmony_ci * \param[in] i The iterator index. 75bf215546Sopenharmony_ci * \param[in] type The interpolation type of the varying. 76bf215546Sopenharmony_ci * \param[in] f16 Whether the data type is F16 or F32. 77bf215546Sopenharmony_ci * \param[in] components The number of components in the varying. 78bf215546Sopenharmony_ci */ 79bf215546Sopenharmony_cistatic void reserve_iterator(struct rogue_iterator_args *args, 80bf215546Sopenharmony_ci size_t i, 81bf215546Sopenharmony_ci enum glsl_interp_mode type, 82bf215546Sopenharmony_ci bool f16, 83bf215546Sopenharmony_ci size_t components) 84bf215546Sopenharmony_ci{ 85bf215546Sopenharmony_ci struct ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC data = { 0 }; 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci assert(components >= 1 && components <= 4); 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci /* The first iterator (W) *must* be INTERP_MODE_NOPERSPECTIVE. */ 90bf215546Sopenharmony_ci assert(i > 0 || type == INTERP_MODE_NOPERSPECTIVE); 91bf215546Sopenharmony_ci assert(i < ARRAY_SIZE(args->fpu_iterators)); 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci switch (type) { 94bf215546Sopenharmony_ci /* Default interpolation is smooth. */ 95bf215546Sopenharmony_ci case INTERP_MODE_NONE: 96bf215546Sopenharmony_ci data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD; 97bf215546Sopenharmony_ci data.perspective = true; 98bf215546Sopenharmony_ci break; 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci case INTERP_MODE_NOPERSPECTIVE: 101bf215546Sopenharmony_ci data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD; 102bf215546Sopenharmony_ci data.perspective = false; 103bf215546Sopenharmony_ci break; 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci default: 106bf215546Sopenharmony_ci unreachable("Unimplemented interpolation type."); 107bf215546Sopenharmony_ci } 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci /* Number of components in this varying 110bf215546Sopenharmony_ci * (corresponds to ROGUE_PDSINST_DOUTI_SIZE_1..4D). 111bf215546Sopenharmony_ci */ 112bf215546Sopenharmony_ci data.size = (components - 1); 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci /* TODO: Investigate F16 support. */ 115bf215546Sopenharmony_ci assert(!f16); 116bf215546Sopenharmony_ci data.f16 = f16; 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci /* Offsets within the vertex. */ 119bf215546Sopenharmony_ci data.f32_offset = 2 * i; 120bf215546Sopenharmony_ci data.f16_offset = data.f32_offset; 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_pack(&args->fpu_iterators[i], &data); 123bf215546Sopenharmony_ci args->destination[i] = i; 124bf215546Sopenharmony_ci args->base[i] = ~0; 125bf215546Sopenharmony_ci args->components[i] = components; 126bf215546Sopenharmony_ci ++args->num_fpu_iterators; 127bf215546Sopenharmony_ci} 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci/** 130bf215546Sopenharmony_ci * \brief Collects the fragment shader I/O data to feed-back to the driver. 131bf215546Sopenharmony_ci * 132bf215546Sopenharmony_ci * \sa #collect_io_data() 133bf215546Sopenharmony_ci * 134bf215546Sopenharmony_ci * \param[in] common_data Common build data. 135bf215546Sopenharmony_ci * \param[in] fs_data Fragment-specific build data. 136bf215546Sopenharmony_ci * \param[in] nir NIR fragment shader. 137bf215546Sopenharmony_ci * \return true if successful, otherwise false. 138bf215546Sopenharmony_ci */ 139bf215546Sopenharmony_cistatic bool collect_io_data_fs(struct rogue_common_build_data *common_data, 140bf215546Sopenharmony_ci struct rogue_fs_build_data *fs_data, 141bf215546Sopenharmony_ci nir_shader *nir) 142bf215546Sopenharmony_ci{ 143bf215546Sopenharmony_ci size_t num_inputs = nir_count_variables_with_modes(nir, nir_var_shader_in); 144bf215546Sopenharmony_ci assert(num_inputs < (ARRAY_SIZE(fs_data->iterator_args.fpu_iterators) - 1)); 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci /* Process inputs (if present). */ 147bf215546Sopenharmony_ci if (num_inputs) { 148bf215546Sopenharmony_ci /* If the fragment shader has inputs, the first iterator 149bf215546Sopenharmony_ci * must be used for the W component. 150bf215546Sopenharmony_ci */ 151bf215546Sopenharmony_ci reserve_iterator(&fs_data->iterator_args, 152bf215546Sopenharmony_ci 0, 153bf215546Sopenharmony_ci INTERP_MODE_NOPERSPECTIVE, 154bf215546Sopenharmony_ci false, 155bf215546Sopenharmony_ci 1); 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci nir_foreach_shader_in_variable (var, nir) { 158bf215546Sopenharmony_ci size_t i = (var->data.location - VARYING_SLOT_VAR0) + 1; 159bf215546Sopenharmony_ci size_t components = glsl_get_components(var->type); 160bf215546Sopenharmony_ci enum glsl_interp_mode interp = var->data.interpolation; 161bf215546Sopenharmony_ci bool f16 = glsl_type_is_16bit(var->type); 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci /* Check that arguments are either F16 or F32. */ 164bf215546Sopenharmony_ci assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT); 165bf215546Sopenharmony_ci assert(f16 || glsl_type_is_32bit(var->type)); 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci /* Check input location. */ 168bf215546Sopenharmony_ci assert(var->data.location >= VARYING_SLOT_VAR0 && 169bf215546Sopenharmony_ci var->data.location <= VARYING_SLOT_VAR31); 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci reserve_iterator(&fs_data->iterator_args, i, interp, f16, components); 172bf215546Sopenharmony_ci } 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci common_data->coeffs = alloc_iterator_regs(&fs_data->iterator_args); 175bf215546Sopenharmony_ci assert(common_data->coeffs); 176bf215546Sopenharmony_ci assert(common_data->coeffs < ROGUE_MAX_REG_COEFF); 177bf215546Sopenharmony_ci } 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci /* TODO: Process outputs. */ 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci return true; 182bf215546Sopenharmony_ci} 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci/** 185bf215546Sopenharmony_ci * \brief Allocates the vertex shader input registers. 186bf215546Sopenharmony_ci * 187bf215546Sopenharmony_ci * \param[in] inputs The vertex shader input data. 188bf215546Sopenharmony_ci * \return The total number of vertex input registers required. 189bf215546Sopenharmony_ci */ 190bf215546Sopenharmony_cistatic size_t alloc_vs_inputs(struct rogue_vertex_inputs *inputs) 191bf215546Sopenharmony_ci{ 192bf215546Sopenharmony_ci size_t vs_inputs = 0; 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci for (size_t u = 0; u < inputs->num_input_vars; ++u) { 195bf215546Sopenharmony_ci /* Ensure there aren't any gaps. */ 196bf215546Sopenharmony_ci assert(inputs->base[u] == ~0); 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci inputs->base[u] = vs_inputs; 199bf215546Sopenharmony_ci vs_inputs += inputs->components[u]; 200bf215546Sopenharmony_ci } 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci return vs_inputs; 203bf215546Sopenharmony_ci} 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci/** 206bf215546Sopenharmony_ci * \brief Allocates the vertex shader outputs. 207bf215546Sopenharmony_ci * 208bf215546Sopenharmony_ci * \param[in] outputs The vertex shader output data. 209bf215546Sopenharmony_ci * \return The total number of vertex outputs required. 210bf215546Sopenharmony_ci */ 211bf215546Sopenharmony_cistatic size_t alloc_vs_outputs(struct rogue_vertex_outputs *outputs) 212bf215546Sopenharmony_ci{ 213bf215546Sopenharmony_ci size_t vs_outputs = 0; 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci for (size_t u = 0; u < outputs->num_output_vars; ++u) { 216bf215546Sopenharmony_ci /* Ensure there aren't any gaps. */ 217bf215546Sopenharmony_ci assert(outputs->base[u] == ~0); 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci outputs->base[u] = vs_outputs; 220bf215546Sopenharmony_ci vs_outputs += outputs->components[u]; 221bf215546Sopenharmony_ci } 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci return vs_outputs; 224bf215546Sopenharmony_ci} 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci/** 227bf215546Sopenharmony_ci * \brief Counts the varyings used by the vertex shader. 228bf215546Sopenharmony_ci * 229bf215546Sopenharmony_ci * \param[in] outputs The vertex shader output data. 230bf215546Sopenharmony_ci * \return The number of varyings used. 231bf215546Sopenharmony_ci */ 232bf215546Sopenharmony_cistatic size_t count_vs_varyings(struct rogue_vertex_outputs *outputs) 233bf215546Sopenharmony_ci{ 234bf215546Sopenharmony_ci size_t varyings = 0; 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci /* Skip the position. */ 237bf215546Sopenharmony_ci for (size_t u = 1; u < outputs->num_output_vars; ++u) 238bf215546Sopenharmony_ci varyings += outputs->components[u]; 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci return varyings; 241bf215546Sopenharmony_ci} 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci/** 244bf215546Sopenharmony_ci * \brief Reserves space for a vertex shader input. 245bf215546Sopenharmony_ci * 246bf215546Sopenharmony_ci * \param[in] inputs The vertex input data. 247bf215546Sopenharmony_ci * \param[in] i The vertex input index. 248bf215546Sopenharmony_ci * \param[in] components The number of components in the input. 249bf215546Sopenharmony_ci */ 250bf215546Sopenharmony_cistatic void reserve_vs_input(struct rogue_vertex_inputs *inputs, 251bf215546Sopenharmony_ci size_t i, 252bf215546Sopenharmony_ci size_t components) 253bf215546Sopenharmony_ci{ 254bf215546Sopenharmony_ci assert(components >= 1 && components <= 4); 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci assert(i < ARRAY_SIZE(inputs->base)); 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci inputs->base[i] = ~0; 259bf215546Sopenharmony_ci inputs->components[i] = components; 260bf215546Sopenharmony_ci ++inputs->num_input_vars; 261bf215546Sopenharmony_ci} 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_ci/** 264bf215546Sopenharmony_ci * \brief Reserves space for a vertex shader output. 265bf215546Sopenharmony_ci * 266bf215546Sopenharmony_ci * \param[in] outputs The vertex output data. 267bf215546Sopenharmony_ci * \param[in] i The vertex output index. 268bf215546Sopenharmony_ci * \param[in] components The number of components in the output. 269bf215546Sopenharmony_ci */ 270bf215546Sopenharmony_cistatic void reserve_vs_output(struct rogue_vertex_outputs *outputs, 271bf215546Sopenharmony_ci size_t i, 272bf215546Sopenharmony_ci size_t components) 273bf215546Sopenharmony_ci{ 274bf215546Sopenharmony_ci assert(components >= 1 && components <= 4); 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_ci assert(i < ARRAY_SIZE(outputs->base)); 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci outputs->base[i] = ~0; 279bf215546Sopenharmony_ci outputs->components[i] = components; 280bf215546Sopenharmony_ci ++outputs->num_output_vars; 281bf215546Sopenharmony_ci} 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci/** 284bf215546Sopenharmony_ci * \brief Collects the vertex shader I/O data to feed-back to the driver. 285bf215546Sopenharmony_ci * 286bf215546Sopenharmony_ci * \sa #collect_io_data() 287bf215546Sopenharmony_ci * 288bf215546Sopenharmony_ci * \param[in] common_data Common build data. 289bf215546Sopenharmony_ci * \param[in] vs_data Vertex-specific build data. 290bf215546Sopenharmony_ci * \param[in] nir NIR vertex shader. 291bf215546Sopenharmony_ci * \return true if successful, otherwise false. 292bf215546Sopenharmony_ci */ 293bf215546Sopenharmony_cistatic bool collect_io_data_vs(struct rogue_common_build_data *common_data, 294bf215546Sopenharmony_ci struct rogue_vs_build_data *vs_data, 295bf215546Sopenharmony_ci nir_shader *nir) 296bf215546Sopenharmony_ci{ 297bf215546Sopenharmony_ci ASSERTED bool out_pos_present = false; 298bf215546Sopenharmony_ci ASSERTED size_t num_outputs = 299bf215546Sopenharmony_ci nir_count_variables_with_modes(nir, nir_var_shader_out); 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_ci /* Process inputs. */ 302bf215546Sopenharmony_ci nir_foreach_shader_in_variable (var, nir) { 303bf215546Sopenharmony_ci size_t components = glsl_get_components(var->type); 304bf215546Sopenharmony_ci size_t i = var->data.location - VERT_ATTRIB_GENERIC0; 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci /* Check that inputs are F32. */ 307bf215546Sopenharmony_ci /* TODO: Support other types. */ 308bf215546Sopenharmony_ci assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT); 309bf215546Sopenharmony_ci assert(glsl_type_is_32bit(var->type)); 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci /* Check input location. */ 312bf215546Sopenharmony_ci assert(var->data.location >= VERT_ATTRIB_GENERIC0 && 313bf215546Sopenharmony_ci var->data.location <= VERT_ATTRIB_GENERIC15); 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci reserve_vs_input(&vs_data->inputs, i, components); 316bf215546Sopenharmony_ci } 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci vs_data->num_vertex_input_regs = alloc_vs_inputs(&vs_data->inputs); 319bf215546Sopenharmony_ci assert(vs_data->num_vertex_input_regs); 320bf215546Sopenharmony_ci assert(vs_data->num_vertex_input_regs < ROGUE_MAX_REG_VERTEX_IN); 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci /* Process outputs. */ 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci /* We should always have at least a position variable. */ 325bf215546Sopenharmony_ci assert(num_outputs > 0 && "Invalid number of vertex shader outputs."); 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci nir_foreach_shader_out_variable (var, nir) { 328bf215546Sopenharmony_ci size_t components = glsl_get_components(var->type); 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci /* Check that outputs are F32. */ 331bf215546Sopenharmony_ci /* TODO: Support other types. */ 332bf215546Sopenharmony_ci assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT); 333bf215546Sopenharmony_ci assert(glsl_type_is_32bit(var->type)); 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci if (var->data.location == VARYING_SLOT_POS) { 336bf215546Sopenharmony_ci assert(components == 4); 337bf215546Sopenharmony_ci out_pos_present = true; 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci reserve_vs_output(&vs_data->outputs, 0, components); 340bf215546Sopenharmony_ci } else if ((var->data.location >= VARYING_SLOT_VAR0) && 341bf215546Sopenharmony_ci (var->data.location <= VARYING_SLOT_VAR31)) { 342bf215546Sopenharmony_ci size_t i = (var->data.location - VARYING_SLOT_VAR0) + 1; 343bf215546Sopenharmony_ci reserve_vs_output(&vs_data->outputs, i, components); 344bf215546Sopenharmony_ci } else { 345bf215546Sopenharmony_ci unreachable("Unsupported vertex output type."); 346bf215546Sopenharmony_ci } 347bf215546Sopenharmony_ci } 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci /* Always need the output position to be present. */ 350bf215546Sopenharmony_ci assert(out_pos_present); 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_ci vs_data->num_vertex_outputs = alloc_vs_outputs(&vs_data->outputs); 353bf215546Sopenharmony_ci assert(vs_data->num_vertex_outputs); 354bf215546Sopenharmony_ci assert(vs_data->num_vertex_outputs < ROGUE_MAX_VERTEX_OUTPUTS); 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci vs_data->num_varyings = count_vs_varyings(&vs_data->outputs); 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_ci return true; 359bf215546Sopenharmony_ci} 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci/** 362bf215546Sopenharmony_ci * \brief Allocates the shared registers that will contain the UBOs. 363bf215546Sopenharmony_ci * 364bf215546Sopenharmony_ci * \param[in] ubo_data The UBO data. 365bf215546Sopenharmony_ci * \return The total number of coefficient registers required by the iterators. 366bf215546Sopenharmony_ci */ 367bf215546Sopenharmony_cistatic size_t alloc_ubos(struct rogue_ubo_data *ubo_data) 368bf215546Sopenharmony_ci{ 369bf215546Sopenharmony_ci size_t shareds = 0; 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci for (size_t u = 0; u < ubo_data->num_ubo_entries; ++u) { 372bf215546Sopenharmony_ci /* Ensure there aren't any gaps. */ 373bf215546Sopenharmony_ci assert(ubo_data->dest[u] == ~0); 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci ubo_data->dest[u] = shareds; 376bf215546Sopenharmony_ci shareds += ubo_data->size[u]; 377bf215546Sopenharmony_ci } 378bf215546Sopenharmony_ci 379bf215546Sopenharmony_ci return shareds; 380bf215546Sopenharmony_ci} 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci/** 383bf215546Sopenharmony_ci * \brief Reserves a UBO and calculates its data. 384bf215546Sopenharmony_ci * 385bf215546Sopenharmony_ci * \param[in] ubo_data The UBO data. 386bf215546Sopenharmony_ci * \param[in] desc_set The UBO descriptor set. 387bf215546Sopenharmony_ci * \param[in] binding The UBO binding. 388bf215546Sopenharmony_ci * \param[in] size The size required by the UBO (in dwords). 389bf215546Sopenharmony_ci */ 390bf215546Sopenharmony_cistatic void reserve_ubo(struct rogue_ubo_data *ubo_data, 391bf215546Sopenharmony_ci size_t desc_set, 392bf215546Sopenharmony_ci size_t binding, 393bf215546Sopenharmony_ci size_t size) 394bf215546Sopenharmony_ci{ 395bf215546Sopenharmony_ci size_t i = ubo_data->num_ubo_entries; 396bf215546Sopenharmony_ci assert(i < ARRAY_SIZE(ubo_data->desc_set)); 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci ubo_data->desc_set[i] = desc_set; 399bf215546Sopenharmony_ci ubo_data->binding[i] = binding; 400bf215546Sopenharmony_ci ubo_data->dest[i] = ~0; 401bf215546Sopenharmony_ci ubo_data->size[i] = size; 402bf215546Sopenharmony_ci ++ubo_data->num_ubo_entries; 403bf215546Sopenharmony_ci} 404bf215546Sopenharmony_ci 405bf215546Sopenharmony_ci/** 406bf215546Sopenharmony_ci * \brief Collects UBO data to feed-back to the driver. 407bf215546Sopenharmony_ci * 408bf215546Sopenharmony_ci * \param[in] common_data Common build data. 409bf215546Sopenharmony_ci * \param[in] nir NIR shader. 410bf215546Sopenharmony_ci * \return true if successful, otherwise false. 411bf215546Sopenharmony_ci */ 412bf215546Sopenharmony_cistatic bool collect_ubo_data(struct rogue_common_build_data *common_data, 413bf215546Sopenharmony_ci nir_shader *nir) 414bf215546Sopenharmony_ci{ 415bf215546Sopenharmony_ci /* Iterate over each UBO. */ 416bf215546Sopenharmony_ci nir_foreach_variable_with_modes (var, nir, nir_var_mem_ubo) { 417bf215546Sopenharmony_ci size_t desc_set = var->data.driver_location; 418bf215546Sopenharmony_ci size_t binding = var->data.binding; 419bf215546Sopenharmony_ci size_t ubo_size_regs = 0; 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci nir_function_impl *entry = nir_shader_get_entrypoint(nir); 422bf215546Sopenharmony_ci /* Iterate over each load_ubo that uses this UBO. */ 423bf215546Sopenharmony_ci nir_foreach_block (block, entry) { 424bf215546Sopenharmony_ci nir_foreach_instr (instr, block) { 425bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 426bf215546Sopenharmony_ci continue; 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 429bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_load_ubo) 430bf215546Sopenharmony_ci continue; 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_ci assert(nir_src_num_components(intr->src[0]) == 2); 433bf215546Sopenharmony_ci assert(nir_intr_src_is_const(intr, 0)); 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci size_t load_desc_set = nir_intr_src_comp_const(intr, 0, 0); 436bf215546Sopenharmony_ci size_t load_binding = nir_intr_src_comp_const(intr, 0, 1); 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci if (load_desc_set != desc_set || load_binding != binding) 439bf215546Sopenharmony_ci continue; 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci ASSERTED size_t size_bytes = nir_intrinsic_range(intr); 442bf215546Sopenharmony_ci assert(size_bytes == ROGUE_REG_SIZE_BYTES); 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci size_t offset_bytes = nir_intrinsic_range_base(intr); 445bf215546Sopenharmony_ci assert(!(offset_bytes % ROGUE_REG_SIZE_BYTES)); 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci size_t offset_regs = offset_bytes / ROGUE_REG_SIZE_BYTES; 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci /* TODO: Put offsets in a BITSET_DECLARE and check for gaps. */ 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci /* Find the largest load offset. */ 452bf215546Sopenharmony_ci ubo_size_regs = MAX2(ubo_size_regs, offset_regs); 453bf215546Sopenharmony_ci } 454bf215546Sopenharmony_ci } 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci /* UBO size = largest offset + 1. */ 457bf215546Sopenharmony_ci ++ubo_size_regs; 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci reserve_ubo(&common_data->ubo_data, desc_set, binding, ubo_size_regs); 460bf215546Sopenharmony_ci } 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci common_data->shareds = alloc_ubos(&common_data->ubo_data); 463bf215546Sopenharmony_ci assert(common_data->shareds < ROGUE_MAX_REG_SHARED); 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci return true; 466bf215546Sopenharmony_ci} 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_ci/** 469bf215546Sopenharmony_ci * \brief Collects I/O data to feed-back to the driver. 470bf215546Sopenharmony_ci * 471bf215546Sopenharmony_ci * Collects the inputs/outputs/memory required, and feeds that back to the 472bf215546Sopenharmony_ci * driver. Done at this stage rather than at the start of rogue_to_binary, so 473bf215546Sopenharmony_ci * that all the I/O of all the shader stages is known before backend 474bf215546Sopenharmony_ci * compilation, which would let us do things like cull unused inputs. 475bf215546Sopenharmony_ci * 476bf215546Sopenharmony_ci * \param[in] ctx Shared multi-stage build context. 477bf215546Sopenharmony_ci * \param[in] nir NIR shader. 478bf215546Sopenharmony_ci * \return true if successful, otherwise false. 479bf215546Sopenharmony_ci */ 480bf215546Sopenharmony_cibool rogue_collect_io_data(struct rogue_build_ctx *ctx, nir_shader *nir) 481bf215546Sopenharmony_ci{ 482bf215546Sopenharmony_ci gl_shader_stage stage = nir->info.stage; 483bf215546Sopenharmony_ci struct rogue_common_build_data *common_data = &ctx->common_data[stage]; 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci /* Collect stage-agnostic data. */ 486bf215546Sopenharmony_ci if (!collect_ubo_data(common_data, nir)) 487bf215546Sopenharmony_ci return false; 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci /* Collect stage-specific data. */ 490bf215546Sopenharmony_ci switch (stage) { 491bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: 492bf215546Sopenharmony_ci return collect_io_data_fs(common_data, &ctx->stage_data.fs, nir); 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: 495bf215546Sopenharmony_ci return collect_io_data_vs(common_data, &ctx->stage_data.vs, nir); 496bf215546Sopenharmony_ci 497bf215546Sopenharmony_ci default: 498bf215546Sopenharmony_ci break; 499bf215546Sopenharmony_ci } 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci return false; 502bf215546Sopenharmony_ci} 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_ci/** 505bf215546Sopenharmony_ci * \brief Returns the allocated coefficient register index for a component of an 506bf215546Sopenharmony_ci * input varying location. 507bf215546Sopenharmony_ci * 508bf215546Sopenharmony_ci * \param[in] args The allocated iterator argument data. 509bf215546Sopenharmony_ci * \param[in] location The input varying location, or ~0 for the W coefficient. 510bf215546Sopenharmony_ci * \param[in] component The requested component. 511bf215546Sopenharmony_ci * \return The coefficient register index. 512bf215546Sopenharmony_ci */ 513bf215546Sopenharmony_cisize_t rogue_coeff_index_fs(struct rogue_iterator_args *args, 514bf215546Sopenharmony_ci gl_varying_slot location, 515bf215546Sopenharmony_ci size_t component) 516bf215546Sopenharmony_ci{ 517bf215546Sopenharmony_ci size_t i; 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci /* Special case: W coefficient. */ 520bf215546Sopenharmony_ci if (location == ~0) { 521bf215546Sopenharmony_ci /* The W component shouldn't be the only one. */ 522bf215546Sopenharmony_ci assert(args->num_fpu_iterators > 1); 523bf215546Sopenharmony_ci assert(args->destination[0] == 0); 524bf215546Sopenharmony_ci return 0; 525bf215546Sopenharmony_ci } 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_ci i = (location - VARYING_SLOT_VAR0) + 1; 528bf215546Sopenharmony_ci assert(location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31); 529bf215546Sopenharmony_ci assert(i < args->num_fpu_iterators); 530bf215546Sopenharmony_ci assert(component < args->components[i]); 531bf215546Sopenharmony_ci assert(args->base[i] != ~0); 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_ci return args->base[i] + (ROGUE_COEFF_ALIGN * component); 534bf215546Sopenharmony_ci} 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci/** 537bf215546Sopenharmony_ci * \brief Returns the allocated vertex output index for a component of an input 538bf215546Sopenharmony_ci * varying location. 539bf215546Sopenharmony_ci * 540bf215546Sopenharmony_ci * \param[in] outputs The vertex output data. 541bf215546Sopenharmony_ci * \param[in] location The output varying location. 542bf215546Sopenharmony_ci * \param[in] component The requested component. 543bf215546Sopenharmony_ci * \return The vertex output index. 544bf215546Sopenharmony_ci */ 545bf215546Sopenharmony_cisize_t rogue_output_index_vs(struct rogue_vertex_outputs *outputs, 546bf215546Sopenharmony_ci gl_varying_slot location, 547bf215546Sopenharmony_ci size_t component) 548bf215546Sopenharmony_ci{ 549bf215546Sopenharmony_ci size_t i; 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci if (location == VARYING_SLOT_POS) { 552bf215546Sopenharmony_ci /* Always at location 0. */ 553bf215546Sopenharmony_ci assert(outputs->base[0] == 0); 554bf215546Sopenharmony_ci i = 0; 555bf215546Sopenharmony_ci } else if ((location >= VARYING_SLOT_VAR0) && 556bf215546Sopenharmony_ci (location <= VARYING_SLOT_VAR31)) { 557bf215546Sopenharmony_ci i = (location - VARYING_SLOT_VAR0) + 1; 558bf215546Sopenharmony_ci } else { 559bf215546Sopenharmony_ci unreachable("Unsupported vertex output type."); 560bf215546Sopenharmony_ci } 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci assert(i < outputs->num_output_vars); 563bf215546Sopenharmony_ci assert(component < outputs->components[i]); 564bf215546Sopenharmony_ci assert(outputs->base[i] != ~0); 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_ci return outputs->base[i] + component; 567bf215546Sopenharmony_ci} 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_ci/** 570bf215546Sopenharmony_ci * \brief Returns the allocated shared register index for a given UBO offset. 571bf215546Sopenharmony_ci * 572bf215546Sopenharmony_ci * \param[in] ubo_data The UBO data. 573bf215546Sopenharmony_ci * \param[in] desc_set The UBO descriptor set. 574bf215546Sopenharmony_ci * \param[in] binding The UBO binding. 575bf215546Sopenharmony_ci * \param[in] offset_bytes The UBO offset in bytes. 576bf215546Sopenharmony_ci * \return The UBO offset shared register index. 577bf215546Sopenharmony_ci */ 578bf215546Sopenharmony_cisize_t rogue_ubo_reg(struct rogue_ubo_data *ubo_data, 579bf215546Sopenharmony_ci size_t desc_set, 580bf215546Sopenharmony_ci size_t binding, 581bf215546Sopenharmony_ci size_t offset_bytes) 582bf215546Sopenharmony_ci{ 583bf215546Sopenharmony_ci size_t ubo_index = ~0; 584bf215546Sopenharmony_ci size_t offset_regs; 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci /* Find UBO located at (desc_set, binding). */ 587bf215546Sopenharmony_ci for (size_t u = 0; u < ubo_data->num_ubo_entries; ++u) { 588bf215546Sopenharmony_ci if (ubo_data->dest[u] == ~0) 589bf215546Sopenharmony_ci continue; 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci if (ubo_data->desc_set[u] != desc_set || ubo_data->binding[u] != binding) 592bf215546Sopenharmony_ci continue; 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci ubo_index = u; 595bf215546Sopenharmony_ci break; 596bf215546Sopenharmony_ci } 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci assert(ubo_index != ~0); 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci assert(!(offset_bytes % ROGUE_REG_SIZE_BYTES)); 601bf215546Sopenharmony_ci offset_regs = offset_bytes / ROGUE_REG_SIZE_BYTES; 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci return ubo_data->dest[ubo_index] + offset_regs; 604bf215546Sopenharmony_ci} 605