1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir_xfb_info.h" 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include "util/u_dynarray.h" 27bf215546Sopenharmony_ci#include <util/u_math.h> 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_cistatic void 30bf215546Sopenharmony_ciadd_var_xfb_varying(nir_xfb_info *xfb, 31bf215546Sopenharmony_ci nir_xfb_varyings_info *varyings, 32bf215546Sopenharmony_ci unsigned buffer, 33bf215546Sopenharmony_ci unsigned offset, 34bf215546Sopenharmony_ci const struct glsl_type *type) 35bf215546Sopenharmony_ci{ 36bf215546Sopenharmony_ci if (varyings == NULL) 37bf215546Sopenharmony_ci return; 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci nir_xfb_varying_info *varying = &varyings->varyings[varyings->varying_count++]; 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_ci varying->type = type; 42bf215546Sopenharmony_ci varying->buffer = buffer; 43bf215546Sopenharmony_ci varying->offset = offset; 44bf215546Sopenharmony_ci xfb->buffers[buffer].varying_count++; 45bf215546Sopenharmony_ci} 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_cistatic nir_xfb_info * 49bf215546Sopenharmony_cinir_xfb_info_create(void *mem_ctx, uint16_t output_count) 50bf215546Sopenharmony_ci{ 51bf215546Sopenharmony_ci return rzalloc_size(mem_ctx, nir_xfb_info_size(output_count)); 52bf215546Sopenharmony_ci} 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_cistatic size_t 55bf215546Sopenharmony_cinir_xfb_varyings_info_size(uint16_t varying_count) 56bf215546Sopenharmony_ci{ 57bf215546Sopenharmony_ci return sizeof(nir_xfb_info) + sizeof(nir_xfb_varying_info) * varying_count; 58bf215546Sopenharmony_ci} 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_cistatic nir_xfb_varyings_info * 61bf215546Sopenharmony_cinir_xfb_varyings_info_create(void *mem_ctx, uint16_t varying_count) 62bf215546Sopenharmony_ci{ 63bf215546Sopenharmony_ci return rzalloc_size(mem_ctx, nir_xfb_varyings_info_size(varying_count)); 64bf215546Sopenharmony_ci} 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_cistatic void 67bf215546Sopenharmony_ciadd_var_xfb_outputs(nir_xfb_info *xfb, 68bf215546Sopenharmony_ci nir_xfb_varyings_info *varyings, 69bf215546Sopenharmony_ci nir_variable *var, 70bf215546Sopenharmony_ci unsigned buffer, 71bf215546Sopenharmony_ci unsigned *location, 72bf215546Sopenharmony_ci unsigned *offset, 73bf215546Sopenharmony_ci const struct glsl_type *type, 74bf215546Sopenharmony_ci bool varying_added) 75bf215546Sopenharmony_ci{ 76bf215546Sopenharmony_ci /* If this type contains a 64-bit value, align to 8 bytes */ 77bf215546Sopenharmony_ci if (glsl_type_contains_64bit(type)) 78bf215546Sopenharmony_ci *offset = ALIGN_POT(*offset, 8); 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci if (glsl_type_is_array_or_matrix(type) && !var->data.compact) { 81bf215546Sopenharmony_ci unsigned length = glsl_get_length(type); 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci const struct glsl_type *child_type = glsl_get_array_element(type); 84bf215546Sopenharmony_ci if (!glsl_type_is_array(child_type) && 85bf215546Sopenharmony_ci !glsl_type_is_struct(child_type)) { 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci add_var_xfb_varying(xfb, varyings, buffer, *offset, type); 88bf215546Sopenharmony_ci varying_added = true; 89bf215546Sopenharmony_ci } 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci for (unsigned i = 0; i < length; i++) 92bf215546Sopenharmony_ci add_var_xfb_outputs(xfb, varyings, var, buffer, location, offset, 93bf215546Sopenharmony_ci child_type, varying_added); 94bf215546Sopenharmony_ci } else if (glsl_type_is_struct_or_ifc(type)) { 95bf215546Sopenharmony_ci unsigned length = glsl_get_length(type); 96bf215546Sopenharmony_ci for (unsigned i = 0; i < length; i++) { 97bf215546Sopenharmony_ci const struct glsl_type *child_type = glsl_get_struct_field(type, i); 98bf215546Sopenharmony_ci add_var_xfb_outputs(xfb, varyings, var, buffer, location, offset, 99bf215546Sopenharmony_ci child_type, varying_added); 100bf215546Sopenharmony_ci } 101bf215546Sopenharmony_ci } else { 102bf215546Sopenharmony_ci assert(buffer < NIR_MAX_XFB_BUFFERS); 103bf215546Sopenharmony_ci if (xfb->buffers_written & (1 << buffer)) { 104bf215546Sopenharmony_ci assert(xfb->buffers[buffer].stride == var->data.xfb.stride); 105bf215546Sopenharmony_ci assert(xfb->buffer_to_stream[buffer] == var->data.stream); 106bf215546Sopenharmony_ci } else { 107bf215546Sopenharmony_ci xfb->buffers_written |= (1 << buffer); 108bf215546Sopenharmony_ci xfb->buffers[buffer].stride = var->data.xfb.stride; 109bf215546Sopenharmony_ci xfb->buffer_to_stream[buffer] = var->data.stream; 110bf215546Sopenharmony_ci } 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci assert(var->data.stream < NIR_MAX_XFB_STREAMS); 113bf215546Sopenharmony_ci xfb->streams_written |= (1 << var->data.stream); 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci unsigned comp_slots; 116bf215546Sopenharmony_ci if (var->data.compact) { 117bf215546Sopenharmony_ci /* This only happens for clip/cull which are float arrays */ 118bf215546Sopenharmony_ci assert(glsl_without_array(type) == glsl_float_type()); 119bf215546Sopenharmony_ci assert(var->data.location == VARYING_SLOT_CLIP_DIST0 || 120bf215546Sopenharmony_ci var->data.location == VARYING_SLOT_CLIP_DIST1); 121bf215546Sopenharmony_ci comp_slots = glsl_get_length(type); 122bf215546Sopenharmony_ci } else { 123bf215546Sopenharmony_ci comp_slots = glsl_get_component_slots(type); 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci UNUSED unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4); 126bf215546Sopenharmony_ci assert(attrib_slots == glsl_count_attribute_slots(type, false)); 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci /* Ensure that we don't have, for instance, a dvec2 with a 129bf215546Sopenharmony_ci * location_frac of 2 which would make it crass a location boundary 130bf215546Sopenharmony_ci * even though it fits in a single slot. However, you can have a 131bf215546Sopenharmony_ci * dvec3 which crosses the slot boundary with a location_frac of 2. 132bf215546Sopenharmony_ci */ 133bf215546Sopenharmony_ci assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) == 134bf215546Sopenharmony_ci attrib_slots); 135bf215546Sopenharmony_ci } 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci assert(var->data.location_frac + comp_slots <= 8); 138bf215546Sopenharmony_ci uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac; 139bf215546Sopenharmony_ci unsigned comp_offset = var->data.location_frac; 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci if (!varying_added) { 142bf215546Sopenharmony_ci add_var_xfb_varying(xfb, varyings, buffer, *offset, type); 143bf215546Sopenharmony_ci } 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci while (comp_mask) { 146bf215546Sopenharmony_ci nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++]; 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci output->buffer = buffer; 149bf215546Sopenharmony_ci output->offset = *offset; 150bf215546Sopenharmony_ci output->location = *location; 151bf215546Sopenharmony_ci output->component_mask = comp_mask & 0xf; 152bf215546Sopenharmony_ci output->component_offset = comp_offset; 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci *offset += util_bitcount(output->component_mask) * 4; 155bf215546Sopenharmony_ci (*location)++; 156bf215546Sopenharmony_ci comp_mask >>= 4; 157bf215546Sopenharmony_ci comp_offset = 0; 158bf215546Sopenharmony_ci } 159bf215546Sopenharmony_ci } 160bf215546Sopenharmony_ci} 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_cistatic int 163bf215546Sopenharmony_cicompare_xfb_varying_offsets(const void *_a, const void *_b) 164bf215546Sopenharmony_ci{ 165bf215546Sopenharmony_ci const nir_xfb_varying_info *a = _a, *b = _b; 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci if (a->buffer != b->buffer) 168bf215546Sopenharmony_ci return a->buffer - b->buffer; 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci return a->offset - b->offset; 171bf215546Sopenharmony_ci} 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_cistatic int 174bf215546Sopenharmony_cicompare_xfb_output_offsets(const void *_a, const void *_b) 175bf215546Sopenharmony_ci{ 176bf215546Sopenharmony_ci const nir_xfb_output_info *a = _a, *b = _b; 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci return a->offset - b->offset; 179bf215546Sopenharmony_ci} 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_civoid 182bf215546Sopenharmony_cinir_shader_gather_xfb_info(nir_shader *shader) 183bf215546Sopenharmony_ci{ 184bf215546Sopenharmony_ci nir_gather_xfb_info_with_varyings(shader, NULL, NULL); 185bf215546Sopenharmony_ci} 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_civoid 188bf215546Sopenharmony_cinir_gather_xfb_info_with_varyings(nir_shader *shader, 189bf215546Sopenharmony_ci void *mem_ctx, 190bf215546Sopenharmony_ci nir_xfb_varyings_info **varyings_info_out) 191bf215546Sopenharmony_ci{ 192bf215546Sopenharmony_ci assert(shader->info.stage == MESA_SHADER_VERTEX || 193bf215546Sopenharmony_ci shader->info.stage == MESA_SHADER_TESS_EVAL || 194bf215546Sopenharmony_ci shader->info.stage == MESA_SHADER_GEOMETRY); 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci /* Compute the number of outputs we have. This is simply the number of 197bf215546Sopenharmony_ci * cumulative locations consumed by all the variables. If a location is 198bf215546Sopenharmony_ci * represented by multiple variables, then they each count separately in 199bf215546Sopenharmony_ci * number of outputs. This is only an estimate as some variables may have 200bf215546Sopenharmony_ci * an xfb_buffer but not an output so it may end up larger than we need but 201bf215546Sopenharmony_ci * it should be good enough for allocation. 202bf215546Sopenharmony_ci */ 203bf215546Sopenharmony_ci unsigned num_outputs = 0; 204bf215546Sopenharmony_ci unsigned num_varyings = 0; 205bf215546Sopenharmony_ci nir_xfb_varyings_info *varyings_info = NULL; 206bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, shader) { 207bf215546Sopenharmony_ci if (var->data.explicit_xfb_buffer) { 208bf215546Sopenharmony_ci num_outputs += glsl_count_attribute_slots(var->type, false); 209bf215546Sopenharmony_ci num_varyings += glsl_varying_count(var->type); 210bf215546Sopenharmony_ci } 211bf215546Sopenharmony_ci } 212bf215546Sopenharmony_ci if (num_outputs == 0 || num_varyings == 0) 213bf215546Sopenharmony_ci return; 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci nir_xfb_info *xfb = nir_xfb_info_create(shader, num_outputs); 216bf215546Sopenharmony_ci if (varyings_info_out != NULL) { 217bf215546Sopenharmony_ci *varyings_info_out = nir_xfb_varyings_info_create(mem_ctx, num_varyings); 218bf215546Sopenharmony_ci varyings_info = *varyings_info_out; 219bf215546Sopenharmony_ci } 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci /* Walk the list of outputs and add them to the array */ 222bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, shader) { 223bf215546Sopenharmony_ci if (!var->data.explicit_xfb_buffer) 224bf215546Sopenharmony_ci continue; 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci unsigned location = var->data.location; 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci /* In order to know if we have a array of blocks can't be done just by 229bf215546Sopenharmony_ci * checking if we have an interface type and is an array, because due 230bf215546Sopenharmony_ci * splitting we could end on a case were we received a split struct 231bf215546Sopenharmony_ci * that contains an array. 232bf215546Sopenharmony_ci */ 233bf215546Sopenharmony_ci bool is_array_block = var->interface_type != NULL && 234bf215546Sopenharmony_ci glsl_type_is_array(var->type) && 235bf215546Sopenharmony_ci glsl_without_array(var->type) == var->interface_type; 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci if (var->data.explicit_offset && !is_array_block) { 238bf215546Sopenharmony_ci unsigned offset = var->data.offset; 239bf215546Sopenharmony_ci add_var_xfb_outputs(xfb, varyings_info, var, var->data.xfb.buffer, 240bf215546Sopenharmony_ci &location, &offset, var->type, false); 241bf215546Sopenharmony_ci } else if (is_array_block) { 242bf215546Sopenharmony_ci assert(glsl_type_is_struct_or_ifc(var->interface_type)); 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci unsigned aoa_size = glsl_get_aoa_size(var->type); 245bf215546Sopenharmony_ci const struct glsl_type *itype = var->interface_type; 246bf215546Sopenharmony_ci unsigned nfields = glsl_get_length(itype); 247bf215546Sopenharmony_ci for (unsigned b = 0; b < aoa_size; b++) { 248bf215546Sopenharmony_ci for (unsigned f = 0; f < nfields; f++) { 249bf215546Sopenharmony_ci int foffset = glsl_get_struct_field_offset(itype, f); 250bf215546Sopenharmony_ci const struct glsl_type *ftype = glsl_get_struct_field(itype, f); 251bf215546Sopenharmony_ci if (foffset < 0) { 252bf215546Sopenharmony_ci location += glsl_count_attribute_slots(ftype, false); 253bf215546Sopenharmony_ci continue; 254bf215546Sopenharmony_ci } 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci unsigned offset = foffset; 257bf215546Sopenharmony_ci add_var_xfb_outputs(xfb, varyings_info, var, var->data.xfb.buffer + b, 258bf215546Sopenharmony_ci &location, &offset, ftype, false); 259bf215546Sopenharmony_ci } 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci } 262bf215546Sopenharmony_ci } 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_ci /* Everything is easier in the state setup code if outputs and varyings are 265bf215546Sopenharmony_ci * sorted in order of output offset (and buffer for varyings). 266bf215546Sopenharmony_ci */ 267bf215546Sopenharmony_ci qsort(xfb->outputs, xfb->output_count, sizeof(xfb->outputs[0]), 268bf215546Sopenharmony_ci compare_xfb_output_offsets); 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci if (varyings_info != NULL) { 271bf215546Sopenharmony_ci qsort(varyings_info->varyings, varyings_info->varying_count, 272bf215546Sopenharmony_ci sizeof(varyings_info->varyings[0]), 273bf215546Sopenharmony_ci compare_xfb_varying_offsets); 274bf215546Sopenharmony_ci } 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_ci#ifndef NDEBUG 277bf215546Sopenharmony_ci /* Finally, do a sanity check */ 278bf215546Sopenharmony_ci unsigned max_offset[NIR_MAX_XFB_BUFFERS] = {0}; 279bf215546Sopenharmony_ci for (unsigned i = 0; i < xfb->output_count; i++) { 280bf215546Sopenharmony_ci assert(xfb->outputs[i].offset >= max_offset[xfb->outputs[i].buffer]); 281bf215546Sopenharmony_ci assert(xfb->outputs[i].component_mask != 0); 282bf215546Sopenharmony_ci unsigned slots = util_bitcount(xfb->outputs[i].component_mask); 283bf215546Sopenharmony_ci max_offset[xfb->outputs[i].buffer] = xfb->outputs[i].offset + slots * 4; 284bf215546Sopenharmony_ci } 285bf215546Sopenharmony_ci#endif 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci ralloc_free(shader->xfb_info); 288bf215546Sopenharmony_ci shader->xfb_info = xfb; 289bf215546Sopenharmony_ci} 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_cistatic int 292bf215546Sopenharmony_ciget_xfb_out_sort_index(const nir_xfb_output_info *a) 293bf215546Sopenharmony_ci{ 294bf215546Sopenharmony_ci /* Return the maximum number to put dummy components at the end. */ 295bf215546Sopenharmony_ci if (!a->component_mask) 296bf215546Sopenharmony_ci return MAX_XFB_BUFFERS << 26; 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci return ((uint32_t)a->buffer << 26) | /* 2 bits for the buffer */ 299bf215546Sopenharmony_ci /* 10 bits for the component location (256 * 4) */ 300bf215546Sopenharmony_ci (((uint32_t)a->location * 4 + a->component_offset) << 16) | 301bf215546Sopenharmony_ci /* 16 bits for the offset */ 302bf215546Sopenharmony_ci a->offset; 303bf215546Sopenharmony_ci} 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_cistatic int 306bf215546Sopenharmony_cicompare_xfb_out(const void *pa, const void *pb) 307bf215546Sopenharmony_ci{ 308bf215546Sopenharmony_ci const nir_xfb_output_info *a = (const nir_xfb_output_info *)pa; 309bf215546Sopenharmony_ci const nir_xfb_output_info *b = (const nir_xfb_output_info *)pb; 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci return get_xfb_out_sort_index(a) - get_xfb_out_sort_index(b); 312bf215546Sopenharmony_ci} 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci/** 315bf215546Sopenharmony_ci * Gather transform feedback info from lowered IO intrinsics. 316bf215546Sopenharmony_ci * 317bf215546Sopenharmony_ci * Optionally return slot_to_register, an optional table to translate 318bf215546Sopenharmony_ci * gl_varying_slot to "base" indices. 319bf215546Sopenharmony_ci */ 320bf215546Sopenharmony_cinir_xfb_info * 321bf215546Sopenharmony_cinir_gather_xfb_info_from_intrinsics(nir_shader *nir, 322bf215546Sopenharmony_ci int slot_to_register[NUM_TOTAL_VARYING_SLOTS]) 323bf215546Sopenharmony_ci{ 324bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(nir); 325bf215546Sopenharmony_ci uint8_t buffer_to_stream[MAX_XFB_BUFFERS] = {0}; 326bf215546Sopenharmony_ci uint8_t buffer_mask = 0; 327bf215546Sopenharmony_ci uint8_t stream_mask = 0; 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci if (slot_to_register) { 330bf215546Sopenharmony_ci memset(slot_to_register, -1, 331bf215546Sopenharmony_ci sizeof(slot_to_register[0] * NUM_TOTAL_VARYING_SLOTS)); 332bf215546Sopenharmony_ci } 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci /* Gather xfb outputs. */ 335bf215546Sopenharmony_ci struct util_dynarray array = {0}; 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 338bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 339bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic || 340bf215546Sopenharmony_ci !nir_instr_xfb_write_mask(nir_instr_as_intrinsic(instr))) 341bf215546Sopenharmony_ci continue; 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci unsigned wr_mask = nir_intrinsic_write_mask(intr); 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci while (wr_mask) { 348bf215546Sopenharmony_ci unsigned i = u_bit_scan(&wr_mask); 349bf215546Sopenharmony_ci unsigned index = nir_intrinsic_component(intr) + i; 350bf215546Sopenharmony_ci nir_io_xfb xfb = index < 2 ? nir_intrinsic_io_xfb(intr) : 351bf215546Sopenharmony_ci nir_intrinsic_io_xfb2(intr); 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci if (xfb.out[index % 2].num_components) { 354bf215546Sopenharmony_ci nir_io_semantics sem = nir_intrinsic_io_semantics(intr); 355bf215546Sopenharmony_ci nir_xfb_output_info out; 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci out.component_offset = index; 358bf215546Sopenharmony_ci out.component_mask = 359bf215546Sopenharmony_ci BITFIELD_RANGE(index, xfb.out[index % 2].num_components); 360bf215546Sopenharmony_ci out.location = sem.location; 361bf215546Sopenharmony_ci out.buffer = xfb.out[index % 2].buffer; 362bf215546Sopenharmony_ci out.offset = (uint32_t)xfb.out[index % 2].offset * 4; 363bf215546Sopenharmony_ci util_dynarray_append(&array, nir_xfb_output_info, out); 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_ci uint8_t stream = (sem.gs_streams >> (i * 2)) & 0x3; 366bf215546Sopenharmony_ci buffer_to_stream[out.buffer] = stream; 367bf215546Sopenharmony_ci buffer_mask |= BITFIELD_BIT(out.buffer); 368bf215546Sopenharmony_ci stream_mask |= BITFIELD_BIT(stream); 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci if (slot_to_register) 371bf215546Sopenharmony_ci slot_to_register[sem.location] = nir_intrinsic_base(intr); 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci /* No elements before component_offset are allowed to be set. */ 374bf215546Sopenharmony_ci assert(!(out.component_mask & BITFIELD_MASK(out.component_offset))); 375bf215546Sopenharmony_ci } 376bf215546Sopenharmony_ci } 377bf215546Sopenharmony_ci } 378bf215546Sopenharmony_ci } 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci nir_xfb_output_info *outputs = (nir_xfb_output_info *)array.data; 381bf215546Sopenharmony_ci int count = util_dynarray_num_elements(&array, nir_xfb_output_info); 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci if (!count) 384bf215546Sopenharmony_ci return NULL; 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_ci if (count > 1) { 387bf215546Sopenharmony_ci /* Sort outputs by buffer, location, and component. */ 388bf215546Sopenharmony_ci qsort(outputs, count, sizeof(nir_xfb_output_info), compare_xfb_out); 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci /* Merge outputs referencing the same slot. */ 391bf215546Sopenharmony_ci for (int i = 0; i < count - 1; i++) { 392bf215546Sopenharmony_ci nir_xfb_output_info *cur = &outputs[i]; 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci if (!cur->component_mask) 395bf215546Sopenharmony_ci continue; 396bf215546Sopenharmony_ci 397bf215546Sopenharmony_ci /* Outputs referencing the same buffer and location are contiguous. */ 398bf215546Sopenharmony_ci for (int j = i + 1; 399bf215546Sopenharmony_ci j < count && 400bf215546Sopenharmony_ci cur->buffer == outputs[j].buffer && 401bf215546Sopenharmony_ci cur->location == outputs[j].location; j++) { 402bf215546Sopenharmony_ci if (outputs[j].component_mask && 403bf215546Sopenharmony_ci outputs[j].offset - outputs[j].component_offset * 4 == 404bf215546Sopenharmony_ci cur->offset - cur->component_offset * 4) { 405bf215546Sopenharmony_ci unsigned merged_offset = MIN2(cur->component_offset, 406bf215546Sopenharmony_ci outputs[j].component_offset); 407bf215546Sopenharmony_ci /* component_mask is relative to 0, not component_offset */ 408bf215546Sopenharmony_ci unsigned merged_mask = cur->component_mask | outputs[j].component_mask; 409bf215546Sopenharmony_ci 410bf215546Sopenharmony_ci /* The component mask should have no holes after merging. */ 411bf215546Sopenharmony_ci if (util_is_power_of_two_nonzero((merged_mask >> merged_offset) + 1)) { 412bf215546Sopenharmony_ci /* Merge outputs. */ 413bf215546Sopenharmony_ci cur->component_offset = merged_offset; 414bf215546Sopenharmony_ci cur->component_mask = merged_mask; 415bf215546Sopenharmony_ci cur->offset = (uint32_t)cur->offset - 416bf215546Sopenharmony_ci (uint32_t)cur->component_offset * 4 + 417bf215546Sopenharmony_ci (uint32_t)merged_offset * 4; 418bf215546Sopenharmony_ci /* Disable the other output. */ 419bf215546Sopenharmony_ci outputs[j].component_mask = 0; 420bf215546Sopenharmony_ci } 421bf215546Sopenharmony_ci } 422bf215546Sopenharmony_ci } 423bf215546Sopenharmony_ci } 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci /* Sort outputs again to put disabled outputs at the end. */ 426bf215546Sopenharmony_ci qsort(outputs, count, sizeof(nir_xfb_output_info), compare_xfb_out); 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci /* Remove disabled outputs. */ 429bf215546Sopenharmony_ci for (int i = count - 1; i >= 0 && !outputs[i].component_mask; i--) 430bf215546Sopenharmony_ci count = i; 431bf215546Sopenharmony_ci } 432bf215546Sopenharmony_ci 433bf215546Sopenharmony_ci for (unsigned i = 0; i < count; i++) 434bf215546Sopenharmony_ci assert(outputs[i].component_mask); 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci /* Create nir_xfb_info. */ 437bf215546Sopenharmony_ci nir_xfb_info *info = calloc(1, nir_xfb_info_size(count)); 438bf215546Sopenharmony_ci if (!info) { 439bf215546Sopenharmony_ci util_dynarray_fini(&array); 440bf215546Sopenharmony_ci return NULL; 441bf215546Sopenharmony_ci } 442bf215546Sopenharmony_ci 443bf215546Sopenharmony_ci /* Fill nir_xfb_info. */ 444bf215546Sopenharmony_ci info->buffers_written = buffer_mask; 445bf215546Sopenharmony_ci info->streams_written = stream_mask; 446bf215546Sopenharmony_ci memcpy(info->buffer_to_stream, buffer_to_stream, sizeof(buffer_to_stream)); 447bf215546Sopenharmony_ci info->output_count = count; 448bf215546Sopenharmony_ci memcpy(info->outputs, outputs, count * sizeof(outputs[0])); 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci /* Set strides. */ 451bf215546Sopenharmony_ci for (unsigned i = 0; i < MAX_XFB_BUFFERS; i++) { 452bf215546Sopenharmony_ci if (buffer_mask & BITFIELD_BIT(i)) 453bf215546Sopenharmony_ci info->buffers[i].stride = nir->info.xfb_stride[i]; 454bf215546Sopenharmony_ci } 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci /* Set varying_count. */ 457bf215546Sopenharmony_ci for (unsigned i = 0; i < count; i++) 458bf215546Sopenharmony_ci info->buffers[outputs[i].buffer].varying_count++; 459bf215546Sopenharmony_ci 460bf215546Sopenharmony_ci util_dynarray_fini(&array); 461bf215546Sopenharmony_ci return info; 462bf215546Sopenharmony_ci} 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_civoid 465bf215546Sopenharmony_cinir_print_xfb_info(nir_xfb_info *info, FILE *fp) 466bf215546Sopenharmony_ci{ 467bf215546Sopenharmony_ci fprintf(fp, "buffers_written: 0x%x\n", info->buffers_written); 468bf215546Sopenharmony_ci fprintf(fp, "streams_written: 0x%x\n", info->streams_written); 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_ci for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++) { 471bf215546Sopenharmony_ci if (BITFIELD_BIT(i) & info->buffers_written) { 472bf215546Sopenharmony_ci fprintf(fp, "buffer%u: stride=%u varying_count=%u stream=%u\n", i, 473bf215546Sopenharmony_ci info->buffers[i].stride, 474bf215546Sopenharmony_ci info->buffers[i].varying_count, 475bf215546Sopenharmony_ci info->buffer_to_stream[i]); 476bf215546Sopenharmony_ci } 477bf215546Sopenharmony_ci } 478bf215546Sopenharmony_ci 479bf215546Sopenharmony_ci fprintf(fp, "output_count: %u\n", info->output_count); 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci for (unsigned i = 0; i < info->output_count; i++) { 482bf215546Sopenharmony_ci fprintf(fp, "output%u: buffer=%u, offset=%u, location=%u, " 483bf215546Sopenharmony_ci "component_offset=%u, component_mask=0x%x\n", 484bf215546Sopenharmony_ci i, info->outputs[i].buffer, 485bf215546Sopenharmony_ci info->outputs[i].offset, 486bf215546Sopenharmony_ci info->outputs[i].location, 487bf215546Sopenharmony_ci info->outputs[i].component_offset, 488bf215546Sopenharmony_ci info->outputs[i].component_mask); 489bf215546Sopenharmony_ci } 490bf215546Sopenharmony_ci} 491