1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nir.h" 25bf215546Sopenharmony_ci#include "nir_builder.h" 26bf215546Sopenharmony_ci#include "nir_xfb_info.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/** 29bf215546Sopenharmony_ci * \file nir_lower_gs_intrinsics.c 30bf215546Sopenharmony_ci * 31bf215546Sopenharmony_ci * Geometry Shaders can call EmitVertex()/EmitStreamVertex() to output an 32bf215546Sopenharmony_ci * arbitrary number of vertices. However, the shader must declare the maximum 33bf215546Sopenharmony_ci * number of vertices that it will ever output - further attempts to emit 34bf215546Sopenharmony_ci * vertices result in undefined behavior according to the GLSL specification. 35bf215546Sopenharmony_ci * 36bf215546Sopenharmony_ci * Drivers might use this maximum number of vertices to allocate enough space 37bf215546Sopenharmony_ci * to hold the geometry shader's output. Some drivers (such as i965) need to 38bf215546Sopenharmony_ci * implement "safety checks" which ensure that the shader hasn't emitted too 39bf215546Sopenharmony_ci * many vertices, to avoid overflowing that space and trashing other memory. 40bf215546Sopenharmony_ci * 41bf215546Sopenharmony_ci * The count of emitted vertices can also be useful in buffer offset 42bf215546Sopenharmony_ci * calculations, so drivers know where to write the GS output. 43bf215546Sopenharmony_ci * 44bf215546Sopenharmony_ci * However, for simple geometry shaders that emit a statically determinable 45bf215546Sopenharmony_ci * number of vertices, this extra bookkeeping is unnecessary and inefficient. 46bf215546Sopenharmony_ci * By tracking the vertex count in NIR, we allow constant folding/propagation 47bf215546Sopenharmony_ci * and dead control flow optimizations to eliminate most of it where possible. 48bf215546Sopenharmony_ci * 49bf215546Sopenharmony_ci * This pass introduces a new global variable which stores the current vertex 50bf215546Sopenharmony_ci * count (initialized to 0), and converts emit_vertex/end_primitive intrinsics 51bf215546Sopenharmony_ci * to their *_with_counter variants. emit_vertex is also wrapped in a safety 52bf215546Sopenharmony_ci * check to avoid buffer overflows. Finally, it adds a set_vertex_count 53bf215546Sopenharmony_ci * intrinsic at the end of the program, informing the driver of the final 54bf215546Sopenharmony_ci * vertex count. 55bf215546Sopenharmony_ci */ 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_cistruct state { 58bf215546Sopenharmony_ci nir_builder *builder; 59bf215546Sopenharmony_ci nir_variable *vertex_count_vars[NIR_MAX_XFB_STREAMS]; 60bf215546Sopenharmony_ci nir_variable *vtxcnt_per_prim_vars[NIR_MAX_XFB_STREAMS]; 61bf215546Sopenharmony_ci nir_variable *primitive_count_vars[NIR_MAX_XFB_STREAMS]; 62bf215546Sopenharmony_ci bool per_stream; 63bf215546Sopenharmony_ci bool count_prims; 64bf215546Sopenharmony_ci bool count_vtx_per_prim; 65bf215546Sopenharmony_ci bool overwrite_incomplete; 66bf215546Sopenharmony_ci bool is_points; 67bf215546Sopenharmony_ci bool progress; 68bf215546Sopenharmony_ci}; 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci/** 71bf215546Sopenharmony_ci * Replace emit_vertex intrinsics with: 72bf215546Sopenharmony_ci * 73bf215546Sopenharmony_ci * if (vertex_count < max_vertices) { 74bf215546Sopenharmony_ci * emit_vertex_with_counter vertex_count, vertex_count_per_primitive (optional) ... 75bf215546Sopenharmony_ci * vertex_count += 1 76bf215546Sopenharmony_ci * vertex_count_per_primitive += 1 77bf215546Sopenharmony_ci * } 78bf215546Sopenharmony_ci */ 79bf215546Sopenharmony_cistatic void 80bf215546Sopenharmony_cirewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state) 81bf215546Sopenharmony_ci{ 82bf215546Sopenharmony_ci nir_builder *b = state->builder; 83bf215546Sopenharmony_ci unsigned stream = nir_intrinsic_stream_id(intrin); 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci /* Load the vertex count */ 86bf215546Sopenharmony_ci b->cursor = nir_before_instr(&intrin->instr); 87bf215546Sopenharmony_ci assert(state->vertex_count_vars[stream] != NULL); 88bf215546Sopenharmony_ci nir_ssa_def *count = nir_load_var(b, state->vertex_count_vars[stream]); 89bf215546Sopenharmony_ci nir_ssa_def *count_per_primitive; 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci if (state->count_vtx_per_prim) 92bf215546Sopenharmony_ci count_per_primitive = nir_load_var(b, state->vtxcnt_per_prim_vars[stream]); 93bf215546Sopenharmony_ci else if (state->is_points) 94bf215546Sopenharmony_ci count_per_primitive = nir_imm_int(b, 0); 95bf215546Sopenharmony_ci else 96bf215546Sopenharmony_ci count_per_primitive = nir_ssa_undef(b, 1, 32); 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci nir_ssa_def *max_vertices = 99bf215546Sopenharmony_ci nir_imm_int(b, b->shader->info.gs.vertices_out); 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci /* Create: if (vertex_count < max_vertices) and insert it. 102bf215546Sopenharmony_ci * 103bf215546Sopenharmony_ci * The new if statement needs to be hooked up to the control flow graph 104bf215546Sopenharmony_ci * before we start inserting instructions into it. 105bf215546Sopenharmony_ci */ 106bf215546Sopenharmony_ci nir_push_if(b, nir_ilt(b, count, max_vertices)); 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci nir_emit_vertex_with_counter(b, count, count_per_primitive, stream); 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci /* Increment the vertex count by 1 */ 111bf215546Sopenharmony_ci nir_store_var(b, state->vertex_count_vars[stream], 112bf215546Sopenharmony_ci nir_iadd_imm(b, count, 1), 113bf215546Sopenharmony_ci 0x1); /* .x */ 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci if (state->count_vtx_per_prim) { 116bf215546Sopenharmony_ci /* Increment the per-primitive vertex count by 1 */ 117bf215546Sopenharmony_ci nir_variable *var = state->vtxcnt_per_prim_vars[stream]; 118bf215546Sopenharmony_ci nir_ssa_def *vtx_per_prim_cnt = nir_load_var(b, var); 119bf215546Sopenharmony_ci nir_store_var(b, var, 120bf215546Sopenharmony_ci nir_iadd_imm(b, vtx_per_prim_cnt, 1), 121bf215546Sopenharmony_ci 0x1); /* .x */ 122bf215546Sopenharmony_ci } 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci nir_pop_if(b, NULL); 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci nir_instr_remove(&intrin->instr); 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci state->progress = true; 129bf215546Sopenharmony_ci} 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci/** 132bf215546Sopenharmony_ci * Emits code that overwrites incomplete primitives and their vertices. 133bf215546Sopenharmony_ci * 134bf215546Sopenharmony_ci * A primitive is considered incomplete when it doesn't have enough vertices. 135bf215546Sopenharmony_ci * For example, a triangle strip that has 2 or fewer vertices, or a line strip 136bf215546Sopenharmony_ci * with 1 vertex are considered incomplete. 137bf215546Sopenharmony_ci * 138bf215546Sopenharmony_ci * After each end_primitive and at the end of the shader before emitting 139bf215546Sopenharmony_ci * set_vertex_and_primitive_count, we check if the primitive that is being 140bf215546Sopenharmony_ci * emitted has enough vertices or not, and we adjust the vertex and primitive 141bf215546Sopenharmony_ci * counters accordingly. 142bf215546Sopenharmony_ci * 143bf215546Sopenharmony_ci * This means that the following emit_vertex can reuse the vertex index of 144bf215546Sopenharmony_ci * a previous vertex, if the previous primitive was incomplete, so the compiler 145bf215546Sopenharmony_ci * backend is expected to simply overwrite any data that belonged to those. 146bf215546Sopenharmony_ci */ 147bf215546Sopenharmony_cistatic void 148bf215546Sopenharmony_cioverwrite_incomplete_primitives(struct state *state, unsigned stream) 149bf215546Sopenharmony_ci{ 150bf215546Sopenharmony_ci assert(state->count_vtx_per_prim); 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci nir_builder *b = state->builder; 153bf215546Sopenharmony_ci enum shader_prim outprim = b->shader->info.gs.output_primitive; 154bf215546Sopenharmony_ci unsigned outprim_min_vertices; 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci if (outprim == SHADER_PRIM_POINTS) 157bf215546Sopenharmony_ci outprim_min_vertices = 1; 158bf215546Sopenharmony_ci else if (outprim == SHADER_PRIM_LINE_STRIP) 159bf215546Sopenharmony_ci outprim_min_vertices = 2; 160bf215546Sopenharmony_ci else if (outprim == SHADER_PRIM_TRIANGLE_STRIP) 161bf215546Sopenharmony_ci outprim_min_vertices = 3; 162bf215546Sopenharmony_ci else 163bf215546Sopenharmony_ci unreachable("Invalid GS output primitive type."); 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci /* Total count of vertices emitted so far. */ 166bf215546Sopenharmony_ci nir_ssa_def *vtxcnt_total = 167bf215546Sopenharmony_ci nir_load_var(b, state->vertex_count_vars[stream]); 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci /* Number of vertices emitted for the last primitive */ 170bf215546Sopenharmony_ci nir_ssa_def *vtxcnt_per_primitive = 171bf215546Sopenharmony_ci nir_load_var(b, state->vtxcnt_per_prim_vars[stream]); 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci /* See if the current primitive is a incomplete */ 174bf215546Sopenharmony_ci nir_ssa_def *is_inc_prim = 175bf215546Sopenharmony_ci nir_ilt(b, vtxcnt_per_primitive, nir_imm_int(b, outprim_min_vertices)); 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci /* Number of vertices in the incomplete primitive */ 178bf215546Sopenharmony_ci nir_ssa_def *num_inc_vtx = 179bf215546Sopenharmony_ci nir_bcsel(b, is_inc_prim, vtxcnt_per_primitive, nir_imm_int(b, 0)); 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci /* Store corrected total vertex count */ 182bf215546Sopenharmony_ci nir_store_var(b, state->vertex_count_vars[stream], 183bf215546Sopenharmony_ci nir_isub(b, vtxcnt_total, num_inc_vtx), 184bf215546Sopenharmony_ci 0x1); /* .x */ 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci if (state->count_prims) { 187bf215546Sopenharmony_ci /* Number of incomplete primitives (0 or 1) */ 188bf215546Sopenharmony_ci nir_ssa_def *num_inc_prim = nir_b2i32(b, is_inc_prim); 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci /* Store corrected primitive count */ 191bf215546Sopenharmony_ci nir_ssa_def *prim_cnt = nir_load_var(b, state->primitive_count_vars[stream]); 192bf215546Sopenharmony_ci nir_store_var(b, state->primitive_count_vars[stream], 193bf215546Sopenharmony_ci nir_isub(b, prim_cnt, num_inc_prim), 194bf215546Sopenharmony_ci 0x1); /* .x */ 195bf215546Sopenharmony_ci } 196bf215546Sopenharmony_ci} 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci/** 199bf215546Sopenharmony_ci * Replace end_primitive with end_primitive_with_counter. 200bf215546Sopenharmony_ci */ 201bf215546Sopenharmony_cistatic void 202bf215546Sopenharmony_cirewrite_end_primitive(nir_intrinsic_instr *intrin, struct state *state) 203bf215546Sopenharmony_ci{ 204bf215546Sopenharmony_ci nir_builder *b = state->builder; 205bf215546Sopenharmony_ci unsigned stream = nir_intrinsic_stream_id(intrin); 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci b->cursor = nir_before_instr(&intrin->instr); 208bf215546Sopenharmony_ci assert(state->vertex_count_vars[stream] != NULL); 209bf215546Sopenharmony_ci nir_ssa_def *count = nir_load_var(b, state->vertex_count_vars[stream]); 210bf215546Sopenharmony_ci nir_ssa_def *count_per_primitive; 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci if (state->count_vtx_per_prim) 213bf215546Sopenharmony_ci count_per_primitive = nir_load_var(b, state->vtxcnt_per_prim_vars[stream]); 214bf215546Sopenharmony_ci else if (state->is_points) 215bf215546Sopenharmony_ci count_per_primitive = nir_imm_int(b, 0); 216bf215546Sopenharmony_ci else 217bf215546Sopenharmony_ci count_per_primitive = nir_ssa_undef(b, count->num_components, count->bit_size); 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci nir_end_primitive_with_counter(b, count, count_per_primitive, stream); 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci if (state->count_prims) { 222bf215546Sopenharmony_ci /* Increment the primitive count by 1 */ 223bf215546Sopenharmony_ci nir_ssa_def *prim_cnt = nir_load_var(b, state->primitive_count_vars[stream]); 224bf215546Sopenharmony_ci nir_store_var(b, state->primitive_count_vars[stream], 225bf215546Sopenharmony_ci nir_iadd_imm(b, prim_cnt, 1), 226bf215546Sopenharmony_ci 0x1); /* .x */ 227bf215546Sopenharmony_ci } 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci if (state->count_vtx_per_prim) { 230bf215546Sopenharmony_ci if (state->overwrite_incomplete) 231bf215546Sopenharmony_ci overwrite_incomplete_primitives(state, stream); 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci /* Store 0 to per-primitive vertex count */ 234bf215546Sopenharmony_ci nir_store_var(b, state->vtxcnt_per_prim_vars[stream], 235bf215546Sopenharmony_ci nir_imm_int(b, 0), 236bf215546Sopenharmony_ci 0x1); /* .x */ 237bf215546Sopenharmony_ci } 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci nir_instr_remove(&intrin->instr); 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci state->progress = true; 242bf215546Sopenharmony_ci} 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_cistatic bool 245bf215546Sopenharmony_cirewrite_intrinsics(nir_block *block, struct state *state) 246bf215546Sopenharmony_ci{ 247bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 248bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 249bf215546Sopenharmony_ci continue; 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 252bf215546Sopenharmony_ci switch (intrin->intrinsic) { 253bf215546Sopenharmony_ci case nir_intrinsic_emit_vertex: 254bf215546Sopenharmony_ci case nir_intrinsic_emit_vertex_with_counter: 255bf215546Sopenharmony_ci rewrite_emit_vertex(intrin, state); 256bf215546Sopenharmony_ci break; 257bf215546Sopenharmony_ci case nir_intrinsic_end_primitive: 258bf215546Sopenharmony_ci case nir_intrinsic_end_primitive_with_counter: 259bf215546Sopenharmony_ci rewrite_end_primitive(intrin, state); 260bf215546Sopenharmony_ci break; 261bf215546Sopenharmony_ci default: 262bf215546Sopenharmony_ci /* not interesting; skip this */ 263bf215546Sopenharmony_ci break; 264bf215546Sopenharmony_ci } 265bf215546Sopenharmony_ci } 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci return true; 268bf215546Sopenharmony_ci} 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci/** 271bf215546Sopenharmony_ci * Add a set_vertex_and_primitive_count intrinsic at the end of the program 272bf215546Sopenharmony_ci * (representing the final total vertex and primitive count). 273bf215546Sopenharmony_ci */ 274bf215546Sopenharmony_cistatic void 275bf215546Sopenharmony_ciappend_set_vertex_and_primitive_count(nir_block *end_block, struct state *state) 276bf215546Sopenharmony_ci{ 277bf215546Sopenharmony_ci nir_builder *b = state->builder; 278bf215546Sopenharmony_ci nir_shader *shader = state->builder->shader; 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ci /* Insert the new intrinsic in all of the predecessors of the end block, 281bf215546Sopenharmony_ci * but before any jump instructions (return). 282bf215546Sopenharmony_ci */ 283bf215546Sopenharmony_ci set_foreach(end_block->predecessors, entry) { 284bf215546Sopenharmony_ci nir_block *pred = (nir_block *) entry->key; 285bf215546Sopenharmony_ci b->cursor = nir_after_block_before_jump(pred); 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci for (unsigned stream = 0; stream < NIR_MAX_XFB_STREAMS; ++stream) { 288bf215546Sopenharmony_ci /* When it's not per-stream, we only need to write one variable. */ 289bf215546Sopenharmony_ci if (!state->per_stream && stream != 0) 290bf215546Sopenharmony_ci continue; 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci nir_ssa_def *vtx_cnt; 293bf215546Sopenharmony_ci nir_ssa_def *prim_cnt; 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci if (state->per_stream && !(shader->info.gs.active_stream_mask & (1 << stream))) { 296bf215546Sopenharmony_ci /* Inactive stream: vertex count is 0, primitive count is 0 or undef. */ 297bf215546Sopenharmony_ci vtx_cnt = nir_imm_int(b, 0); 298bf215546Sopenharmony_ci prim_cnt = state->count_prims || state->is_points 299bf215546Sopenharmony_ci ? nir_imm_int(b, 0) 300bf215546Sopenharmony_ci : nir_ssa_undef(b, 1, 32); 301bf215546Sopenharmony_ci } else { 302bf215546Sopenharmony_ci if (state->overwrite_incomplete) 303bf215546Sopenharmony_ci overwrite_incomplete_primitives(state, stream); 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci vtx_cnt = nir_load_var(b, state->vertex_count_vars[stream]); 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci if (state->count_prims) 308bf215546Sopenharmony_ci prim_cnt = nir_load_var(b, state->primitive_count_vars[stream]); 309bf215546Sopenharmony_ci else if (state->is_points) 310bf215546Sopenharmony_ci /* EndPrimitive does not affect primitive count for points, 311bf215546Sopenharmony_ci * just use vertex count instead 312bf215546Sopenharmony_ci */ 313bf215546Sopenharmony_ci prim_cnt = vtx_cnt; 314bf215546Sopenharmony_ci else 315bf215546Sopenharmony_ci prim_cnt = nir_ssa_undef(b, 1, 32); 316bf215546Sopenharmony_ci } 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci nir_set_vertex_and_primitive_count(b, vtx_cnt, prim_cnt, stream); 319bf215546Sopenharmony_ci state->progress = true; 320bf215546Sopenharmony_ci } 321bf215546Sopenharmony_ci } 322bf215546Sopenharmony_ci} 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci/** 325bf215546Sopenharmony_ci * Check to see if there are any blocks that need set_vertex_and_primitive_count 326bf215546Sopenharmony_ci * 327bf215546Sopenharmony_ci * If every block that could need the set_vertex_and_primitive_count intrinsic 328bf215546Sopenharmony_ci * already has one, there is nothing for this pass to do. 329bf215546Sopenharmony_ci */ 330bf215546Sopenharmony_cistatic bool 331bf215546Sopenharmony_cia_block_needs_set_vertex_and_primitive_count(nir_block *end_block, bool per_stream) 332bf215546Sopenharmony_ci{ 333bf215546Sopenharmony_ci set_foreach(end_block->predecessors, entry) { 334bf215546Sopenharmony_ci nir_block *pred = (nir_block *) entry->key; 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci for (unsigned stream = 0; stream < NIR_MAX_XFB_STREAMS; ++stream) { 338bf215546Sopenharmony_ci /* When it's not per-stream, we only need to write one variable. */ 339bf215546Sopenharmony_ci if (!per_stream && stream != 0) 340bf215546Sopenharmony_ci continue; 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci bool found = false; 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci nir_foreach_instr_reverse(instr, pred) { 345bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 346bf215546Sopenharmony_ci continue; 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci const nir_intrinsic_instr *const intrin = 349bf215546Sopenharmony_ci nir_instr_as_intrinsic(instr); 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci if (intrin->intrinsic == nir_intrinsic_set_vertex_and_primitive_count && 352bf215546Sopenharmony_ci intrin->const_index[0] == stream) { 353bf215546Sopenharmony_ci found = true; 354bf215546Sopenharmony_ci break; 355bf215546Sopenharmony_ci } 356bf215546Sopenharmony_ci } 357bf215546Sopenharmony_ci 358bf215546Sopenharmony_ci if (!found) 359bf215546Sopenharmony_ci return true; 360bf215546Sopenharmony_ci } 361bf215546Sopenharmony_ci } 362bf215546Sopenharmony_ci 363bf215546Sopenharmony_ci return false; 364bf215546Sopenharmony_ci} 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_cibool 367bf215546Sopenharmony_cinir_lower_gs_intrinsics(nir_shader *shader, nir_lower_gs_intrinsics_flags options) 368bf215546Sopenharmony_ci{ 369bf215546Sopenharmony_ci bool per_stream = options & nir_lower_gs_intrinsics_per_stream; 370bf215546Sopenharmony_ci bool count_primitives = options & nir_lower_gs_intrinsics_count_primitives; 371bf215546Sopenharmony_ci bool overwrite_incomplete = options & nir_lower_gs_intrinsics_overwrite_incomplete; 372bf215546Sopenharmony_ci bool count_vtx_per_prim = 373bf215546Sopenharmony_ci overwrite_incomplete || 374bf215546Sopenharmony_ci (options & nir_lower_gs_intrinsics_count_vertices_per_primitive); 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci bool is_points = shader->info.gs.output_primitive == SHADER_PRIM_POINTS; 377bf215546Sopenharmony_ci /* points are always complete primitives with a single vertex, so these are 378bf215546Sopenharmony_ci * not needed when primitive is points. 379bf215546Sopenharmony_ci */ 380bf215546Sopenharmony_ci if (is_points) { 381bf215546Sopenharmony_ci count_primitives = false; 382bf215546Sopenharmony_ci overwrite_incomplete = false; 383bf215546Sopenharmony_ci count_vtx_per_prim = false; 384bf215546Sopenharmony_ci } 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_ci struct state state; 387bf215546Sopenharmony_ci state.progress = false; 388bf215546Sopenharmony_ci state.count_prims = count_primitives; 389bf215546Sopenharmony_ci state.count_vtx_per_prim = count_vtx_per_prim; 390bf215546Sopenharmony_ci state.overwrite_incomplete = overwrite_incomplete; 391bf215546Sopenharmony_ci state.per_stream = per_stream; 392bf215546Sopenharmony_ci state.is_points = is_points; 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(shader); 395bf215546Sopenharmony_ci assert(impl); 396bf215546Sopenharmony_ci 397bf215546Sopenharmony_ci if (!a_block_needs_set_vertex_and_primitive_count(impl->end_block, per_stream)) 398bf215546Sopenharmony_ci return false; 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_ci nir_builder b; 401bf215546Sopenharmony_ci nir_builder_init(&b, impl); 402bf215546Sopenharmony_ci state.builder = &b; 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_ci b.cursor = nir_before_cf_list(&impl->body); 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_ci for (unsigned i = 0; i < NIR_MAX_XFB_STREAMS; i++) { 407bf215546Sopenharmony_ci if (per_stream && !(shader->info.gs.active_stream_mask & (1 << i))) 408bf215546Sopenharmony_ci continue; 409bf215546Sopenharmony_ci 410bf215546Sopenharmony_ci if (i == 0 || per_stream) { 411bf215546Sopenharmony_ci state.vertex_count_vars[i] = 412bf215546Sopenharmony_ci nir_local_variable_create(impl, glsl_uint_type(), "vertex_count"); 413bf215546Sopenharmony_ci /* initialize to 0 */ 414bf215546Sopenharmony_ci nir_store_var(&b, state.vertex_count_vars[i], nir_imm_int(&b, 0), 0x1); 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci if (count_primitives) { 417bf215546Sopenharmony_ci state.primitive_count_vars[i] = 418bf215546Sopenharmony_ci nir_local_variable_create(impl, glsl_uint_type(), "primitive_count"); 419bf215546Sopenharmony_ci /* initialize to 1 */ 420bf215546Sopenharmony_ci nir_store_var(&b, state.primitive_count_vars[i], nir_imm_int(&b, 1), 0x1); 421bf215546Sopenharmony_ci } 422bf215546Sopenharmony_ci if (count_vtx_per_prim) { 423bf215546Sopenharmony_ci state.vtxcnt_per_prim_vars[i] = 424bf215546Sopenharmony_ci nir_local_variable_create(impl, glsl_uint_type(), "vertices_per_primitive"); 425bf215546Sopenharmony_ci /* initialize to 0 */ 426bf215546Sopenharmony_ci nir_store_var(&b, state.vtxcnt_per_prim_vars[i], nir_imm_int(&b, 0), 0x1); 427bf215546Sopenharmony_ci } 428bf215546Sopenharmony_ci } else { 429bf215546Sopenharmony_ci /* If per_stream is false, we only have one counter of each kind which we 430bf215546Sopenharmony_ci * want to use for all streams. Duplicate the counter pointers so all 431bf215546Sopenharmony_ci * streams use the same counters. 432bf215546Sopenharmony_ci */ 433bf215546Sopenharmony_ci state.vertex_count_vars[i] = state.vertex_count_vars[0]; 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci if (count_primitives) 436bf215546Sopenharmony_ci state.primitive_count_vars[i] = state.primitive_count_vars[0]; 437bf215546Sopenharmony_ci if (count_vtx_per_prim) 438bf215546Sopenharmony_ci state.vtxcnt_per_prim_vars[i] = state.vtxcnt_per_prim_vars[0]; 439bf215546Sopenharmony_ci } 440bf215546Sopenharmony_ci } 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_ci nir_foreach_block_safe(block, impl) 443bf215546Sopenharmony_ci rewrite_intrinsics(block, &state); 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_ci /* This only works because we have a single main() function. */ 446bf215546Sopenharmony_ci append_set_vertex_and_primitive_count(impl->end_block, &state); 447bf215546Sopenharmony_ci 448bf215546Sopenharmony_ci nir_metadata_preserve(impl, 0); 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci return state.progress; 451bf215546Sopenharmony_ci} 452