1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "draw_gs.h" 29 30#include "draw_private.h" 31#include "draw_context.h" 32#ifdef DRAW_LLVM_AVAILABLE 33#include "draw_llvm.h" 34#endif 35 36#include "tgsi/tgsi_parse.h" 37#include "tgsi/tgsi_exec.h" 38#include "nir/nir_to_tgsi_info.h" 39#include "compiler/nir/nir.h" 40#include "pipe/p_shader_tokens.h" 41 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "util/u_prim.h" 45#include "util/ralloc.h" 46/* fixme: move it from here */ 47#define MAX_PRIMITIVES 64 48 49static inline int 50draw_gs_get_input_index(int semantic, int index, 51 const struct tgsi_shader_info *input_info) 52{ 53 int i; 54 const ubyte *input_semantic_names = input_info->output_semantic_name; 55 const ubyte *input_semantic_indices = input_info->output_semantic_index; 56 for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { 57 if (input_semantic_names[i] == semantic && 58 input_semantic_indices[i] == index) 59 return i; 60 } 61 return -1; 62} 63 64/** 65 * We execute geometry shaders in the SOA mode, so ideally we want to 66 * flush when the number of currently fetched primitives is equal to 67 * the number of elements in the SOA vector. This ensures that the 68 * throughput is optimized for the given vector instruction set. 69 */ 70static inline boolean 71draw_gs_should_flush(struct draw_geometry_shader *shader) 72{ 73 return (shader->fetched_prim_count == shader->vector_length || shader->num_invocations > 1); 74} 75 76/*#define DEBUG_OUTPUTS 1*/ 77static void 78tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader, 79 unsigned stream, 80 unsigned num_primitives, 81 float (**p_output)[4]) 82{ 83 struct tgsi_exec_machine *machine = shader->machine; 84 unsigned prim_idx, j, slot; 85 float (*output)[4]; 86 87 output = *p_output; 88 89 /* Unswizzle all output results. 90 */ 91 92 for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) { 93 unsigned num_verts_per_prim = machine->Primitives[stream][prim_idx]; 94 unsigned prim_offset = machine->PrimitiveOffsets[stream][prim_idx]; 95 shader->stream[stream].primitive_lengths[prim_idx + shader->stream[stream].emitted_primitives] = 96 machine->Primitives[stream][prim_idx]; 97 shader->stream[stream].emitted_vertices += num_verts_per_prim; 98 99 for (j = 0; j < num_verts_per_prim; j++) { 100 int idx = prim_offset + j * shader->info.num_outputs; 101#ifdef DEBUG_OUTPUTS 102 debug_printf("%d/%d) Output vert:\n", stream, idx / shader->info.num_outputs); 103#endif 104 for (slot = 0; slot < shader->info.num_outputs; slot++) { 105 output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0]; 106 output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[0]; 107 output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[0]; 108 output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[0]; 109#ifdef DEBUG_OUTPUTS 110 debug_printf("\t%d: %f %f %f %f\n", slot, 111 output[slot][0], 112 output[slot][1], 113 output[slot][2], 114 output[slot][3]); 115#endif 116 } 117 output = (float (*)[4])((char *)output + shader->vertex_size); 118 } 119 } 120 *p_output = output; 121 shader->stream[stream].emitted_primitives += num_primitives; 122} 123 124/*#define DEBUG_INPUTS 1*/ 125static void tgsi_fetch_gs_input(struct draw_geometry_shader *shader, 126 unsigned *indices, 127 unsigned num_vertices, 128 unsigned prim_idx) 129{ 130 struct tgsi_exec_machine *machine = shader->machine; 131 unsigned slot, i; 132 int vs_slot; 133 unsigned input_vertex_stride = shader->input_vertex_stride; 134 const float (*input_ptr)[4]; 135 136 int primid_sv = machine->SysSemanticToIndex[TGSI_SEMANTIC_PRIMID]; 137 if (primid_sv != -1) { 138 for (unsigned j = 0; j < TGSI_QUAD_SIZE; j++) 139 machine->SystemValue[primid_sv].xyzw[0].i[j] = shader->in_prim_idx; 140 } 141 142 input_ptr = shader->input; 143 144 for (i = 0; i < num_vertices; ++i) { 145 const float (*input)[4]; 146#if DEBUG_INPUTS 147 debug_printf("%d) vertex index = %d (prim idx = %d)\n", 148 i, indices[i], prim_idx); 149#endif 150 input = (const float (*)[4])( 151 (const char *)input_ptr + (indices[i] * input_vertex_stride)); 152 for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) { 153 unsigned idx = i * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot; 154 if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) { 155 machine->Inputs[idx].xyzw[0].u[prim_idx] = shader->in_prim_idx; 156 machine->Inputs[idx].xyzw[1].u[prim_idx] = shader->in_prim_idx; 157 machine->Inputs[idx].xyzw[2].u[prim_idx] = shader->in_prim_idx; 158 machine->Inputs[idx].xyzw[3].u[prim_idx] = shader->in_prim_idx; 159 } else { 160 vs_slot = draw_gs_get_input_index( 161 shader->info.input_semantic_name[slot], 162 shader->info.input_semantic_index[slot], 163 shader->input_info); 164 if (vs_slot < 0) { 165 debug_printf("VS/GS signature mismatch!\n"); 166 machine->Inputs[idx].xyzw[0].f[prim_idx] = 0; 167 machine->Inputs[idx].xyzw[1].f[prim_idx] = 0; 168 machine->Inputs[idx].xyzw[2].f[prim_idx] = 0; 169 machine->Inputs[idx].xyzw[3].f[prim_idx] = 0; 170 } else { 171#if DEBUG_INPUTS 172 debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n", 173 slot, vs_slot, idx); 174 assert(!util_is_inf_or_nan(input[vs_slot][0])); 175 assert(!util_is_inf_or_nan(input[vs_slot][1])); 176 assert(!util_is_inf_or_nan(input[vs_slot][2])); 177 assert(!util_is_inf_or_nan(input[vs_slot][3])); 178#endif 179 machine->Inputs[idx].xyzw[0].f[prim_idx] = input[vs_slot][0]; 180 machine->Inputs[idx].xyzw[1].f[prim_idx] = input[vs_slot][1]; 181 machine->Inputs[idx].xyzw[2].f[prim_idx] = input[vs_slot][2]; 182 machine->Inputs[idx].xyzw[3].f[prim_idx] = input[vs_slot][3]; 183#if DEBUG_INPUTS 184 debug_printf("\t\t%f %f %f %f\n", 185 machine->Inputs[idx].xyzw[0].f[prim_idx], 186 machine->Inputs[idx].xyzw[1].f[prim_idx], 187 machine->Inputs[idx].xyzw[2].f[prim_idx], 188 machine->Inputs[idx].xyzw[3].f[prim_idx]); 189#endif 190 ++vs_slot; 191 } 192 } 193 } 194 } 195} 196 197static void tgsi_gs_prepare(struct draw_geometry_shader *shader, 198 const void *constants[PIPE_MAX_CONSTANT_BUFFERS], 199 const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS]) 200{ 201 struct tgsi_exec_machine *machine = shader->machine; 202 tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, 203 constants, constants_size); 204} 205 206static void tgsi_gs_run(struct draw_geometry_shader *shader, 207 unsigned input_primitives, 208 unsigned *out_prims) 209{ 210 struct tgsi_exec_machine *machine = shader->machine; 211 int i; 212 213 if (shader->info.uses_invocationid) { 214 unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INVOCATIONID]; 215 for (int j = 0; j < TGSI_QUAD_SIZE; j++) 216 machine->SystemValue[i].xyzw[0].i[j] = shader->invocation_id; 217 } 218 219 /* run interpreter */ 220 tgsi_exec_machine_run(machine, 0); 221 222 for (i = 0; i < 4; i++) 223 out_prims[i] = machine->OutputPrimCount[i]; 224} 225 226#ifdef DRAW_LLVM_AVAILABLE 227 228static void 229llvm_fetch_gs_input(struct draw_geometry_shader *shader, 230 unsigned *indices, 231 unsigned num_vertices, 232 unsigned prim_idx) 233{ 234 unsigned slot, i; 235 int vs_slot; 236 unsigned input_vertex_stride = shader->input_vertex_stride; 237 const float (*input_ptr)[4]; 238 float (*input_data)[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS] = &shader->gs_input->data; 239 240 shader->llvm_prim_ids[shader->fetched_prim_count] = shader->in_prim_idx; 241 242 input_ptr = shader->input; 243 244 for (i = 0; i < num_vertices; ++i) { 245 const float (*input)[4]; 246#if DEBUG_INPUTS 247 debug_printf("%d) vertex index = %d (prim idx = %d)\n", 248 i, indices[i], prim_idx); 249#endif 250 input = (const float (*)[4])( 251 (const char *)input_ptr + (indices[i] * input_vertex_stride)); 252 for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) { 253 if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) { 254 /* skip. we handle system values through gallivm */ 255 /* NOTE: If we hit this case here it's an ordinary input not a sv, 256 * even though it probably should be a sv. 257 * Not sure how to set it up as regular input however if that even, 258 * would make sense so hack around this later in gallivm. 259 */ 260 } else { 261 vs_slot = draw_gs_get_input_index( 262 shader->info.input_semantic_name[slot], 263 shader->info.input_semantic_index[slot], 264 shader->input_info); 265 if (vs_slot < 0) { 266 debug_printf("VS/GS signature mismatch!\n"); 267 (*input_data)[i][slot][0][prim_idx] = 0; 268 (*input_data)[i][slot][1][prim_idx] = 0; 269 (*input_data)[i][slot][2][prim_idx] = 0; 270 (*input_data)[i][slot][3][prim_idx] = 0; 271 } else { 272#if DEBUG_INPUTS 273 debug_printf("\tSlot = %d, vs_slot = %d, i = %d:\n", 274 slot, vs_slot, i); 275 assert(!util_is_inf_or_nan(input[vs_slot][0])); 276 assert(!util_is_inf_or_nan(input[vs_slot][1])); 277 assert(!util_is_inf_or_nan(input[vs_slot][2])); 278 assert(!util_is_inf_or_nan(input[vs_slot][3])); 279#endif 280 (*input_data)[i][slot][0][prim_idx] = input[vs_slot][0]; 281 (*input_data)[i][slot][1][prim_idx] = input[vs_slot][1]; 282 (*input_data)[i][slot][2][prim_idx] = input[vs_slot][2]; 283 (*input_data)[i][slot][3][prim_idx] = input[vs_slot][3]; 284#if DEBUG_INPUTS 285 debug_printf("\t\t%f %f %f %f\n", 286 (*input_data)[i][slot][0][prim_idx], 287 (*input_data)[i][slot][1][prim_idx], 288 (*input_data)[i][slot][2][prim_idx], 289 (*input_data)[i][slot][3][prim_idx]); 290#endif 291 ++vs_slot; 292 } 293 } 294 } 295 } 296} 297 298static void 299llvm_fetch_gs_outputs(struct draw_geometry_shader *shader, 300 unsigned stream, 301 unsigned num_primitives, 302 float (**p_output)[4]) 303{ 304 int total_verts = 0; 305 int vertex_count = 0; 306 int total_prims = 0; 307 int max_prims_per_invocation = 0; 308 char *output_ptr = (char*)shader->gs_output[stream]; 309 int i, j, prim_idx; 310 unsigned next_prim_boundary = shader->primitive_boundary; 311 312 for (i = 0; i < shader->vector_length; ++i) { 313 int prims = shader->llvm_emitted_primitives[i + (stream * shader->vector_length)]; 314 total_prims += prims; 315 max_prims_per_invocation = MAX2(max_prims_per_invocation, prims); 316 } 317 for (i = 0; i < shader->vector_length; ++i) { 318 total_verts += shader->llvm_emitted_vertices[i + (stream * shader->vector_length)]; 319 } 320 321 output_ptr += shader->stream[stream].emitted_vertices * shader->vertex_size; 322 for (i = 0; i < shader->vector_length - 1; ++i) { 323 int current_verts = shader->llvm_emitted_vertices[i + (stream * shader->vector_length)]; 324 int next_verts = shader->llvm_emitted_vertices[i + 1 + (stream * shader->vector_length)]; 325#if 0 326 int j; 327 for (j = 0; j < current_verts; ++j) { 328 struct vertex_header *vh = (struct vertex_header *) 329 (output_ptr + shader->vertex_size * (i * next_prim_boundary + j)); 330 debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count, 331 vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]); 332 333 } 334#endif 335 assert(current_verts <= shader->max_output_vertices); 336 assert(next_verts <= shader->max_output_vertices); 337 if (next_verts) { 338 memmove(output_ptr + (vertex_count + current_verts) * shader->vertex_size, 339 output_ptr + ((i + 1) * next_prim_boundary) * shader->vertex_size, 340 shader->vertex_size * next_verts); 341 } 342 vertex_count += current_verts; 343 } 344 345#if 0 346 { 347 int i; 348 for (i = 0; i < total_verts; ++i) { 349 struct vertex_header *vh = (struct vertex_header *)(output_ptr + shader->vertex_size * i); 350 debug_printf("%d) Vertex:\n", i); 351 for (j = 0; j < shader->info.num_outputs; ++j) { 352 unsigned *udata = (unsigned*)vh->data[j]; 353 debug_printf(" %d) [%f, %f, %f, %f] [%d, %d, %d, %d]\n", j, 354 vh->data[j][0], vh->data[j][1], vh->data[j][2], vh->data[j][3], 355 udata[0], udata[1], udata[2], udata[3]); 356 } 357 358 } 359 } 360#endif 361 362 prim_idx = 0; 363 for (i = 0; i < shader->vector_length; ++i) { 364 int num_prims = shader->llvm_emitted_primitives[i + (stream * shader->vector_length)]; 365 for (j = 0; j < num_prims; ++j) { 366 int prim_length = 367 shader->llvm_prim_lengths[j * shader->num_vertex_streams + stream][i]; 368 shader->stream[stream].primitive_lengths[shader->stream[stream].emitted_primitives + prim_idx] = 369 prim_length; 370 ++prim_idx; 371 } 372 } 373 374 shader->stream[stream].emitted_primitives += total_prims; 375 shader->stream[stream].emitted_vertices += total_verts; 376} 377 378static void 379llvm_gs_prepare(struct draw_geometry_shader *shader, 380 const void *constants[PIPE_MAX_CONSTANT_BUFFERS], 381 const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS]) 382{ 383} 384 385static void 386llvm_gs_run(struct draw_geometry_shader *shader, 387 unsigned input_primitives, unsigned *out_prims) 388{ 389 struct vertex_header *input[PIPE_MAX_VERTEX_STREAMS]; 390 for (unsigned i = 0; i < shader->num_vertex_streams; i++) { 391 char *tmp = (char *)shader->gs_output[i]; 392 tmp += shader->stream[i].emitted_vertices * shader->vertex_size; 393 input[i] = (struct vertex_header *)tmp; 394 } 395 396 shader->current_variant->jit_func( 397 shader->jit_context, shader->gs_input->data, 398 input, 399 input_primitives, 400 shader->draw->instance_id, 401 shader->llvm_prim_ids, 402 shader->invocation_id, 403 shader->draw->pt.user.viewid); 404 405 for (unsigned i = 0; i < shader->num_vertex_streams; i++) { 406 out_prims[i] = shader->jit_context->emitted_prims[i]; 407 } 408} 409 410#endif 411 412static void gs_flush(struct draw_geometry_shader *shader) 413{ 414 unsigned out_prim_count[TGSI_MAX_VERTEX_STREAMS]; 415 unsigned i; 416 unsigned input_primitives = shader->fetched_prim_count; 417 418 if (shader->draw->collect_statistics) { 419 shader->draw->statistics.gs_invocations += input_primitives; 420 } 421 422 assert(input_primitives > 0 && 423 input_primitives <= 4); 424 425 for (unsigned invocation = 0; invocation < shader->num_invocations; invocation++) { 426 shader->invocation_id = invocation; 427 shader->run(shader, input_primitives, out_prim_count); 428 for (i = 0; i < shader->num_vertex_streams; i++) { 429 shader->fetch_outputs(shader, i, out_prim_count[i], 430 &shader->stream[i].tmp_output); 431 } 432 } 433 434#if 0 435 for (i = 0; i < shader->num_vertex_streams; i++) { 436 debug_printf("stream %d: PRIM emitted prims = %d (verts=%d), cur prim count = %d\n", 437 i, 438 shader->stream[i].emitted_primitives, shader->stream[i].emitted_vertices, 439 out_prim_count[i]); 440 } 441#endif 442 443 shader->fetched_prim_count = 0; 444} 445 446static void gs_point(struct draw_geometry_shader *shader, 447 int idx) 448{ 449 unsigned indices[1]; 450 451 indices[0] = idx; 452 453 shader->fetch_inputs(shader, indices, 1, 454 shader->fetched_prim_count); 455 ++shader->in_prim_idx; 456 ++shader->fetched_prim_count; 457 458 if (draw_gs_should_flush(shader)) 459 gs_flush(shader); 460} 461 462static void gs_line(struct draw_geometry_shader *shader, 463 int i0, int i1) 464{ 465 unsigned indices[2]; 466 467 indices[0] = i0; 468 indices[1] = i1; 469 470 shader->fetch_inputs(shader, indices, 2, 471 shader->fetched_prim_count); 472 ++shader->in_prim_idx; 473 ++shader->fetched_prim_count; 474 475 if (draw_gs_should_flush(shader)) 476 gs_flush(shader); 477} 478 479static void gs_line_adj(struct draw_geometry_shader *shader, 480 int i0, int i1, int i2, int i3) 481{ 482 unsigned indices[4]; 483 484 indices[0] = i0; 485 indices[1] = i1; 486 indices[2] = i2; 487 indices[3] = i3; 488 489 shader->fetch_inputs(shader, indices, 4, 490 shader->fetched_prim_count); 491 ++shader->in_prim_idx; 492 ++shader->fetched_prim_count; 493 494 if (draw_gs_should_flush(shader)) 495 gs_flush(shader); 496} 497 498static void gs_tri(struct draw_geometry_shader *shader, 499 int i0, int i1, int i2) 500{ 501 unsigned indices[3]; 502 503 indices[0] = i0; 504 indices[1] = i1; 505 indices[2] = i2; 506 507 shader->fetch_inputs(shader, indices, 3, 508 shader->fetched_prim_count); 509 ++shader->in_prim_idx; 510 ++shader->fetched_prim_count; 511 512 if (draw_gs_should_flush(shader)) 513 gs_flush(shader); 514} 515 516static void gs_tri_adj(struct draw_geometry_shader *shader, 517 int i0, int i1, int i2, 518 int i3, int i4, int i5) 519{ 520 unsigned indices[6]; 521 522 indices[0] = i0; 523 indices[1] = i1; 524 indices[2] = i2; 525 indices[3] = i3; 526 indices[4] = i4; 527 indices[5] = i5; 528 529 shader->fetch_inputs(shader, indices, 6, 530 shader->fetched_prim_count); 531 ++shader->in_prim_idx; 532 ++shader->fetched_prim_count; 533 534 if (draw_gs_should_flush(shader)) 535 gs_flush(shader); 536} 537 538#define FUNC gs_run 539#define GET_ELT(idx) (idx) 540#include "draw_gs_tmp.h" 541 542 543#define FUNC gs_run_elts 544#define LOCAL_VARS const ushort *elts = input_prims->elts; 545#define GET_ELT(idx) (elts[idx]) 546#include "draw_gs_tmp.h" 547 548 549/** 550 * Execute geometry shader. 551 */ 552int draw_geometry_shader_run(struct draw_geometry_shader *shader, 553 const void *constants[PIPE_MAX_CONSTANT_BUFFERS], 554 const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], 555 const struct draw_vertex_info *input_verts, 556 const struct draw_prim_info *input_prim, 557 const struct tgsi_shader_info *input_info, 558 struct draw_vertex_info *output_verts, 559 struct draw_prim_info *output_prims ) 560{ 561 const float (*input)[4] = (const float (*)[4])input_verts->verts->data; 562 unsigned input_stride = input_verts->vertex_size; 563 unsigned num_outputs = draw_total_gs_outputs(shader->draw); 564 unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float); 565 unsigned num_input_verts = input_prim->linear ? 566 input_verts->count : 567 input_prim->count; 568 unsigned num_in_primitives = 569 align( 570 MAX2(u_decomposed_prims_for_vertices(input_prim->prim, 571 num_input_verts), 572 u_decomposed_prims_for_vertices(shader->input_primitive, 573 num_input_verts)), 574 shader->vector_length); 575 unsigned max_out_prims = 576 u_decomposed_prims_for_vertices(shader->output_primitive, 577 shader->max_output_vertices) 578 * num_in_primitives; 579 /* we allocate exactly one extra vertex per primitive to allow the GS to emit 580 * overflown vertices into some area where they won't harm anyone */ 581 unsigned total_verts_per_buffer = shader->primitive_boundary * 582 num_in_primitives; 583 int i; 584 //Assume at least one primitive 585 max_out_prims = MAX2(max_out_prims, 1); 586 587 for (i = 0; i < shader->num_vertex_streams; i++) { 588 /* write all the vertex data into all the streams */ 589 output_verts[i].vertex_size = vertex_size; 590 output_verts[i].stride = output_verts[i].vertex_size; 591 output_verts[i].verts = 592 (struct vertex_header *)MALLOC(output_verts[i].vertex_size * 593 total_verts_per_buffer * shader->num_invocations + 594 DRAW_EXTRA_VERTICES_PADDING); 595 assert(output_verts[i].verts); 596 } 597 598#if 0 599 debug_printf("%s count = %d (in prims # = %d, invocs = %d, streams = %d)\n", 600 __FUNCTION__, num_input_verts, num_in_primitives, 601 shader->num_invocations, shader->num_vertex_streams); 602 debug_printf("\tlinear = %d, prim_info->count = %d\n", 603 input_prim->linear, input_prim->count); 604 debug_printf("\tprim pipe = %s, shader in = %s, shader out = %s\n", 605 u_prim_name(input_prim->prim), 606 u_prim_name(shader->input_primitive), 607 u_prim_name(shader->output_primitive)); 608 debug_printf("\tmaxv = %d, maxp = %d, primitive_boundary = %d, " 609 "vertex_size = %d, tverts = %d\n", 610 shader->max_output_vertices, max_out_prims, 611 shader->primitive_boundary, output_verts->vertex_size, 612 total_verts_per_buffer); 613#endif 614 615 for (i = 0; i < shader->num_vertex_streams; i++) { 616 shader->stream[i].emitted_vertices = 0; 617 shader->stream[i].emitted_primitives = 0; 618 FREE(shader->stream[i].primitive_lengths); 619 shader->stream[i].primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned) * shader->num_invocations); 620 shader->stream[i].tmp_output = (float (*)[4])output_verts[i].verts->data; 621 } 622 shader->vertex_size = vertex_size; 623 shader->fetched_prim_count = 0; 624 shader->input_vertex_stride = input_stride; 625 shader->input = input; 626 shader->input_info = input_info; 627 628#ifdef DRAW_LLVM_AVAILABLE 629 if (shader->draw->llvm) { 630 for (i = 0; i < shader->num_vertex_streams; i++) { 631 shader->gs_output[i] = output_verts[i].verts; 632 } 633 if (max_out_prims > shader->max_out_prims) { 634 unsigned i; 635 if (shader->llvm_prim_lengths) { 636 for (i = 0; i < shader->num_vertex_streams * shader->max_out_prims; ++i) { 637 align_free(shader->llvm_prim_lengths[i]); 638 } 639 FREE(shader->llvm_prim_lengths); 640 } 641 642 shader->llvm_prim_lengths = MALLOC(shader->num_vertex_streams * max_out_prims * sizeof(unsigned*)); 643 for (i = 0; i < shader->num_vertex_streams * max_out_prims; ++i) { 644 int vector_size = shader->vector_length * sizeof(unsigned); 645 shader->llvm_prim_lengths[i] = 646 align_malloc(vector_size, vector_size); 647 } 648 649 shader->max_out_prims = max_out_prims; 650 } 651 shader->jit_context->prim_lengths = shader->llvm_prim_lengths; 652 shader->jit_context->emitted_vertices = shader->llvm_emitted_vertices; 653 shader->jit_context->emitted_prims = shader->llvm_emitted_primitives; 654 } 655#endif 656 657 shader->prepare(shader, constants, constants_size); 658 659 if (input_prim->linear) 660 gs_run(shader, input_prim, input_verts, 661 output_prims, output_verts); 662 else 663 gs_run_elts(shader, input_prim, input_verts, 664 output_prims, output_verts); 665 666 /* Flush the remaining primitives. Will happen if 667 * num_input_primitives % 4 != 0 668 */ 669 if (shader->fetched_prim_count > 0) { 670 gs_flush(shader); 671 } 672 assert(shader->fetched_prim_count == 0); 673 674 /* Update prim_info: 675 */ 676 for (i = 0; i < shader->num_vertex_streams; i++) { 677 output_prims[i].linear = TRUE; 678 output_prims[i].elts = NULL; 679 output_prims[i].start = 0; 680 output_prims[i].count = shader->stream[i].emitted_vertices; 681 output_prims[i].prim = shader->output_primitive; 682 output_prims[i].flags = 0x0; 683 output_prims[i].primitive_lengths = shader->stream[i].primitive_lengths; 684 output_prims[i].primitive_count = shader->stream[i].emitted_primitives; 685 output_verts[i].count = shader->stream[i].emitted_vertices; 686 687 if (shader->draw->collect_statistics) { 688 unsigned j; 689 for (j = 0; j < shader->stream[i].emitted_primitives; ++j) { 690 shader->draw->statistics.gs_primitives += 691 u_decomposed_prims_for_vertices(shader->output_primitive, 692 shader->stream[i].primitive_lengths[j]); 693 } 694 } 695 } 696 697#if 0 698 debug_printf("GS finished\n"); 699 for (i = 0; i < 4; i++) 700 debug_printf("stream %d: prims = %d verts = %d\n", i, output_prims[i].primitive_count, output_verts[i].count); 701#endif 702 703 return 0; 704} 705 706void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, 707 struct draw_context *draw) 708{ 709 boolean use_llvm = draw->llvm != NULL; 710 if (!use_llvm && shader && shader->machine->Tokens != shader->state.tokens) { 711 tgsi_exec_machine_bind_shader(shader->machine, 712 shader->state.tokens, 713 draw->gs.tgsi.sampler, 714 draw->gs.tgsi.image, 715 draw->gs.tgsi.buffer); 716 } 717} 718 719 720boolean 721draw_gs_init( struct draw_context *draw ) 722{ 723 if (!draw->llvm) { 724 draw->gs.tgsi.machine = tgsi_exec_machine_create(PIPE_SHADER_GEOMETRY); 725 726 for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) { 727 draw->gs.tgsi.machine->Primitives[i] = align_malloc( 728 MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16); 729 draw->gs.tgsi.machine->PrimitiveOffsets[i] = align_malloc( 730 MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16); 731 if (!draw->gs.tgsi.machine->Primitives[i] || !draw->gs.tgsi.machine->PrimitiveOffsets[i]) 732 return FALSE; 733 memset(draw->gs.tgsi.machine->Primitives[i], 0, 734 MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector)); 735 memset(draw->gs.tgsi.machine->PrimitiveOffsets[i], 0, 736 MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector)); 737 } 738 } 739 740 return TRUE; 741} 742 743void draw_gs_destroy( struct draw_context *draw ) 744{ 745 int i; 746 if (draw->gs.tgsi.machine) { 747 for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) { 748 align_free(draw->gs.tgsi.machine->Primitives[i]); 749 align_free(draw->gs.tgsi.machine->PrimitiveOffsets[i]); 750 } 751 tgsi_exec_machine_destroy(draw->gs.tgsi.machine); 752 } 753} 754 755struct draw_geometry_shader * 756draw_create_geometry_shader(struct draw_context *draw, 757 const struct pipe_shader_state *state) 758{ 759#ifdef DRAW_LLVM_AVAILABLE 760 boolean use_llvm = draw->llvm != NULL; 761 struct llvm_geometry_shader *llvm_gs = NULL; 762#endif 763 struct draw_geometry_shader *gs; 764 unsigned i; 765 766#ifdef DRAW_LLVM_AVAILABLE 767 if (use_llvm) { 768 llvm_gs = CALLOC_STRUCT(llvm_geometry_shader); 769 770 if (!llvm_gs) 771 return NULL; 772 773 gs = &llvm_gs->base; 774 775 list_inithead(&llvm_gs->variants.list); 776 } else 777#endif 778 { 779 gs = CALLOC_STRUCT(draw_geometry_shader); 780 } 781 782 if (!gs) 783 return NULL; 784 785 gs->draw = draw; 786 gs->state = *state; 787 788 if (state->type == PIPE_SHADER_IR_TGSI) { 789 gs->state.tokens = tgsi_dup_tokens(state->tokens); 790 if (!gs->state.tokens) { 791 FREE(gs); 792 return NULL; 793 } 794 795 tgsi_scan_shader(state->tokens, &gs->info); 796 gs->num_vertex_streams = 1; 797 for (i = 0; i < gs->state.stream_output.num_outputs; i++) { 798 if (gs->state.stream_output.output[i].stream >= gs->num_vertex_streams) 799 gs->num_vertex_streams = gs->state.stream_output.output[i].stream + 1; 800 } 801 } else { 802 nir_tgsi_scan_shader(state->ir.nir, &gs->info, true); 803 nir_shader *nir = state->ir.nir; 804 gs->num_vertex_streams = util_last_bit(nir->info.gs.active_stream_mask); 805 } 806 807 /* setup the defaults */ 808 gs->max_out_prims = 0; 809 810#ifdef DRAW_LLVM_AVAILABLE 811 if (use_llvm) { 812 /* TODO: change the input array to handle the following 813 vector length, instead of the currently hardcoded 814 TGSI_NUM_CHANNELS 815 gs->vector_length = lp_native_vector_width / 32;*/ 816 gs->vector_length = TGSI_NUM_CHANNELS; 817 } else 818#endif 819 { 820 gs->vector_length = 1; 821 } 822 823 gs->input_primitive = 824 gs->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]; 825 gs->output_primitive = 826 gs->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]; 827 gs->max_output_vertices = 828 gs->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; 829 gs->num_invocations = 830 gs->info.properties[TGSI_PROPERTY_GS_INVOCATIONS]; 831 if (!gs->max_output_vertices) 832 gs->max_output_vertices = 32; 833 834 /* Primitive boundary is bigger than max_output_vertices by one, because 835 * the specification says that the geometry shader should exit if the 836 * number of emitted vertices is bigger or equal to max_output_vertices and 837 * we can't do that because we're running in the SoA mode, which means that 838 * our storing routines will keep getting called on channels that have 839 * overflown. 840 * So we need some scratch area where we can keep writing the overflown 841 * vertices without overwriting anything important or crashing. 842 */ 843 gs->primitive_boundary = gs->max_output_vertices + 1; 844 845 gs->position_output = -1; 846 bool found_clipvertex = false; 847 for (i = 0; i < gs->info.num_outputs; i++) { 848 if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && 849 gs->info.output_semantic_index[i] == 0) 850 gs->position_output = i; 851 if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX) 852 gs->viewport_index_output = i; 853 if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPVERTEX && 854 gs->info.output_semantic_index[i] == 0) { 855 found_clipvertex = true; 856 gs->clipvertex_output = i; 857 } 858 if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) { 859 assert(gs->info.output_semantic_index[i] < 860 PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT); 861 gs->ccdistance_output[gs->info.output_semantic_index[i]] = i; 862 } 863 } 864 865 if (!found_clipvertex) 866 gs->clipvertex_output = gs->position_output; 867 868 gs->machine = draw->gs.tgsi.machine; 869 870#ifdef DRAW_LLVM_AVAILABLE 871 if (use_llvm) { 872 int vector_size = gs->vector_length * sizeof(float); 873 gs->gs_input = align_malloc(sizeof(struct draw_gs_inputs), 16); 874 memset(gs->gs_input, 0, sizeof(struct draw_gs_inputs)); 875 gs->llvm_prim_lengths = 0; 876 877 gs->llvm_emitted_primitives = align_malloc(vector_size * gs->num_vertex_streams, vector_size); 878 gs->llvm_emitted_vertices = align_malloc(vector_size * gs->num_vertex_streams, vector_size); 879 gs->llvm_prim_ids = align_calloc(vector_size, vector_size); 880 881 gs->fetch_outputs = llvm_fetch_gs_outputs; 882 gs->fetch_inputs = llvm_fetch_gs_input; 883 gs->prepare = llvm_gs_prepare; 884 gs->run = llvm_gs_run; 885 886 gs->jit_context = &draw->llvm->gs_jit_context; 887 888 889 llvm_gs->variant_key_size = 890 draw_gs_llvm_variant_key_size( 891 gs->info.file_max[TGSI_FILE_SAMPLER]+1, 892 gs->info.file_max[TGSI_FILE_SAMPLER_VIEW]+1, 893 gs->info.file_max[TGSI_FILE_IMAGE]+1); 894 } else 895#endif 896 { 897 gs->fetch_outputs = tgsi_fetch_gs_outputs; 898 gs->fetch_inputs = tgsi_fetch_gs_input; 899 gs->prepare = tgsi_gs_prepare; 900 gs->run = tgsi_gs_run; 901 } 902 903 return gs; 904} 905 906void draw_bind_geometry_shader(struct draw_context *draw, 907 struct draw_geometry_shader *dgs) 908{ 909 draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); 910 911 if (dgs) { 912 draw->gs.geometry_shader = dgs; 913 draw->gs.num_gs_outputs = dgs->info.num_outputs; 914 draw->gs.position_output = dgs->position_output; 915 draw->gs.clipvertex_output = dgs->clipvertex_output; 916 draw_geometry_shader_prepare(dgs, draw); 917 } 918 else { 919 draw->gs.geometry_shader = NULL; 920 draw->gs.num_gs_outputs = 0; 921 } 922} 923 924void draw_delete_geometry_shader(struct draw_context *draw, 925 struct draw_geometry_shader *dgs) 926{ 927 int i; 928 if (!dgs) { 929 return; 930 } 931#ifdef DRAW_LLVM_AVAILABLE 932 if (draw->llvm) { 933 struct llvm_geometry_shader *shader = llvm_geometry_shader(dgs); 934 struct draw_gs_llvm_variant_list_item *li, *next; 935 936 LIST_FOR_EACH_ENTRY_SAFE(li, next, &shader->variants.list, list) { 937 draw_gs_llvm_destroy_variant(li->base); 938 } 939 940 assert(shader->variants_cached == 0); 941 942 if (dgs->llvm_prim_lengths) { 943 unsigned i; 944 for (i = 0; i < dgs->num_vertex_streams * dgs->max_out_prims; ++i) { 945 align_free(dgs->llvm_prim_lengths[i]); 946 } 947 FREE(dgs->llvm_prim_lengths); 948 } 949 align_free(dgs->llvm_emitted_primitives); 950 align_free(dgs->llvm_emitted_vertices); 951 align_free(dgs->llvm_prim_ids); 952 953 align_free(dgs->gs_input); 954 } 955#endif 956 957 if (draw->gs.tgsi.machine && draw->gs.tgsi.machine->Tokens == dgs->state.tokens) 958 draw->gs.tgsi.machine->Tokens = NULL; 959 960 for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) 961 FREE(dgs->stream[i].primitive_lengths); 962 963 if (dgs->state.type == PIPE_SHADER_IR_NIR && dgs->state.ir.nir) 964 ralloc_free(dgs->state.ir.nir); 965 FREE((void*) dgs->state.tokens); 966 FREE(dgs); 967} 968 969 970#ifdef DRAW_LLVM_AVAILABLE 971void draw_gs_set_current_variant(struct draw_geometry_shader *shader, 972 struct draw_gs_llvm_variant *variant) 973{ 974 shader->current_variant = variant; 975} 976#endif 977 978/* 979 * Called at the very begin of the draw call with a new instance 980 * Used to reset state that should persist between primitive restart. 981 */ 982void 983draw_geometry_shader_new_instance(struct draw_geometry_shader *gs) 984{ 985 if (!gs) 986 return; 987 988 gs->in_prim_idx = 0; 989} 990