1/********************************************************** 2 * Copyright 2014 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 **********************************************************/ 25 26#include "util/u_memory.h" 27#include "util/u_bitmask.h" 28 29#include "svga_cmd.h" 30#include "svga_context.h" 31#include "svga_resource_buffer.h" 32#include "svga_shader.h" 33#include "svga_debug.h" 34#include "svga_streamout.h" 35 36struct svga_stream_output_target { 37 struct pipe_stream_output_target base; 38}; 39 40/** cast wrapper */ 41static inline struct svga_stream_output_target * 42svga_stream_output_target(struct pipe_stream_output_target *s) 43{ 44 return (struct svga_stream_output_target *)s; 45} 46 47 48/** 49 * A helper function to send different version of the DefineStreamOutput command 50 * depending on if device is SM5 capable or not. 51 */ 52static enum pipe_error 53svga_define_stream_output(struct svga_context *svga, 54 SVGA3dStreamOutputId soid, 55 uint32 numOutputStreamEntries, 56 uint32 numOutputStreamStrides, 57 uint32 streamStrides[SVGA3D_DX_MAX_SOTARGETS], 58 const SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS], 59 uint32 rasterizedStream, 60 struct svga_stream_output *streamout) 61{ 62 unsigned i; 63 64 SVGA_DBG(DEBUG_STREAMOUT, "%s: id=%d\n", __FUNCTION__, soid); 65 SVGA_DBG(DEBUG_STREAMOUT, 66 "numOutputStreamEntires=%d\n", numOutputStreamEntries); 67 68 for (i = 0; i < numOutputStreamEntries; i++) { 69 SVGA_DBG(DEBUG_STREAMOUT, 70 " %d: slot=%d regIdx=%d regMask=0x%x stream=%d\n", 71 i, decls[i].outputSlot, decls[i].registerIndex, 72 decls[i].registerMask, decls[i].stream); 73 } 74 75 SVGA_DBG(DEBUG_STREAMOUT, 76 "numOutputStreamStrides=%d\n", numOutputStreamStrides); 77 for (i = 0; i < numOutputStreamStrides; i++) { 78 SVGA_DBG(DEBUG_STREAMOUT, " %d ", streamStrides[i]); 79 } 80 SVGA_DBG(DEBUG_STREAMOUT, "\n"); 81 82 if (svga_have_sm5(svga) && 83 (numOutputStreamEntries > SVGA3D_MAX_DX10_STREAMOUT_DECLS || 84 numOutputStreamStrides > 1)) { 85 unsigned bufSize = sizeof(SVGA3dStreamOutputDeclarationEntry) 86 * numOutputStreamEntries; 87 struct svga_winsys_buffer *declBuf; 88 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; 89 void *map; 90 91 declBuf = svga_winsys_buffer_create(svga, 1, SVGA_BUFFER_USAGE_PINNED, 92 bufSize); 93 if (!declBuf) 94 return PIPE_ERROR; 95 map = sws->buffer_map(sws, declBuf, PIPE_MAP_WRITE); 96 if (!map) { 97 sws->buffer_destroy(sws, declBuf); 98 return PIPE_ERROR; 99 } 100 101 /* copy decls to buffer */ 102 memcpy(map, decls, bufSize); 103 104 /* unmap buffer */ 105 sws->buffer_unmap(sws, declBuf); 106 streamout->declBuf = declBuf; 107 108 SVGA_RETRY(svga, SVGA3D_sm5_DefineAndBindStreamOutput 109 (svga->swc, soid, 110 numOutputStreamEntries, 111 numOutputStreamStrides, 112 streamStrides, 113 streamout->declBuf, 114 rasterizedStream, 115 bufSize)); 116 } else { 117 SVGA_RETRY(svga, SVGA3D_vgpu10_DefineStreamOutput(svga->swc, soid, 118 numOutputStreamEntries, 119 streamStrides, 120 decls)); 121 } 122 123 return PIPE_OK; 124} 125 126 127/** 128 * Creates stream output from the stream output info. 129 */ 130struct svga_stream_output * 131svga_create_stream_output(struct svga_context *svga, 132 struct svga_shader *shader, 133 const struct pipe_stream_output_info *info) 134{ 135 struct svga_stream_output *streamout; 136 SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS]; 137 unsigned strides[SVGA3D_DX_MAX_SOTARGETS]; 138 unsigned dstOffset[SVGA3D_DX_MAX_SOTARGETS]; 139 unsigned numStreamStrides = 0; 140 unsigned numDecls; 141 unsigned i; 142 enum pipe_error ret; 143 unsigned id; 144 ASSERTED unsigned maxDecls = 0; 145 146 assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS); 147 148 /* Gallium utility creates shaders with stream output. 149 * For non-DX10, just return NULL. 150 */ 151 if (!svga_have_vgpu10(svga)) 152 return NULL; 153 154 if (svga_have_sm5(svga)) 155 maxDecls = SVGA3D_MAX_STREAMOUT_DECLS; 156 else if (svga_have_vgpu10(svga)) 157 maxDecls = SVGA3D_MAX_DX10_STREAMOUT_DECLS; 158 159 assert(info->num_outputs <= maxDecls); 160 161 /* Allocate an integer ID for the stream output */ 162 id = util_bitmask_add(svga->stream_output_id_bm); 163 if (id == UTIL_BITMASK_INVALID_INDEX) { 164 return NULL; 165 } 166 167 /* Allocate the streamout data structure */ 168 streamout = CALLOC_STRUCT(svga_stream_output); 169 170 if (!streamout) 171 return NULL; 172 173 streamout->info = *info; 174 streamout->id = id; 175 streamout->pos_out_index = -1; 176 streamout->streammask = 0; 177 178 /* Init whole decls and stride arrays to zero to avoid garbage values */ 179 memset(decls, 0, sizeof(decls)); 180 memset(strides, 0, sizeof(strides)); 181 memset(dstOffset, 0, sizeof(dstOffset)); 182 183 SVGA_DBG(DEBUG_STREAMOUT, "%s: num_outputs=%d\n", 184 __FUNCTION__, info->num_outputs); 185 186 for (i = 0, numDecls = 0; i < info->num_outputs; i++, numDecls++) { 187 unsigned reg_idx = info->output[i].register_index; 188 unsigned buf_idx = info->output[i].output_buffer; 189 const enum tgsi_semantic sem_name = 190 shader->tgsi_info.output_semantic_name[reg_idx]; 191 192 assert(buf_idx <= PIPE_MAX_SO_BUFFERS); 193 194 numStreamStrides = MAX2(numStreamStrides, buf_idx); 195 196 SVGA_DBG(DEBUG_STREAMOUT, 197 " %d: register_index=%d output_buffer=%d stream=%d\n", 198 i, reg_idx, buf_idx, info->output[i].stream); 199 200 SVGA_DBG(DEBUG_STREAMOUT, 201 " dst_offset=%d start_component=%d num_components=%d\n", 202 info->output[i].dst_offset, 203 info->output[i].start_component, 204 info->output[i].num_components); 205 206 streamout->buffer_stream |= info->output[i].stream << (buf_idx * 4); 207 208 /** 209 * Check if the destination offset of the current output 210 * is at the expected offset. If it is greater, then that means 211 * there is a gap in the stream output. We need to insert 212 * extra declaration entries with an invalid register index 213 * to specify a gap. 214 */ 215 while (info->output[i].dst_offset > dstOffset[buf_idx]) { 216 217 unsigned numComponents = info->output[i].dst_offset - 218 dstOffset[buf_idx];; 219 220 assert(svga_have_sm5(svga)); 221 222 /* We can only specify at most 4 components to skip in each 223 * declaration entry. 224 */ 225 numComponents = numComponents > 4 ? 4 : numComponents; 226 227 decls[numDecls].outputSlot = buf_idx, 228 decls[numDecls].stream = info->output[i].stream; 229 decls[numDecls].registerIndex = SVGA3D_INVALID_ID; 230 decls[numDecls].registerMask = (1 << numComponents) - 1; 231 232 dstOffset[buf_idx] += numComponents; 233 numDecls++; 234 } 235 236 if (sem_name == TGSI_SEMANTIC_POSITION) { 237 /** 238 * Check if streaming out POSITION. If so, replace the 239 * register index with the index for NON_ADJUSTED POSITION. 240 */ 241 decls[numDecls].registerIndex = shader->tgsi_info.num_outputs; 242 243 /* Save this output index, so we can tell later if this stream output 244 * includes an output of a vertex position 245 */ 246 streamout->pos_out_index = numDecls; 247 } 248 else if (sem_name == TGSI_SEMANTIC_CLIPDIST) { 249 /** 250 * Use the shadow copy for clip distance because 251 * CLIPDIST instruction is only emitted for enabled clip planes. 252 * It's valid to write to ClipDistance variable for non-enabled 253 * clip planes. 254 */ 255 decls[numDecls].registerIndex = 256 shader->tgsi_info.num_outputs + 1 + 257 shader->tgsi_info.output_semantic_index[reg_idx]; 258 } 259 else { 260 decls[numDecls].registerIndex = reg_idx; 261 } 262 263 decls[numDecls].outputSlot = buf_idx; 264 decls[numDecls].registerMask = 265 ((1 << info->output[i].num_components) - 1) 266 << info->output[i].start_component; 267 268 decls[numDecls].stream = info->output[i].stream; 269 assert(decls[numDecls].stream == 0 || svga_have_sm5(svga)); 270 271 /* Set the bit in streammask for the enabled stream */ 272 streamout->streammask |= 1 << info->output[i].stream; 273 274 /* Update the expected offset for the next output */ 275 dstOffset[buf_idx] += info->output[i].num_components; 276 277 strides[buf_idx] = info->stride[buf_idx] * sizeof(float); 278 } 279 280 assert(numDecls <= maxDecls); 281 282 /* Send the DefineStreamOutput command. 283 * Note, rasterizedStream is always 0. 284 */ 285 ret = svga_define_stream_output(svga, id, 286 numDecls, numStreamStrides+1, 287 strides, decls, 0, streamout); 288 289 if (ret != PIPE_OK) { 290 util_bitmask_clear(svga->stream_output_id_bm, id); 291 FREE(streamout); 292 streamout = NULL; 293 } 294 return streamout; 295} 296 297 298enum pipe_error 299svga_set_stream_output(struct svga_context *svga, 300 struct svga_stream_output *streamout) 301{ 302 unsigned id = streamout ? streamout->id : SVGA3D_INVALID_ID; 303 304 if (!svga_have_vgpu10(svga)) { 305 return PIPE_OK; 306 } 307 308 SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x id=%d\n", __FUNCTION__, 309 streamout, id); 310 311 if (svga->current_so != streamout) { 312 313 /* Before unbinding the current stream output, stop the stream output 314 * statistics queries for the active streams. 315 */ 316 if (svga_have_sm5(svga) && svga->current_so) { 317 svga->vcount_buffer_stream = svga->current_so->buffer_stream; 318 svga_end_stream_output_queries(svga, svga->current_so->streammask); 319 } 320 321 enum pipe_error ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id); 322 if (ret != PIPE_OK) { 323 return ret; 324 } 325 326 svga->current_so = streamout; 327 328 /* After binding the new stream output, start the stream output 329 * statistics queries for the active streams. 330 */ 331 if (svga_have_sm5(svga) && svga->current_so) { 332 svga_begin_stream_output_queries(svga, svga->current_so->streammask); 333 } 334 } 335 336 return PIPE_OK; 337} 338 339void 340svga_delete_stream_output(struct svga_context *svga, 341 struct svga_stream_output *streamout) 342{ 343 struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws; 344 345 SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x\n", __FUNCTION__, streamout); 346 347 assert(svga_have_vgpu10(svga)); 348 assert(streamout != NULL); 349 350 SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyStreamOutput(svga->swc, 351 streamout->id)); 352 353 if (svga_have_sm5(svga) && streamout->declBuf) { 354 sws->buffer_destroy(sws, streamout->declBuf); 355 } 356 357 /* Before deleting the current streamout, make sure to stop any pending 358 * SO queries. 359 */ 360 if (svga->current_so == streamout) { 361 if (svga->in_streamout) 362 svga_end_stream_output_queries(svga, svga->current_so->streammask); 363 svga->current_so = NULL; 364 } 365 366 /* Release the ID */ 367 util_bitmask_clear(svga->stream_output_id_bm, streamout->id); 368 369 /* Free streamout structure */ 370 FREE(streamout); 371} 372 373 374static struct pipe_stream_output_target * 375svga_create_stream_output_target(struct pipe_context *pipe, 376 struct pipe_resource *buffer, 377 unsigned buffer_offset, 378 unsigned buffer_size) 379{ 380 struct svga_context *svga = svga_context(pipe); 381 struct svga_stream_output_target *sot; 382 383 SVGA_DBG(DEBUG_STREAMOUT, "%s offset=%d size=%d\n", __FUNCTION__, 384 buffer_offset, buffer_size); 385 386 assert(svga_have_vgpu10(svga)); 387 (void) svga; 388 389 sot = CALLOC_STRUCT(svga_stream_output_target); 390 if (!sot) 391 return NULL; 392 393 pipe_reference_init(&sot->base.reference, 1); 394 pipe_resource_reference(&sot->base.buffer, buffer); 395 sot->base.context = pipe; 396 sot->base.buffer = buffer; 397 sot->base.buffer_offset = buffer_offset; 398 sot->base.buffer_size = buffer_size; 399 400 return &sot->base; 401} 402 403static void 404svga_destroy_stream_output_target(struct pipe_context *pipe, 405 struct pipe_stream_output_target *target) 406{ 407 struct svga_stream_output_target *sot = svga_stream_output_target(target); 408 409 SVGA_DBG(DEBUG_STREAMOUT, "%s\n", __FUNCTION__); 410 411 pipe_resource_reference(&sot->base.buffer, NULL); 412 FREE(sot); 413} 414 415static void 416svga_set_stream_output_targets(struct pipe_context *pipe, 417 unsigned num_targets, 418 struct pipe_stream_output_target **targets, 419 const unsigned *offsets) 420{ 421 struct svga_context *svga = svga_context(pipe); 422 struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS]; 423 unsigned i; 424 unsigned num_so_targets; 425 boolean begin_so_queries = num_targets > 0; 426 427 SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __FUNCTION__, 428 num_targets); 429 430 assert(svga_have_vgpu10(svga)); 431 432 /* Mark the streamout buffers as dirty so that we'll issue readbacks 433 * before mapping. 434 */ 435 for (i = 0; i < svga->num_so_targets; i++) { 436 struct svga_buffer *sbuf = svga_buffer(svga->so_targets[i]->buffer); 437 sbuf->dirty = TRUE; 438 } 439 440 /* Before the currently bound streamout targets are unbound, 441 * save them in case they need to be referenced to retrieve the 442 * number of vertices being streamed out. 443 */ 444 for (i = 0; i < ARRAY_SIZE(svga->so_targets); i++) { 445 svga->vcount_so_targets[i] = svga->so_targets[i]; 446 } 447 448 assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS); 449 450 for (i = 0; i < num_targets; i++) { 451 struct svga_stream_output_target *sot 452 = svga_stream_output_target(targets[i]); 453 struct svga_buffer *sbuf = svga_buffer(sot->base.buffer); 454 unsigned size; 455 456 svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer, 457 PIPE_BIND_STREAM_OUTPUT); 458 459 assert(svga_buffer(sot->base.buffer)->key.flags 460 & SVGA3D_SURFACE_BIND_STREAM_OUTPUT); 461 462 /* Mark the buffer surface as RENDERED */ 463 assert(sbuf->bufsurf); 464 sbuf->bufsurf->surface_state = SVGA_SURFACE_STATE_RENDERED; 465 466 svga->so_targets[i] = &sot->base; 467 if (offsets[i] == -1) { 468 soBindings[i].offset = -1; 469 470 /* The streamout is being resumed. There is no need to restart streamout statistics 471 * queries for the draw-auto fallback since those queries are still active. 472 */ 473 begin_so_queries = FALSE; 474 } 475 else 476 soBindings[i].offset = sot->base.buffer_offset + offsets[i]; 477 478 /* The size cannot extend beyond the end of the buffer. Clamp it. */ 479 size = MIN2(sot->base.buffer_size, 480 sot->base.buffer->width0 - sot->base.buffer_offset); 481 482 soBindings[i].sizeInBytes = size; 483 } 484 485 /* unbind any previously bound stream output buffers */ 486 for (; i < svga->num_so_targets; i++) { 487 svga->so_surfaces[i] = NULL; 488 svga->so_targets[i] = NULL; 489 } 490 491 num_so_targets = MAX2(svga->num_so_targets, num_targets); 492 SVGA_RETRY(svga, SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets, 493 soBindings, svga->so_surfaces)); 494 svga->num_so_targets = num_targets; 495 496 if (svga_have_sm5(svga) && svga->current_so && begin_so_queries) { 497 498 /* If there are already active queries and we need to start a new streamout, 499 * we need to stop the current active queries first. 500 */ 501 if (svga->in_streamout) { 502 svga_end_stream_output_queries(svga, svga->current_so->streammask); 503 } 504 505 /* Start stream out statistics queries for the new streamout */ 506 svga_begin_stream_output_queries(svga, svga->current_so->streammask); 507 } 508} 509 510/** 511 * Rebind stream output target surfaces 512 */ 513enum pipe_error 514svga_rebind_stream_output_targets(struct svga_context *svga) 515{ 516 struct svga_winsys_context *swc = svga->swc; 517 enum pipe_error ret; 518 unsigned i; 519 520 for (i = 0; i < svga->num_so_targets; i++) { 521 ret = swc->resource_rebind(swc, svga->so_surfaces[i], NULL, SVGA_RELOC_WRITE); 522 if (ret != PIPE_OK) 523 return ret; 524 } 525 526 return PIPE_OK; 527} 528 529 530void 531svga_init_stream_output_functions(struct svga_context *svga) 532{ 533 svga->pipe.create_stream_output_target = svga_create_stream_output_target; 534 svga->pipe.stream_output_target_destroy = svga_destroy_stream_output_target; 535 svga->pipe.set_stream_output_targets = svga_set_stream_output_targets; 536} 537 538 539/** 540 * A helper function to create stream output statistics queries for each stream. 541 * These queries are created as a workaround for DrawTransformFeedbackInstanced or 542 * DrawTransformFeedbackStreamInstanced when auto draw doesn't support 543 * instancing or non-0 stream. In this case, the vertex count will 544 * be retrieved from the stream output statistics query. 545 */ 546void 547svga_create_stream_output_queries(struct svga_context *svga) 548{ 549 unsigned i; 550 551 if (!svga_have_sm5(svga)) 552 return; 553 554 for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) { 555 svga->so_queries[i] = svga->pipe.create_query(&svga->pipe, 556 PIPE_QUERY_SO_STATISTICS, i); 557 assert(svga->so_queries[i] != NULL); 558 } 559} 560 561 562/** 563 * Destroy the stream output statistics queries for the draw-auto workaround. 564 */ 565void 566svga_destroy_stream_output_queries(struct svga_context *svga) 567{ 568 unsigned i; 569 570 if (!svga_have_sm5(svga)) 571 return; 572 573 for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) { 574 svga->pipe.destroy_query(&svga->pipe, svga->so_queries[i]); 575 } 576} 577 578 579/** 580 * Start stream output statistics queries for the active streams. 581 */ 582void 583svga_begin_stream_output_queries(struct svga_context *svga, 584 unsigned streammask) 585{ 586 assert(svga_have_sm5(svga)); 587 assert(!svga->in_streamout); 588 589 for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) { 590 bool ret; 591 if (streammask & (1 << i)) { 592 ret = svga->pipe.begin_query(&svga->pipe, svga->so_queries[i]); 593 } 594 (void) ret; 595 } 596 svga->in_streamout = TRUE; 597 598 return; 599} 600 601 602/** 603 * Stop stream output statistics queries for the active streams. 604 */ 605void 606svga_end_stream_output_queries(struct svga_context *svga, 607 unsigned streammask) 608{ 609 assert(svga_have_sm5(svga)); 610 611 if (!svga->in_streamout) 612 return; 613 614 for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) { 615 bool ret; 616 if (streammask & (1 << i)) { 617 ret = svga->pipe.end_query(&svga->pipe, svga->so_queries[i]); 618 } 619 (void) ret; 620 } 621 svga->in_streamout = FALSE; 622 623 return; 624} 625 626 627/** 628 * Return the primitive count returned from the stream output statistics query 629 * for the specified stream. 630 */ 631unsigned 632svga_get_primcount_from_stream_output(struct svga_context *svga, 633 unsigned stream) 634{ 635 unsigned primcount = 0; 636 union pipe_query_result result; 637 bool ret; 638 639 if (svga->current_so) { 640 svga_end_stream_output_queries(svga, svga->current_so->streammask); 641 } 642 643 ret = svga->pipe.get_query_result(&svga->pipe, 644 svga->so_queries[stream], 645 TRUE, &result); 646 if (ret) 647 primcount = result.so_statistics.num_primitives_written; 648 649 return primcount; 650} 651