1/********************************************************** 2 * Copyright 2008-2009 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 **********************************************************/ 25 26#include "svga_cmd.h" 27 28#include "pipe/p_state.h" 29#include "pipe/p_defines.h" 30#include "util/u_inlines.h" 31#include "os/os_thread.h" 32#include "util/u_math.h" 33#include "util/u_memory.h" 34#include "util/u_resource.h" 35 36#include "svga_context.h" 37#include "svga_screen.h" 38#include "svga_resource_buffer.h" 39#include "svga_resource_buffer_upload.h" 40#include "svga_resource_texture.h" 41#include "svga_sampler_view.h" 42#include "svga_winsys.h" 43#include "svga_debug.h" 44 45 46/** 47 * Determine what buffers eventually need hardware backing. 48 * 49 * Vertex- and index buffers need hardware backing. Constant buffers 50 * do on vgpu10. Staging texture-upload buffers do when they are 51 * supported. 52 */ 53static inline boolean 54svga_buffer_needs_hw_storage(const struct svga_screen *ss, 55 const struct pipe_resource *template) 56{ 57 unsigned bind_mask = (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER | 58 PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT | 59 PIPE_BIND_SHADER_BUFFER | PIPE_BIND_COMMAND_ARGS_BUFFER); 60 61 if (ss->sws->have_vgpu10) { 62 /* 63 * Driver-created upload const0- and staging texture upload buffers 64 * tagged with PIPE_BIND_CUSTOM 65 */ 66 bind_mask |= PIPE_BIND_CUSTOM; 67 /** 68 * Uniform buffer objects. 69 * Don't create hardware storage for state-tracker constant buffers, 70 * because we frequently map them for reading and writing, and 71 * the length of those buffers are always small, so it is better 72 * to just use system memory. 73 */ 74 } 75 76 if (template->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) 77 return TRUE; 78 79 return !!(template->bind & bind_mask); 80} 81 82 83static inline boolean 84need_buf_readback(struct svga_context *svga, 85 struct pipe_transfer *st) 86{ 87 struct svga_buffer *sbuf = svga_buffer(st->resource); 88 89 if (st->usage != PIPE_MAP_READ) 90 return FALSE; 91 92 /* No buffer surface has been created */ 93 if (!sbuf->bufsurf) 94 return FALSE; 95 96 return ((sbuf->dirty || 97 sbuf->bufsurf->surface_state == SVGA_SURFACE_STATE_RENDERED) && 98 !sbuf->key.coherent && !svga->swc->force_coherent); 99} 100 101 102/** 103 * Create a buffer transfer. 104 * 105 * Unlike texture DMAs (which are written immediately to the command buffer and 106 * therefore inherently serialized with other context operations), for buffers 107 * we try to coalesce multiple range mappings (i.e, multiple calls to this 108 * function) into a single DMA command, for better efficiency in command 109 * processing. This means we need to exercise extra care here to ensure that 110 * the end result is exactly the same as if one DMA was used for every mapped 111 * range. 112 */ 113void * 114svga_buffer_transfer_map(struct pipe_context *pipe, 115 struct pipe_resource *resource, 116 unsigned level, 117 unsigned usage, 118 const struct pipe_box *box, 119 struct pipe_transfer **ptransfer) 120{ 121 struct svga_context *svga = svga_context(pipe); 122 struct svga_screen *ss = svga_screen(pipe->screen); 123 struct svga_buffer *sbuf = svga_buffer(resource); 124 struct pipe_transfer *transfer; 125 uint8_t *map = NULL; 126 int64_t begin = svga_get_time(svga); 127 128 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_BUFFERTRANSFERMAP); 129 130 assert(box->y == 0); 131 assert(box->z == 0); 132 assert(box->height == 1); 133 assert(box->depth == 1); 134 135 transfer = MALLOC_STRUCT(pipe_transfer); 136 if (!transfer) { 137 goto done; 138 } 139 140 transfer->resource = resource; 141 transfer->level = level; 142 transfer->usage = usage; 143 transfer->box = *box; 144 transfer->stride = 0; 145 transfer->layer_stride = 0; 146 147 if (usage & PIPE_MAP_WRITE) { 148 /* If we write to the buffer for any reason, free any saved translated 149 * vertices. 150 */ 151 pipe_resource_reference(&sbuf->translated_indices.buffer, NULL); 152 } 153 154 /* If it is a read transfer and the buffer is dirty or the buffer is bound 155 * to a uav, we will need to read the subresource content from the device. 156 */ 157 if (need_buf_readback(svga, transfer)) { 158 /* Host-side buffers can be dirtied with vgpu10 features 159 * (streamout and buffer copy) and sm5 feature via uav. 160 */ 161 assert(svga_have_vgpu10(svga)); 162 163 if (!sbuf->user) { 164 (void) svga_buffer_handle(svga, resource, sbuf->bind_flags); 165 } 166 167 if (sbuf->dma.pending) { 168 svga_buffer_upload_flush(svga, sbuf); 169 svga_context_finish(svga); 170 } 171 172 assert(sbuf->handle); 173 174 SVGA_RETRY(svga, SVGA3D_ReadbackGBSurface(svga->swc, sbuf->handle)); 175 svga->hud.num_readbacks++; 176 177 svga_context_finish(svga); 178 179 sbuf->dirty = FALSE; 180 181 /* Mark the buffer surface state as UPDATED */ 182 assert(sbuf->bufsurf); 183 sbuf->bufsurf->surface_state = SVGA_SURFACE_STATE_UPDATED; 184 } 185 186 if (usage & PIPE_MAP_WRITE) { 187 if ((usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) && 188 !(resource->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)) { 189 /* 190 * Flush any pending primitives, finish writing any pending DMA 191 * commands, and tell the host to discard the buffer contents on 192 * the next DMA operation. 193 */ 194 195 svga_hwtnl_flush_buffer(svga, resource); 196 197 if (sbuf->dma.pending) { 198 svga_buffer_upload_flush(svga, sbuf); 199 200 /* 201 * Instead of flushing the context command buffer, simply discard 202 * the current hwbuf, and start a new one. 203 * With GB objects, the map operation takes care of this 204 * if passed the PIPE_MAP_DISCARD_WHOLE_RESOURCE flag, 205 * and the old backing store is busy. 206 */ 207 208 if (!svga_have_gb_objects(svga)) 209 svga_buffer_destroy_hw_storage(ss, sbuf); 210 } 211 212 sbuf->map.num_ranges = 0; 213 sbuf->dma.flags.discard = TRUE; 214 } 215 216 if (usage & PIPE_MAP_UNSYNCHRONIZED) { 217 if (!sbuf->map.num_ranges) { 218 /* 219 * No pending ranges to upload so far, so we can tell the host to 220 * not synchronize on the next DMA command. 221 */ 222 223 sbuf->dma.flags.unsynchronized = TRUE; 224 } 225 } else { 226 /* 227 * Synchronizing, so flush any pending primitives, finish writing any 228 * pending DMA command, and ensure the next DMA will be done in order. 229 */ 230 231 svga_hwtnl_flush_buffer(svga, resource); 232 233 if (sbuf->dma.pending) { 234 svga_buffer_upload_flush(svga, sbuf); 235 236 if (svga_buffer_has_hw_storage(sbuf)) { 237 /* 238 * We have a pending DMA upload from a hardware buffer, therefore 239 * we need to ensure that the host finishes processing that DMA 240 * command before the gallium frontend can start overwriting the 241 * hardware buffer. 242 * 243 * XXX: This could be avoided by tying the hardware buffer to 244 * the transfer (just as done with textures), which would allow 245 * overlapping DMAs commands to be queued on the same context 246 * buffer. However, due to the likelihood of software vertex 247 * processing, it is more convenient to hold on to the hardware 248 * buffer, allowing to quickly access the contents from the CPU 249 * without having to do a DMA download from the host. 250 */ 251 252 if (usage & PIPE_MAP_DONTBLOCK) { 253 /* 254 * Flushing the command buffer here will most likely cause 255 * the map of the hwbuf below to block, so preemptively 256 * return NULL here if DONTBLOCK is set to prevent unnecessary 257 * command buffer flushes. 258 */ 259 260 FREE(transfer); 261 goto done; 262 } 263 264 svga_context_flush(svga, NULL); 265 } 266 } 267 268 sbuf->dma.flags.unsynchronized = FALSE; 269 } 270 } 271 272 if (!sbuf->swbuf && !svga_buffer_has_hw_storage(sbuf)) { 273 if (svga_buffer_create_hw_storage(ss, sbuf, sbuf->bind_flags) != PIPE_OK) { 274 /* 275 * We can't create a hardware buffer big enough, so create a malloc 276 * buffer instead. 277 */ 278 if (0) { 279 debug_printf("%s: failed to allocate %u KB of DMA, " 280 "splitting DMA transfers\n", 281 __FUNCTION__, 282 (sbuf->b.width0 + 1023)/1024); 283 } 284 285 sbuf->swbuf = align_malloc(sbuf->b.width0, 16); 286 if (!sbuf->swbuf) { 287 FREE(transfer); 288 goto done; 289 } 290 } 291 } 292 293 if (sbuf->swbuf) { 294 /* User/malloc buffer */ 295 map = sbuf->swbuf; 296 } 297 else if (svga_buffer_has_hw_storage(sbuf)) { 298 boolean retry; 299 300 map = SVGA_TRY_MAP(svga_buffer_hw_storage_map 301 (svga, sbuf, transfer->usage, &retry), retry); 302 if (map == NULL && retry) { 303 /* 304 * At this point, svga_buffer_get_transfer() has already 305 * hit the DISCARD_WHOLE_RESOURCE path and flushed HWTNL 306 * for this buffer. 307 */ 308 svga_retry_enter(svga); 309 svga_context_flush(svga, NULL); 310 map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry); 311 svga_retry_exit(svga); 312 } 313 } 314 else { 315 map = NULL; 316 } 317 318 if (map) { 319 ++sbuf->map.count; 320 map += transfer->box.x; 321 *ptransfer = transfer; 322 } else { 323 FREE(transfer); 324 } 325 326 svga->hud.map_buffer_time += (svga_get_time(svga) - begin); 327 328done: 329 SVGA_STATS_TIME_POP(svga_sws(svga)); 330 return map; 331} 332 333 334void 335svga_buffer_transfer_flush_region(struct pipe_context *pipe, 336 struct pipe_transfer *transfer, 337 const struct pipe_box *box) 338{ 339 struct svga_screen *ss = svga_screen(pipe->screen); 340 struct svga_buffer *sbuf = svga_buffer(transfer->resource); 341 struct svga_context *svga = svga_context(pipe); 342 unsigned offset = transfer->box.x + box->x; 343 unsigned length = box->width; 344 345 assert(transfer->usage & PIPE_MAP_WRITE); 346 assert(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT); 347 348 if (!(svga->swc->force_coherent || sbuf->key.coherent) || sbuf->swbuf) { 349 mtx_lock(&ss->swc_mutex); 350 svga_buffer_add_range(sbuf, offset, offset + length); 351 mtx_unlock(&ss->swc_mutex); 352 } 353} 354 355 356void 357svga_buffer_transfer_unmap(struct pipe_context *pipe, 358 struct pipe_transfer *transfer) 359{ 360 struct svga_screen *ss = svga_screen(pipe->screen); 361 struct svga_context *svga = svga_context(pipe); 362 struct svga_buffer *sbuf = svga_buffer(transfer->resource); 363 364 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_BUFFERTRANSFERUNMAP); 365 366 mtx_lock(&ss->swc_mutex); 367 368 assert(sbuf->map.count); 369 if (sbuf->map.count) { 370 --sbuf->map.count; 371 } 372 373 if (svga_buffer_has_hw_storage(sbuf)) { 374 375 /* Note: we may wind up flushing here and unmapping other buffers 376 * which leads to recursively locking ss->swc_mutex. 377 */ 378 svga_buffer_hw_storage_unmap(svga, sbuf); 379 } 380 381 if (transfer->usage & PIPE_MAP_WRITE) { 382 if (!(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT)) { 383 /* 384 * Mapped range not flushed explicitly, so flush the whole buffer, 385 * and tell the host to discard the contents when processing the DMA 386 * command. 387 */ 388 389 SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n"); 390 391 sbuf->dma.flags.discard = TRUE; 392 393 if (!(svga->swc->force_coherent || sbuf->key.coherent) || sbuf->swbuf) 394 svga_buffer_add_range(sbuf, 0, sbuf->b.width0); 395 } 396 397 if (sbuf->swbuf && 398 (!sbuf->bind_flags || (sbuf->bind_flags & PIPE_BIND_CONSTANT_BUFFER))) { 399 /* 400 * Since the constant buffer is in system buffer, we need 401 * to set the constant buffer dirty bits, so that the context 402 * can update the changes in the device. 403 * According to the GL spec, buffer bound to other contexts will 404 * have to be explicitly rebound by the user to have the changes take 405 * into effect. 406 */ 407 svga->dirty |= SVGA_NEW_CONST_BUFFER; 408 } 409 } 410 411 mtx_unlock(&ss->swc_mutex); 412 FREE(transfer); 413 SVGA_STATS_TIME_POP(svga_sws(svga)); 414} 415 416 417void 418svga_resource_destroy(struct pipe_screen *screen, 419 struct pipe_resource *buf) 420{ 421 if (buf->target == PIPE_BUFFER) { 422 struct svga_screen *ss = svga_screen(screen); 423 struct svga_buffer *sbuf = svga_buffer(buf); 424 425 assert(!p_atomic_read(&buf->reference.count)); 426 427 assert(!sbuf->dma.pending); 428 429 if (sbuf->handle) 430 svga_buffer_destroy_host_surface(ss, sbuf); 431 432 if (sbuf->uploaded.buffer) 433 pipe_resource_reference(&sbuf->uploaded.buffer, NULL); 434 435 if (sbuf->hwbuf) 436 svga_buffer_destroy_hw_storage(ss, sbuf); 437 438 if (sbuf->swbuf && !sbuf->user) 439 align_free(sbuf->swbuf); 440 441 pipe_resource_reference(&sbuf->translated_indices.buffer, NULL); 442 443 ss->hud.total_resource_bytes -= sbuf->size; 444 assert(ss->hud.num_resources > 0); 445 if (ss->hud.num_resources > 0) 446 ss->hud.num_resources--; 447 448 FREE(sbuf); 449 } else { 450 struct svga_screen *ss = svga_screen(screen); 451 struct svga_texture *tex = svga_texture(buf); 452 453 ss->texture_timestamp++; 454 455 svga_sampler_view_reference(&tex->cached_view, NULL); 456 457 /* 458 DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); 459 */ 460 SVGA_DBG(DEBUG_DMA, "unref sid %p (texture)\n", tex->handle); 461 462 boolean to_invalidate = svga_was_texture_rendered_to(tex); 463 svga_screen_surface_destroy(ss, &tex->key, to_invalidate, &tex->handle); 464 465 /* Destroy the backed surface handle if exists */ 466 if (tex->backed_handle) 467 svga_screen_surface_destroy(ss, &tex->backed_key, to_invalidate, &tex->backed_handle); 468 469 ss->hud.total_resource_bytes -= tex->size; 470 471 FREE(tex->defined); 472 FREE(tex->rendered_to); 473 FREE(tex->dirty); 474 FREE(tex); 475 476 assert(ss->hud.num_resources > 0); 477 if (ss->hud.num_resources > 0) 478 ss->hud.num_resources--; 479 } 480} 481 482struct pipe_resource * 483svga_buffer_create(struct pipe_screen *screen, 484 const struct pipe_resource *template) 485{ 486 struct svga_screen *ss = svga_screen(screen); 487 struct svga_buffer *sbuf; 488 unsigned bind_flags; 489 490 SVGA_STATS_TIME_PUSH(ss->sws, SVGA_STATS_TIME_CREATEBUFFER); 491 492 sbuf = CALLOC_STRUCT(svga_buffer); 493 if (!sbuf) 494 goto error1; 495 496 sbuf->b = *template; 497 pipe_reference_init(&sbuf->b.reference, 1); 498 sbuf->b.screen = screen; 499 bind_flags = template->bind & ~PIPE_BIND_CUSTOM; 500 501 list_inithead(&sbuf->surfaces); 502 503 if (bind_flags & PIPE_BIND_CONSTANT_BUFFER) { 504 /* Constant buffers can only have the PIPE_BIND_CONSTANT_BUFFER 505 * flag set. 506 */ 507 if (ss->sws->have_vgpu10) { 508 bind_flags = PIPE_BIND_CONSTANT_BUFFER; 509 } 510 } 511 512 /* Although svga device only requires constant buffer size to be 513 * in multiples of 16, in order to allow bind_flags promotion, 514 * we are mandating all buffer size to be in multiples of 16. 515 */ 516 sbuf->b.width0 = align(sbuf->b.width0, 16); 517 518 if (svga_buffer_needs_hw_storage(ss, template)) { 519 520 /* If the buffer is not used for constant buffer, set 521 * the vertex/index bind flags as well so that the buffer will be 522 * accepted for those uses. 523 * Note that the PIPE_BIND_ flags we get from the gallium frontend are 524 * just a hint about how the buffer may be used. And OpenGL buffer 525 * object may be used for many different things. 526 * Also note that we do not unconditionally set the streamout 527 * bind flag since streamout buffer is an output buffer and 528 * might have performance implication. 529 */ 530 if (!(template->bind & PIPE_BIND_CONSTANT_BUFFER) && 531 !(template->bind & PIPE_BIND_CUSTOM)) { 532 /* Not a constant- or staging buffer. 533 * The buffer may be used for vertex data or indexes. 534 */ 535 bind_flags |= (PIPE_BIND_VERTEX_BUFFER | 536 PIPE_BIND_INDEX_BUFFER); 537 538 /* It may be used for shader resource as well. */ 539 bind_flags |= PIPE_BIND_SAMPLER_VIEW; 540 } 541 542 if (svga_buffer_create_host_surface(ss, sbuf, bind_flags) != PIPE_OK) 543 goto error2; 544 } 545 else { 546 sbuf->swbuf = align_malloc(sbuf->b.width0, 64); 547 if (!sbuf->swbuf) 548 goto error2; 549 550 /* Since constant buffer is usually small, it is much cheaper to 551 * use system memory for the data just as it is being done for 552 * the default constant buffer. 553 */ 554 if ((bind_flags & PIPE_BIND_CONSTANT_BUFFER) || !bind_flags) 555 sbuf->use_swbuf = TRUE; 556 } 557 558 debug_reference(&sbuf->b.reference, 559 (debug_reference_descriptor)debug_describe_resource, 0); 560 561 sbuf->bind_flags = bind_flags; 562 sbuf->size = util_resource_size(&sbuf->b); 563 ss->hud.total_resource_bytes += sbuf->size; 564 565 ss->hud.num_resources++; 566 SVGA_STATS_TIME_POP(ss->sws); 567 568 return &sbuf->b; 569 570error2: 571 FREE(sbuf); 572error1: 573 SVGA_STATS_TIME_POP(ss->sws); 574 return NULL; 575} 576 577 578struct pipe_resource * 579svga_user_buffer_create(struct pipe_screen *screen, 580 void *ptr, 581 unsigned bytes, 582 unsigned bind) 583{ 584 struct svga_buffer *sbuf; 585 struct svga_screen *ss = svga_screen(screen); 586 587 sbuf = CALLOC_STRUCT(svga_buffer); 588 if (!sbuf) 589 goto no_sbuf; 590 591 pipe_reference_init(&sbuf->b.reference, 1); 592 sbuf->b.screen = screen; 593 sbuf->b.format = PIPE_FORMAT_R8_UNORM; /* ?? */ 594 sbuf->b.usage = PIPE_USAGE_IMMUTABLE; 595 sbuf->b.bind = bind; 596 sbuf->b.width0 = bytes; 597 sbuf->b.height0 = 1; 598 sbuf->b.depth0 = 1; 599 sbuf->b.array_size = 1; 600 601 sbuf->bind_flags = bind; 602 sbuf->swbuf = ptr; 603 sbuf->user = TRUE; 604 605 debug_reference(&sbuf->b.reference, 606 (debug_reference_descriptor)debug_describe_resource, 0); 607 608 ss->hud.num_resources++; 609 610 return &sbuf->b; 611 612no_sbuf: 613 return NULL; 614} 615