/**************************************************************************
 *
 * Copyright 2017 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_threaded_context.h"
#include "util/u_cpu_detect.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "driver_trace/tr_context.h"
#include "util/log.h"
#include "compiler/shader_info.h"

#if TC_DEBUG >= 1
#define tc_assert assert
#else
#define tc_assert(x)
#endif

#if TC_DEBUG >= 2
#define tc_printf mesa_logi
#define tc_asprintf asprintf
#define tc_strcmp strcmp
#else
#define tc_printf(...)
#define tc_asprintf(...) 0
#define tc_strcmp(...) 0
#endif

#define TC_SENTINEL 0x5ca1ab1e

enum tc_call_id {
#define CALL(name) TC_CALL_##name,
#include "u_threaded_context_calls.h"
#undef CALL
   TC_NUM_CALLS,
};

#if TC_DEBUG >= 3
static const char *tc_call_names[] = {
#define CALL(name) #name,
#include "u_threaded_context_calls.h"
#undef CALL
};
#endif

typedef uint16_t (*tc_execute)(struct pipe_context *pipe, void *call, uint64_t *last);

static const tc_execute execute_func[TC_NUM_CALLS];

static void
tc_buffer_subdata(struct pipe_context *_pipe,
                  struct pipe_resource *resource,
                  unsigned usage, unsigned offset,
                  unsigned size, const void *data);

static void
tc_batch_check(UNUSED struct tc_batch *batch)
{
   tc_assert(batch->sentinel == TC_SENTINEL);
   tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH);
}

static void
tc_debug_check(struct threaded_context *tc)
{
   for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
      tc_batch_check(&tc->batch_slots[i]);
      tc_assert(tc->batch_slots[i].tc == tc);
   }
}

static void
tc_set_driver_thread(struct threaded_context *tc)
{
#ifndef NDEBUG
   tc->driver_thread = util_get_thread_id();
#endif
}

static void
tc_clear_driver_thread(struct threaded_context *tc)
{
#ifndef NDEBUG
   memset(&tc->driver_thread, 0, sizeof(tc->driver_thread));
#endif
}

static void *
to_call_check(void *ptr, unsigned num_slots)
{
#if TC_DEBUG >= 1
   struct tc_call_base *call = ptr;
   tc_assert(call->num_slots == num_slots);
#endif
   return ptr;
}
#define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type)))

#define size_to_slots(size)      DIV_ROUND_UP(size, 8)
#define call_size(type)          size_to_slots(sizeof(struct type))
#define call_size_with_slots(type, num_slots) size_to_slots( \
   sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots))
#define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type)))
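
/* Illustrative arithmetic (not used by the code): calls live in 8-byte slots,
 * so a 20-byte call struct occupies DIV_ROUND_UP(20, 8) = 3 slots (24 bytes),
 * and a variable-sized call with N trailing elements occupies
 * size_to_slots(sizeof(struct type) + N * sizeof(type::slot[0])) slots.
 */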

/* Assign src to dst while dst is uninitialized. */
static inline void
tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
{
   *dst = src;
   pipe_reference(NULL, &src->reference); /* only increment refcount */
}

/* Assign src to dst while dst is uninitialized. */
static inline void
tc_set_vertex_state_reference(struct pipe_vertex_state **dst,
                              struct pipe_vertex_state *src)
{
   *dst = src;
   pipe_reference(NULL, &src->reference); /* only increment refcount */
}

/* Unreference dst but don't touch the dst pointer. */
static inline void
tc_drop_resource_reference(struct pipe_resource *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      pipe_resource_destroy(dst);
}

/* Unreference dst but don't touch the dst pointer. */
static inline void
tc_drop_surface_reference(struct pipe_surface *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      dst->context->surface_destroy(dst->context, dst);
}

/* Unreference dst but don't touch the dst pointer. */
static inline void
tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      dst->context->stream_output_target_destroy(dst->context, dst);
}

/**
 * Subtract the given number of references.
 */
static inline void
tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
{
   int count = p_atomic_add_return(&dst->reference.count, -num_refs);

   assert(count >= 0);
   /* Underflows shouldn't happen, but let's be safe. */
   if (count <= 0)
      dst->screen->vertex_state_destroy(dst->screen, dst);
}

/* We don't want to read or write min_index and max_index, because
 * they shouldn't be needed by drivers at this point.
 */
#define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
   offsetof(struct pipe_draw_info, min_index)

static void
tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
{
   struct tc_batch *batch = job;
   struct pipe_context *pipe = batch->tc->pipe;
   uint64_t *last = &batch->slots[batch->num_total_slots];

   tc_batch_check(batch);
   tc_set_driver_thread(batch->tc);

   assert(!batch->token);

   for (uint64_t *iter = batch->slots; iter != last;) {
      struct tc_call_base *call = (struct tc_call_base *)iter;

      tc_assert(call->sentinel == TC_SENTINEL);

#if TC_DEBUG >= 3
      tc_printf("CALL: %s", tc_call_names[call->call_id]);
#endif

      iter += execute_func[call->call_id](pipe, call, last);
   }

   /* Add the fence to the list of fences for the driver to signal at the next
    * flush, which we use for tracking which buffers are referenced by
    * an unflushed command buffer.
    */
   struct threaded_context *tc = batch->tc;
   struct util_queue_fence *fence =
      &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence;

   if (tc->options.driver_calls_flush_notify) {
      tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence;

      /* Since our buffer lists are chained as a ring, we need to flush
       * the context twice as we go around the ring to make the driver signal
       * the buffer list fences, so that the producer thread can reuse the buffer
       * list structures for the next batches without waiting.
       */
      unsigned half_ring = TC_MAX_BUFFER_LISTS / 2;
      if (batch->buffer_list_index % half_ring == half_ring - 1)
         pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC);
   } else {
      util_queue_fence_signal(fence);
   }

   tc_clear_driver_thread(batch->tc);
   tc_batch_check(batch);
   batch->num_total_slots = 0;
}
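
/* Advance to the next buffer list in the ring and attach it to the batch
 * currently being filled. The new list must already have been flushed by the
 * driver (its fence is signalled); it is reset to the unsignalled state and
 * its bitset is cleared. Cached bindings are re-added lazily by the first
 * draw/compute call of the new batch (see the add_all_*_bindings_to_buffer_list
 * flags below).
 */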
static void
tc_begin_next_buffer_list(struct threaded_context *tc)
{
   tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;

   tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;

   /* Clear the buffer list in the new empty batch. */
   struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
   assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence));
   util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */
   BITSET_ZERO(buf_list->buffer_list);

   tc->add_all_gfx_bindings_to_buffer_list = true;
   tc->add_all_compute_bindings_to_buffer_list = true;
}

static void
tc_batch_flush(struct threaded_context *tc)
{
   struct tc_batch *next = &tc->batch_slots[tc->next];

   tc_assert(next->num_total_slots != 0);
   tc_batch_check(next);
   tc_debug_check(tc);
   tc->bytes_mapped_estimate = 0;
   p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots);

   if (next->token) {
      next->token->tc = NULL;
      tc_unflushed_batch_token_reference(&next->token, NULL);
   }

   util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
                      NULL, 0);
   tc->last = tc->next;
   tc->next = (tc->next + 1) % TC_MAX_BATCHES;
   tc_begin_next_buffer_list(tc);
}

/* This is the function that adds variable-sized calls into the current
 * batch. It also flushes the batch if there is not enough space there.
 * All other higher-level "add" functions use it.
 */
static void *
tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
                  unsigned num_slots)
{
   struct tc_batch *next = &tc->batch_slots[tc->next];
   assert(num_slots <= TC_SLOTS_PER_BATCH);
   tc_debug_check(tc);

   if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH)) {
      tc_batch_flush(tc);
      next = &tc->batch_slots[tc->next];
      tc_assert(next->num_total_slots == 0);
   }

   tc_assert(util_queue_fence_is_signalled(&next->fence));

   struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots];
   next->num_total_slots += num_slots;

#if !defined(NDEBUG) && TC_DEBUG >= 1
   call->sentinel = TC_SENTINEL;
#endif
   call->call_id = id;
   call->num_slots = num_slots;

#if TC_DEBUG >= 3
   tc_printf("ENQUEUE: %s", tc_call_names[id]);
#endif

   tc_debug_check(tc);
   return call;
}

#define tc_add_call(tc, execute, type) \
   ((struct type*)tc_add_sized_call(tc, execute, call_size(type)))

#define tc_add_slot_based_call(tc, execute, type, num_slots) \
   ((struct type*)tc_add_sized_call(tc, execute, \
                                    call_size_with_slots(type, num_slots)))
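
/* Typical enqueue pattern used throughout this file (tc_foo is only an
 * illustrative placeholder, not a real call):
 *
 *    struct tc_foo *p = tc_add_call(tc, TC_CALL_foo, tc_foo);
 *    p->state = ...;
 *
 * The payload is filled in the frontend thread and consumed later by the
 * corresponding tc_call_foo() when tc_batch_execute() runs in the driver
 * thread.
 */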

static bool
tc_is_sync(struct threaded_context *tc)
{
   struct tc_batch *last = &tc->batch_slots[tc->last];
   struct tc_batch *next = &tc->batch_slots[tc->next];

   return util_queue_fence_is_signalled(&last->fence) &&
          !next->num_total_slots;
}

static void
_tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
{
   struct tc_batch *last = &tc->batch_slots[tc->last];
   struct tc_batch *next = &tc->batch_slots[tc->next];
   bool synced = false;

   tc_debug_check(tc);

   /* Only wait for queued calls... */
   if (!util_queue_fence_is_signalled(&last->fence)) {
      util_queue_fence_wait(&last->fence);
      synced = true;
   }

   tc_debug_check(tc);

   if (next->token) {
      next->token->tc = NULL;
      tc_unflushed_batch_token_reference(&next->token, NULL);
   }

   /* .. and execute unflushed calls directly. */
   if (next->num_total_slots) {
      p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
      tc->bytes_mapped_estimate = 0;
      tc_batch_execute(next, NULL, 0);
      tc_begin_next_buffer_list(tc);
      synced = true;
   }

   if (synced) {
      p_atomic_inc(&tc->num_syncs);

      if (tc_strcmp(func, "tc_destroy") != 0) {
         tc_printf("sync %s %s", func, info);
      }
   }

   tc_debug_check(tc);
}

#define tc_sync(tc) _tc_sync(tc, "", __func__)
#define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)

/**
 * Call this from fence_finish for same-context fence waits of deferred fences
 * that haven't been flushed yet.
 *
 * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
 * i.e., the wrapped one.
 */
void
threaded_context_flush(struct pipe_context *_pipe,
                       struct tc_unflushed_batch_token *token,
                       bool prefer_async)
{
   struct threaded_context *tc = threaded_context(_pipe);

   /* This is called from the gallium frontend / application thread. */
   if (token->tc && token->tc == tc) {
      struct tc_batch *last = &tc->batch_slots[tc->last];

      /* Prefer to do the flush in the driver thread if it is already
       * running. That should be better for cache locality.
       */
      if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
         tc_batch_flush(tc);
      else
         tc_sync(token->tc);
   }
}

/* Must be called before TC binds, maps, invalidates, or adds a buffer to a buffer list. */
static void tc_touch_buffer(struct threaded_context *tc, struct threaded_resource *buf)
{
   const struct threaded_context *first_user = buf->first_user;

   /* Fast path exit to avoid additional branches */
   if (likely(first_user == tc))
      return;

   if (!first_user)
      first_user = p_atomic_cmpxchg_ptr(&buf->first_user, NULL, tc);

   /* The NULL check might seem unnecessary here, but it's actually critical:
    * p_atomic_cmpxchg will return NULL if it succeeds, meaning that NULL is
    * equivalent to "we're the first user" here. (It's equally important not
    * to ignore the result of the cmpxchg above, since it might fail.)
    * Without the NULL check, we'd set the flag unconditionally, which is bad.
    */
   if (first_user && first_user != tc && !buf->used_by_multiple_contexts)
      buf->used_by_multiple_contexts = true;
}

static bool tc_is_buffer_shared(struct threaded_resource *buf)
{
   return buf->is_shared || buf->used_by_multiple_contexts;
}

static void
tc_add_to_buffer_list(struct threaded_context *tc, struct tc_buffer_list *next, struct pipe_resource *buf)
{
   struct threaded_resource *tbuf = threaded_resource(buf);
   tc_touch_buffer(tc, tbuf);

   uint32_t id = tbuf->buffer_id_unique;
   BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
}

/* Set a buffer binding and add it to the buffer list. */
static void
tc_bind_buffer(struct threaded_context *tc, uint32_t *binding, struct tc_buffer_list *next, struct pipe_resource *buf)
{
   struct threaded_resource *tbuf = threaded_resource(buf);
   tc_touch_buffer(tc, tbuf);

   uint32_t id = tbuf->buffer_id_unique;
   *binding = id;
   BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
}
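
/* Note that buffer_id_unique values are folded into the fixed-size buffer_list
 * bitset with TC_BUFFER_ID_MASK above. Two buffers may map to the same bit;
 * the expected consequence is only that tc_is_buffer_busy() can report a false
 * positive "busy", which is conservative and therefore safe. The binding
 * arrays themselves store the full id, so rebinds and bound-for-write checks
 * remain exact.
 */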

/* Reset a buffer binding. */
static void
tc_unbind_buffer(uint32_t *binding)
{
   *binding = 0;
}

/* Reset a range of buffer binding slots. */
static void
tc_unbind_buffers(uint32_t *binding, unsigned count)
{
   if (count)
      memset(binding, 0, sizeof(*binding) * count);
}

static void
tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
                               unsigned count)
{
   for (unsigned i = 0; i < count; i++) {
      if (bindings[i])
         BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
   }
}

static bool
tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
                   unsigned count)
{
   unsigned rebind_count = 0;

   for (unsigned i = 0; i < count; i++) {
      if (bindings[i] == old_id) {
         bindings[i] = new_id;
         rebind_count++;
      }
   }
   return rebind_count;
}

static void
tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
                                      BITSET_WORD *buffer_list,
                                      enum pipe_shader_type shader)
{
   tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
                                  tc->max_const_buffers);
   if (tc->seen_shader_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
                                     tc->max_shader_buffers);
   }
   if (tc->seen_image_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
                                     tc->max_images);
   }
   if (tc->seen_sampler_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
                                     tc->max_samplers);
   }
}

static unsigned
tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
                          uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask)
{
   unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0;

   ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
                            tc->max_const_buffers);
   if (ubo)
      *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader;
   if (tc->seen_shader_buffers[shader]) {
      ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
                                tc->max_shader_buffers);
      if (ssbo)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader;
   }
   if (tc->seen_image_buffers[shader]) {
      img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
                               tc->max_images);
      if (img)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader;
   }
   if (tc->seen_sampler_buffers[shader]) {
      sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
                                   tc->max_samplers);
      if (sampler)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader;
   }
   return ubo + ssbo + img + sampler;
}

/* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
 * This is called by the first draw call in a batch when we want to inherit
 * all bindings set by the previous batch.
 */
static void
tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
{
   BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;

   tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->max_vertex_buffers);
   if (tc->seen_streamout_buffers)
      tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);

   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);

   if (tc->seen_tcs)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
   if (tc->seen_tes)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
   if (tc->seen_gs)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);

   tc->add_all_gfx_bindings_to_buffer_list = false;
}

/* Add all bound buffers used by compute to the buffer list.
 * This is called by the first compute call in a batch when we want to inherit
 * all bindings set by the previous batch.
 */
static void
tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
{
   BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;

   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
   tc->add_all_compute_bindings_to_buffer_list = false;
}

static unsigned
tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask)
{
   unsigned vbo = 0, so = 0;

   vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
                            tc->max_vertex_buffers);
   if (vbo)
      *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);

   if (tc->seen_streamout_buffers) {
      so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
                              PIPE_MAX_SO_BUFFERS);
      if (so)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
   }
   unsigned rebound = vbo + so;

   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask);
   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask);

   if (tc->seen_tcs)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask);
   if (tc->seen_tes)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask);
   if (tc->seen_gs)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask);

   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask);

   if (rebound)
      BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
   return rebound;
}

static bool
tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask)
{
   while (binding_mask) {
      if (bindings[u_bit_scan(&binding_mask)] == id)
         return true;
   }
   return false;
}

static bool
tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id,
                                    enum pipe_shader_type shader)
{
   if (tc->seen_shader_buffers[shader] &&
       tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader],
                                    tc->shader_buffers_writeable_mask[shader]))
      return true;

   if (tc->seen_image_buffers[shader] &&
       tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader],
                                    tc->image_buffers_writeable_mask[shader]))
      return true;

   return false;
}

static bool
tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id)
{
   if (tc->seen_streamout_buffers &&
       tc_is_buffer_bound_with_mask(id, tc->streamout_buffers,
                                    BITFIELD_MASK(PIPE_MAX_SO_BUFFERS)))
      return true;

   if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) ||
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) ||
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE))
      return true;

   if (tc->seen_tcs &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL))
      return true;

   if (tc->seen_tes &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL))
      return true;

   if (tc->seen_gs &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY))
      return true;

   return false;
}

static bool
tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf,
                  unsigned map_usage)
{
   if (!tc->options.is_resource_busy)
      return true;

   uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK;

   for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
      struct tc_buffer_list *buf_list = &tc->buffer_lists[i];

      /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver),
       * then the buffer is considered busy. */
      if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) &&
          BITSET_TEST(buf_list->buffer_list, id_hash))
         return true;
   }

   /* The buffer isn't referenced by any unflushed batch: we can safely ask the driver
    * whether this buffer is busy or not. */
   return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage);
}

/**
 * allow_cpu_storage should be false for user memory and imported buffers.
 */
void
threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage)
{
   struct threaded_resource *tres = threaded_resource(res);

   tres->first_user = NULL;
   tres->used_by_multiple_contexts = false;
   tres->latest = &tres->b;
   tres->cpu_storage = NULL;
   util_range_init(&tres->valid_buffer_range);
   tres->is_shared = false;
   tres->is_user_ptr = false;
   tres->buffer_id_unique = 0;
   tres->pending_staging_uploads = 0;
   util_range_init(&tres->pending_staging_uploads_range);

   if (allow_cpu_storage &&
       !(res->flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                       PIPE_RESOURCE_FLAG_SPARSE |
                       PIPE_RESOURCE_FLAG_ENCRYPTED)) &&
       /* We need buffer invalidation and buffer busyness tracking for the CPU
        * storage, which aren't supported with pipe_vertex_state. */
       !(res->bind & PIPE_BIND_VERTEX_STATE))
      tres->allow_cpu_storage = true;
   else
      tres->allow_cpu_storage = false;
}

void
threaded_resource_deinit(struct pipe_resource *res)
{
   struct threaded_resource *tres = threaded_resource(res);

   if (tres->latest != &tres->b)
      pipe_resource_reference(&tres->latest, NULL);
   util_range_destroy(&tres->valid_buffer_range);
   util_range_destroy(&tres->pending_staging_uploads_range);
   align_free(tres->cpu_storage);
}

struct pipe_context *
threaded_context_unwrap_sync(struct pipe_context *pipe)
{
   if (!pipe || !pipe->priv)
      return pipe;

   tc_sync(threaded_context(pipe));
   return (struct pipe_context*)pipe->priv;
}


/********************************************************************
 * simple functions
 */

#define TC_FUNC1(func, qualifier, type, deref, addr, ...) \
   struct tc_call_##func { \
      struct tc_call_base base; \
      type state; \
   }; \
   \
   static uint16_t \
   tc_call_##func(struct pipe_context *pipe, void *call, uint64_t *last) \
   { \
      pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \
      return call_size(tc_call_##func); \
   } \
   \
   static void \
   tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
   { \
      struct threaded_context *tc = threaded_context(_pipe); \
      struct tc_call_##func *p = (struct tc_call_##func*) \
                                 tc_add_call(tc, TC_CALL_##func, tc_call_##func); \
      p->state = deref(param); \
      __VA_ARGS__; \
   }
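
/* For example, TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
 * below expands to a tc_call_set_blend_color payload holding a copy of the
 * blend color, plus tc_set_blend_color()/tc_call_set_blend_color(), which
 * enqueue that copy and later replay it with pipe->set_blend_color().
 */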

TC_FUNC1(set_active_query_state, , bool, , )

TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , )
TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &)
TC_FUNC1(set_sample_mask, , unsigned, , )
TC_FUNC1(set_min_samples, , unsigned, , )
TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &)

TC_FUNC1(texture_barrier, , unsigned, , )
TC_FUNC1(memory_barrier, , unsigned, , )
TC_FUNC1(delete_texture_handle, , uint64_t, , )
TC_FUNC1(delete_image_handle, , uint64_t, , )
TC_FUNC1(set_frontend_noop, , bool, , )


/********************************************************************
 * queries
 */

static struct pipe_query *
tc_create_query(struct pipe_context *_pipe, unsigned query_type,
                unsigned index)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   return pipe->create_query(pipe, query_type, index);
}

static struct pipe_query *
tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
                      unsigned *query_types)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   return pipe->create_batch_query(pipe, num_queries, query_types);
}

struct tc_query_call {
   struct tc_call_base base;
   struct pipe_query *query;
};

static uint16_t
tc_call_destroy_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct pipe_query *query = to_call(call, tc_query_call)->query;
   struct threaded_query *tq = threaded_query(query);

   if (list_is_linked(&tq->head_unflushed))
      list_del(&tq->head_unflushed);

   pipe->destroy_query(pipe, query);
   return call_size(tc_query_call);
}

static void
tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query;
}

static uint16_t
tc_call_begin_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   pipe->begin_query(pipe, to_call(call, tc_query_call)->query);
   return call_size(tc_query_call);
}

static bool
tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query;
   return true; /* we don't care about the return value for this call */
}

struct tc_end_query_call {
   struct tc_call_base base;
   struct threaded_context *tc;
   struct pipe_query *query;
};

static uint16_t
tc_call_end_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_end_query_call *p = to_call(call, tc_end_query_call);
   struct threaded_query *tq = threaded_query(p->query);

   if (!list_is_linked(&tq->head_unflushed))
      list_add(&tq->head_unflushed, &p->tc->unflushed_queries);

   pipe->end_query(pipe, p->query);
   return call_size(tc_end_query_call);
}

static bool
tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_query *tq = threaded_query(query);
   struct tc_end_query_call *call =
      tc_add_call(tc, TC_CALL_end_query, tc_end_query_call);

   call->tc = tc;
   call->query = query;

   tq->flushed = false;

   return true; /* we don't care about the return value for this call */
}

static bool
tc_get_query_result(struct pipe_context *_pipe,
                    struct pipe_query *query, bool wait,
                    union pipe_query_result *result)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_query *tq = threaded_query(query);
   struct pipe_context *pipe = tc->pipe;
   bool flushed = tq->flushed;

   if (!flushed) {
      tc_sync_msg(tc, wait ? "wait" : "nowait");
      tc_set_driver_thread(tc);
   }

   bool success = pipe->get_query_result(pipe, query, wait, result);

   if (!flushed)
      tc_clear_driver_thread(tc);

   if (success) {
      tq->flushed = true;
      if (list_is_linked(&tq->head_unflushed)) {
         /* This is safe because it can only happen after we sync'd. */
         list_del(&tq->head_unflushed);
      }
   }
   return success;
}

struct tc_query_result_resource {
   struct tc_call_base base;
   enum pipe_query_flags flags:8;
   enum pipe_query_value_type result_type:8;
   int8_t index; /* it can be -1 */
   unsigned offset;
   struct pipe_query *query;
   struct pipe_resource *resource;
};

static uint16_t
tc_call_get_query_result_resource(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_query_result_resource *p = to_call(call, tc_query_result_resource);

   pipe->get_query_result_resource(pipe, p->query, p->flags, p->result_type,
                                   p->index, p->resource, p->offset);
   tc_drop_resource_reference(p->resource);
   return call_size(tc_query_result_resource);
}

static void
tc_get_query_result_resource(struct pipe_context *_pipe,
                             struct pipe_query *query,
                             enum pipe_query_flags flags,
                             enum pipe_query_value_type result_type, int index,
                             struct pipe_resource *resource, unsigned offset)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_buffer_disable_cpu_storage(resource);

   struct tc_query_result_resource *p =
      tc_add_call(tc, TC_CALL_get_query_result_resource,
                  tc_query_result_resource);
   p->query = query;
   p->flags = flags;
   p->result_type = result_type;
   p->index = index;
   tc_set_resource_reference(&p->resource, resource);
   tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], resource);
   p->offset = offset;
}

struct tc_render_condition {
   struct tc_call_base base;
   bool condition;
   unsigned mode;
   struct pipe_query *query;
};

static uint16_t
tc_call_render_condition(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_render_condition *p = to_call(call, tc_render_condition);
   pipe->render_condition(pipe, p->query, p->condition, p->mode);
   return call_size(tc_render_condition);
}

static void
tc_render_condition(struct pipe_context *_pipe,
                    struct pipe_query *query, bool condition,
                    enum pipe_render_cond_flag mode)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_render_condition *p =
      tc_add_call(tc, TC_CALL_render_condition, tc_render_condition);

   p->query = query;
   p->condition = condition;
   p->mode = mode;
}


/********************************************************************
 * constant (immutable) states
 */

#define TC_CSO_CREATE(name, sname) \
   static void * \
   tc_create_##name##_state(struct pipe_context *_pipe, \
                            const struct pipe_##sname##_state *state) \
   { \
      struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
      return pipe->create_##name##_state(pipe, state); \
   }

#define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__)
#define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , )

#define TC_CSO(name, sname, ...) \
   TC_CSO_CREATE(name, sname) \
   TC_CSO_BIND(name, ##__VA_ARGS__) \
   TC_CSO_DELETE(name)

#define TC_CSO_WHOLE(name) TC_CSO(name, name)
#define TC_CSO_SHADER(name) TC_CSO(name, shader)
#define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;)
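
/* CSO creation is not queued: the create wrappers call the driver directly
 * from the frontend thread (so drivers enabling TC are expected to handle CSO
 * creation while the driver thread is running), while bind and delete go
 * through the batch via TC_FUNC1 above. TC_CSO_SHADER_TRACK additionally
 * records that a shader stage has been bound, so per-stage bindings are only
 * walked when necessary.
 */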

TC_CSO_WHOLE(blend)
TC_CSO_WHOLE(rasterizer)
TC_CSO_WHOLE(depth_stencil_alpha)
TC_CSO_WHOLE(compute)
TC_CSO_SHADER(fs)
TC_CSO_SHADER(vs)
TC_CSO_SHADER_TRACK(gs)
TC_CSO_SHADER_TRACK(tcs)
TC_CSO_SHADER_TRACK(tes)
TC_CSO_CREATE(sampler, sampler)
TC_CSO_DELETE(sampler)
TC_CSO_BIND(vertex_elements)
TC_CSO_DELETE(vertex_elements)

static void *
tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
                                const struct pipe_vertex_element *elems)
{
   struct pipe_context *pipe = threaded_context(_pipe)->pipe;

   return pipe->create_vertex_elements_state(pipe, count, elems);
}

struct tc_sampler_states {
   struct tc_call_base base;
   ubyte shader, start, count;
   void *slot[0]; /* more will be allocated if needed */
};

static uint16_t
tc_call_bind_sampler_states(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_sampler_states *p = (struct tc_sampler_states *)call;

   pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
   return p->base.num_slots;
}

static void
tc_bind_sampler_states(struct pipe_context *_pipe,
                       enum pipe_shader_type shader,
                       unsigned start, unsigned count, void **states)
{
   if (!count)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_sampler_states *p =
      tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);

   p->shader = shader;
   p->start = start;
   p->count = count;
   memcpy(p->slot, states, count * sizeof(states[0]));
}

static void
tc_link_shader(struct pipe_context *_pipe, void **shaders)
{
   struct threaded_context *tc = threaded_context(_pipe);
   tc->pipe->link_shader(tc->pipe, shaders);
}


/********************************************************************
 * immediate states
 */

struct tc_framebuffer {
   struct tc_call_base base;
   struct pipe_framebuffer_state state;
};

static uint16_t
tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state;

   pipe->set_framebuffer_state(pipe, p);

   unsigned nr_cbufs = p->nr_cbufs;
   for (unsigned i = 0; i < nr_cbufs; i++)
      tc_drop_surface_reference(p->cbufs[i]);
   tc_drop_surface_reference(p->zsbuf);
   return call_size(tc_framebuffer);
}

static void
tc_set_framebuffer_state(struct pipe_context *_pipe,
                         const struct pipe_framebuffer_state *fb)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_framebuffer *p =
      tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer);
   unsigned nr_cbufs = fb->nr_cbufs;

   p->state.width = fb->width;
   p->state.height = fb->height;
   p->state.samples = fb->samples;
   p->state.layers = fb->layers;
   p->state.nr_cbufs = nr_cbufs;

   for (unsigned i = 0; i < nr_cbufs; i++) {
      p->state.cbufs[i] = NULL;
      pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
   }
   p->state.zsbuf = NULL;
   pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
}

struct tc_tess_state {
   struct tc_call_base base;
   float state[6];
};

static uint16_t
tc_call_set_tess_state(struct pipe_context *pipe, void *call, uint64_t *last)
{
   float *p = to_call(call, tc_tess_state)->state;

   pipe->set_tess_state(pipe, p, p + 4);
   return call_size(tc_tess_state);
}

static void
tc_set_tess_state(struct pipe_context *_pipe,
                  const float default_outer_level[4],
                  const float default_inner_level[2])
{
   struct threaded_context *tc = threaded_context(_pipe);
   float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state;

   memcpy(p, default_outer_level, 4 * sizeof(float));
   memcpy(p + 4, default_inner_level, 2 * sizeof(float));
}

struct tc_patch_vertices {
   struct tc_call_base base;
   ubyte patch_vertices;
};

static uint16_t
tc_call_set_patch_vertices(struct pipe_context *pipe, void *call, uint64_t *last)
{
   uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices;

   pipe->set_patch_vertices(pipe, patch_vertices);
   return call_size(tc_patch_vertices);
}

static void
tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_set_patch_vertices,
               tc_patch_vertices)->patch_vertices = patch_vertices;
}

struct tc_constant_buffer_base {
   struct tc_call_base base;
   ubyte shader, index;
   bool is_null;
};

struct tc_constant_buffer {
   struct tc_constant_buffer_base base;
   struct pipe_constant_buffer cb;
};

static uint16_t
tc_call_set_constant_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_constant_buffer *p = (struct tc_constant_buffer *)call;

   if (unlikely(p->base.is_null)) {
      pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL);
      return call_size(tc_constant_buffer_base);
   }

   pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb);
   return call_size(tc_constant_buffer);
}

static void
tc_set_constant_buffer(struct pipe_context *_pipe,
                       enum pipe_shader_type shader, uint index,
                       bool take_ownership,
                       const struct pipe_constant_buffer *cb)
{
   struct threaded_context *tc = threaded_context(_pipe);

   if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) {
      struct tc_constant_buffer_base *p =
         tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base);
      p->shader = shader;
      p->index = index;
      p->is_null = true;
      tc_unbind_buffer(&tc->const_buffers[shader][index]);
      return;
   }

   struct pipe_resource *buffer;
   unsigned offset;

   if (cb->user_buffer) {
      /* This must be done before adding set_constant_buffer, because it could
       * generate e.g. transfer_unmap and flush partially-uninitialized
       * set_constant_buffer to the driver if it was done afterwards.
       */
      buffer = NULL;
      u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
                    tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
      u_upload_unmap(tc->base.const_uploader);
      take_ownership = true;
   } else {
      buffer = cb->buffer;
      offset = cb->buffer_offset;
   }

   struct tc_constant_buffer *p =
      tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer);
   p->base.shader = shader;
   p->base.index = index;
   p->base.is_null = false;
   p->cb.user_buffer = NULL;
   p->cb.buffer_offset = offset;
   p->cb.buffer_size = cb->buffer_size;

   if (take_ownership)
      p->cb.buffer = buffer;
   else
      tc_set_resource_reference(&p->cb.buffer, buffer);

   if (buffer) {
      tc_bind_buffer(tc, &tc->const_buffers[shader][index],
                     &tc->buffer_lists[tc->next_buf_list], buffer);
   } else {
      tc_unbind_buffer(&tc->const_buffers[shader][index]);
   }
}

struct tc_inlinable_constants {
   struct tc_call_base base;
   ubyte shader;
   ubyte num_values;
   uint32_t values[MAX_INLINABLE_UNIFORMS];
};

static uint16_t
tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants);

   pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values);
   return call_size(tc_inlinable_constants);
}

static void
tc_set_inlinable_constants(struct pipe_context *_pipe,
                           enum pipe_shader_type shader,
                           uint num_values, uint32_t *values)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_inlinable_constants *p =
      tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants);
   p->shader = shader;
   p->num_values = num_values;
   memcpy(p->values, values, num_values * 4);
}

struct tc_sample_locations {
   struct tc_call_base base;
   uint16_t size;
   uint8_t slot[0];
};


static uint16_t
tc_call_set_sample_locations(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_sample_locations *p = (struct tc_sample_locations *)call;

   pipe->set_sample_locations(pipe, p->size, p->slot);
   return p->base.num_slots;
}

static void
tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_sample_locations *p =
      tc_add_slot_based_call(tc, TC_CALL_set_sample_locations,
                             tc_sample_locations, size);

   p->size = size;
   memcpy(p->slot, locations, size);
}

struct tc_scissors {
   struct tc_call_base base;
   ubyte start, count;
   struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
};

static uint16_t
tc_call_set_scissor_states(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_scissors *p = (struct tc_scissors *)call;

   pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
   return p->base.num_slots;
}

static void
tc_set_scissor_states(struct pipe_context *_pipe,
                      unsigned start, unsigned count,
                      const struct pipe_scissor_state *states)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_scissors *p =
      tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);

   p->start = start;
   p->count = count;
   memcpy(&p->slot, states, count * sizeof(states[0]));
}

struct tc_viewports {
   struct tc_call_base base;
   ubyte start, count;
   struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
};

static uint16_t
tc_call_set_viewport_states(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_viewports *p = (struct tc_viewports *)call;

   pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
   return p->base.num_slots;
}

static void
tc_set_viewport_states(struct pipe_context *_pipe,
                       unsigned start, unsigned count,
                       const struct pipe_viewport_state *states)
{
   if (!count)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_viewports *p =
      tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);

   p->start = start;
   p->count = count;
   memcpy(&p->slot, states, count * sizeof(states[0]));
}

struct tc_window_rects {
   struct tc_call_base base;
   bool include;
   ubyte count;
   struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
};

static uint16_t
tc_call_set_window_rectangles(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_window_rects *p = (struct tc_window_rects *)call;

   pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
   return p->base.num_slots;
}

static void
tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
                         unsigned count,
                         const struct pipe_scissor_state *rects)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_window_rects *p =
      tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);

   p->include = include;
   p->count = count;
   memcpy(p->slot, rects, count * sizeof(rects[0]));
}

struct tc_sampler_views {
   struct tc_call_base base;
   ubyte shader, start, count, unbind_num_trailing_slots;
   struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
};

static uint16_t
tc_call_set_sampler_views(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_sampler_views *p = (struct tc_sampler_views *)call;

   pipe->set_sampler_views(pipe, p->shader, p->start, p->count,
                           p->unbind_num_trailing_slots, true, p->slot);
   return p->base.num_slots;
}

static void
tc_set_sampler_views(struct pipe_context *_pipe,
                     enum pipe_shader_type shader,
                     unsigned start, unsigned count,
                     unsigned unbind_num_trailing_slots, bool take_ownership,
                     struct pipe_sampler_view **views)
{
   if (!count && !unbind_num_trailing_slots)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_sampler_views *p =
      tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views,
                             views ? count : 0);

   p->shader = shader;
   p->start = start;

   if (views) {
      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];

      p->count = count;
      p->unbind_num_trailing_slots = unbind_num_trailing_slots;

      if (take_ownership) {
         memcpy(p->slot, views, sizeof(*views) * count);

         for (unsigned i = 0; i < count; i++) {
            if (views[i] && views[i]->target == PIPE_BUFFER) {
               tc_bind_buffer(tc, &tc->sampler_buffers[shader][start + i], next,
                              views[i]->texture);
            } else {
               tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
            }
         }
      } else {
         for (unsigned i = 0; i < count; i++) {
            p->slot[i] = NULL;
            pipe_sampler_view_reference(&p->slot[i], views[i]);

            if (views[i] && views[i]->target == PIPE_BUFFER) {
               tc_bind_buffer(tc, &tc->sampler_buffers[shader][start + i], next,
                              views[i]->texture);
            } else {
               tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
            }
         }
      }

      tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
                        unbind_num_trailing_slots);
      tc->seen_sampler_buffers[shader] = true;
   } else {
      p->count = 0;
      p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;

      tc_unbind_buffers(&tc->sampler_buffers[shader][start],
                        count + unbind_num_trailing_slots);
   }
}

struct tc_shader_images {
   struct tc_call_base base;
   ubyte shader, start, count;
   ubyte unbind_num_trailing_slots;
   struct pipe_image_view slot[0]; /* more will be allocated if needed */
};

static uint16_t
tc_call_set_shader_images(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_shader_images *p = (struct tc_shader_images *)call;
   unsigned count = p->count;

   if (!p->count) {
      pipe->set_shader_images(pipe, p->shader, p->start, 0,
                              p->unbind_num_trailing_slots, NULL);
      return call_size(tc_shader_images);
   }

   pipe->set_shader_images(pipe, p->shader, p->start, p->count,
                           p->unbind_num_trailing_slots, p->slot);

   for (unsigned i = 0; i < count; i++)
      tc_drop_resource_reference(p->slot[i].resource);

   return p->base.num_slots;
}
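
/* For buffer images bound with write access, TC conservatively marks the
 * bound range as valid (the GPU may write it) and disables the CPU shadow
 * storage, which otherwise could become stale relative to GPU writes.
 */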
static void
tc_set_shader_images(struct pipe_context *_pipe,
                     enum pipe_shader_type shader,
                     unsigned start, unsigned count,
                     unsigned unbind_num_trailing_slots,
                     const struct pipe_image_view *images)
{
   if (!count && !unbind_num_trailing_slots)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_shader_images *p =
      tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
                             images ? count : 0);
   unsigned writable_buffers = 0;

   p->shader = shader;
   p->start = start;

   if (images) {
      p->count = count;
      p->unbind_num_trailing_slots = unbind_num_trailing_slots;

      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];

      for (unsigned i = 0; i < count; i++) {
         struct pipe_resource *resource = images[i].resource;

         tc_set_resource_reference(&p->slot[i].resource, resource);

         if (resource && resource->target == PIPE_BUFFER) {
            tc_bind_buffer(tc, &tc->image_buffers[shader][start + i], next, resource);

            if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
               struct threaded_resource *tres = threaded_resource(resource);

               tc_buffer_disable_cpu_storage(resource);
               util_range_add(&tres->b, &tres->valid_buffer_range,
                              images[i].u.buf.offset,
                              images[i].u.buf.offset + images[i].u.buf.size);
               writable_buffers |= BITFIELD_BIT(start + i);
            }
         } else {
            tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
         }
      }
      memcpy(p->slot, images, count * sizeof(images[0]));

      tc_unbind_buffers(&tc->image_buffers[shader][start + count],
                        unbind_num_trailing_slots);
      tc->seen_image_buffers[shader] = true;
   } else {
      p->count = 0;
      p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;

      tc_unbind_buffers(&tc->image_buffers[shader][start],
                        count + unbind_num_trailing_slots);
   }

   tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
   tc->image_buffers_writeable_mask[shader] |= writable_buffers;
}

struct tc_shader_buffers {
   struct tc_call_base base;
   ubyte shader, start, count;
   bool unbind;
   unsigned writable_bitmask;
   struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
};

static uint16_t
tc_call_set_shader_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_shader_buffers *p = (struct tc_shader_buffers *)call;
   unsigned count = p->count;

   if (p->unbind) {
      pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
      return call_size(tc_shader_buffers);
   }

   pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
                            p->writable_bitmask);

   for (unsigned i = 0; i < count; i++)
      tc_drop_resource_reference(p->slot[i].buffer);

   return p->base.num_slots;
}

static void
tc_set_shader_buffers(struct pipe_context *_pipe,
                      enum pipe_shader_type shader,
                      unsigned start, unsigned count,
                      const struct pipe_shader_buffer *buffers,
                      unsigned writable_bitmask)
{
   if (!count)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_shader_buffers *p =
      tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
                             buffers ? count : 0);

   p->shader = shader;
   p->start = start;
   p->count = count;
   p->unbind = buffers == NULL;
   p->writable_bitmask = writable_bitmask;

   if (buffers) {
      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];

      for (unsigned i = 0; i < count; i++) {
         struct pipe_shader_buffer *dst = &p->slot[i];
         const struct pipe_shader_buffer *src = buffers + i;

         tc_set_resource_reference(&dst->buffer, src->buffer);
         dst->buffer_offset = src->buffer_offset;
         dst->buffer_size = src->buffer_size;

         if (src->buffer) {
            struct threaded_resource *tres = threaded_resource(src->buffer);

            tc_bind_buffer(tc, &tc->shader_buffers[shader][start + i], next, &tres->b);

            if (writable_bitmask & BITFIELD_BIT(i)) {
               tc_buffer_disable_cpu_storage(src->buffer);
               util_range_add(&tres->b, &tres->valid_buffer_range,
                              src->buffer_offset,
                              src->buffer_offset + src->buffer_size);
            }
         } else {
            tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
         }
      }
      tc->seen_shader_buffers[shader] = true;
   } else {
      tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
   }

   tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
   tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start;
}

struct tc_vertex_buffers {
   struct tc_call_base base;
   ubyte start, count;
   ubyte unbind_num_trailing_slots;
   struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */
};

static uint16_t
tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call;
   unsigned count = p->count;

   if (!count) {
      pipe->set_vertex_buffers(pipe, p->start, 0,
                               p->unbind_num_trailing_slots, false, NULL);
      return call_size(tc_vertex_buffers);
   }

   for (unsigned i = 0; i < count; i++)
      tc_assert(!p->slot[i].is_user_buffer);

   pipe->set_vertex_buffers(pipe, p->start, count,
                            p->unbind_num_trailing_slots, true, p->slot);
   return p->base.num_slots;
}

static void
tc_set_vertex_buffers(struct pipe_context *_pipe,
                      unsigned start, unsigned count,
                      unsigned unbind_num_trailing_slots,
                      bool take_ownership,
                      const struct pipe_vertex_buffer *buffers)
{
   struct threaded_context *tc = threaded_context(_pipe);

   if (!count && !unbind_num_trailing_slots)
      return;

   if (count && buffers) {
      struct tc_vertex_buffers *p =
         tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
      p->start = start;
      p->count = count;
      p->unbind_num_trailing_slots = unbind_num_trailing_slots;

      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];

      if (take_ownership) {
         memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));

         for (unsigned i = 0; i < count; i++) {
            struct pipe_resource *buf = buffers[i].buffer.resource;

            if (buf) {
               tc_bind_buffer(tc, &tc->vertex_buffers[start + i], next, buf);
            } else {
               tc_unbind_buffer(&tc->vertex_buffers[start + i]);
            }
         }
      } else {
         for (unsigned i = 0; i < count; i++) {
            struct pipe_vertex_buffer *dst = &p->slot[i];
            const struct pipe_vertex_buffer *src = buffers + i;
            struct pipe_resource *buf = src->buffer.resource;

            tc_assert(!src->is_user_buffer);
            dst->stride = src->stride;
            dst->is_user_buffer = false;
            tc_set_resource_reference(&dst->buffer.resource, buf);
            dst->buffer_offset = src->buffer_offset;

            if (buf) {
               tc_bind_buffer(tc, &tc->vertex_buffers[start + i], next, buf);
            } else {
               tc_unbind_buffer(&tc->vertex_buffers[start + i]);
            }
         }
      }

      tc_unbind_buffers(&tc->vertex_buffers[start + count],
                        unbind_num_trailing_slots);
   } else {
      struct tc_vertex_buffers *p =
         tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
      p->start = start;
      p->count = 0;
      p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;

      tc_unbind_buffers(&tc->vertex_buffers[start],
                        count + unbind_num_trailing_slots);
   }
}

struct tc_stream_outputs {
   struct tc_call_base base;
   unsigned count;
   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
   unsigned offsets[PIPE_MAX_SO_BUFFERS];
};

static uint16_t
tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_stream_outputs *p = to_call(call, tc_stream_outputs);
   unsigned count = p->count;

   pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
   for (unsigned i = 0; i < count; i++)
      tc_drop_so_target_reference(p->targets[i]);

   return call_size(tc_stream_outputs);
}

static void
tc_set_stream_output_targets(struct pipe_context *_pipe,
                             unsigned count,
                             struct pipe_stream_output_target **tgs,
                             const unsigned *offsets)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_stream_outputs *p =
      tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
   struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];

   for (unsigned i = 0; i < count; i++) {
      p->targets[i] = NULL;
      pipe_so_target_reference(&p->targets[i], tgs[i]);
      if (tgs[i]) {
         tc_buffer_disable_cpu_storage(tgs[i]->buffer);
         tc_bind_buffer(tc, &tc->streamout_buffers[i], next, tgs[i]->buffer);
      } else {
         tc_unbind_buffer(&tc->streamout_buffers[i]);
      }
   }
   p->count = count;
   memcpy(p->offsets, offsets, count * sizeof(unsigned));

   tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
   if (count)
      tc->seen_streamout_buffers = true;
}

static void
tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
                         unsigned count, struct pipe_surface **resources)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   tc_sync(tc);
   pipe->set_compute_resources(pipe, start, count, resources);
}

static void
tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
                      unsigned count, struct pipe_resource **resources,
                      uint32_t **handles)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   tc_sync(tc);
   pipe->set_global_binding(pipe, first, count, resources, handles);
}


/********************************************************************
 * views
 */
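
/* View objects (surfaces, sampler views, stream output targets) are created
 * and destroyed by calling the driver directly. The context pointer of each
 * new view is replaced with the TC wrapper so that later unreferencing (e.g.
 * tc_drop_surface_reference above) routes destruction back through TC rather
 * than through the driver context directly.
 */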
threaded_context(_pipe)->pipe; 1847 struct pipe_surface *view = 1848 pipe->create_surface(pipe, resource, surf_tmpl); 1849 1850 if (view) 1851 view->context = _pipe; 1852 return view; 1853} 1854 1855static void 1856tc_surface_destroy(struct pipe_context *_pipe, 1857 struct pipe_surface *surf) 1858{ 1859 struct pipe_context *pipe = threaded_context(_pipe)->pipe; 1860 1861 pipe->surface_destroy(pipe, surf); 1862} 1863 1864static struct pipe_sampler_view * 1865tc_create_sampler_view(struct pipe_context *_pipe, 1866 struct pipe_resource *resource, 1867 const struct pipe_sampler_view *templ) 1868{ 1869 struct pipe_context *pipe = threaded_context(_pipe)->pipe; 1870 struct pipe_sampler_view *view = 1871 pipe->create_sampler_view(pipe, resource, templ); 1872 1873 if (view) 1874 view->context = _pipe; 1875 return view; 1876} 1877 1878static void 1879tc_sampler_view_destroy(struct pipe_context *_pipe, 1880 struct pipe_sampler_view *view) 1881{ 1882 struct pipe_context *pipe = threaded_context(_pipe)->pipe; 1883 1884 pipe->sampler_view_destroy(pipe, view); 1885} 1886 1887static struct pipe_stream_output_target * 1888tc_create_stream_output_target(struct pipe_context *_pipe, 1889 struct pipe_resource *res, 1890 unsigned buffer_offset, 1891 unsigned buffer_size) 1892{ 1893 struct pipe_context *pipe = threaded_context(_pipe)->pipe; 1894 struct threaded_resource *tres = threaded_resource(res); 1895 struct pipe_stream_output_target *view; 1896 1897 util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset, 1898 buffer_offset + buffer_size); 1899 1900 view = pipe->create_stream_output_target(pipe, res, buffer_offset, 1901 buffer_size); 1902 if (view) 1903 view->context = _pipe; 1904 return view; 1905} 1906 1907static void 1908tc_stream_output_target_destroy(struct pipe_context *_pipe, 1909 struct pipe_stream_output_target *target) 1910{ 1911 struct pipe_context *pipe = threaded_context(_pipe)->pipe; 1912 1913 pipe->stream_output_target_destroy(pipe, target); 1914} 1915 1916 1917/******************************************************************** 1918 * bindless 1919 */ 1920 1921static uint64_t 1922tc_create_texture_handle(struct pipe_context *_pipe, 1923 struct pipe_sampler_view *view, 1924 const struct pipe_sampler_state *state) 1925{ 1926 struct threaded_context *tc = threaded_context(_pipe); 1927 struct pipe_context *pipe = tc->pipe; 1928 1929 tc_sync(tc); 1930 return pipe->create_texture_handle(pipe, view, state); 1931} 1932 1933struct tc_make_texture_handle_resident { 1934 struct tc_call_base base; 1935 bool resident; 1936 uint64_t handle; 1937}; 1938 1939static uint16_t 1940tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last) 1941{ 1942 struct tc_make_texture_handle_resident *p = 1943 to_call(call, tc_make_texture_handle_resident); 1944 1945 pipe->make_texture_handle_resident(pipe, p->handle, p->resident); 1946 return call_size(tc_make_texture_handle_resident); 1947} 1948 1949static void 1950tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle, 1951 bool resident) 1952{ 1953 struct threaded_context *tc = threaded_context(_pipe); 1954 struct tc_make_texture_handle_resident *p = 1955 tc_add_call(tc, TC_CALL_make_texture_handle_resident, 1956 tc_make_texture_handle_resident); 1957 1958 p->handle = handle; 1959 p->resident = resident; 1960} 1961 1962static uint64_t 1963tc_create_image_handle(struct pipe_context *_pipe, 1964 const struct pipe_image_view *image) 1965{ 1966 struct threaded_context *tc = threaded_context(_pipe); 1967 
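/* Handle creation has to return a value, so it cannot be queued; sync
 * with the driver thread instead. Buffer images also lose their CPU
 * shadow storage first, presumably because the bindless handle lets
 * the driver access the real buffer behind the threaded context's back.
 */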
struct pipe_context *pipe = tc->pipe; 1968 1969 if (image->resource->target == PIPE_BUFFER) 1970 tc_buffer_disable_cpu_storage(image->resource); 1971 1972 tc_sync(tc); 1973 return pipe->create_image_handle(pipe, image); 1974} 1975 1976struct tc_make_image_handle_resident { 1977 struct tc_call_base base; 1978 bool resident; 1979 unsigned access; 1980 uint64_t handle; 1981}; 1982 1983static uint16_t 1984tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last) 1985{ 1986 struct tc_make_image_handle_resident *p = 1987 to_call(call, tc_make_image_handle_resident); 1988 1989 pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident); 1990 return call_size(tc_make_image_handle_resident); 1991} 1992 1993static void 1994tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle, 1995 unsigned access, bool resident) 1996{ 1997 struct threaded_context *tc = threaded_context(_pipe); 1998 struct tc_make_image_handle_resident *p = 1999 tc_add_call(tc, TC_CALL_make_image_handle_resident, 2000 tc_make_image_handle_resident); 2001 2002 p->handle = handle; 2003 p->access = access; 2004 p->resident = resident; 2005} 2006 2007 2008/******************************************************************** 2009 * transfer 2010 */ 2011 2012struct tc_replace_buffer_storage { 2013 struct tc_call_base base; 2014 uint16_t num_rebinds; 2015 uint32_t rebind_mask; 2016 uint32_t delete_buffer_id; 2017 struct pipe_resource *dst; 2018 struct pipe_resource *src; 2019 tc_replace_buffer_storage_func func; 2020}; 2021 2022static uint16_t 2023tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call, uint64_t *last) 2024{ 2025 struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage); 2026 2027 p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id); 2028 2029 tc_drop_resource_reference(p->dst); 2030 tc_drop_resource_reference(p->src); 2031 return call_size(tc_replace_buffer_storage); 2032} 2033 2034/* Return true if the buffer has been invalidated or is idle. 2035 * Note that callers must've called tc_touch_buffer before calling 2036 * this function. */ 2037static bool 2038tc_invalidate_buffer(struct threaded_context *tc, 2039 struct threaded_resource *tbuf) 2040{ 2041 if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) { 2042 /* It's idle, so invalidation would be a no-op, but we can still clear 2043 * the valid range because we are technically doing invalidation, but 2044 * skipping it because it's useless. 2045 * 2046 * If the buffer is bound for write, we can't invalidate the range. 2047 */ 2048 if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique)) 2049 util_range_set_empty(&tbuf->valid_buffer_range); 2050 return true; 2051 } 2052 2053 struct pipe_screen *screen = tc->base.screen; 2054 struct pipe_resource *new_buf; 2055 2056 /* Shared, pinned, and sparse buffers can't be reallocated. */ 2057 if (tc_is_buffer_shared(tbuf) || 2058 tbuf->is_user_ptr || 2059 tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) 2060 return false; 2061 2062 /* Allocate a new one. */ 2063 new_buf = screen->resource_create(screen, &tbuf->b); 2064 if (!new_buf) 2065 return false; 2066 2067 /* Replace the "latest" pointer. */ 2068 if (tbuf->latest != &tbuf->b) 2069 pipe_resource_reference(&tbuf->latest, NULL); 2070 2071 tbuf->latest = new_buf; 2072 2073 uint32_t delete_buffer_id = tbuf->buffer_id_unique; 2074 2075 /* Enqueue storage replacement of the original buffer. 
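 * The swap itself runs later on the driver thread through the
 * driver-supplied replace_buffer_storage callback; in the meantime the
 * threaded context already treats new_buf as the current storage via
 * tbuf->latest and rebinds it below.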
*/ 2076 struct tc_replace_buffer_storage *p = 2077 tc_add_call(tc, TC_CALL_replace_buffer_storage, 2078 tc_replace_buffer_storage); 2079 2080 p->func = tc->replace_buffer_storage; 2081 tc_set_resource_reference(&p->dst, &tbuf->b); 2082 tc_set_resource_reference(&p->src, new_buf); 2083 p->delete_buffer_id = delete_buffer_id; 2084 p->rebind_mask = 0; 2085 2086 /* Treat the current buffer as the new buffer. */ 2087 bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique); 2088 p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique, 2089 threaded_resource(new_buf)->buffer_id_unique, 2090 &p->rebind_mask); 2091 2092 /* If the buffer is not bound for write, clear the valid range. */ 2093 if (!bound_for_write) 2094 util_range_set_empty(&tbuf->valid_buffer_range); 2095 2096 tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique; 2097 threaded_resource(new_buf)->buffer_id_unique = 0; 2098 2099 return true; 2100} 2101 2102/* Note that callers must've called tc_touch_buffer first before 2103 * calling tc_improve_map_buffer_flags. */ 2104static unsigned 2105tc_improve_map_buffer_flags(struct threaded_context *tc, 2106 struct threaded_resource *tres, unsigned usage, 2107 unsigned offset, unsigned size) 2108{ 2109 /* Never invalidate inside the driver and never infer "unsynchronized". */ 2110 unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE | 2111 TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED; 2112 2113 /* Prevent a reentry. */ 2114 if (usage & tc_flags) 2115 return usage; 2116 2117 /* Use the staging upload if it's preferred. */ 2118 if (usage & (PIPE_MAP_DISCARD_RANGE | 2119 PIPE_MAP_DISCARD_WHOLE_RESOURCE) && 2120 !(usage & PIPE_MAP_PERSISTENT) && 2121 tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY && 2122 tc->use_forced_staging_uploads) { 2123 usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE | 2124 PIPE_MAP_UNSYNCHRONIZED); 2125 2126 return usage | tc_flags | PIPE_MAP_DISCARD_RANGE; 2127 } 2128 2129 /* Sparse buffers can't be mapped directly and can't be reallocated 2130 * (fully invalidated). That may just be a radeonsi limitation, but 2131 * the threaded context must obey it with radeonsi. 2132 */ 2133 if (tres->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) { 2134 /* We can use DISCARD_RANGE instead of full discard. This is the only 2135 * fast path for sparse buffers that doesn't need thread synchronization. 2136 */ 2137 if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) 2138 usage |= PIPE_MAP_DISCARD_RANGE; 2139 2140 /* Allow DISCARD_WHOLE_RESOURCE and infering UNSYNCHRONIZED in drivers. 2141 * The threaded context doesn't do unsychronized mappings and invalida- 2142 * tions of sparse buffers, therefore a correct driver behavior won't 2143 * result in an incorrect behavior with the threaded context. 2144 */ 2145 return usage; 2146 } 2147 2148 usage |= tc_flags; 2149 2150 /* Handle CPU reads trivially. */ 2151 if (usage & PIPE_MAP_READ) { 2152 if (usage & PIPE_MAP_UNSYNCHRONIZED) 2153 usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */ 2154 2155 /* Drivers aren't allowed to do buffer invalidations. */ 2156 return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE; 2157 } 2158 2159 /* See if the buffer range being mapped has never been initialized or 2160 * the buffer is idle, in which case it can be mapped unsynchronized. 
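 * The valid-range shortcut is restricted to non-shared buffers because
 * other processes can write to a shared buffer without updating our
 * valid range; the busy check is safe either way.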
*/ 2161 if (!(usage & PIPE_MAP_UNSYNCHRONIZED) && 2162 ((!tres->is_shared && 2163 !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) || 2164 !tc_is_buffer_busy(tc, tres, usage))) 2165 usage |= PIPE_MAP_UNSYNCHRONIZED; 2166 2167 if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) { 2168 /* If discarding the entire range, discard the whole resource instead. */ 2169 if (usage & PIPE_MAP_DISCARD_RANGE && 2170 offset == 0 && size == tres->b.width0) 2171 usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE; 2172 2173 /* Discard the whole resource if needed. */ 2174 if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) { 2175 if (tc_invalidate_buffer(tc, tres)) 2176 usage |= PIPE_MAP_UNSYNCHRONIZED; 2177 else 2178 usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */ 2179 } 2180 } 2181 2182 /* We won't need this flag anymore. */ 2183 /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */ 2184 usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE; 2185 2186 /* GL_AMD_pinned_memory and persistent mappings can't use staging 2187 * buffers. */ 2188 if (usage & (PIPE_MAP_UNSYNCHRONIZED | 2189 PIPE_MAP_PERSISTENT) || 2190 tres->is_user_ptr) 2191 usage &= ~PIPE_MAP_DISCARD_RANGE; 2192 2193 /* Unsychronized buffer mappings don't have to synchronize the thread. */ 2194 if (usage & PIPE_MAP_UNSYNCHRONIZED) { 2195 usage &= ~PIPE_MAP_DISCARD_RANGE; 2196 usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */ 2197 } 2198 2199 return usage; 2200} 2201 2202static void * 2203tc_buffer_map(struct pipe_context *_pipe, 2204 struct pipe_resource *resource, unsigned level, 2205 unsigned usage, const struct pipe_box *box, 2206 struct pipe_transfer **transfer) 2207{ 2208 struct threaded_context *tc = threaded_context(_pipe); 2209 struct threaded_resource *tres = threaded_resource(resource); 2210 struct pipe_context *pipe = tc->pipe; 2211 2212 /* PIPE_MAP_THREAD_SAFE is for glthread, which shouldn't use the CPU storage and 2213 * this shouldn't normally be necessary because glthread only uses large buffers. 2214 */ 2215 if (usage & PIPE_MAP_THREAD_SAFE) 2216 tc_buffer_disable_cpu_storage(resource); 2217 2218 tc_touch_buffer(tc, tres); 2219 2220 /* CPU storage relies on buffer invalidation never failing. With shared buffers, 2221 * invalidation might not always be possible, so CPU storage can't be used. 2222 */ 2223 if (tc_is_buffer_shared(tres)) 2224 tc_buffer_disable_cpu_storage(resource); 2225 2226 usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width); 2227 2228 /* If the CPU storage is enabled, return it directly. */ 2229 if (tres->allow_cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) { 2230 /* We can't let resource_copy_region disable the CPU storage. */ 2231 assert(!(tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY)); 2232 2233 if (!tres->cpu_storage) { 2234 tres->cpu_storage = align_malloc(resource->width0, tc->map_buffer_alignment); 2235 2236 if (tres->cpu_storage && tres->valid_buffer_range.end) { 2237 /* The GPU buffer contains valid data. Copy them to the CPU storage. */ 2238 struct pipe_box box2; 2239 struct pipe_transfer *transfer2; 2240 2241 unsigned valid_range_len = tres->valid_buffer_range.end - tres->valid_buffer_range.start; 2242 u_box_1d(tres->valid_buffer_range.start, valid_range_len, &box2); 2243 2244 tc_sync_msg(tc, "cpu storage GPU -> CPU copy"); 2245 tc_set_driver_thread(tc); 2246 2247 void *ret = pipe->buffer_map(pipe, tres->latest ? 
tres->latest : resource, 2248 0, PIPE_MAP_READ, &box2, &transfer2); 2249 memcpy(&((uint8_t*)tres->cpu_storage)[tres->valid_buffer_range.start], 2250 ret, 2251 valid_range_len); 2252 pipe->buffer_unmap(pipe, transfer2); 2253 2254 tc_clear_driver_thread(tc); 2255 } 2256 } 2257 2258 if (tres->cpu_storage) { 2259 struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers); 2260 ttrans->b.resource = resource; 2261 ttrans->b.usage = usage; 2262 ttrans->b.box = *box; 2263 ttrans->valid_buffer_range = &tres->valid_buffer_range; 2264 ttrans->cpu_storage_mapped = true; 2265 *transfer = &ttrans->b; 2266 2267 return (uint8_t*)tres->cpu_storage + box->x; 2268 } else { 2269 tres->allow_cpu_storage = false; 2270 } 2271 } 2272 2273 /* Do a staging transfer within the threaded context. The driver should 2274 * only get resource_copy_region. 2275 */ 2276 if (usage & PIPE_MAP_DISCARD_RANGE) { 2277 struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers); 2278 uint8_t *map; 2279 2280 u_upload_alloc(tc->base.stream_uploader, 0, 2281 box->width + (box->x % tc->map_buffer_alignment), 2282 tc->map_buffer_alignment, &ttrans->b.offset, 2283 &ttrans->staging, (void**)&map); 2284 if (!map) { 2285 slab_free(&tc->pool_transfers, ttrans); 2286 return NULL; 2287 } 2288 2289 ttrans->b.resource = resource; 2290 ttrans->b.level = 0; 2291 ttrans->b.usage = usage; 2292 ttrans->b.box = *box; 2293 ttrans->b.stride = 0; 2294 ttrans->b.layer_stride = 0; 2295 ttrans->valid_buffer_range = &tres->valid_buffer_range; 2296 ttrans->cpu_storage_mapped = false; 2297 *transfer = &ttrans->b; 2298 2299 p_atomic_inc(&tres->pending_staging_uploads); 2300 util_range_add(resource, &tres->pending_staging_uploads_range, 2301 box->x, box->x + box->width); 2302 2303 return map + (box->x % tc->map_buffer_alignment); 2304 } 2305 2306 if (usage & PIPE_MAP_UNSYNCHRONIZED && 2307 p_atomic_read(&tres->pending_staging_uploads) && 2308 util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) { 2309 /* Write conflict detected between a staging transfer and the direct mapping we're 2310 * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping 2311 * will have to wait for the staging transfer completion. 2312 * Note: The conflict detection is only based on the mapped range, not on the actual 2313 * written range(s). 2314 */ 2315 usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC; 2316 tc->use_forced_staging_uploads = false; 2317 } 2318 2319 /* Unsychronized buffer mappings don't have to synchronize the thread. */ 2320 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) { 2321 tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? " discard_range" : 2322 usage & PIPE_MAP_READ ? " read" : " staging conflict"); 2323 tc_set_driver_thread(tc); 2324 } 2325 2326 tc->bytes_mapped_estimate += box->width; 2327 2328 void *ret = pipe->buffer_map(pipe, tres->latest ? 
tres->latest : resource, 2329 level, usage, box, transfer); 2330 threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range; 2331 threaded_transfer(*transfer)->cpu_storage_mapped = false; 2332 2333 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) 2334 tc_clear_driver_thread(tc); 2335 2336 return ret; 2337} 2338 2339static void * 2340tc_texture_map(struct pipe_context *_pipe, 2341 struct pipe_resource *resource, unsigned level, 2342 unsigned usage, const struct pipe_box *box, 2343 struct pipe_transfer **transfer) 2344{ 2345 struct threaded_context *tc = threaded_context(_pipe); 2346 struct threaded_resource *tres = threaded_resource(resource); 2347 struct pipe_context *pipe = tc->pipe; 2348 2349 tc_sync_msg(tc, "texture"); 2350 tc_set_driver_thread(tc); 2351 2352 tc->bytes_mapped_estimate += box->width; 2353 2354 void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource, 2355 level, usage, box, transfer); 2356 2357 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) 2358 tc_clear_driver_thread(tc); 2359 2360 return ret; 2361} 2362 2363struct tc_transfer_flush_region { 2364 struct tc_call_base base; 2365 struct pipe_box box; 2366 struct pipe_transfer *transfer; 2367}; 2368 2369static uint16_t 2370tc_call_transfer_flush_region(struct pipe_context *pipe, void *call, uint64_t *last) 2371{ 2372 struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region); 2373 2374 pipe->transfer_flush_region(pipe, p->transfer, &p->box); 2375 return call_size(tc_transfer_flush_region); 2376} 2377 2378struct tc_resource_copy_region { 2379 struct tc_call_base base; 2380 unsigned dst_level; 2381 unsigned dstx, dsty, dstz; 2382 unsigned src_level; 2383 struct pipe_box src_box; 2384 struct pipe_resource *dst; 2385 struct pipe_resource *src; 2386}; 2387 2388static void 2389tc_resource_copy_region(struct pipe_context *_pipe, 2390 struct pipe_resource *dst, unsigned dst_level, 2391 unsigned dstx, unsigned dsty, unsigned dstz, 2392 struct pipe_resource *src, unsigned src_level, 2393 const struct pipe_box *src_box); 2394 2395static void 2396tc_buffer_do_flush_region(struct threaded_context *tc, 2397 struct threaded_transfer *ttrans, 2398 const struct pipe_box *box) 2399{ 2400 struct threaded_resource *tres = threaded_resource(ttrans->b.resource); 2401 2402 if (ttrans->staging) { 2403 struct pipe_box src_box; 2404 2405 u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment + 2406 (box->x - ttrans->b.box.x), 2407 box->width, &src_box); 2408 2409 /* Copy the staging buffer into the original one. */ 2410 tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0, 2411 ttrans->staging, 0, &src_box); 2412 } 2413 2414 /* Don't update the valid range when we're uploading the CPU storage 2415 * because it includes the uninitialized range too. 
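 * (TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE is set e.g. by the
 * tc_buffer_unmap path that re-uploads the whole CPU shadow copy with
 * tc_buffer_subdata, which covers 0..width0 rather than just the bytes
 * the application actually wrote.)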
2416 */ 2417 if (!(ttrans->b.usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) { 2418 util_range_add(&tres->b, ttrans->valid_buffer_range, 2419 box->x, box->x + box->width); 2420 } 2421} 2422 2423static void 2424tc_transfer_flush_region(struct pipe_context *_pipe, 2425 struct pipe_transfer *transfer, 2426 const struct pipe_box *rel_box) 2427{ 2428 struct threaded_context *tc = threaded_context(_pipe); 2429 struct threaded_transfer *ttrans = threaded_transfer(transfer); 2430 struct threaded_resource *tres = threaded_resource(transfer->resource); 2431 unsigned required_usage = PIPE_MAP_WRITE | 2432 PIPE_MAP_FLUSH_EXPLICIT; 2433 2434 if (tres->b.target == PIPE_BUFFER) { 2435 if ((transfer->usage & required_usage) == required_usage) { 2436 struct pipe_box box; 2437 2438 u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box); 2439 tc_buffer_do_flush_region(tc, ttrans, &box); 2440 } 2441 2442 /* Staging transfers don't send the call to the driver. 2443 * 2444 * Transfers using the CPU storage shouldn't call transfer_flush_region 2445 * in the driver because the buffer is not really mapped on the driver 2446 * side and the CPU storage always re-uploads everything (flush_region 2447 * makes no difference). 2448 */ 2449 if (ttrans->staging || ttrans->cpu_storage_mapped) 2450 return; 2451 } 2452 2453 struct tc_transfer_flush_region *p = 2454 tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region); 2455 p->transfer = transfer; 2456 p->box = *rel_box; 2457} 2458 2459static void 2460tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, 2461 unsigned flags); 2462 2463struct tc_buffer_unmap { 2464 struct tc_call_base base; 2465 bool was_staging_transfer; 2466 union { 2467 struct pipe_transfer *transfer; 2468 struct pipe_resource *resource; 2469 }; 2470}; 2471 2472static uint16_t 2473tc_call_buffer_unmap(struct pipe_context *pipe, void *call, uint64_t *last) 2474{ 2475 struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap); 2476 2477 if (p->was_staging_transfer) { 2478 struct threaded_resource *tres = threaded_resource(p->resource); 2479 /* Nothing to do except keeping track of staging uploads */ 2480 assert(tres->pending_staging_uploads > 0); 2481 p_atomic_dec(&tres->pending_staging_uploads); 2482 tc_drop_resource_reference(p->resource); 2483 } else { 2484 pipe->buffer_unmap(pipe, p->transfer); 2485 } 2486 2487 return call_size(tc_buffer_unmap); 2488} 2489 2490static void 2491tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer) 2492{ 2493 struct threaded_context *tc = threaded_context(_pipe); 2494 struct threaded_transfer *ttrans = threaded_transfer(transfer); 2495 struct threaded_resource *tres = threaded_resource(transfer->resource); 2496 2497 /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be 2498 * called from any thread and bypasses all multithreaded queues. 
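 * Nothing was enqueued for such a mapping, so unmap directly on the
 * driver context; only the valid range needs to be updated here.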
2499 */ 2500 if (transfer->usage & PIPE_MAP_THREAD_SAFE) { 2501 assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED); 2502 assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT | 2503 PIPE_MAP_DISCARD_RANGE))); 2504 2505 struct pipe_context *pipe = tc->pipe; 2506 util_range_add(&tres->b, ttrans->valid_buffer_range, 2507 transfer->box.x, transfer->box.x + transfer->box.width); 2508 2509 pipe->buffer_unmap(pipe, transfer); 2510 return; 2511 } 2512 2513 if (transfer->usage & PIPE_MAP_WRITE && 2514 !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT)) 2515 tc_buffer_do_flush_region(tc, ttrans, &transfer->box); 2516 2517 if (ttrans->cpu_storage_mapped) { 2518 /* GL allows simultaneous GPU stores with mapped buffers as long as GPU stores don't 2519 * touch the mapped range. That's a problem because GPU stores free the CPU storage. 2520 * If that happens, we just ignore the unmap call and don't upload anything to prevent 2521 * a crash. 2522 * 2523 * Disallow the CPU storage in the driver to work around this. 2524 */ 2525 assert(tres->cpu_storage); 2526 2527 if (tres->cpu_storage) { 2528 /* Invalidations shouldn't fail as long as CPU storage is allowed. */ 2529 ASSERTED bool invalidated = tc_invalidate_buffer(tc, tres); 2530 assert(invalidated); 2531 2532 tc_buffer_subdata(&tc->base, &tres->b, 2533 PIPE_MAP_UNSYNCHRONIZED | 2534 TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE, 2535 0, tres->b.width0, tres->cpu_storage); 2536 /* This shouldn't have been freed by buffer_subdata. */ 2537 assert(tres->cpu_storage); 2538 } else { 2539 static bool warned_once = false; 2540 if (!warned_once) { 2541 fprintf(stderr, "This application is incompatible with cpu_storage.\n"); 2542 fprintf(stderr, "Use tc_max_cpu_storage_size=0 to disable it and report this issue to Mesa.\n"); 2543 warned_once = true; 2544 } 2545 } 2546 2547 tc_drop_resource_reference(ttrans->staging); 2548 slab_free(&tc->pool_transfers, ttrans); 2549 return; 2550 } 2551 2552 bool was_staging_transfer = false; 2553 2554 if (ttrans->staging) { 2555 was_staging_transfer = true; 2556 2557 tc_drop_resource_reference(ttrans->staging); 2558 slab_free(&tc->pool_transfers, ttrans); 2559 } 2560 2561 struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap, 2562 tc_buffer_unmap); 2563 if (was_staging_transfer) { 2564 tc_set_resource_reference(&p->resource, &tres->b); 2565 p->was_staging_transfer = true; 2566 } else { 2567 p->transfer = transfer; 2568 p->was_staging_transfer = false; 2569 } 2570 2571 /* tc_buffer_map directly maps the buffers, but tc_buffer_unmap 2572 * defers the unmap operation to the batch execution. 2573 * bytes_mapped_estimate is an estimation of the map/unmap bytes delta 2574 * and if it goes over an optional limit the current batch is flushed, 2575 * to reclaim some RAM. 
*/ 2576 if (!ttrans->staging && tc->bytes_mapped_limit && 2577 tc->bytes_mapped_estimate > tc->bytes_mapped_limit) { 2578 tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC); 2579 } 2580} 2581 2582struct tc_texture_unmap { 2583 struct tc_call_base base; 2584 struct pipe_transfer *transfer; 2585}; 2586 2587static uint16_t 2588tc_call_texture_unmap(struct pipe_context *pipe, void *call, uint64_t *last) 2589{ 2590 struct tc_texture_unmap *p = (struct tc_texture_unmap *) call; 2591 2592 pipe->texture_unmap(pipe, p->transfer); 2593 return call_size(tc_texture_unmap); 2594} 2595 2596static void 2597tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer) 2598{ 2599 struct threaded_context *tc = threaded_context(_pipe); 2600 struct threaded_transfer *ttrans = threaded_transfer(transfer); 2601 2602 tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer; 2603 2604 /* tc_texture_map directly maps the textures, but tc_texture_unmap 2605 * defers the unmap operation to the batch execution. 2606 * bytes_mapped_estimate is an estimation of the map/unmap bytes delta 2607 * and if it goes over an optional limit the current batch is flushed, 2608 * to reclaim some RAM. */ 2609 if (!ttrans->staging && tc->bytes_mapped_limit && 2610 tc->bytes_mapped_estimate > tc->bytes_mapped_limit) { 2611 tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC); 2612 } 2613} 2614 2615struct tc_buffer_subdata { 2616 struct tc_call_base base; 2617 unsigned usage, offset, size; 2618 struct pipe_resource *resource; 2619 char slot[0]; /* more will be allocated if needed */ 2620}; 2621 2622static uint16_t 2623tc_call_buffer_subdata(struct pipe_context *pipe, void *call, uint64_t *last) 2624{ 2625 struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call; 2626 2627 pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size, 2628 p->slot); 2629 tc_drop_resource_reference(p->resource); 2630 return p->base.num_slots; 2631} 2632 2633static void 2634tc_buffer_subdata(struct pipe_context *_pipe, 2635 struct pipe_resource *resource, 2636 unsigned usage, unsigned offset, 2637 unsigned size, const void *data) 2638{ 2639 struct threaded_context *tc = threaded_context(_pipe); 2640 struct threaded_resource *tres = threaded_resource(resource); 2641 2642 if (!size) 2643 return; 2644 2645 tc_touch_buffer(tc, tres); 2646 2647 usage |= PIPE_MAP_WRITE; 2648 2649 /* PIPE_MAP_DIRECTLY supresses implicit DISCARD_RANGE. */ 2650 if (!(usage & PIPE_MAP_DIRECTLY)) 2651 usage |= PIPE_MAP_DISCARD_RANGE; 2652 2653 usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size); 2654 2655 /* Unsychronized and big transfers should use transfer_map. Also handle 2656 * full invalidations, because drivers aren't allowed to do them. 2657 */ 2658 if (usage & (PIPE_MAP_UNSYNCHRONIZED | 2659 PIPE_MAP_DISCARD_WHOLE_RESOURCE) || 2660 size > TC_MAX_SUBDATA_BYTES || 2661 tres->cpu_storage) { 2662 struct pipe_transfer *transfer; 2663 struct pipe_box box; 2664 uint8_t *map = NULL; 2665 2666 u_box_1d(offset, size, &box); 2667 2668 /* CPU storage is only useful for partial updates. It can add overhead 2669 * on glBufferData calls so avoid using it. 
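 * Tagging a full-buffer upload with TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE
 * makes tc_buffer_map below bypass the CPU shadow copy and use the
 * regular invalidate/staging path instead.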
2670 */ 2671 if (!tres->cpu_storage && offset == 0 && size == resource->width0) 2672 usage |= TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE; 2673 2674 map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer); 2675 if (map) { 2676 memcpy(map, data, size); 2677 tc_buffer_unmap(_pipe, transfer); 2678 } 2679 return; 2680 } 2681 2682 util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size); 2683 2684 /* The upload is small. Enqueue it. */ 2685 struct tc_buffer_subdata *p = 2686 tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size); 2687 2688 tc_set_resource_reference(&p->resource, resource); 2689 /* This is will always be busy because if it wasn't, tc_improve_map_buffer- 2690 * _flags would set UNSYNCHRONIZED and we wouldn't get here. 2691 */ 2692 tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], resource); 2693 p->usage = usage; 2694 p->offset = offset; 2695 p->size = size; 2696 memcpy(p->slot, data, size); 2697} 2698 2699struct tc_texture_subdata { 2700 struct tc_call_base base; 2701 unsigned level, usage, stride, layer_stride; 2702 struct pipe_box box; 2703 struct pipe_resource *resource; 2704 char slot[0]; /* more will be allocated if needed */ 2705}; 2706 2707static uint16_t 2708tc_call_texture_subdata(struct pipe_context *pipe, void *call, uint64_t *last) 2709{ 2710 struct tc_texture_subdata *p = (struct tc_texture_subdata *)call; 2711 2712 pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box, 2713 p->slot, p->stride, p->layer_stride); 2714 tc_drop_resource_reference(p->resource); 2715 return p->base.num_slots; 2716} 2717 2718static void 2719tc_texture_subdata(struct pipe_context *_pipe, 2720 struct pipe_resource *resource, 2721 unsigned level, unsigned usage, 2722 const struct pipe_box *box, 2723 const void *data, unsigned stride, 2724 unsigned layer_stride) 2725{ 2726 struct threaded_context *tc = threaded_context(_pipe); 2727 unsigned size; 2728 2729 assert(box->height >= 1); 2730 assert(box->depth >= 1); 2731 2732 size = (box->depth - 1) * layer_stride + 2733 (box->height - 1) * stride + 2734 box->width * util_format_get_blocksize(resource->format); 2735 if (!size) 2736 return; 2737 2738 /* Small uploads can be enqueued, big uploads must sync. 
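 * Unlike buffer uploads there is no staging fallback here, so anything
 * larger than TC_MAX_SUBDATA_BYTES synchronizes with the driver thread
 * and calls texture_subdata directly.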
*/ 2739 if (size <= TC_MAX_SUBDATA_BYTES) { 2740 struct tc_texture_subdata *p = 2741 tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size); 2742 2743 tc_set_resource_reference(&p->resource, resource); 2744 p->level = level; 2745 p->usage = usage; 2746 p->box = *box; 2747 p->stride = stride; 2748 p->layer_stride = layer_stride; 2749 memcpy(p->slot, data, size); 2750 } else { 2751 struct pipe_context *pipe = tc->pipe; 2752 2753 tc_sync(tc); 2754 tc_set_driver_thread(tc); 2755 pipe->texture_subdata(pipe, resource, level, usage, box, data, 2756 stride, layer_stride); 2757 tc_clear_driver_thread(tc); 2758 } 2759} 2760 2761 2762/******************************************************************** 2763 * miscellaneous 2764 */ 2765 2766#define TC_FUNC_SYNC_RET0(ret_type, func) \ 2767 static ret_type \ 2768 tc_##func(struct pipe_context *_pipe) \ 2769 { \ 2770 struct threaded_context *tc = threaded_context(_pipe); \ 2771 struct pipe_context *pipe = tc->pipe; \ 2772 tc_sync(tc); \ 2773 return pipe->func(pipe); \ 2774 } 2775 2776TC_FUNC_SYNC_RET0(uint64_t, get_timestamp) 2777 2778static void 2779tc_get_sample_position(struct pipe_context *_pipe, 2780 unsigned sample_count, unsigned sample_index, 2781 float *out_value) 2782{ 2783 struct threaded_context *tc = threaded_context(_pipe); 2784 struct pipe_context *pipe = tc->pipe; 2785 2786 tc_sync(tc); 2787 pipe->get_sample_position(pipe, sample_count, sample_index, 2788 out_value); 2789} 2790 2791static enum pipe_reset_status 2792tc_get_device_reset_status(struct pipe_context *_pipe) 2793{ 2794 struct threaded_context *tc = threaded_context(_pipe); 2795 struct pipe_context *pipe = tc->pipe; 2796 2797 if (!tc->options.unsynchronized_get_device_reset_status) 2798 tc_sync(tc); 2799 2800 return pipe->get_device_reset_status(pipe); 2801} 2802 2803static void 2804tc_set_device_reset_callback(struct pipe_context *_pipe, 2805 const struct pipe_device_reset_callback *cb) 2806{ 2807 struct threaded_context *tc = threaded_context(_pipe); 2808 struct pipe_context *pipe = tc->pipe; 2809 2810 tc_sync(tc); 2811 pipe->set_device_reset_callback(pipe, cb); 2812} 2813 2814struct tc_string_marker { 2815 struct tc_call_base base; 2816 int len; 2817 char slot[0]; /* more will be allocated if needed */ 2818}; 2819 2820static uint16_t 2821tc_call_emit_string_marker(struct pipe_context *pipe, void *call, uint64_t *last) 2822{ 2823 struct tc_string_marker *p = (struct tc_string_marker *)call; 2824 pipe->emit_string_marker(pipe, p->slot, p->len); 2825 return p->base.num_slots; 2826} 2827 2828static void 2829tc_emit_string_marker(struct pipe_context *_pipe, 2830 const char *string, int len) 2831{ 2832 struct threaded_context *tc = threaded_context(_pipe); 2833 2834 if (len <= TC_MAX_STRING_MARKER_BYTES) { 2835 struct tc_string_marker *p = 2836 tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len); 2837 2838 memcpy(p->slot, string, len); 2839 p->len = len; 2840 } else { 2841 struct pipe_context *pipe = tc->pipe; 2842 2843 tc_sync(tc); 2844 tc_set_driver_thread(tc); 2845 pipe->emit_string_marker(pipe, string, len); 2846 tc_clear_driver_thread(tc); 2847 } 2848} 2849 2850static void 2851tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream, 2852 unsigned flags) 2853{ 2854 struct threaded_context *tc = threaded_context(_pipe); 2855 struct pipe_context *pipe = tc->pipe; 2856 2857 tc_sync(tc); 2858 pipe->dump_debug_state(pipe, stream, flags); 2859} 2860 2861static void 2862tc_set_debug_callback(struct pipe_context *_pipe, 2863 const 
struct util_debug_callback *cb) 2864{ 2865 struct threaded_context *tc = threaded_context(_pipe); 2866 struct pipe_context *pipe = tc->pipe; 2867 2868 tc_sync(tc); 2869 2870 /* Drop all synchronous debug callbacks. Drivers are expected to be OK 2871 * with this. shader-db will use an environment variable to disable 2872 * the threaded context. 2873 */ 2874 if (cb && !cb->async) 2875 pipe->set_debug_callback(pipe, NULL); 2876 else 2877 pipe->set_debug_callback(pipe, cb); 2878} 2879 2880static void 2881tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log) 2882{ 2883 struct threaded_context *tc = threaded_context(_pipe); 2884 struct pipe_context *pipe = tc->pipe; 2885 2886 tc_sync(tc); 2887 pipe->set_log_context(pipe, log); 2888} 2889 2890static void 2891tc_create_fence_fd(struct pipe_context *_pipe, 2892 struct pipe_fence_handle **fence, int fd, 2893 enum pipe_fd_type type) 2894{ 2895 struct threaded_context *tc = threaded_context(_pipe); 2896 struct pipe_context *pipe = tc->pipe; 2897 2898 tc_sync(tc); 2899 pipe->create_fence_fd(pipe, fence, fd, type); 2900} 2901 2902struct tc_fence_call { 2903 struct tc_call_base base; 2904 struct pipe_fence_handle *fence; 2905}; 2906 2907static uint16_t 2908tc_call_fence_server_sync(struct pipe_context *pipe, void *call, uint64_t *last) 2909{ 2910 struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence; 2911 2912 pipe->fence_server_sync(pipe, fence); 2913 pipe->screen->fence_reference(pipe->screen, &fence, NULL); 2914 return call_size(tc_fence_call); 2915} 2916 2917static void 2918tc_fence_server_sync(struct pipe_context *_pipe, 2919 struct pipe_fence_handle *fence) 2920{ 2921 struct threaded_context *tc = threaded_context(_pipe); 2922 struct pipe_screen *screen = tc->pipe->screen; 2923 struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync, 2924 tc_fence_call); 2925 2926 call->fence = NULL; 2927 screen->fence_reference(screen, &call->fence, fence); 2928} 2929 2930static void 2931tc_fence_server_signal(struct pipe_context *_pipe, 2932 struct pipe_fence_handle *fence) 2933{ 2934 struct threaded_context *tc = threaded_context(_pipe); 2935 struct pipe_context *pipe = tc->pipe; 2936 tc_sync(tc); 2937 pipe->fence_server_signal(pipe, fence); 2938} 2939 2940static struct pipe_video_codec * 2941tc_create_video_codec(UNUSED struct pipe_context *_pipe, 2942 UNUSED const struct pipe_video_codec *templ) 2943{ 2944 unreachable("Threaded context should not be enabled for video APIs"); 2945 return NULL; 2946} 2947 2948static struct pipe_video_buffer * 2949tc_create_video_buffer(UNUSED struct pipe_context *_pipe, 2950 UNUSED const struct pipe_video_buffer *templ) 2951{ 2952 unreachable("Threaded context should not be enabled for video APIs"); 2953 return NULL; 2954} 2955 2956struct tc_context_param { 2957 struct tc_call_base base; 2958 enum pipe_context_param param; 2959 unsigned value; 2960}; 2961 2962static uint16_t 2963tc_call_set_context_param(struct pipe_context *pipe, void *call, uint64_t *last) 2964{ 2965 struct tc_context_param *p = to_call(call, tc_context_param); 2966 2967 if (pipe->set_context_param) 2968 pipe->set_context_param(pipe, p->param, p->value); 2969 2970 return call_size(tc_context_param); 2971} 2972 2973static void 2974tc_set_context_param(struct pipe_context *_pipe, 2975 enum pipe_context_param param, 2976 unsigned value) 2977{ 2978 struct threaded_context *tc = threaded_context(_pipe); 2979 2980 if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) { 2981 /* Pin the gallium thread as requested. 
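 * 'value' selects the L3 cache domain to pin to; the affinity masks
 * come from util_get_cpu_caps().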
*/ 2982 util_set_thread_affinity(tc->queue.threads[0], 2983 util_get_cpu_caps()->L3_affinity_mask[value], 2984 NULL, util_get_cpu_caps()->num_cpu_mask_bits); 2985 2986 /* Execute this immediately (without enqueuing). 2987 * It's required to be thread-safe. 2988 */ 2989 struct pipe_context *pipe = tc->pipe; 2990 if (pipe->set_context_param) 2991 pipe->set_context_param(pipe, param, value); 2992 return; 2993 } 2994 2995 if (tc->pipe->set_context_param) { 2996 struct tc_context_param *call = 2997 tc_add_call(tc, TC_CALL_set_context_param, tc_context_param); 2998 2999 call->param = param; 3000 call->value = value; 3001 } 3002} 3003 3004 3005/******************************************************************** 3006 * draw, launch, clear, blit, copy, flush 3007 */ 3008 3009struct tc_flush_call { 3010 struct tc_call_base base; 3011 unsigned flags; 3012 struct threaded_context *tc; 3013 struct pipe_fence_handle *fence; 3014}; 3015 3016static void 3017tc_flush_queries(struct threaded_context *tc) 3018{ 3019 struct threaded_query *tq, *tmp; 3020 LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) { 3021 list_del(&tq->head_unflushed); 3022 3023 /* Memory release semantics: due to a possible race with 3024 * tc_get_query_result, we must ensure that the linked list changes 3025 * are visible before setting tq->flushed. 3026 */ 3027 p_atomic_set(&tq->flushed, true); 3028 } 3029} 3030 3031static uint16_t 3032tc_call_flush(struct pipe_context *pipe, void *call, uint64_t *last) 3033{ 3034 struct tc_flush_call *p = to_call(call, tc_flush_call); 3035 struct pipe_screen *screen = pipe->screen; 3036 3037 pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags); 3038 screen->fence_reference(screen, &p->fence, NULL); 3039 3040 if (!(p->flags & PIPE_FLUSH_DEFERRED)) 3041 tc_flush_queries(p->tc); 3042 3043 return call_size(tc_flush_call); 3044} 3045 3046static void 3047tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, 3048 unsigned flags) 3049{ 3050 struct threaded_context *tc = threaded_context(_pipe); 3051 struct pipe_context *pipe = tc->pipe; 3052 struct pipe_screen *screen = pipe->screen; 3053 bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC); 3054 3055 if (async && tc->options.create_fence) { 3056 if (fence) { 3057 struct tc_batch *next = &tc->batch_slots[tc->next]; 3058 3059 if (!next->token) { 3060 next->token = malloc(sizeof(*next->token)); 3061 if (!next->token) 3062 goto out_of_memory; 3063 3064 pipe_reference_init(&next->token->ref, 1); 3065 next->token->tc = tc; 3066 } 3067 3068 screen->fence_reference(screen, fence, 3069 tc->options.create_fence(pipe, next->token)); 3070 if (!*fence) 3071 goto out_of_memory; 3072 } 3073 3074 struct tc_flush_call *p = tc_add_call(tc, TC_CALL_flush, tc_flush_call); 3075 p->tc = tc; 3076 p->fence = fence ? *fence : NULL; 3077 p->flags = flags | TC_FLUSH_ASYNC; 3078 3079 if (!(flags & PIPE_FLUSH_DEFERRED)) 3080 tc_batch_flush(tc); 3081 return; 3082 } 3083 3084out_of_memory: 3085 tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" : 3086 flags & PIPE_FLUSH_DEFERRED ? 
"deferred fence" : "normal"); 3087 3088 if (!(flags & PIPE_FLUSH_DEFERRED)) 3089 tc_flush_queries(tc); 3090 tc_set_driver_thread(tc); 3091 pipe->flush(pipe, fence, flags); 3092 tc_clear_driver_thread(tc); 3093} 3094 3095struct tc_draw_single { 3096 struct tc_call_base base; 3097 unsigned index_bias; 3098 struct pipe_draw_info info; 3099}; 3100 3101struct tc_draw_single_drawid { 3102 struct tc_draw_single base; 3103 unsigned drawid_offset; 3104}; 3105 3106static uint16_t 3107tc_call_draw_single_drawid(struct pipe_context *pipe, void *call, uint64_t *last) 3108{ 3109 struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid); 3110 struct tc_draw_single *info = &info_drawid->base; 3111 3112 /* u_threaded_context stores start/count in min/max_index for single draws. */ 3113 /* Drivers using u_threaded_context shouldn't use min/max_index. */ 3114 struct pipe_draw_start_count_bias draw; 3115 3116 draw.start = info->info.min_index; 3117 draw.count = info->info.max_index; 3118 draw.index_bias = info->index_bias; 3119 3120 info->info.index_bounds_valid = false; 3121 info->info.has_user_indices = false; 3122 info->info.take_index_buffer_ownership = false; 3123 3124 pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1); 3125 if (info->info.index_size) 3126 tc_drop_resource_reference(info->info.index.resource); 3127 3128 return call_size(tc_draw_single_drawid); 3129} 3130 3131static void 3132simplify_draw_info(struct pipe_draw_info *info) 3133{ 3134 /* Clear these fields to facilitate draw merging. 3135 * Drivers shouldn't use them. 3136 */ 3137 info->has_user_indices = false; 3138 info->index_bounds_valid = false; 3139 info->take_index_buffer_ownership = false; 3140 info->index_bias_varies = false; 3141 info->_pad = 0; 3142 3143 /* This shouldn't be set when merging single draws. */ 3144 info->increment_draw_id = false; 3145 3146 if (info->index_size) { 3147 if (!info->primitive_restart) 3148 info->restart_index = 0; 3149 } else { 3150 assert(!info->primitive_restart); 3151 info->primitive_restart = false; 3152 info->restart_index = 0; 3153 info->index.resource = NULL; 3154 } 3155} 3156 3157static bool 3158is_next_call_a_mergeable_draw(struct tc_draw_single *first, 3159 struct tc_draw_single *next) 3160{ 3161 if (next->base.call_id != TC_CALL_draw_single) 3162 return false; 3163 3164 simplify_draw_info(&next->info); 3165 3166 STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) == 3167 sizeof(struct pipe_draw_info) - 8); 3168 STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) == 3169 sizeof(struct pipe_draw_info) - 4); 3170 /* All fields must be the same except start and count. */ 3171 /* u_threaded_context stores start/count in min/max_index for single draws. */ 3172 return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info, 3173 DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0; 3174} 3175 3176static uint16_t 3177tc_call_draw_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr) 3178{ 3179 /* Draw call merging. */ 3180 struct tc_draw_single *first = to_call(call, tc_draw_single); 3181 struct tc_draw_single *last = (struct tc_draw_single *)last_ptr; 3182 struct tc_draw_single *next = get_next_call(first, tc_draw_single); 3183 3184 /* If at least 2 consecutive draw calls can be merged... */ 3185 if (next != last && 3186 next->base.call_id == TC_CALL_draw_single) { 3187 simplify_draw_info(&first->info); 3188 3189 if (is_next_call_a_mergeable_draw(first, next)) { 3190 /* The maximum number of merged draws is given by the batch size. 
*/ 3191 struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)]; 3192 unsigned num_draws = 2; 3193 bool index_bias_varies = first->index_bias != next->index_bias; 3194 3195 /* u_threaded_context stores start/count in min/max_index for single draws. */ 3196 multi[0].start = first->info.min_index; 3197 multi[0].count = first->info.max_index; 3198 multi[0].index_bias = first->index_bias; 3199 multi[1].start = next->info.min_index; 3200 multi[1].count = next->info.max_index; 3201 multi[1].index_bias = next->index_bias; 3202 3203 /* Find how many other draws can be merged. */ 3204 next = get_next_call(next, tc_draw_single); 3205 for (; next != last && is_next_call_a_mergeable_draw(first, next); 3206 next = get_next_call(next, tc_draw_single), num_draws++) { 3207 /* u_threaded_context stores start/count in min/max_index for single draws. */ 3208 multi[num_draws].start = next->info.min_index; 3209 multi[num_draws].count = next->info.max_index; 3210 multi[num_draws].index_bias = next->index_bias; 3211 index_bias_varies |= first->index_bias != next->index_bias; 3212 } 3213 3214 first->info.index_bias_varies = index_bias_varies; 3215 pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws); 3216 3217 /* Since all draws use the same index buffer, drop all references at once. */ 3218 if (first->info.index_size) 3219 pipe_drop_resource_references(first->info.index.resource, num_draws); 3220 3221 return call_size(tc_draw_single) * num_draws; 3222 } 3223 } 3224 3225 /* u_threaded_context stores start/count in min/max_index for single draws. */ 3226 /* Drivers using u_threaded_context shouldn't use min/max_index. */ 3227 struct pipe_draw_start_count_bias draw; 3228 3229 draw.start = first->info.min_index; 3230 draw.count = first->info.max_index; 3231 draw.index_bias = first->index_bias; 3232 3233 first->info.index_bounds_valid = false; 3234 first->info.has_user_indices = false; 3235 first->info.take_index_buffer_ownership = false; 3236 3237 pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1); 3238 if (first->info.index_size) 3239 tc_drop_resource_reference(first->info.index.resource); 3240 3241 return call_size(tc_draw_single); 3242} 3243 3244struct tc_draw_indirect { 3245 struct tc_call_base base; 3246 struct pipe_draw_start_count_bias draw; 3247 struct pipe_draw_info info; 3248 struct pipe_draw_indirect_info indirect; 3249}; 3250 3251static uint16_t 3252tc_call_draw_indirect(struct pipe_context *pipe, void *call, uint64_t *last) 3253{ 3254 struct tc_draw_indirect *info = to_call(call, tc_draw_indirect); 3255 3256 info->info.index_bounds_valid = false; 3257 info->info.take_index_buffer_ownership = false; 3258 3259 pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1); 3260 if (info->info.index_size) 3261 tc_drop_resource_reference(info->info.index.resource); 3262 3263 tc_drop_resource_reference(info->indirect.buffer); 3264 tc_drop_resource_reference(info->indirect.indirect_draw_count); 3265 tc_drop_so_target_reference(info->indirect.count_from_stream_output); 3266 return call_size(tc_draw_indirect); 3267} 3268 3269struct tc_draw_multi { 3270 struct tc_call_base base; 3271 unsigned num_draws; 3272 struct pipe_draw_info info; 3273 struct pipe_draw_start_count_bias slot[]; /* variable-sized array */ 3274}; 3275 3276static uint16_t 3277tc_call_draw_multi(struct pipe_context *pipe, void *call, uint64_t *last) 3278{ 3279 struct tc_draw_multi *info = (struct tc_draw_multi*)call; 3280 3281 info->info.has_user_indices = false; 3282 
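/* As in tc_call_draw_single, these flags are cleared because drivers
 * are not supposed to look at them (see simplify_draw_info).
 */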
info->info.index_bounds_valid = false; 3283 info->info.take_index_buffer_ownership = false; 3284 3285 pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws); 3286 if (info->info.index_size) 3287 tc_drop_resource_reference(info->info.index.resource); 3288 3289 return info->base.num_slots; 3290} 3291 3292#define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \ 3293 offsetof(struct pipe_draw_info, index) 3294 3295void 3296tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info, 3297 unsigned drawid_offset, 3298 const struct pipe_draw_indirect_info *indirect, 3299 const struct pipe_draw_start_count_bias *draws, 3300 unsigned num_draws) 3301{ 3302 STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX + 3303 sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index)); 3304 3305 struct threaded_context *tc = threaded_context(_pipe); 3306 unsigned index_size = info->index_size; 3307 bool has_user_indices = info->has_user_indices; 3308 3309 if (unlikely(indirect)) { 3310 assert(!has_user_indices); 3311 assert(num_draws == 1); 3312 3313 struct tc_draw_indirect *p = 3314 tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect); 3315 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list]; 3316 3317 if (index_size) { 3318 if (!info->take_index_buffer_ownership) { 3319 tc_set_resource_reference(&p->info.index.resource, 3320 info->index.resource); 3321 } 3322 tc_add_to_buffer_list(tc, next, info->index.resource); 3323 } 3324 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX); 3325 3326 tc_set_resource_reference(&p->indirect.buffer, indirect->buffer); 3327 tc_set_resource_reference(&p->indirect.indirect_draw_count, 3328 indirect->indirect_draw_count); 3329 p->indirect.count_from_stream_output = NULL; 3330 pipe_so_target_reference(&p->indirect.count_from_stream_output, 3331 indirect->count_from_stream_output); 3332 3333 if (indirect->buffer) 3334 tc_add_to_buffer_list(tc, next, indirect->buffer); 3335 if (indirect->indirect_draw_count) 3336 tc_add_to_buffer_list(tc, next, indirect->indirect_draw_count); 3337 if (indirect->count_from_stream_output) 3338 tc_add_to_buffer_list(tc, next, indirect->count_from_stream_output->buffer); 3339 3340 memcpy(&p->indirect, indirect, sizeof(*indirect)); 3341 p->draw.start = draws[0].start; 3342 3343 /* This must be after tc_add_call, which can flush the batch. */ 3344 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) 3345 tc_add_all_gfx_bindings_to_buffer_list(tc); 3346 return; 3347 } 3348 3349 if (num_draws == 1) { 3350 /* Single draw. */ 3351 if (index_size && has_user_indices) { 3352 unsigned size = draws[0].count * index_size; 3353 struct pipe_resource *buffer = NULL; 3354 unsigned offset; 3355 3356 if (!size) 3357 return; 3358 3359 /* This must be done before adding draw_vbo, because it could generate 3360 * e.g. transfer_unmap and flush partially-uninitialized draw_vbo 3361 * to the driver if it was done afterwards. 3362 */ 3363 u_upload_data(tc->base.stream_uploader, 0, size, 4, 3364 (uint8_t*)info->index.user + draws[0].start * index_size, 3365 &offset, &buffer); 3366 if (unlikely(!buffer)) 3367 return; 3368 3369 struct tc_draw_single *p = drawid_offset > 0 ? 
3370 &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base : 3371 tc_add_call(tc, TC_CALL_draw_single, tc_draw_single); 3372 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX); 3373 p->info.index.resource = buffer; 3374 if (drawid_offset > 0) 3375 ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset; 3376 /* u_threaded_context stores start/count in min/max_index for single draws. */ 3377 p->info.min_index = offset >> util_logbase2(index_size); 3378 p->info.max_index = draws[0].count; 3379 p->index_bias = draws[0].index_bias; 3380 } else { 3381 /* Non-indexed call or indexed with a real index buffer. */ 3382 struct tc_draw_single *p = drawid_offset > 0 ? 3383 &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base : 3384 tc_add_call(tc, TC_CALL_draw_single, tc_draw_single); 3385 if (index_size) { 3386 if (!info->take_index_buffer_ownership) { 3387 tc_set_resource_reference(&p->info.index.resource, 3388 info->index.resource); 3389 } 3390 tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], info->index.resource); 3391 } 3392 if (drawid_offset > 0) 3393 ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset; 3394 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX); 3395 /* u_threaded_context stores start/count in min/max_index for single draws. */ 3396 p->info.min_index = draws[0].start; 3397 p->info.max_index = draws[0].count; 3398 p->index_bias = draws[0].index_bias; 3399 } 3400 3401 /* This must be after tc_add_call, which can flush the batch. */ 3402 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) 3403 tc_add_all_gfx_bindings_to_buffer_list(tc); 3404 return; 3405 } 3406 3407 const int draw_overhead_bytes = sizeof(struct tc_draw_multi); 3408 const int one_draw_slot_bytes = sizeof(((struct tc_draw_multi*)NULL)->slot[0]); 3409 const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes, 3410 sizeof(struct tc_call_base)); 3411 /* Multi draw. */ 3412 if (index_size && has_user_indices) { 3413 struct pipe_resource *buffer = NULL; 3414 unsigned buffer_offset, total_count = 0; 3415 unsigned index_size_shift = util_logbase2(index_size); 3416 uint8_t *ptr = NULL; 3417 3418 /* Get the total count. */ 3419 for (unsigned i = 0; i < num_draws; i++) 3420 total_count += draws[i].count; 3421 3422 if (!total_count) 3423 return; 3424 3425 /* Allocate space for all index buffers. 3426 * 3427 * This must be done before adding draw_vbo, because it could generate 3428 * e.g. transfer_unmap and flush partially-uninitialized draw_vbo 3429 * to the driver if it was done afterwards. 
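 * A single u_upload_alloc covers the indices of every draw; each
 * draw's user indices are then copied into its sub-range below and the
 * start is rewritten in index units as
 * (buffer_offset + offset) >> index_size_shift.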
3430 */ 3431 u_upload_alloc(tc->base.stream_uploader, 0, 3432 total_count << index_size_shift, 4, 3433 &buffer_offset, &buffer, (void**)&ptr); 3434 if (unlikely(!buffer)) 3435 return; 3436 3437 int total_offset = 0; 3438 unsigned offset = 0; 3439 while (num_draws) { 3440 struct tc_batch *next = &tc->batch_slots[tc->next]; 3441 3442 int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots; 3443 /* If there isn't enough place for one draw, try to fill the next one */ 3444 if (nb_slots_left < slots_for_one_draw) 3445 nb_slots_left = TC_SLOTS_PER_BATCH; 3446 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base); 3447 3448 /* How many draws can we fit in the current batch */ 3449 const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes); 3450 3451 struct tc_draw_multi *p = 3452 tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi, 3453 dr); 3454 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX); 3455 3456 if (total_offset == 0) 3457 /* the first slot inherits the reference from u_upload_alloc() */ 3458 p->info.index.resource = buffer; 3459 else 3460 /* all following slots need a new reference */ 3461 tc_set_resource_reference(&p->info.index.resource, buffer); 3462 3463 p->num_draws = dr; 3464 3465 /* Upload index buffers. */ 3466 for (unsigned i = 0; i < dr; i++) { 3467 unsigned count = draws[i + total_offset].count; 3468 3469 if (!count) { 3470 p->slot[i].start = 0; 3471 p->slot[i].count = 0; 3472 p->slot[i].index_bias = 0; 3473 continue; 3474 } 3475 3476 unsigned size = count << index_size_shift; 3477 memcpy(ptr + offset, 3478 (uint8_t*)info->index.user + 3479 (draws[i + total_offset].start << index_size_shift), size); 3480 p->slot[i].start = (buffer_offset + offset) >> index_size_shift; 3481 p->slot[i].count = count; 3482 p->slot[i].index_bias = draws[i + total_offset].index_bias; 3483 offset += size; 3484 } 3485 3486 total_offset += dr; 3487 num_draws -= dr; 3488 } 3489 } else { 3490 int total_offset = 0; 3491 bool take_index_buffer_ownership = info->take_index_buffer_ownership; 3492 while (num_draws) { 3493 struct tc_batch *next = &tc->batch_slots[tc->next]; 3494 3495 int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots; 3496 /* If there isn't enough place for one draw, try to fill the next one */ 3497 if (nb_slots_left < slots_for_one_draw) 3498 nb_slots_left = TC_SLOTS_PER_BATCH; 3499 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base); 3500 3501 /* How many draws can we fit in the current batch */ 3502 const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes); 3503 3504 /* Non-indexed call or indexed with a real index buffer. */ 3505 struct tc_draw_multi *p = 3506 tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi, 3507 dr); 3508 if (index_size) { 3509 if (!take_index_buffer_ownership) { 3510 tc_set_resource_reference(&p->info.index.resource, 3511 info->index.resource); 3512 } 3513 tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], info->index.resource); 3514 } 3515 take_index_buffer_ownership = false; 3516 memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX); 3517 p->num_draws = dr; 3518 memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr); 3519 num_draws -= dr; 3520 3521 total_offset += dr; 3522 } 3523 } 3524 3525 /* This must be after tc_add_*call, which can flush the batch. 
*/ 3526 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) 3527 tc_add_all_gfx_bindings_to_buffer_list(tc); 3528} 3529 3530struct tc_draw_vstate_single { 3531 struct tc_call_base base; 3532 struct pipe_draw_start_count_bias draw; 3533 3534 /* The following states must be together without holes because they are 3535 * compared by draw merging. 3536 */ 3537 struct pipe_vertex_state *state; 3538 uint32_t partial_velem_mask; 3539 struct pipe_draw_vertex_state_info info; 3540}; 3541 3542static bool 3543is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first, 3544 struct tc_draw_vstate_single *next) 3545{ 3546 if (next->base.call_id != TC_CALL_draw_vstate_single) 3547 return false; 3548 3549 return !memcmp(&first->state, &next->state, 3550 offsetof(struct tc_draw_vstate_single, info) + 3551 sizeof(struct pipe_draw_vertex_state_info) - 3552 offsetof(struct tc_draw_vstate_single, state)); 3553} 3554 3555static uint16_t 3556tc_call_draw_vstate_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr) 3557{ 3558 /* Draw call merging. */ 3559 struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single); 3560 struct tc_draw_vstate_single *last = (struct tc_draw_vstate_single *)last_ptr; 3561 struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single); 3562 3563 /* If at least 2 consecutive draw calls can be merged... */ 3564 if (next != last && 3565 is_next_call_a_mergeable_draw_vstate(first, next)) { 3566 /* The maximum number of merged draws is given by the batch size. */ 3567 struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH / 3568 call_size(tc_draw_vstate_single)]; 3569 unsigned num_draws = 2; 3570 3571 draws[0] = first->draw; 3572 draws[1] = next->draw; 3573 3574 /* Find how many other draws can be merged. */ 3575 next = get_next_call(next, tc_draw_vstate_single); 3576 for (; next != last && 3577 is_next_call_a_mergeable_draw_vstate(first, next); 3578 next = get_next_call(next, tc_draw_vstate_single), 3579 num_draws++) 3580 draws[num_draws] = next->draw; 3581 3582 pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask, 3583 first->info, draws, num_draws); 3584 /* Since all draws use the same state, drop all references at once. 
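 * Each queued tc_draw_vstate_single call owns one reference to the vertex
 * state (taken in tc_draw_vertex_state), so num_draws references are
 * released in a single call here.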
*/ 3585 tc_drop_vertex_state_references(first->state, num_draws); 3586 3587 return call_size(tc_draw_vstate_single) * num_draws; 3588 } 3589 3590 pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask, 3591 first->info, &first->draw, 1); 3592 tc_drop_vertex_state_references(first->state, 1); 3593 return call_size(tc_draw_vstate_single); 3594} 3595 3596struct tc_draw_vstate_multi { 3597 struct tc_call_base base; 3598 uint32_t partial_velem_mask; 3599 struct pipe_draw_vertex_state_info info; 3600 unsigned num_draws; 3601 struct pipe_vertex_state *state; 3602 struct pipe_draw_start_count_bias slot[0]; 3603}; 3604 3605static uint16_t 3606tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call, uint64_t *last) 3607{ 3608 struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call; 3609 3610 pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask, 3611 info->info, info->slot, info->num_draws); 3612 tc_drop_vertex_state_references(info->state, 1); 3613 return info->base.num_slots; 3614} 3615 3616static void 3617tc_draw_vertex_state(struct pipe_context *_pipe, 3618 struct pipe_vertex_state *state, 3619 uint32_t partial_velem_mask, 3620 struct pipe_draw_vertex_state_info info, 3621 const struct pipe_draw_start_count_bias *draws, 3622 unsigned num_draws) 3623{ 3624 struct threaded_context *tc = threaded_context(_pipe); 3625 3626 if (num_draws == 1) { 3627 /* Single draw. */ 3628 struct tc_draw_vstate_single *p = 3629 tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single); 3630 p->partial_velem_mask = partial_velem_mask; 3631 p->draw = draws[0]; 3632 p->info.mode = info.mode; 3633 p->info.take_vertex_state_ownership = false; 3634 3635 /* This should always be 0 for simplicity because we assume that 3636 * index_bias doesn't vary. 3637 */ 3638 assert(draws[0].index_bias == 0); 3639 3640 if (!info.take_vertex_state_ownership) 3641 tc_set_vertex_state_reference(&p->state, state); 3642 else 3643 p->state = state; 3644 3645 3646 /* This must be after tc_add_*call, which can flush the batch. */ 3647 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) 3648 tc_add_all_gfx_bindings_to_buffer_list(tc); 3649 return; 3650 } 3651 3652 const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi); 3653 const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]); 3654 const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes, 3655 sizeof(struct tc_call_base)); 3656 /* Multi draw. */ 3657 int total_offset = 0; 3658 bool take_vertex_state_ownership = info.take_vertex_state_ownership; 3659 while (num_draws) { 3660 struct tc_batch *next = &tc->batch_slots[tc->next]; 3661 3662 int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots; 3663 /* If there isn't enough room for one draw, start filling the next batch. */ 3664 if (nb_slots_left < slots_for_one_draw) 3665 nb_slots_left = TC_SLOTS_PER_BATCH; 3666 const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base); 3667 3668 /* How many draws can we fit in the current batch? */ 3669 const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes); 3670 3671 /* Add the draws to the current batch.
*/ 3672 struct tc_draw_vstate_multi *p = 3673 tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr); 3674 3675 if (!take_vertex_state_ownership) 3676 tc_set_vertex_state_reference(&p->state, state); 3677 else 3678 p->state = state; 3679 3680 take_vertex_state_ownership = false; 3681 p->partial_velem_mask = partial_velem_mask; 3682 p->info.mode = info.mode; 3683 p->info.take_vertex_state_ownership = false; 3684 p->num_draws = dr; 3685 memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr); 3686 num_draws -= dr; 3687 3688 total_offset += dr; 3689 } 3690 3691 3692 /* This must be after tc_add_*call, which can flush the batch. */ 3693 if (unlikely(tc->add_all_gfx_bindings_to_buffer_list)) 3694 tc_add_all_gfx_bindings_to_buffer_list(tc); 3695} 3696 3697struct tc_launch_grid_call { 3698 struct tc_call_base base; 3699 struct pipe_grid_info info; 3700}; 3701 3702static uint16_t 3703tc_call_launch_grid(struct pipe_context *pipe, void *call, uint64_t *last) 3704{ 3705 struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info; 3706 3707 pipe->launch_grid(pipe, p); 3708 tc_drop_resource_reference(p->indirect); 3709 return call_size(tc_launch_grid_call); 3710} 3711 3712static void 3713tc_launch_grid(struct pipe_context *_pipe, 3714 const struct pipe_grid_info *info) 3715{ 3716 struct threaded_context *tc = threaded_context(_pipe); 3717 struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid, 3718 tc_launch_grid_call); 3719 assert(info->input == NULL); 3720 3721 tc_set_resource_reference(&p->info.indirect, info->indirect); 3722 memcpy(&p->info, info, sizeof(*info)); 3723 3724 if (info->indirect) 3725 tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], info->indirect); 3726 3727 /* This must be after tc_add_*call, which can flush the batch. 
*/ 3728 if (unlikely(tc->add_all_compute_bindings_to_buffer_list)) 3729 tc_add_all_compute_bindings_to_buffer_list(tc); 3730} 3731 3732static uint16_t 3733tc_call_resource_copy_region(struct pipe_context *pipe, void *call, uint64_t *last) 3734{ 3735 struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region); 3736 3737 pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty, 3738 p->dstz, p->src, p->src_level, &p->src_box); 3739 tc_drop_resource_reference(p->dst); 3740 tc_drop_resource_reference(p->src); 3741 return call_size(tc_resource_copy_region); 3742} 3743 3744static void 3745tc_resource_copy_region(struct pipe_context *_pipe, 3746 struct pipe_resource *dst, unsigned dst_level, 3747 unsigned dstx, unsigned dsty, unsigned dstz, 3748 struct pipe_resource *src, unsigned src_level, 3749 const struct pipe_box *src_box) 3750{ 3751 struct threaded_context *tc = threaded_context(_pipe); 3752 struct threaded_resource *tdst = threaded_resource(dst); 3753 struct tc_resource_copy_region *p = 3754 tc_add_call(tc, TC_CALL_resource_copy_region, 3755 tc_resource_copy_region); 3756 3757 if (dst->target == PIPE_BUFFER) 3758 tc_buffer_disable_cpu_storage(dst); 3759 3760 tc_set_resource_reference(&p->dst, dst); 3761 p->dst_level = dst_level; 3762 p->dstx = dstx; 3763 p->dsty = dsty; 3764 p->dstz = dstz; 3765 tc_set_resource_reference(&p->src, src); 3766 p->src_level = src_level; 3767 p->src_box = *src_box; 3768 3769 if (dst->target == PIPE_BUFFER) { 3770 struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list]; 3771 3772 tc_add_to_buffer_list(tc, next, src); 3773 tc_add_to_buffer_list(tc, next, dst); 3774 3775 util_range_add(&tdst->b, &tdst->valid_buffer_range, 3776 dstx, dstx + src_box->width); 3777 } 3778} 3779 3780struct tc_blit_call { 3781 struct tc_call_base base; 3782 struct pipe_blit_info info; 3783}; 3784 3785static uint16_t 3786tc_call_blit(struct pipe_context *pipe, void *call, uint64_t *last) 3787{ 3788 struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info; 3789 3790 pipe->blit(pipe, blit); 3791 tc_drop_resource_reference(blit->dst.resource); 3792 tc_drop_resource_reference(blit->src.resource); 3793 return call_size(tc_blit_call); 3794} 3795 3796static void 3797tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info) 3798{ 3799 struct threaded_context *tc = threaded_context(_pipe); 3800 struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call); 3801 3802 tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource); 3803 tc_set_resource_reference(&blit->info.src.resource, info->src.resource); 3804 memcpy(&blit->info, info, sizeof(*info)); 3805} 3806 3807struct tc_generate_mipmap { 3808 struct tc_call_base base; 3809 enum pipe_format format; 3810 unsigned base_level; 3811 unsigned last_level; 3812 unsigned first_layer; 3813 unsigned last_layer; 3814 struct pipe_resource *res; 3815}; 3816 3817static uint16_t 3818tc_call_generate_mipmap(struct pipe_context *pipe, void *call, uint64_t *last) 3819{ 3820 struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap); 3821 ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format, 3822 p->base_level, 3823 p->last_level, 3824 p->first_layer, 3825 p->last_layer); 3826 assert(result); 3827 tc_drop_resource_reference(p->res); 3828 return call_size(tc_generate_mipmap); 3829} 3830 3831static bool 3832tc_generate_mipmap(struct pipe_context *_pipe, 3833 struct pipe_resource *res, 3834 enum pipe_format format, 3835 unsigned base_level, 3836 unsigned 
last_level, 3837 unsigned first_layer, 3838 unsigned last_layer) 3839{ 3840 struct threaded_context *tc = threaded_context(_pipe); 3841 struct pipe_context *pipe = tc->pipe; 3842 struct pipe_screen *screen = pipe->screen; 3843 unsigned bind = PIPE_BIND_SAMPLER_VIEW; 3844 3845 if (util_format_is_depth_or_stencil(format)) 3846 bind = PIPE_BIND_DEPTH_STENCIL; 3847 else 3848 bind = PIPE_BIND_RENDER_TARGET; 3849 3850 if (!screen->is_format_supported(screen, format, res->target, 3851 res->nr_samples, res->nr_storage_samples, 3852 bind)) 3853 return false; 3854 3855 struct tc_generate_mipmap *p = 3856 tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap); 3857 3858 tc_set_resource_reference(&p->res, res); 3859 p->format = format; 3860 p->base_level = base_level; 3861 p->last_level = last_level; 3862 p->first_layer = first_layer; 3863 p->last_layer = last_layer; 3864 return true; 3865} 3866 3867struct tc_resource_call { 3868 struct tc_call_base base; 3869 struct pipe_resource *resource; 3870}; 3871 3872static uint16_t 3873tc_call_flush_resource(struct pipe_context *pipe, void *call, uint64_t *last) 3874{ 3875 struct pipe_resource *resource = to_call(call, tc_resource_call)->resource; 3876 3877 pipe->flush_resource(pipe, resource); 3878 tc_drop_resource_reference(resource); 3879 return call_size(tc_resource_call); 3880} 3881 3882static void 3883tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource) 3884{ 3885 struct threaded_context *tc = threaded_context(_pipe); 3886 struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource, 3887 tc_resource_call); 3888 3889 tc_set_resource_reference(&call->resource, resource); 3890} 3891 3892static uint16_t 3893tc_call_invalidate_resource(struct pipe_context *pipe, void *call, uint64_t *last) 3894{ 3895 struct pipe_resource *resource = to_call(call, tc_resource_call)->resource; 3896 3897 pipe->invalidate_resource(pipe, resource); 3898 tc_drop_resource_reference(resource); 3899 return call_size(tc_resource_call); 3900} 3901 3902static void 3903tc_invalidate_resource(struct pipe_context *_pipe, 3904 struct pipe_resource *resource) 3905{ 3906 struct threaded_context *tc = threaded_context(_pipe); 3907 3908 if (resource->target == PIPE_BUFFER) { 3909 /* This can fail, in which case we simply ignore the invalidation request. */ 3910 struct threaded_resource *tbuf = threaded_resource(resource); 3911 tc_touch_buffer(tc, tbuf); 3912 tc_invalidate_buffer(tc, tbuf); 3913 return; 3914 } 3915 3916 struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource, 3917 tc_resource_call); 3918 tc_set_resource_reference(&call->resource, resource); 3919} 3920 3921struct tc_clear { 3922 struct tc_call_base base; 3923 bool scissor_state_set; 3924 uint8_t stencil; 3925 uint16_t buffers; 3926 float depth; 3927 struct pipe_scissor_state scissor_state; 3928 union pipe_color_union color; 3929}; 3930 3931static uint16_t 3932tc_call_clear(struct pipe_context *pipe, void *call, uint64_t *last) 3933{ 3934 struct tc_clear *p = to_call(call, tc_clear); 3935 3936 pipe->clear(pipe, p->buffers, p->scissor_state_set ? 
&p->scissor_state : NULL, &p->color, p->depth, p->stencil); 3937 return call_size(tc_clear); 3938} 3939 3940static void 3941tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state, 3942 const union pipe_color_union *color, double depth, 3943 unsigned stencil) 3944{ 3945 struct threaded_context *tc = threaded_context(_pipe); 3946 struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear); 3947 3948 p->buffers = buffers; 3949 if (scissor_state) 3950 p->scissor_state = *scissor_state; 3951 p->scissor_state_set = !!scissor_state; 3952 p->color = *color; 3953 p->depth = depth; 3954 p->stencil = stencil; 3955} 3956 3957struct tc_clear_render_target { 3958 struct tc_call_base base; 3959 bool render_condition_enabled; 3960 unsigned dstx; 3961 unsigned dsty; 3962 unsigned width; 3963 unsigned height; 3964 union pipe_color_union color; 3965 struct pipe_surface *dst; 3966}; 3967 3968static uint16_t 3969tc_call_clear_render_target(struct pipe_context *pipe, void *call, uint64_t *last) 3970{ 3971 struct tc_clear_render_target *p = to_call(call, tc_clear_render_target); 3972 3973 pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty, p->width, p->height, 3974 p->render_condition_enabled); 3975 tc_drop_surface_reference(p->dst); 3976 return call_size(tc_clear_render_target); 3977} 3978 3979static void 3980tc_clear_render_target(struct pipe_context *_pipe, 3981 struct pipe_surface *dst, 3982 const union pipe_color_union *color, 3983 unsigned dstx, unsigned dsty, 3984 unsigned width, unsigned height, 3985 bool render_condition_enabled) 3986{ 3987 struct threaded_context *tc = threaded_context(_pipe); 3988 struct tc_clear_render_target *p = tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target); 3989 p->dst = NULL; 3990 pipe_surface_reference(&p->dst, dst); 3991 p->color = *color; 3992 p->dstx = dstx; 3993 p->dsty = dsty; 3994 p->width = width; 3995 p->height = height; 3996 p->render_condition_enabled = render_condition_enabled; 3997} 3998 3999 4000struct tc_clear_depth_stencil { 4001 struct tc_call_base base; 4002 bool render_condition_enabled; 4003 float depth; 4004 unsigned clear_flags; 4005 unsigned stencil; 4006 unsigned dstx; 4007 unsigned dsty; 4008 unsigned width; 4009 unsigned height; 4010 struct pipe_surface *dst; 4011}; 4012 4013 4014static uint16_t 4015tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call, uint64_t *last) 4016{ 4017 struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil); 4018 4019 pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil, 4020 p->dstx, p->dsty, p->width, p->height, 4021 p->render_condition_enabled); 4022 tc_drop_surface_reference(p->dst); 4023 return call_size(tc_clear_depth_stencil); 4024} 4025 4026static void 4027tc_clear_depth_stencil(struct pipe_context *_pipe, 4028 struct pipe_surface *dst, unsigned clear_flags, 4029 double depth, unsigned stencil, unsigned dstx, 4030 unsigned dsty, unsigned width, unsigned height, 4031 bool render_condition_enabled) 4032{ 4033 struct threaded_context *tc = threaded_context(_pipe); 4034 struct tc_clear_depth_stencil *p = tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil); 4035 p->dst = NULL; 4036 pipe_surface_reference(&p->dst, dst); 4037 p->clear_flags = clear_flags; 4038 p->depth = depth; 4039 p->stencil = stencil; 4040 p->dstx = dstx; 4041 p->dsty = dsty; 4042 p->width = width; 4043 p->height = height; 4044 p->render_condition_enabled = render_condition_enabled; 4045} 4046 
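/* The clear/copy entry points in this file all follow the same marshalling
 * pattern: the front-end hook packs its arguments into a tc_* struct on the
 * application thread, and the matching tc_call_* function replays it on the
 * driver thread and reports how many call slots it consumed. A minimal
 * sketch of a new call (illustrative only; "example" and pipe->example are
 * hypothetical, and a real call would also need a CALL(example) entry in
 * u_threaded_context_calls.h). Plain arguments are copied by value;
 * resources are referenced so they stay alive until the call executes:
 *
 *    struct tc_example {
 *       struct tc_call_base base;
 *       unsigned value;
 *       struct pipe_resource *res;
 *    };
 *
 *    static uint16_t
 *    tc_call_example(struct pipe_context *pipe, void *call, uint64_t *last)
 *    {
 *       struct tc_example *p = to_call(call, tc_example);
 *
 *       pipe->example(pipe, p->res, p->value);
 *       tc_drop_resource_reference(p->res);
 *       return call_size(tc_example);
 *    }
 *
 *    static void
 *    tc_example(struct pipe_context *_pipe, struct pipe_resource *res,
 *               unsigned value)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       struct tc_example *p = tc_add_call(tc, TC_CALL_example, tc_example);
 *
 *       tc_set_resource_reference(&p->res, res);
 *       p->value = value;
 *    }
 */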
4047struct tc_clear_buffer { 4048 struct tc_call_base base; 4049 uint8_t clear_value_size; 4050 unsigned offset; 4051 unsigned size; 4052 char clear_value[16]; 4053 struct pipe_resource *res; 4054}; 4055 4056static uint16_t 4057tc_call_clear_buffer(struct pipe_context *pipe, void *call, uint64_t *last) 4058{ 4059 struct tc_clear_buffer *p = to_call(call, tc_clear_buffer); 4060 4061 pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value, 4062 p->clear_value_size); 4063 tc_drop_resource_reference(p->res); 4064 return call_size(tc_clear_buffer); 4065} 4066 4067static void 4068tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res, 4069 unsigned offset, unsigned size, 4070 const void *clear_value, int clear_value_size) 4071{ 4072 struct threaded_context *tc = threaded_context(_pipe); 4073 struct threaded_resource *tres = threaded_resource(res); 4074 struct tc_clear_buffer *p = 4075 tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer); 4076 4077 tc_buffer_disable_cpu_storage(res); 4078 4079 tc_set_resource_reference(&p->res, res); 4080 tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], res); 4081 p->offset = offset; 4082 p->size = size; 4083 memcpy(p->clear_value, clear_value, clear_value_size); 4084 p->clear_value_size = clear_value_size; 4085 4086 util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size); 4087} 4088 4089struct tc_clear_texture { 4090 struct tc_call_base base; 4091 unsigned level; 4092 struct pipe_box box; 4093 char data[16]; 4094 struct pipe_resource *res; 4095}; 4096 4097static uint16_t 4098tc_call_clear_texture(struct pipe_context *pipe, void *call, uint64_t *last) 4099{ 4100 struct tc_clear_texture *p = to_call(call, tc_clear_texture); 4101 4102 pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data); 4103 tc_drop_resource_reference(p->res); 4104 return call_size(tc_clear_texture); 4105} 4106 4107static void 4108tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res, 4109 unsigned level, const struct pipe_box *box, const void *data) 4110{ 4111 struct threaded_context *tc = threaded_context(_pipe); 4112 struct tc_clear_texture *p = 4113 tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture); 4114 4115 tc_set_resource_reference(&p->res, res); 4116 p->level = level; 4117 p->box = *box; 4118 memcpy(p->data, data, 4119 util_format_get_blocksize(res->format)); 4120} 4121 4122struct tc_resource_commit { 4123 struct tc_call_base base; 4124 bool commit; 4125 unsigned level; 4126 struct pipe_box box; 4127 struct pipe_resource *res; 4128}; 4129 4130static uint16_t 4131tc_call_resource_commit(struct pipe_context *pipe, void *call, uint64_t *last) 4132{ 4133 struct tc_resource_commit *p = to_call(call, tc_resource_commit); 4134 4135 pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit); 4136 tc_drop_resource_reference(p->res); 4137 return call_size(tc_resource_commit); 4138} 4139 4140static bool 4141tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res, 4142 unsigned level, struct pipe_box *box, bool commit) 4143{ 4144 struct threaded_context *tc = threaded_context(_pipe); 4145 struct tc_resource_commit *p = 4146 tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit); 4147 4148 tc_set_resource_reference(&p->res, res); 4149 p->level = level; 4150 p->box = *box; 4151 p->commit = commit; 4152 return true; /* we don't care about the return value for this call */ 4153} 4154 4155static unsigned 4156tc_init_intel_perf_query_info(struct pipe_context *_pipe) 4157{ 4158 struct 
threaded_context *tc = threaded_context(_pipe); 4159 struct pipe_context *pipe = tc->pipe; 4160 4161 return pipe->init_intel_perf_query_info(pipe); 4162} 4163 4164static void 4165tc_get_intel_perf_query_info(struct pipe_context *_pipe, 4166 unsigned query_index, 4167 const char **name, 4168 uint32_t *data_size, 4169 uint32_t *n_counters, 4170 uint32_t *n_active) 4171{ 4172 struct threaded_context *tc = threaded_context(_pipe); 4173 struct pipe_context *pipe = tc->pipe; 4174 4175 tc_sync(tc); /* n_active vs begin/end_intel_perf_query */ 4176 pipe->get_intel_perf_query_info(pipe, query_index, name, data_size, 4177 n_counters, n_active); 4178} 4179 4180static void 4181tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe, 4182 unsigned query_index, 4183 unsigned counter_index, 4184 const char **name, 4185 const char **desc, 4186 uint32_t *offset, 4187 uint32_t *data_size, 4188 uint32_t *type_enum, 4189 uint32_t *data_type_enum, 4190 uint64_t *raw_max) 4191{ 4192 struct threaded_context *tc = threaded_context(_pipe); 4193 struct pipe_context *pipe = tc->pipe; 4194 4195 pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index, 4196 name, desc, offset, data_size, type_enum, data_type_enum, raw_max); 4197} 4198 4199static struct pipe_query * 4200tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index) 4201{ 4202 struct threaded_context *tc = threaded_context(_pipe); 4203 struct pipe_context *pipe = tc->pipe; 4204 4205 return pipe->new_intel_perf_query_obj(pipe, query_index); 4206} 4207 4208static uint16_t 4209tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last) 4210{ 4211 (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query); 4212 return call_size(tc_query_call); 4213} 4214 4215static bool 4216tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q) 4217{ 4218 struct threaded_context *tc = threaded_context(_pipe); 4219 4220 tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q; 4221 4222 /* assume success, begin failure can be signaled from get_intel_perf_query_data */ 4223 return true; 4224} 4225 4226static uint16_t 4227tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last) 4228{ 4229 pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query); 4230 return call_size(tc_query_call); 4231} 4232 4233static void 4234tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q) 4235{ 4236 struct threaded_context *tc = threaded_context(_pipe); 4237 4238 tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q; 4239} 4240 4241static void 4242tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q) 4243{ 4244 struct threaded_context *tc = threaded_context(_pipe); 4245 struct pipe_context *pipe = tc->pipe; 4246 4247 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */ 4248 pipe->delete_intel_perf_query(pipe, q); 4249} 4250 4251static void 4252tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q) 4253{ 4254 struct threaded_context *tc = threaded_context(_pipe); 4255 struct pipe_context *pipe = tc->pipe; 4256 4257 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */ 4258 pipe->wait_intel_perf_query(pipe, q); 4259} 4260 4261static bool 4262tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q) 4263{ 4264 struct threaded_context *tc = threaded_context(_pipe); 4265 struct pipe_context *pipe = 
tc->pipe; 4266 4267 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */ 4268 return pipe->is_intel_perf_query_ready(pipe, q); 4269} 4270 4271static bool 4272tc_get_intel_perf_query_data(struct pipe_context *_pipe, 4273 struct pipe_query *q, 4274 size_t data_size, 4275 uint32_t *data, 4276 uint32_t *bytes_written) 4277{ 4278 struct threaded_context *tc = threaded_context(_pipe); 4279 struct pipe_context *pipe = tc->pipe; 4280 4281 tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */ 4282 return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written); 4283} 4284 4285/******************************************************************** 4286 * callback 4287 */ 4288 4289struct tc_callback_call { 4290 struct tc_call_base base; 4291 void (*fn)(void *data); 4292 void *data; 4293}; 4294 4295static uint16_t 4296tc_call_callback(UNUSED struct pipe_context *pipe, void *call, uint64_t *last) 4297{ 4298 struct tc_callback_call *p = to_call(call, tc_callback_call); 4299 4300 p->fn(p->data); 4301 return call_size(tc_callback_call); 4302} 4303 4304static void 4305tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data, 4306 bool asap) 4307{ 4308 struct threaded_context *tc = threaded_context(_pipe); 4309 4310 if (asap && tc_is_sync(tc)) { 4311 fn(data); 4312 return; 4313 } 4314 4315 struct tc_callback_call *p = 4316 tc_add_call(tc, TC_CALL_callback, tc_callback_call); 4317 p->fn = fn; 4318 p->data = data; 4319} 4320 4321 4322/******************************************************************** 4323 * create & destroy 4324 */ 4325 4326static void 4327tc_destroy(struct pipe_context *_pipe) 4328{ 4329 struct threaded_context *tc = threaded_context(_pipe); 4330 struct pipe_context *pipe = tc->pipe; 4331 4332 if (tc->base.const_uploader && 4333 tc->base.stream_uploader != tc->base.const_uploader) 4334 u_upload_destroy(tc->base.const_uploader); 4335 4336 if (tc->base.stream_uploader) 4337 u_upload_destroy(tc->base.stream_uploader); 4338 4339 tc_sync(tc); 4340 4341 if (util_queue_is_initialized(&tc->queue)) { 4342 util_queue_destroy(&tc->queue); 4343 4344 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) { 4345 util_queue_fence_destroy(&tc->batch_slots[i].fence); 4346 assert(!tc->batch_slots[i].token); 4347 } 4348 } 4349 4350 slab_destroy_child(&tc->pool_transfers); 4351 assert(tc->batch_slots[tc->next].num_total_slots == 0); 4352 pipe->destroy(pipe); 4353 4354 for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) { 4355 if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence)) 4356 util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence); 4357 util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence); 4358 } 4359 4360 FREE(tc); 4361} 4362 4363static const tc_execute execute_func[TC_NUM_CALLS] = { 4364#define CALL(name) tc_call_##name, 4365#include "u_threaded_context_calls.h" 4366#undef CALL 4367}; 4368 4369void tc_driver_internal_flush_notify(struct threaded_context *tc) 4370{ 4371 /* Allow drivers to call this function even for internal contexts that 4372 * don't have tc. It simplifies drivers. 4373 */ 4374 if (!tc) 4375 return; 4376 4377 /* Signal fences set by tc_batch_execute. */ 4378 for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++) 4379 util_queue_fence_signal(tc->signal_fences_next_flush[i]); 4380 4381 tc->num_signal_fences_next_flush = 0; 4382} 4383 4384/** 4385 * Wrap an existing pipe_context into a threaded_context. 
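 * Threading can be overridden with the GALLIUM_THREAD debug option; if it is
 * disabled (the default when only one CPU is present), the unwrapped
 * pipe_context is returned unchanged.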
4386 * 4387 * \param pipe pipe_context to wrap 4388 * \param parent_transfer_pool parent slab pool set up for creating pipe_- 4389 * transfer objects; the driver should have one 4390 * in pipe_screen. 4391 * \param replace_buffer callback for replacing a pipe_resource's storage 4392 * with another pipe_resource's storage. 4393 * \param options optional TC options/callbacks 4394 * \param out if successful, the threaded_context will be returned here in 4395 * addition to the return value if "out" != NULL 4396 */ 4397struct pipe_context * 4398threaded_context_create(struct pipe_context *pipe, 4399 struct slab_parent_pool *parent_transfer_pool, 4400 tc_replace_buffer_storage_func replace_buffer, 4401 const struct threaded_context_options *options, 4402 struct threaded_context **out) 4403{ 4404 struct threaded_context *tc; 4405 4406 if (!pipe) 4407 return NULL; 4408 4409 if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1)) 4410 return pipe; 4411 4412 tc = CALLOC_STRUCT(threaded_context); 4413 if (!tc) { 4414 pipe->destroy(pipe); 4415 return NULL; 4416 } 4417 4418 if (options) 4419 tc->options = *options; 4420 4421 pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options); 4422 4423 /* The driver context isn't wrapped, so set its "priv" to NULL. */ 4424 pipe->priv = NULL; 4425 4426 tc->pipe = pipe; 4427 tc->replace_buffer_storage = replace_buffer; 4428 tc->map_buffer_alignment = 4429 pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT); 4430 tc->ubo_alignment = 4431 MAX2(pipe->screen->get_param(pipe->screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT), 64); 4432 tc->base.priv = pipe; /* priv points to the wrapped driver context */ 4433 tc->base.screen = pipe->screen; 4434 tc->base.destroy = tc_destroy; 4435 tc->base.callback = tc_callback; 4436 4437 tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader); 4438 if (pipe->stream_uploader == pipe->const_uploader) 4439 tc->base.const_uploader = tc->base.stream_uploader; 4440 else 4441 tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader); 4442 4443 if (!tc->base.stream_uploader || !tc->base.const_uploader) 4444 goto fail; 4445 4446 tc->use_forced_staging_uploads = true; 4447 4448 /* The queue size is the number of batches "waiting". Batches are removed 4449 * from the queue before being executed, so keep one tc_batch slot for that 4450 * execution. Also, keep one unused slot for an unflushed batch. 4451 */ 4452 if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL)) 4453 goto fail; 4454 4455 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) { 4456#if !defined(NDEBUG) && TC_DEBUG >= 1 4457 tc->batch_slots[i].sentinel = TC_SENTINEL; 4458#endif 4459 tc->batch_slots[i].tc = tc; 4460 util_queue_fence_init(&tc->batch_slots[i].fence); 4461 } 4462 for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) 4463 util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence); 4464 4465 list_inithead(&tc->unflushed_queries); 4466 4467 slab_create_child(&tc->pool_transfers, parent_transfer_pool); 4468 4469 /* If you have different limits in each shader stage, set the maximum. 
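 * (The per-stage limits below are currently queried from the fragment stage
 * only.)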
*/ 4470 struct pipe_screen *screen = pipe->screen;; 4471 tc->max_vertex_buffers = 4472 screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS); 4473 tc->max_const_buffers = 4474 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, 4475 PIPE_SHADER_CAP_MAX_CONST_BUFFERS); 4476 tc->max_shader_buffers = 4477 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, 4478 PIPE_SHADER_CAP_MAX_SHADER_BUFFERS); 4479 tc->max_images = 4480 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, 4481 PIPE_SHADER_CAP_MAX_SHADER_IMAGES); 4482 tc->max_samplers = 4483 screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, 4484 PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS); 4485 4486 tc->base.set_context_param = tc_set_context_param; /* always set this */ 4487 4488#define CTX_INIT(_member) \ 4489 tc->base._member = tc->pipe->_member ? tc_##_member : NULL 4490 4491 CTX_INIT(flush); 4492 CTX_INIT(draw_vbo); 4493 CTX_INIT(draw_vertex_state); 4494 CTX_INIT(launch_grid); 4495 CTX_INIT(resource_copy_region); 4496 CTX_INIT(blit); 4497 CTX_INIT(clear); 4498 CTX_INIT(clear_render_target); 4499 CTX_INIT(clear_depth_stencil); 4500 CTX_INIT(clear_buffer); 4501 CTX_INIT(clear_texture); 4502 CTX_INIT(flush_resource); 4503 CTX_INIT(generate_mipmap); 4504 CTX_INIT(render_condition); 4505 CTX_INIT(create_query); 4506 CTX_INIT(create_batch_query); 4507 CTX_INIT(destroy_query); 4508 CTX_INIT(begin_query); 4509 CTX_INIT(end_query); 4510 CTX_INIT(get_query_result); 4511 CTX_INIT(get_query_result_resource); 4512 CTX_INIT(set_active_query_state); 4513 CTX_INIT(create_blend_state); 4514 CTX_INIT(bind_blend_state); 4515 CTX_INIT(delete_blend_state); 4516 CTX_INIT(create_sampler_state); 4517 CTX_INIT(bind_sampler_states); 4518 CTX_INIT(delete_sampler_state); 4519 CTX_INIT(create_rasterizer_state); 4520 CTX_INIT(bind_rasterizer_state); 4521 CTX_INIT(delete_rasterizer_state); 4522 CTX_INIT(create_depth_stencil_alpha_state); 4523 CTX_INIT(bind_depth_stencil_alpha_state); 4524 CTX_INIT(delete_depth_stencil_alpha_state); 4525 CTX_INIT(link_shader); 4526 CTX_INIT(create_fs_state); 4527 CTX_INIT(bind_fs_state); 4528 CTX_INIT(delete_fs_state); 4529 CTX_INIT(create_vs_state); 4530 CTX_INIT(bind_vs_state); 4531 CTX_INIT(delete_vs_state); 4532 CTX_INIT(create_gs_state); 4533 CTX_INIT(bind_gs_state); 4534 CTX_INIT(delete_gs_state); 4535 CTX_INIT(create_tcs_state); 4536 CTX_INIT(bind_tcs_state); 4537 CTX_INIT(delete_tcs_state); 4538 CTX_INIT(create_tes_state); 4539 CTX_INIT(bind_tes_state); 4540 CTX_INIT(delete_tes_state); 4541 CTX_INIT(create_compute_state); 4542 CTX_INIT(bind_compute_state); 4543 CTX_INIT(delete_compute_state); 4544 CTX_INIT(create_vertex_elements_state); 4545 CTX_INIT(bind_vertex_elements_state); 4546 CTX_INIT(delete_vertex_elements_state); 4547 CTX_INIT(set_blend_color); 4548 CTX_INIT(set_stencil_ref); 4549 CTX_INIT(set_sample_mask); 4550 CTX_INIT(set_min_samples); 4551 CTX_INIT(set_clip_state); 4552 CTX_INIT(set_constant_buffer); 4553 CTX_INIT(set_inlinable_constants); 4554 CTX_INIT(set_framebuffer_state); 4555 CTX_INIT(set_polygon_stipple); 4556 CTX_INIT(set_sample_locations); 4557 CTX_INIT(set_scissor_states); 4558 CTX_INIT(set_viewport_states); 4559 CTX_INIT(set_window_rectangles); 4560 CTX_INIT(set_sampler_views); 4561 CTX_INIT(set_tess_state); 4562 CTX_INIT(set_patch_vertices); 4563 CTX_INIT(set_shader_buffers); 4564 CTX_INIT(set_shader_images); 4565 CTX_INIT(set_vertex_buffers); 4566 CTX_INIT(create_stream_output_target); 4567 CTX_INIT(stream_output_target_destroy); 4568 CTX_INIT(set_stream_output_targets); 4569 
CTX_INIT(create_sampler_view); 4570 CTX_INIT(sampler_view_destroy); 4571 CTX_INIT(create_surface); 4572 CTX_INIT(surface_destroy); 4573 CTX_INIT(buffer_map); 4574 CTX_INIT(texture_map); 4575 CTX_INIT(transfer_flush_region); 4576 CTX_INIT(buffer_unmap); 4577 CTX_INIT(texture_unmap); 4578 CTX_INIT(buffer_subdata); 4579 CTX_INIT(texture_subdata); 4580 CTX_INIT(texture_barrier); 4581 CTX_INIT(memory_barrier); 4582 CTX_INIT(resource_commit); 4583 CTX_INIT(create_video_codec); 4584 CTX_INIT(create_video_buffer); 4585 CTX_INIT(set_compute_resources); 4586 CTX_INIT(set_global_binding); 4587 CTX_INIT(get_sample_position); 4588 CTX_INIT(invalidate_resource); 4589 CTX_INIT(get_device_reset_status); 4590 CTX_INIT(set_device_reset_callback); 4591 CTX_INIT(dump_debug_state); 4592 CTX_INIT(set_log_context); 4593 CTX_INIT(emit_string_marker); 4594 CTX_INIT(set_debug_callback); 4595 CTX_INIT(create_fence_fd); 4596 CTX_INIT(fence_server_sync); 4597 CTX_INIT(fence_server_signal); 4598 CTX_INIT(get_timestamp); 4599 CTX_INIT(create_texture_handle); 4600 CTX_INIT(delete_texture_handle); 4601 CTX_INIT(make_texture_handle_resident); 4602 CTX_INIT(create_image_handle); 4603 CTX_INIT(delete_image_handle); 4604 CTX_INIT(make_image_handle_resident); 4605 CTX_INIT(set_frontend_noop); 4606 CTX_INIT(init_intel_perf_query_info); 4607 CTX_INIT(get_intel_perf_query_info); 4608 CTX_INIT(get_intel_perf_query_counter_info); 4609 CTX_INIT(new_intel_perf_query_obj); 4610 CTX_INIT(begin_intel_perf_query); 4611 CTX_INIT(end_intel_perf_query); 4612 CTX_INIT(delete_intel_perf_query); 4613 CTX_INIT(wait_intel_perf_query); 4614 CTX_INIT(is_intel_perf_query_ready); 4615 CTX_INIT(get_intel_perf_query_data); 4616#undef CTX_INIT 4617 4618 if (out) 4619 *out = tc; 4620 4621 tc_begin_next_buffer_list(tc); 4622 return &tc->base; 4623 4624fail: 4625 tc_destroy(&tc->base); 4626 return NULL; 4627} 4628 4629void 4630threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor) 4631{ 4632 uint64_t total_ram; 4633 if (os_get_total_physical_memory(&total_ram)) { 4634 tc->bytes_mapped_limit = total_ram / divisor; 4635 if (sizeof(void*) == 4) 4636 tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL); 4637 } 4638} 4639
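/* Usage sketch (illustrative only; the mydrv_* names and the divisor value
 * are hypothetical, not part of this file). A driver typically wraps its
 * freshly created context before returning it from its create_context hook:
 *
 *    struct pipe_context *
 *    mydrv_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
 *    {
 *       struct pipe_context *pipe =
 *          mydrv_context_create_unwrapped(screen, priv, flags);
 *       struct threaded_context *tc = NULL;
 *
 *       if (!pipe)
 *          return NULL;
 *
 *       pipe = threaded_context_create(pipe,
 *                                      &mydrv_screen(screen)->transfer_pool,
 *                                      mydrv_replace_buffer_storage,
 *                                      NULL, &tc);
 *       if (tc)
 *          threaded_context_init_bytes_mapped_limit(tc, 4);
 *       return pipe;
 *    }
 *
 * "tc" stays NULL when threading is disabled or creation fails, in which
 * case the original (or NULL) context is returned as-is.
 */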